Skip to content

Utils

InternalDataFrameConcat(dfs, **kwargs)

Concatenate dataframes.

Parameters:

Name Type Description Default
dfs Iterable[InternalDataFrame]

The dataframes to concatenate.

required

Returns:

Name Type Description
InternalDataFrame InternalDataFrame

The concatenated dataframe.

Source code in adala/utils/internal_data.py
15
16
17
18
19
20
21
22
23
24
25
26
27
def InternalDataFrameConcat(
    dfs: Iterable[InternalDataFrame], **kwargs
) -> InternalDataFrame:
    """
    Join several dataframes into a single one.

    Thin wrapper that hands ``dfs`` and every extra keyword argument
    straight to ``pd.concat``.

    Args:
        dfs (Iterable[InternalDataFrame]): Dataframes to join.
        **kwargs: Forwarded unchanged to ``pd.concat``.

    Returns:
        InternalDataFrame: Result of the concatenation.
    """
    combined = pd.concat(dfs, **kwargs)
    return combined

print_dataframe(dataframe)

Print dataframe to console.

Source code in adala/utils/logs.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
def print_dataframe(dataframe: InternalDataFrame):
    """
    Render the first rows of a dataframe as a table on the console.

    At most five rows are shown and the index is not displayed. Every column
    label and cell value is converted with ``str`` before being added.
    """
    max_rows = 5
    table = Table(
        show_header=True,
        header_style="bold magenta",
        box=box.SIMPLE_HEAD,
        row_styles=["none", "dim"],
    )

    # One table column per dataframe column.
    for label in dataframe.columns:
        table.add_column(str(label))

    # One table row per dataframe row, limited to the leading rows.
    for cells in dataframe.iloc[:max_rows].values.tolist():
        table.add_row(*map(str, cells))

    console.print(table)

print_error(text)

Print error message to console.

Source code in adala/utils/logs.py
29
30
31
32
33
def print_error(text: str) -> None:
    """
    Print error message to console.

    Args:
        text (str): The message, passed as-is to ``error_console.print``.
    """
    error_console.print(text)

print_series(data)

Print series to console.

Source code in adala/utils/logs.py
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
def print_series(data: InternalSeries):
    """
    Show a series on the console as a one-row table.

    Each index label becomes a column header and the values fill the single
    row beneath; labels and values are converted with ``str``.
    """
    headers = [str(label) for label in data.index]
    cells = [str(item) for item in data]

    table = Table(show_header=True, header_style="bold magenta")
    for header in headers:
        table.add_column(header)
    table.add_row(*cells)

    console.print(table)

print_text(text, style=None, streaming_style=False)

Print text to console with optional style and streaming style.

Source code in adala/utils/logs.py
16
17
18
19
20
21
22
23
24
25
26
def print_text(text: str, style=None, streaming_style=False):
    """
    Write text to the console, either at once or one character at a time.

    Args:
        text (str): Text to display.
        style: Style passed to every ``console.print`` call that emits text.
            Defaults to None.
        streaming_style (bool): When true, characters are printed one by one
            with a 0.01 second pause after each, followed by a final empty
            ``console.print()``. Defaults to False.
    """
    if not streaming_style:
        console.print(text, style=style)
        return

    for symbol in text:
        console.print(symbol, sep="", end="", style=style)
        time.sleep(0.01)
    # Characters were printed with end="", so close the streamed output.
    console.print()

fuzzy_match(x, y, threshold=0.8)

Fuzzy match string values in two series.

Parameters:

Name Type Description Default
x InternalSeries

The first series.

required
y InternalSeries

The second series.

required
threshold float

The threshold to use for fuzzy matching. Defaults to 0.8.

0.8

Returns:

Name Type Description
InternalSeries InternalSeries

The series with fuzzy match results.

Source code in adala/utils/matching.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
def fuzzy_match(x: InternalSeries, y: InternalSeries, threshold=0.8) -> InternalSeries:
    """
    Compare two series pairwise using fuzzy string matching.

    Values from ``x`` and ``y`` are paired by ``x.combine`` and each pair is
    passed to ``_fuzzy_match`` together with ``threshold``.

    Args:
        x (InternalSeries): Series supplying the first value of each pair.
        y (InternalSeries): Series supplying the second value of each pair.
        threshold (float): Threshold handed to ``_fuzzy_match``. Defaults to 0.8.

    Returns:
        InternalSeries: One ``_fuzzy_match`` result per pair of values.
    """

    def compare_pair(left, right):
        # Distinct argument names keep the outer series parameters unshadowed.
        return _fuzzy_match(left, right, threshold)

    return x.combine(y, compare_pair)

match_options(query, options, splitter=None)

Match a query to a list of options. If splitter is not None, the query will be split by the splitter and each part will be matched separately, then joined by the splitter.

Parameters:

Name Type Description Default
query str

The query.

required
options List[str]

The options.

required
splitter str

The splitter. Defaults to None.

None

Returns:

Name Type Description
str str

The matched option.

Source code in adala/utils/matching.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def match_options(query: str, options: List[str], splitter: str = None) -> str:
    """
    Match a query to a list of options.

    Options that occur verbatim inside the query are preferred; if none does,
    every option is a candidate. The candidate most similar to the query
    (by ``difflib.SequenceMatcher`` ratio) wins, and ties go to the earliest
    candidate. If splitter is given (and not empty), the query is split by the
    splitter, each part is matched separately against the same candidates, and
    the matches are joined by the splitter.

    Args:
        query (str): The query.
        options (List[str]): The options. Must not be empty.
        splitter (str): The splitter. Defaults to None.

    Returns:
        str: The matched option, or the splitter-joined matched options when
            a splitter is used.

    Raises:
        ValueError: If ``options`` contains no items.
    """
    # hard constraint: keep only the options that literally appear in the query
    candidates = [item for item in options if item in query]
    if not candidates:
        # nothing appears verbatim - make the best guess among all options
        candidates = list(options)
    if not candidates:
        # Without this check the failure surfaces as an opaque
        # "max() arg is an empty sequence" from the scoring step.
        raise ValueError("options must contain at least one item to match against")

    # soft constraint: pick the most similar candidate for each query part
    parts = query.split(splitter) if splitter else [query]
    matched_items = [_closest_option(part, candidates) for part in parts]

    if splitter:
        return splitter.join(matched_items)
    return matched_items[0]


def _closest_option(text: str, candidates: List[str]) -> str:
    """Return the candidate most similar to ``text``; the first one wins ties."""
    return max(
        candidates,
        key=lambda item: difflib.SequenceMatcher(None, text, item).ratio(),
    )