Utils

`InternalDataFrameConcat(dfs, **kwargs)`

Concatenate dataframes. Any additional keyword arguments (`**kwargs`) are forwarded to `pd.concat`.

Parameters:

Name	Type	Description	Default
`dfs`	`Iterable[InternalDataFrame]`	The dataframes to concatenate.	required

Returns:

Name	Type	Description
`InternalDataFrame`	`InternalDataFrame`	The concatenated dataframe.

Source code in adala/utils/internal_data.py

def InternalDataFrameConcat(
    dfs: Iterable[InternalDataFrame], **kwargs
) -> InternalDataFrame:
    """
    Join several dataframes into a single one via `pd.concat`.

    Args:
        dfs (Iterable[InternalDataFrame]): The dataframes to concatenate.
        **kwargs: Extra keyword arguments passed straight through to `pd.concat`.

    Returns:
        InternalDataFrame: The concatenated dataframe.
    """
    combined = pd.concat(dfs, **kwargs)
    return combined

`print_dataframe(dataframe)`

Print dataframe to console.

Source code in adala/utils/logs.py

def print_dataframe(dataframe: InternalDataFrame, num_rows: int = 5):
    """
    Print the leading rows of a dataframe to console as a table.

    Column labels are used as the table headers; the dataframe index is not
    shown.

    Args:
        dataframe (InternalDataFrame): The dataframe to print.
        num_rows (int): Maximum number of leading rows to print. Defaults to 5.
    """
    table = Table(
        show_header=True,
        header_style="bold magenta",
        # Alternate plain and dimmed rows so neighbouring rows are easy to
        # tell apart.
        row_styles=["none", "dim"],
        box=box.SIMPLE_HEAD,
    )

    for column in dataframe.columns:
        table.add_column(str(column))

    # Cells are stringified before being added to the table.
    for values in dataframe.iloc[:num_rows].values.tolist():
        table.add_row(*[str(value) for value in values])

    console.print(table)

`print_error(text)`

Print error message to console.

Source code in adala/utils/logs.py

def print_error(text: str) -> None:
    """
    Print error message to console.

    Args:
        text (str): The message, passed unchanged to `error_console.print`.

    NOTE(review): `error_console` is defined elsewhere in this module;
    presumably it is a Rich console dedicated to error output. Confirm its
    target stream and styling at its definition.
    """
    error_console.print(text)

`print_series(data)`

Print series to console.

Source code in adala/utils/logs.py

def print_series(data: InternalSeries):
    """
    Print series to console as a one-row table.

    Each index label becomes a column header and the corresponding value is
    placed in the single row beneath it.

    Args:
        data (InternalSeries): The series to print.
    """
    headers = [str(label) for label in data.index]
    cells = [str(item) for item in data]

    table = Table(show_header=True, header_style="bold magenta")
    for header in headers:
        table.add_column(header)

    # All values go into one row, aligned with the headers above.
    table.add_row(*cells)

    console.print(table)

`print_text(text, style=None, streaming_style=False)`

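Print text to the console with an optional style. When `streaming_style` is true, the text is printed one character at a time with a short delay between characters.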

Source code in adala/utils/logs.py

def print_text(text: str, style=None, streaming_style=False):
    """
    Print text to console, either all at once or character by character.

    Args:
        text (str): The text to print.
        style: Style forwarded to `console.print`. Defaults to None.
        streaming_style (bool): If true, emit one character at a time with a
            10 ms pause after each, then finish with a newline.
    """
    if not streaming_style:
        console.print(text, style=style)
        return

    for ch in text:
        # No separator or line ending, so the characters form one line.
        console.print(ch, sep="", end="", style=style)
        time.sleep(0.01)
    # Terminate the streamed line.
    console.print()

`fuzzy_match(x, y, threshold=0.8)`

Fuzzy match string values in two series.

Parameters:

Name	Type	Description	Default
`x`	`InternalSeries`	The first series.	required
`y`	`InternalSeries`	The second series.	required
`threshold`	`float`	The threshold to use for fuzzy matching. Defaults to 0.8.	`0.8`

Returns:

Name	Type	Description
`InternalSeries`	`InternalSeries`	The series with fuzzy match results.

Source code in adala/utils/matching.py

def fuzzy_match(x: InternalSeries, y: InternalSeries, threshold=0.8) -> InternalSeries:
    """
    Fuzzy match the values of two series pairwise.

    Args:
        x (InternalSeries): The first series.
        y (InternalSeries): The second series.
        threshold (float): The threshold to use for fuzzy matching. Defaults to 0.8.

    Returns:
        InternalSeries: The series with fuzzy match results, one per pair of
            values combined from `x` and `y`.
    """

    def _compare(left, right):
        # Delegate each pair to the module's scalar matcher.
        return _fuzzy_match(left, right, threshold)

    return x.combine(y, _compare)

`match_options(query, options, splitter=None)`

Match a query to a list of options. If splitter is not None, the query will be split by the splitter and each part will be matched separately, then joined by the splitter.

Parameters:

Name	Type	Description	Default
`query`	`str`	The query.	required
`options`	`List[str]`	The options.	required
`splitter`	`str`	The splitter. Defaults to None.	`None`

Returns:

Name	Type	Description
`str`	`str`	The matched option.

Source code in adala/utils/matching.py

def match_options(query: str, options: List[str], splitter: str = None) -> str:
    """
    Match a query to a list of options.
    If splitter is not None (or empty), the query will be split by the splitter and each part will be matched separately, then joined by the splitter.

    Args:
        query (str): The query.
        options (List[str]): The options. Must contain at least one item.
        splitter (str): The splitter. Defaults to None.

    Returns:
        str: The matched option (or the matched options joined by the splitter).

    Raises:
        ValueError: If `options` is empty.
    """
    # Materialise once so any iterable can be scanned repeatedly and the
    # emptiness check below is reliable.
    choices = list(options)
    if not choices:
        raise ValueError("match_options() requires at least one item in options")

    # Hard constraint: prefer options that literally occur in the query.
    # If none do, fall back to all options and make a best guess.
    candidates = [item for item in choices if item in query] or choices

    parts = query.split(splitter) if splitter else [query]

    # Soft constraint: for each part pick the most similar candidate.
    # `max` returns the first candidate with the highest ratio, so ties are
    # resolved in favour of the earliest option.
    matched_items = [
        max(
            candidates,
            key=lambda item, part=part: difflib.SequenceMatcher(
                None, part, item
            ).ratio(),
        )
        for part in parts
    ]

    if splitter:
        return splitter.join(matched_items)
    return matched_items[0]