| | from statsmodels.stats.multicomp import pairwise_tukeyhsd |
| | from string import ascii_lowercase, ascii_uppercase |
| | import tqdm |
| | import pandas as pd |
| | from itertools import product |
| |
|
| | |
# Letters available for labelling CLD columns, in assignment order:
# the 52 single ASCII letters (a-z, then A-Z) followed by the same 52 letters
# prefixed with "@", for 104 labels in total.
single_chars = list(ascii_lowercase) + list(ascii_uppercase)
# NOTE: despite the name, these are "@"-prefixed two-character labels.
underscore_chars = ["@" + char for char in single_chars]
CLD_ALPHABET = single_chars + underscore_chars
| |
|
def asserts_non_significance(col: list[bool], i: int, j: int) -> bool:
    """Tell whether a column displays treatments i and j as non-significant.

    A column stands for one letter. When both treatments carry that letter
    (both entries are true), the display claims the pair is *not*
    significantly different.

    Parameters
    ----------
    col : list[bool]
        Current column; entry k is true when treatment k carries the letter
    i : int
        Index of first treatment
    j : int
        Index of second treatment

    Returns
    -------
    bool
        True when both ``col[i]`` and ``col[j]`` are set
    """
    return col[i] and col[j]
| |
|
def insert(column: list[bool], i: int, j: int) -> tuple[list[bool], list[bool]]:
    """Split a column into two copies that separate treatments i and j.

    The input column is left untouched. One copy has entry i cleared and the
    other has entry j cleared, so neither copy contains both treatments.

    Parameters
    ----------
    column : list[bool]
        Original column
    i : int
        Index of first group
    j : int
        Index of second group

    Returns
    -------
    list[bool], list[bool]
        The copy with entry i set to False, then the copy with entry j set
        to False
    """
    col_i = column.copy()
    col_j = column.copy()
    col_i[i] = False
    col_j[j] = False
    return col_i, col_j
| |
|
def can_be_absorbed(new_col: list[bool], ref_col: list[bool]) -> bool:
    """Check whether an existing column makes the new column redundant.

    An old column absorbs the new column if it has a true entry in every row
    in which the new column has one. A new column with no true entries is
    absorbed by any reference column.

    Parameters
    ----------
    new_col : list[bool]
        Column to add
    ref_col : list[bool]
        Old column we are checking if it can absorb new_col

    Returns
    -------
    bool
        Whether the old column can absorb new_col
    """
    # Only rows set in new_col matter; ref_col may have extra rows set.
    return all(ref_col[row] for row, is_set in enumerate(new_col) if is_set)
| |
|
def absorb(new_column: list[bool], columns: list[list[bool]]) -> list[list[bool]]:
    """Add a column to the solution unless an existing column absorbs it.

    Parameters
    ----------
    new_column : list[bool]
        Column to add
    columns : list[list[bool]]
        Existing columns

    Returns
    -------
    list[list[bool]]
        ``columns`` itself when some existing column absorbs ``new_column``,
        otherwise a new list consisting of ``columns`` followed by
        ``new_column``. The input list is never modified.
    """
    if any(can_be_absorbed(new_column, existing) for existing in columns):
        return columns
    return columns + [new_column]
| |
|
def cld(comparisons: pd.DataFrame) -> dict[str, list[str]]:
    """
    Compact Letter Display

    Compute the compact letter display using the insert-absorb algorithm.

    See the following papers for more information:
    (1) https://doi.org/10.1016/j.csda.2006.09.035
    (2) https://doi.org/10.1198/1061860043515

    Parameters
    ----------
    comparisons : pd.DataFrame
        A DataFrame containing the pairwise comparisons produced by:
        https://www.statsmodels.org/dev/generated/statsmodels.stats.multicomp.pairwise_tukeyhsd.html
        The columns ``group1``, ``group2`` and ``reject`` are read; ``reject``
        is used as a boolean mask selecting the significantly different pairs.

    Returns
    -------
    dict[str, list[str]]
        For every group appearing in ``group1`` or ``group2``, the sorted list
        of letters (entries of ``CLD_ALPHABET``) assigned to it. Two groups
        sharing a letter are displayed as not significantly different.

    Raises
    ------
    IndexError
        If the display needs more letters than ``CLD_ALPHABET`` provides.
    """
    # Order-preserving de-duplication: groups are indexed in order of first
    # appearance instead of in arbitrary set-iteration order.
    unique_groups = list(
        dict.fromkeys(
            [*comparisons["group1"].unique(), *comparisons["group2"].unique()]
        )
    )
    unique_groups_indices = {g: i for i, g in enumerate(unique_groups)}

    sig_diff = comparisons[comparisons["reject"]]
    print(f"Found {len(sig_diff)} significantly different pairs")

    # Start from a single column (letter) shared by every group.
    solution = [[True] * len(unique_groups)]

    pairs = zip(sig_diff["group1"], sig_diff["group2"])
    for group1, group2 in tqdm.tqdm(pairs, total=len(sig_diff)):
        i = unique_groups_indices[group1]
        j = unique_groups_indices[group2]

        # Keep splitting until no column shows this pair as non-significant.
        # The scan restarts after each split because the column list changed.
        while True:
            offending = next(
                (
                    idx
                    for idx, col in enumerate(solution)
                    if asserts_non_significance(col, i, j)
                ),
                None,
            )
            if offending is None:
                break

            # Insert: replace the offending column by two copies, each
            # missing one member of the pair.
            col_i, col_j = insert(solution.pop(offending), i, j)

            # Absorb: drop a copy that an existing column already covers.
            solution = absorb(col_i, solution)
            solution = absorb(col_j, solution)

    if len(solution) > len(CLD_ALPHABET):
        raise IndexError(
            f"Compact letter display needs {len(solution)} letters but only "
            f"{len(CLD_ALPHABET)} are available"
        )

    # Collect letters as list items so that two-character letters such as
    # "@a" stay intact instead of being split into single characters.
    letters: list[list[str]] = [[] for _ in unique_groups]
    for letter, col in zip(CLD_ALPHABET, solution):
        for idx, has_letter in enumerate(col):
            if has_letter:
                letters[idx].append(letter)

    return {
        group: sorted(group_letters)
        for group, group_letters in zip(unique_groups, letters)
    }
| |
|
| | from statsmodels.stats.multicomp import pairwise_tukeyhsd |
| | import tqdm |
| |
|
def add_cld_to_leaderboard(
    leaderboard: pd.DataFrame,
    scores: pd.DataFrame,
    metric: str,
):
    """Add the compact letter display to the leaderboard.

    Runs a pairwise Tukey HSD test on ``scores[metric]`` grouped by ``user``,
    converts the result to a compact letter display and stores it in a new
    ``CLD`` column. Letters are re-labelled in leaderboard order, so the
    first method listed receives the first letter of ``CLD_ALPHABET``.

    Parameters
    ----------
    leaderboard : pd.DataFrame
        The full leaderboard DataFrame. Must contain a ``user`` column; it is
        modified in place (a ``CLD`` column is added or overwritten).
    scores : pd.DataFrame
        The **raw** scores DataFrame, with all replicates from bootstrapping.
        Must contain the columns ``Sample``, ``user`` and ``metric``. The
        caller's frame is not modified.
    metric : str
        The metric label to calculate CLD for.

    Returns
    -------
    pd.DataFrame
        The same ``leaderboard`` object with the ``CLD`` column set. Methods
        absent from the comparison results get the string ``"None"``.
    """
    ordered_methods = leaderboard["user"].values

    # Copy explicitly so the float cast writes to our own frame and not to a
    # slice of the caller's DataFrame (avoids pandas' chained-assignment
    # warning and any accidental write-through).
    scores = scores[["Sample", "user", metric]].copy()
    scores[metric] = scores[metric].astype(float)

    stats = pairwise_tukeyhsd(endog=scores[metric], groups=scores["user"])

    # The first row of the summary table holds the column names; the
    # remaining rows are the pairwise comparisons.
    summary_table = stats.summary()
    data = summary_table.data[1:]
    columns = summary_table.data[0]
    comparisons = pd.DataFrame(data=data, columns=columns)

    letter_code = cld(comparisons)

    # Position in CLD_ALPHABET given to each original letter, assigned in
    # order of first appearance while walking the leaderboard top to bottom.
    letter_rank: dict[str, int] = {}
    cld_column = []
    for method in ordered_methods:
        letters = letter_code.get(str(method))
        if letters is None:
            cld_column.append("None")
            continue

        for letter in letters:
            letter_rank.setdefault(letter, len(letter_rank))

        # Sort after re-labelling so each cell reads in alphabet order
        # (e.g. "ab" rather than "ba").
        ranks = sorted(letter_rank[letter] for letter in letters)
        cld_column.append("".join(CLD_ALPHABET[rank] for rank in ranks))

    leaderboard["CLD"] = cld_column

    return leaderboard