import pandas as pd from dash import html, dcc from dash_iconify import DashIconify import dash_mantine_components as dmc import base64 import countryflag button_style = { "display": "inline-block", "marginBottom": "10px", "marginRight": "15px", "marginTop": "30px", "padding": "6px 16px", "backgroundColor": "#082030", "color": "white", "borderRadius": "6px", "textDecoration": "none", "fontWeight": "bold", "fontSize": "14px", } company_icon_map = { "google": "../assets/icons/google.png", "distilbert": "../assets/images/hf.svg", "sentence-transformers": "../assets/images/hf.svg", "facebook": "../assets/icons/meta.png", "openai": "../assets/icons/openai.png", "amazon": "../assets/icons/amazon.png", "microsoft": "../assets/icons/microsoft.png", } country_emoji_fallback = { "User": "👤", "Organization": "🏢", "Model": "📦", } meta_cols_map = { "org_country_single": ["org_country_single", "total_downloads"], "author": [ "org_country_single", "author", "total_downloads", ], "derived_author": [ "org_country_single", "derived_author", "total_downloads", ], "model": [ "org_country_single", "author", "derived_author", "merged_modality", "total_downloads", ], } # Chip renderer def chip(text, bg_color="#F0F0F0"): return html.Span( text, style={ "backgroundColor": bg_color, "padding": "4px 10px", "borderRadius": "12px", "margin": "2px", "display": "inline-flex", "alignItems": "center", "fontSize": "14px", }, ) # Progress bar for % of total def progress_bar(percent, bar_color="#AC482A"): return html.Div( style={ "position": "relative", "backgroundColor": "#E0E0E0", "borderRadius": "8px", "height": "20px", "width": "100%", "overflow": "hidden", }, children=[ html.Div( style={ "backgroundColor": bar_color, "width": f"{percent}%", "height": "100%", "borderRadius": "8px", "transition": "width 0.5s", } ), html.Div( f"{percent:.1f}%", style={ "position": "absolute", "top": 0, "left": "50%", "transform": "translateX(-50%)", "color": "black", "fontWeight": "bold", "fontSize": "12px", "lineHeight": "20px", "textAlign": "center", }, ), ], ) # Helper to convert DataFrame to CSV and encode for download def df_to_download_link(df, filename): csv_string = df.to_csv(index=False) b64 = base64.b64encode(csv_string.encode()).decode() return html.Div( html.A( children=dmc.ActionIcon( DashIconify(icon="mdi:download", width=24), size="lg", color="#082030", ), id=f"download-{filename}", download=f"{filename}.csv", href=f"data:text/csv;base64,{b64}", target="_blank", title="Download CSV", style={ "padding": "6px 12px", "display": "inline-flex", "alignItems": "center", "justifyContent": "center", }, ), style={"textAlign": "right"}, ) # Helper to get popover content for each metadata type def get_metadata_popover_content(icon, name, meta_type): popover_texts = { "country": f"Country: {name}", "author": f"Author/Organization: {name}", "downloads": f"Total downloads: {name}", "modality": f"Modality: {name}", } return popover_texts.get(meta_type, name) # Chip renderer with hovercard def chip_with_hovercard(text, bg_color="#F0F0F0", meta_type=None, icon=None): hovercard_content = get_metadata_popover_content(icon, text, meta_type) return dmc.HoverCard( width="auto", shadow="md", position="top", children=[ dmc.HoverCardTarget( html.Span( text, style={ "backgroundColor": bg_color, "padding": "4px 10px", "borderRadius": "12px", "margin": "2px", "display": "inline-flex", "alignItems": "center", "fontSize": "14px", "cursor": "pointer", "transition": "background-color 0.15s", }, # Add a class for hover effect className="chip-hover-darken" ) ), dmc.HoverCardDropdown(dmc.Text(hovercard_content, size="sm")), ], ) # Render multiple chips in one row, each with popover def render_chips(metadata_list, chip_color): chips = [] for icon, name, meta_type in metadata_list: if isinstance(icon, str) and icon.endswith((".png", ".jpg", ".jpeg", ".svg")): chips.append( dmc.HoverCard( width=220, shadow="md", position="top", children=[ dmc.HoverCardTarget( html.Span( [ html.Img( src=icon, style={"height": "18px", "marginRight": "6px"}, ), name, ], style={ "backgroundColor": chip_color, "padding": "4px 10px", "borderRadius": "12px", "margin": "2px", "display": "inline-flex", "alignItems": "left", "fontSize": "14px", "cursor": "pointer", }, ) ), dmc.HoverCardDropdown( dmc.Text( get_metadata_popover_content(icon, name, meta_type), size="sm", ) ), ], ) ) else: chips.append( chip_with_hovercard(f"{icon} {name}", chip_color, meta_type, icon) ) return html.Div( chips, style={"display": "flex", "flexWrap": "wrap", "justifyContent": "left"} ) def render_table_content( df, download_df, chip_color, bar_color="#AC482A", filename="data" ): return html.Div( [ # Add download button above the table df_to_download_link(download_df, filename), # Wrap the table in a horizontal scroll container so the table can be wide html.Div( # scroll wrapper html.Table( [ html.Thead( html.Tr( [ html.Th( "Rank", className="rank-col", style={ "backgroundColor": "#F0F0F0", "textAlign": "left", }, ), html.Th( "Name", className="name-col", style={ "backgroundColor": "#F0F0F0", "textAlign": "left", }, ), html.Th( "Metadata", className="metadata-col", style={ "backgroundColor": "#F0F0F0", "textAlign": "left", "marginRight": "10px", }, ), html.Th( "% of Total", className="percent-col", style={ "backgroundColor": "#F0F0F0", "textAlign": "left", }, ), ] ) ), html.Tbody( [ html.Tr( [ html.Td(idx + 1, style={"textAlign": "center"}), html.Td(row["Name"], className="name-cell", style={"textAlign": "left"}), html.Td(render_chips(row["Metadata"], chip_color), className="metadata-cell", style={"textAlign": "left", "whiteSpace": "normal", "wordBreak": "break-word"}), html.Td( progress_bar(row["% of total"], bar_color), className="percent-cell", style={"textAlign": "center", "minWidth": "180px", "padding": "8px"}, ), ] ) for idx, row in df.iterrows() ] ), ], # allow the table to be wider than its container (minWidth prevents squish) style={"borderCollapse": "collapse", "width": "100%", "minWidth": "980px", "tableLayout": "auto"}, className="leaderboard-table", ), className="leaderboard-scroll-wrapper", style={"overflowX": "auto", "-webkit-overflow-scrolling": "touch", "width": "100%"}, ), ] ) # Function to get top N leaderboard (now accepts pandas DataFrame from DuckDB query) def get_top_n_leaderboard(filtered_df, group_col, top_n=10, derived_author_toggle=True): """ Get top N entries for a leaderboard Args: filtered_df: Pandas DataFrame (already filtered by time from DuckDB query) group_col: Column to group by top_n: Number of top entries to return derived_author_toggle: If True, attribute to model uploader (derived_author); if False, attribute to original model creator (author) Returns: tuple: (display_df, download_df) """ # Group by and get top N top = ( filtered_df.groupby(group_col)[["total_downloads", "percent_of_total"]] .sum() .nlargest(top_n, columns="total_downloads") .reset_index() .rename( columns={ group_col: "Name", "total_downloads": "Total Value", "percent_of_total": "% of total", } ) ) # Create a downloadable version of the leaderboard download_top = top.copy() download_top["Total Value"] = download_top["Total Value"].astype(int) download_top["% of total"] = download_top["% of total"].round(2) # All relevant metadata columns meta_cols = meta_cols_map.get(group_col, []) # Collect all metadata per top n for each category (country, author, model) meta_map = {} download_map = {} for name in top["Name"]: name_data = filtered_df[filtered_df[group_col] == name] meta_map[name] = {} download_map[name] = {} for col in meta_cols: if col in name_data.columns: unique_vals = name_data[col].unique() meta_map[name][col] = list(unique_vals) download_map[name][col] = list(unique_vals) # Function to build metadata chips def build_metadata(nm): meta = meta_map.get(nm, {}) chips = [] # Countries for c in meta.get("org_country_single", []): if c == "United States of America": c = "USA" if c == "user": c = "User" try: flag_emoji = countryflag.getflag(c) if not flag_emoji or flag_emoji == c: flag_emoji = country_emoji_fallback.get(c, "🌍") except Exception: flag_emoji = country_emoji_fallback.get(c, "🌍") chips.append((flag_emoji, c, "country")) # Add downloads chip for country (only once) # Author - use derived_author_toggle to determine which column author_key = "derived_author" if derived_author_toggle else "author" for a in meta.get(author_key, []): icon = company_icon_map.get(a, "") if icon == "": if meta.get("merged_country_groups_single", ["User"])[0] != "User": icon = "🏢" else: icon = "👤" chips.append((icon, a, "author")) # Modality for m in meta.get("merged_modality", []): if pd.notna(m): chips.append(("", m, "modality")) # Total downloads for d in meta.get("total_downloads", []): formatted_downloads = format_large_number(d) chips.append(("⬇️", formatted_downloads, "downloads")) return chips # Function to create downloadable dataframe metadata def build_download_metadata(nm): meta = download_map.get(nm, {}) download_info = {} for col in meta_cols: if col not in meta or not meta[col]: continue vals = meta.get(col, []) if vals: download_info[col] = ", ".join(str(v) for v in vals if pd.notna(v)) else: download_info[col] = "" return download_info # Apply metadata builder to top dataframe top["Metadata"] = top["Name"].astype(object).apply(build_metadata) # Capitalize "user" back to "User" for display top["Name"] = top["Name"].replace("user", "User") # Build download dataframe with metadata download_info_list = [build_download_metadata(nm) for nm in download_top["Name"]] download_info_df = pd.DataFrame(download_info_list) download_top = pd.concat([download_top, download_info_df], axis=1) return top[["Name", "Metadata", "% of total"]], download_top def get_top_n_from_duckdb( con, group_col, top_n=10, time_filter=None, view="all_downloads" ): """ Query DuckDB directly to get top N entries with minimal data transfer Args: con: DuckDB connection object group_col: Column to group by top_n: Number of top entries time_filter: Optional tuple of (start_timestamp, end_timestamp) Returns: Pandas DataFrame with only the rows needed for top N """ # Build time filter clause time_clause = "" if time_filter: start = pd.to_datetime(time_filter[0], unit="s") end = pd.to_datetime(time_filter[1], unit="s") time_clause = f"WHERE time >= '{start}' AND time <= '{end}'" # If grouping by country, group by the transformed country column if group_col == "org_country_single": group_expr = """CASE WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America' WHEN org_country_single IN ('International', 'Online', 'Online?') THEN 'International/Online' ELSE org_country_single END""" else: group_expr = group_col # When grouping by derived_author, lookup the country where derived_author = author if group_col == "derived_author": query = f""" WITH base_data AS ( SELECT {group_expr} AS group_key, CASE WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America' WHEN org_country_single IN ('International', 'Online') THEN 'International/Online' ELSE org_country_single END AS org_country_single, author, derived_author, merged_country_groups_single, merged_modality, downloads, model FROM {view} {time_clause} ), -- Create a lookup table for derived_author -> country author_country_lookup AS ( SELECT DISTINCT author, FIRST_VALUE(org_country_single) OVER (PARTITION BY author ORDER BY downloads DESC) AS author_country FROM base_data WHERE author IS NOT NULL ), total_downloads_cte AS ( SELECT SUM(downloads) AS total_downloads_all FROM base_data ), top_items AS ( SELECT b.group_key AS name, SUM(b.downloads) AS total_downloads, ROUND(SUM(b.downloads) * 100.0 / t.total_downloads_all, 2) AS percent_of_total, COALESCE(acl.author_country, ANY_VALUE(b.org_country_single)) AS org_country_single, ANY_VALUE(b.author) AS author, ANY_VALUE(b.derived_author) AS derived_author, ANY_VALUE(b.merged_country_groups_single) AS merged_country_groups_single, ANY_VALUE(b.merged_modality) AS merged_modality, ANY_VALUE(b.model) AS model FROM base_data b CROSS JOIN total_downloads_cte t LEFT JOIN author_country_lookup acl ON b.group_key = acl.author GROUP BY b.group_key, acl.author_country, t.total_downloads_all ) SELECT * FROM top_items ORDER BY total_downloads DESC LIMIT {top_n}; """ else: query = f""" WITH base_data AS ( SELECT {group_expr} AS group_key, CASE WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America' WHEN org_country_single IN ('International', 'Online') THEN 'International/Online' ELSE org_country_single END AS org_country_single, author, derived_author, merged_country_groups_single, merged_modality, downloads, model FROM {view} {time_clause} ), total_downloads_cte AS ( SELECT SUM(downloads) AS total_downloads_all FROM base_data ), top_items AS ( SELECT b.group_key AS name, SUM(b.downloads) AS total_downloads, ROUND(SUM(b.downloads) * 100.0 / t.total_downloads_all, 2) AS percent_of_total, ANY_VALUE(b.org_country_single) AS org_country_single, ANY_VALUE(b.author) AS author, ANY_VALUE(b.derived_author) AS derived_author, ANY_VALUE(b.merged_country_groups_single) AS merged_country_groups_single, ANY_VALUE(b.merged_modality) AS merged_modality, ANY_VALUE(b.model) AS model FROM base_data b CROSS JOIN total_downloads_cte t GROUP BY b.group_key, t.total_downloads_all ) SELECT * FROM top_items ORDER BY total_downloads DESC LIMIT {top_n}; """ try: return con.execute(query).fetchdf() except Exception as e: print(f"Error querying DuckDB: {e}") return pd.DataFrame() def format_large_number(n): """Shorten large numbers, e.g. 5,000,000 -> '5 million'.""" if n >= 1_000_000_000: return f"{n / 1_000_000_000:.1f} billion" elif n >= 1_000_000: return f"{n / 1_000_000:.1f} million" elif n >= 1_000: return f"{n / 1_000:.1f}k" else: return str(int(n))