# open-model-evolution/graphs/leaderboard.py
import pandas as pd
from dash import html, dcc
from dash_iconify import DashIconify
import dash_mantine_components as dmc
import base64
import countryflag
button_style = {
"display": "inline-block",
"marginBottom": "10px",
"marginRight": "15px",
"marginTop": "30px",
"padding": "6px 16px",
"backgroundColor": "#082030",
"color": "white",
"borderRadius": "6px",
"textDecoration": "none",
"fontWeight": "bold",
"fontSize": "14px",
}
company_icon_map = {
"google": "../assets/icons/google.png",
"distilbert": "../assets/images/hf.svg",
"sentence-transformers": "../assets/images/hf.svg",
"facebook": "../assets/icons/meta.png",
"openai": "../assets/icons/openai.png",
"amazon": "../assets/icons/amazon.png",
"microsoft": "../assets/icons/microsoft.png",
}
country_emoji_fallback = {
"User": "πŸ‘€",
"Organization": "🏒",
"Model": "πŸ“¦",
}
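# Metadata columns to surface for each leaderboard grouping (keyed by group_col).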
meta_cols_map = {
"org_country_single": ["org_country_single", "total_downloads"],
"author": [
"org_country_single",
"author",
"total_downloads",
],
"derived_author": [
"org_country_single",
"derived_author",
"total_downloads",
],
"model": [
"org_country_single",
"author",
"derived_author",
"merged_modality",
"total_downloads",
],
}
# Chip renderer
def chip(text, bg_color="#F0F0F0"):
return html.Span(
text,
style={
"backgroundColor": bg_color,
"padding": "4px 10px",
"borderRadius": "12px",
"margin": "2px",
"display": "inline-flex",
"alignItems": "center",
"fontSize": "14px",
},
)
# Progress bar for % of total
def progress_bar(percent, bar_color="#AC482A"):
return html.Div(
style={
"position": "relative",
"backgroundColor": "#E0E0E0",
"borderRadius": "8px",
"height": "20px",
"width": "100%",
"overflow": "hidden",
},
children=[
html.Div(
style={
"backgroundColor": bar_color,
"width": f"{percent}%",
"height": "100%",
"borderRadius": "8px",
"transition": "width 0.5s",
}
),
html.Div(
f"{percent:.1f}%",
style={
"position": "absolute",
"top": 0,
"left": "50%",
"transform": "translateX(-50%)",
"color": "black",
"fontWeight": "bold",
"fontSize": "12px",
"lineHeight": "20px",
"textAlign": "center",
},
),
],
)
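# Example (illustrative): render a 42.5% bar inside a table cell.
#   cell = html.Td(progress_bar(42.5, bar_color="#AC482A"))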
# Helper to convert DataFrame to CSV and encode for download
def df_to_download_link(df, filename):
csv_string = df.to_csv(index=False)
b64 = base64.b64encode(csv_string.encode()).decode()
return html.Div(
html.A(
children=dmc.ActionIcon(
DashIconify(icon="mdi:download", width=24),
size="lg",
color="#082030",
),
id=f"download-{filename}",
download=f"{filename}.csv",
href=f"data:text/csv;base64,{b64}",
target="_blank",
title="Download CSV",
style={
"padding": "6px 12px",
"display": "inline-flex",
"alignItems": "center",
"justifyContent": "center",
},
),
style={"textAlign": "right"},
)
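# Example (illustrative sketch; the DataFrame contents here are hypothetical):
#   demo_df = pd.DataFrame({"Name": ["bert-base"], "Total Value": [1200000]})
#   download_link = df_to_download_link(demo_df, "top_models")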
# Helper to get popover content for each metadata type
def get_metadata_popover_content(icon, name, meta_type):
popover_texts = {
"country": f"Country: {name}",
"author": f"Author/Organization: {name}",
"downloads": f"Total downloads: {name}",
"modality": f"Modality: {name}",
}
return popover_texts.get(meta_type, name)
# Chip renderer with hovercard
def chip_with_hovercard(text, bg_color="#F0F0F0", meta_type=None, icon=None):
hovercard_content = get_metadata_popover_content(icon, text, meta_type)
return dmc.HoverCard(
width="auto",
shadow="md",
position="top",
children=[
dmc.HoverCardTarget(
html.Span(
text,
style={
"backgroundColor": bg_color,
"padding": "4px 10px",
"borderRadius": "12px",
"margin": "2px",
"display": "inline-flex",
"alignItems": "center",
"fontSize": "14px",
"cursor": "pointer",
"transition": "background-color 0.15s",
},
# Add a class for hover effect
className="chip-hover-darken"
)
),
dmc.HoverCardDropdown(dmc.Text(hovercard_content, size="sm")),
],
)
# Render multiple chips in one row, each with popover
def render_chips(metadata_list, chip_color):
chips = []
for icon, name, meta_type in metadata_list:
if isinstance(icon, str) and icon.endswith((".png", ".jpg", ".jpeg", ".svg")):
chips.append(
dmc.HoverCard(
width=220,
shadow="md",
position="top",
children=[
dmc.HoverCardTarget(
html.Span(
[
html.Img(
src=icon,
style={"height": "18px", "marginRight": "6px"},
),
name,
],
style={
"backgroundColor": chip_color,
"padding": "4px 10px",
"borderRadius": "12px",
"margin": "2px",
"display": "inline-flex",
"alignItems": "left",
"fontSize": "14px",
"cursor": "pointer",
},
)
),
dmc.HoverCardDropdown(
dmc.Text(
get_metadata_popover_content(icon, name, meta_type),
size="sm",
)
),
],
)
)
else:
chips.append(
chip_with_hovercard(f"{icon} {name}", chip_color, meta_type, icon)
)
return html.Div(
chips, style={"display": "flex", "flexWrap": "wrap", "justifyContent": "left"}
)
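# Example (illustrative): each entry is an (icon, name, meta_type) tuple; the icon may be
# an emoji string or an image path such as the ones in company_icon_map.
#   render_chips(
#       [("🌍", "International/Online", "country"), ("⬇️", "1.2 million", "downloads")],
#       chip_color="#F0F0F0",
#   )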
def render_table_content(
df, download_df, chip_color, bar_color="#AC482A", filename="data"
):
return html.Div(
[
# Add download button above the table
df_to_download_link(download_df, filename),
# Wrap the table in a horizontal scroll container so the table can be wide
html.Div(
# scroll wrapper
html.Table(
[
html.Thead(
html.Tr(
[
html.Th(
"Rank",
className="rank-col",
style={
"backgroundColor": "#F0F0F0",
"textAlign": "left",
},
),
html.Th(
"Name",
className="name-col",
style={
"backgroundColor": "#F0F0F0",
"textAlign": "left",
},
),
html.Th(
"Metadata",
className="metadata-col",
style={
"backgroundColor": "#F0F0F0",
"textAlign": "left",
"marginRight": "10px",
},
),
html.Th(
"% of Total",
className="percent-col",
style={
"backgroundColor": "#F0F0F0",
"textAlign": "left",
},
),
]
)
),
html.Tbody(
[
html.Tr(
[
html.Td(idx + 1, style={"textAlign": "center"}),
html.Td(row["Name"], className="name-cell", style={"textAlign": "left"}),
html.Td(render_chips(row["Metadata"], chip_color), className="metadata-cell", style={"textAlign": "left", "whiteSpace": "normal", "wordBreak": "break-word"}),
html.Td(
progress_bar(row["% of total"], bar_color),
className="percent-cell",
style={"textAlign": "center", "minWidth": "180px", "padding": "8px"},
),
]
)
for idx, row in df.iterrows()
]
),
],
# allow the table to be wider than its container (minWidth prevents squish)
style={"borderCollapse": "collapse", "width": "100%", "minWidth": "980px", "tableLayout": "auto"},
className="leaderboard-table",
),
className="leaderboard-scroll-wrapper",
style={"overflowX": "auto", "-webkit-overflow-scrolling": "touch", "width": "100%"},
),
]
)
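# Example (illustrative sketch; `display_df` and `download_df` are assumed to be the pair
# returned by get_top_n_leaderboard below, so `display_df` carries Name/Metadata/"% of total"):
#   table = render_table_content(
#       display_df, download_df, chip_color="#F0F0F0", bar_color="#AC482A", filename="authors"
#   )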
# Build the top-N leaderboard from a pandas DataFrame (e.g. the result of a DuckDB query)
def get_top_n_leaderboard(filtered_df, group_col, top_n=10, derived_author_toggle=True):
"""
Get top N entries for a leaderboard
Args:
filtered_df: Pandas DataFrame (already filtered by time from DuckDB query)
group_col: Column to group by
top_n: Number of top entries to return
derived_author_toggle: If True, attribute to model uploader (derived_author); if False, attribute to original model creator (author)
Returns:
tuple: (display_df, download_df)
"""
# Group by and get top N
top = (
filtered_df.groupby(group_col)[["total_downloads", "percent_of_total"]]
.sum()
.nlargest(top_n, columns="total_downloads")
.reset_index()
.rename(
columns={
group_col: "Name",
"total_downloads": "Total Value",
"percent_of_total": "% of total",
}
)
)
# Create a downloadable version of the leaderboard
download_top = top.copy()
download_top["Total Value"] = download_top["Total Value"].astype(int)
download_top["% of total"] = download_top["% of total"].round(2)
# All relevant metadata columns
meta_cols = meta_cols_map.get(group_col, [])
# Collect all metadata per top n for each category (country, author, model)
meta_map = {}
download_map = {}
for name in top["Name"]:
name_data = filtered_df[filtered_df[group_col] == name]
meta_map[name] = {}
download_map[name] = {}
for col in meta_cols:
if col in name_data.columns:
unique_vals = name_data[col].unique()
meta_map[name][col] = list(unique_vals)
download_map[name][col] = list(unique_vals)
# Function to build metadata chips
def build_metadata(nm):
meta = meta_map.get(nm, {})
chips = []
# Countries
for c in meta.get("org_country_single", []):
if c == "United States of America":
c = "USA"
if c == "user":
c = "User"
try:
flag_emoji = countryflag.getflag(c)
if not flag_emoji or flag_emoji == c:
flag_emoji = country_emoji_fallback.get(c, "🌍")
except Exception:
flag_emoji = country_emoji_fallback.get(c, "🌍")
chips.append((flag_emoji, c, "country"))
# Author - use derived_author_toggle to determine which column
author_key = "derived_author" if derived_author_toggle else "author"
for a in meta.get(author_key, []):
icon = company_icon_map.get(a, "")
if icon == "":
if meta.get("merged_country_groups_single", ["User"])[0] != "User":
icon = "🏒"
else:
icon = "πŸ‘€"
chips.append((icon, a, "author"))
# Modality
for m in meta.get("merged_modality", []):
if pd.notna(m):
chips.append(("", m, "modality"))
# Total downloads
for d in meta.get("total_downloads", []):
formatted_downloads = format_large_number(d)
chips.append(("⬇️", formatted_downloads, "downloads"))
return chips
# Function to create downloadable dataframe metadata
def build_download_metadata(nm):
meta = download_map.get(nm, {})
download_info = {}
for col in meta_cols:
if col not in meta or not meta[col]:
continue
vals = meta.get(col, [])
if vals:
download_info[col] = ", ".join(str(v) for v in vals if pd.notna(v))
else:
download_info[col] = ""
return download_info
# Apply metadata builder to top dataframe
top["Metadata"] = top["Name"].astype(object).apply(build_metadata)
# Capitalize "user" back to "User" for display
top["Name"] = top["Name"].replace("user", "User")
# Build download dataframe with metadata
download_info_list = [build_download_metadata(nm) for nm in download_top["Name"]]
download_info_df = pd.DataFrame(download_info_list)
download_top = pd.concat([download_top, download_info_df], axis=1)
return top[["Name", "Metadata", "% of total"]], download_top
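# Example (illustrative sketch; `raw_df` is a hypothetical DataFrame that already contains
# the group_col, "total_downloads" and "percent_of_total" columns this function expects):
#   display_df, download_df = get_top_n_leaderboard(
#       raw_df, group_col="derived_author", top_n=10, derived_author_toggle=True
#   )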
def get_top_n_from_duckdb(
con, group_col, top_n=10, time_filter=None, view="all_downloads"
):
"""
Query DuckDB directly to get top N entries with minimal data transfer
Args:
con: DuckDB connection object
group_col: Column to group by
top_n: Number of top entries
time_filter: Optional tuple of (start_timestamp, end_timestamp) in Unix seconds
view: Name of the DuckDB table/view to query (defaults to "all_downloads")
Returns:
Pandas DataFrame with only the rows needed for top N
"""
# Build time filter clause
time_clause = ""
if time_filter:
start = pd.to_datetime(time_filter[0], unit="s")
end = pd.to_datetime(time_filter[1], unit="s")
time_clause = f"WHERE time >= '{start}' AND time <= '{end}'"
# If grouping by country, group by the transformed country column
if group_col == "org_country_single":
group_expr = """CASE
WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
WHEN org_country_single IN ('International', 'Online', 'Online?') THEN 'International/Online'
ELSE org_country_single
END"""
else:
group_expr = group_col
# When grouping by derived_author, resolve its country from rows where that name appears as the original author
if group_col == "derived_author":
query = f"""
WITH base_data AS (
SELECT
{group_expr} AS group_key,
CASE
WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
WHEN org_country_single IN ('International', 'Online') THEN 'International/Online'
ELSE org_country_single
END AS org_country_single,
author,
derived_author,
merged_country_groups_single,
merged_modality,
downloads,
model
FROM {view}
{time_clause}
),
-- Create a lookup table for derived_author -> country
author_country_lookup AS (
SELECT DISTINCT
author,
FIRST_VALUE(org_country_single) OVER (PARTITION BY author ORDER BY downloads DESC) AS author_country
FROM base_data
WHERE author IS NOT NULL
),
total_downloads_cte AS (
SELECT SUM(downloads) AS total_downloads_all
FROM base_data
),
top_items AS (
SELECT
b.group_key AS name,
SUM(b.downloads) AS total_downloads,
ROUND(SUM(b.downloads) * 100.0 / t.total_downloads_all, 2) AS percent_of_total,
COALESCE(acl.author_country, ANY_VALUE(b.org_country_single)) AS org_country_single,
ANY_VALUE(b.author) AS author,
ANY_VALUE(b.derived_author) AS derived_author,
ANY_VALUE(b.merged_country_groups_single) AS merged_country_groups_single,
ANY_VALUE(b.merged_modality) AS merged_modality,
ANY_VALUE(b.model) AS model
FROM base_data b
CROSS JOIN total_downloads_cte t
LEFT JOIN author_country_lookup acl ON b.group_key = acl.author
GROUP BY b.group_key, acl.author_country, t.total_downloads_all
)
SELECT *
FROM top_items
ORDER BY total_downloads DESC
LIMIT {top_n};
"""
else:
query = f"""
WITH base_data AS (
SELECT
{group_expr} AS group_key,
CASE
WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
WHEN org_country_single IN ('International', 'Online') THEN 'International/Online'
ELSE org_country_single
END AS org_country_single,
author,
derived_author,
merged_country_groups_single,
merged_modality,
downloads,
model
FROM {view}
{time_clause}
),
total_downloads_cte AS (
SELECT SUM(downloads) AS total_downloads_all
FROM base_data
),
top_items AS (
SELECT
b.group_key AS name,
SUM(b.downloads) AS total_downloads,
ROUND(SUM(b.downloads) * 100.0 / t.total_downloads_all, 2) AS percent_of_total,
ANY_VALUE(b.org_country_single) AS org_country_single,
ANY_VALUE(b.author) AS author,
ANY_VALUE(b.derived_author) AS derived_author,
ANY_VALUE(b.merged_country_groups_single) AS merged_country_groups_single,
ANY_VALUE(b.merged_modality) AS merged_modality,
ANY_VALUE(b.model) AS model
FROM base_data b
CROSS JOIN total_downloads_cte t
GROUP BY b.group_key, t.total_downloads_all
)
SELECT *
FROM top_items
ORDER BY total_downloads DESC
LIMIT {top_n};
"""
try:
return con.execute(query).fetchdf()
except Exception as e:
print(f"Error querying DuckDB: {e}")
return pd.DataFrame()
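# Example (illustrative sketch; assumes an open DuckDB connection `con` exposing the
# `all_downloads` view used as the default, and a Unix-timestamp time window):
#   import duckdb, time
#   con = duckdb.connect("downloads.duckdb")  # hypothetical database file
#   window = (time.time() - 30 * 24 * 3600, time.time())
#   top_countries = get_top_n_from_duckdb(
#       con, "org_country_single", top_n=10, time_filter=window, view="all_downloads"
#   )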
def format_large_number(n):
"""Shorten large numbers, e.g. 5,000,000 -> '5 million'."""
if n >= 1_000_000_000:
return f"{n / 1_000_000_000:.1f} billion"
elif n >= 1_000_000:
return f"{n / 1_000_000:.1f} million"
elif n >= 1_000:
return f"{n / 1_000:.1f}k"
else:
return str(int(n))
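# Examples of the thresholds above:
#   format_large_number(950)            -> "950"
#   format_large_number(12_500)         -> "12.5k"
#   format_large_number(5_000_000)      -> "5.0 million"
#   format_large_number(1_300_000_000)  -> "1.3 billion"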