design overhaul and add switch functionality
Browse files- app.py +403 -108
- assets/images/Hf-logo-with-title.svg +0 -9
- assets/images/{dpi-logo.svg → dpi.svg} +2 -2
- assets/images/hf.svg +1 -0
- assets/styles.css +123 -0
- graphs/leaderboard.py +134 -304
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -4,49 +4,62 @@ import dash_mantine_components as dmc
|
|
| 4 |
import duckdb
|
| 5 |
import time
|
| 6 |
from graphs.leaderboard import (
|
| 7 |
-
|
| 8 |
get_top_n_leaderboard,
|
| 9 |
render_table_content,
|
| 10 |
)
|
|
|
|
| 11 |
|
| 12 |
# Initialize the app
|
| 13 |
app = Dash()
|
| 14 |
server = app.server
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
hf_parquet_url = "https://huggingface.co/datasets/emsesc/open_model_evolution_data/resolve/main/filtered_df.parquet"
|
| 22 |
-
|
| 23 |
-
print(f"Attempting to connect to dataset from Hugging Face Hub: {HF_DATASET_ID}")
|
| 24 |
-
try:
|
| 25 |
-
overall_start_time = time.time()
|
| 26 |
-
|
| 27 |
# Install and load httpfs extension for remote file access
|
| 28 |
con.execute("INSTALL httpfs;")
|
| 29 |
con.execute("LOAD httpfs;")
|
| 30 |
-
|
| 31 |
# Create a view that references the remote parquet file
|
| 32 |
con.execute(f"""
|
| 33 |
-
CREATE OR REPLACE VIEW
|
| 34 |
-
SELECT * FROM read_parquet('{
|
| 35 |
""")
|
| 36 |
-
|
| 37 |
-
# Get column list and basic info
|
| 38 |
-
columns = con.execute("DESCRIBE filtered_df").fetchdf()
|
| 39 |
-
print("Columns:", columns['column_name'].tolist())
|
| 40 |
-
|
| 41 |
# Get time range for slider
|
| 42 |
-
time_range = con.execute(
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
print(msg)
|
| 48 |
except Exception as e:
|
| 49 |
-
err_msg = f"Failed to load dataset. Error: {e}"
|
| 50 |
print(err_msg)
|
| 51 |
raise
|
| 52 |
|
|
@@ -54,6 +67,18 @@ except Exception as e:
|
|
| 54 |
start_ts = int(start_dt.timestamp())
|
| 55 |
end_ts = int(end_dt.timestamp())
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
marks = []
|
| 58 |
# Add start label (e.g. "Jan 2020")
|
| 59 |
marks.append({"value": start_ts, "label": start_dt.strftime("%b %Y")})
|
|
@@ -66,6 +91,59 @@ for yr in range(start_dt.year, end_dt.year + 1):
|
|
| 66 |
# Add end label (e.g. "Dec 2024")
|
| 67 |
marks.append({"value": end_ts, "label": end_dt.strftime("%b %Y")})
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
# Create a dcc slider for time range selection by year
|
| 70 |
time_slider = dmc.RangeSlider(
|
| 71 |
id="time-slider",
|
|
@@ -80,11 +158,14 @@ time_slider = dmc.RangeSlider(
|
|
| 80 |
size="md",
|
| 81 |
radius="xl",
|
| 82 |
marks=marks,
|
| 83 |
-
style={"width": "
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
| 85 |
)
|
| 86 |
|
| 87 |
-
#
|
| 88 |
app.layout = dmc.MantineProvider(
|
| 89 |
theme={
|
| 90 |
"colorScheme": "light",
|
|
@@ -92,6 +173,7 @@ app.layout = dmc.MantineProvider(
|
|
| 92 |
"fontFamily": "Inter, sans-serif",
|
| 93 |
},
|
| 94 |
children=[
|
|
|
|
| 95 |
html.Div(
|
| 96 |
[
|
| 97 |
# Header
|
|
@@ -129,7 +211,7 @@ app.layout = dmc.MantineProvider(
|
|
| 129 |
html.A(
|
| 130 |
children=[
|
| 131 |
html.Img(
|
| 132 |
-
src="assets/images/dpi
|
| 133 |
style={
|
| 134 |
"height": "28px",
|
| 135 |
"verticalAlign": "middle",
|
|
@@ -140,12 +222,13 @@ app.layout = dmc.MantineProvider(
|
|
| 140 |
],
|
| 141 |
href="https://www.dataprovenance.org/",
|
| 142 |
target="_blank",
|
|
|
|
| 143 |
style={
|
| 144 |
"display": "inline-block",
|
| 145 |
"padding": "6px 14px",
|
| 146 |
"fontSize": 13,
|
| 147 |
-
"color": "#
|
| 148 |
-
|
| 149 |
"borderRadius": "18px",
|
| 150 |
"fontWeight": "700",
|
| 151 |
"textDecoration": "none",
|
|
@@ -155,22 +238,51 @@ app.layout = dmc.MantineProvider(
|
|
| 155 |
html.A(
|
| 156 |
children=[
|
| 157 |
html.Img(
|
| 158 |
-
src="assets/images/
|
| 159 |
style={
|
| 160 |
"height": "30px",
|
| 161 |
"verticalAlign": "middle",
|
| 162 |
},
|
| 163 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
],
|
| 165 |
href="https://huggingface.co/",
|
| 166 |
target="_blank",
|
|
|
|
| 167 |
style={
|
| 168 |
"display": "inline-flex",
|
| 169 |
"padding": "6px 14px",
|
| 170 |
"alignItems": "center",
|
| 171 |
-
"
|
| 172 |
"borderRadius": "18px",
|
| 173 |
"textDecoration": "none",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
},
|
| 175 |
),
|
| 176 |
],
|
|
@@ -206,29 +318,8 @@ app.layout = dmc.MantineProvider(
|
|
| 206 |
"marginBottom": 20,
|
| 207 |
},
|
| 208 |
),
|
| 209 |
-
# Button
|
| 210 |
html.Div(
|
| 211 |
-
children=
|
| 212 |
-
html.Button(
|
| 213 |
-
"Read the paper",
|
| 214 |
-
id="my-button",
|
| 215 |
-
style={
|
| 216 |
-
"padding": "10px 20px",
|
| 217 |
-
"fontSize": 16,
|
| 218 |
-
"margin": "0 auto",
|
| 219 |
-
"display": "block",
|
| 220 |
-
"backgroundColor": "#AC482A",
|
| 221 |
-
"color": "white",
|
| 222 |
-
"border": "none",
|
| 223 |
-
"borderRadius": "5px",
|
| 224 |
-
"cursor": "pointer",
|
| 225 |
-
},
|
| 226 |
-
),
|
| 227 |
-
],
|
| 228 |
-
style={"textAlign": "center", "marginBottom": 20},
|
| 229 |
-
),
|
| 230 |
-
html.Div(
|
| 231 |
-
children="Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s...",
|
| 232 |
style={
|
| 233 |
"fontSize": 14,
|
| 234 |
"marginTop": 18,
|
|
@@ -268,6 +359,42 @@ app.layout = dmc.MantineProvider(
|
|
| 268 |
],
|
| 269 |
mb=10,
|
| 270 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
html.Span(
|
| 272 |
id="global-toggle-status",
|
| 273 |
style={
|
|
@@ -290,8 +417,74 @@ app.layout = dmc.MantineProvider(
|
|
| 290 |
},
|
| 291 |
),
|
| 292 |
time_slider,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
],
|
| 294 |
-
style={
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 295 |
),
|
| 296 |
],
|
| 297 |
style={
|
|
@@ -329,7 +522,27 @@ app.layout = dmc.MantineProvider(
|
|
| 329 |
"borderBottom": "3px solid #082030",
|
| 330 |
},
|
| 331 |
children=[
|
| 332 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
],
|
| 334 |
),
|
| 335 |
dcc.Tab(
|
|
@@ -350,7 +563,27 @@ app.layout = dmc.MantineProvider(
|
|
| 350 |
"borderBottom": "3px solid #082030",
|
| 351 |
},
|
| 352 |
children=[
|
| 353 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
],
|
| 355 |
),
|
| 356 |
dcc.Tab(
|
|
@@ -371,7 +604,27 @@ app.layout = dmc.MantineProvider(
|
|
| 371 |
"borderBottom": "3px solid #082030",
|
| 372 |
},
|
| 373 |
children=[
|
| 374 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 375 |
],
|
| 376 |
),
|
| 377 |
],
|
|
@@ -381,7 +634,7 @@ app.layout = dmc.MantineProvider(
|
|
| 381 |
"borderRadius": "18px",
|
| 382 |
"padding": "32px",
|
| 383 |
"marginTop": "12px",
|
| 384 |
-
"marginBottom": "
|
| 385 |
"marginLeft": "50px",
|
| 386 |
"marginRight": "50px",
|
| 387 |
},
|
|
@@ -396,30 +649,21 @@ app.layout = dmc.MantineProvider(
|
|
| 396 |
],
|
| 397 |
)
|
| 398 |
|
|
|
|
| 399 |
# Callbacks for interactivity
|
| 400 |
# -- helper utilities to consolidate duplicated callback logic --
|
| 401 |
-
def _get_filtered_top_n_from_duckdb(slider_value, group_col, top_n):
|
| 402 |
"""
|
| 403 |
Query DuckDB directly to get top N entries with metadata
|
| 404 |
This minimizes data transfer by doing aggregation in DuckDB
|
| 405 |
"""
|
| 406 |
# Build time filter clause
|
| 407 |
-
|
| 408 |
if slider_value and len(slider_value) == 2:
|
| 409 |
start = pd.to_datetime(slider_value[0], unit="s")
|
| 410 |
end = pd.to_datetime(slider_value[1], unit="s")
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
# Apply country replacements in the query
|
| 414 |
-
country_case = """
|
| 415 |
-
CASE
|
| 416 |
-
WHEN org_country_single = 'HF' THEN 'United States of America'
|
| 417 |
-
WHEN org_country_single = 'International' THEN 'International/Online'
|
| 418 |
-
WHEN org_country_single = 'Online' THEN 'International/Online'
|
| 419 |
-
ELSE org_country_single
|
| 420 |
-
END as org_country_single
|
| 421 |
-
"""
|
| 422 |
-
|
| 423 |
# Build the aggregation query to get top N with all needed metadata
|
| 424 |
# This query groups by the target column and aggregates downloads
|
| 425 |
# while collecting all metadata we need for chips
|
|
@@ -427,35 +671,63 @@ def _get_filtered_top_n_from_duckdb(slider_value, group_col, top_n):
|
|
| 427 |
WITH base_data AS (
|
| 428 |
SELECT
|
| 429 |
{group_col},
|
| 430 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 431 |
author,
|
| 432 |
merged_country_groups_single,
|
| 433 |
merged_modality,
|
| 434 |
downloads,
|
| 435 |
-
estimated_parameters,
|
| 436 |
model
|
| 437 |
-
FROM
|
| 438 |
-
{
|
| 439 |
),
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
FROM base_data
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 448 |
)
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
FROM
|
| 452 |
-
|
| 453 |
-
|
| 454 |
"""
|
| 455 |
-
|
| 456 |
return con.execute(query).fetchdf()
|
| 457 |
|
| 458 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 459 |
# Normalize label on first load
|
| 460 |
if current_label is None:
|
| 461 |
current_label = default_label
|
|
@@ -472,22 +744,28 @@ def _leaderboard_callback_logic(n_clicks, slider_value, current_label, group_col
|
|
| 472 |
top_n, new_label = 10, "▼ Show Top 50"
|
| 473 |
|
| 474 |
# Get filtered and aggregated data directly from DuckDB
|
| 475 |
-
df_filtered = _get_filtered_top_n_from_duckdb(slider_value, group_col, top_n)
|
| 476 |
-
|
| 477 |
# Process the already-filtered data
|
| 478 |
df, download_df = get_top_n_leaderboard(df_filtered, group_col, top_n)
|
| 479 |
-
return render_table_content(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 480 |
# -- end helpers --
|
| 481 |
|
|
|
|
| 482 |
# Callbacks for interactivity (modularized)
|
| 483 |
@app.callback(
|
| 484 |
Output("top_countries-table", "children"),
|
| 485 |
Output("top_countries-toggle", "children"),
|
| 486 |
Input("top_countries-toggle", "n_clicks"),
|
| 487 |
Input("time-slider", "value"),
|
|
|
|
| 488 |
State("top_countries-toggle", "children"),
|
| 489 |
)
|
| 490 |
-
def update_top_countries(n_clicks, slider_value, current_label):
|
| 491 |
return _leaderboard_callback_logic(
|
| 492 |
n_clicks,
|
| 493 |
slider_value,
|
|
@@ -496,34 +774,40 @@ def update_top_countries(n_clicks, slider_value, current_label):
|
|
| 496 |
filename="top_countries",
|
| 497 |
default_label="▼ Show Top 50",
|
| 498 |
chip_color="#F0F9FF",
|
|
|
|
| 499 |
)
|
| 500 |
|
|
|
|
| 501 |
@app.callback(
|
| 502 |
Output("top_developers-table", "children"),
|
| 503 |
Output("top_developers-toggle", "children"),
|
| 504 |
Input("top_developers-toggle", "n_clicks"),
|
| 505 |
Input("time-slider", "value"),
|
|
|
|
| 506 |
State("top_developers-toggle", "children"),
|
| 507 |
)
|
| 508 |
-
def update_top_developers(n_clicks, slider_value, current_label):
|
| 509 |
return _leaderboard_callback_logic(
|
| 510 |
n_clicks,
|
| 511 |
slider_value,
|
| 512 |
current_label,
|
| 513 |
group_col="author",
|
| 514 |
filename="top_developers",
|
| 515 |
-
default_label="▼ Show
|
| 516 |
chip_color="#F0F9FF",
|
|
|
|
| 517 |
)
|
| 518 |
|
|
|
|
| 519 |
@app.callback(
|
| 520 |
Output("top_models-table", "children"),
|
| 521 |
Output("top_models-toggle", "children"),
|
| 522 |
Input("top_models-toggle", "n_clicks"),
|
| 523 |
Input("time-slider", "value"),
|
|
|
|
| 524 |
State("top_models-toggle", "children"),
|
| 525 |
)
|
| 526 |
-
def update_top_models(n_clicks, slider_value, current_label):
|
| 527 |
return _leaderboard_callback_logic(
|
| 528 |
n_clicks,
|
| 529 |
slider_value,
|
|
@@ -532,18 +816,29 @@ def update_top_models(n_clicks, slider_value, current_label):
|
|
| 532 |
filename="top_models",
|
| 533 |
default_label="▼ Show More",
|
| 534 |
chip_color="#F0F9FF",
|
|
|
|
| 535 |
)
|
| 536 |
|
|
|
|
| 537 |
@app.callback(
|
| 538 |
-
Output("time-slider", "
|
| 539 |
-
Input("time-slider", "value")
|
| 540 |
)
|
| 541 |
-
def
|
| 542 |
-
|
| 543 |
-
end_label = pd.to_datetime(values[1], unit="s").strftime("%b %Y")
|
| 544 |
-
return [start_label, end_label]
|
| 545 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 546 |
|
| 547 |
# Run the app
|
| 548 |
if __name__ == "__main__":
|
| 549 |
-
app.run(debug=True)
|
|
|
|
|
|
|
|
|
| 4 |
import duckdb
|
| 5 |
import time
|
| 6 |
from graphs.leaderboard import (
|
| 7 |
+
button_style,
|
| 8 |
get_top_n_leaderboard,
|
| 9 |
render_table_content,
|
| 10 |
)
|
| 11 |
+
from dash_iconify import DashIconify
|
| 12 |
|
| 13 |
# Initialize the app
|
| 14 |
app = Dash()
|
| 15 |
server = app.server
|
| 16 |
|
| 17 |
+
def load_parquet_to_duckdb(con, parquet_url, view_name):
    """
    Loads a parquet file from a remote URL into DuckDB as a view.
    Returns (start_dt, end_dt) for the 'time' column.

    Args:
        con: DuckDB connection; only ``con.execute(sql)`` and the
            ``fetchdf()`` method of its result are used.
        parquet_url: URL of the remote parquet file. Single quotes in the
            URL are escaped before it is embedded in the SQL text.
        view_name: Name of the view to create or replace. It is interpolated
            into the SQL verbatim, so pass only trusted identifiers.

    Returns:
        Tuple ``(start_dt, end_dt)``: MIN and MAX of the view's ``time``
        column, each converted with ``pd.to_datetime``.
    """
    # Install and load httpfs extension for remote file access
    con.execute("INSTALL httpfs;")
    con.execute("LOAD httpfs;")

    # Double any single quote so a URL containing an apostrophe cannot
    # terminate the SQL string literal early.
    url_literal = parquet_url.replace("'", "''")

    # Create a view that references the remote parquet file
    con.execute(f"""
        CREATE OR REPLACE VIEW {view_name} AS
        SELECT * FROM read_parquet('{url_literal}')
    """)

    # Get time range for slider
    time_range = con.execute(
        f"SELECT MIN(time) as min_time, MAX(time) as max_time FROM {view_name}"
    ).fetchdf()
    start_dt = pd.to_datetime(time_range["min_time"].iloc[0])
    end_dt = pd.to_datetime(time_range["max_time"].iloc[0])
    return start_dt, end_dt
|
| 39 |
+
|
| 40 |
+
# DuckDB connection (global): one in-memory database shared by the code below
con = duckdb.connect(database=":memory:", read_only=False)

# Load parquet files from Hugging Face using DuckDB
# HF_DATASET_ID is only used in the log message; the URLs below spell the
# dataset path out in full.
HF_DATASET_ID = "emsesc/open_model_evolution_data"
hf_parquet_url_1 = "https://huggingface.co/datasets/emsesc/open_model_evolution_data/resolve/main/all_downloads_with_annotations.parquet"
hf_parquet_url_2 = "https://huggingface.co/datasets/emsesc/open_model_evolution_data/resolve/main/one_year_rolling.parquet"

print(f"Attempting to connect to dataset from Hugging Face Hub: {HF_DATASET_ID}")
try:
    overall_start_time = time.time()

    # Load both parquet files as views
    # start_dt / end_dt (time range of "all_downloads") feed the slider
    # bounds computed further down.
    start_dt, end_dt = load_parquet_to_duckdb(con, hf_parquet_url_1, "all_downloads")
    # Register the second parquet file as the "one_year_rolling" view.
    # NOTE(review): start_dt2 / end_dt2 are not used in the visible code.
    start_dt2, end_dt2 = load_parquet_to_duckdb(con, hf_parquet_url_2, "one_year_rolling")

    msg = (
        f"Successfully connected to datasets in {time.time() - overall_start_time:.2f}s."
    )
    print(msg)
except Exception as e:
    # Log the failure, then re-raise so the original exception still propagates.
    err_msg = f"Failed to load dataset(s). Error: {e}"
    print(err_msg)
    raise
|
| 65 |
|
|
|
|
| 67 |
start_ts = int(start_dt.timestamp())
|
| 68 |
end_ts = int(end_dt.timestamp())
|
| 69 |
|
| 70 |
+
def ordinal(n):
    """Return ``n`` followed by its English ordinal suffix, e.g. ``"8th"``.

    Numbers whose last two digits fall in 10..20 always take ``"th"``
    (11th, 12th, 13th); otherwise the last digit picks st/nd/rd/th.
    """
    by_last_digit = {1: 'st', 2: 'nd', 3: 'rd'}
    suffix = 'th' if 10 <= n % 100 <= 20 else by_last_digit.get(n % 10, 'th')
    return f"{n}{suffix}"
|
| 77 |
+
|
| 78 |
+
def format_date(dt):
    """Format ``dt`` like "Oct 8th, 2025".

    ``dt`` must provide ``strftime`` plus ``day`` and ``year`` attributes.
    The month is the ``%b`` abbreviation; the day gets its ordinal suffix.
    """
    month = dt.strftime("%b")
    day = ordinal(dt.day)
    return f"{month} {day}, {dt.year}"
|
| 81 |
+
|
| 82 |
marks = []
|
| 83 |
# Add start label (e.g. "Jan 2020")
|
| 84 |
marks.append({"value": start_ts, "label": start_dt.strftime("%b %Y")})
|
|
|
|
| 91 |
# Add end label (e.g. "Dec 2024")
|
| 92 |
marks.append({"value": end_ts, "label": end_dt.strftime("%b %Y")})
|
| 93 |
|
| 94 |
+
def get_thumb_labels(values):
    """Return formatted date labels for both slider thumbs.

    Args:
        values: Sequence whose first two items are the thumb positions as
            Unix timestamps in seconds.

    Returns:
        A list of two ``html.Div`` elements, one per thumb, each holding the
        formatted date and an absolutely positioned label style.
    """
    first_ts, second_ts = values[0], values[1]
    # Thumbs closer than 4 * 30 days (about 4 months) count as "close".
    is_close = abs(second_ts - first_ts) < 4 * 30 * 86400

    base_style = {
        "background": "#fff",
        "color": "#082030",
        "fontWeight": "bold",
        "fontSize": "13px",
        "borderRadius": "8px",
        "padding": "2px 8px",
        "boxShadow": "0 1px 4px rgba(8,32,48,0.10)",
        "position": "absolute",
        "left": "50%",
        "transform": "translateX(-50%)",
        "whiteSpace": "nowrap",
        "zIndex": 100,
    }

    # The second label always sits below the slider. The first moves above it
    # when the thumbs are close (presumably so the two labels do not overlap).
    first_top = "-38px" if is_close else "14px"
    placements = ((first_ts, first_top), (second_ts, "14px"))

    return [
        html.Div(
            format_date(pd.to_datetime(ts, unit="s")),
            style={**base_style, "top": top},
        )
        for ts, top in placements
    ]
|
| 146 |
+
|
| 147 |
# Create a dmc.RangeSlider for time range selection
|
| 148 |
time_slider = dmc.RangeSlider(
|
| 149 |
id="time-slider",
|
|
|
|
| 158 |
size="md",
|
| 159 |
radius="xl",
|
| 160 |
marks=marks,
|
| 161 |
+
style={"width": "95%", "paddingLeft": "60px"}, # updated paddingLeft
|
| 162 |
+
label=None,
|
| 163 |
+
showLabelOnHover=False,
|
| 164 |
+
labelTransitionProps={"transition": "fade", "duration": 150},
|
| 165 |
+
thumbChildren=get_thumb_labels([start_ts, end_ts]),
|
| 166 |
)
|
| 167 |
|
| 168 |
+
# Add a dcc.Store to hold the selected view (all_downloads or one_year_rolling)
|
| 169 |
app.layout = dmc.MantineProvider(
|
| 170 |
theme={
|
| 171 |
"colorScheme": "light",
|
|
|
|
| 173 |
"fontFamily": "Inter, sans-serif",
|
| 174 |
},
|
| 175 |
children=[
|
| 176 |
+
dcc.Store(id="selected-view", data="all_downloads"),
|
| 177 |
html.Div(
|
| 178 |
[
|
| 179 |
# Header
|
|
|
|
| 211 |
html.A(
|
| 212 |
children=[
|
| 213 |
html.Img(
|
| 214 |
+
src="assets/images/dpi.svg",
|
| 215 |
style={
|
| 216 |
"height": "28px",
|
| 217 |
"verticalAlign": "middle",
|
|
|
|
| 222 |
],
|
| 223 |
href="https://www.dataprovenance.org/",
|
| 224 |
target="_blank",
|
| 225 |
+
className="no-bg-link header-link",
|
| 226 |
style={
|
| 227 |
"display": "inline-block",
|
| 228 |
"padding": "6px 14px",
|
| 229 |
"fontSize": 13,
|
| 230 |
+
"color": "#FFFFFF", # white on dark header
|
| 231 |
+
# background removed so CSS controls it
|
| 232 |
"borderRadius": "18px",
|
| 233 |
"fontWeight": "700",
|
| 234 |
"textDecoration": "none",
|
|
|
|
| 238 |
html.A(
|
| 239 |
children=[
|
| 240 |
html.Img(
|
| 241 |
+
src="assets/images/hf.svg",
|
| 242 |
style={
|
| 243 |
"height": "30px",
|
| 244 |
"verticalAlign": "middle",
|
| 245 |
},
|
| 246 |
+
),
|
| 247 |
+
html.Span(
|
| 248 |
+
"Hugging Face",
|
| 249 |
+
className="hf-brand-text",
|
| 250 |
+
),
|
| 251 |
],
|
| 252 |
href="https://huggingface.co/",
|
| 253 |
target="_blank",
|
| 254 |
+
className="no-bg-link header-link",
|
| 255 |
style={
|
| 256 |
"display": "inline-flex",
|
| 257 |
"padding": "6px 14px",
|
| 258 |
"alignItems": "center",
|
| 259 |
+
"color": "#FFFFFF",
|
| 260 |
"borderRadius": "18px",
|
| 261 |
"textDecoration": "none",
|
| 262 |
+
"marginRight": "12px",
|
| 263 |
+
},
|
| 264 |
+
),
|
| 265 |
+
html.A(
|
| 266 |
+
children=[
|
| 267 |
+
html.Span(
|
| 268 |
+
"Read the paper",
|
| 269 |
+
className="paper-text",
|
| 270 |
+
),
|
| 271 |
+
],
|
| 272 |
+
href="https://www.google.com/",
|
| 273 |
+
target="_blank",
|
| 274 |
+
className="no-bg-link header-link paper-link",
|
| 275 |
+
style={
|
| 276 |
+
"display": "inline-flex",
|
| 277 |
+
"alignItems": "center",
|
| 278 |
+
"padding": "6px 12px", # decreased size
|
| 279 |
+
"fontSize": 14, # smaller text
|
| 280 |
+
"margin": "0 auto",
|
| 281 |
+
"backgroundColor": "#AC482A",
|
| 282 |
+
"color": "#FFFFFF",
|
| 283 |
+
"borderRadius": "5px",
|
| 284 |
+
"textDecoration": "none",
|
| 285 |
+
"fontWeight": "700",
|
| 286 |
},
|
| 287 |
),
|
| 288 |
],
|
|
|
|
| 318 |
"marginBottom": 20,
|
| 319 |
},
|
| 320 |
),
|
|
|
|
| 321 |
html.Div(
|
| 322 |
+
children="The model leaderboard assesses concentrations of power across three hierarchies: countries, developers, and models. Explore how downloads are distributed among these groups and identify key players shaping the open model ecosystem on Hugging Face.",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
style={
|
| 324 |
"fontSize": 14,
|
| 325 |
"marginTop": 18,
|
|
|
|
| 359 |
],
|
| 360 |
mb=10,
|
| 361 |
),
|
| 362 |
+
html.Div(
|
| 363 |
+
"Choose whether to view all downloads or only filtered downloads for the leaderboard.",
|
| 364 |
+
style={
|
| 365 |
+
"fontSize": 13,
|
| 366 |
+
"color": "#555",
|
| 367 |
+
"marginBottom": "12px",
|
| 368 |
+
},
|
| 369 |
+
),
|
| 370 |
+
# New segmented control below the first one
|
| 371 |
+
html.Div(
|
| 372 |
+
[
|
| 373 |
+
html.Div(
|
| 374 |
+
"Select Mode",
|
| 375 |
+
style={
|
| 376 |
+
"fontWeight": "700",
|
| 377 |
+
"marginBottom": 8,
|
| 378 |
+
"fontSize": 14,
|
| 379 |
+
},
|
| 380 |
+
),
|
| 381 |
+
dmc.Switch(
|
| 382 |
+
color="#AC482A",
|
| 383 |
+
label="Derived Authors",
|
| 384 |
+
checked=True,
|
| 385 |
+
mb=10,
|
| 386 |
+
),
|
| 387 |
+
html.Div(
|
| 388 |
+
"Switch between absolute numbers and relative percentages for leaderboard values.",
|
| 389 |
+
style={
|
| 390 |
+
"fontSize": 13,
|
| 391 |
+
"color": "#555",
|
| 392 |
+
"marginBottom": "12px",
|
| 393 |
+
},
|
| 394 |
+
),
|
| 395 |
+
],
|
| 396 |
+
style={"marginTop": "10px"},
|
| 397 |
+
),
|
| 398 |
html.Span(
|
| 399 |
id="global-toggle-status",
|
| 400 |
style={
|
|
|
|
| 417 |
},
|
| 418 |
),
|
| 419 |
time_slider,
|
| 420 |
+
html.Div(
|
| 421 |
+
"Adjust the time range to filter leaderboard results by model release date.",
|
| 422 |
+
style={
|
| 423 |
+
"fontSize": 13,
|
| 424 |
+
"color": "#555",
|
| 425 |
+
"marginTop": "32px", # increased from 24px
|
| 426 |
+
},
|
| 427 |
+
),
|
| 428 |
+
# Tip section
|
| 429 |
+
html.Div(
|
| 430 |
+
[
|
| 431 |
+
html.Div(
|
| 432 |
+
[
|
| 433 |
+
DashIconify(
|
| 434 |
+
icon="mdi:lightbulb-on-outline",
|
| 435 |
+
width=20,
|
| 436 |
+
height=20,
|
| 437 |
+
style={"marginRight": "8px", "color": "#082030"},
|
| 438 |
+
),
|
| 439 |
+
html.Span("Tip"),
|
| 440 |
+
],
|
| 441 |
+
style={
|
| 442 |
+
"fontWeight": "700",
|
| 443 |
+
"fontSize": 15,
|
| 444 |
+
"marginBottom": "6px",
|
| 445 |
+
"color": "#082030",
|
| 446 |
+
"display": "flex",
|
| 447 |
+
"alignItems": "center",
|
| 448 |
+
},
|
| 449 |
+
),
|
| 450 |
+
html.Div(
|
| 451 |
+
[
|
| 452 |
+
"Try switching between ",
|
| 453 |
+
html.Span("All Downloads", style={"fontWeight": "600", "color": "#AC482A"}),
|
| 454 |
+
" and ",
|
| 455 |
+
html.Span("Filtered Downloads", style={"fontWeight": "600", "color": "#AC482A"}),
|
| 456 |
+
" to compare ecosystem-wide vs. curated model trends. ",
|
| 457 |
+
"You can also toggle between ",
|
| 458 |
+
html.Span("Absolute", style={"fontWeight": "600", "color": "#AC482A"}),
|
| 459 |
+
" and ",
|
| 460 |
+
html.Span("Relative", style={"fontWeight": "600", "color": "#AC482A"}),
|
| 461 |
+
" to see raw counts or percentages."
|
| 462 |
+
],
|
| 463 |
+
style={
|
| 464 |
+
"fontSize": 13,
|
| 465 |
+
"color": "#082030",
|
| 466 |
+
"lineHeight": "1.6",
|
| 467 |
+
},
|
| 468 |
+
),
|
| 469 |
+
],
|
| 470 |
+
style={
|
| 471 |
+
"backgroundColor": "#F5ECE6",
|
| 472 |
+
"borderRadius": "14px",
|
| 473 |
+
"padding": "18px 20px",
|
| 474 |
+
"marginTop": "28px",
|
| 475 |
+
"boxShadow": "0 1px 4px rgba(8,32,48,0.04)",
|
| 476 |
+
"border": "1px solid #f0e3d6",
|
| 477 |
+
},
|
| 478 |
+
),
|
| 479 |
],
|
| 480 |
+
style={
|
| 481 |
+
"flex": 2,
|
| 482 |
+
"minWidth": "320px",
|
| 483 |
+
"display": "flex",
|
| 484 |
+
"flexDirection": "column",
|
| 485 |
+
"justifyContent": "center",
|
| 486 |
+
"height": "100%",
|
| 487 |
+
},
|
| 488 |
),
|
| 489 |
],
|
| 490 |
style={
|
|
|
|
| 522 |
"borderBottom": "3px solid #082030",
|
| 523 |
},
|
| 524 |
children=[
|
| 525 |
+
html.Div(
|
| 526 |
+
children="The model leaderboard assesses concentrations of power across three hierarchies: countries, developers, and models. Explore how downloads are distributed among these groups and identify key players shaping the open model ecosystem on Hugging Face.",
|
| 527 |
+
style={
|
| 528 |
+
"fontSize": 14,
|
| 529 |
+
"marginTop": 18,
|
| 530 |
+
"marginBottom": 12,
|
| 531 |
+
"textAlign": "left",
|
| 532 |
+
},
|
| 533 |
+
),
|
| 534 |
+
dcc.Loading(
|
| 535 |
+
id="loading-countries",
|
| 536 |
+
type="circle",
|
| 537 |
+
color="#AC482A",
|
| 538 |
+
children=html.Div(id="top_countries-table")
|
| 539 |
+
),
|
| 540 |
+
html.Button(
|
| 541 |
+
id="top_countries-toggle",
|
| 542 |
+
children="▼ Show Top 50",
|
| 543 |
+
n_clicks=0,
|
| 544 |
+
style={**button_style, "border": "none"},
|
| 545 |
+
),
|
| 546 |
],
|
| 547 |
),
|
| 548 |
dcc.Tab(
|
|
|
|
| 563 |
"borderBottom": "3px solid #082030",
|
| 564 |
},
|
| 565 |
children=[
|
| 566 |
+
html.Div(
|
| 567 |
+
children="The model leaderboard assesses concentrations of power across three hierarchies: countries, developers, and models. Explore how downloads are distributed among these groups and identify key players shaping the open model ecosystem on Hugging Face.",
|
| 568 |
+
style={
|
| 569 |
+
"fontSize": 14,
|
| 570 |
+
"marginTop": 18,
|
| 571 |
+
"marginBottom": 12,
|
| 572 |
+
"textAlign": "left",
|
| 573 |
+
},
|
| 574 |
+
),
|
| 575 |
+
dcc.Loading(
|
| 576 |
+
id="loading-developers",
|
| 577 |
+
type="circle",
|
| 578 |
+
color="#AC482A",
|
| 579 |
+
children=html.Div(id="top_developers-table")
|
| 580 |
+
),
|
| 581 |
+
html.Button(
|
| 582 |
+
id="top_developers-toggle",
|
| 583 |
+
children="▼ Show Top 50",
|
| 584 |
+
n_clicks=0,
|
| 585 |
+
style={**button_style, "border": "none"},
|
| 586 |
+
),
|
| 587 |
],
|
| 588 |
),
|
| 589 |
dcc.Tab(
|
|
|
|
| 604 |
"borderBottom": "3px solid #082030",
|
| 605 |
},
|
| 606 |
children=[
|
| 607 |
+
html.Div(
|
| 608 |
+
children="The model leaderboard assesses concentrations of power across three hierarchies: countries, developers, and models. Explore how downloads are distributed among these groups and identify key players shaping the open model ecosystem on Hugging Face.",
|
| 609 |
+
style={
|
| 610 |
+
"fontSize": 14,
|
| 611 |
+
"marginTop": 18,
|
| 612 |
+
"marginBottom": 12,
|
| 613 |
+
"textAlign": "left",
|
| 614 |
+
},
|
| 615 |
+
),
|
| 616 |
+
dcc.Loading(
|
| 617 |
+
id="loading-models",
|
| 618 |
+
type="circle",
|
| 619 |
+
color="#AC482A",
|
| 620 |
+
children=html.Div(id="top_models-table")
|
| 621 |
+
),
|
| 622 |
+
html.Button(
|
| 623 |
+
id="top_models-toggle",
|
| 624 |
+
children="▼ Show Top 50",
|
| 625 |
+
n_clicks=0,
|
| 626 |
+
style={**button_style, "border": "none"},
|
| 627 |
+
),
|
| 628 |
],
|
| 629 |
),
|
| 630 |
],
|
|
|
|
| 634 |
"borderRadius": "18px",
|
| 635 |
"padding": "32px",
|
| 636 |
"marginTop": "12px",
|
| 637 |
+
"marginBottom": "12px", # reduced from 64px
|
| 638 |
"marginLeft": "50px",
|
| 639 |
"marginRight": "50px",
|
| 640 |
},
|
|
|
|
| 649 |
],
|
| 650 |
)
|
| 651 |
|
| 652 |
+
|
| 653 |
# Callbacks for interactivity
|
| 654 |
# -- helper utilities to consolidate duplicated callback logic --
|
| 655 |
+
# Names of the DuckDB views registered at startup. `view` is spliced into the
# SQL text and can originate from client-side state, so only these are accepted.
_ALLOWED_VIEWS = frozenset({"all_downloads", "one_year_rolling"})


def _get_filtered_top_n_from_duckdb(slider_value, group_col, top_n, view="all_downloads"):
    """
    Query DuckDB directly to get top N entries with metadata
    This minimizes data transfer by doing aggregation in DuckDB

    Args:
        slider_value: Optional pair ``[start, end]`` of Unix timestamps in
            seconds. When it is a two-item sequence, rows are restricted to
            ``start <= time <= end``; otherwise no time filter is applied.
        group_col: Column to group by. Interpolated into the SQL verbatim,
            so it must be a trusted, server-side constant.
        top_n: Maximum number of groups to return.
        view: Name of the view to query; must be one of ``_ALLOWED_VIEWS``.

    Returns:
        DataFrame with columns ``name``, ``total_downloads``,
        ``percent_of_total``, ``org_country_single``, ``author``,
        ``merged_country_groups_single``, ``merged_modality`` and ``model``,
        ordered by ``total_downloads`` descending and limited to ``top_n`` rows.

    Raises:
        ValueError: If ``view`` is not one of the known view names.
    """
    # Reject anything but a known view name before it reaches the SQL text;
    # the isinstance check also guards against unhashable JSON payloads.
    if not isinstance(view, str) or view not in _ALLOWED_VIEWS:
        raise ValueError(f"Unknown view: {view!r}")

    # Build time filter clause
    time_clause = ""
    if slider_value and len(slider_value) == 2:
        # pd.to_datetime coerces the values to Timestamps, so only a
        # formatted timestamp is ever embedded in the query.
        start = pd.to_datetime(slider_value[0], unit="s")
        end = pd.to_datetime(slider_value[1], unit="s")
        time_clause = f"WHERE time >= '{start}' AND time <= '{end}'"

    # Build the aggregation query to get top N with all needed metadata
    # This query groups by the target column and aggregates downloads
    # while collecting all metadata we need for chips
    query = f"""
    WITH base_data AS (
        SELECT
            {group_col},
            CASE
                WHEN org_country_single = 'HF' THEN 'United States of America'
                WHEN org_country_single = 'International' THEN 'International/Online'
                WHEN org_country_single = 'Online' THEN 'International/Online'
                ELSE org_country_single
            END AS org_country_single,
            author,
            merged_country_groups_single,
            merged_modality,
            downloads,
            model
        FROM {view}
        {time_clause}
    ),

    -- Compute the total downloads for all rows in the time range
    total_downloads_cte AS (
        SELECT SUM(downloads) AS total_downloads_all
        FROM base_data
    ),

    -- Compute per-group totals and their percentage of all downloads
    top_items AS (
        SELECT
            b.{group_col} AS name,
            SUM(b.downloads) AS total_downloads,
            ROUND(SUM(b.downloads) * 100.0 / t.total_downloads_all, 2) AS percent_of_total,
            -- Pick first non-null metadata values for reference
            ANY_VALUE(b.org_country_single) AS org_country_single,
            ANY_VALUE(b.author) AS author,
            ANY_VALUE(b.merged_country_groups_single) AS merged_country_groups_single,
            ANY_VALUE(b.merged_modality) AS merged_modality,
            ANY_VALUE(b.model) AS model
        FROM base_data b
        CROSS JOIN total_downloads_cte t
        GROUP BY b.{group_col}, t.total_downloads_all
    )

    SELECT *
    FROM top_items
    ORDER BY total_downloads DESC
    LIMIT {top_n};
    """

    return con.execute(query).fetchdf()
|
| 719 |
|
| 720 |
+
|
| 721 |
+
def _leaderboard_callback_logic(
|
| 722 |
+
n_clicks,
|
| 723 |
+
slider_value,
|
| 724 |
+
current_label,
|
| 725 |
+
group_col,
|
| 726 |
+
filename,
|
| 727 |
+
default_label="▼ Show Top 50",
|
| 728 |
+
chip_color="#F0F9FF",
|
| 729 |
+
view="all_downloads",
|
| 730 |
+
):
|
| 731 |
# Normalize label on first load
|
| 732 |
if current_label is None:
|
| 733 |
current_label = default_label
|
|
|
|
| 744 |
top_n, new_label = 10, "▼ Show Top 50"
|
| 745 |
|
| 746 |
# Get filtered and aggregated data directly from DuckDB
|
| 747 |
+
df_filtered = _get_filtered_top_n_from_duckdb(slider_value, group_col, top_n, view=view)
|
| 748 |
+
|
| 749 |
# Process the already-filtered data
|
| 750 |
df, download_df = get_top_n_leaderboard(df_filtered, group_col, top_n)
|
| 751 |
+
return render_table_content(
|
| 752 |
+
df, download_df, chip_color=chip_color, filename=filename
|
| 753 |
+
), new_label
|
| 754 |
+
|
| 755 |
+
|
| 756 |
# -- end helpers --
|
| 757 |
|
| 758 |
+
|
| 759 |
# Callbacks for interactivity (modularized)
|
| 760 |
@app.callback(
|
| 761 |
Output("top_countries-table", "children"),
|
| 762 |
Output("top_countries-toggle", "children"),
|
| 763 |
Input("top_countries-toggle", "n_clicks"),
|
| 764 |
Input("time-slider", "value"),
|
| 765 |
+
Input("selected-view", "data"),
|
| 766 |
State("top_countries-toggle", "children"),
|
| 767 |
)
|
| 768 |
+
def update_top_countries(n_clicks, slider_value, selected_view, current_label):
|
| 769 |
return _leaderboard_callback_logic(
|
| 770 |
n_clicks,
|
| 771 |
slider_value,
|
|
|
|
| 774 |
filename="top_countries",
|
| 775 |
default_label="▼ Show Top 50",
|
| 776 |
chip_color="#F0F9FF",
|
| 777 |
+
view=selected_view,
|
| 778 |
)
|
| 779 |
|
| 780 |
+
|
| 781 |
@app.callback(
    Output("top_developers-table", "children"),
    Output("top_developers-toggle", "children"),
    Input("top_developers-toggle", "n_clicks"),
    Input("time-slider", "value"),
    Input("selected-view", "data"),
    State("top_developers-toggle", "children"),
)
def update_top_developers(n_clicks, slider_value, selected_view, current_label):
    """Refresh the "top developers" leaderboard and its toggle label.

    Triggered by a click on the toggle button, a change of the time slider,
    or a change of the stored selected view; the toggle's current label is
    read as state. The work is delegated to ``_leaderboard_callback_logic``
    with rows grouped by the ``author`` column.

    Returns:
        The value produced by ``_leaderboard_callback_logic``, mapped onto
        the two declared outputs (table children, toggle label).
    """
    # Leaderboard-specific settings; everything else is shared helper logic.
    developer_options = {
        "group_col": "author",
        "filename": "top_developers",
        "default_label": "▼ Show Top 50",
        "chip_color": "#F0F9FF",
        "view": selected_view,
    }
    return _leaderboard_callback_logic(
        n_clicks, slider_value, current_label, **developer_options
    )
|
| 800 |
|
| 801 |
+
|
| 802 |
@app.callback(
|
| 803 |
Output("top_models-table", "children"),
|
| 804 |
Output("top_models-toggle", "children"),
|
| 805 |
Input("top_models-toggle", "n_clicks"),
|
| 806 |
Input("time-slider", "value"),
|
| 807 |
+
Input("selected-view", "data"),
|
| 808 |
State("top_models-toggle", "children"),
|
| 809 |
)
|
| 810 |
+
def update_top_models(n_clicks, slider_value, selected_view, current_label):
|
| 811 |
return _leaderboard_callback_logic(
|
| 812 |
n_clicks,
|
| 813 |
slider_value,
|
|
|
|
| 816 |
filename="top_models",
|
| 817 |
default_label="▼ Show More",
|
| 818 |
chip_color="#F0F9FF",
|
| 819 |
+
view=selected_view,
|
| 820 |
)
|
| 821 |
|
| 822 |
+
|
| 823 |
@app.callback(
    Output("time-slider", "thumbChildren"),
    Input("time-slider", "value"),
)
def update_thumb_labels(values):
    """Recompute the time slider's thumb labels when its value changes.

    Args:
        values: Current value of the ``time-slider`` component, passed
            through unchanged to ``get_thumb_labels``.

    Returns:
        Whatever ``get_thumb_labels`` produces, assigned to the slider's
        ``thumbChildren`` property.
    """
    thumb_children = get_thumb_labels(values)
    return thumb_children
|
|
|
|
|
|
|
| 829 |
|
| 830 |
+
# --- Add callback to update selected view based on segmented control ---
|
| 831 |
+
@app.callback(
    Output("selected-view", "data"),
    Input("segmented", "value"),
)
def update_selected_view(seg_value):
    """Translate the segmented control's value into a view name.

    Args:
        seg_value: Current value of the ``segmented`` control.

    Returns:
        ``"one_year_rolling"`` when the control is set to
        ``"filtered-downloads"``; ``"all_downloads"`` for any other value.
    """
    return (
        "one_year_rolling"
        if seg_value == "filtered-downloads"
        else "all_downloads"
    )
|
| 839 |
|
| 840 |
# Run the app
|
| 841 |
if __name__ == "__main__":
    # Single entry point: the guard used to appear twice, which would start
    # the server a second time as soon as the first run() call returned.
    app.run(debug=True)
|
assets/images/Hf-logo-with-title.svg
DELETED
assets/images/{dpi-logo.svg → dpi.svg}
RENAMED
|
File without changes
|
assets/images/hf.svg
ADDED
|
|
assets/styles.css
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* Header links: transparent background, white text, grow on hover */
|
| 2 |
+
.no-bg-link {
|
| 3 |
+
background-color: transparent !important;
|
| 4 |
+
color: #ffffff !important;
|
| 5 |
+
transition: transform 0.12s ease, background-color 0.12s ease;
|
| 6 |
+
display: inline-flex;
|
| 7 |
+
align-items: center;
|
| 8 |
+
text-decoration: none !important;
|
| 9 |
+
padding: 6px 14px; /* keep spacing consistent with inline styles */
|
| 10 |
+
border-radius: 18px;
|
| 11 |
+
}
|
| 12 |
+
|
| 13 |
+
/* Subtle hover tint and scale for interactivity */
|
| 14 |
+
.no-bg-link:hover {
|
| 15 |
+
transform: scale(1.06);
|
| 16 |
+
background-color: rgba(255, 255, 255, 0.04) !important;
|
| 17 |
+
}
|
| 18 |
+
|
| 19 |
+
/* Make logo images scale smoothly on hover */
|
| 20 |
+
.no-bg-link img {
|
| 21 |
+
transition: transform 0.12s ease;
|
| 22 |
+
}
|
| 23 |
+
.no-bg-link:hover img {
|
| 24 |
+
transform: scale(1.06);
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
/* Read the paper button: scale up on hover */
|
| 28 |
+
.paper-button {
|
| 29 |
+
transition: transform 0.12s ease;
|
| 30 |
+
will-change: transform;
|
| 31 |
+
}
|
| 32 |
+
.paper-button:hover {
|
| 33 |
+
transform: scale(1.06);
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
/* Hugging Face brand text: Source Sans Pro with system-font fallbacks, styled to visually match HF typography */
|
| 37 |
+
.hf-brand-text {
|
| 38 |
+
font-family: 'Source Sans Pro', system-ui, -apple-system, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, 'Noto Sans', sans-serif;
|
| 39 |
+
font-weight: 1000;
|
| 40 |
+
font-size: 14px;
|
| 41 |
+
line-height: 1;
|
| 42 |
+
margin-left: 8px;
|
| 43 |
+
color: #FFFFFF;
|
| 44 |
+
display: inline-block;
|
| 45 |
+
transform-origin: center;
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
/* Paper link: position relative for arrow, overflow visible */
|
| 49 |
+
.paper-link {
|
| 50 |
+
position: relative; /* needed for positioning the arrow */
|
| 51 |
+
overflow: visible;
|
| 52 |
+
background-color: #AC482A !important; /* restore previous button color */
|
| 53 |
+
color: #FFFFFF !important;
|
| 54 |
+
padding: 10px 20px; /* ensure spacing matches inline styles */
|
| 55 |
+
border-radius: 5px;
|
| 56 |
+
font-weight: 700;
|
| 57 |
+
display: inline-flex;
|
| 58 |
+
align-items: center;
|
| 59 |
+
text-decoration: none !important;
|
| 60 |
+
transition: transform var(--default-transition-duration, .15s) var(--default-transition-timing-function, cubic-bezier(.4,0,.2,1)), background-color var(--default-transition-duration, .15s);
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
/* Small arrow placed inline to the right of the text */
|
| 64 |
+
.paper-link::after {
|
| 65 |
+
content: "↗";
|
| 66 |
+
display: inline-block;
|
| 67 |
+
margin-left: 8px;
|
| 68 |
+
font-size: 12px;
|
| 69 |
+
color: #FFFFFF;
|
| 70 |
+
opacity: 0.95;
|
| 71 |
+
transition: transform var(--default-transition-duration, .15s) var(--default-transition-timing-function, cubic-bezier(.4,0,.2,1));
|
| 72 |
+
transform-origin: center;
|
| 73 |
+
pointer-events: none;
|
| 74 |
+
vertical-align: baseline;
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
/* Hover/focus states */
|
| 78 |
+
.paper-link:hover,
|
| 79 |
+
.paper-link:focus {
|
| 80 |
+
transform: scale(1.06);
|
| 81 |
+
background-color: #95371f !important; /* slightly darker on hover */
|
| 82 |
+
}
|
| 83 |
+
.paper-link:hover::after,
|
| 84 |
+
.paper-link:focus::after {
|
| 85 |
+
transform: scale(1.08);
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
/* Apply same interactive behavior to leaderboard "show top" toggles and download controls */
|
| 89 |
+
/* Matches elements with ids like "top_countries-toggle" and download anchors like "download-top_countries" */
|
| 90 |
+
[id$="-toggle"],
|
| 91 |
+
button[id$="-toggle"],
|
| 92 |
+
[id^="download-"],
|
| 93 |
+
a[id^="download-"],
|
| 94 |
+
button[id^="download-"],
|
| 95 |
+
.leaderboard-toggle,
|
| 96 |
+
.download-btn {
|
| 97 |
+
display: inline-flex;
|
| 98 |
+
align-items: center;
|
| 99 |
+
padding: 6px 12px;
|
| 100 |
+
font-size: 14px;
|
| 101 |
+
border-radius: 6px;
|
| 102 |
+
cursor: pointer;
|
| 103 |
+
text-decoration: none !important;
|
| 104 |
+
background-color: transparent;
|
| 105 |
+
color: #082030;
|
| 106 |
+
transition: transform var(--default-transition-duration, .15s) var(--default-transition-timing-function, cubic-bezier(.4,0,.2,1)), background-color var(--default-transition-duration, .15s);
|
| 107 |
+
will-change: transform;
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
/* Hover / focus: subtle scale + tint similar to header links */
|
| 111 |
+
[id$="-toggle"]:hover,
|
| 112 |
+
[id^="download-"]:hover,
|
| 113 |
+
button[id$="-toggle"]:hover,
|
| 114 |
+
button[id^="download-"]:hover,
|
| 115 |
+
.leaderboard-toggle:hover,
|
| 116 |
+
.download-btn:hover,
|
| 117 |
+
[id$="-toggle"]:focus,
|
| 118 |
+
[id^="download-"]:focus,
|
| 119 |
+
button[id$="-toggle"]:focus,
|
| 120 |
+
button[id^="download-"]:focus {
|
| 121 |
+
transform: scale(1.06);
|
| 122 |
+
outline: none;
|
| 123 |
+
}
|
graphs/leaderboard.py
CHANGED
|
@@ -3,6 +3,7 @@ from dash import html, dcc
|
|
| 3 |
from dash_iconify import DashIconify
|
| 4 |
import dash_mantine_components as dmc
|
| 5 |
import base64
|
|
|
|
| 6 |
|
| 7 |
button_style = {
|
| 8 |
"display": "inline-block",
|
|
@@ -18,64 +19,6 @@ button_style = {
|
|
| 18 |
"fontSize": "14px",
|
| 19 |
}
|
| 20 |
|
| 21 |
-
country_icon_map = {
|
| 22 |
-
"USA": "🇺🇸",
|
| 23 |
-
"China": "🇨🇳",
|
| 24 |
-
"Germany": "🇩🇪",
|
| 25 |
-
"France": "🇫🇷",
|
| 26 |
-
"India": "🇮🇳",
|
| 27 |
-
"Italy": "🇮🇹",
|
| 28 |
-
"Japan": "🇯🇵",
|
| 29 |
-
"South Korea": "🇰🇷",
|
| 30 |
-
"United Kingdom": "🇬🇧",
|
| 31 |
-
"Canada": "🇨🇦",
|
| 32 |
-
"Brazil": "🇧🇷",
|
| 33 |
-
"Australia": "🇦🇺",
|
| 34 |
-
"Unknown": "❓",
|
| 35 |
-
"Finland": "🇫🇮",
|
| 36 |
-
"Lebanon": "🇱🇧",
|
| 37 |
-
"Iceland": "🇮🇸",
|
| 38 |
-
"Singapore": "🇸🇬",
|
| 39 |
-
"Israel": "🇮🇱",
|
| 40 |
-
"Iran": "🇮🇷",
|
| 41 |
-
"Hong Kong": "🇭🇰",
|
| 42 |
-
"Netherlands": "🇳🇱",
|
| 43 |
-
"Chile": "🇨🇱",
|
| 44 |
-
"Vietnam": "🇻🇳",
|
| 45 |
-
"Russia": "🇷🇺",
|
| 46 |
-
"Qatar": "🇶🇦",
|
| 47 |
-
"Switzerland": "🇨🇭",
|
| 48 |
-
"User": "👤",
|
| 49 |
-
"International/Online": "🌐",
|
| 50 |
-
"Spain": "🇪🇸",
|
| 51 |
-
"Sweden": "🇸🇪",
|
| 52 |
-
"Norway": "🇳🇴",
|
| 53 |
-
"Denmark": "🇩🇰",
|
| 54 |
-
"Austria": "🇦🇹",
|
| 55 |
-
"Belgium": "🇧🇪",
|
| 56 |
-
"Poland": "🇵🇱",
|
| 57 |
-
"Turkey": "🇹🇷",
|
| 58 |
-
"Mexico": "🇲🇽",
|
| 59 |
-
"Argentina": "🇦🇷",
|
| 60 |
-
"Thailand": "🇹🇭",
|
| 61 |
-
"Indonesia": "🇮🇩",
|
| 62 |
-
"Malaysia": "🇲🇾",
|
| 63 |
-
"Philippines": "🇵🇭",
|
| 64 |
-
"Egypt": "🇪🇬",
|
| 65 |
-
"South Africa": "🇿🇦",
|
| 66 |
-
"New Zealand": "🇳🇿",
|
| 67 |
-
"Ireland": "🇮🇪",
|
| 68 |
-
"Portugal": "🇵🇹",
|
| 69 |
-
"Greece": "🇬🇷",
|
| 70 |
-
"Czech Republic": "🇨🇿",
|
| 71 |
-
"Romania": "🇷🇴",
|
| 72 |
-
"Ukraine": "🇺🇦",
|
| 73 |
-
"United Arab Emirates": "🇦🇪",
|
| 74 |
-
"Saudi Arabia": "🇸🇦",
|
| 75 |
-
"Pakistan": "🇵🇰",
|
| 76 |
-
"Bangladesh": "🇧🇩",
|
| 77 |
-
}
|
| 78 |
-
|
| 79 |
company_icon_map = {
|
| 80 |
"google": "../assets/icons/google.png",
|
| 81 |
"distilbert": "../assets/icons/hugging-face.png",
|
|
@@ -84,6 +27,12 @@ company_icon_map = {
|
|
| 84 |
"openai": "../assets/icons/openai.png",
|
| 85 |
}
|
| 86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
meta_cols_map = {
|
| 88 |
"org_country_single": ["org_country_single"],
|
| 89 |
"author": ["org_country_single", "author", "merged_country_groups_single"],
|
|
@@ -92,7 +41,7 @@ meta_cols_map = {
|
|
| 92 |
"author",
|
| 93 |
"merged_country_groups_single",
|
| 94 |
"merged_modality",
|
| 95 |
-
"
|
| 96 |
],
|
| 97 |
}
|
| 98 |
|
|
@@ -114,7 +63,7 @@ def chip(text, bg_color="#F0F0F0"):
|
|
| 114 |
|
| 115 |
|
| 116 |
# Progress bar for % of total
|
| 117 |
-
def progress_bar(percent, bar_color="#
|
| 118 |
return html.Div(
|
| 119 |
style={
|
| 120 |
"position": "relative",
|
|
@@ -179,42 +128,99 @@ def df_to_download_link(df, filename):
|
|
| 179 |
)
|
| 180 |
|
| 181 |
|
| 182 |
-
#
|
| 183 |
-
def
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
html.Span(
|
| 189 |
-
|
| 190 |
-
html.Img(
|
| 191 |
-
src=icon, style={"height": "18px", "marginRight": "6px"}
|
| 192 |
-
),
|
| 193 |
-
name,
|
| 194 |
-
],
|
| 195 |
style={
|
| 196 |
-
"backgroundColor":
|
| 197 |
"padding": "4px 10px",
|
| 198 |
"borderRadius": "12px",
|
| 199 |
"margin": "2px",
|
| 200 |
"display": "inline-flex",
|
| 201 |
-
"alignItems": "
|
| 202 |
"fontSize": "14px",
|
|
|
|
| 203 |
},
|
| 204 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
)
|
| 206 |
else:
|
| 207 |
-
chips.append(
|
| 208 |
return html.Div(
|
| 209 |
chips, style={"display": "flex", "flexWrap": "wrap", "justifyContent": "left"}
|
| 210 |
)
|
| 211 |
|
| 212 |
|
| 213 |
def render_table_content(
|
| 214 |
-
df, download_df, chip_color, bar_color="#
|
| 215 |
):
|
| 216 |
return html.Div(
|
| 217 |
[
|
|
|
|
|
|
|
| 218 |
html.Table(
|
| 219 |
[
|
| 220 |
html.Thead(
|
|
@@ -274,129 +280,6 @@ def render_table_content(
|
|
| 274 |
]
|
| 275 |
)
|
| 276 |
|
| 277 |
-
|
| 278 |
-
# Table renderer
|
| 279 |
-
def render_table(
|
| 280 |
-
df, download_df, title, chip_color, bar_color="#AC482A", filename="data"
|
| 281 |
-
):
|
| 282 |
-
return html.Div(
|
| 283 |
-
id=f"{filename}-div",
|
| 284 |
-
children=[
|
| 285 |
-
html.Div(
|
| 286 |
-
[
|
| 287 |
-
html.H4(
|
| 288 |
-
title,
|
| 289 |
-
style={
|
| 290 |
-
"textAlign": "left",
|
| 291 |
-
"marginBottom": "10px",
|
| 292 |
-
"fontSize": "20px",
|
| 293 |
-
"display": "inline-block",
|
| 294 |
-
},
|
| 295 |
-
),
|
| 296 |
-
df_to_download_link(download_df, filename),
|
| 297 |
-
],
|
| 298 |
-
style={
|
| 299 |
-
"display": "flex",
|
| 300 |
-
"alignItems": "center",
|
| 301 |
-
"justifyContent": "space-between",
|
| 302 |
-
},
|
| 303 |
-
),
|
| 304 |
-
html.Div(
|
| 305 |
-
id=f"{filename}-table",
|
| 306 |
-
children=[
|
| 307 |
-
html.Table(
|
| 308 |
-
[
|
| 309 |
-
html.Thead(
|
| 310 |
-
html.Tr(
|
| 311 |
-
[
|
| 312 |
-
html.Th(
|
| 313 |
-
"Rank",
|
| 314 |
-
style={
|
| 315 |
-
"backgroundColor": "#F0F0F0",
|
| 316 |
-
"textAlign": "left",
|
| 317 |
-
},
|
| 318 |
-
),
|
| 319 |
-
html.Th(
|
| 320 |
-
"Name",
|
| 321 |
-
style={
|
| 322 |
-
"backgroundColor": "#F0F0F0",
|
| 323 |
-
"textAlign": "left",
|
| 324 |
-
},
|
| 325 |
-
),
|
| 326 |
-
html.Th(
|
| 327 |
-
"Metadata",
|
| 328 |
-
style={
|
| 329 |
-
"backgroundColor": "#F0F0F0",
|
| 330 |
-
"textAlign": "left",
|
| 331 |
-
"marginRight": "10px",
|
| 332 |
-
},
|
| 333 |
-
),
|
| 334 |
-
html.Th(
|
| 335 |
-
"% of Total",
|
| 336 |
-
style={
|
| 337 |
-
"backgroundColor": "#F0F0F0",
|
| 338 |
-
"textAlign": "left",
|
| 339 |
-
},
|
| 340 |
-
),
|
| 341 |
-
]
|
| 342 |
-
)
|
| 343 |
-
),
|
| 344 |
-
html.Tbody(
|
| 345 |
-
[
|
| 346 |
-
html.Tr(
|
| 347 |
-
[
|
| 348 |
-
html.Td(
|
| 349 |
-
idx + 1, style={"textAlign": "center"}
|
| 350 |
-
),
|
| 351 |
-
html.Td(
|
| 352 |
-
row["Name"], style={"textAlign": "left"}
|
| 353 |
-
),
|
| 354 |
-
html.Td(
|
| 355 |
-
render_chips(
|
| 356 |
-
row["Metadata"], chip_color
|
| 357 |
-
)
|
| 358 |
-
),
|
| 359 |
-
html.Td(
|
| 360 |
-
progress_bar(
|
| 361 |
-
row["% of total"], bar_color
|
| 362 |
-
),
|
| 363 |
-
style={"textAlign": "center"},
|
| 364 |
-
),
|
| 365 |
-
]
|
| 366 |
-
)
|
| 367 |
-
for idx, row in df.iterrows()
|
| 368 |
-
]
|
| 369 |
-
),
|
| 370 |
-
],
|
| 371 |
-
style={
|
| 372 |
-
"borderCollapse": "collapse",
|
| 373 |
-
"width": "100%",
|
| 374 |
-
"border": "none",
|
| 375 |
-
},
|
| 376 |
-
),
|
| 377 |
-
],
|
| 378 |
-
),
|
| 379 |
-
dcc.Loading(
|
| 380 |
-
id=f"loading-{filename}-toggle",
|
| 381 |
-
type="dot",
|
| 382 |
-
color="#082030",
|
| 383 |
-
children=html.Div(
|
| 384 |
-
[
|
| 385 |
-
html.Button(
|
| 386 |
-
"▼ Show Top 50",
|
| 387 |
-
id=f"{filename}-toggle",
|
| 388 |
-
n_clicks=0,
|
| 389 |
-
style={**button_style, "border": "none"},
|
| 390 |
-
)
|
| 391 |
-
],
|
| 392 |
-
style={"marginTop": "5px", "textAlign": "left"},
|
| 393 |
-
),
|
| 394 |
-
),
|
| 395 |
-
],
|
| 396 |
-
style={"marginBottom": "20px"},
|
| 397 |
-
)
|
| 398 |
-
|
| 399 |
-
|
| 400 |
# Function to get top N leaderboard (now accepts pandas DataFrame from DuckDB query)
|
| 401 |
def get_top_n_leaderboard(filtered_df, group_col, top_n=10):
|
| 402 |
"""
|
|
@@ -410,17 +293,15 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10):
|
|
| 410 |
Returns:
|
| 411 |
tuple: (display_df, download_df)
|
| 412 |
"""
|
|
|
|
| 413 |
# Group by and get top N
|
| 414 |
top = (
|
| 415 |
-
filtered_df.groupby(group_col)["
|
| 416 |
.sum()
|
| 417 |
-
.nlargest(top_n)
|
| 418 |
.reset_index()
|
| 419 |
-
.rename(columns={group_col: "Name", "
|
| 420 |
)
|
| 421 |
-
|
| 422 |
-
total_value = top["Total Value"].sum()
|
| 423 |
-
top["% of total"] = top["Total Value"] / total_value * 100 if total_value else 0
|
| 424 |
|
| 425 |
# Create a downloadable version of the leaderboard
|
| 426 |
download_top = top.copy()
|
|
@@ -459,7 +340,15 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10):
|
|
| 459 |
c = "USA"
|
| 460 |
if c == "user":
|
| 461 |
c = "User"
|
| 462 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 463 |
|
| 464 |
# Author
|
| 465 |
for a in meta.get("author", []):
|
|
@@ -469,32 +358,19 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10):
|
|
| 469 |
icon = "🏢"
|
| 470 |
else:
|
| 471 |
icon = "👤"
|
| 472 |
-
chips.append((icon, a))
|
| 473 |
|
| 474 |
# Downloads
|
| 475 |
total_downloads = sum(
|
| 476 |
-
d for d in meta.get("
|
| 477 |
)
|
| 478 |
if total_downloads:
|
| 479 |
-
chips.append(("⬇️", f"{int(total_downloads):,}"))
|
| 480 |
|
| 481 |
# Modality
|
| 482 |
for m in meta.get("merged_modality", []):
|
| 483 |
if pd.notna(m):
|
| 484 |
-
chips.append(("", m))
|
| 485 |
-
|
| 486 |
-
# Estimated Parameters
|
| 487 |
-
for p in meta.get("estimated_parameters", []):
|
| 488 |
-
if pd.notna(p):
|
| 489 |
-
if p >= 1e9:
|
| 490 |
-
p_str = f"{p / 1e9:.1f}B"
|
| 491 |
-
elif p >= 1e6:
|
| 492 |
-
p_str = f"{p / 1e6:.1f}M"
|
| 493 |
-
elif p >= 1e3:
|
| 494 |
-
p_str = f"{p / 1e3:.1f}K"
|
| 495 |
-
else:
|
| 496 |
-
p_str = str(int(p))
|
| 497 |
-
chips.append(("⚙️", p_str))
|
| 498 |
|
| 499 |
return chips
|
| 500 |
|
|
@@ -526,7 +402,7 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10):
|
|
| 526 |
return top[["Name", "Metadata", "% of total"]], download_top
|
| 527 |
|
| 528 |
|
| 529 |
-
def get_top_n_from_duckdb(con, group_col, top_n=10, time_filter=None):
|
| 530 |
"""
|
| 531 |
Query DuckDB directly to get top N entries with minimal data transfer
|
| 532 |
|
|
@@ -546,103 +422,57 @@ def get_top_n_from_duckdb(con, group_col, top_n=10, time_filter=None):
|
|
| 546 |
end = pd.to_datetime(time_filter[1], unit="s")
|
| 547 |
time_clause = f"WHERE time >= '{start}' AND time <= '{end}'"
|
| 548 |
|
| 549 |
-
# Apply country replacements in the query
|
| 550 |
-
country_case = """
|
| 551 |
-
CASE
|
| 552 |
-
WHEN org_country_single = 'HF' THEN 'United States of America'
|
| 553 |
-
WHEN org_country_single = 'International' THEN 'International/Online'
|
| 554 |
-
WHEN org_country_single = 'Online' THEN 'International/Online'
|
| 555 |
-
ELSE org_country_single
|
| 556 |
-
END as org_country_single
|
| 557 |
-
"""
|
| 558 |
-
|
| 559 |
# Optimized query: first find top N, then get only those rows
|
| 560 |
query = f"""
|
| 561 |
WITH base_data AS (
|
| 562 |
SELECT
|
| 563 |
{group_col},
|
| 564 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 565 |
author,
|
| 566 |
merged_country_groups_single,
|
| 567 |
merged_modality,
|
| 568 |
downloads,
|
| 569 |
-
estimated_parameters,
|
| 570 |
model
|
| 571 |
-
FROM
|
| 572 |
{time_clause}
|
| 573 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 574 |
top_items AS (
|
| 575 |
SELECT
|
| 576 |
-
{group_col}
|
| 577 |
-
SUM(downloads)
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 582 |
)
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
FROM
|
| 586 |
-
|
| 587 |
-
|
| 588 |
"""
|
| 589 |
-
|
| 590 |
try:
|
| 591 |
return con.execute(query).fetchdf()
|
| 592 |
except Exception as e:
|
| 593 |
print(f"Error querying DuckDB: {e}")
|
| 594 |
-
return pd.DataFrame()
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
def create_leaderboard(con, board_type, top_n=10):
|
| 598 |
-
"""
|
| 599 |
-
Create leaderboard using DuckDB connection with optimized queries
|
| 600 |
-
|
| 601 |
-
Args:
|
| 602 |
-
con: DuckDB connection object
|
| 603 |
-
board_type: Type of leaderboard ('countries', 'developers', 'models')
|
| 604 |
-
top_n: Number of top entries to display
|
| 605 |
-
|
| 606 |
-
Returns:
|
| 607 |
-
Dash HTML component with the leaderboard table
|
| 608 |
-
"""
|
| 609 |
-
# Map board type to column name
|
| 610 |
-
column_map = {
|
| 611 |
-
"countries": "org_country_single",
|
| 612 |
-
"developers": "author",
|
| 613 |
-
"models": "model"
|
| 614 |
-
}
|
| 615 |
-
|
| 616 |
-
title_map = {
|
| 617 |
-
"countries": "Top Countries",
|
| 618 |
-
"developers": "Top Developers",
|
| 619 |
-
"models": "Top Models"
|
| 620 |
-
}
|
| 621 |
-
|
| 622 |
-
filename_map = {
|
| 623 |
-
"countries": "top_countries",
|
| 624 |
-
"developers": "top_developers",
|
| 625 |
-
"models": "top_models"
|
| 626 |
-
}
|
| 627 |
-
|
| 628 |
-
group_col = column_map.get(board_type)
|
| 629 |
-
if not group_col:
|
| 630 |
-
return html.Div(f"Unknown board type: {board_type}")
|
| 631 |
-
|
| 632 |
-
# Get only the top N rows from DuckDB
|
| 633 |
-
filtered_df = get_top_n_from_duckdb(con, group_col, top_n)
|
| 634 |
-
|
| 635 |
-
if filtered_df.empty:
|
| 636 |
-
return html.Div("No data available")
|
| 637 |
-
|
| 638 |
-
# Process the already-filtered data
|
| 639 |
-
top_data, download_data = get_top_n_leaderboard(filtered_df, group_col, top_n)
|
| 640 |
-
|
| 641 |
-
return render_table(
|
| 642 |
-
top_data,
|
| 643 |
-
download_data,
|
| 644 |
-
title_map[board_type],
|
| 645 |
-
chip_color="#F0F9FF",
|
| 646 |
-
bar_color="#082030",
|
| 647 |
-
filename=filename_map[board_type],
|
| 648 |
-
)
|
|
|
|
| 3 |
from dash_iconify import DashIconify
|
| 4 |
import dash_mantine_components as dmc
|
| 5 |
import base64
|
| 6 |
+
import countryflag
|
| 7 |
|
| 8 |
button_style = {
|
| 9 |
"display": "inline-block",
|
|
|
|
| 19 |
"fontSize": "14px",
|
| 20 |
}
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
company_icon_map = {
|
| 23 |
"google": "../assets/icons/google.png",
|
| 24 |
"distilbert": "../assets/icons/hugging-face.png",
|
|
|
|
| 27 |
"openai": "../assets/icons/openai.png",
|
| 28 |
}
|
| 29 |
|
| 30 |
+
country_emoji_fallback = {
|
| 31 |
+
"User": "👤",
|
| 32 |
+
"Organization": "🏢",
|
| 33 |
+
"Model": "📦",
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
meta_cols_map = {
|
| 37 |
"org_country_single": ["org_country_single"],
|
| 38 |
"author": ["org_country_single", "author", "merged_country_groups_single"],
|
|
|
|
| 41 |
"author",
|
| 42 |
"merged_country_groups_single",
|
| 43 |
"merged_modality",
|
| 44 |
+
"total_downloads",
|
| 45 |
],
|
| 46 |
}
|
| 47 |
|
|
|
|
| 63 |
|
| 64 |
|
| 65 |
# Progress bar for % of total
|
| 66 |
+
def progress_bar(percent, bar_color="#AC482A"):
|
| 67 |
return html.Div(
|
| 68 |
style={
|
| 69 |
"position": "relative",
|
|
|
|
| 128 |
)
|
| 129 |
|
| 130 |
|
| 131 |
+
# Helper to get popover content for each metadata type
|
| 132 |
+
def get_metadata_popover_content(icon, name, meta_type):
    """Build the hover text shown for a metadata chip.

    Args:
        icon: Accepted for call-site symmetry; not used when building the text.
        name: Value displayed on the chip; interpolated into the message.
        meta_type: Kind of metadata (``"country"``, ``"author"``,
            ``"downloads"``, ``"modality"`` or ``"parameters"``).

    Returns:
        A labelled description for a known ``meta_type``; otherwise ``name``
        is returned unchanged.
    """
    descriptions = {
        "country": f"Country: {name}",
        "author": f"Author/Organization: {name}",
        "downloads": f"Total downloads: {name}",
        "modality": f"Modality: {name}",
        "parameters": f"Estimated parameters: {name}",
    }
    try:
        return descriptions[meta_type]
    except KeyError:
        # Unknown (or missing) metadata type: fall back to the raw value.
        return name
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
# Chip renderer with hovercard
|
| 144 |
+
def chip_with_hovercard(text, bg_color="#F0F0F0", meta_type=None, icon=None):
    """Render a text chip that reveals a description card on hover.

    Args:
        text: Content displayed inside the chip; also used as the value in
            the hover description.
        bg_color: Background colour of the chip.
        meta_type: Metadata kind forwarded to ``get_metadata_popover_content``.
        icon: Forwarded to ``get_metadata_popover_content``.

    Returns:
        A ``dmc.HoverCard`` whose target is the chip and whose dropdown holds
        the description text.
    """
    description = get_metadata_popover_content(icon, text, meta_type)

    chip_style = {
        "backgroundColor": bg_color,
        "padding": "4px 10px",
        "borderRadius": "12px",
        "margin": "2px",
        "display": "inline-flex",
        "alignItems": "center",
        "fontSize": "14px",
        "cursor": "pointer",
    }
    hover_target = dmc.HoverCardTarget(html.Span(text, style=chip_style))
    hover_dropdown = dmc.HoverCardDropdown(dmc.Text(description, size="sm"))

    return dmc.HoverCard(
        width=220,
        shadow="md",
        position="top",
        children=[hover_target, hover_dropdown],
    )
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
# Render multiple chips in one row, each with popover
|
| 174 |
+
def render_chips(metadata_list, chip_color):
    """Render a row of metadata chips, each with a hover description.

    Args:
        metadata_list: Iterable of ``(icon, name, meta_type)`` tuples. When
            ``icon`` is a string ending in an image extension it is rendered
            as an ``<img>`` next to ``name``; otherwise it is treated as
            inline text (e.g. an emoji) placed before ``name``.
        chip_color: Background colour applied to every chip.

    Returns:
        An ``html.Div`` laid out as a wrapping flex row containing one
        ``dmc.HoverCard`` per entry.
    """
    image_suffixes = (".png", ".jpg", ".jpeg", ".svg")
    # "center" is a valid align-items value (the previous "left" is not) and
    # matches the style used by chip_with_hovercard for text chips.
    image_chip_style = {
        "backgroundColor": chip_color,
        "padding": "4px 10px",
        "borderRadius": "12px",
        "margin": "2px",
        "display": "inline-flex",
        "alignItems": "center",
        "fontSize": "14px",
        "cursor": "pointer",
    }

    chips = []
    for icon, name, meta_type in metadata_list:
        if isinstance(icon, str) and icon.endswith(image_suffixes):
            # Image icon: show the picture followed by the plain name.
            chips.append(
                dmc.HoverCard(
                    width=220,
                    shadow="md",
                    position="top",
                    children=[
                        dmc.HoverCardTarget(
                            html.Span(
                                [
                                    html.Img(
                                        src=icon,
                                        style={"height": "18px", "marginRight": "6px"},
                                    ),
                                    name,
                                ],
                                style=image_chip_style,
                            )
                        ),
                        dmc.HoverCardDropdown(
                            dmc.Text(
                                get_metadata_popover_content(icon, name, meta_type),
                                size="sm",
                            )
                        ),
                    ],
                )
            )
        else:
            # Text/emoji icon: prefix it only when present so chips without
            # an icon do not start with a stray space.
            label = f"{icon} {name}" if icon else f"{name}"
            chips.append(chip_with_hovercard(label, chip_color, meta_type, icon))

    return html.Div(
        chips, style={"display": "flex", "flexWrap": "wrap", "justifyContent": "left"}
    )
|
| 215 |
|
| 216 |
|
| 217 |
def render_table_content(
|
| 218 |
+
df, download_df, chip_color, bar_color="#AC482A", filename="data"
|
| 219 |
):
|
| 220 |
return html.Div(
|
| 221 |
[
|
| 222 |
+
# Add download button above the table
|
| 223 |
+
df_to_download_link(download_df, filename),
|
| 224 |
html.Table(
|
| 225 |
[
|
| 226 |
html.Thead(
|
|
|
|
| 280 |
]
|
| 281 |
)
|
| 282 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
# Function to get top N leaderboard (now accepts pandas DataFrame from DuckDB query)
|
| 284 |
def get_top_n_leaderboard(filtered_df, group_col, top_n=10):
|
| 285 |
"""
|
|
|
|
| 293 |
Returns:
|
| 294 |
tuple: (display_df, download_df)
|
| 295 |
"""
|
| 296 |
+
|
| 297 |
# Group by and get top N
|
| 298 |
top = (
|
| 299 |
+
filtered_df.groupby(group_col)[["total_downloads", "percent_of_total"]]
|
| 300 |
.sum()
|
| 301 |
+
.nlargest(top_n, columns="total_downloads")
|
| 302 |
.reset_index()
|
| 303 |
+
.rename(columns={group_col: "Name", "total_downloads": "Total Value", "percent_of_total": "% of total"})
|
| 304 |
)
|
|
|
|
|
|
|
|
|
|
| 305 |
|
| 306 |
# Create a downloadable version of the leaderboard
|
| 307 |
download_top = top.copy()
|
|
|
|
| 340 |
c = "USA"
|
| 341 |
if c == "user":
|
| 342 |
c = "User"
|
| 343 |
+
# Try countryflag.getflag(), fallback to dictionary if fails
|
| 344 |
+
try:
|
| 345 |
+
flag_emoji = countryflag.getflag(c)
|
| 346 |
+
# If countryflag returns empty or None, fallback
|
| 347 |
+
if not flag_emoji or flag_emoji == c:
|
| 348 |
+
flag_emoji = country_emoji_fallback.get(c, "🌍")
|
| 349 |
+
except Exception:
|
| 350 |
+
flag_emoji = country_emoji_fallback.get(c, "🌍")
|
| 351 |
+
chips.append((flag_emoji, c, "country"))
|
| 352 |
|
| 353 |
# Author
|
| 354 |
for a in meta.get("author", []):
|
|
|
|
| 358 |
icon = "🏢"
|
| 359 |
else:
|
| 360 |
icon = "👤"
|
| 361 |
+
chips.append((icon, a, "author"))
|
| 362 |
|
| 363 |
# Downloads
|
| 364 |
total_downloads = sum(
|
| 365 |
+
d for d in meta.get("total_downloads", []) if pd.notna(d)
|
| 366 |
)
|
| 367 |
if total_downloads:
|
| 368 |
+
chips.append(("⬇️", f"{int(total_downloads):,}", "downloads"))
|
| 369 |
|
| 370 |
# Modality
|
| 371 |
for m in meta.get("merged_modality", []):
|
| 372 |
if pd.notna(m):
|
| 373 |
+
chips.append(("", m, "modality"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 374 |
|
| 375 |
return chips
|
| 376 |
|
|
|
|
| 402 |
return top[["Name", "Metadata", "% of total"]], download_top
|
| 403 |
|
| 404 |
|
| 405 |
+
def get_top_n_from_duckdb(con, group_col, top_n=10, time_filter=None, view="all_downloads"):
|
| 406 |
"""
|
| 407 |
Query DuckDB directly to get top N entries with minimal data transfer
|
| 408 |
|
|
|
|
| 422 |
end = pd.to_datetime(time_filter[1], unit="s")
|
| 423 |
time_clause = f"WHERE time >= '{start}' AND time <= '{end}'"
|
| 424 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 425 |
# Optimized query: first find top N, then get only those rows
|
| 426 |
query = f"""
|
| 427 |
WITH base_data AS (
|
| 428 |
SELECT
|
| 429 |
{group_col},
|
| 430 |
+
CASE
|
| 431 |
+
WHEN org_country_single = 'HF' THEN 'United States of America'
|
| 432 |
+
WHEN org_country_single = 'International' THEN 'International/Online'
|
| 433 |
+
WHEN org_country_single = 'Online' THEN 'International/Online'
|
| 434 |
+
ELSE org_country_single
|
| 435 |
+
END AS org_country_single,
|
| 436 |
author,
|
| 437 |
merged_country_groups_single,
|
| 438 |
merged_modality,
|
| 439 |
downloads,
|
|
|
|
| 440 |
model
|
| 441 |
+
FROM {view}
|
| 442 |
{time_clause}
|
| 443 |
),
|
| 444 |
+
|
| 445 |
+
-- Compute the total downloads for all rows in the time range
|
| 446 |
+
total_downloads_cte AS (
|
| 447 |
+
SELECT SUM(downloads) AS total_downloads_all
|
| 448 |
+
FROM base_data
|
| 449 |
+
),
|
| 450 |
+
|
| 451 |
+
-- Compute per-group totals and their percentage of all downloads
|
| 452 |
top_items AS (
|
| 453 |
SELECT
|
| 454 |
+
b.{group_col} AS name,
|
| 455 |
+
SUM(b.downloads) AS total_downloads,
|
| 456 |
+
ROUND(SUM(b.downloads) * 100.0 / t.total_downloads_all, 2) AS percent_of_total,
|
| 457 |
+
-- Pick first non-null metadata values for reference
|
| 458 |
+
ANY_VALUE(b.org_country_single) AS org_country_single,
|
| 459 |
+
ANY_VALUE(b.author) AS author,
|
| 460 |
+
ANY_VALUE(b.merged_country_groups_single) AS merged_country_groups_single,
|
| 461 |
+
ANY_VALUE(b.merged_modality) AS merged_modality,
|
| 462 |
+
ANY_VALUE(b.model) AS model
|
| 463 |
+
FROM base_data b
|
| 464 |
+
CROSS JOIN total_downloads_cte t
|
| 465 |
+
GROUP BY b.{group_col}, t.total_downloads_all
|
| 466 |
)
|
| 467 |
+
|
| 468 |
+
SELECT *
|
| 469 |
+
FROM top_items
|
| 470 |
+
ORDER BY total_downloads DESC
|
| 471 |
+
LIMIT {top_n};
|
| 472 |
"""
|
| 473 |
+
|
| 474 |
try:
|
| 475 |
return con.execute(query).fetchdf()
|
| 476 |
except Exception as e:
|
| 477 |
print(f"Error querying DuckDB: {e}")
|
| 478 |
+
return pd.DataFrame()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -5,4 +5,5 @@ gunicorn
|
|
| 5 |
dash-mantine-components
|
| 6 |
dash-bootstrap-components
|
| 7 |
pyarrow
|
| 8 |
-
duckdb
|
|
|
|
|
|
| 5 |
dash-mantine-components
|
| 6 |
dash-bootstrap-components
|
| 7 |
pyarrow
|
| 8 |
+
duckdb
|
| 9 |
+
countryflag
|