emsesc committed on
Commit
35ffa10
·
1 Parent(s): f136ea6

design overhaul and add switch functionality

Browse files
app.py CHANGED
@@ -4,49 +4,62 @@ import dash_mantine_components as dmc
4
  import duckdb
5
  import time
6
  from graphs.leaderboard import (
7
- create_leaderboard,
8
  get_top_n_leaderboard,
9
  render_table_content,
10
  )
 
11
 
12
  # Initialize the app
13
  app = Dash()
14
  server = app.server
15
 
16
- # DuckDB connection (global)
17
- con = duckdb.connect(database=':memory:', read_only=False)
18
-
19
- # Load parquet file from Hugging Face using DuckDB
20
- HF_DATASET_ID = "emsesc/open_model_evolution_data"
21
- hf_parquet_url = "https://huggingface.co/datasets/emsesc/open_model_evolution_data/resolve/main/filtered_df.parquet"
22
-
23
- print(f"Attempting to connect to dataset from Hugging Face Hub: {HF_DATASET_ID}")
24
- try:
25
- overall_start_time = time.time()
26
-
27
  # Install and load httpfs extension for remote file access
28
  con.execute("INSTALL httpfs;")
29
  con.execute("LOAD httpfs;")
30
-
31
  # Create a view that references the remote parquet file
32
  con.execute(f"""
33
- CREATE OR REPLACE VIEW filtered_df AS
34
- SELECT * FROM read_parquet('{hf_parquet_url}')
35
  """)
36
-
37
- # Get column list and basic info
38
- columns = con.execute("DESCRIBE filtered_df").fetchdf()
39
- print("Columns:", columns['column_name'].tolist())
40
-
41
  # Get time range for slider
42
- time_range = con.execute("SELECT MIN(time) as min_time, MAX(time) as max_time FROM filtered_df").fetchdf()
43
- start_dt = pd.to_datetime(time_range['min_time'].iloc[0])
44
- end_dt = pd.to_datetime(time_range['max_time'].iloc[0])
45
-
46
- msg = f"Successfully connected to dataset in {time.time() - overall_start_time:.2f}s."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  print(msg)
48
  except Exception as e:
49
- err_msg = f"Failed to load dataset. Error: {e}"
50
  print(err_msg)
51
  raise
52
 
@@ -54,6 +67,18 @@ except Exception as e:
54
  start_ts = int(start_dt.timestamp())
55
  end_ts = int(end_dt.timestamp())
56
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  marks = []
58
  # Add start label (e.g. "Jan 2020")
59
  marks.append({"value": start_ts, "label": start_dt.strftime("%b %Y")})
@@ -66,6 +91,59 @@ for yr in range(start_dt.year, end_dt.year + 1):
66
  # Add end label (e.g. "Dec 2024")
67
  marks.append({"value": end_ts, "label": end_dt.strftime("%b %Y")})
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  # Create a dcc slider for time range selection by year
70
  time_slider = dmc.RangeSlider(
71
  id="time-slider",
@@ -80,11 +158,14 @@ time_slider = dmc.RangeSlider(
80
  size="md",
81
  radius="xl",
82
  marks=marks,
83
- style={"width": "70%", "margin": "0 auto"},
84
- labelAlwaysOn=False,
 
 
 
85
  )
86
 
87
- # App layout
88
  app.layout = dmc.MantineProvider(
89
  theme={
90
  "colorScheme": "light",
@@ -92,6 +173,7 @@ app.layout = dmc.MantineProvider(
92
  "fontFamily": "Inter, sans-serif",
93
  },
94
  children=[
 
95
  html.Div(
96
  [
97
  # Header
@@ -129,7 +211,7 @@ app.layout = dmc.MantineProvider(
129
  html.A(
130
  children=[
131
  html.Img(
132
- src="assets/images/dpi-logo.svg",
133
  style={
134
  "height": "28px",
135
  "verticalAlign": "middle",
@@ -140,12 +222,13 @@ app.layout = dmc.MantineProvider(
140
  ],
141
  href="https://www.dataprovenance.org/",
142
  target="_blank",
 
143
  style={
144
  "display": "inline-block",
145
  "padding": "6px 14px",
146
  "fontSize": 13,
147
- "color": "#082030",
148
- "backgroundColor": "#ffffff",
149
  "borderRadius": "18px",
150
  "fontWeight": "700",
151
  "textDecoration": "none",
@@ -155,22 +238,51 @@ app.layout = dmc.MantineProvider(
155
  html.A(
156
  children=[
157
  html.Img(
158
- src="assets/images/Hf-logo-with-title.svg",
159
  style={
160
  "height": "30px",
161
  "verticalAlign": "middle",
162
  },
163
- )
 
 
 
 
164
  ],
165
  href="https://huggingface.co/",
166
  target="_blank",
 
167
  style={
168
  "display": "inline-flex",
169
  "padding": "6px 14px",
170
  "alignItems": "center",
171
- "backgroundColor": "#ffffff",
172
  "borderRadius": "18px",
173
  "textDecoration": "none",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  },
175
  ),
176
  ],
@@ -206,29 +318,8 @@ app.layout = dmc.MantineProvider(
206
  "marginBottom": 20,
207
  },
208
  ),
209
- # Button
210
  html.Div(
211
- children=[
212
- html.Button(
213
- "Read the paper",
214
- id="my-button",
215
- style={
216
- "padding": "10px 20px",
217
- "fontSize": 16,
218
- "margin": "0 auto",
219
- "display": "block",
220
- "backgroundColor": "#AC482A",
221
- "color": "white",
222
- "border": "none",
223
- "borderRadius": "5px",
224
- "cursor": "pointer",
225
- },
226
- ),
227
- ],
228
- style={"textAlign": "center", "marginBottom": 20},
229
- ),
230
- html.Div(
231
- children="Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s...",
232
  style={
233
  "fontSize": 14,
234
  "marginTop": 18,
@@ -268,6 +359,42 @@ app.layout = dmc.MantineProvider(
268
  ],
269
  mb=10,
270
  ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
  html.Span(
272
  id="global-toggle-status",
273
  style={
@@ -290,8 +417,74 @@ app.layout = dmc.MantineProvider(
290
  },
291
  ),
292
  time_slider,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293
  ],
294
- style={"flex": 2, "minWidth": "320px"},
 
 
 
 
 
 
 
295
  ),
296
  ],
297
  style={
@@ -329,7 +522,27 @@ app.layout = dmc.MantineProvider(
329
  "borderBottom": "3px solid #082030",
330
  },
331
  children=[
332
- create_leaderboard(con, "countries")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
  ],
334
  ),
335
  dcc.Tab(
@@ -350,7 +563,27 @@ app.layout = dmc.MantineProvider(
350
  "borderBottom": "3px solid #082030",
351
  },
352
  children=[
353
- create_leaderboard(con, "developers")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
354
  ],
355
  ),
356
  dcc.Tab(
@@ -371,7 +604,27 @@ app.layout = dmc.MantineProvider(
371
  "borderBottom": "3px solid #082030",
372
  },
373
  children=[
374
- create_leaderboard(con, "models")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
375
  ],
376
  ),
377
  ],
@@ -381,7 +634,7 @@ app.layout = dmc.MantineProvider(
381
  "borderRadius": "18px",
382
  "padding": "32px",
383
  "marginTop": "12px",
384
- "marginBottom": "64px",
385
  "marginLeft": "50px",
386
  "marginRight": "50px",
387
  },
@@ -396,30 +649,21 @@ app.layout = dmc.MantineProvider(
396
  ],
397
  )
398
 
 
399
  # Callbacks for interactivity
400
  # -- helper utilities to consolidate duplicated callback logic --
401
- def _get_filtered_top_n_from_duckdb(slider_value, group_col, top_n):
402
  """
403
  Query DuckDB directly to get top N entries with metadata
404
  This minimizes data transfer by doing aggregation in DuckDB
405
  """
406
  # Build time filter clause
407
- time_filter = ""
408
  if slider_value and len(slider_value) == 2:
409
  start = pd.to_datetime(slider_value[0], unit="s")
410
  end = pd.to_datetime(slider_value[1], unit="s")
411
- time_filter = f"WHERE time >= '{start}' AND time <= '{end}'"
412
-
413
- # Apply country replacements in the query
414
- country_case = """
415
- CASE
416
- WHEN org_country_single = 'HF' THEN 'United States of America'
417
- WHEN org_country_single = 'International' THEN 'International/Online'
418
- WHEN org_country_single = 'Online' THEN 'International/Online'
419
- ELSE org_country_single
420
- END as org_country_single
421
- """
422
-
423
  # Build the aggregation query to get top N with all needed metadata
424
  # This query groups by the target column and aggregates downloads
425
  # while collecting all metadata we need for chips
@@ -427,35 +671,63 @@ def _get_filtered_top_n_from_duckdb(slider_value, group_col, top_n):
427
  WITH base_data AS (
428
  SELECT
429
  {group_col},
430
- {country_case},
 
 
 
 
 
431
  author,
432
  merged_country_groups_single,
433
  merged_modality,
434
  downloads,
435
- estimated_parameters,
436
  model
437
- FROM filtered_df
438
- {time_filter}
439
  ),
440
- aggregated AS (
441
- SELECT
442
- {group_col} as name,
443
- SUM(downloads) as total_downloads
444
  FROM base_data
445
- GROUP BY {group_col}
446
- ORDER BY total_downloads DESC
447
- LIMIT {top_n}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448
  )
449
- SELECT
450
- b.*
451
- FROM base_data b
452
- INNER JOIN aggregated a ON b.{group_col} = a.name
453
- ORDER BY a.total_downloads DESC
454
  """
455
-
456
  return con.execute(query).fetchdf()
457
 
458
- def _leaderboard_callback_logic(n_clicks, slider_value, current_label, group_col, filename, default_label="▼ Show Top 50", chip_color="#F0F9FF"):
 
 
 
 
 
 
 
 
 
 
459
  # Normalize label on first load
460
  if current_label is None:
461
  current_label = default_label
@@ -472,22 +744,28 @@ def _leaderboard_callback_logic(n_clicks, slider_value, current_label, group_col
472
  top_n, new_label = 10, "▼ Show Top 50"
473
 
474
  # Get filtered and aggregated data directly from DuckDB
475
- df_filtered = _get_filtered_top_n_from_duckdb(slider_value, group_col, top_n)
476
-
477
  # Process the already-filtered data
478
  df, download_df = get_top_n_leaderboard(df_filtered, group_col, top_n)
479
- return render_table_content(df, download_df, chip_color=chip_color, filename=filename), new_label
 
 
 
 
480
  # -- end helpers --
481
 
 
482
  # Callbacks for interactivity (modularized)
483
  @app.callback(
484
  Output("top_countries-table", "children"),
485
  Output("top_countries-toggle", "children"),
486
  Input("top_countries-toggle", "n_clicks"),
487
  Input("time-slider", "value"),
 
488
  State("top_countries-toggle", "children"),
489
  )
490
- def update_top_countries(n_clicks, slider_value, current_label):
491
  return _leaderboard_callback_logic(
492
  n_clicks,
493
  slider_value,
@@ -496,34 +774,40 @@ def update_top_countries(n_clicks, slider_value, current_label):
496
  filename="top_countries",
497
  default_label="▼ Show Top 50",
498
  chip_color="#F0F9FF",
 
499
  )
500
 
 
501
  @app.callback(
502
  Output("top_developers-table", "children"),
503
  Output("top_developers-toggle", "children"),
504
  Input("top_developers-toggle", "n_clicks"),
505
  Input("time-slider", "value"),
 
506
  State("top_developers-toggle", "children"),
507
  )
508
- def update_top_developers(n_clicks, slider_value, current_label):
509
  return _leaderboard_callback_logic(
510
  n_clicks,
511
  slider_value,
512
  current_label,
513
  group_col="author",
514
  filename="top_developers",
515
- default_label="▼ Show More",
516
  chip_color="#F0F9FF",
 
517
  )
518
 
 
519
  @app.callback(
520
  Output("top_models-table", "children"),
521
  Output("top_models-toggle", "children"),
522
  Input("top_models-toggle", "n_clicks"),
523
  Input("time-slider", "value"),
 
524
  State("top_models-toggle", "children"),
525
  )
526
- def update_top_models(n_clicks, slider_value, current_label):
527
  return _leaderboard_callback_logic(
528
  n_clicks,
529
  slider_value,
@@ -532,18 +816,29 @@ def update_top_models(n_clicks, slider_value, current_label):
532
  filename="top_models",
533
  default_label="▼ Show More",
534
  chip_color="#F0F9FF",
 
535
  )
536
 
 
537
  @app.callback(
538
- Output("time-slider", "label"),
539
- Input("time-slider", "value")
540
  )
541
- def update_range_labels(values):
542
- start_label = pd.to_datetime(values[0], unit="s").strftime("%b %Y")
543
- end_label = pd.to_datetime(values[1], unit="s").strftime("%b %Y")
544
- return [start_label, end_label]
545
 
 
 
 
 
 
 
 
 
 
546
 
547
  # Run the app
548
  if __name__ == "__main__":
549
- app.run(debug=True)
 
 
 
4
  import duckdb
5
  import time
6
  from graphs.leaderboard import (
7
+ button_style,
8
  get_top_n_leaderboard,
9
  render_table_content,
10
  )
11
+ from dash_iconify import DashIconify
12
 
13
  # Initialize the app
14
  app = Dash()
15
  server = app.server
16
 
17
def load_parquet_to_duckdb(con, parquet_url, view_name):
    """
    Register a remote parquet file as a DuckDB view and return its time span.

    Args:
        con: DuckDB connection on which the view is created.
        parquet_url: URL of the parquet file, read through the httpfs extension.
        view_name: Name of the view to create or replace. Must be a plain
            identifier because it is spliced into the SQL text.

    Returns:
        (start_dt, end_dt): MIN and MAX of the view's ``time`` column,
        converted with ``pd.to_datetime``.

    Raises:
        ValueError: If ``view_name`` is not a plain identifier.
    """
    # Identifiers cannot be bound as query parameters, so reject anything that
    # could smuggle extra SQL into the statements below.
    if not isinstance(view_name, str) or not view_name.isidentifier():
        raise ValueError(f"Invalid DuckDB view name: {view_name!r}")
    # Double single quotes so the URL cannot terminate its SQL string literal.
    safe_url = str(parquet_url).replace("'", "''")

    # Install and load httpfs extension for remote file access
    con.execute("INSTALL httpfs;")
    con.execute("LOAD httpfs;")

    # Create a view that references the remote parquet file
    con.execute(f"""
        CREATE OR REPLACE VIEW {view_name} AS
        SELECT * FROM read_parquet('{safe_url}')
    """)

    # Get time range for slider
    time_range = con.execute(
        f"SELECT MIN(time) as min_time, MAX(time) as max_time FROM {view_name}"
    ).fetchdf()
    start_dt = pd.to_datetime(time_range["min_time"].iloc[0])
    end_dt = pd.to_datetime(time_range["max_time"].iloc[0])
    return start_dt, end_dt
39
+
40
# Shared in-memory DuckDB connection used by every query in this module.
con = duckdb.connect(database=":memory:", read_only=False)

# Remote parquet sources hosted on the Hugging Face Hub.
HF_DATASET_ID = "emsesc/open_model_evolution_data"
hf_parquet_url_1 = "https://huggingface.co/datasets/emsesc/open_model_evolution_data/resolve/main/all_downloads_with_annotations.parquet"
hf_parquet_url_2 = "https://huggingface.co/datasets/emsesc/open_model_evolution_data/resolve/main/one_year_rolling.parquet"

print(f"Attempting to connect to dataset from Hugging Face Hub: {HF_DATASET_ID}")
try:
    overall_start_time = time.time()

    # Register each parquet file as its own view; each call also returns the
    # min/max of that view's `time` column.
    start_dt, end_dt = load_parquet_to_duckdb(con, hf_parquet_url_1, "all_downloads")
    start_dt2, end_dt2 = load_parquet_to_duckdb(con, hf_parquet_url_2, "one_year_rolling")

    msg = (
        f"Successfully connected to datasets in {time.time() - overall_start_time:.2f}s."
    )
    print(msg)
except Exception as e:
    # Report the failure, then re-raise so the module import fails.
    err_msg = f"Failed to load dataset(s). Error: {e}"
    print(err_msg)
    raise
65
 
 
67
  start_ts = int(start_dt.timestamp())
68
  end_ts = int(end_dt.timestamp())
69
 
70
def ordinal(n):
    """
    Return ``n`` followed by its English ordinal suffix (1st, 2nd, 3rd, 4th...).

    Args:
        n: Integer to format, e.g. a day of the month.

    Returns:
        The number with its suffix appended, e.g. ``"21st"``.
    """
    # 11, 12 and 13 (and 111, 112, ...) take "th" despite ending in 1, 2 or 3.
    if 11 <= n % 100 <= 13:
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th")
    return f"{n}{suffix}"
77
+
78
def format_date(dt):
    """Format ``dt`` as abbreviated month, ordinal day and year, e.g. "Oct 8th, 2025"."""
    month = dt.strftime("%b")
    day = ordinal(dt.day)
    return f"{month} {day}, {dt.year}"
81
+
82
  marks = []
83
  # Add start label (e.g. "Jan 2020")
84
  marks.append({"value": start_ts, "label": start_dt.strftime("%b %Y")})
 
91
  # Add end label (e.g. "Dec 2024")
92
  marks.append({"value": end_ts, "label": end_dt.strftime("%b %Y")})
93
 
94
def get_thumb_labels(values):
    """
    Build the two date labels rendered inside the range-slider thumbs.

    Args:
        values: Two-element sequence of Unix timestamps in seconds
            (first thumb, second thumb).

    Returns:
        A list of two ``html.Div`` elements, one per thumb, each holding the
        formatted date for that thumb.
    """
    first_ts, second_ts = values[0], values[1]
    # Thumbs closer than about 4 months (4 * 30 days, in seconds) count as close.
    close = abs(second_ts - first_ts) < 4 * 30 * 86400

    label_style = {
        "background": "#fff",
        "color": "#082030",
        "fontWeight": "bold",
        "fontSize": "13px",
        "borderRadius": "8px",
        "padding": "2px 8px",
        "boxShadow": "0 1px 4px rgba(8,32,48,0.10)",
        "position": "absolute",
        "left": "50%",
        "transform": "translateX(-50%)",
        "whiteSpace": "nowrap",
        "zIndex": 100,
    }

    # When the thumbs are close, the first label moves above the track so the
    # two labels do not overlap; otherwise both sit just below it.
    first_top = "-38px" if close else "14px"
    second_top = "14px"

    return [
        html.Div(
            format_date(pd.to_datetime(first_ts, unit="s")),
            style={**label_style, "top": first_top},
        ),
        html.Div(
            format_date(pd.to_datetime(second_ts, unit="s")),
            style={**label_style, "top": second_top},
        ),
    ]
146
+
147
  # Create a dcc slider for time range selection by year
148
  time_slider = dmc.RangeSlider(
149
  id="time-slider",
 
158
  size="md",
159
  radius="xl",
160
  marks=marks,
161
+ style={"width": "95%", "paddingLeft": "60px"}, # updated paddingLeft
162
+ label=None,
163
+ showLabelOnHover=False,
164
+ labelTransitionProps={"transition": "fade", "duration": 150},
165
+ thumbChildren=get_thumb_labels([start_ts, end_ts]),
166
  )
167
 
168
+ # Add a dcc.Store to hold the selected view (all_downloads or one_year_rolling)
169
  app.layout = dmc.MantineProvider(
170
  theme={
171
  "colorScheme": "light",
 
173
  "fontFamily": "Inter, sans-serif",
174
  },
175
  children=[
176
+ dcc.Store(id="selected-view", data="all_downloads"),
177
  html.Div(
178
  [
179
  # Header
 
211
  html.A(
212
  children=[
213
  html.Img(
214
+ src="assets/images/dpi.svg",
215
  style={
216
  "height": "28px",
217
  "verticalAlign": "middle",
 
222
  ],
223
  href="https://www.dataprovenance.org/",
224
  target="_blank",
225
+ className="no-bg-link header-link",
226
  style={
227
  "display": "inline-block",
228
  "padding": "6px 14px",
229
  "fontSize": 13,
230
+ "color": "#FFFFFF", # white on dark header
231
+ # background removed so CSS controls it
232
  "borderRadius": "18px",
233
  "fontWeight": "700",
234
  "textDecoration": "none",
 
238
  html.A(
239
  children=[
240
  html.Img(
241
+ src="assets/images/hf.svg",
242
  style={
243
  "height": "30px",
244
  "verticalAlign": "middle",
245
  },
246
+ ),
247
+ html.Span(
248
+ "Hugging Face",
249
+ className="hf-brand-text",
250
+ ),
251
  ],
252
  href="https://huggingface.co/",
253
  target="_blank",
254
+ className="no-bg-link header-link",
255
  style={
256
  "display": "inline-flex",
257
  "padding": "6px 14px",
258
  "alignItems": "center",
259
+ "color": "#FFFFFF",
260
  "borderRadius": "18px",
261
  "textDecoration": "none",
262
+ "marginRight": "12px",
263
+ },
264
+ ),
265
+ html.A(
266
+ children=[
267
+ html.Span(
268
+ "Read the paper",
269
+ className="paper-text",
270
+ ),
271
+ ],
272
+ href="https://www.google.com/",
273
+ target="_blank",
274
+ className="no-bg-link header-link paper-link",
275
+ style={
276
+ "display": "inline-flex",
277
+ "alignItems": "center",
278
+ "padding": "6px 12px", # decreased size
279
+ "fontSize": 14, # smaller text
280
+ "margin": "0 auto",
281
+ "backgroundColor": "#AC482A",
282
+ "color": "#FFFFFF",
283
+ "borderRadius": "5px",
284
+ "textDecoration": "none",
285
+ "fontWeight": "700",
286
  },
287
  ),
288
  ],
 
318
  "marginBottom": 20,
319
  },
320
  ),
 
321
  html.Div(
322
+ children="The model leaderboard assesses concentrations of power across three hierarchies: countries, developers, and models. Explore how downloads are distributed among these groups and identify key players shaping the open model ecosystem on Hugging Face.",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
  style={
324
  "fontSize": 14,
325
  "marginTop": 18,
 
359
  ],
360
  mb=10,
361
  ),
362
+ html.Div(
363
+ "Choose whether to view all downloads or only filtered downloads for the leaderboard.",
364
+ style={
365
+ "fontSize": 13,
366
+ "color": "#555",
367
+ "marginBottom": "12px",
368
+ },
369
+ ),
370
+ # New segmented control below the first one
371
+ html.Div(
372
+ [
373
+ html.Div(
374
+ "Select Mode",
375
+ style={
376
+ "fontWeight": "700",
377
+ "marginBottom": 8,
378
+ "fontSize": 14,
379
+ },
380
+ ),
381
+ dmc.Switch(
382
+ color="#AC482A",
383
+ label="Derived Authors",
384
+ checked=True,
385
+ mb=10,
386
+ ),
387
+ html.Div(
388
+ "Switch between absolute numbers and relative percentages for leaderboard values.",
389
+ style={
390
+ "fontSize": 13,
391
+ "color": "#555",
392
+ "marginBottom": "12px",
393
+ },
394
+ ),
395
+ ],
396
+ style={"marginTop": "10px"},
397
+ ),
398
  html.Span(
399
  id="global-toggle-status",
400
  style={
 
417
  },
418
  ),
419
  time_slider,
420
+ html.Div(
421
+ "Adjust the time range to filter leaderboard results by model release date.",
422
+ style={
423
+ "fontSize": 13,
424
+ "color": "#555",
425
+ "marginTop": "32px", # increased from 24px
426
+ },
427
+ ),
428
+ # Tip section
429
+ html.Div(
430
+ [
431
+ html.Div(
432
+ [
433
+ DashIconify(
434
+ icon="mdi:lightbulb-on-outline",
435
+ width=20,
436
+ height=20,
437
+ style={"marginRight": "8px", "color": "#082030"},
438
+ ),
439
+ html.Span("Tip"),
440
+ ],
441
+ style={
442
+ "fontWeight": "700",
443
+ "fontSize": 15,
444
+ "marginBottom": "6px",
445
+ "color": "#082030",
446
+ "display": "flex",
447
+ "alignItems": "center",
448
+ },
449
+ ),
450
+ html.Div(
451
+ [
452
+ "Try switching between ",
453
+ html.Span("All Downloads", style={"fontWeight": "600", "color": "#AC482A"}),
454
+ " and ",
455
+ html.Span("Filtered Downloads", style={"fontWeight": "600", "color": "#AC482A"}),
456
+ " to compare ecosystem-wide vs. curated model trends. ",
457
+ "You can also toggle between ",
458
+ html.Span("Absolute", style={"fontWeight": "600", "color": "#AC482A"}),
459
+ " and ",
460
+ html.Span("Relative", style={"fontWeight": "600", "color": "#AC482A"}),
461
+ " to see raw counts or percentages."
462
+ ],
463
+ style={
464
+ "fontSize": 13,
465
+ "color": "#082030",
466
+ "lineHeight": "1.6",
467
+ },
468
+ ),
469
+ ],
470
+ style={
471
+ "backgroundColor": "#F5ECE6",
472
+ "borderRadius": "14px",
473
+ "padding": "18px 20px",
474
+ "marginTop": "28px",
475
+ "boxShadow": "0 1px 4px rgba(8,32,48,0.04)",
476
+ "border": "1px solid #f0e3d6",
477
+ },
478
+ ),
479
  ],
480
+ style={
481
+ "flex": 2,
482
+ "minWidth": "320px",
483
+ "display": "flex",
484
+ "flexDirection": "column",
485
+ "justifyContent": "center",
486
+ "height": "100%",
487
+ },
488
  ),
489
  ],
490
  style={
 
522
  "borderBottom": "3px solid #082030",
523
  },
524
  children=[
525
+ html.Div(
526
+ children="The model leaderboard assesses concentrations of power across three hierarchies: countries, developers, and models. Explore how downloads are distributed among these groups and identify key players shaping the open model ecosystem on Hugging Face.",
527
+ style={
528
+ "fontSize": 14,
529
+ "marginTop": 18,
530
+ "marginBottom": 12,
531
+ "textAlign": "left",
532
+ },
533
+ ),
534
+ dcc.Loading(
535
+ id="loading-countries",
536
+ type="circle",
537
+ color="#AC482A",
538
+ children=html.Div(id="top_countries-table")
539
+ ),
540
+ html.Button(
541
+ id="top_countries-toggle",
542
+ children="▼ Show Top 50",
543
+ n_clicks=0,
544
+ style={**button_style, "border": "none"},
545
+ ),
546
  ],
547
  ),
548
  dcc.Tab(
 
563
  "borderBottom": "3px solid #082030",
564
  },
565
  children=[
566
+ html.Div(
567
+ children="The model leaderboard assesses concentrations of power across three hierarchies: countries, developers, and models. Explore how downloads are distributed among these groups and identify key players shaping the open model ecosystem on Hugging Face.",
568
+ style={
569
+ "fontSize": 14,
570
+ "marginTop": 18,
571
+ "marginBottom": 12,
572
+ "textAlign": "left",
573
+ },
574
+ ),
575
+ dcc.Loading(
576
+ id="loading-developers",
577
+ type="circle",
578
+ color="#AC482A",
579
+ children=html.Div(id="top_developers-table")
580
+ ),
581
+ html.Button(
582
+ id="top_developers-toggle",
583
+ children="▼ Show Top 50",
584
+ n_clicks=0,
585
+ style={**button_style, "border": "none"},
586
+ ),
587
  ],
588
  ),
589
  dcc.Tab(
 
604
  "borderBottom": "3px solid #082030",
605
  },
606
  children=[
607
+ html.Div(
608
+ children="The model leaderboard assesses concentrations of power across three hierarchies: countries, developers, and models. Explore how downloads are distributed among these groups and identify key players shaping the open model ecosystem on Hugging Face.",
609
+ style={
610
+ "fontSize": 14,
611
+ "marginTop": 18,
612
+ "marginBottom": 12,
613
+ "textAlign": "left",
614
+ },
615
+ ),
616
+ dcc.Loading(
617
+ id="loading-models",
618
+ type="circle",
619
+ color="#AC482A",
620
+ children=html.Div(id="top_models-table")
621
+ ),
622
+ html.Button(
623
+ id="top_models-toggle",
624
+ children="▼ Show Top 50",
625
+ n_clicks=0,
626
+ style={**button_style, "border": "none"},
627
+ ),
628
  ],
629
  ),
630
  ],
 
634
  "borderRadius": "18px",
635
  "padding": "32px",
636
  "marginTop": "12px",
637
+ "marginBottom": "12px", # reduced from 64px
638
  "marginLeft": "50px",
639
  "marginRight": "50px",
640
  },
 
649
  ],
650
  )
651
 
652
+
653
  # Callbacks for interactivity
654
  # -- helper utilities to consolidate duplicated callback logic --
655
# Views registered at startup; the only relations a request may select from.
_ALLOWED_VIEWS = frozenset({"all_downloads", "one_year_rolling"})


def _get_filtered_top_n_from_duckdb(slider_value, group_col, top_n, view="all_downloads"):
    """
    Query DuckDB directly to get the top N groups by downloads, with metadata.

    Aggregation happens inside DuckDB so that only the top N rows are
    transferred back to Python.

    Args:
        slider_value: Optional two-element sequence of Unix timestamps in
            seconds. When present, rows are restricted to that ``time`` range.
        group_col: Column to group by. Must be a plain identifier because it
            is spliced into the SQL text.
        top_n: Maximum number of groups to return.
        view: Name of the DuckDB view to read from. It is supplied by the
            callbacks from the ``selected-view`` store, so it is checked
            against an allow-list before use.

    Returns:
        DataFrame with one row per group: ``name``, ``total_downloads``,
        ``percent_of_total`` and one arbitrary value of each metadata column,
        ordered by ``total_downloads`` descending.

    Raises:
        ValueError: If ``view`` is not an allowed view or ``group_col`` is not
            a plain identifier.
    """
    # `view` and `group_col` are interpolated into the SQL below and cannot be
    # bound as parameters, so validate them before building the query.
    if not isinstance(view, str) or view not in _ALLOWED_VIEWS:
        raise ValueError(f"Unknown leaderboard view: {view!r}")
    if not isinstance(group_col, str) or not group_col.isidentifier():
        raise ValueError(f"Invalid group column: {group_col!r}")
    top_n = int(top_n)

    # Build time filter clause
    time_clause = ""
    if slider_value and len(slider_value) == 2:
        # pd.to_datetime(..., unit="s") only accepts numeric input, so the
        # rendered timestamps are safe to embed as literals.
        start = pd.to_datetime(slider_value[0], unit="s")
        end = pd.to_datetime(slider_value[1], unit="s")
        time_clause = f"WHERE time >= '{start}' AND time <= '{end}'"

    # Build the aggregation query to get top N with all needed metadata
    # This query groups by the target column and aggregates downloads
    # while collecting all metadata we need for chips
    query = f"""
    WITH base_data AS (
        SELECT
            {group_col},
            CASE
                WHEN org_country_single = 'HF' THEN 'United States of America'
                WHEN org_country_single = 'International' THEN 'International/Online'
                WHEN org_country_single = 'Online' THEN 'International/Online'
                ELSE org_country_single
            END AS org_country_single,
            author,
            merged_country_groups_single,
            merged_modality,
            downloads,
            model
        FROM {view}
        {time_clause}
    ),

    -- Compute the total downloads for all rows in the time range
    total_downloads_cte AS (
        SELECT SUM(downloads) AS total_downloads_all
        FROM base_data
    ),

    -- Compute per-group totals and their percentage of all downloads
    top_items AS (
        SELECT
            b.{group_col} AS name,
            SUM(b.downloads) AS total_downloads,
            ROUND(SUM(b.downloads) * 100.0 / t.total_downloads_all, 2) AS percent_of_total,
            -- Pick an arbitrary metadata value per group for reference
            ANY_VALUE(b.org_country_single) AS org_country_single,
            ANY_VALUE(b.author) AS author,
            ANY_VALUE(b.merged_country_groups_single) AS merged_country_groups_single,
            ANY_VALUE(b.merged_modality) AS merged_modality,
            ANY_VALUE(b.model) AS model
        FROM base_data b
        CROSS JOIN total_downloads_cte t
        GROUP BY b.{group_col}, t.total_downloads_all
    )

    SELECT *
    FROM top_items
    ORDER BY total_downloads DESC
    LIMIT {top_n};
    """

    return con.execute(query).fetchdf()
719
 
720
+
721
+ def _leaderboard_callback_logic(
722
+ n_clicks,
723
+ slider_value,
724
+ current_label,
725
+ group_col,
726
+ filename,
727
+ default_label="▼ Show Top 50",
728
+ chip_color="#F0F9FF",
729
+ view="all_downloads",
730
+ ):
731
  # Normalize label on first load
732
  if current_label is None:
733
  current_label = default_label
 
744
  top_n, new_label = 10, "▼ Show Top 50"
745
 
746
  # Get filtered and aggregated data directly from DuckDB
747
+ df_filtered = _get_filtered_top_n_from_duckdb(slider_value, group_col, top_n, view=view)
748
+
749
  # Process the already-filtered data
750
  df, download_df = get_top_n_leaderboard(df_filtered, group_col, top_n)
751
+ return render_table_content(
752
+ df, download_df, chip_color=chip_color, filename=filename
753
+ ), new_label
754
+
755
+
756
  # -- end helpers --
757
 
758
+
759
  # Callbacks for interactivity (modularized)
760
  @app.callback(
761
  Output("top_countries-table", "children"),
762
  Output("top_countries-toggle", "children"),
763
  Input("top_countries-toggle", "n_clicks"),
764
  Input("time-slider", "value"),
765
+ Input("selected-view", "data"),
766
  State("top_countries-toggle", "children"),
767
  )
768
+ def update_top_countries(n_clicks, slider_value, selected_view, current_label):
769
  return _leaderboard_callback_logic(
770
  n_clicks,
771
  slider_value,
 
774
  filename="top_countries",
775
  default_label="▼ Show Top 50",
776
  chip_color="#F0F9FF",
777
+ view=selected_view,
778
  )
779
 
780
+
781
@app.callback(
    Output("top_developers-table", "children"),
    Output("top_developers-toggle", "children"),
    Input("top_developers-toggle", "n_clicks"),
    Input("time-slider", "value"),
    Input("selected-view", "data"),
    State("top_developers-toggle", "children"),
)
def update_top_developers(n_clicks, slider_value, selected_view, current_label):
    """
    Refresh the developers leaderboard table and its toggle-button label.

    Fires when the toggle is clicked, the time slider moves, or the selected
    view changes. All work is delegated to the shared leaderboard helper,
    grouped by the ``author`` column.
    """
    developer_options = {
        "group_col": "author",
        "filename": "top_developers",
        "default_label": "▼ Show Top 50",
        "chip_color": "#F0F9FF",
        "view": selected_view,
    }
    return _leaderboard_callback_logic(
        n_clicks, slider_value, current_label, **developer_options
    )
800
 
801
+
802
  @app.callback(
803
  Output("top_models-table", "children"),
804
  Output("top_models-toggle", "children"),
805
  Input("top_models-toggle", "n_clicks"),
806
  Input("time-slider", "value"),
807
+ Input("selected-view", "data"),
808
  State("top_models-toggle", "children"),
809
  )
810
+ def update_top_models(n_clicks, slider_value, selected_view, current_label):
811
  return _leaderboard_callback_logic(
812
  n_clicks,
813
  slider_value,
 
816
  filename="top_models",
817
  default_label="▼ Show More",
818
  chip_color="#F0F9FF",
819
+ view=selected_view,
820
  )
821
 
822
+
823
@app.callback(
    Output("time-slider", "thumbChildren"),
    Input("time-slider", "value"),
)
def update_thumb_labels(values):
    """Rebuild the slider's thumb labels from its current value."""
    thumb_children = get_thumb_labels(values)
    return thumb_children
 
 
829
 
830
# --- Keep the "selected-view" store in sync with the segmented control ---
# NOTE(review): assumes a component with id "segmented" exists in the layout;
# it is not visible in this part of the file, so confirm.
@app.callback(
    Output("selected-view", "data"),
    Input("segmented", "value"),
)
def update_selected_view(seg_value):
    """
    Map the segmented-control value to the name of the DuckDB view to query.

    Returns "one_year_rolling" for "filtered-downloads" and "all_downloads"
    for any other value.
    """
    return "one_year_rolling" if seg_value == "filtered-downloads" else "all_downloads"
839
 
840
  # Run the app
841
if __name__ == "__main__":
    # Single entry point. A second identical guard would start the server
    # again once the first app.run() call returned.
    app.run(debug=True)
assets/images/Hf-logo-with-title.svg DELETED
assets/images/{dpi-logo.svg → dpi.svg} RENAMED
File without changes
assets/images/hf.svg ADDED
assets/styles.css ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Header links: transparent background, white text, grow on hover */
2
+ .no-bg-link {
3
+ background-color: transparent !important;
4
+ color: #ffffff !important;
5
+ transition: transform 0.12s ease, background-color 0.12s ease;
6
+ display: inline-flex;
7
+ align-items: center;
8
+ text-decoration: none !important;
9
+ padding: 6px 14px; /* keep spacing consistent with inline styles */
10
+ border-radius: 18px;
11
+ }
12
+
13
+ /* Subtle hover tint and scale for interactivity */
14
+ .no-bg-link:hover {
15
+ transform: scale(1.06);
16
+ background-color: rgba(255, 255, 255, 0.04) !important;
17
+ }
18
+
19
+ /* Make logo images scale smoothly on hover */
20
+ .no-bg-link img {
21
+ transition: transform 0.12s ease;
22
+ }
23
+ .no-bg-link:hover img {
24
+ transform: scale(1.06);
25
+ }
26
+
27
+ /* Read the paper button: scale up on hover */
28
+ .paper-button {
29
+ transition: transform 0.12s ease;
30
+ will-change: transform;
31
+ }
32
+ .paper-button:hover {
33
+ transform: scale(1.06);
34
+ }
35
+
36
+ /* Hugging Face brand text: Source Sans Pro (with system fallbacks) to visually match HF typography */
37
+ .hf-brand-text {
38
+ font-family: 'Source Sans Pro', system-ui, -apple-system, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, 'Noto Sans', sans-serif;
39
+ font-weight: 1000;
40
+ font-size: 14px;
41
+ line-height: 1;
42
+ margin-left: 8px;
43
+ color: #FFFFFF;
44
+ display: inline-block;
45
+ transform-origin: center;
46
+ }
47
+
48
+ /* Paper link: position relative for arrow, overflow visible */
49
+ .paper-link {
50
+ position: relative; /* needed for positioning the arrow */
51
+ overflow: visible;
52
+ background-color: #AC482A !important; /* restore previous button color */
53
+ color: #FFFFFF !important;
54
+ padding: 10px 20px; /* ensure spacing matches inline styles */
55
+ border-radius: 5px;
56
+ font-weight: 700;
57
+ display: inline-flex;
58
+ align-items: center;
59
+ text-decoration: none !important;
60
+ transition: transform var(--default-transition-duration, .15s) var(--default-transition-timing-function, cubic-bezier(.4,0,.2,1)), background-color var(--default-transition-duration, .15s);
61
+ }
62
+
63
+ /* Small arrow placed inline to the right of the text */
64
+ .paper-link::after {
65
+ content: "↗";
66
+ display: inline-block;
67
+ margin-left: 8px;
68
+ font-size: 12px;
69
+ color: #FFFFFF;
70
+ opacity: 0.95;
71
+ transition: transform var(--default-transition-duration, .15s) var(--default-transition-timing-function, cubic-bezier(.4,0,.2,1));
72
+ transform-origin: center;
73
+ pointer-events: none;
74
+ vertical-align: baseline;
75
+ }
76
+
77
+ /* Hover/focus states */
78
+ .paper-link:hover,
79
+ .paper-link:focus {
80
+ transform: scale(1.06);
81
+ background-color: #95371f !important; /* slightly darker on hover */
82
+ }
83
+ .paper-link:hover::after,
84
+ .paper-link:focus::after {
85
+ transform: scale(1.08);
86
+ }
87
+
88
+ /* Apply same interactive behavior to leaderboard "show top" toggles and download controls */
89
+ /* Matches elements with ids like "top_countries-toggle" and download anchors like "download-top_countries" */
90
+ [id$="-toggle"],
91
+ button[id$="-toggle"],
92
+ [id^="download-"],
93
+ a[id^="download-"],
94
+ button[id^="download-"],
95
+ .leaderboard-toggle,
96
+ .download-btn {
97
+ display: inline-flex;
98
+ align-items: center;
99
+ padding: 6px 12px;
100
+ font-size: 14px;
101
+ border-radius: 6px;
102
+ cursor: pointer;
103
+ text-decoration: none !important;
104
+ background-color: transparent;
105
+ color: #082030;
106
+ transition: transform var(--default-transition-duration, .15s) var(--default-transition-timing-function, cubic-bezier(.4,0,.2,1)), background-color var(--default-transition-duration, .15s);
107
+ will-change: transform;
108
+ }
109
+
110
/* Hover / focus: subtle scale similar to header links */
[id$="-toggle"]:hover,
[id^="download-"]:hover,
button[id$="-toggle"]:hover,
button[id^="download-"]:hover,
.leaderboard-toggle:hover,
.download-btn:hover,
[id$="-toggle"]:focus,
[id^="download-"]:focus,
button[id$="-toggle"]:focus,
button[id^="download-"]:focus {
  transform: scale(1.06);
}

/* Hide the focus ring only when focus did not come from keyboard navigation */
[id$="-toggle"]:focus:not(:focus-visible),
[id^="download-"]:focus:not(:focus-visible),
button[id$="-toggle"]:focus:not(:focus-visible),
button[id^="download-"]:focus:not(:focus-visible) {
  outline: none;
}

/* Keyboard users keep a visible focus indicator */
[id$="-toggle"]:focus-visible,
[id^="download-"]:focus-visible,
button[id$="-toggle"]:focus-visible,
button[id^="download-"]:focus-visible {
  outline: 2px solid #AC482A;
  outline-offset: 2px;
}
graphs/leaderboard.py CHANGED
@@ -3,6 +3,7 @@ from dash import html, dcc
3
  from dash_iconify import DashIconify
4
  import dash_mantine_components as dmc
5
  import base64
 
6
 
7
  button_style = {
8
  "display": "inline-block",
@@ -18,64 +19,6 @@ button_style = {
18
  "fontSize": "14px",
19
  }
20
 
21
- country_icon_map = {
22
- "USA": "🇺🇸",
23
- "China": "🇨🇳",
24
- "Germany": "🇩🇪",
25
- "France": "🇫🇷",
26
- "India": "🇮🇳",
27
- "Italy": "🇮🇹",
28
- "Japan": "🇯🇵",
29
- "South Korea": "🇰🇷",
30
- "United Kingdom": "🇬🇧",
31
- "Canada": "🇨🇦",
32
- "Brazil": "🇧🇷",
33
- "Australia": "🇦🇺",
34
- "Unknown": "❓",
35
- "Finland": "🇫🇮",
36
- "Lebanon": "🇱🇧",
37
- "Iceland": "🇮🇸",
38
- "Singapore": "🇸🇬",
39
- "Israel": "🇮🇱",
40
- "Iran": "🇮🇷",
41
- "Hong Kong": "🇭🇰",
42
- "Netherlands": "🇳🇱",
43
- "Chile": "🇨🇱",
44
- "Vietnam": "🇻🇳",
45
- "Russia": "🇷🇺",
46
- "Qatar": "🇶🇦",
47
- "Switzerland": "🇨🇭",
48
- "User": "👤",
49
- "International/Online": "🌐",
50
- "Spain": "🇪🇸",
51
- "Sweden": "🇸🇪",
52
- "Norway": "🇳🇴",
53
- "Denmark": "🇩🇰",
54
- "Austria": "🇦🇹",
55
- "Belgium": "🇧🇪",
56
- "Poland": "🇵🇱",
57
- "Turkey": "🇹🇷",
58
- "Mexico": "🇲🇽",
59
- "Argentina": "🇦🇷",
60
- "Thailand": "🇹🇭",
61
- "Indonesia": "🇮🇩",
62
- "Malaysia": "🇲🇾",
63
- "Philippines": "🇵🇭",
64
- "Egypt": "🇪🇬",
65
- "South Africa": "🇿🇦",
66
- "New Zealand": "🇳🇿",
67
- "Ireland": "🇮🇪",
68
- "Portugal": "🇵🇹",
69
- "Greece": "🇬🇷",
70
- "Czech Republic": "🇨🇿",
71
- "Romania": "🇷🇴",
72
- "Ukraine": "🇺🇦",
73
- "United Arab Emirates": "🇦🇪",
74
- "Saudi Arabia": "🇸🇦",
75
- "Pakistan": "🇵🇰",
76
- "Bangladesh": "🇧🇩",
77
- }
78
-
79
  company_icon_map = {
80
  "google": "../assets/icons/google.png",
81
  "distilbert": "../assets/icons/hugging-face.png",
@@ -84,6 +27,12 @@ company_icon_map = {
84
  "openai": "../assets/icons/openai.png",
85
  }
86
 
 
 
 
 
 
 
87
  meta_cols_map = {
88
  "org_country_single": ["org_country_single"],
89
  "author": ["org_country_single", "author", "merged_country_groups_single"],
@@ -92,7 +41,7 @@ meta_cols_map = {
92
  "author",
93
  "merged_country_groups_single",
94
  "merged_modality",
95
- "downloads",
96
  ],
97
  }
98
 
@@ -114,7 +63,7 @@ def chip(text, bg_color="#F0F0F0"):
114
 
115
 
116
  # Progress bar for % of total
117
- def progress_bar(percent, bar_color="#082030"):
118
  return html.Div(
119
  style={
120
  "position": "relative",
@@ -179,42 +128,99 @@ def df_to_download_link(df, filename):
179
  )
180
 
181
 
182
- # Render multiple chips in one row
183
- def render_chips(metadata_list, chip_color):
184
- chips = []
185
- for icon, name in metadata_list:
186
- if isinstance(icon, str) and icon.endswith((".png", ".jpg", ".jpeg", ".svg")):
187
- chips.append(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  html.Span(
189
- [
190
- html.Img(
191
- src=icon, style={"height": "18px", "marginRight": "6px"}
192
- ),
193
- name,
194
- ],
195
  style={
196
- "backgroundColor": chip_color,
197
  "padding": "4px 10px",
198
  "borderRadius": "12px",
199
  "margin": "2px",
200
  "display": "inline-flex",
201
- "alignItems": "left",
202
  "fontSize": "14px",
 
203
  },
204
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  )
206
  else:
207
- chips.append(chip(f"{icon} {name}", chip_color))
208
  return html.Div(
209
  chips, style={"display": "flex", "flexWrap": "wrap", "justifyContent": "left"}
210
  )
211
 
212
 
213
  def render_table_content(
214
- df, download_df, chip_color, bar_color="#082030", filename="data"
215
  ):
216
  return html.Div(
217
  [
 
 
218
  html.Table(
219
  [
220
  html.Thead(
@@ -274,129 +280,6 @@ def render_table_content(
274
  ]
275
  )
276
 
277
-
278
- # Table renderer
279
- def render_table(
280
- df, download_df, title, chip_color, bar_color="#AC482A", filename="data"
281
- ):
282
- return html.Div(
283
- id=f"{filename}-div",
284
- children=[
285
- html.Div(
286
- [
287
- html.H4(
288
- title,
289
- style={
290
- "textAlign": "left",
291
- "marginBottom": "10px",
292
- "fontSize": "20px",
293
- "display": "inline-block",
294
- },
295
- ),
296
- df_to_download_link(download_df, filename),
297
- ],
298
- style={
299
- "display": "flex",
300
- "alignItems": "center",
301
- "justifyContent": "space-between",
302
- },
303
- ),
304
- html.Div(
305
- id=f"{filename}-table",
306
- children=[
307
- html.Table(
308
- [
309
- html.Thead(
310
- html.Tr(
311
- [
312
- html.Th(
313
- "Rank",
314
- style={
315
- "backgroundColor": "#F0F0F0",
316
- "textAlign": "left",
317
- },
318
- ),
319
- html.Th(
320
- "Name",
321
- style={
322
- "backgroundColor": "#F0F0F0",
323
- "textAlign": "left",
324
- },
325
- ),
326
- html.Th(
327
- "Metadata",
328
- style={
329
- "backgroundColor": "#F0F0F0",
330
- "textAlign": "left",
331
- "marginRight": "10px",
332
- },
333
- ),
334
- html.Th(
335
- "% of Total",
336
- style={
337
- "backgroundColor": "#F0F0F0",
338
- "textAlign": "left",
339
- },
340
- ),
341
- ]
342
- )
343
- ),
344
- html.Tbody(
345
- [
346
- html.Tr(
347
- [
348
- html.Td(
349
- idx + 1, style={"textAlign": "center"}
350
- ),
351
- html.Td(
352
- row["Name"], style={"textAlign": "left"}
353
- ),
354
- html.Td(
355
- render_chips(
356
- row["Metadata"], chip_color
357
- )
358
- ),
359
- html.Td(
360
- progress_bar(
361
- row["% of total"], bar_color
362
- ),
363
- style={"textAlign": "center"},
364
- ),
365
- ]
366
- )
367
- for idx, row in df.iterrows()
368
- ]
369
- ),
370
- ],
371
- style={
372
- "borderCollapse": "collapse",
373
- "width": "100%",
374
- "border": "none",
375
- },
376
- ),
377
- ],
378
- ),
379
- dcc.Loading(
380
- id=f"loading-{filename}-toggle",
381
- type="dot",
382
- color="#082030",
383
- children=html.Div(
384
- [
385
- html.Button(
386
- "▼ Show Top 50",
387
- id=f"{filename}-toggle",
388
- n_clicks=0,
389
- style={**button_style, "border": "none"},
390
- )
391
- ],
392
- style={"marginTop": "5px", "textAlign": "left"},
393
- ),
394
- ),
395
- ],
396
- style={"marginBottom": "20px"},
397
- )
398
-
399
-
400
  # Function to get top N leaderboard (now accepts pandas DataFrame from DuckDB query)
401
  def get_top_n_leaderboard(filtered_df, group_col, top_n=10):
402
  """
@@ -410,17 +293,15 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10):
410
  Returns:
411
  tuple: (display_df, download_df)
412
  """
 
413
  # Group by and get top N
414
  top = (
415
- filtered_df.groupby(group_col)["downloads"]
416
  .sum()
417
- .nlargest(top_n)
418
  .reset_index()
419
- .rename(columns={group_col: "Name", "downloads": "Total Value"})
420
  )
421
-
422
- total_value = top["Total Value"].sum()
423
- top["% of total"] = top["Total Value"] / total_value * 100 if total_value else 0
424
 
425
  # Create a downloadable version of the leaderboard
426
  download_top = top.copy()
@@ -459,7 +340,15 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10):
459
  c = "USA"
460
  if c == "user":
461
  c = "User"
462
- chips.append((country_icon_map.get(c, "🌍"), c))
 
 
 
 
 
 
 
 
463
 
464
  # Author
465
  for a in meta.get("author", []):
@@ -469,32 +358,19 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10):
469
  icon = "🏢"
470
  else:
471
  icon = "👤"
472
- chips.append((icon, a))
473
 
474
  # Downloads
475
  total_downloads = sum(
476
- d for d in meta.get("downloads", []) if pd.notna(d)
477
  )
478
  if total_downloads:
479
- chips.append(("⬇️", f"{int(total_downloads):,}"))
480
 
481
  # Modality
482
  for m in meta.get("merged_modality", []):
483
  if pd.notna(m):
484
- chips.append(("", m))
485
-
486
- # Estimated Parameters
487
- for p in meta.get("estimated_parameters", []):
488
- if pd.notna(p):
489
- if p >= 1e9:
490
- p_str = f"{p / 1e9:.1f}B"
491
- elif p >= 1e6:
492
- p_str = f"{p / 1e6:.1f}M"
493
- elif p >= 1e3:
494
- p_str = f"{p / 1e3:.1f}K"
495
- else:
496
- p_str = str(int(p))
497
- chips.append(("⚙️", p_str))
498
 
499
  return chips
500
 
@@ -526,7 +402,7 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10):
526
  return top[["Name", "Metadata", "% of total"]], download_top
527
 
528
 
529
- def get_top_n_from_duckdb(con, group_col, top_n=10, time_filter=None):
530
  """
531
  Query DuckDB directly to get top N entries with minimal data transfer
532
 
@@ -546,103 +422,57 @@ def get_top_n_from_duckdb(con, group_col, top_n=10, time_filter=None):
546
  end = pd.to_datetime(time_filter[1], unit="s")
547
  time_clause = f"WHERE time >= '{start}' AND time <= '{end}'"
548
 
549
- # Apply country replacements in the query
550
- country_case = """
551
- CASE
552
- WHEN org_country_single = 'HF' THEN 'United States of America'
553
- WHEN org_country_single = 'International' THEN 'International/Online'
554
- WHEN org_country_single = 'Online' THEN 'International/Online'
555
- ELSE org_country_single
556
- END as org_country_single
557
- """
558
-
559
  # Optimized query: first find top N, then get only those rows
560
  query = f"""
561
  WITH base_data AS (
562
  SELECT
563
  {group_col},
564
- {country_case},
 
 
 
 
 
565
  author,
566
  merged_country_groups_single,
567
  merged_modality,
568
  downloads,
569
- estimated_parameters,
570
  model
571
- FROM filtered_df
572
  {time_clause}
573
  ),
 
 
 
 
 
 
 
 
574
  top_items AS (
575
  SELECT
576
- {group_col} as name,
577
- SUM(downloads) as total_downloads
578
- FROM base_data
579
- GROUP BY {group_col}
580
- ORDER BY total_downloads DESC
581
- LIMIT {top_n}
 
 
 
 
 
 
582
  )
583
- SELECT
584
- b.*
585
- FROM base_data b
586
- INNER JOIN top_items t ON b.{group_col} = t.name
587
- ORDER BY t.total_downloads DESC
588
  """
589
-
590
  try:
591
  return con.execute(query).fetchdf()
592
  except Exception as e:
593
  print(f"Error querying DuckDB: {e}")
594
- return pd.DataFrame()
595
-
596
-
597
- def create_leaderboard(con, board_type, top_n=10):
598
- """
599
- Create leaderboard using DuckDB connection with optimized queries
600
-
601
- Args:
602
- con: DuckDB connection object
603
- board_type: Type of leaderboard ('countries', 'developers', 'models')
604
- top_n: Number of top entries to display
605
-
606
- Returns:
607
- Dash HTML component with the leaderboard table
608
- """
609
- # Map board type to column name
610
- column_map = {
611
- "countries": "org_country_single",
612
- "developers": "author",
613
- "models": "model"
614
- }
615
-
616
- title_map = {
617
- "countries": "Top Countries",
618
- "developers": "Top Developers",
619
- "models": "Top Models"
620
- }
621
-
622
- filename_map = {
623
- "countries": "top_countries",
624
- "developers": "top_developers",
625
- "models": "top_models"
626
- }
627
-
628
- group_col = column_map.get(board_type)
629
- if not group_col:
630
- return html.Div(f"Unknown board type: {board_type}")
631
-
632
- # Get only the top N rows from DuckDB
633
- filtered_df = get_top_n_from_duckdb(con, group_col, top_n)
634
-
635
- if filtered_df.empty:
636
- return html.Div("No data available")
637
-
638
- # Process the already-filtered data
639
- top_data, download_data = get_top_n_leaderboard(filtered_df, group_col, top_n)
640
-
641
- return render_table(
642
- top_data,
643
- download_data,
644
- title_map[board_type],
645
- chip_color="#F0F9FF",
646
- bar_color="#082030",
647
- filename=filename_map[board_type],
648
- )
 
3
  from dash_iconify import DashIconify
4
  import dash_mantine_components as dmc
5
  import base64
6
+ import countryflag
7
 
8
  button_style = {
9
  "display": "inline-block",
 
19
  "fontSize": "14px",
20
  }
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  company_icon_map = {
23
  "google": "../assets/icons/google.png",
24
  "distilbert": "../assets/icons/hugging-face.png",
 
27
  "openai": "../assets/icons/openai.png",
28
  }
29
 
30
+ country_emoji_fallback = {
31
+ "User": "👤",
32
+ "Organization": "🏢",
33
+ "Model": "📦",
34
+ }
35
+
36
  meta_cols_map = {
37
  "org_country_single": ["org_country_single"],
38
  "author": ["org_country_single", "author", "merged_country_groups_single"],
 
41
  "author",
42
  "merged_country_groups_single",
43
  "merged_modality",
44
+ "total_downloads",
45
  ],
46
  }
47
 
 
63
 
64
 
65
  # Progress bar for % of total
66
+ def progress_bar(percent, bar_color="#AC482A"):
67
  return html.Div(
68
  style={
69
  "position": "relative",
 
128
  )
129
 
130
 
131
+ # Helper to get popover content for each metadata type
132
+ def get_metadata_popover_content(icon, name, meta_type):
133
+ popover_texts = {
134
+ "country": f"Country: {name}",
135
+ "author": f"Author/Organization: {name}",
136
+ "downloads": f"Total downloads: {name}",
137
+ "modality": f"Modality: {name}",
138
+ "parameters": f"Estimated parameters: {name}",
139
+ }
140
+ return popover_texts.get(meta_type, name)
141
+
142
+
143
+ # Chip renderer with hovercard
144
+ def chip_with_hovercard(text, bg_color="#F0F0F0", meta_type=None, icon=None):
145
+ hovercard_content = get_metadata_popover_content(icon, text, meta_type)
146
+ return dmc.HoverCard(
147
+ width=220,
148
+ shadow="md",
149
+ position="top",
150
+ children=[
151
+ dmc.HoverCardTarget(
152
  html.Span(
153
+ text,
 
 
 
 
 
154
  style={
155
+ "backgroundColor": bg_color,
156
  "padding": "4px 10px",
157
  "borderRadius": "12px",
158
  "margin": "2px",
159
  "display": "inline-flex",
160
+ "alignItems": "center",
161
  "fontSize": "14px",
162
+ "cursor": "pointer",
163
  },
164
  )
165
+ ),
166
+ dmc.HoverCardDropdown(
167
+ dmc.Text(hovercard_content, size="sm")
168
+ ),
169
+ ],
170
+ )
171
+
172
+
173
+ # Render multiple chips in one row, each with popover
174
+ def render_chips(metadata_list, chip_color):
175
+ chips = []
176
+ for icon, name, meta_type in metadata_list:
177
+ if isinstance(icon, str) and icon.endswith((".png", ".jpg", ".jpeg", ".svg")):
178
+ chips.append(
179
+ dmc.HoverCard(
180
+ width=220,
181
+ shadow="md",
182
+ position="top",
183
+ children=[
184
+ dmc.HoverCardTarget(
185
+ html.Span(
186
+ [
187
+ html.Img(
188
+ src=icon, style={"height": "18px", "marginRight": "6px"}
189
+ ),
190
+ name,
191
+ ],
192
+ style={
193
+ "backgroundColor": chip_color,
194
+ "padding": "4px 10px",
195
+ "borderRadius": "12px",
196
+ "margin": "2px",
197
+ "display": "inline-flex",
198
+ "alignItems": "left",
199
+ "fontSize": "14px",
200
+ "cursor": "pointer",
201
+ },
202
+ )
203
+ ),
204
+ dmc.HoverCardDropdown(
205
+ dmc.Text(get_metadata_popover_content(icon, name, meta_type), size="sm")
206
+ ),
207
+ ],
208
+ )
209
  )
210
  else:
211
+ chips.append(chip_with_hovercard(f"{icon} {name}", chip_color, meta_type, icon))
212
  return html.Div(
213
  chips, style={"display": "flex", "flexWrap": "wrap", "justifyContent": "left"}
214
  )
215
 
216
 
217
  def render_table_content(
218
+ df, download_df, chip_color, bar_color="#AC482A", filename="data"
219
  ):
220
  return html.Div(
221
  [
222
+ # Add download button above the table
223
+ df_to_download_link(download_df, filename),
224
  html.Table(
225
  [
226
  html.Thead(
 
280
  ]
281
  )
282
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
  # Function to get top N leaderboard (now accepts pandas DataFrame from DuckDB query)
284
  def get_top_n_leaderboard(filtered_df, group_col, top_n=10):
285
  """
 
293
  Returns:
294
  tuple: (display_df, download_df)
295
  """
296
+
297
  # Group by and get top N
298
  top = (
299
+ filtered_df.groupby(group_col)[["total_downloads", "percent_of_total"]]
300
  .sum()
301
+ .nlargest(top_n, columns="total_downloads")
302
  .reset_index()
303
+ .rename(columns={group_col: "Name", "total_downloads": "Total Value", "percent_of_total": "% of total"})
304
  )
 
 
 
305
 
306
  # Create a downloadable version of the leaderboard
307
  download_top = top.copy()
 
340
  c = "USA"
341
  if c == "user":
342
  c = "User"
343
+ # Try countryflag.getflag(), fallback to dictionary if fails
344
+ try:
345
+ flag_emoji = countryflag.getflag(c)
346
+ # If countryflag returns empty or None, fallback
347
+ if not flag_emoji or flag_emoji == c:
348
+ flag_emoji = country_emoji_fallback.get(c, "🌍")
349
+ except Exception:
350
+ flag_emoji = country_emoji_fallback.get(c, "🌍")
351
+ chips.append((flag_emoji, c, "country"))
352
 
353
  # Author
354
  for a in meta.get("author", []):
 
358
  icon = "🏢"
359
  else:
360
  icon = "👤"
361
+ chips.append((icon, a, "author"))
362
 
363
  # Downloads
364
  total_downloads = sum(
365
+ d for d in meta.get("total_downloads", []) if pd.notna(d)
366
  )
367
  if total_downloads:
368
+ chips.append(("⬇️", f"{int(total_downloads):,}", "downloads"))
369
 
370
  # Modality
371
  for m in meta.get("merged_modality", []):
372
  if pd.notna(m):
373
+ chips.append(("", m, "modality"))
 
 
 
 
 
 
 
 
 
 
 
 
 
374
 
375
  return chips
376
 
 
402
  return top[["Name", "Metadata", "% of total"]], download_top
403
 
404
 
405
+ def get_top_n_from_duckdb(con, group_col, top_n=10, time_filter=None, view="all_downloads"):
406
  """
407
  Query DuckDB directly to get top N entries with minimal data transfer
408
 
 
422
  end = pd.to_datetime(time_filter[1], unit="s")
423
  time_clause = f"WHERE time >= '{start}' AND time <= '{end}'"
424
 
 
 
 
 
 
 
 
 
 
 
425
  # Optimized query: first find top N, then get only those rows
426
  query = f"""
427
  WITH base_data AS (
428
  SELECT
429
  {group_col},
430
+ CASE
431
+ WHEN org_country_single = 'HF' THEN 'United States of America'
432
+ WHEN org_country_single = 'International' THEN 'International/Online'
433
+ WHEN org_country_single = 'Online' THEN 'International/Online'
434
+ ELSE org_country_single
435
+ END AS org_country_single,
436
  author,
437
  merged_country_groups_single,
438
  merged_modality,
439
  downloads,
 
440
  model
441
+ FROM {view}
442
  {time_clause}
443
  ),
444
+
445
+ -- Compute the total downloads for all rows in the time range
446
+ total_downloads_cte AS (
447
+ SELECT SUM(downloads) AS total_downloads_all
448
+ FROM base_data
449
+ ),
450
+
451
+ -- Compute per-group totals and their percentage of all downloads
452
  top_items AS (
453
  SELECT
454
+ b.{group_col} AS name,
455
+ SUM(b.downloads) AS total_downloads,
456
+ ROUND(SUM(b.downloads) * 100.0 / t.total_downloads_all, 2) AS percent_of_total,
457
+ -- Pick first non-null metadata values for reference
458
+ ANY_VALUE(b.org_country_single) AS org_country_single,
459
+ ANY_VALUE(b.author) AS author,
460
+ ANY_VALUE(b.merged_country_groups_single) AS merged_country_groups_single,
461
+ ANY_VALUE(b.merged_modality) AS merged_modality,
462
+ ANY_VALUE(b.model) AS model
463
+ FROM base_data b
464
+ CROSS JOIN total_downloads_cte t
465
+ GROUP BY b.{group_col}, t.total_downloads_all
466
  )
467
+
468
+ SELECT *
469
+ FROM top_items
470
+ ORDER BY total_downloads DESC
471
+ LIMIT {top_n};
472
  """
473
+
474
  try:
475
  return con.execute(query).fetchdf()
476
  except Exception as e:
477
  print(f"Error querying DuckDB: {e}")
478
+ return pd.DataFrame()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -5,4 +5,5 @@ gunicorn
5
  dash-mantine-components
6
  dash-bootstrap-components
7
  pyarrow
8
- duckdb
 
 
5
  dash-mantine-components
6
  dash-bootstrap-components
7
  pyarrow
8
+ duckdb
9
+ countryflag