emsesc committed on
Commit
b233a23
·
1 Parent(s): 855952e

map + toggle: need to clean up code

Browse files
Files changed (3) hide show
  1. app.py +77 -4
  2. graphs/leaderboard.py +271 -237
  3. graphs/model_market_share.py +12 -2
app.py CHANGED
@@ -1,8 +1,8 @@
1
- from dash import Dash, html, dcc, Input, Output
2
  import pandas as pd
3
  import dash_mantine_components as dmc
4
  from graphs.model_market_share import create_stacked_area_chart, create_world_map, create_range_slider
5
- from graphs.leaderboard import create_leaderboard
6
  from graphs.model_characteristics import create_concentration_chart, create_line_plot
7
  from graphs.tree import generate_model_treemap
8
 
@@ -266,8 +266,7 @@ def update_world_map(value):
266
  start_time = pd.to_datetime(value[0], unit='s').strftime('%Y-%m-%d')
267
  end_time = pd.to_datetime(value[1], unit='s').strftime('%Y-%m-%d')
268
  updated_fig = create_world_map(
269
- country_concentration_df, "time", "metric", "value",
270
- start_time=start_time, end_time=end_time
271
  )
272
  updated_fig.update_layout(font_family="Inter")
273
  return updated_fig
@@ -309,6 +308,80 @@ def update_stacked_area(value):
309
  return updated_fig
310
  return model_market_share_area
311
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
  # Run the app
313
  if __name__ == '__main__':
314
  app.run(debug=True)
 
1
+ from dash import Dash, html, dcc, Input, Output, State
2
  import pandas as pd
3
  import dash_mantine_components as dmc
4
  from graphs.model_market_share import create_stacked_area_chart, create_world_map, create_range_slider
5
+ from graphs.leaderboard import create_leaderboard, get_top_n_leaderboard, render_table, render_table_content
6
  from graphs.model_characteristics import create_concentration_chart, create_line_plot
7
  from graphs.tree import generate_model_treemap
8
 
 
266
  start_time = pd.to_datetime(value[0], unit='s').strftime('%Y-%m-%d')
267
  end_time = pd.to_datetime(value[1], unit='s').strftime('%Y-%m-%d')
268
  updated_fig = create_world_map(
269
+ filtered_df
 
270
  )
271
  updated_fig.update_layout(font_family="Inter")
272
  return updated_fig
 
308
  return updated_fig
309
  return model_market_share_area
310
 
311
# Label shown on every leaderboard toggle button before it has been clicked
# (matches the initial children of the button built in render_table).
_DEFAULT_TOGGLE_LABEL = "▼ Show Top 50"


def _next_toggle_state(n_clicks, current_label):
    """Return ``(top_n, new_label)`` for a leaderboard toggle button.

    The button cycles 10 -> 50 -> 100 -> 10 rows; the label it currently
    shows names the state a click moves to.

    Args:
        n_clicks: Click count of the toggle button; ``0`` or ``None`` means
            the callback fired without a click (e.g. on page load).
        current_label: Current button text, or ``None`` if it has none.

    Returns:
        Tuple of the number of rows to show and the label to put on the
        button afterwards.
    """
    label = _DEFAULT_TOGGLE_LABEL if current_label is None else current_label

    # No click yet: keep the compact view and leave the label untouched.
    if not n_clicks:
        return 10, label
    if "Show Top 50" in label:
        return 50, "▼ Show Top 100"
    if "Show Top 100" in label:
        return 100, "▲ Show Less"
    # "Show Less" (or any unrecognised label) collapses back to the top 10.
    return 10, "▼ Show Top 50"


def _render_top_n(group_col, top_n, chip_color, filename):
    """Build the table content for the top ``top_n`` groups of ``group_col``.

    Reads the module-level ``filtered_df`` (defined elsewhere in this file).
    """
    df, download_df = get_top_n_leaderboard(filtered_df, group_col, top_n)
    return render_table_content(df, download_df, chip_color=chip_color, filename=filename)


@app.callback(
    Output("top_countries-table", "children"),
    Output("top_countries-toggle", "children"),
    Input("top_countries-toggle", "n_clicks"),
    State("top_countries-toggle", "children"),
)
def update_top_countries(n_clicks, current_label):
    """Re-render the countries leaderboard when its toggle is clicked."""
    top_n, new_label = _next_toggle_state(n_clicks, current_label)
    return _render_top_n("org_country_single", top_n, "#FCE8E6", "top_countries"), new_label


@app.callback(
    Output("top_developers-table", "children"),
    Output("top_developers-toggle", "children"),
    Input("top_developers-toggle", "n_clicks"),
    State("top_developers-toggle", "children"),
)
def update_top_developers(n_clicks, current_label):
    """Re-render the developers leaderboard when its toggle is clicked."""
    top_n, new_label = _next_toggle_state(n_clicks, current_label)
    return _render_top_n("author", top_n, "#E6F4EA", "top_developers"), new_label


@app.callback(
    Output("top_models-table", "children"),
    Output("top_models-toggle", "children"),
    Input("top_models-toggle", "n_clicks"),
    State("top_models-toggle", "children"),
)
def update_top_models(n_clicks, current_label):
    """Re-render the models leaderboard when its toggle is clicked."""
    top_n, new_label = _next_toggle_state(n_clicks, current_label)
    return _render_top_n("model", top_n, "#E8F0FE", "top_models"), new_label
384
+
385
  # Run the app
386
  if __name__ == '__main__':
387
  app.run(debug=True)
graphs/leaderboard.py CHANGED
@@ -2,8 +2,21 @@ import pandas as pd
2
  from dash import html, dcc
3
  import base64
4
 
5
- def create_leaderboard(filtered_df, start_time=None, top_n=10):
6
- country_icon_map = {
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  "USA": "🇺🇸",
8
  "China": "🇨🇳",
9
  "Germany": "🇩🇪",
@@ -23,245 +36,144 @@ def create_leaderboard(filtered_df, start_time=None, top_n=10):
23
  "International/Online": "🌐",
24
  }
25
 
26
- company_icon_map = {
27
- "google": "../assets/icons/google.png",
28
- "distilbert": "../assets/icons/hugging-face.png",
29
- "sentence-transformers": "../assets/icons/hugging-face.png",
30
- "facebook": "../assets/icons/meta.png",
31
- "openai": "../assets/icons/openai.png",
32
- }
33
-
34
- meta_cols_map = {
35
- "org_country_single": ["org_country_single"],
36
- "author": ["org_country_single", "author", "merged_country_groups_single"],
37
- "model": ["org_country_single", "author", "merged_country_groups_single", "merged_modality", "downloads"]
38
- }
39
-
40
- # Filter by time
41
- if start_time is not None:
42
- filtered_df = filtered_df[(filtered_df["created"] >= start_time) & (filtered_df["time"] >= start_time)]
43
-
44
- if filtered_df.empty:
45
- return html.Div("No data in selected range")
46
-
47
- # Merge HF and USA
48
- filtered_df["org_country_single"] = filtered_df["org_country_single"].replace({"HF": "United States of America"})
49
- # Merge International and Online
50
- filtered_df["org_country_single"] = filtered_df["org_country_single"].replace({"International": "International/Online", "Online": "International/Online"})
51
-
52
- # Function to get top N leaderboard
53
- def get_top_n_leaderboard(group_col, top_n=10):
54
- top = filtered_df.groupby(group_col)["downloads"].sum().nlargest(top_n).reset_index().rename(columns={group_col: "Name", "downloads": "Total Value"})
55
- total_value = top["Total Value"].sum()
56
- top["% of total"] = top["Total Value"] / total_value * 100 if total_value else 0
57
-
58
- # Create a downloadable version of the leaderboard
59
- download_top = top.copy()
60
- download_top["Total Value"] = download_top["Total Value"].astype(int)
61
- download_top["% of total"] = download_top["% of total"].round(2)
62
 
63
- top["Name"].replace("User", "user", inplace=True)
 
 
 
 
64
 
65
- # All relevant metadata columns
66
- meta_cols = meta_cols_map.get(group_col, [])
67
- # Collect all metadata per top n for each category (country, author, model)
68
- meta_map = {}
69
- download_map = {}
70
- for name in top["Name"]:
71
- name_data = filtered_df[filtered_df[group_col] == name]
72
- meta_map[name] = {}
73
- download_map[name] = {}
74
- for col in meta_cols:
75
- if col in name_data.columns:
76
- unique_vals = name_data[col].unique()
77
- meta_map[name][col] = list(unique_vals)
78
- download_map[name][col] = list(unique_vals)
79
-
80
- # Function to build metadata chips
81
- def build_metadata(nm):
82
- meta = meta_map.get(nm, {})
83
- chips = []
84
- # Countries
85
- for c in meta.get("org_country_single", []):
86
- if c == "United States of America":
87
- c = "USA"
88
- if c == "user":
89
- c = "User"
90
- chips.append((country_icon_map.get(c, ""), c))
91
- # Author
92
- for a in meta.get("author", []):
93
- icon = company_icon_map.get(a, "")
94
- if icon == "":
95
- if meta.get("merged_country_groups_single", ["User"])[0] != "User":
96
- icon = "🏢"
97
- else:
98
- icon = "👤"
99
- chips.append((icon, a))
100
- # Downloads
101
- # Sum downloads if multiple entries
102
- total_downloads = sum(d for d in meta.get("downloads", []) if pd.notna(d)) # Check if d is not NaN
103
- if total_downloads:
104
- chips.append(("⬇️", f"{int(total_downloads):,}"))
105
 
106
- # Modality
107
- for m in meta.get("merged_modality", []):
108
- chips.append(("", m))
109
-
110
- # Estimated Parameters
111
- for p in meta.get("estimated_parameters", []):
112
- if pd.notna(p): # Check if p is not NaN
113
- if p >= 1e9:
114
- p_str = f"{p/1e9:.1f}B"
115
- elif p >= 1e6:
116
- p_str = f"{p/1e6:.1f}M"
117
- elif p >= 1e3:
118
- p_str = f"{p/1e3:.1f}K"
119
- else:
120
- p_str = str(p)
121
- chips.append(("⚙️", p_str))
122
- return chips
123
-
124
- # Function to create downloadable dataframe
125
- def build_download_metadata(nm):
126
- meta = download_map.get(nm, {})
127
- download_info = {}
128
- for col in meta_cols:
129
- # don't add empty columns
130
- if col not in meta or not meta[col]:
131
- continue
132
- vals = meta.get(col, [])
133
- if vals:
134
- # Join list into a single string for CSV
135
- download_info[col] = ", ".join(str(v) for v in vals)
136
- else:
137
- download_info[col] = ""
138
- return download_info
139
-
140
- # Apply metadata builder to top dataframe
141
- top["Metadata"] = top["Name"].map(build_metadata)
142
- download_info_list = [build_download_metadata(nm) for nm in download_top["Name"]]
143
- download_info_df = pd.DataFrame(download_info_list)
144
- download_top = pd.concat([download_top, download_info_df], axis=1)
145
 
146
- return top[["Name", "Metadata", "% of total"]], download_top
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
- # Build leaderboards
149
- top_countries, download_top_countries = get_top_n_leaderboard("org_country_single", top_n)
150
- top_developers, download_top_developers = get_top_n_leaderboard("author", top_n)
151
- top_models, download_top_models = get_top_n_leaderboard("model", top_n)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
- # Chip renderer
154
- def chip(text, bg_color="#F0F0F0"):
155
- return html.Span(
156
- text,
157
- style={
158
- "backgroundColor": bg_color,
159
- "padding": "4px 10px",
160
- "borderRadius": "12px",
161
- "margin": "2px",
162
- "display": "inline-flex",
163
- "alignItems": "center",
164
- "fontSize": "14px"
165
- }
166
- )
167
-
168
- # Render multiple chips in one row
169
- def render_chips(metadata_list, chip_color="#F0F0F0"):
170
- chips = []
171
- for icon, name in metadata_list:
172
- if isinstance(icon, str) and icon.endswith(('.png', '.jpg', '.jpeg', '.svg')):
173
- chips.append(
174
- html.Span([
175
- html.Img(src=icon, style={"height": "18px", "marginRight": "6px"}),
176
- name
177
- ],
178
- style={
179
- "backgroundColor": chip_color,
180
- "padding": "4px 10px",
181
- "borderRadius": "12px",
182
- "margin": "2px",
183
- "display": "inline-flex",
184
- "alignItems": "left",
185
- "fontSize": "14px"
186
- })
187
- )
188
- else:
189
- chips.append(chip(f"{icon} {name}", chip_color))
190
- return html.Div(
191
- chips,
192
- style={"display": "flex", "flexWrap": "wrap", "justifyContent": "left"}
193
- )
194
-
195
- # Progress bar for % of total
196
- def progress_bar(percent, bar_color="#4CAF50"):
197
- return html.Div(
198
- style={
199
- "position": "relative",
200
- "backgroundColor": "#E0E0E0",
201
- "borderRadius": "8px",
202
- "height": "20px",
203
- "width": "100%",
204
- "overflow": "hidden",
205
- },
206
- children=[
207
- html.Div(
208
- style={
209
- "backgroundColor": bar_color,
210
- "width": f"{percent}%",
211
- "height": "100%",
212
- "borderRadius": "8px",
213
- "transition": "width 0.5s",
214
- }
215
- ),
216
- html.Div(
217
- f"{percent:.1f}%",
218
- style={
219
- "position": "absolute",
220
- "top": 0,
221
- "left": "50%",
222
- "transform": "translateX(-50%)",
223
- "color": "black",
224
- "fontWeight": "bold",
225
- "fontSize": "12px",
226
- "lineHeight": "20px",
227
- "textAlign": "center",
228
- }
229
- )
230
- ]
231
- )
232
-
233
- # Helper to convert DataFrame to CSV and encode for download
234
- def df_to_download_link(df, filename):
235
- csv_string = df.to_csv(index=False)
236
- b64 = base64.b64encode(csv_string.encode()).decode()
237
- return html.Div(
238
- html.A(
239
- "Download CSV",
240
- id=f"download-{filename}",
241
- download=f"{filename}.csv",
242
- href=f"data:text/csv;base64,{b64}",
243
- target="_blank",
244
- style={
245
- "display": "inline-block",
246
- "marginBottom": "10px",
247
- "marginRight": "15px",
248
- "marginTop": "30px",
249
- "padding": "6px 16px",
250
- "backgroundColor": "#2196F3",
251
- "color": "white",
252
- "borderRadius": "6px",
253
- "textDecoration": "none",
254
- "fontWeight": "bold",
255
- "fontSize": "14px"
256
- }
257
- ),
258
- style={"textAlign": "right"}
259
- )
260
 
261
- # Table renderer
262
- def render_table(df, download_df, title, chip_color="#F0F0F0", bar_color="#4CAF50", filename="data"):
263
- return html.Div([
264
- html.H4(title, style={"textAlign": "left", "marginBottom": "10px", "fontSize": "20px"}),
 
 
 
 
265
  html.Table([
266
  html.Thead(html.Tr([
267
  html.Th("Rank", style={"backgroundColor": "#F0F0F0", "textAlign": "left"}),
@@ -277,9 +189,131 @@ def create_leaderboard(filtered_df, start_time=None, top_n=10):
277
  html.Td(progress_bar(row["% of total"], bar_color), style={"textAlign": "center"})
278
  ]) for idx, row in df.iterrows()
279
  ])
280
- ], style={"borderCollapse": "collapse", "width": "100%"}),
281
- df_to_download_link(download_df, filename),
282
- ], style={"marginBottom": "20px"})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
 
284
  # Layout with 3 stacked tables
285
  layout = html.Div([
 
2
  from dash import html, dcc
3
  import base64
4
 
5
+ button_style = {
6
+ "display": "inline-block",
7
+ "marginBottom": "10px",
8
+ "marginRight": "15px",
9
+ "marginTop": "30px",
10
+ "padding": "6px 16px",
11
+ "backgroundColor": "#2196F3",
12
+ "color": "white",
13
+ "borderRadius": "6px",
14
+ "textDecoration": "none",
15
+ "fontWeight": "bold",
16
+ "fontSize": "14px"
17
+ }
18
+
19
+ country_icon_map = {
20
  "USA": "🇺🇸",
21
  "China": "🇨🇳",
22
  "Germany": "🇩🇪",
 
36
  "International/Online": "🌐",
37
  }
38
 
39
+ company_icon_map = {
40
+ "google": "../assets/icons/google.png",
41
+ "distilbert": "../assets/icons/hugging-face.png",
42
+ "sentence-transformers": "../assets/icons/hugging-face.png",
43
+ "facebook": "../assets/icons/meta.png",
44
+ "openai": "../assets/icons/openai.png",
45
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
+ meta_cols_map = {
48
+ "org_country_single": ["org_country_single"],
49
+ "author": ["org_country_single", "author", "merged_country_groups_single"],
50
+ "model": ["org_country_single", "author", "merged_country_groups_single", "merged_modality", "downloads"]
51
+ }
52
 
53
+ # Chip renderer
54
def chip(text, bg_color="#F0F0F0"):
    """Wrap ``text`` in a rounded, pill-shaped ``html.Span``.

    Args:
        text: Content placed inside the span.
        bg_color: CSS background colour of the pill.

    Returns:
        The styled ``html.Span`` component.
    """
    pill_style = {
        "backgroundColor": bg_color,
        "padding": "4px 10px",
        "borderRadius": "12px",
        "margin": "2px",
        "display": "inline-flex",
        "alignItems": "center",
        "fontSize": "14px",
    }
    return html.Span(text, style=pill_style)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
+ # Progress bar for % of total
69
def progress_bar(percent, bar_color="#4CAF50"):
    """Render ``percent`` as a horizontal bar with a centred text label.

    Args:
        percent: Value used both as the CSS width of the filled part
            (``"{percent}%"``) and as the label, formatted to one decimal.
        bar_color: CSS colour of the filled part.

    Returns:
        An ``html.Div`` track containing the filled bar and the label.
    """
    track_style = {
        "position": "relative",
        "backgroundColor": "#E0E0E0",
        "borderRadius": "8px",
        "height": "20px",
        "width": "100%",
        "overflow": "hidden",
    }
    fill_style = {
        "backgroundColor": bar_color,
        "width": f"{percent}%",
        "height": "100%",
        "borderRadius": "8px",
        "transition": "width 0.5s",
    }
    # The label is absolutely positioned so it stays centred on the track
    # regardless of how wide the filled part is.
    label_style = {
        "position": "absolute",
        "top": 0,
        "left": "50%",
        "transform": "translateX(-50%)",
        "color": "black",
        "fontWeight": "bold",
        "fontSize": "12px",
        "lineHeight": "20px",
        "textAlign": "center",
    }

    fill = html.Div(style=fill_style)
    label = html.Div(f"{percent:.1f}%", style=label_style)
    return html.Div(style=track_style, children=[fill, label])
 
 
105
 
106
+ # Helper to convert DataFrame to CSV and encode for download
107
def df_to_download_link(df, filename):
    """Build a right-aligned "Download CSV" link with ``df`` embedded in it.

    The frame is serialised to CSV (without the index), base64-encoded and
    placed in a ``data:`` URI, so the download needs no server round-trip.

    Args:
        df: DataFrame to export.
        filename: Base name used for the anchor id (``download-<filename>``)
            and for the downloaded file (``<filename>.csv``).

    Returns:
        An ``html.Div`` wrapping the styled ``html.A`` anchor.
    """
    encoded_csv = base64.b64encode(df.to_csv(index=False).encode()).decode()
    anchor = html.A(
        "Download CSV",
        id=f"download-{filename}",
        download=f"{filename}.csv",
        href=f"data:text/csv;base64,{encoded_csv}",
        target="_blank",
        style=button_style,
    )
    return html.Div(anchor, style={"textAlign": "right"})
121
 
122
+ # Render multiple chips in one row
123
def render_chips(metadata_list, chip_color="#F0F0F0"):
    """Render ``(icon, name)`` pairs as a wrapping row of chips.

    Args:
        metadata_list: Iterable of ``(icon, name)`` tuples. ``icon`` is either
            an image path ending in .png/.jpg/.jpeg/.svg, a text/emoji
            prefix, or an empty string for no icon.
        chip_color: CSS background colour applied to every chip.

    Returns:
        An ``html.Div`` flex container holding one chip per pair.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.svg')
    chips = []
    for icon, name in metadata_list:
        if isinstance(icon, str) and icon.endswith(image_suffixes):
            # Image icons go through chip() as well, so both chip kinds share
            # one style (the old inline copy used "alignItems": "left", which
            # is not a valid align-items value).
            content = [
                html.Img(src=icon, style={"height": "18px", "marginRight": "6px"}),
                name,
            ]
        else:
            # Skip the separator when there is no icon to avoid a leading space.
            content = f"{icon} {name}" if icon else f"{name}"
        chips.append(chip(content, chip_color))
    return html.Div(
        chips,
        style={"display": "flex", "flexWrap": "wrap", "justifyContent": "left"}
    )
148
 
149
def render_table_content(df, download_df, chip_color="#F0F0F0", bar_color="#4CAF50", filename="data"):
    """Render the leaderboard rows of ``df`` as an HTML table.

    Args:
        df: DataFrame with "Name", "Metadata" (list of ``(icon, name)``
            pairs) and "% of total" columns, already in rank order.
        download_df: Unused here; kept so the signature stays compatible
            with existing callers.
        chip_color: Background colour of the metadata chips.
        bar_color: Fill colour of the percentage bars.
        filename: Unused here; kept so the signature stays compatible with
            existing callers.

    Returns:
        An ``html.Div`` wrapping the ``html.Table``.
    """
    header_style = {"backgroundColor": "#F0F0F0", "textAlign": "left"}
    header = html.Thead(html.Tr([
        html.Th("Rank", style=header_style),
        html.Th("Name", style=header_style),
        html.Th("Metadata", style={**header_style, "marginRight": "10px"}),
        html.Th("% of Total", style=header_style),
    ]))

    # Rank by position rather than by index label, so the numbering is
    # 1..N even if ``df`` does not carry a default 0-based index.
    body = html.Tbody([
        html.Tr([
            html.Td(rank, style={"textAlign": "center"}),
            html.Td(row["Name"], style={"textAlign": "left"}),
            html.Td(render_chips(row["Metadata"], chip_color)),
            html.Td(progress_bar(row["% of total"], bar_color), style={"textAlign": "center"}),
        ])
        for rank, (_, row) in enumerate(df.iterrows(), start=1)
    ])

    return html.Div([
        html.Table([header, body], style={"borderCollapse": "collapse", "width": "100%"}),
    ])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
+ # Table renderer
170
+ def render_table(df, download_df, title, chip_color="#F0F0F0", bar_color="#4CAF50", filename="data"):
171
+ return html.Div(id=f"{filename}-div", children=[
172
+ html.Div([
173
+ html.H4(title, style={"textAlign": "left", "marginBottom": "10px", "fontSize": "20px", "display": "inline-block"}),
174
+ df_to_download_link(download_df, filename)
175
+ ], style={"display": "flex", "alignItems": "center", "justifyContent": "space-between"}),
176
+ html.Div(id=f"{filename}-table", children=[
177
  html.Table([
178
  html.Thead(html.Tr([
179
  html.Th("Rank", style={"backgroundColor": "#F0F0F0", "textAlign": "left"}),
 
189
  html.Td(progress_bar(row["% of total"], bar_color), style={"textAlign": "center"})
190
  ]) for idx, row in df.iterrows()
191
  ])
192
+ ], style={"borderCollapse": "collapse", "width": "100%", "border": "none"}),
193
+ ]),
194
+ html.Div([
195
+ html.Button(
196
+ "▼ Show Top 50",
197
+ id=f"{filename}-toggle",
198
+ n_clicks=0,
199
+ style={**button_style, "border": "none"}
200
+ )
201
+ ], style={"marginTop": "5px", "textAlign": "left"})
202
+ ], style={"marginBottom": "20px"})
203
+
204
+ # Function to get top N leaderboard
205
def get_top_n_leaderboard(filtered_df, group_col, top_n=10):
    """Build the top-N leaderboard for ``group_col`` ranked by downloads.

    Args:
        filtered_df: DataFrame with a "downloads" column, ``group_col`` and
            (optionally) the metadata columns listed in ``meta_cols_map``.
        group_col: Column to group by; also the key into ``meta_cols_map``.
        top_n: Number of groups to keep.

    Returns:
        A ``(display_df, download_df)`` tuple. ``display_df`` has the columns
        "Name", "Metadata" (list of ``(icon, text)`` chip tuples) and
        "% of total". ``download_df`` has "Name", "Total Value",
        "% of total" plus one comma-joined string column per non-empty
        metadata column. "% of total" is each group's share of the summed
        downloads of the top-N groups only.
    """
    top = (
        filtered_df.groupby(group_col)["downloads"]
        .sum()
        .nlargest(top_n)
        .reset_index()
        .rename(columns={group_col: "Name", "downloads": "Total Value"})
    )
    total_value = top["Total Value"].sum()
    top["% of total"] = top["Total Value"] / total_value * 100 if total_value else 0

    # Create a downloadable version of the leaderboard
    download_top = top.copy()
    download_top["Total Value"] = download_top["Total Value"].astype(int)
    download_top["% of total"] = download_top["% of total"].round(2)

    # Assign the result back instead of a chained ``inplace=True`` replace:
    # that form acts on a temporary under pandas Copy-on-Write and would
    # silently leave ``top`` unchanged.
    # NOTE(review): metadata below is looked up by the replaced name, so a
    # "User" group finds no rows in ``filtered_df`` - confirm this is intended.
    top["Name"] = top["Name"].replace("User", "user")

    # All relevant metadata columns
    meta_cols = meta_cols_map.get(group_col, [])

    # Unique metadata values per leaderboard entry; shared by the chip
    # builder and the CSV builder (neither mutates it).
    meta_map = {}
    for name in top["Name"]:
        name_data = filtered_df[filtered_df[group_col] == name]
        meta_map[name] = {
            col: list(name_data[col].unique())
            for col in meta_cols
            if col in name_data.columns
        }

    def build_metadata(nm):
        """Return the list of ``(icon, text)`` chips for entry ``nm``."""
        meta = meta_map.get(nm, {})
        chips = []
        # Countries
        for c in meta.get("org_country_single", []):
            if c == "United States of America":
                c = "USA"
            if c == "user":
                c = "User"
            chips.append((country_icon_map.get(c, ""), c))
        # Author
        for a in meta.get("author", []):
            icon = company_icon_map.get(a, "")
            if icon == "":
                # No logo available: fall back to an organisation / person emoji.
                if meta.get("merged_country_groups_single", ["User"])[0] != "User":
                    icon = "🏢"
                else:
                    icon = "👤"
            chips.append((icon, a))
        # Downloads: sum the unique values, ignoring NaN
        total_downloads = sum(d for d in meta.get("downloads", []) if pd.notna(d))
        if total_downloads:
            chips.append(("⬇️", f"{int(total_downloads):,}"))

        # Modality
        for m in meta.get("merged_modality", []):
            chips.append(("", m))

        # Estimated parameters, abbreviated with a B/M/K suffix
        for p in meta.get("estimated_parameters", []):
            if pd.notna(p):
                if p >= 1e9:
                    p_str = f"{p/1e9:.1f}B"
                elif p >= 1e6:
                    p_str = f"{p/1e6:.1f}M"
                elif p >= 1e3:
                    p_str = f"{p/1e3:.1f}K"
                else:
                    p_str = str(p)
                chips.append(("⚙️", p_str))
        return chips

    def build_download_metadata(nm):
        """Return ``{column: "v1, v2, ..."}`` for entry ``nm``, skipping empty columns."""
        meta = meta_map.get(nm, {})
        return {
            col: ", ".join(str(v) for v in meta[col])
            for col in meta_cols
            if meta.get(col)
        }

    # Apply metadata builder to top dataframe
    top["Metadata"] = top["Name"].map(build_metadata)
    download_info_df = pd.DataFrame(
        [build_download_metadata(nm) for nm in download_top["Name"]]
    )
    download_top = pd.concat([download_top, download_info_df], axis=1)

    return top[["Name", "Metadata", "% of total"]], download_top
299
+
300
+ def create_leaderboard(filtered_df, start_time=None, top_n=10):
301
+ # Filter by time
302
+ if start_time is not None:
303
+ filtered_df = filtered_df[(filtered_df["created"] >= start_time) & (filtered_df["time"] >= start_time)]
304
+
305
+ if filtered_df.empty:
306
+ return html.Div("No data in selected range")
307
+
308
+ # Merge HF and USA
309
+ filtered_df["org_country_single"] = filtered_df["org_country_single"].replace({"HF": "United States of America"})
310
+ # Merge International and Online
311
+ filtered_df["org_country_single"] = filtered_df["org_country_single"].replace({"International": "International/Online", "Online": "International/Online"})
312
+
313
+ # Build leaderboards
314
+ top_countries, download_top_countries = get_top_n_leaderboard(filtered_df, "org_country_single", top_n)
315
+ top_developers, download_top_developers = get_top_n_leaderboard(filtered_df, "author", top_n)
316
+ top_models, download_top_models = get_top_n_leaderboard(filtered_df, "model", top_n)
317
 
318
  # Layout with 3 stacked tables
319
  layout = html.Div([
graphs/model_market_share.py CHANGED
@@ -234,7 +234,13 @@ def create_world_map(
234
  specs=[[{"type": "geo"}]],
235
  )
236
 
237
- downloads_by_country = df.groupby('org_country_single')['downloads'].sum().reset_index()
 
 
 
 
 
 
238
 
239
  # Prepare top countries for annotation
240
  total_downloads = float(downloads_by_country['downloads'].sum())
@@ -246,9 +252,11 @@ def create_world_map(
246
  hover_text.append(
247
  f"<b>{row['org_country_single']}</b><br>"
248
  f"Avg Downloads: {row['pct']:.1f}% of total<br>"
249
- f"Avg Value: {row['downloads']:.6f}"
250
  )
251
 
 
 
 
252
  # Add choropleth to plot
253
  fig.add_trace(
254
  go.Choropleth(
@@ -268,6 +276,8 @@ def create_world_map(
268
  ],
269
  colorbar=dict(
270
  title="Avg % of Total Downloads",
 
 
271
  tickfont=dict(size=12),
272
  len=0.6,
273
  x=1.02,
 
234
  specs=[[{"type": "geo"}]],
235
  )
236
 
237
+ downloads_by_country = (
238
+ df.groupby(['org_country_single', 'country_code'])['downloads']
239
+ .sum()
240
+ .reset_index()
241
+ )
242
+
243
+ print(downloads_by_country.columns)
244
 
245
  # Prepare top countries for annotation
246
  total_downloads = float(downloads_by_country['downloads'].sum())
 
252
  hover_text.append(
253
  f"<b>{row['org_country_single']}</b><br>"
254
  f"Avg Downloads: {row['pct']:.1f}% of total<br>"
 
255
  )
256
 
257
+ linear_ticks = [0.01, 0.1, 10, 50, 100] # percent values
258
+ log_ticks = np.log10(linear_ticks) # what you're actually plotting
259
+
260
  # Add choropleth to plot
261
  fig.add_trace(
262
  go.Choropleth(
 
276
  ],
277
  colorbar=dict(
278
  title="Avg % of Total Downloads",
279
+ tickvals=log_ticks, # positions in log space
280
+ ticktext=[f"{t}%" for t in linear_ticks], # labels shown
281
  tickfont=dict(size=12),
282
  len=0.6,
283
  x=1.02,