emsesc committed on
Commit
2811ff1
·
1 Parent(s): 62665d6

modify leaderboards

Browse files
app.py CHANGED
@@ -1,6 +1,7 @@
1
  from dash import Dash, html, dcc, Input, Output
2
  import pandas as pd
3
- from graphs.model_market_share import create_stacked_area_chart, create_world_map, create_range_slider, create_leaderboard
 
4
  from graphs.model_characteristics import create_concentration_chart, create_line_plot
5
 
6
  # Initialize the app
@@ -8,6 +9,7 @@ app = Dash()
8
  server = app.server
9
 
10
  # Load pre-processed data frames
 
11
  model_topk_df = pd.read_pickle("data_frames/model_topk_df.pkl")
12
  model_gini_df = pd.read_pickle("data_frames/model_gini_df.pkl")
13
  model_hhi_df = pd.read_pickle("data_frames/model_hhi_df.pkl")
@@ -82,10 +84,6 @@ world_map = create_world_map(
82
  country_concentration_df, "time", "metric", "value"
83
  )
84
 
85
- leaderboard = create_leaderboard(
86
- country_concentration_df, author_concentration_df, model_concentration_df
87
- )
88
-
89
  slider = create_range_slider(
90
  model_topk_df
91
  )
@@ -175,9 +173,14 @@ app.layout = html.Div(
175
  dcc.Graph(id='world-map-with-slider'),
176
  style={'display': 'flex', 'justifyContent': 'center'}
177
  ),
178
- dcc.Graph(id='leaderboard'),
179
  ], style={'marginBottom': 12})
180
  ]),
 
 
 
 
 
181
  dcc.Tab(label='Model Characteristics', children=[
182
  dcc.Graph(id='language-concentration-chart'),
183
  html.Div([
@@ -243,21 +246,21 @@ def update_map(relayout_data):
243
  return world_map
244
 
245
  # On slider change, update leaderboard
246
- @app.callback(
247
- Output('leaderboard', 'figure'),
248
- [Input('time-slider', 'relayoutData')]
249
- )
250
- def update_leaderboard(relayout_data):
251
- if relayout_data and 'xaxis.range[0]' in relayout_data and 'xaxis.range[1]' in relayout_data:
252
- start_time = pd.to_datetime(relayout_data['xaxis.range[0]']).strftime('%Y-%m-%d')
253
- end_time = pd.to_datetime(relayout_data['xaxis.range[1]']).strftime('%Y-%m-%d')
254
- updated_fig = create_leaderboard(
255
- country_concentration_df, author_concentration_df, model_concentration_df, start_time=start_time, end_time=end_time
256
- )
257
- updated_fig.update_layout(font_family="Inter")
258
- return updated_fig
259
- else:
260
- return leaderboard
261
 
262
  # On slider change, update stacked area chart
263
  @app.callback(
 
1
  from dash import Dash, html, dcc, Input, Output
2
  import pandas as pd
3
+ from graphs.model_market_share import create_stacked_area_chart, create_world_map, create_range_slider
4
+ from graphs.leaderboard import create_leaderboard
5
  from graphs.model_characteristics import create_concentration_chart, create_line_plot
6
 
7
  # Initialize the app
 
9
  server = app.server
10
 
11
  # Load pre-processed data frames
12
+ filtered_df = pd.read_pickle("data_frames/filtered_df.pkl")
13
  model_topk_df = pd.read_pickle("data_frames/model_topk_df.pkl")
14
  model_gini_df = pd.read_pickle("data_frames/model_gini_df.pkl")
15
  model_hhi_df = pd.read_pickle("data_frames/model_hhi_df.pkl")
 
84
  country_concentration_df, "time", "metric", "value"
85
  )
86
 
 
 
 
 
87
  slider = create_range_slider(
88
  model_topk_df
89
  )
 
173
  dcc.Graph(id='world-map-with-slider'),
174
  style={'display': 'flex', 'justifyContent': 'center'}
175
  ),
176
+ # dcc.Graph(id='leaderboard'),
177
  ], style={'marginBottom': 12})
178
  ]),
179
+ dcc.Tab(label='Leaderboard', children=[
180
+ create_leaderboard(
181
+ filtered_df, country_concentration_df, author_concentration_df, model_concentration_df
182
+ )
183
+ ]),
184
  dcc.Tab(label='Model Characteristics', children=[
185
  dcc.Graph(id='language-concentration-chart'),
186
  html.Div([
 
246
  return world_map
247
 
248
  # On slider change, update leaderboard
249
+ # @app.callback(
250
+ # Output('leaderboard', 'figure'),
251
+ # [Input('time-slider', 'relayoutData')]
252
+ # )
253
+ # def update_leaderboard(relayout_data):
254
+ # if relayout_data and 'xaxis.range[0]' in relayout_data and 'xaxis.range[1]' in relayout_data:
255
+ # start_time = pd.to_datetime(relayout_data['xaxis.range[0]']).strftime('%Y-%m-%d')
256
+ # end_time = pd.to_datetime(relayout_data['xaxis.range[1]']).strftime('%Y-%m-%d')
257
+ # updated_fig = create_leaderboard(
258
+ # country_concentration_df, author_concentration_df, model_concentration_df, start_time=start_time, end_time=end_time
259
+ # )
260
+ # updated_fig.update_layout(font_family="Inter")
261
+ # return updated_fig
262
+ # else:
263
+ # return leaderboard
264
 
265
  # On slider change, update stacked area chart
266
  @app.callback(
assets/icons/google.png ADDED
assets/icons/meta.png ADDED
assets/icons/openai.png ADDED
graphs/__pycache__/model_market_share.cpython-39.pyc CHANGED
Binary files a/graphs/__pycache__/model_market_share.cpython-39.pyc and b/graphs/__pycache__/model_market_share.cpython-39.pyc differ
 
graphs/leaderboard.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from dash import html
3
+
4
# Icon shown in front of each country chip: a flag emoji, or an image path.
_COUNTRY_ICONS = {
    "United States of America": "🇺🇸",
    "China": "🇨🇳",
    "Germany": "🇩🇪",
    "France": "🇫🇷",
    "India": "🇮🇳",
    "Italy": "🇮🇹",
    "Japan": "🇯🇵",
    "South Korea": "🇰🇷",
    "United Kingdom": "🇬🇧",
    "Canada": "🇨🇦",
    "Brazil": "🇧🇷",
    "Australia": "🇦🇺",
    "Unknown": "❓",
    "Finland": "🇫🇮",
    "Lebanon": "🇱🇧",
    "HF": "../assets/icons/hugging-face.png",
}

# Logo shown in front of each author chip, keyed by author name.
_COMPANY_ICONS = {
    "google": "../assets/icons/google.png",
    "distilbert": "../assets/icons/hugging-face.png",
    "sentence-transformers": "../assets/icons/hugging-face.png",
    "facebook": "../assets/icons/meta.png",
    "openai": "../assets/icons/openai.png",
}

# Metadata columns turned into chips, in display order.
_META_COLS = ("country", "author", "downloads", "org_or_user")

# An icon whose value ends with one of these suffixes is rendered as an <img>.
_IMAGE_SUFFIXES = (".png", ".jpg", ".jpeg", ".svg")

# Style shared by every chip; the background colour is added per table.
_CHIP_STYLE = {
    "padding": "4px 10px",
    "borderRadius": "12px",
    "margin": "2px",
    "display": "inline-flex",
    "alignItems": "center",
    "fontSize": "14px",
}


def _metadata_chips(meta):
    """Turn ``{column: [values]}`` into an ordered list of ``(icon, label)`` pairs."""
    chips = [(_COUNTRY_ICONS.get(c, ""), c) for c in meta.get("country", [])]
    chips += [(_COMPANY_ICONS.get(a, ""), a) for a in meta.get("author", [])]
    chips += [("⬇️", f"{int(d):,}") for d in meta.get("downloads", [])]
    for kind in meta.get("org_or_user", []):
        is_org = kind == "org"
        chips.append(("🏢" if is_org else "👤", "Org" if is_org else "User"))
    return chips


def _top_n_leaderboard(df, group_col, top_n=10):
    """Rank the ``top_n`` groups of ``df`` by summed ``value``.

    Returns a frame with columns ``Name``, ``Metadata`` (list of
    ``(icon, label)`` pairs) and ``% of total``.  The percentage is each
    row's share of the *displayed* top-N sum, not of every group in ``df``.
    """
    top = (
        df.groupby(group_col)["value"]
        .sum()
        .sort_values(ascending=False)
        .head(top_n)
        .reset_index()
        .rename(columns={group_col: "Name", "value": "Total Value"})
    )
    total_value = top["Total Value"].sum()
    top["% of total"] = top["Total Value"] / total_value * 100 if total_value else 0

    present_cols = [col for col in _META_COLS if col in df.columns]

    def chips_for(name):
        rows = df[df[group_col] == name]
        # dropna(): rows without a match in the metadata merge carry NaN and
        # must not be rendered as "nan" / "User" chips.
        return _metadata_chips(
            {col: list(rows[col].dropna().unique()) for col in present_cols}
        )

    top["Metadata"] = top["Name"].map(chips_for)
    return top[["Name", "Metadata", "% of total"]]


def _chip(children, bg_color="#F0F0F0"):
    """Render one rounded chip containing ``children``."""
    return html.Span(children, style={**_CHIP_STYLE, "backgroundColor": bg_color})


def _render_chips(metadata, chip_color="#F0F0F0"):
    """Render ``(icon, label)`` pairs as a centred, wrapping row of chips."""
    chips = []
    for icon, label in metadata:
        if isinstance(icon, str) and icon.endswith(_IMAGE_SUFFIXES):
            image = html.Img(src=icon, style={"height": "18px", "marginRight": "6px"})
            chips.append(_chip([image, label], chip_color))
        else:
            chips.append(_chip(f"{icon} {label}", chip_color))
    return html.Div(
        chips,
        style={"display": "flex", "flexWrap": "wrap", "justifyContent": "center"},
    )


def _progress_bar(percent, bar_color="#4CAF50"):
    """Render a horizontal bar filled to ``percent`` with a centred label."""
    fill = html.Div(
        style={
            "backgroundColor": bar_color,
            "width": f"{percent}%",
            "height": "100%",
            "borderRadius": "8px",
            "transition": "width 0.5s",
        }
    )
    label = html.Div(
        f"{percent:.1f}%",
        style={
            "position": "absolute",
            "top": 0,
            "left": "50%",
            "transform": "translateX(-50%)",
            "color": "black",
            "fontWeight": "bold",
            "fontSize": "12px",
            "lineHeight": "20px",
            "textAlign": "center",
        },
    )
    return html.Div(
        style={
            "position": "relative",
            "backgroundColor": "#E0E0E0",
            "borderRadius": "8px",
            "height": "20px",
            "width": "100%",
            "overflow": "hidden",
        },
        children=[fill, label],
    )


def _render_table(df, title, chip_color="#F0F0F0", bar_color="#4CAF50"):
    """Render one leaderboard frame as a titled HTML table."""
    centered = {"textAlign": "center"}
    header_style = {"backgroundColor": "#F0F0F0"}
    header = html.Thead(html.Tr([
        html.Th("Rank", style=header_style),
        html.Th("Name", style=header_style),
        html.Th("Metadata", style=header_style),
        html.Th("% of Total", style=header_style),
    ]))
    # Rank comes from enumerate, so it does not depend on the frame's index.
    body = html.Tbody([
        html.Tr([
            html.Td(rank, style=centered),
            html.Td(row["Name"], style=centered),
            html.Td(_render_chips(row["Metadata"], chip_color), style=centered),
            html.Td(_progress_bar(row["% of total"], bar_color), style=centered),
        ])
        for rank, (_, row) in enumerate(df.iterrows(), start=1)
    ])
    return html.Div([
        html.H4(title, style={"textAlign": "center", "marginBottom": "10px", "fontSize": "20px"}),
        html.Table([header, body], style={"borderCollapse": "collapse", "width": "100%"}),
    ], style={"marginBottom": "20px"})


def create_leaderboard(filtered_df, country_df, developer_df, model_df, start_time=None, end_time=None, top_n=10):
    """Build the three stacked leaderboard tables (countries, developers, models).

    Args:
        filtered_df: Per-model metadata; must provide the columns ``country``,
            ``author``, ``downloads``, ``org_or_user`` and ``model``.
        country_df: Frame with ``time``, ``metric`` (country name) and ``value``.
        developer_df: Frame with ``time``, ``metric`` (author name) and ``value``.
        model_df: Frame with ``time``, ``metric`` (model name) and ``value``.
        start_time: Inclusive lower bound on ``time``; defaults to the earliest
            ``time`` in ``country_df``.
        end_time: Inclusive upper bound on ``time``; defaults to the latest
            ``time`` in ``country_df``.
        top_n: Number of rows per table.

    Returns:
        A ``html.Div`` holding the three tables, or a ``html.Div`` with the
        text "No data in selected range" when every frame is empty after
        filtering.
    """
    # assign() returns new frames, so the caller's (shared) data frames are
    # never mutated by the datetime conversion.
    country_df, developer_df, model_df = (
        df.assign(time=pd.to_datetime(df["time"]))
        for df in (country_df, developer_df, model_df)
    )

    country_df = country_df.rename(columns={"metric": "country"})

    # Attach metadata with the metric itself as the join/grouping key, so
    # developers or models missing from filtered_df still get ranked (they
    # simply have fewer chips) instead of vanishing as NaN groups.
    developer_meta = filtered_df[["country", "author", "org_or_user"]].drop_duplicates(
        subset=["author"]
    )
    developer_df = developer_df.rename(columns={"metric": "author"}).merge(
        developer_meta, on="author", how="left"
    )

    model_meta = filtered_df[
        ["country", "author", "downloads", "org_or_user", "model"]
    ].drop_duplicates(subset=["model"])
    model_df = model_df.rename(columns={"metric": "model"}).merge(
        model_meta, on="model", how="left"
    )

    # Restrict every frame to the requested (inclusive) time window.
    start_time = start_time or country_df["time"].min()
    end_time = end_time or country_df["time"].max()
    country_df = country_df[country_df["time"].between(start_time, end_time)]
    developer_df = developer_df[developer_df["time"].between(start_time, end_time)]
    model_df = model_df[model_df["time"].between(start_time, end_time)]

    if country_df.empty and developer_df.empty and model_df.empty:
        return html.Div("No data in selected range")

    top_countries = _top_n_leaderboard(country_df, "country", top_n)
    top_developers = _top_n_leaderboard(developer_df, "author", top_n)
    top_models = _top_n_leaderboard(model_df, "model", top_n)

    return html.Div([
        _render_table(top_countries, "Top Countries", chip_color="#FCE8E6", bar_color="#FF6F61"),
        _render_table(top_developers, "Top Developers", chip_color="#E6F4EA", bar_color="#4CAF50"),
        _render_table(top_models, "Top Models", chip_color="#E8F0FE", bar_color="#2196F3"),
    ])
graphs/model_market_share.py CHANGED
@@ -1,8 +1,5 @@
1
  import plotly.graph_objects as go
2
  from plotly.subplots import make_subplots
3
- import pandas as pd
4
-
5
- filtered_df = pd.read_pickle("data_frames/filtered_df.pkl")
6
 
7
  def create_stacked_area_chart(
8
  topk_df, gini_df, hhi_df, events, palette, start_time=None, end_time=None
@@ -393,146 +390,4 @@ def create_range_slider(df):
393
  height=100
394
  )
395
 
396
- return fig
397
-
398
- def create_leaderboard(country_df, developer_df, model_df, start_time=None, end_time=None, top_n=10):
399
- # Country -> Emoji mapping
400
- country_emoji_map = {
401
- "United States of America": "🇺🇸",
402
- "China": "🇨🇳",
403
- "Germany": "🇩🇪",
404
- "France": "🇫🇷",
405
- "India": "🇮🇳",
406
- "Italy": "🇮🇹",
407
- "Japan": "🇯🇵",
408
- "South Korea": "🇰🇷",
409
- "United Kingdom": "🇬🇧",
410
- "Canada": "🇨🇦",
411
- "Brazil": "🇧🇷",
412
- "Australia": "🇦🇺",
413
- "Unknown": "❓",
414
- "Finland": "🇫🇮",
415
- "Lebanon": "🇱🇧 ",
416
- }
417
-
418
- # Ensure datetime
419
- country_df["time"] = pd.to_datetime(country_df["time"])
420
- developer_df["time"] = pd.to_datetime(developer_df["time"])
421
- model_df["time"] = pd.to_datetime(model_df["time"])
422
-
423
- # Add corresponding country info to developer_df and model_df, mapping "metric" to "author" and "metric" to "model"
424
- # Merge with filtered_df to get country info
425
- developer_df = developer_df.merge(
426
- filtered_df[["author", "country"]].drop_duplicates(),
427
- left_on="metric",
428
- right_on="author",
429
- how="left"
430
- ).rename(columns={"country": "country_metric"}).drop(columns=["author"])
431
- model_df = model_df.merge(
432
- filtered_df[["model", "country"]].drop_duplicates(),
433
- left_on="metric",
434
- right_on="model",
435
- how="left"
436
- ).rename(columns={"country": "country_metric"}).drop(columns=["model"])
437
-
438
- if start_time is None:
439
- start_time = country_df["time"].min()
440
- if end_time is None:
441
- end_time = country_df["time"].max()
442
-
443
- # Filter time range
444
- country_df_filtered = country_df[
445
- (country_df["time"] >= start_time) & (country_df["time"] <= end_time)
446
- ]
447
- developer_df_filtered = developer_df[
448
- (developer_df["time"] >= start_time) & (developer_df["time"] <= end_time)
449
- ]
450
- model_df_filtered = model_df[
451
- (model_df["time"] >= start_time) & (model_df["time"] <= end_time)
452
- ]
453
-
454
- if country_df_filtered.empty and developer_df_filtered.empty and model_df_filtered.empty:
455
- return go.Figure()
456
-
457
- # Function to get top N leaderboard with percentage
458
- def get_top_n_leaderboard(df, group_col, label, top_n=10):
459
- top = (
460
- df.groupby(group_col)["value"]
461
- .sum()
462
- .sort_values(ascending=False)
463
- .head(top_n)
464
- .reset_index()
465
- .rename(columns={group_col: label, "value": "Total Value"})
466
- )
467
- total_value = top["Total Value"].sum()
468
- if total_value > 0:
469
- top["% of total"] = top["Total Value"] / total_value * 100
470
- else:
471
- top["% of total"] = 0
472
-
473
- # add column with metadata (country emoji for country, country for developer/model)
474
- if label == "Country":
475
- top["Attributes"] = top[label].map(country_emoji_map).fillna("")
476
- else:
477
- # Get the country_metric for each developer/model with the already merged info
478
- top = top.merge(
479
- df[[group_col, "country_metric"]].drop_duplicates(),
480
- left_on=label,
481
- right_on=group_col,
482
- how="left"
483
- ).drop(columns=[group_col])
484
- top["Attributes"] = top["country_metric"].map(country_emoji_map).fillna("")
485
- return top[[label, "Attributes", "% of total"]]
486
-
487
- top_countries = get_top_n_leaderboard(country_df_filtered, "metric", "Country", top_n=top_n)
488
- top_developers = get_top_n_leaderboard(developer_df_filtered, "metric", "Developer", top_n=top_n)
489
- top_models = get_top_n_leaderboard(model_df_filtered, "metric", "Model", top_n=top_n)
490
-
491
- # Create subplot grid with 3 columns
492
- fig = make_subplots(
493
- rows=1, cols=3,
494
- subplot_titles=("Top Countries", "Top Developers", "Top Models"),
495
- specs=[[{"type": "table"}, {"type": "table"}, {"type": "table"}]]
496
- )
497
-
498
- # Add country table
499
- fig.add_trace(
500
- go.Table(
501
- header=dict(values=list(top_countries.columns),
502
- fill_color="lightgrey", align="left"),
503
- cells=dict(values=[top_countries[col] for col in top_countries.columns],
504
- fill_color="white", align="left"),
505
- ),
506
- row=1, col=1
507
- )
508
-
509
- # Add developer table
510
- fig.add_trace(
511
- go.Table(
512
- header=dict(values=list(top_developers.columns),
513
- fill_color="lightgrey", align="left"),
514
- cells=dict(values=[top_developers[col] for col in top_developers.columns],
515
- fill_color="white", align="left"),
516
- ),
517
- row=1, col=2
518
- )
519
-
520
- # Add model table
521
- fig.add_trace(
522
- go.Table(
523
- header=dict(values=list(top_models.columns),
524
- fill_color="lightgrey", align="left"),
525
- cells=dict(values=[top_models[col] for col in top_models.columns],
526
- fill_color="white", align="left"),
527
- ),
528
- row=1, col=3
529
- )
530
-
531
- fig.update_layout(
532
- height=400,
533
- showlegend=False,
534
- title_text="Leaderboards"
535
- )
536
-
537
- return fig
538
-
 
1
  import plotly.graph_objects as go
2
  from plotly.subplots import make_subplots
 
 
 
3
 
4
  def create_stacked_area_chart(
5
  topk_df, gini_df, hhi_df, events, palette, start_time=None, end_time=None
 
390
  height=100
391
  )
392
 
393
+ return fig