Spaces:

economies-open-ai
/

open-model-evolution

Running

App Files Files Community

emsesc committed on Sep 2

Commit

2811ff1

1 Parent(s): 62665d6

modify leaderboards

Browse files

Files changed (7) hide show

app.py +24 -21
assets/icons/google.png +0 -0
assets/icons/meta.png +0 -0
assets/icons/openai.png +0 -0
graphs/__pycache__/model_market_share.cpython-39.pyc +0 -0
graphs/leaderboard.py +223 -0
graphs/model_market_share.py +1 -146

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from dash import Dash, html, dcc, Input, Output
 import pandas as pd
-from graphs.model_market_share import create_stacked_area_chart, create_world_map, create_range_slider, create_leaderboard
 from graphs.model_characteristics import create_concentration_chart, create_line_plot
 # Initialize the app
@@ -8,6 +9,7 @@ app = Dash()
 server = app.server
 # Load pre-processed data frames
 model_topk_df = pd.read_pickle("data_frames/model_topk_df.pkl")
 model_gini_df = pd.read_pickle("data_frames/model_gini_df.pkl")
 model_hhi_df = pd.read_pickle("data_frames/model_hhi_df.pkl")
@@ -82,10 +84,6 @@ world_map = create_world_map(
     country_concentration_df, "time", "metric", "value"
 )
-leaderboard = create_leaderboard(
-    country_concentration_df, author_concentration_df, model_concentration_df
-)
 slider = create_range_slider(
     model_topk_df
 )
@@ -175,9 +173,14 @@ app.layout = html.Div(
                                 dcc.Graph(id='world-map-with-slider'),
                                 style={'display': 'flex', 'justifyContent': 'center'}
                             ),
-                            dcc.Graph(id='leaderboard'),
                         ], style={'marginBottom': 12})
                     ]),
                     dcc.Tab(label='Model Characteristics', children=[
                         dcc.Graph(id='language-concentration-chart'),
                         html.Div([
@@ -243,21 +246,21 @@ def update_map(relayout_data):
         return world_map
 # On slider change, update leaderboard
-@app.callback(
-    Output('leaderboard', 'figure'),
-    [Input('time-slider', 'relayoutData')]
-)
-def update_leaderboard(relayout_data):
-    if relayout_data and 'xaxis.range[0]' in relayout_data and 'xaxis.range[1]' in relayout_data:
-        start_time = pd.to_datetime(relayout_data['xaxis.range[0]']).strftime('%Y-%m-%d')
-        end_time = pd.to_datetime(relayout_data['xaxis.range[1]']).strftime('%Y-%m-%d')
-        updated_fig = create_leaderboard(
-            country_concentration_df, author_concentration_df, model_concentration_df, start_time=start_time, end_time=end_time
-        )
-        updated_fig.update_layout(font_family="Inter")
-        return updated_fig
-    else:
-        return leaderboard
 # On slider change, update stacked area chart
 @app.callback(

 from dash import Dash, html, dcc, Input, Output
 import pandas as pd
+from graphs.model_market_share import create_stacked_area_chart, create_world_map, create_range_slider
+from graphs.leaderboard import create_leaderboard
 from graphs.model_characteristics import create_concentration_chart, create_line_plot
 # Initialize the app
 server = app.server
 # Load pre-processed data frames
+filtered_df = pd.read_pickle("data_frames/filtered_df.pkl")
 model_topk_df = pd.read_pickle("data_frames/model_topk_df.pkl")
 model_gini_df = pd.read_pickle("data_frames/model_gini_df.pkl")
 model_hhi_df = pd.read_pickle("data_frames/model_hhi_df.pkl")
     country_concentration_df, "time", "metric", "value"
 )
 slider = create_range_slider(
     model_topk_df
 )
                                 dcc.Graph(id='world-map-with-slider'),
                                 style={'display': 'flex', 'justifyContent': 'center'}
                             ),
+                            # dcc.Graph(id='leaderboard'),
                         ], style={'marginBottom': 12})
                     ]),
+                    dcc.Tab(label='Leaderboard', children=[
+                        create_leaderboard(
+                            filtered_df, country_concentration_df, author_concentration_df, model_concentration_df
+                        )
+                    ]),
                     dcc.Tab(label='Model Characteristics', children=[
                         dcc.Graph(id='language-concentration-chart'),
                         html.Div([
         return world_map
 # On slider change, update leaderboard
+# @app.callback(
+#     Output('leaderboard', 'figure'),
+#     [Input('time-slider', 'relayoutData')]
+# )
+# def update_leaderboard(relayout_data):
+#     if relayout_data and 'xaxis.range[0]' in relayout_data and 'xaxis.range[1]' in relayout_data:
+#         start_time = pd.to_datetime(relayout_data['xaxis.range[0]']).strftime('%Y-%m-%d')
+#         end_time = pd.to_datetime(relayout_data['xaxis.range[1]']).strftime('%Y-%m-%d')
+#         updated_fig = create_leaderboard(
+#             country_concentration_df, author_concentration_df, model_concentration_df, start_time=start_time, end_time=end_time
+#         )
+#         updated_fig.update_layout(font_family="Inter")
+#         return updated_fig
+#     else:
+#         return leaderboard
 # On slider change, update stacked area chart
 @app.callback(

assets/icons/google.png ADDED Viewed

assets/icons/meta.png ADDED Viewed

assets/icons/openai.png ADDED Viewed

graphs/__pycache__/model_market_share.cpython-39.pyc CHANGED Viewed

Binary files a/graphs/__pycache__/model_market_share.cpython-39.pyc and b/graphs/__pycache__/model_market_share.cpython-39.pyc differ

graphs/leaderboard.py ADDED Viewed

	@@ -0,0 +1,223 @@

+import pandas as pd
+from dash import html
+def create_leaderboard(filtered_df, country_df, developer_df, model_df, start_time=None, end_time=None, top_n=10):
+    """Build a Dash layout with three stacked top-N leaderboard tables.
+
+    Renders "Top Countries", "Top Developers" and "Top Models". Each row shows
+    a rank, the entity name, metadata chips and a progress bar.
+
+    Args:
+        filtered_df: Metadata frame; must provide the "country", "author",
+            "downloads", "org_or_user" and "model" columns.
+        country_df: Frame with "time", "metric" and "value" columns, where
+            "metric" holds the country name.
+        developer_df: Same layout, "metric" holds the author name.
+        model_df: Same layout, "metric" holds the model name.
+        start_time: Inclusive lower bound on "time"; defaults to the earliest
+            "time" in country_df.
+        end_time: Inclusive upper bound on "time"; defaults to the latest
+            "time" in country_df.
+        top_n: Number of rows per table.
+
+    Returns:
+        An html.Div holding the three tables, or an html.Div with the text
+        "No data in selected range" when all three frames are empty after
+        the time filter.
+    """
+    country_icon_map = {
+        "United States of America": "🇺🇸",
+        "China": "🇨🇳",
+        "Germany": "🇩🇪",
+        "France": "🇫🇷",
+        "India": "🇮🇳",
+        "Italy": "🇮🇹",
+        "Japan": "🇯🇵",
+        "South Korea": "🇰🇷",
+        "United Kingdom": "🇬🇧",
+        "Canada": "🇨🇦",
+        "Brazil": "🇧🇷",
+        "Australia": "🇦🇺",
+        "Unknown": "❓",
+        "Finland": "🇫🇮",
+        "Lebanon": "🇱🇧",
+        "HF": "../assets/icons/hugging-face.png",
+    }
+    company_icon_map = {
+        "google": "../assets/icons/google.png",
+        "distilbert": "../assets/icons/hugging-face.png",
+        "sentence-transformers": "../assets/icons/hugging-face.png",
+        "facebook": "../assets/icons/meta.png",
+        "openai": "../assets/icons/openai.png",
+    }
+    image_suffixes = (".png", ".jpg", ".jpeg", ".svg")
+    # Parse "time" on copies (assign returns a new frame) so the caller's
+    # DataFrames are not modified as a side effect of rendering.
+    country_df, developer_df, model_df = (
+        df.assign(time=pd.to_datetime(df["time"]))
+        for df in (country_df, developer_df, model_df)
+    )
+    # Name every entity by its own "metric" value *before* attaching metadata.
+    # With a left merge, entities missing from filtered_df keep their name and
+    # simply get NaN metadata, instead of a NaN name that groupby would drop.
+    country_df = country_df.rename(columns={"metric": "country"})
+    developer_df = developer_df.rename(columns={"metric": "author"}).merge(
+        filtered_df[["country", "author", "org_or_user"]].drop_duplicates(subset=["author"]),
+        on="author", how="left",
+    )
+    model_df = model_df.rename(columns={"metric": "model"}).merge(
+        filtered_df[["country", "author", "downloads", "org_or_user", "model"]].drop_duplicates(subset=["model"]),
+        on="model", how="left",
+    )
+    # Filter by time (both bounds inclusive)
+    start_time = pd.to_datetime(start_time or country_df["time"].min())
+    end_time = pd.to_datetime(end_time or country_df["time"].max())
+    def in_range(df):
+        return df[(df["time"] >= start_time) & (df["time"] <= end_time)]
+    country_df, developer_df, model_df = in_range(country_df), in_range(developer_df), in_range(model_df)
+    if country_df.empty and developer_df.empty and model_df.empty:
+        return html.Div("No data in selected range")
+    # Function to get top N leaderboard
+    def get_top_n_leaderboard(df, group_col, top_n=10):
+        top = (
+            df.groupby(group_col)["value"]
+            .sum()
+            .sort_values(ascending=False)
+            .head(top_n)
+            .reset_index()
+            .rename(columns={group_col: "Name", "value": "Total Value"})
+        )
+        # NOTE(review): the share is relative to the sum of the top-N rows
+        # only, not to every row in df -- confirm this is what the
+        # "% of Total" column is meant to show.
+        total_value = top["Total Value"].sum()
+        top["% of total"] = top["Total Value"] / total_value * 100 if total_value else 0
+        # Distinct, non-missing metadata values per ranked entity
+        meta_cols = [c for c in ("country", "author", "downloads", "org_or_user") if c in df.columns]
+        meta_map = {}
+        for name in top["Name"]:
+            name_data = df[df[group_col] == name]
+            meta_map[name] = {col: list(name_data[col].dropna().unique()) for col in meta_cols}
+        # Turn one entity's metadata into (icon, label) chip pairs
+        def build_metadata(nm):
+            meta = meta_map.get(nm, {})
+            chips = [(country_icon_map.get(c, ""), c) for c in meta.get("country", [])]
+            chips += [(company_icon_map.get(a, ""), a) for a in meta.get("author", [])]
+            chips += [("⬇️", f"{int(d):,}") for d in meta.get("downloads", [])]
+            chips += [
+                ("🏢", "Org") if o == "org" else ("👤", "User")
+                for o in meta.get("org_or_user", [])
+            ]
+            return chips
+        top["Metadata"] = top["Name"].map(build_metadata)
+        return top[["Name", "Metadata", "% of total"]]
+    # Build leaderboards
+    top_countries = get_top_n_leaderboard(country_df, "country", top_n)
+    top_developers = get_top_n_leaderboard(developer_df, "author", top_n)
+    top_models = get_top_n_leaderboard(model_df, "model", top_n)
+    # Single source of truth for the pill styling of text and image chips
+    def chip_style(bg_color):
+        return {
+            "backgroundColor": bg_color,
+            "padding": "4px 10px",
+            "borderRadius": "12px",
+            "margin": "2px",
+            "display": "inline-flex",
+            "alignItems": "center",
+            "fontSize": "14px"
+        }
+    # Chip renderer
+    def chip(content, bg_color="#F0F0F0"):
+        return html.Span(content, style=chip_style(bg_color))
+    # Render multiple chips in one row
+    def render_chips(metadata_list, chip_color="#F0F0F0"):
+        chips = []
+        for icon, name in metadata_list:
+            if isinstance(icon, str) and icon.endswith(image_suffixes):
+                # Icon is an image path: show it as an <img> next to the label
+                content = [html.Img(src=icon, style={"height": "18px", "marginRight": "6px"}), name]
+            else:
+                # Emoji (or no icon at all): avoid a stray leading space
+                content = f"{icon} {name}" if icon else f"{name}"
+            chips.append(chip(content, chip_color))
+        return html.Div(
+            chips,
+            style={"display": "flex", "flexWrap": "wrap", "justifyContent": "center"}
+        )
+    # Progress bar for % of total
+    def progress_bar(percent, bar_color="#4CAF50"):
+        return html.Div(
+            style={
+                "position": "relative",
+                "backgroundColor": "#E0E0E0",
+                "borderRadius": "8px",
+                "height": "20px",
+                "width": "100%",
+                "overflow": "hidden",
+            },
+            children=[
+                html.Div(
+                    style={
+                        "backgroundColor": bar_color,
+                        "width": f"{percent}%",
+                        "height": "100%",
+                        "borderRadius": "8px",
+                        "transition": "width 0.5s",
+                    }
+                ),
+                html.Div(
+                    f"{percent:.1f}%",
+                    style={
+                        "position": "absolute",
+                        "top": 0,
+                        "left": "50%",
+                        "transform": "translateX(-50%)",
+                        "color": "black",
+                        "fontWeight": "bold",
+                        "fontSize": "12px",
+                        "lineHeight": "20px",
+                        "textAlign": "center",
+                    }
+                )
+            ]
+        )
+    # Table renderer
+    def render_table(df, title, chip_color="#F0F0F0", bar_color="#4CAF50"):
+        centered = {"textAlign": "center"}
+        header = html.Thead(html.Tr([
+            html.Th(label, style={"backgroundColor": "#F0F0F0"})
+            for label in ("Rank", "Name", "Metadata", "% of Total")
+        ]))
+        # Rank comes from row position, not from the DataFrame index
+        body = html.Tbody([
+            html.Tr([
+                html.Td(rank, style=centered),
+                html.Td(name, style=centered),
+                html.Td(render_chips(metadata, chip_color), style=centered),
+                html.Td(progress_bar(percent, bar_color), style=centered)
+            ])
+            for rank, (name, metadata, percent) in enumerate(
+                zip(df["Name"], df["Metadata"], df["% of total"]), start=1
+            )
+        ])
+        return html.Div([
+            html.H4(title, style={"textAlign": "center", "marginBottom": "10px", "fontSize": "20px"}),
+            html.Table([header, body], style={"borderCollapse": "collapse", "width": "100%"})
+        ], style={"marginBottom": "20px"})
+    # Layout with 3 stacked tables
+    layout = html.Div([
+        render_table(top_countries, "Top Countries", chip_color="#FCE8E6", bar_color="#FF6F61"),
+        render_table(top_developers, "Top Developers", chip_color="#E6F4EA", bar_color="#4CAF50"),
+        render_table(top_models, "Top Models", chip_color="#E8F0FE", bar_color="#2196F3"),
+    ])
+    return layout

graphs/model_market_share.py CHANGED Viewed

@@ -1,8 +1,5 @@
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
-import pandas as pd
-filtered_df = pd.read_pickle("data_frames/filtered_df.pkl")
 def create_stacked_area_chart(
     topk_df, gini_df, hhi_df, events, palette, start_time=None, end_time=None
@@ -393,146 +390,4 @@ def create_range_slider(df):
         height=100
     )
-    return fig
-def create_leaderboard(country_df, developer_df, model_df, start_time=None, end_time=None, top_n=10):
-    # Country -> Emoji mapping
-    country_emoji_map = {
-        "United States of America": "🇺🇸",
-        "China": "🇨🇳",
-        "Germany": "🇩🇪",
-        "France": "🇫🇷",
-        "India": "🇮🇳",
-        "Italy": "🇮🇹",
-        "Japan": "🇯🇵",
-        "South Korea": "🇰🇷",
-        "United Kingdom": "🇬🇧",
-        "Canada": "🇨🇦",
-        "Brazil": "🇧🇷",
-        "Australia": "🇦🇺",
-        "Unknown": "❓",
-        "Finland": "🇫🇮",
-        "Lebanon": "🇱🇧 ",
-    }
-    # Ensure datetime
-    country_df["time"] = pd.to_datetime(country_df["time"])
-    developer_df["time"] = pd.to_datetime(developer_df["time"])
-    model_df["time"] = pd.to_datetime(model_df["time"])
-    # Add corresponding country info to developer_df and model_df, mapping "metric" to "author" and "metric" to "model"
-    # Merge with filtered_df to get country info
-    developer_df = developer_df.merge(
-        filtered_df[["author", "country"]].drop_duplicates(),
-        left_on="metric",
-        right_on="author",
-        how="left"
-    ).rename(columns={"country": "country_metric"}).drop(columns=["author"])
-    model_df = model_df.merge(
-        filtered_df[["model", "country"]].drop_duplicates(),
-        left_on="metric",
-        right_on="model",
-        how="left"
-    ).rename(columns={"country": "country_metric"}).drop(columns=["model"])
-    if start_time is None:
-        start_time = country_df["time"].min()
-    if end_time is None:
-        end_time = country_df["time"].max()
-    # Filter time range
-    country_df_filtered = country_df[
-        (country_df["time"] >= start_time) & (country_df["time"] <= end_time)
-    ]
-    developer_df_filtered = developer_df[
-        (developer_df["time"] >= start_time) & (developer_df["time"] <= end_time)
-    ]
-    model_df_filtered = model_df[
-        (model_df["time"] >= start_time) & (model_df["time"] <= end_time)
-    ]
-    if country_df_filtered.empty and developer_df_filtered.empty and model_df_filtered.empty:
-        return go.Figure()
-    # Function to get top N leaderboard with percentage
-    def get_top_n_leaderboard(df, group_col, label, top_n=10):
-        top = (
-            df.groupby(group_col)["value"]
-            .sum()
-            .sort_values(ascending=False)
-            .head(top_n)
-            .reset_index()
-            .rename(columns={group_col: label, "value": "Total Value"})
-        )
-        total_value = top["Total Value"].sum()
-        if total_value > 0:
-            top["% of total"] = top["Total Value"] / total_value * 100
-        else:
-            top["% of total"] = 0
-        # add column with metadata (country emoji for country, country for developer/model)
-        if label == "Country":
-            top["Attributes"] = top[label].map(country_emoji_map).fillna("")
-        else:
-            # Get the country_metric for each developer/model with the already merged info
-            top = top.merge(
-                df[[group_col, "country_metric"]].drop_duplicates(),
-                left_on=label,
-                right_on=group_col,
-                how="left"
-            ).drop(columns=[group_col])
-            top["Attributes"] = top["country_metric"].map(country_emoji_map).fillna("")
-        return top[[label, "Attributes", "% of total"]]
-    top_countries = get_top_n_leaderboard(country_df_filtered, "metric", "Country", top_n=top_n)
-    top_developers = get_top_n_leaderboard(developer_df_filtered, "metric", "Developer", top_n=top_n)
-    top_models = get_top_n_leaderboard(model_df_filtered, "metric", "Model", top_n=top_n)
-    # Create subplot grid with 3 columns
-    fig = make_subplots(
-        rows=1, cols=3,
-        subplot_titles=("Top Countries", "Top Developers", "Top Models"),
-        specs=[[{"type": "table"}, {"type": "table"}, {"type": "table"}]]
-    )
-    # Add country table
-    fig.add_trace(
-        go.Table(
-            header=dict(values=list(top_countries.columns),
-                        fill_color="lightgrey", align="left"),
-            cells=dict(values=[top_countries[col] for col in top_countries.columns],
-                       fill_color="white", align="left"),
-        ),
-        row=1, col=1
-    )
-    # Add developer table
-    fig.add_trace(
-        go.Table(
-            header=dict(values=list(top_developers.columns),
-                        fill_color="lightgrey", align="left"),
-            cells=dict(values=[top_developers[col] for col in top_developers.columns],
-                       fill_color="white", align="left"),
-        ),
-        row=1, col=2
-    )
-    # Add model table
-    fig.add_trace(
-        go.Table(
-            header=dict(values=list(top_models.columns),
-                        fill_color="lightgrey", align="left"),
-            cells=dict(values=[top_models[col] for col in top_models.columns],
-                       fill_color="white", align="left"),
-        ),
-        row=1, col=3
-    )
-    fig.update_layout(
-        height=400,
-        showlegend=False,
-        title_text="Leaderboards"
-    )
-    return fig

 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
 def create_stacked_area_chart(
     topk_df, gini_df, hhi_df, events, palette, start_time=None, end_time=None
         height=100
     )
+    return fig