Amber Tanaka committed on
Commit
ae05bbd
·
unverified ·
1 Parent(s): dcfd58f

Refactor pages and improve tooltips! (#13)

Browse files
c_and_e.py CHANGED
@@ -1,81 +1,10 @@
1
  import gradio as gr
2
- import pandas as pd
3
-
4
- # Import our UI factories and the data loader
5
- from ui_components import create_leaderboard_display, create_benchmark_details_display, get_full_leaderboard_data,create_sub_navigation_bar
6
  from content import CODE_EXECUTION_DESCRIPTION
 
 
7
  # Define the category for this page
8
  CATEGORY_NAME = "Code Execution"
9
 
10
  with gr.Blocks() as demo:
11
  gr.Markdown(f"## Astabench {CATEGORY_NAME} Leaderboard")
12
- validation_df, validation_tag_map = get_full_leaderboard_data("validation")
13
- test_df, test_tag_map = get_full_leaderboard_data("test")
14
- gr.Markdown(CODE_EXECUTION_DESCRIPTION, elem_id="category-intro")
15
- with gr.Column(elem_id="validation_nav_container", visible=False) as validation_nav_container:
16
- create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME)
17
- with gr.Column(elem_id="test_nav_container", visible=True) as test_nav_container:
18
- create_sub_navigation_bar(test_tag_map, CATEGORY_NAME)
19
-
20
-
21
- # --- This page now has two main sections: Validation and Test ---
22
- with gr.Tabs():
23
- with gr.Tab("Results: Test Set") as test_tab:
24
- # Repeat the process for the "test" split
25
- test_df, test_tag_map = get_full_leaderboard_data("test")
26
-
27
- if not test_df.empty:
28
- create_leaderboard_display(
29
- full_df=test_df,
30
- tag_map=test_tag_map,
31
- category_name=CATEGORY_NAME,
32
- split_name="test"
33
- )
34
- create_benchmark_details_display(
35
- full_df=test_df,
36
- tag_map=test_tag_map,
37
- category_name=CATEGORY_NAME
38
- )
39
- else:
40
- gr.Markdown("No data available for test split.")
41
- with gr.Tab("Results: Validation Set") as validation_tab:
42
- # 1. Load all necessary data for the "validation" split ONCE.
43
- validation_df, validation_tag_map = get_full_leaderboard_data("validation")
44
-
45
- if not validation_df.empty:
46
- # 2. Render the main category display using the loaded data.
47
- create_leaderboard_display(
48
- full_df=validation_df,
49
- tag_map=validation_tag_map,
50
- category_name=CATEGORY_NAME,
51
- split_name="validation"
52
- )
53
-
54
- # 3. Render the detailed breakdown for each benchmark in the category.
55
- create_benchmark_details_display(
56
- full_df=validation_df,
57
- tag_map=validation_tag_map,
58
- category_name=CATEGORY_NAME
59
- )
60
- else:
61
- gr.Markdown("No data available for validation split.")
62
-
63
- show_validation_js = """
64
- () => {
65
- document.getElementById('validation_nav_container').style.display = 'block';
66
- document.getElementById('test_nav_container').style.display = 'none';
67
- setTimeout(() => { window.dispatchEvent(new Event('resize')) }, 0);
68
- }
69
- """
70
-
71
- # JavaScript to show the TEST nav, hide the VALIDATION nav, AND fix the plots.
72
- show_test_js = """
73
- () => {
74
- document.getElementById('validation_nav_container').style.display = 'none';
75
- document.getElementById('test_nav_container').style.display = 'block';
76
- }
77
- """
78
-
79
- # Assign the pure JS functions to the select events. No Python `fn` is needed.
80
- validation_tab.select(fn=None, inputs=None, outputs=None, js=show_validation_js)
81
- test_tab.select(fn=None, inputs=None, outputs=None, js=show_test_js)
 
1
  import gradio as gr
 
 
 
 
2
  from content import CODE_EXECUTION_DESCRIPTION
3
+ from category_page_builder import build_category_page
4
+
5
  # Define the category for this page
6
  CATEGORY_NAME = "Code Execution"
7
 
8
  with gr.Blocks() as demo:
9
  gr.Markdown(f"## Astabench {CATEGORY_NAME} Leaderboard")
10
+ build_category_page(CATEGORY_NAME, CODE_EXECUTION_DESCRIPTION)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
category_page_builder.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+
4
+ # Import our UI factories and the data loader
5
+ from ui_components import create_leaderboard_display, create_benchmark_details_display, get_full_leaderboard_data, create_sub_navigation_bar
6
+
7
+ def build_category_page(CATEGORY_NAME, PAGE_DESCRIPTION):
8
+ validation_df, validation_tag_map = get_full_leaderboard_data("validation")
9
+ test_df, test_tag_map = get_full_leaderboard_data("test")
10
+ gr.Markdown(PAGE_DESCRIPTION, elem_id="category-intro")
11
+ with gr.Column(elem_id="validation_nav_container", visible=False) as validation_nav_container:
12
+ create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME)
13
+
14
+ with gr.Column(elem_id="test_nav_container", visible=True) as test_nav_container:
15
+ create_sub_navigation_bar(test_tag_map, CATEGORY_NAME)
16
+
17
+ # --- This page now has two main sections: Validation and Test ---
18
+ with gr.Tabs():
19
+ with gr.Tab("Results: Test Set") as test_tab:
20
+ # Repeat the process for the "test" split
21
+ test_df, test_tag_map = get_full_leaderboard_data("test")
22
+
23
+ if not test_df.empty:
24
+ create_leaderboard_display(
25
+ full_df=test_df,
26
+ tag_map=test_tag_map,
27
+ category_name=CATEGORY_NAME,
28
+ split_name="test"
29
+ )
30
+ create_benchmark_details_display(
31
+ full_df=test_df,
32
+ tag_map=test_tag_map,
33
+ category_name=CATEGORY_NAME
34
+ )
35
+ else:
36
+ gr.Markdown("No data available for test split.")
37
+ with gr.Tab("Results: Validation Set") as validation_tab:
38
+ # 1. Load the data for the "validation" split (note: this repeats the load already done at the top of the function for the nav bar).
39
+ validation_df, validation_tag_map = get_full_leaderboard_data("validation")
40
+
41
+ if not validation_df.empty:
42
+ # 2. Render the main category display using the loaded data.
43
+ create_leaderboard_display(
44
+ full_df=validation_df,
45
+ tag_map=validation_tag_map,
46
+ category_name=CATEGORY_NAME,
47
+ split_name="validation"
48
+ )
49
+
50
+ # 3. Render the detailed breakdown for each benchmark in the category.
51
+ create_benchmark_details_display(
52
+ full_df=validation_df,
53
+ tag_map=validation_tag_map,
54
+ category_name=CATEGORY_NAME
55
+ )
56
+ else:
57
+ gr.Markdown("No data available for validation split.")
58
+
59
+
60
+ show_validation_js = """
61
+ () => {
62
+ document.getElementById('validation_nav_container').style.display = 'block';
63
+ document.getElementById('test_nav_container').style.display = 'none';
64
+ setTimeout(() => { window.dispatchEvent(new Event('resize')) }, 0);
65
+ }
66
+ """
67
+
68
+ # JavaScript to show the TEST nav and hide the VALIDATION nav (unlike the validation handler, it does not dispatch a resize event).
69
+ show_test_js = """
70
+ () => {
71
+ document.getElementById('validation_nav_container').style.display = 'none';
72
+ document.getElementById('test_nav_container').style.display = 'block';
73
+ }
74
+ """
75
+
76
+ # Assign the pure JS functions to the select events. No Python `fn` is needed.
77
+ validation_tab.select(fn=None, inputs=None, outputs=None, js=show_validation_js)
78
+ test_tab.select(fn=None, inputs=None, outputs=None, js=show_test_js)
79
+
80
+ return validation_nav_container, test_nav_container
content.py CHANGED
@@ -305,17 +305,21 @@ html:not(.dark) #legend-markdown .light-mode-icon,
305
  content: attr(data-tooltip);
306
  position: absolute;
307
  bottom: 125%;
308
- background-color: #333;
309
  color: #fff;
310
- padding: 12px 16px;
311
  border-radius: 4px;
312
  font-size: 12px;
313
  opacity: 0;
314
  transition: opacity 0.2s;
315
  white-space: pre-line;
316
- width: 500px;
 
317
  text-align: left;
318
  pointer-events: none;
 
 
 
319
  }
320
 
321
  .tooltip-icon:hover::after {
 
305
  content: attr(data-tooltip);
306
  position: absolute;
307
  bottom: 125%;
308
+ background-color: #105257;
309
  color: #fff;
310
+ padding: 0px 10px 10px;
311
  border-radius: 4px;
312
  font-size: 12px;
313
  opacity: 0;
314
  transition: opacity 0.2s;
315
  white-space: pre-line;
316
+ width: max-content;
317
+ max-width: 350px; /* Limit width for better readability */
318
  text-align: left;
319
  pointer-events: none;
320
+ left: 50%;
321
+ transform: translateX(-50%);
322
+ z-index: 1000; /* Ensure it appears above other elements */
323
  }
324
 
325
  .tooltip-icon:hover::after {
data_analysis.py CHANGED
@@ -1,80 +1,9 @@
1
  import gradio as gr
2
- import pandas as pd
3
-
4
- # Import our UI factories and the data loader
5
- from ui_components import create_leaderboard_display, create_benchmark_details_display, get_full_leaderboard_data, create_sub_navigation_bar
6
  from content import DATA_ANALYSIS_DESCRIPTION
 
7
  # Define the category for this page
8
  CATEGORY_NAME = "Data Analysis"
9
 
10
  with gr.Blocks() as demo:
11
  gr.Markdown(f"## Astabench{CATEGORY_NAME} Leaderboard")
12
- validation_df, validation_tag_map = get_full_leaderboard_data("validation")
13
- test_df, test_tag_map = get_full_leaderboard_data("test")
14
- gr.Markdown(DATA_ANALYSIS_DESCRIPTION, elem_id="category-intro")
15
- with gr.Column(elem_id="validation_nav_container", visible=False) as validation_nav_container:
16
- create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME)
17
- with gr.Column(elem_id="test_nav_container", visible=True) as test_nav_container:
18
- create_sub_navigation_bar(test_tag_map, CATEGORY_NAME)
19
- # --- This page now has two main sections: Validation and Test ---
20
- with gr.Tabs():
21
- with gr.Tab("Results: Test") as test_tab:
22
- # Repeat the process for the "test" split
23
- test_df, test_tag_map = get_full_leaderboard_data("test")
24
-
25
- if not test_df.empty:
26
- create_leaderboard_display(
27
- full_df=test_df,
28
- tag_map=test_tag_map,
29
- category_name=CATEGORY_NAME,
30
- split_name="test"
31
- )
32
- create_benchmark_details_display(
33
- full_df=test_df,
34
- tag_map=test_tag_map,
35
- category_name=CATEGORY_NAME
36
- )
37
- else:
38
- gr.Markdown("No data available for test split.")
39
- with gr.Tab("Results: Validation") as validation_tab:
40
- # 1. Load all necessary data for the "validation" split ONCE.
41
- validation_df, validation_tag_map = get_full_leaderboard_data("validation")
42
-
43
- if not validation_df.empty:
44
- # 2. Render the main category display using the loaded data.
45
- create_leaderboard_display(
46
- full_df=validation_df,
47
- tag_map=validation_tag_map,
48
- category_name=CATEGORY_NAME,
49
- split_name="validation"
50
- )
51
-
52
- # 3. Render the detailed breakdown for each benchmark in the category.
53
- create_benchmark_details_display(
54
- full_df=validation_df,
55
- tag_map=validation_tag_map,
56
- category_name=CATEGORY_NAME
57
- )
58
- else:
59
- gr.Markdown("No data available for validation split.")
60
-
61
-
62
- show_validation_js = """
63
- () => {
64
- document.getElementById('validation_nav_container').style.display = 'block';
65
- document.getElementById('test_nav_container').style.display = 'none';
66
- setTimeout(() => { window.dispatchEvent(new Event('resize')) }, 0);
67
- }
68
- """
69
-
70
- # JavaScript to show the TEST nav, hide the VALIDATION nav, AND fix the plots.
71
- show_test_js = """
72
- () => {
73
- document.getElementById('validation_nav_container').style.display = 'none';
74
- document.getElementById('test_nav_container').style.display = 'block';
75
- }
76
- """
77
-
78
- # Assign the pure JS functions to the select events. No Python `fn` is needed.
79
- validation_tab.select(fn=None, inputs=None, outputs=None, js=show_validation_js)
80
- test_tab.select(fn=None, inputs=None, outputs=None, js=show_test_js)
 
1
  import gradio as gr
 
 
 
 
2
  from content import DATA_ANALYSIS_DESCRIPTION
3
+ from category_page_builder import build_category_page
4
  # Define the category for this page
5
  CATEGORY_NAME = "Data Analysis"
6
 
7
  with gr.Blocks() as demo:
8
  gr.Markdown(f"## Astabench{CATEGORY_NAME} Leaderboard")
9
+ build_category_page(CATEGORY_NAME, DATA_ANALYSIS_DESCRIPTION)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e2e.py CHANGED
@@ -1,80 +1,9 @@
1
  import gradio as gr
2
- import pandas as pd
3
-
4
- # Import our UI factories and the data loader
5
- from ui_components import create_leaderboard_display, create_benchmark_details_display, get_full_leaderboard_data, create_sub_navigation_bar
6
  from content import DISCOVERY_DESCRIPTION
 
7
  # Define the category for this page
8
  CATEGORY_NAME = "Discovery"
9
 
10
  with gr.Blocks() as demo:
11
  gr.Markdown(f"## Astabench{CATEGORY_NAME} Leaderboard")
12
- validation_df, validation_tag_map = get_full_leaderboard_data("validation")
13
- test_df, test_tag_map = get_full_leaderboard_data("test")
14
- gr.Markdown(DISCOVERY_DESCRIPTION, elem_id="category-intro")
15
- with gr.Column(elem_id="validation_nav_container", visible=False) as validation_nav_container:
16
- create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME)
17
- with gr.Column(elem_id="test_nav_container", visible=True) as test_nav_container:
18
- create_sub_navigation_bar(test_tag_map, CATEGORY_NAME)
19
- # --- This page now has two main sections: Validation and Test ---
20
- with gr.Tabs():
21
- with gr.Tab("Results: Test") as test_tab:
22
- # Repeat the process for the "test" split
23
- test_df, test_tag_map = get_full_leaderboard_data("test")
24
-
25
- if not test_df.empty:
26
- create_leaderboard_display(
27
- full_df=test_df,
28
- tag_map=test_tag_map,
29
- category_name=CATEGORY_NAME,
30
- split_name="test"
31
- )
32
- create_benchmark_details_display(
33
- full_df=test_df,
34
- tag_map=test_tag_map,
35
- category_name=CATEGORY_NAME
36
- )
37
- else:
38
- gr.Markdown("No data available for test split.")
39
- with gr.Tab("Results: Validation") as validation_tab:
40
- # 1. Load all necessary data for the "validation" split ONCE.
41
- validation_df, validation_tag_map = get_full_leaderboard_data("validation")
42
-
43
- if not validation_df.empty:
44
- # 2. Render the main category display using the loaded data.
45
- create_leaderboard_display(
46
- full_df=validation_df,
47
- tag_map=validation_tag_map,
48
- category_name=CATEGORY_NAME,
49
- split_name="validation"
50
- )
51
-
52
- # 3. Render the detailed breakdown for each benchmark in the category.
53
- create_benchmark_details_display(
54
- full_df=validation_df,
55
- tag_map=validation_tag_map,
56
- category_name=CATEGORY_NAME
57
- )
58
- else:
59
- gr.Markdown("No data available for validation split.")
60
-
61
-
62
- show_validation_js = """
63
- () => {
64
- document.getElementById('validation_nav_container').style.display = 'block';
65
- document.getElementById('test_nav_container').style.display = 'none';
66
- setTimeout(() => { window.dispatchEvent(new Event('resize')) }, 0);
67
- }
68
- """
69
-
70
- # JavaScript to show the TEST nav, hide the VALIDATION nav, AND fix the plots.
71
- show_test_js = """
72
- () => {
73
- document.getElementById('validation_nav_container').style.display = 'none';
74
- document.getElementById('test_nav_container').style.display = 'block';
75
- }
76
- """
77
-
78
- # Assign the pure JS functions to the select events. No Python `fn` is needed.
79
- validation_tab.select(fn=None, inputs=None, outputs=None, js=show_validation_js)
80
- test_tab.select(fn=None, inputs=None, outputs=None, js=show_test_js)
 
1
  import gradio as gr
 
 
 
 
2
  from content import DISCOVERY_DESCRIPTION
3
+ from category_page_builder import build_category_page
4
  # Define the category for this page
5
  CATEGORY_NAME = "Discovery"
6
 
7
  with gr.Blocks() as demo:
8
  gr.Markdown(f"## Astabench{CATEGORY_NAME} Leaderboard")
9
+ build_category_page(CATEGORY_NAME, DISCOVERY_DESCRIPTION)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
literature_understanding.py CHANGED
@@ -1,83 +1,10 @@
1
  import gradio as gr
2
- import pandas as pd
3
-
4
- # Import our UI factories and the data loader
5
- from ui_components import create_leaderboard_display, create_benchmark_details_display, get_full_leaderboard_data, create_sub_navigation_bar
6
  from content import LIT_DESCRIPTION
 
 
7
  # Define the category for this page
8
  CATEGORY_NAME = "Literature Understanding"
9
 
10
  with gr.Blocks() as demo:
11
  gr.Markdown(f"## Astabench{CATEGORY_NAME} Leaderboard")
12
-
13
- validation_df, validation_tag_map = get_full_leaderboard_data("validation")
14
- test_df, test_tag_map = get_full_leaderboard_data("test")
15
- gr.Markdown(LIT_DESCRIPTION, elem_id="category-intro")
16
- with gr.Column(elem_id="validation_nav_container", visible=False) as validation_nav_container:
17
- create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME)
18
-
19
- with gr.Column(elem_id="test_nav_container", visible=True) as test_nav_container:
20
- create_sub_navigation_bar(test_tag_map, CATEGORY_NAME)
21
-
22
- # --- This page now has two main sections: Validation and Test ---
23
- with gr.Tabs():
24
- with gr.Tab("Results: Test Set") as test_tab:
25
- # Repeat the process for the "test" split
26
- test_df, test_tag_map = get_full_leaderboard_data("test")
27
-
28
- if not test_df.empty:
29
- create_leaderboard_display(
30
- full_df=test_df,
31
- tag_map=test_tag_map,
32
- category_name=CATEGORY_NAME,
33
- split_name="test"
34
- )
35
- create_benchmark_details_display(
36
- full_df=test_df,
37
- tag_map=test_tag_map,
38
- category_name=CATEGORY_NAME
39
- )
40
- else:
41
- gr.Markdown("No data available for test split.")
42
- with gr.Tab("Results: Validation Set") as validation_tab:
43
- # 1. Load all necessary data for the "validation" split ONCE.
44
- validation_df, validation_tag_map = get_full_leaderboard_data("validation")
45
-
46
- if not validation_df.empty:
47
- # 2. Render the main category display using the loaded data.
48
- create_leaderboard_display(
49
- full_df=validation_df,
50
- tag_map=validation_tag_map,
51
- category_name=CATEGORY_NAME,
52
- split_name="validation"
53
- )
54
-
55
- # 3. Render the detailed breakdown for each benchmark in the category.
56
- create_benchmark_details_display(
57
- full_df=validation_df,
58
- tag_map=validation_tag_map,
59
- category_name=CATEGORY_NAME
60
- )
61
- else:
62
- gr.Markdown("No data available for validation split.")
63
-
64
-
65
- show_validation_js = """
66
- () => {
67
- document.getElementById('validation_nav_container').style.display = 'block';
68
- document.getElementById('test_nav_container').style.display = 'none';
69
- setTimeout(() => { window.dispatchEvent(new Event('resize')) }, 0);
70
- }
71
- """
72
-
73
- # JavaScript to show the TEST nav, hide the VALIDATION nav, AND fix the plots.
74
- show_test_js = """
75
- () => {
76
- document.getElementById('validation_nav_container').style.display = 'none';
77
- document.getElementById('test_nav_container').style.display = 'block';
78
- }
79
- """
80
-
81
- # Assign the pure JS functions to the select events. No Python `fn` is needed.
82
- validation_tab.select(fn=None, inputs=None, outputs=None, js=show_validation_js)
83
- test_tab.select(fn=None, inputs=None, outputs=None, js=show_test_js)
 
1
  import gradio as gr
 
 
 
 
2
  from content import LIT_DESCRIPTION
3
+ from category_page_builder import build_category_page
4
+
5
  # Define the category for this page
6
  CATEGORY_NAME = "Literature Understanding"
7
 
8
  with gr.Blocks() as demo:
9
  gr.Markdown(f"## Astabench{CATEGORY_NAME} Leaderboard")
10
+ build_category_page(CATEGORY_NAME, LIT_DESCRIPTION)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ui_components.py CHANGED
@@ -159,26 +159,34 @@ tooling_html = " ".join(tooling_html_items)
159
  # Your final legend_markdown string (the structure of this does not change)
160
  legend_markdown = f"""
161
  <div style="display: flex; flex-wrap: wrap; align-items: flex-start; gap: 24px; font-size: 14px; padding-bottom: 8px;">
162
-
163
  <div> <!-- Container for the Pareto section -->
164
- <b>Pareto</b>
 
 
165
  <div style="padding-top: 4px;"><span>📈 On frontier</span></div>
166
  </div>
167
 
168
  <div> <!-- Container for the Openness section -->
169
- <b>Agent Openness</b>
 
 
 
 
 
170
  <div style="display: flex; flex-wrap: wrap; align-items: center; gap: 16px; margin-top: 4px;">{openness_html}</div>
171
  </div>
172
 
173
  <div> <!-- Container for the Tooling section -->
174
- <b>Agent Tooling</b>
 
 
 
 
175
  <div style="display: flex; flex-wrap: wrap; align-items: center; gap: 16px; margin-top: 4px;">{tooling_html}</div>
176
  </div>
177
-
178
- <div><b>Column Descriptions</b><span class="tooltip-icon" data-tooltip="• Pareto: Indicates if agent is on the Pareto frontier
179
- • Openness: Level of accessibility to model and implementation
180
- • Agent Tooling: Approach used by the agent
181
- • Agent: Name of the AI agent
182
  • Overall Score: Performance across all benchmarks
183
  • Overall Cost: Cost per task in USD
184
  • Literature Understanding Score: Performance on scientific literature tasks
 
159
  # Your final legend_markdown string (the structure of this does not change)
160
  legend_markdown = f"""
161
  <div style="display: flex; flex-wrap: wrap; align-items: flex-start; gap: 24px; font-size: 14px; padding-bottom: 8px;">
162
+
163
  <div> <!-- Container for the Pareto section -->
164
+ <b>Pareto</b><span class="tooltip-icon" data-tooltip="
165
+ •Pareto: Indicates if agent is on the Pareto frontier
166
+ ">ⓘ</span>
167
  <div style="padding-top: 4px;"><span>📈 On frontier</span></div>
168
  </div>
169
 
170
  <div> <!-- Container for the Openness section -->
171
+ <b>Agent Openness</b><span class="tooltip-icon" data-tooltip="
172
+ •Closed: No API or code available
173
+ •API Available: API available, but no code
174
+ •Open Source: Code available, but no weights
175
+ •Open Source + Open Weights: Code and weights available
176
+ ">ⓘ</span>
177
  <div style="display: flex; flex-wrap: wrap; align-items: center; gap: 16px; margin-top: 4px;">{openness_html}</div>
178
  </div>
179
 
180
  <div> <!-- Container for the Tooling section -->
181
+ <b>Agent Tooling</b><span class="tooltip-icon" data-tooltip="
182
+ • Standard: Standard Approach used by the agent
183
+ • Custom with Standard Search: Standard search used by the agent
184
+ • Fully Custom: Fully custom tools used by the agent
185
+ ">ⓘ</span>
186
  <div style="display: flex; flex-wrap: wrap; align-items: center; gap: 16px; margin-top: 4px;">{tooling_html}</div>
187
  </div>
188
+
189
+ <div><b>Column Descriptions</b><span class="tooltip-icon" data-tooltip="
 
 
 
190
  • Overall Score: Performance across all benchmarks
191
  • Overall Cost: Cost per task in USD
192
  • Literature Understanding Score: Performance on scientific literature tasks