openhands committed on
Commit
742f074
·
1 Parent(s): becc391

Revert homepage simplification - keep original design

Browse files
Files changed (2) hide show
  1. category_page_builder.py +2 -9
  2. main_page.py +15 -27
category_page_builder.py CHANGED
@@ -12,15 +12,8 @@ CATEGORY_DIAGRAM_MAP = {
12
 
13
  def build_category_page(CATEGORY_NAME, PAGE_DESCRIPTION):
14
  with gr.Column(elem_id="page-content-wrapper"):
15
- try:
16
- validation_df, validation_tag_map = get_full_leaderboard_data("validation")
17
- test_df, test_tag_map = get_full_leaderboard_data("test")
18
- except Exception as e:
19
- print(f"Error loading data for {CATEGORY_NAME}: {e}")
20
- validation_df = pd.DataFrame()
21
- validation_tag_map = {}
22
- test_df = pd.DataFrame()
23
- test_tag_map = {}
24
  with gr.Row(elem_id="intro-row"):
25
 
26
  with gr.Column(scale=1):
 
12
 
13
  def build_category_page(CATEGORY_NAME, PAGE_DESCRIPTION):
14
  with gr.Column(elem_id="page-content-wrapper"):
15
+ validation_df, validation_tag_map = get_full_leaderboard_data("validation")
16
+ test_df, test_tag_map = get_full_leaderboard_data("test")
 
 
 
 
 
 
 
17
  with gr.Row(elem_id="intro-row"):
18
 
19
  with gr.Column(scale=1):
main_page.py CHANGED
@@ -33,33 +33,21 @@ def build_page():
33
 
34
  # --- Leaderboard Display Section ---
35
  gr.Markdown("---")
36
- gr.HTML('<h2>OpenHands Index Leaderboards</h2>', elem_id="main-header")
37
-
38
- gr.Markdown("""
39
- Select a benchmark from the navigation above to view detailed results:
40
-
41
- - **SWE-bench**: Software engineering benchmarks
42
- - **Multi-SWE-bench**: Multi-repository software engineering tasks
43
- - **SWE-bench Multimodal**: Multimodal software engineering challenges
44
- - **SWT-bench**: Web testing benchmarks
45
- - **Commit0**: Zero-shot code commit generation
46
- - **GAIA**: General AI Assistant benchmarks
47
- """)
48
-
49
- # Simplified - no expensive data loading on home page
50
- if False: # Disabled expensive loading
51
- with gr.Tabs() as tabs:
52
- with gr.Tab("Results: Test Set") as test_tab:
53
- test_df, test_tag_map = get_full_leaderboard_data("test")
54
- if not test_df.empty:
55
- gr.Markdown("**Test Set** results are reserved for final assessment. This helps ensure that the agent generalizes well to unseen problems.")
56
- create_leaderboard_display(
57
- full_df=test_df,
58
- tag_map=test_tag_map,
59
- category_name="Overall",
60
- split_name="test"
61
- )
62
- else:
63
  gr.Markdown("No data available for test split.")
64
  with gr.Tab("Results: Validation Set") as validation_tab:
65
  # 1. Load all necessary data for the "validation" split ONCE.
 
33
 
34
  # --- Leaderboard Display Section ---
35
  gr.Markdown("---")
36
+ CATEGORY_NAME = "Overall"
37
+ gr.HTML(f'<h2>AstaBench {CATEGORY_NAME} Leaderboard <span style="font-weight: normal; color: inherit;">(Aggregate)</span></h2>', elem_id="main-header")
38
+
39
+ with gr.Tabs() as tabs:
40
+ with gr.Tab("Results: Test Set") as test_tab:
41
+ test_df, test_tag_map = get_full_leaderboard_data("test")
42
+ if not test_df.empty:
43
+ gr.Markdown("**Test Set** results are reserved for final assessment. This helps ensure that the agent generalizes well to unseen problems.")
44
+ create_leaderboard_display(
45
+ full_df=test_df,
46
+ tag_map=test_tag_map,
47
+ category_name=CATEGORY_NAME, # Use our constant
48
+ split_name="test"
49
+ )
50
+ else:
 
 
 
 
 
 
 
 
 
 
 
 
51
  gr.Markdown("No data available for test split.")
52
  with gr.Tab("Results: Validation Set") as validation_tab:
53
  # 1. Load all necessary data for the "validation" split ONCE.