File size: 5,280 Bytes
085a012
 
 
 
 
 
c56f232
 
 
 
 
085a012
 
 
 
742f074
 
085a012
 
 
7904c2d
085a012
 
 
 
 
 
 
 
 
 
c56f232
085a012
c56f232
085a012
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import gradio as gr
import pandas as pd

# Import our UI factories and the data loader
from ui_components import create_leaderboard_display, create_benchmark_details_display, get_full_leaderboard_data, create_sub_navigation_bar
# Maps a category name to the SVG diagram used on that category's page.
# Each asset file is named after its category: lower-cased, with spaces
# replaced by hyphens, under the "assets/" directory.
CATEGORY_DIAGRAM_MAP = {
    category: f"assets/{category.lower().replace(' ', '-')}.svg"
    for category in (
        "Bug Fixing",
        "App Creation",
        "Frontend Development",
        "Test Generation",
        "Information Gathering",
    )
}

def _nav_toggle_js(show_id, hide_id):
    """Return a JS arrow function that shows one nav container and hides the other.

    Each element lookup is guarded so that a container missing from the DOM
    does not raise and abort the rest of the handler. A ``resize`` event is
    dispatched afterwards (deferred with ``setTimeout``), the same way for
    either tab.
    """
    return f"""
            () => {{
                const toShow = document.getElementById('{show_id}');
                const toHide = document.getElementById('{hide_id}');
                if (toShow) {{ toShow.style.display = 'block'; }}
                if (toHide) {{ toHide.style.display = 'none'; }}
                setTimeout(() => {{ window.dispatchEvent(new Event('resize')) }}, 0);
            }}
            """


def _render_split_results(full_df, tag_map, category_name, split_name, blurb):
    """Render the contents of one results tab for the given split.

    Must be called inside the ``gr.Tab`` context it should populate. Shows a
    placeholder message when ``full_df`` is empty; otherwise renders the
    blurb, the leaderboard and the per-benchmark details.
    """
    if full_df.empty:
        gr.Markdown(f"No data available for {split_name} split.")
        return

    gr.Markdown(blurb)
    # Main category leaderboard for this split.
    create_leaderboard_display(
        full_df=full_df,
        tag_map=tag_map,
        category_name=category_name,
        split_name=split_name,
    )
    # Detailed breakdown for each benchmark in the category.
    create_benchmark_details_display(
        full_df=full_df,
        tag_map=tag_map,
        category_name=category_name,
        validation=(split_name == "validation"),
    )


def build_category_page(CATEGORY_NAME: str, PAGE_DESCRIPTION: str) -> "tuple[gr.Column, gr.Column]":
    """Build the Gradio layout for a single category leaderboard page.

    The page consists of an intro row (header, per-split sub-navigation bars,
    description and, when available, a category diagram) followed by two
    tabs: "Results: Test Set" and "Results: Validation Set". Selecting a tab
    runs a client-side JS handler that swaps which sub-navigation container
    is displayed.

    Args:
        CATEGORY_NAME: Category title; used in the page header, passed to the
            UI factories, and used as the key into ``CATEGORY_DIAGRAM_MAP``.
        PAGE_DESCRIPTION: Markdown text rendered under the navigation bars.

    Returns:
        A ``(validation_nav_container, test_nav_container)`` tuple holding the
        two ``gr.Column`` wrappers around the sub-navigation bars.
    """
    # `os` is only needed for the diagram existence check below.
    import os

    validation_nav_id = "validation_nav_container"
    test_nav_id = "test_nav_container"

    with gr.Column(elem_id="page-content-wrapper"):
        # Load both splits once, up front; the intro row and the tabs reuse them.
        validation_df, validation_tag_map = get_full_leaderboard_data("validation")
        test_df, test_tag_map = get_full_leaderboard_data("test")

        with gr.Row(elem_id="intro-row"):
            # --- The Left Column: header, navigation bars, description ---
            with gr.Column(scale=1):
                gr.HTML(f'<h2>OpenHands Index {CATEGORY_NAME} Leaderboard <span style="font-weight: normal; color: inherit;">(Aggregate)</span></h2>', elem_id="main-header")

                # NOTE(review): this column is created with visible=False and is
                # later revealed by setting style.display from JS — confirm the
                # hidden column is present in the DOM for the Gradio version in use.
                with gr.Column(elem_id=validation_nav_id, visible=False) as validation_nav_container:
                    create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME, validation=True)

                with gr.Column(elem_id=test_nav_id, visible=True) as test_nav_container:
                    create_sub_navigation_bar(test_tag_map, CATEGORY_NAME)

                gr.Markdown(PAGE_DESCRIPTION, elem_id="intro-category-paragraph")

            # --- The Right Column: optional category diagram ---
            with gr.Column(scale=1):
                image_path = CATEGORY_DIAGRAM_MAP.get(CATEGORY_NAME)
                # Skip the image when the category has no diagram or the file is missing.
                if image_path and os.path.exists(image_path):
                    gr.Image(
                        value=image_path,
                        show_label=False,
                        show_download_button=False,
                        show_fullscreen_button=False,
                        show_share_button=False,
                        interactive=False,
                        elem_id="diagram-image",
                    )

        # --- Two result sections: Test (first tab) and Validation ---
        with gr.Tabs():
            with gr.Tab("Results: Test Set") as test_tab:
                _render_split_results(
                    full_df=test_df,
                    tag_map=test_tag_map,
                    category_name=CATEGORY_NAME,
                    split_name="test",
                    blurb="**Test Set** results are reserved for final assessment. This helps ensure that the agent generalizes well to unseen problems.",
                )
            with gr.Tab("Results: Validation Set") as validation_tab:
                _render_split_results(
                    full_df=validation_df,
                    tag_map=validation_tag_map,
                    category_name=CATEGORY_NAME,
                    split_name="validation",
                    blurb="**Validation Set** results are used during development to tune and compare agents before final testing.",
                )

        # Both handlers show the matching nav, hide the other one, and fire a
        # resize event so that plots are laid out again after the tab switch.
        show_validation_js = _nav_toggle_js(show_id=validation_nav_id, hide_id=test_nav_id)
        show_test_js = _nav_toggle_js(show_id=test_nav_id, hide_id=validation_nav_id)

        # Assign the pure JS functions to the select events. No Python `fn` is needed.
        validation_tab.select(fn=None, inputs=None, outputs=None, js=show_validation_js)
        test_tab.select(fn=None, inputs=None, outputs=None, js=show_test_js)

    return validation_nav_container, test_nav_container