Spaces:
Running
Running
openhands
openhands
committed on
Commit
·
3781804
1
Parent(s):
376500e
Swap column order and fix duplicate column warnings
Browse files
- Swapped Language Model and OpenHands Version column order
- Fixed duplicate column warning by renaming columns before getting headers
- Updated mock data with proper agent_version values (1.0.1, 1.0.2)
- Removed duplicate agent_version keys from metadata.json files
Co-authored-by: openhands <[email protected]>
- leaderboard_transformer.py +1 -1
- mock_results/1.0.0-dev1/results/20251124_claude_3_5_sonnet_20241022/metadata.json +1 -2
- mock_results/1.0.0-dev1/results/20251124_claude_3_opus_20240229/metadata.json +1 -2
- mock_results/1.0.0-dev1/results/20251124_gpt_4_turbo_2024_04_09/metadata.json +1 -2
- mock_results/1.0.0-dev1/results/20251124_gpt_4o_2024_11_20/metadata.json +1 -2
- mock_results/1.0.0-dev1/results/20251124_gpt_4o_mini_2024_07_18/metadata.json +1 -2
- ui_components.py +19 -18
leaderboard_transformer.py
CHANGED
|
@@ -256,7 +256,7 @@ class DataTransformer:
|
|
| 256 |
df_view = df_sorted.copy()
|
| 257 |
|
| 258 |
# --- 3. Add Columns for Agent Openness ---
|
| 259 |
-
base_cols = ["id","OpenHands Version","Language Model","Source"]
|
| 260 |
new_cols = ["Openness"]
|
| 261 |
ending_cols = ["Date", "Logs"]
|
| 262 |
|
|
|
|
| 256 |
df_view = df_sorted.copy()
|
| 257 |
|
| 258 |
# --- 3. Add Columns for Agent Openness ---
|
| 259 |
+
base_cols = ["id","Language Model","OpenHands Version","Source"]
|
| 260 |
new_cols = ["Openness"]
|
| 261 |
ending_cols = ["Date", "Logs"]
|
| 262 |
|
mock_results/1.0.0-dev1/results/20251124_claude_3_5_sonnet_20241022/metadata.json
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"agent_version": "
|
| 3 |
-
"agent_version": "OpenHands CodeAct v2.1",
|
| 4 |
"model": "claude-3-5-sonnet-20241022",
|
| 5 |
"openness": "closed_api_available",
|
| 6 |
"tool_usage": "standard",
|
|
|
|
| 1 |
{
|
| 2 |
+
"agent_version": "1.0.1",
|
|
|
|
| 3 |
"model": "claude-3-5-sonnet-20241022",
|
| 4 |
"openness": "closed_api_available",
|
| 5 |
"tool_usage": "standard",
|
mock_results/1.0.0-dev1/results/20251124_claude_3_opus_20240229/metadata.json
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"agent_version": "
|
| 3 |
-
"agent_version": "SWE-Agent",
|
| 4 |
"model": "claude-3-opus-20240229",
|
| 5 |
"openness": "closed_api_available",
|
| 6 |
"tool_usage": "custom_interface",
|
|
|
|
| 1 |
{
|
| 2 |
+
"agent_version": "1.0.1",
|
|
|
|
| 3 |
"model": "claude-3-opus-20240229",
|
| 4 |
"openness": "closed_api_available",
|
| 5 |
"tool_usage": "custom_interface",
|
mock_results/1.0.0-dev1/results/20251124_gpt_4_turbo_2024_04_09/metadata.json
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"agent_version": "
|
| 3 |
-
"agent_version": "AutoCodeRover",
|
| 4 |
"model": "gpt-4-turbo-2024-04-09",
|
| 5 |
"openness": "closed_api_available",
|
| 6 |
"tool_usage": "standard",
|
|
|
|
| 1 |
{
|
| 2 |
+
"agent_version": "1.0.1",
|
|
|
|
| 3 |
"model": "gpt-4-turbo-2024-04-09",
|
| 4 |
"openness": "closed_api_available",
|
| 5 |
"tool_usage": "standard",
|
mock_results/1.0.0-dev1/results/20251124_gpt_4o_2024_11_20/metadata.json
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"agent_version": "
|
| 3 |
-
"agent_version": "OpenHands CodeAct v2.0",
|
| 4 |
"model": "gpt-4o-2024-11-20",
|
| 5 |
"openness": "closed_api_available",
|
| 6 |
"tool_usage": "standard",
|
|
|
|
| 1 |
{
|
| 2 |
+
"agent_version": "1.0.2",
|
|
|
|
| 3 |
"model": "gpt-4o-2024-11-20",
|
| 4 |
"openness": "closed_api_available",
|
| 5 |
"tool_usage": "standard",
|
mock_results/1.0.0-dev1/results/20251124_gpt_4o_mini_2024_07_18/metadata.json
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"agent_version": "
|
| 3 |
-
"agent_version": "Agentless",
|
| 4 |
"model": "gpt-4o-mini-2024-07-18",
|
| 5 |
"openness": "closed_api_available",
|
| 6 |
"tool_usage": "standard",
|
|
|
|
| 1 |
{
|
| 2 |
+
"agent_version": "1.0.2",
|
|
|
|
| 3 |
"model": "gpt-4o-mini-2024-07-18",
|
| 4 |
"openness": "closed_api_available",
|
| 5 |
"tool_usage": "standard",
|
ui_components.py
CHANGED
|
@@ -408,22 +408,23 @@ def create_leaderboard_display(
|
|
| 408 |
columns_to_drop = ['id', 'Openness', 'Agent Tooling', 'Source']
|
| 409 |
df_view = df_view.drop(columns=columns_to_drop, errors='ignore')
|
| 410 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 411 |
df_headers = df_view.columns.tolist()
|
| 412 |
df_datatypes = []
|
| 413 |
for col in df_headers:
|
| 414 |
if col == "Logs" or "Cost" in col or "Score" in col:
|
| 415 |
df_datatypes.append("markdown")
|
| 416 |
-
elif col in ["OpenHands Version","
|
| 417 |
df_datatypes.append("html")
|
| 418 |
else:
|
| 419 |
df_datatypes.append("str")
|
| 420 |
-
|
| 421 |
-
header_rename_map = {
|
| 422 |
-
"Pareto": "",
|
| 423 |
-
"Icon": "",
|
| 424 |
-
}
|
| 425 |
-
# 2. Create the final list of headers for display.
|
| 426 |
-
df_view = df_view.rename(columns=header_rename_map)
|
| 427 |
# Dynamically set widths for the DataFrame columns
|
| 428 |
fixed_start_widths = [40, 40, 200, 100, 200]
|
| 429 |
num_score_cost_cols = 0
|
|
@@ -570,8 +571,8 @@ def create_benchmark_details_display(
|
|
| 570 |
desired_cols_in_order = [
|
| 571 |
'Pareto',
|
| 572 |
'Icon',
|
| 573 |
-
'OpenHands Version',
|
| 574 |
'Language Model',
|
|
|
|
| 575 |
'Attempted Benchmark',
|
| 576 |
benchmark_score_col,
|
| 577 |
benchmark_cost_col,
|
|
@@ -587,23 +588,23 @@ def create_benchmark_details_display(
|
|
| 587 |
benchmark_score_col: 'Score',
|
| 588 |
benchmark_cost_col: 'Cost',
|
| 589 |
}, inplace=True)
|
| 590 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 591 |
df_headers = benchmark_table_df.columns.tolist()
|
| 592 |
df_datatypes = []
|
| 593 |
for col in df_headers:
|
| 594 |
if "Logs" in col or "Cost" in col or "Score" in col:
|
| 595 |
df_datatypes.append("markdown")
|
| 596 |
-
elif col in ["OpenHands Version", "
|
| 597 |
df_datatypes.append("html")
|
| 598 |
else:
|
| 599 |
df_datatypes.append("str")
|
| 600 |
-
# Remove Pareto, Openness, and Agent Tooling from the headers
|
| 601 |
-
header_rename_map = {
|
| 602 |
-
"Pareto": "",
|
| 603 |
-
"Icon": "",
|
| 604 |
-
}
|
| 605 |
-
# 2. Create the final list of headers for display.
|
| 606 |
-
benchmark_table_df = benchmark_table_df.rename(columns=header_rename_map)
|
| 607 |
benchmark_plot = _plot_scatter_plotly(
|
| 608 |
data=full_df,
|
| 609 |
x=benchmark_cost_col,
|
|
|
|
| 408 |
columns_to_drop = ['id', 'Openness', 'Agent Tooling', 'Source']
|
| 409 |
df_view = df_view.drop(columns=columns_to_drop, errors='ignore')
|
| 410 |
|
| 411 |
+
header_rename_map = {
|
| 412 |
+
"Pareto": "",
|
| 413 |
+
"Icon": "",
|
| 414 |
+
}
|
| 415 |
+
# Rename columns first before getting headers
|
| 416 |
+
df_view = df_view.rename(columns=header_rename_map)
|
| 417 |
+
|
| 418 |
+
# Now get headers from the renamed dataframe
|
| 419 |
df_headers = df_view.columns.tolist()
|
| 420 |
df_datatypes = []
|
| 421 |
for col in df_headers:
|
| 422 |
if col == "Logs" or "Cost" in col or "Score" in col:
|
| 423 |
df_datatypes.append("markdown")
|
| 424 |
+
elif col in ["OpenHands Version","Language Model", ""]: # "" for renamed Pareto/Icon columns
|
| 425 |
df_datatypes.append("html")
|
| 426 |
else:
|
| 427 |
df_datatypes.append("str")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 428 |
# Dynamically set widths for the DataFrame columns
|
| 429 |
fixed_start_widths = [40, 40, 200, 100, 200]
|
| 430 |
num_score_cost_cols = 0
|
|
|
|
| 571 |
desired_cols_in_order = [
|
| 572 |
'Pareto',
|
| 573 |
'Icon',
|
|
|
|
| 574 |
'Language Model',
|
| 575 |
+
'OpenHands Version',
|
| 576 |
'Attempted Benchmark',
|
| 577 |
benchmark_score_col,
|
| 578 |
benchmark_cost_col,
|
|
|
|
| 588 |
benchmark_score_col: 'Score',
|
| 589 |
benchmark_cost_col: 'Cost',
|
| 590 |
}, inplace=True)
|
| 591 |
+
# Remove Pareto and Icon column headers (rename to empty string)
|
| 592 |
+
header_rename_map = {
|
| 593 |
+
"Pareto": "",
|
| 594 |
+
"Icon": "",
|
| 595 |
+
}
|
| 596 |
+
benchmark_table_df = benchmark_table_df.rename(columns=header_rename_map)
|
| 597 |
+
|
| 598 |
+
# Now get headers from the renamed dataframe
|
| 599 |
df_headers = benchmark_table_df.columns.tolist()
|
| 600 |
df_datatypes = []
|
| 601 |
for col in df_headers:
|
| 602 |
if "Logs" in col or "Cost" in col or "Score" in col:
|
| 603 |
df_datatypes.append("markdown")
|
| 604 |
+
elif col in ["OpenHands Version", "Language Model", ""]: # "" for renamed Pareto/Icon columns
|
| 605 |
df_datatypes.append("html")
|
| 606 |
else:
|
| 607 |
df_datatypes.append("str")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 608 |
benchmark_plot = _plot_scatter_plotly(
|
| 609 |
data=full_df,
|
| 610 |
x=benchmark_cost_col,
|