openhands openhands committed on
Commit
3781804
·
1 Parent(s): 376500e

Swap column order and fix duplicate column warnings

Browse files

- Swapped Language Model and OpenHands Version column order
- Fixed duplicate column warning by renaming columns before getting headers
- Updated mock data with proper agent_version values (1.0.1, 1.0.2)
- Removed duplicate agent_version keys from metadata.json files

Co-authored-by: openhands <[email protected]>

leaderboard_transformer.py CHANGED
@@ -256,7 +256,7 @@ class DataTransformer:
256
  df_view = df_sorted.copy()
257
 
258
  # --- 3. Add Columns for Agent Openness ---
259
- base_cols = ["id","OpenHands Version","Language Model","Source"]
260
  new_cols = ["Openness"]
261
  ending_cols = ["Date", "Logs"]
262
 
 
256
  df_view = df_sorted.copy()
257
 
258
  # --- 3. Add Columns for Agent Openness ---
259
+ base_cols = ["id","Language Model","OpenHands Version","Source"]
260
  new_cols = ["Openness"]
261
  ending_cols = ["Date", "Logs"]
262
 
mock_results/1.0.0-dev1/results/20251124_claude_3_5_sonnet_20241022/metadata.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
- "agent_version": "OpenHands CodeAct v2.1",
3
- "agent_version": "OpenHands CodeAct v2.1",
4
  "model": "claude-3-5-sonnet-20241022",
5
  "openness": "closed_api_available",
6
  "tool_usage": "standard",
 
1
  {
2
+ "agent_version": "1.0.1",
 
3
  "model": "claude-3-5-sonnet-20241022",
4
  "openness": "closed_api_available",
5
  "tool_usage": "standard",
mock_results/1.0.0-dev1/results/20251124_claude_3_opus_20240229/metadata.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
- "agent_version": "SWE-Agent",
3
- "agent_version": "SWE-Agent",
4
  "model": "claude-3-opus-20240229",
5
  "openness": "closed_api_available",
6
  "tool_usage": "custom_interface",
 
1
  {
2
+ "agent_version": "1.0.1",
 
3
  "model": "claude-3-opus-20240229",
4
  "openness": "closed_api_available",
5
  "tool_usage": "custom_interface",
mock_results/1.0.0-dev1/results/20251124_gpt_4_turbo_2024_04_09/metadata.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
- "agent_version": "AutoCodeRover",
3
- "agent_version": "AutoCodeRover",
4
  "model": "gpt-4-turbo-2024-04-09",
5
  "openness": "closed_api_available",
6
  "tool_usage": "standard",
 
1
  {
2
+ "agent_version": "1.0.1",
 
3
  "model": "gpt-4-turbo-2024-04-09",
4
  "openness": "closed_api_available",
5
  "tool_usage": "standard",
mock_results/1.0.0-dev1/results/20251124_gpt_4o_2024_11_20/metadata.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
- "agent_version": "OpenHands CodeAct v2.0",
3
- "agent_version": "OpenHands CodeAct v2.0",
4
  "model": "gpt-4o-2024-11-20",
5
  "openness": "closed_api_available",
6
  "tool_usage": "standard",
 
1
  {
2
+ "agent_version": "1.0.2",
 
3
  "model": "gpt-4o-2024-11-20",
4
  "openness": "closed_api_available",
5
  "tool_usage": "standard",
mock_results/1.0.0-dev1/results/20251124_gpt_4o_mini_2024_07_18/metadata.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
- "agent_version": "Agentless",
3
- "agent_version": "Agentless",
4
  "model": "gpt-4o-mini-2024-07-18",
5
  "openness": "closed_api_available",
6
  "tool_usage": "standard",
 
1
  {
2
+ "agent_version": "1.0.2",
 
3
  "model": "gpt-4o-mini-2024-07-18",
4
  "openness": "closed_api_available",
5
  "tool_usage": "standard",
ui_components.py CHANGED
@@ -408,22 +408,23 @@ def create_leaderboard_display(
408
  columns_to_drop = ['id', 'Openness', 'Agent Tooling', 'Source']
409
  df_view = df_view.drop(columns=columns_to_drop, errors='ignore')
410
 
 
 
 
 
 
 
 
 
411
  df_headers = df_view.columns.tolist()
412
  df_datatypes = []
413
  for col in df_headers:
414
  if col == "Logs" or "Cost" in col or "Score" in col:
415
  df_datatypes.append("markdown")
416
- elif col in ["OpenHands Version","Icon","Language Model", "Pareto"]:
417
  df_datatypes.append("html")
418
  else:
419
  df_datatypes.append("str")
420
-
421
- header_rename_map = {
422
- "Pareto": "",
423
- "Icon": "",
424
- }
425
- # 2. Create the final list of headers for display.
426
- df_view = df_view.rename(columns=header_rename_map)
427
  # Dynamically set widths for the DataFrame columns
428
  fixed_start_widths = [40, 40, 200, 100, 200]
429
  num_score_cost_cols = 0
@@ -570,8 +571,8 @@ def create_benchmark_details_display(
570
  desired_cols_in_order = [
571
  'Pareto',
572
  'Icon',
573
- 'OpenHands Version',
574
  'Language Model',
 
575
  'Attempted Benchmark',
576
  benchmark_score_col,
577
  benchmark_cost_col,
@@ -587,23 +588,23 @@ def create_benchmark_details_display(
587
  benchmark_score_col: 'Score',
588
  benchmark_cost_col: 'Cost',
589
  }, inplace=True)
590
- # Ensure the 'Logs' column is formatted correctly
 
 
 
 
 
 
 
591
  df_headers = benchmark_table_df.columns.tolist()
592
  df_datatypes = []
593
  for col in df_headers:
594
  if "Logs" in col or "Cost" in col or "Score" in col:
595
  df_datatypes.append("markdown")
596
- elif col in ["OpenHands Version", "Icon", "Language Model", "Pareto"]:
597
  df_datatypes.append("html")
598
  else:
599
  df_datatypes.append("str")
600
- # Remove Pareto, Openness, and Agent Tooling from the headers
601
- header_rename_map = {
602
- "Pareto": "",
603
- "Icon": "",
604
- }
605
- # 2. Create the final list of headers for display.
606
- benchmark_table_df = benchmark_table_df.rename(columns=header_rename_map)
607
  benchmark_plot = _plot_scatter_plotly(
608
  data=full_df,
609
  x=benchmark_cost_col,
 
408
  columns_to_drop = ['id', 'Openness', 'Agent Tooling', 'Source']
409
  df_view = df_view.drop(columns=columns_to_drop, errors='ignore')
410
 
411
+ header_rename_map = {
412
+ "Pareto": "",
413
+ "Icon": "",
414
+ }
415
+ # Rename columns first before getting headers
416
+ df_view = df_view.rename(columns=header_rename_map)
417
+
418
+ # Now get headers from the renamed dataframe
419
  df_headers = df_view.columns.tolist()
420
  df_datatypes = []
421
  for col in df_headers:
422
  if col == "Logs" or "Cost" in col or "Score" in col:
423
  df_datatypes.append("markdown")
424
+ elif col in ["OpenHands Version","Language Model", ""]: # "" for renamed Pareto/Icon columns
425
  df_datatypes.append("html")
426
  else:
427
  df_datatypes.append("str")
 
 
 
 
 
 
 
428
  # Dynamically set widths for the DataFrame columns
429
  fixed_start_widths = [40, 40, 200, 100, 200]
430
  num_score_cost_cols = 0
 
571
  desired_cols_in_order = [
572
  'Pareto',
573
  'Icon',
 
574
  'Language Model',
575
+ 'OpenHands Version',
576
  'Attempted Benchmark',
577
  benchmark_score_col,
578
  benchmark_cost_col,
 
588
  benchmark_score_col: 'Score',
589
  benchmark_cost_col: 'Cost',
590
  }, inplace=True)
591
+ # Remove Pareto and Icon column headers (rename to empty string)
592
+ header_rename_map = {
593
+ "Pareto": "",
594
+ "Icon": "",
595
+ }
596
+ benchmark_table_df = benchmark_table_df.rename(columns=header_rename_map)
597
+
598
+ # Now get headers from the renamed dataframe
599
  df_headers = benchmark_table_df.columns.tolist()
600
  df_datatypes = []
601
  for col in df_headers:
602
  if "Logs" in col or "Cost" in col or "Score" in col:
603
  df_datatypes.append("markdown")
604
+ elif col in ["OpenHands Version", "Language Model", ""]: # "" for renamed Pareto/Icon columns
605
  df_datatypes.append("html")
606
  else:
607
  df_datatypes.append("str")
 
 
 
 
 
 
 
608
  benchmark_plot = _plot_scatter_plotly(
609
  data=full_df,
610
  x=benchmark_cost_col,