# Source: openhands-index / github_data_loader.py
# Commit 085a012 (openhands): "Initial OpenHands Index leaderboard based on ASTA Bench"
"""
Custom data loader for OpenHands Index that fetches from GitHub instead of HF datasets.
Mimics the interface of LeaderboardViewer from agent-eval.
"""
import pandas as pd
import requests
from typing import Dict, List, Tuple
class GitHubDataLoader:
    """Loads leaderboard data from GitHub repository.

    For every dataset named in ``DATASETS`` the loader requests
    ``{base_url}/{dataset}.json`` and flattens the returned entries into one
    DataFrame with the columns ``agent_name``, ``score``, ``dataset`` and
    ``split``. Loading is best-effort: a dataset that cannot be fetched or
    parsed is reported with a printed warning and skipped.
    """

    # Single source of truth for the dataset names. Both the tag map and the
    # list of files to fetch are derived from it so they cannot drift apart.
    DATASETS = (
        "swe-bench",
        "multi-swe-bench",
        "swe-bench-multimodal",
        "swt-bench",
        "commit0",
        "gaia",
    )

    def __init__(self, base_url: str, split: str):
        """Store the configuration and build the tag map.

        Args:
            base_url: URL prefix under which the ``<dataset>.json`` files live.
            split: Split label copied verbatim into every result row.
        """
        self.base_url = base_url
        self.split = split
        self.tag_map = self._build_tag_map()

    def _build_tag_map(self) -> Dict[str, List[str]]:
        """Build tag map for the OpenHands datasets.

        Returns:
            A dict mapping each dataset name to a one-element list holding
            that same name.
        """
        return {dataset: [dataset] for dataset in self.DATASETS}

    def _rows_from_payload(self, data: object, dataset: str) -> List[Dict[str, object]]:
        """Convert one decoded JSON payload into result rows.

        Args:
            data: Decoded JSON body of a dataset file; expected to be a list
                of dict entries.
            dataset: Name of the dataset the payload belongs to.

        Returns:
            One row per dict entry. A payload that is not a list yields an
            empty list, and entries that are not dicts are skipped, so one
            malformed record does not discard the rest of the dataset.
        """
        if not isinstance(data, list):
            print(
                f"Warning: Unexpected payload for {dataset}: "
                f"expected a list, got {type(data).__name__}"
            )
            return []

        rows: List[Dict[str, object]] = []
        skipped = 0
        for entry in data:
            if not isinstance(entry, dict):
                skipped += 1
                continue
            # Transform GitHub data to match agenteval format
            rows.append({
                "agent_name": entry.get("agent_name", "Unknown"),
                "score": entry.get("score", 0.0),
                "dataset": dataset,
                "split": self.split,
                # Add other fields as needed
            })
        if skipped:
            print(f"Warning: Skipped {skipped} malformed entries for {dataset}")
        return rows

    def _load(self) -> Tuple[pd.DataFrame, Dict]:
        """Load and combine data from all GitHub JSON files.

        Returns:
            A ``(DataFrame, tag_map)`` tuple. The DataFrame holds one row per
            loaded entry and is an empty ``pd.DataFrame()`` when nothing
            could be loaded.
        """
        all_results: List[Dict[str, object]] = []
        # Tolerate a trailing slash on base_url so the URL never contains "//".
        base = self.base_url.rstrip("/")

        for dataset in self.DATASETS:
            url = f"{base}/{dataset}.json"
            try:
                response = requests.get(url, timeout=10)
                if response.status_code != 200:
                    # Report the failure instead of dropping the dataset silently.
                    print(
                        f"Warning: Could not load data from {url}: "
                        f"HTTP {response.status_code}"
                    )
                    continue
                data = response.json()
            except (requests.RequestException, ValueError) as e:
                # RequestException covers network/timeout errors; ValueError
                # covers a body that is not valid JSON.
                print(f"Warning: Could not load data from {url}: {e}")
                continue
            all_results.extend(self._rows_from_payload(data, dataset))

        if not all_results:
            return pd.DataFrame(), self.tag_map
        return pd.DataFrame(all_results), self.tag_map
class DummyViewer:
    """Stand-in viewer used when the real data could not be loaded.

    Wraps a DataFrame supplied by the caller and exposes it through the same
    ``_load`` / ``tag_map`` surface as the GitHub-backed loader.
    """

    def __init__(self, df: pd.DataFrame):
        """Keep a reference to ``df`` and set up the fixed tag map."""
        # The tag map has a single "Overall" key with an empty list.
        self.tag_map = {"Overall": []}
        self._df = df

    def _load(self) -> Tuple[pd.DataFrame, Dict]:
        """Return the stored DataFrame together with the tag map."""
        frame, tags = self._df, self.tag_map
        return frame, tags