"""
Custom data loader for OpenHands Index that fetches from GitHub instead of HF datasets.

Mimics the interface of LeaderboardViewer from agent-eval.
"""
from typing import Any, Dict, List, Tuple

import pandas as pd
import requests


class GitHubDataLoader:
    """Loads leaderboard data from GitHub repository.

    One JSON file per dataset is fetched from ``{base_url}/{dataset}.json``.
    Loading is best-effort: a dataset that cannot be fetched or parsed is
    reported with a printed warning and skipped, so the remaining datasets
    still load.
    """

    # Single source of truth for the dataset names; both the tag map and the
    # download loop are derived from it so the two can never drift apart.
    DATASETS: Tuple[str, ...] = (
        "swe-bench",
        "multi-swe-bench",
        "swe-bench-multimodal",
        "swt-bench",
        "commit0",
        "gaia",
    )

    # Per-request timeout, in seconds, passed to ``requests.get``.
    REQUEST_TIMEOUT: int = 10

    def __init__(self, base_url: str, split: str):
        """Store the connection settings and build the tag map.

        Args:
            base_url: URL prefix under which the ``<dataset>.json`` files live.
            split: Split label copied verbatim into every loaded row.
        """
        self.base_url = base_url
        self.split = split
        self.tag_map = self._build_tag_map()

    def _build_tag_map(self) -> Dict[str, List[str]]:
        """Build tag map for the OpenHands datasets.

        Returns:
            A dict mapping each dataset name to a one-element list holding
            that same name.
        """
        return {dataset: [dataset] for dataset in self.DATASETS}

    def _dataset_url(self, dataset: str) -> str:
        """Return the JSON URL for *dataset*."""
        # Strip trailing slashes so a base URL given as ".../" does not
        # produce ".../​/dataset.json" with a doubled separator.
        return f"{self.base_url.rstrip('/')}/{dataset}.json"

    def _rows_from_payload(self, dataset: str, payload: Any) -> List[Dict[str, Any]]:
        """Convert one decoded JSON payload into agenteval-style rows.

        Args:
            dataset: Dataset name recorded in each row.
            payload: Decoded JSON document; expected to be a list of dicts.

        Returns:
            One row per dict entry. A non-list payload yields an empty list,
            and non-dict entries are skipped, so a single malformed entry
            does not discard the rest of the file.
        """
        if not isinstance(payload, list):
            print(
                f"Warning: Unexpected payload for {dataset}: "
                f"expected a list, got {type(payload).__name__}"
            )
            return []

        rows: List[Dict[str, Any]] = []
        skipped = 0
        for entry in payload:
            if not isinstance(entry, dict):
                skipped += 1
                continue
            # Transform GitHub data to match agenteval format.
            rows.append({
                "agent_name": entry.get("agent_name", "Unknown"),
                "score": entry.get("score", 0.0),
                "dataset": dataset,
                "split": self.split,
                # Add other fields as needed
            })
        if skipped:
            print(f"Warning: Skipped {skipped} malformed entries for {dataset}")
        return rows

    def _fetch_rows(self, dataset: str) -> List[Dict[str, Any]]:
        """Download and convert one dataset; return ``[]`` on any failure."""
        url = self._dataset_url(dataset)
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            if response.status_code != 200:
                # Report the failure instead of dropping the dataset silently.
                print(
                    f"Warning: Could not load data from {url}: "
                    f"HTTP {response.status_code}"
                )
                return []
            payload = response.json()
        except (requests.RequestException, ValueError) as e:
            # RequestException covers transport errors and timeouts;
            # ValueError covers a body that is not valid JSON.
            print(f"Warning: Could not load data from {url}: {e}")
            return []
        return self._rows_from_payload(dataset, payload)

    def _load(self) -> Tuple[pd.DataFrame, Dict]:
        """Load and combine data from all GitHub JSON files.

        Returns:
            A ``(df, tag_map)`` tuple. ``df`` has one row per loaded entry
            with the columns ``agent_name``, ``score``, ``dataset`` and
            ``split``; it is an empty, column-less DataFrame when nothing
            could be loaded. ``tag_map`` is this loader's tag map.
        """
        all_results: List[Dict[str, Any]] = []
        for dataset in self.DATASETS:
            all_results.extend(self._fetch_rows(dataset))

        if not all_results:
            return pd.DataFrame(), self.tag_map
        return pd.DataFrame(all_results), self.tag_map


class DummyViewer:
    """Fallback viewer when data loading fails.

    Wraps an already-built DataFrame and exposes the same ``_load`` method
    and ``tag_map`` attribute as ``GitHubDataLoader``.
    """

    def __init__(self, df: pd.DataFrame):
        """Store *df* and set a tag map with a single empty "Overall" tag."""
        self._df = df
        self.tag_map = {"Overall": []}

    def _load(self) -> Tuple[pd.DataFrame, Dict]:
        """Return the stored DataFrame together with the tag map."""
        return self._df, self.tag_map