Spaces:
Running
Running
| """ | |
| Custom data loader for OpenHands Index that fetches from GitHub instead of HF datasets. | |
| Mimics the interface of LeaderboardViewer from agent-eval. | |
| """ | |
| import pandas as pd | |
| import requests | |
| from typing import Dict, List, Tuple | |
class GitHubDataLoader:
    """Loads leaderboard data from GitHub repository.

    One JSON document per dataset is fetched from
    ``{base_url}/{dataset}.json``; the entries of every document that could
    be retrieved are flattened into a single DataFrame.
    """

    # Single source of truth for the dataset names: used both to build the
    # tag map and to decide which JSON files to download, so the two can
    # never drift apart.
    DATASETS = (
        "swe-bench",
        "multi-swe-bench",
        "swe-bench-multimodal",
        "swt-bench",
        "commit0",
        "gaia",
    )

    # Timeout, in seconds, passed to each individual HTTP request.
    REQUEST_TIMEOUT_SECONDS = 10

    def __init__(self, base_url: str, split: str):
        """Remember where to fetch from and build the tag map.

        Args:
            base_url: URL prefix; ``/<dataset>.json`` is appended to it.
            split: Split label copied verbatim into every loaded row.
        """
        self.base_url = base_url
        self.split = split
        self.tag_map = self._build_tag_map()

    def _build_tag_map(self) -> Dict[str, List[str]]:
        """Build tag map for the OpenHands datasets.

        Returns:
            A dict mapping each dataset name to a one-element list holding
            that same name. A fresh list is created for every key.
        """
        return {dataset: [dataset] for dataset in self.DATASETS}

    def _rows_from_payload(self, payload: list, dataset: str) -> List[dict]:
        """Convert one decoded JSON document into leaderboard rows.

        Args:
            payload: Decoded JSON list for a single dataset.
            dataset: Dataset name recorded in each produced row.

        Returns:
            One dict per well-formed entry with the keys ``agent_name``,
            ``score``, ``dataset`` and ``split``. Missing ``agent_name`` /
            ``score`` fall back to ``"Unknown"`` / ``0.0``.
        """
        rows = [
            {
                "agent_name": entry.get("agent_name", "Unknown"),
                "score": entry.get("score", 0.0),
                "dataset": dataset,
                "split": self.split,
                # Add other fields as needed
            }
            for entry in payload
            # A non-dict entry has no ``.get``; skip it instead of letting
            # it abort the remaining entries of the same file.
            if isinstance(entry, dict)
        ]
        skipped = len(payload) - len(rows)
        if skipped:
            print(f"Warning: Skipped {skipped} malformed entries for {dataset}")
        return rows

    def _load(self) -> Tuple[pd.DataFrame, Dict]:
        """Load and combine data from all GitHub JSON files.

        Loading is best-effort: a dataset whose file cannot be fetched,
        decoded, or is not a JSON list is reported with a printed warning
        and skipped; the remaining datasets are still loaded.

        Returns:
            ``(df, tag_map)`` where ``df`` has one row per loaded entry, or
            is an empty DataFrame when nothing could be loaded.
        """
        all_results: List[dict] = []

        for dataset in self.DATASETS:
            url = f"{self.base_url}/{dataset}.json"

            # Only the network call and the JSON decoding are guarded, so a
            # programming error elsewhere is not mistaken for a download
            # failure.
            try:
                response = requests.get(
                    url, timeout=self.REQUEST_TIMEOUT_SECONDS
                )
                if response.status_code != 200:
                    # Previously ignored silently; surface it so a missing
                    # or renamed file is visible in the logs.
                    print(
                        f"Warning: Could not load data from {url}: "
                        f"HTTP {response.status_code}"
                    )
                    continue
                payload = response.json()
            except (requests.RequestException, ValueError) as e:
                # ValueError covers JSON decoding errors raised by
                # ``response.json()``.
                print(f"Warning: Could not load data from {url}: {e}")
                continue

            if not isinstance(payload, list):
                print(
                    f"Warning: Could not load data from {url}: "
                    f"expected a JSON list, got {type(payload).__name__}"
                )
                continue

            # Transform GitHub data to match agenteval format
            all_results.extend(self._rows_from_payload(payload, dataset))

        if not all_results:
            return pd.DataFrame(), self.tag_map
        return pd.DataFrame(all_results), self.tag_map
class DummyViewer:
    """Stand-in viewer used when the real data could not be loaded.

    It simply hands back the DataFrame it was constructed with, paired with
    a tag map that contains a single, empty ``"Overall"`` entry.
    """

    def __init__(self, df: pd.DataFrame):
        """Keep a reference to *df* and create the one-entry tag map."""
        self.tag_map = {"Overall": list()}
        self._df = df

    def _load(self) -> Tuple[pd.DataFrame, Dict]:
        """Return the stored DataFrame and tag map as a ``(df, tag_map)`` pair."""
        frame, tags = self._df, self.tag_map
        return frame, tags