Spaces:

OpenHands
/

openhands-index

Running

openhands-index / github_data_loader.py

openhands

Initial OpenHands Index leaderboard based on ASTA Bench

085a012 20 days ago

2.52 kB

	"""
	Custom data loader for OpenHands Index that fetches from GitHub instead of HF datasets.
	Mimics the interface of LeaderboardViewer from agent-eval.
	"""
	import pandas as pd
	import requests
	from typing import Dict, List, Tuple


	class GitHubDataLoader:
	    """Load leaderboard rows from per-dataset JSON files under a base URL.

	    One ``<dataset>.json`` file is requested for every name in ``DATASETS``
	    and the entries found are flattened into a single DataFrame.

	    Attributes:
	        base_url: URL prefix the ``<dataset>.json`` files are fetched from.
	        split: Value copied into the ``split`` column of every row.
	        tag_map: Mapping of dataset name to its list of tags.
	    """

	    # Single source of truth for the datasets: drives both the tag map and
	    # the files requested by ``_load`` so the two cannot drift apart.
	    DATASETS: Tuple[str, ...] = (
	        "swe-bench",
	        "multi-swe-bench",
	        "swe-bench-multimodal",
	        "swt-bench",
	        "commit0",
	        "gaia",
	    )

	    # Per-request timeout, in seconds, passed to ``requests.get``.
	    REQUEST_TIMEOUT = 10

	    def __init__(self, base_url: str, split: str):
	        """Store the connection settings and build the tag map.

	        Args:
	            base_url: URL prefix of the JSON files; a trailing slash is
	                tolerated when URLs are built.
	            split: Split label attached to every loaded row.
	        """
	        self.base_url = base_url
	        self.split = split
	        self.tag_map = self._build_tag_map()

	    def _build_tag_map(self) -> Dict[str, List[str]]:
	        """Return a map in which each dataset is tagged with its own name."""
	        return {dataset: [dataset] for dataset in self.DATASETS}

	    def _dataset_url(self, dataset: str) -> str:
	        """Return the URL of ``dataset``'s JSON file."""
	        # Strip trailing slashes so "https://host/dir/" does not yield "//".
	        return f"{self.base_url.rstrip('/')}/{dataset}.json"

	    def _rows_from_payload(self, payload: list, dataset: str) -> List[dict]:
	        """Convert decoded JSON entries into leaderboard row dicts.

	        Entries that are not dicts are skipped so that one malformed entry
	        does not discard the remaining rows of the dataset.

	        Args:
	            payload: Decoded JSON list for one dataset.
	            dataset: Dataset name recorded in each row.

	        Returns:
	            One dict per usable entry with the keys ``agent_name``,
	            ``score``, ``dataset`` and ``split``.
	        """
	        return [
	            {
	                "agent_name": entry.get("agent_name", "Unknown"),
	                "score": entry.get("score", 0.0),
	                "dataset": dataset,
	                "split": self.split,
	                # Add other fields as needed
	            }
	            for entry in payload
	            if isinstance(entry, dict)
	        ]

	    def _load(self) -> Tuple[pd.DataFrame, Dict]:
	        """Fetch every dataset file and combine the rows into one DataFrame.

	        Loading is best-effort: a dataset that cannot be fetched or decoded
	        is reported with a printed warning and skipped.

	        Returns:
	            A ``(dataframe, tag_map)`` tuple. The DataFrame is empty (no
	            columns) when no rows could be loaded.
	        """
	        all_results = []

	        for dataset in self.DATASETS:
	            url = self._dataset_url(dataset)
	            try:
	                response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
	                if response.status_code != 200:
	                    # Previously skipped silently; make the failure visible.
	                    print(
	                        f"Warning: Could not load data from {url}: "
	                        f"HTTP {response.status_code}"
	                    )
	                    continue
	                payload = response.json()
	            except (requests.RequestException, ValueError) as e:
	                # RequestException covers network/HTTP failures; ValueError
	                # covers JSON decoding errors.
	                print(f"Warning: Could not load data from {url}: {e}")
	                continue

	            if not isinstance(payload, list):
	                print(
	                    f"Warning: Could not load data from {url}: "
	                    f"expected a JSON list, got {type(payload).__name__}"
	                )
	                continue

	            all_results.extend(self._rows_from_payload(payload, dataset))

	        if not all_results:
	            return pd.DataFrame(), self.tag_map
	        return pd.DataFrame(all_results), self.tag_map


	class DummyViewer:
	    """Stand-in viewer that serves a pre-built DataFrame.

	    Exposes the same ``tag_map`` attribute and ``_load`` method as the real
	    loader, so it can be used as a fallback when data loading fails.
	    """

	    def __init__(self, df: pd.DataFrame):
	        """Keep ``df`` and set up a tag map with a single "Overall" entry."""
	        self.tag_map = {"Overall": []}
	        self._df = df

	    def _load(self) -> Tuple[pd.DataFrame, Dict]:
	        """Return the stored DataFrame together with the tag map."""
	        frame, tags = self._df, self.tag_map
	        return frame, tags