Spaces:
Running
Running
| """ | |
| Setup script to fetch data from GitHub repository or use mock data as fallback. | |
| This runs before the app starts to ensure data is available. | |
| """ | |
| import os | |
| import shutil | |
| import subprocess | |
| from pathlib import Path | |
| from config import DATA_DIR, EXTRACTED_DATA_DIR, CONFIG_NAME | |
# HTTPS URL of the repository that fetch_data_from_github() shallow-clones
# to obtain the "results" directory.
GITHUB_REPO = "https://github.com/OpenHands/openhands-index-results.git"
def fetch_data_from_github():
    """
    Fetch data from the openhands-index-results GitHub repository.

    Shallow-clones ``GITHUB_REPO`` into a scratch directory under ``/tmp``
    and copies its ``results`` directory to
    ``EXTRACTED_DATA_DIR/CONFIG_NAME/results``, replacing anything that was
    previously stored at ``EXTRACTED_DATA_DIR/CONFIG_NAME``.

    Returns:
        True if the results were cloned and copied; False on any failure
        (clone error or timeout, missing or empty ``results`` directory,
        unreadable sample file, or any other exception). Failures are
        printed, never raised.
    """
    import json

    temp_clone_dir = Path("/tmp/openhands-index-results-clone")
    target_dir = Path(EXTRACTED_DATA_DIR) / CONFIG_NAME

    try:
        # git refuses to clone into a non-empty directory, so clear leftovers
        # from an earlier, interrupted run.
        if temp_clone_dir.exists():
            shutil.rmtree(temp_clone_dir)

        print(f"Attempting to clone data from {GITHUB_REPO}...")

        # Prevent git from prompting for credentials (would hang at startup).
        env = os.environ.copy()
        env['GIT_TERMINAL_PROMPT'] = '0'

        result = subprocess.run(
            ["git", "clone", "--depth", "1", GITHUB_REPO, str(temp_clone_dir)],
            capture_output=True,
            text=True,
            timeout=30,  # Keep startup short if the network is unavailable
            stdin=subprocess.DEVNULL,  # Prevent any input prompts
            env=env,
        )
        if result.returncode != 0:
            print(f"Git clone failed: {result.stderr}")
            return False

        # Expected structure: openhands-index-results/results/YYYYMMDD_model/
        results_source = temp_clone_dir / "results"
        if not results_source.is_dir():
            print("Results directory not found in repository")
            return False

        # Only sub-directories count as agent results; stray files such as a
        # README must not make an otherwise empty results folder look valid.
        result_dirs = [
            entry for entry in results_source.iterdir() if entry.is_dir()
        ]
        if not result_dirs:
            print(f"No agent results found in {results_source}")
            return False
        print(f"Found {len(result_dirs)} agent result directories")

        # Replace any previous extraction with a fresh copy of the results.
        os.makedirs(target_dir.parent, exist_ok=True)
        if target_dir.exists():
            shutil.rmtree(target_dir)
        target_results = target_dir / "results"
        shutil.copytree(results_source, target_results)
        print(f"Successfully fetched data from GitHub. Files: {list(target_dir.glob('*'))}")

        # Sanity-check one scores.json. A file that cannot be read or parsed
        # still fails the fetch (handled by the generic except below), but
        # the preview makes no assumption about the top-level JSON type, so
        # a non-list document no longer turns a good fetch into a failure.
        sample_agents = list(target_results.glob("*/scores.json"))
        if sample_agents:
            sample_path = sample_agents[0]
            with open(sample_path, encoding="utf-8") as f:
                sample_data = json.load(f)
            if not sample_data:
                preview = 'EMPTY'
            elif isinstance(sample_data, list):
                preview = sample_data[0]
            elif isinstance(sample_data, dict):
                preview = next(iter(sample_data.items()))
            else:
                preview = sample_data
            print(f"Sample data from {sample_path.parent.name}: {preview}")

        return True
    except subprocess.TimeoutExpired:
        print("Git clone timed out")
        return False
    except Exception as e:
        print(f"Error fetching data from GitHub: {e}")
        return False
    finally:
        # Best-effort cleanup: an error here must not replace the function's
        # True/False result with an exception.
        shutil.rmtree(temp_clone_dir, ignore_errors=True)
def copy_mock_data():
    """
    Copy the bundled mock data into the extraction directory.

    The source is ``mock_results/CONFIG_NAME`` relative to the current
    working directory, or ``/app/mock_results/CONFIG_NAME`` when the
    relative path does not exist. The destination,
    ``EXTRACTED_DATA_DIR/CONFIG_NAME``, is removed first if present.

    Returns:
        True after a successful copy, False if no mock data directory
        was found.
    """
    # Prefer the path relative to the working directory; otherwise fall
    # back to the absolute location.
    source = Path("mock_results") / CONFIG_NAME
    if not source.exists():
        source = Path("/app/mock_results") / CONFIG_NAME

    destination = Path(EXTRACTED_DATA_DIR) / CONFIG_NAME

    print(f"Current working directory: {os.getcwd()}")
    print(f"Looking for mock data at: {source.absolute()}")

    if source.exists():
        # Make sure the parent exists, then replace any previous extraction.
        os.makedirs(destination.parent, exist_ok=True)
        print(f"Using mock data from {source}")
        if destination.exists():
            shutil.rmtree(destination)
        shutil.copytree(source, destination)

        # Report what ended up in the destination.
        copied = list(destination.glob('*'))
        print(f"Mock data copied successfully. Files: {copied}")
        print(f"Target directory: {destination.absolute()}")
        return True

    # Neither candidate exists: list the working directory to help debugging.
    print(f"ERROR: Mock data directory {source} not found!")
    cwd_entries = list(Path('.').glob('*'))
    print(f"Directory contents: {cwd_entries}")
    return False
def setup_mock_data():
    """
    Make sure leaderboard data is present before the app starts.

    Steps, stopping at the first that succeeds:
      1. Reuse ``*.jsonl`` files already in ``EXTRACTED_DATA_DIR/CONFIG_NAME``.
      2. Fetch results from GitHub via ``fetch_data_from_github()``.
      3. Copy mock data via ``copy_mock_data()``.

    Exceptions raised by steps 2 and 3 are printed and swallowed. If every
    step fails an error banner is printed. Always returns None.
    """
    rule = "=" * 60
    print(rule)
    print("STARTING DATA SETUP")
    print(rule)

    data_dir = Path(EXTRACTED_DATA_DIR) / CONFIG_NAME

    # Step 1: nothing to do when JSONL files are already in place.
    if data_dir.exists():
        existing = list(data_dir.glob("*.jsonl"))
        if existing:
            print(f"Data already exists at {data_dir}")
            print(f"Found {len(existing)} JSONL files: {[p.name for p in existing]}")
            return

    # Step 2: try the GitHub repository first.
    print("\n--- Attempting to fetch from GitHub ---")
    try:
        if fetch_data_from_github():
            print("✓ Successfully using data from GitHub repository")
            return
    except Exception as err:
        print(f"GitHub fetch failed with error: {err}")

    # Step 3: fall back to mock data.
    print("\n--- GitHub data not available, falling back to mock data ---")
    try:
        if copy_mock_data():
            print("✓ Successfully using mock data")
            return
    except Exception as err:
        print(f"Mock data copy failed with error: {err}")

    # Every source failed: make the problem obvious in the logs.
    alarm = "!" * 60
    print("\n" + alarm)
    print("ERROR: No data available! Neither GitHub nor mock data could be loaded.")
    print(alarm)
# Run the data setup only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    setup_mock_data()