# NOTE: The lines originally here were file-viewer residue (hosting status,
# "File size: 5,858 Bytes", per-line commit hashes and the line-number gutter).
# They were not part of the script and are summarized here as a comment.
"""
Setup script to fetch data from GitHub repository or use mock data as fallback.
This runs before the app starts to ensure data is available.
"""
import os
import shutil
import subprocess
from pathlib import Path
from config import DATA_DIR, EXTRACTED_DATA_DIR, CONFIG_NAME
# Public Git repository that publishes the leaderboard result files.
GITHUB_REPO = "https://github.com/OpenHands/openhands-index-results.git"


def _version_sort_key(path):
    """Return a key that orders version-like directory names numerically.

    Each dot-separated piece that is a number (optionally prefixed with
    ``v``/``V``) compares as an integer, so ``1.10.0`` sorts after ``1.9.0``.
    Non-numeric pieces sort before numeric ones and compare as text.
    """
    key = []
    for piece in path.name.split("."):
        digits = piece.lstrip("vV")
        if digits.isdecimal():
            key.append((1, int(digits), ""))
        else:
            key.append((0, 0, piece))
    return key


def fetch_data_from_github():
    """
    Fetch data from the openhands-index-results GitHub repository.

    The repository is shallow-cloned into a private temporary directory. The
    directory named ``CONFIG_NAME`` is used when present; otherwise the
    highest-versioned ``X.Y.Z``-style directory in the clone is used. The
    chosen directory must contain at least one ``*.jsonl`` file, and it
    replaces ``EXTRACTED_DATA_DIR / CONFIG_NAME``.

    Returns True if successful, False otherwise. Clone failures, timeouts and
    unexpected errors are reported on stdout rather than raised.
    """
    # Local import so this block does not depend on a file-level import.
    import tempfile

    target_dir = Path(EXTRACTED_DATA_DIR) / CONFIG_NAME
    scratch_dir = None

    try:
        # A unique scratch directory avoids collisions between concurrent
        # runs and stale leftovers at a fixed, predictable /tmp path.
        scratch_dir = Path(tempfile.mkdtemp(prefix="openhands-index-results-"))
        temp_clone_dir = scratch_dir / "clone"

        print(f"Attempting to clone data from {GITHUB_REPO}...")

        # Set environment to prevent git from prompting for credentials
        env = os.environ.copy()
        env['GIT_TERMINAL_PROMPT'] = '0'

        # Clone the repository (shallow: only the latest commit is needed)
        result = subprocess.run(
            ["git", "clone", "--depth", "1", GITHUB_REPO, str(temp_clone_dir)],
            capture_output=True,
            text=True,
            timeout=30,  # Shorter timeout
            stdin=subprocess.DEVNULL,  # Prevent any input prompts
            env=env,  # Use modified environment
        )

        if result.returncode != 0:
            print(f"Git clone failed: {result.stderr}")
            return False

        # Look for data files in the cloned repository
        # Expected structure: openhands-index-results/{version}/test.jsonl, validation.jsonl, etc.
        data_source = temp_clone_dir / CONFIG_NAME

        if not data_source.exists():
            print(f"Data directory {data_source} not found in repository")

            # Try to find any version directories. glob() order is arbitrary
            # and the pattern also matches plain files (e.g. "a.tar.gz"), so
            # keep directories only and sort them by version.
            version_dirs = sorted(
                (d for d in temp_clone_dir.glob("*.*.*") if d.is_dir()),
                key=_version_sort_key,
            )
            if not version_dirs:
                print("No data found in repository")
                return False

            print(f"Found version directories: {[d.name for d in version_dirs]}")
            # Use the newest available version so the choice is deterministic
            data_source = version_dirs[-1]
            print(f"Using data from {data_source.name}")

        # Check if there are any JSONL files
        jsonl_files = list(data_source.glob("*.jsonl"))
        if not jsonl_files:
            print(f"No JSONL files found in {data_source}")
            return False

        # Create target directory and copy data
        os.makedirs(target_dir.parent, exist_ok=True)
        if target_dir.exists():
            shutil.rmtree(target_dir)
        shutil.copytree(data_source, target_dir)

        print(f"Successfully fetched data from GitHub. Files: {list(target_dir.glob('*'))}")
        return True

    except subprocess.TimeoutExpired:
        print("Git clone timed out")
        return False
    except Exception as e:
        print(f"Error fetching data from GitHub: {e}")
        return False
    finally:
        # Best-effort cleanup: a failure to delete scratch files must not
        # turn a successful fetch into an exception for the caller.
        if scratch_dir is not None:
            shutil.rmtree(scratch_dir, ignore_errors=True)
def copy_mock_data():
    """Populate the extraction directory from the bundled mock results.

    The mock data is looked up at ``mock_results/<CONFIG_NAME>`` relative to
    the current working directory, then at ``/app/mock_results/<CONFIG_NAME>``.
    Any existing ``EXTRACTED_DATA_DIR/<CONFIG_NAME>`` directory is replaced.

    Returns True once the copy is done, False if no mock data directory
    was found.
    """
    destination = Path(EXTRACTED_DATA_DIR) / CONFIG_NAME

    # Prefer the path relative to the working directory; otherwise assume the
    # absolute /app location, in case we run from a different directory.
    relative_source = Path("mock_results") / CONFIG_NAME
    source = (
        relative_source
        if relative_source.exists()
        else Path("/app/mock_results") / CONFIG_NAME
    )

    print(f"Current working directory: {os.getcwd()}")
    print(f"Looking for mock data at: {source.absolute()}")

    if source.exists():
        # Make sure the parent of the destination is there before copying.
        destination.parent.mkdir(parents=True, exist_ok=True)

        print(f"Using mock data from {source}")
        # copytree needs a fresh destination, so drop any earlier copy.
        if destination.exists():
            shutil.rmtree(destination)
        shutil.copytree(source, destination)

        # Report what ended up in the destination.
        print(f"Mock data copied successfully. Files: {list(destination.glob('*'))}")
        print(f"Target directory: {destination.absolute()}")
        return True

    print(f"ERROR: Mock data directory {source} not found!")
    print(f"Directory contents: {list(Path('.').glob('*'))}")
    return False
def setup_mock_data():
    """
    Make sure leaderboard data is present in the extraction directory.

    Does nothing when JSONL files already exist there. Otherwise it tries the
    GitHub repository first and the bundled mock data second, stopping at the
    first source that reports success. If both fail, an error banner is
    printed; nothing is raised and nothing is returned.
    """
    rule = "=" * 60
    print(rule)
    print("STARTING DATA SETUP")
    print(rule)

    target_dir = Path(EXTRACTED_DATA_DIR) / CONFIG_NAME

    # Skip all work when a previous run already populated the target.
    present = [f.name for f in target_dir.glob("*.jsonl")] if target_dir.exists() else []
    if present:
        print(f"Data already exists at {target_dir}")
        print(f"Found {len(present)} JSONL files: {present}")
        return

    # Each entry: heading to print, loader to call, success note, and a
    # formatter for the message printed when the attempt raises.
    sources = (
        (
            "\n--- Attempting to fetch from GitHub ---",
            fetch_data_from_github,
            "✓ Successfully using data from GitHub repository",
            lambda err: f"GitHub fetch failed with error: {err}",
        ),
        (
            "\n--- GitHub data not available, falling back to mock data ---",
            copy_mock_data,
            "✓ Successfully using mock data",
            lambda err: f"Mock data copy failed with error: {err}",
        ),
    )

    for heading, load, success_note, describe_failure in sources:
        print(heading)
        try:
            if load():
                print(success_note)
                return
        except Exception as err:
            # A failing source must not stop the next one from being tried.
            print(describe_failure(err))

    # Every source failed: make that loud in the logs.
    alarm = "!" * 60
    print("\n" + alarm)
    print("ERROR: No data available! Neither GitHub nor mock data could be loaded.")
    print(alarm)
# Script entry point: prepare the data when this file is executed directly
# (importing the module does not trigger the setup).
if __name__ == "__main__":
    setup_mock_data()