openhands committed on
Commit
a63b4cf
·
1 Parent(s): 7d37950

Fix data loading: fetch from GitHub with fallback to mock data

Browse files

- Add setup_data.py to fetch data from openhands-index-results GitHub repo
- Falls back to mock data when GitHub repo is empty/unavailable
- Copy data to expected extraction directory on app startup
- Fixes JavaScript error: Cannot read properties of null
- Data now loads correctly and tables render properly

Files changed (2) hide show
  1. app.py +4 -0
  2. setup_data.py +130 -0
app.py CHANGED
@@ -3,6 +3,10 @@ import logging
3
 
4
  logging.basicConfig(level=logging.WARNING)
5
 
 
 
 
 
6
  import gradio as gr
7
  import urllib.parse
8
 
 
3
 
4
  logging.basicConfig(level=logging.WARNING)
5
 
6
+ # Setup mock data before anything else
7
+ from setup_data import setup_mock_data
8
+ setup_mock_data()
9
+
10
  import gradio as gr
11
  import urllib.parse
12
 
setup_data.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Setup script to fetch data from GitHub repository or use mock data as fallback.
3
+ This runs before the app starts to ensure data is available.
4
+ """
5
+ import os
6
+ import shutil
7
+ import subprocess
8
+ from pathlib import Path
9
+ from config import DATA_DIR, EXTRACTED_DATA_DIR, CONFIG_NAME
10
+
11
+ GITHUB_REPO = "https://github.com/OpenHands/openhands-index-results.git"
12
+
13
def _version_sort_key(name):
    """Return a sort key that orders dotted version names numerically.

    Each dot-separated part that is a decimal number is compared as an
    integer, so "1.10.0" sorts after "1.9.0". Parts that are not purely
    numeric (e.g. "0-rc1") sort below numeric parts in the same position,
    which makes a plain release win over a suffixed one.
    """
    return tuple(
        (1, int(part), "") if part.isdecimal() else (0, 0, part)
        for part in name.split(".")
    )


def fetch_data_from_github():
    """
    Fetch data from the openhands-index-results GitHub repository.

    Shallow-clones GITHUB_REPO into a freshly created private temporary
    directory, locates a directory containing ``*.jsonl`` files, and copies
    that directory to ``EXTRACTED_DATA_DIR / CONFIG_NAME`` (replacing any
    existing copy). The directory named CONFIG_NAME is preferred; when it is
    missing, the highest-versioned directory matching ``*.*.*`` is used.

    Returns True if successful, False otherwise. Never raises: every failure
    (git missing, clone error, timeout, filesystem error) is reported on
    stdout and turned into a False return so the caller can fall back.
    """
    import tempfile  # local import: only this function needs it

    target_dir = Path(EXTRACTED_DATA_DIR) / CONFIG_NAME
    temp_root = None

    try:
        # A unique, private directory avoids collisions between concurrent
        # start-ups and the risks of a fixed, predictable path under /tmp.
        temp_root = Path(tempfile.mkdtemp(prefix="openhands-index-results-"))
        temp_clone_dir = temp_root / "clone"

        print(f"Attempting to clone data from {GITHUB_REPO}...")

        # Clone the repository (shallow: only the latest snapshot is needed)
        result = subprocess.run(
            ["git", "clone", "--depth", "1", GITHUB_REPO, str(temp_clone_dir)],
            capture_output=True,
            text=True,
            timeout=60,
            # Fail fast instead of waiting on a credential prompt when the
            # repository is private or does not exist.
            env={**os.environ, "GIT_TERMINAL_PROMPT": "0"},
        )

        if result.returncode != 0:
            print(f"Git clone failed: {result.stderr}")
            return False

        # Look for data files in the cloned repository
        # Expected structure: openhands-index-results/{version}/test.jsonl, validation.jsonl, etc.
        data_source = temp_clone_dir / CONFIG_NAME

        if not data_source.exists():
            print(f"Data directory {data_source} not found in repository")
            # Try to find any version directories; keep directories only and
            # order them newest-first so the choice is deterministic.
            version_dirs = sorted(
                (d for d in temp_clone_dir.glob("*.*.*") if d.is_dir()),
                key=lambda d: _version_sort_key(d.name),
                reverse=True,
            )
            if not version_dirs:
                print("No data found in repository")
                return False

            print(f"Found version directories: {[d.name for d in version_dirs]}")
            # Use the highest available version
            data_source = version_dirs[0]
            print(f"Using data from {data_source.name}")

        # Check if there are any JSONL files
        if not any(data_source.glob("*.jsonl")):
            print(f"No JSONL files found in {data_source}")
            return False

        # Create target directory and copy data
        os.makedirs(target_dir.parent, exist_ok=True)
        if target_dir.exists():
            shutil.rmtree(target_dir)
        shutil.copytree(data_source, target_dir)

        print(f"Successfully fetched data from GitHub. Files: {list(target_dir.glob('*'))}")
        return True

    except subprocess.TimeoutExpired:
        print("Git clone timed out")
        return False
    except Exception as e:
        # Deliberately broad: this is a best-effort fetch and the caller
        # relies on a False return (not an exception) to fall back.
        print(f"Error fetching data from GitHub: {e}")
        return False
    finally:
        # Cleanup temp directory; ignore errors so a failed cleanup cannot
        # raise out of the function or mask the result.
        if temp_root is not None:
            shutil.rmtree(temp_root, ignore_errors=True)
82
+
83
def copy_mock_data():
    """Populate the extraction directory from the bundled mock results.

    Copies ``mock_results / CONFIG_NAME`` (resolved against the current
    working directory) to ``EXTRACTED_DATA_DIR / CONFIG_NAME``, replacing
    whatever is already there.

    Returns True once the copy has completed, or False (after printing a
    warning) when the mock source directory does not exist.
    """
    source_dir = Path("mock_results") / CONFIG_NAME
    destination = Path(EXTRACTED_DATA_DIR) / CONFIG_NAME

    if source_dir.exists():
        # Make sure the parent of the destination is present before copying.
        os.makedirs(destination.parent, exist_ok=True)

        print(f"Using mock data from {source_dir}")
        # copytree needs a non-existent destination, so clear any old copy.
        if destination.exists():
            shutil.rmtree(destination)
        shutil.copytree(source_dir, destination)
        print(f"Mock data copied successfully. Files: {list(destination.glob('*'))}")
        return True

    print(f"Warning: Mock data directory {source_dir} not found")
    return False
102
+
103
def setup_mock_data():
    """
    Make sure leaderboard data is present in the extraction directory.

    Does nothing when ``EXTRACTED_DATA_DIR / CONFIG_NAME`` already holds at
    least one ``*.jsonl`` file. Otherwise tries fetch_data_from_github()
    first and copy_mock_data() second, printing which source was used, or an
    error message when both fail. Always returns None.
    """
    data_dir = Path(EXTRACTED_DATA_DIR) / CONFIG_NAME

    # Skip all work when a previous run already populated the directory.
    already_populated = data_dir.exists() and any(data_dir.glob("*.jsonl"))
    if already_populated:
        print(f"Data already exists at {data_dir}")
        return

    # Preferred source: the GitHub repository.
    print("Checking for data from GitHub repository...")
    if not fetch_data_from_github():
        # Secondary source: the mock data shipped with the app.
        print("GitHub data not available, falling back to mock data...")
        if not copy_mock_data():
            print("ERROR: No data available! Neither GitHub nor mock data could be loaded.")
        else:
            print("Using mock data")
    else:
        print("Using data from GitHub repository")
128
+
129
# Allow running this module directly as a script to prepare the data.
if __name__ == "__main__":
    setup_mock_data()