#!/usr/bin/env python3
"""
Test script to check Hugging Face connectivity and provide solutions
"""

import os
from pathlib import Path
from typing import List

import requests

# Source of the standalone smoke test written by create_simple_test().
# Kept at module level so the embedded script's indentation is unambiguous.
_BASIC_TEST_SCRIPT = '''#!/usr/bin/env python3
"""
Simple test that only tests data loading and GPU monitoring without model downloads
"""

import sys
import os
sys.path.append('src')

def test_data_only():
    """Test only data loading functionality"""
    try:
        import pandas as pd
        from tevatron.utils.gpu_monitor import GPUMemoryMonitor

        print("โœ… Testing data loading...")
        df = pd.read_csv("data/the_vault/DOC_VAULT_train.tsv", sep='\\t', nrows=5)
        print(f"โœ… Loaded {len(df)} samples")

        print("โœ… Testing GPU monitor...")
        monitor = GPUMemoryMonitor(memory_threshold=0.8, check_interval=10)
        stats = monitor.get_memory_stats()
        print(f"โœ… GPU monitor initialized: {stats}")

        print("๐ŸŽ‰ Basic functionality test PASSED!")
        return True

    except Exception as e:
        print(f"โŒ Test failed: {e}")
        return False

if __name__ == "__main__":
    success = test_data_only()
    sys.exit(0 if success else 1)
'''


def test_huggingface_connectivity() -> bool:
    """Test connection to Hugging Face.

    Issues a single GET request to https://huggingface.co with a 10 second
    timeout and prints the outcome.

    Returns:
        True only when the response status code is 200; False for any other
        status code or when the request raises.
    """
    print("๐ŸŒ Testing Hugging Face connectivity...")

    try:
        response = requests.get("https://huggingface.co", timeout=10)
    except requests.exceptions.Timeout:
        print("โŒ Connection to Hugging Face timed out")
        return False
    except requests.exceptions.ConnectionError:
        print("โŒ Cannot connect to Hugging Face")
        return False
    except Exception as e:
        # Diagnostic boundary: report any other failure instead of crashing.
        print(f"โŒ Error connecting to Hugging Face: {e}")
        return False

    if response.status_code == 200:
        print("โœ… Hugging Face is accessible")
        return True

    print(f"โš ๏ธ Hugging Face returned status code: {response.status_code}")
    return False


def check_cached_models() -> List[str]:
    """Check if T5 models are already cached.

    Scans the default Hugging Face cache directories under the home
    directory, plus ``$HF_HOME/hub`` when ``HF_HOME`` is set, for
    sub-directories whose name contains "t5" (case-insensitive).

    Returns:
        The matching directory paths as strings; empty when none are found.
    """
    print("\n๐Ÿ“ Checking for cached models...")

    # Common cache locations
    cache_locations = [
        Path.home() / ".cache" / "huggingface" / "transformers",
        Path.home() / ".cache" / "huggingface" / "hub",
    ]
    hf_home = os.environ.get("HF_HOME")
    if hf_home:
        cache_locations.append(Path(hf_home) / "hub")

    found_models: List[str] = []
    scanned = set()
    for cache_dir in cache_locations:
        # HF_HOME may point at the default location; scan each directory once
        # so the same model is not reported twice.
        if cache_dir in scanned:
            continue
        scanned.add(cache_dir)

        if not cache_dir.is_dir():
            continue

        try:
            entries = list(cache_dir.iterdir())
        except OSError as err:
            # Unreadable cache directory: report it and keep checking the rest.
            print(f"Could not read cache directory {cache_dir}: {err}")
            continue

        # Look for t5-base related folders
        for item in entries:
            if item.is_dir() and "t5" in item.name.lower():
                found_models.append(str(item))
                print(f"โœ… Found cached model: {item}")

    if not found_models:
        print("โŒ No T5 models found in cache")

    return found_models


def suggest_solutions() -> None:
    """Provide solutions for connectivity issues.

    Prints a fixed list of five remediation hints; takes no input and
    returns nothing.
    """
    print("\n๐Ÿ’ก Solutions for connectivity issues:")
    print("="*50)

    print("\n1. ๐ŸŒ **Pre-download the model with better connectivity:**")
    print(" Run this when you have stable internet:")
    print(" ```python")
    print(" from transformers import AutoTokenizer, AutoModelForSeq2SeqLM")
    print(" tokenizer = AutoTokenizer.from_pretrained('t5-base')")
    print(" model = AutoModelForSeq2SeqLM.from_pretrained('t5-base')")
    print(" ```")

    print("\n2. ๐Ÿ”„ **Retry with longer timeout:**")
    print(" Set environment variables:")
    print(" ```bash")
    # Timeout variables documented for huggingface_hub (download and
    # metadata/ETag requests respectively).
    print(" export HF_HUB_DOWNLOAD_TIMEOUT=300")
    print(" export HF_HUB_ETAG_TIMEOUT=300")
    print(" ```")

    print("\n3. ๐Ÿ  **Use offline mode (if model is cached):**")
    print(" ```bash")
    print(" export TRANSFORMERS_OFFLINE=1")
    print(" ```")

    print("\n4. ๐ŸŒ **Alternative: Use different mirror:**")
    print(" ```bash")
    print(" export HF_ENDPOINT=https://hf-mirror.com")
    print(" ```")

    print("\n5. ๐Ÿ“ฆ **Local testing without model download:**")
    print(" Use a smaller test that doesn't require model downloads")


def create_simple_test() -> None:
    """Create a simple test that doesn't require model downloads.

    Writes the embedded smoke-test script to ``scripts/test_basic.py``
    (relative to the current working directory), creating the ``scripts``
    directory when it does not exist yet.
    """
    print("\n๐Ÿงช Creating simplified test...")

    script_path = Path("scripts") / "test_basic.py"
    # The script may be launched from a directory without scripts/.
    script_path.parent.mkdir(parents=True, exist_ok=True)
    # The template contains non-ASCII characters; an explicit encoding keeps
    # the write from failing on platforms whose default codec cannot encode
    # them.
    script_path.write_text(_BASIC_TEST_SCRIPT, encoding="utf-8")

    print("โœ… Created scripts/test_basic.py")
    print(" Run with: python scripts/test_basic.py")


def main() -> None:
    """Run every diagnostic step and print a summary with a recommendation."""
    print("๐Ÿ” GLEN Connectivity Diagnostic")
    print("="*40)

    # Test connectivity
    connectivity_ok = test_huggingface_connectivity()

    # Check cached models
    cached_models = check_cached_models()

    # Create simple test
    create_simple_test()

    # Suggest solutions
    suggest_solutions()

    print("\n" + "="*50)
    print("๐Ÿ“‹ Summary:")
    print(f" - Hugging Face connectivity: {'โœ… OK' if connectivity_ok else 'โŒ FAILED'}")
    print(f" - Cached models found: {'โœ… YES' if cached_models else 'โŒ NO'}")
    print(" - Simple test created: โœ… YES")

    if not connectivity_ok and not cached_models:
        print("\nโš ๏ธ **Action needed:** Either fix connectivity or pre-download models")
        print(" Try running: python scripts/test_basic.py (for basic functionality)")
    elif cached_models:
        print("\nโœ… **Good news:** You have cached models. Try offline mode!")
        print(" Set: export TRANSFORMERS_OFFLINE=1")
    else:
        print("\nโœ… **All good:** You should be able to run full training!")


if __name__ == "__main__":
    main()