#!/usr/bin/env python3
"""
Test script to check Hugging Face connectivity and provide solutions
"""

import requests
import os
from pathlib import Path

def test_huggingface_connectivity():
    """Check whether https://huggingface.co answers an HTTP GET.

    Sends one request with a 10-second timeout and prints a status line
    for every outcome.

    Returns:
        bool: True only when the response status code is 200. False for
        any other status code, a timeout, a connection error, or any other
        exception raised while performing the request.
    """
    print("🌐 Testing Hugging Face connectivity...")

    try:
        reply = requests.get("https://huggingface.co", timeout=10)
        # Guard clause: anything other than a plain 200 counts as a failure.
        if reply.status_code != 200:
            print(f"⚠️  Hugging Face returned status code: {reply.status_code}")
            return False
        print("✅ Hugging Face is accessible")
        return True
    except requests.exceptions.Timeout:
        # Checked before ConnectionError so timeouts get their own message.
        print("❌ Connection to Hugging Face timed out")
        return False
    except requests.exceptions.ConnectionError:
        print("❌ Cannot connect to Hugging Face")
        return False
    except Exception as err:
        # Last-resort handler: the diagnostic reports the problem instead of crashing.
        print(f"❌ Error connecting to Hugging Face: {err}")
        return False

def check_cached_models():
    """Look for already-downloaded T5 models in the local Hugging Face caches.

    The following directories are inspected, in this order:

    * ``~/.cache/huggingface/transformers``
    * ``~/.cache/huggingface/hub``
    * ``$HF_HOME/hub`` (only when the ``HF_HOME`` environment variable is set)

    A cache entry counts as a T5 model when it is a directory whose name
    contains ``"t5"`` (case-insensitive). Every match is printed as it is
    found; a single "not found" line is printed when there are none.

    Returns:
        list[str]: Paths of the matching directories. Each physical cache
        directory is scanned once, and entries inside a directory are
        reported in sorted order.
    """
    print("\n📁 Checking for cached models...")

    # Common cache locations
    cache_locations = [
        Path.home() / ".cache" / "huggingface" / "transformers",
        Path.home() / ".cache" / "huggingface" / "hub",
    ]
    hf_home = os.environ.get("HF_HOME")
    if hf_home:
        cache_locations.append(Path(hf_home).expanduser() / "hub")

    found_models = []
    scanned = set()
    for cache_dir in cache_locations:
        # is_dir() rather than exists(): a stray regular file at this path
        # would otherwise make iterdir() raise NotADirectoryError.
        if not cache_dir.is_dir():
            continue

        # HF_HOME frequently points at the default ~/.cache/huggingface, in
        # which case the same hub directory is listed twice; scan it once so
        # models are not reported in duplicate.
        real_dir = cache_dir.resolve()
        if real_dir in scanned:
            continue
        scanned.add(real_dir)

        try:
            # Sorted so the report does not depend on filesystem ordering.
            entries = sorted(cache_dir.iterdir())
        except OSError as exc:
            # An unreadable cache should not abort the whole diagnostic.
            print(f"⚠️  Could not read cache directory {cache_dir}: {exc}")
            continue

        # Look for t5-related folders
        for item in entries:
            if item.is_dir() and "t5" in item.name.lower():
                found_models.append(str(item))
                print(f"✅ Found cached model: {item}")

    if not found_models:
        print("❌ No T5 models found in cache")

    return found_models

def suggest_solutions():
    """Print a numbered list of workarounds for Hugging Face connectivity issues.

    The function only writes to stdout and returns None. The tips cover
    pre-downloading the model, raising the hub timeouts, offline mode, using
    a mirror endpoint, and running a test that needs no model download.
    """
    print("\n💡 Solutions for connectivity issues:")
    print("="*50)

    print("\n1. 🌐 **Pre-download the model with better connectivity:**")
    print("   Run this when you have stable internet:")
    print("   ```python")
    print("   from transformers import AutoTokenizer, AutoModelForSeq2SeqLM")
    print("   tokenizer = AutoTokenizer.from_pretrained('t5-base')")
    print("   model = AutoModelForSeq2SeqLM.from_pretrained('t5-base')")
    print("   ```")

    print("\n2. 🔄 **Retry with longer timeout:**")
    print("   Set environment variables:")
    print("   ```bash")
    # These are the timeout variables documented by huggingface_hub; the
    # previously printed HF_HUB_TIMEOUT / REQUESTS_TIMEOUT are not read by
    # huggingface_hub or requests, so setting them had no effect.
    print("   export HF_HUB_DOWNLOAD_TIMEOUT=300")
    print("   export HF_HUB_ETAG_TIMEOUT=300")
    print("   ```")

    print("\n3. 🏠 **Use offline mode (if model is cached):**")
    print("   ```bash")
    print("   export TRANSFORMERS_OFFLINE=1")
    print("   ```")

    print("\n4. 🌐 **Alternative: Use different mirror:**")
    print("   ```bash")
    print("   export HF_ENDPOINT=https://hf-mirror.com")
    print("   ```")

    print("\n5. 📦 **Local testing without model download:**")
    print("   Use a smaller test that doesn't require model downloads")

def create_simple_test():
    """Create a simple test that doesn't require model downloads"""
    print("\n🧪 Creating simplified test...")
    
    test_script = '''#!/usr/bin/env python3
"""
Simple test that only tests data loading and GPU monitoring without model downloads
"""

import sys
import os
sys.path.append('src')

def test_data_only():
    """Test only data loading functionality"""
    try:
        import pandas as pd
        from tevatron.utils.gpu_monitor import GPUMemoryMonitor
        
        print("✅ Testing data loading...")
        df = pd.read_csv("data/the_vault/DOC_VAULT_train.tsv", sep='\\t', nrows=5)
        print(f"✅ Loaded {len(df)} samples")
        
        print("✅ Testing GPU monitor...")
        monitor = GPUMemoryMonitor(memory_threshold=0.8, check_interval=10)
        stats = monitor.get_memory_stats()
        print(f"✅ GPU monitor initialized: {stats}")
        
        print("🎉 Basic functionality test PASSED!")
        return True
        
    except Exception as e:
        print(f"❌ Test failed: {e}")
        return False

if __name__ == "__main__":
    success = test_data_only()
    sys.exit(0 if success else 1)
'''
    
    with open("scripts/test_basic.py", "w") as f:
        f.write(test_script)
    
    print("✅ Created scripts/test_basic.py")
    print("   Run with: python scripts/test_basic.py")

def main():
    """Run the whole diagnostic and print a summary with a recommendation.

    Steps, in order: probe Hugging Face connectivity, scan the local caches
    for T5 models, generate the download-free smoke test, print the list of
    workarounds, then print a summary and the recommended next action.

    A failure to write the smoke-test script is reported in the output
    instead of aborting the diagnostic, so the workarounds and the summary
    are always printed.
    """
    print("🔍 GLEN Connectivity Diagnostic")
    print("="*40)

    # Test connectivity
    connectivity_ok = test_huggingface_connectivity()

    # Check cached models
    cached_models = check_cached_models()

    # Create simple test; writing the file is optional for the diagnosis,
    # so an I/O failure must not prevent the remaining output.
    try:
        create_simple_test()
    except OSError as exc:
        simple_test_created = False
        print(f"❌ Could not create scripts/test_basic.py: {exc}")
    else:
        simple_test_created = True

    # Suggest solutions
    suggest_solutions()

    print("\n" + "="*50)
    print("📋 Summary:")
    print(f"  - Hugging Face connectivity: {'✅ OK' if connectivity_ok else '❌ FAILED'}")
    print(f"  - Cached models found: {'✅ YES' if cached_models else '❌ NO'}")
    print(f"  - Simple test created: {'✅ YES' if simple_test_created else '❌ NO'}")

    if not connectivity_ok and not cached_models:
        print("\n⚠️  **Action needed:** Either fix connectivity or pre-download models")
        # Only point at the smoke test when it actually exists.
        if simple_test_created:
            print("   Try running: python scripts/test_basic.py (for basic functionality)")
    elif cached_models:
        print("\n✅ **Good news:** You have cached models. Try offline mode!")
        print("   Set: export TRANSFORMERS_OFFLINE=1")
    else:
        print("\n✅ **All good:** You should be able to run full training!")

# Run the diagnostic only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()