#!/usr/bin/env python3
"""Smoke test for GLEN phase-2 document ID generation.

Runs ``examples/glen_phase2/makeid_glen.py`` in-process with a fixed set of
simulated command-line arguments, then reports whether the expected docid
file was written and shows a sample line from it.

The process exit status is 0 on success and non-zero when the target script
raises or exits with a failure status.
"""
import os
import runpy
import sys
import traceback
from typing import List, Optional, Tuple

# Script under test, relative to the current working directory.
SCRIPT_PATH = 'examples/glen_phase2/makeid_glen.py'

# File checked after the run.
# NOTE(review): this path is not under the --infer_dir passed below; confirm
# against makeid_glen.py that this is really where the docid file is written.
OUTPUT_FILE = "logs/GLEN_P2_test_docids.tsv"


def build_argv() -> List[str]:
    """Return the simulated command line handed to the target script."""
    return [
        'makeid_glen.py',
        '--model_name_or_path', 'logs/test_glen_vault/GLEN_P2_test',
        '--infer_dir', 'logs/test_glen_vault/GLEN_P2_test',
        '--dataset_name', 'the_vault',
        '--docid_file_name', 'GLEN_P2_test_docids',
        '--per_device_eval_batch_size', '4',
        '--max_input_length', '128',
        '--num_return_sequences', '10',
    ]


def summarize_output(output_file: str) -> Optional[Tuple[int, Optional[str]]]:
    """Count the lines of *output_file* and capture its first line.

    Args:
        output_file: Path of the file to inspect.

    Returns:
        ``None`` when the file does not exist; otherwise a tuple
        ``(line_count, first_line)`` where ``first_line`` is stripped of
        surrounding whitespace and is ``None`` for an empty file.
    """
    try:
        handle = open(output_file, 'r', encoding='utf-8')
    except FileNotFoundError:
        return None
    line_count = 0
    first_line = None
    with handle:
        # Stream the file: only the count and the first line are needed.
        for line in handle:
            if line_count == 0:
                first_line = line.strip()
            line_count += 1
    return line_count, first_line


def report_output(output_file: str) -> None:
    """Print a short summary of *output_file*, or a warning if it is absent."""
    summary = summarize_output(output_file)
    if summary is None:
        print("⚠️ Output file not found")
        return
    line_count, first_line = summary
    print(f"📄 Output file created: {output_file}")
    print(f"📊 Generated {line_count} document IDs")
    if first_line is not None:
        print(f"📝 Sample line: {first_line}")


def main(script_path: str = SCRIPT_PATH, output_file: str = OUTPUT_FILE) -> int:
    """Run the docid generation script and report on its output.

    Args:
        script_path: Path of the script to execute as ``__main__``.
        output_file: Path of the file the script is expected to produce.

    Returns:
        0 on success; the script's own exit status when it exits with a
        non-zero integer; 1 for any other failure.
    """
    print("Testing GLEN document ID generation (final version)...")
    print(f"Working directory: {os.getcwd()}")

    if 'src' not in sys.path:
        sys.path.append('src')

    # Simulate command line arguments; restored afterwards so the driver does
    # not leave the interpreter's argv modified.
    saved_argv = sys.argv
    sys.argv = build_argv()
    try:
        print("▶️ Starting document ID generation...")
        try:
            # run_path executes the file in a fresh __main__ namespace with
            # __file__ set, and closes the source file itself (unlike
            # exec(open(...).read())). It sets sys.argv[0] to script_path
            # for the duration of the run.
            runpy.run_path(script_path, run_name='__main__')
        except SystemExit as exit_request:
            # A clean sys.exit() from the script is still a success; a
            # non-zero status (e.g. an argument-parsing error) is not.
            status = exit_request.code
            if status not in (None, 0):
                print(f"❌ Error: script exited with status {status}")
                return status if isinstance(status, int) else 1
        print("✅ Document ID generation completed successfully!")

        # Check if output file was created
        report_output(output_file)
    except Exception as e:
        # Top-level boundary: report everything, then signal failure through
        # the exit status instead of silently exiting 0.
        print(f"❌ Error: {e}")
        traceback.print_exc()
        return 1
    finally:
        sys.argv = saved_argv
    return 0


if __name__ == "__main__":
    sys.exit(main())