""" DocuMint Smart Trainer UI - Core adapter (one-time) - Skill-wise adapters - Dataset selectable from UI """ import os import threading import gradio as gr from train import train_skill # ================== GLOBAL ================== training_thread = None CORE_LOCK_FILE = ".core_trained" # ================== HELPERS ================== def core_already_trained(): return os.path.exists(CORE_LOCK_FILE) def mark_core_trained(): with open(CORE_LOCK_FILE, "w") as f: f.write("trained") # ================== TRAIN HANDLER ================== def start_training( training_mode, dataset_name, skill_name, epochs, learning_rate, batch_size, ): global training_thread if training_thread and training_thread.is_alive(): return "⚠️ Training already running" if training_mode == "Core": if core_already_trained(): return "❌ Core adapter already trained. Core is locked." final_skill = "core" final_epochs = int(epochs) final_lr = float(learning_rate) else: # Skill training if not skill_name.strip(): return "❌ Skill name is required for Skill training" final_skill = skill_name.strip().lower() final_epochs = int(epochs) final_lr = float(learning_rate) def run(): train_skill( dataset_name=dataset_name.strip(), skill_name=final_skill, epochs=final_epochs, lr=final_lr, batch_size=int(batch_size), ) if training_mode == "Core": mark_core_trained() training_thread = threading.Thread(target=run, daemon=True) training_thread.start() return ( f"🚀 Training started\n\n" f"Mode: {training_mode}\n" f"Dataset: {dataset_name}\n" f"Adapter: {final_skill}\n" f"Epochs: {final_epochs}\n" f"LR: {final_lr}" ) # ================== UI ================== with gr.Blocks( title="DocuMint Smart Trainer", theme=gr.themes.Soft(primary_hue="orange"), ) as demo: gr.Markdown( """ # 🧠 DocuMint Smart Trainer Progressive LoRA training with **Core freeze + Skill adapters** ✔ Dataset selectable ✔ No catastrophic forgetting ✔ Production-safe training """ ) with gr.Row(): core_status = gr.Markdown( f"### Core Status: {'🔒 Locked (trained)' if core_already_trained() else '🆕 Not trained'}" ) with gr.Tabs(): # ================== TRAIN TAB ================== with gr.Tab("🎯 Train"): with gr.Row(): with gr.Column(): training_mode = gr.Radio( ["Core", "Skill"], value="Skill", label="Training Mode", info="Core = one time only | Skill = additive learning", ) dataset_input = gr.Textbox( label="Dataset (Hugging Face)", placeholder="e.g. gsm8k or himu1780/DocuMint-Data", ) skill_input = gr.Textbox( label="Skill Name (Skill mode only)", placeholder="vat / invoice / math / docs", ) epochs_input = gr.Slider( minimum=1, maximum=5, value=1, step=1, label="Epochs", ) lr_input = gr.Number( value=5e-5, label="Learning Rate", ) batch_input = gr.Slider( minimum=1, maximum=4, value=1, step=1, label="Batch Size", ) train_btn = gr.Button( "🚀 Start Training", variant="primary", size="lg", ) with gr.Column(): output_box = gr.Textbox( label="Status", lines=8, interactive=False, ) train_btn.click( fn=start_training, inputs=[ training_mode, dataset_input, skill_input, epochs_input, lr_input, batch_input, ], outputs=output_box, ) # ================== HELP TAB ================== with gr.Tab("❓ Help"): gr.Markdown( """ ## How to use safely ### 1️⃣ Train Core (ONE TIME) - Mode: **Core** - Dataset: `gsm8k` / `MathInstruct` - Epochs: `3` - LR: `2e-4` 🔒 Core will auto-lock after training. ### 2️⃣ Add Skills (Unlimited) - Mode: **Skill** - Skill name: `vat`, `invoice`, `math`, etc - Epochs: `1` - LR: `5e-5` or `3e-5` ### 3️⃣ Dataset is always safe to change What matters is **which adapter is trained**, not the dataset. --- **Rule:** Core = brain Skill = hands / legs """ ) gr.Markdown( """ --- **DocuMint Smart Trainer** Progressive learning without forgetting """ ) # ================== LAUNCH ================== if __name__ == "__main__": demo.launch(server_name="0.0.0.0", server_port=7860)