"""
DocuMint Smart Trainer UI

- Core adapter (one-time)
- Skill-wise adapters
- Dataset selectable from UI
"""
| import os | |
| import threading | |
| import gradio as gr | |
| from train import train_skill | |
# ================== GLOBAL ==================
# Handle to the most recently started background training thread; stays None
# until the first run is launched. start_training() reads and rebinds it.
training_thread = None
# Marker file whose existence means the core adapter has been trained. The path
# is relative, so it resolves against the process working directory.
CORE_LOCK_FILE = ".core_trained"
# ================== HELPERS ==================
def core_already_trained():
    """Report whether the core adapter has been locked.

    Returns:
        True when a filesystem entry exists at CORE_LOCK_FILE, False otherwise.
    """
    lock_present = os.path.exists(CORE_LOCK_FILE)
    return lock_present
def mark_core_trained():
    """Create (or overwrite) the lock file that marks the core adapter as trained."""
    # Only the file's existence is checked elsewhere; the text is a human hint.
    with open(CORE_LOCK_FILE, "w") as lock_file:
        lock_file.write("trained")
# ================== TRAIN HANDLER ==================
def start_training(
    training_mode,
    dataset_name,
    skill_name,
    epochs,
    learning_rate,
    batch_size,
):
    """Validate the form inputs and launch one training run in the background.

    Every input is validated and converted before the worker thread starts, so
    a bad value is reported to the caller instead of failing out of sight in
    the thread after "Training started" has already been shown.

    Args:
        training_mode: "Core" trains the one-time core adapter; any other
            value is handled as skill training.
        dataset_name: Dataset identifier forwarded to train_skill().
        skill_name: Adapter name; required (non-blank) outside Core mode and
            ignored in Core mode.
        epochs: Number of epochs; converted with int().
        learning_rate: Learning rate; converted with float().
        batch_size: Batch size; converted with int().

    Returns:
        A status string: either a rejection message or a summary of the run
        that was started.

    Raises:
        TypeError, ValueError: If epochs, learning_rate or batch_size cannot
            be converted to a number.
    """
    global training_thread

    # NOTE(review): this check and the thread start below are not atomic, so
    # two simultaneous requests could both pass it — confirm whether the
    # serving layer serializes calls to this handler.
    if training_thread and training_thread.is_alive():
        return "⚠️ Training already running"

    is_core = training_mode == "Core"
    if is_core:
        if core_already_trained():
            return "❌ Core adapter already trained. Core is locked."
        final_skill = "core"
    else:
        # Treat a missing value like an empty one instead of crashing on None.
        final_skill = (skill_name or "").strip().lower()
        if not final_skill:
            return "❌ Skill name is required for Skill training"

    final_dataset = (dataset_name or "").strip()
    if not final_dataset:
        return "❌ Dataset name is required"

    final_epochs = int(epochs)
    final_lr = float(learning_rate)
    final_batch_size = int(batch_size)

    def run():
        train_skill(
            dataset_name=final_dataset,
            skill_name=final_skill,
            epochs=final_epochs,
            lr=final_lr,
            batch_size=final_batch_size,
        )
        # Reached only when train_skill() returns normally, so a failed core
        # run does not lock the core adapter.
        if is_core:
            mark_core_trained()

    training_thread = threading.Thread(target=run, daemon=True)
    training_thread.start()

    return (
        "🚀 Training started\n\n"
        f"Mode: {training_mode}\n"
        f"Dataset: {final_dataset}\n"
        f"Adapter: {final_skill}\n"
        f"Epochs: {final_epochs}\n"
        f"LR: {final_lr}"
    )
# ================== UI ==================
# Markdown bodies are kept at column 0 so their rendering does not depend on
# the indentation of the layout code. Blank lines separate headings, lists,
# paragraphs and rules; without one, a text line directly followed by "---"
# is parsed as a heading rather than a paragraph plus a horizontal rule.
_INTRO_MD = """
# 🧠 DocuMint Smart Trainer

Progressive LoRA training with **Core freeze + Skill adapters**

✅ Dataset selectable

✅ No catastrophic forgetting

✅ Production-safe training
"""

_HELP_MD = """
## How to use safely

### 1️⃣ Train Core (ONE TIME)

- Mode: **Core**
- Dataset: `gsm8k` / `MathInstruct`
- Epochs: `3`
- LR: `2e-4`

🔒 Core will auto-lock after training.

### 2️⃣ Add Skills (Unlimited)

- Mode: **Skill**
- Skill name: `vat`, `invoice`, `math`, etc
- Epochs: `1`
- LR: `5e-5` or `3e-5`

### 3️⃣ Dataset is always safe to change

What matters is **which adapter is trained**, not the dataset.

---

**Rule:**

Core = brain

Skill = hands / legs
"""

_FOOTER_MD = """
---

**DocuMint Smart Trainer**

Progressive learning without forgetting
"""


def _core_status_markdown():
    """Return the Markdown heading that shows whether the core adapter is locked."""
    state = "🔒 Locked (trained)" if core_already_trained() else "🔓 Not trained"
    return f"### Core Status: {state}"


with gr.Blocks(
    title="DocuMint Smart Trainer",
    theme=gr.themes.Soft(primary_hue="orange"),
) as demo:
    gr.Markdown(_INTRO_MD)

    with gr.Row():
        core_status = gr.Markdown(_core_status_markdown())

    with gr.Tabs():
        # ================== TRAIN TAB ==================
        with gr.Tab("🎯 Train"):
            with gr.Row():
                with gr.Column():
                    training_mode = gr.Radio(
                        ["Core", "Skill"],
                        value="Skill",
                        label="Training Mode",
                        info="Core = one time only | Skill = additive learning",
                    )
                    dataset_input = gr.Textbox(
                        label="Dataset (Hugging Face)",
                        placeholder="e.g. gsm8k or himu1780/DocuMint-Data",
                    )
                    skill_input = gr.Textbox(
                        label="Skill Name (Skill mode only)",
                        placeholder="vat / invoice / math / docs",
                    )
                    epochs_input = gr.Slider(
                        minimum=1,
                        maximum=5,
                        value=1,
                        step=1,
                        label="Epochs",
                    )
                    lr_input = gr.Number(
                        value=5e-5,
                        label="Learning Rate",
                    )
                    batch_input = gr.Slider(
                        minimum=1,
                        maximum=4,
                        value=1,
                        step=1,
                        label="Batch Size",
                    )
                    train_btn = gr.Button(
                        "🚀 Start Training",
                        variant="primary",
                        size="lg",
                    )
                with gr.Column():
                    output_box = gr.Textbox(
                        label="Status",
                        lines=8,
                        interactive=False,
                    )

            # The input order must match start_training()'s parameter order.
            train_btn.click(
                fn=start_training,
                inputs=[
                    training_mode,
                    dataset_input,
                    skill_input,
                    epochs_input,
                    lr_input,
                    batch_input,
                ],
                outputs=output_box,
            )

        # ================== HELP TAB ==================
        with gr.Tab("❓ Help"):
            gr.Markdown(_HELP_MD)

    gr.Markdown(_FOOTER_MD)

    # The initial status above is computed once, when the UI is built. Refresh
    # it on every page load so a core run that finished since then shows up as
    # locked without restarting the app.
    demo.load(fn=_core_status_markdown, outputs=core_status)
# ================== LAUNCH ==================
if __name__ == "__main__":
    # Serve only when executed as a script: listen on all interfaces, port 7860.
    demo.launch(server_port=7860, server_name="0.0.0.0")