# DocuMint-Train / app.py
# himu1780's picture
# Create app.py
# 02a89de verified
"""
DocuMint Smart Trainer UI
- Core adapter (one-time)
- Skill-wise adapters
- Dataset selectable from UI
"""
import os
import threading
import gradio as gr
from train import train_skill
# ================== GLOBAL ==================
# Handle to the most recently started training thread. Stays None until
# start_training() launches one; start_training() checks it to refuse
# overlapping runs.
training_thread = None
# Marker file whose existence means the core adapter has already been trained.
# The path is relative, so it resolves against the process's working directory.
CORE_LOCK_FILE = ".core_trained"
# ================== HELPERS ==================
def core_already_trained():
    """Report whether the core-adapter lock file is present on disk."""
    lock_present = os.path.exists(CORE_LOCK_FILE)
    return lock_present
def mark_core_trained():
    """Create (or overwrite) the lock file that marks the core adapter as trained."""
    marker_text = "trained"
    with open(CORE_LOCK_FILE, "w") as lock_file:
        lock_file.write(marker_text)
# ================== TRAIN HANDLER ==================
def start_training(
    training_mode,
    dataset_name,
    skill_name,
    epochs,
    learning_rate,
    batch_size,
):
    """Validate the form values and start training in a background thread.

    Args:
        training_mode: "Core" for the one-time core adapter; any other value
            is treated as Skill training.
        dataset_name: Dataset identifier forwarded to ``train_skill``.
        skill_name: Adapter name for Skill training (ignored in Core mode).
        epochs: Number of epochs; converted with ``int``.
        learning_rate: Learning rate; converted with ``float``.
        batch_size: Batch size; converted with ``int``.

    Returns:
        A status string for the UI: either a summary of the run that was
        started, or a message explaining why nothing was started.
    """
    global training_thread

    # Only one training run at a time.
    if training_thread and training_thread.is_alive():
        return "⚠️ Training already running"

    # Validate up front: a failure inside the daemon thread would never
    # reach the status box.
    dataset = (dataset_name or "").strip()
    if not dataset:
        return "❌ Dataset name is required"

    is_core = training_mode == "Core"
    if is_core:
        if core_already_trained():
            return "❌ Core adapter already trained. Core is locked."
        final_skill = "core"
    else:  # Skill training
        final_skill = (skill_name or "").strip().lower()
        if not final_skill:
            return "❌ Skill name is required for Skill training"
        # A skill called "core" would issue exactly the same train_skill call
        # as Core mode while skipping the lock check above.
        if final_skill == "core":
            return "❌ 'core' is reserved for the Core adapter. Choose another skill name."

    # Convert once, before the thread starts, so bad values (e.g. a cleared
    # number field arriving as None) are reported instead of failing silently.
    try:
        final_epochs = int(epochs)
        final_lr = float(learning_rate)
        final_batch_size = int(batch_size)
    except (TypeError, ValueError):
        return "❌ Epochs, learning rate and batch size must be numeric"

    def run():
        train_skill(
            dataset_name=dataset,
            skill_name=final_skill,
            epochs=final_epochs,
            lr=final_lr,
            batch_size=final_batch_size,
        )
        # Reached only when train_skill returned without raising, so a failed
        # core run does not lock the core.
        if is_core:
            mark_core_trained()

    training_thread = threading.Thread(target=run, daemon=True)
    training_thread.start()

    return (
        f"🚀 Training started\n\n"
        f"Mode: {training_mode}\n"
        f"Dataset: {dataset}\n"
        f"Adapter: {final_skill}\n"
        f"Epochs: {final_epochs}\n"
        f"LR: {final_lr}"
    )
# ================== UI ==================
# Markdown shown at the top of the page.
_INTRO_MD = """
# 🧠 DocuMint Smart Trainer
Progressive LoRA training with **Core freeze + Skill adapters**
✔ Dataset selectable
✔ No catastrophic forgetting
✔ Production-safe training
"""

# Markdown shown in the Help tab.
_HELP_MD = """
## How to use safely
### 1️⃣ Train Core (ONE TIME)
- Mode: **Core**
- Dataset: `gsm8k` / `MathInstruct`
- Epochs: `3`
- LR: `2e-4`
🔒 Core will auto-lock after training.
### 2️⃣ Add Skills (Unlimited)
- Mode: **Skill**
- Skill name: `vat`, `invoice`, `math`, etc
- Epochs: `1`
- LR: `5e-5` or `3e-5`
### 3️⃣ Dataset is always safe to change
What matters is **which adapter is trained**, not the dataset.
---
**Rule:**
Core = brain
Skill = hands / legs
"""

# Markdown shown below the tabs.
_FOOTER_MD = """
---
**DocuMint Smart Trainer**
Progressive learning without forgetting
"""


def _core_status_markdown():
    """Return the core-status heading for the lock file's current state."""
    state = "🔒 Locked (trained)" if core_already_trained() else "🆕 Not trained"
    return f"### Core Status: {state}"


with gr.Blocks(
    title="DocuMint Smart Trainer",
    theme=gr.themes.Soft(primary_hue="orange"),
) as demo:
    gr.Markdown(_INTRO_MD)

    with gr.Row():
        # Initial value at build time; refreshed on every page load by the
        # demo.load() listener registered at the end of this block.
        core_status = gr.Markdown(_core_status_markdown())

    with gr.Tabs():
        # ================== TRAIN TAB ==================
        with gr.Tab("🎯 Train"):
            with gr.Row():
                # Left column: training parameters.
                with gr.Column():
                    training_mode = gr.Radio(
                        ["Core", "Skill"],
                        value="Skill",
                        label="Training Mode",
                        info="Core = one time only | Skill = additive learning",
                    )
                    dataset_input = gr.Textbox(
                        label="Dataset (Hugging Face)",
                        placeholder="e.g. gsm8k or himu1780/DocuMint-Data",
                    )
                    skill_input = gr.Textbox(
                        label="Skill Name (Skill mode only)",
                        placeholder="vat / invoice / math / docs",
                    )
                    epochs_input = gr.Slider(
                        minimum=1,
                        maximum=5,
                        value=1,
                        step=1,
                        label="Epochs",
                    )
                    lr_input = gr.Number(
                        value=5e-5,
                        label="Learning Rate",
                    )
                    batch_input = gr.Slider(
                        minimum=1,
                        maximum=4,
                        value=1,
                        step=1,
                        label="Batch Size",
                    )
                    train_btn = gr.Button(
                        "🚀 Start Training",
                        variant="primary",
                        size="lg",
                    )
                # Right column: status message returned by start_training().
                with gr.Column():
                    output_box = gr.Textbox(
                        label="Status",
                        lines=8,
                        interactive=False,
                    )

            # The input order must match start_training()'s parameter order.
            train_btn.click(
                fn=start_training,
                inputs=[
                    training_mode,
                    dataset_input,
                    skill_input,
                    epochs_input,
                    lr_input,
                    batch_input,
                ],
                outputs=output_box,
            )

        # ================== HELP TAB ==================
        with gr.Tab("❓ Help"):
            gr.Markdown(_HELP_MD)

    gr.Markdown(_FOOTER_MD)

    # Recompute the core status each time the page loads, so a reload after a
    # finished core run shows "Locked" instead of the build-time value.
    demo.load(fn=_core_status_markdown, outputs=core_status)
# ================== LAUNCH ==================
if __name__ == "__main__":
    # Listen on all network interfaces at port 7860.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)