# app.py
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import gradio as gr
# --- Load Models ---
BASE_MODEL = "Qwen/Qwen2.5-1.5B"
LORA_ADAPTER = "modular-ai/qwen"
print("Loading base model... (pehli baar 2-3 min)")
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float32,
    device_map="auto",  # runs on CPU or GPU
    trust_remote_code=True,
    low_cpu_mem_usage=True,
)
print("Loading LoRA adapter...")
model = PeftModel.from_pretrained(base_model, LORA_ADAPTER)
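# Optional: merge the LoRA weights into the base model for slightly faster
# inference (removes the adapter indirection at each forward pass). Uncomment
# if you don't need to swap adapters at runtime:
# model = model.merge_and_unload()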
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
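
# Inference only: put the model in eval mode so dropout (if any) is disabled.
model.eval()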
# --- Custom System Prompt (Exact Kant Style) ---
KANT_SYSTEM_PROMPT = """
You are an advanced AI writing assistant created exclusively for Immanuel Kant.
Kant is alive and working on a new philosophical book.
Your role is to help him draft chapters that are 100% consistent with his previous works:
*Critique of Pure Reason*, *Critique of Practical Reason*, *Groundwork*, and all published texts.
Rules:
- Use only Kant’s original concepts, terminology, and logical structure.
- Think step-by-step in transcendental idealism.
- Be formal, precise, systematic, and authoritative.
- Every response is a draft paragraph or section for Kant’s new book.
- Never invent new ideas — only extend, clarify, or synthesize existing ones.
- Kant will provide the topic. You write as if he dictated it.
Begin every response in Kant’s voice: direct, confident, and scholarly.
""".strip()
# --- Chat Function (Prompt + Input) ---
def ask_kant(message, history):
    # ChatInterface passes the running chat history, but this simple
    # single-turn prompt ignores it.
    full_prompt = f"{KANT_SYSTEM_PROMPT}\n\n### Question: {message}\n\n### Response:"
    inputs = tokenizer(
        full_prompt, return_tensors="pt", truncation=True, max_length=1024
    ).to(model.device)
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=300,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
            repetition_penalty=1.15,
            pad_token_id=tokenizer.eos_token_id,
        )
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    # The decoded text includes the prompt itself; keep only the generated answer.
    bot_reply = response.split("### Response:")[-1].strip()
    return bot_reply
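
# Quick sanity check outside the UI (hypothetical prompt shown):
# print(ask_kant("What is the categorical imperative?", []))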
# --- Gradio UI ---
with gr.Blocks(title="Kant AI") as demo:
    gr.Markdown("# Kant AI Live Chatbot")
    gr.ChatInterface(
        fn=ask_kant,
        examples=[
            "What is freedom?",
            "Explain the categorical imperative",
        ],
        submit_btn="Ask Kant",
    )
    gr.Markdown("---\n*Model: Qwen2.5-1.5B + LoRA adapter*")
# --- Launch (share=True is not needed on Spaces) ---
demo.launch()
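
# Run locally with `python app.py`, then open the printed URL
# (Gradio defaults to http://127.0.0.1:7860).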