"""Gradio chat app: Qwen2.5-1.5B plus a LoRA adapter, prompted to draft text in Kant's style."""

import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import gradio as gr

# --- Load Models ---
BASE_MODEL = "Qwen/Qwen2.5-1.5B"
LORA_ADAPTER = "modular-ai/qwen"

print("Loading base model... (pehli baar 2-3 min)")
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float32,
    device_map="auto",  # works on both CPU and GPU
    trust_remote_code=True,
    low_cpu_mem_usage=True,
)

print("Loading LoRA adapter...")
model = PeftModel.from_pretrained(base_model, LORA_ADAPTER)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)

# generate() is given a pad token id below; fall back to EOS when the
# tokenizer does not define a dedicated pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# --- Custom system prompt (Kant style) ---
KANT_SYSTEM_PROMPT = """
You are an advanced AI writing assistant created exclusively for Immanuel Kant.
Kant is alive and working on a new philosophical book.
Your role is to help him draft chapters that are 100% consistent with his previous works:
*Critique of Pure Reason*, *Critique of Practical Reason*, *Groundwork*, and all published texts.

Rules:
- Use only Kant’s original concepts, terminology, and logical structure.
- Think step-by-step in transcendental idealism.
- Be formal, precise, systematic, and authoritative.
- Every response is a draft paragraph or section for Kant’s new book.
- Never invent new ideas — only extend, clarify, or synthesize existing ones.
- Kant will provide the topic. You write as if he dictated it.

Begin every response in Kant’s voice: direct, confident, and scholarly.
""".strip()

# Section markers used in the prompt template. If the model keeps writing
# past its answer it tends to emit one of these, so the reply is cut there.
_TURN_MARKERS = ("### Question:", "### Response:")


# --- Chat Function (Prompt + Input) ---
def ask_kant(message, history):
    """Generate a Kant-style reply to a single user message.

    Args:
        message: The user's question or topic, inserted into the prompt
            template after the system prompt.
        history: Previous chat turns as passed by ``gr.ChatInterface``.
            It is not used; every message is answered independently.

    Returns:
        The generated reply text with the prompt and any follow-up turns
        invented by the model removed.
    """
    full_prompt = f"{KANT_SYSTEM_PROMPT}\n\n### Question: {message}\n\n### Response:"

    # NOTE(review): truncation presumably drops tokens from the end of the
    # prompt, so a very long message could lose the trailing "### Response:"
    # cue - confirm against tokenizer.truncation_side.
    inputs = tokenizer(
        full_prompt,
        return_tensors="pt",
        truncation=True,
        max_length=1024,
    ).to(model.device)

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=300,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
            repetition_penalty=1.15,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The returned sequence starts with the prompt tokens. Decode only what
    # was generated after them instead of searching the decoded text for the
    # "### Response:" marker, which picks the wrong segment when the model
    # repeats the marker and echoes the prompt when the marker was truncated.
    prompt_length = inputs["input_ids"].shape[1]
    bot_reply = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

    # Keep only the first answer; discard any extra turns the model invented.
    for marker in _TURN_MARKERS:
        bot_reply = bot_reply.partition(marker)[0]

    return bot_reply.strip()


# --- Gradio UI ---
with gr.Blocks(title="Kant AI") as demo:
    gr.Markdown("# Live Chatbot")
    gr.ChatInterface(
        fn=ask_kant,
        examples=[
            "What is freedom?",
            "Explain categorical imperative",
        ],
        submit_btn="Ask Kant",
    )
    gr.Markdown("---\n*Model: Qwen2.5-1.5B + LoRA*")

# --- Launch (share=True is not needed on Spaces) ---
demo.launch()