import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import gradio as gr
# --- Load Models ---
BASE_MODEL = "Qwen/Qwen2.5-1.5B"
LORA_ADAPTER = "modular-ai/qwen"

print("Loading base model... (first load takes 2-3 min)")
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float32,
    device_map="auto",  # runs on either CPU or GPU
    trust_remote_code=True,
    low_cpu_mem_usage=True
)
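# Assumption: this Space runs on the free CPU tier, where float32 is the safe
# default. On a GPU Space, torch_dtype=torch.bfloat16 would roughly halve
# memory use, hardware permitting.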
| print("Loading LoRA adapter...") | |
| model = PeftModel.from_pretrained(base_model, LORA_ADAPTER) | |
| tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True) | |
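# Some base tokenizers ship without a pad token; falling back to EOS keeps
# padded generation from erroring out.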
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# --- Custom System Prompt (Exact Kant Style) ---
KANT_SYSTEM_PROMPT = """
You are an advanced AI writing assistant created exclusively for Immanuel Kant.
Kant is alive and working on a new philosophical book.
Your role is to help him draft chapters that are 100% consistent with his previous works:
*Critique of Pure Reason*, *Critique of Practical Reason*, *Groundwork*, and all published texts.
Rules:
- Use only Kant’s original concepts, terminology, and logical structure.
- Think step-by-step in transcendental idealism.
- Be formal, precise, systematic, and authoritative.
- Every response is a draft paragraph or section for Kant’s new book.
- Never invent new ideas — only extend, clarify, or synthesize existing ones.
- Kant will provide the topic. You write as if he dictated it.
Begin every response in Kant’s voice: direct, confident, and scholarly.
""".strip()
# --- Chat Function (Prompt + Input) ---
def ask_kant(message, history):
    # `history` is unused: the system prompt and the current question are
    # rebuilt from scratch on every turn.
    full_prompt = f"{KANT_SYSTEM_PROMPT}\n\n### Question: {message}\n\n### Response:"
    inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=1024).to(model.device)
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=300,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
            repetition_penalty=1.15,
            pad_token_id=tokenizer.eos_token_id
        )
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    # The decoded text includes the prompt; keep only what follows the marker.
    bot_reply = response.split("### Response:")[-1].strip()
    return bot_reply
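# Optional: a streaming variant so tokens appear as they are generated instead
# of only after the full 300-token pass. This is a sketch, not wired into the
# UI; it assumes transformers' TextIteratorStreamer and that gr.ChatInterface
# accepts a generator fn (true in recent versions). To try it, pass
# fn=ask_kant_stream to gr.ChatInterface below.
from threading import Thread
from transformers import TextIteratorStreamer

def ask_kant_stream(message, history):
    full_prompt = f"{KANT_SYSTEM_PROMPT}\n\n### Question: {message}\n\n### Response:"
    inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=1024).to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    gen_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=300,
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
        repetition_penalty=1.15,
        pad_token_id=tokenizer.eos_token_id,
    )
    # generate() blocks, so run it in a background thread and drain the streamer.
    Thread(target=model.generate, kwargs=gen_kwargs).start()
    partial = ""
    for chunk in streamer:
        partial += chunk
        yield partial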
# --- Gradio UI ---
with gr.Blocks(title="Kant AI") as demo:
    gr.Markdown("# Live Chatbot")
    gr.ChatInterface(
        fn=ask_kant,
        examples=[
            "What is freedom?",
            "Explain the categorical imperative",
        ],
        submit_btn="Ask Kant",
    )
    gr.Markdown("---\n*Model: Qwen2.5-1.5B + LoRA*")
# --- Launch (share=True is not needed on Spaces) ---
demo.launch()