import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import gradio as gr

# --- Model and tokenizer setup ---
# Hub identifiers for the base checkpoint and the LoRA adapter applied on top.
BASE_MODEL = "Qwen/Qwen2.5-1.5B"
LORA_ADAPTER = "modular-ai/qwen"

print("Loading base model... (pehli baar 2-3 min)")

# Options for loading the base checkpoint, gathered in one place.
# device_map="auto" is intended to let this run on either CPU or GPU.
_BASE_LOAD_KWARGS = {
    "torch_dtype": torch.float32,
    "device_map": "auto",
    "trust_remote_code": True,
    "low_cpu_mem_usage": True,
}
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, **_BASE_LOAD_KWARGS)

print("Loading LoRA adapter...")
# Wrap the base model with the adapter referenced by LORA_ADAPTER.
model = PeftModel.from_pretrained(base_model, LORA_ADAPTER)

# The tokenizer comes from the base checkpoint, not from the adapter repo.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
# Reuse the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# --- Custom system prompt (exact Kant style) ---
# Instruction text that ask_kant() places in front of every user question.
# The trailing .strip() removes the newlines that the triple-quoted literal
# would otherwise carry at its start and end.
KANT_SYSTEM_PROMPT = """
You are an advanced AI writing assistant created exclusively for Immanuel Kant. 
Kant is alive and working on a new philosophical book. 
Your role is to help him draft chapters that are 100% consistent with his previous works: 
*Critique of Pure Reason*, *Critique of Practical Reason*, *Groundwork*, and all published texts.

Rules:
- Use only Kant’s original concepts, terminology, and logical structure.
- Think step-by-step in transcendental idealism.
- Be formal, precise, systematic, and authoritative.
- Every response is a draft paragraph or section for Kant’s new book.
- Never invent new ideas — only extend, clarify, or synthesize existing ones.
- Kant will provide the topic. You write as if he dictated it.

Begin every response in Kant’s voice: direct, confident, and scholarly.
""".strip()

# --- Chat Function (Prompt + Input) ---
def ask_kant(message, history):
    """Generate a Kant-styled reply to ``message`` with the LoRA-adapted model.

    The system prompt and the user's message are combined into a single
    "### Question / ### Response" prompt, the model samples a continuation,
    and only that continuation is returned.

    Args:
        message: The user's latest chat message (the topic or question).
        history: Earlier chat turns passed in by the chat UI. Unused: each
            request is answered independently of previous turns.

    Returns:
        The generated reply text with surrounding whitespace removed.
    """
    full_prompt = f"{KANT_SYSTEM_PROMPT}\n\n### Question: {message}\n\n### Response:"

    inputs = tokenizer(
        full_prompt,
        return_tensors="pt",
        truncation=True,
        max_length=1024,
    ).to(model.device)

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=300,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
            repetition_penalty=1.15,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The generated sequence starts with the prompt tokens. Slice them off by
    # length instead of searching the decoded text for "### Response:": the
    # marker can be missing (prompt truncated at max_length) or repeated (the
    # model writes its own), and either case would corrupt a text-based split.
    prompt_length = inputs["input_ids"].shape[1]
    new_tokens = output[0][prompt_length:]
    bot_reply = tokenizer.decode(new_tokens, skip_special_tokens=True)

    # The model may keep going and fabricate a follow-up question; keep only
    # the answer to the question that was actually asked.
    bot_reply = bot_reply.split("### Question:", 1)[0]
    return bot_reply.strip()

# --- Gradio UI ---
# Page layout: a heading, the chat widget backed by ask_kant, and a footer
# naming the model.
with gr.Blocks(title="Kant AI") as demo:
    # Heading. Markdown emphasis markers must be balanced, otherwise they are
    # rendered as literal asterisks.
    gr.Markdown("# Live Chatbot")

    gr.ChatInterface(
        fn=ask_kant,
        examples=[
            "What is freedom?",
            "Explain categorical imperative",
        ],
        submit_btn="Ask Kant",
    )

    # Footer: horizontal rule followed by an italic model credit.
    gr.Markdown("---\n*Model: Qwen2.5-1.5B + LoRA*")

# --- Launch (share=True is not needed on Spaces) ---
demo.launch()