# app.py
# Improved Gradio UI for Vietnamese Text Summarization (Extractive + Abstractive)
# Designed for deployment on Hugging Face Spaces (or similar).
# Keeps the original technologies: PhoBERT + custom TF decoder, TextRank extractive.
import os
import sys
import shutil
import tempfile
import importlib
from io import BytesIO
from typing import List, Optional

import gradio as gr
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

# Make the repository root and its py/ sub-directory importable, so the
# summarizer modules resolve whether they are addressed as `py.<name>` or `<name>`.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
PY_DIR = os.path.join(ROOT_DIR, "py")
# Processed in this order so py/ ends up ahead of the root on sys.path.
for _search_dir in (ROOT_DIR, PY_DIR):
    if _search_dir not in sys.path:
        sys.path.insert(0, _search_dir)
del _search_dir


# Smoothing function handed to every sentence-level BLEU computation below.
smooth = SmoothingFunction().method1

# Summarizer singletons; filled in on first use by the get_*_summarizer helpers.
ext_summarizer = None
abs_summarizer = None

# Directory holding the abstractive model files; created at import time.
DEFAULT_MODEL_DIR = os.path.join(ROOT_DIR, "models")
os.makedirs(DEFAULT_MODEL_DIR, exist_ok=True)


# Utility to import summarizer modules (works with a py/ package or root-level modules)
def import_summarizer_module(base_name: str):
    """Import a summarizer module given its base name.

    ``py.<base_name>`` is tried first, then the bare ``<base_name>``; the first
    candidate that imports successfully is returned.

    Args:
        base_name: Module name without a package prefix, e.g. ``"extractive"``.

    Returns:
        The imported module object.

    Raises:
        ImportError: If no candidate could be imported. The last underlying
            exception is chained as ``__cause__`` so its traceback is kept.
    """
    candidates = [f"py.{base_name}", base_name]
    last_err = None
    for mod_name in candidates:
        try:
            return importlib.import_module(mod_name)
        except Exception as e:
            # Deliberately broad: a candidate may exist but fail while importing
            # its own dependencies; the next candidate is still worth trying.
            last_err = e
    raise ImportError(
        f"No module named '{base_name}' (tried {candidates}). Last error: {last_err}"
    ) from last_err


def get_ext_summarizer():
    """Return the shared extractive summarizer, creating it on the first call.

    The instance is stored in the module-level ``ext_summarizer`` and reused by
    every later call.
    """
    global ext_summarizer
    if ext_summarizer is not None:
        return ext_summarizer
    summarizer_cls = import_summarizer_module("extractive").ExtractiveSummarizer
    ext_summarizer = summarizer_cls()
    return ext_summarizer


def get_abs_summarizer(model_dir: Optional[str] = None):
    """Return the shared abstractive summarizer, creating it on the first call.

    Args:
        model_dir: Directory passed to ``AbstractiveSummarizer`` when the
            instance is created; a falsy value selects ``DEFAULT_MODEL_DIR``.
            It is ignored once an instance is already cached in the
            module-level ``abs_summarizer``.

    Returns:
        The cached ``AbstractiveSummarizer`` instance.
    """
    global abs_summarizer
    if abs_summarizer is not None:
        return abs_summarizer
    summarizer_cls = import_summarizer_module("abstractive").AbstractiveSummarizer
    resolved_dir = model_dir if model_dir else DEFAULT_MODEL_DIR
    abs_summarizer = summarizer_cls(model_dir=resolved_dir)
    return abs_summarizer


# Summarization functions used by the UI
def generate_extractive(article: str, top_n: int, state):
    """Generate an extractive summary of ``article``.

    Args:
        article: Source text to summarize.
        top_n: Number of sentences requested. Coerced to ``int`` before being
            passed to the summarizer, mirroring how ``generate_abstractive``
            treats its beam size.
        state: Previous UI state; handed back untouched when ``article`` is
            empty or whitespace-only.

    Returns:
        ``(summary_text, new_state)``. For non-empty input both elements are the
        same string: the summary, a "text too short" notice, or an error message.
    """
    if not article or not article.strip():
        return "Vui lòng nhập văn bản để tóm tắt.", state

    try:
        summarizer = get_ext_summarizer()
        summary = summarizer.summarize(article, top_n=int(top_n))
        # Very short results are treated as a failed summarization.
        if not summary or len(summary.strip()) < 10:
            summary = "Không thể tạo tóm tắt từ văn bản này. Vui lòng thử văn bản dài hơn."
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the callback.
        summary = f"Lỗi khi chạy Extractive: {e}"
    return summary, summary


def generate_abstractive(article: str, beam_k: int, state, model_dir=DEFAULT_MODEL_DIR):
    """Produce an abstractive summary of ``article``.

    The summarizer is obtained lazily via ``get_abs_summarizer`` and called with
    ``k=int(beam_k)``.

    Returns:
        ``(summary_text, new_state)``. When ``article`` is blank, a prompt
        message is returned together with the unchanged ``state``; otherwise
        both elements are the same string (summary, "too short" notice, or an
        error description).
    """
    if not (article and article.strip()):
        return "Vui lòng nhập văn bản để tóm tắt.", state

    try:
        model = get_abs_summarizer(model_dir=model_dir)
        result = model.summarize(article, k=int(beam_k))
        # Kept inside the try so a non-string result is reported as an error too.
        if not result or len(result.strip()) < 10:
            result = "Không thể tạo tóm tắt từ văn bản này. Vui lòng thử văn bản dài hơn."
    except FileNotFoundError as missing:
        result = f"Abstractive model chưa sẵn sàng: {missing}"
    except Exception as failure:
        result = f"Lỗi khi chạy Abstractive: {failure}"
    return result, result


# Evaluation: score both summaries against a reference and draw a comparison chart
_ROUGE_TYPES = ("rouge1", "rouge2", "rougeL")


class _UnicodeWhitespaceTokenizer:
    """ROUGE tokenizer that lower-cases and splits on whitespace.

    rouge_score's default tokenizer keeps only ``[a-z0-9]`` characters, which
    breaks Vietnamese words apart at every accented letter; this tokenizer keeps
    each whitespace-delimited word intact.
    """

    def tokenize(self, text):
        return text.lower().split()


def _build_rouge_scorer():
    """Create a RougeScorer that tokenizes Unicode text correctly when supported."""
    try:
        return rouge_scorer.RougeScorer(list(_ROUGE_TYPES), tokenizer=_UnicodeWhitespaceTokenizer())
    except TypeError:
        # rouge_score releases without the `tokenizer` argument: keep the
        # previous behaviour rather than failing.
        return rouge_scorer.RougeScorer(list(_ROUGE_TYPES), use_stemmer=True)


def _bleu_scores(ref_tokens, candidate):
    """Return ``(BLEU-1, BLEU-4)`` for ``candidate``; zeros when either side is empty or scoring fails."""
    cand_tokens = candidate.split()
    if not ref_tokens or not cand_tokens:
        return 0.0, 0.0
    try:
        bleu1 = sentence_bleu([ref_tokens], cand_tokens, weights=(1, 0, 0, 0), smoothing_function=smooth)
        bleu4 = sentence_bleu([ref_tokens], cand_tokens, weights=(0.25, 0.25, 0.25, 0.25), smoothing_function=smooth)
    except Exception:
        # Best effort: a scoring failure is shown as 0 instead of breaking the chart.
        return 0.0, 0.0
    return float(bleu1 or 0.0), float(bleu4 or 0.0)


def _rouge_f1(scorer, reference, candidate):
    """Return ROUGE-1/2/L F-measures for ``candidate``; zeros when either text is empty or scoring fails."""
    if not reference or not candidate:
        return [0.0] * len(_ROUGE_TYPES)
    try:
        scores = scorer.score(reference, candidate)
    except Exception:
        # Best effort, same policy as BLEU.
        return [0.0] * len(_ROUGE_TYPES)
    return [float(scores[name].fmeasure) for name in _ROUGE_TYPES]


def evaluate_and_plot(reference: str, ext_summary: str, abs_summary: str):
    """Compare the extractive and abstractive summaries against a reference.

    Computes BLEU-1, BLEU-4 and ROUGE-1/2/L F-measures for each summary and
    renders them as a grouped bar chart.

    Args:
        reference: Reference summary; ``None`` is treated as empty.
        ext_summary: Extractive summary text; ``None`` is treated as empty.
        abs_summary: Abstractive summary text; ``None`` is treated as empty.

    Returns:
        A ``PIL.Image.Image`` in RGB mode containing the chart. Any metric that
        cannot be computed (empty input or scoring error) is plotted as 0.
    """
    ref = reference or ""
    ref_tokens = ref.split()
    scorer = _build_rouge_scorer()

    metrics = ["BLEU-1", "BLEU-4", "ROUGE-1", "ROUGE-2", "ROUGE-L"]
    series = []
    for candidate in (ext_summary or "", abs_summary or ""):
        bleu1, bleu4 = _bleu_scores(ref_tokens, candidate)
        series.append([bleu1, bleu4, *_rouge_f1(scorer, ref, candidate)])
    ext_vals, abs_vals = series

    # Plot
    x = np.arange(len(metrics))
    width = 0.35
    fig, ax = plt.subplots(figsize=(9, 4))
    try:
        ax.bar(x - width / 2, ext_vals, width, label='Extractive', color="#66B2FF")
        ax.bar(x + width / 2, abs_vals, width, label='Abstractive', color="#FF9999")
        ax.set_ylabel("Score")
        ax.set_title("Comparison: Extractive vs Abstractive")
        ax.set_xticks(x)
        ax.set_xticklabels(metrics)
        ax.set_ylim(0, 1)
        ax.legend(loc='upper right')
        # Numeric label just above each bar.
        for offset, values in ((-width / 2, ext_vals), (width / 2, abs_vals)):
            for i, v in enumerate(values):
                ax.text(i + offset, v + 0.02, f"{v:.3f}", ha='center', fontsize=9)
        # Lay out this figure explicitly instead of pyplot's global "current" figure.
        fig.tight_layout()

        buf = BytesIO()
        fig.savefig(buf, format="png", dpi=100)
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)
    buf.seek(0)
    # convert() decodes the image data, so the result does not depend on `buf` afterwards.
    return Image.open(buf).convert("RGB")


# Utility: save uploaded model files to models/ directory
def _upload_source_path(f):
    """Return an existing on-disk path for an uploaded object, or None if it has none."""
    if isinstance(f, (str, os.PathLike)):
        candidates = [f]
    elif isinstance(f, dict):
        candidates = [f.get("tmp_path"), f.get("path"), f.get("name")]
    else:
        candidates = [getattr(f, "name", None), getattr(f, "tmp_path", None), getattr(f, "path", None)]
    for candidate in candidates:
        if isinstance(candidate, (str, os.PathLike)) and os.path.exists(candidate):
            return os.fspath(candidate)
    return None


def _upload_dest_name(f):
    """Return a bare file name (no directory components) under which to store ``f``."""
    raw = f.get("name") if isinstance(f, dict) else getattr(f, "name", None)
    # Normalise Windows separators too, so basename() strips them on any platform.
    base = os.path.basename(str(raw).replace("\\", "/")) if raw else ""
    return base or "uploaded.bin"


def save_model_files(uploaded_files: List[gr.File], model_dir: str = DEFAULT_MODEL_DIR):
    """Copy uploaded files into ``model_dir``.

    Upload objects have had different shapes across Gradio versions, so each
    item is handled in this order:

    1. anything exposing an existing path (a path string, or a ``name`` /
       ``tmp_path`` / ``path`` attribute or dict key) is copied from that path;
    2. otherwise an object with a ``file`` stream, or one that is itself
       readable, is streamed to a file named after its ``name``.

    Destination names are always reduced to their base name so an upload can
    never be written outside ``model_dir``.

    Args:
        uploaded_files: Upload objects from Gradio; a single object is accepted
            as well as a list.
        model_dir: Target directory; created if missing.

    Returns:
        A status message listing the saved files and any per-file errors.
    """
    if not uploaded_files:
        return "Không có tệp được tải lên."
    if not isinstance(uploaded_files, (list, tuple)):
        # A lone upload (e.g. a single path string) must not be iterated element-wise.
        uploaded_files = [uploaded_files]

    os.makedirs(model_dir, exist_ok=True)
    saved = []
    errors = []
    for f in uploaded_files:
        try:
            src_path = _upload_source_path(f)
            if src_path is not None:
                dest = os.path.join(model_dir, os.path.basename(src_path))
                shutil.copy(src_path, dest)
            else:
                stream = getattr(f, "file", None)
                if stream is not None:
                    # f.file is a SpooledTemporaryFile or similar; rewind before copying.
                    stream.seek(0)
                elif hasattr(f, "read"):
                    stream = f
                else:
                    raise RuntimeError("Unsupported file object from Gradio upload.")
                # os.path.join() discards model_dir when given an absolute name,
                # hence the sanitised base name.
                dest = os.path.join(model_dir, _upload_dest_name(f))
                with open(dest, "wb") as out_f:
                    shutil.copyfileobj(stream, out_f)
            saved.append(dest)
        except Exception as e:
            # Per-file best effort: record the failure and continue with the rest.
            errors.append(f"{getattr(f, 'name', str(f))}: {e}")

    msg = ""
    if saved:
        msg += f"Lưu thành công {len(saved)} tệp: " + ", ".join(os.path.basename(s) for s in saved) + ". "
    if errors:
        msg += "Lỗi: " + "; ".join(errors)
    return msg or "Không có tệp được lưu."


# Reload the abstractive summarizer: drop the cached instance and build a new one
def reload_abstractive(model_dir: str = DEFAULT_MODEL_DIR):
    """Discard the cached abstractive summarizer and try to create a fresh one.

    The module-level ``abs_summarizer`` is cleared before loading, so it stays
    ``None`` if loading from ``model_dir`` fails.

    Returns:
        A status string reporting success, or the failure with the exception text.
    """
    global abs_summarizer
    abs_summarizer = None
    try:
        get_abs_summarizer(model_dir=model_dir)
    except Exception as load_err:
        return f"❌ Không thể tải abstractive model: {load_err}"
    return "✅ Abstractive model đã được tải thành công."


# Report model status: presence of the model files and importability of both modules
def get_model_status(model_dir: str = DEFAULT_MODEL_DIR):
    """Build a multi-line status report for ``model_dir``.

    The report lists whether ``decoder_tokenizer_re.pkl`` and
    ``decoder_only.weights.h5`` exist in ``model_dir``, then whether the
    abstractive and extractive modules can be imported (the modules are only
    imported here, no summarizer is instantiated).

    Returns:
        The report lines joined with newlines.
    """
    def presence(path):
        return "OK" if os.path.exists(path) else "MISSING"

    tokenizer_path = os.path.join(model_dir, "decoder_tokenizer_re.pkl")
    weights_path = os.path.join(model_dir, "decoder_only.weights.h5")
    report = [
        f"Model dir: {model_dir}",
        f"- Tokenizer: {presence(tokenizer_path)} ({os.path.basename(tokenizer_path)})",
        f"- Weights: {presence(weights_path)} ({os.path.basename(weights_path)})",
    ]

    # Import each module only to report whether it is usable.
    for label, module_name in (("Abstractive", "abstractive"), ("Extractive", "extractive")):
        try:
            import_summarizer_module(module_name)
        except Exception as import_err:
            report.append(f"- {label} module: error ({import_err})")
        else:
            report.append(f"- {label} module: available")
    return "\n".join(report)


# Write text to a temporary .txt file and return its path (for a Gradio File component)
def download_text_to_file(text: str, prefix="summary"):
    """Save ``text`` as UTF-8 in a new temporary ``.txt`` file.

    Args:
        text: Content to write; a falsy value means nothing is written.
        prefix: File-name prefix; an underscore is appended to it.

    Returns:
        The path of the created file, or ``None`` when ``text`` is falsy.
        The file is not deleted by this function.
    """
    if not text:
        return None
    handle, out_path = tempfile.mkstemp(suffix=".txt", prefix=prefix + "_")
    # Wrap the descriptor returned by mkstemp directly instead of reopening the path.
    with os.fdopen(handle, "w", encoding="utf-8") as sink:
        sink.write(text)
    return out_path


# EXAMPLES (short list to populate example selector)
# Each entry is a single string: the parentheses only group adjacent string
# literals (implicit concatenation); they do not create tuples.
EXAMPLES = [
    (
        "Trong bối cảnh công nghệ phát triển mạnh mẽ, trí tuệ nhân tạo (AI) đang dần thay đổi cách thức vận hành của nhiều ngành nghề. "
        "Đặc biệt trong lĩnh vực giáo dục, việc ứng dụng AI giúp cá nhân hóa lộ trình học tập cho từng học sinh, đồng thời hỗ trợ giáo viên "
        "trong việc soạn giảng và đánh giá năng lực. Tuy nhiên, các chuyên gia cũng cảnh báo về những thách thức liên quan đến đạo đức và quyền riêng tư khi triển khai AI rộng rãi. Hệ thống giáo dục cần có những bước đi thận trọng để tận dụng tối đa lợi ích mà công nghệ mang lại mà không làm mất đi giá trị cốt lõi của việc dạy và học."
    ),
    (
        "Giá xăng dầu thế giới tăng mạnh trong tuần qua do căng thẳng địa chính trị. Các chuyên gia cho rằng nguồn cung bị ảnh hưởng bởi việc "
        "giảm xuất khẩu từ một số quốc gia sản xuất lớn. Giá nhiên liệu tăng kéo theo chi phí vận chuyển và giá hàng hóa tăng ở nhiều nơi."
    ),
    (
        # The paragraph break inside this article is written as an explicit "\n":
        # a regular quoted literal cannot contain a raw line break.
        "Kết luận của đoàn kiểm tra liên ngành (Chi cục An toàn vệ sinh thực phẩm Hà Nội và UBND xã Bình Minh), nêu rõ, bếp ăn đặt tại Trường Tiểu học Cự Khê, xã Bình Minh, Hà Nội, do công ty Nhật Anh phụ trách, vi phạm nghiêm trọng về điều kiện vệ sinh và quy trình triển khai suất ăn cho học sinh. Cụ thể, khu vực nấu ăn, chia thức ăn được bố trí chung với khu vực rửa, có nguy cơ cao gây nhiễm chéo giữa thực phẩm sống và thực phẩm chín. Vệ sinh môi trường kém, hệ thống cống thoát nước trong khu vực chế biến không được che kín, bị ứ đọng, bốc mùi hôi. Một lớp học của Trường Tiểu học Cự Khê trưa 20/10 có lác đác cặp học sinh . Nơi sơ chế, chế biến thức ăn còn có côn trùng. Mặc dù toàn trường có 1.450 suất ăn nhưng chỉ có 2 bàn inox để chia thức ăn, không đáp ứng đủ yêu cầu. Đặc biệt theo kết luận của đoàn kiểm tra, bếp ăn của Công ty Nhật Anh đặt ở Trường Tiểu học Cự khê còn vi phạm quy trình kiểm thực. Cụ thể, nhân viên không thực hiện ghi chép sổ kiểm thực 3 bước trong ngày kiểm tra (16/10). Với những vi phạm trên, đoàn kiểm tra lập biên bản, xử phạt hành chính công ty Nhật Anh 40 triệu đồng. UBND xã Bình Minh sẽ tiếp tục kiểm tra, giám sát chặt chẽ công tác quản lý an toàn thực phẩm của Trường Tiểu học Cự Khê nhằm đảm bảo an toàn tuyệt đối cho học sinh. Học sinh tự mang cơm ăn tại lớp trưa 20/10 . Bên cạnh những vi phạm trên, đơn vị này có một số mặt đạt được như: Có dùng nước sạch, có hợp đồng và phiếu kết quả kiểm nghiệm đạt chuẩn, trang bị dụng cụ thu gom rác thải, xuất trình được hồ sơ chứng minh nguồn gốc xuất xứ của các nguyên liệu thực phẩm. \n"
        "Cũng theo kết luận, tại thời điểm kiểm tra, công ty Nhật Anh đã xuất trình đầy đủ các hồ sơ pháp lý liên quan gồm: Giấy chứng nhận đăng ký kinh doanh/giấy chứng nhận đăng ký doanh nghiệp; giấy chứng nhận cơ sở đủ điều kiện an toàn thực phẩm cho loại hình kinh doanh dịch vụ ăn uống - bếp ăn tập thể; giấy chứng nhận kinh doanh bếp ăn tập thể cho phạm vi chế biến và cung cấp suất ăn công nghiệp… Hiện, Trường Tiểu học Cự Khê đã ban hành thông báo chấm dứt hợp đồng với Công ty TNHH Thương Mại Dịch vụ Xuất nhập khẩu Nhật Anh và tìm đơn vị thay thế. Trả lời phóng viên Dân trí chiều nay, ông Nguyễn Đăng Việt, Chủ tịch UBND xã Bình Minh cho biết, việc cung cấp bếp ăn mới cần chờ thời gian bởi liên quan đến một số quy trình, trong đó đơn vị cung cấp mới cần tiếp quản hoặc thay thế cơ sở vật chất nhà bếp. Thời gian theo quy định của quy trình thay thế bếp mới ít nhất phải 15 ngày. Mặc dù vậy, ông Việt cho hay, UBND xã đang xúc tiến nhanh quy trình để rút ngắn thời gian xuống khoảng một tuần để sớm cung cấp suất ăn cho học sinh. Cũng theo ông Việt, trong thời gian chờ đợi bếp ăn mới, địa phương sẽ huy động bếp ăn các trường học lân cận hỗ trợ suất ăn cho học sinh Trường Tiểu học Cự Khê để phụ huynh không phải đưa đón con buổi trưa hoặc mang cơm tới lớp. Như Dân trí đã phản ánh, sáng nay (20/10), hàng loạt phụ huynh của trường đã cho học sinh nghỉ học vì không thể đón con buổi trưa hoặc để con tự mang cơm đến lớp. Đây là 2 phương án do nhà trường đưa ra để phụ huynh chọn lựa trong thời gian chờ đợi đơn vị cung cấp thực phẩm mới. Tuy nhiên, nhiều phụ huynh tỏ ra bức xúc vì nhà trường không có thông báo cụ thể về thời gian bếp ăn hoạt động trở lại và công tác đảm bảo an toàn thực phẩm cho học sinh. Dân trí sẽ tiếp tục theo dõi sự việc!"
    ),
]


# Build the Gradio interface: inputs and controls on the left, results and
# model management tabs on the right, followed by the event wiring.
with gr.Blocks(theme=gr.themes.Soft(), css="""
/* Small UI tweaks */
.header { font-weight:700; font-size:22px; }
.card { border-radius: 10px; padding: 12px; box-shadow: 0 2px 6px rgba(0,0,0,0.08); }
.note { font-size: 13px; color: #555; }
.small { font-size: 12px; color: #666; }
""") as demo:

    gr.Markdown("<div class='header'>🇻🇳 Vietnamese Text Summarization — Extractive & Abstractive</div>")
    gr.Markdown("Tùy chỉnh giao diện để dễ dùng trên Hugging Face Spaces. "
                "Upload trọng số & tokenizer vào `models/` nếu muốn bật Abstractive. "
                "Nếu abstractive chưa khả dụng app sẽ báo rõ ràng.")

    # Per-session state. Each State is created once and used as both input and
    # output of its handler so the value actually round-trips between clicks.
    ext_state = gr.State("")
    abs_state = gr.State("")
    model_dir_state = gr.State(DEFAULT_MODEL_DIR)

    with gr.Row():
        with gr.Column(scale=3):
            article = gr.Textbox(
                label="📝 Văn bản gốc (nhập bài báo hoặc đoạn văn tiếng Việt)",
                lines=12, placeholder="Dán bài báo tiếng Việt vào đây...", show_label=True
            )
            examples = gr.Examples(
                examples=EXAMPLES,
                inputs=[article],
                cache_examples=False
            )

            with gr.Row():
                generate_ext_btn = gr.Button("🔍 Tạo Extractive", variant="secondary")
                top_n = gr.Slider(label="Số câu (Extractive - top_n)", minimum=1, maximum=6, value=3, step=1)
                download_ext_btn = gr.Button("⬇️ Tải Extractive")
            with gr.Row():
                generate_abs_btn = gr.Button("✨ Tạo Abstractive (Beam Search)", variant="primary")
                beam_k = gr.Slider(label="Beam size (k)", minimum=1, maximum=6, value=3, step=1)
                download_abs_btn = gr.Button("⬇️ Tải Abstractive")

            gr.Markdown("---")
            ref_box = gr.Textbox(label="📋 (Tùy chọn) Tóm tắt gốc / reference - để so sánh", lines=4)

            with gr.Row():
                eval_btn = gr.Button("📈 Đánh giá & So sánh (BLEU / ROUGE)")
                eval_plot = gr.Image(label="Biểu đồ so sánh", height=350, interactive=False)

        with gr.Column(scale=2):
            with gr.Tab("Kết quả Extractive"):
                ext_out = gr.Textbox(label="🟦 Extractive Summary", interactive=False, lines=8)
                # Hidden until a download is requested; revealed by _download_ext.
                ext_file = gr.File(label="Tải tệp Extractive", visible=False)
            with gr.Tab("Kết quả Abstractive"):
                abs_out = gr.Textbox(label="🟪 Abstractive Summary", interactive=False, lines=8)
                # Hidden until a download is requested; revealed by _download_abs.
                abs_file = gr.File(label="Tải tệp Abstractive", visible=False)
            with gr.Tab("Trạng thái & Models"):
                model_status = gr.Textbox(label="Model Status", interactive=False, lines=8, value=get_model_status())
                with gr.Accordion("Upload/Replace model files (models/)", open=False):
                    upload_files = gr.Files(label="Chọn file để upload (weights + tokenizer). Ví dụ: decoder_only.weights.h5, decoder_tokenizer_re.pkl", file_count="multiple")
                    upload_btn = gr.Button("⬆️ Upload sang models/")
                    reload_btn = gr.Button("🔄 Reload Abstractive Model")
                    upload_result = gr.Textbox(label="Kết quả upload", interactive=False, lines=3)
                with gr.Accordion("Tips & Notes (click)", open=False):
                    gr.Markdown(
                        "- Nếu bạn deploy trên Spaces, upload `decoder_only.weights.h5` và `decoder_tokenizer_re.pkl` vào `models/`.\n"
                        "- VnCoreNLP có thể yêu cầu Java; nếu không sẵn sàng, Extractive sẽ fallback sang tách câu regex.\n"
                        "- Abstractive (PhoBERT + custom decoder) cần nhiều RAM/CPU; nếu chậm hãy giảm beam size hoặc chỉ dùng Extractive."
                    )

    # Wiring buttons to functions
    # Generate extractive
    generate_ext_btn.click(
        fn=generate_extractive,
        inputs=[article, top_n, ext_state],
        outputs=[ext_out, ext_state],
    )

    # Generate abstractive
    generate_abs_btn.click(
        fn=generate_abstractive,
        inputs=[article, beam_k, abs_state, model_dir_state],
        outputs=[abs_out, abs_state],
    )

    # Evaluation
    eval_btn.click(
        fn=evaluate_and_plot,
        inputs=[ref_box, ext_out, abs_out],
        outputs=[eval_plot],
    )

    # Download ext summary as txt. The target File component starts hidden, so
    # the handler must also make it visible or the file can never be reached.
    def _download_ext(text):
        p = download_text_to_file(text, prefix="extractive")
        return gr.update(value=p, visible=p is not None)

    download_ext_btn.click(fn=_download_ext, inputs=[ext_out], outputs=[ext_file])

    # Download abs summary as txt (same visibility handling as above)
    def _download_abs(text):
        p = download_text_to_file(text, prefix="abstractive")
        return gr.update(value=p, visible=p is not None)

    download_abs_btn.click(fn=_download_abs, inputs=[abs_out], outputs=[abs_file])

    # Upload model files, then refresh the status panel so the new files show up
    def _upload_and_report(files):
        msg = save_model_files(files, model_dir=DEFAULT_MODEL_DIR)
        status = get_model_status(DEFAULT_MODEL_DIR)
        return msg, status

    upload_btn.click(fn=_upload_and_report, inputs=[upload_files], outputs=[upload_result, model_status])

    # Reload abstractive
    reload_btn.click(fn=lambda: reload_abstractive(DEFAULT_MODEL_DIR), inputs=None, outputs=[upload_result])

    # Manual status refresh (there is no periodic polling)
    def _status_text():
        return get_model_status(DEFAULT_MODEL_DIR)

    model_status_refresh = gr.Button("🔁 Refresh Status")
    model_status_refresh.click(fn=_status_text, inputs=None, outputs=[model_status])

    # Footer
    gr.Markdown("---")
    gr.Markdown("Powered by PhoBERT (vinai/phobert-base), TensorFlow and classic TextRank + TF-IDF for extractive.")
    gr.Markdown("If you want help uploading model files to the Hugging Face Hub instead of storing them in the repo, I can provide helper scripts.")

if __name__ == "__main__":
    # Listen on all interfaces at port 7860 and do not create a public share link.
    launch_options = {
        "share": False,
        "server_name": "0.0.0.0",
        "server_port": 7860,
    }
    demo.launch(**launch_options)