doc / app.py
raahinaez's picture
Update app.py
d46886b verified
raw
history blame
951 Bytes
import os
import gradio as gr
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.datamodel.base_models import InputFormat
import tempfile
def pdf_to_markdown(file):
    """Convert an uploaded PDF to Markdown text using Docling.

    Args:
        file: The value supplied by the ``gr.File`` input. Accepted forms
            are a filesystem path (``str`` or ``os.PathLike``), an object
            that exposes the on-disk path as ``.name``, or ``None`` when
            the form was submitted without an upload.

    Returns:
        The Markdown export of the converted document, or an empty string
        when no file was provided.
    """
    # Submitting without an upload passes None; return an empty result
    # instead of failing on the attribute access below.
    if file is None:
        return ""

    # Nothing is written here: only the path of the uploaded file is read.
    # Path-like values are used as-is (a pathlib.Path's ``.name`` would be
    # just the basename); other objects are expected to carry the full
    # path in ``.name``.
    if isinstance(file, (str, os.PathLike)):
        pdf_path = os.fspath(file)
    else:
        pdf_path = file.name

    # NOTE(review): PdfFormatOption() is built with its defaults -- confirm
    # that this selects the Granite Docling pipeline advertised in the UI.
    converter = DocumentConverter(
        format_options={InputFormat.PDF: PdfFormatOption()},
    )
    document = converter.convert(pdf_path).document

    # Markdown is the chosen export; document.model_dump() is the
    # alternative mentioned by the original author for JSON-style output.
    return document.export_to_markdown()
# Gradio UI: one PDF upload wired to pdf_to_markdown, result shown as text.
interface = gr.Interface(
    fn=pdf_to_markdown,
    inputs=gr.File(file_types=[".pdf"]),
    outputs="text",
    # \u2192 is the right-arrow character; written as an escape so the
    # title cannot be garbled again by a mis-decoded source file.
    title="PDF \u2192 Markdown/JSON with Granite Docling",
    description="Upload a PDF and get parsed Markdown (or JSON) using Granite Docling via Docling.",
)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    interface.launch()