|
|
import os |
|
|
import gradio as gr |
|
|
from docling.document_converter import DocumentConverter, PdfFormatOption |
|
|
from docling.datamodel.base_models import InputFormat |
|
|
import tempfile |
|
|
|
|
|
def pdf_to_markdown(file):
    """Convert an uploaded PDF to Markdown with Docling.

    Args:
        file: The value delivered by the upload input. May be ``None`` when
            nothing was uploaded, a filesystem path (``str`` or
            ``os.PathLike``), or an object exposing its path as ``.name``.

    Returns:
        The Markdown export of the converted document, or an empty string
        when no file was provided.
    """
    # Submitting without an upload passes None; return empty output rather
    # than failing on the attribute access below.
    if file is None:
        return ""

    # Plain paths are used directly: a Path's own ``.name`` is only the
    # basename, so it must not go through the attribute branch.
    if isinstance(file, (str, os.PathLike)):
        pdf_path = os.fspath(file)
    else:
        pdf_path = file.name

    converter = DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(),
        }
    )
    result = converter.convert(pdf_path)
    return result.document.export_to_markdown()
|
|
|
|
|
# Gradio UI: a single PDF upload wired to pdf_to_markdown, with the returned
# string displayed as plain text.
interface = gr.Interface(
    fn=pdf_to_markdown,
    inputs=gr.File(file_types=[".pdf"]),  # restrict the picker to PDF files
    outputs="text",
    # The separator in the title had been garbled into a stray Greek letter;
    # it is restored to the intended arrow.
    title="PDF → Markdown/JSON with Granite Docling",
    description="Upload a PDF and get parsed Markdown (or JSON) using Granite Docling via Docling.",
)
|
|
|
|
|
# Start the Gradio app only when this file is executed as a script, so that
# importing the module does not launch a server.
if __name__ == "__main__":
    interface.launch()
|
|
|