Update app.py
Browse files
app.py
CHANGED
|
@@ -4,52 +4,28 @@ from docling.document_converter import DocumentConverter, PdfFormatOption
|
|
| 4 |
from docling.datamodel.base_models import InputFormat
|
| 5 |
import tempfile
|
| 6 |
|
| 7 |
-
# -------------------------
|
| 8 |
-
# PDF → Markdown / JSON
|
| 9 |
-
# -------------------------
|
| 10 |
def pdf_to_markdown(file):
|
| 11 |
-
if file is None:
|
| 12 |
-
return "No file uploaded."
|
| 13 |
-
|
| 14 |
# Save uploaded file temporarily
|
| 15 |
tmp_path = file.name
|
| 16 |
-
|
| 17 |
-
# Initialize Docling converter for PDF
|
| 18 |
converter = DocumentConverter(
|
| 19 |
format_options={
|
| 20 |
InputFormat.PDF: PdfFormatOption()
|
| 21 |
}
|
| 22 |
)
|
| 23 |
-
|
| 24 |
-
# Convert PDF
|
| 25 |
result = converter.convert(tmp_path)
|
| 26 |
doc = result.document
|
| 27 |
-
|
| 28 |
-
# Export to Markdown
|
| 29 |
md = doc.export_to_markdown()
|
| 30 |
-
|
| 31 |
return md
|
| 32 |
|
| 33 |
-
# -------------------------
|
| 34 |
-
# Gradio Interface
|
| 35 |
-
# -------------------------
|
| 36 |
-
output_box = gr.Textbox(
|
| 37 |
-
label="Extracted Markdown",
|
| 38 |
-
lines=30, # Visible lines (enlarge as needed)
|
| 39 |
-
max_lines=2000, # Max scrollable content
|
| 40 |
-
scrollable=True
|
| 41 |
-
)
|
| 42 |
-
|
| 43 |
interface = gr.Interface(
|
| 44 |
fn=pdf_to_markdown,
|
| 45 |
inputs=gr.File(file_types=[".pdf"]),
|
| 46 |
-
outputs=
|
| 47 |
title="PDF β Markdown/JSON with Granite Docling",
|
| 48 |
description="Upload a PDF and get parsed Markdown (or JSON) using Granite Docling via Docling."
|
| 49 |
)
|
| 50 |
|
| 51 |
-
# -------------------------
|
| 52 |
-
# Launch App
|
| 53 |
-
# -------------------------
|
| 54 |
if __name__ == "__main__":
|
| 55 |
interface.launch()
|
|
|
|
| 4 |
from docling.datamodel.base_models import InputFormat
|
| 5 |
import tempfile
|
| 6 |
|
|
|
|
|
|
|
|
|
|
| 7 |
def pdf_to_markdown(file):
    """Convert an uploaded PDF to Markdown text using Docling.

    Args:
        file: The value supplied by the upload input. May be ``None`` when
            nothing was uploaded, an object exposing the on-disk path as
            ``.name``, or a plain path string.

    Returns:
        The converted document exported as a Markdown string, or the
        message ``"No file uploaded."`` when ``file`` is ``None``.
    """
    # Without this guard a submit with no upload fails on `file.name`
    # with AttributeError instead of giving the user a readable message.
    if file is None:
        return "No file uploaded."

    # No copy is made here: the converter is handed the path that the upload
    # value reports. Fall back to the value itself when it is a bare path.
    pdf_path = getattr(file, "name", file)

    # NOTE(review): PdfFormatOption() is built with its defaults; whether that
    # selects the Granite Docling VLM pipeline is not visible here - confirm.
    converter = DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(),
        }
    )
    result = converter.convert(pdf_path)

    # Only Markdown is produced; a JSON export is not wired up in this function.
    return result.document.export_to_markdown()
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
# Gradio UI: a single PDF upload as input, the converted Markdown shown as
# plain text output.
interface = gr.Interface(
    fn=pdf_to_markdown,
    inputs=gr.File(file_types=[".pdf"]),
    outputs="text",
    # The title previously rendered a garbled character where the arrow belongs.
    title="PDF → Markdown/JSON with Granite Docling",
    description="Upload a PDF and get parsed Markdown (or JSON) using Granite Docling via Docling.",
)

# Launch the app only when executed as a script, not when imported.
if __name__ == "__main__":
    interface.launch()
|