raahinaez committed on
Commit
3d7667b
·
verified ·
1 Parent(s): bcac12e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -27
app.py CHANGED
@@ -4,52 +4,28 @@ from docling.document_converter import DocumentConverter, PdfFormatOption
4
  from docling.datamodel.base_models import InputFormat
5
  import tempfile
6
 
7
- # -------------------------
8
- # PDF → Markdown / JSON
9
- # -------------------------
10
  def pdf_to_markdown(file):
11
- if file is None:
12
- return "No file uploaded."
13
-
14
  # Save uploaded file temporarily
15
  tmp_path = file.name
16
-
17
- # Initialize Docling converter for PDF
18
  converter = DocumentConverter(
19
  format_options={
20
  InputFormat.PDF: PdfFormatOption()
21
  }
22
  )
23
-
24
- # Convert PDF
25
  result = converter.convert(tmp_path)
26
  doc = result.document
27
-
28
- # Export to Markdown
29
  md = doc.export_to_markdown()
30
-
31
  return md
32
 
33
- # -------------------------
34
- # Gradio Interface
35
- # -------------------------
36
- output_box = gr.Textbox(
37
- label="Extracted Markdown",
38
- lines=30, # Visible lines (enlarge as needed)
39
- max_lines=2000, # Max scrollable content
40
- scrollable=True
41
- )
42
-
43
  interface = gr.Interface(
44
  fn=pdf_to_markdown,
45
  inputs=gr.File(file_types=[".pdf"]),
46
- outputs=output_box,
47
  title="PDF β†’ Markdown/JSON with Granite Docling",
48
  description="Upload a PDF and get parsed Markdown (or JSON) using Granite Docling via Docling."
49
  )
50
 
51
- # -------------------------
52
- # Launch App
53
- # -------------------------
54
  if __name__ == "__main__":
55
  interface.launch()
 
4
  from docling.datamodel.base_models import InputFormat
5
  import tempfile
6
 
 
 
 
7
  def pdf_to_markdown(file):
 
 
 
8
  # Save uploaded file temporarily
9
  tmp_path = file.name
10
+ # Convert PDF using Docling/VLM (Granite Docling)
 
11
  converter = DocumentConverter(
12
  format_options={
13
  InputFormat.PDF: PdfFormatOption()
14
  }
15
  )
 
 
16
  result = converter.convert(tmp_path)
17
  doc = result.document
18
+ # Export to Markdown (or you can export to JSON via doc.model_dump())
 
19
  md = doc.export_to_markdown()
 
20
  return md
21
 
 
 
 
 
 
 
 
 
 
 
22
  interface = gr.Interface(
23
  fn=pdf_to_markdown,
24
  inputs=gr.File(file_types=[".pdf"]),
25
+ outputs="text",
26
  title="PDF β†’ Markdown/JSON with Granite Docling",
27
  description="Upload a PDF and get parsed Markdown (or JSON) using Granite Docling via Docling."
28
  )
29
 
 
 
 
30
  if __name__ == "__main__":
31
  interface.launch()