Update app.py
Browse files
app.py
CHANGED
|
@@ -3,13 +3,13 @@ import gradio as gr
|
|
| 3 |
import pdfplumber
|
| 4 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 5 |
|
| 6 |
-
model_name = "ibm-granite/granite-docling-
|
| 7 |
|
| 8 |
-
# Use
|
| 9 |
hf_token = os.environ.get("HF_HUB_TOKEN")
|
| 10 |
|
| 11 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name,
|
| 12 |
-
model = AutoModelForSeq2SeqLM.from_pretrained(model_name,
|
| 13 |
|
| 14 |
def extract_text_from_pdf(pdf_file):
|
| 15 |
text = ""
|
|
@@ -32,7 +32,7 @@ interface = gr.Interface(
|
|
| 32 |
inputs=gr.File(file_types=[".pdf"]),
|
| 33 |
outputs=gr.Textbox(label="Generated JSON"),
|
| 34 |
title="PDF to JSON using Granite DocLing",
|
| 35 |
-
description="Upload a PDF and get a JSON output using
|
| 36 |
)
|
| 37 |
|
| 38 |
interface.launch()
|
|
|
|
| 3 |
import pdfplumber
|
| 4 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 5 |
|
| 6 |
+
model_name = "ibm-granite/granite-docling-258M"
|
| 7 |
|
| 8 |
+
# Use HF token stored in Space secrets
|
| 9 |
hf_token = os.environ.get("HF_HUB_TOKEN")
|
| 10 |
|
| 11 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
|
| 12 |
+
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, token=hf_token)
|
| 13 |
|
| 14 |
def extract_text_from_pdf(pdf_file):
|
| 15 |
text = ""
|
|
|
|
| 32 |
inputs=gr.File(file_types=[".pdf"]),
|
| 33 |
outputs=gr.Textbox(label="Generated JSON"),
|
| 34 |
title="PDF to JSON using Granite DocLing",
|
| 35 |
+
description="Upload a PDF and get a JSON output using ibm-granite/granite-docling-258M."
|
| 36 |
)
|
| 37 |
|
| 38 |
interface.launch()
|