raahinaez committed
Commit 300a3ab · verified · 1 Parent(s): 3c2e11e

Update app.py

Files changed (1): app.py (+7 -7)
app.py CHANGED
@@ -1,14 +1,16 @@
-# app.py
+import os
 import gradio as gr
 import pdfplumber
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
-# Load the model
 model_name = "ibm-granite/granite-docling-258m-demo"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 
-# Function to extract text from PDF
+# Use the Hugging Face token stored in Secrets
+hf_token = os.environ.get("HF_HUB_TOKEN")
+
+tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=hf_token)
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name, use_auth_token=hf_token)
+
 def extract_text_from_pdf(pdf_file):
     text = ""
     with pdfplumber.open(pdf_file.name) as pdf:
@@ -18,7 +20,6 @@ def extract_text_from_pdf(pdf_file):
             text += page_text + "\n"
     return text
 
-# Function to generate JSON from text
 def pdf_to_json(pdf_file):
     text = extract_text_from_pdf(pdf_file)
     inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=2048)
@@ -26,7 +27,6 @@ def pdf_to_json(pdf_file):
     result = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return result
 
-# Gradio interface
 interface = gr.Interface(
     fn=pdf_to_json,
     inputs=gr.File(file_types=[".pdf"]),
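
Note on the change: the commit reads the token from the HF_HUB_TOKEN environment variable, which Hugging Face Spaces populates from the repository's Secrets. The use_auth_token keyword used here still works but is deprecated in recent transformers releases in favor of token. A minimal sketch of the same loading step with the newer keyword, assuming a recent transformers version and the same HF_HUB_TOKEN secret:

import os
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_name = "ibm-granite/granite-docling-258m-demo"
# Assumes HF_HUB_TOKEN is set as a Space secret, as in the commit above.
hf_token = os.environ.get("HF_HUB_TOKEN")

# `token` replaces the deprecated `use_auth_token` keyword.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, token=hf_token)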