Spaces:

winamnd
/

ocr-llm-test

Sleeping

App Files Files Community

winamnd committed on Feb 17

Commit

2a250f6

verified ·

1 Parent(s): bf26c19

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -17

app.py CHANGED Viewed

@@ -83,43 +83,57 @@ def generate_ocr(method, img):
     # Select OCR method
     if method == "PaddleOCR":
-        text_output = ocr_with_paddle(img)
     elif method == "EasyOCR":
-        text_output = ocr_with_easy(img)
     elif method == "KerasOCR":
-        text_output = ocr_with_keras(img)
     elif method == "TesseractOCR":
-        text_output, _ = ocr_with_tesseract(img)  # Ignore confidence values
     else:
         return "Invalid OCR method", "N/A"
-    # Clean and truncate the extracted text
-    text_output = text_output.strip()
-    if len(text_output) == 0:
         return "No text detected!", "Cannot classify"
-    # Tokenize text for classification
-    inputs = tokenizer(text_output, return_tensors="pt", truncation=True, padding=True, max_length=512)
     # Perform inference
     with torch.no_grad():
         outputs = model(**inputs)
-        logits = outputs.logits  # Get raw logits
-    # Debugging: Print raw logits
-    print(f"Raw logits: {logits}")
-    # Use raw logits directly instead of softmax
     predicted_class = torch.argmax(logits, dim=1).item()
-    # Map class index to labels
     label_map = {0: "Not Spam", 1: "Spam"}
     label = label_map.get(predicted_class, "Unknown")
     # Save results
-    save_results_to_repo(text_output, label)
-    return text_output, label
 # Gradio Interface
 image_input = gr.Image()

     # Select OCR method
     if method == "PaddleOCR":
+        extracted_text = ocr_with_paddle(img)
     elif method == "EasyOCR":
+        extracted_text = ocr_with_easy(img)
     elif method == "KerasOCR":
+        extracted_text = ocr_with_keras(img)
     elif method == "TesseractOCR":
+        extracted_text, _ = ocr_with_tesseract(img)  # Ignore confidence values
     else:
         return "Invalid OCR method", "N/A"
+    # Clean text
+    extracted_text = extracted_text.strip()
+    if not extracted_text:
         return "No text detected!", "Cannot classify"
+    # Debugging: Print extracted text
+    print(f"Extracted Text: {extracted_text}")
+    # Tokenize input
+    inputs = tokenizer(
+        extracted_text,
+        return_tensors="pt",
+        truncation=True,
+        padding="max_length",
+        max_length=512
+    )
+    # Move tensors to the same device as the model
+    inputs = {key: val.to(model.device) for key, val in inputs.items()}
     # Perform inference
     with torch.no_grad():
         outputs = model(**inputs)
+        logits = outputs.logits
+    # Debugging: Print logits
+    print(f"Logits: {logits}")
+    # Use argmax to classify
     predicted_class = torch.argmax(logits, dim=1).item()
     label_map = {0: "Not Spam", 1: "Spam"}
     label = label_map.get(predicted_class, "Unknown")
+    # Debugging: Print final classification
+    print(f"Predicted Class: {predicted_class}, Label: {label}")
     # Save results
+    save_results_to_repo(extracted_text, label)
+    return extracted_text, label
 # Gradio Interface
 image_input = gr.Image()