Spaces:

ariG23498
/

zero-shot-od

Running on Zero

App Files Files Community

ariG23498 HF Staff committed on Aug 12

Commit

4fa3f07

verified ·

1 Parent(s): ff97ba5

update text labels usage

Browse files

Files changed (1) hide show

app.py +2 -22

app.py CHANGED Viewed

@@ -20,9 +20,6 @@ def extract_model_short_name(model_id: str) -> str:
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-# (Optional) modest speed-ups
-torch.set_grad_enabled(False)
 # Model bundles for cleaner wiring
 @dataclass
 class ZSDetBundle:
@@ -30,7 +27,6 @@ class ZSDetBundle:
     model_name: str
     processor: AutoProcessor
     model: AutoModelForZeroShotObjectDetection
-    use_label_ids: bool  # True for OWLv2/OMDet (labels are indices), False for others
 # LLMDet
 model_llmdet_id = "iSEE-Laboratory/llmdet_tiny"
@@ -41,7 +37,6 @@ bundle_llmdet = ZSDetBundle(
     model_name=extract_model_short_name(model_llmdet_id),
     processor=processor_llmdet,
     model=model_llmdet,
-    use_label_ids=False,
 )
 # MM GroundingDINO
@@ -53,7 +48,6 @@ bundle_mm_grounding = ZSDetBundle(
     model_name=extract_model_short_name(model_mm_grounding_id),
     processor=processor_mm_grounding,
     model=model_mm_grounding,
-    use_label_ids=False,
 )
 # OMDet Turbo
@@ -65,7 +59,6 @@ bundle_omdet = ZSDetBundle(
     model_name=extract_model_short_name(model_omdet_id),
     processor=processor_omdet,
     model=model_omdet,
-    use_label_ids=True,  # returns label indices
 )
 # OWLv2
@@ -77,7 +70,6 @@ bundle_owlv2 = ZSDetBundle(
     model_name=extract_model_short_name(model_owlv2_id),
     processor=processor_owlv2,
     model=model_owlv2,
-    use_label_ids=True,  # returns label indices
 )
 # ---------------------------
@@ -106,27 +98,15 @@ def detect(
         outputs = model(**inputs)
     results = bundle.processor.post_process_grounded_object_detection(
-        outputs, threshold=threshold, target_sizes=[image.size[::-1]]
     )[0]
     annotations = []
-    key = "labels" if bundle.use_label_ids else "text_labels"
-    for box, score, label in zip(results["boxes"], results["scores"], results[key]):
         if float(score) < threshold:
             continue
-        if bundle.use_label_ids:
-            # Map label index -> prompt string
-            label_idx = int(label) if isinstance(label, torch.Tensor) else int(label)
-            if 0 <= label_idx < len(prompts):
-                label_name = prompts[label_idx]
-            else:
-                label_name = str(label_idx)
-        else:
-            # Direct text label
-            label_name = label if isinstance(label, str) else str(label)
         xmin, ymin, xmax, ymax = map(lambda v: int(v), box.tolist())
         annotations.append(((xmin, ymin, xmax, ymax), f"{label_name} {float(score):.2f}"))

 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 # Model bundles for cleaner wiring
 @dataclass
 class ZSDetBundle:
     """Group one zero-shot object detection model with its processor and a short name."""
     # Short name for the model; the bundles in this file derive it with
     # extract_model_short_name(<model id>).
     model_name: str
     # Processor paired with the model; the detection code calls its
     # post_process_grounded_object_detection on the model outputs.
     processor: AutoProcessor
     # The zero-shot object detection model instance.
     # NOTE(review): presumably this is what detect() invokes as model(**inputs) — confirm.
     model: AutoModelForZeroShotObjectDetection
 # LLMDet
 model_llmdet_id = "iSEE-Laboratory/llmdet_tiny"
     model_name=extract_model_short_name(model_llmdet_id),
     processor=processor_llmdet,
     model=model_llmdet,
 )
 # MM GroundingDINO
     model_name=extract_model_short_name(model_mm_grounding_id),
     processor=processor_mm_grounding,
     model=model_mm_grounding,
 )
 # OMDet Turbo
     model_name=extract_model_short_name(model_omdet_id),
     processor=processor_omdet,
     model=model_omdet,
 )
 # OWLv2
     model_name=extract_model_short_name(model_owlv2_id),
     processor=processor_owlv2,
     model=model_owlv2,
 )
 # ---------------------------
         outputs = model(**inputs)
     results = bundle.processor.post_process_grounded_object_detection(
+        outputs, threshold=threshold, target_sizes=[image.size[::-1]], text_labels=texts,
     )[0]
     annotations = []
+    for box, score, label_name in zip(results["boxes"], results["scores"], results["text_labels"]):
         if float(score) < threshold:
             continue
         xmin, ymin, xmax, ymax = map(lambda v: int(v), box.tolist())
         annotations.append(((xmin, ymin, xmax, ymax), f"{label_name} {float(score):.2f}"))