Spaces:

ishanprogs
/

car-segmentation

Sleeping

App Files Files Community

ishanprogs committed on Apr 21

Commit

4ce9de2

verified ·

1 Parent(s): b1a336a

Upload 5 files

Browse files

Files changed (5) hide show

app.py +228 -0
clip_text_features.pt +3 -0
clip_vit_b16.pth +3 -0
requirements.txt +13 -0
yolobest.pt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,228 @@

+import gc
+import os
+import time
+import clip
+import cv2
+import gradio as gr
+import numpy as np
+import torch
+from PIL import Image
+from ultralytics import YOLO # Import YOLO
+# --- Configuration & Model Loading ---
+# Device Setup
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using device: {device}")
+# --- CLIP Model Setup ---
+print("Loading CLIP model...")
+try:
+    clip_model, clip_preprocess = clip.load("ViT-B/16", device=device, jit=False) # jit=False can sometimes help compatibility
+    # Load saved visual backbone weights (optional but good practice if specifically saved)
+    # clip_model_path = "clip_model/clip_vit_b16.pth"
+    # if os.path.exists(clip_model_path):
+    #     clip_model.load_state_dict(torch.load(clip_model_path, map_location=device))
+    #     print("Loaded custom CLIP visual weights.")
+    clip_model.eval()
+    # Load saved text features
+    clip_text_features_path = "clip_text_features.pt"
+    if not os.path.exists(clip_text_features_path):
+        raise FileNotFoundError("CLIP text features file 'clip_text_features.pt' not found.")
+    clip_text_features = torch.load(clip_text_features_path, map_location=device)
+    print("CLIP model and text features loaded.")
+except Exception as e:
+    print(f"Error loading CLIP model or features: {e}")
+    # Handle error appropriately, maybe disable CLIP check
+    clip_model = None
+# --- YOLOv8 Model Setup ---
+print("Loading YOLOv8 model...")
+# Define class names EXACTLY as used during YOLO training
+YOLO_CLASSES = ['Cracked', 'Scratch', 'Flaking', 'Broken part', 'Corrosion', 'Dent','Paint chip','Missing part']
+YOLO_NUM_CLASSES = len(YOLO_CLASSES)
+# Path to your best YOLOv8 weights
+yolo_weights_path = "best.pt"
+if not os.path.exists(yolo_weights_path):
+    raise FileNotFoundError(f"YOLOv8 weights file '{yolo_weights_path}' not found.")
+try:
+    yolo_model = YOLO(yolo_weights_path)
+    # Set model parameters manually if needed (especially if config wasn't saved)
+    # This ensures the internal model state matches your training
+    # yolo_model.model.yaml['nc'] = YOLO_NUM_CLASSES # Usually loaded from weights/yaml, but good to verify
+    # Forcing model names if they don't load correctly from weights:
+    yolo_model.names = {i: name for i, name in enumerate(YOLO_CLASSES)}
+    # Move model to device explicitly
+    yolo_model.to(device)
+    print("YOLOv8 model loaded.")
+    print(f"YOLOv8 Class Names: {yolo_model.names}")
+except Exception as e:
+    print(f"Error loading YOLOv8 model: {e}")
+    yolo_model = None
+# --- Prediction Functions ---
+def validate_image_with_clip(image_pil):
+    """Checks if the PIL image is likely a car using CLIP."""
+    if clip_model is None:
+        print("CLIP model not loaded, skipping validation.")
+        return "Car", 1.0 # Assume it's a car if CLIP failed to load
+    print("Running CLIP validation...")
+    try:
+        # Use simple preprocessing for validation check
+        image_input = clip_preprocess(image_pil).unsqueeze(0).to(device)
+        with torch.no_grad():
+            image_features = clip_model.encode_image(image_input)
+            image_features /= image_features.norm(dim=-1, keepdim=True)
+            logit_scale = clip_model.logit_scale.exp()
+            similarity = (image_features @ clip_text_features.T) * logit_scale
+            probs = similarity.softmax(dim=-1).squeeze() # Get probabilities
+        car_prob = probs[0].item()
+        not_car_prob = probs[1].item()
+        predicted_label = "Car" if car_prob > not_car_prob else "Not Car"
+        print(f"CLIP Result: {predicted_label} (Car Prob: {car_prob:.4f}, Not Car Prob: {not_car_prob:.4f})")
+        return predicted_label, car_prob
+    except Exception as e:
+        print(f"Error during CLIP prediction: {e}")
+        return "Error", 0.0
+def predict_damage_with_yolo(image_np_bgr, confidence_threshold=0.4):
+    """Runs YOLOv8 segmentation on the OpenCV image (BGR)."""
+    if yolo_model is None:
+        print("YOLOv8 model not loaded, skipping damage prediction.")
+        # Return original image with error message
+        cv2.putText(image_np_bgr, "YOLOv8 model failed to load", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+        return cv2.cvtColor(image_np_bgr, cv2.COLOR_BGR2RGB) # Return RGB for Gradio
+    print(f"Running YOLOv8 prediction with conf: {confidence_threshold}...")
+    try:
+        # Perform prediction
+        results = yolo_model.predict(
+            source=image_np_bgr, # Pass BGR numpy array
+            conf=confidence_threshold,
+            verbose=False, # Less console output
+            device=device
+        )
+        if not results or len(results) == 0:
+            print("YOLOv8 predict() returned no results.")
+            # Return original image with message
+            cv2.putText(image_np_bgr, "No results from YOLOv8", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 128, 255), 2)
+            return cv2.cvtColor(image_np_bgr, cv2.COLOR_BGR2RGB)
+        result = results[0] # Get results for the first image
+        # Use the built-in plot function to draw results on the image
+        # result.plot() returns a NumPy array in RGB format
+        annotated_image_rgb = result.plot(conf=True, boxes=True, masks=True)
+        print(f"YOLOv8 found {len(result.boxes)} instances above threshold.")
+        return annotated_image_rgb # Return the annotated RGB image
+    except Exception as e:
+        print(f"Error during YOLOv8 prediction or plotting: {e}")
+        # Return original image with error message
+        cv2.putText(image_np_bgr, f"YOLO Error: {e}", (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2, cv2.LINE_AA)
+        return cv2.cvtColor(image_np_bgr, cv2.COLOR_BGR2RGB)
+# --- Main Gradio Function ---
+def validate_and_segment(input_image_pil, clip_threshold, yolo_threshold):
+    """
+    Main function called by Gradio interface.
+    Takes a PIL image, runs CLIP validation, then YOLOv8 segmentation if valid.
+    """
+    start_time = torch.cuda.Event(enable_timing=True)
+    end_time = torch.cuda.Event(enable_timing=True)
+    if input_image_pil is None:
+        return None, "Please upload an image."
+    # 1. Validate using CLIP
+    clip_label, clip_prob = validate_image_with_clip(input_image_pil)
+    if clip_label == "Error":
+        return None, "Error during CLIP validation."
+    if clip_label == "Not Car" or clip_prob < clip_threshold:
+        status_message = f"Image rejected by validator. Classified as '{clip_label}' (Confidence: {clip_prob:.2f}). Required > {clip_threshold:.2f}."
+        print(status_message)
+        # Convert PIL to numpy BGR then RGB for display
+        img_display_rgb = cv2.cvtColor(np.array(input_image_pil), cv2.COLOR_RGB2BGR)
+        cv2.putText(img_display_rgb, status_message, (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2, cv2.LINE_AA)
+        img_display_rgb = cv2.cvtColor(img_display_rgb, cv2.COLOR_BGR2RGB)
+        return img_display_rgb, status_message # Display original image with message
+    # 2. If validation passes, run YOLOv8 segmentation
+    status_message = f"Image validated as 'Car' (Confidence: {clip_prob:.2f}). Running damage segmentation..."
+    print(status_message)
+    # Convert PIL Image to OpenCV format (BGR NumPy array) for YOLOv8
+    image_np_bgr = cv2.cvtColor(np.array(input_image_pil), cv2.COLOR_RGB2BGR)
+    # Record start time for YOLO prediction
+    start_time.record()
+    # Run YOLO prediction
+    annotated_image_rgb = predict_damage_with_yolo(image_np_bgr, yolo_threshold)
+    # Record end time and calculate duration
+    end_time.record()
+    torch.cuda.synchronize()  # Wait for GPU operations to complete
+    prediction_time = start_time.elapsed_time(end_time) / 1000.0  # Time in seconds
+    status_message += f"\nDamage segmentation complete (Time: {prediction_time:.2f}s)."
+    print(status_message)
+    # Clear memory after prediction
+    gc.collect()
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+    return annotated_image_rgb, status_message
+# --- Create Gradio Interface ---
+print("Creating Gradio interface...")
+# Define input and output components
+image_input = gr.Image(type="pil", label="Upload Car Image") # Input PIL image
+image_output = gr.Image(type="numpy", label="Segmentation Result") # Output NumPy array (RGB)
+status_output = gr.Textbox(label="Status & Validation Result")
+clip_slider = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, value=0.5, label="CLIP Car Confidence Threshold")
+yolo_slider = gr.Slider(minimum=0.05, maximum=0.95, step=0.05, value=0.4, label="YOLO Damage Confidence Threshold")
+# Load example images if available
+example_image_folder = "examples"
+example_list = []
+if os.path.isdir(example_image_folder):
+    for img_name in os.listdir(example_image_folder):
+        if img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
+            example_list.append(os.path.join(example_image_folder, img_name))
+# Build the interface: validate_and_segment receives (image, CLIP threshold,
+# YOLO threshold) and returns (annotated image, status text).
+iface = gr.Interface(
+    fn=validate_and_segment,
+    inputs=[image_input, clip_slider, yolo_slider],
+    outputs=[image_output, status_output],
+    title="🚗 Car Damage Validation & Segmentation",
+    description="Upload an image of a car. The system first validates if it's a car using CLIP. If validated, it runs YOLOv8 to segment damage.",
+    # Pass None rather than an empty list when no example images were found.
+    examples=example_list if example_list else None,
+    # NOTE(review): newer Gradio releases may prefer `flagging_mode` over
+    # `allow_flagging` - confirm against the installed version.
+    allow_flagging='never' # Disable flagging
+)
+# --- Launch the Interface ---
+# The server is started at import time; there is no `__main__` guard.
+print("Launching Gradio interface...")
+# share=True creates a public link (valid for ~72h) if running locally outside HF Spaces
+# Use auth for basic protection if needed: auth=("username", "password")
+iface.launch(share=False) # Set share=True if running locally and need public access

clip_text_features.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:28315215c9429a04e5aafd99cf8a0292a489bf2937d44d580a3cf1c78ee84f94
+size 3283

clip_vit_b16.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6a3b5ff65477fcc1bbaa1fcaa249a6f9745269e6e06e751f9eea6efeb521bb7b
+size 350463888

requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+torch
+torchvision
+torchaudio
+gradio
+ultralytics
+opencv-python-headless # Use headless version for servers
+matplotlib # If used by ultralytics plotting or your code
+ftfy # CLIP dependency
+regex # CLIP dependency
+git+https://github.com/openai/CLIP.git # Install CLIP directly
+Pillow # PIL dependency for CLIP/images
+# Add any other specific libraries you might need
+pyyaml # Usually needed by ultralytics/detectron2 indirectly

yolobest.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ae345bfb159676f6343daf72c1912bb374fa4997e6788e84d930b9bb28751d27
+size 92296829