import gradio as gr
import torch
import numpy as np
from PIL import Image
from transformers import Sam3Processor, Sam3Model

# Load model on startup
print("Loading SAM 3 model...")
device = "cuda" if torch.cuda.is_available() else "cpu"
model = Sam3Model.from_pretrained("facebook/sam3", token=True).to(device)
processor = Sam3Processor.from_pretrained("facebook/sam3", token=True)
print(f"Model loaded on {device}")

# Reference dimensions (inches) for common fixtures, used to estimate image scale
REFERENCE_DIMENSIONS = {
    "light switch wall plate": {"height": 4.5, "width": 2.75},
    "electrical wall outlet": {"height": 4.5, "width": 2.75},
    "room door": {"height": 80, "width": 36},
    "window": {"height": 48, "width": 36},
}


def get_mask_dimensions(mask):
    """Return (height, width) of a mask's occupied extent in pixels."""
    mask_np = mask.cpu().numpy() if hasattr(mask, "cpu") else mask
    rows = np.any(mask_np, axis=1)
    cols = np.any(mask_np, axis=0)
    return np.sum(rows), np.sum(cols)


def calculate_mask_area(mask):
    """Count the number of positive pixels in a mask."""
    if hasattr(mask, "sum"):
        return mask.sum().item()
    return np.sum(mask)


def pixel_area_to_sqft(pixel_area, pixels_per_inch):
    """Convert a pixel area to square feet given an estimated scale."""
    if pixels_per_inch is None or pixels_per_inch == 0:
        return 0
    square_inches = pixel_area / (pixels_per_inch ** 2)
    return square_inches / 144  # 144 sq in per sq ft


def overlay_masks_colored(image, masks, alpha=0.5, color=(255, 0, 0)):
    """Composite each mask onto the image as a semi-transparent colored layer."""
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image.astype("uint8"))
    image = image.convert("RGBA")
    for mask in masks:
        mask_np = mask.cpu().numpy() if hasattr(mask, "cpu") else mask
        if mask_np.max() <= 1.0:
            mask_np = (mask_np * 255).astype(np.uint8)
        else:
            mask_np = mask_np.astype(np.uint8)
        mask_img = Image.fromarray(mask_np)
        overlay = Image.new("RGBA", image.size, color + (0,))
        alpha_mask = mask_img.point(lambda v: int(v * alpha))
        overlay.putalpha(alpha_mask)
        image = Image.alpha_composite(image, overlay)
    return image


def analyze_room(image):
    if image is None:
        return None, {"error": "No image provided"}
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image).convert("RGB")
    else:
        image = image.convert("RGB")

    room_concepts = [
        "wall", "floor", "rug", "carpet", "ceiling",
        "room door", "window", "electrical wall outlet",
        "light switch wall plate",
    ]

    # Run one text-prompted segmentation pass per concept
    room_segments = {}
    for concept in room_concepts:
        inputs = processor(images=image, text=concept, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model(**inputs)
        results = processor.post_process_instance_segmentation(
            outputs,
            threshold=0.3,
            mask_threshold=0.3,
            target_sizes=inputs.get("original_sizes").tolist(),
        )[0]
        room_segments[concept] = results

    # Merge floor, rug, and carpet masks into a single floor estimate
    floor_masks = []
    for key in ["floor", "rug", "carpet"]:
        if key in room_segments and len(room_segments[key]["masks"]) > 0:
            for mask in room_segments[key]["masks"]:
                floor_masks.append(mask)
    if floor_masks:
        room_segments["total_floor"] = {"masks": floor_masks}

    # Estimate pixels-per-inch from the first reference object found,
    # using its known real-world height
    pixels_per_inch = None
    for ref_type in ["light switch wall plate", "electrical wall outlet",
                     "room door", "window"]:
        if ref_type in room_segments and len(room_segments[ref_type]["masks"]) > 0:
            mask = room_segments[ref_type]["masks"][0]
            pixel_height, _ = get_mask_dimensions(mask)
            pixels_per_inch = pixel_height / REFERENCE_DIMENSIONS[ref_type]["height"]
            break
    if pixels_per_inch is None:
        pixels_per_inch = 5.0  # fallback scale when no reference object is found

    # Convert mask areas to square footage
    measurements = {}
    for concept in ["wall", "ceiling", "window", "room door"]:
        if concept in room_segments and len(room_segments[concept]["masks"]) > 0:
            total_pixels = sum(calculate_mask_area(m) for m in room_segments[concept]["masks"])
            sqft = pixel_area_to_sqft(total_pixels, pixels_per_inch)
            measurements[concept] = round(sqft, 1)
    if "total_floor" in room_segments and len(room_segments["total_floor"]["masks"]) > 0:
        total_pixels = sum(calculate_mask_area(m) for m in room_segments["total_floor"]["masks"])
        sqft = pixel_area_to_sqft(total_pixels, pixels_per_inch)
        measurements["floor"] = round(sqft, 1)

    # If the ceiling is mostly occluded in the photo, assume it matches the floor area
    floor_sqft = measurements.get("floor", 0)
    ceiling_sqft = measurements.get("ceiling", 0)
    if ceiling_sqft < (floor_sqft * 0.5) and floor_sqft > 0:
        measurements["ceiling"] = floor_sqft

    # Material estimates: two coats of paint at ~350 sq ft per gallon,
    # flooring in ~20 sq ft boxes with 10% waste allowance
    wall_sqft = measurements.get("wall", 0)
    window_sqft = measurements.get("window", 0)
    paint_wall = max(0, wall_sqft - window_sqft)
    materials = {
        "paint_gallons": max(1, round((paint_wall * 2) / 350, 1)),
        "flooring_boxes": max(1, round((floor_sqft * 1.1) / 20)) if floor_sqft else 0,
    }

    # Rough costs: $35/gallon plus $2/sq ft paint labor;
    # $4/sq ft flooring material plus $3/sq ft install labor
    c_paint = (materials["paint_gallons"] * 35) + (paint_wall * 2)
    c_floor = (floor_sqft * 4) + (floor_sqft * 3) if floor_sqft else 0
    total_cost = round(c_paint + c_floor)

    # Build the colored overlay: walls red, floor green, windows indigo
    overlay_img = image.copy()
    if "wall" in room_segments and len(room_segments["wall"]["masks"]) > 0:
        overlay_img = overlay_masks_colored(
            overlay_img, room_segments["wall"]["masks"],
            alpha=0.4, color=(239, 68, 68))
    if "total_floor" in room_segments and len(room_segments["total_floor"]["masks"]) > 0:
        overlay_img = overlay_masks_colored(
            overlay_img, room_segments["total_floor"]["masks"],
            alpha=0.4, color=(16, 185, 129))
    if "window" in room_segments and len(room_segments["window"]["masks"]) > 0:
        overlay_img = overlay_masks_colored(
            overlay_img, room_segments["window"]["masks"],
            alpha=0.4, color=(99, 102, 241))
    if overlay_img.mode == "RGBA":
        # Flatten onto a white background so Gradio receives an RGB image
        background = Image.new("RGB", overlay_img.size, (255, 255, 255))
        background.paste(overlay_img, mask=overlay_img.split()[3])
        overlay_img = background

    results = {
        "measurements": measurements,
        "materials": materials,
        "costs": {
            "paint_and_labor": round(c_paint),
            "flooring_and_labor": round(c_floor),
            "total": total_cost,
        },
    }
    return overlay_img, results


# Create and launch Gradio interface
demo = gr.Interface(
    fn=analyze_room,
    inputs=gr.Image(type="pil", label="Upload Room Photo"),
    outputs=[
        gr.Image(label="Segmentation Overlay"),
        gr.JSON(label="Room Analysis Results"),
    ],
    title="Room Estimator",
    description="Upload a room photo to get measurements and material estimates",
)

demo.launch(ssr_mode=False)
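
# A minimal way to try this locally (assuming the usual dependencies and a
# Hugging Face login, since facebook/sam3 is loaded with token=True):
#   pip install gradio torch transformers pillow numpy
#   huggingface-cli login
#   python app.py  # or whatever filename this script is saved under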