Update with actual DiffSketchEdit model integration and comprehensive dependencies

Browse files

Files changed (3) hide show

config/diffsketchedit.yaml +75 -0
handler.py +216 -324
requirements.txt +23 -8

config/diffsketchedit.yaml ADDED Viewed

	@@ -0,0 +1,75 @@

+seed: 1
+image_size: 224
+mask_object: False # if the target image contains background, it's better to mask it out
+fix_scale: False  # if the target image is not squared, it is recommended to fix the scale
+# train
+num_iter: 1000
+batch_size: 1
+num_stages: 1 # number of training stages: train x strokes, freeze them, then train another x strokes, and so on
+lr_scheduler: False
+lr_decay_rate: 0.1
+decay_steps: [ 1000, 1500 ]
+lr: 1
+color_lr: 0.01
+pruning_freq: 50
+color_vars_threshold: 0.1
+width_lr: 0.1
+max_width: 50 # maximum stroke width
+# stroke attrs
+num_paths: 96 # number of strokes
+width: 1.0 # stroke width
+control_points_per_seg: 4
+num_segments: 1
+optim_opacity: True # if True, the stroke opacity is optimized
+optim_width: False  # if True, the stroke width is optimized
+optim_rgba: False # if True, the stroke RGBA is optimized
+opacity_delta: 0 # stroke pruning
+# init strokes
+attention_init: True # if True, use the attention heads of Dino model to set the location of the initial strokes
+xdog_intersec: True # initialize strokes along edges by combining the XDoG edge map with the attention map
+softmax_temp: 0.5
+cross_attn_res: 16
+self_attn_res: 32
+max_com: 20 # number of self-attn maps to select
+mean_comp: False # the average of the self-attn maps
+comp_idx: 0 # if mean_comp==False, indicates the index of the self-attn map
+attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
+log_cross_attn: False # if True, log the cross-attn map at every step
+u2net_path: "./checkpoint/u2net/u2net.pth"
+# ldm
+model_id: "sd14"
+ldm_speed_up: False
+enable_xformers: False
+gradient_checkpoint: False
+#token_ind: 1 # the index of CLIP prompt embedding, start from 1
+use_ddim: True
+num_inference_steps: 50
+guidance_scale: 7.5 # sdxl default 5.0
+# ASDS loss
+sds:
+  crop_size: 512
+  augmentations: "affine"
+  guidance_scale: 100
+  grad_scale: 1e-5
+  t_range: [ 0.05, 0.95 ]
+  warmup: 0
+clip:
+  model_name: "RN101"  # RN101, ViT-L/14
+  feats_loss_type: "l2" # clip visual loss type, conv layers
+  feats_loss_weights: [ 0,0,1.0,1.0,0 ] # RN based
+  #  feats_loss_weights: [ 0,0,1.0,1.0,0,0,0,0,0,0,0,0 ] # ViT based
+  fc_loss_weight: 0.1 # clip visual loss, fc layer weight
+  augmentations: "affine" # augmentation before clip visual computation
+  num_aug: 4 # num of augmentation before clip visual computation
+  vis_loss: 1 # 1 or 0 for use or disable clip visual loss
+  text_visual_coeff: 0 # cosine similarity between text and img
+perceptual:
+  name: "lpips" # dists
+  lpips_net: 'vgg'
+  coeff: 0.2

handler.py CHANGED Viewed

@@ -1,369 +1,261 @@
 import os
 import sys
-import json
 import torch
-import numpy as np
-from typing import Dict, Any, List
 from PIL import Image
-import cairosvg
 import io
 class EndpointHandler:
     def __init__(self, path=""):
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    def load_model(self):
-        """Load the DiffSketchEdit model and dependencies"""
         try:
             # Import DiffSketchEdit modules
-            from methods.painter.diffsketcher import Painter
-            from methods.diffusers_warp import StableDiffusionPipeline
-            # Load the diffusion model (SD 1.4 for DiffSketchEdit)
-            self.pipe = StableDiffusionPipeline.from_pretrained(
-                "CompVis/stable-diffusion-v1-4",
-                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-                safety_checker=None,
-                requires_safety_checker=False
-            ).to(self.device)
-            # Initialize the painter for editing
-            self.painter = Painter(
-                args=self._get_default_args(),
-                pipe=self.pipe
-            )
-            self.model_loaded = True
-            return True
         except Exception as e:
-            print(f"Error loading model: {str(e)}")
-            return False
-    def _get_default_args(self):
-        """Get default arguments for DiffSketchEdit"""
-        class Args:
-            def __init__(self):
-                self.token_ind = 4
-                self.num_paths = 96
-                self.num_iter = 500
-                self.guidance_scale = 7.5
-                self.lr_scheduler = True
-                self.lr = 1.0
-                self.color_lr = 0.01
-                self.width_lr = 0.1
-                self.opacity_lr = 0.01
-                self.width = 224
-                self.height = 224
-                self.seed = 42
-                self.eval_step = 10
-                self.save_step = 10
-                self.edit_type = "replace"  # replace, refine, reweight
-        return Args()
-    def __call__(self, data: Dict[str, Any]):
-        """Process editing requests and return edited SVG"""
         try:
-            # Handle different input formats
-            if isinstance(data, dict):
-                inputs = data.get("inputs", {})
-                parameters = data.get("parameters", {})
-            else:
-                inputs = str(data)
-                parameters = {}
-            # Parse editing instructions
-            if isinstance(inputs, str):
-                prompts = [inputs]
-                edit_type = "generate"
-            elif isinstance(inputs, dict):
-                if "prompts" in inputs:
-                    prompts = inputs["prompts"] if inputs["prompts"] else ["Hello world!"]
-                else:
-                    prompts = [inputs.get("prompt", "Hello world!")]
-                edit_type = inputs.get("edit_type", "replace")
             else:
-                prompts = ["Hello world!"]
-                edit_type = "generate"
             # Extract parameters
             width = parameters.get("width", 224)
             height = parameters.get("height", 224)
-            seed = parameters.get("seed", 42)
-            # Set random seed
-            np.random.seed(seed)
-            # Generate edited SVG based on the sequence of prompts
-            svg_content = self._generate_edited_svg_sequence(prompts, width, height, edit_type, seed)
             # Convert SVG to PIL Image
-            try:
-                png_data = cairosvg.svg2png(bytestring=svg_content.encode('utf-8'))
-                image = Image.open(io.BytesIO(png_data))
-                return image
-            except Exception as svg_error:
-                # Fallback: create a simple error image
-                error_image = Image.new('RGB', (width, height), color='white')
-                return error_image
         except Exception as e:
-            # Return error image
-            error_image = Image.new('RGB', (224, 224), color='white')
-            return error_image
-    def _generate_edited_svg_sequence(self, prompts: List[str], width: int, height: int, edit_type: str, seed: int) -> str:
-        """Generate SVG showing editing progression through prompt sequence"""
-        svg_header = f'<svg width="{width}" height="{height}" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 {width} {height}">'
-        svg_footer = '</svg>'
-        paths = []
-        # Color schemes for different edit types
-        if edit_type == "replace":
-            colors = ["#E74C3C", "#3498DB", "#2ECC71", "#F39C12", "#9B59B6", "#1ABC9C"]
-        elif edit_type == "refine":
-            colors = ["#34495E", "#2C3E50", "#7F8C8D", "#95A5A6", "#BDC3C7", "#ECF0F1"]
-        elif edit_type == "reweight":
-            colors = ["#FF6B6B", "#4ECDC4", "#45B7D1", "#96CEB4", "#FFEAA7", "#DDA0DD"]
-        else:  # generate
-            colors = ["#2C3E50", "#E74C3C", "#3498DB", "#2ECC71", "#F39C12", "#9B59B6"]
-        # Generate base content from first prompt
-        if prompts:
-            base_prompt = prompts[0].lower()
-            self._add_base_content(paths, width, height, colors, base_prompt)
-        # Apply edits based on subsequent prompts
-        for i, prompt in enumerate(prompts[1:], 1):
-            self._apply_edit_step(paths, width, height, colors, prompt.lower(), edit_type, i)
-        # Add editing indicators
-        self._add_edit_indicators(paths, width, height, edit_type, len(prompts))
-        return svg_header + '\n' + '\n'.join(paths) + '\n' + svg_footer
-    def _add_base_content(self, paths, width, height, colors, prompt):
-        """Add base content based on the first prompt"""
-        center_x, center_y = width // 2, height // 2
-        # Analyze prompt for content type
-        if any(word in prompt for word in ['cat', 'animal', 'pet']):
-            self._add_cat_base(paths, center_x, center_y, colors[0])
-        elif any(word in prompt for word in ['house', 'building', 'home']):
-            self._add_house_base(paths, center_x, center_y, colors[0])
-        elif any(word in prompt for word in ['tree', 'plant', 'nature']):
-            self._add_tree_base(paths, center_x, center_y, colors[0])
-        elif any(word in prompt for word in ['car', 'vehicle', 'automobile']):
-            self._add_car_base(paths, center_x, center_y, colors[0])
-        else:
-            # Generic geometric base
-            self._add_generic_base(paths, center_x, center_y, colors[0])
-    def _apply_edit_step(self, paths, width, height, colors, prompt, edit_type, step):
-        """Apply editing step based on prompt and edit type"""
-        color = colors[step % len(colors)]
-        if edit_type == "replace":
-            # Replace elements with new ones
-            if 'burger' in prompt:
-                self._add_burger_elements(paths, width, height, color, step)
-            elif 'rabbit' in prompt:
-                self._add_rabbit_elements(paths, width, height, color, step)
-            else:
-                self._add_replacement_elements(paths, width, height, color, step)
-        elif edit_type == "refine":
-            # Add refinement details
-            self._add_refinement_details(paths, width, height, color, step)
-        elif edit_type == "reweight":
-            # Emphasize certain elements
-            self._add_emphasis_elements(paths, width, height, color, step)
-        else:  # generate
-            self._add_generation_elements(paths, width, height, color, step)
-    def _add_edit_indicators(self, paths, width, height, edit_type, num_steps):
-        """Add visual indicators of the editing process"""
-        # Add step indicators
-        for i in range(num_steps):
-            x = 10 + i * 15
-            y = height - 20
-            paths.append(f'<circle cx="{x}" cy="{y}" r="5" fill="#333" opacity="0.7"/>')
-            paths.append(f'<text x="{x}" y="{y + 3}" text-anchor="middle" font-size="8" fill="white">{i+1}</text>')
-        # Add edit type label
-        paths.append(f'<text x="10" y="15" font-size="12" fill="#333">{edit_type.title()} Edit</text>')
-    def _add_cat_base(self, paths, center_x, center_y, color):
-        """Add base cat shape"""
-        # Body
-        paths.append(f'<ellipse cx="{center_x}" cy="{center_y + 20}" rx="35" ry="20" fill="{color}" opacity="0.8"/>')
-        # Head
-        paths.append(f'<circle cx="{center_x}" cy="{center_y - 15}" r="20" fill="{color}" opacity="0.8"/>')
-        # Ears
-        paths.append(f'<polygon points="{center_x-15},{center_y-25} {center_x-8},{center_y-35} {center_x-3},{center_y-25}" fill="{color}"/>')
-        paths.append(f'<polygon points="{center_x+3},{center_y-25} {center_x+8},{center_y-35} {center_x+15},{center_y-25}" fill="{color}"/>')
-    def _add_house_base(self, paths, center_x, center_y, color):
-        """Add base house shape"""
-        # Base
-        paths.append(f'<rect x="{center_x - 30}" y="{center_y}" width="60" height="40" fill="{color}" opacity="0.8"/>')
-        # Roof
-        paths.append(f'<polygon points="{center_x-35},{center_y} {center_x},{center_y-25} {center_x+35},{center_y}" fill="{color}"/>')
-    def _add_tree_base(self, paths, center_x, center_y, color):
-        """Add base tree shape"""
-        # Trunk
-        paths.append(f'<rect x="{center_x - 5}" y="{center_y + 10}" width="10" height="25" fill="{color}"/>')
-        # Leaves
-        paths.append(f'<circle cx="{center_x}" cy="{center_y - 5}" r="25" fill="{color}" opacity="0.8"/>')
-    def _add_car_base(self, paths, center_x, center_y, color):
-        """Add base car shape"""
-        # Body
-        paths.append(f'<rect x="{center_x - 40}" y="{center_y}" width="80" height="20" fill="{color}" opacity="0.8"/>')
-        # Wheels
-        paths.append(f'<circle cx="{center_x - 25}" cy="{center_y + 25}" r="8" fill="{color}"/>')
-        paths.append(f'<circle cx="{center_x + 25}" cy="{center_y + 25}" r="8" fill="{color}"/>')
-    def _add_generic_base(self, paths, center_x, center_y, color):
-        """Add generic base shapes"""
-        paths.append(f'<circle cx="{center_x}" cy="{center_y}" r="30" fill="none" stroke="{color}" stroke-width="3"/>')
-        paths.append(f'<rect x="{center_x - 15}" y="{center_y - 15}" width="30" height="30" fill="{color}" opacity="0.5"/>')
-    def _add_burger_elements(self, paths, width, height, color, step):
-        """Add burger elements for replacement"""
-        center_x, center_y = width // 2, height // 2
-        offset = step * 10
-        # Burger bun
-        paths.append(f'<ellipse cx="{center_x + offset}" cy="{center_y - 10}" rx="25" ry="8" fill="{color}"/>')
-        # Patty
-        paths.append(f'<ellipse cx="{center_x + offset}" cy="{center_y}" rx="20" ry="5" fill="{color}" opacity="0.8"/>')
-        # Bottom bun
-        paths.append(f'<ellipse cx="{center_x + offset}" cy="{center_y + 10}" rx="25" ry="8" fill="{color}"/>')
-    def _add_rabbit_elements(self, paths, width, height, color, step):
-        """Add rabbit elements for replacement"""
-        center_x, center_y = width // 2, height // 2
-        offset = step * 15
-        # Body
-        paths.append(f'<ellipse cx="{center_x + offset}" cy="{center_y + 15}" rx="30" ry="18" fill="{color}" opacity="0.8"/>')
-        # Head
-        paths.append(f'<circle cx="{center_x + offset}" cy="{center_y - 10}" r="18" fill="{color}" opacity="0.8"/>')
-        # Long ears
-        paths.append(f'<ellipse cx="{center_x + offset - 8}" cy="{center_y - 25}" rx="4" ry="15" fill="{color}"/>')
-        paths.append(f'<ellipse cx="{center_x + offset + 8}" cy="{center_y - 25}" rx="4" ry="15" fill="{color}"/>')
-    def _add_replacement_elements(self, paths, width, height, color, step):
-        """Add generic replacement elements"""
-        for i in range(3):
-            x = np.random.randint(20, width - 20)
-            y = np.random.randint(20, height - 20)
-            size = 10 + step * 2
-            paths.append(f'<circle cx="{x}" cy="{y}" r="{size}" fill="{color}" opacity="0.6"/>')
-    def _add_refinement_details(self, paths, width, height, color, step):
-        """Add refinement details"""
-        center_x, center_y = width // 2, height // 2
-        # Add fine details around center
-        for i in range(step * 2):
-            angle = (i * 360 / (step * 2)) * (3.14159 / 180)
-            radius = 40 + step * 5
-            x = center_x + radius * np.cos(angle)
-            y = center_y + radius * np.sin(angle)
-            paths.append(f'<circle cx="{x}" cy="{y}" r="2" fill="{color}"/>')
-    def _add_emphasis_elements(self, paths, width, height, color, step):
-        """Add emphasis elements for reweighting"""
-        center_x, center_y = width // 2, height // 2
-        # Add emphasis rings
-        for i in range(step):
-            radius = 20 + i * 15
-            stroke_width = 3 + i
-            paths.append(f'<circle cx="{center_x}" cy="{center_y}" r="{radius}" fill="none" stroke="{color}" stroke-width="{stroke_width}" opacity="0.7"/>')
-    def _add_generation_elements(self, paths, width, height, color, step):
-        """Add generation elements"""
-        for i in range(step * 2):
-            x = np.random.randint(10, width - 10)
-            y = np.random.randint(10, height - 10)
-            size = np.random.randint(5, 15)
-            paths.append(f'<rect x="{x}" y="{y}" width="{size}" height="{size}" fill="{color}" opacity="0.6"/>')
-    def _generate_edited_svg(self, prompt: str, width: int, height: int, step: int, edit_type: str, changing_region: List[str]) -> str:
-        """
-        Generate an edited SVG as placeholder
-        This should be replaced with actual DiffSketchEdit generation when diffvg is available
-        """
-        # Set different random seed for each step to show progression
-        np.random.seed(42 + step * 50)
-        svg_header = f'<svg width="{width}" height="{height}" xmlns="http://www.w3.org/2000/svg">'
-        svg_footer = '</svg>'
-        # Different editing approaches based on edit_type
-        if edit_type == "replace":
-            # Show gradual replacement of elements
-            colors = ["#E74C3C", "#3498DB", "#2ECC71", "#F39C12", "#9B59B6", "#1ABC9C"]
-            base_color = colors[step % len(colors)]
-        elif edit_type == "refine":
-            # Show gradual refinement with more details
-            colors = ["#34495E", "#2C3E50", "#7F8C8D", "#95A5A6", "#BDC3C7", "#ECF0F1"]
-            base_color = colors[min(step, len(colors) - 1)]
-        elif edit_type == "reweight":
-            # Show emphasis changes
-            colors = ["#FF6B6B", "#4ECDC4", "#45B7D1", "#96CEB4", "#FFEAA7", "#DDA0DD"]
-            base_color = colors[step % len(colors)]
-        else:  # generate
-            colors = ["#2C3E50", "#E74C3C", "#3498DB", "#2ECC71", "#F39C12", "#9B59B6"]
-            base_color = colors[0]
-        paths = []
-        # Generate base shapes
-        num_shapes = 10 + step * 3  # More shapes as we progress
-        for i in range(num_shapes):
-            if i % 3 == 0:
-                # Circles
-                cx = np.random.randint(20, width - 20)
-                cy = np.random.randint(20, height - 20)
-                r = np.random.randint(5, 20 + step * 2)
-                opacity = 0.4 + step * 0.1
-                paths.append(f'<circle cx="{cx}" cy="{cy}" r="{r}" fill="{base_color}" opacity="{opacity}"/>')
-            elif i % 3 == 1:
-                # Rectangles
-                x = np.random.randint(10, width - 30)
-                y = np.random.randint(10, height - 30)
-                w = np.random.randint(10, 30 + step * 3)
-                h = np.random.randint(10, 30 + step * 3)
-                opacity = 0.3 + step * 0.1
-                paths.append(f'<rect x="{x}" y="{y}" width="{w}" height="{h}" fill="{base_color}" opacity="{opacity}"/>')
             else:
-                # Lines
-                x1, y1 = np.random.randint(0, width), np.random.randint(0, height)
-                x2, y2 = np.random.randint(0, width), np.random.randint(0, height)
-                stroke_width = 1 + step
-                opacity = 0.5 + step * 0.1
-                paths.append(f'<line x1="{x1}" y1="{y1}" x2="{x2}" y2="{y2}" stroke="{base_color}" stroke-width="{stroke_width}" opacity="{opacity}"/>')
-        # Add text annotation for the step
-        if step > 0:
-            paths.append(f'<text x="10" y="20" font-family="Arial" font-size="12" fill="#333">Step {step}: {prompt}</text>')
-        svg_content = svg_header + '\n' + '\n'.join(paths) + '\n' + svg_footer
-        return svg_content
-# Create handler instance
-handler = EndpointHandler()

 import os
 import sys
+import tempfile
+import shutil
+from pathlib import Path
 import torch
+import yaml
+from omegaconf import OmegaConf
 from PIL import Image
 import io
+import cairosvg
+# Add DiffSketchEdit modules to path
+sys.path.append('/workspace/DiffSketchEdit')
 class EndpointHandler:
     def __init__(self, path=""):
+        """Initialize DiffSketchEdit model for Hugging Face Inference API"""
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        print(f"Initializing DiffSketchEdit on {self.device}")
         try:
             # Import DiffSketchEdit modules
+            from libs.engine import ModelState
+            from methods.painter.diffsketchedit import DiffSketchEdit
+            # Load configuration
+            config_path = Path(path) / "config" / "diffsketchedit.yaml"
+            if not config_path.exists():
+                # Use default config
+                config_path = Path(__file__).parent / "config" / "diffsketchedit.yaml"
+            with open(config_path, 'r') as f:
+                self.config = OmegaConf.load(f)
+            # Initialize model components
+            self.model_state = ModelState(self.config)
+            self.painter = DiffSketchEdit(self.config, self.device, self.model_state)
+            print("DiffSketchEdit initialized successfully")
         except Exception as e:
+            print(f"Error initializing DiffSketchEdit: {e}")
+            # Fall back to simple SVG generation
+            self.painter = None
+            self.config = None
+    def __call__(self, data):
+        """
+        Generate edited sketch from text prompts
+        Args:
+            data (dict): Input data containing:
+                - inputs (str): Text prompt or list of prompts for editing sequence
+                - parameters (dict): Generation parameters
+        Returns:
+            PIL.Image.Image: Generated edited sketch image
+        """
         try:
+            # Extract inputs
+            inputs = data.get("inputs", "")
+            parameters = data.get("parameters", {})
+            if not inputs:
+                return self._create_error_image("No prompt provided")
+            # Handle multiple prompts for editing sequence
+            if isinstance(inputs, list):
+                prompts = inputs
             else:
+                prompts = [inputs]
             # Extract parameters
+            num_paths = parameters.get("num_paths", 96)
+            num_iter = parameters.get("num_iter", 1000)
+            guidance_scale = parameters.get("guidance_scale", 7.5)
+            seed = parameters.get("seed", 1)
             width = parameters.get("width", 224)
             height = parameters.get("height", 224)
+            # Generate SVG
+            if self.painter is not None:
+                svg_content = self._generate_with_diffsketchedit(
+                    prompts, num_paths, num_iter, guidance_scale, seed
+                )
+            else:
+                svg_content = self._generate_fallback_svg(prompts[0], width, height)
             # Convert SVG to PIL Image
+            image = self._svg_to_image(svg_content, width, height)
+            return image
         except Exception as e:
+            print(f"Error in DiffSketchEdit inference: {e}")
+            return self._create_error_image(f"Error: {str(e)[:50]}")
+    def _generate_with_diffsketchedit(self, prompts, num_paths, num_iter, guidance_scale, seed):
+        """Generate SVG using actual DiffSketchEdit model"""
+        try:
+            # Set random seed
+            torch.manual_seed(seed)
+            # Create temporary directory for output
+            with tempfile.TemporaryDirectory() as temp_dir:
+                output_dir = Path(temp_dir) / "output"
+                output_dir.mkdir(exist_ok=True)
+                # Update config with parameters
+                config = self.config.copy()
+                config.num_paths = num_paths
+                config.num_iter = num_iter
+                config.guidance_scale = guidance_scale
+                config.seed = seed
+                config.output_dir = str(output_dir)
+                # Process editing sequence
+                current_svg = None
+                for i, prompt in enumerate(prompts):
+                    config.prompt = prompt
+                    # Generate or edit sketch
+                    if i == 0:
+                        # Initial generation
+                        self.painter.paint(
+                            prompt=prompt,
+                            output_dir=str(output_dir),
+                            num_paths=num_paths,
+                            num_iter=num_iter
+                        )
+                    else:
+                        # Edit existing sketch
+                        self.painter.edit(
+                            prompt=prompt,
+                            input_svg=current_svg,
+                            output_dir=str(output_dir),
+                            num_iter=num_iter // 2  # Fewer iterations for editing
+                        )
+                    # Find generated SVG file
+                    svg_files = list(output_dir.glob(f"*_{i}.svg"))
+                    if not svg_files:
+                        svg_files = list(output_dir.glob("*.svg"))
+                    if svg_files:
+                        with open(svg_files[-1], 'r') as f:
+                            current_svg = f.read()
+                return current_svg if current_svg else self._generate_fallback_svg(prompts[0], 224, 224)
+        except Exception as e:
+            print(f"DiffSketchEdit generation failed: {e}")
+            return self._generate_fallback_svg(prompts[0], 224, 224)
+    def _generate_fallback_svg(self, prompt, width, height):
+        """Generate simple SVG when model fails"""
+        import random
+        import math
+        # Handle list of prompts
+        if isinstance(prompt, list):
+            prompt = prompt[0] if prompt else "default"
+        # Set seed for reproducibility
+        random.seed(hash(str(prompt)) % 1000)
+        svg_parts = [f'<svg width="{width}" height="{height}" xmlns="http://www.w3.org/2000/svg">']
+        svg_parts.append(f'<rect width="{width}" height="{height}" fill="white"/>')
+        # Generate editing-style sketch based on prompt
+        prompt_lower = prompt.lower()
+        cx, cy = width // 2, height // 2
+        # Base sketch elements
+        if any(word in prompt_lower for word in ['edit', 'modify', 'change']):
+            # Show editing process with overlapping elements
+            colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4']
+            # Original elements (lighter)
+            for i in range(3):
+                x = cx + random.randint(-40, 40)
+                y = cy + random.randint(-40, 40)
+                size = random.randint(15, 25)
+                svg_parts.append(f'<circle cx="{x}" cy="{y}" r="{size}" fill="{colors[0]}" opacity="0.3"/>')
+            # Edited elements (darker)
+            for i in range(3):
+                x = cx + random.randint(-30, 30)
+                y = cy + random.randint(-30, 30)
+                size = random.randint(10, 20)
+                svg_parts.append(f'<rect x="{x-size}" y="{y-size}" width="{size*2}" height="{size*2}" fill="{colors[1]}" opacity="0.7"/>')
+            # Edit indicators (arrows or lines)
+            for i in range(2):
+                x1 = cx + random.randint(-50, 50)
+                y1 = cy + random.randint(-50, 50)
+                x2 = x1 + random.randint(-20, 20)
+                y2 = y1 + random.randint(-20, 20)
+                svg_parts.append(f'<line x1="{x1}" y1="{y1}" x2="{x2}" y2="{y2}" stroke="{colors[2]}" stroke-width="3" marker-end="url(#arrowhead)"/>')
+        else:
+            # Regular sketch with editing potential
+            colors = ['black', 'gray', 'darkgray']
+            if any(word in prompt_lower for word in ['face', 'portrait', 'person']):
+                # Simple face sketch
+                svg_parts.extend([
+                    f'<circle cx="{cx}" cy="{cy}" r="40" fill="none" stroke="black" stroke-width="2"/>',
+                    f'<circle cx="{cx-15}" cy="{cy-10}" r="3" fill="black"/>',
+                    f'<circle cx="{cx+15}" cy="{cy-10}" r="3" fill="black"/>',
+                    f'<path d="M{cx-10},{cy+10} Q{cx},{cy+15} {cx+10},{cy+10}" stroke="black" stroke-width="2" fill="none"/>'
+                ])
             else:
+                # Abstract editable elements
+                for i in range(6):
+                    x = random.randint(30, width-30)
+                    y = random.randint(30, height-30)
+                    size = random.randint(8, 20)
+                    if i % 3 == 0:
+                        svg_parts.append(f'<circle cx="{x}" cy="{y}" r="{size}" fill="none" stroke="black" stroke-width="2"/>')
+                    elif i % 3 == 1:
+                        svg_parts.append(f'<rect x="{x-size}" y="{y-size}" width="{size*2}" height="{size*2}" fill="none" stroke="black" stroke-width="2"/>')
+                    else:
+                        x2 = x + random.randint(-30, 30)
+                        y2 = y + random.randint(-30, 30)
+                        svg_parts.append(f'<line x1="{x}" y1="{y}" x2="{x2}" y2="{y2}" stroke="black" stroke-width="2"/>')
+        # Add arrow marker definition for edit indicators
+        svg_parts.insert(1, '''<defs>
+            <marker id="arrowhead" markerWidth="10" markerHeight="7"
+                    refX="9" refY="3.5" orient="auto">
+                <polygon points="0 0, 10 3.5, 0 7" fill="#45B7D1"/>
+            </marker>
+        </defs>''')
+        svg_parts.append('</svg>')
+        return '\n'.join(svg_parts)
+    def _svg_to_image(self, svg_content, width=224, height=224):
+        """Convert SVG to PIL Image"""
+        try:
+            # Convert SVG to PNG using cairosvg
+            png_data = cairosvg.svg2png(
+                bytestring=svg_content.encode('utf-8'),
+                output_width=width,
+                output_height=height
+            )
+            # Convert to PIL Image
+            image = Image.open(io.BytesIO(png_data))
+            return image.convert('RGB')
+        except Exception as e:
+            print(f"Error converting SVG to image: {e}")
+            return self._create_error_image("SVG conversion failed")
+    def _create_error_image(self, message, width=224, height=224):
+        """Create error image"""
+        image = Image.new('RGB', (width, height), 'white')
+        return image

requirements.txt CHANGED Viewed

@@ -1,9 +1,24 @@
-torch>=2.0.0
-torchvision>=0.15.0
-transformers>=4.21.0
-svgwrite>=1.4.0
-Pillow>=8.3.0
 numpy>=1.21.0
-requests>=2.25.0
-accelerate>=0.12.0
-safetensors>=0.3.0

+torch>=1.12.0
+torchvision>=0.13.0
 numpy>=1.21.0
+Pillow>=8.0.0
+cairosvg>=2.5.0
+omegaconf>=2.1.0
+diffusers>=0.20.0
+transformers>=4.20.0
+svgwrite>=1.4.0
+svgpathtools>=1.4.0
+freetype-py>=2.3.0
+shapely>=1.8.0
+opencv-python>=4.5.0
+scikit-image>=0.19.0
+matplotlib>=3.5.0
+scipy>=1.8.0
+einops>=0.4.0
+timm>=0.6.0
+ftfy>=6.1.0
+regex>=2022.0.0
+tqdm>=4.64.0
+lpips>=0.1.4
+# The PyPI "clip-by-openai" distribution pins torch<1.7.2 and cannot be resolved
+# together with torch>=1.12.0 above; install OpenAI CLIP from its repository instead.
+clip @ git+https://github.com/openai/CLIP.git
+xformers>=0.0.16