import uuid
from pathlib import Path
import argparse

import spaces
import torch
import gradio as gr
from PIL import Image
from diffusers.utils import export_to_video

from inference import load_model, inference_on_image

# -----------------------
# 1. Load model
# -----------------------
# Inference settings are collected in a Namespace so they can be passed to the
# same entry points the CLI uses.
args = argparse.Namespace()
args.blur2vid_hf_repo_path = "tedlasai/blur2vid"
args.pretrained_model_path = "THUDM/CogVideoX-2b"
args.model_config_path = "training/configs/outsidephotos.yaml"
args.video_width = 1280
args.video_height = 720
args.seed = None

pipe, model_config = load_model(args)

OUTPUT_DIR = Path("/tmp/generated_videos")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)


# -----------------------
# 2. Inference wrapper
# -----------------------
@spaces.GPU(timeout=300, duration=120)
def generate_video_from_image(image: Image.Image, interval_key: str, num_inference_steps: int) -> str:
    """Gradio wrapper: take an input image and return the path to the generated video."""
    if image is None:
        raise gr.Error("Please upload an image first.")

    # Debug info: confirm the ZeroGPU worker actually has a CUDA device.
    print("Generating video")
    print("CUDA:", torch.cuda.is_available())
    print("Device:", torch.cuda.get_device_name(0))
    print("bf16 supported:", torch.cuda.is_bf16_supported())

    args.num_inference_steps = num_inference_steps

    # Write each result to a unique file so concurrent requests don't collide.
    video_id = uuid.uuid4().hex
    output_path = OUTPUT_DIR / f"{video_id}.mp4"

    args.device = "cuda"
    pipe.to(args.device)

    processed_image, video = inference_on_image(pipe, image, interval_key, model_config, args)

    export_to_video(video, str(output_path), fps=20)

    if not output_path.exists():
        raise gr.Error("Video generation failed: output file not found.")

    return str(output_path)


# -----------------------
# 3. UI
# -----------------------
with gr.Blocks(css="footer {visibility: hidden}") as demo:
    gr.Markdown(
        """
# 🖼️ ➜ 🎬 Recover Motion from a Blurry Image

This demo accompanies the paper **“Generating the Past, Present, and Future from a Motion-Blurred Image”**
by Tedla *et al.*, ACM Transactions on Graphics (SIGGRAPH Asia 2025).

- 🌐 **Project page:**
- 💻 **Code:**

Upload a blurry image and the model will generate a short video showing the motion recovered over the interval you select.

Note: the image will be resized to 1280×720, so we recommend uploading landscape-oriented images.
"""
    )

    with gr.Row():
        with gr.Column():
            image_in = gr.Image(
                type="pil",
                label="Input image",
                interactive=True,
            )
            with gr.Row():
                tense_choice = gr.Radio(
                    label="Select the interval to be generated:",
                    choices=["present", "past, present and future"],
                    value="past, present and future",
                    interactive=True,
                )
            num_inference_steps = gr.Slider(
                label="Number of inference steps",
                minimum=4,
                maximum=50,
                step=1,
                value=20,
                info="More steps give better quality but run slower; too many may exceed the ZeroGPU time limit.",
            )
            generate_btn = gr.Button("Generate video", variant="primary")

        with gr.Column():
            video_out = gr.Video(
                label="Generated video",
                format="mp4",
                autoplay=True,
                loop=True,
            )

    generate_btn.click(
        fn=generate_video_from_image,
        inputs=[image_in, tense_choice, num_inference_steps],
        outputs=video_out,
        api_name="predict",
    )


if __name__ == "__main__":
    demo.launch()
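
# ------------------------------------------------------------------
# Example: calling the endpoint programmatically. A minimal sketch
# using `gradio_client`, since the click handler exposes
# api_name="predict". The URL and input path below are hypothetical
# placeholders, not part of this repo:
#
#   from gradio_client import Client, handle_file
#
#   client = Client("http://127.0.0.1:7860")
#   video_path = client.predict(
#       handle_file("blurry.jpg"),     # image_in
#       "past, present and future",    # tense_choice
#       20,                            # num_inference_steps
#       api_name="/predict",
#   )
#   print(video_path)
# ------------------------------------------------------------------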