debug weight dtype
- gradio/app.py +6 -0
- inference.py +7 -6
gradio/app.py
CHANGED
@@ -71,6 +71,12 @@ with gr.Blocks(css="footer {visibility: hidden}") as demo:
                 label="Input image",
                 interactive=True,
             )
+            tense_choice = gr.Dropdown(
+                label="I want to generate the",
+                choices=["present", "past, present and future"],
+                value="past, present and future",  # default selection
+                interactive=True,
+            )
             generate_btn = gr.Button("Generate video", variant="primary")
         with gr.Column():
             video_out = gr.Video(
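The diff adds the dropdown but does not show how its value reaches the generate callback. A minimal, self-contained sketch of that wiring, assuming the image component is bound to a variable like image_in and the click callback is named generate (neither name appears in this diff):

    import gradio as gr

    def generate(image, tense):
        # `tense` receives the dropdown string, e.g. "past, present and future",
        # which would be forwarded to the inference pipeline as the interval key.
        return None  # placeholder; the real handler would return a video file path

    with gr.Blocks() as demo:
        image_in = gr.Image(label="Input image", interactive=True)  # assumed variable name
        tense_choice = gr.Dropdown(
            label="I want to generate the",
            choices=["present", "past, present and future"],
            value="past, present and future",
            interactive=True,
        )
        generate_btn = gr.Button("Generate video", variant="primary")
        video_out = gr.Video(label="Output video")
        generate_btn.click(fn=generate, inputs=[image_in, tense_choice], outputs=video_out)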
inference.py
CHANGED
@@ -61,7 +61,7 @@ INTERVALS = {
         "mode": "1x",
         "fps": 240
     },
-    "
+    "past, present and future": {
         "in_start": 4,
         "in_end": 12,
         "out_start": 0,
@@ -161,6 +161,10 @@ def load_model(args):
     # as these weights are only used for inference, keeping weights in full precision is not required.
     weight_dtype = torch.bfloat16

+    # text_encoder.to(dtype=weight_dtype)
+    # transformer.to(dtype=weight_dtype)
+    # vae.to(dtype=weight_dtype)
+
     pipe = ControlnetCogVideoXPipeline.from_pretrained(
         args.pretrained_model_path,
         tokenizer=tokenizer,
@@ -241,16 +245,13 @@ def main(args):
         image_paths = [image_path]

     pipe, model_config = load_model(args)
-
-    # text_encoder.to(args.device, dtype=weight_dtype)
-    # transformer.to(args.device, dtype=weight_dtype)
-    # vae.to(args.device, dtype=weight_dtype)
+
     pipe = pipe.to(args.device)

     for image_path in image_paths:
         image = Image.open(image_path)

-        processed_image, video = inference_on_image(pipe, image, "
+        processed_image, video = inference_on_image(pipe, image, "past, present and future", model_config, args)

         vid_output_path = output_path / f"{image_path.stem}.mp4"
         export_to_video(video, vid_output_path, fps=20)