tedlasai committed
Commit 6a1328e · 1 Parent(s): c1f2d8e

debug weight dtype

Files changed (2):
  1. gradio/app.py +6 -0
  2. inference.py +7 -6
gradio/app.py CHANGED
@@ -71,6 +71,12 @@ with gr.Blocks(css="footer {visibility: hidden}") as demo:
                 label="Input image",
                 interactive=True,
             )
+            tense_choice = gr.Dropdown(
+                label="I want to generate the",
+                choices=["present", "past, present and future"],
+                value="past, present and future",  # default selection
+                interactive=True,
+            )
             generate_btn = gr.Button("Generate video", variant="primary")
         with gr.Column():
             video_out = gr.Video(
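
The hunk only adds the dropdown; how its value reaches the generation call is outside this diff. A minimal sketch of the wiring, assuming a hypothetical generate_video handler and an image_in component for the "Input image" (neither shown in this commit). The dropdown strings match the INTERVALS keys used in inference.py, so the value can be passed through unchanged:

    # Sketch only, not part of this commit: wire the new dropdown into the
    # existing button. `generate_video` and `image_in` are assumed names for
    # the handler and the "Input image" component defined elsewhere in app.py.
    generate_btn.click(
        fn=generate_video,
        inputs=[image_in, tense_choice],  # dropdown value arrives as a plain string
        outputs=video_out,
    )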
inference.py CHANGED
@@ -61,7 +61,7 @@ INTERVALS = {
         "mode": "1x",
         "fps": 240
     },
-    "past_present_and_future": {
+    "past, present and future": {
         "in_start": 4,
         "in_end": 12,
         "out_start": 0,
@@ -161,6 +161,10 @@ def load_model(args):
     # as these weights are only used for inference, keeping weights in full precision is not required.
     weight_dtype = torch.bfloat16

+    # text_encoder.to(dtype=weight_dtype)
+    # transformer.to(dtype=weight_dtype)
+    # vae.to(dtype=weight_dtype)
+
     pipe = ControlnetCogVideoXPipeline.from_pretrained(
         args.pretrained_model_path,
         tokenizer=tokenizer,
@@ -241,16 +245,13 @@ def main(args):
         image_paths = [image_path]

     pipe, model_config = load_model(args)
-
-    # text_encoder.to(args.device, dtype=weight_dtype)
-    # transformer.to(args.device, dtype=weight_dtype)
-    # vae.to(args.device, dtype=weight_dtype)
+
     pipe = pipe.to(args.device)

     for image_path in image_paths:
         image = Image.open(image_path)

-        processed_image, video = inference_on_image(pipe, image, "past_present_and_future", model_config, args)
+        processed_image, video = inference_on_image(pipe, image, "past, present and future", model_config, args)

         vid_output_path = output_path / f"{image_path.stem}.mp4"
         export_to_video(video, vid_output_path, fps=20)
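
Per the commit title, the weight dtype handling is still being debugged: the `.to(args.device, dtype=weight_dtype)` casts are removed from main() and re-added, still commented out, inside load_model(), so the pipeline is currently only moved to args.device in whatever precision from_pretrained returns. A minimal sketch of applying the intended bfloat16 cast, assuming ControlnetCogVideoXPipeline follows the usual diffusers from_pretrained/to() conventions; everything beyond the lines in this diff is an assumption:

    import torch

    weight_dtype = torch.bfloat16  # inference-only, so reduced precision is acceptable

    # Option A (assumed diffusers-style kwarg): load the weights directly in bfloat16.
    pipe = ControlnetCogVideoXPipeline.from_pretrained(
        args.pretrained_model_path,
        tokenizer=tokenizer,
        torch_dtype=weight_dtype,
    )

    # Option B: cast the registered sub-modules after loading, mirroring the
    # commented-out lines now kept in load_model().
    pipe.text_encoder.to(dtype=weight_dtype)
    pipe.transformer.to(dtype=weight_dtype)
    pipe.vae.to(dtype=weight_dtype)

    pipe = pipe.to(args.device)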