Update README.md
Browse files
README.md
CHANGED
|
@@ -63,7 +63,7 @@ model = AutoModelForCausalLM.from_pretrained(
     trust_remote_code=True,
 )

-tokenizer = AutoTokenizer.from_pretrained(EMU_HUB, trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(EMU_HUB, trust_remote_code=True, padding_side="left")
 image_processor = AutoImageProcessor.from_pretrained(VQ_HUB, trust_remote_code=True)
 image_tokenizer = AutoModel.from_pretrained(VQ_HUB, device_map="cuda:0", trust_remote_code=True).eval()
 processor = Emu3Processor(image_processor, image_tokenizer, tokenizer)

@@ -81,6 +81,7 @@ kwargs = dict(
     ratio="1:1",
     image_area=model.config.image_area,
     return_tensors="pt",
+    padding="longest",
 )
 pos_inputs = processor(text=prompt, **kwargs)
 neg_inputs = processor(text=NEGATIVE_PROMPT, **kwargs)

@@ -95,7 +96,8 @@ GENERATION_CONFIG = GenerationConfig(
     top_k=2048,
 )

-h, w = pos_inputs.image_size[0]
+h = pos_inputs.image_size[:, 0]
+w = pos_inputs.image_size[:, 1]
 constrained_fn = processor.build_prefix_constrained_fn(h, w)
 logits_processor = LogitsProcessorList([
     UnbatchedClassifierFreeGuidanceLogitsProcessor(

@@ -113,7 +115,8 @@ logits_processor = LogitsProcessorList([
 outputs = model.generate(
     pos_inputs.input_ids.to("cuda:0"),
     GENERATION_CONFIG,
-    logits_processor=logits_processor
+    logits_processor=logits_processor,
+    attention_mask=pos_inputs.attention_mask.to("cuda:0"),
 )

 mm_list = processor.decode(outputs[0])

@@ -121,5 +124,4 @@ for idx, im in enumerate(mm_list):
     if not isinstance(im, Image.Image):
         continue
     im.save(f"result_{idx}.png")
-
 ```
|
Updated README.md (the same sections after the change):

```python
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(EMU_HUB, trust_remote_code=True, padding_side="left")
image_processor = AutoImageProcessor.from_pretrained(VQ_HUB, trust_remote_code=True)
image_tokenizer = AutoModel.from_pretrained(VQ_HUB, device_map="cuda:0", trust_remote_code=True).eval()
processor = Emu3Processor(image_processor, image_tokenizer, tokenizer)
```

```python
    ratio="1:1",
    image_area=model.config.image_area,
    return_tensors="pt",
    padding="longest",
)
pos_inputs = processor(text=prompt, **kwargs)
neg_inputs = processor(text=NEGATIVE_PROMPT, **kwargs)
```

```python
    top_k=2048,
)

h = pos_inputs.image_size[:, 0]
w = pos_inputs.image_size[:, 1]
constrained_fn = processor.build_prefix_constrained_fn(h, w)
logits_processor = LogitsProcessorList([
    UnbatchedClassifierFreeGuidanceLogitsProcessor(
```

```python
outputs = model.generate(
    pos_inputs.input_ids.to("cuda:0"),
    GENERATION_CONFIG,
    logits_processor=logits_processor,
    attention_mask=pos_inputs.attention_mask.to("cuda:0"),
)

mm_list = processor.decode(outputs[0])
```

```python
    if not isinstance(im, Image.Image):
        continue
    im.save(f"result_{idx}.png")
```