Instructions to use cvssp/audioldm2-large with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use cvssp/audioldm2-large with Diffusers:
pip install -U diffusers transformers accelerate
```python
import torch
from diffusers import DiffusionPipeline

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("cvssp/audioldm2-large", dtype=torch.bfloat16, device_map="cuda")
prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
# AudioLDM2Pipeline generates audio, so the output exposes `.audios` rather than `.images`
audio = pipe(prompt).audios[0]
```
- Notebooks
- Google Colab
- Kaggle
```json
{
  "_class_name": "AudioLDM2Pipeline",
  "_diffusers_version": "0.20.0.dev0",
  "feature_extractor": [
    "transformers",
    "ClapFeatureExtractor"
  ],
  "language_model": [
    "transformers",
    "GPT2Model"
  ],
  "projection_model": [
    "audioldm2",
    "AudioLDM2ProjectionModel"
  ],
  "scheduler": [
    "diffusers",
    "DDIMScheduler"
  ],
  "text_encoder": [
    "transformers",
    "ClapModel"
  ],
  "text_encoder_2": [
    "transformers",
    "T5EncoderModel"
  ],
  "tokenizer": [
    "transformers",
    "RobertaTokenizerFast"
  ],
  "tokenizer_2": [
    "transformers",
    "T5TokenizerFast"
  ],
  "unet": [
    "audioldm2",
    "AudioLDM2UNet2DConditionModel"
  ],
  "vae": [
    "diffusers",
    "AutoencoderKL"
  ],
  "vocoder": [
    "transformers",
    "SpeechT5HifiGan"
  ]
}
```