import gradio as gr import torch from transformers import AutoModelForTextToWaveform, AutoProcessor # Load model and processor model_name = "hexgrad/Kokoro-82M" processor = AutoProcessor.from_pretrained(model_name) model = AutoModelForTextToWaveform.from_pretrained(model_name, torch_dtype=torch.float16) # Move to GPU if available device = "cuda" if torch.cuda.is_available() else "cpu" model = model.to(device) def text_to_audio(text, speed=1.0): """Convert text to audio using Kokoro model""" # Process the input text inputs = processor(text=text, return_tensors="pt") inputs = {k: v.to(device) for k, v in inputs.items()} # Set generation parameters gen_kwargs = { "do_sample": True, "temperature": 0.7, "length_penalty": 1.0, "repetition_penalty": 2.0, "top_p": 0.9, } # Generate waveform with torch.no_grad(): waveform = model.generate(**inputs, **gen_kwargs).cpu().numpy()[0] # Create a sample rate (typical for audio is 24000) sample_rate = 24000 # Apply speed factor if needed if speed != 1.0: import numpy as np import librosa waveform = librosa.effects.time_stretch(waveform.astype(np.float32), rate=speed) return sample_rate, waveform # Create Gradio interface with gr.Blocks(title="Kokoro Text-to-Audio") as app: gr.Markdown("# 🎵 Kokoro Text-to-Audio Converter") gr.Markdown("Convert text to speech using hexgrad/Kokoro-82M model") with gr.Row(): with gr.Column(): text_input = gr.Textbox( label="Enter your text", placeholder="Type something to convert to audio...", lines=5 ) speed_slider = gr.Slider( minimum=0.5, maximum=1.5, value=1.0, step=0.1, label="Speech Speed" ) submit_btn = gr.Button("Generate Audio") with gr.Column(): audio_output = gr.Audio(label="Generated Audio", type="numpy") submit_btn.click( fn=text_to_audio, inputs=[text_input, speed_slider], outputs=[audio_output] ) gr.Markdown("### Usage Tips") gr.Markdown("- For best results, keep your text reasonably short") gr.Markdown("- Adjust the speed slider to modify the pace of speech") gr.Markdown("- The model may take a moment to load on first use") # Launch the app if __name__ == "__main__": app.launch()