"""Gradio demo: Marathi voice cloning with the ai4bharat/IndicF5 TTS model."""

import tempfile
from typing import Optional, Tuple

import gradio as gr
import numpy as np
import soundfile as sf
from transformers import AutoModel

# Sample rate used when writing the generated waveform to disk.
SAMPLE_RATE = 24000

# Load the IndicF5 model once at start-up so every request reuses it.
# NOTE(review): trust_remote_code=True executes code shipped with the model
# repository; keep it only while that repository is trusted.
model = AutoModel.from_pretrained("ai4bharat/IndicF5", trust_remote_code=True)


def _to_float32(audio) -> np.ndarray:
    """Convert model output to float32 samples, rescaling int16 PCM to [-1, 1)."""
    samples = np.asarray(audio)
    if samples.dtype == np.int16:
        # int16 PCM must be scaled down; a plain cast would leave values in
        # the +/-32768 range and the saved audio would be heavily clipped.
        return samples.astype(np.float32) / 32768.0
    return samples.astype(np.float32)


def generate_tts(
    text: str, ref_audio: Optional[str], ref_text: str
) -> Tuple[str, Optional[str]]:
    """Synthesize ``text`` in the voice of the uploaded reference recording.

    Args:
        text: The sentence to be spoken.
        ref_audio: Filesystem path of the reference recording (the UI's
            ``gr.Audio`` component is configured with ``type="filepath"``),
            or ``None`` when nothing was uploaded.
        ref_text: Transcript of the reference recording.

    Returns:
        A ``(status_message, wav_path)`` pair. ``wav_path`` is ``None`` when
        input validation fails.
    """
    if ref_audio is None:
        return "⚠️ कृपया एक आवाज फाइल अपलोड करा", None
    if not text or not text.strip():
        # Nothing to synthesize; report it instead of calling the model.
        return "⚠️ कृपया बोलायचा मजकूर लिहा", None

    # Run the model; ref_audio is already a path string.
    audio = model(text, ref_audio_path=ref_audio, ref_text=ref_text)
    samples = _to_float32(audio)

    # Write to a unique temp file so concurrent requests cannot overwrite
    # each other's output (a fixed "output.wav" would be shared by everyone).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        out_path = tmp.name
    sf.write(out_path, samples, samplerate=SAMPLE_RATE)

    return "✅ आवाज तयार झाला!", out_path


with gr.Blocks() as demo:
    gr.Markdown("## 🎙️ माझं Marathi Voice Cloning (IndicF5)")

    text = gr.Textbox(label="तुम्हाला काय बोलायचं आहे?")
    ref_audio = gr.Audio(type="filepath", label="तुमचा आवाज (WAV, 6–30 सेकंद)")
    ref_text = gr.Textbox(label="त्या ऑडिओमध्ये तुम्ही काय बोललात?")

    out_msg = gr.Textbox(label="Status")
    out_audio = gr.Audio(label="Generated Audio", type="filepath")

    btn = gr.Button("Generate Speech")
    btn.click(
        generate_tts,
        inputs=[text, ref_audio, ref_text],
        outputs=[out_msg, out_audio],
    )

demo.launch()