Spaces:
Running
Running
| import gradio as gr | |
| import tempfile | |
| from TTS.api import TTS | |
| from huggingface_hub import hf_hub_download | |
| import torch | |
| import json | |
| from snfl_imdann import TifinaghNumberConverter | |
| import re | |
# Whether a CUDA device is available; used to pick the device for every model.
CUDA = torch.cuda.is_available()

# Hugging Face Hub repository that hosts the TTS checkpoint and its config.
REPO_ID = "ayymen/Coqui-TTS-Vits-Multispeaker"

# Short name shown in the UI -> model name handed to `load_vc_model_by_name`.
VOICE_CONVERSION_MODELS = {
    'freevc24': 'voice_conversion_models/multilingual/vctk/freevc24',
    'openvoice_v1': 'voice_conversion_models/multilingual/multi-dataset/openvoice_v1',
    'openvoice_v2': 'voice_conversion_models/multilingual/multi-dataset/openvoice_v2',
}

# Display name -> code passed to the model as `language`.
VARIANTS = {"Tachelhit": "shi", "Tarifit": "rif"}

# Speaker ids passed to the model as `speaker`.
SPEAKERS = ["yan", "sin", "idj"]

my_title = "ⴰⴹⵕⵉⵚ ⵙ ⵉⵎⵙⵍⵉ - Tamazight Text-to-Speech"
my_description = "This model is based on [VITS](https://github.com/jaywalnut310/vits), thanks to 🐸 [Coqui.ai](https://coqui.ai/)."

# Each row is [text, variant, speaker, split_sentences]; the two voice-cloning
# inputs have no example value and fall back to the defaults of `tts`.
my_examples = [
    ["ⴰⵣⵓⵍ. ⵎⴰⵏⵣⴰⴽⵉⵏ?", "shi", "yan", True],
    ["ⵡⴰ ⵜⴰⵎⵖⴰⵔⵜ ⵎⴰ ⴷ ⵓⴽⴰⵏ ⵜⵙⴽⵔⵜ?", "shi", "sin", False],
    ["ⴳⵏ ⴰⴷ ⴰⴽ ⵉⵙⵙⴳⵏ ⵕⴱⴱⵉ ⵉⵜⵜⵓ ⴽ.", "shi", "yan", False],
    ["ⴰⵔⵔⴰⵡ ⵏ ⵍⵀⵎⵎ ⵢⵓⴽⵔ ⴰⵖ ⵉⵀⴷⵓⵎⵏ ⵏⵏⵖ!", "shi", "yan", False],
    ["ⴰⵣⵓⵍ. ⵎⴰⵎⵛ ⵜⴷⵊⵉⵜ?", "rif", "idj", True],
    ["ⴰⵇⵎⵎⵓⵎ ⵉⵇⵏⴻⵏ ⵓⵔ ⵜ ⵜⵜⵉⴷⴼⵏ ⵉⵣⴰⵏ.", "rif", "idj", False],
    ["ⵇⵇⵉⵎ ⵅ ⵜⴰⴷⴷⴰⵔⵜ ⵏⵏⵛ!", "rif", "idj", False],
    ["ⵜⴻⵜⵜⵏ ⴰⴳ ⵡⵓⵛⵛⵏ, ⵜⵜⵔⵓⵏ ⵅ ⵓⵎⴽⵙⴰ.", "rif", "idj", False],
]
# Fetch the TTS checkpoint and its config from the Hub repository.
best_model_path = hf_hub_download(repo_id=REPO_ID, filename="checkpoint_390000.pth")
config_path = hf_hub_download(repo_id=REPO_ID, filename="config.json")

# Single shared synthesizer, placed on the GPU when one is available.
api = TTS(model_path=best_model_path, config_path=config_path).to("cuda" if CUDA else "cpu")

# pre-download voice conversion models
for model in VOICE_CONVERSION_MODELS.values():
    api.load_vc_model_by_name(model, gpu=CUDA)

# Read the config as UTF-8 explicitly: JSON is UTF-8 by specification, and the
# platform's default encoding is not guaranteed to be.
with open(config_path, "r", encoding="utf-8") as f:
    config = json.load(f)

available_chars = config["characters"]["characters"]
available_punct = config["characters"]["punctuations"]
# Digits are advertised as well because `tts` converts numbers to their spoken
# form before synthesis.
available_chars = available_chars + "".join(str(i) for i in range(10))
placeholder = f"The available characters are: {available_chars} and the available punctuation is: {available_punct}"
# Input components, in the same order as the positional parameters of `tts`.
my_inputs = [
    gr.Textbox(lines=5, label="Input Text", placeholder=placeholder),
    # Choices are (display name, value) pairs; the value is the variant code.
    gr.Dropdown(label="Variant", choices=list(VARIANTS.items()), value="shi"),
    gr.Dropdown(label="Speaker", choices=SPEAKERS, value="yan"),
    gr.Checkbox(label="Split Sentences (each sentence will be generated separately)", value=False),
    gr.Audio(type="filepath", label="Speaker audio for voice cloning (optional)"),
    # Explicit default, matching the default of `tts(voice_cv_model=...)`, so
    # the callback never receives None when a reference audio is uploaded.
    gr.Dropdown(
        label="Voice Conversion Model",
        choices=list(VOICE_CONVERSION_MODELS.keys()),
        value="freevc24",
    ),
]

# The callback returns a path to a WAV file, hence type="filepath".
my_outputs = gr.Audio(type="filepath", label="Output Audio", autoplay=True)
# Single-pass replacement table for characters the original code treated as
# out-of-vocabulary. None of the replacement strings contains a character that
# is itself replaced, so one pass equals the chained `.replace()` calls.
_OOV_REPLACEMENTS = str.maketrans({
    "\n": ". ",
    "(": ",",
    ")": ",",
    '"': ",",
    "'": ",",
    ";": ",",
    "-": " ",
})


def tts(text: str, variant: str = "shi", speaker: str = "yan", split_sentences: bool = False, speaker_wav: str = None, voice_cv_model: str = 'freevc24'):
    """Synthesize `text` to a WAV file and return the file's path.

    Args:
        text: Text to speak. Some punctuation is replaced and digit runs are
            converted to their spoken form before synthesis.
        variant: Code passed to the model as `language`.
        speaker: Speaker id passed to the model as `speaker`.
        split_sentences: Forwarded to the synthesizer unchanged.
        speaker_wav: Optional path to a reference recording. When given, the
            output is produced with voice conversion towards that recording.
        voice_cv_model: Key into `VOICE_CONVERSION_MODELS`; only used when
            `speaker_wav` is given. A missing value falls back to 'freevc24'.

    Returns:
        Path of the generated temporary ".wav" file. The file is not deleted
        by this function.

    Raises:
        KeyError: If `voice_cv_model` is a non-empty unknown key.
    """
    # replace oov characters
    text = text.translate(_OOV_REPLACEMENTS)
    # convert numbers to their spoken form
    text = re.sub(r"\d+", lambda match: TifinaghNumberConverter.convert(int(match.group(0))), text)

    # Only reserve a unique path here and close the handle right away: the
    # synthesizer reopens the path itself, which fails on platforms that do
    # not allow a second open of a still-open temporary file.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        output_path = fp.name

    if speaker_wav:
        # The UI may deliver None when nothing is selected in the dropdown.
        model_key = voice_cv_model or 'freevc24'
        api.load_vc_model_by_name(VOICE_CONVERSION_MODELS[model_key], gpu=CUDA)
        api.tts_with_vc_to_file(
            text,
            speaker_wav=speaker_wav,
            file_path=output_path,
            split_sentences=split_sentences,
            speaker=speaker,
            language=variant,
        )
    else:
        api.tts_to_file(
            text,
            file_path=output_path,
            split_sentences=split_sentences,
            speaker=speaker,
            language=variant,
        )
    return output_path
# Wire the callback, components and examples into a single Gradio app.
iface = gr.Interface(
    fn=tts,
    inputs=my_inputs,
    outputs=my_outputs,
    title=my_title,
    description=my_description,
    examples=my_examples,
    # Ask Gradio to cache the example outputs instead of recomputing them.
    cache_examples=True,
)

# Start the web server; this runs at import time, as in the original script.
iface.launch()