DeeeeeeM
committed on
Commit
·
a27f548
1
Parent(s):
8c6b002
added minor changes in the description
Browse files
app.py
CHANGED
|
@@ -258,16 +258,20 @@ with gr.Blocks() as interface:
|
|
| 258 |
- Improved transcription (GPT-4) (In progress)
|
| 259 |
- Text to Speech (In progress)
|
| 260 |
|
| 261 |
-
<b>NOTE: This app is currently in the process of applying other AI-solutions for other use cases.</b>
|
| 262 |
"""
|
| 263 |
)
|
| 264 |
|
| 265 |
with gr.Tabs():
|
| 266 |
with gr.TabItem("Speech to Text"):
|
| 267 |
-
gr.HTML("<h2 style='text-align: left;'>OpenAI/Whisper + stable-ts</h2>")
|
| 268 |
gr.Markdown(
|
| 269 |
"""
|
| 270 |
Open Ai's <b>Whisper</b> is a versatile speech recognition model trained on diverse audio for tasks like multilingual transcription, translation, and language ID. With the help of <b>stable-ts</b>, it provides accurate word-level timestamps in chronological order without extra processing.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
"""
|
| 272 |
)
|
| 273 |
#General Settings
|
|
@@ -292,19 +296,20 @@ with gr.Blocks() as interface:
|
|
| 292 |
)
|
| 293 |
model_size = gr.Dropdown(
|
| 294 |
choices=[
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
],
|
| 304 |
label="Model Size",
|
| 305 |
value="large-v2",
|
| 306 |
interactive=True
|
| 307 |
)
|
|
|
|
| 308 |
#Advanced Settings
|
| 309 |
with gr.Accordion("Advanced Settings", open=False):
|
| 310 |
gr.Markdown(
|
|
|
|
| 258 |
- Improved transcription (GPT-4) (In progress)
|
| 259 |
- Text to Speech (In progress)
|
| 260 |
|
| 261 |
+
<i><b>NOTE: This app is currently in the process of applying other AI-solutions for other use cases.</b></i>
|
| 262 |
"""
|
| 263 |
)
|
| 264 |
|
| 265 |
with gr.Tabs():
|
| 266 |
with gr.TabItem("Speech to Text"):
|
| 267 |
+
gr.HTML("<h2 style='text-align: left;'>OpenAI / Whisper + stable-ts</h2>")
|
| 268 |
gr.Markdown(
|
| 269 |
"""
|
| 270 |
Open Ai's <b>Whisper</b> is a versatile speech recognition model trained on diverse audio for tasks like multilingual transcription, translation, and language ID. With the help of <b>stable-ts</b>, it provides accurate word-level timestamps in chronological order without extra processing.
|
| 271 |
+
|
| 272 |
+
<i>Note: The default values are set for balanced and faster processing,
|
| 273 |
+
you can choose: large, large v2, and large v3 <b>MODEL SIZE</b> for more accuracy, but they may take longer to process.</i>
|
| 274 |
+
|
| 275 |
"""
|
| 276 |
)
|
| 277 |
#General Settings
|
|
|
|
| 296 |
)
|
| 297 |
model_size = gr.Dropdown(
|
| 298 |
choices=[
|
| 299 |
+
"large-v3-turbo",
|
| 300 |
+
"large-v3",
|
| 301 |
+
"large-v2",
|
| 302 |
+
"large",
|
| 303 |
+
"medium",
|
| 304 |
+
"small",
|
| 305 |
+
"base",
|
| 306 |
+
"tiny"
|
| 307 |
],
|
| 308 |
label="Model Size",
|
| 309 |
value="large-v2",
|
| 310 |
interactive=True
|
| 311 |
)
|
| 312 |
+
|
| 313 |
#Advanced Settings
|
| 314 |
with gr.Accordion("Advanced Settings", open=False):
|
| 315 |
gr.Markdown(
|