DeeeeeeM committed · Commit 7716a94 · 1 Parent(s): 5b9ff42
added minor changes

Files changed:
- app.py: +9 -2
- requirements.txt: +2 -1
app.py CHANGED

@@ -6,12 +6,15 @@ import gradio as gr
 import torch
 import stable_whisper
 from stable_whisper.text_output import result_to_any, sec2srt
+import time
 
 def process_media(
     model_size, source_lang, upload, model_type,
     max_chars, max_words, extend_in, extend_out, collapse_gaps,
     max_lines_per_segment, line_penalty, longest_line_char_penalty, *args
 ):
+    start_time = time.time()
+
     # ----- is file empty? checker ----- #
     if upload is None:
         return None, None, None, None
@@ -27,7 +30,8 @@ def process_media(
     model = stable_whisper.load_model(model_size, device=device)
 
     try:
-        result = model.transcribe(temp_path, language=source_lang, vad=True, regroup=False,
+        result = model.transcribe(temp_path, language=source_lang, vad=True, regroup=False, no_speech_threshold=0.9)
+        #remove background music/noise: denoiser="demucs"
         #result.save_as_json(word_transcription_path)
     except Exception as e:
         return None, None, None, None
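The functional change in this hunk is the added no_speech_threshold=0.9 option; the new comment notes that stable-ts can also strip background music with denoiser="demucs". A minimal standalone sketch of the same call, assuming a local media file and language code (both placeholders, not the app's actual wiring):

import torch
import stable_whisper

device = "cuda" if torch.cuda.is_available() else "cpu"
model = stable_whisper.load_model("large-v3", device=device)

result = model.transcribe(
    "sample.mp3",            # placeholder path for an uploaded audio/video file
    language="en",           # placeholder language code
    vad=True,                # VAD-assisted timestamps
    regroup=False,           # no automatic regrouping of segments
    no_speech_threshold=0.9,
    # denoiser="demucs",     # optional: remove background music/noise first
)
result.to_srt_vtt("sample.srt")  # stable-ts results can be exported to SRT directly

Whisper's default no_speech_threshold is 0.6; raising it to 0.9 means a segment is only discarded as silence when the model is very confident it contains no speech.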
@@ -87,6 +91,9 @@ def process_media(
     audio_out = temp_path if mime and mime.startswith("audio") else None
     video_out = temp_path if mime and mime.startswith("video") else None
 
+    elapsed = time.time() - start_time
+    print(f"process_media completed in {elapsed:.2f} seconds")
+
     return audio_out, video_out, transcript_txt, srt_file_path
 
 def optimize_text(text, max_lines_per_segment, line_penalty, longest_line_char_penalty):
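The timing added here uses time.time(). That works, though for measuring an elapsed interval time.perf_counter() is generally preferred because it is monotonic and unaffected by system clock changes. A sketch of the equivalent pattern:

import time

start = time.perf_counter()
# ... run transcription and subtitle generation ...
elapsed = time.perf_counter() - start
print(f"process_media completed in {elapsed:.2f} seconds")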
@@ -300,7 +307,7 @@ with gr.Blocks() as interface:
     )
     model_size = gr.Dropdown(
         choices=[
-            "large-v3-turbo",
+            "deepdml/faster-whisper-large-v3-turbo-ct2",
             "large-v3",
             "large-v2",
             "large",
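The first dropdown choice is now deepdml/faster-whisper-large-v3-turbo-ct2, a CTranslate2 (faster-whisper) checkpoint from the Hugging Face Hub rather than a plain openai-whisper size name, which lines up with the stable-ts[fw] extra in requirements.txt. Such an ID is loaded through stable-ts's faster-whisper backend rather than stable_whisper.load_model(). A minimal sketch, assuming a recent stable-ts/faster-whisper install and a CUDA device (process_media already takes a model_type argument, presumably to pick the backend):

import stable_whisper

# CTranslate2 checkpoints go through the faster-whisper backend.
model = stable_whisper.load_faster_whisper(
    "deepdml/faster-whisper-large-v3-turbo-ct2",
    device="cuda",           # assumption: GPU available; use "cpu" otherwise
    compute_type="float16",  # assumption: typical GPU compute type
)

# Recent stable-ts versions expose transcribe() on the wrapped model and
# return a stable-ts result; older releases called this transcribe_stable().
result = model.transcribe("sample.mp3", language="en", vad=True, regroup=False)
result.to_srt_vtt("sample.srt")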
requirements.txt CHANGED

@@ -2,5 +2,6 @@ gradio>=3.0.0
 stable-ts
 stable-ts[fw]
 demucs
-torch==2.
+torch==2.6.0
 numpy<2
+chardet