Spaces:
Running
Running
David Pomerenke
committed on
Commit
·
c4c59ec
1
Parent(s):
08735bb
Transcription APIs
Browse files
- .gitignore +1 -0
- evals.py +25 -1
- pyproject.toml +2 -0
- uv.lock +21 -0
.gitignore
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
floresp-*
|
| 2 |
glottolog-*
|
|
|
|
| 3 |
LanguageCodes.tab
|
| 4 |
ScriptCodes.csv
|
| 5 |
.cache
|
|
|
|
| 1 |
floresp-*
|
| 2 |
glottolog-*
|
| 3 |
+
*.m4a
|
| 4 |
LanguageCodes.tab
|
| 5 |
ScriptCodes.csv
|
| 6 |
.cache
|
evals.py
CHANGED
|
@@ -11,15 +11,17 @@ import pandas as pd
|
|
| 11 |
import requests
|
| 12 |
from aiolimiter import AsyncLimiter
|
| 13 |
from dotenv import load_dotenv
|
|
|
|
| 14 |
from joblib.memory import Memory
|
| 15 |
from langcodes import Language, standardize_tag
|
| 16 |
from language_data.population_data import LANGUAGE_SPEAKING_POPULATION
|
| 17 |
from openai import AsyncOpenAI
|
|
|
|
| 18 |
from requests import get
|
| 19 |
from rich import print
|
| 20 |
from tqdm.asyncio import tqdm_asyncio
|
| 21 |
from transformers import NllbTokenizer
|
| 22 |
-
from
|
| 23 |
|
| 24 |
# config
|
| 25 |
models = [
|
|
@@ -48,6 +50,28 @@ tokenizer = NllbTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
|
|
| 48 |
rate_limit = AsyncLimiter(max_rate=20, time_period=1)
|
| 49 |
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
# load general language data
|
| 52 |
languages = {
|
| 53 |
lang: pop
|
|
|
|
| 11 |
import requests
|
| 12 |
from aiolimiter import AsyncLimiter
|
| 13 |
from dotenv import load_dotenv
|
| 14 |
+
from elevenlabs import ElevenLabs
|
| 15 |
from joblib.memory import Memory
|
| 16 |
from langcodes import Language, standardize_tag
|
| 17 |
from language_data.population_data import LANGUAGE_SPEAKING_POPULATION
|
| 18 |
from openai import AsyncOpenAI
|
| 19 |
+
from pyglottolog import Glottolog
|
| 20 |
from requests import get
|
| 21 |
from rich import print
|
| 22 |
from tqdm.asyncio import tqdm_asyncio
|
| 23 |
from transformers import NllbTokenizer
|
| 24 |
+
from huggingface_hub import InferenceClient
|
| 25 |
|
| 26 |
# config
|
| 27 |
models = [
|
|
|
|
| 50 |
rate_limit = AsyncLimiter(max_rate=20, time_period=1)
|
| 51 |
|
| 52 |
|
| 53 |
+
@cache
|
| 54 |
+
def transcribe(filename, model="elevenlabs/scribe_v1"):
|
| 55 |
+
provider, modelname = model.split("/")
|
| 56 |
+
with open(filename, "rb") as f:
|
| 57 |
+
audio = f.read()
|
| 58 |
+
match provider:
|
| 59 |
+
case "elevenlabs":
|
| 60 |
+
client = ElevenLabs(api_key=getenv("ELEVENLABS_API_KEY"))
|
| 61 |
+
response = client.speech_to_text.convert(model_id=modelname, file=audio)
|
| 62 |
+
return response.text
|
| 63 |
+
case "openai":
|
| 64 |
+
client = InferenceClient(api_key=getenv("HUGGINGFACE_ACCESS_TOKEN"))
|
| 65 |
+
output = client.automatic_speech_recognition(model=model, audio=audio)
|
| 66 |
+
return output.text
|
| 67 |
+
case _:
|
| 68 |
+
raise ValueError(f"Model {model} not supported")
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
print(transcribe("data/test.m4a", "openai/whisper-large-v3-turbo"))
|
| 72 |
+
exit()
|
| 73 |
+
|
| 74 |
+
|
| 75 |
# load general language data
|
| 76 |
languages = {
|
| 77 |
lang: pop
|
pyproject.toml
CHANGED
|
@@ -15,7 +15,9 @@ dependencies = [
|
|
| 15 |
dev-dependencies = [
|
| 16 |
"aiolimiter>=1.1.0",
|
| 17 |
"bert-score>=0.3.13",
|
|
|
|
| 18 |
"evaluate==0.4.0",
|
|
|
|
| 19 |
"joblib>=1.4.2",
|
| 20 |
"langcodes>=3.5.0",
|
| 21 |
"openai>=1.52.2",
|
|
|
|
| 15 |
dev-dependencies = [
|
| 16 |
"aiolimiter>=1.1.0",
|
| 17 |
"bert-score>=0.3.13",
|
| 18 |
+
"elevenlabs>=1.53.0",
|
| 19 |
"evaluate==0.4.0",
|
| 20 |
+
"huggingface-hub>=0.29.1",
|
| 21 |
"joblib>=1.4.2",
|
| 22 |
"langcodes>=3.5.0",
|
| 23 |
"openai>=1.52.2",
|
uv.lock
CHANGED
|
@@ -565,6 +565,23 @@ wheels = [
|
|
| 565 |
{ url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 },
|
| 566 |
]
|
| 567 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 568 |
[[package]]
|
| 569 |
name = "evaluate"
|
| 570 |
version = "0.4.0"
|
|
@@ -1194,7 +1211,9 @@ dependencies = [
|
|
| 1194 |
dev = [
|
| 1195 |
{ name = "aiolimiter" },
|
| 1196 |
{ name = "bert-score" },
|
|
|
|
| 1197 |
{ name = "evaluate" },
|
|
|
|
| 1198 |
{ name = "joblib" },
|
| 1199 |
{ name = "langcodes" },
|
| 1200 |
{ name = "openai" },
|
|
@@ -1220,7 +1239,9 @@ requires-dist = [
|
|
| 1220 |
dev = [
|
| 1221 |
{ name = "aiolimiter", specifier = ">=1.1.0" },
|
| 1222 |
{ name = "bert-score", specifier = ">=0.3.13" },
|
|
|
|
| 1223 |
{ name = "evaluate", specifier = "==0.4.0" },
|
|
|
|
| 1224 |
{ name = "joblib", specifier = ">=1.4.2" },
|
| 1225 |
{ name = "langcodes", specifier = ">=3.5.0" },
|
| 1226 |
{ name = "openai", specifier = ">=1.52.2" },
|
|
|
|
| 565 |
{ url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 },
|
| 566 |
]
|
| 567 |
|
| 568 |
+
[[package]]
|
| 569 |
+
name = "elevenlabs"
|
| 570 |
+
version = "1.53.0"
|
| 571 |
+
source = { registry = "https://pypi.org/simple" }
|
| 572 |
+
dependencies = [
|
| 573 |
+
{ name = "httpx" },
|
| 574 |
+
{ name = "pydantic" },
|
| 575 |
+
{ name = "pydantic-core" },
|
| 576 |
+
{ name = "requests" },
|
| 577 |
+
{ name = "typing-extensions" },
|
| 578 |
+
{ name = "websockets" },
|
| 579 |
+
]
|
| 580 |
+
sdist = { url = "https://files.pythonhosted.org/packages/83/c1/5bf18b8f8be29032196484a4f55d1ec85ada125d3a5df87a5ebe5b754a0e/elevenlabs-1.53.0.tar.gz", hash = "sha256:bc900f7e6123575014672e5f4f004a12c75508d6b22e370a1327bd0d536b0f0a", size = 149562 }
|
| 581 |
+
wheels = [
|
| 582 |
+
{ url = "https://files.pythonhosted.org/packages/5f/b8/63c7bbbe9d02b1a16afe02512727e4afa17e25ed85c2a0f298b31ac1ddb1/elevenlabs-1.53.0-py3-none-any.whl", hash = "sha256:90b33135204bdd538ab3624dff31aa57b3192f660f72b71c2bf18b8436a53e2d", size = 344998 },
|
| 583 |
+
]
|
| 584 |
+
|
| 585 |
[[package]]
|
| 586 |
name = "evaluate"
|
| 587 |
version = "0.4.0"
|
|
|
|
| 1211 |
dev = [
|
| 1212 |
{ name = "aiolimiter" },
|
| 1213 |
{ name = "bert-score" },
|
| 1214 |
+
{ name = "elevenlabs" },
|
| 1215 |
{ name = "evaluate" },
|
| 1216 |
+
{ name = "huggingface-hub" },
|
| 1217 |
{ name = "joblib" },
|
| 1218 |
{ name = "langcodes" },
|
| 1219 |
{ name = "openai" },
|
|
|
|
| 1239 |
dev = [
|
| 1240 |
{ name = "aiolimiter", specifier = ">=1.1.0" },
|
| 1241 |
{ name = "bert-score", specifier = ">=0.3.13" },
|
| 1242 |
+
{ name = "elevenlabs", specifier = ">=1.53.0" },
|
| 1243 |
{ name = "evaluate", specifier = "==0.4.0" },
|
| 1244 |
+
{ name = "huggingface-hub" },
|
| 1245 |
{ name = "joblib", specifier = ">=1.4.2" },
|
| 1246 |
{ name = "langcodes", specifier = ">=3.5.0" },
|
| 1247 |
{ name = "openai", specifier = ">=1.52.2" },
|