Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 33 additions & 37 deletions scripts/generate_sherpa_audio_all_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@
import re
import shutil
import tarfile
import tempfile
import zipfile
from functools import lru_cache
from pathlib import Path
from typing import Any, Dict, List, Optional

Expand Down Expand Up @@ -113,6 +112,30 @@ def get_project_languages() -> Dict[str, str]:
return lang_map


@lru_cache(maxsize=1)
def get_iso_mapping() -> Dict[str, str]:
    """
    Build a lookup from ISO 639-3 codes to the project's language codes.

    Reads ``data/index.json`` (resolved against the current working
    directory) and, for every entry carrying both a non-empty ``iso639_3``
    and a non-empty ``language`` string, maps the former to the latter.
    When the same ``iso639_3`` value appears more than once, the last entry
    wins.

    A successful result is memoized by ``lru_cache`` -- including the empty
    mapping returned for a missing file -- so callers share one dict object
    and should treat it as read-only. Call ``get_iso_mapping.cache_clear()``
    to force a re-read.

    Returns:
        Mapping of ``iso639_3`` -> ``language``. Empty when the index file
        is missing or its top-level JSON value is not an array.

    Raises:
        json.JSONDecodeError: If the index file exists but is not valid JSON.
    """
    index_path = Path("data/index.json")
    if not index_path.exists():
        return {}

    with index_path.open("r", encoding="utf-8") as f:
        languages = json.load(f)

    # The loop below needs an array of objects. Any other top-level value
    # yields no mappings instead of an AttributeError, matching the lenient
    # handling of a missing file.
    if not isinstance(languages, list):
        return {}

    mapping: Dict[str, str] = {}
    for lang in languages:
        # Skip malformed entries (null, strings, numbers, nested arrays).
        if not isinstance(lang, dict):
            continue

        iso3 = lang.get("iso639_3")
        code = lang.get("language")
        # Only keep non-empty string pairs so the result really is
        # Dict[str, str] and every key is hashable.
        if isinstance(iso3, str) and isinstance(code, str) and iso3 and code:
            mapping[iso3] = code

    return mapping


def extract_model_id_lang(model_id: str) -> Optional[str]:
"""
Extract language code from SherpaOnnx model ID.
Expand All @@ -125,37 +148,10 @@ def extract_model_id_lang(model_id: str) -> Optional[str]:
# MMS models: mms_<iso_639_3> or mms_<iso_639_2>
if model_id.startswith("mms_"):
code = model_id[4:].split("-")[0].split("_")[0]
# Convert 3-letter ISO 639-3 to 2-letter if possible
iso639_3_to_1 = {
"eng": "en", "spa": "es", "fra": "fr", "deu": "de",
"ita": "it", "por": "pt", "nld": "nl", "pol": "pl",
"rus": "ru", "ara": "ar", "hin": "hi", "ben": "bn",
"jpn": "ja", "kor": "ko", "zho": "zh", "tha": "th",
"vie": "vi", "tur": "tr", "fin": "fi", "swe": "sv",
"nor": "no", "dan": "da", "ces": "cs", "ell": "el",
"heb": "he", "ukr": "uk", "ron": "ro", "hun": "hu",
"afr": "af", "amh": "am", "asm": "as", "aze": "az",
"bel": "be", "bul": "bg", "cat": "ca", "ceb": "ceb",
"ckb": "ku", "dzo": "dz", "est": "et", "fas": "fa",
"gle": "ga", "glg": "gl", "guj": "gu", "hye": "hy",
"ibo": "ig", "ind": "id", "jav": "jv", "kat": "ka",
"khm": "km", "lao": "lo", "lav": "lv", "lit": "lt",
"lug": "lg", "mal": "ml", "mar": "mr", "mkd": "mk",
"mlt": "mt", "mya": "my", "nep": "ne", "pan": "pa",
"pus": "ps", "sin": "si", "slk": "sk", "slv": "sl",
"sna": "sn", "som": "so", "sqi": "sq", "srp": "sr",
"swa": "sw", "tam": "ta", "tel": "te", "tgk": "tg",
"tlh": "tlh", "tsn": "tn", "tur": "tr", "ukr": "uk",
"urd": "ur", "uzb": "uz", "wol": "wo", "xho": "xh",
"yor": "yo", "zul": "zu",
# Additional 3-letter codes
"aag": "aa", "aak": "aa", "aau": "aa",
"abk": "ab", "ady": "ady", "afh": "af",
"agq": "agq", "aht": "aht", "aia": "aia",
"aka": "ak", "als": "als", "amh": "am",
# Add more as needed from MMS model list
}
return iso639_3_to_1.get(code, code)

# Use mapping from data/index.json instead of hardcoded dictionary
iso_map = get_iso_mapping()
return iso_map.get(code, code)

# VITS/models with lang codes
if "-" in model_id:
Expand Down Expand Up @@ -330,7 +326,7 @@ def synthesize_with_model(model_dir: Path, text: str, model_id: str) -> Optional
# Create appropriate config
if model_type == "kokoro":
if not voices_bin:
print(f" Kokoro model requires voices.bin")
print(" Kokoro model requires voices.bin")
return None
config = create_kokoro_config(model_file, tokens_file, voices_bin, espeak_dir)
elif model_type == "matcha":
Expand All @@ -353,7 +349,7 @@ def synthesize_with_model(model_dir: Path, text: str, model_id: str) -> Optional
audio = tts.generate(text)

if len(audio.samples) == 0:
print(f" No audio generated")
print(" No audio generated")
return None

# Convert to bytes
Expand Down Expand Up @@ -604,7 +600,7 @@ def main():
success = download_model(model_id, model_url, model_dir)

if not success:
print(f" Failed to download model")
print(" Failed to download model")
failed += 1
cleanup_model(model_dir)
continue
Expand All @@ -613,7 +609,7 @@ def main():
result = synthesize_with_model(model_dir, phrase, model_id)

if result is None:
print(f" Failed to synthesize audio")
print(" Failed to synthesize audio")
failed += 1
cleanup_model(model_dir)
continue
Expand Down
Loading