Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unified language codes --> #9 #28

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions tts_wrapper/engines/elevenlabs/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
from ...tts import FileFormat
from ...exceptions import UnsupportedFileFormat
import requests

from ...engines.utils import (
getISOLangCode,
)
audio_format = ("pcm_22050",)


Expand Down Expand Up @@ -199,11 +201,11 @@ def get_voices(self):
) # Default to 'en-US'
if voice["high_quality_base_model_ids"] == "eleven_multilingual_v1":
voice["language_codes"] = [
language_code for language_code in supported_languages_v1.keys()
getISOLangCode(language_code) for language_code in supported_languages_v1.keys()
]
else:
voice["language_codes"] = [
language_code for language_code in supported_languages_v2.keys()
getISOLangCode(language_code) for language_code in supported_languages_v2.keys()
]
voice["name"] = voice["name"]
voice["gender"] = "Unknown"
Expand Down
6 changes: 4 additions & 2 deletions tts_wrapper/engines/google/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
import struct
from google.cloud import texttospeech_v1beta1 as texttospeech
from google.oauth2 import service_account

from ...engines.utils import (
getISOLangCode,
)

class GoogleClient:
def __init__(self, credentials: Union[str, Dict]) -> None:
Expand Down Expand Up @@ -137,7 +139,7 @@ def get_voices(self) -> List[Dict[str, Any]]:
voice_data = {
"id": voice.name,
"name": voice.name,
"language_codes": voice.language_codes,
"language_codes": [getISOLangCode(item) for item in voice.language_codes],
"gender": voice.ssml_gender.name, # 'MALE', 'FEMALE', 'NEUTRAL'
}
standardized_voices.append(voice_data)
Expand Down
7 changes: 5 additions & 2 deletions tts_wrapper/engines/googletrans/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
from io import BytesIO
import logging
import mp3
from ...engines.utils import (
getISOLangCode,
)

try:
from gtts import gTTS
Expand Down Expand Up @@ -133,7 +136,7 @@ def get_voices(self) -> List[Dict[str, Any]]:
standardized_voices.append(
{
"id": f"{lang_code}-{accent}",
"language_codes": [lang_code],
"language_codes": [getISOLangCode(lang_code)],
"name": f"{lang_name} ({accent})",
"gender": "Unknown",
}
Expand All @@ -142,7 +145,7 @@ def get_voices(self) -> List[Dict[str, Any]]:
standardized_voices.append(
{
"id": lang_code,
"language_codes": [lang_code],
"language_codes": [getISOLangCode(lang_code)],
"name": lang_name,
"gender": "Unknown",
}
Expand Down
7 changes: 5 additions & 2 deletions tts_wrapper/engines/microsoft/client.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import logging
from typing import Tuple, List, Dict, Any, Optional
from tts_wrapper.tts import FileFormat
import io
import wave
from ...exceptions import ModuleNotInstalled

from ...engines.utils import (
getISOLangCode,
)
try:
import requests
except ImportError:
Expand Down Expand Up @@ -77,7 +80,7 @@ def get_available_voices(self) -> List[Dict[str, Any]]:
for voice in voices:
voice_dict = {
"id": voice["ShortName"],
"language_codes": [voice["Locale"]],
"language_codes": [getISOLangCode(voice["Locale"])],
"name": voice["LocalName"],
"gender": voice["Gender"], # 'Gender' is already a string
}
Expand Down
6 changes: 4 additions & 2 deletions tts_wrapper/engines/polly/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
from ...exceptions import ModuleNotInstalled
import json
import io

from ...engines.utils import (
getISOLangCode,
)
Credentials = Tuple[str, str, str]

FORMATS = {
Expand Down Expand Up @@ -98,7 +100,7 @@ def get_voices(self) -> List[Dict[str, Any]]:
standardized_voices = []
for voice in voices:
voice["id"] = voice["Id"]
voice["language_codes"] = [voice["LanguageCode"]]
voice["language_codes"] = [getISOLangCode(voice["LanguageCode"])]
voice["name"] = voice["Name"]
voice["gender"] = voice["Gender"]
standardized_voices.append(voice)
Expand Down
5 changes: 4 additions & 1 deletion tts_wrapper/engines/sherpaonnx/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
import json
import logging
import threading
from ...engines.utils import (
getISOLangCode,
)

try:
import numpy as np
Expand Down Expand Up @@ -268,7 +271,7 @@ def get_voices(self) -> List[Dict[str, str]]:
"id": voice["Iso Code"],
"name": voice["Language Name"],
"gender": "N",
"language_codes": [voice["Iso Code"]],
"language_codes": [getISOLangCode(voice["Iso Code"])],
}
for voice in self.voices_cache
]
Expand Down
25 changes: 17 additions & 8 deletions tts_wrapper/engines/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from io import BytesIO
import re
from typing import List, Dict, Tuple
from langcodes import *


def process_wav(raw: bytes) -> bytes:
Expand All @@ -22,19 +23,20 @@ def create_temp_filename(suffix="") -> str:
tempfile.gettempdir(), f"{tempfile.gettempprefix()}_{random_seq}{suffix}"
)


def estimate_word_timings(text: str, wpm: int = 150) -> List[Tuple[float, float, str]]:
# Remove SSML tags
text = re.sub('<[^<]+?>', '', text)

# Split text into words, keeping punctuation
words = re.findall(r'\b[\w\']+\b|[.,!?;]', text)

words_per_second = wpm / 60
base_seconds_per_word = 1 / words_per_second

timings = []
current_time = 0.0

for i, word in enumerate(words):
# Adjust timing based on word length and type
if len(word) <= 3:
Expand All @@ -43,7 +45,7 @@ def estimate_word_timings(text: str, wpm: int = 150) -> List[Tuple[float, float,
duration = base_seconds_per_word * 1.2
else:
duration = base_seconds_per_word

# Adjust for punctuation
if word in '.,!?;':
duration = base_seconds_per_word * 0.5
Expand All @@ -52,14 +54,21 @@ def estimate_word_timings(text: str, wpm: int = 150) -> List[Tuple[float, float,
prev_start, prev_end, prev_word = timings[-1]
timings[-1] = (prev_start, prev_end + 0.2, prev_word)
current_time += 0.2

# Add natural variations
variation = (hash(word) % 20 - 10) / 100 # -10% to +10% variation
duration *= (1 + variation)

end_time = current_time + duration
timings.append((current_time, end_time, word))
current_time = end_time

return timings


def getISOLangCode(dialect: str) -> str:
    """Return a normalized language tag for an engine-specific locale string.

    The tag is first parsed as-is with ``Language.get``. If that fails,
    only the primary language subtag (the text before the first ``-`` or
    ``_``) is parsed instead, so an unusual region/variant suffix does not
    prevent the voice from being listed.

    Args:
        dialect: Locale or language code reported by a TTS engine.

    Returns:
        The string form of the parsed ``Language`` object.

    Raises:
        ValueError: If neither the full tag nor its primary subtag can be
            parsed (langcodes' ``LanguageTagError`` is a ``ValueError``).
    """
    try:
        return str(Language.get(dialect))
    except ValueError:
        # Retry with just the primary subtag. Underscores are treated like
        # hyphens so that "xx_YY"-style locales also get a real fallback
        # instead of retrying the exact same failing string.
        primary_subtag = dialect.replace("_", "-").split("-", 1)[0]
        return str(Language.get(primary_subtag))
6 changes: 4 additions & 2 deletions tts_wrapper/engines/uwp/client.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import asyncio
from typing import List, Dict, Any, Optional
from ...exceptions import ModuleNotInstalled

from ...engines.utils import (
getISOLangCode,
)
try:
from winrt.windows.media.speechsynthesis import SpeechSynthesizer
from winrt.windows.storage.streams import DataReader
Expand Down Expand Up @@ -42,7 +44,7 @@ def get_voices(self) -> List[Dict[str, Any]]:
for voice in voices:
standardized_voice = {
"id": voice.id,
"language_codes": [voice.language],
"language_codes": [getISOLangCode(voice.language)],
"name": voice.display_name,
"gender": voice.gender.value,
}
Expand Down
6 changes: 4 additions & 2 deletions tts_wrapper/engines/watson/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import threading
import json
import logging

from ...engines.utils import (
getISOLangCode,
)
from ...exceptions import ModuleNotInstalled

Credentials = Tuple[str, str, str] # api_key, region, instance_id
Expand Down Expand Up @@ -161,7 +163,7 @@ def get_voices(self) -> List[Dict[str, Any]]:
for voice in voices:
standardized_voice = {
"id": voice["name"],
"language_codes": [voice["language"]],
"language_codes": [getISOLangCode(voice["language"])],
"name": voice["name"].split("_")[1].replace("V3Voice", ""),
"gender": voice["gender"],
}
Expand Down
6 changes: 4 additions & 2 deletions tts_wrapper/engines/witai/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
from ...exceptions import UnsupportedFileFormat
import logging
import requests

from ...engines.utils import (
getISOLangCode,
)
FORMATS = {"mp3": "mp3", "pcm": "raw", "wav": "wav"}


Expand Down Expand Up @@ -48,7 +50,7 @@ def get_voices(self) -> List[Dict[str, Any]]:
standardized_voices.append(
{
"id": voice["name"],
"language_codes": [locale],
"language_codes": [getISOLangCode(locale)],
"name": voice["name"].split("$")[1],
"gender": voice["gender"],
"styles": voice.get("styles", []),
Expand Down
Loading