Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve synth to bytestream #21

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@ cov.xml
credentials-private.json
examples/*.wav
examples/*.mp3
examples/ttsandtranslate-7dd2e2d80d42.json
10 changes: 6 additions & 4 deletions examples/test-eleven.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,26 @@

client = ElevenLabsClient(credentials=(os.getenv('ELEVENLABS_API_KEY')))
tts = ElevenLabsTTS(client)
print(client.get_voices())
#print(client.get_voices())
# # # pausing
try:
ssml_text = tts.ssml.add(
"This is me speaking with Speak function and ElevenLabs"
"This is me speaking with Speak function and ElevenLabs. I should be hearing a sentence"
)
print ("SSML TEXT")
print(ssml_text)
tts.speak_streamed(ssml_text)
# Pause after 0.3 seconds
time.sleep(0.3)
tts.pause_audio()
print("Pausing..")
# Resume after 0.5 seconds
time.sleep(0.5)
tts.resume_audio()
#tts.resume_audio()
print("Resuming")
# Stop after 1 second
time.sleep(1)
tts.stop_audio()
#tts.stop_audio()
print("Stopping.")
except Exception as e:
print(f"Error at pausing: {e}")
Expand Down
54 changes: 32 additions & 22 deletions examples/test-google-stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,31 +40,41 @@ def main():
)
logging.info(f"Text to synthesize: {text}")

# Test synth_to_bytestream method
output_file_bytestream = "output_streamed_google.wav" # Change to 'mp3' or 'flac' as needed
audio_format = "wav" # Supported formats: 'wav', 'mp3', 'flac'

if audio_format.lower() == 'wav':
# Initialize WAV file
with wave.open(output_file_bytestream, 'wb') as wf:
wf.setnchannels(1) # Mono
wf.setsampwidth(2) # 16-bit PCM
wf.setframerate(tts.audio_rate)
logging.info(f"Starting synthesis and streaming to {output_file_bytestream} in {audio_format} format.")

for chunk_idx, audio_chunk in enumerate(tts.synth_to_bytestream(text, format=audio_format)):
logging.info(f"Received audio chunk {chunk_idx} with size {len(audio_chunk)} bytes")
wf.writeframes(audio_chunk) # Write PCM frames to WAV file

logging.info(f"Audio successfully saved to {output_file_bytestream} in {audio_format} format via synth_to_bytestream.")

else:
# Handle non-WAV formats if implemented
pass

## Test synth_to_bytestream method
#output_file_bytestream = "output_streamed_google.wav" # Change to 'mp3' or 'flac' as needed
#audio_format = "wav" # Supported formats: 'wav', 'mp3', 'flac'
#
#if audio_format.lower() == 'wav':
# # Initialize WAV file
# with wave.open(output_file_bytestream, 'wb') as wf:
# wf.setnchannels(1) # Mono
# wf.setsampwidth(2) # 16-bit PCM
# wf.setframerate(tts.audio_rate)
# logging.info(f"Starting synthesis and streaming to {output_file_bytestream} in {audio_format} format.")
#
# for chunk_idx, audio_chunk in enumerate(tts.synth_to_bytestream(text, format=audio_format)):
# logging.info(f"Received audio chunk {chunk_idx} with size {len(audio_chunk)} bytes")
# wf.writeframes(audio_chunk) # Write PCM frames to WAV file
#
# logging.info(f"Audio successfully saved to {output_file_bytestream} in {audio_format} format via synth_to_bytestream.")
#
#else:
# # Handle non-WAV formats if implemented
# pass
#
# Test speak_streamed method
output_file_speak_streamed = "output_speak_streamed_google.wav"
tts.speak_streamed(text)
# Pause playback immediately (the delay below is commented out)
# time.sleep(2)
tts.pause_playback()
print("Playback paused.")

# Resume playback after 3 seconds
time.sleep(3)
tts.resume_playback()
print("Playback resumed.")

logging.info(f"Audio successfully saved to {output_file_speak_streamed} in wav format via speak_streamed.")

except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion examples/test-google.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
# print("Resuming")
# # Stop after 2 seconds
# time.sleep(1)
# tts.stop_audio()
tts.stop_audio()
# print("Stopping.")
except Exception as e:
print(f"Error at pausing: {e}")
Expand Down
2 changes: 1 addition & 1 deletion examples/test-googleTrans.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# Define the text to be synthesized
text = "Hello, This is a word timing test"
start_time = time.time()
tts.speak(text)
tts.speak_streamed(text)
synthesis_time = time.time()
print(f"Synthesis time: {synthesis_time - start_time:.3f} seconds")
text = "Hello, This is a word timing test"
Expand Down
2 changes: 1 addition & 1 deletion examples/test-mms.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
ssml_text = tts.ssml.add(text_with_prosody)
print("ssml_text", ssml_text)

tts.speak(ssml_text)
tts.speak_streamed(ssml_text)
time.sleep(0.5)

print("save to file")
Expand Down
53 changes: 53 additions & 0 deletions examples/test-pico.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from tts_wrapper import PicoTTS, PicoClient
import json
import time
from pathlib import Path
import os

# Build the Pico engine from a default-constructed client, then speak one
# plain sentence through the streaming path before the volume checks.
client = PicoClient()
tts = PicoTTS(client)
text = "hello world i like monkeys"
tts.speak_streamed(text)

print(text)

# Volume control test: announce and speak one sentence per volume level.
print("Volume setting is from 0-100")
text_read = ""
try:
    # Each pair is (value passed to set_property, wording used in the sentence).
    for level, spoken in (("50", "fifty"), ("100", "a hundred"), ("10", "ten")):
        tts.set_property("volume", level)
        print(f"Setting volume at {level}")
        text_read = f"The current volume is at {spoken}"
        text_with_prosody = tts.construct_prosody_tag(text_read)
        ssml_text = tts.ssml.add(text_with_prosody)
        print("ssml_text", ssml_text)
        tts.speak(ssml_text)
        time.sleep(0.5)

    # Write the SSML string from the final iteration to a WAV file.
    print("save to file")
    tts.synth_to_file(ssml_text, "pico_output.wav", "wav")
except Exception as e:
    print(f"Error at setting volume: {e}")
55 changes: 28 additions & 27 deletions examples/test-polly.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ def on_end():
tts.connect('onStart', on_start)
tts.connect('onEnd', on_end)
print(tts)
tts.start_playback_with_callbacks(text, callback=my_callback)
# tts.start_playback_with_callbacks(text, callback=my_callback)
tts.speak_streamed(text)
print("save to file")
tts.synth_to_file(text, "polly_output.wav", "wav")
except Exception as e:
Expand All @@ -96,33 +97,33 @@ def on_end():
# volume control test
# print("Volume setting is from 0-100")
# text_read = ""
# try:
# tts.set_property("volume", "50")
# print("Setting volume at 50")
# text_read = f"The current volume is at 50"
# text_with_prosody = tts.construct_prosody_tag(text_read)
# ssml_text = tts.ssml.add(text_with_prosody)
# tts.speak_streamed(ssml_text)
# time.sleep(5)
try:
tts.set_property("volume", "50")
print("Setting volume at 50")
text_read = f"The current volume is at 50"
text_with_prosody = tts.construct_prosody_tag(text_read)
ssml_text = tts.ssml.add(text_with_prosody)
tts.speak_streamed(ssml_text)
time.sleep(5)
#
# #clear ssml so the previous text is not repeated
# tts.ssml.clear_ssml()
# tts.set_property("volume", "100")
# print("Setting volume at 100")
# text_read = f"The current volume is at 100"
# text_with_prosody = tts.construct_prosody_tag(text_read)
# ssml_text = tts.ssml.add(text_with_prosody)
# tts.speak_streamed(ssml_text)
# time.sleep(5)
tts.ssml.clear_ssml()
tts.set_property("volume", "100")
print("Setting volume at 100")
text_read = f"The current volume is at 100"
text_with_prosody = tts.construct_prosody_tag(text_read)
ssml_text = tts.ssml.add(text_with_prosody)
tts.speak_streamed(ssml_text)
time.sleep(5)
#
# tts.ssml.clear_ssml()
# tts.set_property("volume", "10")
# print("Setting volume at 10")
# text_read = f"The current volume is at 10"
# text_with_prosody = tts.construct_prosody_tag(text_read)
# ssml_text = tts.ssml.add(text_with_prosody)
# tts.speak_streamed(ssml_text)
# time.sleep(5)
tts.ssml.clear_ssml()
tts.set_property("volume", "10")
print("Setting volume at 10")
text_read = f"The current volume is at 10"
text_with_prosody = tts.construct_prosody_tag(text_read)
ssml_text = tts.ssml.add(text_with_prosody)
tts.speak_streamed(ssml_text)
time.sleep(5)
#
# except Exception as e:
# print(f"Error at setting volume: {e}")
except Exception as e:
print(f"Error at setting volume: {e}")
64 changes: 64 additions & 0 deletions examples/test-sapi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from tts_wrapper import SAPITTS, SAPIClient, SAPISSML
import json
import time
from pathlib import Path
import os

# Build the SAPI engine from a default-constructed client, then speak one
# plain sentence through the streaming path before the volume checks.
client = SAPIClient()
tts = SAPITTS(client)
text = "hello world i like monkeys"
tts.speak_streamed(text)

print(text)

# Volume control test: announce and speak one sentence per volume level.
print("Volume setting is from 0-100")
text_read = ""
try:
    # Each pair is (value passed to set_property, wording used in the sentence).
    for level, spoken in (("50", "fifty"), ("100", "a hundred"), ("10", "ten")):
        tts.set_property("volume", level)
        print(f"Setting volume at {level}")
        text_read = f"The current volume is at {spoken}"
        text_with_prosody = tts.construct_prosody_tag(text_read)
        ssml_text = tts.ssml.add(text_with_prosody)
        print("ssml_text", ssml_text)
        tts.speak_streamed(ssml_text)
        time.sleep(0.5)

    # Write the SSML string from the final iteration to a WAV file. The file
    # is named after this engine; the earlier "mms_output.wav" was a leftover
    # from another example and would have overwritten that example's output.
    print("save to file")
    tts.synth_to_file(ssml_text, "sapi_output.wav", "wav")
except Exception as e:
    print(f"Error at setting volume: {e}")

# Demonstrate saving audio to a file
try:
    ssml_text = tts.ssml.add("This is me speaking with for save to file function and SAPI text to speech")
    output_file = Path("output_sapi.mp3")
    tts.synth_to_file(ssml_text, str(output_file), format='mp3')
    # Alternatively, play it directly instead of saving:
    # tts.speak(ssml_text)
    print(f"Audio content saved to {output_file}")
except Exception as e:
    print(f"Error at saving: {e}")
2 changes: 1 addition & 1 deletion examples/test-sherpaonnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def main():
f.write(audio_chunk) # Write the chunk to the file

logging.info(f"Audio successfully saved to {output_file} in {audio_format} format.")

tts.speak_streamed(text)
except Exception as e:
logging.error(f"An error occurred during synthesis: {e}")

Expand Down
54 changes: 54 additions & 0 deletions examples/test-uwp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from tts_wrapper import UWPTTS, UWPClient
import json
import time
from pathlib import Path
import os

# Create a UWP client with default arguments and wrap it in the TTS engine.
client = UWPClient()
tts = UWPTTS(client)
text = "hello world i like monkeys"
# Uncomment to print the voices reported by the engine.
#print(tts.get_voices())
# Speak the sample sentence through the streaming path, then echo it.
tts.speak_streamed(text)

print(text)

# Volume control test -- currently disabled: everything below is commented out.
#print("Volume setting is from 0-100")
#text_read = ""
#try:
# tts.set_property("volume", "50")
# print("Setting volume at 50")
# text_read = f"The current volume is at fifty"
# text_with_prosody = tts.construct_prosody_tag(text_read)
# ssml_text = tts.ssml.add(text_with_prosody)
# print("ssml_text", ssml_text)
# tts.speak(ssml_text)
# time.sleep(0.5)

# NOTE(review): the original comment here promised to clear the SSML so the
# previous text is not repeated, but no clearing call follows -- confirm.

# tts.set_property("volume", "100")
# print("Setting volume at 100")
# text_read = f"The current volume is at a hundred"
# text_with_prosody = tts.construct_prosody_tag(text_read)
# ssml_text = tts.ssml.add(text_with_prosody)
# print("ssml_text", ssml_text)

# tts.speak(ssml_text)
# time.sleep(0.5)

# tts.set_property("volume", "10")
# print("Setting volume at 10")
# text_read = f"The current volume is at ten"
# text_with_prosody = tts.construct_prosody_tag(text_read)
# ssml_text = tts.ssml.add(text_with_prosody)
# print("ssml_text", ssml_text)

# tts.speak(ssml_text)
# time.sleep(0.5)

# print("save to file")
# NOTE(review): "mms_output.wav" looks copied from another example; rename
# before re-enabling this section.
# tts.synth_to_file(ssml_text, "mms_output.wav", "wav")
#except Exception as e:
# print(f"Error at setting volume: {e}")
Loading