willwade · willwade · Sep 22, 2024 · Sep 24, 2024
diff --git a/.gitignore b/.gitignore
@@ -29,3 +29,4 @@ cov.xml
 credentials-private.json
 examples/*.wav
 examples/*.mp3
+examples/ttsandtranslate-7dd2e2d80d42.json
diff --git a/examples/test-eleven.py b/examples/test-eleven.py
@@ -8,24 +8,26 @@
 
 client = ElevenLabsClient(credentials=(os.getenv('ELEVENLABS_API_KEY')))
 tts = ElevenLabsTTS(client)
-print(client.get_voices())
+#print(client.get_voices())
 # # # pausing
 try:
     ssml_text = tts.ssml.add(
-        "This is me speaking with Speak function and ElevenLabs"
+        "This is me speaking with Speak function and ElevenLabs. I should be hearing a sentence"
     )
+    print ("SSML TEXT")
+    print(ssml_text)
     tts.speak_streamed(ssml_text)
     # Pause after 5 seconds
     time.sleep(0.3)
     tts.pause_audio()
     print("Pausing..")
     # Resume after 3 seconds
     time.sleep(0.5)
-    tts.resume_audio()
+    #tts.resume_audio()
     print("Resuming")
     # Stop after 2 seconds
     time.sleep(1)
-    tts.stop_audio()
+    #tts.stop_audio()
     print("Stopping.")
 except Exception as e:
     print(f"Error at pausing: {e}")

diff --git a/examples/test-google-stream.py b/examples/test-google-stream.py
@@ -40,31 +40,41 @@ def main():
         )
         logging.info(f"Text to synthesize: {text}")
 
-        # Test synth_to_bytestream method
-        output_file_bytestream = "output_streamed_google.wav"  # Change to 'mp3' or 'flac' as needed
-        audio_format = "wav"  # Supported formats: 'wav', 'mp3', 'flac'
-
-        if audio_format.lower() == 'wav':
-            # Initialize WAV file
-            with wave.open(output_file_bytestream, 'wb') as wf:
-                wf.setnchannels(1)  # Mono
-                wf.setsampwidth(2)  # 16-bit PCM
-                wf.setframerate(tts.audio_rate)
-                logging.info(f"Starting synthesis and streaming to {output_file_bytestream} in {audio_format} format.")
-
-                for chunk_idx, audio_chunk in enumerate(tts.synth_to_bytestream(text, format=audio_format)):
-                    logging.info(f"Received audio chunk {chunk_idx} with size {len(audio_chunk)} bytes")
-                    wf.writeframes(audio_chunk)  # Write PCM frames to WAV file
-
-            logging.info(f"Audio successfully saved to {output_file_bytestream} in {audio_format} format via synth_to_bytestream.")
-
-        else:
-            # Handle non-WAV formats if implemented
-            pass
-
+        ## Test synth_to_bytestream method
+        #output_file_bytestream = "output_streamed_google.wav"  # Change to 'mp3' or 'flac' as needed
+        #audio_format = "wav"  # Supported formats: 'wav', 'mp3', 'flac'
+        #
+        #if audio_format.lower() == 'wav':
+        #    # Initialize WAV file
+        #    with wave.open(output_file_bytestream, 'wb') as wf:
+        #        wf.setnchannels(1)  # Mono
+        #        wf.setsampwidth(2)  # 16-bit PCM
+        #        wf.setframerate(tts.audio_rate)
+        #        logging.info(f"Starting synthesis and streaming to {output_file_bytestream} in {audio_format} format.")
+        #
+        #        for chunk_idx, audio_chunk in enumerate(tts.synth_to_bytestream(text, format=audio_format)):
+        #            logging.info(f"Received audio chunk {chunk_idx} with size {len(audio_chunk)} bytes")
+        #            wf.writeframes(audio_chunk)  # Write PCM frames to WAV file
+        #
+        #    logging.info(f"Audio successfully saved to {output_file_bytestream} in {audio_format} format via synth_to_bytestream.")
+        #
+        #else:
+        #    # Handle non-WAV formats if implemented
+        #    pass
+        #
         # Test speak_streamed method
         output_file_speak_streamed = "output_speak_streamed_google.wav"
         tts.speak_streamed(text)
+        # Pause playback right away; the delay before pausing is currently disabled.
+        # time.sleep(2)
+        tts.pause_playback()
+        print("Playback paused.")
+
+        # Resume playback after 3 seconds
+        time.sleep(3)
+        tts.resume_playback()
+        print("Playback resumed.")
+
         logging.info(f"Audio successfully saved to {output_file_speak_streamed} in wav format via speak_streamed.")
 
     except Exception as e:

diff --git a/examples/test-google.py b/examples/test-google.py
@@ -25,7 +25,7 @@
 #     print("Resuming")
 #     # Stop after 2 seconds
 #     time.sleep(1)
-#     tts.stop_audio()
+    tts.stop_audio()
 #     print("Stopping.")
 except Exception as e:
     print(f"Error at pausing: {e}")

diff --git a/examples/test-googleTrans.py b/examples/test-googleTrans.py
@@ -17,7 +17,7 @@
     # Define the text to be synthesized
     text = "Hello, This is a word timing test"
     start_time = time.time()
-    tts.speak(text)
+    tts.speak_streamed(text)
     synthesis_time = time.time()
     print(f"Synthesis time: {synthesis_time - start_time:.3f} seconds")
     text = "Hello, This is a word timing test"

diff --git a/examples/test-mms.py b/examples/test-mms.py
@@ -44,7 +44,7 @@
     ssml_text = tts.ssml.add(text_with_prosody)
     print("ssml_text", ssml_text)
 
-    tts.speak(ssml_text)
+    tts.speak_streamed(ssml_text)
     time.sleep(0.5)
 
     print("save to file")

diff --git a/examples/test-pico.py b/examples/test-pico.py
@@ -0,0 +1,53 @@
+from tts_wrapper import PicoTTS, PicoClient
+import json
+import time
+from pathlib import Path
+import os
+
+# Build the client with its default settings (no arguments are passed).
+client = PicoClient()
+tts = PicoTTS(client)
+text = "hello world i like monkeys"
+tts.speak_streamed(text)
+
+print(text)
+
+# Volume control test: speak one sentence each at volume 50, 100 and 10.
+print("Volume setting is from 0-100")
+text_read = ""
+try:
+    tts.set_property("volume", "50")
+    print("Setting volume at 50")
+    text_read = "The current volume is at fifty"
+    text_with_prosody = tts.construct_prosody_tag(text_read)
+    ssml_text = tts.ssml.add(text_with_prosody)
+    print("ssml_text", ssml_text)
+    tts.speak(ssml_text)
+    time.sleep(0.5)
+
+    # Clear the accumulated SSML so the previous text is not repeated.
+    tts.ssml.clear_ssml()
+    tts.set_property("volume", "100")
+    print("Setting volume at 100")
+    text_read = "The current volume is at a hundred"
+    text_with_prosody = tts.construct_prosody_tag(text_read)
+    ssml_text = tts.ssml.add(text_with_prosody)
+    print("ssml_text", ssml_text)
+    tts.speak(ssml_text)
+    time.sleep(0.5)
+
+    # Same reset before the last volume level.
+    tts.ssml.clear_ssml()
+    tts.set_property("volume", "10")
+    print("Setting volume at 10")
+    text_read = "The current volume is at ten"
+    text_with_prosody = tts.construct_prosody_tag(text_read)
+    ssml_text = tts.ssml.add(text_with_prosody)
+    print("ssml_text", ssml_text)
+    tts.speak(ssml_text)
+    time.sleep(0.5)
+
+    print("save to file")
+    tts.synth_to_file(ssml_text, "pico_output.wav", "wav")
+except Exception as e:
+    print(f"Error at setting volume: {e}")
diff --git a/examples/test-polly.py b/examples/test-polly.py
@@ -87,7 +87,8 @@ def on_end():
     tts.connect('onStart', on_start)
     tts.connect('onEnd', on_end)
     print(tts)
-    tts.start_playback_with_callbacks(text, callback=my_callback)
+#    tts.start_playback_with_callbacks(text, callback=my_callback)
+    tts.speak_streamed(text)
     print("save to file")
     tts.synth_to_file(text, "polly_output.wav", "wav")
 except Exception as e:
@@ -96,33 +97,33 @@ def on_end():
 # volume control test
 # print("Volume setting is from 0-100")
 # text_read = ""
-# try:
-#     tts.set_property("volume", "50")
-#     print("Setting volume at 50")
-#     text_read = f"The current volume is at 50"
-#     text_with_prosody = tts.construct_prosody_tag(text_read)
-#     ssml_text = tts.ssml.add(text_with_prosody)
-#     tts.speak_streamed(ssml_text)
-#     time.sleep(5)
+try:
+    tts.set_property("volume", "50")
+    print("Setting volume at 50")
+    text_read = f"The current volume is at 50"
+    text_with_prosody = tts.construct_prosody_tag(text_read)
+    ssml_text = tts.ssml.add(text_with_prosody)
+    tts.speak_streamed(ssml_text)
+    time.sleep(5)
 #     
 #     #clear ssml so the previous text is not repeated
-#     tts.ssml.clear_ssml()
-#     tts.set_property("volume", "100")
-#     print("Setting volume at 100")
-#     text_read = f"The current volume is at 100"
-#     text_with_prosody = tts.construct_prosody_tag(text_read)
-#     ssml_text = tts.ssml.add(text_with_prosody)
-#     tts.speak_streamed(ssml_text)
-#     time.sleep(5)
+    tts.ssml.clear_ssml()
+    tts.set_property("volume", "100")
+    print("Setting volume at 100")
+    text_read = f"The current volume is at 100"
+    text_with_prosody = tts.construct_prosody_tag(text_read)
+    ssml_text = tts.ssml.add(text_with_prosody)
+    tts.speak_streamed(ssml_text)
+    time.sleep(5)
 # 
-#     tts.ssml.clear_ssml()
-#     tts.set_property("volume", "10")
-#     print("Setting volume at 10")
-#     text_read = f"The current volume is at 10"
-#     text_with_prosody = tts.construct_prosody_tag(text_read)        
-#     ssml_text = tts.ssml.add(text_with_prosody)
-#     tts.speak_streamed(ssml_text)
-#     time.sleep(5)
+    tts.ssml.clear_ssml()
+    tts.set_property("volume", "10")
+    print("Setting volume at 10")
+    text_read = f"The current volume is at 10"
+    text_with_prosody = tts.construct_prosody_tag(text_read)        
+    ssml_text = tts.ssml.add(text_with_prosody)
+    tts.speak_streamed(ssml_text)
+    time.sleep(5)
 # 
-# except Exception as e:
-#     print(f"Error at setting volume: {e}")
+except Exception as e:
+    print(f"Error at setting volume: {e}")
diff --git a/examples/test-sapi.py b/examples/test-sapi.py
@@ -0,0 +1,64 @@
+from tts_wrapper import SAPITTS, SAPIClient, SAPISSML
+import json
+import time
+from pathlib import Path
+import os
+
+# Build the client with its default settings (no arguments are passed).
+client = SAPIClient()
+tts = SAPITTS(client)
+text = "hello world i like monkeys"
+tts.speak_streamed(text)
+
+print(text)
+
+# Volume control test: speak one sentence each at volume 50, 100 and 10.
+print("Volume setting is from 0-100")
+text_read = ""
+try:
+    tts.set_property("volume", "50")
+    print("Setting volume at 50")
+    text_read = "The current volume is at fifty"
+    text_with_prosody = tts.construct_prosody_tag(text_read)
+    ssml_text = tts.ssml.add(text_with_prosody)
+    print("ssml_text", ssml_text)
+    tts.speak_streamed(ssml_text)
+    time.sleep(0.5)
+
+    # Clear the accumulated SSML so the previous text is not repeated.
+    tts.ssml.clear_ssml()
+    tts.set_property("volume", "100")
+    print("Setting volume at 100")
+    text_read = "The current volume is at a hundred"
+    text_with_prosody = tts.construct_prosody_tag(text_read)
+    ssml_text = tts.ssml.add(text_with_prosody)
+    print("ssml_text", ssml_text)
+    tts.speak_streamed(ssml_text)
+    time.sleep(0.5)
+
+    # Same reset before the last volume level.
+    tts.ssml.clear_ssml()
+    tts.set_property("volume", "10")
+    print("Setting volume at 10")
+    text_read = "The current volume is at ten"
+    text_with_prosody = tts.construct_prosody_tag(text_read)
+    ssml_text = tts.ssml.add(text_with_prosody)
+    print("ssml_text", ssml_text)
+    tts.speak_streamed(ssml_text)
+    time.sleep(0.5)
+
+    print("save to file")
+    tts.synth_to_file(ssml_text, "sapi_output.wav", "wav")
+except Exception as e:
+    print(f"Error at setting volume: {e}")
+
+# Demonstrate saving audio to a file
+try:
+    tts.ssml.clear_ssml()  # start from an empty SSML buffer for the saved clip
+    ssml_text = tts.ssml.add("This is me speaking with for save to file function and SAPI text to speech")
+    output_file = Path("output_sapi.mp3")
+    tts.synth_to_file(ssml_text, str(output_file), format='mp3')
+    # or you could do: tts.speak(ssml_text)
+    print(f"Audio content saved to {output_file}")
+except Exception as e:
+    print(f"Error at saving: {e}")
diff --git a/examples/test-sherpaonnx.py b/examples/test-sherpaonnx.py
@@ -47,7 +47,7 @@ def main():
                 f.write(audio_chunk)  # Write the chunk to the file
 
         logging.info(f"Audio successfully saved to {output_file} in {audio_format} format.")
-
+        tts.speak_streamed(text)
     except Exception as e:
         logging.error(f"An error occurred during synthesis: {e}")
 

diff --git a/examples/test-uwp.py b/examples/test-uwp.py
@@ -0,0 +1,54 @@
+from tts_wrapper import UWPTTS, UWPClient
+import json
+import time
+from pathlib import Path
+import os
+
+# Build the client with its default settings (no arguments are passed).
+client = UWPClient()
+tts = UWPTTS(client)
+text = "hello world i like monkeys"
+#print(tts.get_voices())
+tts.speak_streamed(text)
+
+print(text)
+
+# volume control test (currently disabled: everything below is commented out)
+#print("Volume setting is from 0-100")
+#text_read = ""
+#try:
+#    tts.set_property("volume", "50")
+#    print("Setting volume at 50")
+#    text_read = f"The current volume is at fifty"
+#    text_with_prosody = tts.construct_prosody_tag(text_read)
+#    ssml_text = tts.ssml.add(text_with_prosody)
+#    print("ssml_text", ssml_text)
+#    tts.speak(ssml_text)
+#    time.sleep(0.5)
+
+#    # clear ssml so the previous text is not repeated
+
+#    tts.set_property("volume", "100")
+#    print("Setting volume at 100")
+#    text_read = f"The current volume is at a hundred"
+#    text_with_prosody = tts.construct_prosody_tag(text_read)
+#    ssml_text = tts.ssml.add(text_with_prosody)
+#    print("ssml_text", ssml_text)
+
+#    tts.speak(ssml_text)
+#    time.sleep(0.5)
+
+#    tts.set_property("volume", "10")
+#    print("Setting volume at 10")
+#    text_read = f"The current volume is at ten"
+#    text_with_prosody = tts.construct_prosody_tag(text_read)
+#    ssml_text = tts.ssml.add(text_with_prosody)
+#    print("ssml_text", ssml_text)
+
+#    tts.speak(ssml_text)
+#    time.sleep(0.5)
+
+#    print("save to file")
+#    tts.synth_to_file(ssml_text, "uwp_output.wav", "wav")
+#except Exception as e:
+#    print(f"Error at setting volume: {e}")