From 60845d61a9c99a9e1d6424bc3f05d80ee5443213 Mon Sep 17 00:00:00 2001 From: Chafid Ahmad Date: Sun, 22 Sep 2024 17:29:29 +0700 Subject: [PATCH 1/2] initial commit --- .gitignore | 1 + examples/test-google-stream.py | 54 ++++++++++++++++------------ examples/test-sherpaonnx.py | 2 +- tts_wrapper/engines/google/google.py | 8 ++++- tts_wrapper/tts.py | 6 ++++ 5 files changed, 47 insertions(+), 24 deletions(-) diff --git a/.gitignore b/.gitignore index 0deceeb8..9f8f58ec 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,4 @@ cov.xml credentials-private.json examples/*.wav examples/*.mp3 +examples/ttsandtranslate-7dd2e2d80d42.json diff --git a/examples/test-google-stream.py b/examples/test-google-stream.py index e6d3da0e..6a9eb3fe 100644 --- a/examples/test-google-stream.py +++ b/examples/test-google-stream.py @@ -40,31 +40,41 @@ def main(): ) logging.info(f"Text to synthesize: {text}") - # Test synth_to_bytestream method - output_file_bytestream = "output_streamed_google.wav" # Change to 'mp3' or 'flac' as needed - audio_format = "wav" # Supported formats: 'wav', 'mp3', 'flac' - - if audio_format.lower() == 'wav': - # Initialize WAV file - with wave.open(output_file_bytestream, 'wb') as wf: - wf.setnchannels(1) # Mono - wf.setsampwidth(2) # 16-bit PCM - wf.setframerate(tts.audio_rate) - logging.info(f"Starting synthesis and streaming to {output_file_bytestream} in {audio_format} format.") - - for chunk_idx, audio_chunk in enumerate(tts.synth_to_bytestream(text, format=audio_format)): - logging.info(f"Received audio chunk {chunk_idx} with size {len(audio_chunk)} bytes") - wf.writeframes(audio_chunk) # Write PCM frames to WAV file - - logging.info(f"Audio successfully saved to {output_file_bytestream} in {audio_format} format via synth_to_bytestream.") - - else: - # Handle non-WAV formats if implemented - pass - + ## Test synth_to_bytestream method + #output_file_bytestream = "output_streamed_google.wav" # Change to 'mp3' or 'flac' as needed + #audio_format = "wav" # Supported formats: 'wav', 'mp3', 'flac' + # + #if audio_format.lower() == 'wav': + # # Initialize WAV file + # with wave.open(output_file_bytestream, 'wb') as wf: + # wf.setnchannels(1) # Mono + # wf.setsampwidth(2) # 16-bit PCM + # wf.setframerate(tts.audio_rate) + # logging.info(f"Starting synthesis and streaming to {output_file_bytestream} in {audio_format} format.") + # + # for chunk_idx, audio_chunk in enumerate(tts.synth_to_bytestream(text, format=audio_format)): + # logging.info(f"Received audio chunk {chunk_idx} with size {len(audio_chunk)} bytes") + # wf.writeframes(audio_chunk) # Write PCM frames to WAV file + # + # logging.info(f"Audio successfully saved to {output_file_bytestream} in {audio_format} format via synth_to_bytestream.") + # + #else: + # # Handle non-WAV formats if implemented + # pass + # # Test speak_streamed method output_file_speak_streamed = "output_speak_streamed_google.wav" tts.speak_streamed(text) + # Pause playback after 5 seconds + # time.sleep(2) + tts.pause_playback() + print("Playback paused.") + + # Resume playback after 3 seconds + time.sleep(3) + tts.resume_playback() + print("Playback resumed.") + logging.info(f"Audio successfully saved to {output_file_speak_streamed} in wav format via speak_streamed.") except Exception as e: diff --git a/examples/test-sherpaonnx.py b/examples/test-sherpaonnx.py index 3182cbd9..32d4e187 100755 --- a/examples/test-sherpaonnx.py +++ b/examples/test-sherpaonnx.py @@ -47,7 +47,7 @@ def main(): f.write(audio_chunk) # Write the chunk to the file 
logging.info(f"Audio successfully saved to {output_file} in {audio_format} format.") - + tts.speak_streamed(text) except Exception as e: logging.error(f"An error occurred during synthesis: {e}") diff --git a/tts_wrapper/engines/google/google.py b/tts_wrapper/engines/google/google.py index d93c3a88..9d17a07f 100644 --- a/tts_wrapper/engines/google/google.py +++ b/tts_wrapper/engines/google/google.py @@ -35,6 +35,7 @@ def __init__( self.audio_started = False self.audio_stopped = False self.audio_killed = False + self.is_paused = False # Audio playback callback, called continuously to stream audio from the buffer def play_audio_callback( @@ -273,7 +274,6 @@ def speak_streamed( logging.info( f"Processing audio chunk {chunk_idx} with size {len(audio_chunk)} bytes" ) - if audio_format.lower() == "wav": # Convert bytes back to numpy float32 array for playback # Assuming audio_chunk is raw PCM data (LINEAR16) @@ -325,6 +325,12 @@ def speak_streamed( f"Audio successfully saved to {save_to_file_path} in {audio_format} format." ) + def pause_playback(self): + is_paused = True + + def resume_playback(self): + is_paused = False + def play_audio(self): """ Plays audio from the audio_buffer using sounddevice. diff --git a/tts_wrapper/tts.py b/tts_wrapper/tts.py index f56c1412..f132356e 100644 --- a/tts_wrapper/tts.py +++ b/tts_wrapper/tts.py @@ -446,3 +446,9 @@ def _convert_to_ssml(self, text: str) -> str: ssml_parts.append(f'{word}') ssml_parts.append("") return " ".join(ssml_parts) + + def pause_playback(self): + pass + + def resume_playback(self): + pass \ No newline at end of file From 0cdc177bcb30c8ddc0b435860ac6233834612ebd Mon Sep 17 00:00:00 2001 From: Chafid Ahmad Date: Tue, 24 Sep 2024 17:09:31 +0700 Subject: [PATCH 2/2] adding speak_streamed and related functions too all engines --- examples/test-eleven.py | 10 +- examples/test-google.py | 2 +- examples/test-googleTrans.py | 2 +- examples/test-mms.py | 2 +- examples/test-pico.py | 53 ++++ examples/test-polly.py | 55 +++-- examples/test-sapi.py | 64 +++++ examples/test-uwp.py | 54 ++++ examples/test-watson.py | 79 +++--- examples/test-witai.py | 11 +- tts_wrapper/engines/__init__.py | 10 +- tts_wrapper/engines/elevenlabs/elevenlabs.py | 227 ++++++++++++++++- tts_wrapper/engines/google/google.py | 6 - .../engines/googletrans/googletrans.py | 229 ++++++++++++++++- tts_wrapper/engines/microsoft/microsoft.py | 226 ++++++++++++++++- tts_wrapper/engines/mms/mms.py | 224 ++++++++++++++++- tts_wrapper/engines/pico/pico.py | 224 ++++++++++++++++- tts_wrapper/engines/polly/polly.py | 230 +++++++++++++++++- tts_wrapper/engines/sapi/sapi.py | 229 ++++++++++++++++- tts_wrapper/engines/watson/watson.py | 229 ++++++++++++++++- tts_wrapper/engines/witai/witai.py | 229 ++++++++++++++++- tts_wrapper/tts.py | 1 + 22 files changed, 2297 insertions(+), 99 deletions(-) create mode 100644 examples/test-pico.py create mode 100644 examples/test-sapi.py create mode 100644 examples/test-uwp.py diff --git a/examples/test-eleven.py b/examples/test-eleven.py index 5c701347..962dc342 100644 --- a/examples/test-eleven.py +++ b/examples/test-eleven.py @@ -8,12 +8,14 @@ client = ElevenLabsClient(credentials=(os.getenv('ELEVENLABS_API_KEY'))) tts = ElevenLabsTTS(client) -print(client.get_voices()) +#print(client.get_voices()) # # # pausing try: ssml_text = tts.ssml.add( - "This is me speaking with Speak function and ElevenLabs" + "This is me speaking with Speak function and ElevenLabs. 
I should be hearing a sentence" ) + print ("SSML TEXT") + print(ssml_text) tts.speak_streamed(ssml_text) # Pause after 5 seconds time.sleep(0.3) @@ -21,11 +23,11 @@ print("Pausing..") # Resume after 3 seconds time.sleep(0.5) - tts.resume_audio() + #tts.resume_audio() print("Resuming") # Stop after 2 seconds time.sleep(1) - tts.stop_audio() + #tts.stop_audio() print("Stopping.") except Exception as e: print(f"Error at pausing: {e}") diff --git a/examples/test-google.py b/examples/test-google.py index 41c43012..e5d2f4b1 100644 --- a/examples/test-google.py +++ b/examples/test-google.py @@ -25,7 +25,7 @@ # print("Resuming") # # Stop after 2 seconds # time.sleep(1) -# tts.stop_audio() + tts.stop_audio() # print("Stopping.") except Exception as e: print(f"Error at pausing: {e}") diff --git a/examples/test-googleTrans.py b/examples/test-googleTrans.py index b5a5742f..14b28620 100644 --- a/examples/test-googleTrans.py +++ b/examples/test-googleTrans.py @@ -17,7 +17,7 @@ # Define the text to be synthesized text = "Hello, This is a word timing test" start_time = time.time() - tts.speak(text) + tts.speak_streamed(text) synthesis_time = time.time() print(f"Synthesis time: {synthesis_time - start_time:.3f} seconds") text = "Hello, This is a word timing test" diff --git a/examples/test-mms.py b/examples/test-mms.py index b86fec3d..0975ca58 100644 --- a/examples/test-mms.py +++ b/examples/test-mms.py @@ -44,7 +44,7 @@ ssml_text = tts.ssml.add(text_with_prosody) print("ssml_text", ssml_text) - tts.speak(ssml_text) + tts.speak_streamed(ssml_text) time.sleep(0.5) print("save to file") diff --git a/examples/test-pico.py b/examples/test-pico.py new file mode 100644 index 00000000..08404617 --- /dev/null +++ b/examples/test-pico.py @@ -0,0 +1,53 @@ +from tts_wrapper import PicoTTS, PicoClient +import json +import time +from pathlib import Path +import os + +# Initialize the client with only the lang parameter +client = PicoClient() +tts = PicoTTS(client) +text = "hello world i like monkeys" +tts.speak_streamed(text) + +print(text) + +# volume control test +print("Volume setting is from 0-100") +text_read = "" +try: + tts.set_property("volume", "50") + print("Setting volume at 50") + text_read = f"The current volume is at fifty" + text_with_prosody = tts.construct_prosody_tag(text_read) + ssml_text = tts.ssml.add(text_with_prosody) + print("ssml_text", ssml_text) + tts.speak(ssml_text) + time.sleep(0.5) + + #clear ssml so the previous text is not repeated + + tts.set_property("volume", "100") + print("Setting volume at 100") + text_read = f"The current volume is at a hundred" + text_with_prosody = tts.construct_prosody_tag(text_read) + ssml_text = tts.ssml.add(text_with_prosody) + print("ssml_text", ssml_text) + + tts.speak(ssml_text) + time.sleep(0.5) + + tts.set_property("volume", "10") + print("Setting volume at 10") + text_read = f"The current volume is at ten" + text_with_prosody = tts.construct_prosody_tag(text_read) + ssml_text = tts.ssml.add(text_with_prosody) + print("ssml_text", ssml_text) + + tts.speak(ssml_text) + time.sleep(0.5) + + print("save to file") + tts.synth_to_file(ssml_text, "pico_output.wav", "wav") +except Exception as e: + print(f"Error at setting volume: {e}") \ No newline at end of file diff --git a/examples/test-polly.py b/examples/test-polly.py index f9ea227d..283ab3bb 100644 --- a/examples/test-polly.py +++ b/examples/test-polly.py @@ -87,7 +87,8 @@ def on_end(): tts.connect('onStart', on_start) tts.connect('onEnd', on_end) print(tts) - tts.start_playback_with_callbacks(text, 
callback=my_callback) +# tts.start_playback_with_callbacks(text, callback=my_callback) + tts.speak_streamed(text) print("save to file") tts.synth_to_file(text, "polly_output.wav", "wav") except Exception as e: @@ -96,33 +97,33 @@ def on_end(): # volume control test # print("Volume setting is from 0-100") # text_read = "" -# try: -# tts.set_property("volume", "50") -# print("Setting volume at 50") -# text_read = f"The current volume is at 50" -# text_with_prosody = tts.construct_prosody_tag(text_read) -# ssml_text = tts.ssml.add(text_with_prosody) -# tts.speak_streamed(ssml_text) -# time.sleep(5) +try: + tts.set_property("volume", "50") + print("Setting volume at 50") + text_read = f"The current volume is at 50" + text_with_prosody = tts.construct_prosody_tag(text_read) + ssml_text = tts.ssml.add(text_with_prosody) + tts.speak_streamed(ssml_text) + time.sleep(5) # # #clear ssml so the previous text is not repeated -# tts.ssml.clear_ssml() -# tts.set_property("volume", "100") -# print("Setting volume at 100") -# text_read = f"The current volume is at 100" -# text_with_prosody = tts.construct_prosody_tag(text_read) -# ssml_text = tts.ssml.add(text_with_prosody) -# tts.speak_streamed(ssml_text) -# time.sleep(5) + tts.ssml.clear_ssml() + tts.set_property("volume", "100") + print("Setting volume at 100") + text_read = f"The current volume is at 100" + text_with_prosody = tts.construct_prosody_tag(text_read) + ssml_text = tts.ssml.add(text_with_prosody) + tts.speak_streamed(ssml_text) + time.sleep(5) # -# tts.ssml.clear_ssml() -# tts.set_property("volume", "10") -# print("Setting volume at 10") -# text_read = f"The current volume is at 10" -# text_with_prosody = tts.construct_prosody_tag(text_read) -# ssml_text = tts.ssml.add(text_with_prosody) -# tts.speak_streamed(ssml_text) -# time.sleep(5) + tts.ssml.clear_ssml() + tts.set_property("volume", "10") + print("Setting volume at 10") + text_read = f"The current volume is at 10" + text_with_prosody = tts.construct_prosody_tag(text_read) + ssml_text = tts.ssml.add(text_with_prosody) + tts.speak_streamed(ssml_text) + time.sleep(5) # -# except Exception as e: -# print(f"Error at setting volume: {e}") +except Exception as e: + print(f"Error at setting volume: {e}") diff --git a/examples/test-sapi.py b/examples/test-sapi.py new file mode 100644 index 00000000..6730ffaa --- /dev/null +++ b/examples/test-sapi.py @@ -0,0 +1,64 @@ +from tts_wrapper import SAPITTS, SAPIClient, SAPISSML +import json +import time +from pathlib import Path +import os + +# Initialize the client with only the lang parameter +client = SAPIClient() +tts = SAPITTS(client) +text = "hello world i like monkeys" +tts.speak_streamed(text) + +print(text) + +# volume control test +print("Volume setting is from 0-100") +text_read = "" +try: + tts.set_property("volume", "50") + print("Setting volume at 50") + text_read = f"The current volume is at fifty" + text_with_prosody = tts.construct_prosody_tag(text_read) + ssml_text = tts.ssml.add(text_with_prosody) + print("ssml_text", ssml_text) + tts.speak_streamed(ssml_text) + time.sleep(0.5) + + #clear ssml so the previous text is not repeated + + tts.set_property("volume", "100") + print("Setting volume at 100") + text_read = f"The current volume is at a hundred" + text_with_prosody = tts.construct_prosody_tag(text_read) + ssml_text = tts.ssml.add(text_with_prosody) + print("ssml_text", ssml_text) + + tts.speak_streamed(ssml_text) + time.sleep(0.5) + + tts.set_property("volume", "10") + print("Setting volume at 10") + text_read = f"The current 
volume is at ten" + text_with_prosody = tts.construct_prosody_tag(text_read) + ssml_text = tts.ssml.add(text_with_prosody) + print("ssml_text", ssml_text) + + tts.speak_streamed(ssml_text) + time.sleep(0.5) + + print("save to file") + tts.synth_to_file(ssml_text, "mms_output.wav", "wav") +except Exception as e: + print(f"Error at setting volume: {e}") + +# # Demonstrate saving audio to a file +try: + ssml_text = tts.ssml.add(f"This is me speaking with for save to file function and SAPI text to speech") + output_file = Path(f"output_sapi.mp3") + tts.synth_to_file(ssml_text, str(output_file), format='mp3') +# # or you could do + #tts.speak(ssml_text) + print(f"Audio content saved to {output_file}") +except Exception as e: + print(f"Error at saving: {e}") \ No newline at end of file diff --git a/examples/test-uwp.py b/examples/test-uwp.py new file mode 100644 index 00000000..0c474154 --- /dev/null +++ b/examples/test-uwp.py @@ -0,0 +1,54 @@ +from tts_wrapper import UWPTTS, UWPClient +import json +import time +from pathlib import Path +import os + +# Initialize the client with only the lang parameter +client = UWPClient() +tts = UWPTTS(client) +text = "hello world i like monkeys" +#print(tts.get_voices()) +tts.speak_streamed(text) + +print(text) + +# volume control test +#print("Volume setting is from 0-100") +#text_read = "" +#try: +# tts.set_property("volume", "50") +# print("Setting volume at 50") +# text_read = f"The current volume is at fifty" +# text_with_prosody = tts.construct_prosody_tag(text_read) +# ssml_text = tts.ssml.add(text_with_prosody) +# print("ssml_text", ssml_text) +# tts.speak(ssml_text) +# time.sleep(0.5) + + #clear ssml so the previous text is not repeated + +# tts.set_property("volume", "100") +# print("Setting volume at 100") +# text_read = f"The current volume is at a hundred" +# text_with_prosody = tts.construct_prosody_tag(text_read) +# ssml_text = tts.ssml.add(text_with_prosody) +# print("ssml_text", ssml_text) + +# tts.speak(ssml_text) +# time.sleep(0.5) + +# tts.set_property("volume", "10") +# print("Setting volume at 10") +# text_read = f"The current volume is at ten" +# text_with_prosody = tts.construct_prosody_tag(text_read) +# ssml_text = tts.ssml.add(text_with_prosody) +# print("ssml_text", ssml_text) + +# tts.speak(ssml_text) +# time.sleep(0.5) + +# print("save to file") +# tts.synth_to_file(ssml_text, "mms_output.wav", "wav") +#except Exception as e: +# print(f"Error at setting volume: {e}") \ No newline at end of file diff --git a/examples/test-watson.py b/examples/test-watson.py index 8422abf1..f9767284 100644 --- a/examples/test-watson.py +++ b/examples/test-watson.py @@ -13,26 +13,27 @@ client = WatsonClient(credentials=(api_key, region, instance_id)) tts = WatsonTTS(client=client) -# print(client.get_voices()) +print(client.get_voices()) # # # pausing -# try: -# ssml_text = tts.ssml.add(f"This is me speaking with Speak function and ElevenLabs") -# tts.speak_streamed(ssml_text) -# # Pause after 5 seconds -# time.sleep(0.3) -# tts.pause_audio() -# print("Pausing..") -# # Resume after 3 seconds -# time.sleep(0.5) -# tts.resume_audio() -# print("Resuming") -# # Stop after 2 seconds -# time.sleep(1) -# tts.stop_audio() -# print("Stopping.") -# except Exception as e: -# print(f"Error at pausing: {e}") -# +try: + ssml_text = tts.ssml.add(f"This is me speaking with Speak function and ElevenLabs") + tts.speak_streamed(ssml_text) + print (ssml_text) + # Pause after 5 seconds + #time.sleep(0.3) + #tts.pause_audio() + #print("Pausing..") + # Resume after 3 
seconds + #time.sleep(0.5) + #tts.resume_audio() + print("Resuming") + # Stop after 2 seconds + #time.sleep(1) + #tts.stop_audio() + print("Stopping.") +except Exception as e: + print(f"Error at pausing: {e}") + # time.sleep(3) # # # Demonstrate saving audio to a file @@ -79,26 +80,26 @@ # ## calbacks -def my_callback(word: str, start_time: float, end_time: float): - duration = end_time - start_time - print(f"Word: {word}, Duration: {duration:.3f}s") - -def on_start(): - print('Speech started') - -def on_end(): - print('Speech ended') - -try: - text = "Hello, This is a word timing test" - tts.connect('onStart', on_start) - tts.connect('onEnd', on_end) - tts.start_playback_with_callbacks(text, callback=my_callback) -except Exception as e: - print(f"Error at callbacks: {e}") - -time.sleep(3) - +#def my_callback(word: str, start_time: float, end_time: float): +# duration = end_time - start_time +# print(f"Word: {word}, Duration: {duration:.3f}s") +# +#def on_start(): +# print('Speech started') +# +#def on_end(): +# print('Speech ended') +# +#try: +# text = "Hello, This is a word timing test" +# tts.connect('onStart', on_start) +# tts.connect('onEnd', on_end) +# tts.start_playback_with_callbacks(text, callback=my_callback) +#except Exception as e: +# print(f"Error at callbacks: {e}") +# +#time.sleep(3) +# try: text = "Test saving audio to file" print(text) diff --git a/examples/test-witai.py b/examples/test-witai.py index 460f00ee..64016b36 100644 --- a/examples/test-witai.py +++ b/examples/test-witai.py @@ -3,6 +3,7 @@ import os import os from load_credentials import load_credentials + # Load credentials load_credentials('credentials.json') @@ -44,10 +45,12 @@ def on_end(): print('Speech ended') try: - text = "Hello, This is a word timing test" - tts.connect('onStart', on_start) - tts.connect('onEnd', on_end) - tts.start_playback_with_callbacks(text, callback=my_callback) + #text = "Hello, This is a word timing test" + #tts.connect('onStart', on_start) + #tts.connect('onEnd', on_end) + #tts.start_playback_with_callbacks(text, callback=my_callback) + text = "This is a speak streamed function test using WITAI" + tts.speak_streamed(text) except Exception as e: print(f"Error at callbacks: {e}") diff --git a/tts_wrapper/engines/__init__.py b/tts_wrapper/engines/__init__.py index 1be424ae..66f3bbca 100644 --- a/tts_wrapper/engines/__init__.py +++ b/tts_wrapper/engines/__init__.py @@ -1,10 +1,10 @@ import sys -from .google import * -from .microsoft import * +from .google import * # +from .microsoft import * # from .pico import * -from .polly import * -from .sapi import * -from .watson import * +from .polly import * # +from .sapi import * # +from .watson import * from .elevenlabs import * from .uwp import * from .witai import * diff --git a/tts_wrapper/engines/elevenlabs/elevenlabs.py b/tts_wrapper/engines/elevenlabs/elevenlabs.py index f2b80f6b..1b7f225e 100644 --- a/tts_wrapper/engines/elevenlabs/elevenlabs.py +++ b/tts_wrapper/engines/elevenlabs/elevenlabs.py @@ -1,10 +1,16 @@ -from typing import Any, List, Dict, Optional, Tuple +from typing import Any, List, Dict, Optional, Tuple, Generator from ...exceptions import UnsupportedFileFormat from ...tts import AbstractTTS, FileFormat from . 
import ElevenLabsClient, ElevenLabsSSMLRoot import re import numpy as np import pathlib +import logging +import threading +import queue +import sounddevice as sd +import time +from io import BytesIO class ElevenLabsTTS(AbstractTTS): @@ -18,6 +24,12 @@ def __init__( self._client = client self.audio_rate = 22050 # Kept at 22050 self.set_voice(voice or "yoZ06aMxZJJ28mfd3POQ", lang or "en-US") + self.audio_buffer = queue.Queue() + self.playback_finished = threading.Event() + self.audio_started = False + self.audio_stopped = False + self.audio_killed = False + self.audio_format = "wav" # Default format def synth_to_bytes(self, text: Any) -> bytes: if not self._voice: @@ -49,6 +61,46 @@ def get_audio_duration(self) -> float: return num_samples / self.audio_rate return 0.0 + def _split_text(self, text: str) -> List[str]: + # Simple sentence splitter based on punctuation. + sentences = re.split(r"(?<=[.!?]) +", text) + return sentences + + # Audio playback callback, called continuously to stream audio from the buffer + def play_audio_callback( + self, outdata: np.ndarray, frames: int, time_info, status: sd.CallbackFlags + ): + if self.audio_killed or ( + self.audio_started and self.audio_buffer.empty() and self.audio_stopped + ): + logging.error("AUDIO KILLED OR STOPPED OR BUFFER EMPTY") + self.playback_finished.set() + return + + if self.audio_buffer.empty(): + outdata.fill(0) + return + + n = 0 + while n < frames and not self.audio_buffer.empty(): + remaining = frames - n + current_chunk = self.audio_buffer.queue[0] + k = current_chunk.shape[0] + + if remaining <= k: + outdata[n:, 0] = current_chunk[:remaining] + self.audio_buffer.queue[0] = current_chunk[remaining:] + n = frames + if self.audio_buffer.queue[0].shape[0] == 0: + self.audio_buffer.get() + break + + outdata[n : n + k, 0] = self.audio_buffer.get() + n += k + + if n < frames: + outdata[n:, 0] = 0 + def adjust_volume_value(self, generated_audio: bytes, volume: float) -> bytes: # check if generated audio length is odd. If it is, add an empty byte since np.frombuffer is expecting # an even length @@ -122,3 +174,176 @@ def set_voice(self, voice_id: str, lang_id: str = None): self._voice = voice_id # NB: Lang doesnt do much for ElevenLabs self._lang = lang_id + + + def synth_to_bytestream( + self, text: Any, format: Optional[str] = "wav" + ) -> Generator[bytes, None, None]: + """ + Synthesizes text to an in-memory bytestream in the specified audio format. + Yields audio data chunks as they are generated. + + :param text: The text to synthesize. + :param format: The desired audio format (e.g., 'wav', 'mp3', 'flac'). Defaults to 'wav'. + :return: A generator yielding bytes objects containing audio data. 
+ """ + try: + logging.info(f"[ElevenLabs.synth_to_bytestream] Synthesizing text: {text}") + + # Split the text into smaller segments (e.g., sentences) for incremental synthesis + + text_segments = self._split_text(str(text)) + + for segment_idx, segment in enumerate(text_segments): + logging.info(f"Synthesizing segment {segment_idx}: {segment}") + self.generated_audio, word_timings = self._client.synth(str(text), self._voice) + self.set_timings(word_timings) + + audio_bytes = self.generated_audio + + if format.lower() == "wav": + # Yield raw PCM data (skip WAV header if necessary) + # Google TTS returns LINEAR16 PCM in WAV format + audio_stream = BytesIO(audio_bytes) + audio_stream.seek(44) # Skip the 44-byte WAV header + chunk_size = 1024 # Number of bytes per chunk + + while True: + chunk = audio_stream.read(chunk_size) + if not chunk: + break + yield chunk + + elif format.lower() in ["mp3", "flac"]: + # Convert PCM to the desired format using _convert_audio + pcm_data = np.frombuffer(audio_bytes, dtype=np.int16) + converted_audio = self._convert_audio( + pcm_data, format, self.audio_rate + ) + chunk_size = 4096 # Number of bytes per chunk + audio_io = BytesIO(converted_audio) + + while True: + chunk = audio_io.read(chunk_size) + if not chunk: + break + yield chunk + + else: + raise UnsupportedFileFormat(f"Unsupported format: {format}") + + except Exception as e: + logging.error(f"Error in synth_to_bytestream: {e}") + raise + + def speak_streamed( + self, + text: str, + save_to_file_path: Optional[str] = None, + audio_format: Optional[str] = "wav", + ) -> None: + """ + Synthesizes text and plays it back using sounddevice in a streaming fashion. + Optionally saves the audio to a file after playback completes. + + :param text: The text to synthesize and play. + :param save_to_file_path: Path to save the audio file (optional). + :param audio_format: Audio format to save (e.g., 'wav', 'mp3', 'flac'). + """ + logging.info( + "[ElevenLabs.speak_streamed] Starting speech synthesis and playback..." + ) + + # Reset flags + self.audio_started = False + self.audio_stopped = False + self.playback_finished.clear() + + # Open the output file if saving is required + output_file = None + if save_to_file_path: + output_file = open(save_to_file_path, "wb") + logging.info( + f"Saving audio to {save_to_file_path} in {audio_format} format." + ) + + try: + # Start audio playback in a separate thread + playback_thread = threading.Thread(target=self.play_audio) + playback_thread.start() + + # Iterate over the generator returned by synth_to_bytestream + for chunk_idx, audio_chunk in enumerate( + self.synth_to_bytestream(text, format=audio_format) + ): + logging.info( + f"Processing audio chunk {chunk_idx} with size {len(audio_chunk)} bytes" + ) + if audio_format.lower() == "wav": + # Convert bytes back to numpy float32 array for playback + # Assuming audio_chunk is raw PCM data (LINEAR16) + samples = ( + np.frombuffer(audio_chunk, dtype=np.int16).astype(np.float32) + / 32767.0 + ) + elif audio_format.lower() in ["mp3", "flac"]: + # For formats like MP3 or FLAC, you need to decode them back to PCM + # This requires additional processing which is not implemented here + # For simplicity, we'll skip playback for non-WAV formats + samples = None + logging.warning( + f"Playback for format '{audio_format}' is not implemented." 
+ ) + else: + raise UnsupportedFileFormat(f"Unsupported format: {audio_format}") + + if samples is not None: + # Add audio samples to the buffer for streaming playback + self.audio_buffer.put(samples) + logging.info(f"Audio chunk {chunk_idx} added to buffer") + + # Write the chunk to the file if saving + if save_to_file_path: + output_file.write( + audio_chunk + ) # Corrected from f.write to output_file.write + + if not self.audio_started and samples is not None: + logging.info("Starting audio playback...") + self.audio_started = True + + # Signal that audio generation is complete + self.audio_stopped = True + + # Wait for playback to finish + playback_thread.join() + logging.info("Playback finished.") + + except Exception as e: + logging.error(f"Error during speak_streamed: {e}") + self.audio_killed = True + + finally: + if output_file: + output_file.close() + logging.info( + f"Audio successfully saved to {save_to_file_path} in {audio_format} format." + ) + + def play_audio(self): + """ + Plays audio from the audio_buffer using sounddevice. + """ + try: + logging.info("Starting audio playback thread...") + with sd.OutputStream( + samplerate=self.audio_rate, + channels=1, + callback=self.play_audio_callback, + blocksize=4096, + dtype="float32", + ): + self.playback_finished.wait() + except Exception as e: + logging.error(f"Error during audio playback: {e}") + self.audio_killed = True diff --git a/tts_wrapper/engines/google/google.py index 9d17a07f..13dde23d 100644 --- a/tts_wrapper/engines/google/google.py +++ b/tts_wrapper/engines/google/google.py @@ -35,7 +35,6 @@ def __init__( self.audio_started = False self.audio_stopped = False self.audio_killed = False - self.is_paused = False # Audio playback callback, called continuously to stream audio from the buffer def play_audio_callback( @@ -325,11 +324,6 @@ def speak_streamed( f"Audio successfully saved to {save_to_file_path} in {audio_format} format." ) - def pause_playback(self): - self.is_paused = True - - def resume_playback(self): - self.is_paused = False - def play_audio(self): """ Plays audio from the audio_buffer using sounddevice. diff --git a/tts_wrapper/engines/googletrans/googletrans.py index 8aa70a6f..60658753 100644 --- a/tts_wrapper/engines/googletrans/googletrans.py +++ b/tts_wrapper/engines/googletrans/googletrans.py @@ -1,10 +1,19 @@ # engine.py -from typing import Any, List, Optional, Dict +from typing import Any, List, Optional, Dict, Literal, Tuple, Generator from ...exceptions import UnsupportedFileFormat from ...tts import AbstractTTS, FileFormat from .
import GoogleTransClient, GoogleTransSSML + + import re + import numpy as np + import pathlib import logging + import threading + import queue + import sounddevice as sd + import time + from io import BytesIO class GoogleTransTTS(AbstractTTS): @@ -12,6 +21,12 @@ def __init__(self, client: GoogleTransClient): super().__init__() self.client = client self.audio_rate = 24000 + self.audio_buffer = queue.Queue() + self.playback_finished = threading.Event() + self.audio_started = False + self.audio_stopped = False + self.audio_killed = False + self.audio_format = "wav" # Default format def get_voices(self): return self.client.get_voices() @@ -36,3 +51,215 @@ def set_voice(self, voice_id: str, lang_id: Optional[str] = None): def construct_prosody_tag(self, text: str) -> str: # Implement SSML prosody tag construction if needed return text + + def speak_streamed( + self, + text: str, + save_to_file_path: Optional[str] = None, + audio_format: Optional[str] = "wav", + ) -> None: + """ + Synthesizes text and plays it back using sounddevice in a streaming fashion. + Optionally saves the audio to a file after playback completes. + + :param text: The text to synthesize and play. + :param save_to_file_path: Path to save the audio file (optional). + :param audio_format: Audio format to save (e.g., 'wav', 'mp3', 'flac'). + """ + logging.info( + "[GoogleTransTTS.speak_streamed] Starting speech synthesis and playback..." + ) + + # Reset flags + self.audio_started = False + self.audio_stopped = False + self.playback_finished.clear() + + # Open the output file if saving is required + output_file = None + if save_to_file_path: + output_file = open(save_to_file_path, "wb") + logging.info( + f"Saving audio to {save_to_file_path} in {audio_format} format." + ) + + try: + # Start audio playback in a separate thread + playback_thread = threading.Thread(target=self.play_audio) + playback_thread.start() + + # Iterate over the generator returned by synth_to_bytestream + for chunk_idx, audio_chunk in enumerate( + self.synth_to_bytestream(str(text), format=audio_format) + ): + logging.info( + f"Processing audio chunk {chunk_idx} with size {len(audio_chunk)} bytes" + ) + if audio_format.lower() == "wav": + # Convert bytes back to numpy float32 array for playback + # Assuming audio_chunk is raw PCM data (LINEAR16) + samples = ( + np.frombuffer(audio_chunk, dtype=np.int16).astype(np.float32) + / 32767.0 + ) + elif audio_format.lower() in ["mp3", "flac"]: + # For formats like MP3 or FLAC, you need to decode them back to PCM + # This requires additional processing which is not implemented here + # For simplicity, we'll skip playback for non-WAV formats + samples = None + logging.warning( + f"Playback for format '{audio_format}' is not implemented."
+ ) + else: + raise UnsupportedFileFormat(f"Unsupported format: {audio_format}") + + if samples is not None: + # Add audio samples to the buffer for streaming playback + self.audio_buffer.put(samples) + logging.info(f"Audio chunk {chunk_idx} added to buffer") + + # Write the chunk to the file if saving + if save_to_file_path: + output_file.write( + audio_chunk + ) # Corrected from f.write to output_file.write + + if not self.audio_started and samples is not None: + logging.info("Starting audio playback...") + self.audio_started = True + + # Signal that audio generation is complete + self.audio_stopped = True + + # Wait for playback to finish + playback_thread.join() + logging.info("Playback finished.") + + except Exception as e: + logging.error(f"Error during speak_streamed: {e}") + self.audio_killed = True + + finally: + if output_file: + output_file.close() + logging.info( + f"Audio successfully saved to {save_to_file_path} in {audio_format} format." + ) + + def synth_to_bytestream( + self, text: Any, format: Optional[str] = "wav" + ) -> Generator[bytes, None, None]: + """ + Synthesizes text to an in-memory bytestream in the specified audio format. + Yields audio data chunks as they are generated. + + :param text: The text to synthesize. + :param format: The desired audio format (e.g., 'wav', 'mp3', 'flac'). Defaults to 'wav'. + :return: A generator yielding bytes objects containing audio data. + """ + try: + logging.info(f"[GoogleTransTTS.synth_to_bytestream] Synthesizing text: {text}") + + # Split the text into smaller segments (e.g., sentences) for incremental synthesis + text_segments = self._split_text(text) + + for segment_idx, segment in enumerate(text_segments): + logging.info(f"Synthesizing segment {segment_idx}: {segment}") + #result = self._client.synth( + # str(segment), self._voice, self._lang, include_timepoints=True + #) + #audio_bytes = result["audio_content"] + audio_bytes = self.synth_to_bytes(str(segment)) + + if format.lower() == "wav": + # Yield raw PCM data (skip WAV header if necessary) + # Google TTS returns LINEAR16 PCM in WAV format + audio_stream = BytesIO(audio_bytes) + audio_stream.seek(44) # Skip the 44-byte WAV header + chunk_size = 1024 # Number of bytes per chunk + + while True: + chunk = audio_stream.read(chunk_size) + if not chunk: + break + yield chunk + + elif format.lower() in ["mp3", "flac"]: + # Convert PCM to the desired format using _convert_audio + pcm_data = np.frombuffer(audio_bytes, dtype=np.int16) + converted_audio = self._convert_audio( + pcm_data, format, self.audio_rate + ) + chunk_size = 4096 # Number of bytes per chunk + audio_io = BytesIO(converted_audio) + + while True: + chunk = audio_io.read(chunk_size) + if not chunk: + break + yield chunk + + else: + raise UnsupportedFileFormat(f"Unsupported format: {format}") + + except Exception as e: + logging.error(f"Error in synth_to_bytestream: {e}") + raise + + def play_audio(self): + """ + Plays audio from the audio_buffer using sounddevice. + """ + try: + logging.info("Starting audio playback thread...") + with sd.OutputStream( + samplerate=self.audio_rate, + channels=1, + callback=self.play_audio_callback, + blocksize=4096, + dtype="float32", + ): + self.playback_finished.wait() + except Exception as e: + logging.error(f"Error during audio playback: {e}") + self.audio_killed = True + + def _split_text(self, text: str) -> List[str]: + # Simple sentence splitter based on punctuation.
+ sentences = re.split(r"(?<=[.!?]) +", text) + return sentences + + # Audio playback callback, called continuously to stream audio from the buffer + def play_audio_callback( + self, outdata: np.ndarray, frames: int, time_info, status: sd.CallbackFlags + ): + if self.audio_killed or ( + self.audio_started and self.audio_buffer.empty() and self.audio_stopped + ): + logging.error("AUDIO KILLED OR STOPPED OR BUFFER EMPTY") + self.playback_finished.set() + return + + if self.audio_buffer.empty(): + outdata.fill(0) + return + + n = 0 + while n < frames and not self.audio_buffer.empty(): + remaining = frames - n + current_chunk = self.audio_buffer.queue[0] + k = current_chunk.shape[0] + + if remaining <= k: + outdata[n:, 0] = current_chunk[:remaining] + self.audio_buffer.queue[0] = current_chunk[remaining:] + n = frames + if self.audio_buffer.queue[0].shape[0] == 0: + self.audio_buffer.get() + break + + outdata[n : n + k, 0] = self.audio_buffer.get() + n += k + + if n < frames: + outdata[n:, 0] = 0 \ No newline at end of file diff --git a/tts_wrapper/engines/microsoft/microsoft.py b/tts_wrapper/engines/microsoft/microsoft.py index 6b9c5056..988bd028 100644 --- a/tts_wrapper/engines/microsoft/microsoft.py +++ b/tts_wrapper/engines/microsoft/microsoft.py @@ -1,5 +1,5 @@ # The MS SpeechSDK can do a lot of our base class - and better. So lets overrride that -from typing import Any, List, Dict, Optional +from typing import Any, List, Dict, Optional, Tuple, Generator from ...exceptions import UnsupportedFileFormat from ...tts import AbstractTTS, FileFormat @@ -17,8 +17,14 @@ except ImportError: speechsdk = None # type: ignore +import numpy as np import logging import threading +import queue +import sounddevice as sd +import time +from io import BytesIO +import re class MicrosoftTTS(AbstractTTS): @@ -32,6 +38,11 @@ def __init__( self._client = client self.set_voice(voice or "en-US-JennyNeural", lang or "en-US") self._ssml = MicrosoftSSML(self._lang, self._voice) + self.audio_buffer = queue.Queue() + self.playback_finished = threading.Event() + self.audio_started = False + self.audio_stopped = False + self.audio_killed = False # Ensure we're requesting word boundary information self._client.speech_config.set_property( @@ -150,3 +161,216 @@ def _is_ssml(self, ssml): @property def ssml(self) -> MicrosoftSSML: return self._ssml + + + def speak_streamed( + self, + text: str, + save_to_file_path: Optional[str] = None, + audio_format: Optional[str] = "wav", + ) -> None: + """ + Synthesizes text and plays it back using sounddevice in a streaming fashion. + Optionally saves the audio to a file after playback completes. + + :param text: The text to synthesize and play. + :param save_to_file_path: Path to save the audio file (optional). + :param audio_format: Audio format to save (e.g., 'wav', 'mp3', 'flac'). + """ + logging.info( + "[MicrosoftTTS.speak_streamed] Starting speech synthesis and playback..." + ) + + # Reset flags + self.audio_started = False + self.audio_stopped = False + self.playback_finished.clear() + + # Open the output file if saving is required + output_file = None + if save_to_file_path: + output_file = open(save_to_file_path, "wb") + logging.info( + f"Saving audio to {save_to_file_path} in {audio_format} format." 
+ ) + + try: + # Start audio playback in a separate thread + playback_thread = threading.Thread(target=self.play_audio) + playback_thread.start() + + # Iterate over the generator returned by synth_to_bytestream + for chunk_idx, audio_chunk in enumerate( + self.synth_to_bytestream(text, format=audio_format) + ): + logging.info( + f"Processing audio chunk {chunk_idx} with size {len(audio_chunk)} bytes" + ) + if audio_format.lower() == "wav": + # Convert bytes back to numpy float32 array for playback + # Assuming audio_chunk is raw PCM data (LINEAR16) + samples = ( + np.frombuffer(audio_chunk, dtype=np.int16).astype(np.float32) + / 32767.0 + ) + elif audio_format.lower() in ["mp3", "flac"]: + # For formats like MP3 or FLAC, you need to decode them back to PCM + # This requires additional processing which is not implemented here + # For simplicity, we'll skip playback for non-WAV formats + samples = None + logging.warning( + f"Playback for format '{audio_format}' is not implemented." + ) + else: + raise UnsupportedFileFormat(f"Unsupported format: {audio_format}") + + if samples is not None: + # Add audio samples to the buffer for streaming playback + self.audio_buffer.put(samples) + logging.info(f"Audio chunk {chunk_idx} added to buffer") + + # Write the chunk to the file if saving + if save_to_file_path: + output_file.write( + audio_chunk + ) # Corrected from f.write to output_file.write + + if not self.audio_started and samples is not None: + logging.info("Starting audio playback...") + self.audio_started = True + + # Signal that audio generation is complete + self.audio_stopped = True + + # Wait for playback to finish + playback_thread.join() + logging.info("Playback finished.") + + except Exception as e: + logging.error(f"Error during speak_streamed: {e}") + self.audio_killed = True + + finally: + if output_file: + output_file.close() + logging.info( + f"Audio successfully saved to {save_to_file_path} in {audio_format} format." + ) + + def synth_to_bytestream( + self, text: Any, format: Optional[str] = "wav" + ) -> Generator[bytes, None, None]: + """ + Synthesizes text to an in-memory bytestream in the specified audio format. + Yields audio data chunks as they are generated. + + :param text: The text to synthesize. + :param format: The desired audio format (e.g., 'wav', 'mp3', 'flac'). Defaults to 'wav'. + :return: A generator yielding bytes objects containing audio data. 
+ """ + try: + logging.info(f"[MicrosoftTTS.synth_to_bytestream] Synthesizing text: {text}") + + # Split the text into smaller segments (e.g., sentences) for incremental synthesis + text_segments = self._split_text(text) + + for segment_idx, segment in enumerate(text_segments): + logging.info(f"Synthesizing segment {segment_idx}: {segment}") + #result = self._client.synth( + # str(segment), self._voice, self._lang, include_timepoints=True + #) + #audio_bytes = result["audio_content"] + audio_bytes = self.synth_to_bytes(str(segment)) + + if format.lower() == "wav": + # Yield raw PCM data (skip WAV header if necessary) + # Google TTS returns LINEAR16 PCM in WAV format + audio_stream = BytesIO(audio_bytes) + audio_stream.seek(44) # Skip the 44-byte WAV header + chunk_size = 1024 # Number of bytes per chunk + + while True: + chunk = audio_stream.read(chunk_size) + if not chunk: + break + yield chunk + + elif format.lower() in ["mp3", "flac"]: + # Convert PCM to the desired format using _convert_audio + pcm_data = np.frombuffer(audio_bytes, dtype=np.int16) + converted_audio = self._convert_audio( + pcm_data, format, self.audio_rate + ) + chunk_size = 4096 # Number of bytes per chunk + audio_io = BytesIO(converted_audio) + + while True: + chunk = audio_io.read(chunk_size) + if not chunk: + break + yield chunk + + else: + raise UnsupportedFileFormat(f"Unsupported format: {format}") + + except Exception as e: + logging.error(f"Error in synth_to_bytestream: {e}") + raise + + def play_audio(self): + """ + Plays audio from the audio_buffer using sounddevice. + """ + try: + logging.info("Starting audio playback thread...") + with sd.OutputStream( + samplerate=self.audio_rate, + channels=1, + callback=self.play_audio_callback, + blocksize=4096, + dtype="float32", + ): + self.playback_finished.wait() + except Exception as e: + logging.error(f"Error during audio playback: {e}") + self.audio_killed = True + + def _split_text(self, text: str) -> List[str]: + # Simple sentence splitter based on punctuation. + sentences = re.split(r"(?<=[.!?]) +", text) + return sentences + + # Audio playback callback, called continuously to stream audio from the buffer + def play_audio_callback( + self, outdata: np.ndarray, frames: int, time_info, status: sd.CallbackFlags + ): + if self.audio_killed or ( + self.audio_started and self.audio_buffer.empty() and self.audio_stopped + ): + logging.error("AUDIO KILLED OR STOPPED OR BUFFER EMPTY") + self.playback_finished.set() + return + + if self.audio_buffer.empty(): + outdata.fill(0) + return + + n = 0 + while n < frames and not self.audio_buffer.empty(): + remaining = frames - n + current_chunk = self.audio_buffer.queue[0] + k = current_chunk.shape[0] + + if remaining <= k: + outdata[n:, 0] = current_chunk[:remaining] + self.audio_buffer.queue[0] = current_chunk[remaining:] + n = frames + if self.audio_buffer.queue[0].shape[0] == 0: + self.audio_buffer.get() + break + + outdata[n : n + k, 0] = self.audio_buffer.get() + n += k + + if n < frames: + outdata[n:, 0] = 0 \ No newline at end of file diff --git a/tts_wrapper/engines/mms/mms.py b/tts_wrapper/engines/mms/mms.py index 4345d351..140d8f86 100644 --- a/tts_wrapper/engines/mms/mms.py +++ b/tts_wrapper/engines/mms/mms.py @@ -1,9 +1,16 @@ -from typing import Any, List, Dict, Optional +from typing import Any, List, Dict, Optional, Tuple, Generator from ...exceptions import UnsupportedFileFormat from ...tts import AbstractTTS, FileFormat from . 
import MMSClient, MMSSSML + +import numpy as np +import logging +import threading +import queue +import sounddevice as sd +import time +from io import BytesIO import re -import io try: import numpy as np @@ -116,3 +123,216 @@ def ssml(self) -> MMSSSML: def get_voices(self) -> List[Dict[str, Any]]: return self._client.get_voices() + + + def speak_streamed( + self, + text: str, + save_to_file_path: Optional[str] = None, + audio_format: Optional[str] = "wav", + ) -> None: + """ + Synthesizes text and plays it back using sounddevice in a streaming fashion. + Optionally saves the audio to a file after playback completes. + + :param text: The text to synthesize and play. + :param save_to_file_path: Path to save the audio file (optional). + :param audio_format: Audio format to save (e.g., 'wav', 'mp3', 'flac'). + """ + logging.info( + "[MMSTTS.speak_streamed] Starting speech synthesis and playback..." + ) + + # Reset flags + self.audio_started = False + self.audio_stopped = False + self.playback_finished.clear() + + # Open the output file if saving is required + output_file = None + if save_to_file_path: + output_file = open(save_to_file_path, "wb") + logging.info( + f"Saving audio to {save_to_file_path} in {audio_format} format." + ) + + try: + # Start audio playback in a separate thread + playback_thread = threading.Thread(target=self.play_audio) + playback_thread.start() + + # Iterate over the generator returned by synth_to_bytestream + for chunk_idx, audio_chunk in enumerate( + self.synth_to_bytestream(text, format=audio_format) + ): + logging.info( + f"Processing audio chunk {chunk_idx} with size {len(audio_chunk)} bytes" + ) + if audio_format.lower() == "wav": + # Convert bytes back to numpy float32 array for playback + # Assuming audio_chunk is raw PCM data (LINEAR16) + samples = ( + np.frombuffer(audio_chunk, dtype=np.int16).astype(np.float32) + / 32767.0 + ) + elif audio_format.lower() in ["mp3", "flac"]: + # For formats like MP3 or FLAC, you need to decode them back to PCM + # This requires additional processing which is not implemented here + # For simplicity, we'll skip playback for non-WAV formats + samples = None + logging.warning( + f"Playback for format '{audio_format}' is not implemented." + ) + else: + raise UnsupportedFileFormat(f"Unsupported format: {audio_format}") + + if samples is not None: + # Add audio samples to the buffer for streaming playback + self.audio_buffer.put(samples) + logging.info(f"Audio chunk {chunk_idx} added to buffer") + + # Write the chunk to the file if saving + if save_to_file_path: + output_file.write( + audio_chunk + ) # Corrected from f.write to output_file.write + + if not self.audio_started and samples is not None: + logging.info("Starting audio playback...") + self.audio_started = True + + # Signal that audio generation is complete + self.audio_stopped = True + + # Wait for playback to finish + playback_thread.join() + logging.info("Playback finished.") + + except Exception as e: + logging.error(f"Error during speak_streamed: {e}") + self.audio_killed = True + + finally: + if output_file: + output_file.close() + logging.info( + f"Audio successfully saved to {save_to_file_path} in {audio_format} format." + ) + + def synth_to_bytestream( + self, text: Any, format: Optional[str] = "wav" + ) -> Generator[bytes, None, None]: + """ + Synthesizes text to an in-memory bytestream in the specified audio format. + Yields audio data chunks as they are generated. + + :param text: The text to synthesize. 
+ :param format: The desired audio format (e.g., 'wav', 'mp3', 'flac'). Defaults to 'wav'. + :return: A generator yielding bytes objects containing audio data. + """ + try: + logging.info(f"[MMSTTS.synth_to_bytestream] Synthesizing text: {text}") + + # Split the text into smaller segments (e.g., sentences) for incremental synthesis + text_segments = self._split_text(text) + + for segment_idx, segment in enumerate(text_segments): + logging.info(f"Synthesizing segment {segment_idx}: {segment}") + #result = self._client.synth( + # str(segment), self._voice, self._lang, include_timepoints=True + #) + #audio_bytes = result["audio_content"] + audio_bytes = self.synth(text=str(segment)) + + if format.lower() == "wav": + # Yield raw PCM data (skip WAV header if necessary) + # Google TTS returns LINEAR16 PCM in WAV format + audio_stream = BytesIO(audio_bytes) + audio_stream.seek(44) # Skip the 44-byte WAV header + chunk_size = 1024 # Number of bytes per chunk + + while True: + chunk = audio_stream.read(chunk_size) + if not chunk: + break + yield chunk + + elif format.lower() in ["mp3", "flac"]: + # Convert PCM to the desired format using _convert_audio + pcm_data = np.frombuffer(audio_bytes, dtype=np.int16) + converted_audio = self._convert_audio( + pcm_data, format, self.audio_rate + ) + chunk_size = 4096 # Number of bytes per chunk + audio_io = BytesIO(converted_audio) + + while True: + chunk = audio_io.read(chunk_size) + if not chunk: + break + yield chunk + + else: + raise UnsupportedFileFormat(f"Unsupported format: {format}") + + except Exception as e: + logging.error(f"Error in synth_to_bytestream: {e}") + raise + + def play_audio(self): + """ + Plays audio from the audio_buffer using sounddevice. + """ + try: + logging.info("Starting audio playback thread...") + with sd.OutputStream( + samplerate=self.audio_rate, + channels=1, + callback=self.play_audio_callback, + blocksize=4096, + dtype="float32", + ): + self.playback_finished.wait() + except Exception as e: + logging.error(f"Error during audio playback: {e}") + self.audio_killed = True + + def _split_text(self, text: str) -> List[str]: + # Simple sentence splitter based on punctuation. 
+ sentences = re.split(r"(?<=[.!?]) +", text) + return sentences + + # Audio playback callback, called continuously to stream audio from the buffer + def play_audio_callback( + self, outdata: np.ndarray, frames: int, time_info, status: sd.CallbackFlags + ): + if self.audio_killed or ( + self.audio_started and self.audio_buffer.empty() and self.audio_stopped + ): + logging.error("AUDIO KILLED OR STOPPED OR BUFFER EMPTY") + self.playback_finished.set() + return + + if self.audio_buffer.empty(): + outdata.fill(0) + return + + n = 0 + while n < frames and not self.audio_buffer.empty(): + remaining = frames - n + current_chunk = self.audio_buffer.queue[0] + k = current_chunk.shape[0] + + if remaining <= k: + outdata[n:, 0] = current_chunk[:remaining] + self.audio_buffer.queue[0] = current_chunk[remaining:] + n = frames + if self.audio_buffer.queue[0].shape[0] == 0: + self.audio_buffer.get() + break + + outdata[n : n + k, 0] = self.audio_buffer.get() + n += k + + if n < frames: + outdata[n:, 0] = 0 \ No newline at end of file diff --git a/tts_wrapper/engines/pico/pico.py b/tts_wrapper/engines/pico/pico.py index a2bb348c..58bfc61d 100644 --- a/tts_wrapper/engines/pico/pico.py +++ b/tts_wrapper/engines/pico/pico.py @@ -1,9 +1,17 @@ -from typing import Any, List, Optional +from typing import Any, List, Dict, Optional, Tuple, Generator from ...exceptions import UnsupportedFileFormat from ...tts import AbstractTTS, FileFormat from . import PicoClient +import numpy as np +import logging +import threading +import queue +import sounddevice as sd +import time +from io import BytesIO +import re class PicoTTS(AbstractTTS): def __init__(self, client: PicoClient, voice: Optional[str] = None) -> None: @@ -15,3 +23,217 @@ def synth_to_bytes(self, text: Any) -> bytes: def construct_prosody_tag(self, text: str) -> str: pass + + + def speak_streamed( + self, + text: str, + save_to_file_path: Optional[str] = None, + audio_format: Optional[str] = "wav", + ) -> None: + """ + Synthesizes text and plays it back using sounddevice in a streaming fashion. + Optionally saves the audio to a file after playback completes. + + :param text: The text to synthesize and play. + :param save_to_file_path: Path to save the audio file (optional). + :param audio_format: Audio format to save (e.g., 'wav', 'mp3', 'flac'). + """ + logging.info( + "[PicoTTS.speak_streamed] Starting speech synthesis and playback..." + ) + + # Reset flags + self.audio_started = False + self.audio_stopped = False + self.playback_finished.clear() + + # Open the output file if saving is required + output_file = None + if save_to_file_path: + output_file = open(save_to_file_path, "wb") + logging.info( + f"Saving audio to {save_to_file_path} in {audio_format} format." 
+ ) + + try: + # Start audio playback in a separate thread + playback_thread = threading.Thread(target=self.play_audio) + playback_thread.start() + + # Iterate over the generator returned by synth_to_bytestream + for chunk_idx, audio_chunk in enumerate( + self.synth_to_bytestream(text, format=audio_format) + ): + logging.info( + f"Processing audio chunk {chunk_idx} with size {len(audio_chunk)} bytes" + ) + if audio_format.lower() == "wav": + # Convert bytes back to numpy float32 array for playback + # Assuming audio_chunk is raw PCM data (LINEAR16) + samples = ( + np.frombuffer(audio_chunk, dtype=np.int16).astype(np.float32) + / 32767.0 + ) + elif audio_format.lower() in ["mp3", "flac"]: + # For formats like MP3 or FLAC, you need to decode them back to PCM + # This requires additional processing which is not implemented here + # For simplicity, we'll skip playback for non-WAV formats + samples = None + logging.warning( + f"Playback for format '{audio_format}' is not implemented." + ) + else: + raise UnsupportedFileFormat(f"Unsupported format: {audio_format}") + + if samples is not None: + # Add audio samples to the buffer for streaming playback + self.audio_buffer.put(samples) + logging.info(f"Audio chunk {chunk_idx} added to buffer") + + # Write the chunk to the file if saving + if save_to_file_path: + output_file.write( + audio_chunk + ) # Corrected from f.write to output_file.write + + if not self.audio_started and samples is not None: + logging.info("Starting audio playback...") + self.audio_started = True + + # Signal that audio generation is complete + self.audio_stopped = True + + # Wait for playback to finish + playback_thread.join() + logging.info("Playback finished.") + + except Exception as e: + logging.error(f"Error during speak_streamed: {e}") + self.audio_killed = True + + finally: + if output_file: + output_file.close() + logging.info( + f"Audio successfully saved to {save_to_file_path} in {audio_format} format." + ) + + + def synth_to_bytestream( + self, text: Any, format: Optional[str] = "wav" + ) -> Generator[bytes, None, None]: + """ + Synthesizes text to an in-memory bytestream in the specified audio format. + Yields audio data chunks as they are generated. + + :param text: The text to synthesize. + :param format: The desired audio format (e.g., 'wav', 'mp3', 'flac'). Defaults to 'wav'. + :return: A generator yielding bytes objects containing audio data. 
+ """ + try: + logging.info(f"[GoogleTTS.synth_to_bytestream] Synthesizing text: {text}") + + # Split the text into smaller segments (e.g., sentences) for incremental synthesis + text_segments = self._split_text(text) + + for segment_idx, segment in enumerate(text_segments): + logging.info(f"Synthesizing segment {segment_idx}: {segment}") + #result = self._client.synth( + # str(segment), self._voice, self._lang, include_timepoints=True + #) + #audio_bytes = result["audio_content"] + audio_bytes = self.synth(str(segment), self._voice) + + if format.lower() == "wav": + # Yield raw PCM data (skip WAV header if necessary) + # Google TTS returns LINEAR16 PCM in WAV format + audio_stream = BytesIO(audio_bytes) + audio_stream.seek(44) # Skip the 44-byte WAV header + chunk_size = 1024 # Number of bytes per chunk + + while True: + chunk = audio_stream.read(chunk_size) + if not chunk: + break + yield chunk + + elif format.lower() in ["mp3", "flac"]: + # Convert PCM to the desired format using _convert_audio + pcm_data = np.frombuffer(audio_bytes, dtype=np.int16) + converted_audio = self._convert_audio( + pcm_data, format, self.audio_rate + ) + chunk_size = 4096 # Number of bytes per chunk + audio_io = BytesIO(converted_audio) + + while True: + chunk = audio_io.read(chunk_size) + if not chunk: + break + yield chunk + + else: + raise UnsupportedFileFormat(f"Unsupported format: {format}") + + except Exception as e: + logging.error(f"Error in synth_to_bytestream: {e}") + raise + + def play_audio(self): + """ + Plays audio from the audio_buffer using sounddevice. + """ + try: + logging.info("Starting audio playback thread...") + with sd.OutputStream( + samplerate=self.audio_rate, + channels=1, + callback=self.play_audio_callback, + blocksize=4096, + dtype="float32", + ): + self.playback_finished.wait() + except Exception as e: + logging.error(f"Error during audio playback: {e}") + self.audio_killed = True + + def _split_text(self, text: str) -> List[str]: + # Simple sentence splitter based on punctuation. + sentences = re.split(r"(?<=[.!?]) +", text) + return sentences + + # Audio playback callback, called continuously to stream audio from the buffer + def play_audio_callback( + self, outdata: np.ndarray, frames: int, time_info, status: sd.CallbackFlags + ): + if self.audio_killed or ( + self.audio_started and self.audio_buffer.empty() and self.audio_stopped + ): + logging.error("AUDIO KILLED OR STOPPED OR BUFFER EMPTY") + self.playback_finished.set() + return + + if self.audio_buffer.empty(): + outdata.fill(0) + return + + n = 0 + while n < frames and not self.audio_buffer.empty(): + remaining = frames - n + current_chunk = self.audio_buffer.queue[0] + k = current_chunk.shape[0] + + if remaining <= k: + outdata[n:, 0] = current_chunk[:remaining] + self.audio_buffer.queue[0] = current_chunk[remaining:] + n = frames + if self.audio_buffer.queue[0].shape[0] == 0: + self.audio_buffer.get() + break + + outdata[n : n + k, 0] = self.audio_buffer.get() + n += k + + if n < frames: + outdata[n:, 0] = 0 \ No newline at end of file diff --git a/tts_wrapper/engines/polly/polly.py b/tts_wrapper/engines/polly/polly.py index 1ef109f2..58794206 100644 --- a/tts_wrapper/engines/polly/polly.py +++ b/tts_wrapper/engines/polly/polly.py @@ -1,7 +1,16 @@ -from typing import Any, List, Optional, Dict, Tuple +from typing import Any, List, Dict, Optional, Tuple, Generator from ...exceptions import UnsupportedFileFormat from ...tts import AbstractTTS, FileFormat from . 
import PollyClient, PollySSML +import re +import numpy as np +import pathlib +import logging +import threading +import queue +import sounddevice as sd +import time +from io import BytesIO class PollyTTS(AbstractTTS): @@ -15,6 +24,12 @@ def __init__( self._client = client self.set_voice(voice or "Joanna", lang or "en-US") self.audio_rate = 16000 + self.audio_buffer = queue.Queue() + self.playback_finished = threading.Event() + self.audio_started = False + self.audio_stopped = False + self.audio_killed = False + self.audio_format = "wav" # Default format def synth_to_bytes(self, text: Any) -> bytes: if not self._is_ssml(str(text)): @@ -97,3 +112,216 @@ def mapped_to_predefined_word(self, volume: str) -> str: return "loud" if 81 <= volume_in_float <= 100: return "x-loud" + + + def speak_streamed( + self, + text: str, + save_to_file_path: Optional[str] = None, + audio_format: Optional[str] = "wav", + ) -> None: + """ + Synthesizes text and plays it back using sounddevice in a streaming fashion. + Optionally saves the audio to a file after playback completes. + + :param text: The text to synthesize and play. + :param save_to_file_path: Path to save the audio file (optional). + :param audio_format: Audio format to save (e.g., 'wav', 'mp3', 'flac'). + """ + logging.info( + "[PollyTTS.speak_streamed] Starting speech synthesis and playback..." + ) + + # Reset flags + self.audio_started = False + self.audio_stopped = False + self.playback_finished.clear() + + # Open the output file if saving is required + output_file = None + if save_to_file_path: + output_file = open(save_to_file_path, "wb") + logging.info( + f"Saving audio to {save_to_file_path} in {audio_format} format." + ) + + try: + # Start audio playback in a separate thread + playback_thread = threading.Thread(target=self.play_audio) + playback_thread.start() + + # Iterate over the generator returned by synth_to_bytestream + for chunk_idx, audio_chunk in enumerate( + self.synth_to_bytestream(str(text), format=audio_format) + ): + logging.info( + f"Processing audio chunk {chunk_idx} with size {len(audio_chunk)} bytes" + ) + if audio_format.lower() == "wav": + # Convert bytes back to numpy float32 array for playback + # Assuming audio_chunk is raw PCM data (LINEAR16) + samples = ( + np.frombuffer(audio_chunk, dtype=np.int16).astype(np.float32) + / 32767.0 + ) + elif audio_format.lower() in ["mp3", "flac"]: + # For formats like MP3 or FLAC, you need to decode them back to PCM + # This requires additional processing which is not implemented here + # For simplicity, we'll skip playback for non-WAV formats + samples = None + logging.warning( + f"Playback for format '{audio_format}' is not implemented." 
+                    )
+                else:
+                    raise UnsupportedFileFormat(f"Unsupported format: {audio_format}")
+
+                if samples is not None:
+                    # Add audio samples to the buffer for streaming playback
+                    self.audio_buffer.put(samples)
+                    logging.info(f"Audio chunk {chunk_idx} added to buffer")
+
+                    # Write the chunk to the file if saving
+                    if save_to_file_path:
+                        output_file.write(audio_chunk)
+
+                if not self.audio_started and samples is not None:
+                    logging.info("Starting audio playback...")
+                    self.audio_started = True
+
+            # Signal that audio generation is complete
+            self.audio_stopped = True
+
+            # Wait for playback to finish
+            playback_thread.join()
+            logging.info("Playback finished.")
+
+        except Exception as e:
+            logging.error(f"Error during speak_streamed: {e}")
+            self.audio_killed = True
+
+        finally:
+            if output_file:
+                output_file.close()
+                logging.info(
+                    f"Audio successfully saved to {save_to_file_path} in {audio_format} format."
+                )
+
+    def synth_to_bytestream(
+        self, text: Any, format: Optional[str] = "wav"
+    ) -> Generator[bytes, None, None]:
+        """
+        Synthesizes text to an in-memory bytestream in the specified audio format.
+        Yields audio data chunks as they are generated.
+
+        :param text: The text to synthesize.
+        :param format: The desired audio format (e.g., 'wav', 'mp3', 'flac'). Defaults to 'wav'.
+        :return: A generator yielding bytes objects containing audio data.
+        """
+        try:
+            logging.info(f"[PollyTTS.synth_to_bytestream] Synthesizing text: {text}")
+
+            # Split the text into smaller segments (e.g., sentences) for incremental synthesis
+            text_segments = self._split_text(text)
+
+            for segment_idx, segment in enumerate(text_segments):
+                logging.info(f"Synthesizing segment {segment_idx}: {segment}")
+                audio_bytes = self.synth_to_bytes(str(segment))
+
+                if format.lower() == "wav":
+                    # Yield raw PCM data, skipping the WAV header.
+                    # Polly audio from synth_to_bytes is assumed to be 16-bit
+                    # PCM in a canonical 44-byte-header WAV container.
+                    audio_stream = BytesIO(audio_bytes)
+                    audio_stream.seek(44)  # Skip the 44-byte WAV header
+                    chunk_size = 1024  # Number of bytes per chunk
+
+                    while True:
+                        chunk = audio_stream.read(chunk_size)
+                        if not chunk:
+                            break
+                        yield chunk
+
+                elif format.lower() in ["mp3", "flac"]:
+                    # Convert PCM to the desired format using _convert_audio
+                    pcm_data = np.frombuffer(audio_bytes, dtype=np.int16)
+                    converted_audio = self._convert_audio(
+                        pcm_data, format, self.audio_rate
+                    )
+                    chunk_size = 4096  # Number of bytes per chunk
+                    audio_io = BytesIO(converted_audio)
+
+                    while True:
+                        chunk = audio_io.read(chunk_size)
+                        if not chunk:
+                            break
+                        yield chunk
+
+                else:
+                    raise UnsupportedFileFormat(f"Unsupported format: {format}")
+
+        except Exception as e:
+            logging.error(f"Error in synth_to_bytestream: {e}")
+            raise
+
+    def play_audio(self):
+        """
+        Plays audio from the audio_buffer using sounddevice.
+        """
+        try:
+            logging.info("Starting audio playback thread...")
+            with sd.OutputStream(
+                samplerate=self.audio_rate,
+                channels=1,
+                callback=self.play_audio_callback,
+                blocksize=4096,
+                dtype="float32",
+            ):
+                self.playback_finished.wait()
+        except Exception as e:
+            logging.error(f"Error during audio playback: {e}")
+            self.audio_killed = True
+
+    def _split_text(self, text: str) -> List[str]:
+        # Simple sentence splitter based on punctuation.
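+        # For example, "Hi there. All good?" yields ["Hi there.", "All good?"].
+        # Known limitation: abbreviations such as "Dr." or "e.g." cause false
+        # splits; a real tokenizer (e.g. NLTK punkt, an external dependency
+        # not used here) would handle those cases.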
+ sentences = re.split(r"(?<=[.!?]) +", text) + return sentences + + # Audio playback callback, called continuously to stream audio from the buffer + def play_audio_callback( + self, outdata: np.ndarray, frames: int, time_info, status: sd.CallbackFlags + ): + if self.audio_killed or ( + self.audio_started and self.audio_buffer.empty() and self.audio_stopped + ): + logging.error("AUDIO KILLED OR STOPPED OR BUFFER EMPTY") + self.playback_finished.set() + return + + if self.audio_buffer.empty(): + outdata.fill(0) + return + + n = 0 + while n < frames and not self.audio_buffer.empty(): + remaining = frames - n + current_chunk = self.audio_buffer.queue[0] + k = current_chunk.shape[0] + + if remaining <= k: + outdata[n:, 0] = current_chunk[:remaining] + self.audio_buffer.queue[0] = current_chunk[remaining:] + n = frames + if self.audio_buffer.queue[0].shape[0] == 0: + self.audio_buffer.get() + break + + outdata[n : n + k, 0] = self.audio_buffer.get() + n += k + + if n < frames: + outdata[n:, 0] = 0 diff --git a/tts_wrapper/engines/sapi/sapi.py b/tts_wrapper/engines/sapi/sapi.py index 0edb0ca2..2a71f8db 100644 --- a/tts_wrapper/engines/sapi/sapi.py +++ b/tts_wrapper/engines/sapi/sapi.py @@ -1,8 +1,17 @@ -from typing import Any, List, Optional, Literal +from typing import Any, List, Optional, Literal, Tuple, Generator from ...exceptions import UnsupportedFileFormat from ...tts import AbstractTTS, FileFormat from . import SAPIClient from .ssml import SAPISSML +import re +import numpy as np +import pathlib +import logging +import threading +import queue +import sounddevice as sd +import time +from io import BytesIO class SAPITTS(AbstractTTS): @@ -13,6 +22,12 @@ def supported_formats(cls) -> List[FileFormat]: def __init__(self, client: SAPIClient) -> None: super().__init__() self._client = client + self.audio_buffer = queue.Queue() + self.playback_finished = threading.Event() + self.audio_started = False + self.audio_stopped = False + self.audio_killed = False + self.audio_format = "wav" # Default format def set_voice(self, voice_id: str, lang_id: Optional[str] = None): """Set the TTS voice by ID and optionally set the language ID.""" @@ -76,3 +91,215 @@ def _map_volume_to_predefined_word(self, volume: str) -> str: return "x-loud" else: return "medium" + + def speak_streamed( + self, + text: str, + save_to_file_path: Optional[str] = None, + audio_format: Optional[str] = "wav", + ) -> None: + """ + Synthesizes text and plays it back using sounddevice in a streaming fashion. + Optionally saves the audio to a file after playback completes. + + :param text: The text to synthesize and play. + :param save_to_file_path: Path to save the audio file (optional). + :param audio_format: Audio format to save (e.g., 'wav', 'mp3', 'flac'). + """ + logging.info( + "[SAPITTS.speak_streamed] Starting speech synthesis and playback..." + ) + + # Reset flags + self.audio_started = False + self.audio_stopped = False + self.playback_finished.clear() + + # Open the output file if saving is required + output_file = None + if save_to_file_path: + output_file = open(save_to_file_path, "wb") + logging.info( + f"Saving audio to {save_to_file_path} in {audio_format} format." 
+ ) + + try: + # Start audio playback in a separate thread + playback_thread = threading.Thread(target=self.play_audio) + playback_thread.start() + + # Iterate over the generator returned by synth_to_bytestream + for chunk_idx, audio_chunk in enumerate( + self.synth_to_bytestream(str(text), format=audio_format) + ): + logging.info( + f"Processing audio chunk {chunk_idx} with size {len(audio_chunk)} bytes" + ) + if audio_format.lower() == "wav": + # Convert bytes back to numpy float32 array for playback + # Assuming audio_chunk is raw PCM data (LINEAR16) + samples = ( + np.frombuffer(audio_chunk, dtype=np.int16).astype(np.float32) + / 32767.0 + ) + elif audio_format.lower() in ["mp3", "flac"]: + # For formats like MP3 or FLAC, you need to decode them back to PCM + # This requires additional processing which is not implemented here + # For simplicity, we'll skip playback for non-WAV formats + samples = None + logging.warning( + f"Playback for format '{audio_format}' is not implemented." + ) + else: + raise UnsupportedFileFormat(f"Unsupported format: {audio_format}") + + if samples is not None: + # Add audio samples to the buffer for streaming playback + self.audio_buffer.put(samples) + logging.info(f"Audio chunk {chunk_idx} added to buffer") + + # Write the chunk to the file if saving + if save_to_file_path: + output_file.write( + audio_chunk + ) # Corrected from f.write to output_file.write + + if not self.audio_started and samples is not None: + logging.info("Starting audio playback...") + self.audio_started = True + + # Signal that audio generation is complete + self.audio_stopped = True + + # Wait for playback to finish + playback_thread.join() + logging.info("Playback finished.") + + except Exception as e: + logging.error(f"Error during speak_streamed: {e}") + self.audio_killed = True + + finally: + if output_file: + output_file.close() + logging.info( + f"Audio successfully saved to {save_to_file_path} in {audio_format} format." + ) + + def synth_to_bytestream( + self, text: Any, format: Optional[str] = "wav" + ) -> Generator[bytes, None, None]: + """ + Synthesizes text to an in-memory bytestream in the specified audio format. + Yields audio data chunks as they are generated. + + :param text: The text to synthesize. + :param format: The desired audio format (e.g., 'wav', 'mp3', 'flac'). Defaults to 'wav'. + :return: A generator yielding bytes objects containing audio data. 
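+
+        Note: for 'wav' the audio is yielded as raw PCM with the first 44
+        bytes (a canonical RIFF header, by assumption) stripped off.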
+        """
+        try:
+            logging.info(f"[SAPITTS.synth_to_bytestream] Synthesizing text: {text}")
+
+            # Split the text into smaller segments (e.g., sentences) for incremental synthesis
+            text_segments = self._split_text(text)
+
+            for segment_idx, segment in enumerate(text_segments):
+                logging.info(f"Synthesizing segment {segment_idx}: {segment}")
+                audio_bytes = self._client.synth(str(segment))
+
+                if format.lower() == "wav":
+                    # Yield raw PCM data, skipping the WAV header.
+                    # The SAPI client is assumed to return 16-bit PCM in a
+                    # canonical 44-byte-header WAV container.
+                    audio_stream = BytesIO(audio_bytes)
+                    audio_stream.seek(44)  # Skip the 44-byte WAV header
+                    chunk_size = 1024  # Number of bytes per chunk
+
+                    while True:
+                        chunk = audio_stream.read(chunk_size)
+                        if not chunk:
+                            break
+                        yield chunk
+
+                elif format.lower() in ["mp3", "flac"]:
+                    # Convert PCM to the desired format using _convert_audio
+                    pcm_data = np.frombuffer(audio_bytes, dtype=np.int16)
+                    converted_audio = self._convert_audio(
+                        pcm_data, format, self.audio_rate
+                    )
+                    chunk_size = 4096  # Number of bytes per chunk
+                    audio_io = BytesIO(converted_audio)
+
+                    while True:
+                        chunk = audio_io.read(chunk_size)
+                        if not chunk:
+                            break
+                        yield chunk
+
+                else:
+                    raise UnsupportedFileFormat(f"Unsupported format: {format}")
+
+        except Exception as e:
+            logging.error(f"Error in synth_to_bytestream: {e}")
+            raise
+
+    def play_audio(self):
+        """
+        Plays audio from the audio_buffer using sounddevice.
+        """
+        try:
+            logging.info("Starting audio playback thread...")
+            with sd.OutputStream(
+                samplerate=self.audio_rate,
+                channels=1,
+                callback=self.play_audio_callback,
+                blocksize=4096,
+                dtype="float32",
+            ):
+                self.playback_finished.wait()
+        except Exception as e:
+            logging.error(f"Error during audio playback: {e}")
+            self.audio_killed = True
+
+    def _split_text(self, text: str) -> List[str]:
+        # Simple sentence splitter based on punctuation.
+        sentences = re.split(r"(?<=[.!?]) +", text)
+        return sentences
+
+    # Audio playback callback, called continuously to stream audio from the buffer
+    def play_audio_callback(
+        self, outdata: np.ndarray, frames: int, time_info, status: sd.CallbackFlags
+    ):
+        if self.audio_killed or (
+            self.audio_started and self.audio_buffer.empty() and self.audio_stopped
+        ):
+            logging.error("AUDIO KILLED OR STOPPED OR BUFFER EMPTY")
+            self.playback_finished.set()
+            return
+
+        if self.audio_buffer.empty():
+            outdata.fill(0)
+            return
+
+        n = 0
+        while n < frames and not self.audio_buffer.empty():
+            remaining = frames - n
+            current_chunk = self.audio_buffer.queue[0]
+            k = current_chunk.shape[0]
+
+            if remaining <= k:
+                outdata[n:, 0] = current_chunk[:remaining]
+                self.audio_buffer.queue[0] = current_chunk[remaining:]
+                n = frames
+                if self.audio_buffer.queue[0].shape[0] == 0:
+                    self.audio_buffer.get()
+                break
+
+            outdata[n : n + k, 0] = self.audio_buffer.get()
+            n += k
+
+        if n < frames:
+            outdata[n:, 0] = 0
diff --git a/tts_wrapper/engines/watson/watson.py b/tts_wrapper/engines/watson/watson.py
index 67a2bba9..0d93c2ac 100644
--- a/tts_wrapper/engines/watson/watson.py
+++ b/tts_wrapper/engines/watson/watson.py
@@ -1,8 +1,17 @@
-from typing import Any, List, Optional, Dict, Tuple
+from typing import Any, List, Optional, Dict, Literal, Tuple, Generator
 from ...exceptions import UnsupportedFileFormat
 from ...tts import AbstractTTS, FileFormat
 from . 
import WatsonClient, WatsonSSML + +import re +import numpy as np +import pathlib import logging +import threading +import queue +import sounddevice as sd +import time +from io import BytesIO class WatsonTTS(AbstractTTS): @@ -17,6 +26,12 @@ def __init__( self._voice = voice or "en-US_LisaV3Voice" self.audio_rate = 22050 self.word_timings = [] + self.audio_buffer = queue.Queue() + self.playback_finished = threading.Event() + self.audio_started = False + self.audio_stopped = False + self.audio_killed = False + self.audio_format = "wav" # Default format def get_audio_duration(self) -> float: if self.generated_audio: @@ -85,3 +100,215 @@ def set_voice(self, voice_id: str, lang_id: str): def construct_prosody_tag(self, text: str) -> str: pass + + def speak_streamed( + self, + text: str, + save_to_file_path: Optional[str] = None, + audio_format: Optional[str] = "wav", + ) -> None: + """ + Synthesizes text and plays it back using sounddevice in a streaming fashion. + Optionally saves the audio to a file after playback completes. + + :param text: The text to synthesize and play. + :param save_to_file_path: Path to save the audio file (optional). + :param audio_format: Audio format to save (e.g., 'wav', 'mp3', 'flac'). + """ + logging.info( + "[WatsonTTS.speak_streamed] Starting speech synthesis and playback..." + ) + + # Reset flags + self.audio_started = False + self.audio_stopped = False + self.playback_finished.clear() + + # Open the output file if saving is required + output_file = None + if save_to_file_path: + output_file = open(save_to_file_path, "wb") + logging.info( + f"Saving audio to {save_to_file_path} in {audio_format} format." + ) + + try: + # Start audio playback in a separate thread + playback_thread = threading.Thread(target=self.play_audio) + playback_thread.start() + + # Iterate over the generator returned by synth_to_bytestream + for chunk_idx, audio_chunk in enumerate( + self.synth_to_bytestream(str(text), format=audio_format) + ): + logging.info( + f"Processing audio chunk {chunk_idx} with size {len(audio_chunk)} bytes" + ) + if audio_format.lower() == "wav": + # Convert bytes back to numpy float32 array for playback + # Assuming audio_chunk is raw PCM data (LINEAR16) + samples = ( + np.frombuffer(audio_chunk, dtype=np.int16).astype(np.float32) + / 32767.0 + ) + elif audio_format.lower() in ["mp3", "flac"]: + # For formats like MP3 or FLAC, you need to decode them back to PCM + # This requires additional processing which is not implemented here + # For simplicity, we'll skip playback for non-WAV formats + samples = None + logging.warning( + f"Playback for format '{audio_format}' is not implemented." 
+                    )
+                else:
+                    raise UnsupportedFileFormat(f"Unsupported format: {audio_format}")
+
+                if samples is not None:
+                    # Add audio samples to the buffer for streaming playback
+                    self.audio_buffer.put(samples)
+                    logging.info(f"Audio chunk {chunk_idx} added to buffer")
+
+                    # Write the chunk to the file if saving
+                    if save_to_file_path:
+                        output_file.write(audio_chunk)
+
+                if not self.audio_started and samples is not None:
+                    logging.info("Starting audio playback...")
+                    self.audio_started = True
+
+            # Signal that audio generation is complete
+            self.audio_stopped = True
+
+            # Wait for playback to finish
+            playback_thread.join()
+            logging.info("Playback finished.")
+
+        except Exception as e:
+            logging.error(f"Error during speak_streamed: {e}")
+            self.audio_killed = True
+
+        finally:
+            if output_file:
+                output_file.close()
+                logging.info(
+                    f"Audio successfully saved to {save_to_file_path} in {audio_format} format."
+                )
+
+    def synth_to_bytestream(
+        self, text: Any, format: Optional[str] = "wav"
+    ) -> Generator[bytes, None, None]:
+        """
+        Synthesizes text to an in-memory bytestream in the specified audio format.
+        Yields audio data chunks as they are generated.
+
+        :param text: The text to synthesize.
+        :param format: The desired audio format (e.g., 'wav', 'mp3', 'flac'). Defaults to 'wav'.
+        :return: A generator yielding bytes objects containing audio data.
+        """
+        try:
+            logging.info(f"[WatsonTTS.synth_to_bytestream] Synthesizing text: {text}")
+
+            # Split the text into smaller segments (e.g., sentences) for incremental synthesis
+            text_segments = self._split_text(text)
+
+            for segment_idx, segment in enumerate(text_segments):
+                logging.info(f"Synthesizing segment {segment_idx}: {segment}")
+                audio_bytes = self.synth_to_bytes(str(segment))
+
+                if format.lower() == "wav":
+                    # Yield raw PCM data, skipping the WAV header.
+                    # Watson audio from synth_to_bytes is assumed to be
+                    # 16-bit PCM in a canonical 44-byte-header WAV container.
+                    audio_stream = BytesIO(audio_bytes)
+                    audio_stream.seek(44)  # Skip the 44-byte WAV header
+                    chunk_size = 1024  # Number of bytes per chunk
+
+                    while True:
+                        chunk = audio_stream.read(chunk_size)
+                        if not chunk:
+                            break
+                        yield chunk
+
+                elif format.lower() in ["mp3", "flac"]:
+                    # Convert PCM to the desired format using _convert_audio
+                    pcm_data = np.frombuffer(audio_bytes, dtype=np.int16)
+                    converted_audio = self._convert_audio(
+                        pcm_data, format, self.audio_rate
+                    )
+                    chunk_size = 4096  # Number of bytes per chunk
+                    audio_io = BytesIO(converted_audio)
+
+                    while True:
+                        chunk = audio_io.read(chunk_size)
+                        if not chunk:
+                            break
+                        yield chunk
+
+                else:
+                    raise UnsupportedFileFormat(f"Unsupported format: {format}")
+
+        except Exception as e:
+            logging.error(f"Error in synth_to_bytestream: {e}")
+            raise
+
+    def play_audio(self):
+        """
+        Plays audio from the audio_buffer using sounddevice.
+        """
+        try:
+            logging.info("Starting audio playback thread...")
+            with sd.OutputStream(
+                samplerate=self.audio_rate,
+                channels=1,
+                callback=self.play_audio_callback,
+                blocksize=4096,
+                dtype="float32",
+            ):
+                self.playback_finished.wait()
+        except Exception as e:
+            logging.error(f"Error during audio playback: {e}")
+            self.audio_killed = True
+
+    def _split_text(self, text: str) -> List[str]:
+        # Simple sentence splitter based on punctuation.
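+        # The lookbehind (?<=[.!?]) splits on the spaces *after* sentence
+        # punctuation, so the punctuation stays attached to its sentence.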
+        sentences = re.split(r"(?<=[.!?]) +", text)
+        return sentences
+
+    # Audio playback callback, called continuously to stream audio from the buffer
+    def play_audio_callback(
+        self, outdata: np.ndarray, frames: int, time_info, status: sd.CallbackFlags
+    ):
+        if self.audio_killed or (
+            self.audio_started and self.audio_buffer.empty() and self.audio_stopped
+        ):
+            logging.error("AUDIO KILLED OR STOPPED OR BUFFER EMPTY")
+            self.playback_finished.set()
+            return
+
+        if self.audio_buffer.empty():
+            outdata.fill(0)
+            return
+
+        n = 0
+        while n < frames and not self.audio_buffer.empty():
+            remaining = frames - n
+            current_chunk = self.audio_buffer.queue[0]
+            k = current_chunk.shape[0]
+
+            if remaining <= k:
+                outdata[n:, 0] = current_chunk[:remaining]
+                self.audio_buffer.queue[0] = current_chunk[remaining:]
+                n = frames
+                if self.audio_buffer.queue[0].shape[0] == 0:
+                    self.audio_buffer.get()
+                break
+
+            outdata[n : n + k, 0] = self.audio_buffer.get()
+            n += k
+
+        if n < frames:
+            outdata[n:, 0] = 0
\ No newline at end of file
diff --git a/tts_wrapper/engines/witai/witai.py b/tts_wrapper/engines/witai/witai.py
index a2dcfad1..0f95b4c0 100644
--- a/tts_wrapper/engines/witai/witai.py
+++ b/tts_wrapper/engines/witai/witai.py
@@ -1,12 +1,19 @@
 from ...tts import AbstractTTS, FileFormat
-from typing import Optional, List, Dict, Any
+from typing import Any, List, Optional, Dict, Literal, Tuple, Generator
 from . import WitAiClient, WitAiSSML
 from ...engines.utils import (
     estimate_word_timings,
 )  # Import the timing estimation function
 from ...exceptions import UnsupportedFileFormat
+import re
+import numpy as np
+import pathlib
 import logging
-
+import threading
+import queue
+import sounddevice as sd
+import time
+from io import BytesIO
 class WitAiTTS(AbstractTTS):
     def __init__(
@@ -20,6 +27,12 @@ def __init__(
         self._voice = voice
         self._lang = lang
         self.audio_rate = 24000  # Adjusted based on Wit.ai's 24kHz sample rate for PCM
+        self.audio_buffer = queue.Queue()
+        self.playback_finished = threading.Event()
+        self.audio_started = False
+        self.audio_stopped = False
+        self.audio_killed = False
+        self.audio_format = "wav"  # Default format
 
     def synth_to_bytes(self, text: str) -> bytes:
         if not self._is_ssml(str(text)):
@@ -50,3 +63,215 @@ def set_voice(self, voice_id: str, lang_id: str):
 
     def construct_prosody_tag(self, text: str) -> str:
         pass
+
+    def speak_streamed(
+        self,
+        text: str,
+        save_to_file_path: Optional[str] = None,
+        audio_format: Optional[str] = "wav",
+    ) -> None:
+        """
+        Synthesizes text and plays it back using sounddevice in a streaming fashion.
+        Optionally saves the audio to a file after playback completes.
+
+        :param text: The text to synthesize and play.
+        :param save_to_file_path: Path to save the audio file (optional).
+        :param audio_format: Audio format to save (e.g., 'wav', 'mp3', 'flac').
+        """
+        logging.info(
+            "[WitAiTTS.speak_streamed] Starting speech synthesis and playback..."
+        )
+
+        # Reset flags
+        self.audio_started = False
+        self.audio_stopped = False
+        self.playback_finished.clear()
+
+        # Open the output file if saving is required
+        output_file = None
+        if save_to_file_path:
+            output_file = open(save_to_file_path, "wb")
+            logging.info(
+                f"Saving audio to {save_to_file_path} in {audio_format} format."
+            )
+
+        try:
+            # Start audio playback in a separate thread
+            playback_thread = threading.Thread(target=self.play_audio)
+            playback_thread.start()
+
+            # Iterate over the generator returned by synth_to_bytestream
+            for chunk_idx, audio_chunk in enumerate(
+                self.synth_to_bytestream(str(text), format=audio_format)
+            ):
+                logging.info(
+                    f"Processing audio chunk {chunk_idx} with size {len(audio_chunk)} bytes"
+                )
+                if audio_format.lower() == "wav":
+                    # Convert bytes back to numpy float32 array for playback
+                    # Assuming audio_chunk is raw PCM data (LINEAR16)
+                    samples = (
+                        np.frombuffer(audio_chunk, dtype=np.int16).astype(np.float32)
+                        / 32767.0
+                    )
+                elif audio_format.lower() in ["mp3", "flac"]:
+                    # For formats like MP3 or FLAC, you need to decode them back to PCM
+                    # This requires additional processing which is not implemented here
+                    # For simplicity, we'll skip playback for non-WAV formats
+                    samples = None
+                    logging.warning(
+                        f"Playback for format '{audio_format}' is not implemented."
+                    )
+                else:
+                    raise UnsupportedFileFormat(f"Unsupported format: {audio_format}")
+
+                if samples is not None:
+                    # Add audio samples to the buffer for streaming playback
+                    self.audio_buffer.put(samples)
+                    logging.info(f"Audio chunk {chunk_idx} added to buffer")
+
+                    # Write the chunk to the file if saving
+                    if save_to_file_path:
+                        output_file.write(audio_chunk)
+
+                if not self.audio_started and samples is not None:
+                    logging.info("Starting audio playback...")
+                    self.audio_started = True
+
+            # Signal that audio generation is complete
+            self.audio_stopped = True
+
+            # Wait for playback to finish
+            playback_thread.join()
+            logging.info("Playback finished.")
+
+        except Exception as e:
+            logging.error(f"Error during speak_streamed: {e}")
+            self.audio_killed = True
+
+        finally:
+            if output_file:
+                output_file.close()
+                logging.info(
+                    f"Audio successfully saved to {save_to_file_path} in {audio_format} format."
+                )
+
+    def synth_to_bytestream(
+        self, text: Any, format: Optional[str] = "wav"
+    ) -> Generator[bytes, None, None]:
+        """
+        Synthesizes text to an in-memory bytestream in the specified audio format.
+        Yields audio data chunks as they are generated.
+
+        :param text: The text to synthesize.
+        :param format: The desired audio format (e.g., 'wav', 'mp3', 'flac'). Defaults to 'wav'.
+        :return: A generator yielding bytes objects containing audio data.
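+
+        Raw PCM is yielded in 1024-byte chunks; encoded formats ('mp3',
+        'flac') are converted first and yielded in 4096-byte chunks.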
+        """
+        try:
+            logging.info(f"[WitAiTTS.synth_to_bytestream] Synthesizing text: {text}")
+
+            # Split the text into smaller segments (e.g., sentences) for incremental synthesis
+            text_segments = self._split_text(text)
+
+            for segment_idx, segment in enumerate(text_segments):
+                logging.info(f"Synthesizing segment {segment_idx}: {segment}")
+                audio_bytes = self.synth_to_bytes(str(segment))
+
+                if format.lower() == "wav":
+                    # Yield raw PCM data, skipping the WAV header.
+                    # Wit.ai audio from synth_to_bytes is assumed to be 16-bit
+                    # PCM in a canonical 44-byte-header WAV container.
+                    audio_stream = BytesIO(audio_bytes)
+                    audio_stream.seek(44)  # Skip the 44-byte WAV header
+                    chunk_size = 1024  # Number of bytes per chunk
+
+                    while True:
+                        chunk = audio_stream.read(chunk_size)
+                        if not chunk:
+                            break
+                        yield chunk
+
+                elif format.lower() in ["mp3", "flac"]:
+                    # Convert PCM to the desired format using _convert_audio
+                    pcm_data = np.frombuffer(audio_bytes, dtype=np.int16)
+                    converted_audio = self._convert_audio(
+                        pcm_data, format, self.audio_rate
+                    )
+                    chunk_size = 4096  # Number of bytes per chunk
+                    audio_io = BytesIO(converted_audio)
+
+                    while True:
+                        chunk = audio_io.read(chunk_size)
+                        if not chunk:
+                            break
+                        yield chunk
+
+                else:
+                    raise UnsupportedFileFormat(f"Unsupported format: {format}")
+
+        except Exception as e:
+            logging.error(f"Error in synth_to_bytestream: {e}")
+            raise
+
+    def play_audio(self):
+        """
+        Plays audio from the audio_buffer using sounddevice.
+        """
+        try:
+            logging.info("Starting audio playback thread...")
+            with sd.OutputStream(
+                samplerate=self.audio_rate,
+                channels=1,
+                callback=self.play_audio_callback,
+                blocksize=4096,
+                dtype="float32",
+            ):
+                self.playback_finished.wait()
+        except Exception as e:
+            logging.error(f"Error during audio playback: {e}")
+            self.audio_killed = True
+
+    def _split_text(self, text: str) -> List[str]:
+        # Simple sentence splitter based on punctuation.
+        sentences = re.split(r"(?<=[.!?]) +", text)
+        return sentences
+
+    # Audio playback callback, called continuously to stream audio from the buffer
+    def play_audio_callback(
+        self, outdata: np.ndarray, frames: int, time_info, status: sd.CallbackFlags
+    ):
+        if self.audio_killed or (
+            self.audio_started and self.audio_buffer.empty() and self.audio_stopped
+        ):
+            logging.error("AUDIO KILLED OR STOPPED OR BUFFER EMPTY")
+            self.playback_finished.set()
+            return
+
+        if self.audio_buffer.empty():
+            outdata.fill(0)
+            return
+
+        n = 0
+        while n < frames and not self.audio_buffer.empty():
+            remaining = frames - n
+            current_chunk = self.audio_buffer.queue[0]
+            k = current_chunk.shape[0]
+
+            if remaining <= k:
+                outdata[n:, 0] = current_chunk[:remaining]
+                self.audio_buffer.queue[0] = current_chunk[remaining:]
+                n = frames
+                if self.audio_buffer.queue[0].shape[0] == 0:
+                    self.audio_buffer.get()
+                break
+
+            outdata[n : n + k, 0] = self.audio_buffer.get()
+            n += k
+
+        if n < frames:
+            outdata[n:, 0] = 0
\ No newline at end of file
diff --git a/tts_wrapper/tts.py b/tts_wrapper/tts.py
index f132356e..8ef81f93 100644
--- a/tts_wrapper/tts.py
+++ b/tts_wrapper/tts.py
@@ -295,6 +295,7 @@ def callback(self, outdata, frames, time, status):
         # Each frame is 2 bytes for int16,
         # so frames * 2 gives the number of bytes
         end_position = self.position + frames * 2
+
         data = self.audio_bytes[self.position : end_position]
         if len(data) < frames * 2:
             # Not enough data to fill outdata, zero-pad it