From 5754a1b113acbf84caf005c71dfaa8c2d7fd0225 Mon Sep 17 00:00:00 2001
From: David vonThenen <12752197+dvonthenen@users.noreply.github.com>
Date: Thu, 17 Oct 2024 17:00:31 -0700
Subject: [PATCH] Port Enhancements from EA Release

---
 deepgram/audio/microphone/microphone.py       | 138 ++++++++++--------
 deepgram/audio/speaker/speaker.py             | 111 +++++++-------
 .../common/v1/abstract_async_websocket.py     |   5 +-
 .../common/v1/abstract_sync_websocket.py      |   5 +-
 .../speak/v1/websocket/async_client.py        |  18 ++-
 deepgram/clients/speak/v1/websocket/client.py |  18 ++-
 .../{async_complete => async_simple}/main.py  |   0
 .../{complete => output_to_wav}/main.py       |  72 +++------
 .../text-to-speech/websocket/simple/main.py   |  72 ++++++---
 9 files changed, 252 insertions(+), 187 deletions(-)
 rename examples/text-to-speech/websocket/{async_complete => async_simple}/main.py (100%)
 rename examples/text-to-speech/websocket/{complete => output_to_wav}/main.py (53%)

diff --git a/deepgram/audio/microphone/microphone.py b/deepgram/audio/microphone/microphone.py
index ae452984..91ba0df0 100644
--- a/deepgram/audio/microphone/microphone.py
+++ b/deepgram/audio/microphone/microphone.py
@@ -22,8 +22,8 @@ class Microphone:  # pylint: disable=too-many-instance-attributes
 
     _logger: verboselogs.VerboseLogger
 
-    _audio: "pyaudio.PyAudio"
-    _stream: "pyaudio.Stream"
+    _audio: Optional["pyaudio.PyAudio"] = None
+    _stream: Optional["pyaudio.Stream"] = None
 
     _chunk: int
     _rate: int
@@ -145,59 +145,31 @@ def start(self) -> bool:
             self._asyncio_thread = None
             self._push_callback = self._push_callback_org
 
-        self._stream = self._audio.open(
-            format=self._format,
-            channels=self._channels,
-            rate=self._rate,
-            input=True,
-            frames_per_buffer=self._chunk,
-            input_device_index=self._input_device_index,
-            stream_callback=self._callback,
-        )
+        if self._audio is not None:
+            self._stream = self._audio.open(
+                format=self._format,
+                channels=self._channels,
+                rate=self._rate,
+                input=True,
+                output=False,
+                frames_per_buffer=self._chunk,
+                input_device_index=self._input_device_index,
+                stream_callback=self._callback,
+            )
+
+        if self._stream is None:
+            self._logger.error("start failed. No stream created.")
+            self._logger.debug("Microphone.start LEAVE")
+            return False
 
         self._exit.clear()
-        self._stream.start_stream()
+        if self._stream is not None:
+            self._stream.start_stream()
 
         self._logger.notice("start succeeded")
         self._logger.debug("Microphone.start LEAVE")
         return True
 
-    def _callback(
-        self, input_data, frame_count, time_info, status_flags
-    ):  # pylint: disable=unused-argument
-        """
-        The callback used to process data in callback mode.
-        """
-        # dynamic import of pyaudio as not to force the requirements on the SDK (and users)
-        import pyaudio  # pylint: disable=import-outside-toplevel
-
-        self._logger.debug("Microphone._callback ENTER")
-
-        if self._exit.is_set():
-            self._logger.info("exit is Set")
-            self._logger.notice("_callback stopping...")
-            self._logger.debug("Microphone._callback LEAVE")
-            return None, pyaudio.paAbort
-
-        if input_data is None:
-            self._logger.warning("input_data is None")
-            self._logger.debug("Microphone._callback LEAVE")
-            return None, pyaudio.paContinue
-
-        try:
-            if self._is_muted:
-                size = len(input_data)
-                input_data = b"\x00" * size
-
-            self._push_callback(input_data)
-        except Exception as e:
-            self._logger.error("Error while sending: %s", str(e))
-            self._logger.debug("Microphone._callback LEAVE")
-            raise
-
-        self._logger.debug("Microphone._callback LEAVE")
-        return input_data, pyaudio.paContinue
-
     def mute(self) -> bool:
         """
         mute - mutes the microphone stream
@@ -205,17 +177,17 @@ def mute(self) -> bool:
         Returns:
             bool: True if the stream was muted, False otherwise
         """
-        self._logger.debug("Microphone.mute ENTER")
+        self._logger.verbose("Microphone.mute ENTER")
 
         if self._stream is None:
             self._logger.error("mute failed. Library not initialized.")
-            self._logger.debug("Microphone.mute LEAVE")
+            self._logger.verbose("Microphone.mute LEAVE")
             return False
 
         self._is_muted = True
 
         self._logger.notice("mute succeeded")
-        self._logger.debug("Microphone.mute LEAVE")
+        self._logger.verbose("Microphone.mute LEAVE")
         return True
 
     def unmute(self) -> bool:
@@ -225,19 +197,42 @@ def unmute(self) -> bool:
         Returns:
             bool: True if the stream was unmuted, False otherwise
         """
-        self._logger.debug("Microphone.unmute ENTER")
+        self._logger.verbose("Microphone.unmute ENTER")
 
         if self._stream is None:
             self._logger.error("unmute failed. Library not initialized.")
-            self._logger.debug("Microphone.unmute LEAVE")
+            self._logger.verbose("Microphone.unmute LEAVE")
             return False
 
         self._is_muted = False
 
         self._logger.notice("unmute succeeded")
-        self._logger.debug("Microphone.unmute LEAVE")
+        self._logger.verbose("Microphone.unmute LEAVE")
         return True
 
+    def is_muted(self) -> bool:
+        """
+        is_muted - returns the state of the stream
+
+        Args:
+            None
+
+        Returns:
+            True if the stream is muted, False otherwise
+        """
+        self._logger.spam("Microphone.is_muted ENTER")
+
+        if self._stream is None:
+            self._logger.spam("is_muted: stream is None")
+            self._logger.spam("Microphone.is_muted LEAVE")
+            return False
+
+        val = self._is_muted
+
+        self._logger.spam("is_muted: %s", val)
+        self._logger.spam("Microphone.is_muted LEAVE")
+        return val
+
     def finish(self) -> bool:
         """
         finish - stops the microphone stream
@@ -255,7 +250,6 @@ def finish(self) -> bool:
             self._logger.notice("stopping stream...")
             self._stream.stop_stream()
             self._stream.close()
-            self._stream = None  # type: ignore
             self._logger.notice("stream stopped")
 
         # clean up the thread
@@ -265,13 +259,43 @@ def finish(self) -> bool:
             self._asyncio_thread
             is not None
         ):
-            self._logger.notice("stopping asyncio loop...")
+            self._logger.notice("stopping _asyncio_loop...")
             self._asyncio_loop.call_soon_threadsafe(self._asyncio_loop.stop)
             self._asyncio_thread.join()
-            self._asyncio_thread = None
             self._logger.notice("_asyncio_thread joined")
+        self._stream = None
+        self._asyncio_thread = None
 
         self._logger.notice("finish succeeded")
         self._logger.debug("Microphone.finish LEAVE")
 
         return True
+
+    def _callback(
+        self, input_data, frame_count, time_info, status_flags
+    ):  # pylint: disable=unused-argument
+        """
+        The callback used to process data in callback mode.
+        """
+        # dynamic import of pyaudio as not to force the requirements on the SDK (and users)
+        import pyaudio  # pylint: disable=import-outside-toplevel
+
+        if self._exit.is_set():
+            self._logger.notice("_callback exit is Set. stopping...")
+            return None, pyaudio.paAbort
+
+        if input_data is None:
+            self._logger.warning("input_data is None")
+            return None, pyaudio.paContinue
+
+        try:
+            if self._is_muted:
+                size = len(input_data)
+                input_data = b"\x00" * size
+
+            self._push_callback(input_data)
+        except Exception as e:
+            self._logger.error("Error while sending: %s", str(e))
+            raise
+
+        return input_data, pyaudio.paContinue
diff --git a/deepgram/audio/speaker/speaker.py b/deepgram/audio/speaker/speaker.py
index faee51c4..66bc2dba 100644
--- a/deepgram/audio/speaker/speaker.py
+++ b/deepgram/audio/speaker/speaker.py
@@ -15,6 +15,8 @@
 from ...utils import verboselogs
 from .constants import LOGGING, CHANNELS, RATE, CHUNK, TIMEOUT, PLAYBACK_DELTA
 
+from ..microphone import Microphone
+
 if TYPE_CHECKING:
     import pyaudio
 
@@ -28,7 +30,7 @@ class Speaker:  # pylint: disable=too-many-instance-attributes
 
     _logger: verboselogs.VerboseLogger
 
-    _audio: "pyaudio.PyAudio"
+    _audio: Optional["pyaudio.PyAudio"] = None
     _stream: Optional["pyaudio.Stream"] = None
 
     _chunk: int
@@ -56,6 +58,8 @@ class Speaker:  # pylint: disable=too-many-instance-attributes
     _pull_callback_org: Optional[Callable] = None
     _pull_callback: Optional[Callable] = None
 
+    _microphone: Optional[Microphone] = None
+
     def __init__(
         self,
         pull_callback: Optional[Callable] = None,
@@ -66,6 +70,7 @@ def __init__(
         channels: int = CHANNELS,
         last_play_delta_in_ms: int = PLAYBACK_DELTA,
         output_device_index: Optional[int] = None,
+        microphone: Optional[Microphone] = None,
     ):  # pylint: disable=too-many-positional-arguments
         # dynamic import of pyaudio as not to force the requirements on the SDK (and users)
         import pyaudio  # pylint: disable=import-outside-toplevel
@@ -80,6 +85,8 @@ def __init__(
         self._last_datagram = datetime.now()
         self._lock_wait = threading.Lock()
 
+        self._microphone = microphone
+
         self._audio = pyaudio.PyAudio()
         self._chunk = chunk
         self._rate = rate
@@ -117,10 +124,6 @@ def set_pull_callback(self, pull_callback: Callable) -> None:
         """
         self._pull_callback_org = pull_callback
 
-    # def _start_asyncio_loop(self) -> None:
-    #     self._asyncio_loop = asyncio.new_event_loop()
-    #     self._asyncio_loop.run_forever()
-
     def start(self, active_loop: Optional[asyncio.AbstractEventLoop] = None) -> bool:
         """
         starts - starts the Speaker stream
@@ -147,45 +150,25 @@ def start(self, active_loop: Optional[asyncio.AbstractEventLoop] = None) -> bool
         self._exit.clear()
         self._queue = queue.Queue()
 
-        self._stream = self._audio.open(
-            format=self._format,
-            channels=self._channels,
-            rate=self._rate,
-            input=False,
-            output=True,
-            frames_per_buffer=self._chunk,
-            output_device_index=self._output_device_index,
-        )
+        if self._audio is not None:
+            self._stream = self._audio.open(
+                format=self._format,
+                channels=self._channels,
+                rate=self._rate,
+                input=False,
+                output=True,
+                frames_per_buffer=self._chunk,
+                output_device_index=self._output_device_index,
+            )
+
+        if self._stream is None:
+            self._logger.error("start failed. No stream created.")
+            self._logger.debug("Speaker.start LEAVE")
+            return False
 
         self._push_callback = self._push_callback_org
         self._pull_callback = self._pull_callback_org
 
-        # if inspect.iscoroutinefunction(
-        #     self._push_callback_org
-        # ) or inspect.iscoroutinefunction(self._pull_callback_org):
-        #     self._logger.verbose("Starting asyncio loop...")
-        #     self._asyncio_thread = threading.Thread(target=self._start_asyncio_loop)
-        #     self._asyncio_thread.start()
-
-        # # determine if the push_callback is a coroutine
-        # if inspect.iscoroutinefunction(self._push_callback_org):
-        #     self._logger.verbose("async/await push callback")
-        #     self._push_callback = lambda data: asyncio.run_coroutine_threadsafe(
-        #         self._push_callback_org(data), self._asyncio_loop
-        #     ).result()
-        # else:
-        #     self._logger.verbose("threaded push callback")
-        #     self._push_callback = self._push_callback_org
-
-        # if inspect.iscoroutinefunction(self._pull_callback_org):
-        #     self._logger.verbose("async/await pull callback")
-        #     self._pull_callback = lambda: asyncio.run_coroutine_threadsafe(
-        #         self._pull_callback_org(), self._asyncio_loop
-        #     ).result()
-        # else:
-        #     self._logger.verbose("threaded pull callback")
-        #     self._pull_callback = self._pull_callback_org
-
         # start the play thread
         self._thread = threading.Thread(
             target=self._play, args=(self._queue, self._stream, self._exit), daemon=True
@@ -193,7 +176,8 @@ def start(self, active_loop: Optional[asyncio.AbstractEventLoop] = None) -> bool
         self._thread.start()
 
         # Start the stream
-        self._stream.start_stream()
+        if self._stream is not None:
+            self._stream.start_stream()
 
         # Start the receiver thread within the start function
         self._logger.verbose("Starting receiver thread...")
@@ -205,6 +189,18 @@ def start(self, active_loop: Optional[asyncio.AbstractEventLoop] = None) -> bool
 
         return True
 
+    def wait_for_complete_with_mute(self, mic: Microphone):
+        """
+        This method will mute/unmute a Microphone and block until the speak is done playing sound.
+        """
+        self._logger.debug("Speaker.wait_for_complete ENTER")
+
+        mic.mute()
+        self.wait_for_complete()
+        mic.unmute()
+
+        self._logger.debug("Speaker.wait_for_complete LEAVE")
+
     def wait_for_complete(self):
         """
         This method will block until the speak is done playing sound.
@@ -328,29 +324,24 @@ def finish(self) -> bool:
             self._logger.notice("stopping stream...")
             self._stream.stop_stream()
             self._stream.close()
-            self._stream = None
             self._logger.notice("stream stopped")
 
         if self._thread is not None:
-            self._logger.notice("joining thread...")
+            self._logger.notice("joining _thread...")
             self._thread.join()
-            self._thread = None
             self._logger.notice("thread stopped")
 
-        # if self._asyncio_thread is not None:
-        #     self._logger.notice("stopping asyncio loop...")
-        #     self._asyncio_loop.call_soon_threadsafe(self._asyncio_loop.stop)
-        #     self._asyncio_thread.join()
-        #     self._asyncio_thread = None
-        #     self._logger.notice("_asyncio_thread joined")
-
         if self._receiver_thread is not None:
-            self._logger.notice("stopping asyncio loop...")
+            self._logger.notice("stopping _receiver_thread...")
             self._receiver_thread.join()
-            self._receiver_thread = None
             self._logger.notice("_receiver_thread joined")
 
-        self._queue = None  # type: ignore
+        with self._queue.mutex:
+            self._queue.queue.clear()
+
+        self._stream = None
+        self._thread = None
+        self._receiver_thread = None
 
         self._logger.notice("finish succeeded")
         self._logger.debug("Speaker.finish LEAVE")
@@ -363,9 +354,21 @@ def _play(self, audio_out, stream, stop):
         """
         while not stop.is_set():
             try:
+                if self._microphone is not None and self._microphone.is_muted():
+                    with self._lock_wait:
+                        delta = datetime.now() - self._last_datagram
+                        diff_in_ms = delta.total_seconds() * 1000
+                        if diff_in_ms > float(self._last_play_delta_in_ms):
+                            self._logger.debug(
+                                "LastPlay delta is greater than threshold. Unmute!"
+                            )
+                            self._microphone.unmute()
                 data = audio_out.get(True, TIMEOUT)
                 with self._lock_wait:
                     self._last_datagram = datetime.now()
+                    if self._microphone is not None and not self._microphone.is_muted():
+                        self._logger.debug("New speaker sound detected. Mute!")
+                        self._microphone.mute()
                 stream.write(data)
             except queue.Empty:
                 pass
diff --git a/deepgram/clients/common/v1/abstract_async_websocket.py b/deepgram/clients/common/v1/abstract_async_websocket.py
index 8a35a98d..c24253da 100644
--- a/deepgram/clients/common/v1/abstract_async_websocket.py
+++ b/deepgram/clients/common/v1/abstract_async_websocket.py
@@ -448,7 +448,10 @@ async def finish(self) -> bool:
 
             # debug the threads
             for thread in threading.enumerate():
-                self._logger.debug("after running thread: %s", thread.name)
+                if thread is not None and thread.name is not None:
+                    self._logger.debug("after running thread: %s", thread.name)
+                else:
+                    self._logger.debug("after running thread: unknown_thread_name")
             self._logger.debug("number of active threads: %s", threading.active_count())
 
             self._logger.notice("finish succeeded")
diff --git a/deepgram/clients/common/v1/abstract_sync_websocket.py b/deepgram/clients/common/v1/abstract_sync_websocket.py
index b29e9e1d..30d19504 100644
--- a/deepgram/clients/common/v1/abstract_sync_websocket.py
+++ b/deepgram/clients/common/v1/abstract_sync_websocket.py
@@ -445,7 +445,10 @@ def finish(self) -> bool:
 
         # debug the threads
         for thread in threading.enumerate():
-            self._logger.debug("before running thread: %s", thread.name)
+            if thread is not None and thread.name is not None:
+                self._logger.debug("before running thread: %s", thread.name)
+            else:
+                self._logger.debug("after running thread: unknown_thread_name")
         self._logger.debug("number of active threads: %s", threading.active_count())
 
         self._logger.notice("finish succeeded")
diff --git a/deepgram/clients/speak/v1/websocket/async_client.py b/deepgram/clients/speak/v1/websocket/async_client.py
index c5c1cb16..b7965c69 100644
--- a/deepgram/clients/speak/v1/websocket/async_client.py
+++ b/deepgram/clients/speak/v1/websocket/async_client.py
@@ -27,6 +27,7 @@
 )
 from .options import SpeakWSOptions
 
+from .....audio.microphone import Microphone
 from .....audio.speaker import Speaker, RATE, CHANNELS, PLAYBACK_DELTA
 
 ONE_SECOND = 1
@@ -62,9 +63,13 @@ class AsyncSpeakWSClient(
     _options: Optional[Dict] = None
     _headers: Optional[Dict] = None
 
+    _speaker_created: bool = False
     _speaker: Optional[Speaker] = None
+    _microphone: Optional[Microphone] = None
 
-    def __init__(self, config: DeepgramClientOptions):
+    def __init__(
+        self, config: DeepgramClientOptions, microphone: Optional[Microphone] = None
+    ):
         if config is None:
             raise DeepgramError("Config is required")
         self._logger = verboselogs.VerboseLogger(__name__)
@@ -80,6 +85,9 @@ def __init__(self, config: DeepgramClientOptions):
         self._last_datagram = None
         self._flush_count = 0
 
+        # microphone
+        self._microphone = microphone
+
         # init handlers
         self._event_handlers = {
             event: [] for event in SpeakWebSocketEvents.__members__.values()
@@ -104,6 +112,8 @@ def __init__(self, config: DeepgramClientOptions):
             self._logger.debug("channels: %s", channels)
             self._logger.debug("device_index: %s", device_index)
 
+            self._speaker_created = True
+
             if device_index is not None:
                 self._speaker = Speaker(
                     rate=rate,
@@ -111,6 +121,7 @@ def __init__(self, config: DeepgramClientOptions):
                     last_play_delta_in_ms=playback_delta_in_ms,
                     verbose=self._config.verbose,
                     output_device_index=device_index,
+                    microphone=self._microphone,
                 )
             else:
                 self._speaker = Speaker(
@@ -118,6 +129,7 @@ def __init__(self, config: DeepgramClientOptions):
                     channels=channels,
                     last_play_delta_in_ms=playback_delta_in_ms,
                     verbose=self._config.verbose,
+                    microphone=self._microphone,
                 )
 
         # call the parent constructor
@@ -628,6 +640,10 @@ async def finish(self) -> bool:
             if await super().finish() is False:
                 self._logger.error("AsyncListenWebSocketClient.finish failed")
 
+            if self._speaker is not None and self._speaker_created:
+                self._speaker.finish()
+                self._speaker_created = False
+
             # Before cancelling, check if the tasks were created
             # debug the threads
             for thread in threading.enumerate():
diff --git a/deepgram/clients/speak/v1/websocket/client.py b/deepgram/clients/speak/v1/websocket/client.py
index f334bb9b..05580bfa 100644
--- a/deepgram/clients/speak/v1/websocket/client.py
+++ b/deepgram/clients/speak/v1/websocket/client.py
@@ -27,6 +27,7 @@
 )
 from .options import SpeakWSOptions
 
+from .....audio.microphone import Microphone
 from .....audio.speaker import Speaker, RATE, CHANNELS, PLAYBACK_DELTA
 
 ONE_SECOND = 1
@@ -63,9 +64,13 @@ class SpeakWSClient(
     _options: Optional[Dict] = None
     _headers: Optional[Dict] = None
 
+    _speaker_created: bool = False
     _speaker: Optional[Speaker] = None
+    _microphone: Optional[Microphone] = None
 
-    def __init__(self, config: DeepgramClientOptions):
+    def __init__(
+        self, config: DeepgramClientOptions, microphone: Optional[Microphone] = None
+    ):
         if config is None:
             raise DeepgramError("Config is required")
 
@@ -83,6 +88,9 @@ def __init__(self, config: DeepgramClientOptions):
         self._last_datagram = None
         self._flush_count = 0
 
+        # microphone
+        self._microphone = microphone
+
         # init handlers
         self._event_handlers = {
             event: [] for event in SpeakWebSocketEvents.__members__.values()
@@ -107,6 +115,8 @@ def __init__(self, config: DeepgramClientOptions):
             self._logger.debug("channels: %s", channels)
             self._logger.debug("device_index: %s", device_index)
 
+            self._speaker_created = True
+
             if device_index is not None:
                 self._speaker = Speaker(
                     rate=rate,
@@ -114,6 +124,7 @@ def __init__(self, config: DeepgramClientOptions):
                     last_play_delta_in_ms=playback_delta_in_ms,
                     verbose=self._config.verbose,
                     output_device_index=device_index,
+                    microphone=self._microphone,
                 )
             else:
                 self._speaker = Speaker(
@@ -121,6 +132,7 @@ def __init__(self, config: DeepgramClientOptions):
                     channels=channels,
                     last_play_delta_in_ms=playback_delta_in_ms,
                     verbose=self._config.verbose,
+                    microphone=self._microphone,
                 )
 
         # call the parent constructor
@@ -624,6 +636,10 @@ def finish(self) -> bool:
         if super().finish() is False:
             self._logger.error("ListenWebSocketClient.finish failed")
 
+        if self._speaker is not None and self._speaker_created:
+            self._speaker.finish()
+            self._speaker_created = False
+
         # debug the threads
         for thread in threading.enumerate():
             self._logger.debug("before running thread: %s", thread.name)
diff --git a/examples/text-to-speech/websocket/async_complete/main.py b/examples/text-to-speech/websocket/async_simple/main.py
similarity index 100%
rename from examples/text-to-speech/websocket/async_complete/main.py
rename to examples/text-to-speech/websocket/async_simple/main.py
diff --git a/examples/text-to-speech/websocket/complete/main.py b/examples/text-to-speech/websocket/output_to_wav/main.py
similarity index 53%
rename from examples/text-to-speech/websocket/complete/main.py
rename to examples/text-to-speech/websocket/output_to_wav/main.py
index 612ae7da..b5a5629b 100644
--- a/examples/text-to-speech/websocket/complete/main.py
+++ b/examples/text-to-speech/websocket/output_to_wav/main.py
@@ -4,7 +4,7 @@
 
 import time
 from deepgram.utils import verboselogs
-
+import wave
 
 from deepgram import (
     DeepgramClient,
@@ -13,23 +13,20 @@
     SpeakWSOptions,
 )
 
-TTS_TEXT = "Hello, this is a text to speech example using Deepgram."
-
-global warning_notice
-warning_notice = True
+AUDIO_FILE = "output.wav"
+TTS_TEXT = "Hello, this is a text to speech example using Deepgram. How are you doing today? I am fine thanks for asking."
 
 
 def main():
     try:
         # example of setting up a client config. logging values: WARNING, VERBOSE, DEBUG, SPAM
-        config: DeepgramClientOptions = DeepgramClientOptions(
-            options={
-                # "auto_flush_speak_delta": "500",
-                "speaker_playback": "true",
-            },
-            # verbose=verboselogs.DEBUG,
-        )
-        deepgram: DeepgramClient = DeepgramClient("", config)
+        # config: DeepgramClientOptions = DeepgramClientOptions(
+        #     # options={"auto_flush_speak_delta": "500", "speaker_playback": "true"},
+        #     verbose=verboselogs.SPAM,
+        # )
+        # deepgram: DeepgramClient = DeepgramClient("", config)
+        # otherwise, use default config
+        deepgram: DeepgramClient = DeepgramClient()
 
         # Create a websocket connection to Deepgram
         dg_connection = deepgram.speak.websocket.v("1")
@@ -38,46 +35,25 @@ def on_open(self, open, **kwargs):
             print(f"\n\n{open}\n\n")
 
         def on_binary_data(self, data, **kwargs):
-            global warning_notice
-            if warning_notice:
-                print("Received binary data")
-                print("You can do something with the binary data here")
-                print("OR")
-                print(
-                    "If you want to simply play the audio, set speaker_playback to true in the options for DeepgramClientOptions"
-                )
-                warning_notice = False
-
-        def on_metadata(self, metadata, **kwargs):
-            print(f"\n\n{metadata}\n\n")
-
-        def on_flush(self, flushed, **kwargs):
-            print(f"\n\n{flushed}\n\n")
-
-        def on_clear(self, clear, **kwargs):
-            print(f"\n\n{clear}\n\n")
+            print("Received binary data")
+            with open(AUDIO_FILE, "ab") as f:
+                f.write(data)
+                f.flush()
 
         def on_close(self, close, **kwargs):
             print(f"\n\n{close}\n\n")
 
-        def on_warning(self, warning, **kwargs):
-            print(f"\n\n{warning}\n\n")
-
-        def on_error(self, error, **kwargs):
-            print(f"\n\n{error}\n\n")
-
-        def on_unhandled(self, unhandled, **kwargs):
-            print(f"\n\n{unhandled}\n\n")
-
         dg_connection.on(SpeakWebSocketEvents.Open, on_open)
         dg_connection.on(SpeakWebSocketEvents.AudioData, on_binary_data)
-        dg_connection.on(SpeakWebSocketEvents.Metadata, on_metadata)
-        dg_connection.on(SpeakWebSocketEvents.Flushed, on_flush)
-        dg_connection.on(SpeakWebSocketEvents.Cleared, on_clear)
         dg_connection.on(SpeakWebSocketEvents.Close, on_close)
-        dg_connection.on(SpeakWebSocketEvents.Error, on_error)
-        dg_connection.on(SpeakWebSocketEvents.Warning, on_warning)
-        dg_connection.on(SpeakWebSocketEvents.Unhandled, on_unhandled)
+
+        # Generate a generic WAV container header
+        # since we don't support containerized audio, we need to generate a header
+        header = wave.open(AUDIO_FILE, "wb")
+        header.setnchannels(1)  # Mono audio
+        header.setsampwidth(2)  # 16-bit audio
+        header.setframerate(16000)  # Sample rate of 16000 Hz
+        header.close()
 
         # connect to websocket
         options = SpeakWSOptions(
@@ -93,13 +69,11 @@ def on_unhandled(self, unhandled, **kwargs):
 
         # send the text to Deepgram
         dg_connection.send_text(TTS_TEXT)
-
         # if auto_flush_speak_delta is not used, you must flush the connection by calling flush()
         dg_connection.flush()
 
         # Indicate that we've finished
-        dg_connection.wait_for_complete()
-
+        time.sleep(7)
         print("\n\nPress Enter to stop...\n\n")
         input()
 
diff --git a/examples/text-to-speech/websocket/simple/main.py b/examples/text-to-speech/websocket/simple/main.py
index b5a5629b..612ae7da 100644
--- a/examples/text-to-speech/websocket/simple/main.py
+++ b/examples/text-to-speech/websocket/simple/main.py
@@ -4,7 +4,7 @@
 
 import time
 from deepgram.utils import verboselogs
-import wave
+
 
 from deepgram import (
     DeepgramClient,
@@ -13,20 +13,23 @@
     SpeakWSOptions,
 )
 
-AUDIO_FILE = "output.wav"
-TTS_TEXT = "Hello, this is a text to speech example using Deepgram. How are you doing today? I am fine thanks for asking."
+TTS_TEXT = "Hello, this is a text to speech example using Deepgram."
+
+global warning_notice
+warning_notice = True
 
 
 def main():
     try:
         # example of setting up a client config. logging values: WARNING, VERBOSE, DEBUG, SPAM
-        # config: DeepgramClientOptions = DeepgramClientOptions(
-        #     # options={"auto_flush_speak_delta": "500", "speaker_playback": "true"},
-        #     verbose=verboselogs.SPAM,
-        # )
-        # deepgram: DeepgramClient = DeepgramClient("", config)
-        # otherwise, use default config
-        deepgram: DeepgramClient = DeepgramClient()
+        config: DeepgramClientOptions = DeepgramClientOptions(
+            options={
+                # "auto_flush_speak_delta": "500",
+                "speaker_playback": "true",
+            },
+            # verbose=verboselogs.DEBUG,
+        )
+        deepgram: DeepgramClient = DeepgramClient("", config)
 
         # Create a websocket connection to Deepgram
         dg_connection = deepgram.speak.websocket.v("1")
@@ -35,25 +38,46 @@ def on_open(self, open, **kwargs):
             print(f"\n\n{open}\n\n")
 
         def on_binary_data(self, data, **kwargs):
-            print("Received binary data")
-            with open(AUDIO_FILE, "ab") as f:
-                f.write(data)
-                f.flush()
+            global warning_notice
+            if warning_notice:
+                print("Received binary data")
+                print("You can do something with the binary data here")
+                print("OR")
+                print(
+                    "If you want to simply play the audio, set speaker_playback to true in the options for DeepgramClientOptions"
+                )
+                warning_notice = False
+
+        def on_metadata(self, metadata, **kwargs):
+            print(f"\n\n{metadata}\n\n")
+
+        def on_flush(self, flushed, **kwargs):
+            print(f"\n\n{flushed}\n\n")
+
+        def on_clear(self, clear, **kwargs):
+            print(f"\n\n{clear}\n\n")
 
         def on_close(self, close, **kwargs):
             print(f"\n\n{close}\n\n")
 
+        def on_warning(self, warning, **kwargs):
+            print(f"\n\n{warning}\n\n")
+
+        def on_error(self, error, **kwargs):
+            print(f"\n\n{error}\n\n")
+
+        def on_unhandled(self, unhandled, **kwargs):
+            print(f"\n\n{unhandled}\n\n")
+
         dg_connection.on(SpeakWebSocketEvents.Open, on_open)
         dg_connection.on(SpeakWebSocketEvents.AudioData, on_binary_data)
+        dg_connection.on(SpeakWebSocketEvents.Metadata, on_metadata)
+        dg_connection.on(SpeakWebSocketEvents.Flushed, on_flush)
+        dg_connection.on(SpeakWebSocketEvents.Cleared, on_clear)
         dg_connection.on(SpeakWebSocketEvents.Close, on_close)
-
-        # Generate a generic WAV container header
-        # since we don't support containerized audio, we need to generate a header
-        header = wave.open(AUDIO_FILE, "wb")
-        header.setnchannels(1)  # Mono audio
-        header.setsampwidth(2)  # 16-bit audio
-        header.setframerate(16000)  # Sample rate of 16000 Hz
-        header.close()
+        dg_connection.on(SpeakWebSocketEvents.Error, on_error)
+        dg_connection.on(SpeakWebSocketEvents.Warning, on_warning)
+        dg_connection.on(SpeakWebSocketEvents.Unhandled, on_unhandled)
 
         # connect to websocket
         options = SpeakWSOptions(
@@ -69,11 +93,13 @@ def on_close(self, close, **kwargs):
 
         # send the text to Deepgram
         dg_connection.send_text(TTS_TEXT)
+
         # if auto_flush_speak_delta is not used, you must flush the connection by calling flush()
         dg_connection.flush()
 
         # Indicate that we've finished
-        time.sleep(7)
+        dg_connection.wait_for_complete()
+
         print("\n\nPress Enter to stop...\n\n")
         input()