diff --git a/README.md b/README.md
index 3c242c2..5b427cb 100644
--- a/README.md
+++ b/README.md
@@ -31,34 +31,37 @@ The server supports two backends `faster_whisper` and `tensorrt`. If running `te
 ### Running the Server
 - [Faster Whisper](https://github.com/SYSTRAN/faster-whisper) backend
 ```bash
-python3 run_server.py --port 9090 \
-                      --backend faster_whisper
+python3 -m whisper_live.server --port 9090 --backend faster_whisper
 
 # running with custom model
-python3 run_server.py --port 9090 \
-                      --backend faster_whisper \
-                      -fw "/path/to/custom/faster/whisper/model"
+python3 -m whisper_live.server \
+  -p 9090 \
+  --backend faster_whisper \
+  -fw "/path/to/custom/faster/whisper/model"
 ```
 - TensorRT backend. Currently, we recommend to only use the docker setup for TensorRT. Follow [TensorRT_whisper readme](https://github.com/collabora/WhisperLive/blob/main/TensorRT_whisper.md) which works as expected. Make sure to build your TensorRT Engines before running the server with TensorRT backend.
 ```bash
 # Run English only model
-python3 run_server.py -p 9090 \
-                      -b tensorrt \
-                      -trt /home/TensorRT-LLM/examples/whisper/whisper_small_en
+python3 -m whisper_live.server \
+  -p 9090 \
+  -b tensorrt \
+  -trt /home/TensorRT-LLM/examples/whisper/whisper_small_en
 
 # Run Multilingual model
-python3 run_server.py -p 9090 \
-                      -b tensorrt \
-                      -trt /home/TensorRT-LLM/examples/whisper/whisper_small \
-                      -m
+python3 -m whisper_live.server \
+  -p 9090 \
+  -b tensorrt \
+  -trt /home/TensorRT-LLM/examples/whisper/whisper_small \
+  -m
 ```
 
 #### Controlling OpenMP Threads
 To control the number of threads used by OpenMP, you can set the `OMP_NUM_THREADS` environment variable. This is useful for managing CPU resources and ensuring consistent performance. If not specified, `OMP_NUM_THREADS` is set to `1` by default. You can change this by using the `--omp_num_threads` argument:
 ```bash
-python3 run_server.py --port 9090 \
-                      --backend faster_whisper \
-                      --omp_num_threads 4
+python3 -m whisper_live.server \
+  -p 9090 \
+  --backend faster_whisper \
+  --omp_num_threads 4
 ```
 
 #### Single model mode
diff --git a/run_server.py b/run_server.py
index db66da4..f3b3b70 100644
--- a/run_server.py
+++ b/run_server.py
@@ -1,50 +1,4 @@
-import argparse
-import os
-
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--port', '-p',
-                        type=int,
-                        default=9090,
-                        help="Websocket port to run the server on.")
-    parser.add_argument('--backend', '-b',
-                        type=str,
-                        default='faster_whisper',
-                        help='Backends from ["tensorrt", "faster_whisper"]')
-    parser.add_argument('--faster_whisper_custom_model_path', '-fw',
-                        type=str, default=None,
-                        help="Custom Faster Whisper Model")
-    parser.add_argument('--trt_model_path', '-trt',
-                        type=str,
-                        default=None,
-                        help='Whisper TensorRT model path')
-    parser.add_argument('--trt_multilingual', '-m',
-                        action="store_true",
-                        help='Boolean only for TensorRT model. True if multilingual.')
-    parser.add_argument('--omp_num_threads', '-omp',
-                        type=int,
-                        default=1,
-                        help="Number of threads to use for OpenMP")
-    parser.add_argument('--no_single_model', '-nsm',
-                        action='store_true',
-                        help='Set this if every connection should instantiate its own model. Only relevant for custom model, passed using -trt or -fw.')
-    args = parser.parse_args()
-
-    if args.backend == "tensorrt":
-        if args.trt_model_path is None:
-            raise ValueError("Please Provide a valid tensorrt model path")
-
-    if "OMP_NUM_THREADS" not in os.environ:
-        os.environ["OMP_NUM_THREADS"] = str(args.omp_num_threads)
+    from whisper_live.server import main
 
-    from whisper_live.server import TranscriptionServer
-    server = TranscriptionServer()
-    server.run(
-        "0.0.0.0",
-        port=args.port,
-        backend=args.backend,
-        faster_whisper_custom_model_path=args.faster_whisper_custom_model_path,
-        whisper_tensorrt_path=args.trt_model_path,
-        trt_multilingual=args.trt_multilingual,
-        single_model=not args.no_single_model,
-    )
+    main()
diff --git a/whisper_live/server.py b/whisper_live/server.py
index b6efc77..ae41d17 100644
--- a/whisper_live/server.py
+++ b/whisper_live/server.py
@@ -1080,3 +1080,67 @@ def update_segments(self, segments, duration):
         self.timestamp_offset += offset
 
         return last_segment
+
+
+def _arg_parser():
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--bind',
+                        type=str,
+                        default="0.0.0.0",
+                        help="Host address to bind the server.")
+    parser.add_argument('--port', '-p',
+                        type=int,
+                        default=9090,
+                        help="Websocket port to run the server on.")
+    parser.add_argument('--backend', '-b',
+                        type=str,
+                        default='faster_whisper',
+                        help='Backends from ["tensorrt", "faster_whisper"]')
+    parser.add_argument('--faster_whisper_custom_model_path', '-fw',
+                        type=str, default=None,
+                        help="Custom Faster Whisper Model")
+    parser.add_argument('--trt_model_path', '-trt',
+                        type=str,
+                        default=None,
+                        help='Whisper TensorRT model path')
+    parser.add_argument('--trt_multilingual', '-m',
+                        action="store_true",
+                        help='Boolean only for TensorRT model. True if multilingual.')
+    parser.add_argument('--omp_num_threads', '-omp',
+                        type=int,
+                        default=1,
+                        help="Number of threads to use for OpenMP")
+    parser.add_argument('--no_single_model', '-nsm',
+                        action='store_true',
+                        help='Set this if every connection should instantiate its own model. Only relevant for custom model, passed using -trt or -fw.')
+    return parser
+
+
+def main():
+    parser = _arg_parser()
+    args = parser.parse_args()
+
+    if args.backend == "tensorrt":
+        if args.trt_model_path is None:
+            raise ValueError("Please provide a valid TensorRT model path")
+
+    if "OMP_NUM_THREADS" not in os.environ:
+        os.environ["OMP_NUM_THREADS"] = str(args.omp_num_threads)
+
+    # TranscriptionServer is defined in this module, so no self-import is needed.
+    server = TranscriptionServer()
+    server.run(
+        args.bind,
+        port=args.port,
+        backend=args.backend,
+        faster_whisper_custom_model_path=args.faster_whisper_custom_model_path,
+        whisper_tensorrt_path=args.trt_model_path,
+        trt_multilingual=args.trt_multilingual,
+        single_model=not args.no_single_model,
+    )
+
+
+if __name__ == "__main__":
+    main()
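
---

Review note: a minimal sketch for sanity-checking the new CLI surface without starting a server. It only exercises the `_arg_parser()` helper added in this diff; the flag names and defaults are taken from the diff itself, and the script assumes the `whisper_live` package is importable from the working tree.

```python
# Sanity-check the argument surface introduced in this diff.
# Assumes whisper_live is importable (e.g. `pip install -e .` in the repo).
from whisper_live.server import _arg_parser

parser = _arg_parser()

# Parse the same flags the README examples use.
args = parser.parse_args(["-p", "9090", "--backend", "faster_whisper"])

assert args.port == 9090
assert args.backend == "faster_whisper"
assert args.bind == "0.0.0.0"         # new --bind flag defaults to all interfaces
assert args.omp_num_threads == 1      # matches the documented OMP_NUM_THREADS default
assert not args.trt_multilingual      # store_true flags default to False

print("parsed:", vars(args))
```

Because `_arg_parser()` builds the parser without side effects, a check like this can run in CI without touching the TensorRT or faster_whisper backends.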