Skip to content

Commit

Permalink
Support for round-robin allocation of CUDA cards to workers
Browse files Browse the repository at this point in the history
A gunicorn post_fork hook has been added to set CUDA_VISIBLE_DEVICES, which
sets the device torch will use.

An app-level config variable "APP_CUDA_DEVICE_COUNT" is required to
indicate how many devices are to be used.

The devices are allocated to the container in the docker compose configuration.
  • Loading branch information
Richard Beare authored and richardbeare committed Aug 3, 2023
1 parent fca0a2f commit effa069
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 1 deletion.
18 changes: 18 additions & 0 deletions config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import os
def post_fork(server, worker):
    """Gunicorn post_fork hook: pin each worker to a single CUDA device.

    Workers are assigned devices round-robin via ``worker.age`` so that torch
    (imported after the fork) only sees its allotted card through
    ``CUDA_VISIBLE_DEVICES``.

    :param server: the gunicorn Arbiter instance (provides ``log``).
    :param worker: the freshly forked Worker (provides ``pid``, ``age``, ``log``).
    """
    server.log.info("Worker spawned (pid: %s)", worker.pid)
    # Default "-1" (disabled) when the variable is absent.
    cuda_device_count = int(os.getenv("APP_CUDA_DEVICE_COUNT", "-1"))

    if cuda_device_count > 0:
        # Must happen before any model/torch loading in the worker, otherwise
        # CUDA_VISIBLE_DEVICES has no effect. APP_CUDA_DEVICE_COUNT is set in
        # env_app; the docker compose file allocates the cards to the container.
        cuda_id = worker.age % cuda_device_count
        worker.log.info("Setting cuda device %s", cuda_id)
        os.environ["CUDA_VISIBLE_DEVICES"] = str(cuda_id)
    else:
        worker.log.info(
            "APP_CUDA_DEVICE_COUNT not set to a positive value; "
            "skipping CUDA device allocation"
        )


3 changes: 3 additions & 0 deletions medcat_service/nlp_processor/medcat_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def __init__(self):
self.app_model = os.getenv("APP_MODEL_NAME", "unknown")
self.entity_output_mode = os.getenv("ANNOTATIONS_ENTITY_OUTPUT_MODE", "dict").lower()


self.cat = self._create_cat()
self.cat.train = os.getenv("APP_TRAINING_MODE", False)

Expand All @@ -70,11 +71,13 @@ def __init__(self):
# this is available to constrain torch threads when there
# isn't a GPU
# You probably want to set to 1
# Not sure what happens if torch is using a cuda device
if self.torch_threads > 0:
import torch
torch.set_num_threads(self.torch_threads)
self.log.info("Torch threads set to " + str(self.torch_threads))


self.log.info("MedCAT processor is ready")

def get_app_info(self):
Expand Down
3 changes: 2 additions & 1 deletion start-service-prod.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,6 @@ SERVER_ACCESS_LOG_FORMAT="%(t)s [ACCESSS] %(h)s \"%(r)s\" %(s)s \"%(f)s\" \"%(a)
#
echo "Starting up Flask app using gunicorn server ..."
gunicorn --bind $SERVER_HOST:$SERVER_PORT --workers=$SERVER_WORKERS --threads=$SERVER_THREADS --timeout=$SERVER_WORKER_TIMEOUT \
--access-logformat="$SERVER_ACCESS_LOG_FORMAT" --access-logfile=- --log-file=- --log-level info \
--access-logformat="$SERVER_ACCESS_LOG_FORMAT" --access-logfile=- --log-file=- --log-level info \
--config /cat/config.py \
wsgi

0 comments on commit effa069

Please sign in to comment.