From aaf11cadfb74c5455a0df8ac19eee4141fff5e87 Mon Sep 17 00:00:00 2001 From: Lucas Agra Date: Wed, 20 Oct 2021 17:36:16 -0300 Subject: [PATCH 1/7] Implementing backlog interpreters cache cleaning --- bothub/nlu_worker/interpreter_manager.py | 75 +++++++++++++++++++++--- bothub/settings.py | 10 ++++ 2 files changed, 76 insertions(+), 9 deletions(-) create mode 100644 bothub/settings.py diff --git a/bothub/nlu_worker/interpreter_manager.py b/bothub/nlu_worker/interpreter_manager.py index 9b953df..7a3dbca 100644 --- a/bothub/nlu_worker/interpreter_manager.py +++ b/bothub/nlu_worker/interpreter_manager.py @@ -1,18 +1,58 @@ +import logging +import threading +import time +import gc + +from typing import Callable, Union from rasa.nlu import components from tempfile import mkdtemp +from datetime import datetime +from bothub import settings from bothub.shared.utils.persistor import BothubPersistor from bothub.shared.utils.backend import backend from bothub.shared.utils.rasa_components.bothub_interpreter import BothubInterpreter +logger = logging.getLogger(__name__) + + +class SetInterval: + """ + Creates a thread that execute a function every x seconds + """ + def __init__(self, interval: Union[int, float], action: Callable): + """ + :param interval: Period in seconds + :param action: Callable function + """ + self.interval = interval + self.action = action + self.stopEvent = threading.Event() + thread = threading.Thread(target=self._set_interval, daemon=True) + thread.start() + + def _set_interval(self): + next_time = time.time() + self.interval + while not self.stopEvent.wait(next_time - time.time()): + next_time += self.interval + self.action() + + def cancel(self): + self.stopEvent.set() + class InterpreterManager: def __init__(self): self.cached_interpreters = {} + SetInterval(settings.WORKER_CACHE_CLEANING_PERIOD, self._clean_cache) def get_interpreter( - self, repository_version, repository_authorization, rasa_version, use_cache=True - ): + self, + repository_version, + repository_authorization, + rasa_version, + use_cache=True + ) -> BothubInterpreter: update_request = backend().request_backend_parse_nlu_persistor( repository_version, repository_authorization, rasa_version, no_bot_data=True @@ -24,11 +64,12 @@ def get_interpreter( last_training = f"{update_request.get('total_training_end')}" # tries to fetch cache - cached_retrieved = self.cached_interpreters.get(repository_name) - if cached_retrieved and use_cache: - # returns cache only if it's the same training - if cached_retrieved["last_training"] == last_training: - return cached_retrieved["interpreter_data"] + retrieved_cache = self.cached_interpreters.get(repository_name) + if retrieved_cache and use_cache: + # retrieve cache only if it's the same training + if retrieved_cache["last_training"] == last_training: + retrieved_cache["last_request"] = datetime.now() + return retrieved_cache["interpreter_data"] persistor = BothubPersistor( repository_version, repository_authorization, rasa_version @@ -43,11 +84,27 @@ def get_interpreter( model_directory, components.ComponentBuilder(use_cache=False) ) - # update/creates cache - if use_cache: + if use_cache: # update/creates cache self.cached_interpreters[repository_name] = { "last_training": last_training, "interpreter_data": interpreter, + "last_request": datetime.now() } return interpreter + + def _clean_cache(self) -> None: + logger.info("Cleaning repositories cache") + cur_time = datetime.now() + + to_remove = [] + for interpreter in self.cached_interpreters: + idle_time = (cur_time - self.cached_interpreters[interpreter]['last_request']).total_seconds() + if idle_time > settings.INTERPRETER_CACHE_IDLE_LIMIT: + to_remove.append(interpreter) + + for interpreter in to_remove: + del self.cached_interpreters[interpreter] + + objects_collected = gc.collect() + logger.info(f"{objects_collected} objects collected") diff --git a/bothub/settings.py b/bothub/settings.py new file mode 100644 index 0000000..0df83fb --- /dev/null +++ b/bothub/settings.py @@ -0,0 +1,10 @@ +from decouple import config + +# Period of time (seconds) the worker will look for idle interpreters to free space +WORKER_CACHE_CLEANING_PERIOD = config( + "WORKER_CACHE_CLEANING_PERIOD", cast=int, default=6*3600 +) +# Idle limit of time (seconds) the interpreter will be cached +INTERPRETER_CACHE_IDLE_LIMIT = config( + "WORKER_CACHE_CLEANING_PERIOD", cast=int, default=24*3600 +) From e1ffc82ac8b3b07de068d8a60afe9441b5893021 Mon Sep 17 00:00:00 2001 From: Lucas Agra Date: Wed, 20 Oct 2021 17:44:18 -0300 Subject: [PATCH 2/7] Fix cast env. variables --- bothub/settings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bothub/settings.py b/bothub/settings.py index 0df83fb..0406630 100644 --- a/bothub/settings.py +++ b/bothub/settings.py @@ -2,9 +2,9 @@ # Period of time (seconds) the worker will look for idle interpreters to free space WORKER_CACHE_CLEANING_PERIOD = config( - "WORKER_CACHE_CLEANING_PERIOD", cast=int, default=6*3600 + "WORKER_CACHE_CLEANING_PERIOD", cast=float, default=6*3600 ) # Idle limit of time (seconds) the interpreter will be cached INTERPRETER_CACHE_IDLE_LIMIT = config( - "WORKER_CACHE_CLEANING_PERIOD", cast=int, default=24*3600 + "WORKER_CACHE_CLEANING_PERIOD", cast=float, default=24*3600 ) From c41b3c83797314730ed2d9f6c5e91e60ca0e3666 Mon Sep 17 00:00:00 2001 From: Lucas Agra Date: Wed, 20 Oct 2021 18:02:08 -0300 Subject: [PATCH 3/7] Small changes --- bothub/nlu_worker/interpreter_manager.py | 5 +++-- bothub/settings.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/bothub/nlu_worker/interpreter_manager.py b/bothub/nlu_worker/interpreter_manager.py index 7a3dbca..8eff161 100644 --- a/bothub/nlu_worker/interpreter_manager.py +++ b/bothub/nlu_worker/interpreter_manager.py @@ -59,7 +59,7 @@ def get_interpreter( ) repository_name = ( - f"{update_request.get('version_id')}_" f"{update_request.get('language')}" + f"{update_request.get('version_id')}_{update_request.get('language')}" ) last_training = f"{update_request.get('total_training_end')}" @@ -99,7 +99,8 @@ def _clean_cache(self) -> None: to_remove = [] for interpreter in self.cached_interpreters: - idle_time = (cur_time - self.cached_interpreters[interpreter]['last_request']).total_seconds() + last_request = self.cached_interpreters[interpreter]['last_request'] + idle_time = (cur_time - last_request).total_seconds() if idle_time > settings.INTERPRETER_CACHE_IDLE_LIMIT: to_remove.append(interpreter) diff --git a/bothub/settings.py b/bothub/settings.py index 0406630..6621557 100644 --- a/bothub/settings.py +++ b/bothub/settings.py @@ -2,7 +2,7 @@ # Period of time (seconds) the worker will look for idle interpreters to free space WORKER_CACHE_CLEANING_PERIOD = config( - "WORKER_CACHE_CLEANING_PERIOD", cast=float, default=6*3600 + "WORKER_CACHE_CLEANING_PERIOD", cast=float, default=3*3600 ) # Idle limit of time (seconds) the interpreter will be cached INTERPRETER_CACHE_IDLE_LIMIT = config( From 5384e3011eb78e24006e2f937b8f1e2a051f271c Mon Sep 17 00:00:00 2001 From: Lucas Agra Date: Wed, 20 Oct 2021 18:05:07 -0300 Subject: [PATCH 4/7] Adding info log --- bothub/nlu_worker/interpreter_manager.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bothub/nlu_worker/interpreter_manager.py b/bothub/nlu_worker/interpreter_manager.py index 8eff161..00d66f7 100644 --- a/bothub/nlu_worker/interpreter_manager.py +++ b/bothub/nlu_worker/interpreter_manager.py @@ -107,5 +107,6 @@ def _clean_cache(self) -> None: for interpreter in to_remove: del self.cached_interpreters[interpreter] + logger.info(f"{len(to_remove)} interpreters cleaned") objects_collected = gc.collect() logger.info(f"{objects_collected} objects collected") From 2cd34e6ffd0b07c9334486a3057d0ac108d4f275 Mon Sep 17 00:00:00 2001 From: Lucas Agra Date: Thu, 21 Oct 2021 15:49:10 -0300 Subject: [PATCH 5/7] Change settings location --- bothub/nlu_worker/interpreter_manager.py | 2 +- bothub/{ => nlu_worker}/settings.py | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename bothub/{ => nlu_worker}/settings.py (100%) diff --git a/bothub/nlu_worker/interpreter_manager.py b/bothub/nlu_worker/interpreter_manager.py index 00d66f7..71e0348 100644 --- a/bothub/nlu_worker/interpreter_manager.py +++ b/bothub/nlu_worker/interpreter_manager.py @@ -8,7 +8,7 @@ from tempfile import mkdtemp from datetime import datetime -from bothub import settings +from bothub.nlu_worker import settings from bothub.shared.utils.persistor import BothubPersistor from bothub.shared.utils.backend import backend from bothub.shared.utils.rasa_components.bothub_interpreter import BothubInterpreter diff --git a/bothub/settings.py b/bothub/nlu_worker/settings.py similarity index 100% rename from bothub/settings.py rename to bothub/nlu_worker/settings.py From 2a054a20bb9e74254709f1cc3d9c4cf5676ed269 Mon Sep 17 00:00:00 2001 From: Lucas Agra Date: Thu, 21 Oct 2021 16:19:40 -0300 Subject: [PATCH 6/7] Improving dockerfile --- bothub/nlu_worker/interpreter_manager.py | 2 +- bothub/{nlu_worker => }/settings.py | 0 nlp.Dockerfile | 5 ++--- 3 files changed, 3 insertions(+), 4 deletions(-) rename bothub/{nlu_worker => }/settings.py (100%) diff --git a/bothub/nlu_worker/interpreter_manager.py b/bothub/nlu_worker/interpreter_manager.py index 71e0348..00d66f7 100644 --- a/bothub/nlu_worker/interpreter_manager.py +++ b/bothub/nlu_worker/interpreter_manager.py @@ -8,7 +8,7 @@ from tempfile import mkdtemp from datetime import datetime -from bothub.nlu_worker import settings +from bothub import settings from bothub.shared.utils.persistor import BothubPersistor from bothub.shared.utils.backend import backend from bothub.shared.utils.rasa_components.bothub_interpreter import BothubInterpreter diff --git a/bothub/nlu_worker/settings.py b/bothub/settings.py similarity index 100% rename from bothub/nlu_worker/settings.py rename to bothub/settings.py diff --git a/nlp.Dockerfile b/nlp.Dockerfile index 5363659..2fd7933 100644 --- a/nlp.Dockerfile +++ b/nlp.Dockerfile @@ -46,9 +46,8 @@ RUN pip install -U pip setuptools RUN pip install --find-links=${PYTHON_WHEELS_PATH} ${PIP_REQUIREMENTS} -COPY bothub/nlu_worker ${WORKDIR}/bothub/nlu_worker -COPY bothub/shared ${WORKDIR}/bothub/shared -COPY bothub/__init__.py ${WORKDIR}/bothub +COPY bothub ${WORKDIR}/bothub + COPY start_celery.py . COPY celery_app.py . From 4bd89a9820dbe9eb4009bfdbf5c91633ef9e95df Mon Sep 17 00:00:00 2001 From: Lucas Agra Date: Thu, 21 Oct 2021 17:29:30 -0300 Subject: [PATCH 7/7] Fixing settings.py --- bothub/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bothub/settings.py b/bothub/settings.py index 6621557..9d2ccca 100644 --- a/bothub/settings.py +++ b/bothub/settings.py @@ -6,5 +6,5 @@ ) # Idle limit of time (seconds) the interpreter will be cached INTERPRETER_CACHE_IDLE_LIMIT = config( - "WORKER_CACHE_CLEANING_PERIOD", cast=float, default=24*3600 + "INTERPRETER_CACHE_IDLE_LIMIT", cast=float, default=24*3600 )