From f1d81bb7ea6ebe208d243bf7f82b7deb6a2a8291 Mon Sep 17 00:00:00 2001 From: willgraf <7930703+willgraf@users.noreply.github.com> Date: Tue, 24 Mar 2020 17:09:35 -0700 Subject: [PATCH] Memory management improvements (#28) * call gc.collect after every cleaning * only save name and status from k8s list_pod_for_all_namespaces response * remove unused failure_stale_seconds * upgrade redis to 3.4.1 * reset janitor state at then end of clean() * upgrade to kubernetes==10.0.0 for 1.14.0 compatibility * test_repair_redis_key to keep coverage up --- clean-redis.py | 2 ++ redis_janitor/janitors.py | 24 ++++++++++++------------ redis_janitor/janitors_test.py | 31 ++++++++++++++++++++++--------- requirements.txt | 4 ++-- 4 files changed, 38 insertions(+), 23 deletions(-) diff --git a/clean-redis.py b/clean-redis.py index c1b5b35..6228072 100644 --- a/clean-redis.py +++ b/clean-redis.py @@ -27,6 +27,7 @@ from __future__ import division from __future__ import print_function +import gc import logging import logging.handlers import sys @@ -94,6 +95,7 @@ def initialize_logger(debug_mode=True): while True: try: janitor.clean() + gc.collect() time.sleep(INTERVAL) except Exception as err: # pylint: disable=broad-except _logger.critical('Fatal Error: %s: %s', type(err).__name__, err) diff --git a/redis_janitor/janitors.py b/redis_janitor/janitors.py index d6cd28c..8602f95 100644 --- a/redis_janitor/janitors.py +++ b/redis_janitor/janitors.py @@ -47,7 +47,6 @@ def __init__(self, namespace='default', backoff=3, stale_time=600, # 10 minutes - failure_stale_seconds=60, pod_refresh_interval=5,): self.redis_client = redis_client self.logger = logging.getLogger(str(self.__class__.__name__)) @@ -55,7 +54,6 @@ def __init__(self, self.queues = str(queue).lower().split(queue_delimiter) self.namespace = namespace self.stale_time = int(stale_time) - self.failure_stale_seconds = failure_stale_seconds self.pod_refresh_interval = int(pod_refresh_interval) # empty initializers, update them with _update_pods @@ -169,7 +167,8 @@ def repair_redis_key(self, redis_key): def _update_pods(self): """Refresh pod data and update timestamp""" namespaced_pods = self.list_pod_for_all_namespaces() - self.pods = {pod.metadata.name: pod for pod in namespaced_pods} + self.pods = {pod.metadata.name: pod.status.phase + for pod in namespaced_pods} self.pods_updated_at = datetime.datetime.now(pytz.UTC) def update_pods(self): @@ -187,11 +186,7 @@ def update_pods(self): def is_valid_pod(self, pod_name): self.update_pods() # only updates if stale - is_valid = False - if pod_name in self.pods: - pod_phase = self.pods[pod_name].status.phase - if pod_phase in self.valid_pod_phases: - is_valid = True + is_valid = self.pods.get(pod_name) in self.valid_pod_phases return is_valid def _timestamp_to_age(self, ts): @@ -210,7 +205,7 @@ def is_stale_update_time(self, updated_time, stale_time=None): stale_time = stale_time if stale_time else self.stale_time if not updated_time: return False - if not stale_time > 0: + if stale_time <= 0: return False last_updated = self._timestamp_to_age(updated_time) return last_updated >= stale_time @@ -234,7 +229,7 @@ def should_clean_key(self, key, updated_ts): # 'alive with status %s but is_stale turned off.', # key, self.cleaning_queue, updated_ts, # updated_seconds, pod_name, - # self.pods[pod_name].status.phase) + # self.pods[pod_name]) # # self.kill_pod(pod_name, self.namespace) return False @@ -248,7 +243,7 @@ def should_clean_key(self, key, updated_ts): self.logger.info('Key `%s` in queue `%s` was last updated by ' 'pod `%s` %s seconds ago, but that pod has status' ' %s.', key, self.cleaning_queue, pod_name, - updated_seconds, self.pods[pod_name].status.phase) + updated_seconds, self.pods[pod_name]) return True def clean_key(self, key): @@ -258,7 +253,7 @@ def clean_key(self, key): 'updated_by', ] res = self.redis_client.hmget(key, *required_keys) - hvals = {k: v for k, v in zip(required_keys, res)} + hvals = dict(zip(required_keys, res)) should_clean = self.should_clean_key(key, hvals.get('updated_at')) @@ -292,3 +287,8 @@ def clean(self): self.total_repairs += cleaned self.logger.info('Repaired %s key%s (%s total).', cleaned, 's' if cleaned else '', self.total_repairs) + + # reset state to like new + self.cleaning_queue = '' + self.pods = {} + self.pods_updated_at = None diff --git a/redis_janitor/janitors_test.py b/redis_janitor/janitors_test.py index 36a1525..361746c 100644 --- a/redis_janitor/janitors_test.py +++ b/redis_janitor/janitors_test.py @@ -221,7 +221,24 @@ def dummy_lrem(key, count, value): assert int(janitor.remove_key_from_queue(valid_key)) == 1 assert int(janitor.remove_key_from_queue(invalid_key)) == 0 - def test__udpate_pods(self): + def test_repair_redis_key(self): + janitor = self.get_client() + + def remove_key(_): + return True + + # Remove key and put it back in the work queue + janitor.remove_key_from_queue = remove_key + janitor.repair_redis_key('testkey') + + def fail_to_remove(_): + return False + + # Could not remove key, should log it. + janitor.remove_key_from_queue = fail_to_remove + janitor.repair_redis_key('testkey') + + def test__update_pods(self): janitor = self.get_client() janitor._update_pods() # pylint: disable=E1101 @@ -230,10 +247,8 @@ def test__udpate_pods(self): assert isinstance(janitor.pods_updated_at, datetime.datetime) assert len(janitor.pods) == len(expected) for e in expected: - name = e.metadata.name - assert name in janitor.pods - assert janitor.pods[name].metadata.name == name - assert janitor.pods[name].status.phase == e.status.phase + assert e.metadata.name in janitor.pods + assert janitor.pods[e.metadata.name] == e.status.phase def test_udpate_pods(self): janitor = self.get_client(pod_refresh_interval=10000) @@ -244,10 +259,8 @@ def test_udpate_pods(self): assert isinstance(janitor.pods_updated_at, datetime.datetime) assert len(janitor.pods) == len(expected) for e in expected: - name = e.metadata.name - assert name in janitor.pods - assert janitor.pods[name].metadata.name == name - assert janitor.pods[name].status.phase == e.status.phase + assert e.metadata.name in janitor.pods + assert janitor.pods[e.metadata.name] == e.status.phase # now that we've called it once, lets make sure it doesnt happen again janitor.pods = {} # resetting this for test diff --git a/requirements.txt b/requirements.txt index 02e80f3..0ad67e1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -redis==3.2.1 -kubernetes==9.0.0 +redis==3.4.1 +kubernetes==10.0.0 pytz==2019.1 python-dateutil==2.8.0 python-decouple==3.1