Skip to content

Commit

Permalink
Memory management improvements (#28)
Browse files Browse the repository at this point in the history
* call gc.collect after  every cleaning

* only save name and status from k8s list_pod_for_all_namespaces response

* remove unused failure_stale_seconds

* upgrade redis to 3.4.1

* reset janitor state at then end of clean()

* upgrade to kubernetes==10.0.0 for 1.14.0 compatibility

* test_repair_redis_key to keep coverage up
  • Loading branch information
willgraf authored Mar 25, 2020
1 parent 895a0cf commit f1d81bb
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 23 deletions.
2 changes: 2 additions & 0 deletions clean-redis.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from __future__ import division
from __future__ import print_function

import gc
import logging
import logging.handlers
import sys
Expand Down Expand Up @@ -94,6 +95,7 @@ def initialize_logger(debug_mode=True):
while True:
try:
janitor.clean()
gc.collect()
time.sleep(INTERVAL)
except Exception as err: # pylint: disable=broad-except
_logger.critical('Fatal Error: %s: %s', type(err).__name__, err)
Expand Down
24 changes: 12 additions & 12 deletions redis_janitor/janitors.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,13 @@ def __init__(self,
namespace='default',
backoff=3,
stale_time=600, # 10 minutes
failure_stale_seconds=60,
pod_refresh_interval=5,):
self.redis_client = redis_client
self.logger = logging.getLogger(str(self.__class__.__name__))
self.backoff = backoff
self.queues = str(queue).lower().split(queue_delimiter)
self.namespace = namespace
self.stale_time = int(stale_time)
self.failure_stale_seconds = failure_stale_seconds
self.pod_refresh_interval = int(pod_refresh_interval)

# empty initializers, update them with _update_pods
Expand Down Expand Up @@ -169,7 +167,8 @@ def repair_redis_key(self, redis_key):
def _update_pods(self):
"""Refresh pod data and update timestamp"""
namespaced_pods = self.list_pod_for_all_namespaces()
self.pods = {pod.metadata.name: pod for pod in namespaced_pods}
self.pods = {pod.metadata.name: pod.status.phase
for pod in namespaced_pods}
self.pods_updated_at = datetime.datetime.now(pytz.UTC)

def update_pods(self):
Expand All @@ -187,11 +186,7 @@ def update_pods(self):

def is_valid_pod(self, pod_name):
self.update_pods() # only updates if stale
is_valid = False
if pod_name in self.pods:
pod_phase = self.pods[pod_name].status.phase
if pod_phase in self.valid_pod_phases:
is_valid = True
is_valid = self.pods.get(pod_name) in self.valid_pod_phases
return is_valid

def _timestamp_to_age(self, ts):
Expand All @@ -210,7 +205,7 @@ def is_stale_update_time(self, updated_time, stale_time=None):
stale_time = stale_time if stale_time else self.stale_time
if not updated_time:
return False
if not stale_time > 0:
if stale_time <= 0:
return False
last_updated = self._timestamp_to_age(updated_time)
return last_updated >= stale_time
Expand All @@ -234,7 +229,7 @@ def should_clean_key(self, key, updated_ts):
# 'alive with status %s but is_stale turned off.',
# key, self.cleaning_queue, updated_ts,
# updated_seconds, pod_name,
# self.pods[pod_name].status.phase)
# self.pods[pod_name])
# # self.kill_pod(pod_name, self.namespace)
return False

Expand All @@ -248,7 +243,7 @@ def should_clean_key(self, key, updated_ts):
self.logger.info('Key `%s` in queue `%s` was last updated by '
'pod `%s` %s seconds ago, but that pod has status'
' %s.', key, self.cleaning_queue, pod_name,
updated_seconds, self.pods[pod_name].status.phase)
updated_seconds, self.pods[pod_name])
return True

def clean_key(self, key):
Expand All @@ -258,7 +253,7 @@ def clean_key(self, key):
'updated_by',
]
res = self.redis_client.hmget(key, *required_keys)
hvals = {k: v for k, v in zip(required_keys, res)}
hvals = dict(zip(required_keys, res))

should_clean = self.should_clean_key(key, hvals.get('updated_at'))

Expand Down Expand Up @@ -292,3 +287,8 @@ def clean(self):
self.total_repairs += cleaned
self.logger.info('Repaired %s key%s (%s total).', cleaned,
's' if cleaned else '', self.total_repairs)

# reset state to like new
self.cleaning_queue = ''
self.pods = {}
self.pods_updated_at = None
31 changes: 22 additions & 9 deletions redis_janitor/janitors_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,24 @@ def dummy_lrem(key, count, value):
assert int(janitor.remove_key_from_queue(valid_key)) == 1
assert int(janitor.remove_key_from_queue(invalid_key)) == 0

def test__udpate_pods(self):
def test_repair_redis_key(self):
janitor = self.get_client()

def remove_key(_):
return True

# Remove key and put it back in the work queue
janitor.remove_key_from_queue = remove_key
janitor.repair_redis_key('testkey')

def fail_to_remove(_):
return False

# Could not remove key, should log it.
janitor.remove_key_from_queue = fail_to_remove
janitor.repair_redis_key('testkey')

def test__update_pods(self):
janitor = self.get_client()
janitor._update_pods()
# pylint: disable=E1101
Expand All @@ -230,10 +247,8 @@ def test__udpate_pods(self):
assert isinstance(janitor.pods_updated_at, datetime.datetime)
assert len(janitor.pods) == len(expected)
for e in expected:
name = e.metadata.name
assert name in janitor.pods
assert janitor.pods[name].metadata.name == name
assert janitor.pods[name].status.phase == e.status.phase
assert e.metadata.name in janitor.pods
assert janitor.pods[e.metadata.name] == e.status.phase

def test_udpate_pods(self):
janitor = self.get_client(pod_refresh_interval=10000)
Expand All @@ -244,10 +259,8 @@ def test_udpate_pods(self):
assert isinstance(janitor.pods_updated_at, datetime.datetime)
assert len(janitor.pods) == len(expected)
for e in expected:
name = e.metadata.name
assert name in janitor.pods
assert janitor.pods[name].metadata.name == name
assert janitor.pods[name].status.phase == e.status.phase
assert e.metadata.name in janitor.pods
assert janitor.pods[e.metadata.name] == e.status.phase

# now that we've called it once, lets make sure it doesnt happen again
janitor.pods = {} # resetting this for test
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
redis==3.2.1
kubernetes==9.0.0
redis==3.4.1
kubernetes==10.0.0
pytz==2019.1
python-dateutil==2.8.0
python-decouple==3.1

0 comments on commit f1d81bb

Please sign in to comment.