Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

--cleanuprss option to clear caches when memory reaches a certain limit #257

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions splash/monitors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# -*- coding: utf-8 -*-
""" Splash periodic monitoring tasks """
from __future__ import absolute_import, division
import gc
import time

from splash.utils import memory_to_absolute, get_ru_maxrss, get_mem_usage, MB
from splash.qtutils import clear_caches


def monitor_maxrss(maxrss, check_intreval=60):
    """
    Periodically check the value reported by ``get_ru_maxrss()`` and stop
    the Twisted reactor once it goes above the ``maxrss`` limit.

    ``maxrss`` is first passed through ``memory_to_absolute`` and is then
    used as a number of MB. A falsy limit (e.g. 0) disables monitoring:
    nothing is scheduled.

    ``check_intreval`` is the period, in seconds, handed to
    ``LoopingCall.start``; the first check is not run immediately.
    """
    from twisted.internet import reactor, task
    from twisted.python import log

    maxrss = memory_to_absolute(maxrss)
    if not maxrss:
        # No limit configured, so there is nothing to watch.
        return

    def _stop_when_exceeded():
        peak = get_ru_maxrss()
        if peak > maxrss * MB:
            log.msg("maxrss exceeded %d MB, shutting down..." % maxrss)
            reactor.stop()

    log.msg("maxrss limit: %d MB" % maxrss)
    looper = task.LoopingCall(_stop_when_exceeded)
    looper.start(check_intreval, now=False)


def monitor_currss(threshold, verbosity, min_interval=30, check_interval=10):
    """
    Watch the current memory usage of the process and try to release
    memory when it grows above ``threshold``.

    ``threshold`` is passed through ``memory_to_absolute`` and then used
    as a number of MB. A cleanup (``clear_caches()`` followed by
    ``gc.collect()``) is attempted only if more than ``min_interval``
    seconds have passed since the previous one. Usage is sampled every
    ``check_interval`` seconds from a Twisted ``LoopingCall``, so
    short-lived spikes between samples may go unnoticed.

    ``verbosity`` controls logging: >= 1 reports cleanups, >= 2 also
    reports every sample and skipped cleanups, >= 3 additionally calls
    ``objgraph.show_growth`` when objgraph can be imported.

    A falsy threshold (e.g. 0) disables monitoring.
    """
    from twisted.internet import task
    from twisted.python import log

    # objgraph is optional; it is only looked up at the highest verbosity.
    objgraph = None
    if verbosity >= 3:
        try:
            import objgraph
            objgraph.show_growth()
        except ImportError:
            pass

    threshold = memory_to_absolute(threshold)
    # Mutable holder so the nested function can update the timestamp
    # without ``nonlocal``.
    state = {'last_cleanup': -1.0}

    def check_memusage():
        current = get_mem_usage()
        peak = get_ru_maxrss()

        if verbosity >= 2:
            log.msg("Memory usage: %0.1fMB (%0.1fMB peak)" % (current / MB,
                                                              peak / MB))

        if not (current > threshold * MB):
            return

        elapsed = time.time() - state['last_cleanup']
        if not (elapsed > min_interval):
            # Over the limit, but the previous cleanup was too recent.
            if verbosity >= 2:
                log.msg(
                    "Splash uses too much memory (%0.1f > %0.1f.), but "
                    "the cache was cleared recently (%0.1f seconds ago)" %
                    (current / MB, threshold, elapsed)
                )
            return

        if verbosity >= 1:
            log.msg(
                "Splash uses too much memory: %0.1f > %0.1f. "
                "Cleaning up WebKit caches.." % (current / MB, threshold)
            )

        clear_caches()
        gc.collect()

        after = get_mem_usage()
        if verbosity >= 1:
            log.msg("Memory freed: %0.1f MB" % ((current - after) / MB))
        state['last_cleanup'] = time.time()

        if verbosity >= 3 and objgraph:
            objgraph.show_growth(limit=100)

    if threshold:
        log.msg("cleanup threshold: %d MB" % threshold)
        looper = task.LoopingCall(check_memusage)
        looper.start(check_interval, now=False)
29 changes: 8 additions & 21 deletions splash/server.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from __future__ import absolute_import
from __future__ import absolute_import, division
import os
import sys
import optparse
Expand All @@ -10,6 +10,8 @@
from splash import defaults, __version__
from splash import xvfb
from splash.qtutils import init_qt_app
from splash.monitors import monitor_currss, monitor_maxrss


def install_qtreactor(verbose):
init_qt_app(verbose)
Expand All @@ -24,6 +26,8 @@ def parse_opts():
op.add_option("-f", "--logfile", help="log file")
op.add_option("-m", "--maxrss", type=float, default=0,
help="exit if max RSS reaches this value (in MB or ratio of physical mem) (default: %default)")
op.add_option("--cleanuprss", type=float, default=0,
help="clean WebKit caches if current RSS reaches this value (in MB or ratio of physical mem) (default: %default)")
op.add_option("-p", "--port", type="int", default=defaults.SPLASH_PORT,
help="port to listen to (default: %default)")
op.add_option("-s", "--slots", type="int", default=defaults.SLOTS,
Expand Down Expand Up @@ -100,6 +104,8 @@ def start_logging(opts):
def splash_started(opts, stderr):
    """
    Write a one-line "Splash started" notice to ``stderr``.

    If ``opts.logfile`` is set, the notice also names the log file.
    Both variants end with a newline so that subsequent output starts
    on its own line.
    """
    if opts.logfile:
        stderr.write("Splash started - logging to: %s\n" % opts.logfile)
    else:
        stderr.write("Splash started\n")


def bump_nofile_limit():
Expand Down Expand Up @@ -219,26 +225,6 @@ def splash_server(portnum, slots, network_manager, max_timeout,
reactor.listenTCP(proxy_portnum, proxy_server_factory)


def monitor_maxrss(maxrss):
    """
    Stop the Twisted reactor once the value reported by
    ``get_ru_maxrss()`` goes above ``maxrss`` MB.

    A value strictly between 0 and 1 is treated as a fraction of the
    total physical memory and converted to MB first. A falsy limit
    disables the check. The check runs every 60 seconds and is not run
    immediately.
    """
    from twisted.internet import reactor, task
    from twisted.python import log
    from splash.utils import get_ru_maxrss, get_total_phymem

    bytes_per_mb = 1024 ** 2

    # Support maxrss as a ratio of total physical memory
    if 0.0 < maxrss < 1.0:
        maxrss = get_total_phymem() * maxrss / bytes_per_mb

    if not maxrss:
        return

    def _stop_when_exceeded():
        if get_ru_maxrss() > maxrss * bytes_per_mb:
            log.msg("maxrss exceeded %d MB, shutting down..." % maxrss)
            reactor.stop()

    log.msg("maxrss limit: %d MB" % maxrss)
    looper = task.LoopingCall(_stop_when_exceeded)
    looper.start(60, now=False)


def default_splash_server(portnum, max_timeout, slots=None,
cache_enabled=None, cache_path=None, cache_size=None,
proxy_profiles_path=None, js_profiles_path=None,
Expand Down Expand Up @@ -347,6 +333,7 @@ def main():
install_qtreactor(opts.verbosity >= 5)

monitor_maxrss(opts.maxrss)
monitor_currss(opts.cleanuprss, opts.verbosity)
if opts.manhole:
manhole_server()

Expand Down
30 changes: 29 additions & 1 deletion splash/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import psutil


MB = 1024*1024
_REQUIRED = object()


Expand Down Expand Up @@ -80,11 +81,38 @@ def get_ru_maxrss():
return size


def get_mem_usage():
    """
    Report the RSS of the current process (in bytes), as seen by psutil.

    >>> MB = 1024*1024
    >>> 5*MB < get_mem_usage() < 2048*MB
    True
    """
    this_process = psutil.Process(os.getpid())
    try:
        rss = this_process.memory_info().rss
    except AttributeError:
        # psutil < 2.x: fall back to the older accessor, whose first
        # item is used as the RSS value.
        rss = this_process.get_memory_info()[0]
    return rss


def memory_to_absolute(ratio):
    """
    Convert a memory limit to an absolute value.

    If 0 < ratio < 1.0 then ``ratio`` is treated as a fraction of the
    total physical memory and the corresponding amount
    (``get_total_phymem() * ratio / MB``) is returned.
    Any other value (including 0, which callers use for "disabled") is
    considered already absolute and is returned as-is.
    """
    # get_total_phymem lives in this same module, so no import is needed.
    if 0.0 < ratio < 1.0:
        return get_total_phymem() * ratio / MB
    return ratio


def get_total_phymem():
    """ Return the total amount of physical memory available. """
    try:
        return psutil.virtual_memory().total
    except AttributeError:  # psutil < 2.0
        return psutil.phymem_usage().total


Expand Down