Skip to content

Commit

Permalink
Cache configurability - expiry time, read/write controls (#13)
Browse files Browse the repository at this point in the history
* stop printing url_map

* add a custom argparser action for bool options

* add cache control options

* use context manager on StringIO

* make HashedTempFile a native context manager with optional closing

* cache rules everything around me

* update README
  • Loading branch information
briantist authored Sep 4, 2022
1 parent a9b82b3 commit f3594cf
Show file tree
Hide file tree
Showing 6 changed files with 127 additions and 49 deletions.
41 changes: 26 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ usage: python -m galactory [-h] [-c CONFIG] [--listen-addr LISTEN_ADDR]
[--prefer-configured-key] [--log-file LOG_FILE]
[--log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL}] [--log-headers]
[--log-body] [--proxy-upstream PROXY_UPSTREAM]
[-npns NO_PROXY_NAMESPACE]
[-npns NO_PROXY_NAMESPACE] [--cache-minutes CACHE_MINUTES]
[--cache-read CACHE_READ] [--cache-write CACHE_WRITE]
galactory is a partial Ansible Galaxy proxy that uploads and downloads collections, using an
Artifactory generic repository as its backend.
Expand All @@ -48,30 +49,40 @@ optional arguments:
The host name and port of the server, as seen from clients. Used for
generating links. [env var: GALACTORY_SERVER_NAME]
--artifactory-path ARTIFACTORY_PATH
The URL of the path in Artifactory where collections are stored. [env var:
GALACTORY_ARTIFACTORY_PATH]
The URL of the path in Artifactory where collections are stored.
[env var: GALACTORY_ARTIFACTORY_PATH]
--artifactory-api-key ARTIFACTORY_API_KEY
If set, is the API key used to access Artifactory. [env var:
GALACTORY_ARTIFACTORY_API_KEY]
--use-galaxy-key If set, uses the Galaxy token as the Artifactory API key. [env var:
GALACTORY_USE_GALAXY_KEY]
If set, is the API key used to access Artifactory.
[env var: GALACTORY_ARTIFACTORY_API_KEY]
--use-galaxy-key If set, uses the Galaxy token as the Artifactory API key.
[env var: GALACTORY_USE_GALAXY_KEY]
--prefer-configured-key
                        If set, prefer the configured Artifactory key over the Galaxy token. [env
var: GALACTORY_PREFER_CONFIGURED_KEY]
--log-file LOG_FILE If set, logging will go to this file instead of the console. [env var:
GALACTORY_LOG_FILE]
                        If set, prefer the configured Artifactory key over the Galaxy token.
[env var: GALACTORY_PREFER_CONFIGURED_KEY]
--log-file LOG_FILE If set, logging will go to this file instead of the console.
[env var: GALACTORY_LOG_FILE]
--log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL}
The desired logging level. [env var: GALACTORY_LOG_LEVEL]
--log-headers Log the headers of every request (DEBUG level only). [env var:
GALACTORY_LOG_HEADERS]
--log-body Log the body of every request (DEBUG level only). [env var:
GALACTORY_LOG_BODY]
--log-headers Log the headers of every request (DEBUG level only).
[env var: GALACTORY_LOG_HEADERS]
--log-body Log the body of every request (DEBUG level only).
[env var: GALACTORY_LOG_BODY]
--proxy-upstream PROXY_UPSTREAM
If set, then find, pull and cache results from the specified galaxy server
in addition to local. [env var: GALACTORY_PROXY_UPSTREAM]
-npns NO_PROXY_NAMESPACE, --no-proxy-namespace NO_PROXY_NAMESPACE
Requests for this namespace should never be proxied. Can be specified
multiple times. [env var: GALACTORY_NO_PROXY_NAMESPACE]
--cache-minutes CACHE_MINUTES
The time period that a cache entry should be considered valid.
[env var: GALACTORY_CACHE_MINUTES]
--cache-read CACHE_READ
                        Look for upstream caches and use their values.
[env var: GALACTORY_CACHE_READ]
--cache-write CACHE_WRITE
Populate the upstream cache in Artifactory. Should be false when no API key is
provided or the key has no permission to write.
[env var: GALACTORY_CACHE_WRITE]
Args that start with '--' (eg. --listen-addr) can also be set in a config file
(/etc/galactory.d/*.conf or ~/.galactory/*.conf or specified via -c). Config file syntax allows:
Expand Down
29 changes: 27 additions & 2 deletions galactory/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,32 @@
# (c) 2022 Brian Scholer (@briantist)

import logging
from configargparse import ArgParser
from configargparse import ArgParser, ArgumentError, Action
from artifactory import ArtifactoryPath

from . import create_app


# TODO: when py3.8 support is dropped, switch to using argparse.BooleanOptionalAction
class _StrBool(Action):
FALSES = {'false', '0', 'no'}
TRUES = {'true', '1', 'yes'}

def _booler(self, value):
if isinstance(value, bool):
return value

if value.lower() in self.FALSES:
return False
if value.lower() in self.TRUES:
return True

raise ArgumentError(self, f"Expecting 'true', 'false', 'yes', 'no', '1' or '0', but got '{value}'")

def __call__(self, parser, namespace, values, option_string=None):
setattr(namespace, self.dest, self._booler(values))


if __name__ == '__main__':
parser = ArgParser(
prog='python -m galactory',
Expand Down Expand Up @@ -37,6 +57,9 @@
parser.add_argument('--log-body', action='store_true', env_var='GALACTORY_LOG_BODY', help='Log the body of every request (DEBUG level only).')
parser.add_argument('--proxy-upstream', type=lambda x: str(x).rstrip('/') + '/', env_var='GALACTORY_PROXY_UPSTREAM', help='If set, then find, pull and cache results from the specified galaxy server in addition to local.')
parser.add_argument('-npns', '--no-proxy-namespace', action='append', default=[], env_var='GALACTORY_NO_PROXY_NAMESPACE', help='Requests for this namespace should never be proxied. Can be specified multiple times.')
parser.add_argument('--cache-minutes', default=60, type=int, env_var='GALACTORY_CACHE_MINUTES', help='The time period that a cache entry should be considered valid.')
parser.add_argument('--cache-read', action=_StrBool, default=True, env_var='GALACTORY_CACHE_READ', help='Look for upsteam caches and use their values.')
parser.add_argument('--cache-write', action=_StrBool, default=True, env_var='GALACTORY_CACHE_WRITE', help='Populate the upstream cache in Artifactory. Should be false when no API key is provided or the key has no permission to write.')
args = parser.parse_args()

logging.basicConfig(filename=args.log_file, level=args.log_level)
Expand All @@ -51,7 +74,9 @@
USE_GALAXY_KEY=args.use_galaxy_key,
PREFER_CONFIGURED_KEY=args.prefer_configured_key,
SERVER_NAME=args.server_name,
CACHE_MINUTES=args.cache_minutes,
CACHE_READ=args.cache_read,
CACHE_WRITE=args.cache_write,
)

print(app.url_map)
app.run(args.listen_addr, args.listen_port, threaded=True)
15 changes: 12 additions & 3 deletions galactory/api/v2/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,13 @@ def collection(namespace, collection):
repository = authorize(request, current_app.config['ARTIFACTORY_PATH'])
upstream = current_app.config['PROXY_UPSTREAM']
no_proxy = current_app.config['NO_PROXY_NAMESPACES']
cache_minutes = current_app.config['CACHE_MINUTES']
cache_read = current_app.config['CACHE_READ']
cache_write = current_app.config['CACHE_WRITE']

upstream_result = None
if upstream and (not no_proxy or namespace not in no_proxy):
proxy = ProxyUpstream(repository, upstream)
proxy = ProxyUpstream(repository, upstream, cache_read, cache_write, cache_minutes)
upstream_result = proxy.proxy(request)

results = _collection_listing(repository, namespace, collection)
Expand Down Expand Up @@ -66,10 +69,13 @@ def versions(namespace, collection):
repository = authorize(request, current_app.config['ARTIFACTORY_PATH'])
upstream = current_app.config['PROXY_UPSTREAM']
no_proxy = current_app.config['NO_PROXY_NAMESPACES']
cache_minutes = current_app.config['CACHE_MINUTES']
cache_read = current_app.config['CACHE_READ']
cache_write = current_app.config['CACHE_WRITE']

upstream_result = None
if upstream and (not no_proxy or namespace not in no_proxy):
proxy = ProxyUpstream(repository, upstream)
proxy = ProxyUpstream(repository, upstream, cache_read, cache_write, cache_minutes)
upstream_result = proxy.proxy(request)

collections = collected_collections(repository, namespace=namespace, name=collection)
Expand Down Expand Up @@ -117,12 +123,15 @@ def version(namespace, collection, version):
repository = authorize(request, current_app.config['ARTIFACTORY_PATH'])
upstream = current_app.config['PROXY_UPSTREAM']
no_proxy = current_app.config['NO_PROXY_NAMESPACES']
cache_minutes = current_app.config['CACHE_MINUTES']
cache_read = current_app.config['CACHE_READ']
cache_write = current_app.config['CACHE_WRITE']

try:
info = next(discover_collections(repository, namespace=namespace, name=collection, version=version))
except StopIteration:
if upstream and (not no_proxy or namespace not in no_proxy):
proxy = ProxyUpstream(repository, upstream)
proxy = ProxyUpstream(repository, upstream, cache_read, cache_write, cache_minutes)
upstream_result = proxy.proxy(request)
return jsonify(upstream_result)
else:
Expand Down
10 changes: 8 additions & 2 deletions galactory/download/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ def download(filename):
artifact = authorize(request, current_app.config['ARTIFACTORY_PATH'] / filename)
upstream = current_app.config['PROXY_UPSTREAM']
# no_proxy = current_app.config['NO_PROXY_NAMESPACES']
cache_minutes = current_app.config['CACHE_MINUTES']
cache_read = current_app.config['CACHE_READ']
cache_write = current_app.config['CACHE_WRITE']

try:
stat = artifact.stat()
Expand All @@ -28,9 +31,12 @@ def download(filename):
if not upstream: # or not (not no_proxy or namespace not in no_proxy):
abort(C.HTTP_NOT_FOUND)

proxy = ProxyUpstream(artifact, upstream)
proxy = ProxyUpstream(artifact, upstream, cache_read, cache_write, cache_minutes)

with proxy.proxy_download(request) as resp, _chunk_to_temp(None, iterator=resp.iter_content, close=cache_write) as tmp:
if not cache_write:
return send_file(tmp.handle, as_attachment=True, download_name=filename, etag=False)

with proxy.proxy_download(request) as resp, _chunk_to_temp(None, iterator=resp.iter_content) as tmp:
try:
artifact.deploy(tmp.handle, md5=tmp.md5, sha1=tmp.sha1, sha256=tmp.sha256)
except ArtifactoryException as exc:
Expand Down
42 changes: 29 additions & 13 deletions galactory/upstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def _time_decoder(pairs):
loaded = json.load(f, object_pairs_hook=_time_decoder)
return cls(data=loaded['data'], metadata=loaded['metadata'], **kwargs)

def __init__(self, data=None, metadata=None, expiry_delta=timedelta(hours=1), calculate_expiry_on_read=False) -> None:
def __init__(self, expiry_delta, data=None, metadata=None, calculate_expiry_on_read=True) -> None:
raw = {'metadata': {}, 'data': {}}
self._expiry_delta = expiry_delta
self._calc_on_read = calculate_expiry_on_read
Expand Down Expand Up @@ -113,29 +113,41 @@ def _to_serializable_dict(self):
class ProxyUpstream:
_cache_path = '_cache'

def __init__(self, repository, upstream_url) -> None:
def __init__(self, repository, upstream_url, read_cache, write_cache, cache_expiry_minutes) -> None:
self._repository = repository
self._upstream = upstream_url
self._read_cache = read_cache
self._write_cache = write_cache
self._cache_expiry_delta = timedelta(minutes=cache_expiry_minutes)

def _get_cache(self, request, **kwargs) -> _CacheEntry:

def _get_cache(self, request, expiry_delta=None, **kwargs) -> _CacheEntry:
path = self._repository / self._cache_path / request.path / 'data.json'

if expiry_delta is None:
expiry_delta = self._cache_expiry_delta

if not self._read_cache:
return _CacheEntry(expiry_delta=expiry_delta, **kwargs)

try:
with path.open() as f:
return _CacheEntry.from_file(f, **kwargs)
return _CacheEntry.from_file(f, expiry_delta=expiry_delta, **kwargs)
except ArtifactoryException:
return _CacheEntry(**kwargs)
return _CacheEntry(expiry_delta=expiry_delta, **kwargs)

def _set_cache(self, request, cache) -> None:
if not self._write_cache:
return

from . import DateTimeIsoFormatJSONEncoder

path = self._repository / self._cache_path / request.path / 'data.json'
buffer = StringIO()
cache.update()
json.dump(cache._to_serializable_dict(), buffer, cls=DateTimeIsoFormatJSONEncoder)
buffer.seek(0)
path.deploy(buffer)
buffer.close()
with StringIO() as buffer:
cache.update()
json.dump(cache._to_serializable_dict(), buffer, cls=DateTimeIsoFormatJSONEncoder)
buffer.seek(0)
path.deploy(buffer)

@contextmanager
def proxy_download(self, request):
Expand Down Expand Up @@ -171,10 +183,14 @@ def proxy(self, request):
# abort(Response(resp.text, resp.status_code))

else:
current_app.logger.info(f"Cache miss: {request.url}")
if self._read_cache:
current_app.logger.info(f"Cache miss: {request.url}")

data = resp.json()
cache.data = data
self._set_cache(request, cache)

if self._write_cache:
self._set_cache(request, cache)
else:
current_app.logger.info(f"Cache hit: {request.url}")

Expand Down
39 changes: 25 additions & 14 deletions galactory/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
import math
import hashlib

from collections import namedtuple
from tempfile import SpooledTemporaryFile
from contextlib import contextmanager
from urllib.request import urlopen
from urllib3 import Retry
from requests.adapters import HTTPAdapter
Expand Down Expand Up @@ -188,11 +186,23 @@ def lcm(a, b, *more):
return abs(a * z) // math.gcd(a, z)


class HashedTempFile:
    """Pair an open temp-file handle with the digests of its contents.

    Acts as a context manager; on exit the handle is closed unless the
    instance was created with ``close=False`` (useful when the caller
    hands the handle off, e.g. streaming it out in a response).

    :param handle: open file-like object holding the data.
    :param md5: hex MD5 digest of the contents.
    :param sha1: hex SHA1 digest of the contents.
    :param sha256: hex SHA256 digest of the contents.
    :param close: when True (default), close the handle on context exit.
    """

    def __init__(self, handle, md5, sha1, sha256, close=True) -> None:
        self.handle = handle
        self.md5 = md5
        self.sha1 = sha1
        self.sha256 = sha256
        self._close = close

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        if self._close:
            self.handle.close()


def _chunk_to_temp(fsrc, iterator=None, spool_size=5*1024*1024, seek_to_zero=True, chunk_multiplier=64, close=True) -> HashedTempFile:
md5sum = hashlib.md5()
sha1sum = hashlib.sha1()
sha256sum = hashlib.sha256()
Expand All @@ -201,14 +211,15 @@ def _chunk_to_temp(fsrc, iterator=None, spool_size=5*1024*1024, seek_to_zero=Tru

it = iter(lambda: fsrc.read(chunk_size), b'') if iterator is None else iterator(chunk_size)

with SpooledTemporaryFile(max_size=spool_size) as tmp:
for chunk in it:
md5sum.update(chunk)
sha1sum.update(chunk)
sha256sum.update(chunk)
tmp.write(chunk)
tmp = SpooledTemporaryFile(max_size=spool_size)

for chunk in it:
md5sum.update(chunk)
sha1sum.update(chunk)
sha256sum.update(chunk)
tmp.write(chunk)

if seek_to_zero:
tmp.seek(0)
if seek_to_zero:
tmp.seek(0)

yield HashedTempFile(tmp, md5sum.hexdigest(), sha1sum.hexdigest(), sha256sum.hexdigest())
return HashedTempFile(tmp, md5sum.hexdigest(), sha1sum.hexdigest(), sha256sum.hexdigest(), close=close)

0 comments on commit f3594cf

Please sign in to comment.