diff --git a/iiify/app.py b/iiify/app.py index c617e4b..9f6be85 100755 --- a/iiify/app.py +++ b/iiify/app.py @@ -8,7 +8,6 @@ from iiif2 import iiif, web from .resolver import ia_resolver, create_manifest, create_manifest3, getids, collection, \ purify_domain, cantaloupe_resolver, create_collection3 -from .url2iiif import url2ia from .configs import options, cors, approot, cache_root, media_root, \ cache_expr, version, image_server, cache_timeouts @@ -54,21 +53,6 @@ def index(): return jsonify(getids(q, cursor=cursor)) -@app.route('/iiif/url2iiif') -def url2iiif(): - url = request.args.get('url', '') - if not url: - abort(400) - try: - domain = purify_domain(request.args.get('domain', request.url_root)) - filehash = url2ia(url) - time.sleep(20) - return redirect('%surl2iiif$%s' % (domain, filehash)) - except Exception as e: - print(e) - abort(400) - - @app.route('/iiif/collection.json') def catalog(): cursor = request.args.get('cursor', '') diff --git a/iiify/resolver.py b/iiify/resolver.py index 73dfb71..18bbc16 100644 --- a/iiify/resolver.py +++ b/iiify/resolver.py @@ -11,11 +11,11 @@ IMG_CTX = 'http://iiif.io/api/image/2/context.json' PRZ_CTX = 'http://iiif.io/api/presentation/2/context.json' -ARCHIVE = 'http://archive.org' METADATA_FIELDS = ("title", "volume", "publisher", "subject", "date", "contributor", "creator") bookdata = 'http://%s/BookReader/BookReaderJSON.php' bookreader = "http://%s/BookReader/BookReaderImages.php" -URI_PRIFIX = "https://iiif.archive.org/iiif" +ARCHIVE = 'http://archive.org' # move to config +URI_PRIFIX = "https://iiif.archive.org/iiif" # move to config valid_filetypes = ['jpg', 'jpeg', 'png', 'gif', 'tif', 'jp2', 'pdf', 'tiff'] diff --git a/iiify/url2iiif.py b/iiify/url2iiif.py deleted file mode 100644 index f645e09..0000000 --- a/iiify/url2iiif.py +++ /dev/null @@ -1,66 +0,0 @@ -import os -import hashlib -import requests -import tempfile -import mimetypes -import internetarchive as ia -from .configs import s3key, s3secret, iiif_domain - -BUF_SIZE = 65536 # 64kb -SIZE_LIMIT_MB = 200 -PATH = '/2/url2iiif' -URL2IIIF_ITEMNAME = 'url2iiif' - -def ia_item_exists(itemname): - """Use IA tool to check whether itemname exists""" - try: - r = requests.get('https://archive.org/metadata/%s' % itemname) - if 'metadata' in r.json(): - return True - return False - except Exception as e: - print(e) - return False - -def download_file(url, path=PATH): - filepath = '%s/%s' % (path, url.split('/')[-1]) - r = requests.get(url, stream=True) - with open(filepath, 'wb') as f: - for chunk in r.iter_content(chunk_size=1024): - if chunk: - f.write(chunk) - return filepath - - -def get_filehash(filepath): - hasher = hashlib.sha256() - with open(filepath, 'rb') as f: - while True: - data = f.read(BUF_SIZE) - if not data: - break - hasher.update(data) - return hasher.hexdigest() - -def url2ia(url): - """Creates an archive.org item for """ - hr = requests.head(url) - if not 'image/' in hr.headers['Content-Type']: - raise ValueError( - 'Service only works with urls with clearly ' - 'identifiable images (e.g. ending in .png, .jpg, .gif, etc.') - - print(hr.headers['Content-Length']) - if (int(hr.headers['Content-Length']) / 1000000.) > SIZE_LIMIT_MB: - raise IOError('File size exceeds %smb' % SIZE_LIMIT_MB) - - filepath = download_file(url, path=PATH) - filehash = get_filehash(filepath) - filepath2 = os.path.join(PATH, filehash) - os.rename(filepath, filepath2) - - ia.upload(URL2IIIF_ITEMNAME, filepath2, access_key=s3key, secret_key=s3secret) - return filehash - -if __name__ == "__main__": - pass