From 8948b6f6516c0bf93bd28371cf8953dc546a4c1f Mon Sep 17 00:00:00 2001 From: Josh Hadro Date: Mon, 24 Jul 2023 16:53:05 -0400 Subject: [PATCH 1/9] Add mimetypes --- iiify/resolver.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/iiify/resolver.py b/iiify/resolver.py index 67e13ed..8b6cf87 100644 --- a/iiify/resolver.py +++ b/iiify/resolver.py @@ -32,12 +32,25 @@ def getids(q, limit=1000, cursor=''): def to_mimetype(format): formats = { "VBR MP3": "audio/mp3", + '32Kbps MP3': "audio/mp3", + "56Kbps MP3": "audio/mp3", + "64Kbps MP3": "audio/mp3", + "96Kbps MP3": "audio/mp3", + "128Kbps MP3": "audio/mp3", "Flac": "audio/flac", "Ogg Vorbis": "audio/ogg", + "Ogg Video": "video/ogg", "WAVE": "audio/wav", "MPEG4": "video/mp4", "24bit Flac": "audio/flac", - 'Shorten': "audio/shn" + 'Shorten': "audio/shn", + "MPEG2": "video/mpeg", + "512Kb MPEG4": "video/mpeg", + "HiRes MPEG4": "video/mpeg", + "h.264 MPEG4": "video/mpeg", + "h.264": "video/mpeg", + "Matroska": "video/x-matroska", + "Cinepack": "video/x-msvideo" } return formats.get(format, "application/octet-stream") From f2e51b93a13025adf40aa1cc26e9f6182a67c537 Mon Sep 17 00:00:00 2001 From: Josh Hadro Date: Mon, 24 Jul 2023 16:53:59 -0400 Subject: [PATCH 2/9] Add mediatypes --- iiify/resolver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iiify/resolver.py b/iiify/resolver.py index 8b6cf87..bdf631f 100644 --- a/iiify/resolver.py +++ b/iiify/resolver.py @@ -402,7 +402,7 @@ def create_manifest3(identifier, domain=None, page=None): if file['name'] in derivatives: body = Choice(items=[]) # add the choices in order per https://github.com/ArchiveLabs/iiif.archivelab.org/issues/77#issuecomment-1499672734 - for format in ['VBR MP3', 'Flac', 'Ogg Vorbis', 'WAVE', '24bit Flac', 'Shorten']: + for format in ['VBR MP3', '32Kbps MP3', '56Kbps MP3', '64Kbps MP3', '96Kbps MP3', '128Kbps MP3', 'Flac', 'Ogg Vorbis', 'WAVE', '24bit Flac', 'Shorten']: if format in derivatives[file['name']]: r = ResourceItem(id=f"https://archive.org/download/{identifier}/{derivatives[file['name']][format]['name'].replace(' ', '%20')}", type='Audio', From 0742d7f1c5e869ed12e07941fe2979ce3b32a0ed Mon Sep 17 00:00:00 2001 From: Josh Hadro Date: Mon, 24 Jul 2023 16:54:14 -0400 Subject: [PATCH 3/9] Add mediatypes and add logic to handle lists in derivs --- iiify/resolver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/iiify/resolver.py b/iiify/resolver.py index bdf631f..db1fb3e 100644 --- a/iiify/resolver.py +++ b/iiify/resolver.py @@ -379,7 +379,7 @@ def create_manifest3(identifier, domain=None, page=None): originals = [] derivatives = {} for f in metadata['files']: - if f['source'] == 'derivative': + if f['source'] == 'derivative' and not isinstance(f['original'], list): if f['original'] in derivatives: derivatives[f['original']][f['format']] = f else: @@ -388,7 +388,7 @@ def create_manifest3(identifier, domain=None, page=None): originals.append(f) # create the canvases for each original - for file in [f for f in originals if f['format'] in ['VBR MP3', 'Flac', 'Ogg Vorbis', 'WAVE', '24bit Flac', 'Shorten']]: + for file in [f for f in originals if f['format'] in ['VBR MP3', '32Kbps MP3', '56Kbps MP3', '64Kbps MP3', '96Kbps MP3', '128Kbps MP3', 'Flac', 'Ogg Vorbis', 'WAVE', '24bit Flac', 'Shorten']]: normalised_id = file['name'].rsplit(".", 1)[0] slugged_id = normalised_id.replace(" ", "-") c_id = f"https://iiif.archivelab.org/iiif/{identifier}/{slugged_id}/canvas" From 09782eff85584054f5f899b86366c0cf678456b4 Mon Sep 17 00:00:00 2001 From: Josh Hadro Date: Mon, 24 Jul 2023 16:55:07 -0400 Subject: [PATCH 4/9] Add 'choice' to moving image logic, and add mediatypes --- iiify/resolver.py | 59 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 12 deletions(-) diff --git a/iiify/resolver.py b/iiify/resolver.py index db1fb3e..df37422 100644 --- a/iiify/resolver.py +++ b/iiify/resolver.py @@ -428,24 +428,59 @@ def create_manifest3(identifier, domain=None, page=None): manifest.add_item(c) elif mediatype == "movies": - canvas_files = [f for f in metadata['files'] if f['source'].lower() == 'original' and f['format'] == "MPEG4"] - for file in canvas_files: + # sort the files into originals and derivatives, splitting the derivatives into buckets based on the original + originals = [] + derivatives = {} + for f in metadata['files']: + if f['source'] == 'derivative': + if f['original'] in derivatives: + derivatives[f['original']][f['format']] = f + else: + derivatives[f['original']] = {f['format']: f} + elif f['source'] == 'original': + originals.append(f) + + # create the canvases for each original + for file in [f for f in originals if f['format'] in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']]: normalised_id = file['name'].rsplit(".", 1)[0] slugged_id = normalised_id.replace(" ", "-") c_id = f"https://iiif.archivelab.org/iiif/{identifier}/{slugged_id}/canvas" c = Canvas(id=c_id, label=normalised_id, duration=float(file['length']), height=int(file['height']), width=int(file['width'])) + + # create intermediary objects ap = AnnotationPage(id=f"https://iiif.archivelab.org/iiif/{identifier}/{slugged_id}/page") anno = Annotation(id=f"https://iiif.archivelab.org/iiif/{identifier}/{slugged_id}/annotation", motivation="painting", target=c.id) - r = ResourceItem( - id=f"https://archive.org/download/{identifier}/{file['name'].replace(' ', '%20')}", - type='Video', - format=to_mimetype(file['format']), - label={"none": [file['format']]}, - duration=float(file['length']), - height=int(file['height']), - width=int(file['width']) - ) - anno.body = r + + # create body based on whether there are derivatives or not: + if file['name'] in derivatives: + body = Choice(items=[]) + # add the choices in order per https://github.com/ArchiveLabs/iiif.archivelab.org/issues/77#issuecomment-1499672734 + for format in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']: + if format in derivatives[file['name']]: + r = ResourceItem(id=f"https://archive.org/download/{identifier}/{derivatives[file['name']][format]['name'].replace(' ', '%20')}", + type='Video', + format=to_mimetype(format), + label={"none": [format]}, + duration=float(file['length']), + height=int(file['height']), + width=int(file['width']), + ) + body.items.append(r) + elif file['format'] == format: + r = ResourceItem( + id=f"https://archive.org/download/{identifier}/{file['name'].replace(' ', '%20')}", + type='Video', + format=to_mimetype(format), + label={"none": [format]}, + duration=float(file['length']), + height=int(file['height']), + width=int(file['width'])) + body.items.append(r) + else: + # todo: deal with instances where there are no derivatives for whatever reason + pass + + anno.body = body ap.add_item(anno) c.add_item(ap) manifest.add_item(c) From 2122e04b5c60f96cdd5d1c0d1d5e8d253b2f4e4e Mon Sep 17 00:00:00 2001 From: Josh Hadro Date: Mon, 24 Jul 2023 16:55:20 -0400 Subject: [PATCH 5/9] Add some tests --- tests/test_manifests.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/test_manifests.py b/tests/test_manifests.py index 46192cb..b4d44da 100644 --- a/tests/test_manifests.py +++ b/tests/test_manifests.py @@ -39,5 +39,29 @@ def test_v3_vermont_Life_Magazine(self): self.assertEqual(len(manifest['items']),116,f"Expected 116 canvas but got: {len(manifest['items'])}") + def test_v3_single_video_manifest(self): + resp = self.test_app.get("/iiif/3/youtube-7w8F2Xi3vFw/manifest.json") + self.assertEqual(resp.status_code, 200) + manifest = resp.json + + self.assertEqual(len(manifest['items']),1,f"Expected 1 canvas but got: {len(manifest['items'])}") + + #logic to cover etree mediatype github issue #123 + def test_v3_etree_mediatype(self): + resp = self.test_app.get("/iiif/3/gd72-04-14.aud.vernon.23662.sbeok.shnf/manifest.json") + self.assertEqual(resp.status_code, 200) + manifest = resp.json + + self.assertEqual(len(manifest['items']),36,f"Expected 1 canvas but got: {len(manifest['items'])}") + +''' to test: +TvQuran.com__Alafasi (64Kbps MP3) +alice_in_wonderland_librivox (128kbps mp3) +kaled_jalil (no derivatives) +taboca_201002_03 (h.264 MPEG4, OGG Theora) +Dokku_obrash (geo-restricted?) + +''' + if __name__ == '__main__': unittest.main() \ No newline at end of file From 7e7854b27f25839570a0bb088088e09e76c38068 Mon Sep 17 00:00:00 2001 From: Josh Hadro Date: Tue, 25 Jul 2023 10:53:15 -0400 Subject: [PATCH 6/9] Fix quotes --- iiify/resolver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iiify/resolver.py b/iiify/resolver.py index df37422..54e233b 100644 --- a/iiify/resolver.py +++ b/iiify/resolver.py @@ -32,7 +32,7 @@ def getids(q, limit=1000, cursor=''): def to_mimetype(format): formats = { "VBR MP3": "audio/mp3", - '32Kbps MP3': "audio/mp3", + "32Kbps MP3": "audio/mp3", "56Kbps MP3": "audio/mp3", "64Kbps MP3": "audio/mp3", "96Kbps MP3": "audio/mp3", From 39dff593b68c26186b9584892540d71f589f67b2 Mon Sep 17 00:00:00 2001 From: Josh Hadro Date: Tue, 25 Jul 2023 11:54:54 -0400 Subject: [PATCH 7/9] Add aif, m4a, mp4 audio --- iiify/resolver.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/iiify/resolver.py b/iiify/resolver.py index 54e233b..5335a6d 100644 --- a/iiify/resolver.py +++ b/iiify/resolver.py @@ -50,7 +50,10 @@ def to_mimetype(format): "h.264 MPEG4": "video/mpeg", "h.264": "video/mpeg", "Matroska": "video/x-matroska", - "Cinepack": "video/x-msvideo" + "Cinepack": "video/x-msvideo", + "AIFF": "audio/aiff", + "Apple Lossless Audio": "audio/x-m4a", + "MPEG-4 Audio": "audio/mp4" } return formats.get(format, "application/octet-stream") @@ -388,7 +391,7 @@ def create_manifest3(identifier, domain=None, page=None): originals.append(f) # create the canvases for each original - for file in [f for f in originals if f['format'] in ['VBR MP3', '32Kbps MP3', '56Kbps MP3', '64Kbps MP3', '96Kbps MP3', '128Kbps MP3', 'Flac', 'Ogg Vorbis', 'WAVE', '24bit Flac', 'Shorten']]: + for file in [f for f in originals if f['format'] in ['VBR MP3', '32Kbps MP3', '56Kbps MP3', '64Kbps MP3', '96Kbps MP3', '128Kbps MP3', 'MPEG-4 Audio', 'Flac', 'AIFF', 'Apple Lossless Audio', 'Ogg Vorbis', 'WAVE', '24bit Flac', 'Shorten']]: normalised_id = file['name'].rsplit(".", 1)[0] slugged_id = normalised_id.replace(" ", "-") c_id = f"https://iiif.archivelab.org/iiif/{identifier}/{slugged_id}/canvas" @@ -402,7 +405,7 @@ def create_manifest3(identifier, domain=None, page=None): if file['name'] in derivatives: body = Choice(items=[]) # add the choices in order per https://github.com/ArchiveLabs/iiif.archivelab.org/issues/77#issuecomment-1499672734 - for format in ['VBR MP3', '32Kbps MP3', '56Kbps MP3', '64Kbps MP3', '96Kbps MP3', '128Kbps MP3', 'Flac', 'Ogg Vorbis', 'WAVE', '24bit Flac', 'Shorten']: + for format in ['VBR MP3', '32Kbps MP3', '56Kbps MP3', '64Kbps MP3', '96Kbps MP3', '128Kbps MP3', 'MPEG-4 Audio', 'Flac', 'AIFF', 'Apple Lossless Audio', 'Ogg Vorbis', 'WAVE', '24bit Flac', 'Shorten']: if format in derivatives[file['name']]: r = ResourceItem(id=f"https://archive.org/download/{identifier}/{derivatives[file['name']][format]['name'].replace(' ', '%20')}", type='Audio', From 543418c2a3637b2d8f0fc354ed20b4af9117cff1 Mon Sep 17 00:00:00 2001 From: Josh Hadro Date: Tue, 25 Jul 2023 13:33:52 -0400 Subject: [PATCH 8/9] Add tests --- tests/test_manifests.py | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/tests/test_manifests.py b/tests/test_manifests.py index b4d44da..f3a282c 100644 --- a/tests/test_manifests.py +++ b/tests/test_manifests.py @@ -54,13 +54,41 @@ def test_v3_etree_mediatype(self): self.assertEqual(len(manifest['items']),36,f"Expected 1 canvas but got: {len(manifest['items'])}") + + def test_v3_64Kbps_MP3(self): + resp = self.test_app.get("/iiif/3/TvQuran.com__Alafasi/manifest.json") + self.assertEqual(resp.status_code, 200) + manifest = resp.json + self.assertEqual(len(manifest['items']),114,f"Expected 1 canvas but got: {len(manifest['items'])}") + self.assertEqual("64Kbps MP3".lower() in resp.text.lower(), True, f"Expected the string '64Kbps MP3'") + + + def test_v3_128Kbps_MP3(self): + resp = self.test_app.get("/iiif/3/alice_in_wonderland_librivox/manifest.json") + self.assertEqual(resp.status_code, 200) + manifest = resp.json + self.assertEqual(len(manifest['items']),12,f"Expected 1 canvas but got: {len(manifest['items'])}") + self.assertEqual("128kbps mp3".lower() in resp.text.lower(), True, f"Expected the string '128kbps mp3'") + + def test_v3_h264_MPEG4_OGG_Theora(self): + resp = self.test_app.get("/iiif/3/taboca_201002_03/manifest.json") + self.assertEqual(resp.status_code, 200) + manifest = resp.json + self.assertEqual(len(manifest['items']),251,f"Expected 1 canvas but got: {len(manifest['items'])}") + self.assertEqual("h.264 MPEG4".lower() in resp.text.lower(), True, f"Expected the string 'h.264 MPEG4'") + self.assertEqual("OGG Theora".lower() in resp.text.lower(), True, f"Expected the string 'OGG Theora'") + + def test_v3_aiff(self): + resp = self.test_app.get("/iiif/3/PDextend_AIFF/manifest.json") + self.assertEqual(resp.status_code, 200) + manifest = resp.json + self.assertEqual(len(manifest['items']),38,f"Expected 1 canvas but got: {len(manifest['items'])}") + self.assertEqual("AIFF".lower() in resp.text.lower(), True, f"Expected the string 'AIFF'") + ''' to test: -TvQuran.com__Alafasi (64Kbps MP3) -alice_in_wonderland_librivox (128kbps mp3) kaled_jalil (no derivatives) -taboca_201002_03 (h.264 MPEG4, OGG Theora) Dokku_obrash (geo-restricted?) - +m4a filetypes (No length to files?) ''' if __name__ == '__main__': From 657ecf73ccfd0d85f4b9cecf4e89f90c692adec0 Mon Sep 17 00:00:00 2001 From: Josh Hadro Date: Tue, 25 Jul 2023 14:07:12 -0400 Subject: [PATCH 9/9] Fix canvas numbers in the tests --- tests/test_manifests.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_manifests.py b/tests/test_manifests.py index f3a282c..f6e2fe3 100644 --- a/tests/test_manifests.py +++ b/tests/test_manifests.py @@ -52,14 +52,14 @@ def test_v3_etree_mediatype(self): self.assertEqual(resp.status_code, 200) manifest = resp.json - self.assertEqual(len(manifest['items']),36,f"Expected 1 canvas but got: {len(manifest['items'])}") + self.assertEqual(len(manifest['items']),36,f"Expected 36 canvases but got: {len(manifest['items'])}") def test_v3_64Kbps_MP3(self): resp = self.test_app.get("/iiif/3/TvQuran.com__Alafasi/manifest.json") self.assertEqual(resp.status_code, 200) manifest = resp.json - self.assertEqual(len(manifest['items']),114,f"Expected 1 canvas but got: {len(manifest['items'])}") + self.assertEqual(len(manifest['items']),114,f"Expected 114 canvases but got: {len(manifest['items'])}") self.assertEqual("64Kbps MP3".lower() in resp.text.lower(), True, f"Expected the string '64Kbps MP3'") @@ -67,14 +67,14 @@ def test_v3_128Kbps_MP3(self): resp = self.test_app.get("/iiif/3/alice_in_wonderland_librivox/manifest.json") self.assertEqual(resp.status_code, 200) manifest = resp.json - self.assertEqual(len(manifest['items']),12,f"Expected 1 canvas but got: {len(manifest['items'])}") + self.assertEqual(len(manifest['items']),12,f"Expected 12 canvases but got: {len(manifest['items'])}") self.assertEqual("128kbps mp3".lower() in resp.text.lower(), True, f"Expected the string '128kbps mp3'") def test_v3_h264_MPEG4_OGG_Theora(self): resp = self.test_app.get("/iiif/3/taboca_201002_03/manifest.json") self.assertEqual(resp.status_code, 200) manifest = resp.json - self.assertEqual(len(manifest['items']),251,f"Expected 1 canvas but got: {len(manifest['items'])}") + self.assertEqual(len(manifest['items']),251,f"Expected 251 canvases but got: {len(manifest['items'])}") self.assertEqual("h.264 MPEG4".lower() in resp.text.lower(), True, f"Expected the string 'h.264 MPEG4'") self.assertEqual("OGG Theora".lower() in resp.text.lower(), True, f"Expected the string 'OGG Theora'") @@ -82,7 +82,7 @@ def test_v3_aiff(self): resp = self.test_app.get("/iiif/3/PDextend_AIFF/manifest.json") self.assertEqual(resp.status_code, 200) manifest = resp.json - self.assertEqual(len(manifest['items']),38,f"Expected 1 canvas but got: {len(manifest['items'])}") + self.assertEqual(len(manifest['items']),38,f"Expected 38 canvases but got: {len(manifest['items'])}") self.assertEqual("AIFF".lower() in resp.text.lower(), True, f"Expected the string 'AIFF'") ''' to test: