From 4c39fe827f92ab09444b458e3ba49a3978b121e9 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Wed, 17 Jul 2024 15:36:26 -0400 Subject: [PATCH 1/3] fix: Partially revert "Egg storage doesn't need to return open files" 721caaf and "Avoid creating a temporary file when activating egg" 5da703c, since different implementations of IEggStorage might return file(-like) objects --- docs/news.rst | 2 +- scrapyd/interfaces.py | 4 ++-- scrapyd/runner.py | 34 ++++++++++++++++++++++++++++------ 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/docs/news.rst b/docs/news.rst index c32c0b9c..f53c5845 100644 --- a/docs/news.rst +++ b/docs/news.rst @@ -25,7 +25,7 @@ Documentation Changed ~~~~~~~ -- **BACKWARDS-INCOMPATIBLE CHANGE:** The ``IEggStorage.get()`` interface returns a ``(version, filename)`` tuple, instead of a ``(version, file)`` tuple. +- The ``IEggStorage.get()`` interface can return a ``(version, filename)`` tuple in addition to a ``(version, file)`` tuple. - Drop support for end-of-life Python version 3.7. Web UI diff --git a/scrapyd/interfaces.py b/scrapyd/interfaces.py index 06a70d98..2207285a 100644 --- a/scrapyd/interfaces.py +++ b/scrapyd/interfaces.py @@ -9,8 +9,8 @@ def put(eggfile, project, version): version""" def get(project, version=None): - """Return a tuple (version, filename) for the egg matching the specified - project and version. If version is None, the latest version is + """Return a tuple (version, file or filename) for the egg matching the + specified project and version. If version is None, the latest version is returned. If no egg is found for the given project/version (None, None) should be returned.""" diff --git a/scrapyd/runner.py b/scrapyd/runner.py index 3a08976a..e6008ad7 100644 --- a/scrapyd/runner.py +++ b/scrapyd/runner.py @@ -1,5 +1,7 @@ import os +import shutil import sys +import tempfile from contextlib import contextmanager from scrapy.utils.misc import load_object @@ -18,12 +20,32 @@ def project_environment(project): eggstorage = eggstorage_cls(config) eggversion = os.environ.get('SCRAPYD_EGG_VERSION', None) - version, eggpath = eggstorage.get(project, eggversion) - if eggpath: - activate_egg(eggpath) - - assert 'scrapy.conf' not in sys.modules, "Scrapy settings already loaded" - yield + version, egg = eggstorage.get(project, eggversion) + + tmp = None + if egg: + if hasattr(egg, 'name'): # for example, FileIO + try: + activate_egg(egg.name) + finally: + egg.close() + if hasattr(egg, 'read'): # for example, BytesIO + try: + tmp = tempfile.NamedTemporaryFile(suffix='.egg', prefix=f'{project}-{version}-', delete=False) + shutil.copyfileobj(egg, tmp) + tmp.close() + activate_egg(tmp.name) + finally: + egg.close() + else: # a file path + activate_egg(egg) + + try: + assert 'scrapy.conf' not in sys.modules, "Scrapy settings already loaded" + yield + finally: + if tmp: + os.remove(tmp.name) def main(): From 931ec6aef7137dd4c883bd58691091e558c12c2d Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Wed, 17 Jul 2024 15:44:46 -0400 Subject: [PATCH 2/3] fix: Partially revert "Egg storage doesn't need to return open files" 721caaf to restore IEggStorage.get() interface --- docs/news.rst | 1 - scrapyd/eggstorage.py | 2 +- scrapyd/interfaces.py | 4 ++-- scrapyd/runner.py | 15 +++++---------- tests/test_eggstorage.py | 24 +++++++++++++++--------- 5 files changed, 23 insertions(+), 23 deletions(-) diff --git a/docs/news.rst b/docs/news.rst index f53c5845..ed4978da 100644 --- a/docs/news.rst +++ b/docs/news.rst @@ -25,7 +25,6 @@ Documentation Changed ~~~~~~~ -- The ``IEggStorage.get()`` interface can return a ``(version, filename)`` tuple in addition to a ``(version, file)`` tuple. - Drop support for end-of-life Python version 3.7. Web UI diff --git a/scrapyd/eggstorage.py b/scrapyd/eggstorage.py index d020909a..575d73e4 100644 --- a/scrapyd/eggstorage.py +++ b/scrapyd/eggstorage.py @@ -29,7 +29,7 @@ def get(self, project, version=None): version = self.list(project)[-1] except IndexError: return None, None - return version, self._eggpath(project, version) + return version, open(self._eggpath(project, version), 'rb') def list(self, project): versions = [ diff --git a/scrapyd/interfaces.py b/scrapyd/interfaces.py index 2207285a..1495f5c8 100644 --- a/scrapyd/interfaces.py +++ b/scrapyd/interfaces.py @@ -9,8 +9,8 @@ def put(eggfile, project, version): version""" def get(project, version=None): - """Return a tuple (version, file or filename) for the egg matching the - specified project and version. If version is None, the latest version is + """Return a tuple (version, file) for the egg matching the specified + project and version. If version is None, the latest version is returned. If no egg is found for the given project/version (None, None) should be returned.""" diff --git a/scrapyd/runner.py b/scrapyd/runner.py index e6008ad7..0850f092 100644 --- a/scrapyd/runner.py +++ b/scrapyd/runner.py @@ -24,21 +24,16 @@ def project_environment(project): tmp = None if egg: - if hasattr(egg, 'name'): # for example, FileIO - try: + try: + if hasattr(egg, 'name'): # for example, FileIO activate_egg(egg.name) - finally: - egg.close() - if hasattr(egg, 'read'): # for example, BytesIO - try: + else: # for example, BytesIO tmp = tempfile.NamedTemporaryFile(suffix='.egg', prefix=f'{project}-{version}-', delete=False) shutil.copyfileobj(egg, tmp) tmp.close() activate_egg(tmp.name) - finally: - egg.close() - else: # a file path - activate_egg(egg) + finally: + egg.close() try: assert 'scrapy.conf' not in sys.modules, "Scrapy settings already loaded" diff --git a/tests/test_eggstorage.py b/tests/test_eggstorage.py index 8d3e81d6..1299eec3 100644 --- a/tests/test_eggstorage.py +++ b/tests/test_eggstorage.py @@ -78,20 +78,26 @@ def test_put_get_list_delete(self): ]) self.assertEqual(self.eggst.list('mybot2'), []) - v, name = self.eggst.get('mybot') - self.assertEqual(v, "03_ver") - with open(name, 'rb') as f: + v, f = self.eggst.get('mybot') + try: + self.assertEqual(v, "03_ver") self.assertEqual(f.read(), b"egg03") + finally: + f.close() - v, name = self.eggst.get('mybot', '02_my branch') - self.assertEqual(v, "02_my branch") - with open(name, 'rb') as f: + v, f = self.eggst.get('mybot', '02_my branch') + try: + self.assertEqual(v, "02_my branch") self.assertEqual(f.read(), b"egg02") + finally: + f.close() - v, name = self.eggst.get('mybot', '02_my_branch') - self.assertEqual(v, "02_my_branch") - with open(name, 'rb') as f: + v, f = self.eggst.get('mybot', '02_my_branch') + try: + self.assertEqual(v, "02_my_branch") self.assertEqual(f.read(), b"egg02") + finally: + f.close() self.eggst.delete('mybot', '02_my branch') self.assertEqual(self.eggst.list('mybot'), ['01', '03_ver']) From 5112752406cba56b34251c255457bfd81712860f Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Wed, 17 Jul 2024 16:03:40 -0400 Subject: [PATCH 3/3] test: Close files for Windows --- tests/test_webservice.py | 42 ++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/tests/test_webservice.py b/tests/test_webservice.py index ab4b4e86..d2dfb066 100644 --- a/tests/test_webservice.py +++ b/tests/test_webservice.py @@ -79,15 +79,19 @@ def test_delete_version(self, txrequest, site_with_egg): } storage = site_with_egg.app.getComponent(IEggStorage) - egg = storage.get('quotesbot') + version, egg = storage.get('quotesbot') + if egg: + egg.close() + content = site_with_egg.children[endpoint].render_POST(txrequest) - no_egg = storage.get('quotesbot') + no_version, no_egg = storage.get('quotesbot') + if no_egg: + no_egg.close() - assert egg[0] is not None + assert version is not None assert content['status'] == 'ok' assert 'node_name' in content - assert storage.get('quotesbot') - assert no_egg[0] is None + assert no_version is None def test_delete_project(self, txrequest, site_with_egg): endpoint = b'delproject.json' @@ -96,15 +100,19 @@ def test_delete_project(self, txrequest, site_with_egg): } storage = site_with_egg.app.getComponent(IEggStorage) - egg = storage.get('quotesbot') + version, egg = storage.get('quotesbot') + if egg: + egg.close() + content = site_with_egg.children[endpoint].render_POST(txrequest) - no_egg = storage.get('quotesbot') + no_version, no_egg = storage.get('quotesbot') + if no_egg: + no_egg.close() - assert egg[0] is not None + assert version is not None assert content['status'] == 'ok' assert 'node_name' in content - assert storage.get('quotesbot') - assert no_egg[0] is None + assert no_version is None @mock.patch('scrapyd.webservice.get_spider_list', new=fake_list_spiders) def test_addversion(self, txrequest, site_no_egg): @@ -118,15 +126,19 @@ def test_addversion(self, txrequest, site_no_egg): txrequest.args[b'egg'] = [f.read()] storage = site_no_egg.app.getComponent(IEggStorage) - egg = storage.get('quotesbot') + version, egg = storage.get('quotesbot') + if egg: + egg.close() + content = site_no_egg.children[endpoint].render_POST(txrequest) - no_egg = storage.get('quotesbot') + no_version, no_egg = storage.get('quotesbot') + if no_egg: + no_egg.close() - assert egg[0] is None + assert version is None assert content['status'] == 'ok' assert 'node_name' in content - assert storage.get('quotesbot') - assert no_egg[0] == '0_1' + assert no_version == '0_1' @mock.patch('scrapyd.webservice.get_spider_list', new=fake_list_spiders_other)