From a62293c7667a27aa96a6872b627f7b2fe767c7e7 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Tue, 23 Jul 2024 00:22:36 -0400 Subject: [PATCH] feat: Add ScrapyProcessProtocol.__repr__. Remove redundant tests in test_endpoints.py (renamed test_server.py). --- scrapyd/launcher.py | 6 ++ tests/__init__.py | 2 +- tests/conftest.py | 7 +- tests/test_launcher.py | 41 ++++++++++- tests/{test_endpoints.py => test_server.py} | 77 +++------------------ tests/test_webservice.py | 12 ++-- tests/test_website.py | 6 +- 7 files changed, 71 insertions(+), 80 deletions(-) rename tests/{test_endpoints.py => test_server.py} (54%) diff --git a/scrapyd/launcher.py b/scrapyd/launcher.py index 9c2c0411..3797e6de 100644 --- a/scrapyd/launcher.py +++ b/scrapyd/launcher.py @@ -103,6 +103,12 @@ def __init__(self, project, spider, job, env, args): self.args = args self.deferred = defer.Deferred() + def __repr__(self): + return ( + f"ScrapyProcessProtocol(pid={self.pid} project={self.project} spider={self.spider} job={self.job} " + f"start_time={self.start_time} end_time={self.end_time} env={self.env} args={self.args})" + ) + def outReceived(self, data): log.info(data.rstrip(), log_system=f"Launcher,{self.pid}/stdout") diff --git a/tests/__init__.py b/tests/__init__.py index 5c889de6..c57e3836 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -7,7 +7,7 @@ def get_egg_data(basename): return pkgutil.get_data("tests", f"fixtures/{basename}.egg") -def has_settings(root): +def has_settings(): return os.path.exists("scrapy.cfg") diff --git a/tests/conftest.py b/tests/conftest.py index 60b417d7..efe4a0ac 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -38,9 +38,14 @@ def chdir(monkeypatch, tmp_path): params=[ None, (Config.SECTION, "items_dir", "items"), - ] + "scrapy.cfg", + ], + ids=["default", "items_dir", "settings"], ) def app(request, chdir): + if request.param == "scrapy.cfg": + shutil.copytree(os.path.join(BASEDIR, "fixtures", "filesystem"), chdir, dirs_exist_ok=True) + config = Config() if isinstance(request.param, tuple): config.cp.set(*request.param) diff --git a/tests/test_launcher.py b/tests/test_launcher.py index d4f37bd1..21758b8f 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -1,7 +1,8 @@ +import datetime import re import pytest -from twisted.internet import error +from twisted.internet import defer, error from twisted.logger import LogLevel, capturedLogs, eventAsText from twisted.python import failure @@ -9,6 +10,7 @@ from scrapyd.config import Config from scrapyd.interfaces import IEnvironment from scrapyd.launcher import Launcher, get_crawl_args +from tests import has_settings def message(captured): @@ -73,6 +75,39 @@ def test_start_service_max_proc(app): ) +@pytest.mark.parametrize( + ("message", "expected"), + [ + ({}, {}), + ({"_version": "v1"}, {"SCRAPYD_EGG_VERSION": "v1"}), + ], +) +def test_spawn_process(launcher, message, expected): + launcher._spawn_process({"_project": "localproject", "_spider": "s1", "_job": "j1", **message}, 1) # noqa: SLF001 + + process = launcher.processes[1] + + assert isinstance(process.pid, int) + assert process.project == "localproject" + assert process.spider == "s1" + assert process.job == "j1" + assert isinstance(process.start_time, datetime.datetime) + assert process.end_time is None + assert isinstance(process.args, list) # see tests below + assert isinstance(process.deferred, defer.Deferred) + + # scrapyd.environ.Environ.get_environment + assert process.env["SCRAPY_PROJECT"] == "localproject" + for key, value in expected.items(): + assert process.env[key] == value + if "SCRAPYD_EGG_VERSION" not in expected: + assert "SCRAPYD_EGG_VERSION" not in process.env + if has_settings(): + assert process.env["SCRAPY_SETTINGS_MODULE"] == "localproject.settings" + else: + assert "SCRAPY_SETTINGS_MODULE" not in process.env + + def test_out_received(process): with capturedLogs() as captured: process.outReceived(b"out\n") @@ -155,3 +190,7 @@ def test_process_ended_terminated(environ, process): "args=\\['\\S+', '-m', 'scrapyd\\.runner', 'crawl', 's1', '-s', 'LOG_FILE=\\S+', '-a', '_job=j1'\\]", message(captured), ) + + +def test_repr(process): + assert repr(process).startswith(f"ScrapyProcessProtocol(pid={process.pid} project=p1 spider=s1 job=j1 start_time=") diff --git a/tests/test_endpoints.py b/tests/test_server.py similarity index 54% rename from tests/test_endpoints.py rename to tests/test_server.py index c7752ada..6783682c 100644 --- a/tests/test_endpoints.py +++ b/tests/test_server.py @@ -4,7 +4,6 @@ import pytest import requests -from requests.models import Response from tests import get_egg_data from tests.mockserver import MockScrapydServer @@ -16,35 +15,10 @@ def mock_scrapyd(chdir): yield server -@pytest.fixture() -def quotesbot_egg(): - return io.BytesIO(get_egg_data("quotesbot")) - - -@pytest.fixture() -def quotesbot_egg_asyncio(): - # This egg file contains settings with TWISTED_REACTOR set to asyncio ractor - return io.BytesIO(get_egg_data("quotesbot_asyncio")) - - -def _deploy(mock_scrapyd, quotesbot_egg) -> Response: - url = mock_scrapyd.urljoin("addversion.json") - data = {b"project": b"quotesbot", b"version": b"0.01"} - files = {b"egg": quotesbot_egg} - return requests.post(url, data=data, files=files) - - def test_urljoin(mock_scrapyd): assert mock_scrapyd.urljoin("foo") == f"{mock_scrapyd.url}foo" -def test_root(mock_scrapyd): - response = requests.get(mock_scrapyd.url) - - assert response.status_code == 200 - assert re.search("To schedule a spider you need to use the API", response.text) - - def test_auth(): with MockScrapydServer(username="bob", password="hunter2") as server: assert requests.get(server.url).status_code == 401 @@ -113,48 +87,15 @@ def test_options(mock_scrapyd, webservice, method): assert response.headers["Allow"] == f"OPTIONS, HEAD, {method}" -def test_launch_spider_get(mock_scrapyd): - response = requests.get(mock_scrapyd.urljoin("schedule.json")) - - assert response.status_code == 200 - assert response.json()["status"] == "error" - - -def test_spider_list_no_project(mock_scrapyd): - response = requests.get(mock_scrapyd.urljoin("listspiders.json")) - data = response.json() - - assert response.status_code == 200 - assert data["status"] == "error" - assert data["message"] == "'project' parameter is required" - - -def test_spider_list_project_no_egg(mock_scrapyd): - response = requests.get(mock_scrapyd.urljoin("listprojects.json")) - data = response.json() - - assert response.status_code == 200 - assert data["status"] == "ok" - - -def test_addversion_and_delversion(mock_scrapyd, quotesbot_egg): - response = _deploy(mock_scrapyd, quotesbot_egg) - data = response.json() - - assert response.status_code == 200 - assert data["spiders"] == 2 - assert data["status"] == "ok" - assert data["project"] == "quotesbot" - - url = mock_scrapyd.urljoin("delversion.json") - res = requests.post(url, data={"project": "quotesbot", "version": "0.01"}) - - assert res.status_code == 200 - assert res.json()["status"] == "ok" - - -def test_failed_settings(mock_scrapyd, quotesbot_egg_asyncio): - response = _deploy(mock_scrapyd, quotesbot_egg_asyncio) +# https://github.com/scrapy/scrapyd/issues/377 +def test_other_reactors(mock_scrapyd): + response = requests.post( + mock_scrapyd.urljoin("addversion.json"), + data={b"project": b"quotesbot", b"version": b"0.01"}, + # The egg's quotesbot/settings.py file sets TWISTED_REACTOR to + # "twisted.internet.asyncioreactor.AsyncioSelectorReactor" + files={b"egg": io.BytesIO(get_egg_data("quotesbot_asyncio"))}, + ) assert response.status_code == 200 assert response.json()["status"] == "ok" diff --git a/tests/test_webservice.py b/tests/test_webservice.py index 1692273a..dfe90598 100644 --- a/tests/test_webservice.py +++ b/tests/test_webservice.py @@ -33,7 +33,7 @@ def scrapy_process(): def get_local_projects(root): - return ["localproject"] if has_settings(root) else [] + return ["localproject"] if has_settings() else [] def add_test_version(app, project, version, basename): @@ -185,7 +185,7 @@ def test_daemonstatus(txrequest, root_with_egg, scrapy_process): ], ) def test_list_spiders(txrequest, root, args, spiders, run_only_if_has_settings): - if run_only_if_has_settings and not has_settings(root): + if run_only_if_has_settings and not has_settings(): pytest.skip("[settings] section is not set") root_add_version(root, "myproject", "r1", "mybot") @@ -205,7 +205,7 @@ def test_list_spiders(txrequest, root, args, spiders, run_only_if_has_settings): ], ) def test_list_spiders_nonexistent(txrequest, root, args, param, run_only_if_has_settings): - if run_only_if_has_settings and not has_settings(root): + if run_only_if_has_settings and not has_settings(): pytest.skip("[settings] section is not set") root_add_version(root, "myproject", "r1", "mybot") @@ -437,7 +437,7 @@ def test_add_version(txrequest, root): def test_add_version_settings(txrequest, root): - if not has_settings(root): + if not has_settings(): pytest.skip("[settings] section is not set") args = {b"project": [b"localproject"], b"version": [b"0.1"], b"egg": [get_egg_data("quotesbot")]} @@ -461,7 +461,7 @@ def test_add_version_invalid(txrequest, root): ], ) def test_schedule(txrequest, root, args, run_only_if_has_settings): - if run_only_if_has_settings and not has_settings(root): + if run_only_if_has_settings and not has_settings(): pytest.skip("[settings] section is not set") project = args[b"project"][0].decode() @@ -529,7 +529,7 @@ def test_schedule_parameters(txrequest, root_with_egg): ], ) def test_schedule_nonexistent(txrequest, root, args, param, run_only_if_has_settings): - if run_only_if_has_settings and not has_settings(root): + if run_only_if_has_settings and not has_settings(): pytest.skip("[settings] section is not set") root_add_version(root, "myproject", "r1", "mybot") diff --git a/tests/test_website.py b/tests/test_website.py index 1b1c5b6d..32d537f7 100644 --- a/tests/test_website.py +++ b/tests/test_website.py @@ -28,11 +28,11 @@ def test_render_logs_dir(txrequest, root): # https://github.com/twisted/twisted/blob/trunk/src/twisted/web/test/test_static.py def test_render_logs_file(txrequest, root): os.makedirs(os.path.join("logs", "quotesbot")) - with open(os.path.join("logs", "foo.bar"), "wb") as f: + with open(os.path.join("logs", "foo.txt"), "wb") as f: f.write(b"baz") file = root.children[b"logs"] - request = DummyRequest([b"foo.bar"]) + request = DummyRequest([b"foo.txt"]) child = resource.getChildForRequest(file, request) d = _render(child, request) @@ -78,7 +78,7 @@ def test_render_home(txrequest, root_with_egg): content = root_with_egg.children[b""].render_GET(txrequest) expect_headers = { b"Content-Type": [b"text/html; charset=utf-8"], - b"Content-Length": [b"736" if has_settings(root_with_egg) else b"714"], + b"Content-Length": [b"736" if has_settings() else b"714"], } if root_with_egg.local_items: expect_headers[b"Content-Length"] = [b"751"]