diff --git a/docs/news.rst b/docs/news.rst
index ca9dcff4..ff147ff0 100644
--- a/docs/news.rst
+++ b/docs/news.rst
@@ -42,7 +42,6 @@ Web UI
 API
 ^^^
 
-- The ``Access-Control-Allow-Methods`` response header contains only the HTTP methods to which webservices respond.
 - Clarify error messages, for example:
 
   - ``'project' parameter is required``, instead of ``'project'`` (KeyError)
@@ -79,6 +78,7 @@ Library
   - ``sorted_versions`` to ``scrapyd.eggstorage``
   - ``get_crawl_args`` to ``scrapyd.launcher``
 
+- :ref:`jobstorage` uses the ``ScrapyProcessProtocol`` class, by default. If :ref:`jobstorage` is set to ``scrapyd.jobstorage.SqliteJobStorage``, Scrapyd 1.3.0 uses a ``Job`` class, instead.
 - Move the ``activate_egg`` function from the ``scrapyd.eggutils`` module to its caller, the ``scrapyd.runner`` module.
 - Move the ``job_items_url`` and ``job_log_url`` functions from the ``scrapyd.jobstorage`` module to the ``scrapyd.utils`` module. :ref:`jobstorage` is not responsible for URLs.
 - Change the ``get_crawl_args`` function to no longer convert ``bytes`` to ``str``, as already done by its caller.
@@ -100,7 +100,8 @@ Fixed
 API
 ^^^
 
-- The Content-Length header counts the number of bytes, instead of the number of characters.
+- The ``Content-Length`` header counts the number of bytes, instead of the number of characters.
+- The ``Access-Control-Allow-Methods`` response header contains only the HTTP methods to which webservices respond.
 - The :ref:`schedule.json` webservice sets the ``node_name`` field in error responses.
 - The next pending job for all but one project was unreported by the :ref:`daemonstatus.json` and :ref:`listjobs.json` webservices, and was not cancellable by the :ref:`cancel.json` webservice.
diff --git a/scrapyd/jobstorage.py b/scrapyd/jobstorage.py
index c67dce0a..84749991 100644
--- a/scrapyd/jobstorage.py
+++ b/scrapyd/jobstorage.py
@@ -3,38 +3,11 @@
 Job storage was previously in-memory only and managed by the launcher.
 """
 
-import datetime
-
 from zope.interface import implementer
 
 from scrapyd import sqlite
 from scrapyd.interfaces import IJobStorage
-
-
-class Job:
-    def __init__(self, project, spider, job=None, start_time=None, end_time=None):
-        self.project = project
-        self.spider = spider
-        self.job = job
-        self.start_time = start_time if start_time else datetime.datetime.now()
-        self.end_time = end_time if end_time else datetime.datetime.now()
-
-    # For equality assertions in tests.
-    def __eq__(self, other):
-        return (
-            self.project == other.project
-            and self.spider == other.spider
-            and self.job == other.job
-            and self.start_time == other.start_time
-            and self.end_time == other.end_time
-        )
-
-    # For error messsages in tests.
-    def __repr__(self):
-        return (
-            f"Job(project={self.project}, spider={self.spider}, job={self.job}, "
-            f"start_time={self.start_time}, end_time={self.end_time})"
-        )
+from scrapyd.launcher import ScrapyProcessProtocol
 
 
 @implementer(IJobStorage)
@@ -74,5 +47,8 @@ def __len__(self):
         return len(self.jobs)
 
     def __iter__(self):
-        for project, spider, job, start_time, end_time in self.jobs:
-            yield Job(project=project, spider=spider, job=job, start_time=start_time, end_time=end_time)
+        for project, spider, jobid, start_time, end_time in self.jobs:
+            job = ScrapyProcessProtocol(project, spider, jobid, env={}, args=[])
+            job.start_time = start_time
+            job.end_time = end_time
+            yield job
diff --git a/scrapyd/launcher.py b/scrapyd/launcher.py
index 246224ad..81faa779 100644
--- a/scrapyd/launcher.py
+++ b/scrapyd/launcher.py
@@ -88,21 +88,34 @@ def _get_max_proc(self, config):
 # https://docs.twisted.org/en/stable/api/twisted.internet.protocol.ProcessProtocol.html
 class ScrapyProcessProtocol(protocol.ProcessProtocol):
     def __init__(self, project, spider, job, env, args):
-        self.pid = None
         self.project = project
         self.spider = spider
         self.job = job
+        self.pid = None
         self.start_time = datetime.datetime.now()
         self.end_time = None
-        self.env = env
         self.args = args
+        self.env = env
         self.deferred = defer.Deferred()
 
+    # For equality assertions in tests.
+    def __eq__(self, other):
+        return (
+            self.project == other.project
+            and self.spider == other.spider
+            and self.job == other.job
+            and self.pid == other.pid
+            and self.start_time == other.start_time
+            and self.end_time == other.end_time
+            and self.args == other.args
+            and self.env == other.env
+        )
+
+    # For error messsages in tests.
     def __repr__(self):
         return (
-            f"ScrapyProcessProtocol(pid={self.pid} project={self.project} spider={self.spider} job={self.job} "
-            f"start_time={self.start_time} end_time={self.end_time} env={self.env} args={self.args})"
+            f"ScrapyProcessProtocol(project={self.project} spider={self.spider} job={self.job} pid={self.pid} "
+            f"start_time={self.start_time} end_time={self.end_time} args={self.args} env={self.env})"
         )
 
     def outReceived(self, data):
diff --git a/scrapyd/sqlite.py b/scrapyd/sqlite.py
index 31ad8165..5e060d0b 100644
--- a/scrapyd/sqlite.py
+++ b/scrapyd/sqlite.py
@@ -29,6 +29,8 @@ def __init__(self, database, table):
     def __len__(self):
         return self.conn.execute(f"SELECT COUNT(*) FROM {self.table}").fetchone()[0]
 
+    # SQLite JSON is enabled by default since 3.38.0 (2022-02-22), and JSONB is available since 3.45.0 (2024-01-15).
+    # https://sqlite.org/json1.html
     def encode(self, obj):
         return sqlite3.Binary(json.dumps(obj).encode("ascii"))
diff --git a/tests/__init__.py b/tests/__init__.py
index f83b04f7..a3533f36 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,9 +1,12 @@
+import datetime
 import io
 import os.path
 import pkgutil
 
 from twisted.logger import eventAsText
 
+from scrapyd.launcher import ScrapyProcessProtocol
+
 
 def get_egg_data(basename):
     return pkgutil.get_data("tests", f"fixtures/{basename}.egg")
@@ -19,3 +22,14 @@ def get_message(captured):
     return eventAsText(captured[0]).split(" ", 1)[1]
+
+
+def get_finished_job(project="p1", spider="s1", job="j1", start_time=None, end_time=None):
+    if start_time is None:
+        start_time = datetime.datetime.now()
+    if end_time is None:
+        end_time = datetime.datetime.now()
+    process = ScrapyProcessProtocol(project, spider, job, {}, [])
+    process.start_time = start_time
+    process.end_time = end_time
+    return process
diff --git a/tests/test_job.py b/tests/test_job.py
deleted file mode 100644
index fd6197ff..00000000
--- a/tests/test_job.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import datetime
-
-from scrapyd.jobstorage import Job
-
-
-def test_job_repr():
-    job = Job(
-        "p1",
-        "s1",
-        start_time=datetime.datetime(2001, 2, 3, 4, 5, 6, 0),
-        end_time=datetime.datetime(2001, 2, 3, 4, 5, 7, 0),
-    )
-    assert (
-        repr(job)
-        == "Job(project=p1, spider=s1, job=None, start_time=2001-02-03 04:05:06, end_time=2001-02-03 04:05:07)"
-    )
diff --git a/tests/test_jobstorage.py b/tests/test_jobstorage.py
index d5d21734..422c9210 100644
--- a/tests/test_jobstorage.py
+++ b/tests/test_jobstorage.py
@@ -4,11 +4,12 @@
 from scrapyd.config import Config
 from scrapyd.interfaces import IJobStorage
-from scrapyd.jobstorage import Job, MemoryJobStorage, SqliteJobStorage
+from scrapyd.jobstorage import MemoryJobStorage, SqliteJobStorage
+from tests import get_finished_job
 
-job1 = Job("p1", "s1", end_time=datetime.datetime(2001, 2, 3, 4, 5, 6, 7))
-job2 = Job("p2", "s2", end_time=datetime.datetime(2001, 2, 3, 4, 5, 6, 8))
-job3 = Job("p3", "s3", end_time=datetime.datetime(2001, 2, 3, 4, 5, 6, 9))
+job1 = get_finished_job("p1", "s1", "j1", end_time=datetime.datetime(2001, 2, 3, 4, 5, 6, 7))
+job2 = get_finished_job("p2", "s2", "j2", end_time=datetime.datetime(2001, 2, 3, 4, 5, 6, 8))
+job3 = get_finished_job("p3", "s3", "j3", end_time=datetime.datetime(2001, 2, 3, 4, 5, 6, 9))
 
 
 def pytest_generate_tests(metafunc):
diff --git a/tests/test_launcher.py b/tests/test_launcher.py
index bce8922f..0d524396 100644
--- a/tests/test_launcher.py
+++ b/tests/test_launcher.py
@@ -183,4 +183,4 @@ def test_process_ended_terminated(environ, process):
 
 
 def test_repr(process):
-    assert repr(process).startswith(f"ScrapyProcessProtocol(pid={process.pid} project=p1 spider=s1 job=j1 start_time=")
+    assert repr(process).startswith(f"ScrapyProcessProtocol(project=p1 spider=s1 job=j1 pid={process.pid} start_time=")
diff --git a/tests/test_sqlite.py b/tests/test_sqlite.py
index 97a98894..f29e680f 100644
--- a/tests/test_sqlite.py
+++ b/tests/test_sqlite.py
@@ -2,8 +2,8 @@
 import pytest
 
-from scrapyd.jobstorage import Job
 from scrapyd.sqlite import JsonSqlitePriorityQueue, SqliteFinishedJobs
+from tests import get_finished_job
 
 
 @pytest.fixture()
 def jsonsqlitepriorityqueue():
@@ -14,9 +14,9 @@ def sqlitefinishedjobs():
     q = SqliteFinishedJobs(":memory:")
-    q.add(Job("p1", "s1", end_time=datetime.datetime(2001, 2, 3, 4, 5, 6, 7)))
-    q.add(Job("p2", "s2", end_time=datetime.datetime(2001, 2, 3, 4, 5, 6, 8)))
-    q.add(Job("p3", "s3", end_time=datetime.datetime(2001, 2, 3, 4, 5, 6, 9)))
+    q.add(get_finished_job("p1", "s1", "j1", end_time=datetime.datetime(2001, 2, 3, 4, 5, 6, 7)))
+    q.add(get_finished_job("p2", "s2", "j2", end_time=datetime.datetime(2001, 2, 3, 4, 5, 6, 8)))
+    q.add(get_finished_job("p3", "s3", "j3", end_time=datetime.datetime(2001, 2, 3, 4, 5, 6, 9)))
     return q
diff --git a/tests/test_webservice.py b/tests/test_webservice.py
index 9a64a675..d7775a6f 100644
--- a/tests/test_webservice.py
+++ b/tests/test_webservice.py
@@ -4,7 +4,7 @@
 import os
 import re
 import sys
-from unittest.mock import MagicMock, call
+from unittest.mock import MagicMock, PropertyMock, call
 
 import pytest
 from twisted.logger import LogLevel, capturedLogs
@@ -12,12 +12,11 @@
 from scrapyd.exceptions import DirectoryTraversalError, RunnerError
 from scrapyd.interfaces import IEggStorage
-from scrapyd.jobstorage import Job
 from scrapyd.launcher import ScrapyProcessProtocol
 from scrapyd.webservice import spider_list
-from tests import get_egg_data, get_message, has_settings, root_add_version
+from tests import get_egg_data, get_finished_job, get_message, has_settings, root_add_version
 
-job1 = Job(
+job1 = get_finished_job(
     project="p1",
     spider="s1",
     job="j1",
@@ -30,7 +29,9 @@ def scrapy_process():
     process = ScrapyProcessProtocol(project="p1", spider="s1", job="j1", env={}, args=[])
     process.start_time = datetime.datetime(2001, 2, 3, 4, 5, 6, 9)
+    process.end_time = datetime.datetime(2001, 2, 3, 4, 5, 6, 10)
     process.transport = MagicMock()
+    type(process.transport).pid = PropertyMock(return_value=12345)
     return process
@@ -290,8 +291,8 @@ def test_status(txrequest, root, scrapy_process, args):
     root.update_projects()
 
     if args:
-        root.launcher.finished.add(Job(project="p2", spider="s2", job="j1"))
-        root.launcher.processes[0] = ScrapyProcessProtocol("p2", "s2", "j1", {}, [])
+        root.launcher.finished.add(get_finished_job("p2", "s2", "j1"))
+        root.launcher.processes[0] = ScrapyProcessProtocol("p2", "s2", "j1", env={}, args=[])
         root.poller.queues["p2"].add("s2", _job="j1")
 
     expected = {"currstate": None}
@@ -325,8 +326,8 @@ def test_list_jobs(txrequest, root, scrapy_process, args):
     root.update_projects()
 
     if args:
-        root.launcher.finished.add(Job(project="p2", spider="s2", job="j2"))
-        root.launcher.processes[0] = ScrapyProcessProtocol("p2", "s2", "j2", {}, [])
+        root.launcher.finished.add(get_finished_job("p2", "s2", "j2"))
+        root.launcher.processes[0] = ScrapyProcessProtocol("p2", "s2", "j2", env={}, args=[])
         root.poller.queues["p2"].add("s2", _job="j2")
 
     expected = {"pending": [], "running": [], "finished": []}
@@ -336,9 +337,9 @@ def test_list_jobs(txrequest, root, scrapy_process, args):
     expected["finished"].append(
         {
-            "id": "j1",
             "project": "p1",
             "spider": "s1",
+            "id": "j1",
             "start_time": "2001-02-03 04:05:06.000007",
             "end_time": "2001-02-03 04:05:06.000008",
             "items_url": "/items/p1/s1/j1.jl",
@@ -351,11 +352,11 @@ def test_list_jobs(txrequest, root, scrapy_process, args):
     expected["running"].append(
         {
-            "id": "j1",
             "project": "p1",
             "spider": "s1",
-            "start_time": "2001-02-03 04:05:06.000009",
+            "id": "j1",
             "pid": None,
+            "start_time": "2001-02-03 04:05:06.000009",
         }
     )
     assert_content(txrequest, root, "GET", "listjobs", args, expected)
@@ -371,9 +372,9 @@ def test_list_jobs(txrequest, root, scrapy_process, args):
     expected["pending"].append(
         {
-            "id": "j1",
             "project": "p1",
             "spider": "s1",
+            "id": "j1",
             "version": "0.1",
             "settings": {"DOWNLOAD_DELAY=2": "TRACK=Cause = Time"},
             "args": {"other": "one"},
@@ -645,7 +646,7 @@ def test_cancel(txrequest, root, scrapy_process, args):
     root.launcher.processes[0] = scrapy_process
     root.launcher.processes[1] = scrapy_process
-    root.launcher.processes[2] = ScrapyProcessProtocol("p2", "s2", "j2", {}, [])
+    root.launcher.processes[2] = ScrapyProcessProtocol("p2", "s2", "j2", env={}, args=[])
     expected["prevstate"] = "running"
 
     assert_content(txrequest, root, "POST", "cancel", args, expected)
diff --git a/tests/test_website.py b/tests/test_website.py
index f319ba3d..76103a21 100644
--- a/tests/test_website.py
+++ b/tests/test_website.py
@@ -7,10 +7,9 @@
 from twisted.web.test.requesthelper import DummyRequest
 
 from scrapyd.app import application
-from scrapyd.jobstorage import Job
 from scrapyd.launcher import ScrapyProcessProtocol
 from scrapyd.website import Root
-from tests import has_settings, root_add_version
+from tests import get_finished_job, has_settings, root_add_version
 
 
 def assert_headers(txrequest):
@@ -33,7 +32,7 @@ def assert_hrefs(urls, text, header):
 
 # Derived from test_emptyChildUnicodeParent.
 # https://github.com/twisted/twisted/blob/trunk/src/twisted/web/test/test_static.py
-def test_render_logs_dir(txrequest, root):
+def test_logs_dir(txrequest, root):
     os.makedirs(os.path.join("logs", "quotesbot"))
 
     file = root.children[b"logs"]
@@ -49,7 +48,7 @@
 # Derived from test_indexNames.
 # https://github.com/twisted/twisted/blob/trunk/src/twisted/web/test/test_static.py
-def test_render_logs_file(txrequest, root):
+def test_logs_file(txrequest, root):
     os.makedirs(os.path.join("logs", "quotesbot"))
     with open(os.path.join("logs", "foo.txt"), "wb") as f:
         f.write(b"baz")
@@ -74,7 +73,7 @@ def cbRendered(ignored):
 
 @pytest.mark.parametrize("cancel", [True, False], ids=["cancel", "no_cancel"])
 @pytest.mark.parametrize("header", [True, False], ids=["header", "no_header"])
-def test_render_jobs(txrequest, config, cancel, header):
+def test_jobs(txrequest, config, cancel, header):
     if not cancel:
         config.cp.remove_option("services", "cancel.json")
@@ -82,8 +81,8 @@
     root_add_version(root, "quotesbot", "0.1", "quotesbot")
     root.update_projects()
 
-    root.launcher.finished.add(Job("p1", "s1", "j-finished"))
-    root.launcher.processes[0] = ScrapyProcessProtocol("p2", "s2", "j-running", {}, [])
+    root.launcher.finished.add(get_finished_job("p1", "s1", "j-finished"))
+    root.launcher.processes[0] = ScrapyProcessProtocol("p2", "s2", "j-running", env={}, args=[])
     root.poller.queues["quotesbot"].add("quotesbot", _job="j-pending")
 
     if header:
@@ -117,11 +116,12 @@
     else:
         assert b"Cancel" not in content
         assert b'/cancel.json">' not in content
+        assert b' value="j-finished">' not in content
 
 
 @pytest.mark.parametrize("with_egg", [True, False])
 @pytest.mark.parametrize("header", [True, False])
-def test_render_home(txrequest, root, with_egg, header):
+def test_home(txrequest, root, with_egg, header):
     if with_egg:
         root_add_version(root, "quotesbot", "0.1", "quotesbot")
         root.update_projects()