
Commit

docs: Improve interfaces documentation, in particular field availability and inter-dependencies
jpmckinney committed Jul 25, 2024
1 parent ea65213 commit bf5e1bd
Showing 2 changed files with 97 additions and 62 deletions.
2 changes: 1 addition & 1 deletion docs/config.rst
@@ -185,7 +185,7 @@ Options
- ``scrapyd.poller.QueuePoller``. When using the default :ref:`application` and :ref:`launcher` values:

- The launcher adds :ref:`max_proc` capacity at startup, and one capacity each time a Scrapy process ends.
-   - The :ref:`application` starts a timer so that, every :ref:`poll_interval` seconds, a job starts if there's capacity: that is, if the number of Scrapy processes that are running is less than the :ref:`max_proc` value.
+   - The :ref:`application` starts a timer so that, every :ref:`poll_interval` seconds, jobs start if there's capacity: that is, if the number of Scrapy processes that are running is less than the :ref:`max_proc` value.

- Implement your own, using the ``IPoller`` interface
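To make the options referenced in the list above concrete, here is a hedged example of how they might be set in ``scrapyd.conf`` (the values are illustrative, not necessarily the defaults):

```ini
[scrapyd]
# The application's timer fires every poll_interval seconds; pending jobs
# start while fewer than max_proc Scrapy processes are running.
max_proc      = 4
poll_interval = 5.0
poller        = scrapyd.poller.QueuePoller
```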

157 changes: 96 additions & 61 deletions scrapyd/interfaces.py
@@ -3,180 +3,215 @@

class IEggStorage(Interface):
"""
- A component that handles storing and retrieving eggs.
+ A component to store project eggs.
"""

def put(eggfile, project, version):
"""Store the egg (passed in the file object) under the given project and
version"""
"""
Store the egg (a file object), which represents a ``version`` of the ``project``.
"""

def get(project, version=None):
"""Return a tuple (version, file) for the egg matching the specified
project and version. If version is None, the latest version is
returned. If no egg is found for the given project/version (None, None)
should be returned."""
"""
Return ``(version, file)`` for the egg matching the ``project`` and ``version``.
If ``version`` is ``None``, the latest version and corresponding file are returned.
If no egg is found, ``(None, None)`` is returned.
.. tip:: Remember to close the ``file`` when done.
"""

def list(project):
"""Return the list of versions which have eggs stored (for the given
project) in order (the latest version is the currently used)."""
"""
Return all versions of the ``project`` in order, with the latest version last.
"""

def list_projects():
"""
- Return the list of projects from the stored eggs.
+ Return all projects in storage.

.. versionadded:: 1.3.0
Move this logic into the interface and its implementations, to allow customization.
"""

def delete(project, version=None):
"""Delete the egg stored for the given project and version. If should
also delete the project if no versions are left"""
"""
Delete the egg matching the ``project`` and ``version``. Delete the ``project``, if no versions remains.
"""


class IPoller(Interface):
"""
- A component that polls for projects that need to run.
+ A component that tracks capacity for new jobs, and starts jobs when ready.
"""

queues = Attribute(
"""
An object (like a ``dict``) with a ``__getitem__`` method that accepts a project's name and returns its
- :py:interface:`spider queue<scrapyd.interfaces.ISpiderQueue>`.
+ :py:interface:`spider queue<scrapyd.interfaces.ISpiderQueue>` of pending jobs.
"""
)

def poll():
"""Called periodically to poll for projects"""
"""
Called periodically to start jobs if there's capacity.
"""

def next():
"""Return the next message.
"""
Return the next pending job.
It should return a Deferred which will get fired when there is a new
project that needs to run, or already fired if there was a project
waiting to run already.
It should return a Deferred that will be fired when there's capacity, or already fired if there's capacity.
The message is a dict containing (at least):
The pending job is a ``dict`` containing at least the ``_project`` name, ``_spider`` name and ``_job`` ID.
The job ID is unique, at least within the project.
- the name of the project to be run in the ``_project`` key
- the name of the spider to be run in the ``_spider`` key
- a unique identifier for this run in the ``_job`` key
The pending job is later passed to :meth:`scrapyd.interfaces.IEnvironment.get_environment`.
This message will be passed later to :meth:`scrapyd.interfaces.IEnvironment.get_environment`.
.. seealso:: :meth:`scrapyd.interfaces.ISpiderQueue.pop`
"""

def update_projects():
"""Called when projects may have changed, to refresh the available
projects, including at initialization"""
"""
Called when projects may have changed, to refresh the available projects, including at initialization.
"""


class ISpiderQueue(Interface):
"""
A component to store pending jobs.
The ``dict`` keys used by the chosen ``ISpiderQueue`` implementation must match the chosen:
- :ref:`launcher` service (which calls :meth:`scrapyd.interfaces.IPoller.next`)
- :py:interface:`~scrapyd.interfaces.IEnvironment` implementation (see :meth:`scrapyd.interfaces.IPoller.next`)
- :ref:`webservices<config-services>` that schedule, cancel or list pending jobs
"""

def add(name, priority, **spider_args):
"""
Add a spider to the queue given its name a some spider arguments.
This method can return a deferred.
Add a pending job, given the spider ``name``, crawl ``priority`` and keyword arguments, which might include the
``_job`` ID, egg ``_version`` and Scrapy ``settings`` depending on the implementation, with keyword arguments
that are not recognized by the implementation being treated as spider arguments.
.. versionchanged:: 1.3.0
Add the ``priority`` parameter.
"""

def pop():
"""Pop the next message from the queue. The messages is a dict
containing a key ``name`` with the spider name and other keys as spider
attributes.
This method can return a deferred."""
"""
Pop the next pending job. The pending job is a ``dict`` containing the spider ``name``. Depending on the
implementation, other keys might include the ``_job`` ID, egg ``_version`` and Scrapy ``settings``, with
keyword arguments that are not recognized by the receiver being treated as spider arguments.
"""

def list():
"""Return a list with the messages in the queue. Each message is a dict
which must have a ``name`` key (with the spider name), and other optional
keys that will be used as spider arguments, to create the spider.
"""
Return the pending jobs.
This method can return a deferred."""
.. seealso:: :meth:`scrapyd.interfaces.ISpiderQueue.pop`
"""

def count():
"""Return the number of spiders in the queue.
This method can return a deferred."""
"""
Return the number of pending jobs.
"""

def remove(func):
"""Remove all elements from the queue for which func(element) is true,
and return the number of removed elements.
"""
Remove pending jobs for which ``func(job)`` is true, and return the number of removed pending jobss.
"""

def clear():
"""Clear the queue.
This method can return a deferred."""
"""
Remove all pending jobs.
"""


class ISpiderScheduler(Interface):
"""
- A component to schedule spider runs.
+ A component to schedule jobs.
"""

def schedule(project, spider_name, priority, **spider_args):
"""
- Schedule a spider for the given project.
+ Schedule a crawl.

.. versionchanged:: 1.3.0
   Add the ``priority`` parameter.
"""

def list_projects():
"""Return the list of available projects"""
"""
Return all projects that can be scheduled.
"""

def update_projects():
"""Called when projects may have changed, to refresh the available
projects, including at initialization"""
"""
Called when projects may have changed, to refresh the available projects, including at initialization.
"""


class IEnvironment(Interface):
"""
- A component to generate the environment of crawler processes.
+ A component to generate the environment of jobs.
+
+ The chosen ``IEnvironment`` implementation must match the chosen :ref:`launcher` service.
"""

def get_settings(message):
"""
Return the Scrapy settings to use for running the process.
- ``message`` is the message received from the :meth:`scrapyd.interfaces.IPoller.next` method.
+ Depending on the chosen :ref:`launcher`, this would be one or more of ``LOG_FILE`` or ``FEEDS``.

.. versionadded:: 1.4.2
   Support for overriding Scrapy settings via ``SCRAPY_`` environment variables was removed in Scrapy 2.8.

+ :param message: the pending job received from the :meth:`scrapyd.interfaces.IPoller.next` method
"""

def get_environment(message, slot):
"""Return the environment variables to use for running the process.
"""
Return the environment variables to use for running the process.
``message`` is the message received from the :meth:`scrapyd.interfaces.IPoller.next` method.
``slot`` is the ``Launcher`` slot where the process will be running.
Depending on the chosen :ref:`launcher`, this would be one of more of ``SCRAPY_PROJECT``,
``SCRAPYD_EGG_VERSION`` or ``SCRAPY_SETTINGS_MODULE``.
:param message: the pending job received from the :meth:`scrapyd.interfaces.IPoller.next` method
:param slot: the :ref:`launcher` slot for tracking the process
"""


class IJobStorage(Interface):
"""
- A component that handles storing and retrieving finished jobs.
+ A component to store finished jobs.

.. versionadded:: 1.3.0
"""

def add(job):
"""Add a finished job in the storage."""
"""
Add a finished job in the storage.
"""

def list():
"""
- Return a list of the finished jobs.
+ Return the finished jobs.

.. seealso:: :meth:`scrapyd.interfaces.IJobStorage.__iter__`
"""

def __len__():
"""Return a number of the finished jobs."""
"""
Return the number of finished jobs.
"""

def __iter__():
"""
Iterate over the finished jobs in reverse order by ``end_time``.
- A job has the attributes ``project``, ``spider``, ``job``, ``start_time`` and ``end_time``.
+ A job has the attributes ``project``, ``spider``, ``job``, ``start_time`` and ``end_time``, and may have the
+ attributes ``args`` (``scrapy crawl`` CLI arguments) and ``env`` (environment variables).
"""
