From 6deb57a9c8eca31e6b353d55eb131c1946125cd4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Perceval=20Wajsb=C3=BCrt?= <perceval.wajsburt@aphp.fr>
Date: Wed, 13 Sep 2023 12:11:01 +0200
Subject: [PATCH] chore: improve coverage and clean entry points

---
 edsnlp/components.py                          |   1 -
 edsnlp/patch_spacy_dot_components.py          |  81 +------
 .../core/normalizer/lowercase/factory.py      |  25 ---
 .../pipelines/core/normalizer/normalizer.py   |   2 +-
 .../__init__.py                               |   0
 .../normalizer/remove_lowercase/factory.py    |  47 +++++
 edsnlp/pipelines/factories.py                 |   4 +-
 edsnlp/utils/blocs.py                         | 198 ------------------
 edsnlp/utils/collections.py                   |  13 --
 pyproject.toml                                | 175 ++++++++++------
 tests/pipelines/test_pipelines.py             |   4 +
 tests/test_span_args.py                       |  32 +++
 12 files changed, 206 insertions(+), 376 deletions(-)
 delete mode 100644 edsnlp/components.py
 delete mode 100644 edsnlp/pipelines/core/normalizer/lowercase/factory.py
 rename edsnlp/pipelines/core/normalizer/{lowercase => remove_lowercase}/__init__.py (100%)
 create mode 100644 edsnlp/pipelines/core/normalizer/remove_lowercase/factory.py
 delete mode 100644 edsnlp/utils/blocs.py
 create mode 100644 tests/test_span_args.py

diff --git a/edsnlp/components.py b/edsnlp/components.py
deleted file mode 100644
index 30198e08c..000000000
--- a/edsnlp/components.py
+++ /dev/null
@@ -1 +0,0 @@
-from edsnlp.pipelines.factories import *  # noqa : used to import pipelines
diff --git a/edsnlp/patch_spacy_dot_components.py b/edsnlp/patch_spacy_dot_components.py
index 7f1b62dab..61383e096 100644
--- a/edsnlp/patch_spacy_dot_components.py
+++ b/edsnlp/patch_spacy_dot_components.py
@@ -5,7 +5,6 @@
 from spacy.errors import Errors
 from spacy.language import FactoryMeta
 from spacy.pipe_analysis import validate_attrs
-from spacy.pipeline import Pipe
 from spacy.util import SimpleFrozenDict, SimpleFrozenList, registry
 
 
@@ -51,10 +50,11 @@ def factory(
     if not isinstance(name, str):
         raise ValueError(Errors.E963.format(decorator="factory"))
     if not isinstance(default_config, dict):
-        err = Errors.E962.format(
-            style="default config", name=name, cfg_type=type(default_config)
+        raise ValueError(
+            Errors.E962.format(
+                style="default config", name=name, cfg_type=type(default_config)
+            )
         )
-        raise ValueError(err)
 
     def add_factory(factory_func: Callable) -> Callable:
         internal_name = cls.get_factory_name(name)
@@ -102,77 +102,4 @@ def add_factory(factory_func: Callable) -> Callable:
     return add_factory
 
 
-@classmethod
-def component(
-    cls,
-    name: str,
-    *,
-    assigns: Iterable[str] = SimpleFrozenList(),
-    requires: Iterable[str] = SimpleFrozenList(),
-    retokenizes: bool = False,
-    func: Optional["Pipe"] = None,
-) -> Callable[..., Any]:
-    """
-    Patched from spaCy to allow back dots in factory
-    names (https://github.com/aphp/edsnlp/pull/152)
-
-    Register a new pipeline component. Can be used for stateless function
-    components that don't require a separate factory. Can be used as a
-    decorator on a function or classmethod, or called as a function with the
-    factory provided as the func keyword argument. To create a component and
-    add it to the pipeline, you can use nlp.add_pipe(name).
-
-    name (str): The name of the component factory.
-    assigns (Iterable[str]): Doc/Token attributes assigned by this component,
-        e.g. "token.ent_id". Used for pipeline analysis.
-    requires (Iterable[str]): Doc/Token attributes required by this component,
-        e.g. "token.ent_id". Used for pipeline analysis.
-    retokenizes (bool): Whether the component changes the tokenization.
-        Used for pipeline analysis.
-    func (Optional[Callable]): Factory function if not used as a decorator.
-
-    DOCS: https://spacy.io/api/language#component
-    """
-    if name is not None:
-        if not isinstance(name, str):
-            raise ValueError(Errors.E963.format(decorator="component"))
-    component_name = name if name is not None else util.get_object_name(func)
-
-    def add_component(component_func: "Pipe") -> Callable:
-        if isinstance(func, type):  # function is a class
-            raise ValueError(Errors.E965.format(name=component_name))
-
-        def factory_func(nlp, name: str) -> "Pipe":
-            return component_func
-
-        internal_name = cls.get_factory_name(name)
-        if internal_name in registry.factories:
-            # We only check for the internal name here – it's okay if it's a
-            # subclass and the base class has a factory of the same name. We
-            # also only raise if the function is different to prevent raising
-            # if module is reloaded. It's hacky, but we need to check the
-            # existing functure for a closure and whether that's identical
-            # to the component function (because factory_func created above
-            # will always be different, even for the same function)
-            existing_func = registry.factories.get(internal_name)
-            closure = existing_func.__closure__
-            wrapped = [c.cell_contents for c in closure][0] if closure else None
-            if util.is_same_func(wrapped, component_func):
-                factory_func = existing_func  # noqa: F811
-
-        cls.factory(
-            component_name,
-            assigns=assigns,
-            requires=requires,
-            retokenizes=retokenizes,
-            func=factory_func,
-        )
-        return component_func
-
-    if func is not None:  # Support non-decorator use cases
-        return add_component(func)
-    return add_component
-
-
 spacy.Language.factory = factory
-spacy.Language.component = component
diff --git a/edsnlp/pipelines/core/normalizer/lowercase/factory.py b/edsnlp/pipelines/core/normalizer/lowercase/factory.py
deleted file mode 100644
index 5205db840..000000000
--- a/edsnlp/pipelines/core/normalizer/lowercase/factory.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from spacy.language import Language
-from spacy.tokens import Doc
-
-
-@Language.component("remove-lowercase", assigns=["token.norm"])
-@Language.component("eds.remove-lowercase", assigns=["token.norm"])
-def remove_lowercase(doc: Doc):
-    """
-    Add case on the `NORM` custom attribute. Should always be applied first.
-
-    Parameters
-    ----------
-    doc : Doc
-        The spaCy `Doc` object.
-
-    Returns
-    -------
-    Doc
-        The document, with case put back in `NORM`.
-    """
-
-    for token in doc:
-        token.norm_ = token.text
-
-    return doc
diff --git a/edsnlp/pipelines/core/normalizer/normalizer.py b/edsnlp/pipelines/core/normalizer/normalizer.py
index 69ac645b9..3d598c572 100644
--- a/edsnlp/pipelines/core/normalizer/normalizer.py
+++ b/edsnlp/pipelines/core/normalizer/normalizer.py
@@ -4,9 +4,9 @@
 from spacy.tokens import Doc
 
 from .accents.accents import AccentsConverter
-from .lowercase.factory import remove_lowercase
 from .pollution.pollution import PollutionTagger
 from .quotes.quotes import QuotesConverter
+from .remove_lowercase.factory import remove_lowercase
 from .spaces.spaces import SpacesTagger
 
 
diff --git a/edsnlp/pipelines/core/normalizer/lowercase/__init__.py b/edsnlp/pipelines/core/normalizer/remove_lowercase/__init__.py
similarity index 100%
rename from edsnlp/pipelines/core/normalizer/lowercase/__init__.py
rename to edsnlp/pipelines/core/normalizer/remove_lowercase/__init__.py
diff --git a/edsnlp/pipelines/core/normalizer/remove_lowercase/factory.py b/edsnlp/pipelines/core/normalizer/remove_lowercase/factory.py
new file mode 100644
index 000000000..e1018aed2
--- /dev/null
+++ b/edsnlp/pipelines/core/normalizer/remove_lowercase/factory.py
@@ -0,0 +1,47 @@
+from spacy.language import Language
+from spacy.tokens import Doc
+
+from edsnlp.utils.deprecation import deprecated_factory
+
+
+def remove_lowercase(doc: Doc):
+    """
+    Reset `NORM` to each token's raw text (case kept). Should always be applied first.
+
+    Parameters
+    ----------
+    doc : Doc
+        The spaCy `Doc` object; its tokens are modified in place.
+
+    Returns
+    -------
+    Doc
+        The same document, with every `token.norm_` set to `token.text`.
+    """
+
+    for token in doc:
+        token.norm_ = token.text
+
+    return doc
+
+
+@deprecated_factory("remove-lowercase", "eds.remove_lowercase", assigns=["token.norm"])
+@deprecated_factory(
+    "eds.remove-lowercase", "eds.remove_lowercase", assigns=["token.norm"]
+)
+@Language.factory("eds.remove_lowercase", assigns=["token.norm"])
+def create_component(
+    nlp: Language,
+    name: str,
+):
+    """
+    Factory for `eds.remove_lowercase`: returns the `remove_lowercase` function.
+
+    Parameters
+    ----------
+    nlp : Language
+        The pipeline object (not used by this factory).
+    name : str
+        The name of the component (not used by this factory).
+    """
+    return remove_lowercase  # pragma: no cover
diff --git a/edsnlp/pipelines/factories.py b/edsnlp/pipelines/factories.py
index 0dceed4bb..0460491b4 100644
--- a/edsnlp/pipelines/factories.py
+++ b/edsnlp/pipelines/factories.py
@@ -6,9 +6,11 @@
 from .core.matcher.factory import create_component as matcher
 from .core.normalizer.accents.factory import create_component as accents
 from .core.normalizer.factory import create_component as normalizer
-from .core.normalizer.lowercase.factory import remove_lowercase
 from .core.normalizer.pollution.factory import create_component as pollution
 from .core.normalizer.quotes.factory import create_component as quotes
+from .core.normalizer.remove_lowercase.factory import (
+    create_component as remove_lowercase,
+)
 from .core.normalizer.spaces.factory import create_component as spaces
 from .core.sentences.factory import create_component as sentences
 from .core.terminology.factory import create_component as terminology
diff --git a/edsnlp/utils/blocs.py b/edsnlp/utils/blocs.py
deleted file mode 100644
index f00bf8de3..000000000
--- a/edsnlp/utils/blocs.py
+++ /dev/null
@@ -1,198 +0,0 @@
-"""
-Utility that extracts code blocs and runs them.
-
-Largely inspired by https://github.com/koaning/mktestdocs
-"""
-
-import re
-from pathlib import Path
-from typing import List
-
-BLOCK_PATTERN = re.compile(
-    (
-        r"((?P<skip><!-- no-check -->)\s+)?(?P<indent> *)"
-        r"```(?P<title>.*?)\n(?P<code>.+?)```"
-    ),
-    flags=re.DOTALL,
-)
-OUTPUT_PATTERN = "# Out: "
-
-
-def check_outputs(code: str) -> str:
-    """
-    Looks for output patterns, and modifies the bloc:
-
-    1. The preceding line becomes `#!python v = expr`
-    2. The output line becomes an `#!python assert` statement
-
-    Parameters
-    ----------
-    code : str
-        Code block
-
-    Returns
-    -------
-    str
-        Modified code bloc with assert statements
-    """
-
-    lines: List[str] = code.split("\n")
-    code = []
-
-    skip = False
-
-    if len(lines) < 2:
-        return code
-
-    for expression, output in zip(lines[:-1], lines[1:]):
-        if skip:
-            skip = not skip
-            continue
-
-        if output.startswith(OUTPUT_PATTERN):
-            expression = f"v = {expression}"
-
-            output = output[len(OUTPUT_PATTERN) :].replace('"', r"\"")
-            output = f'assert repr(v) == "{output}" or str(v) == "{output}"'
-
-            code.append(expression)
-            code.append(output)
-
-            skip = True
-
-        else:
-            code.append(expression)
-
-    if not skip:
-        code.append(output)
-
-    return "\n".join(code)
-
-
-def remove_indentation(code: str, indent: int) -> str:
-    """
-    Remove indentation from a code bloc.
-
-    Parameters
-    ----------
-    code : str
-        Code bloc
-    indent : int
-        Level of indentation
-
-    Returns
-    -------
-    str
-        Modified code bloc
-    """
-
-    if not indent:
-        return code
-
-    lines = []
-
-    for line in code.split("\n"):
-        lines.append(line[indent:])
-
-    return "\n".join(lines)
-
-
-def grab_code_blocks(docstring: str, lang="python") -> List[str]:
-    """
-    Given a docstring, grab all the markdown codeblocks found in docstring.
-
-    Parameters
-    ----------
-    docstring : str
-        Full text.
-    lang : str, optional
-        Language to execute, by default "python"
-
-    Returns
-    -------
-    List[str]
-        Extracted code blocks
-    """
-    codeblocks = []
-
-    for match in BLOCK_PATTERN.finditer(docstring):
-        d = match.groupdict()
-
-        if d["skip"]:
-            continue
-
-        if lang in d["title"]:
-            code = remove_indentation(d["code"], len(d["indent"]))
-            code = check_outputs(code)
-            codeblocks.append(code)
-
-    return codeblocks
-
-
-def printer(code: str) -> None:
-    """
-    Prints a code bloc with lines for easier debugging.
-
-    Parameters
-    ----------
-    code : str
-        Code bloc.
-    """
-    lines = []
-    for i, line in enumerate(code.split("\n")):
-        lines.append(f"{i + 1:03}  {line}")
-
-    print("\n".join(lines))
-
-
-def check_docstring(obj, lang=""):
-    """
-    Given a function, test the contents of the docstring.
-    """
-    for b in grab_code_blocks(obj.__doc__, lang=lang):
-        try:
-            exec(b, {"__MODULE__": "__main__"})
-        except Exception:
-            print(f"Error Encountered in `{obj.__name__}`. Caused by:\n")
-            printer(b)
-            raise
-
-
-def check_raw_string(raw, lang="python"):
-    """
-    Given a raw string, test the contents.
-    """
-    for b in grab_code_blocks(raw, lang=lang):
-        try:
-            exec(b, {"__MODULE__": "__main__"})
-        except Exception:
-            printer(b)
-            raise
-
-
-def check_raw_file_full(raw, lang="python"):
-    all_code = "\n".join(grab_code_blocks(raw, lang=lang))
-    try:
-        exec(all_code, {"__MODULE__": "__main__"})
-    except Exception:
-        printer(all_code)
-        raise
-
-
-def check_md_file(path: Path, memory: bool = False) -> None:
-    """
-    Given a markdown file, parse the contents for Python code blocs
-    and check that each independant bloc does not cause an error.
-
-    Parameters
-    ----------
-    path : Path
-        Path to the markdown file to execute.
-    memory : bool, optional
-        Whether to keep results from one bloc to the next, by default `#!python False`
-    """
-    text = Path(path).read_text()
-    if memory:
-        check_raw_file_full(text, lang="python")
-    else:
-        check_raw_string(text, lang="python")
diff --git a/edsnlp/utils/collections.py b/edsnlp/utils/collections.py
index 47db54aea..e69de29bb 100644
--- a/edsnlp/utils/collections.py
+++ b/edsnlp/utils/collections.py
@@ -1,13 +0,0 @@
-def dedup(sequence, key=None):
-    """
-    Deduplicate a sequence, keeping the last occurrence of each item.
-
-    Parameters
-    ----------
-    sequence : Sequence
-        Sequence to deduplicate
-    key : Callable, optional
-        Key function to use for deduplication, by default None
-    """
-    key = (lambda x: x) if key is None else key
-    return list({key(item): item for item in sequence}.values())
diff --git a/pyproject.toml b/pyproject.toml
index cc2fae411..1c5150953 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -85,60 +85,114 @@ version = { attr = "edsnlp.__version__" }
 where = ["."]
 
 [project.entry-points."spacy_factories"]
-"eds.matcher" = "edsnlp.pipelines.factories:matcher"
-"eds.terminology" = "edsnlp.pipelines.factories:terminology"
-"eds.contextual_matcher" = "edsnlp.pipelines.factories:contextual_matcher"
-"eds.endlines" = "edsnlp.pipelines.factories:endlines"
-"eds.sentences" = "edsnlp.pipelines.factories:sentences"
-"eds.normalizer" = "edsnlp.pipelines.factories:normalizer"
-"eds.accents" = "edsnlp.pipelines.factories:accents"
-"eds.spaces" = "edsnlp.pipelines.factories:spaces"
-"eds.lowercase" = "edsnlp.pipelines.factories:remove_lowercase"
-"eds.pollution" = "edsnlp.pipelines.factories:pollution"
-"eds.quotes" = "edsnlp.pipelines.factories:quotes"
-"eds.charlson" = "edsnlp.pipelines.factories:charlson"
-"eds.sofa" = "edsnlp.pipelines.factories:sofa"
-"eds.elston_ellis" = "edsnlp.pipelines.factories:elston_ellis"
-"eds.tnm" = "edsnlp.pipelines.factories:tnm"
-"eds.priority" = "edsnlp.pipelines.factories:priority"
-"eds.ccmu" = "edsnlp.pipelines.factories:ccmu"
-"eds.gemsa" = "edsnlp.pipelines.factories:gemsa"
-"eds.covid" = "edsnlp.pipelines.factories:covid"
-"eds.cim10" = "edsnlp.pipelines.factories:cim10"
-"eds.history" = "edsnlp.pipelines.factories:history"
-"eds.family" = "edsnlp.pipelines.factories:family"
-"eds.hypothesis" = "edsnlp.pipelines.factories:hypothesis"
-"eds.negation" = "edsnlp.pipelines.factories:negation"
-"eds.rspeech" = "edsnlp.pipelines.factories:rspeech"
-"eds.consultation_dates" = "edsnlp.pipelines.factories:consultation_dates"
-"eds.dates" = "edsnlp.pipelines.factories:dates"
-"eds.reason" = "edsnlp.pipelines.factories:reason"
-"eds.sections" = "edsnlp.pipelines.factories:sections"
-"eds.context" = "edsnlp.pipelines.factories:context"
-"eds.measurements" = "edsnlp.pipelines.factories:measurements"
-"eds.drugs" = "edsnlp.pipelines.factories:drugs"
-"eds.nested_ner" = "edsnlp.pipelines.factories:nested_ner"
-"eds.span_qualifier" = "edsnlp.pipelines.trainable.span_qualifier.factory:create_component"
-"eds.adicap" = "edsnlp.pipelines.factories:adicap"
-"eds.umls" = "edsnlp.pipelines.factories:umls"
-"eds.diabetes" = "edsnlp.pipelines.factories:diabetes"
-"eds.tobacco" = "edsnlp.pipelines.factories:tobacco"
-"eds.aids" = "edsnlp.pipelines.factories:aids"
-"eds.lymphoma" = "edsnlp.pipelines.factories:lymphoma"
-"eds.leukemia" = "edsnlp.pipelines.factories:leukemia"
-"eds.solid_tumor" = "edsnlp.pipelines.factories:solid_tumor"
-"eds.ckd" = "edsnlp.components:ckd"
-"eds.hemiplegia" = "edsnlp.components:hemiplegia"
-"eds.liver_disease" = "edsnlp.components:liver_disease"
-"eds.peptic_ulcer_disease" = "edsnlp.components:peptic_ulcer_disease"
-"eds.connective_tissue_disease" = "edsnlp.components:connective_tissue_disease"
-"eds.copd" = "edsnlp.components:copd"
-"eds.dementia" = "edsnlp.components:dementia"
-"eds.cerebrovascular_accident" = "edsnlp.components:cerebrovascular_accident"
-"eds.peripheral_vascular_disease" = "edsnlp.components:peripheral_vascular_disease"
-"eds.congestive_heart_failure" = "edsnlp.components:congestive_heart_failure"
-"eds.myocardial_infarction" = "edsnlp.components:myocardial_infarction"
-"eds.alcohol" = "edsnlp.components:alcohol"
+# Core
+"eds.accents"                     = "edsnlp.pipelines.core.normalizer.accents.factory:create_component"
+"eds.context"                     = "edsnlp.pipelines.core.context.factory:create_component"
+"eds.contextual_matcher"          = "edsnlp.pipelines.core.contextual_matcher.factory:create_component"
+"eds.endlines"                    = "edsnlp.pipelines.core.endlines.factory:create_component"
+"eds.matcher"                     = "edsnlp.pipelines.core.matcher.factory:create_component"
+"eds.normalizer"                  = "edsnlp.pipelines.core.normalizer.factory:create_component"
+"eds.pollution"                   = "edsnlp.pipelines.core.normalizer.pollution.factory:create_component"
+"eds.quotes"                      = "edsnlp.pipelines.core.normalizer.quotes.factory:create_component"
+"eds.remove_lowercase"            = "edsnlp.pipelines.core.normalizer.remove_lowercase.factory:create_component"
+"eds.sentences"                   = "edsnlp.pipelines.core.sentences.factory:create_component"
+"eds.spaces"                      = "edsnlp.pipelines.core.normalizer.spaces.factory:create_component"
+"eds.terminology"                 = "edsnlp.pipelines.core.terminology.factory:create_component"
+
+# NER
+"eds.adicap"                      = "edsnlp.pipelines.ner.adicap.factory:create_component"
+"eds.ccmu"                        = "edsnlp.pipelines.ner.scores.emergency.ccmu.factory:create_component"
+"eds.charlson"                    = "edsnlp.pipelines.ner.scores.charlson.factory:create_component"
+"eds.cim10"                       = "edsnlp.pipelines.ner.cim10.factory:create_component"
+"eds.covid"                       = "edsnlp.pipelines.ner.covid.factory:create_component"
+"eds.drugs"                       = "edsnlp.pipelines.ner.drugs.factory:create_component"
+"eds.elston_ellis"                = "edsnlp.pipelines.ner.scores.elston_ellis.factory:create_component"
+"eds.gemsa"                       = "edsnlp.pipelines.ner.scores.emergency.gemsa.factory:create_component"
+"eds.priority"                    = "edsnlp.pipelines.ner.scores.emergency.priority.factory:create_component"
+"eds.score"                       = "edsnlp.pipelines.ner.scores.factory:create_component"
+"eds.sofa"                        = "edsnlp.pipelines.ner.scores.sofa.factory:create_component"
+"eds.tnm"                         = "edsnlp.pipelines.ner.tnm.factory:create_component"
+"eds.umls"                        = "edsnlp.pipelines.ner.umls.factory:create_component"
+
+# NER/Comorbidities
+"eds.aids"                        = "edsnlp.pipelines.ner.disorders.aids.factory:create_component"
+"eds.alcohol"                     = "edsnlp.pipelines.ner.behaviors.alcohol.factory:create_component"
+"eds.cerebrovascular_accident"    = "edsnlp.pipelines.ner.disorders.cerebrovascular_accident.factory:create_component"
+"eds.ckd"                         = "edsnlp.pipelines.ner.disorders.ckd.factory:create_component"
+"eds.congestive_heart_failure"    = "edsnlp.pipelines.ner.disorders.congestive_heart_failure.factory:create_component"
+"eds.connective_tissue_disease"   = "edsnlp.pipelines.ner.disorders.connective_tissue_disease.factory:create_component"
+"eds.copd"                        = "edsnlp.pipelines.ner.disorders.copd.factory:create_component"
+"eds.dementia"                    = "edsnlp.pipelines.ner.disorders.dementia.factory:create_component"
+"eds.diabetes"                    = "edsnlp.pipelines.ner.disorders.diabetes.factory:create_component"
+"eds.hemiplegia"                  = "edsnlp.pipelines.ner.disorders.hemiplegia.factory:create_component"
+"eds.leukemia"                    = "edsnlp.pipelines.ner.disorders.leukemia.factory:create_component"
+"eds.liver_disease"               = "edsnlp.pipelines.ner.disorders.liver_disease.factory:create_component"
+"eds.lymphoma"                    = "edsnlp.pipelines.ner.disorders.lymphoma.factory:create_component"
+"eds.myocardial_infarction"       = "edsnlp.pipelines.ner.disorders.myocardial_infarction.factory:create_component"
+"eds.peptic_ulcer_disease"        = "edsnlp.pipelines.ner.disorders.peptic_ulcer_disease.factory:create_component"
+"eds.peripheral_vascular_disease" = "edsnlp.pipelines.ner.disorders.peripheral_vascular_disease.factory:create_component"
+"eds.solid_tumor"                 = "edsnlp.pipelines.ner.disorders.solid_tumor.factory:create_component"
+"eds.tobacco"                     = "edsnlp.pipelines.ner.behaviors.tobacco.factory:create_component"
+
+# Qualifiers
+"eds.family"                      = "edsnlp.pipelines.qualifiers.family.factory:create_component"
+"eds.history"                     = "edsnlp.pipelines.qualifiers.history.factory:create_component"
+"eds.hypothesis"                  = "edsnlp.pipelines.qualifiers.hypothesis.factory:create_component"
+"eds.negation"                    = "edsnlp.pipelines.qualifiers.negation.factory:create_component"
+"eds.reported_speech"             = "edsnlp.pipelines.qualifiers.reported_speech.factory:create_component"
+
+# Misc
+"eds.consultation_dates"          = "edsnlp.pipelines.misc.consultation_dates.factory:create_component"
+"eds.dates"                       = "edsnlp.pipelines.misc.dates.factory:create_component"
+"eds.measurements"                = "edsnlp.pipelines.misc.measurements.factory:create_component"
+"eds.reason"                      = "edsnlp.pipelines.misc.reason.factory:create_component"
+"eds.sections"                    = "edsnlp.pipelines.misc.sections.factory:create_component"
+"eds.tables"                      = "edsnlp.pipelines.misc.tables.factory:create_component"
+
+# Trainable
+"eds.nested_ner"                  = "edsnlp.pipelines.trainable.nested_ner.factory:create_component"
+"eds.span_qualifier"              = "edsnlp.pipelines.trainable.span_qualifier.factory:create_component"
+
+# Deprecated (links to the same factories as above)
+"SOFA"                   = "edsnlp.pipelines.ner.scores.sofa.factory:create_component"
+"accents"                = "edsnlp.pipelines.core.normalizer.accents.factory:create_component"
+"charlson"               = "edsnlp.pipelines.ner.scores.charlson.factory:create_component"
+"consultation_dates"     = "edsnlp.pipelines.misc.consultation_dates.factory:create_component"
+"contextual-matcher"     = "edsnlp.pipelines.core.contextual_matcher.factory:create_component"
+"dates"                  = "edsnlp.pipelines.misc.dates.factory:create_component"
+"eds.AIDS"               = "edsnlp.pipelines.ner.disorders.aids.factory:create_component"
+"eds.CKD"                = "edsnlp.pipelines.ner.disorders.ckd.factory:create_component"
+"eds.COPD"               = "edsnlp.pipelines.ner.disorders.copd.factory:create_component"
+"eds.SOFA"               = "edsnlp.pipelines.ner.scores.sofa.factory:create_component"
+"eds.TNM"                = "edsnlp.pipelines.ner.tnm.factory:create_component"
+"eds.elston-ellis"       = "edsnlp.pipelines.ner.scores.elston_ellis.factory:create_component"
+"eds.elstonellis"        = "edsnlp.pipelines.ner.scores.elston_ellis.factory:create_component"
+"eds.emergency.ccmu"     = "edsnlp.pipelines.ner.scores.emergency.ccmu.factory:create_component"
+"eds.emergency.gemsa"    = "edsnlp.pipelines.ner.scores.emergency.gemsa.factory:create_component"
+"eds.emergency.priority" = "edsnlp.pipelines.ner.scores.emergency.priority.factory:create_component"
+"eds.measures"           = "edsnlp.pipelines.misc.measurements.factory:create_component"
+"eds.remove-lowercase"   = "edsnlp.pipelines.core.normalizer.remove_lowercase.factory:create_component"
+"emergency.ccmu"         = "edsnlp.pipelines.ner.scores.emergency.ccmu.factory:create_component"
+"emergency.gemsa"        = "edsnlp.pipelines.ner.scores.emergency.gemsa.factory:create_component"
+"emergency.priority"     = "edsnlp.pipelines.ner.scores.emergency.priority.factory:create_component"
+"endlines"               = "edsnlp.pipelines.core.endlines.factory:create_component"
+"family"                 = "edsnlp.pipelines.qualifiers.family.factory:create_component"
+"hypothesis"             = "edsnlp.pipelines.qualifiers.hypothesis.factory:create_component"
+"matcher"                = "edsnlp.pipelines.core.matcher.factory:create_component"
+"negation"               = "edsnlp.pipelines.qualifiers.negation.factory:create_component"
+"normalizer"             = "edsnlp.pipelines.core.normalizer.factory:create_component"
+"pollution"              = "edsnlp.pipelines.core.normalizer.pollution.factory:create_component"
+"quotes"                 = "edsnlp.pipelines.core.normalizer.quotes.factory:create_component"
+"reason"                 = "edsnlp.pipelines.misc.reason.factory:create_component"
+"remove-lowercase"       = "edsnlp.pipelines.core.normalizer.remove_lowercase.factory:create_component"
+"reported_speech"        = "edsnlp.pipelines.qualifiers.reported_speech.factory:create_component"
+"rspeech"                = "edsnlp.pipelines.qualifiers.reported_speech.factory:create_component"
+"score"                  = "edsnlp.pipelines.ner.scores.factory:create_component"
+"sections"               = "edsnlp.pipelines.misc.sections.factory:create_component"
+"sentences"              = "edsnlp.pipelines.core.sentences.factory:create_component"
+"spaces"                 = "edsnlp.pipelines.core.normalizer.spaces.factory:create_component"
+"tables"                 = "edsnlp.pipelines.misc.tables.factory:create_component"
+"terminology"            = "edsnlp.pipelines.core.terminology.factory:create_component"
 
 [project.entry-points."spacy_architectures"]
 "eds.stack_crf_ner_model.v1" = "edsnlp.pipelines.trainable.nested_ner.stack_crf_ner:create_model"
@@ -239,16 +293,17 @@ omit-covered-files = false
 # badge-format = "svg"
 
 
-[tool.coverage]
+[tool.coverage.report]
 exclude_lines = [
+    "def __repr__",
     "if __name__ == .__main__.:",
-    "if TYPE_CHECKING:",
-    "if typing.TYPE_CHECKING:",
     "@overload",
     "pragma: no cover",
-    "raise AssertionError",
-    "raise NotImplementedError",
-    "def __repr__",
+    "raise .*Error",
+    "if typing.TYPE_CHECKING:",
+    "if TYPE_CHECKING:",
+    "class .*\\bProtocol\\):",
+    "@(abc\\.)?abstractmethod",
     "Span.set_extension.*",
     "Doc.set_extension.*",
     "Token.set_extension.*",
diff --git a/tests/pipelines/test_pipelines.py b/tests/pipelines/test_pipelines.py
index c6bf38c56..f37604f63 100644
--- a/tests/pipelines/test_pipelines.py
+++ b/tests/pipelines/test_pipelines.py
@@ -6,3 +6,7 @@ def test_pipelines(doc):
     assert anomalie._.negation
 
     assert not doc[0]._.history
+
+
+def test_import_all():
+    import edsnlp.pipelines.factories  # noqa: F401
diff --git a/tests/test_span_args.py b/tests/test_span_args.py
new file mode 100644
index 000000000..0b73681e6
--- /dev/null
+++ b/tests/test_span_args.py
@@ -0,0 +1,32 @@
+from pydantic import validate_arguments
+
+from edsnlp.pipelines.base import (
+    SpanGetterArg,
+    SpanSetterArg,
+    validate_span_getter,
+    validate_span_setter,
+)
+
+
+def test_span_getter():
+    assert validate_span_getter("ents") == {"ents": True}  # bare name
+    assert validate_span_getter(["ents"]) == {"ents": True}  # list of names
+    assert validate_span_getter(["ents", "group"]) == {"ents": True, "group": True}
+    assert validate_span_getter({"grp": True}) == {"grp": True}  # dict unchanged
+    assert validate_span_getter({"grp": ["a", "b", "c"]}) == {"grp": ["a", "b", "c"]}
+
+
+def test_span_setter():
+    assert validate_span_setter("ents") == {"ents": True}  # bare name
+    assert validate_span_setter(["ents"]) == {"ents": True}  # list of names
+    assert validate_span_setter(["ents", "group"]) == {"ents": True, "group": True}
+    assert validate_span_setter({"grp": True}) == {"grp": True}  # dict unchanged
+    assert validate_span_setter({"grp": ["a", "b", "c"]}) == {"grp": ["a", "b", "c"]}
+
+
+def test_validate_args():
+    @validate_arguments  # args are validated against their annotations on call
+    def my_func(span_getter: SpanGetterArg, span_setter: SpanSetterArg):
+        return span_getter, span_setter
+
+    assert my_func("ents", "ents") == ({"ents": True}, {"ents": True})