From 6deb57a9c8eca31e6b353d55eb131c1946125cd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Perceval=20Wajsb=C3=BCrt?= Date: Wed, 13 Sep 2023 12:11:01 +0200 Subject: [PATCH] chore: improve coverage and clean entry points --- edsnlp/components.py | 1 - edsnlp/patch_spacy_dot_components.py | 81 +------ .../core/normalizer/lowercase/factory.py | 25 --- .../pipelines/core/normalizer/normalizer.py | 2 +- .../__init__.py | 0 .../normalizer/remove_lowercase/factory.py | 47 +++++ edsnlp/pipelines/factories.py | 4 +- edsnlp/utils/blocs.py | 198 ------------------ edsnlp/utils/collections.py | 13 -- pyproject.toml | 175 ++++++++++------ tests/pipelines/test_pipelines.py | 4 + tests/test_span_args.py | 32 +++ 12 files changed, 206 insertions(+), 376 deletions(-) delete mode 100644 edsnlp/components.py delete mode 100644 edsnlp/pipelines/core/normalizer/lowercase/factory.py rename edsnlp/pipelines/core/normalizer/{lowercase => remove_lowercase}/__init__.py (100%) create mode 100644 edsnlp/pipelines/core/normalizer/remove_lowercase/factory.py delete mode 100644 edsnlp/utils/blocs.py create mode 100644 tests/test_span_args.py diff --git a/edsnlp/components.py b/edsnlp/components.py deleted file mode 100644 index 30198e08c..000000000 --- a/edsnlp/components.py +++ /dev/null @@ -1 +0,0 @@ -from edsnlp.pipelines.factories import * # noqa : used to import pipelines diff --git a/edsnlp/patch_spacy_dot_components.py b/edsnlp/patch_spacy_dot_components.py index 7f1b62dab..61383e096 100644 --- a/edsnlp/patch_spacy_dot_components.py +++ b/edsnlp/patch_spacy_dot_components.py @@ -5,7 +5,6 @@ from spacy.errors import Errors from spacy.language import FactoryMeta from spacy.pipe_analysis import validate_attrs -from spacy.pipeline import Pipe from spacy.util import SimpleFrozenDict, SimpleFrozenList, registry @@ -51,10 +50,11 @@ def factory( if not isinstance(name, str): raise ValueError(Errors.E963.format(decorator="factory")) if not isinstance(default_config, dict): - err = Errors.E962.format( - style="default config", name=name, cfg_type=type(default_config) + raise ValueError( + Errors.E962.format( + style="default config", name=name, cfg_type=type(default_config) + ) ) - raise ValueError(err) def add_factory(factory_func: Callable) -> Callable: internal_name = cls.get_factory_name(name) @@ -102,77 +102,4 @@ def add_factory(factory_func: Callable) -> Callable: return add_factory -@classmethod -def component( - cls, - name: str, - *, - assigns: Iterable[str] = SimpleFrozenList(), - requires: Iterable[str] = SimpleFrozenList(), - retokenizes: bool = False, - func: Optional["Pipe"] = None, -) -> Callable[..., Any]: - """ - Patched from spaCy to allow back dots in factory - names (https://github.com/aphp/edsnlp/pull/152) - - Register a new pipeline component. Can be used for stateless function - components that don't require a separate factory. Can be used as a - decorator on a function or classmethod, or called as a function with the - factory provided as the func keyword argument. To create a component and - add it to the pipeline, you can use nlp.add_pipe(name). - - name (str): The name of the component factory. - assigns (Iterable[str]): Doc/Token attributes assigned by this component, - e.g. "token.ent_id". Used for pipeline analysis. - requires (Iterable[str]): Doc/Token attributes required by this component, - e.g. "token.ent_id". Used for pipeline analysis. - retokenizes (bool): Whether the component changes the tokenization. - Used for pipeline analysis. - func (Optional[Callable]): Factory function if not used as a decorator. - - DOCS: https://spacy.io/api/language#component - """ - if name is not None: - if not isinstance(name, str): - raise ValueError(Errors.E963.format(decorator="component")) - component_name = name if name is not None else util.get_object_name(func) - - def add_component(component_func: "Pipe") -> Callable: - if isinstance(func, type): # function is a class - raise ValueError(Errors.E965.format(name=component_name)) - - def factory_func(nlp, name: str) -> "Pipe": - return component_func - - internal_name = cls.get_factory_name(name) - if internal_name in registry.factories: - # We only check for the internal name here – it's okay if it's a - # subclass and the base class has a factory of the same name. We - # also only raise if the function is different to prevent raising - # if module is reloaded. It's hacky, but we need to check the - # existing functure for a closure and whether that's identical - # to the component function (because factory_func created above - # will always be different, even for the same function) - existing_func = registry.factories.get(internal_name) - closure = existing_func.__closure__ - wrapped = [c.cell_contents for c in closure][0] if closure else None - if util.is_same_func(wrapped, component_func): - factory_func = existing_func # noqa: F811 - - cls.factory( - component_name, - assigns=assigns, - requires=requires, - retokenizes=retokenizes, - func=factory_func, - ) - return component_func - - if func is not None: # Support non-decorator use cases - return add_component(func) - return add_component - - spacy.Language.factory = factory -spacy.Language.component = component diff --git a/edsnlp/pipelines/core/normalizer/lowercase/factory.py b/edsnlp/pipelines/core/normalizer/lowercase/factory.py deleted file mode 100644 index 5205db840..000000000 --- a/edsnlp/pipelines/core/normalizer/lowercase/factory.py +++ /dev/null @@ -1,25 +0,0 @@ -from spacy.language import Language -from spacy.tokens import Doc - - -@Language.component("remove-lowercase", assigns=["token.norm"]) -@Language.component("eds.remove-lowercase", assigns=["token.norm"]) -def remove_lowercase(doc: Doc): - """ - Add case on the `NORM` custom attribute. Should always be applied first. - - Parameters - ---------- - doc : Doc - The spaCy `Doc` object. - - Returns - ------- - Doc - The document, with case put back in `NORM`. - """ - - for token in doc: - token.norm_ = token.text - - return doc diff --git a/edsnlp/pipelines/core/normalizer/normalizer.py b/edsnlp/pipelines/core/normalizer/normalizer.py index 69ac645b9..3d598c572 100644 --- a/edsnlp/pipelines/core/normalizer/normalizer.py +++ b/edsnlp/pipelines/core/normalizer/normalizer.py @@ -4,9 +4,9 @@ from spacy.tokens import Doc from .accents.accents import AccentsConverter -from .lowercase.factory import remove_lowercase from .pollution.pollution import PollutionTagger from .quotes.quotes import QuotesConverter +from .remove_lowercase.factory import remove_lowercase from .spaces.spaces import SpacesTagger diff --git a/edsnlp/pipelines/core/normalizer/lowercase/__init__.py b/edsnlp/pipelines/core/normalizer/remove_lowercase/__init__.py similarity index 100% rename from edsnlp/pipelines/core/normalizer/lowercase/__init__.py rename to edsnlp/pipelines/core/normalizer/remove_lowercase/__init__.py diff --git a/edsnlp/pipelines/core/normalizer/remove_lowercase/factory.py b/edsnlp/pipelines/core/normalizer/remove_lowercase/factory.py new file mode 100644 index 000000000..e1018aed2 --- /dev/null +++ b/edsnlp/pipelines/core/normalizer/remove_lowercase/factory.py @@ -0,0 +1,47 @@ +from spacy.language import Language +from spacy.tokens import Doc + +from edsnlp.utils.deprecation import deprecated_factory + + +def remove_lowercase(doc: Doc): + """ + Add case on the `NORM` custom attribute. Should always be applied first. + + Parameters + ---------- + doc : Doc + The spaCy `Doc` object. + + Returns + ------- + Doc + The document, with case put back in `NORM`. + """ + + for token in doc: + token.norm_ = token.text + + return doc + + +@deprecated_factory("remove-lowercase", "eds.remove_lowercase", assigns=["token.norm"]) +@deprecated_factory( + "eds.remove-lowercase", "eds.remove_lowercase", assigns=["token.norm"] +) +@Language.factory("eds.remove_lowercase", assigns=["token.norm"]) +def create_component( + nlp: Language, + name: str, +): + """ + Add case on the `NORM` custom attribute. Should always be applied first. + + Parameters + ---------- + nlp : Language + The pipeline object. + name : str + The name of the component. + """ + return remove_lowercase # pragma: no cover diff --git a/edsnlp/pipelines/factories.py b/edsnlp/pipelines/factories.py index 0dceed4bb..0460491b4 100644 --- a/edsnlp/pipelines/factories.py +++ b/edsnlp/pipelines/factories.py @@ -6,9 +6,11 @@ from .core.matcher.factory import create_component as matcher from .core.normalizer.accents.factory import create_component as accents from .core.normalizer.factory import create_component as normalizer -from .core.normalizer.lowercase.factory import remove_lowercase from .core.normalizer.pollution.factory import create_component as pollution from .core.normalizer.quotes.factory import create_component as quotes +from .core.normalizer.remove_lowercase.factory import ( + create_component as remove_lowercase, +) from .core.normalizer.spaces.factory import create_component as spaces from .core.sentences.factory import create_component as sentences from .core.terminology.factory import create_component as terminology diff --git a/edsnlp/utils/blocs.py b/edsnlp/utils/blocs.py deleted file mode 100644 index f00bf8de3..000000000 --- a/edsnlp/utils/blocs.py +++ /dev/null @@ -1,198 +0,0 @@ -""" -Utility that extracts code blocs and runs them. - -Largely inspired by https://github.com/koaning/mktestdocs -""" - -import re -from pathlib import Path -from typing import List - -BLOCK_PATTERN = re.compile( - ( - r"((?P)\s+)?(?P *)" - r"```(?P.*?)\n(?P<code>.+?)```" - ), - flags=re.DOTALL, -) -OUTPUT_PATTERN = "# Out: " - - -def check_outputs(code: str) -> str: - """ - Looks for output patterns, and modifies the bloc: - - 1. The preceding line becomes `#!python v = expr` - 2. The output line becomes an `#!python assert` statement - - Parameters - ---------- - code : str - Code block - - Returns - ------- - str - Modified code bloc with assert statements - """ - - lines: List[str] = code.split("\n") - code = [] - - skip = False - - if len(lines) < 2: - return code - - for expression, output in zip(lines[:-1], lines[1:]): - if skip: - skip = not skip - continue - - if output.startswith(OUTPUT_PATTERN): - expression = f"v = {expression}" - - output = output[len(OUTPUT_PATTERN) :].replace('"', r"\"") - output = f'assert repr(v) == "{output}" or str(v) == "{output}"' - - code.append(expression) - code.append(output) - - skip = True - - else: - code.append(expression) - - if not skip: - code.append(output) - - return "\n".join(code) - - -def remove_indentation(code: str, indent: int) -> str: - """ - Remove indentation from a code bloc. - - Parameters - ---------- - code : str - Code bloc - indent : int - Level of indentation - - Returns - ------- - str - Modified code bloc - """ - - if not indent: - return code - - lines = [] - - for line in code.split("\n"): - lines.append(line[indent:]) - - return "\n".join(lines) - - -def grab_code_blocks(docstring: str, lang="python") -> List[str]: - """ - Given a docstring, grab all the markdown codeblocks found in docstring. - - Parameters - ---------- - docstring : str - Full text. - lang : str, optional - Language to execute, by default "python" - - Returns - ------- - List[str] - Extracted code blocks - """ - codeblocks = [] - - for match in BLOCK_PATTERN.finditer(docstring): - d = match.groupdict() - - if d["skip"]: - continue - - if lang in d["title"]: - code = remove_indentation(d["code"], len(d["indent"])) - code = check_outputs(code) - codeblocks.append(code) - - return codeblocks - - -def printer(code: str) -> None: - """ - Prints a code bloc with lines for easier debugging. - - Parameters - ---------- - code : str - Code bloc. - """ - lines = [] - for i, line in enumerate(code.split("\n")): - lines.append(f"{i + 1:03} {line}") - - print("\n".join(lines)) - - -def check_docstring(obj, lang=""): - """ - Given a function, test the contents of the docstring. - """ - for b in grab_code_blocks(obj.__doc__, lang=lang): - try: - exec(b, {"__MODULE__": "__main__"}) - except Exception: - print(f"Error Encountered in `{obj.__name__}`. Caused by:\n") - printer(b) - raise - - -def check_raw_string(raw, lang="python"): - """ - Given a raw string, test the contents. - """ - for b in grab_code_blocks(raw, lang=lang): - try: - exec(b, {"__MODULE__": "__main__"}) - except Exception: - printer(b) - raise - - -def check_raw_file_full(raw, lang="python"): - all_code = "\n".join(grab_code_blocks(raw, lang=lang)) - try: - exec(all_code, {"__MODULE__": "__main__"}) - except Exception: - printer(all_code) - raise - - -def check_md_file(path: Path, memory: bool = False) -> None: - """ - Given a markdown file, parse the contents for Python code blocs - and check that each independant bloc does not cause an error. - - Parameters - ---------- - path : Path - Path to the markdown file to execute. - memory : bool, optional - Whether to keep results from one bloc to the next, by default `#!python False` - """ - text = Path(path).read_text() - if memory: - check_raw_file_full(text, lang="python") - else: - check_raw_string(text, lang="python") diff --git a/edsnlp/utils/collections.py b/edsnlp/utils/collections.py index 47db54aea..e69de29bb 100644 --- a/edsnlp/utils/collections.py +++ b/edsnlp/utils/collections.py @@ -1,13 +0,0 @@ -def dedup(sequence, key=None): - """ - Deduplicate a sequence, keeping the last occurrence of each item. - - Parameters - ---------- - sequence : Sequence - Sequence to deduplicate - key : Callable, optional - Key function to use for deduplication, by default None - """ - key = (lambda x: x) if key is None else key - return list({key(item): item for item in sequence}.values()) diff --git a/pyproject.toml b/pyproject.toml index cc2fae411..1c5150953 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,60 +85,114 @@ version = { attr = "edsnlp.__version__" } where = ["."] [project.entry-points."spacy_factories"] -"eds.matcher" = "edsnlp.pipelines.factories:matcher" -"eds.terminology" = "edsnlp.pipelines.factories:terminology" -"eds.contextual_matcher" = "edsnlp.pipelines.factories:contextual_matcher" -"eds.endlines" = "edsnlp.pipelines.factories:endlines" -"eds.sentences" = "edsnlp.pipelines.factories:sentences" -"eds.normalizer" = "edsnlp.pipelines.factories:normalizer" -"eds.accents" = "edsnlp.pipelines.factories:accents" -"eds.spaces" = "edsnlp.pipelines.factories:spaces" -"eds.lowercase" = "edsnlp.pipelines.factories:remove_lowercase" -"eds.pollution" = "edsnlp.pipelines.factories:pollution" -"eds.quotes" = "edsnlp.pipelines.factories:quotes" -"eds.charlson" = "edsnlp.pipelines.factories:charlson" -"eds.sofa" = "edsnlp.pipelines.factories:sofa" -"eds.elston_ellis" = "edsnlp.pipelines.factories:elston_ellis" -"eds.tnm" = "edsnlp.pipelines.factories:tnm" -"eds.priority" = "edsnlp.pipelines.factories:priority" -"eds.ccmu" = "edsnlp.pipelines.factories:ccmu" -"eds.gemsa" = "edsnlp.pipelines.factories:gemsa" -"eds.covid" = "edsnlp.pipelines.factories:covid" -"eds.cim10" = "edsnlp.pipelines.factories:cim10" -"eds.history" = "edsnlp.pipelines.factories:history" -"eds.family" = "edsnlp.pipelines.factories:family" -"eds.hypothesis" = "edsnlp.pipelines.factories:hypothesis" -"eds.negation" = "edsnlp.pipelines.factories:negation" -"eds.rspeech" = "edsnlp.pipelines.factories:rspeech" -"eds.consultation_dates" = "edsnlp.pipelines.factories:consultation_dates" -"eds.dates" = "edsnlp.pipelines.factories:dates" -"eds.reason" = "edsnlp.pipelines.factories:reason" -"eds.sections" = "edsnlp.pipelines.factories:sections" -"eds.context" = "edsnlp.pipelines.factories:context" -"eds.measurements" = "edsnlp.pipelines.factories:measurements" -"eds.drugs" = "edsnlp.pipelines.factories:drugs" -"eds.nested_ner" = "edsnlp.pipelines.factories:nested_ner" -"eds.span_qualifier" = "edsnlp.pipelines.trainable.span_qualifier.factory:create_component" -"eds.adicap" = "edsnlp.pipelines.factories:adicap" -"eds.umls" = "edsnlp.pipelines.factories:umls" -"eds.diabetes" = "edsnlp.pipelines.factories:diabetes" -"eds.tobacco" = "edsnlp.pipelines.factories:tobacco" -"eds.aids" = "edsnlp.pipelines.factories:aids" -"eds.lymphoma" = "edsnlp.pipelines.factories:lymphoma" -"eds.leukemia" = "edsnlp.pipelines.factories:leukemia" -"eds.solid_tumor" = "edsnlp.pipelines.factories:solid_tumor" -"eds.ckd" = "edsnlp.components:ckd" -"eds.hemiplegia" = "edsnlp.components:hemiplegia" -"eds.liver_disease" = "edsnlp.components:liver_disease" -"eds.peptic_ulcer_disease" = "edsnlp.components:peptic_ulcer_disease" -"eds.connective_tissue_disease" = "edsnlp.components:connective_tissue_disease" -"eds.copd" = "edsnlp.components:copd" -"eds.dementia" = "edsnlp.components:dementia" -"eds.cerebrovascular_accident" = "edsnlp.components:cerebrovascular_accident" -"eds.peripheral_vascular_disease" = "edsnlp.components:peripheral_vascular_disease" -"eds.congestive_heart_failure" = "edsnlp.components:congestive_heart_failure" -"eds.myocardial_infarction" = "edsnlp.components:myocardial_infarction" -"eds.alcohol" = "edsnlp.components:alcohol" +# Core +"eds.accents" = "edsnlp.pipelines.core.normalizer.accents.factory:create_component" +"eds.context" = "edsnlp.pipelines.core.context.factory:create_component" +"eds.contextual_matcher" = "edsnlp.pipelines.core.contextual_matcher.factory:create_component" +"eds.endlines" = "edsnlp.pipelines.core.endlines.factory:create_component" +"eds.matcher" = "edsnlp.pipelines.core.matcher.factory:create_component" +"eds.normalizer" = "edsnlp.pipelines.core.normalizer.factory:create_component" +"eds.pollution" = "edsnlp.pipelines.core.normalizer.pollution.factory:create_component" +"eds.quotes" = "edsnlp.pipelines.core.normalizer.quotes.factory:create_component" +"eds.remove_lowercase" = "edsnlp.pipelines.core.normalizer.remove_lowercase.factory:create_component" +"eds.sentences" = "edsnlp.pipelines.core.sentences.factory:create_component" +"eds.spaces" = "edsnlp.pipelines.core.normalizer.spaces.factory:create_component" +"eds.terminology" = "edsnlp.pipelines.core.terminology.factory:create_component" + +# NER +"eds.adicap" = "edsnlp.pipelines.ner.adicap.factory:create_component" +"eds.ccmu" = "edsnlp.pipelines.ner.scores.emergency.ccmu.factory:create_component" +"eds.charlson" = "edsnlp.pipelines.ner.scores.charlson.factory:create_component" +"eds.cim10" = "edsnlp.pipelines.ner.cim10.factory:create_component" +"eds.covid" = "edsnlp.pipelines.ner.covid.factory:create_component" +"eds.drugs" = "edsnlp.pipelines.ner.drugs.factory:create_component" +"eds.elston_ellis" = "edsnlp.pipelines.ner.scores.elston_ellis.factory:create_component" +"eds.gemsa" = "edsnlp.pipelines.ner.scores.emergency.gemsa.factory:create_component" +"eds.priority" = "edsnlp.pipelines.ner.scores.emergency.priority.factory:create_component" +"eds.score" = "edsnlp.pipelines.ner.scores.factory:create_component" +"eds.sofa" = "edsnlp.pipelines.ner.scores.sofa.factory:create_component" +"eds.tnm" = "edsnlp.pipelines.ner.tnm.factory:create_component" +"eds.umls" = "edsnlp.pipelines.ner.umls.factory:create_component" + +# NER/Comorbidities +"eds.aids" = "edsnlp.pipelines.ner.disorders.aids.factory:create_component" +"eds.alcohol" = "edsnlp.pipelines.ner.behaviors.alcohol.factory:create_component" +"eds.cerebrovascular_accident" = "edsnlp.pipelines.ner.disorders.cerebrovascular_accident.factory:create_component" +"eds.ckd" = "edsnlp.pipelines.ner.disorders.ckd.factory:create_component" +"eds.congestive_heart_failure" = "edsnlp.pipelines.ner.disorders.congestive_heart_failure.factory:create_component" +"eds.connective_tissue_disease" = "edsnlp.pipelines.ner.disorders.connective_tissue_disease.factory:create_component" +"eds.copd" = "edsnlp.pipelines.ner.disorders.copd.factory:create_component" +"eds.dementia" = "edsnlp.pipelines.ner.disorders.dementia.factory:create_component" +"eds.diabetes" = "edsnlp.pipelines.ner.disorders.diabetes.factory:create_component" +"eds.hemiplegia" = "edsnlp.pipelines.ner.disorders.hemiplegia.factory:create_component" +"eds.leukemia" = "edsnlp.pipelines.ner.disorders.leukemia.factory:create_component" +"eds.liver_disease" = "edsnlp.pipelines.ner.disorders.liver_disease.factory:create_component" +"eds.lymphoma" = "edsnlp.pipelines.ner.disorders.lymphoma.factory:create_component" +"eds.myocardial_infarction" = "edsnlp.pipelines.ner.disorders.myocardial_infarction.factory:create_component" +"eds.peptic_ulcer_disease" = "edsnlp.pipelines.ner.disorders.peptic_ulcer_disease.factory:create_component" +"eds.peripheral_vascular_disease" = "edsnlp.pipelines.ner.disorders.peripheral_vascular_disease.factory:create_component" +"eds.solid_tumor" = "edsnlp.pipelines.ner.disorders.solid_tumor.factory:create_component" +"eds.tobacco" = "edsnlp.pipelines.ner.behaviors.tobacco.factory:create_component" + +# Qualifiers +"eds.family" = "edsnlp.pipelines.qualifiers.family.factory:create_component" +"eds.history" = "edsnlp.pipelines.qualifiers.history.factory:create_component" +"eds.hypothesis" = "edsnlp.pipelines.qualifiers.hypothesis.factory:create_component" +"eds.negation" = "edsnlp.pipelines.qualifiers.negation.factory:create_component" +"eds.reported_speech" = "edsnlp.pipelines.qualifiers.reported_speech.factory:create_component" + +# Misc +"eds.consultation_dates" = "edsnlp.pipelines.misc.consultation_dates.factory:create_component" +"eds.dates" = "edsnlp.pipelines.misc.dates.factory:create_component" +"eds.measurements" = "edsnlp.pipelines.misc.measurements.factory:create_component" +"eds.reason" = "edsnlp.pipelines.misc.reason.factory:create_component" +"eds.sections" = "edsnlp.pipelines.misc.sections.factory:create_component" +"eds.tables" = "edsnlp.pipelines.misc.tables.factory:create_component" + +# Trainable +"eds.nested_ner" = "edsnlp.pipelines.trainable.nested_ner.factory:create_component" +"eds.span_qualifier" = "edsnlp.pipelines.trainable.span_qualifier.factory:create_component" + +# Deprecated (links to the same factories as above) +"SOFA" = "edsnlp.pipelines.ner.scores.sofa.factory:create_component" +"accents" = "edsnlp.pipelines.core.normalizer.accents.factory:create_component" +"charlson" = "edsnlp.pipelines.ner.scores.charlson.factory:create_component" +"consultation_dates" = "edsnlp.pipelines.misc.consultation_dates.factory:create_component" +"contextual-matcher" = "edsnlp.pipelines.core.contextual_matcher.factory:create_component" +"dates" = "edsnlp.pipelines.misc.dates.factory:create_component" +"eds.AIDS" = "edsnlp.pipelines.ner.disorders.aids.factory:create_component" +"eds.CKD" = "edsnlp.pipelines.ner.disorders.ckd.factory:create_component" +"eds.COPD" = "edsnlp.pipelines.ner.disorders.copd.factory:create_component" +"eds.SOFA" = "edsnlp.pipelines.ner.scores.sofa.factory:create_component" +"eds.TNM" = "edsnlp.pipelines.ner.tnm.factory:create_component" +"eds.elston-ellis" = "edsnlp.pipelines.ner.scores.elston_ellis.factory:create_component" +"eds.elstonellis" = "edsnlp.pipelines.ner.scores.elston_ellis.factory:create_component" +"eds.emergency.ccmu" = "edsnlp.pipelines.ner.scores.emergency.ccmu.factory:create_component" +"eds.emergency.gemsa" = "edsnlp.pipelines.ner.scores.emergency.gemsa.factory:create_component" +"eds.emergency.priority" = "edsnlp.pipelines.ner.scores.emergency.priority.factory:create_component" +"eds.measures" = "edsnlp.pipelines.misc.measurements.factory:create_component" +"eds.remove-lowercase" = "edsnlp.pipelines.core.normalizer.remove_lowercase.factory:create_component" +"emergency.ccmu" = "edsnlp.pipelines.ner.scores.emergency.ccmu.factory:create_component" +"emergency.gemsa" = "edsnlp.pipelines.ner.scores.emergency.gemsa.factory:create_component" +"emergency.priority" = "edsnlp.pipelines.ner.scores.emergency.priority.factory:create_component" +"endlines" = "edsnlp.pipelines.core.endlines.factory:create_component" +"family" = "edsnlp.pipelines.qualifiers.family.factory:create_component" +"hypothesis" = "edsnlp.pipelines.qualifiers.hypothesis.factory:create_component" +"matcher" = "edsnlp.pipelines.core.matcher.factory:create_component" +"negation" = "edsnlp.pipelines.qualifiers.negation.factory:create_component" +"normalizer" = "edsnlp.pipelines.core.normalizer.factory:create_component" +"pollution" = "edsnlp.pipelines.core.normalizer.pollution.factory:create_component" +"quotes" = "edsnlp.pipelines.core.normalizer.quotes.factory:create_component" +"reason" = "edsnlp.pipelines.misc.reason.factory:create_component" +"remove-lowercase" = "edsnlp.pipelines.core.normalizer.remove_lowercase.factory:create_component" +"reported_speech" = "edsnlp.pipelines.qualifiers.reported_speech.factory:create_component" +"rspeech" = "edsnlp.pipelines.qualifiers.reported_speech.factory:create_component" +"score" = "edsnlp.pipelines.ner.scores.factory:create_component" +"sections" = "edsnlp.pipelines.misc.sections.factory:create_component" +"sentences" = "edsnlp.pipelines.core.sentences.factory:create_component" +"spaces" = "edsnlp.pipelines.core.normalizer.spaces.factory:create_component" +"tables" = "edsnlp.pipelines.misc.tables.factory:create_component" +"terminology" = "edsnlp.pipelines.core.terminology.factory:create_component" [project.entry-points."spacy_architectures"] "eds.stack_crf_ner_model.v1" = "edsnlp.pipelines.trainable.nested_ner.stack_crf_ner:create_model" @@ -239,16 +293,17 @@ omit-covered-files = false # badge-format = "svg" -[tool.coverage] +[tool.coverage.report] exclude_lines = [ + "def __repr__", "if __name__ == .__main__.:", - "if TYPE_CHECKING:", - "if typing.TYPE_CHECKING:", "@overload", "pragma: no cover", - "raise AssertionError", - "raise NotImplementedError", - "def __repr__", + "raise .*Error", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", + "class .*\\bProtocol\\):", + "@(abc\\.)?abstractmethod", "Span.set_extension.*", "Doc.set_extension.*", "Token.set_extension.*", diff --git a/tests/pipelines/test_pipelines.py b/tests/pipelines/test_pipelines.py index c6bf38c56..f37604f63 100644 --- a/tests/pipelines/test_pipelines.py +++ b/tests/pipelines/test_pipelines.py @@ -6,3 +6,7 @@ def test_pipelines(doc): assert anomalie._.negation assert not doc[0]._.history + + +def test_import_all(): + import edsnlp.pipelines.factories # noqa: F401 diff --git a/tests/test_span_args.py b/tests/test_span_args.py new file mode 100644 index 000000000..0b73681e6 --- /dev/null +++ b/tests/test_span_args.py @@ -0,0 +1,32 @@ +from pydantic import validate_arguments + +from edsnlp.pipelines.base import ( + SpanGetterArg, + SpanSetterArg, + validate_span_getter, + validate_span_setter, +) + + +def test_span_getter(): + assert validate_span_getter("ents") == {"ents": True} + assert validate_span_getter(["ents"]) == {"ents": True} + assert validate_span_getter(["ents", "group"]) == {"ents": True, "group": True} + assert validate_span_getter({"grp": True}) == {"grp": True} + assert validate_span_getter({"grp": ["a", "b", "c"]}) == {"grp": ["a", "b", "c"]} + + +def test_span_setter(): + assert validate_span_setter("ents") == {"ents": True} + assert validate_span_setter(["ents"]) == {"ents": True} + assert validate_span_setter(["ents", "group"]) == {"ents": True, "group": True} + assert validate_span_setter({"grp": True}) == {"grp": True} + assert validate_span_setter({"grp": ["a", "b", "c"]}) == {"grp": ["a", "b", "c"]} + + +def test_validate_args(): + @validate_arguments + def my_func(span_getter: SpanGetterArg, span_setter: SpanSetterArg): + return span_getter, span_setter + + assert my_func("ents", "ents") == ({"ents": True}, {"ents": True})