diff --git a/edsnlp/pipelines/core/normalizer/lowercase/factory.py b/edsnlp/pipelines/core/normalizer/lowercase/factory.py
deleted file mode 100644
index 5205db840..000000000
--- a/edsnlp/pipelines/core/normalizer/lowercase/factory.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from spacy.language import Language
-from spacy.tokens import Doc
-
-
-@Language.component("remove-lowercase", assigns=["token.norm"])
-@Language.component("eds.remove-lowercase", assigns=["token.norm"])
-def remove_lowercase(doc: Doc):
-    """
-    Add case on the `NORM` custom attribute. Should always be applied first.
-
-    Parameters
-    ----------
-    doc : Doc
-        The spaCy `Doc` object.
-
-    Returns
-    -------
-    Doc
-        The document, with case put back in `NORM`.
-    """
-
-    for token in doc:
-        token.norm_ = token.text
-
-    return doc
diff --git a/edsnlp/pipelines/core/normalizer/lowercase/__init__.py b/edsnlp/pipelines/core/normalizer/remove_lowercase/__init__.py
similarity index 100%
rename from edsnlp/pipelines/core/normalizer/lowercase/__init__.py
rename to edsnlp/pipelines/core/normalizer/remove_lowercase/__init__.py
diff --git a/edsnlp/pipelines/core/normalizer/remove_lowercase/factory.py b/edsnlp/pipelines/core/normalizer/remove_lowercase/factory.py
new file mode 100644
index 000000000..e1018aed2
--- /dev/null
+++ b/edsnlp/pipelines/core/normalizer/remove_lowercase/factory.py
@@ -0,0 +1,47 @@
+from spacy.language import Language
+from spacy.tokens import Doc
+
+from edsnlp.utils.deprecation import deprecated_factory
+
+
+def remove_lowercase(doc: Doc):
+    """
+    Copy each token's text into `NORM`, restoring case. Should always be applied first.
+
+    Parameters
+    ----------
+    doc : Doc
+        The spaCy `Doc` whose tokens are updated in place.
+
+    Returns
+    -------
+    Doc
+        The same document, with `token.norm_` set to `token.text` for every token.
+    """
+
+    for token in doc:
+        token.norm_ = token.text
+
+    return doc
+
+
+@deprecated_factory("remove-lowercase", "eds.remove_lowercase", assigns=["token.norm"])
+@deprecated_factory(
+    "eds.remove-lowercase", "eds.remove_lowercase", assigns=["token.norm"]
+)
+@Language.factory("eds.remove_lowercase", assigns=["token.norm"])
+def create_component(
+    nlp: Language,
+    name: str,
+):
+    """
+    Factory for `eds.remove_lowercase`; returns the `remove_lowercase` function.
+
+    Parameters
+    ----------
+    nlp : Language
+        The pipeline object (not used by this factory).
+    name : str
+        The name of the component (not used by this factory).
+    """
+    return remove_lowercase  # pragma: no cover
diff --git a/pyproject.toml b/pyproject.toml
index cc2fae411..217fa2740 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -85,60 +85,114 @@ version = { attr = "edsnlp.__version__" }
 where = ["."]
 
 [project.entry-points."spacy_factories"]
-"eds.matcher" = "edsnlp.pipelines.factories:matcher"
-"eds.terminology" = "edsnlp.pipelines.factories:terminology"
-"eds.contextual_matcher" = "edsnlp.pipelines.factories:contextual_matcher"
-"eds.endlines" = "edsnlp.pipelines.factories:endlines"
-"eds.sentences" = "edsnlp.pipelines.factories:sentences"
-"eds.normalizer" = "edsnlp.pipelines.factories:normalizer"
-"eds.accents" = "edsnlp.pipelines.factories:accents"
-"eds.spaces" = "edsnlp.pipelines.factories:spaces"
-"eds.lowercase" = "edsnlp.pipelines.factories:remove_lowercase"
-"eds.pollution" = "edsnlp.pipelines.factories:pollution"
-"eds.quotes" = "edsnlp.pipelines.factories:quotes"
-"eds.charlson" = "edsnlp.pipelines.factories:charlson"
-"eds.sofa" = "edsnlp.pipelines.factories:sofa"
-"eds.elston_ellis" = "edsnlp.pipelines.factories:elston_ellis"
-"eds.tnm" = "edsnlp.pipelines.factories:tnm"
-"eds.priority" = "edsnlp.pipelines.factories:priority"
-"eds.ccmu" = "edsnlp.pipelines.factories:ccmu"
-"eds.gemsa" = "edsnlp.pipelines.factories:gemsa"
-"eds.covid" = "edsnlp.pipelines.factories:covid"
-"eds.cim10" = "edsnlp.pipelines.factories:cim10"
-"eds.history" = "edsnlp.pipelines.factories:history"
-"eds.family" = "edsnlp.pipelines.factories:family"
-"eds.hypothesis" = "edsnlp.pipelines.factories:hypothesis"
-"eds.negation" = "edsnlp.pipelines.factories:negation"
-"eds.rspeech" = "edsnlp.pipelines.factories:rspeech"
-"eds.consultation_dates" = "edsnlp.pipelines.factories:consultation_dates"
-"eds.dates" = "edsnlp.pipelines.factories:dates"
-"eds.reason" = "edsnlp.pipelines.factories:reason"
-"eds.sections" = "edsnlp.pipelines.factories:sections"
-"eds.context" = "edsnlp.pipelines.factories:context"
-"eds.measurements" = "edsnlp.pipelines.factories:measurements"
-"eds.drugs" = "edsnlp.pipelines.factories:drugs"
-"eds.nested_ner" = "edsnlp.pipelines.factories:nested_ner"
-"eds.span_qualifier" = "edsnlp.pipelines.trainable.span_qualifier.factory:create_component"
-"eds.adicap" = "edsnlp.pipelines.factories:adicap"
-"eds.umls" = "edsnlp.pipelines.factories:umls"
-"eds.diabetes" = "edsnlp.pipelines.factories:diabetes"
-"eds.tobacco" = "edsnlp.pipelines.factories:tobacco"
-"eds.aids" = "edsnlp.pipelines.factories:aids"
-"eds.lymphoma" = "edsnlp.pipelines.factories:lymphoma"
-"eds.leukemia" = "edsnlp.pipelines.factories:leukemia"
-"eds.solid_tumor" = "edsnlp.pipelines.factories:solid_tumor"
-"eds.ckd" = "edsnlp.components:ckd"
-"eds.hemiplegia" = "edsnlp.components:hemiplegia"
-"eds.liver_disease" = "edsnlp.components:liver_disease"
-"eds.peptic_ulcer_disease" = "edsnlp.components:peptic_ulcer_disease"
-"eds.connective_tissue_disease" = "edsnlp.components:connective_tissue_disease"
-"eds.copd" = "edsnlp.components:copd"
-"eds.dementia" = "edsnlp.components:dementia"
-"eds.cerebrovascular_accident" = "edsnlp.components:cerebrovascular_accident"
-"eds.peripheral_vascular_disease" = "edsnlp.components:peripheral_vascular_disease"
-"eds.congestive_heart_failure" = "edsnlp.components:congestive_heart_failure"
-"eds.myocardial_infarction" = "edsnlp.components:myocardial_infarction"
-"eds.alcohol" = "edsnlp.components:alcohol"
+# Core
+"eds.accents" = "edsnlp.pipelines.core.normalizer.accents.factory:create_component" +"eds.context" = "edsnlp.pipelines.core.context.factory:create_component" +"eds.contextual_matcher" = "edsnlp.pipelines.core.contextual_matcher.factory:create_component" +"eds.endlines" = "edsnlp.pipelines.core.endlines.factory:create_component" +"eds.matcher" = "edsnlp.pipelines.core.matcher.factory:create_component" +"eds.normalizer" = "edsnlp.pipelines.core.normalizer.factory:create_component" +"eds.pollution" = "edsnlp.pipelines.core.normalizer.pollution.factory:create_component" +"eds.quotes" = "edsnlp.pipelines.core.normalizer.quotes.factory:create_component" +"eds.remove_lowercase" = "edsnlp.pipelines.core.normalizer.remove_lowercase.factory:create_component" +"eds.sentences" = "edsnlp.pipelines.core.sentences.factory:create_component" +"eds.spaces" = "edsnlp.pipelines.core.normalizer.spaces.factory:create_component" +"eds.terminology" = "edsnlp.pipelines.core.terminology.factory:create_component" + +# NER +"eds.adicap" = "edsnlp.pipelines.ner.adicap.factory:create_component" +"eds.ccmu" = "edsnlp.pipelines.ner.scores.emergency.ccmu.factory:create_component" +"eds.charlson" = "edsnlp.pipelines.ner.scores.charlson.factory:create_component" +"eds.cim10" = "edsnlp.pipelines.ner.cim10.factory:create_component" +"eds.covid" = "edsnlp.pipelines.ner.covid.factory:create_component" +"eds.drugs" = "edsnlp.pipelines.ner.drugs.factory:create_component" +"eds.elston_ellis" = "edsnlp.pipelines.ner.scores.elston_ellis.factory:create_component" +"eds.gemsa" = "edsnlp.pipelines.ner.scores.emergency.gemsa.factory:create_component" +"eds.priority" = "edsnlp.pipelines.ner.scores.emergency.priority.factory:create_component" +"eds.score" = "edsnlp.pipelines.ner.scores.factory:create_component" +"eds.sofa" = "edsnlp.pipelines.ner.scores.sofa.factory:create_component" +"eds.tnm" = "edsnlp.pipelines.ner.tnm.factory:create_component" +"eds.umls" = "edsnlp.pipelines.ner.umls.factory:create_component" 
+ +# NER/Comorbidities +"eds.aids" = "edsnlp.pipelines.ner.disorders.aids.factory:create_component" +"eds.alcohol" = "edsnlp.pipelines.ner.behaviors.alcohol.factory:create_component" +"eds.cerebrovascular_accident" = "edsnlp.pipelines.ner.disorders.cerebrovascular_accident.factory:create_component" +"eds.ckd" = "edsnlp.pipelines.ner.disorders.ckd.factory:create_component" +"eds.congestive_heart_failure" = "edsnlp.pipelines.ner.disorders.congestive_heart_failure.factory:create_component" +"eds.connective_tissue_disease" = "edsnlp.pipelines.ner.disorders.connective_tissue_disease.factory:create_component" +"eds.copd" = "edsnlp.pipelines.ner.disorders.copd.factory:create_component" +"eds.dementia" = "edsnlp.pipelines.ner.disorders.dementia.factory:create_component" +"eds.diabetes" = "edsnlp.pipelines.ner.disorders.diabetes.factory:create_component" +"eds.hemiplegia" = "edsnlp.pipelines.ner.disorders.hemiplegia.factory:create_component" +"eds.leukemia" = "edsnlp.pipelines.ner.disorders.leukemia.factory:create_component" +"eds.liver_disease" = "edsnlp.pipelines.ner.disorders.liver_disease.factory:create_component" +"eds.lymphoma" = "edsnlp.pipelines.ner.disorders.lymphoma.factory:create_component" +"eds.myocardial_infarction" = "edsnlp.pipelines.ner.disorders.myocardial_infarction.factory:create_component" +"eds.peptic_ulcer_disease" = "edsnlp.pipelines.ner.disorders.peptic_ulcer_disease.factory:create_component" +"eds.peripheral_vascular_disease" = "edsnlp.pipelines.ner.disorders.peripheral_vascular_disease.factory:create_component" +"eds.solid_tumor" = "edsnlp.pipelines.ner.disorders.solid_tumor.factory:create_component" +"eds.tobacco" = "edsnlp.pipelines.ner.behaviors.tobacco.factory:create_component" + +# Qualifiers +"eds.family" = "edsnlp.pipelines.qualifiers.family.factory:create_component" +"eds.history" = "edsnlp.pipelines.qualifiers.history.factory:create_component" +"eds.hypothesis" = "edsnlp.pipelines.qualifiers.hypothesis.factory:create_component" 
+"eds.negation" = "edsnlp.pipelines.qualifiers.negation.factory:create_component"
+"eds.reported_speech" = "edsnlp.pipelines.qualifiers.reported_speech.factory:create_component"
+
+# Misc
+"eds.consultation_dates" = "edsnlp.pipelines.misc.consultation_dates.factory:create_component"
+"eds.dates" = "edsnlp.pipelines.misc.dates.factory:create_component"
+"eds.measurements" = "edsnlp.pipelines.misc.measurements.factory:create_component"
+"eds.reason" = "edsnlp.pipelines.misc.reason.factory:create_component"
+"eds.sections" = "edsnlp.pipelines.misc.sections.factory:create_component"
+"eds.tables" = "edsnlp.pipelines.misc.tables.factory:create_component"
+
+# Trainable
+"eds.nested_ner" = "edsnlp.pipelines.trainable.nested_ner.factory:create_component"
+"eds.span_qualifier" = "edsnlp.pipelines.trainable.span_qualifier.factory:create_component"
+
+# Deprecated (links to the same factories as above)
+"SOFA" = "edsnlp.pipelines.ner.scores.sofa.factory:create_component"
+"accents" = "edsnlp.pipelines.core.normalizer.accents.factory:create_component"
+"charlson" = "edsnlp.pipelines.ner.scores.charlson.factory:create_component"
+"consultation_dates" = "edsnlp.pipelines.misc.consultation_dates.factory:create_component"
+"contextual-matcher" = "edsnlp.pipelines.core.contextual_matcher.factory:create_component"
+"dates" = "edsnlp.pipelines.misc.dates.factory:create_component"
+"eds.AIDS" = "edsnlp.pipelines.ner.disorders.aids.factory:create_component"
+"eds.CKD" = "edsnlp.pipelines.ner.disorders.ckd.factory:create_component"
+"eds.COPD" = "edsnlp.pipelines.ner.disorders.copd.factory:create_component"
+"eds.SOFA" = "edsnlp.pipelines.ner.scores.sofa.factory:create_component"
+"eds.TNM" = "edsnlp.pipelines.ner.tnm.factory:create_component"
+"eds.elston-ellis" = "edsnlp.pipelines.ner.scores.elston_ellis.factory:create_component"
+"eds.elstonellis" = "edsnlp.pipelines.ner.scores.elston_ellis.factory:create_component"
+"eds.emergency.ccmu" = "edsnlp.pipelines.ner.scores.emergency.ccmu.factory:create_component"
+"eds.emergency.gemsa" = "edsnlp.pipelines.ner.scores.emergency.gemsa.factory:create_component"
+"eds.emergency.priority" = "edsnlp.pipelines.ner.scores.emergency.priority.factory:create_component"
+"eds.measures" = "edsnlp.pipelines.misc.measurements.factory:create_component"
+"eds.remove-lowercase" = "edsnlp.pipelines.core.normalizer.remove_lowercase.factory:create_component"
+"emergency.ccmu" = "edsnlp.pipelines.ner.scores.emergency.ccmu.factory:create_component"
+"emergency.gemsa" = "edsnlp.pipelines.ner.scores.emergency.gemsa.factory:create_component"
+"emergency.priority" = "edsnlp.pipelines.ner.scores.emergency.priority.factory:create_component"
+"endlines" = "edsnlp.pipelines.core.endlines.factory:create_component"
+"family" = "edsnlp.pipelines.qualifiers.family.factory:create_component"
+"hypothesis" = "edsnlp.pipelines.qualifiers.hypothesis.factory:create_component"
+"matcher" = "edsnlp.pipelines.core.matcher.factory:create_component"
+"negation" = "edsnlp.pipelines.qualifiers.negation.factory:create_component"
+"normalizer" = "edsnlp.pipelines.core.normalizer.factory:create_component"
+"pollution" = "edsnlp.pipelines.core.normalizer.pollution.factory:create_component"
+"quotes" = "edsnlp.pipelines.core.normalizer.quotes.factory:create_component"
+"reason" = "edsnlp.pipelines.misc.reason.factory:create_component"
+"remove-lowercase" = "edsnlp.pipelines.core.normalizer.remove_lowercase.factory:create_component"
+"reported_speech" = "edsnlp.pipelines.qualifiers.reported_speech.factory:create_component"
+"rspeech" = "edsnlp.pipelines.qualifiers.reported_speech.factory:create_component"
+"score" = "edsnlp.pipelines.ner.scores.factory:create_component"
+"sections" = "edsnlp.pipelines.misc.sections.factory:create_component"
+"sentences" = "edsnlp.pipelines.core.sentences.factory:create_component"
+"spaces" = "edsnlp.pipelines.core.normalizer.spaces.factory:create_component"
+"tables" = "edsnlp.pipelines.misc.tables.factory:create_component"
+"terminology" = "edsnlp.pipelines.core.terminology.factory:create_component"
 
 [project.entry-points."spacy_architectures"]
 "eds.stack_crf_ner_model.v1" = "edsnlp.pipelines.trainable.nested_ner.stack_crf_ner:create_model"
@@ -241,14 +295,14 @@ omit-covered-files = false
 
 [tool.coverage]
 exclude_lines = [
+    "def __repr__",
     "if __name__ == .__main__.:",
-    "if TYPE_CHECKING:",
-    "if typing.TYPE_CHECKING:",
     "@overload",
     "pragma: no cover",
-    "raise AssertionError",
-    "raise NotImplementedError",
-    "def __repr__",
+    "raise .*Error",
+    "if TYPE_CHECKING:",
+    "class .*\\bProtocol\\):",
+    "@(abc\\.)?abstractmethod",
     "Span.set_extension.*",
     "Doc.set_extension.*",
     "Token.set_extension.*",
diff --git a/tests/pipelines/test_pipelines.py b/tests/pipelines/test_pipelines.py
index c6bf38c56..f37604f63 100644
--- a/tests/pipelines/test_pipelines.py
+++ b/tests/pipelines/test_pipelines.py
@@ -6,3 +6,7 @@ def test_pipelines(doc):
     assert anomalie._.negation
 
     assert not doc[0]._.history
+
+
+def test_import_all():
+    import edsnlp.pipelines.factories  # noqa: F401
diff --git a/tests/test_span_args.py b/tests/test_span_args.py
new file mode 100644
index 000000000..0b73681e6
--- /dev/null
+++ b/tests/test_span_args.py
@@ -0,0 +1,32 @@
+from pydantic import validate_arguments
+
+from edsnlp.pipelines.base import (
+    SpanGetterArg,
+    SpanSetterArg,
+    validate_span_getter,
+    validate_span_setter,
+)
+
+
+def test_span_getter():
+    assert validate_span_getter("ents") == {"ents": True}
+    assert validate_span_getter(["ents"]) == {"ents": True}
+    assert validate_span_getter(["ents", "group"]) == {"ents": True, "group": True}
+    assert validate_span_getter({"grp": True}) == {"grp": True}
+    assert validate_span_getter({"grp": ["a", "b", "c"]}) == {"grp": ["a", "b", "c"]}
+
+
+def test_span_setter():
+    assert validate_span_setter("ents") == {"ents": True}
+    assert validate_span_setter(["ents"]) == {"ents": True}
+    assert validate_span_setter(["ents", "group"]) == {"ents": True, "group": True}
+    assert validate_span_setter({"grp": True}) == {"grp": True}
+    assert validate_span_setter({"grp": ["a", "b", "c"]}) == {"grp": ["a", "b", "c"]}
+
+
+def test_validate_args():
+    @validate_arguments
+    def my_func(span_getter: SpanGetterArg, span_setter: SpanSetterArg):
+        return span_getter, span_setter
+
+    assert my_func("ents", "ents") == ({"ents": True}, {"ents": True})