Skip to content

Commit

Permalink
chore: improve coverage and clean entry points
Browse files Browse the repository at this point in the history
  • Loading branch information
percevalw committed Sep 13, 2023
1 parent 9943328 commit 2c37c13
Show file tree
Hide file tree
Showing 6 changed files with 197 additions and 84 deletions.
25 changes: 0 additions & 25 deletions edsnlp/pipelines/core/normalizer/lowercase/factory.py

This file was deleted.

47 changes: 47 additions & 0 deletions edsnlp/pipelines/core/normalizer/remove_lowercase/factory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from spacy.language import Language
from spacy.tokens import Doc

from edsnlp.utils.deprecation import deprecated_factory


def remove_lowercase(doc: Doc):
    """
    Put the original casing back into the `NORM` attribute of every token.

    Each token's `norm_` is overwritten with the token's verbatim `text`.
    Should always be applied first.

    Parameters
    ----------
    doc : Doc
        The spaCy `Doc` object.

    Returns
    -------
    Doc
        The same document, with case put back in `NORM`.
    """
    for tok in doc:
        # Reset the norm to the raw surface form, casing included.
        tok.norm_ = tok.text
    return doc


# The two `deprecated_factory` decorators keep the legacy names
# "remove-lowercase" and "eds.remove-lowercase" available; presumably they
# redirect to "eds.remove_lowercase" with a deprecation notice (confirm in
# `edsnlp.utils.deprecation`).
@deprecated_factory("remove-lowercase", "eds.remove_lowercase", assigns=["token.norm"])
@deprecated_factory(
    "eds.remove-lowercase", "eds.remove_lowercase", assigns=["token.norm"]
)
@Language.factory("eds.remove_lowercase", assigns=["token.norm"])
def create_component(
    nlp: Language,
    name: str,
):
    """
    Factory for the `eds.remove_lowercase` pipeline component.

    The component is the plain `remove_lowercase` function, which puts the
    case back on the `NORM` attribute. Should always be applied first.

    Parameters
    ----------
    nlp : Language
        The pipeline object. Not used by this factory.
    name : str
        The name of the component. Not used by this factory.

    Returns
    -------
    Callable
        The `remove_lowercase` function itself (the component is stateless).
    """
    return remove_lowercase  # pragma: no cover
173 changes: 114 additions & 59 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,60 +85,114 @@ version = { attr = "edsnlp.__version__" }
where = ["."]

[project.entry-points."spacy_factories"]
"eds.matcher" = "edsnlp.pipelines.factories:matcher"
"eds.terminology" = "edsnlp.pipelines.factories:terminology"
"eds.contextual_matcher" = "edsnlp.pipelines.factories:contextual_matcher"
"eds.endlines" = "edsnlp.pipelines.factories:endlines"
"eds.sentences" = "edsnlp.pipelines.factories:sentences"
"eds.normalizer" = "edsnlp.pipelines.factories:normalizer"
"eds.accents" = "edsnlp.pipelines.factories:accents"
"eds.spaces" = "edsnlp.pipelines.factories:spaces"
"eds.lowercase" = "edsnlp.pipelines.factories:remove_lowercase"
"eds.pollution" = "edsnlp.pipelines.factories:pollution"
"eds.quotes" = "edsnlp.pipelines.factories:quotes"
"eds.charlson" = "edsnlp.pipelines.factories:charlson"
"eds.sofa" = "edsnlp.pipelines.factories:sofa"
"eds.elston_ellis" = "edsnlp.pipelines.factories:elston_ellis"
"eds.tnm" = "edsnlp.pipelines.factories:tnm"
"eds.priority" = "edsnlp.pipelines.factories:priority"
"eds.ccmu" = "edsnlp.pipelines.factories:ccmu"
"eds.gemsa" = "edsnlp.pipelines.factories:gemsa"
"eds.covid" = "edsnlp.pipelines.factories:covid"
"eds.cim10" = "edsnlp.pipelines.factories:cim10"
"eds.history" = "edsnlp.pipelines.factories:history"
"eds.family" = "edsnlp.pipelines.factories:family"
"eds.hypothesis" = "edsnlp.pipelines.factories:hypothesis"
"eds.negation" = "edsnlp.pipelines.factories:negation"
"eds.rspeech" = "edsnlp.pipelines.factories:rspeech"
"eds.consultation_dates" = "edsnlp.pipelines.factories:consultation_dates"
"eds.dates" = "edsnlp.pipelines.factories:dates"
"eds.reason" = "edsnlp.pipelines.factories:reason"
"eds.sections" = "edsnlp.pipelines.factories:sections"
"eds.context" = "edsnlp.pipelines.factories:context"
"eds.measurements" = "edsnlp.pipelines.factories:measurements"
"eds.drugs" = "edsnlp.pipelines.factories:drugs"
"eds.nested_ner" = "edsnlp.pipelines.factories:nested_ner"
"eds.span_qualifier" = "edsnlp.pipelines.trainable.span_qualifier.factory:create_component"
"eds.adicap" = "edsnlp.pipelines.factories:adicap"
"eds.umls" = "edsnlp.pipelines.factories:umls"
"eds.diabetes" = "edsnlp.pipelines.factories:diabetes"
"eds.tobacco" = "edsnlp.pipelines.factories:tobacco"
"eds.aids" = "edsnlp.pipelines.factories:aids"
"eds.lymphoma" = "edsnlp.pipelines.factories:lymphoma"
"eds.leukemia" = "edsnlp.pipelines.factories:leukemia"
"eds.solid_tumor" = "edsnlp.pipelines.factories:solid_tumor"
"eds.ckd" = "edsnlp.components:ckd"
"eds.hemiplegia" = "edsnlp.components:hemiplegia"
"eds.liver_disease" = "edsnlp.components:liver_disease"
"eds.peptic_ulcer_disease" = "edsnlp.components:peptic_ulcer_disease"
"eds.connective_tissue_disease" = "edsnlp.components:connective_tissue_disease"
"eds.copd" = "edsnlp.components:copd"
"eds.dementia" = "edsnlp.components:dementia"
"eds.cerebrovascular_accident" = "edsnlp.components:cerebrovascular_accident"
"eds.peripheral_vascular_disease" = "edsnlp.components:peripheral_vascular_disease"
"eds.congestive_heart_failure" = "edsnlp.components:congestive_heart_failure"
"eds.myocardial_infarction" = "edsnlp.components:myocardial_infarction"
"eds.alcohol" = "edsnlp.components:alcohol"
# Core
"eds.accents" = "edsnlp.pipelines.core.normalizer.accents.factory:create_component"
"eds.context" = "edsnlp.pipelines.core.context.factory:create_component"
"eds.contextual_matcher" = "edsnlp.pipelines.core.contextual_matcher.factory:create_component"
"eds.endlines" = "edsnlp.pipelines.core.endlines.factory:create_component"
"eds.matcher" = "edsnlp.pipelines.core.matcher.factory:create_component"
"eds.normalizer" = "edsnlp.pipelines.core.normalizer.factory:create_component"
"eds.pollution" = "edsnlp.pipelines.core.normalizer.pollution.factory:create_component"
"eds.quotes" = "edsnlp.pipelines.core.normalizer.quotes.factory:create_component"
"eds.remove_lowercase" = "edsnlp.pipelines.core.normalizer.remove_lowercase.factory:create_component"
"eds.sentences" = "edsnlp.pipelines.core.sentences.factory:create_component"
"eds.spaces" = "edsnlp.pipelines.core.normalizer.spaces.factory:create_component"
"eds.terminology" = "edsnlp.pipelines.core.terminology.factory:create_component"

# NER
"eds.adicap" = "edsnlp.pipelines.ner.adicap.factory:create_component"
"eds.ccmu" = "edsnlp.pipelines.ner.scores.emergency.ccmu.factory:create_component"
"eds.charlson" = "edsnlp.pipelines.ner.scores.charlson.factory:create_component"
"eds.cim10" = "edsnlp.pipelines.ner.cim10.factory:create_component"
"eds.covid" = "edsnlp.pipelines.ner.covid.factory:create_component"
"eds.drugs" = "edsnlp.pipelines.ner.drugs.factory:create_component"
"eds.elston_ellis" = "edsnlp.pipelines.ner.scores.elston_ellis.factory:create_component"
"eds.gemsa" = "edsnlp.pipelines.ner.scores.emergency.gemsa.factory:create_component"
"eds.priority" = "edsnlp.pipelines.ner.scores.emergency.priority.factory:create_component"
"eds.score" = "edsnlp.pipelines.ner.scores.factory:create_component"
"eds.sofa" = "edsnlp.pipelines.ner.scores.sofa.factory:create_component"
"eds.tnm" = "edsnlp.pipelines.ner.tnm.factory:create_component"
"eds.umls" = "edsnlp.pipelines.ner.umls.factory:create_component"

# NER/Comorbidities
"eds.aids" = "edsnlp.pipelines.ner.disorders.aids.factory:create_component"
"eds.alcohol" = "edsnlp.pipelines.ner.behaviors.alcohol.factory:create_component"
"eds.cerebrovascular_accident" = "edsnlp.pipelines.ner.disorders.cerebrovascular_accident.factory:create_component"
"eds.ckd" = "edsnlp.pipelines.ner.disorders.ckd.factory:create_component"
"eds.congestive_heart_failure" = "edsnlp.pipelines.ner.disorders.congestive_heart_failure.factory:create_component"
"eds.connective_tissue_disease" = "edsnlp.pipelines.ner.disorders.connective_tissue_disease.factory:create_component"
"eds.copd" = "edsnlp.pipelines.ner.disorders.copd.factory:create_component"
"eds.dementia" = "edsnlp.pipelines.ner.disorders.dementia.factory:create_component"
"eds.diabetes" = "edsnlp.pipelines.ner.disorders.diabetes.factory:create_component"
"eds.hemiplegia" = "edsnlp.pipelines.ner.disorders.hemiplegia.factory:create_component"
"eds.leukemia" = "edsnlp.pipelines.ner.disorders.leukemia.factory:create_component"
"eds.liver_disease" = "edsnlp.pipelines.ner.disorders.liver_disease.factory:create_component"
"eds.lymphoma" = "edsnlp.pipelines.ner.disorders.lymphoma.factory:create_component"
"eds.myocardial_infarction" = "edsnlp.pipelines.ner.disorders.myocardial_infarction.factory:create_component"
"eds.peptic_ulcer_disease" = "edsnlp.pipelines.ner.disorders.peptic_ulcer_disease.factory:create_component"
"eds.peripheral_vascular_disease" = "edsnlp.pipelines.ner.disorders.peripheral_vascular_disease.factory:create_component"
"eds.solid_tumor" = "edsnlp.pipelines.ner.disorders.solid_tumor.factory:create_component"
"eds.tobacco" = "edsnlp.pipelines.ner.behaviors.tobacco.factory:create_component"

# Qualifiers
"eds.family" = "edsnlp.pipelines.qualifiers.family.factory:create_component"
"eds.history" = "edsnlp.pipelines.qualifiers.history.factory:create_component"
"eds.hypothesis" = "edsnlp.pipelines.qualifiers.hypothesis.factory:create_component"
"eds.negation" = "edsnlp.pipelines.qualifiers.negation.factory:create_component"
"eds.reported_speech" = "edsnlp.pipelines.qualifiers.reported_speech.factory:create_component"

# Misc
"eds.consultation_dates" = "edsnlp.pipelines.misc.consultation_dates.factory:create_component"
"eds.dates" = "edsnlp.pipelines.misc.dates.factory:create_component"
"eds.measurements" = "edsnlp.pipelines.misc.measurements.factory:create_component"
"eds.reason" = "edsnlp.pipelines.misc.reason.factory:create_component"
"eds.sections" = "edsnlp.pipelines.misc.sections.factory:create_component"
"eds.tables" = "edsnlp.pipelines.misc.tables.factory:create_component"

# Trainable
"eds.nested_ner" = "edsnlp.pipelines.trainable.nested_ner.factory:create_component"
"eds.span_qualifier" = "edsnlp.pipelines.trainable.span_qualifier.factory:create_component"

# Deprecated (links to the same factories as above)
"SOFA" = "edsnlp.pipelines.ner.scores.sofa.factory:create_component"
"accents" = "edsnlp.pipelines.core.normalizer.accents.factory:create_component"
"charlson" = "edsnlp.pipelines.ner.scores.charlson.factory:create_component"
"consultation_dates" = "edsnlp.pipelines.misc.consultation_dates.factory:create_component"
"contextual-matcher" = "edsnlp.pipelines.core.contextual_matcher.factory:create_component"
"dates" = "edsnlp.pipelines.misc.dates.factory:create_component"
"eds.AIDS" = "edsnlp.pipelines.ner.disorders.aids.factory:create_component"
"eds.CKD" = "edsnlp.pipelines.ner.disorders.ckd.factory:create_component"
"eds.COPD" = "edsnlp.pipelines.ner.disorders.copd.factory:create_component"
"eds.SOFA" = "edsnlp.pipelines.ner.scores.sofa.factory:create_component"
"eds.TNM" = "edsnlp.pipelines.ner.tnm.factory:create_component"
"eds.elston-ellis" = "edsnlp.pipelines.ner.scores.elston_ellis.factory:create_component"
"eds.elstonellis" = "edsnlp.pipelines.ner.scores.elston_ellis.factory:create_component"
"eds.emergency.ccmu" = "edsnlp.pipelines.ner.scores.emergency.ccmu.factory:create_component"
"eds.emergency.gemsa" = "edsnlp.pipelines.ner.scores.emergency.gemsa.factory:create_component"
"eds.emergency.priority" = "edsnlp.pipelines.ner.scores.emergency.priority.factory:create_component"
"eds.measures" = "edsnlp.pipelines.misc.measurements.factory:create_component"
"eds.remove-lowercase" = "edsnlp.pipelines.core.normalizer.remove_lowercase.factory:create_component"
"emergency.ccmu" = "edsnlp.pipelines.ner.scores.emergency.ccmu.factory:create_component"
"emergency.gemsa" = "edsnlp.pipelines.ner.scores.emergency.gemsa.factory:create_component"
"emergency.priority" = "edsnlp.pipelines.ner.scores.emergency.priority.factory:create_component"
"endlines" = "edsnlp.pipelines.core.endlines.factory:create_component"
"family" = "edsnlp.pipelines.qualifiers.family.factory:create_component"
"hypothesis" = "edsnlp.pipelines.qualifiers.hypothesis.factory:create_component"
"matcher" = "edsnlp.pipelines.core.matcher.factory:create_component"
"negation" = "edsnlp.pipelines.qualifiers.negation.factory:create_component"
"normalizer" = "edsnlp.pipelines.core.normalizer.factory:create_component"
"pollution" = "edsnlp.pipelines.core.normalizer.pollution.factory:create_component"
"quotes" = "edsnlp.pipelines.core.normalizer.quotes.factory:create_component"
"reason" = "edsnlp.pipelines.misc.reason.factory:create_component"
"remove-lowercase" = "edsnlp.pipelines.core.normalizer.remove_lowercase.factory:create_component"
"reported_speech" = "edsnlp.pipelines.qualifiers.reported_speech.factory:create_component"
"rspeech" = "edsnlp.pipelines.qualifiers.reported_speech.factory:create_component"
"score" = "edsnlp.pipelines.ner.scores.factory:create_component"
"sections" = "edsnlp.pipelines.misc.sections.factory:create_component"
"sentences" = "edsnlp.pipelines.core.sentences.factory:create_component"
"spaces" = "edsnlp.pipelines.core.normalizer.spaces.factory:create_component"
"tables" = "edsnlp.pipelines.misc.tables.factory:create_component"
"terminology" = "edsnlp.pipelines.core.terminology.factory:create_component"

[project.entry-points."spacy_architectures"]
"eds.stack_crf_ner_model.v1" = "edsnlp.pipelines.trainable.nested_ner.stack_crf_ner:create_model"
Expand Down Expand Up @@ -241,14 +295,15 @@ omit-covered-files = false

[tool.coverage]
exclude_lines = [
"def __repr__",
"if __name__ == .__main__.:",
"if TYPE_CHECKING:",
"if typing.TYPE_CHECKING:",
"@overload",
"pragma: no cover",
"raise AssertionError",
"raise NotImplementedError",
"def __repr__",
"raise .*Error",
"if __name__ == .__main__.:",
"if TYPE_CHECKING:",
"class .*\\bProtocol\\):",
"@(abc\\.)?abstractmethod",
"Span.set_extension.*",
"Doc.set_extension.*",
"Token.set_extension.*",
Expand Down
4 changes: 4 additions & 0 deletions tests/pipelines/test_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,7 @@ def test_pipelines(doc):
assert anomalie._.negation

assert not doc[0]._.history


def test_import_all():
    """Smoke test: importing `edsnlp.pipelines.factories` must not raise."""
    # The import is the whole test; `noqa: F401` silences the unused-import lint.
    import edsnlp.pipelines.factories  # noqa: F401
32 changes: 32 additions & 0 deletions tests/test_span_args.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from pydantic import validate_arguments

from edsnlp.pipelines.base import (
SpanGetterArg,
SpanSetterArg,
validate_span_getter,
validate_span_setter,
)


def test_span_getter():
    """
    Check the normalised form returned by `validate_span_getter`.

    A string or a list of strings becomes a dict mapping each name to `True`;
    dict inputs come back unchanged.
    """
    cases = [
        ("ents", {"ents": True}),
        (["ents"], {"ents": True}),
        (["ents", "group"], {"ents": True, "group": True}),
        ({"grp": True}, {"grp": True}),
        ({"grp": ["a", "b", "c"]}, {"grp": ["a", "b", "c"]}),
    ]
    for raw, expected in cases:
        assert validate_span_getter(raw) == expected


def test_span_setter():
    """
    Check the normalised form returned by `validate_span_setter`.

    A string or a list of strings becomes a dict mapping each name to `True`;
    dict inputs come back unchanged.
    """
    cases = [
        ("ents", {"ents": True}),
        (["ents"], {"ents": True}),
        (["ents", "group"], {"ents": True, "group": True}),
        ({"grp": True}, {"grp": True}),
        ({"grp": ["a", "b", "c"]}, {"grp": ["a", "b", "c"]}),
    ]
    for raw, expected in cases:
        assert validate_span_setter(raw) == expected


def test_validate_args():
    """
    Check that `SpanGetterArg` / `SpanSetterArg` annotations are applied by
    pydantic's `validate_arguments`: plain strings passed to the decorated
    function come back in the normalised dict form.
    """

    @validate_arguments
    def my_func(span_getter: SpanGetterArg, span_setter: SpanSetterArg):
        return span_getter, span_setter

    result = my_func("ents", "ents")
    expected = ({"ents": True}, {"ents": True})
    assert result == expected

0 comments on commit 2c37c13

Please sign in to comment.