Skip to content

Commit

Permalink
chore: improve coverage and clean entry points
Browse files Browse the repository at this point in the history
  • Loading branch information
percevalw committed Sep 13, 2023
1 parent eb5af67 commit 6deb57a
Show file tree
Hide file tree
Showing 12 changed files with 206 additions and 376 deletions.
1 change: 0 additions & 1 deletion edsnlp/components.py

This file was deleted.

81 changes: 4 additions & 77 deletions edsnlp/patch_spacy_dot_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from spacy.errors import Errors
from spacy.language import FactoryMeta
from spacy.pipe_analysis import validate_attrs
from spacy.pipeline import Pipe
from spacy.util import SimpleFrozenDict, SimpleFrozenList, registry


Expand Down Expand Up @@ -51,10 +50,11 @@ def factory(
if not isinstance(name, str):
raise ValueError(Errors.E963.format(decorator="factory"))
if not isinstance(default_config, dict):
err = Errors.E962.format(
style="default config", name=name, cfg_type=type(default_config)
raise ValueError(
Errors.E962.format(
style="default config", name=name, cfg_type=type(default_config)
)
)
raise ValueError(err)

def add_factory(factory_func: Callable) -> Callable:
internal_name = cls.get_factory_name(name)
Expand Down Expand Up @@ -102,77 +102,4 @@ def add_factory(factory_func: Callable) -> Callable:
return add_factory


@classmethod
def component(
    cls,
    name: str,
    *,
    assigns: Iterable[str] = SimpleFrozenList(),
    requires: Iterable[str] = SimpleFrozenList(),
    retokenizes: bool = False,
    func: Optional["Pipe"] = None,
) -> Callable[..., Any]:
    """
    Register a stateless pipeline component function under `name`.

    Patched copy of spaCy's `Language.component`, kept so that dots are
    allowed again in factory names (https://github.com/aphp/edsnlp/pull/152).

    The component function is wrapped in a trivial factory and registered
    through `cls.factory`. Works either as a decorator, or as a plain call
    when the component is passed through the `func` keyword argument. Once
    registered, the component can be added with `nlp.add_pipe(name)`.

    name (str): The name of the component factory.
    assigns (Iterable[str]): Doc/Token attributes assigned by this component,
        e.g. "token.ent_id". Used for pipeline analysis.
    requires (Iterable[str]): Doc/Token attributes required by this
        component, e.g. "token.ent_id". Used for pipeline analysis.
    retokenizes (bool): Whether the component changes the tokenization.
        Used for pipeline analysis.
    func (Optional[Callable]): Component function if not used as a decorator.
    RETURNS (Callable): The component function when `func` is given,
        otherwise the decorator that performs the registration.

    DOCS: https://spacy.io/api/language#component
    """
    if name is not None and not isinstance(name, str):
        raise ValueError(Errors.E963.format(decorator="component"))
    if name is None:
        component_name = util.get_object_name(func)
    else:
        component_name = name

    def add_component(component_func: "Pipe") -> Callable:
        # NOTE(review): this inspects the outer `func`, not `component_func`,
        # so the class check only fires in the non-decorator call style.
        # Kept as-is to mirror the behaviour of the code this was copied from.
        if isinstance(func, type):
            raise ValueError(Errors.E965.format(name=component_name))

        def factory_func(nlp, name: str) -> "Pipe":
            return component_func

        registered_func = factory_func
        internal_name = cls.get_factory_name(name)
        if internal_name in registry.factories:
            # Only the internal name is checked: a subclass may share a
            # factory name with its base class. To avoid errors when a module
            # is reloaded, reuse the already registered factory if it wraps
            # this very component function. A freshly created `factory_func`
            # is always a new object, so the comparison has to look inside
            # the existing function's closure instead.
            previous = registry.factories.get(internal_name)
            cells = previous.__closure__
            if cells:
                wrapped = [cell.cell_contents for cell in cells][0]
            else:
                wrapped = None
            if util.is_same_func(wrapped, component_func):
                registered_func = previous

        cls.factory(
            component_name,
            assigns=assigns,
            requires=requires,
            retokenizes=retokenizes,
            func=registered_func,
        )
        return component_func

    # Support both `@Language.component(...)` and `Language.component(..., func=f)`
    return add_component if func is None else add_component(func)


# Monkey-patch spaCy: replace the `factory` and `component` attributes of
# `spacy.Language` with the patched versions defined in this module.
spacy.Language.factory = factory
spacy.Language.component = component
25 changes: 0 additions & 25 deletions edsnlp/pipelines/core/normalizer/lowercase/factory.py

This file was deleted.

2 changes: 1 addition & 1 deletion edsnlp/pipelines/core/normalizer/normalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
from spacy.tokens import Doc

from .accents.accents import AccentsConverter
from .lowercase.factory import remove_lowercase
from .pollution.pollution import PollutionTagger
from .quotes.quotes import QuotesConverter
from .remove_lowercase.factory import remove_lowercase
from .spaces.spaces import SpacesTagger


Expand Down
47 changes: 47 additions & 0 deletions edsnlp/pipelines/core/normalizer/remove_lowercase/factory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from spacy.language import Language
from spacy.tokens import Doc

from edsnlp.utils.deprecation import deprecated_factory


def remove_lowercase(doc: Doc):
    """
    Copy the verbatim text of every token into its `norm_` attribute,
    so that the norm keeps the original casing.
    Should always be applied first.

    Parameters
    ----------
    doc : Doc
        The spaCy `Doc` object, modified in place.

    Returns
    -------
    Doc
        The same document, with `norm_` set to the raw text of each token.
    """
    for tok in doc:
        tok.norm_ = tok.text
    return doc


@deprecated_factory(
    "remove-lowercase",
    "eds.remove_lowercase",
    assigns=["token.norm"],
)
@deprecated_factory(
    "eds.remove-lowercase",
    "eds.remove_lowercase",
    assigns=["token.norm"],
)
@Language.factory(
    "eds.remove_lowercase",
    assigns=["token.norm"],
)
def create_component(nlp: Language, name: str):
    """
    Factory for the `eds.remove_lowercase` component, also registered under
    the deprecated names `remove-lowercase` and `eds.remove-lowercase`.

    The component is the plain `remove_lowercase` function; neither `nlp`
    nor `name` is used to build it.

    Parameters
    ----------
    nlp : Language
        The pipeline object.
    name : str
        The name of the component.

    Returns
    -------
    Callable
        The `remove_lowercase` function.
    """
    return remove_lowercase  # pragma: no cover
4 changes: 3 additions & 1 deletion edsnlp/pipelines/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
from .core.matcher.factory import create_component as matcher
from .core.normalizer.accents.factory import create_component as accents
from .core.normalizer.factory import create_component as normalizer
from .core.normalizer.lowercase.factory import remove_lowercase
from .core.normalizer.pollution.factory import create_component as pollution
from .core.normalizer.quotes.factory import create_component as quotes
from .core.normalizer.remove_lowercase.factory import (
create_component as remove_lowercase,
)
from .core.normalizer.spaces.factory import create_component as spaces
from .core.sentences.factory import create_component as sentences
from .core.terminology.factory import create_component as terminology
Expand Down
198 changes: 0 additions & 198 deletions edsnlp/utils/blocs.py

This file was deleted.

Loading

0 comments on commit 6deb57a

Please sign in to comment.