diff --git a/simplemma/lemmatizer.py b/simplemma/lemmatizer.py index 18e55d1..9355768 100644 --- a/simplemma/lemmatizer.py +++ b/simplemma/lemmatizer.py @@ -80,30 +80,6 @@ def __init__( self._fallback_lemmatization_strategy = fallback_lemmatization_strategy self._cached_lemmatize = lru_cache(maxsize=cache_max_size)(self._lemmatize) - def is_known( - self, - token: str, - lang: Union[str, Tuple[str, ...]], - ) -> bool: - """Check if a token is known in the specified language(s). - - Args: - token: The token to check. - lang: The language or languages to check in. - - Returns: - bool: True if the token is known, False otherwise. - """ - - _control_input_type(token) - lang = validate_lang_input(lang) - - dictionary_lookup = DictionaryLookupStrategy() - return any( - dictionary_lookup.get_lemma(token, lang_code) is not None - for lang_code in lang - ) - def lemmatize( self, token: str, @@ -179,9 +155,7 @@ def get_lemmas_in_text( ) -def is_known( - token: str, lang: Union[str, Tuple[str, ...]], greedy: bool = False -) -> bool: +def is_known(token: str, lang: Union[str, Tuple[str, ...]]) -> bool: """Check if a token is known in the specified language(s). Args: @@ -191,8 +165,14 @@ def is_known( Returns: bool: True if the token is known, False otherwise. """ - lemmatizer = _legacy_lemmatizer if not greedy else _legacy_greedy_lemmatizer - return lemmatizer.is_known(token, lang) + + _control_input_type(token) + lang = validate_lang_input(lang) + + dictionary_lookup = DictionaryLookupStrategy(_legacy_dictionary_factory) + return any( + dictionary_lookup.get_lemma(token, lang_code) is not None for lang_code in lang + ) def lemmatize( diff --git a/tests/test_lemmatizer.py b/tests/test_lemmatizer.py index fb27002..caa26c1 100644 --- a/tests/test_lemmatizer.py +++ b/tests/test_lemmatizer.py @@ -452,27 +452,16 @@ def test_subwords() -> None: def test_is_known() -> None: - # logic - with pytest.raises(TypeError): - assert Lemmatizer().is_known(None, lang="en") is None # type: ignore[arg-type] with pytest.raises(TypeError): assert is_known(None, lang="en") is None # type: ignore[arg-type] - with pytest.raises(ValueError): - assert Lemmatizer().is_known("", lang="en") is None with pytest.raises(ValueError): assert is_known("", lang="en") is None - assert ( - Lemmatizer().is_known("FanCY", lang="en") - == is_known("FanCY", lang="en") - == True - ) - # known words - assert ( - Lemmatizer().is_known("Fancy-String", lang="en") - == is_known("Fancy-String", lang="en") - == False - ) + assert is_known("FanCY", lang="en") == True + assert is_known("Fancy-String", lang="en") == False + + assert is_known("espejos", lang=("es", "de")) == True + assert is_known("espejos", lang=("de", "es")) == True def test_get_lemmas_in_text() -> None: