From 8c1d9b0a4eb58c68726ad0a5e6b435011e909aca Mon Sep 17 00:00:00 2001 From: Adrien Barbaresi Date: Tue, 14 Jun 2022 15:23:02 +0200 Subject: [PATCH] prepare new version: 0.2.2 --- HISTORY.rst | 14 ++++++++++++++ README.rst | 10 +++++----- py3langid/__init__.py | 2 +- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index 3932375..e32c087 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,20 @@ History ======= +0.2.2 +----- + +* Fixed bug in probability normalization (#6) +* Fully implemented data type argument in ``classify()`` +* Adapted training scripts to Python3 (untested) + + +0.2.1 +----- + +* Maintenance: update and simplify code + + 0.2.0 ----- diff --git a/README.rst b/README.rst index 9bbfb96..49f3081 100644 --- a/README.rst +++ b/README.rst @@ -15,7 +15,7 @@ Changes in this fork Execution speed has been improved and the code base has been optimized for Python 3.6+: - Import: Loading the package (``import py3langid``) is about 30% faster -- Startup: Loading the default classification model is 20-25x faster +- Startup: Loading the default classification model is 25-30x faster - Execution: Language detection with ``langid.classify`` is 5-6x faster on paragraphs (less on longer texts) For implementation details see this blog post: `How to make language detection with langid.py faster `_. @@ -50,7 +50,7 @@ Basics: >>> text = 'This text is in English.' # identified language and probability >>> langid.classify(text) - ('en', -56.77428913116455) + ('en', -56.77429) # unpack the result tuple in variables >>> lang, prob = langid.classify(text) # all potential languages @@ -68,11 +68,11 @@ More options: >>> identifier.set_languages(['de', 'en', 'fr']) # this won't work well... >>> identifier.classify('这样不好') - ('en', -81.83166265487671) + ('en', -81.831665) # normalization of probabilities to an interval between 0 and 1 >>> identifier = LanguageIdentifier.from_pickled_model(MODEL_FILE, norm_probs=True) - >>> identifier.classify('This should be enough text.')) + >>> identifier.classify('This should be enough text.') ('en', 1.0) @@ -94,7 +94,7 @@ On the command-line # define a subset of target languages $ echo "This won't be recognized properly." | langid -n -l fr,it,tr - ('it', 0.9703832808613264) + ('it', 0.97038305) diff --git a/py3langid/__init__.py b/py3langid/__init__.py index e6adee6..98dc4c6 100644 --- a/py3langid/__init__.py +++ b/py3langid/__init__.py @@ -1,3 +1,3 @@ from .langid import classify, rank, set_languages -__version__ = '0.2.1' +__version__ = '0.2.2'