From 77763dbf7deda9710a2f5e7bbaeb1145659e5f67 Mon Sep 17 00:00:00 2001 From: Shayne Kang Date: Thu, 5 May 2016 15:55:42 +0900 Subject: [PATCH 01/21] decouple jpype dependencies when installing konlpy --- description.py | 5 +++++ docs/conf.py | 2 +- setup.py | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 description.py diff --git a/description.py b/description.py new file mode 100644 index 0000000..2ce6408 --- /dev/null +++ b/description.py @@ -0,0 +1,5 @@ +__title__ = 'KoNLPy' +__version__ = '0.4.3' +__author__ = 'Lucy Park' +__license__ = 'GPL v3' +__copyright__ = 'Copyright 2015 Lucy Park' \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 38a0a13..f2706cc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -29,7 +29,7 @@ def __getattr__(cls, name): # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.insert(0, os.path.abspath('..')) -from konlpy import __version__ +from description import __version__ # -- General configuration ----------------------------------------------------- diff --git a/setup.py b/setup.py index 21bdb7f..2b37108 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import os import sys from setuptools import find_packages, setup -from konlpy import __version__ +from description import __version__ def requirements(): def _openreq(reqfile): From 11d93f947ebaa5441918ee37b8147309a653f912 Mon Sep 17 00:00:00 2001 From: Shayne Kang Date: Thu, 5 May 2016 15:56:48 +0900 Subject: [PATCH 02/21] move jpype requirements to install_requires --- setup.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/setup.py b/setup.py index 2b37108..c780db7 100644 --- a/setup.py +++ b/setup.py @@ -11,13 +11,10 @@ def _openreq(reqfile): with open(os.path.join(os.path.dirname(__file__), reqfile)) as f: return f.read().splitlines() - def _genver(major, minorlist): - return ':%s' % ' or '.join('python_version=="%s.%s"' % (major, i) for i in minorlist) - - return { - _genver(2, [6,7]): _openreq('requirements.txt'), - _genver(3, range(5)): _openreq('requirements-py3.txt') - } + if sys.version_info.major >= 3: + return _openreq('requirements-py3.txt') + else: + return _openreq('requirements.txt') setup(name='konlpy', version=__version__, @@ -65,4 +62,4 @@ def _genver(major, minorlist): 'java/bin/kr/lucypark/*/*.class', 'java/bin/kr/lucypark/*/*/*.class', ]}, - extras_require=requirements()) + install_requires=requirements()) From 54b61f4743fd8b031f1b0aac8c365452fbaa7f06 Mon Sep 17 00:00:00 2001 From: Shayne Kang Date: Thu, 5 May 2016 15:57:21 +0900 Subject: [PATCH 03/21] both JPype1 and JPype1-py3 doesn't support Windows. --- setup.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/setup.py b/setup.py index c780db7..b391b5f 100644 --- a/setup.py +++ b/setup.py @@ -3,10 +3,15 @@ import os import sys +import platform from setuptools import find_packages, setup from description import __version__ def requirements(): + # both JPype1 and JPype1-py3 doesn't support Windows. see http://konlpy.org/en/v0.4.4/install/. + if platform.system() == 'Windows': + return [] + def _openreq(reqfile): with open(os.path.join(os.path.dirname(__file__), reqfile)) as f: return f.read().splitlines() From 952458d80fda988849b31ad388ba1bf90fe315ce Mon Sep 17 00:00:00 2001 From: Shayne Kang Date: Thu, 5 May 2016 16:38:48 +0900 Subject: [PATCH 04/21] update document --- docs/install.rst | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/docs/install.rst b/docs/install.rst index 1a38ba0..9beac97 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -18,9 +18,8 @@ Ubuntu .. sourcecode:: bash - $ sudo apt-get install g++ openjdk-7-jdk python-dev python3-dev # Install Java 1.7 or up - $ pip install JPype1 # Python 2.x - $ pip3 install JPype1-py3 # Python 3.x + # Install Java 1.7 or up + $ sudo apt-get install g++ openjdk-7-jdk python-dev python3-dev 2. Install KoNLPy @@ -57,9 +56,6 @@ CentOS $ make # Build $ sudo make altinstall - $ pip install JPype1 # Python 2.x - $ pip3 install JPype1-py3 # Python 3.x - 2. Install KoNLPy .. sourcecode:: bash @@ -78,21 +74,14 @@ CentOS Mac OS ------ -1. Install dependencies - - .. sourcecode:: bash - - $ pip install JPype1 # Python 2.x - $ pip3 install JPype1-py3 # Python 3.x - -2. Install KoNLPy +1. Install KoNLPy .. sourcecode:: bash $ pip install konlpy # Python 2.x $ pip3 install konlpy # Python 3.x -3. Install MeCab (*optional*) +2. Install MeCab (*optional*) .. sourcecode:: bash From 871f361a7f43c9045a846ad424add843d1920259 Mon Sep 17 00:00:00 2001 From: Shayne Kang Date: Thu, 5 May 2016 17:06:01 +0900 Subject: [PATCH 05/21] fix installation error on python 2.6 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b391b5f..728f500 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ def _openreq(reqfile): with open(os.path.join(os.path.dirname(__file__), reqfile)) as f: return f.read().splitlines() - if sys.version_info.major >= 3: + if sys.version_info >= (3, ): return _openreq('requirements-py3.txt') else: return _openreq('requirements.txt') From e573cb84b7b3c9c9d5e4d12d6948a050b015ac35 Mon Sep 17 00:00:00 2001 From: Shayne Kang Date: Thu, 5 May 2016 23:00:29 +0900 Subject: [PATCH 06/21] fix misspelling --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 728f500..67a1fec 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ from description import __version__ def requirements(): - # both JPype1 and JPype1-py3 doesn't support Windows. see http://konlpy.org/en/v0.4.4/install/. + # both JPype1 and JPype1-py3 don't support Windows. see http://konlpy.org/en/v0.4.4/install/. if platform.system() == 'Windows': return [] From 8c692c33f30a5d836f05c05572660dbb51002a65 Mon Sep 17 00:00:00 2001 From: thoum Date: Sat, 4 Jun 2016 13:36:43 +0900 Subject: [PATCH 07/21] Update mecab.sh --- scripts/mecab.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/mecab.sh b/scripts/mecab.sh index bfd89e6..0a7dd30 100755 --- a/scripts/mecab.sh +++ b/scripts/mecab.sh @@ -42,6 +42,7 @@ cd /tmp curl -LO https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-1.6.1-20140814.tar.gz tar zxfv mecab-ko-dic-1.6.1-20140814.tar.gz cd mecab-ko-dic-1.6.1-20140814 +./autogen.sh ./configure sudo ldconfig make From 93aff9e6e0bde2de77a3e897c18a664193f43c70 Mon Sep 17 00:00:00 2001 From: theeluwin Date: Thu, 9 Jun 2016 15:14:05 +0900 Subject: [PATCH 08/21] removing sudo & check python3 on mecab install script --- scripts/mecab.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/scripts/mecab.sh b/scripts/mecab.sh index bfd89e6..7ee23b9 100755 --- a/scripts/mecab.sh +++ b/scripts/mecab.sh @@ -54,7 +54,10 @@ git clone https://bitbucket.org/eunjeon/mecab-python-0.996.git cd mecab-python-0.996 python setup.py build -sudo python setup.py install -# TODO: check if python3 is installed -python3 setup.py build -sudo python3 setup.py install +python setup.py install + +if hash "python3" &>/dev/null +then + python3 setup.py build + python3 setup.py install +fi From 020bcc50ed7fba862e6042fd509bb19e1afc9a39 Mon Sep 17 00:00:00 2001 From: Lucy Park Date: Mon, 27 Jun 2016 16:29:33 +0900 Subject: [PATCH 09/21] Update README.rst --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index d2388f8..de47ce9 100644 --- a/README.rst +++ b/README.rst @@ -30,8 +30,8 @@ KoNLPy KoNLPy is a Python package for natural language processing of the Korean language. -- English documentation: http://konlpy.org/en -- 한국어 문서: http://konlpy.org/ko +- English documentation: http://konlpy.org/en/latest +- 한국어 문서: http://konlpy.org/ko/latest Links ------ From aa4e225cb579f05f049dff806379d192258ba55f Mon Sep 17 00:00:00 2001 From: Lucy Park Date: Mon, 14 Nov 2016 19:43:43 +0900 Subject: [PATCH 10/21] Resolve #11 --- konlpy/tag/_mecab.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/konlpy/tag/_mecab.py b/konlpy/tag/_mecab.py index 489bfc5..d5573ce 100644 --- a/konlpy/tag/_mecab.py +++ b/konlpy/tag/_mecab.py @@ -103,3 +103,5 @@ def __init__(self, dicpath='/usr/local/lib/mecab/dic/mecab-ko-dic'): self.tagset = utils.read_json('%s/data/tagset/mecab.json' % utils.installpath) except RuntimeError: raise Exception('Invalid MeCab dictionary path: "%s"\nInput the correct path when initiializing class: "Mecab(\'/some/dic/path\')"' % dicpath) + except NameError: + raise Exception('Install MeCab in order to use it: http://konlpy.org/en/latest/install/') From 3e7182b6ac0d39e707924a4162dd9b3451dc8c97 Mon Sep 17 00:00:00 2001 From: Lucy Park Date: Mon, 14 Nov 2016 20:07:14 +0900 Subject: [PATCH 11/21] Update CONTRIBUTING.rst --- CONTRIBUTING.rst | 21 ++++++++++++++++----- Makefile | 7 +++++-- requirements-dev.txt | 5 ++++- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index a39849a..713b00e 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -23,20 +23,31 @@ KoNLPy는 오픈소스 프로젝트입니다. 2. 같은 이슈가 이미 제기되었고, - 이슈가 해결되었다면(closed): 최신 릴리즈에서 문제가 해결되었을 가능성이 높습니다. 쓰레드에서 다른 분들이 어떻게 해결했는지 파악해보는 것도 좋은 방법입니다. - 이슈가 아직 해결되지 않았다면(open): 댓글로 문제 상황을 설명해주세요. 같은 상황을 겪고 있는 사람들이 많이 모일수록 문제는 빠르게 해결될 수 있습니다. -3. 같은 이슈가 아직 제기되지 않았다면, "New Issue" 버튼을 눌러 이슈를 새로 생성해주시면 됩니다. 이슈를 새로 생성하시는 경우에는 사용하는 OS나 패키지 버젼 등을 같이 적어주시면 문제를 빠르게 해결하는데 도움이 됩니다. +3. 같은 이슈가 아직 제기되지 않았다면, "New Issue" 버튼을 눌러 이슈를 새로 생성해주시면 됩니다. 이슈를 새로 생성하시는 경우에는 사용하는 OS나 패키지 버젼 등을 같이 적어주시면 문제를 빠르게 해결하는데 도움이 됩니다. 3. 이슈 제안/해결하기 --------------------- - `깃헙 이슈 `_ 에 코드를 개선할 수 있는 방법을 제안하거나, 제안된 이슈에 대해 토론/해결하실 수 있습니다. -- 기여하신 부분에 대해 정확한 attribution을 할 수 있도록, 가능하다면 pull request를 보내주시기 바랍니다. - 코드를 작성할 때는 다음에 유의해주세요. 1. 탭 대신 공백 4개 사용 2. 문서에서 특별히 언급되지 않은 사항은 일단 코드의 다른 부분들을 참고해서 작성 (+ 다른 분들의 편의를 위해 이 문서를 업데이트 해주세요) 3. 커밋 로그는 설명력 있게 작성 - 4. PR을 보내면 해당 코드는 KoNLPy의 오픈소스 라이센스를 따름 - 5. PR를 보낸 후 코드의 일부를 변경하도록 요청될 경우, ``git commit --amend`` 로 커밋을 수정 +- 코드 작성을 완료한 후 코드가 모든 테스트를 통과하는지 확인해주세요. + 1. 자바 코드를 수정한 경우 + 1. Install `Apache Ant `_ + 1. ``make java`` + 1. 코드를 단 한 줄이라도 수정한 모든 경우:: + + pip install -r requirements-dev.txt + make build # create tar.gz + make check # check code styles + make testall # run tests + +- PR을 보내기 전 다음을 확인해주세요. + 1. PR을 보내면 해당 코드는 KoNLPy의 오픈소스 라이센스를 따름 + 1. PR를 보낸 후 코드의 일부를 변경하도록 요청될 경우, ``git commit --amend`` 로 커밋을 수정 4. 문서 수정하기 @@ -55,7 +66,7 @@ Setup docs 1. Fork and clone KoNLPy:: git clone git@github.com:[your_github_id]/konlpy.git - + 2. Include the following lines in your `~/.bashrc`:: export LC_ALL=en_US.UTF-8 diff --git a/Makefile b/Makefile index 7d40e2d..802cc14 100644 --- a/Makefile +++ b/Makefile @@ -11,11 +11,14 @@ # # TODO: use flake8 and/or pylint +build: + python setup.py sdist --formats=gztar,zip + check: check-manifest pyroma dist/konlpy-*tar.gz - pep8 --ignore==E501 konlpy/*.py - pep8 --ignore==E501 konlpy/*/*.py + pep8 --ignore=E501 konlpy/*.py + pep8 --ignore=E501 konlpy/*/*.py testpypi: sudo python setup.py register -r pypitest diff --git a/requirements-dev.txt b/requirements-dev.txt index 8162895..b0cdc6c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,2 +1,5 @@ -pytest>=2.6.4 +check-manifest>=0.34 +pep8>=1.7.0 +pyroma>=2.2 pytest-cov>=1.8.1 +pytest>=2.6.4 From 10db1a511f2a39b5e47e6c36c44efd48c11fabdf Mon Sep 17 00:00:00 2001 From: Lucy Park Date: Mon, 14 Nov 2016 20:15:14 +0900 Subject: [PATCH 12/21] Upgrade MeCab dictionary version to 2.0.1 --- scripts/mecab.sh | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/scripts/mecab.sh b/scripts/mecab.sh index bfd89e6..7532096 100755 --- a/scripts/mecab.sh +++ b/scripts/mecab.sh @@ -38,12 +38,17 @@ make check sudo make install # install mecab-ko-dic +## install requirement automake1.11 cd /tmp -curl -LO https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-1.6.1-20140814.tar.gz -tar zxfv mecab-ko-dic-1.6.1-20140814.tar.gz -cd mecab-ko-dic-1.6.1-20140814 +curl -LO http://ftpmirror.gnu.org/automake/automake-1.11.tar.gz +tar -zxvf automake-1.15.tar.gz + +curl -LO https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-2.0.1-20150920.tar.gz +tar -zxvf mecab-ko-dic-2.0.1-*.tar.gz + +cd mecab-ko-dic-2.0.1-* ./configure -sudo ldconfig +# if make fails, follow instructions at https://bitbucket.org/eunjeon/mecab-ko-dic/overview make sudo sh -c 'echo "dicdir=/usr/local/lib/mecab/dic/mecab-ko-dic" > /usr/local/etc/mecabrc' sudo make install From 0ac75293c8463c8c8d18e9da9cf6df558e2b3d3c Mon Sep 17 00:00:00 2001 From: Lucy Park Date: Mon, 14 Nov 2016 20:39:51 +0900 Subject: [PATCH 13/21] Elaborate error message --- konlpy/tag/_mecab.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/konlpy/tag/_mecab.py b/konlpy/tag/_mecab.py index d5573ce..cb8c71e 100644 --- a/konlpy/tag/_mecab.py +++ b/konlpy/tag/_mecab.py @@ -102,6 +102,6 @@ def __init__(self, dicpath='/usr/local/lib/mecab/dic/mecab-ko-dic'): self.tagger = Tagger('-d %s' % dicpath) self.tagset = utils.read_json('%s/data/tagset/mecab.json' % utils.installpath) except RuntimeError: - raise Exception('Invalid MeCab dictionary path: "%s"\nInput the correct path when initiializing class: "Mecab(\'/some/dic/path\')"' % dicpath) + raise Exception('The MeCab dictionary does not exist at "%s". Is the dictionary correctly installed?\nYou can also try entering the dictionary path when initializing the Mecab class: "Mecab(\'/some/dic/path\')"' % dicpath) except NameError: raise Exception('Install MeCab in order to use it: http://konlpy.org/en/latest/install/') From 9e785d876133f81403054cf1d3fa8764ff91fc91 Mon Sep 17 00:00:00 2001 From: Lucy Park Date: Mon, 14 Nov 2016 20:46:33 +0900 Subject: [PATCH 14/21] Update CONTRIBUTING.rst --- CONTRIBUTING.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 713b00e..0b2f276 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -35,9 +35,11 @@ KoNLPy는 오픈소스 프로젝트입니다. 2. 문서에서 특별히 언급되지 않은 사항은 일단 코드의 다른 부분들을 참고해서 작성 (+ 다른 분들의 편의를 위해 이 문서를 업데이트 해주세요) 3. 커밋 로그는 설명력 있게 작성 - 코드 작성을 완료한 후 코드가 모든 테스트를 통과하는지 확인해주세요. - 1. 자바 코드를 수정한 경우 - 1. Install `Apache Ant `_ - 1. ``make java`` + 1. 자바 코드를 수정한 경우:: + + # Install `Apache Ant `_ + make java + 1. 코드를 단 한 줄이라도 수정한 모든 경우:: pip install -r requirements-dev.txt From 7ce30787eb88b4a5a348a046f4435f5ec829128a Mon Sep 17 00:00:00 2001 From: Lucy Park Date: Mon, 14 Nov 2016 20:49:10 +0900 Subject: [PATCH 15/21] Update MeCab download script --- scripts/mecab.sh | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/scripts/mecab.sh b/scripts/mecab.sh index 7532096..e3fd78a 100755 --- a/scripts/mecab.sh +++ b/scripts/mecab.sh @@ -39,16 +39,21 @@ sudo make install # install mecab-ko-dic ## install requirement automake1.11 +# TODO: if not [automake --version] cd /tmp curl -LO http://ftpmirror.gnu.org/automake/automake-1.11.tar.gz -tar -zxvf automake-1.15.tar.gz +tar -zxvf automake-*.tar.gz +cd automake-* +./configure +make +sudo make install +cd /tmp curl -LO https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-2.0.1-20150920.tar.gz tar -zxvf mecab-ko-dic-2.0.1-*.tar.gz - cd mecab-ko-dic-2.0.1-* +./autogen.sh ./configure -# if make fails, follow instructions at https://bitbucket.org/eunjeon/mecab-ko-dic/overview make sudo sh -c 'echo "dicdir=/usr/local/lib/mecab/dic/mecab-ko-dic" > /usr/local/etc/mecabrc' sudo make install From d8611cf1952b0b72abe053b42bbd2f3028d3bd5d Mon Sep 17 00:00:00 2001 From: Lucy Park Date: Mon, 14 Nov 2016 21:03:16 +0900 Subject: [PATCH 16/21] Fix PEP8 styling error --- konlpy/jvm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/konlpy/jvm.py b/konlpy/jvm.py index b8e5c9c..285c243 100644 --- a/konlpy/jvm.py +++ b/konlpy/jvm.py @@ -40,7 +40,7 @@ def init_jvm(jvmpath=None): jvmpath = jvmpath or jpype.getDefaultJVMPath() # NOTE: Temporary patch for Issue #76. Erase when possible. - if sys.platform=='darwin'\ + if sys.platform == 'darwin'\ and jvmpath.find('1.8.0') > 0\ and jvmpath.endswith('libjvm.dylib'): jvmpath = '%s/lib/jli/libjli.dylib' % jvmpath.split('/lib/')[0] From acac5e9b8844cab09baf07b4a31c396b546adc14 Mon Sep 17 00:00:00 2001 From: Lucy Park Date: Mon, 14 Nov 2016 21:19:53 +0900 Subject: [PATCH 17/21] Increase Java memory allocation (Xmx) --- konlpy/jvm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/konlpy/jvm.py b/konlpy/jvm.py index 285c243..1d98a33 100644 --- a/konlpy/jvm.py +++ b/konlpy/jvm.py @@ -48,6 +48,6 @@ def init_jvm(jvmpath=None): if jvmpath: jpype.startJVM(jvmpath, '-Djava.class.path=%s' % classpath, '-Dfile.encoding=UTF8', - '-ea', '-Xmx768m') + '-ea', '-Xmx1024m') else: raise ValueError("Please specify the JVM path.") From 2b68672065dd247381b785a45de5771da943db98 Mon Sep 17 00:00:00 2001 From: Lucy Park Date: Mon, 14 Nov 2016 21:22:38 +0900 Subject: [PATCH 18/21] Update CONTRIBUTING.rst --- CONTRIBUTING.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 0b2f276..e0f99fe 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -36,13 +36,14 @@ KoNLPy는 오픈소스 프로젝트입니다. 3. 커밋 로그는 설명력 있게 작성 - 코드 작성을 완료한 후 코드가 모든 테스트를 통과하는지 확인해주세요. 1. 자바 코드를 수정한 경우:: - + # Install `Apache Ant `_ make java - + 1. 코드를 단 한 줄이라도 수정한 모든 경우:: pip install -r requirements-dev.txt + pip3 install -r requirements-dev.txt make build # create tar.gz make check # check code styles make testall # run tests From 95a656daaaaf4597dd63d7b4189c1e9f73ae81df Mon Sep 17 00:00:00 2001 From: Lucy Park Date: Tue, 15 Nov 2016 00:28:01 +0900 Subject: [PATCH 19/21] Remove --use-mirrors from .travis.yml - Pip no longer supports this option. - As pointed out in https://github.com/konlpy/konlpy/pull/106 - By @shaynekang --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 2ec0c50..cf401c5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,8 +7,8 @@ python: - "3.4" before_install: - - if [[ $TRAVIS_PYTHON_VERSION == 2* ]]; then pip install -r requirements.txt --use-mirrors; fi - - if [[ $TRAVIS_PYTHON_VERSION == 3* ]]; then pip install -r requirements-py3.txt --use-mirrors; fi + - if [[ $TRAVIS_PYTHON_VERSION == 2* ]]; then pip install -r requirements.txt; fi + - if [[ $TRAVIS_PYTHON_VERSION == 3* ]]; then pip install -r requirements-py3.txt; fi - pip install coveralls - pip install pytest-cov From 4e611d858d630e8f953788be0fc59e1c7aa23659 Mon Sep 17 00:00:00 2001 From: Swalloow Date: Tue, 13 Dec 2016 09:04:19 +0900 Subject: [PATCH 20/21] Update reference link --- docs/examples/multithreading.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/examples/multithreading.rst b/docs/examples/multithreading.rst index c34d587..7cc1816 100644 --- a/docs/examples/multithreading.rst +++ b/docs/examples/multithreading.rst @@ -23,5 +23,5 @@ Check out how much faster it gets! .. note:: - Some useful references on concurrency with Python: - - 장혜식, `"파이썬은 멀티코어 줘도 쓰잘데기가 없나요?"에 대한 파이썬 2.6의 대답 `_, 2008. + - 장혜식, `"파이썬은 멀티코어 줘도 쓰잘데기가 없나요?"에 대한 파이썬 2.6의 대답 `_, 2008. - 하용호, `파이썬으로 클라우드 하고 싶어요 `_, 2011. From 1abd2565660b3bdfe2004362f5a2b8f8fc6c0d40 Mon Sep 17 00:00:00 2001 From: leesangyoon Date: Wed, 15 Feb 2017 15:07:43 +0900 Subject: [PATCH 21/21] Add join parameter --- konlpy/tag/_hannanum.py | 21 +++++++++++++-------- konlpy/tag/_kkma.py | 19 ++++++++++++++----- konlpy/tag/_komoran.py | 24 ++++++++++++++++-------- konlpy/tag/_mecab.py | 22 +++++++++++++--------- konlpy/tag/_twitter.py | 8 ++++++-- test/test_hannanum.py | 10 ++++++++++ test/test_kkma.py | 8 ++++++++ test/test_komoran.py | 10 ++++++++++ test/test_mecab.py | 10 ++++++++++ test/test_twitter.py | 4 ++++ 10 files changed, 104 insertions(+), 32 deletions(-) diff --git a/konlpy/tag/_hannanum.py b/konlpy/tag/_hannanum.py index 60c76ec..dcfb806 100644 --- a/konlpy/tag/_hannanum.py +++ b/konlpy/tag/_hannanum.py @@ -16,9 +16,12 @@ tag_re = '(.+?\\/\\w+)\\+?' -def parse(result, flatten=False): - def parse_opt(opt): - return [tuple(u.rsplit('/', 1)) for u in re.findall(tag_re, opt.strip())] +def parse(result, flatten=False, join=False): + def parse_opt(opt, join=False): + if join: + return [u for u in re.findall(tag_re, opt.strip())] + else: + return [tuple(u.rsplit('/', 1)) for u in re.findall(tag_re, opt.strip())] if not result: return [] @@ -28,10 +31,10 @@ def parse_opt(opt): parts = utils.partition(elems, index) if flatten: - return sum([parse_opt(opt) for part in parts + return sum([parse_opt(opt, join=join) for part in parts for opt in list(filter(None, part))[1:]], []) else: - return [[parse_opt(opt) for opt in list(filter(None, part))[1:]] + return [[parse_opt(opt, join=join) for opt in list(filter(None, part))[1:]] for part in parts] @@ -71,13 +74,15 @@ def analyze(self, phrase): result = self.jhi.morphAnalyzer(phrase) return parse(result) - def pos(self, phrase, ntags=9, flatten=True): + def pos(self, phrase, ntags=9, flatten=True, join=False): """POS tagger. This tagger is HMM based, and calculates the probability of tags. :param ntags: The number of tags. It can be either 9 or 22. - :param flatten: If False, preserves eojeols.""" + :param flatten: If False, preserves eojeols. + :param join: If True, returns joined sets of morph and tag. + """ if ntags == 9: result = self.jhi.simplePos09(phrase) @@ -85,7 +90,7 @@ def pos(self, phrase, ntags=9, flatten=True): result = self.jhi.simplePos22(phrase) else: raise Exception('ntags in [9, 22]') - return parse(result, flatten=flatten) + return parse(result, flatten=flatten, join=join) def nouns(self, phrase): """Noun extractor.""" diff --git a/konlpy/tag/_kkma.py b/konlpy/tag/_kkma.py index 2231c00..eea8fa8 100644 --- a/konlpy/tag/_kkma.py +++ b/konlpy/tag/_kkma.py @@ -46,10 +46,12 @@ def nouns(self, phrase): if not nouns: return [] return [nouns.get(i).getString() for i in range(nouns.size())] - def pos(self, phrase, flatten=True): + def pos(self, phrase, flatten=True, join=False): """POS tagger. - :param flatten: If False, preserves eojeols.""" + :param flatten: If False, preserves eojeols. + :param join: If True, returns joined sets of morph and tag. + """ sentences = self.jki.morphAnalyzer(phrase) morphemes = [] @@ -63,10 +65,17 @@ def pos(self, phrase, flatten=True): if flatten: for k in range(eojeol.size()): morpheme = eojeol.get(k) - morphemes.append((morpheme.getString(), morpheme.getTag())) + if join: + morphemes.append(morpheme.getString() + '/' + morpheme.getTag()) + else: + morphemes.append((morpheme.getString(), morpheme.getTag())) else: - morphemes.append([(eojeol.get(k).getString(), eojeol.get(k).getTag()) - for k in range(eojeol.size())]) + if join: + morphemes.append([eojeol.get(k).getString() + '/' + eojeol.get(k).getTag() + for k in range(eojeol.size())]) + else: + morphemes.append([(eojeol.get(k).getString(), eojeol.get(k).getTag()) + for k in range(eojeol.size())]) return morphemes diff --git a/konlpy/tag/_komoran.py b/konlpy/tag/_komoran.py index 5fcd7d6..f37d584 100644 --- a/konlpy/tag/_komoran.py +++ b/konlpy/tag/_komoran.py @@ -15,14 +15,20 @@ __all__ = ['Komoran'] -def parse(result, flatten): - def _parse(token): - return [tuple(s[1:].rsplit('/', 1)) for s in re.findall('\+.+?/[A-Z]+', token)] +def parse(result, flatten, join=False): + def _parse(token, join=False): + if join: + return [s[1:] for s in re.findall('\+.+?/[A-Z]+', token)] + else: + return [tuple(s[1:].rsplit('/', 1)) for s in re.findall('\+.+?/[A-Z]+', token)] if sys.version_info[0] < 3: - parsed = [[tuple(r.rsplit('/', 1)) for r in sublist] for sublist in result] + if join: + parsed = [[r for r in sublist] for sublist in result] + else: + parsed = [[tuple(r.rsplit('/', 1)) for r in sublist] for sublist in result] else: - parsed = [_parse(i) for i in result[1:-1].split(', ')] + parsed = [_parse(i, join=join) for i in result[1:-1].split(', ')] if flatten: return sum(parsed, []) @@ -49,17 +55,19 @@ class Komoran(): :param dicpath: The path of dictionary files. The KOMORAN system dictionary is loaded by default. """ - def pos(self, phrase, flatten=True): + def pos(self, phrase, flatten=True, join=False): """POS tagger. - :param flatten: If False, preserves eojeols.""" + :param flatten: If False, preserves eojeols. + :param join: If True, returns joined sets of morph and tag. + """ if sys.version_info[0] < 3: result = self.jki.analyzeMorphs(phrase, self.dicpath) else: result = self.jki.analyzeMorphs3(phrase, self.dicpath).toString() - return parse(result, flatten) + return parse(result, flatten, join=join) def nouns(self, phrase): """Noun extractor.""" diff --git a/konlpy/tag/_mecab.py b/konlpy/tag/_mecab.py index cb8c71e..d839f14 100644 --- a/konlpy/tag/_mecab.py +++ b/konlpy/tag/_mecab.py @@ -24,13 +24,16 @@ 'indexed'] # 인덱스 표현 -def parse(result, allattrs=False): - def split(elem): +def parse(result, allattrs=False, join=False): + def split(elem, join=False): if not elem: return ('', 'SY') s, t = elem.split('\t') - return (s, t.split(',', 1)[0]) + if join: + return s + '/' + t.split(',', 1)[0] + else: + return (s, t.split(',', 1)[0]) - return [split(elem) for elem in result.splitlines()[:-1]] + return [split(elem, join=join) for elem in result.splitlines()[:-1]] class Mecab(): @@ -64,26 +67,27 @@ class Mecab(): """ # TODO: check whether flattened results equal non-flattened - def pos(self, phrase, flatten=True): + def pos(self, phrase, flatten=True, join=False): """POS tagger. :param flatten: If False, preserves eojeols. + :param join: If True, returns joined sets of morph and tag. """ if sys.version_info[0] < 3: phrase = phrase.encode('utf-8') if flatten: result = self.tagger.parse(phrase).decode('utf-8') - return parse(result) + return parse(result, join=join) else: - return [parse(self.tagger.parse(eojeol).decode('utf-8')) + return [parse(self.tagger.parse(eojeol).decode('utf-8'), join=join) for eojeol in phrase.split()] else: if flatten: result = self.tagger.parse(phrase) - return parse(result) + return parse(result, join=join) else: - return [parse(self.tagger.parse(eojeol).decode('utf-8')) + return [parse(self.tagger.parse(eojeol).decode('utf-8'), join=join) for eojeol in phrase.split()] def morphs(self, phrase): diff --git a/konlpy/tag/_twitter.py b/konlpy/tag/_twitter.py index 3b98e2b..5b7502d 100644 --- a/konlpy/tag/_twitter.py +++ b/konlpy/tag/_twitter.py @@ -34,7 +34,7 @@ class Twitter(): :param jvmpath: The path of the JVM passed to :py:func:`.init_jvm`. """ - def pos(self, phrase, norm=False, stem=False): + def pos(self, phrase, norm=False, stem=False, join=False): """POS tagger. In contrast to other classes in this subpackage, this POS tagger doesn't have a `flatten` option, @@ -43,13 +43,17 @@ def pos(self, phrase, norm=False, stem=False): :param norm: If True, normalize tokens. :param stem: If True, stem tokens. + :param join: If True, returns joined sets of morph and tag. """ tokens = self.jki.tokenize( phrase, jpype.java.lang.Boolean(norm), jpype.java.lang.Boolean(stem)).toArray() - return [tuple(t.rsplit('/', 1)) for t in tokens] + if join: + return [t for t in tokens] + else: + return [tuple(t.rsplit('/', 1)) for t in tokens] def nouns(self, phrase): """Noun extractor.""" diff --git a/test/test_hannanum.py b/test/test_hannanum.py index 5c88c89..c2a3194 100644 --- a/test/test_hannanum.py +++ b/test/test_hannanum.py @@ -68,3 +68,13 @@ def test_hannanum_pos_22(hannanum_instance, string): (u'\uac00', u'PX'), (u'\uc790', u'EC'), (u'!', u'SF')] + +def test_hannanum_pos_join(hannanum_instance, string): + assert hannanum_instance.pos(string, join=True) ==\ + [u'\uaf43\uac00\ub9c8/N', + u'\ud0c0/P', + u'\uace0/E', + u'\uac15\ub0a8/N', + u'\uac00/P', + u'\uc790/E', + u'!/S'] diff --git a/test/test_kkma.py b/test/test_kkma.py index 2269165..38cedcd 100644 --- a/test/test_kkma.py +++ b/test/test_kkma.py @@ -35,6 +35,14 @@ def test_kkma_pos(kkma_instance, string): (u'\uac00\uc790', u'NNG'), (u'!', u'SF')] +def test_kkma_pos_join(kkma_instance, string): + assert kkma_instance.pos(string, join=True) ==\ + [u'\uaf43\uac00\ub9c8/NNG', + u'\ud0c0\uace0/NNG', + u'\uac15\ub0a8/NNG', + u'\uac00\uc790/NNG', + u'!/SF'] + def test_kkma_sentences(kkma_instance, string): assert kkma_instance.sentences(string) ==\ [u'\uaf43\uac00\ub9c8 \ud0c0\uace0 \uac15\ub0a8 \uac00\uc790!'] diff --git a/test/test_komoran.py b/test/test_komoran.py index a6aa9d1..df957bf 100644 --- a/test/test_komoran.py +++ b/test/test_komoran.py @@ -34,3 +34,13 @@ def test_komoran_pos(komoran_instance, string): (u'\uac00', u'VV'), (u'\uc790', u'EF'), (u'!', u'SF')] + + def test_komoran_pos_join(komoran_instance, string): + assert komoran_instance.pos(string, join=True) ==\ + [u'\uaf43\uac00\ub9c8/NNG', + u'\ud0c0/VV', + u'\uace0/EC', + u'\uac15\ub0a8/NNP', + u'\uac00/VV', + u'\uc790/EF', + u'!/SF'] diff --git a/test/test_mecab.py b/test/test_mecab.py index d032e1a..b87b927 100644 --- a/test/test_mecab.py +++ b/test/test_mecab.py @@ -24,6 +24,16 @@ def test_mecab_pos_43(mecab_instance, string): (u'\uc790', u'EF'), (u'!', u'SF')] +def test_mecab_pos_join(mecab_instance, string): + assert mecab_instance.pos(string, join=True) ==\ + [u'\uaf43\uac00\ub9c8/NNG', + u'\ud0c0/VV', + u'\uace0/EC', + u'\uac15\ub0a8/NNP', + u'\uac00/VV', + u'\uc790/EF', + u'!/SF'] + def test_mecab_morphs(mecab_instance, string): assert mecab_instance.morphs(string) ==\ [u'\uaf43\uac00\ub9c8', diff --git a/test/test_twitter.py b/test/test_twitter.py index 9c15384..d9fc2b0 100644 --- a/test/test_twitter.py +++ b/test/test_twitter.py @@ -33,6 +33,10 @@ def test_tkorean_pos_3(tkorean_instance, string): assert tkorean_instance.pos(string, stem=True, norm=True) ==\ [(u'\uaf43', u'Noun'), (u'\uac00\ub9c8', u'Noun'), (u'\ud0c0\uace0', u'Noun'), (u'\uac15\ub0a8', u'Noun'), (u'\uac00\ub098', u'Noun'), (u'\uc694', u'Josa'), (u'\u314b\u314b', u'KoreanParticle')] +def test_tkorean_pos_join(tkorean_instance, string): + assert tkorean_instance.pos(string, join=True) ==\ + [u'\uaf43/Noun', u'\uac00\ub9c8/Noun', u'\ud0c0\uace0/Noun', u'\uac15\ub0a8/Noun', u'\uac00\ub098/Noun', u'\uc6ac/Noun', u'\u314b\u314b\u314b\u314b/KoreanParticle'] + def test_tkorean_nouns(tkorean_instance, string): assert tkorean_instance.nouns(string) ==\ [u'\uaf43', u'\uac00\ub9c8', u'\ud0c0\uace0', u'\uac15\ub0a8', u'\uac00\ub098', u'\uc6ac']