From 652a513ac4a0d6eeed9144b8a2fcf6d975563e9a Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Fri, 25 Aug 2023 22:08:28 +0700 Subject: [PATCH] LaoNLP v1.0 --- README.md | 2 +- laonlp/__init__.py | 15 +++++++++++++++ laonlp/corpus/__init__.py | 15 +++++++++++++++ laonlp/corpus/core.py | 15 +++++++++++++++ laonlp/corpus/lao_words.py | 16 +++++++++++++++- laonlp/corpus/mopt_dict.py | 18 ++++++++++++++++-- laonlp/tag/__init__.py | 15 +++++++++++++++ laonlp/tag/pos_tag.py | 15 +++++++++++++++ laonlp/tokenize/__init__.py | 15 +++++++++++++++ laonlp/translate/__init__.py | 15 +++++++++++++++ laonlp/translate/mopt_dict.py | 17 +++++++++++++++-- laonlp/transliterate/__init__.py | 15 +++++++++++++++ laonlp/util/__init__.py | 15 +++++++++++++++ laonlp/util/digitconv.py | 15 +++++++++++++++ laonlp/util/lao.py | 16 ++++++++++++++++ laonlp/word_vector/__init__.py | 15 +++++++++++++++ laonlp/word_vector/word2vec.py | 15 +++++++++++++++ setup.py | 19 +++++++++++++++++-- 18 files changed, 260 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 4697542..f2d2628 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ Lao language Natural Language Processing (NLP) - Lao to Thai script - Thai to Lao script - Word dictionary -- Word Vector (**New**) +- Word Vector ## Install ``` diff --git a/laonlp/__init__.py b/laonlp/__init__.py index 8eca586..cfb154d 100644 --- a/laonlp/__init__.py +++ b/laonlp/__init__.py @@ -1,4 +1,19 @@ # -*- coding: utf-8 -*- +""" +Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" from laonlp.tokenize import * from laonlp.corpus import * from laonlp.transliterate import * diff --git a/laonlp/corpus/__init__.py b/laonlp/corpus/__init__.py index cb7dc35..4243707 100644 --- a/laonlp/corpus/__init__.py +++ b/laonlp/corpus/__init__.py @@ -1,4 +1,19 @@ # -*- coding: utf-8 -*- +""" +Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" import laonlp import os diff --git a/laonlp/corpus/core.py b/laonlp/corpus/core.py index 68e0838..2f8198f 100644 --- a/laonlp/corpus/core.py +++ b/laonlp/corpus/core.py @@ -1,4 +1,19 @@ # -*- coding: utf-8 -*- +""" +Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" import os from laonlp.corpus import laonlp_path diff --git a/laonlp/corpus/lao_words.py b/laonlp/corpus/lao_words.py index 3dd20dc..7771977 100644 --- a/laonlp/corpus/lao_words.py +++ b/laonlp/corpus/lao_words.py @@ -1,5 +1,19 @@ # -*- coding: utf-8 -*- -import os +""" +Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" from typing import List from typing import FrozenSet from laonlp.corpus.core import get_path_corpus diff --git a/laonlp/corpus/mopt_dict.py b/laonlp/corpus/mopt_dict.py index 0e4c981..758cffd 100644 --- a/laonlp/corpus/mopt_dict.py +++ b/laonlp/corpus/mopt_dict.py @@ -1,9 +1,23 @@ # -*- coding: utf-8 -*- +""" +Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" import csv -import os +from collections import defaultdict from laonlp.corpus import laonlp_path -from collections import defaultdict from laonlp.corpus.core import get_path_corpus corpus_path = get_path_corpus("lao-eng-dictionary.csv") list_data=[] diff --git a/laonlp/tag/__init__.py b/laonlp/tag/__init__.py index 48d52ad..9a8921d 100644 --- a/laonlp/tag/__init__.py +++ b/laonlp/tag/__init__.py @@ -1,4 +1,19 @@ # -*- coding: utf-8 -*- +""" +Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" from laonlp.tag.pos_tag import pos_tag __all__ = [ diff --git a/laonlp/tag/pos_tag.py b/laonlp/tag/pos_tag.py index a72d04d..9cec5f8 100644 --- a/laonlp/tag/pos_tag.py +++ b/laonlp/tag/pos_tag.py @@ -1,4 +1,19 @@ # -*- coding: utf-8 -*- +""" +Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" from typing import List, Tuple from laonlp.corpus import get_path_corpus from pythainlp.tag import PerceptronTagger diff --git a/laonlp/tokenize/__init__.py b/laonlp/tokenize/__init__.py index e66c547..d38cc7d 100644 --- a/laonlp/tokenize/__init__.py +++ b/laonlp/tokenize/__init__.py @@ -1,4 +1,19 @@ # -*- coding: utf-8 -*- +""" +Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" from typing import List from pythainlp.tokenize import Tokenizer from laonlp.corpus import lao_words diff --git a/laonlp/translate/__init__.py b/laonlp/translate/__init__.py index 74abc7d..15165dc 100644 --- a/laonlp/translate/__init__.py +++ b/laonlp/translate/__init__.py @@ -1,4 +1,19 @@ # -*- coding: utf-8 -*- +""" +Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" __all__ = [ "word_dictionary", ] diff --git a/laonlp/translate/mopt_dict.py b/laonlp/translate/mopt_dict.py index 195cea6..693c59b 100644 --- a/laonlp/translate/mopt_dict.py +++ b/laonlp/translate/mopt_dict.py @@ -1,6 +1,19 @@ # -*- coding: utf-8 -*- -import csv -import os +""" +Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" from typing import List from laonlp.corpus import mopt_dict diff --git a/laonlp/transliterate/__init__.py b/laonlp/transliterate/__init__.py index 6ae4438..d116dbc 100644 --- a/laonlp/transliterate/__init__.py +++ b/laonlp/transliterate/__init__.py @@ -1,4 +1,19 @@ # -*- coding: utf-8 -*- +""" +Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" __all__ = [ "lao2thai_script", "thai2lao_script", diff --git a/laonlp/util/__init__.py b/laonlp/util/__init__.py index acc17a5..43905b9 100644 --- a/laonlp/util/__init__.py +++ b/laonlp/util/__init__.py @@ -1,4 +1,19 @@ # -*- coding: utf-8 -*- +""" +Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" __all__ = [ "lao_digit_to_arabic_digit", "arabic_digit_to_lao_digit", diff --git a/laonlp/util/digitconv.py b/laonlp/util/digitconv.py index ae44aad..87d7c55 100644 --- a/laonlp/util/digitconv.py +++ b/laonlp/util/digitconv.py @@ -1,4 +1,19 @@ # -*- coding: utf-8 -*- +""" +Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" NUMBERS = "໑໒໓໔໕໖໗໘໙໐" _arabic_numerals = "1234567890" _pronunciation = [ diff --git a/laonlp/util/lao.py b/laonlp/util/lao.py index a9e3d5e..651214a 100644 --- a/laonlp/util/lao.py +++ b/laonlp/util/lao.py @@ -1,3 +1,19 @@ +# -*- coding: utf-8 -*- +""" +Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" TONE_MARKS = "່້"+"໊໋" _tone_mark = str.maketrans({i:None for i in TONE_MARKS}) diff --git a/laonlp/word_vector/__init__.py b/laonlp/word_vector/__init__.py index 46b4908..e3205f5 100644 --- a/laonlp/word_vector/__init__.py +++ b/laonlp/word_vector/__init__.py @@ -1,4 +1,19 @@ # -*- coding: utf-8 -*- +""" +Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" from laonlp.word_vector.word2vec import Word2Vec __all__ = [ diff --git a/laonlp/word_vector/word2vec.py b/laonlp/word_vector/word2vec.py index 8a841ce..8cd73cd 100644 --- a/laonlp/word_vector/word2vec.py +++ b/laonlp/word_vector/word2vec.py @@ -1,4 +1,19 @@ # -*- coding: utf-8 -*- +""" +Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" from typing import List import gensim from huggingface_hub import hf_hub_download diff --git a/setup.py b/setup.py index 06abf88..033bed0 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,19 @@ # -*- coding: utf-8 -*- +""" +Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" from setuptools import find_packages, setup with open("README.md","r",encoding="utf-8-sig") as f: @@ -9,7 +24,7 @@ setup( name="LaoNLP", - version="0.7", + version="1.0", description="Lao Natural Language Processing library", long_description=readme, long_description_content_type="text/markdown", @@ -38,7 +53,7 @@ "Lao language", ], classifiers=[ - "Development Status :: 3 - Alpha", + "Development Status :: 5 - Production/Stable", "Programming Language :: Python :: 3", "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License",