diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c78d43a..bd737cc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,19 +19,19 @@ jobs: - name: Install dependencies run: | python -m pip install -U pip - pip install -r requirements.txt - - name: Lint with flake8 + python -m pip install -e ".[dev]" + - name: Lint with ruff run: | - flake8 + ruff check spacy_partial_tagger + ruff check tests - name: Lint with black run: | - black . --check - - name: Lint with isort - run: | - isort -c . + black . --check spacy_partial_tagger + black . --check tests - name: Lint with mypy run: | mypy spacy_partial_tagger + mypy tests - name: Run tests run: | pytest --cov=spacy_partial_tagger --cov-report=term-missing diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index db7b796..ebcc448 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -16,10 +16,11 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install poetry + python -m pip install hatch build - name: Build a binary wheel and a source tarball run: | - poetry build + hatch version "${GITHUB_REF_NAME}" + python -m build - name: Publish a Python distribution to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: diff --git a/.gitignore b/.gitignore index f859de0..f6bd162 100644 --- a/.gitignore +++ b/.gitignore @@ -160,3 +160,6 @@ cython_debug/ .tool-versions poetry.lock +.pdm-python +pdm.toml +pdm.lock diff --git a/pyproject.toml b/pyproject.toml index 7903937..d1cc1c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,59 +1,60 @@ [build-system] -requires = ["poetry-core>=1.0.0"] -build-backend = "poetry.core.masonry.api" +requires = ["hatchling"] +build-backend = "hatchling.build" [project] name = "spacy-partial-tagger" -requires-python = ">=3.8" - -[tool.poetry] -name = "spacy-partial-tagger" -version = "0.15.2" description = "Sequence Tagger for Partially Annotated Dataset in spaCy" -authors = ["yasufumi "] -license = "MIT" -readme = "README.md" -repository = "https://github.com/tech-sketch/spacy-partial-tagger" +requires-python = ">=3.8,<4.0" +readme = {file = "README.md", content-type = "text/markdown"} +license = {file = "LICENSE"} +authors = [ + {name = "Yasufumi Taniguchi", email = "yasufumi.taniguchi@gmail.com"}, +] classifiers = [ "Programming Language :: Python", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9" ] +dependencies = [ + "thinc<9.0.0,>=8.0.15", + "transformers[ja]<5.0.0,>=4.25.1", + "torch<3.0.0,>=2.0.1", + "spacy[transformers]<4.0.0,>=3.3.1", + "spacy-alignments<1.0.0,>=0.8.5", + "pytorch-partial-tagger<1.0.0,>=0.1.12", +] +dynamic = ["version"] -[tool.poetry.dependencies] -python = "^3.8" -thinc = "^8.0.15" -transformers = {extras = ["ja"], version = "^4.25.1"} -torch = "^2.0.1" -spacy = {extras = ["transformers"], version = "^3.3.1"} -spacy-alignments = "^0.8.5" -pytorch-partial-tagger = "^0.1.12" +[project.urls] +Repository = "https://github.com/doccano/spacy-partial-tagger" -[tool.poetry.group.dev.dependencies] -mypy = "^1.3.0" -black = "^22.3.0" -pytest = "^7.1.1" -isort = "^5.10.1" -flake8 = "^4.0.1" -pytest-cov = "^3.0.0" -ruff = "^0.0.270" +[project.optional-dependencies] +dev = [ + "mypy>=1.3.1", + "black>=23.3.0", + "pytest>=7.1.1", + "isort>=5.10.1", + "flake8>=4.0.1", + "pytest-cov>=3.0.0", + "ruff>=0.0.270", +] -[tool.poetry.plugins.spacy_factories] +[project.entry-points] +[project.entry-points.spacy_factories] partial_ner = "spacy_partial_tagger.pipeline:make_partial_ner" -[tool.poetry.plugins.spacy_architectures] +[project.entry-points.spacy_architectures] "spacy-partial-tagger.PartialTagger.v1" = "spacy_partial_tagger.tagger:build_partial_tagger_v1" +[tool.hatch.version] +path = "spacy_partial_tagger/__about__.py" + [tool.mypy] ignore_missing_imports = true disallow_untyped_defs = true show_error_codes = true -[tool.isort] -profile = "black" -include_trailing_comma = true -multi_line_output = 3 - [tool.black] exclude = ''' /( diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 6e74ebf..0000000 --- a/requirements.txt +++ /dev/null @@ -1,74 +0,0 @@ -black==22.12.0 ; python_version >= "3.8" and python_version < "4.0" -blis==0.7.9 ; python_version >= "3.8" and python_version < "4.0" -catalogue==2.0.8 ; python_version >= "3.8" and python_version < "4.0" -certifi==2023.5.7 ; python_version >= "3.8" and python_version < "4.0" -charset-normalizer==3.2.0 ; python_version >= "3.8" and python_version < "4.0" -click==8.1.5 ; python_version >= "3.8" and python_version < "4.0" -colorama==0.4.6 ; python_version >= "3.8" and python_version < "4.0" and sys_platform == "win32" or python_version >= "3.8" and python_version < "4.0" and platform_system == "Windows" -confection==0.1.0 ; python_version >= "3.8" and python_version < "4.0" -coverage[toml]==7.2.7 ; python_version >= "3.8" and python_version < "4.0" -cymem==2.0.7 ; python_version >= "3.8" and python_version < "4.0" -exceptiongroup==1.1.2 ; python_version >= "3.8" and python_version < "3.11" -filelock==3.12.2 ; python_version >= "3.8" and python_version < "4.0" -flake8==4.0.1 ; python_version >= "3.8" and python_version < "4.0" -fsspec==2023.6.0 ; python_version >= "3.8" and python_version < "4.0" -fugashi==1.2.1 ; python_version >= "3.8" and python_version < "4.0" -huggingface-hub==0.16.4 ; python_version >= "3.8" and python_version < "4.0" -idna==3.4 ; python_version >= "3.8" and python_version < "4.0" -iniconfig==2.0.0 ; python_version >= "3.8" and python_version < "4.0" -ipadic==1.0.0 ; python_version >= "3.8" and python_version < "4.0" -isort==5.12.0 ; python_version >= "3.8" and python_version < "4.0" -jinja2==3.1.2 ; python_version >= "3.8" and python_version < "4.0" -langcodes==3.3.0 ; python_version >= "3.8" and python_version < "4.0" -markupsafe==2.1.3 ; python_version >= "3.8" and python_version < "4.0" -mccabe==0.6.1 ; python_version >= "3.8" and python_version < "4.0" -mpmath==1.3.0 ; python_version >= "3.8" and python_version < "4.0" -murmurhash==1.0.9 ; python_version >= "3.8" and python_version < "4.0" -mypy-extensions==1.0.0 ; python_version >= "3.8" and python_version < "4.0" -mypy==1.4.1 ; python_version >= "3.8" and python_version < "4.0" -networkx==3.1 ; python_version >= "3.8" and python_version < "4.0" -numpy==1.24.4 ; python_version >= "3.8" and python_version < "4.0" -packaging==23.1 ; python_version >= "3.8" and python_version < "4.0" -pathspec==0.11.1 ; python_version >= "3.8" and python_version < "4.0" -pathy==0.10.2 ; python_version >= "3.8" and python_version < "4.0" -plac==1.3.5 ; python_version >= "3.8" and python_version < "4.0" -platformdirs==3.9.1 ; python_version >= "3.8" and python_version < "4.0" -pluggy==1.2.0 ; python_version >= "3.8" and python_version < "4.0" -preshed==3.0.8 ; python_version >= "3.8" and python_version < "4.0" -pycodestyle==2.8.0 ; python_version >= "3.8" and python_version < "4.0" -pydantic==1.10.11 ; python_version >= "3.8" and python_version < "4.0" -pyflakes==2.4.0 ; python_version >= "3.8" and python_version < "4.0" -pytest-cov==3.0.0 ; python_version >= "3.8" and python_version < "4.0" -pytest==7.4.0 ; python_version >= "3.8" and python_version < "4.0" -pytorch-partial-tagger==0.1.12 ; python_version >= "3.8" and python_version < "4.0" -pyyaml==6.0 ; python_version >= "3.8" and python_version < "4.0" -regex==2023.6.3 ; python_version >= "3.8" and python_version < "4.0" -requests==2.31.0 ; python_version >= "3.8" and python_version < "4.0" -rhoknp==1.3.0 ; python_version >= "3.8" and python_version < "4.0" -ruff==0.0.270 ; python_version >= "3.8" and python_version < "4.0" -safetensors==0.3.1 ; python_version >= "3.8" and python_version < "4.0" -setuptools==68.0.0 ; python_version >= "3.8" and python_version < "4.0" -smart-open==6.3.0 ; python_version >= "3.8" and python_version < "4.0" -spacy-alignments==0.8.6 ; python_version >= "3.8" and python_version < "4.0" -spacy-legacy==3.0.12 ; python_version >= "3.8" and python_version < "4.0" -spacy-loggers==1.0.4 ; python_version >= "3.8" and python_version < "4.0" -spacy-transformers==1.2.5 ; python_version >= "3.8" and python_version < "4.0" -spacy==3.6.0 ; python_version >= "3.8" and python_version < "4.0" -spacy[transformers]==3.6.0 ; python_version >= "3.8" and python_version < "4.0" -srsly==2.4.6 ; python_version >= "3.8" and python_version < "4.0" -sudachidict-core==20230110 ; python_version >= "3.8" and python_version < "4.0" -sudachipy==0.6.7 ; python_version >= "3.8" and python_version < "4.0" -sympy==1.12 ; python_version >= "3.8" and python_version < "4.0" -thinc==8.1.10 ; python_version >= "3.8" and python_version < "4.0" -tokenizers==0.13.3 ; python_version >= "3.8" and python_version < "4.0" -tomli==2.0.1 ; python_version >= "3.8" and python_full_version < "3.11.0a7" -torch==2.0.1 ; python_version >= "3.8" and python_version < "4.0" -tqdm==4.65.0 ; python_version >= "3.8" and python_version < "4.0" -transformers==4.30.2 ; python_version >= "3.8" and python_version < "4.0" -transformers[ja]==4.30.2 ; python_version >= "3.8" and python_version < "4.0" -typer==0.9.0 ; python_version >= "3.8" and python_version < "4.0" -typing-extensions==4.7.1 ; python_version >= "3.8" and python_version < "4.0" -unidic-lite==1.0.8 ; python_version >= "3.8" and python_version < "4.0" -unidic==1.1.0 ; python_version >= "3.8" and python_version < "4.0" -urllib3==2.0.3 ; python_version >= "3.8" and python_version < "4.0" -wasabi==0.10.1 ; python_version >= "3.8" and python_version < "4.0" diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 0584b2c..0000000 --- a/setup.cfg +++ /dev/null @@ -1,5 +0,0 @@ -[flake8] -max-line-length = 88 -max-complexity = 18 -exclude = .git, .tox, .venv, .eggs, build, dist, docs -ignore = E203,W503,W504 diff --git a/spacy_partial_tagger/__about__.py b/spacy_partial_tagger/__about__.py new file mode 100644 index 0000000..c0d4999 --- /dev/null +++ b/spacy_partial_tagger/__about__.py @@ -0,0 +1 @@ +__version__ = "0.15.2" diff --git a/spacy_partial_tagger/pipeline.py b/spacy_partial_tagger/pipeline.py index 077967f..0184a49 100644 --- a/spacy_partial_tagger/pipeline.py +++ b/spacy_partial_tagger/pipeline.py @@ -51,7 +51,6 @@ def set_annotations( docs: List[Doc], tag_indices: Floats2d, ) -> None: - for doc, indices in zip(docs, tag_indices.tolist()): indices = [index for index in indices if index != self.padding_index] alignment = doc.user_data["alignment"] @@ -157,7 +156,6 @@ def add_label(self, label: str) -> int: def from_bytes( self, bytes_data: bytes, *, exclude: tuple = () ) -> "PartialEntityRecognizer": - self._validate_serialization_attrs() def load_model(b: bytes) -> None: diff --git a/spacy_partial_tagger/tagger.py b/spacy_partial_tagger/tagger.py index 51c01b2..fcf34cb 100644 --- a/spacy_partial_tagger/tagger.py +++ b/spacy_partial_tagger/tagger.py @@ -42,7 +42,6 @@ def forward( X: List[Doc], is_train: bool, ) -> Tuple[Tuple[Floats4d, Ints2d], Callable]: - tokenizer: BaseTokenizer = model.attrs["tokenizer"] text_batch = tokenizer(tuple(doc.text for doc in X)) diff --git a/spacy_partial_tagger/tokenizer.py b/spacy_partial_tagger/tokenizer.py index 7ec60fa..0414c4f 100644 --- a/spacy_partial_tagger/tokenizer.py +++ b/spacy_partial_tagger/tokenizer.py @@ -20,7 +20,6 @@ def __init__( tokenizer: _BertJapaneseTokenizer, tokenizer_args: Optional[dict] = None, ): - self.__tokenizer = tokenizer self.__tokenizer_args = tokenizer_args or { diff --git a/spacy_partial_tagger/util.py b/spacy_partial_tagger/util.py index e5d4feb..2629f8e 100644 --- a/spacy_partial_tagger/util.py +++ b/spacy_partial_tagger/util.py @@ -30,7 +30,7 @@ def get_alignments( tokens = tokenizer.word_tokenizer.tokenize( text, never_split=tokenizer.all_special_tokens ) - _, y2x = tokenizations.get_alignments(text, tokens) + _, y2x = tokenizations.get_alignments(list(text), tokens) token2char = {i: (x[0], x[-1] + 1) for i, x in enumerate(y2x)} pieces = [