Follow PEP 621 (#38)
* Follow PEP621

* Fix a bug in util.py

* Remove unnecessary files

* Fix CI

* Apply black

* Update pypi-publish.yml
yasufumy authored Jul 16, 2023
1 parent fbbdd9c commit 22df87c
Showing 11 changed files with 50 additions and 127 deletions.
14 changes: 7 additions & 7 deletions .github/workflows/ci.yml
@@ -19,19 +19,19 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install -U pip
-          pip install -r requirements.txt
-      - name: Lint with flake8
+          python -m pip install -e ".[dev]"
+      - name: Lint with ruff
         run: |
-          flake8
+          ruff check spacy_partial_tagger
+          ruff check tests
       - name: Lint with black
         run: |
-          black . --check
-      - name: Lint with isort
-        run: |
-          isort -c .
+          black . --check spacy_partial_tagger
+          black . --check tests
       - name: Lint with mypy
         run: |
           mypy spacy_partial_tagger
+          mypy tests
       - name: Run tests
         run: |
           pytest --cov=spacy_partial_tagger --cov-report=term-missing
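Aside: the checks above can be mirrored locally before pushing. A minimal sketch, assuming the dev extras are installed; this helper script is not part of the commit, and the single combined black invocation is an assumption:

```python
# run_checks.py -- hypothetical local mirror of the CI lint/test steps above.
import subprocess
import sys

CHECKS = [
    ["ruff", "check", "spacy_partial_tagger"],
    ["ruff", "check", "tests"],
    ["black", "--check", "spacy_partial_tagger", "tests"],
    ["mypy", "spacy_partial_tagger"],
    ["mypy", "tests"],
    ["pytest", "--cov=spacy_partial_tagger", "--cov-report=term-missing"],
]

for cmd in CHECKS:
    print("$", " ".join(cmd))
    if subprocess.run(cmd).returncode != 0:
        sys.exit(1)  # stop at the first failing check, as CI would
```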
5 changes: 3 additions & 2 deletions .github/workflows/pypi-publish.yml
@@ -16,10 +16,11 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install poetry
+          python -m pip install hatch build
      - name: Build a binary wheel and a source tarball
        run: |
-          poetry build
+          hatch version "${GITHUB_REF_NAME}"
+          python -m build
      - name: Publish a Python distribution to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
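The new `hatch version "${GITHUB_REF_NAME}"` step stamps the pushed tag into the package before `python -m build` runs; hatch rewrites the `__version__` assignment in the file configured under `[tool.hatch.version]`. A rough sketch of that effect, where the regex and the leading-"v" stripping are assumptions about hatch's default behavior rather than code from this commit:

```python
import re
from pathlib import Path

ABOUT = Path("spacy_partial_tagger/__about__.py")

def stamp_version(tag: str) -> None:
    # Strip a leading "v" so a tag like "v0.16.0" becomes "0.16.0" (assumed).
    version = tag[1:] if tag.startswith("v") else tag
    source = ABOUT.read_text()
    ABOUT.write_text(
        re.sub(r'__version__ = ".*"', '__version__ = "%s"' % version, source)
    )

if __name__ == "__main__":
    stamp_version("v0.16.0")  # hypothetical tag name
```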
3 changes: 3 additions & 0 deletions .gitignore
@@ -160,3 +160,6 @@ cython_debug/
 
 .tool-versions
 poetry.lock
+.pdm-python
+pdm.toml
+pdm.lock
69 changes: 35 additions & 34 deletions pyproject.toml
@@ -1,59 +1,60 @@
 [build-system]
-requires = ["poetry-core>=1.0.0"]
-build-backend = "poetry.core.masonry.api"
+requires = ["hatchling"]
+build-backend = "hatchling.build"
 
+[project]
+name = "spacy-partial-tagger"
+requires-python = ">=3.8"
-[tool.poetry]
-name = "spacy-partial-tagger"
-version = "0.15.2"
 description = "Sequence Tagger for Partially Annotated Dataset in spaCy"
-authors = ["yasufumi <[email protected]>"]
-license = "MIT"
-readme = "README.md"
-repository = "https://github.com/tech-sketch/spacy-partial-tagger"
-requires-python = ">=3.8,<4.0"
+readme = {file = "README.md", content-type = "text/markdown"}
+license = {file = "LICENSE"}
+authors = [
+    {name = "Yasufumi Taniguchi", email = "[email protected]"},
+]
 classifiers = [
     "Programming Language :: Python",
     "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9"
 ]
+dependencies = [
+    "thinc<9.0.0,>=8.0.15",
+    "transformers[ja]<5.0.0,>=4.25.1",
+    "torch<3.0.0,>=2.0.1",
+    "spacy[transformers]<4.0.0,>=3.3.1",
+    "spacy-alignments<1.0.0,>=0.8.5",
+    "pytorch-partial-tagger<1.0.0,>=0.1.12",
+]
+dynamic = ["version"]
 
-[tool.poetry.dependencies]
-python = "^3.8"
-thinc = "^8.0.15"
-transformers = {extras = ["ja"], version = "^4.25.1"}
-torch = "^2.0.1"
-spacy = {extras = ["transformers"], version = "^3.3.1"}
-spacy-alignments = "^0.8.5"
-pytorch-partial-tagger = "^0.1.12"
+[project.urls]
+Repository = "https://github.com/doccano/spacy-partial-tagger"
 
-[tool.poetry.group.dev.dependencies]
-mypy = "^1.3.0"
-black = "^22.3.0"
-pytest = "^7.1.1"
-isort = "^5.10.1"
-flake8 = "^4.0.1"
-pytest-cov = "^3.0.0"
-ruff = "^0.0.270"
+[project.optional-dependencies]
+dev = [
+    "mypy>=1.3.1",
+    "black>=23.3.0",
+    "pytest>=7.1.1",
+    "isort>=5.10.1",
+    "flake8>=4.0.1",
+    "pytest-cov>=3.0.0",
+    "ruff>=0.0.270",
+]
 
-[tool.poetry.plugins.spacy_factories]
+[project.entry-points]
+[project.entry-points.spacy_factories]
 partial_ner = "spacy_partial_tagger.pipeline:make_partial_ner"
 
-[tool.poetry.plugins.spacy_architectures]
+[project.entry-points.spacy_architectures]
 "spacy-partial-tagger.PartialTagger.v1" = "spacy_partial_tagger.tagger:build_partial_tagger_v1"
 
+[tool.hatch.version]
+path = "spacy_partial_tagger/__about__.py"
+
 [tool.mypy]
 ignore_missing_imports = true
 disallow_untyped_defs = true
 show_error_codes = true
 
 [tool.isort]
 profile = "black"
 include_trailing_comma = true
 multi_line_output = 3
 
 [tool.black]
 exclude = '''
 /(
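One way to confirm the renamed entry-point tables still register correctly: after installation, groups declared under `[project.entry-points]` are discoverable through the standard metadata API, just as Poetry's plugins tables were. A small check, using the Python 3.10+ `entry_points(group=...)` form (on 3.8/3.9 the importlib_metadata backport offers the same interface):

```python
from importlib.metadata import entry_points

# Entries under [project.entry-points.spacy_factories] land in the
# "spacy_factories" group of the installed distribution's metadata.
for ep in entry_points(group="spacy_factories"):
    print(ep.name, "->", ep.value)
# expected: partial_ner -> spacy_partial_tagger.pipeline:make_partial_ner
```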
74 changes: 0 additions & 74 deletions requirements.txt

This file was deleted.

5 changes: 0 additions & 5 deletions setup.cfg

This file was deleted.

1 change: 1 addition & 0 deletions spacy_partial_tagger/__about__.py
@@ -0,0 +1 @@
+__version__ = "0.15.2"
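With `version` listed under `dynamic`, hatchling reads it from this file at build time, so the installed distribution metadata and the module attribute come from one source. A quick sanity check, assuming the package is installed:

```python
from importlib.metadata import version

from spacy_partial_tagger.__about__ import __version__

# Both values trace back to __about__.py after the switch to hatchling.
assert version("spacy-partial-tagger") == __version__ == "0.15.2"
```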
2 changes: 0 additions & 2 deletions spacy_partial_tagger/pipeline.py
@@ -51,7 +51,6 @@ def set_annotations(
         docs: List[Doc],
         tag_indices: Floats2d,
     ) -> None:
-
         for doc, indices in zip(docs, tag_indices.tolist()):
             indices = [index for index in indices if index != self.padding_index]
             alignment = doc.user_data["alignment"]
@@ -157,7 +156,6 @@ def add_label(self, label: str) -> int:
     def from_bytes(
         self, bytes_data: bytes, *, exclude: tuple = ()
     ) -> "PartialEntityRecognizer":
-
         self._validate_serialization_attrs()
 
         def load_model(b: bytes) -> None:
1 change: 0 additions & 1 deletion spacy_partial_tagger/tagger.py
@@ -42,7 +42,6 @@ def forward(
     X: List[Doc],
     is_train: bool,
 ) -> Tuple[Tuple[Floats4d, Ints2d], Callable]:
-
     tokenizer: BaseTokenizer = model.attrs["tokenizer"]
 
     text_batch = tokenizer(tuple(doc.text for doc in X))
1 change: 0 additions & 1 deletion spacy_partial_tagger/tokenizer.py
@@ -20,7 +20,6 @@ def __init__(
         tokenizer: _BertJapaneseTokenizer,
         tokenizer_args: Optional[dict] = None,
     ):
-
         self.__tokenizer = tokenizer
 
         self.__tokenizer_args = tokenizer_args or {
2 changes: 1 addition & 1 deletion spacy_partial_tagger/util.py
@@ -30,7 +30,7 @@ def get_alignments(
     tokens = tokenizer.word_tokenizer.tokenize(
         text, never_split=tokenizer.all_special_tokens
     )
-    _, y2x = tokenizations.get_alignments(text, tokens)
+    _, y2x = tokenizations.get_alignments(list(text), tokens)
     token2char = {i: (x[0], x[-1] + 1) for i, x in enumerate(y2x)}
 
     pieces = [
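On the util.py fix: `tokenizations.get_alignments` aligns two sequences of strings, so the text has to be passed as a list of characters to obtain a per-character mapping; this is the bug the commit message refers to. A small illustration of the corrected call, with made-up example strings:

```python
import tokenizations  # the pytokenizations package

text = "今日は"
tokens = ["今日", "は"]

# y2x maps each token to the character indices it covers.
_, y2x = tokenizations.get_alignments(list(text), tokens)
print(y2x)  # [[0, 1], [2]]

# Same dict comprehension as util.py: token index -> (start, end) char span.
token2char = {i: (x[0], x[-1] + 1) for i, x in enumerate(y2x)}
print(token2char)  # {0: (0, 2), 1: (2, 3)}
```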
