diff --git a/README.md b/README.md index 04dca8b2..20d9cdc8 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ We're all hoping for the day of true digitization on which this repository will ## Installation Kohlrahbi is a Python based tool. -Therefor you have to make sure, that Python is running on your machine. +Therefore, you have to make sure that Python is installed on your machine. We recommend to use virtual environments to keep your system clean. diff --git a/dev_requirements/requirements-dev.txt b/dev_requirements/requirements-dev.txt deleted file mode 100644 index ffc7d238..00000000 --- a/dev_requirements/requirements-dev.txt +++ /dev/null @@ -1,119 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.12 -# by the following command: -# -# pip-compile --all-extras --output-file=dev_requirements/requirements-dev.txt pyproject.toml -# -annotated-types==0.7.0 - # via pydantic -astroid==3.2.4 - # via pylint -click==8.1.7 - # via kohlrahbi (pyproject.toml) -colorlog==6.8.2 - # via kohlrahbi (pyproject.toml) -coverage==7.6.1 - # via kohlrahbi (pyproject.toml) -dictdiffer==0.9.0 - # via kohlrahbi (pyproject.toml) -dill==0.3.8 - # via pylint -efoli==1.1.0 - # via kohlrahbi (pyproject.toml) -et-xmlfile==1.1.0 - # via openpyxl -freezegun==1.5.1 - # via kohlrahbi (pyproject.toml) -iniconfig==2.0.0 - # via pytest -isort==5.13.2 - # via pylint -lxml==5.3.0 - # via python-docx -mccabe==0.7.0 - # via pylint -more-itertools==10.4.0 - # via kohlrahbi (pyproject.toml) -mypy==1.11.2 - # via kohlrahbi (pyproject.toml) -mypy-extensions==1.0.0 - # via mypy -networkx==3.3 - # via networkx-stubs -networkx-stubs==0.0.1 - # via kohlrahbi (pyproject.toml) -numpy==2.1.0 - # via - # pandas - # pandas-stubs -openpyxl==3.1.5 - # via kohlrahbi (pyproject.toml) -packaging==24.1 - # via pytest -pandas==2.2.2 - # via kohlrahbi (pyproject.toml) -pandas-stubs==2.2.2.240807 - # via kohlrahbi (pyproject.toml) -platformdirs==4.2.2 - # via pylint -pluggy==1.5.0 - # via pytest 
-pydantic==2.8.2 - # via - # kohlrahbi (pyproject.toml) - # pylint-pydantic -pydantic-core==2.20.1 - # via pydantic -pylint==3.2.7 - # via - # kohlrahbi (pyproject.toml) - # pylint-plugin-utils - # pylint-pydantic -pylint-plugin-utils==0.8.2 - # via pylint-pydantic -pylint-pydantic==0.3.2 - # via kohlrahbi (pyproject.toml) -pytest==8.3.2 - # via - # kohlrahbi (pyproject.toml) - # pytest-datafiles - # syrupy -pytest-datafiles==3.0.0 - # via kohlrahbi (pyproject.toml) -python-dateutil==2.9.0.post0 - # via - # freezegun - # pandas -python-docx==1.1.2 - # via kohlrahbi (pyproject.toml) -pytz==2024.1 - # via - # efoli - # kohlrahbi (pyproject.toml) - # pandas -six==1.16.0 - # via python-dateutil -syrupy==4.7.1 - # via kohlrahbi (pyproject.toml) -tomlkit==0.13.2 - # via - # kohlrahbi (pyproject.toml) - # pylint -types-freezegun==1.1.10 - # via kohlrahbi (pyproject.toml) -types-pytz==2024.1.0.20240417 - # via pandas-stubs -types-requests==2.32.0.20240712 - # via kohlrahbi (pyproject.toml) -typing-extensions==4.12.2 - # via - # mypy - # pydantic - # pydantic-core - # python-docx -tzdata==2024.1 - # via pandas -urllib3==2.2.2 - # via types-requests -xlsxwriter==3.2.0 - # via kohlrahbi (pyproject.toml) diff --git a/dev_requirements/requirements-formatcheck.in b/dev_requirements/requirements-formatcheck.in deleted file mode 100644 index 98a546c7..00000000 --- a/dev_requirements/requirements-formatcheck.in +++ /dev/null @@ -1,3 +0,0 @@ -# specific requirements for the tox formatting environment -black -isort diff --git a/dev_requirements/requirements-formatcheck.txt b/dev_requirements/requirements-formatcheck.txt deleted file mode 100644 index db981ad2..00000000 --- a/dev_requirements/requirements-formatcheck.txt +++ /dev/null @@ -1,20 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.12 -# by the following command: -# -# pip-compile --output-file=dev_requirements/requirements-formatcheck.txt dev_requirements/requirements-formatcheck.in -# -black==24.8.0 - 
# via -r dev_requirements/requirements-formatcheck.in -click==8.1.7 - # via black -isort==5.13.2 - # via -r dev_requirements/requirements-formatcheck.in -mypy-extensions==1.0.0 - # via black -packaging==24.1 - # via black -pathspec==0.12.1 - # via black -platformdirs==4.2.2 - # via black diff --git a/dev_requirements/requirements-lint.txt b/dev_requirements/requirements-lint.txt deleted file mode 100644 index 52cc7328..00000000 --- a/dev_requirements/requirements-lint.txt +++ /dev/null @@ -1,75 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.12 -# by the following command: -# -# pip-compile --extra=lint --output-file=dev_requirements/requirements-lint.txt pyproject.toml -# -annotated-types==0.7.0 - # via pydantic -astroid==3.2.4 - # via pylint -click==8.1.7 - # via kohlrahbi (pyproject.toml) -colorlog==6.8.2 - # via kohlrahbi (pyproject.toml) -dill==0.3.8 - # via pylint -efoli==1.1.0 - # via kohlrahbi (pyproject.toml) -et-xmlfile==1.1.0 - # via openpyxl -isort==5.13.2 - # via pylint -lxml==5.3.0 - # via python-docx -mccabe==0.7.0 - # via pylint -more-itertools==10.4.0 - # via kohlrahbi (pyproject.toml) -numpy==2.1.0 - # via pandas -openpyxl==3.1.5 - # via kohlrahbi (pyproject.toml) -pandas==2.2.2 - # via kohlrahbi (pyproject.toml) -platformdirs==4.2.2 - # via pylint -pydantic==2.8.2 - # via - # kohlrahbi (pyproject.toml) - # pylint-pydantic -pydantic-core==2.20.1 - # via pydantic -pylint==3.2.7 - # via - # kohlrahbi (pyproject.toml) - # pylint-plugin-utils - # pylint-pydantic -pylint-plugin-utils==0.8.2 - # via pylint-pydantic -pylint-pydantic==0.3.2 - # via kohlrahbi (pyproject.toml) -python-dateutil==2.9.0.post0 - # via pandas -python-docx==1.1.2 - # via kohlrahbi (pyproject.toml) -pytz==2024.1 - # via - # efoli - # kohlrahbi (pyproject.toml) - # pandas -six==1.16.0 - # via python-dateutil -tomlkit==0.13.2 - # via - # kohlrahbi (pyproject.toml) - # pylint -typing-extensions==4.12.2 - # via - # pydantic - # pydantic-core - # python-docx 
-tzdata==2024.1 - # via pandas -xlsxwriter==3.2.0 - # via kohlrahbi (pyproject.toml) diff --git a/dev_requirements/requirements-test.txt b/dev_requirements/requirements-test.txt deleted file mode 100644 index 88045e17..00000000 --- a/dev_requirements/requirements-test.txt +++ /dev/null @@ -1,80 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.12 -# by the following command: -# -# pip-compile --extra=test --output-file=dev_requirements/requirements-test.txt pyproject.toml -# -annotated-types==0.7.0 - # via pydantic -click==8.1.7 - # via kohlrahbi (pyproject.toml) -colorama==0.4.6 - # via - # click - # colorlog - # pytest -colorlog==6.8.2 - # via kohlrahbi (pyproject.toml) -coverage==7.6.1 - # via kohlrahbi (pyproject.toml) -dictdiffer==0.9.0 - # via kohlrahbi (pyproject.toml) -efoli==1.1.0 - # via kohlrahbi (pyproject.toml) -et-xmlfile==1.1.0 - # via openpyxl -freezegun==1.5.1 - # via kohlrahbi (pyproject.toml) -iniconfig==2.0.0 - # via pytest -lxml==5.3.0 - # via python-docx -more-itertools==10.5.0 - # via kohlrahbi (pyproject.toml) -numpy==2.1.1 - # via pandas -openpyxl==3.1.5 - # via kohlrahbi (pyproject.toml) -packaging==24.1 - # via pytest -pandas==2.2.2 - # via kohlrahbi (pyproject.toml) -pluggy==1.5.0 - # via pytest -pydantic==2.9.1 - # via kohlrahbi (pyproject.toml) -pydantic-core==2.23.3 - # via pydantic -pytest==8.3.2 - # via - # kohlrahbi (pyproject.toml) - # pytest-datafiles - # syrupy -pytest-datafiles==3.0.0 - # via kohlrahbi (pyproject.toml) -python-dateutil==2.9.0.post0 - # via - # freezegun - # pandas -python-docx==1.1.2 - # via kohlrahbi (pyproject.toml) -pytz==2024.2 - # via - # efoli - # kohlrahbi (pyproject.toml) - # pandas -six==1.16.0 - # via python-dateutil -syrupy==4.7.1 - # via kohlrahbi (pyproject.toml) -tomlkit==0.13.2 - # via kohlrahbi (pyproject.toml) -typing-extensions==4.12.2 - # via - # pydantic - # pydantic-core - # python-docx -tzdata==2024.1 - # via pandas -xlsxwriter==3.2.0 - # via kohlrahbi (pyproject.toml) 
diff --git a/dev_requirements/requirements-test_packaging.in b/dev_requirements/requirements-test_packaging.in deleted file mode 100644 index e47b6e99..00000000 --- a/dev_requirements/requirements-test_packaging.in +++ /dev/null @@ -1,2 +0,0 @@ -build -twine diff --git a/dev_requirements/requirements-test_packaging.txt b/dev_requirements/requirements-test_packaging.txt deleted file mode 100644 index db7afeed..00000000 --- a/dev_requirements/requirements-test_packaging.txt +++ /dev/null @@ -1,78 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.12 -# by the following command: -# -# pip-compile --output-file=dev_requirements/requirements-test_packaging.txt dev_requirements/requirements-test_packaging.in -# -build==1.2.2 - # via -r dev_requirements/requirements-test_packaging.in -certifi==2024.8.30 - # via requests -cffi==1.17.1 - # via cryptography -charset-normalizer==3.3.2 - # via requests -cryptography==43.0.1 - # via secretstorage -docutils==0.21.2 - # via readme-renderer -idna==3.8 - # via requests -importlib-metadata==8.4.0 - # via twine -jaraco-classes==3.4.0 - # via keyring -jaraco-context==6.0.1 - # via keyring -jaraco-functools==4.0.2 - # via keyring -jeepney==0.8.0 - # via - # keyring - # secretstorage -keyring==25.3.0 - # via twine -markdown-it-py==3.0.0 - # via rich -mdurl==0.1.2 - # via markdown-it-py -more-itertools==10.4.0 - # via - # jaraco-classes - # jaraco-functools -nh3==0.2.18 - # via readme-renderer -packaging==24.1 - # via build -pkginfo==1.10.0 - # via twine -pycparser==2.22 - # via cffi -pygments==2.18.0 - # via - # readme-renderer - # rich -pyproject-hooks==1.1.0 - # via build -readme-renderer==44.0 - # via twine -requests==2.32.3 - # via - # requests-toolbelt - # twine -requests-toolbelt==1.0.0 - # via twine -rfc3986==2.0.0 - # via twine -rich==13.8.0 - # via twine -secretstorage==3.3.3 - # via keyring -twine==5.1.1 - # via -r dev_requirements/requirements-test_packaging.in -urllib3==2.2.2 - # via - # requests - # twine 
-zipp==3.20.1 - # via importlib-metadata diff --git a/dev_requirements/requirements-typecheck.txt b/dev_requirements/requirements-typecheck.txt deleted file mode 100644 index a74fb916..00000000 --- a/dev_requirements/requirements-typecheck.txt +++ /dev/null @@ -1,81 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.12 -# by the following command: -# -# pip-compile --extra=typecheck --output-file=dev_requirements/requirements-typecheck.txt pyproject.toml -# -annotated-types==0.7.0 - # via pydantic -click==8.1.7 - # via kohlrahbi (pyproject.toml) -colorlog==6.8.2 - # via kohlrahbi (pyproject.toml) -efoli==1.1.0 - # via kohlrahbi (pyproject.toml) -et-xmlfile==1.1.0 - # via openpyxl -iniconfig==2.0.0 - # via pytest -lxml==5.3.0 - # via python-docx -more-itertools==10.4.0 - # via kohlrahbi (pyproject.toml) -mypy==1.11.2 - # via kohlrahbi (pyproject.toml) -mypy-extensions==1.0.0 - # via mypy -networkx==3.3 - # via networkx-stubs -networkx-stubs==0.0.1 - # via kohlrahbi (pyproject.toml) -numpy==2.1.0 - # via - # pandas - # pandas-stubs -openpyxl==3.1.5 - # via kohlrahbi (pyproject.toml) -packaging==24.1 - # via pytest -pandas==2.2.2 - # via kohlrahbi (pyproject.toml) -pandas-stubs==2.2.2.240807 - # via kohlrahbi (pyproject.toml) -pluggy==1.5.0 - # via pytest -pydantic==2.8.2 - # via kohlrahbi (pyproject.toml) -pydantic-core==2.20.1 - # via pydantic -pytest==8.3.2 - # via kohlrahbi (pyproject.toml) -python-dateutil==2.9.0.post0 - # via pandas -python-docx==1.1.2 - # via kohlrahbi (pyproject.toml) -pytz==2024.1 - # via - # efoli - # kohlrahbi (pyproject.toml) - # pandas -six==1.16.0 - # via python-dateutil -tomlkit==0.13.2 - # via kohlrahbi (pyproject.toml) -types-freezegun==1.1.10 - # via kohlrahbi (pyproject.toml) -types-pytz==2024.1.0.20240417 - # via pandas-stubs -types-requests==2.32.0.20240712 - # via kohlrahbi (pyproject.toml) -typing-extensions==4.12.2 - # via - # mypy - # pydantic - # pydantic-core - # python-docx -tzdata==2024.1 - # via pandas 
-urllib3==2.2.2 - # via types-requests -xlsxwriter==3.2.0 - # via kohlrahbi (pyproject.toml) diff --git a/domain-specific-terms.txt b/domain-specific-terms.txt new file mode 100644 index 00000000..455d0509 --- /dev/null +++ b/domain-specific-terms.txt @@ -0,0 +1,6 @@ +# contains 1 lower case word per line which are ignored in the spell_check +paket +ist +dokument +oder +CONTRL diff --git a/pyproject.toml b/pyproject.toml index 2a842f97..5a9f4e2e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,22 +48,39 @@ kohlrahbi = "kohlrahbi:cli" [project.optional-dependencies] dev = ["kohlrahbi[test]", "kohlrahbi[lint]", "kohlrahbi[typecheck]"] -lint = ["pylint", "pylint-pydantic"] +lint = [ + "pylint==3.2.7", + "pylint-pydantic==0.3.2", +] test = [ - "coverage", - "dictdiffer", - "freezegun", - "pytest-datafiles", - "pytest", - "syrupy", + "coverage==7.6.1", + "dictdiffer==0.9.0", + "freezegun==1.5.1", + "pytest-datafiles==3.0.0", + "pytest==8.3.3", + "syrupy==4.7.1", ] typecheck = [ - "mypy", - "networkx-stubs", - "pandas-stubs", - "pytest", - "types-freezegun", - "types-requests", + "mypy==1.11.2", + "networkx-stubs==0.0.1", + "pandas-stubs==2.2.2.240909", + "pytest==8.3.3", + "types-freezegun==1.1.10", + "types-requests==2.32.0.20240914 ", +] +spell_check = [ + "codespell==2.3.0" +] +coverage = [ + "coverage==7.6.1" +] +formatting = [ + "black==24.8.0", + "isort==5.13.2" +] +test_packaging = [ + "build==1.2.2", + "twine==5.1.1" ] [project.urls] diff --git a/src/kohlrahbi/ahb/__init__.py b/src/kohlrahbi/ahb/__init__.py index 4eea02f5..5cdb223a 100644 --- a/src/kohlrahbi/ahb/__init__.py +++ b/src/kohlrahbi/ahb/__init__.py @@ -64,7 +64,7 @@ def process_ahb_table( return pruefi_did_change_since_last_scraping: bool = True # we assume it yes, if we can't compare or unless we know better if AhbExportFileFormat.FLATAHB in file_type and json_file_path.exists(): - # the flat ahb ist the only file format from which we can READ to compare our current with previous results + # 
the flat ahb is the only file format from which we can READ to compare our current with previous results pruefi_did_change_since_last_scraping = not are_equal_except_for_guids(unfolded_ahb, json_file_path) logger.info("Pruefi '%s' did change since last scraping: %s", pruefi, pruefi_did_change_since_last_scraping) # ⚠ here we assume that the csv/json/xlsx files are in sync, if they exist. diff --git a/src/kohlrahbi/ahbtable/ahbtable.py b/src/kohlrahbi/ahbtable/ahbtable.py index 58b2ecec..6dc0467e 100644 --- a/src/kohlrahbi/ahbtable/ahbtable.py +++ b/src/kohlrahbi/ahbtable/ahbtable.py @@ -104,7 +104,7 @@ def line_contains_only_segment_gruppe(raw_line: pd.Series) -> bool: # type:igno def sanitize(self) -> None: """ - In some cases there is the content of one cell splitted in two. + In some cases the content of one cell is split in two. We need to merge the content into one cell and delete the deprecated cell afterwards. """ index_of_lines_to_drop: list[int] = [] diff --git a/src/kohlrahbi/docxfilefinder.py b/src/kohlrahbi/docxfilefinder.py index abc1bafb..ccfff73e 100644 --- a/src/kohlrahbi/docxfilefinder.py +++ b/src/kohlrahbi/docxfilefinder.py @@ -207,7 +207,7 @@ def get_docx_files_which_may_contain_searched_pruefi(self, searched_pruefi: str) def get_all_docx_files_which_contain_change_histories(self) -> list[Path]: """ - This function returns a list of docx fils which probably contain a change history. + This function returns a list of docx files which probably contain a change history. Only format documents like UTILMD, MSCONS etc. contain a change history. """ diff --git a/src/kohlrahbi/models/edifact_components.py b/src/kohlrahbi/models/edifact_components.py index 0e3b27bc..3a8d4379 100644 --- a/src/kohlrahbi/models/edifact_components.py +++ b/src/kohlrahbi/models/edifact_components.py @@ -1,7 +1,7 @@ # pylint:disable=too-few-public-methods """ EDIFACT components are data structures on different hierarchical levels inside an EDIFACT message. 
-Components contain not only EDIFACT composits but also segments and segment groups. +Components contain not only EDIFACT composites but also segments and segment groups. """ import re from abc import ABC @@ -243,7 +243,7 @@ class Segment(SegmentLevel): default=None, description=( "For the MIG matching it might be necessary to know the section" - "in which the data element occured in the AHB." + "in which the data element occurred in the AHB." "This might be necessary to e.g. distinguish gas and electricity fields which look the same otherwise." "See e.g. UTILMD 'Geplante Turnusablesung des MSB (Strom)' vs. 'Geplante Turnusablesung des NB (Gas)'" ), diff --git a/src/kohlrahbi/table_header.py b/src/kohlrahbi/table_header.py index 3caa5c6a..1ef0af27 100644 --- a/src/kohlrahbi/table_header.py +++ b/src/kohlrahbi/table_header.py @@ -45,7 +45,7 @@ def create_mapping_of_tabstop_positions( """ Create a mapping of the tabstop positions of the Prüfidentifikatoren columns. - For the current tabstop positions, the intial ones with the least difference are + For the current tabstop positions, the initial ones with the least difference are found to account for changes in tabstop positions between paragraphs. 
Returns: diff --git a/tox.ini b/tox.ini index 78638081..503c6fcc 100644 --- a/tox.ini +++ b/tox.ini @@ -4,6 +4,7 @@ envlist = lint typecheck formatcheck + spell_check skip_missing_interpreters = True [testenv] @@ -13,7 +14,7 @@ commands = python -m pip install --upgrade pip # the test environment is called by the Github action that runs the unit tests setenv = PYTHONPATH = {toxinidir}/src deps = - -r dev_requirements/requirements-test.txt + .[test] commands = coverage run -m pytest --basetemp={envtmpdir} {posargs} coverage html --omit .tox/*,unittests/* @@ -22,7 +23,7 @@ commands = [testenv:update_snapshots] deps = -r requirements.txt - -r dev_requirements/requirements-test.txt + .[test] syrupy setenv = PYTHONPATH = {toxinidir}/src commands = python -m pytest -m snapshot --basetemp={envtmpdir} {posargs} --snapshot-update @@ -30,24 +31,35 @@ commands = python -m pytest -m snapshot --basetemp={envtmpdir} {posargs} --snaps [testenv:lint] # the lint environment is called by the Github Action that runs the linter deps = - -r dev_requirements/requirements-lint.txt + .[lint] setenv = PYTHONPATH = {toxinidir}/src # add your fixtures like e.g. pytest_datafiles here commands = pylint kohlrahbi --ignore=version.py +[testenv:spell_check] +# the spellcheck environment checks the code for typos +setenv = PYTHONPATH = {toxinidir}/src +deps = + -r requirements.txt + .[spell_check] +commands = + codespell --ignore-words=domain-specific-terms.txt src + codespell --ignore-words=domain-specific-terms.txt README.md + # add single files (ending with .py) or packages here + [testenv:typecheck] # the type_check environment checks the type hints using mypy setenv = PYTHONPATH = {toxinidir}/src deps = - -r dev_requirements/requirements-typecheck.txt + .[typecheck] commands = mypy --show-error-codes src/kohlrahbi --strict mypy --show-error-codes unittests [testenv:formatcheck] deps = - -r dev_requirements/requirements-formatcheck.txt + .[formatting] commands = black . --check isort . 
--check @@ -55,7 +67,7 @@ commands = [testenv:dev] # the dev environment contains everything you need to start developing on your local machine. deps = - -r dev_requirements/requirements-dev.txt + .[dev] pip-tools pre-commit commands = @@ -80,7 +92,7 @@ commands = [testenv:test_packaging] skip_install = true deps = - -r dev_requirements/requirements-test_packaging.txt + .[test_packaging] commands = python -m build twine check dist/*