diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4bec521d..abf12d70 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -64,6 +64,11 @@ jobs: PIP_PACKAGES: ${{ matrix.pip-packages }} run: poetry run pip install $PIP_PACKAGES # Using pip shouldn't mess up poetry cache + - name: Run pre-commit hooks + run: | + pre-commit install + pre-commit run -a + - name: Run tests with pytest against PySpark ${{ matrix.pyspark-version }} run: make test diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 94ee4ff5..8ceb2324 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,16 +1,7 @@ repos: - repo: https://github.com/charliermarsh/ruff-pre-commit - # Ruff version. rev: 'v0.5.2' hooks: - id: ruff - - repo: local - hooks: - - id: pytest - name: pytest-check - entry: poetry run pytest - language: system - pass_filenames: false - # Runs only on python files - types: [ python ] - always_run: true + args: [--exit-non-zero-on-fix] + - id: ruff-format diff --git a/Makefile b/Makefile index cac6556d..51ec8d0f 100644 --- a/Makefile +++ b/Makefile @@ -10,6 +10,10 @@ install_test: ## Install the 'dev, test and extras' dependencies install_deps: ## Install all dependencies @poetry install --with=development,linting,testing,docs +.PHONY: install_ruff +install_ruff: ## Install ruff for use within IDE + @poetry run pip install ruff==0.5.2 + .PHONY: update_deps update_deps: ## Update dependencies @poetry update --with=development,linting,testing,docs @@ -18,13 +22,9 @@ update_deps: ## Update dependencies test: ## Run all tests @poetry run pytest tests -.PHONY: lint -lint: ## Lint the code - @poetry run ruff check --fix quinn - -.PHONY: format -format: ## Format the code - @poetry run ruff format quinn +.PHONY: check +check: ## Lint and format the code by running pre-commit hooks + @poetry run pre-commit run -a # Inspired by https://marmelab.com/blog/2016/02/29/auto-documented-makefile.html .PHONY: help diff --git a/benchmarks/create_benchmark_df.py b/benchmarks/create_benchmark_df.py index 301b2b45..2d576473 100644 --- a/benchmarks/create_benchmark_df.py +++ b/benchmarks/create_benchmark_df.py @@ -26,11 +26,7 @@ def generate_df(spark: SparkSession, n: int) -> DataFrame: """Generate a dataframe with a monotonically increasing id column and a random count column.""" count_vals = [(random.randint(1, 10),) for _ in range(n)] # noqa: S311 - output: DataFrame = ( - spark.createDataFrame(count_vals, schema=["count"]) - .withColumn("mvv", F.monotonically_increasing_id()) - .select("mvv", "count") - ) + output: DataFrame = spark.createDataFrame(count_vals, schema=["count"]).withColumn("mvv", F.monotonically_increasing_id()).select("mvv", "count") return output diff --git a/benchmarks/visualize_benchmarks.py b/benchmarks/visualize_benchmarks.py index d73b5f7f..2458aafe 100644 --- a/benchmarks/visualize_benchmarks.py +++ b/benchmarks/visualize_benchmarks.py @@ -58,12 +58,7 @@ def parse_results(spark: SparkSession) -> tuple[pd.DataFrame, pd.DataFrame, str] ["xsmall", "small", "medium", "large"], ) - average_df = ( - result_df[["test_name", "dataset_size", "runtime"]] - .groupby(["test_name", "dataset_size"], observed=False) - .mean() - .reset_index() - ) + average_df = result_df[["test_name", "dataset_size", "runtime"]].groupby(["test_name", "dataset_size"], observed=False).mean().reset_index() benchmark_date = get_benchmark_date(benchmark_path="benchmarks/results/") return result_df, average_df, benchmark_date diff --git a/poetry.lock b/poetry.lock index 3833c1a4..df3d1465 100644 --- a/poetry.lock +++ b/poetry.lock @@ -299,6 +299,17 @@ files = [ [package.dependencies] pycparser = "*" +[[package]] +name = "cfgv" +version = "3.4.0" +description = "Validate configuration and produce human readable error messages." +optional = false +python-versions = ">=3.8" +files = [ + {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, + {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, +] + [[package]] name = "charset-normalizer" version = "3.3.2" @@ -504,6 +515,17 @@ files = [ {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, ] +[[package]] +name = "distlib" +version = "0.3.8" +description = "Distribution utilities" +optional = false +python-versions = "*" +files = [ + {file = "distlib-0.3.8-py2.py3-none-any.whl", hash = "sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784"}, + {file = "distlib-0.3.8.tar.gz", hash = "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64"}, +] + [[package]] name = "exceptiongroup" version = "1.2.2" @@ -546,6 +568,22 @@ files = [ [package.extras] devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benchmark", "pytest-cache", "validictory"] +[[package]] +name = "filelock" +version = "3.15.4" +description = "A platform independent file lock." +optional = false +python-versions = ">=3.8" +files = [ + {file = "filelock-3.15.4-py3-none-any.whl", hash = "sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7"}, + {file = "filelock-3.15.4.tar.gz", hash = "sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb"}, +] + +[package.extras] +docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-asyncio (>=0.21)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)", "virtualenv (>=20.26.2)"] +typing = ["typing-extensions (>=4.8)"] + [[package]] name = "fqdn" version = "1.5.1" @@ -736,6 +774,20 @@ cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] +[[package]] +name = "identify" +version = "2.6.0" +description = "File identification library for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "identify-2.6.0-py2.py3-none-any.whl", hash = "sha256:e79ae4406387a9d300332b5fd366d8994f1525e8414984e1a59e058b2eda2dd0"}, + {file = "identify-2.6.0.tar.gz", hash = "sha256:cb171c685bdc31bcc4c1734698736a7d5b6c8bf2e0c15117f4d469c8640ae5cf"}, +] + +[package.extras] +license = ["ukkonen"] + [[package]] name = "idna" version = "3.7" @@ -1733,6 +1785,17 @@ files = [ {file = "nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe"}, ] +[[package]] +name = "nodeenv" +version = "1.9.1" +description = "Node.js virtual environment builder" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"}, + {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, +] + [[package]] name = "notebook-shim" version = "0.2.4" @@ -1957,6 +2020,24 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "pre-commit" +version = "3.7.1" +description = "A framework for managing and maintaining multi-language pre-commit hooks." +optional = false +python-versions = ">=3.9" +files = [ + {file = "pre_commit-3.7.1-py2.py3-none-any.whl", hash = "sha256:fae36fd1d7ad7d6a5a1c0b0d5adb2ed1a3bda5a21bf6c3e5372073d7a11cd4c5"}, + {file = "pre_commit-3.7.1.tar.gz", hash = "sha256:8ca3ad567bc78a4972a3f1a477e94a79d4597e8140a6e0b651c5e33899c3654a"}, +] + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +virtualenv = ">=20.10.0" + [[package]] name = "prometheus-client" version = "0.20.0" @@ -2715,33 +2796,6 @@ files = [ {file = "rpds_py-0.19.0.tar.gz", hash = "sha256:4fdc9afadbeb393b4bbbad75481e0ea78e4469f2e1d713a90811700830b553a9"}, ] -[[package]] -name = "ruff" -version = "0.5.2" -description = "An extremely fast Python linter and code formatter, written in Rust." -optional = false -python-versions = ">=3.7" -files = [ - {file = "ruff-0.5.2-py3-none-linux_armv6l.whl", hash = "sha256:7bab8345df60f9368d5f4594bfb8b71157496b44c30ff035d1d01972e764d3be"}, - {file = "ruff-0.5.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:1aa7acad382ada0189dbe76095cf0a36cd0036779607c397ffdea16517f535b1"}, - {file = "ruff-0.5.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:aec618d5a0cdba5592c60c2dee7d9c865180627f1a4a691257dea14ac1aa264d"}, - {file = "ruff-0.5.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0b62adc5ce81780ff04077e88bac0986363e4a3260ad3ef11ae9c14aa0e67ef"}, - {file = "ruff-0.5.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:dc42ebf56ede83cb080a50eba35a06e636775649a1ffd03dc986533f878702a3"}, - {file = "ruff-0.5.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c15c6e9f88c67ffa442681365d11df38afb11059fc44238e71a9d9f1fd51de70"}, - {file = "ruff-0.5.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:d3de9a5960f72c335ef00763d861fc5005ef0644cb260ba1b5a115a102157251"}, - {file = "ruff-0.5.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fe5a968ae933e8f7627a7b2fc8893336ac2be0eb0aace762d3421f6e8f7b7f83"}, - {file = "ruff-0.5.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a04f54a9018f75615ae52f36ea1c5515e356e5d5e214b22609ddb546baef7132"}, - {file = "ruff-0.5.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ed02fb52e3741f0738db5f93e10ae0fb5c71eb33a4f2ba87c9a2fa97462a649"}, - {file = "ruff-0.5.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:3cf8fe659f6362530435d97d738eb413e9f090e7e993f88711b0377fbdc99f60"}, - {file = "ruff-0.5.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:237a37e673e9f3cbfff0d2243e797c4862a44c93d2f52a52021c1a1b0899f846"}, - {file = "ruff-0.5.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:2a2949ce7c1cbd8317432ada80fe32156df825b2fd611688814c8557824ef060"}, - {file = "ruff-0.5.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:481af57c8e99da92ad168924fd82220266043c8255942a1cb87958b108ac9335"}, - {file = "ruff-0.5.2-py3-none-win32.whl", hash = "sha256:f1aea290c56d913e363066d83d3fc26848814a1fed3d72144ff9c930e8c7c718"}, - {file = "ruff-0.5.2-py3-none-win_amd64.whl", hash = "sha256:8532660b72b5d94d2a0a7a27ae7b9b40053662d00357bb2a6864dd7e38819084"}, - {file = "ruff-0.5.2-py3-none-win_arm64.whl", hash = "sha256:73439805c5cb68f364d826a5c5c4b6c798ded6b7ebaa4011f01ce6c94e4d5583"}, - {file = "ruff-0.5.2.tar.gz", hash = "sha256:2c0df2d2de685433794a14d8d2e240df619b748fbe3367346baa519d8e6f1ca2"}, -] - [[package]] name = "semver" version = "3.0.2" @@ -2988,6 +3042,26 @@ h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] +[[package]] +name = "virtualenv" +version = "20.26.3" +description = "Virtual Python Environment builder" +optional = false +python-versions = ">=3.7" +files = [ + {file = "virtualenv-20.26.3-py3-none-any.whl", hash = "sha256:8cc4a31139e796e9a7de2cd5cf2489de1217193116a8fd42328f1bd65f434589"}, + {file = "virtualenv-20.26.3.tar.gz", hash = "sha256:4c43a2a236279d9ea36a0d76f98d84bd6ca94ac4e0f4a3b9d46d05e10fea542a"}, +] + +[package.dependencies] +distlib = ">=0.3.7,<1" +filelock = ">=3.12.2,<4" +platformdirs = ">=3.9.1,<5" + +[package.extras] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] + [[package]] name = "watchdog" version = "4.0.1" @@ -3106,4 +3180,4 @@ connect = ["grpcio", "grpcio-status", "numpy", "pandas", "pyarrow"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0" -content-hash = "6f7bcd8a2108bfb4df7eef4f092f3889de53b921a8c42ebb0f22748c5ab6ac39" +content-hash = "3760d053f86c967ed007ea7ee0c6268c5ea467d81e012e5557b4fb371f57c739" diff --git a/pyproject.toml b/pyproject.toml index 9925fc41..1147f50a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,12 +47,10 @@ optional = true [tool.poetry.group.testing] optional = true -[tool.poetry.group.linting] -optional = true - [tool.poetry.group.development.dependencies] pyspark = ">3" semver = "^3" +pre-commit = "^3.7.1" [tool.poetry.group.testing.dependencies] pytest = "^7" @@ -61,9 +59,6 @@ pytest-describe = "^2" pyspark = ">3" semver = "^3" -[tool.poetry.group.linting.dependencies] -ruff = "0.5.2" - [tool.poetry.group.docs.dependencies] # All the dependencies related to mkdocs; # We are pinning only the main version of mkdocs. @@ -80,12 +75,15 @@ pymdown-extensions = "*" mkdocs-macros-plugin = "*" mkdocs-material-extensions = "*" markdown-exec = "*" + ########################################################################### # LINTING CONFIGURATION ########################################################################### + [tool.ruff] line-length = 150 +fix = true extend-exclude = ["tests", "docs"] [tool.ruff.lint] diff --git a/quinn/__init__.py b/quinn/__init__.py index 61f491eb..f45707d0 100644 --- a/quinn/__init__.py +++ b/quinn/__init__.py @@ -12,6 +12,7 @@ # limitations under the License. """quinn API.""" + from __future__ import annotations from quinn.append_if_schema_identical import append_if_schema_identical diff --git a/quinn/schema_helpers.py b/quinn/schema_helpers.py index cf725bee..57d26371 100644 --- a/quinn/schema_helpers.py +++ b/quinn/schema_helpers.py @@ -174,8 +174,4 @@ def complex_fields(schema: T.StructType) -> dict[str, object]: :return: A dictionary with complex field names as keys and their respective data types as values. :rtype: Dict[str, object] """ - return { - field.name: field.dataType - for field in schema.fields - if isinstance(field.dataType, (T.ArrayType, T.StructType, T.MapType)) - } + return {field.name: field.dataType for field in schema.fields if isinstance(field.dataType, (T.ArrayType, T.StructType, T.MapType))}