improve use of ruff

Update the Makefile: add make commands for ruff
mrpowers-io · Jul 15, 2024 · 261f9f9
1 parent a5c80cf
commit 261f9f9
Show file tree

Hide file tree

Showing 9 changed files with 124 additions and 68 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -64,6 +64,11 @@ jobs:
           PIP_PACKAGES: ${{ matrix.pip-packages }}
         run: poetry run pip install $PIP_PACKAGES  # Using pip shouldn't mess up poetry cache
 
+      - name: Run pre-commit hooks
+        run: |
+          pre-commit install
+          pre-commit run -a
+
       - name: Run tests with pytest against PySpark ${{ matrix.pyspark-version }}
         run: make test
 

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,16 +1,7 @@
 repos:
   - repo: https://github.com/charliermarsh/ruff-pre-commit
-    # Ruff version.
     rev: 'v0.5.2'
     hooks:
       - id: ruff
-  - repo: local
-    hooks:
-      - id: pytest
-        name: pytest-check
-        entry: poetry run pytest
-        language: system
-        pass_filenames: false
-        # Runs only on python files
-        types: [ python ]
-        always_run: true
+        args: [--exit-non-zero-on-fix]
+      - id: ruff-format
diff --git a/Makefile b/Makefile
@@ -10,6 +10,10 @@ install_test: ## Install the 'dev, test and extras' dependencies
 install_deps: ## Install all dependencies
 	@poetry install --with=development,linting,testing,docs
 
+.PHONY: install_ruff
+install_ruff: ## Install ruff for use within IDE
+	@poetry run pip install ruff==0.5.2
+
 .PHONY: update_deps
 update_deps: ## Update dependencies
 	@poetry update --with=development,linting,testing,docs
@@ -18,13 +22,9 @@ update_deps: ## Update dependencies
 test: ## Run all tests
 	@poetry run pytest tests
 
-.PHONY: lint 
-lint: ## Lint the code
-	@poetry run ruff check --fix quinn
-
-.PHONY: format
-format: ## Format the code
-	@poetry run ruff format quinn
+.PHONY: check
+check: ## Lint and format the code by running pre-commit hooks
+	@poetry run pre-commit run -a
 
 # Inspired by https://marmelab.com/blog/2016/02/29/auto-documented-makefile.html
 .PHONY: help

diff --git a/benchmarks/create_benchmark_df.py b/benchmarks/create_benchmark_df.py
@@ -26,11 +26,7 @@
 def generate_df(spark: SparkSession, n: int) -> DataFrame:
     """Generate a dataframe with a monotonically increasing id column and a random count column."""
     count_vals = [(random.randint(1, 10),) for _ in range(n)]  # noqa: S311
-    output: DataFrame = (
-        spark.createDataFrame(count_vals, schema=["count"])
-        .withColumn("mvv", F.monotonically_increasing_id())
-        .select("mvv", "count")
-    )
+    output: DataFrame = spark.createDataFrame(count_vals, schema=["count"]).withColumn("mvv", F.monotonically_increasing_id()).select("mvv", "count")
     return output
 
 

diff --git a/benchmarks/visualize_benchmarks.py b/benchmarks/visualize_benchmarks.py
@@ -58,12 +58,7 @@ def parse_results(spark: SparkSession) -> tuple[pd.DataFrame, pd.DataFrame, str]
         ["xsmall", "small", "medium", "large"],
     )
 
-    average_df = (
-        result_df[["test_name", "dataset_size", "runtime"]]
-        .groupby(["test_name", "dataset_size"], observed=False)
-        .mean()
-        .reset_index()
-    )
+    average_df = result_df[["test_name", "dataset_size", "runtime"]].groupby(["test_name", "dataset_size"], observed=False).mean().reset_index()
 
     benchmark_date = get_benchmark_date(benchmark_path="benchmarks/results/")
     return result_df, average_df, benchmark_date

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -47,12 +47,10 @@ optional = true
 [tool.poetry.group.testing]
 optional = true
 
-[tool.poetry.group.linting]
-optional = true
-
 [tool.poetry.group.development.dependencies]
 pyspark = ">3"
 semver = "^3"
+pre-commit = "^3.7.1"
 
 [tool.poetry.group.testing.dependencies]
 pytest = "^7"
@@ -61,9 +59,6 @@ pytest-describe = "^2"
 pyspark = ">3"
 semver = "^3"
 
-[tool.poetry.group.linting.dependencies]
-ruff = "0.5.2"
-
 [tool.poetry.group.docs.dependencies]
 # All the dependencies related to mkdocs;
 # We are pinning only the main version of mkdocs.
@@ -80,12 +75,15 @@ pymdown-extensions = "*"
 mkdocs-macros-plugin = "*"
 mkdocs-material-extensions = "*"
 markdown-exec = "*"
+
 ###########################################################################
 #                         LINTING CONFIGURATION 
 ###########################################################################
 
+
 [tool.ruff]
 line-length = 150
+fix = true
 extend-exclude = ["tests", "docs"]
 
 [tool.ruff.lint]

diff --git a/quinn/__init__.py b/quinn/__init__.py
@@ -12,6 +12,7 @@
 # limitations under the License.
 
 """quinn API."""
+
 from __future__ import annotations
 
 from quinn.append_if_schema_identical import append_if_schema_identical

diff --git a/quinn/schema_helpers.py b/quinn/schema_helpers.py
@@ -174,8 +174,4 @@ def complex_fields(schema: T.StructType) -> dict[str, object]:
     :return: A dictionary with complex field names as keys and their respective data types as values.
     :rtype: Dict[str, object]
     """
-    return {
-        field.name: field.dataType
-        for field in schema.fields
-        if isinstance(field.dataType, (T.ArrayType, T.StructType, T.MapType))
-    }
+    return {field.name: field.dataType for field in schema.fields if isinstance(field.dataType, (T.ArrayType, T.StructType, T.MapType))}