
Merge pull request #245 from fpgmaas/chore/UP007
Enable ruff `UP007`
SemyonSinchenko authored Jul 15, 2024
2 parents 407f463 + 615db18 commit 1ed9375
Showing 9 changed files with 30 additions and 27 deletions.
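For context before the per-file diffs: `UP007` is the pyupgrade-derived ruff rule that flags `typing.Optional[X]` and `typing.Union[X, Y]` annotations and rewrites them to the PEP 604 `X | None` / `X | Y` spelling. A minimal sketch of the before/after style this commit enforces (the `scale` function is a hypothetical example, not quinn code):

```python
# Required by the repo's ruff config (see the pyproject.toml change below);
# it keeps the `X | Y` syntax valid as an annotation on Python < 3.10.
from __future__ import annotations


# Before (flagged by UP007):
#     from typing import Optional, Union
#     def scale(x: Union[float, int], factor: Optional[float] = None) -> float: ...

# After (the style UP007 rewrites to):
def scale(x: float | int, factor: float | None = None) -> float:
    """Multiply x by factor, defaulting to 1.0 when factor is None."""
    return x * (1.0 if factor is None else factor)


print(scale(2, 1.5))  # 3.0
```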
20 changes: 10 additions & 10 deletions README.md
@@ -476,41 +476,41 @@ from quinn.extensions import *
 ### Column Extensions

-**isFalsy()**
+**is_falsy()**

-Returns `True` if `has_stuff` is `None` or `False`.
+Returns a Column indicating whether all values in the Column are False or NULL: `True` if `has_stuff` is `None` or `False`.

 ```python
 source_df.withColumn("is_stuff_falsy", F.col("has_stuff").isFalsy())
 ```

-**isTruthy()**
+**is_truthy()**

-Returns `True` unless `has_stuff` is `None` or `False`.
+Calculates a boolean expression that is the opposite of is_falsy for the given Column: `True` unless `has_stuff` is `None` or `False`.

 ```python
 source_df.withColumn("is_stuff_truthy", F.col("has_stuff").isTruthy())
 ```

-**isNullOrBlank()**
+**is_null_or_blank()**

-Returns `True` if `blah` is `null` or blank (the empty string or a string that only contains whitespace).
+Returns a Boolean value which expresses whether a given column is NULL or contains only blank characters: `True` if `blah` is `null` or blank (the empty string or a string that only contains whitespace).

 ```python
 source_df.withColumn("is_blah_null_or_blank", F.col("blah").isNullOrBlank())
 ```

-**isNotIn()**
+**is_not_in()**

-Returns `True` if `fun_thing` is not included in the `bobs_hobbies` list.
+To see if a value is not in a list of values: `True` if `fun_thing` is not included in the `bobs_hobbies` list.

 ```python
 source_df.withColumn("is_not_bobs_hobby", F.col("fun_thing").isNotIn(bobs_hobbies))
 ```

-**nullBetween()**
+**null_between()**

-Returns `True` if `age` is between `lower_age` and `upper_age`. If `lower_age` is populated and `upper_age` is `null`, it will return `True` if `age` is greater than or equal to `lower_age`. If `lower_age` is `null` and `upper_age` is populated, it will return `True` if `age` is lower than or equal to `upper_age`.
+To see if a value is between two values in a null-friendly way: `True` if `age` is between `lower_age` and `upper_age`. If `lower_age` is populated and `upper_age` is `null`, it will return `True` if `age` is greater than or equal to `lower_age`. If `lower_age` is `null` and `upper_age` is populated, it will return `True` if `age` is lower than or equal to `upper_age`.

 ```python
 source_df.withColumn("is_between", F.col("age").nullBetween(F.col("lower_age"), F.col("upper_age")))
 ```
4 changes: 2 additions & 2 deletions benchmarks/create_benchmark_df.py
@@ -14,7 +14,7 @@
 from __future__ import annotations

 import random
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING

 from pyspark.sql import SparkSession
 from pyspark.sql import functions as F  # noqa: N812
@@ -38,7 +38,7 @@ def save_benchmark_df(
     spark: SparkSession,
     n: int,
     data_label: str,
-    repartition_n: Optional[int] = None,
+    repartition_n: int | None = None,
 ) -> None:
     """Save a benchmark dataframe to disk."""
     print(f"Generating benchmark df for n={n}")
5 changes: 3 additions & 2 deletions pyproject.toml
@@ -93,8 +93,6 @@ ignore = [
 "D205", # It is broken
 "TCH003", # I have no idea what is it about
 "PLC1901", # Strange thing
-"UP007", # Not supported in py3.6
-"UP038", # Not supported in all py versions
 "SIM108", # Don't create long ternary operators
 "PTH123", # Don't force use of Pathlib
 "PTH207", # Don't force use of Pathlib
@@ -109,3 +107,6 @@ ignore = [
 "quinn/__init__.py" = ["F401", "F403"]
 "quinn/functions.py" = ["FBT003"]
 "quinn/keyword_finder.py" = ["A002"]
+
+[tool.ruff.isort]
+required-imports = ["from __future__ import annotations"]
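The new `[tool.ruff.isort]` table is what makes the rewrites safe: `required-imports` makes ruff insist on `from __future__ import annotations` in every module, so the `X | Y` annotations remain importable on pre-3.10 interpreters. A rough illustration of the runtime difference (a standalone sketch, not quinn code):

```python
# On Python 3.9, without the future import, the annotation below is
# evaluated eagerly at definition time and raises:
#     TypeError: unsupported operand type(s) for |: 'type' and 'NoneType'
#
#     def f(x: int | None = None) -> int | None: ...

# With the future import, annotations are stored as strings (PEP 563),
# so the same syntax is accepted on 3.8 and 3.9:
from __future__ import annotations


def f(x: int | None = None) -> int | None:
    return x


print(f(41))  # 41
```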
1 change: 1 addition & 0 deletions quinn/__init__.py
@@ -12,6 +12,7 @@
 # limitations under the License.

 """quinn API."""
+from __future__ import annotations

 from quinn.append_if_schema_identical import append_if_schema_identical
 from quinn.dataframe_helpers import (
6 changes: 5 additions & 1 deletion quinn/append_if_schema_identical.py
@@ -10,8 +10,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from __future__ import annotations

-from pyspark.sql import DataFrame
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from pyspark.sql import DataFrame


 class SchemaMismatchError(ValueError):
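A short sketch of why the `TYPE_CHECKING` move above works: with postponed annotation evaluation, the `DataFrame` name is only needed by static type checkers, never at runtime (the `append_rows` function below is a simplified hypothetical, not the actual quinn helper):

```python
from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Evaluated only by type checkers such as mypy; at runtime this
    # branch never executes, so importing the module stays cheap.
    from pyspark.sql import DataFrame


def append_rows(target: DataFrame, source: DataFrame) -> DataFrame:
    # Under PEP 563 the annotations above are plain strings at runtime,
    # so the absent runtime import is never touched.
    return target.unionByName(source)
```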
4 changes: 2 additions & 2 deletions quinn/functions.py
@@ -278,9 +278,9 @@ def is_falsy(col: Column) -> Column:


 def is_truthy(col: Column) -> Column:
-    """Calculates a boolean expression that is the opposite of isFalsy for the given ``Column`` col.
+    """Calculates a boolean expression that is the opposite of is_falsy for the given ``Column`` col.

-    :param Column col: The ``Column`` to calculate the opposite of isFalsy for.
+    :param Column col: The ``Column`` to calculate the opposite of is_falsy for.
     :returns: A ``Column`` with the results of the calculation.
     :rtype: Column
     """
10 changes: 4 additions & 6 deletions quinn/math.py
@@ -15,16 +15,14 @@

 from __future__ import annotations

-from typing import Optional, Union
-
 from pyspark.sql import Column
 from pyspark.sql import functions as F  # noqa: N812


 def rand_laplace(
-    mu: Union[float, Column],
-    beta: Union[float, Column],
-    seed: Optional[int] = None,
+    mu: float | Column,
+    beta: float | Column,
+    seed: int | None = None,
 ) -> Column:
     """Generate random numbers from Laplace(mu, beta).
@@ -47,7 +45,7 @@ def rand_laplace(
 def div_or_else(
     cola: Column,
     colb: Column,
-    default: Union[float, Column] = 0.0,
+    default: float | Column = 0.0,
 ) -> Column:
     """Return result of division of cola by colb or default if colb is zero.
3 changes: 1 addition & 2 deletions quinn/schema_helpers.py
@@ -14,7 +14,6 @@
 from __future__ import annotations

 import json
-from typing import Optional

 from pyspark.sql import SparkSession
 from pyspark.sql import types as T  # noqa: N812
@@ -100,7 +99,7 @@ def schema_from_csv(spark: SparkSession, file_path: str) -> T.StructType: # noq
     :rtype: pyspark.sql.types.StructType
     """

-    def _validate_json(metadata: Optional[str]) -> dict:
+    def _validate_json(metadata: str | None) -> dict:
         if metadata is None:
             return {}
4 changes: 2 additions & 2 deletions quinn/split_columns.py
@@ -13,7 +13,7 @@

 from __future__ import annotations

-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING

 from pyspark.sql.functions import length, split, trim, udf, when
 from pyspark.sql.types import IntegerType
@@ -28,7 +28,7 @@ def split_col(  # noqa: PLR0913
     delimiter: str,
     new_col_names: list[str],
     mode: str = "permissive",
-    default: Optional[str] = None,
+    default: str | None = None,
 ) -> DataFrame:
     """Splits the given column based on the delimiter and creates new columns with the split values.
