Merge branch 'main' into fix-ruff-issues

Additions: - update pytest - update pytest-describe - resolve conflicts - update README (flake8 -> ruff) Changes to be committed: modified: .gitignore renamed: .pre-commit-config.yml -> .pre-commit-config.yaml modified: README.md modified: poetry.lock modified: pyproject.toml modified: quinn/extensions/column_ext.py modified: quinn/functions.py modified: tests/test_append_if_schema_identical.py modified: tests/test_functions.py
mrpowers-io · Aug 31, 2023 · 64c851a · 64c851a
2 parents c2ba6dc + 1c45400
commit 64c851a
Show file tree

Hide file tree

Showing 9 changed files with 284 additions and 180 deletions.
diff --git a/.gitignore b/.gitignore
@@ -17,6 +17,7 @@ __pycache__/
 # PyVenv
 .env
 .venv
+venv
 
 # Linters cache
 .mypy_cache

diff --git a/.pre-commit-config.yml → .pre-commit-config.yaml b/.pre-commit-config.yml → .pre-commit-config.yaml
@@ -1,7 +1,7 @@
 repos:
   - repo: https://github.com/charliermarsh/ruff-pre-commit
     # Ruff version.
-    rev: 'v0.0.265'
+    rev: 'v0.0.286'
     hooks:
       - id: ruff
   - repo: local

diff --git a/README.md b/README.md
@@ -1,7 +1,8 @@
 # Quinn
 
 ![![image](https://github.com/MrPowers/quinn/workflows/build/badge.svg)](https://github.com/MrPowers/quinn/actions/workflows/ci.yml/badge.svg)
-![![image](https://github.com/MrPowers/mack/workflows/build/badge.svg)](https://github.com/MrPowers/quinn/actions/workflows/flake8.yml/badge.svg)
+<!-- ![![image](https://github.com/MrPowers/mack/workflows/build/badge.svg)](https://github.com/MrPowers/quinn/actions/workflows/flake8.yml/badge.svg) -->
+[![Linter: Ruff](https://img.shields.io/badge/Linter-Ruff-brightgreen?style=flat-square)](https://github.com/MrPowers/quinn/actions/workflows/ruff.yml/badge.svg)
 ![PyPI - Downloads](https://img.shields.io/pypi/dm/quinn)
 [![PyPI version](https://badge.fury.io/py/quinn.svg)](https://badge.fury.io/py/quinn)
 
@@ -261,6 +262,24 @@ quinn.to_list_of_dictionaries(source_df)
 
 Converts an entire DataFrame into a list of dictionaries.
 
+**show_output_to_df()**
+
+```python
+quinn.show_output_to_df(output_str, spark)
+```
+
+Parses a Spark DataFrame output string into a Spark DataFrame. Useful for quickly pulling data from a log into a DataFrame. In this example, `output_str` is a string of the form:
+
+```
++----+---+-----------+------+
+|name|age|     stuff1|stuff2|
++----+---+-----------+------+
+|jose|  1|nice person|  yoyo|
+|  li|  2|nice person|  yoyo|
+| liz|  3|nice person|  yoyo|
++----+---+-----------+------+
+```
+
 ## Pyspark Core Class Extensions
 
 ```

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -13,9 +13,9 @@ python = ">=3.7,<4.0"
 
 [tool.poetry.dev-dependencies]
 pyspark = ">2.0.0"
-pytest = "3.2.2"
+pytest = "7.4"
 chispa = "0.9.2"
-pytest-describe = "^1.0.0"
+pytest-describe = "^2.1"
 semver = "3.0.0"
 
 [tool.poetry.group.mkdocs]
@@ -33,7 +33,7 @@ mkdocs = "^1.4.2"
 # Allow lines to be as long as 150 characters.
 line-length = 150
 ignore = ["D100"]
-required-version = "0.0.265"
+required-version = "0.0.286"
 
 [build-system]
 requires = ["poetry>=0.12"]

diff --git a/quinn/extensions/column_ext.py b/quinn/extensions/column_ext.py
@@ -36,7 +36,7 @@ def isFalse(self: Column) -> Column:
     :return: Column
     :rtype: Column
     """
-    return self == False # noqa
+    return self == lit(False)
 
 
 def isTrue(self: Column) -> Column:
@@ -51,7 +51,7 @@ def isTrue(self: Column) -> Column:
     :returns: Column object
     :rtype: Column
     """
-    return self == True # noqa
+    return self == lit(True)
 
 
 def isNullOrBlank(self: Column) -> Column:

diff --git a/quinn/functions.py b/quinn/functions.py
@@ -193,20 +193,20 @@ def _raise_if_invalid_day(day: str) -> None:
         raise ValueError(message)
 
 
-def approx_equal(col1: Column, col2: Column, threshhold: Number) -> Column:
+def approx_equal(col1: Column, col2: Column, threshold: Number) -> Column:
     """Compares two ``Column`` objects by checking if the difference between them
-    is less than a specified ``threshhold``.
+    is less than a specified ``threshold``.
 
     :param col1: the first ``Column``
     :type col1: Column
     :param col2: the second ``Column``
     :type col2: Column
-    :param threshhold: value to compare with
-    :type threshhold: Number
+    :param threshold: value to compare with
+    :type threshold: Number
     :return: Boolean ``Column`` with ``True`` indicating that ``abs(col1 -
-    col2)`` is less than ``threshhold``
+    col2)`` is less than ``threshold``
     """
-    return F.abs(col1 - col2) < threshhold
+    return F.abs(col1 - col2) < threshold
 
 
 def array_choice(col: Column) -> Column:

diff --git a/tests/test_append_if_schema_identical.py b/tests/test_append_if_schema_identical.py
@@ -5,7 +5,7 @@
 
 @auto_inject_fixtures("spark")
 def test_append_if_schema_identical(spark):
-    source_data = [(1, "capetown", "Alice"), (2, "delhi", "Bob")]
+    source_data = [(1, "cape town", "Alice"), (2, "delhi", "Bob")]
     target_data = [(3, "Charlie", "New York"), (4, "Dave", "Los Angeles")]
 
     source_df = spark.createDataFrame(source_data, schema=StructType([

diff --git a/tests/test_functions.py b/tests/test_functions.py
@@ -307,12 +307,12 @@ def it_works_with_integer_values(spark):
         chispa.assert_column_equality(actual_df, "are_nums_approx_equal", "expected")
 
 
-# def test_array_choice(spark):
-#     df = spark.create_df(
-#         [(["a", "b", "c"],), (["a", "b", "c", "d"],), (["x"],), ([None],)],
-#         [("letters", ArrayType(StringType(), True), True)],
-#     )
-#     actual_df = df.withColumn("random_letter", quinn.array_choice(F.col("letters")))
+def test_array_choice(spark):
+    df = spark.create_df(
+        [(["a", "b", "c"],), (["a", "b", "c", "d"],), (["x"],), ([None],)],
+        [("letters", ArrayType(StringType(), True), True)],
+    )
+    actual_df = df.withColumn("random_letter", quinn.array_choice(F.col("letters")))
     # actual_df.show()
     # chispa.assert_column_equality(actual_df, "are_nums_approx_equal", "expected")