Skip to content

Commit

Permalink
Merge branch 'main' into fix-ruff-issues
Browse files Browse the repository at this point in the history
Additions:
- update pytest
- update pytest-describe
- resolve conflicts
- update README (flake8 -> ruff)

Changes to be committed:
modified:   .gitignore
renamed:    .pre-commit-config.yml -> .pre-commit-config.yaml
modified:   README.md
modified:   poetry.lock
modified:   pyproject.toml
modified:   quinn/extensions/column_ext.py
modified:   quinn/functions.py
modified:   tests/test_append_if_schema_identical.py
modified:   tests/test_functions.py
  • Loading branch information
SemyonSinchenko committed Aug 31, 2023
2 parents c2ba6dc + 1c45400 commit 64c851a
Show file tree
Hide file tree
Showing 9 changed files with 284 additions and 180 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ __pycache__/
# PyVenv
.env
.venv
venv

# Linters cache
.mypy_cache
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yml → .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
repos:
- repo: https://github.com/charliermarsh/ruff-pre-commit
# Ruff version.
rev: 'v0.0.265'
rev: 'v0.0.286'
hooks:
- id: ruff
- repo: local
Expand Down
21 changes: 20 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# Quinn

![![image](https://github.com/MrPowers/quinn/workflows/build/badge.svg)](https://github.com/MrPowers/quinn/actions/workflows/ci.yml/badge.svg)
![![image](https://github.com/MrPowers/mack/workflows/build/badge.svg)](https://github.com/MrPowers/quinn/actions/workflows/flake8.yml/badge.svg)
<!-- ![![image](https://github.com/MrPowers/mack/workflows/build/badge.svg)](https://github.com/MrPowers/quinn/actions/workflows/flake8.yml/badge.svg) -->
[![Linter: Ruff](https://img.shields.io/badge/Linter-Ruff-brightgreen?style=flat-square)](https://github.com/MrPowers/quinn/actions/workflows/ruff.yml/badge.svg)
![PyPI - Downloads](https://img.shields.io/pypi/dm/quinn)
[![PyPI version](https://badge.fury.io/py/quinn.svg)](https://badge.fury.io/py/quinn)

Expand Down Expand Up @@ -261,6 +262,24 @@ quinn.to_list_of_dictionaries(source_df)

Converts an entire DataFrame into a list of dictionaries.

**show_output_to_df()**

```python
quinn.show_output_to_df(output_str, spark)
```

Parses the text printed by a Spark DataFrame's `show()` method back into a Spark DataFrame. Useful for quickly pulling data from a log into a DataFrame. In this example, `output_str` is a string of the form:

```
+----+---+-----------+------+
|name|age| stuff1|stuff2|
+----+---+-----------+------+
|jose| 1|nice person| yoyo|
| li| 2|nice person| yoyo|
| liz| 3|nice person| yoyo|
+----+---+-----------+------+
```

## Pyspark Core Class Extensions

```
Expand Down
404 changes: 244 additions & 160 deletions poetry.lock

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ python = ">=3.7,<4.0"

[tool.poetry.dev-dependencies]
pyspark = ">2.0.0"
pytest = "3.2.2"
pytest = "7.4"
chispa = "0.9.2"
pytest-describe = "^1.0.0"
pytest-describe = "^2.1"
semver = "3.0.0"

[tool.poetry.group.mkdocs]
Expand All @@ -33,7 +33,7 @@ mkdocs = "^1.4.2"
# Allow lines to be as long as 150 characters.
line-length = 150
ignore = ["D100"]
required-version = "0.0.265"
required-version = "0.0.286"

[build-system]
requires = ["poetry>=0.12"]
Expand Down
4 changes: 2 additions & 2 deletions quinn/extensions/column_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def isFalse(self: Column) -> Column:
:return: Column
:rtype: Column
"""
return self == False # noqa
return self == lit(False)


def isTrue(self: Column) -> Column:
Expand All @@ -51,7 +51,7 @@ def isTrue(self: Column) -> Column:
:returns: Column object
:rtype: Column
"""
return self == True # noqa
return self == lit(True)


def isNullOrBlank(self: Column) -> Column:
Expand Down
12 changes: 6 additions & 6 deletions quinn/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,20 +193,20 @@ def _raise_if_invalid_day(day: str) -> None:
raise ValueError(message)


def approx_equal(col1: Column, col2: Column, threshhold: Number) -> Column:
def approx_equal(col1: Column, col2: Column, threshold: Number) -> Column:
"""Compares two ``Column`` objects by checking if the difference between them
is less than a specified ``threshhold``.
is less than a specified ``threshold``.
:param col1: the first ``Column``
:type col1: Column
:param col2: the second ``Column``
:type col2: Column
:param threshhold: value to compare with
:type threshhold: Number
:param threshold: value to compare with
:type threshold: Number
:return: Boolean ``Column`` with ``True`` indicating that ``abs(col1 -
col2)`` is less than ``threshhold``
col2)`` is less than ``threshold``
"""
return F.abs(col1 - col2) < threshhold
return F.abs(col1 - col2) < threshold


def array_choice(col: Column) -> Column:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_append_if_schema_identical.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

@auto_inject_fixtures("spark")
def test_append_if_schema_identical(spark):
source_data = [(1, "capetown", "Alice"), (2, "delhi", "Bob")]
source_data = [(1, "cape town", "Alice"), (2, "delhi", "Bob")]
target_data = [(3, "Charlie", "New York"), (4, "Dave", "Los Angeles")]

source_df = spark.createDataFrame(source_data, schema=StructType([
Expand Down
12 changes: 6 additions & 6 deletions tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,12 +307,12 @@ def it_works_with_integer_values(spark):
chispa.assert_column_equality(actual_df, "are_nums_approx_equal", "expected")


# def test_array_choice(spark):
# df = spark.create_df(
# [(["a", "b", "c"],), (["a", "b", "c", "d"],), (["x"],), ([None],)],
# [("letters", ArrayType(StringType(), True), True)],
# )
# actual_df = df.withColumn("random_letter", quinn.array_choice(F.col("letters")))
def test_array_choice(spark):
df = spark.create_df(
[(["a", "b", "c"],), (["a", "b", "c", "d"],), (["x"],), ([None],)],
[("letters", ArrayType(StringType(), True), True)],
)
actual_df = df.withColumn("random_letter", quinn.array_choice(F.col("letters")))
# actual_df.show()
# chispa.assert_column_equality(actual_df, "are_nums_approx_equal", "expected")

Expand Down

0 comments on commit 64c851a

Please sign in to comment.