Remove the print Athena function #236

Merged
53 changes: 43 additions & 10 deletions CONTRIBUTING.md
@@ -22,9 +22,9 @@ To start contributing you should fork this repository and only after that clone

```shell
# for user-login
git remote --set-url origin https://github.com/your-github-name/quinn.git
git remote set-url origin https://github.com/your-github-name/quinn.git
# for private keys way
git remote --set-url origin [email protected]:your-github-name/quinn.git
git remote set-url origin [email protected]:your-github-name/quinn.git
```

### Install the project
@@ -74,20 +74,53 @@ This project follows the [PySpark style guide](https://github.com/MrPowers/spark
"""
```

We are using `isort`, `black` and `ruff` as linters. You can find instructions on how to set up and use these tools here:
We are using `isort` and `ruff` as linters. You can find instructions on how to set up and use these tools here:

1. [isort](https://pycqa.github.io/isort/)
2. [black](https://black.readthedocs.io/en/stable/)
3. [ruff](https://github.com/charliermarsh/ruff)
2. [ruff](https://github.com/charliermarsh/ruff)
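
As a small illustration of the kind of autofix Ruff applies (for example, adding a missing trailing comma to a multi-line call), here is a hedged before/after sketch. It assumes the trailing-comma rule (`COM812`, from the flake8-commas plugin) is enabled in the project's Ruff configuration; the `greet` function is purely hypothetical and not part of quinn:

```python
# Hypothetical example (not from the quinn codebase).
def greet(name: str, greeting: str = "hello") -> str:
    return f"{greeting}, {name}"


# Before: the multi-line call is missing a trailing comma after the last argument.
message = greet(
    "quinn contributor", "welcome"
)

# After `ruff check --fix` with COM812 enabled: the trailing comma is added.
message = greet(
    "quinn contributor", "welcome",
)
print(message)
```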

### Adding ruff to IDEs

#### VSCode

1. Install the `Ruff` extension by Astral Software from the VSCode marketplace (Extension ID: *charliermarsh.ruff*).
2. Open the command palette (Ctrl+Shift+P) and select `Preferences: Open Settings (JSON)`.
3. Add the following configuration to your settings.json file:

```json
{
"python.linting.ruffEnabled": true,
"python.linting.enabled": true,
"python.formatting.provider": "none",
"editor.formatOnSave": true
}
```

These settings enable linting with Ruff and format your code with Ruff on save.

#### PyCharm

To set up `Ruff` in PyCharm using `poetry`, follow these steps:

1. **Find the path to your `poetry` executable:**
- Open a terminal.
- For macOS/Linux, use the command `which poetry`.
- For Windows, use the command `where poetry`.
- Note down the path returned by the command.

2. **Open the `Preferences` window** (Cmd+, on macOS).
3. **Navigate to `Tools` > `External Tools`.**
4. **Click the `+` icon** to add a new external tool.
5. **Fill in the following details:**
- **Name:** `Ruff`
- **Program:** Enter the path to your `poetry` executable that you noted earlier.
- **Arguments:** `run ruff check --fix $FilePathRelativeToProjectRoot$`
- **Working directory:** `$ProjectFileDir$`
6. **Click `OK`** to save the configuration.
7. **To run Ruff,** right-click on a file or directory in the project view, select `External Tools`, and then select `Ruff`.

### Pull Request

When you're finished with the changes, create a pull request, also known as a PR.
- Don't forget to link the PR to the issue if you are solving one.
- As you update your PR and apply changes, mark each conversation as resolved.
- If you run into any merge issues, check out this [git tutorial](https://github.com/skills/resolve-merge-conflicts) to help you resolve merge conflicts and other issues.

## Maintainers and Reviewers

1. [MrPowers](https://github.com/MrPowers)
2. ...
6 changes: 6 additions & 0 deletions pyproject.toml
@@ -3,6 +3,12 @@ name = "quinn"
version = "0.10.3"
description = "Pyspark helper methods to maximize developer efficiency"
authors = ["MrPowers <[email protected]>"]

# Maintainers of the project
maintainers = [
"SemyonSinchenko <[email protected]>"
]

readme = "README.md"
homepage = "https://github.com/MrPowers/quinn/"
keywords = ['apachespark', 'spark', 'pyspark']
1 change: 0 additions & 1 deletion quinn/__init__.py
@@ -17,7 +17,6 @@
from quinn.dataframe_helpers import (
    column_to_list,
    create_df,
    print_athena_create_table,
    show_output_to_df,
    to_list_of_dictionaries,
    two_columns_to_dictionary,
46 changes: 7 additions & 39 deletions quinn/dataframe_helpers.py
@@ -18,7 +18,6 @@
if TYPE_CHECKING:
    from pyspark.sql import DataFrame, SparkSession
import sys
import warnings
from typing import Any

from pyspark.sql.types import StructField, StructType
@@ -46,10 +45,10 @@ def column_to_list(df: DataFrame, col_name: str) -> list[Any]:
        return [row[0] for row in df.select(col_name).collect()]

    pyarrow_enabled = (
        spark_session.conf.get(
            "spark.sql.execution.arrow.pyspark.enabled",
        )
        == "true"
        spark_session.conf.get(
            "spark.sql.execution.arrow.pyspark.enabled",
        )
        == "true"
    )

    pyarrow_valid = pyarrow_enabled and sys.modules["pyarrow"].__version__ >= "0.17.0"
@@ -64,9 +63,9 @@ def column_to_list(df: DataFrame, col_name: str) -> list[Any]:


def two_columns_to_dictionary(
    df: DataFrame,
    key_col_name: str,
    value_col_name: str,
    df: DataFrame,
    key_col_name: str,
    value_col_name: str,
) -> dict[str, Any]:
    """Collect two columns as dictionary when first column is key and second is value.

@@ -94,37 +93,6 @@ def to_list_of_dictionaries(df: DataFrame) -> list[dict[str, Any]]:
    return list(map(lambda r: r.asDict(), df.collect()))  # noqa: C417


def print_athena_create_table(
    df: DataFrame,
    athena_table_name: str,
    s3location: str,
) -> None:
    """Generate the Athena create table statement for a given DataFrame.
    :param df: The pyspark.sql.DataFrame to use
    :param athena_table_name: The name of the athena table to generate
    :param s3location: The S3 location of the parquet data
    :return: None.
    """
    warnings.warn(
        "Function print_athena_create_table is deprecated and will be removed in the version 1.0",
        category=DeprecationWarning,
        stacklevel=2,
    )

    fields = df.schema

    print(f"CREATE EXTERNAL TABLE IF NOT EXISTS `{athena_table_name}` ( ")

    for field in fields.fieldNames()[:-1]:
        print("\t", f"`{fields[field].name}` {fields[field].dataType.simpleString()}, ")
    last = fields[fields.fieldNames()[-1]]
    print("\t", f"`{last.name}` {last.dataType.simpleString()} ")

    print(")")
    print("STORED AS PARQUET")
    print(f"LOCATION '{s3location}'\n")


def show_output_to_df(show_output: str, spark: SparkSession) -> DataFrame:
"""Show output as spark DataFrame.

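
Since `print_athena_create_table` is removed here (it already emitted a `DeprecationWarning`), users who still rely on it will need their own helper. Below is a minimal, hedged sketch of an equivalent standalone function based on the removed code above; it is not part of quinn's API, the name `athena_create_table_ddl` is invented for this example, and it returns the DDL string instead of printing it (its whitespace differs slightly from the old output):

```python
from pyspark.sql import DataFrame


def athena_create_table_ddl(df: DataFrame, table_name: str, s3_location: str) -> str:
    """Build an Athena CREATE EXTERNAL TABLE statement for a DataFrame.

    Hypothetical replacement for the removed quinn.print_athena_create_table;
    returns the statement as a string rather than printing it.
    """
    # One line per column: `name` type, using Spark's simpleString() type names.
    columns = ",\n".join(
        f"  `{field.name}` {field.dataType.simpleString()}" for field in df.schema.fields
    )
    return (
        f"CREATE EXTERNAL TABLE IF NOT EXISTS `{table_name}` (\n"
        f"{columns}\n"
        ")\n"
        "STORED AS PARQUET\n"
        f"LOCATION '{s3_location}'"
    )


# Example usage (requires an active SparkSession and a DataFrame `df`):
# print(athena_create_table_ddl(df, "my_table", "s3://my-bucket/path/"))
```

Returning the string rather than printing it keeps such a helper easy to test, which is exactly what the removed unit test had to work around by capturing stdout with `capsys`.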
24 changes: 6 additions & 18 deletions tests/test_dataframe_helpers.py
@@ -1,7 +1,9 @@
import chispa
from pyspark.sql.types import IntegerType, StringType, StructField, StructType

import quinn

from .spark import spark
import chispa
from pyspark.sql.types import IntegerType, StringType, StructType, StructField


def describe_column_to_list():
@@ -49,25 +51,11 @@ def it_converts_a_show_string_to_a_dataframe():
("liz", "3", "nice person", "yoyo"),
]
expected_df = spark.createDataFrame(
expected_data, ["name", "age", "stuff1", "stuff2"]
expected_data, ["name", "age", "stuff1", "stuff2"],
)
chispa.assert_df_equality(expected_df, actual_df)


def describe_print_athena_create_table():
    def it_prints_a_create_table_string_for_athena(capsys):
        source_df = spark.createDataFrame(
            [("jets", "football", 45), ("nacional", "soccer", 10)],
            ["team", "sport", "goals_for"],
        )
        quinn.print_athena_create_table(source_df, "athena_table", "s3://mock")
        out, _ = capsys.readouterr()
        assert (
            out
            == "CREATE EXTERNAL TABLE IF NOT EXISTS `athena_table` ( \n\t `team` string, \n\t `sport` string, \n\t `goals_for` bigint \n)\nSTORED AS PARQUET\nLOCATION 's3://mock'\n\n"  # noqa
        )


def test_create_df():
    rows_data = [("jose", 1), ("li", 2), ("luisa", 3)]
    col_specs = [("name", StringType()), ("age", IntegerType())]
@@ -76,7 +64,7 @@ def test_create_df():
        [
            StructField("name", StringType(), True),
            StructField("age", IntegerType(), True),
        ]
        ],
    )
    actual = quinn.create_df(spark, rows_data, col_specs)
    expected = spark.createDataFrame(rows_data, expected_schema)