
Commit

Merge pull request #260 from paulooctavio/main
Remove import * from codebase and documentation
jeffbrennan authored Oct 5, 2024
2 parents 9156cee + 84fdb91 commit 7c1332a
Showing 4 changed files with 22 additions and 18 deletions.
13 changes: 7 additions & 6 deletions README.md
@@ -471,7 +471,8 @@ IntegerType()
## PySpark Core Class Extensions

```
-from quinn.extensions import *
+import pyspark.sql.functions as F
+import quinn
```
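The snippets in this section reference a `source_df` DataFrame that the diff excerpt never defines. A minimal setup sketch under assumed column names and data (not part of the original docs), so the examples below can be run as written:

```python
from pyspark.sql import SparkSession
import pyspark.sql.functions as F
import quinn

spark = SparkSession.builder.getOrCreate()

# Illustrative data: `has_stuff` mixes True, False, and NULL values.
source_df = spark.createDataFrame(
    [("a", True), ("b", False), ("c", None)],
    ["id", "has_stuff"],
)
```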
### Column Extensions
@@ -481,39 +482,39 @@ from quinn.extensions import *
Returns a Column indicating whether all values in the Column are False or NULL: `True` if `has_stuff` is `None` or `False`.
```python
source_df.withColumn("is_stuff_falsy", F.col("has_stuff").isFalsy())
source_df.withColumn("is_stuff_falsy", quinn.is_falsy(F.col("has_stuff")))
```

**is_truthy()**

Calculates a boolean expression that is the opposite of is_falsy for the given Column: `True` unless `has_stuff` is `None` or `False`.

```python
source_df.withColumn("is_stuff_truthy", F.col("has_stuff").isTruthy())
source_df.withColumn("is_stuff_truthy", quinn.is_truthy(F.col("has_stuff")))
```
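Taken together, the two flags are complements of each other. A hedged sketch of both calls against the illustrative `source_df` defined above:

```python
flagged = (
    source_df
    .withColumn("is_stuff_falsy", quinn.is_falsy(F.col("has_stuff")))
    .withColumn("is_stuff_truthy", quinn.is_truthy(F.col("has_stuff")))
)
flagged.show()
# Expected per the descriptions above:
#   has_stuff = True  -> is_stuff_falsy False, is_stuff_truthy True
#   has_stuff = False -> is_stuff_falsy True,  is_stuff_truthy False
#   has_stuff = NULL  -> is_stuff_falsy True,  is_stuff_truthy False
```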

**is_null_or_blank()**

Returns a Boolean value which expresses whether a given column is NULL or contains only blank characters: `True` if `blah` is `null` or blank (the empty string or a string that only contains whitespace).

```python
source_df.withColumn("is_blah_null_or_blank", F.col("blah").isNullOrBlank())
source_df.withColumn("is_blah_null_or_blank", quinn.is_null_or_blank(F.col("blah")))
```
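To make the blank handling concrete, here is a hedged sketch with an illustrative single-column DataFrame (the data is assumed, not from the original docs):

```python
# Empty strings and whitespace-only strings count as blank, as does NULL.
blah_df = spark.createDataFrame(
    [("hi",), ("",), ("   ",), (None,)],
    ["blah"],
)
blah_df.withColumn(
    "is_blah_null_or_blank", quinn.is_null_or_blank(F.col("blah"))
).show()
# Expected flags: False, True, True, True
```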

**is_not_in()**

To see if a value is not in a list of values: `True` if `fun_thing` is not included in the `bobs_hobbies` list.

```python
source_df.withColumn("is_not_bobs_hobby", F.col("fun_thing").isNotIn(bobs_hobbies))
source_df.withColumn("is_not_bobs_hobby", quinn.is_not_in(F.col("fun_thing")))
```
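Note that the added line above does not pass `bobs_hobbies`, even though the description says the check is against that list. A hedged sketch of the intended call, assuming `quinn.is_not_in` accepts the list of values as its second argument:

```python
bobs_hobbies = ["golf", "ping pong"]  # illustrative list

fun_df = spark.createDataFrame([("golf",), ("knitting",)], ["fun_thing"])
fun_df.withColumn(
    "is_not_bobs_hobby",
    quinn.is_not_in(F.col("fun_thing"), bobs_hobbies),  # two-argument form is an assumption
).show()
# Expected flags: False for "golf", True for "knitting"
```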

**null_between()**

To see if a value is between two values in a null-friendly way: `True` if `age` is between `lower_age` and `upper_age`. If `lower_age` is populated and `upper_age` is `null`, it will return `True` if `age` is greater than or equal to `lower_age`. If `lower_age` is `null` and `upper_age` is populated, it will return `True` if `age` is lower than or equal to `upper_age`.

```python
source_df.withColumn("is_between", F.col("age").nullBetween(F.col("lower_age"), F.col("upper_age")))
source_df.withColumn("is_between", quinn.null_between(F.col("age"), F.col("lower_age"), F.col("upper_age")))
```
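A hedged sketch covering the three cases described above, with assumed data:

```python
# Rows: fully bounded, missing upper bound, missing lower bound.
age_df = spark.createDataFrame(
    [(10, 5, 15), (10, 5, None), (10, None, 15)],
    ["age", "lower_age", "upper_age"],
)
age_df.withColumn(
    "is_between",
    quinn.null_between(F.col("age"), F.col("lower_age"), F.col("upper_age")),
).show()
# Expected: True for all three rows (10 is within [5, 15], is >= 5 with an open
# upper bound, and is <= 15 with an open lower bound).
```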

## Contributing
10 changes: 6 additions & 4 deletions docs/notebooks/schema_as_code.ipynb
@@ -112,15 +112,17 @@
}
],
"source": [
"from pyspark.sql.types import *\n",
"print(print_schema_as_code(schema))\n",
"eval(print_schema_as_code(schema))"
"\n",
"# Create a dictionary of PySpark SQL types to provide context to 'eval()' \n",
"spark_type_dict = {k: getattr(T, k) for k in dir(T) if isinstance(getattr(T, k), type)}\n",
"eval(print_schema_as_code(schema), {\"__builtins__\": None}, spark_type_dict)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e66219ad-cacc-4ed6-bbe6-f20d4d20afd4",
"id": "6fb30b81",
"metadata": {},
"outputs": [],
"source": []
@@ -142,7 +144,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.10.12"
}
},
"nbformat": 4,
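The notebook hunk above replaces the star import with an explicit dictionary of PySpark type classes so that `eval()` can resolve names such as `StructType` with builtins disabled. A minimal standalone sketch of the same idea, using an assumed example schema and assuming `print_schema_as_code` is importable from the top-level `quinn` package and returns the schema as a string of code, as the notebook uses it:

```python
import pyspark.sql.types as T
import quinn

schema = T.StructType([
    T.StructField("name", T.StringType(), True),
    T.StructField("age", T.IntegerType(), True),
])

schema_code = quinn.print_schema_as_code(schema)
print(schema_code)

# Expose only the PySpark type classes to eval(), with builtins disabled.
spark_type_dict = {k: getattr(T, k) for k in dir(T) if isinstance(getattr(T, k), type)}
rebuilt = eval(schema_code, {"__builtins__": None}, spark_type_dict)
assert rebuilt == schema
```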
13 changes: 7 additions & 6 deletions docs/usage.md
@@ -399,47 +399,48 @@ IntegerType()
## PySpark Core Class Extensions

```
-from quinn.extensions import *
+import pyspark.sql.functions as F
+import quinn
```
### Column Extensions
**isFalsy()**
```python
source_df.withColumn("is_stuff_falsy", F.col("has_stuff").isFalsy())
source_df.withColumn("is_stuff_falsy", quinn.is_falsy(F.col("has_stuff")))
```

Returns `True` if `has_stuff` is `None` or `False`.

**isTruthy()**

```python
source_df.withColumn("is_stuff_truthy", F.col("has_stuff").isTruthy())
source_df.withColumn("is_stuff_truthy", quinn.is_truthy(F.col("has_stuff")))
```

Returns `True` unless `has_stuff` is `None` or `False`.

**isNullOrBlank()**

```python
source_df.withColumn("is_blah_null_or_blank", F.col("blah").isNullOrBlank())
source_df.withColumn("is_blah_null_or_blank", quinn.is_null_or_blank(F.col("blah")))
```

Returns `True` if `blah` is `null` or blank (the empty string or a string that only contains whitespace).

**isNotIn()**

```python
source_df.withColumn("is_not_bobs_hobby", F.col("fun_thing").isNotIn(bobs_hobbies))
source_df.withColumn("is_not_bobs_hobby", quinn.is_not_in(F.col("fun_thing")))
```

Returns `True` if `fun_thing` is not included in the `bobs_hobbies` list.

**nullBetween()**

```python
source_df.withColumn("is_between", F.col("age").nullBetween(F.col("lower_age"), F.col("upper_age")))
source_df.withColumn("is_between", quinn.null_between(F.col("age"), F.col("lower_age"), F.col("upper_age")))
```

Returns `True` if `age` is between `lower_age` and `upper_age`. If `lower_age` is populated and `upper_age` is `null`, it will return `True` if `age` is greater than or equal to `lower_age`. If `lower_age` is `null` and `upper_age` is populated, it will return `True` if `age` is lower than or equal to `upper_age`.
4 changes: 2 additions & 2 deletions quinn/extensions/__init__.py
@@ -13,5 +13,5 @@

"""Extensions API."""

-from quinn.extensions.dataframe_ext import *
-from quinn.extensions.spark_session_ext import *
+from quinn.extensions.dataframe_ext import _ext_function
+from quinn.extensions.spark_session_ext import create_df
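With the star imports removed, the module's public surface is explicit: a private DataFrame helper and the `create_df` SparkSession extension. A hedged sketch of `create_df` usage, assuming importing `quinn.extensions` still attaches the method to `SparkSession` and that it keeps the `(rows, col_specs)` signature shown in quinn's docs:

```python
from pyspark.sql import SparkSession
from pyspark.sql.types import IntegerType, StringType

import quinn.extensions  # noqa: F401  -- assumed to register SparkSession.create_df

spark = SparkSession.builder.getOrCreate()

df = spark.create_df(
    [("jose", 1), ("li", 2)],
    [("name", StringType(), True), ("age", IntegerType(), True)],
)
df.show()
```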
