Skip to content

Commit

Permalink
[SPARK-47664][PYTHON][CONNECT][TESTS][FOLLOW-UP] Add more tests
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
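Add more tests for `parse_attr_name` and `verify_col_name`, covering column names that contain backticks and dots, and remove a duplicated assertion in `test_verify_col_name`.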

### Why are the changes needed?
To improve test coverage, addressing a review comment on apache#45788 (comment).

### Does this PR introduce _any_ user-facing change?
no, test only

### How was this patch tested?
By the newly added tests.

### Was this patch authored or co-authored using generative AI tooling?
no

Closes apache#45809 from zhengruifeng/col_name_val_test.

Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
  • Loading branch information
zhengruifeng committed Apr 2, 2024
1 parent 22771a6 commit a598f65
Showing 1 changed file with 46 additions and 1 deletion.
47 changes: 46 additions & 1 deletion python/pyspark/sql/tests/connect/test_connect_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1219,6 +1219,11 @@ def test_parse_col_name(self):
self.assert_eq(parse_attr_name("`a"), None)
self.assert_eq(parse_attr_name("a`"), None)

self.assert_eq(parse_attr_name("`a`.b"), ["a", "b"])
self.assert_eq(parse_attr_name("`a`.`b`"), ["a", "b"])
self.assert_eq(parse_attr_name("`a```.b"), ["a`", "b"])
self.assert_eq(parse_attr_name("`a``.b"), None)

self.assert_eq(parse_attr_name("a.b.c"), ["a", "b", "c"])
self.assert_eq(parse_attr_name("`a`.`b`.`c`"), ["a", "b", "c"])
self.assert_eq(parse_attr_name("a.`b`.c"), ["a", "b", "c"])
Expand Down Expand Up @@ -1284,7 +1289,6 @@ def test_verify_col_name(self):
self.assertTrue(verify_col_name("m.`s`.id", cdf.schema))
self.assertTrue(verify_col_name("`m`.`s`.`id`", cdf.schema))
self.assertFalse(verify_col_name("m.`s.id`", cdf.schema))
self.assertFalse(verify_col_name("m.`s.id`", cdf.schema))

self.assertTrue(verify_col_name("a", cdf.schema))
self.assertTrue(verify_col_name("`a`", cdf.schema))
Expand All @@ -1294,6 +1298,47 @@ def test_verify_col_name(self):
self.assertTrue(verify_col_name("`a`.`v`", cdf.schema))
self.assertFalse(verify_col_name("`a`.`x`", cdf.schema))

cdf = (
self.connect.range(10)
.withColumn("v", CF.lit(123))
.withColumn("s.s", CF.struct("id", "v"))
.withColumn("m`", CF.struct("`s.s`", "v"))
)

# root
# |-- id: long (nullable = false)
# |-- v: integer (nullable = false)
# |-- s.s: struct (nullable = false)
# | |-- id: long (nullable = false)
# | |-- v: integer (nullable = false)
# |-- m`: struct (nullable = false)
# | |-- s.s: struct (nullable = false)
# | | |-- id: long (nullable = false)
# | | |-- v: integer (nullable = false)
# | |-- v: integer (nullable = false)

self.assertFalse(verify_col_name("s", cdf.schema))
self.assertFalse(verify_col_name("`s`", cdf.schema))
self.assertFalse(verify_col_name("s.s", cdf.schema))
self.assertFalse(verify_col_name("s.`s`", cdf.schema))
self.assertFalse(verify_col_name("`s`.s", cdf.schema))
self.assertTrue(verify_col_name("`s.s`", cdf.schema))

self.assertFalse(verify_col_name("m", cdf.schema))
self.assertFalse(verify_col_name("`m`", cdf.schema))
self.assertTrue(verify_col_name("`m```", cdf.schema))

self.assertFalse(verify_col_name("`m```.s", cdf.schema))
self.assertFalse(verify_col_name("`m```.`s`", cdf.schema))
self.assertFalse(verify_col_name("`m```.s.s", cdf.schema))
self.assertFalse(verify_col_name("`m```.s.`s`", cdf.schema))
self.assertTrue(verify_col_name("`m```.`s.s`", cdf.schema))

self.assertFalse(verify_col_name("`m```.s.s.v", cdf.schema))
self.assertFalse(verify_col_name("`m```.s.`s`.v", cdf.schema))
self.assertTrue(verify_col_name("`m```.`s.s`.v", cdf.schema))
self.assertTrue(verify_col_name("`m```.`s.s`.`v`", cdf.schema))


if __name__ == "__main__":
from pyspark.sql.tests.connect.test_connect_basic import * # noqa: F401
Expand Down

0 comments on commit a598f65

Please sign in to comment.