diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala index 541ec57d4939d..d8d8bf8552d4a 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala @@ -122,18 +122,19 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { ).executeUpdate() conn.prepareStatement("CREATE TABLE char_types (" + - "c0 char(4), c1 character(4), c2 character varying(4), c3 varchar(4), c4 bpchar(1))" + "c0 char(4), c1 character(4), c2 character varying(4), c3 varchar(4), c4 bpchar(1)," + + "c5 bpchar, c6 char)" ).executeUpdate() conn.prepareStatement("INSERT INTO char_types VALUES " + - "('abcd', 'efgh', 'ijkl', 'mnop', 'q')").executeUpdate() + "('abcd', 'efgh', 'ijkl', 'mnop', 'q', 'eason', 'c' )").executeUpdate() - // SPARK-42916: character/char/bpchar w/o length specifier defaults to int max value, this will - // cause OOM as it will be padded with ' ' to 2147483647. conn.prepareStatement("CREATE TABLE char_array_types (" + - "c0 char(4)[], c1 character(4)[], c2 character varying(4)[], c3 varchar(4)[], c4 bpchar(1)[])" + "c0 char(4)[], c1 character(4)[], c2 character varying(4)[], c3 varchar(4)[]," + + "c4 bpchar(1)[], c5 bpchar[])" ).executeUpdate() conn.prepareStatement("INSERT INTO char_array_types VALUES " + - """('{"a", "bcd"}', '{"ef", "gh"}', '{"i", "j", "kl"}', '{"mnop"}', '{"q", "r"}')""" + """('{"a", "bcd"}', '{"ef", "gh"}', '{"i", "j", "kl"}', '{"mnop"}', '{"q", "r"}', + | '{"Eason", "Ethan"}')""".stripMargin ).executeUpdate() conn.prepareStatement("CREATE TABLE money_types (" + @@ -184,7 +185,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { assert(rows.length == 2) // Test the types, and values using the first row. val types = rows(0).toSeq.map(x => x.getClass) - assert(types.length == 42) + assert(types.length == 45) assert(classOf[String].isAssignableFrom(types(0))) assert(classOf[java.lang.Integer].isAssignableFrom(types(1))) assert(classOf[java.lang.Double].isAssignableFrom(types(2))) @@ -387,26 +388,13 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { test("character type tests") { val df = sqlContext.read.jdbc(jdbcUrl, "char_types", new Properties) - val row = df.collect() - assert(row.length == 1) - assert(row(0).length === 5) - assert(row(0).getString(0) === "abcd") - assert(row(0).getString(1) === "efgh") - assert(row(0).getString(2) === "ijkl") - assert(row(0).getString(3) === "mnop") - assert(row(0).getString(4) === "q") + checkAnswer(df, Row("abcd", "efgh", "ijkl", "mnop", "q", "eason", "c")) } test("SPARK-32576: character array type tests") { val df = sqlContext.read.jdbc(jdbcUrl, "char_array_types", new Properties) - val row = df.collect() - assert(row.length == 1) - assert(row(0).length === 5) - assert(row(0).getSeq[String](0) === Seq("a ", "bcd ")) - assert(row(0).getSeq[String](1) === Seq("ef ", "gh ")) - assert(row(0).getSeq[String](2) === Seq("i", "j", "kl")) - assert(row(0).getSeq[String](3) === Seq("mnop")) - assert(row(0).getSeq[String](4) === Seq("q", "r")) + checkAnswer(df, Row(Seq("a ", "bcd "), Seq("ef ", "gh "), Seq("i", "j", "kl"), + Seq("mnop"), Seq("q", "r"), Seq("Eason", "Ethan"))) } test("SPARK-34333: money type tests") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala index 6436532d8692e..b00ae1751ac25 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala @@ -66,6 +66,9 @@ private case class PostgresDialect() extends JdbcDialect with SQLConfHelper { // timetz represents time with time zone, currently it maps to Types.TIME. // We need to change to Types.TIME_WITH_TIMEZONE if the upstream changes. Some(TimestampType) + case Types.CHAR if "bpchar".equalsIgnoreCase(typeName) && size == Int.MaxValue => + // bpchar with unspecified length same as text in postgres with blank-trimmed + Some(StringType) case Types.OTHER if "void".equalsIgnoreCase(typeName) => Some(NullType) case Types.OTHER => Some(StringType) case _ if "text".equalsIgnoreCase(typeName) => Some(StringType) // sqlType is Types.VARCHAR @@ -91,6 +94,7 @@ private case class PostgresDialect() extends JdbcDialect with SQLConfHelper { case "float4" => Some(FloatType) case "float8" => Some(DoubleType) case "varchar" => Some(VarcharType(precision)) + case "bpchar" if precision == Int.MaxValue => Some(StringType) case "char" | "bpchar" => Some(CharType(precision)) case "text" | "cidr" | "inet" | "json" | "jsonb" | "uuid" | "xml" | "tsvector" | "tsquery" | "macaddr" | "macaddr8" | "txid_snapshot" | "point" |