This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new a34c8ceb19bd [SPARK-47462][SQL] Align mappings of other unsigned numeric types with TINYINT in MySQLDialect a34c8ceb19bd is described below commit a34c8ceb19bd1c1548a60bb144d1c587a2861cd8 Author: Kent Yao <y...@apache.org> AuthorDate: Wed Mar 20 09:31:26 2024 -0700 [SPARK-47462][SQL] Align mappings of other unsigned numeric types with TINYINT in MySQLDialect ### What changes were proposed in this pull request? Align mappings of other unsigned numeric types with TINYINT in MySQLDialect. TINYINT maps to ByteType and TINYINT UNSIGNED maps to ShortType. In this PR, we - map SMALLINT to ShortType, SMALLINT UNSIGNED to IntegerType. Without this, both of them map to IntegerType. - map MEDIUMINT UNSIGNED to IntegerType, and MEDIUMINT stays as-is. Without this, MEDIUMINT UNSIGNED uses LongType. Other unsigned/signed types remain unchanged; for those, this PR only improves the test coverage. ### Why are the changes needed? Consistency and efficiency while reading MySQL numeric values. ### Does this PR introduce _any_ user-facing change? Yes, the mappings described in the 1st section. ### How was this patch tested? New tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #45588 from yaooqinn/SPARK-47462. 
Authored-by: Kent Yao <y...@apache.org> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .../spark/sql/jdbc/MySQLIntegrationSuite.scala | 39 ++++++++++++++++++---- .../org/apache/spark/sql/jdbc/MySQLDialect.scala | 10 ++++++ 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala index 3d65b4f305b3..5b2214f2efd6 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala @@ -53,11 +53,19 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { conn.prepareStatement("CREATE TABLE numbers (onebit BIT(1), tenbits BIT(10), " + "small SMALLINT, med MEDIUMINT, nor INT, big BIGINT, deci DECIMAL(40,20), flt FLOAT, " - + "dbl DOUBLE, tiny TINYINT, u_tiny TINYINT UNSIGNED)").executeUpdate() + + "dbl DOUBLE, tiny TINYINT)").executeUpdate() conn.prepareStatement("INSERT INTO numbers VALUES (b'0', b'1000100101', " + "17, 77777, 123456789, 123456789012345, 123456789012345.123456789012345, " - + "42.75, 1.0000000000000002, -128, 255)").executeUpdate() + + "42.75, 1.0000000000000002, -128)").executeUpdate() + + conn.prepareStatement("CREATE TABLE unsigned_numbers (" + + "tiny TINYINT UNSIGNED, small SMALLINT UNSIGNED, med MEDIUMINT UNSIGNED," + + "nor INT UNSIGNED, big BIGINT UNSIGNED, deci DECIMAL(40,20) UNSIGNED," + + "dbl DOUBLE UNSIGNED)").executeUpdate() + + conn.prepareStatement("INSERT INTO unsigned_numbers VALUES (255, 65535, 16777215, 4294967295," + + "9223372036854775808, 123456789012345.123456789012345, 1.0000000000000002)").executeUpdate() conn.prepareStatement("CREATE TABLE dates (d DATE, t TIME, dt DATETIME, ts TIMESTAMP, " + "yr YEAR)").executeUpdate() @@ -87,10 +95,10 @@ 
class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { val rows = df.collect() assert(rows.length == 1) val types = rows(0).toSeq.map(x => x.getClass.toString) - assert(types.length == 11) + assert(types.length == 10) assert(types(0).equals("class java.lang.Boolean")) assert(types(1).equals("class java.lang.Long")) - assert(types(2).equals("class java.lang.Integer")) + assert(types(2).equals("class java.lang.Short")) assert(types(3).equals("class java.lang.Integer")) assert(types(4).equals("class java.lang.Integer")) assert(types(5).equals("class java.lang.Long")) @@ -98,10 +106,9 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { assert(types(7).equals("class java.lang.Double")) assert(types(8).equals("class java.lang.Double")) assert(types(9).equals("class java.lang.Byte")) - assert(types(10).equals("class java.lang.Short")) assert(rows(0).getBoolean(0) == false) assert(rows(0).getLong(1) == 0x225) - assert(rows(0).getInt(2) == 17) + assert(rows(0).getShort(2) == 17) assert(rows(0).getInt(3) == 77777) assert(rows(0).getInt(4) == 123456789) assert(rows(0).getLong(5) == 123456789012345L) @@ -110,7 +117,25 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { assert(rows(0).getDouble(7) == 42.75) assert(rows(0).getDouble(8) == 1.0000000000000002) assert(rows(0).getByte(9) == 0x80.toByte) - assert(rows(0).getShort(10) == 0xff.toShort) + } + + test("SPARK-47462: Unsigned numeric types") { + val df = sqlContext.read.jdbc(jdbcUrl, "unsigned_numbers", new Properties) + val rows = df.head() + assert(rows.get(0).isInstanceOf[Short]) + assert(rows.get(1).isInstanceOf[Integer]) + assert(rows.get(2).isInstanceOf[Integer]) + assert(rows.get(3).isInstanceOf[Long]) + assert(rows.get(4).isInstanceOf[BigDecimal]) + assert(rows.get(5).isInstanceOf[BigDecimal]) + assert(rows.get(6).isInstanceOf[Double]) + assert(rows.getShort(0) === 255) + assert(rows.getInt(1) === 65535) + assert(rows.getInt(2) === 16777215) + assert(rows.getLong(3) === 
4294967295L) + assert(rows.getAs[BigDecimal](4).equals(new BigDecimal("9223372036854775808"))) + assert(rows.getAs[BigDecimal](5).equals(new BigDecimal("123456789012345.12345678901234500000"))) + assert(rows.getDouble(6) === 1.0000000000000002) } test("Date types") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala index 4e5f092b193c..a245458a5cb4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala @@ -113,6 +113,16 @@ private case object MySQLDialect extends JdbcDialect with SQLConfHelper { } else { Some(ShortType) } + case Types.SMALLINT => + if (md.build().getBoolean("isSigned")) { + Some(ShortType) + } else { + Some(IntegerType) + } + case Types.INTEGER if "MEDIUMINT UNSIGNED".equalsIgnoreCase(typeName) => + // Signed values in [-8388608, 8388607] and unsigned values in [0, 16777215], + // both of them fit IntegerType + Some(IntegerType) case Types.TIMESTAMP if "DATETIME".equalsIgnoreCase(typeName) => // scalastyle:off line.size.limit // In MYSQL, DATETIME is TIMESTAMP WITHOUT TIME ZONE --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org