Repository: spark Updated Branches: refs/heads/master e3967dc5c -> 25826c77d
[SPARK-21330][SQL] Bad partitioning does not allow to read a JDBC table with extreme values on the partition column ## What changes were proposed in this pull request? An overflow of the difference of bounds on the partitioning column leads to no data being read. This patch checks for this overflow. ## How was this patch tested? New unit test. Author: Andrew Ray <ray.and...@gmail.com> Closes #18800 from aray/SPARK-21330. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/25826c77 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/25826c77 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/25826c77 Branch: refs/heads/master Commit: 25826c77ddf0d5753d2501d0e764111da2caa8b6 Parents: e3967dc Author: Andrew Ray <ray.and...@gmail.com> Authored: Fri Aug 4 08:58:01 2017 +0100 Committer: Sean Owen <so...@cloudera.com> Committed: Fri Aug 4 08:58:01 2017 +0100 ---------------------------------------------------------------------- .../execution/datasources/jdbc/JDBCRelation.scala | 3 ++- .../scala/org/apache/spark/sql/jdbc/JDBCSuite.scala | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/25826c77/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala index 658d137..68ff53c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala @@ -64,7 +64,8 @@ private[sql] object JDBCRelation extends Logging { s"bound. Lower bound: $lowerBound; Upper bound: $upperBound") val numPartitions = - if ((upperBound - lowerBound) >= partitioning.numPartitions) { + if ((upperBound - lowerBound) >= partitioning.numPartitions || /* check for overflow */ + (upperBound - lowerBound) < 0) { partitioning.numPartitions } else { logWarning("The number of partitions is reduced because the specified number of " + http://git-wip-us.apache.org/repos/asf/spark/blob/25826c77/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index d1daf86..24f46a6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -96,6 +96,15 @@ class JDBCSuite extends SparkFunSuite | partitionColumn 'THEID', lowerBound '1', upperBound '4', numPartitions '3') """.stripMargin.replaceAll("\n", " ")) + sql( + s""" + |CREATE OR REPLACE TEMPORARY VIEW partsoverflow + |USING org.apache.spark.sql.jdbc + |OPTIONS (url '$url', dbtable 'TEST.PEOPLE', user 'testUser', password 'testPass', + | partitionColumn 'THEID', lowerBound '-9223372036854775808', + | upperBound '9223372036854775807', numPartitions '3') + """.stripMargin.replaceAll("\n", " ")) + conn.prepareStatement("create table test.inttypes (a INT, b BOOLEAN, c TINYINT, " + "d SMALLINT, e BIGINT)").executeUpdate() conn.prepareStatement("insert into test.inttypes values (1, false, 3, 4, 1234567890123)" @@ -376,6 +385,12 @@ class JDBCSuite extends SparkFunSuite assert(ids(2) === 3) } + test("overflow of partition bound difference does not give negative stride") { + val df = sql("SELECT * FROM partsoverflow") + checkNumPartitions(df, expectedNumPartitions = 3) + assert(df.collect().length == 3) + } + test("Register JDBC query with renamed fields") { // Regression test for bug SPARK-7345 sql( --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org