Repository: spark Updated Branches: refs/heads/master aa88b8dbb -> bc0848b4c
[SPARK-22469][SQL] Accuracy problem in comparison with string and numeric ## What changes were proposed in this pull request? This fixes a problem caused by #15880 `select '1.5' > 0.5; // Result is NULL in Spark but is true in Hive. ` When comparing a string with a numeric value, cast both to double, as Hive does. Author: liutang123 <liutang...@yeah.net> Closes #19692 from liutang123/SPARK-22469. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bc0848b4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bc0848b4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bc0848b4 Branch: refs/heads/master Commit: bc0848b4c1ab84ccef047363a70fd11df240dbbf Parents: aa88b8d Author: liutang123 <liutang...@yeah.net> Authored: Wed Nov 15 09:02:54 2017 -0800 Committer: gatorsmile <gatorsm...@gmail.com> Committed: Wed Nov 15 09:02:54 2017 -0800 ---------------------------------------------------------------------- .../sql/catalyst/analysis/TypeCoercion.scala | 7 + .../catalyst/analysis/TypeCoercionSuite.scala | 3 + .../sql-tests/inputs/predicate-functions.sql | 5 + .../results/predicate-functions.sql.out | 140 ++++++++++++------- 4 files changed, 105 insertions(+), 50 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/bc0848b4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 532d22d..074eda5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -137,6 +137,13 @@ object TypeCoercion { case (DateType, 
TimestampType) => Some(StringType) case (StringType, NullType) => Some(StringType) case (NullType, StringType) => Some(StringType) + + // There is no proper decimal type we can pick, + // using double type is the best we can do. + // See SPARK-22469 for details. + case (n: DecimalType, s: StringType) => Some(DoubleType) + case (s: StringType, n: DecimalType) => Some(DoubleType) + case (l: StringType, r: AtomicType) if r != StringType => Some(r) case (l: AtomicType, r: StringType) if (l != StringType) => Some(l) case (l, r) => None http://git-wip-us.apache.org/repos/asf/spark/blob/bc0848b4/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index 793e04f..5dcd653 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -1152,6 +1152,9 @@ class TypeCoercionSuite extends AnalysisTest { ruleTest(PromoteStrings, EqualTo(Literal(Array(1, 2)), Literal("123")), EqualTo(Literal(Array(1, 2)), Literal("123"))) + ruleTest(PromoteStrings, + GreaterThan(Literal("1.5"), Literal(BigDecimal("0.5"))), + GreaterThan(Cast(Literal("1.5"), DoubleType), Cast(Literal(BigDecimal("0.5")), DoubleType))) } test("cast WindowFrame boundaries to the type they operate upon") { http://git-wip-us.apache.org/repos/asf/spark/blob/bc0848b4/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql index 3b3d4ad..e99d5ce 100644 
--- a/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql @@ -2,12 +2,14 @@ select 1 = 1; select 1 = '1'; select 1.0 = '1'; +select 1.5 = '1.51'; -- GreaterThan select 1 > '1'; select 2 > '1.0'; select 2 > '2.0'; select 2 > '2.2'; +select '1.5' > 0.5; select to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52'); select to_date('2009-07-30 04:17:52') > '2009-07-30 04:17:52'; @@ -16,6 +18,7 @@ select 1 >= '1'; select 2 >= '1.0'; select 2 >= '2.0'; select 2.0 >= '2.2'; +select '1.5' >= 0.5; select to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52'); select to_date('2009-07-30 04:17:52') >= '2009-07-30 04:17:52'; @@ -24,6 +27,7 @@ select 1 < '1'; select 2 < '1.0'; select 2 < '2.0'; select 2.0 < '2.2'; +select 0.5 < '1.5'; select to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52'); select to_date('2009-07-30 04:17:52') < '2009-07-30 04:17:52'; @@ -32,5 +36,6 @@ select 1 <= '1'; select 2 <= '1.0'; select 2 <= '2.0'; select 2.0 <= '2.2'; +select 0.5 <= '1.5'; select to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52'); select to_date('2009-07-30 04:17:52') <= '2009-07-30 04:17:52'; http://git-wip-us.apache.org/repos/asf/spark/blob/bc0848b4/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out index 8e7e04c..8cd0d51 100644 --- a/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 27 +-- Number of queries: 31 -- !query 0 @@ -21,12 +21,20 @@ true -- !query 2 select 1.0 = '1' -- !query 2 schema 
-struct<(1.0 = CAST(1 AS DECIMAL(2,1))):boolean> +struct<(CAST(1.0 AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> -- !query 2 output true -- !query 3 +select 1.5 = '1.51' +-- !query 3 schema +struct<(CAST(1.5 AS DOUBLE) = CAST(1.51 AS DOUBLE)):boolean> +-- !query 3 output +false + + +-- !query 3 select 1 > '1' -- !query 3 schema struct<(1 > CAST(1 AS INT)):boolean> @@ -59,160 +67,192 @@ false -- !query 7 -select to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52') +select '1.5' > 0.5 -- !query 7 schema -struct<(to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52')):boolean> +struct<(CAST(1.5 AS DOUBLE) > CAST(0.5 AS DOUBLE)):boolean> -- !query 7 output -false +true -- !query 8 -select to_date('2009-07-30 04:17:52') > '2009-07-30 04:17:52' +select to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52') -- !query 8 schema -struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) > 2009-07-30 04:17:52):boolean> +struct<(to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52')):boolean> -- !query 8 output false -- !query 9 -select 1 >= '1' +select to_date('2009-07-30 04:17:52') > '2009-07-30 04:17:52' -- !query 9 schema -struct<(1 >= CAST(1 AS INT)):boolean> +struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) > 2009-07-30 04:17:52):boolean> -- !query 9 output -true +false -- !query 10 -select 2 >= '1.0' +select 1 >= '1' -- !query 10 schema -struct<(2 >= CAST(1.0 AS INT)):boolean> +struct<(1 >= CAST(1 AS INT)):boolean> -- !query 10 output true -- !query 11 -select 2 >= '2.0' +select 2 >= '1.0' -- !query 11 schema -struct<(2 >= CAST(2.0 AS INT)):boolean> +struct<(2 >= CAST(1.0 AS INT)):boolean> -- !query 11 output true -- !query 12 -select 2.0 >= '2.2' +select 2 >= '2.0' -- !query 12 schema -struct<(2.0 >= CAST(2.2 AS DECIMAL(2,1))):boolean> +struct<(2 >= CAST(2.0 AS INT)):boolean> -- !query 12 output -false +true -- !query 13 -select to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52') +select 2.0 >= '2.2' -- !query 13 schema 
-struct<(to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52')):boolean> +struct<(CAST(2.0 AS DOUBLE) >= CAST(2.2 AS DOUBLE)):boolean> -- !query 13 output -true +false -- !query 14 -select to_date('2009-07-30 04:17:52') >= '2009-07-30 04:17:52' +select '1.5' >= 0.5 -- !query 14 schema -struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) >= 2009-07-30 04:17:52):boolean> +struct<(CAST(1.5 AS DOUBLE) >= CAST(0.5 AS DOUBLE)):boolean> -- !query 14 output -false +true -- !query 15 -select 1 < '1' +select to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52') -- !query 15 schema -struct<(1 < CAST(1 AS INT)):boolean> +struct<(to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52')):boolean> -- !query 15 output -false +true -- !query 16 -select 2 < '1.0' +select to_date('2009-07-30 04:17:52') >= '2009-07-30 04:17:52' -- !query 16 schema -struct<(2 < CAST(1.0 AS INT)):boolean> +struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) >= 2009-07-30 04:17:52):boolean> -- !query 16 output false -- !query 17 -select 2 < '2.0' +select 1 < '1' -- !query 17 schema -struct<(2 < CAST(2.0 AS INT)):boolean> +struct<(1 < CAST(1 AS INT)):boolean> -- !query 17 output false -- !query 18 -select 2.0 < '2.2' +select 2 < '1.0' -- !query 18 schema -struct<(2.0 < CAST(2.2 AS DECIMAL(2,1))):boolean> +struct<(2 < CAST(1.0 AS INT)):boolean> -- !query 18 output -true +false -- !query 19 -select to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52') +select 2 < '2.0' -- !query 19 schema -struct<(to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52')):boolean> +struct<(2 < CAST(2.0 AS INT)):boolean> -- !query 19 output false -- !query 20 -select to_date('2009-07-30 04:17:52') < '2009-07-30 04:17:52' +select 2.0 < '2.2' -- !query 20 schema -struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) < 2009-07-30 04:17:52):boolean> +struct<(CAST(2.0 AS DOUBLE) < CAST(2.2 AS DOUBLE)):boolean> -- !query 20 output true -- !query 21 -select 1 <= '1' +select 0.5 < 
'1.5' -- !query 21 schema -struct<(1 <= CAST(1 AS INT)):boolean> +struct<(CAST(0.5 AS DOUBLE) < CAST(1.5 AS DOUBLE)):boolean> -- !query 21 output true -- !query 22 -select 2 <= '1.0' +select to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52') -- !query 22 schema -struct<(2 <= CAST(1.0 AS INT)):boolean> +struct<(to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52')):boolean> -- !query 22 output false -- !query 23 -select 2 <= '2.0' +select to_date('2009-07-30 04:17:52') < '2009-07-30 04:17:52' -- !query 23 schema -struct<(2 <= CAST(2.0 AS INT)):boolean> +struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) < 2009-07-30 04:17:52):boolean> -- !query 23 output true -- !query 24 -select 2.0 <= '2.2' +select 1 <= '1' -- !query 24 schema -struct<(2.0 <= CAST(2.2 AS DECIMAL(2,1))):boolean> +struct<(1 <= CAST(1 AS INT)):boolean> -- !query 24 output true -- !query 25 -select to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52') +select 2 <= '1.0' -- !query 25 schema -struct<(to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52')):boolean> +struct<(2 <= CAST(1.0 AS INT)):boolean> -- !query 25 output -true +false -- !query 26 -select to_date('2009-07-30 04:17:52') <= '2009-07-30 04:17:52' +select 2 <= '2.0' -- !query 26 schema -struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) <= 2009-07-30 04:17:52):boolean> +struct<(2 <= CAST(2.0 AS INT)):boolean> -- !query 26 output true + + +-- !query 27 +select 2.0 <= '2.2' +-- !query 27 schema +struct<(CAST(2.0 AS DOUBLE) <= CAST(2.2 AS DOUBLE)):boolean> +-- !query 27 output +true + + +-- !query 28 +select 0.5 <= '1.5' +-- !query 28 schema +struct<(CAST(0.5 AS DOUBLE) <= CAST(1.5 AS DOUBLE)):boolean> +-- !query 28 output +true + + +-- !query 29 +select to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52') +-- !query 29 schema +struct<(to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52')):boolean> +-- !query 29 output +true + + +-- !query 30 +select 
to_date('2009-07-30 04:17:52') <= '2009-07-30 04:17:52' +-- !query 30 schema +struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) <= 2009-07-30 04:17:52):boolean> +-- !query 30 output +true --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org