Repository: spark
Updated Branches:
  refs/heads/master 8aae49afc -> 47d6e80a2
[SPARK-25457][SQL] IntegralDivide returns data type of the operands

## What changes were proposed in this pull request?

The PR proposes to return the data type of the operands as the result type of the `div` operator. Before this PR, `bigint` was always returned. It also introduces a `spark.sql.legacy.integralDivide.returnBigint` config to let users restore the legacy behavior.

## How was this patch tested?

Added UTs.

Closes #22465 from mgaido91/SPARK-25457.

Authored-by: Marco Gaido <marcogaid...@gmail.com>
Signed-off-by: Wenchen Fan <wenc...@databricks.com>

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/47d6e80a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/47d6e80a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/47d6e80a

Branch: refs/heads/master
Commit: 47d6e80a2e64823fabb596503fb6a6cc6f51f713
Parents: 8aae49a
Author: Marco Gaido <marcogaid...@gmail.com>
Authored: Thu Sep 20 10:23:37 2018 +0800
Committer: Wenchen Fan <wenc...@databricks.com>
Committed: Thu Sep 20 10:23:37 2018 +0800
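For illustration, a minimal spark-shell sketch of the behavior change (assuming a Spark build that includes this patch and a running SparkSession named `spark`; the expected result types follow the golden files added below):

    // Default after this change: div keeps the operand type (here int div int -> int).
    spark.sql("select 5 div 2").printSchema()   // expected: column (5 div 2) of type integer

    // Legacy flag: restore the previous behavior where div always returns bigint.
    spark.conf.set("spark.sql.legacy.integralDivide.returnBigint", "true")
    spark.sql("select 5 div 2").printSchema()   // expected: column (5 div 2) of type long (bigint)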
----------------------------------------------------------------------
 .../sql/catalyst/expressions/arithmetic.scala   |  17 +-
 .../org/apache/spark/sql/internal/SQLConf.scala |   9 +
 .../expressions/ArithmeticExpressionSuite.scala |  26 +-
 .../resources/sql-tests/inputs/operator-div.sql |  14 ++
 .../resources/sql-tests/inputs/operators.sql    |   6 +-
 .../sql-tests/results/operator-div.sql.out      |  82 ++++++
 .../sql-tests/results/operators.sql.out         | 248 ++++++++-----------
 7 files changed, 246 insertions(+), 156 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/47d6e80a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
index 1b1808f..f59b2a2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
@@ -22,6 +22,7 @@ import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion}
 import org.apache.spark.sql.catalyst.expressions.codegen._
 import org.apache.spark.sql.catalyst.expressions.codegen.Block._
 import org.apache.spark.sql.catalyst.util.TypeUtils
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.CalendarInterval
 
@@ -327,16 +328,24 @@ case class Divide(left: Expression, right: Expression) extends DivModLike {
 case class IntegralDivide(left: Expression, right: Expression) extends DivModLike {
 
   override def inputType: AbstractDataType = IntegralType
-  override def dataType: DataType = LongType
+  override def dataType: DataType = if (SQLConf.get.integralDivideReturnLong) {
+    LongType
+  } else {
+    left.dataType
+  }
 
   override def symbol: String = "/"
   override def sqlOperator: String = "div"
 
-  private lazy val div: (Any, Any) => Long = left.dataType match {
+  private lazy val div: (Any, Any) => Any = left.dataType match {
     case i: IntegralType =>
       val divide = i.integral.asInstanceOf[Integral[Any]].quot _
-      val toLong = i.integral.asInstanceOf[Integral[Any]].toLong _
-      (x, y) => toLong(divide(x, y))
+      if (SQLConf.get.integralDivideReturnLong) {
+        val toLong = i.integral.asInstanceOf[Integral[Any]].toLong _
+        (x, y) => toLong(divide(x, y))
+      } else {
+        divide
+      }
   }
 
   override def evalOperation(left: Any, right: Any): Any = div(left, right)


http://git-wip-us.apache.org/repos/asf/spark/blob/47d6e80a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index c3328a6..907221c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -1561,6 +1561,13 @@ object SQLConf {
       "are performed before any UNION, EXCEPT and MINUS operations.")
     .booleanConf
     .createWithDefault(false)
+
+  val LEGACY_INTEGRALDIVIDE_RETURN_LONG = buildConf("spark.sql.legacy.integralDivide.returnBigint")
+    .doc("If it is set to true, the div operator returns always a bigint. This behavior was " +
+      "inherited from Hive. Otherwise, the return type is the data type of the operands.")
+    .internal()
+    .booleanConf
+    .createWithDefault(false)
 }
 
 /**
@@ -1973,6 +1980,8 @@ class SQLConf extends Serializable with Logging {
 
   def setOpsPrecedenceEnforced: Boolean = getConf(SQLConf.LEGACY_SETOPS_PRECEDENCE_ENABLED)
 
+  def integralDivideReturnLong: Boolean = getConf(SQLConf.LEGACY_INTEGRALDIVIDE_RETURN_LONG)
+
   /** ********************** SQLConf functionality methods ************ */
 
   /** Set Spark SQL configuration properties. */


http://git-wip-us.apache.org/repos/asf/spark/blob/47d6e80a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala
index c3c4d9e..1318ab1 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala
@@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.TypeCheckFailure
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 
 class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {
@@ -144,13 +145,24 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper
   }
 
   test("/ (Divide) for integral type") {
-    checkEvaluation(IntegralDivide(Literal(1.toByte), Literal(2.toByte)), 0L)
-    checkEvaluation(IntegralDivide(Literal(1.toShort), Literal(2.toShort)), 0L)
-    checkEvaluation(IntegralDivide(Literal(1), Literal(2)), 0L)
-    checkEvaluation(IntegralDivide(Literal(1.toLong), Literal(2.toLong)), 0L)
-    checkEvaluation(IntegralDivide(positiveShortLit, negativeShortLit), 0L)
-    checkEvaluation(IntegralDivide(positiveIntLit, negativeIntLit), 0L)
-    checkEvaluation(IntegralDivide(positiveLongLit, negativeLongLit), 0L)
+    withSQLConf(SQLConf.LEGACY_INTEGRALDIVIDE_RETURN_LONG.key -> "false") {
+      checkEvaluation(IntegralDivide(Literal(1.toByte), Literal(2.toByte)), 0.toByte)
+      checkEvaluation(IntegralDivide(Literal(1.toShort), Literal(2.toShort)), 0.toShort)
+      checkEvaluation(IntegralDivide(Literal(1), Literal(2)), 0)
+      checkEvaluation(IntegralDivide(Literal(1.toLong), Literal(2.toLong)), 0.toLong)
+      checkEvaluation(IntegralDivide(positiveShortLit, negativeShortLit), 0.toShort)
+      checkEvaluation(IntegralDivide(positiveIntLit, negativeIntLit), 0)
+      checkEvaluation(IntegralDivide(positiveLongLit, negativeLongLit), 0L)
+    }
+    withSQLConf(SQLConf.LEGACY_INTEGRALDIVIDE_RETURN_LONG.key -> "true") {
+      checkEvaluation(IntegralDivide(Literal(1.toByte), Literal(2.toByte)), 0L)
+      checkEvaluation(IntegralDivide(Literal(1.toShort), Literal(2.toShort)), 0L)
+      checkEvaluation(IntegralDivide(Literal(1), Literal(2)), 0L)
+      checkEvaluation(IntegralDivide(Literal(1.toLong), Literal(2.toLong)), 0L)
+      checkEvaluation(IntegralDivide(positiveShortLit, negativeShortLit), 0L)
+      checkEvaluation(IntegralDivide(positiveIntLit, negativeIntLit), 0L)
+      checkEvaluation(IntegralDivide(positiveLongLit, negativeLongLit), 0L)
+    }
   }
 
   test("% (Remainder)") {


http://git-wip-us.apache.org/repos/asf/spark/blob/47d6e80a/sql/core/src/test/resources/sql-tests/inputs/operator-div.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/operator-div.sql b/sql/core/src/test/resources/sql-tests/inputs/operator-div.sql
new file mode 100644
index 0000000..6e1c1bd
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/operator-div.sql
@@ -0,0 +1,14 @@
+set spark.sql.legacy.integralDivide.returnBigint=true;
+
+select 5 div 2;
+select 5 div 0;
+select 5 div null;
+select null div 5;
+
+set spark.sql.legacy.integralDivide.returnBigint=false;
+
+select 5 div 2;
+select 5 div 0;
+select 5 div null;
+select null div 5;
+


http://git-wip-us.apache.org/repos/asf/spark/blob/47d6e80a/sql/core/src/test/resources/sql-tests/inputs/operators.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/operators.sql b/sql/core/src/test/resources/sql-tests/inputs/operators.sql
index 15d9819..37f9cd4 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/operators.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/operators.sql
@@ -16,15 +16,11 @@ select + + 100;
 select - - max(key) from testdata;
 select + - key from testdata where key = 33;
 
--- div
+-- division
 select 5 / 2;
 select 5 / 0;
 select 5 / null;
 select null / 5;
-select 5 div 2;
-select 5 div 0;
-select 5 div null;
-select null div 5;
 
 -- other arithmetics
 select 1 + 2;


http://git-wip-us.apache.org/repos/asf/spark/blob/47d6e80a/sql/core/src/test/resources/sql-tests/results/operator-div.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/operator-div.sql.out b/sql/core/src/test/resources/sql-tests/results/operator-div.sql.out
new file mode 100644
index 0000000..088b4d1
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/operator-div.sql.out
@@ -0,0 +1,82 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 10
+
+
+-- !query 0
+set spark.sql.legacy.integralDivide.returnBigint=true
+-- !query 0 schema
+struct<key:string,value:string>
+-- !query 0 output
+spark.sql.legacy.integralDivide.returnBigint true
+
+
+-- !query 1
+select 5 div 2
+-- !query 1 schema
+struct<(5 div 2):bigint>
+-- !query 1 output
+2
+
+
+-- !query 2
+select 5 div 0
+-- !query 2 schema
+struct<(5 div 0):bigint>
+-- !query 2 output
+NULL
+
+
+-- !query 3
+select 5 div null
+-- !query 3 schema
+struct<(5 div CAST(NULL AS INT)):bigint>
+-- !query 3 output
+NULL
+
+
+-- !query 4
+select null div 5
+-- !query 4 schema
+struct<(CAST(NULL AS INT) div 5):bigint>
+-- !query 4 output
+NULL
+
+
+-- !query 5
+set spark.sql.legacy.integralDivide.returnBigint=false
+-- !query 5 schema
+struct<key:string,value:string>
+-- !query 5 output
+spark.sql.legacy.integralDivide.returnBigint false
+
+
+-- !query 6
+select 5 div 2
+-- !query 6 schema
+struct<(5 div 2):int>
+-- !query 6 output
+2
+
+
+-- !query 7
+select 5 div 0
+-- !query 7 schema
+struct<(5 div 0):int>
+-- !query 7 output
+NULL
+
+
+-- !query 8
+select 5 div null
+-- !query 8 schema
+struct<(5 div CAST(NULL AS INT)):int>
+-- !query 8 output
+NULL
+
+
+-- !query 9
+select null div 5
+-- !query 9 schema
+struct<(CAST(NULL AS INT) div 5):int>
+-- !query 9 output
+NULL


http://git-wip-us.apache.org/repos/asf/spark/blob/47d6e80a/sql/core/src/test/resources/sql-tests/results/operators.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/operators.sql.out b/sql/core/src/test/resources/sql-tests/results/operators.sql.out
index 2555734..fd1d0db 100644
--- a/sql/core/src/test/resources/sql-tests/results/operators.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/operators.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 59
+-- Number of queries: 55
 
 
 -- !query 0
@@ -155,332 +155,300 @@ NULL
 
 
 -- !query 19
-select 5 div 2
--- !query 19 schema
-struct<(5 div 2):bigint>
--- !query 19 output
-2
-
-
--- !query 20
-select 5 div 0
--- !query 20 schema
-struct<(5 div 0):bigint>
--- !query 20 output
-NULL
-
-
--- !query 21
-select 5 div null
--- !query 21 schema
-struct<(5 div CAST(NULL AS INT)):bigint>
--- !query 21 output
-NULL
-
-
--- !query 22
-select null div 5
--- !query 22 schema
-struct<(CAST(NULL AS INT) div 5):bigint>
--- !query 22 output
-NULL
-
-
--- !query 23
 select 1 + 2
--- !query 23 schema
+-- !query 19 schema
 struct<(1 + 2):int>
--- !query 23 output
+-- !query 19 output
 3
 
 
--- !query 24
+-- !query 20
 select 1 - 2
--- !query 24 schema
+-- !query 20 schema
 struct<(1 - 2):int>
--- !query 24 output
+-- !query 20 output
 -1
 
 
--- !query 25
+-- !query 21
 select 2 * 5
--- !query 25 schema
+-- !query 21 schema
 struct<(2 * 5):int>
--- !query 25 output
+-- !query 21 output
 10
 
 
--- !query 26
+-- !query 22
 select 5 % 3
--- !query 26 schema
+-- !query 22 schema
 struct<(5 % 3):int>
--- !query 26 output
+-- !query 22 output
 2
 
 
--- !query 27
+-- !query 23
 select pmod(-7, 3)
--- !query 27 schema
+-- !query 23 schema
 struct<pmod(-7, 3):int>
--- !query 27 output
+-- !query 23 output
 2
 
 
--- !query 28
+-- !query 24
 explain select 'a' || 1 + 2
--- !query 28 schema
+-- !query 24 schema
 struct<plan:string>
--- !query 28 output
+-- !query 24 output
 == Physical Plan ==
 *Project [null AS (CAST(concat(a, CAST(1 AS STRING)) AS DOUBLE) + CAST(2 AS DOUBLE))#x]
 +- Scan OneRowRelation[]
 
 
--- !query 29
+-- !query 25
 explain select 1 - 2 || 'b'
--- !query 29 schema
+-- !query 25 schema
 struct<plan:string>
--- !query 29 output
+-- !query 25 output
 == Physical Plan ==
 *Project [-1b AS concat(CAST((1 - 2) AS STRING), b)#x]
 +- Scan OneRowRelation[]
 
 
--- !query 30
+-- !query 26
 explain select 2 * 4 + 3 || 'b'
--- !query 30 schema
+-- !query 26 schema
 struct<plan:string>
--- !query 30 output
+-- !query 26 output
 == Physical Plan ==
 *Project [11b AS concat(CAST(((2 * 4) + 3) AS STRING), b)#x]
 +- Scan OneRowRelation[]
 
 
--- !query 31
+-- !query 27
 explain select 3 + 1 || 'a' || 4 / 2
--- !query 31 schema
+-- !query 27 schema
 struct<plan:string>
--- !query 31 output
+-- !query 27 output
 == Physical Plan ==
 *Project [4a2.0 AS concat(concat(CAST((3 + 1) AS STRING), a), CAST((CAST(4 AS DOUBLE) / CAST(2 AS DOUBLE)) AS STRING))#x]
 +- Scan OneRowRelation[]
 
 
--- !query 32
+-- !query 28
 explain select 1 == 1 OR 'a' || 'b' == 'ab'
--- !query 32 schema
+-- !query 28 schema
 struct<plan:string>
--- !query 32 output
+-- !query 28 output
 == Physical Plan ==
 *Project [true AS ((1 = 1) OR (concat(a, b) = ab))#x]
 +- Scan OneRowRelation[]
 
 
--- !query 33
+-- !query 29
 explain select 'a' || 'c' == 'ac' AND 2 == 3
--- !query 33 schema
+-- !query 29 schema
 struct<plan:string>
--- !query 33 output
+-- !query 29 output
 == Physical Plan ==
 *Project [false AS ((concat(a, c) = ac) AND (2 = 3))#x]
 +- Scan OneRowRelation[]
 
 
--- !query 34
+-- !query 30
 select cot(1)
--- !query 34 schema
+-- !query 30 schema
 struct<COT(CAST(1 AS DOUBLE)):double>
--- !query 34 output
+-- !query 30 output
 0.6420926159343306
 
 
--- !query 35
+-- !query 31
 select cot(null)
--- !query 35 schema
+-- !query 31 schema
 struct<COT(CAST(NULL AS DOUBLE)):double>
--- !query 35 output
+-- !query 31 output
 NULL
 
 
--- !query 36
+-- !query 32
 select cot(0)
--- !query 36 schema
+-- !query 32 schema
 struct<COT(CAST(0 AS DOUBLE)):double>
--- !query 36 output
+-- !query 32 output
 Infinity
 
 
--- !query 37
+-- !query 33
 select cot(-1)
--- !query 37 schema
+-- !query 33 schema
 struct<COT(CAST(-1 AS DOUBLE)):double>
--- !query 37 output
+-- !query 33 output
 -0.6420926159343306
 
 
--- !query 38
+-- !query 34
 select ceiling(0)
--- !query 38 schema
+-- !query 34 schema
 struct<CEIL(CAST(0 AS DOUBLE)):bigint>
--- !query 38 output
+-- !query 34 output
 0
 
 
--- !query 39
+-- !query 35
 select ceiling(1)
--- !query 39 schema
+-- !query 35 schema
 struct<CEIL(CAST(1 AS DOUBLE)):bigint>
--- !query 39 output
+-- !query 35 output
 1
 
 
--- !query 40
+-- !query 36
 select ceil(1234567890123456)
--- !query 40 schema
+-- !query 36 schema
 struct<CEIL(1234567890123456):bigint>
--- !query 40 output
+-- !query 36 output
 1234567890123456
 
 
--- !query 41
+-- !query 37
 select ceiling(1234567890123456)
--- !query 41 schema
+-- !query 37 schema
 struct<CEIL(1234567890123456):bigint>
--- !query 41 output
+-- !query 37 output
 1234567890123456
 
 
--- !query 42
+-- !query 38
 select ceil(0.01)
--- !query 42 schema
+-- !query 38 schema
 struct<CEIL(0.01):decimal(1,0)>
--- !query 42 output
+-- !query 38 output
 1
 
 
--- !query 43
+-- !query 39
 select ceiling(-0.10)
--- !query 43 schema
+-- !query 39 schema
 struct<CEIL(-0.10):decimal(1,0)>
--- !query 43 output
+-- !query 39 output
 0
 
 
--- !query 44
+-- !query 40
 select floor(0)
--- !query 44 schema
+-- !query 40 schema
 struct<FLOOR(CAST(0 AS DOUBLE)):bigint>
--- !query 44 output
+-- !query 40 output
 0
 
 
--- !query 45
+-- !query 41
 select floor(1)
--- !query 45 schema
+-- !query 41 schema
 struct<FLOOR(CAST(1 AS DOUBLE)):bigint>
--- !query 45 output
+-- !query 41 output
 1
 
 
--- !query 46
+-- !query 42
 select floor(1234567890123456)
--- !query 46 schema
+-- !query 42 schema
 struct<FLOOR(1234567890123456):bigint>
--- !query 46 output
+-- !query 42 output
 1234567890123456
 
 
--- !query 47
+-- !query 43
 select floor(0.01)
--- !query 47 schema
+-- !query 43 schema
 struct<FLOOR(0.01):decimal(1,0)>
--- !query 47 output
+-- !query 43 output
 0
 
 
--- !query 48
+-- !query 44
 select floor(-0.10)
--- !query 48 schema
+-- !query 44 schema
 struct<FLOOR(-0.10):decimal(1,0)>
--- !query 48 output
+-- !query 44 output
 -1
 
 
--- !query 49
+-- !query 45
 select 1 > 0.00001
--- !query 49 schema
+-- !query 45 schema
 struct<(CAST(1 AS BIGINT) > 0):boolean>
--- !query 49 output
+-- !query 45 output
 true
 
 
--- !query 50
+-- !query 46
 select mod(7, 2), mod(7, 0), mod(0, 2), mod(7, null), mod(null, 2), mod(null, null)
--- !query 50 schema
+-- !query 46 schema
 struct<(7 % 2):int,(7 % 0):int,(0 % 2):int,(7 % CAST(NULL AS INT)):int,(CAST(NULL AS INT) % 2):int,(CAST(NULL AS DOUBLE) % CAST(NULL AS DOUBLE)):double>
--- !query 50 output
+-- !query 46 output
 1 NULL 0 NULL NULL NULL
 
 
--- !query 51
+-- !query 47
 select BIT_LENGTH('abc')
--- !query 51 schema
+-- !query 47 schema
 struct<bit_length(abc):int>
--- !query 51 output
+-- !query 47 output
 24
 
 
--- !query 52
+-- !query 48
 select CHAR_LENGTH('abc')
--- !query 52 schema
+-- !query 48 schema
 struct<length(abc):int>
--- !query 52 output
+-- !query 48 output
 3
 
 
--- !query 53
+-- !query 49
 select CHARACTER_LENGTH('abc')
--- !query 53 schema
+-- !query 49 schema
 struct<length(abc):int>
--- !query 53 output
+-- !query 49 output
 3
 
 
--- !query 54
+-- !query 50
 select OCTET_LENGTH('abc')
--- !query 54 schema
+-- !query 50 schema
 struct<octet_length(abc):int>
--- !query 54 output
+-- !query 50 output
 3
 
 
--- !query 55
+-- !query 51
 select abs(-3.13), abs('-2.19')
--- !query 55 schema
+-- !query 51 schema
 struct<abs(-3.13):decimal(3,2),abs(CAST(-2.19 AS DOUBLE)):double>
--- !query 55 output
+-- !query 51 output
 3.13 2.19
 
 
--- !query 56
+-- !query 52
 select positive('-1.11'), positive(-1.11), negative('-1.11'), negative(-1.11)
--- !query 56 schema
+-- !query 52 schema
 struct<(+ CAST(-1.11 AS DOUBLE)):double,(+ -1.11):decimal(3,2),(- CAST(-1.11 AS DOUBLE)):double,(- -1.11):decimal(3,2)>
--- !query 56 output
+-- !query 52 output
 -1.11 -1.11 1.11 1.11
 
 
--- !query 57
+-- !query 53
 select pmod(-7, 2), pmod(0, 2), pmod(7, 0), pmod(7, null), pmod(null, 2), pmod(null, null)
--- !query 57 schema
+-- !query 53 schema
 struct<pmod(-7, 2):int,pmod(0, 2):int,pmod(7, 0):int,pmod(7, CAST(NULL AS INT)):int,pmod(CAST(NULL AS INT), 2):int,pmod(CAST(NULL AS DOUBLE), CAST(NULL AS DOUBLE)):double>
--- !query 57 output
+-- !query 53 output
 1 0 NULL NULL NULL NULL
 
 
--- !query 58
+-- !query 54
 select pmod(cast(3.13 as decimal), cast(0 as decimal)), pmod(cast(2 as smallint), cast(0 as smallint))
--- !query 58 schema
+-- !query 54 schema
 struct<pmod(CAST(3.13 AS DECIMAL(10,0)), CAST(0 AS DECIMAL(10,0))):decimal(10,0),pmod(CAST(2 AS SMALLINT), CAST(0 AS SMALLINT)):smallint>
--- !query 58 output
+-- !query 54 output
 NULL NULL

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org