This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new cc927edf770 [SPARK-39248][SQL] Improve divide performance for decimal type cc927edf770 is described below commit cc927edf770daa237993e076dc29b4793f4e2a84 Author: Yuming Wang <yumw...@ebay.com> AuthorDate: Tue May 24 21:49:20 2022 +0800 [SPARK-39248][SQL] Improve divide performance for decimal type ### What changes were proposed in this pull request? Switch decimal type divide from ``` toJavaBigDecimal.divide(that.toJavaBigDecimal, MATH_CONTEXT) ``` to ``` toJavaBigDecimal.divide(that.toJavaBigDecimal, DecimalType.MAX_SCALE, MATH_CONTEXT.getRoundingMode) ``` The difference is that, with the new API, the check [`preferredScale != scale`](https://github.com/openjdk/jdk8u-dev/blob/jdk8u342-b01/jdk/src/share/classes/java/math/BigDecimal.java#L4288) is false, so `BigDecimal.divideAndRound` no longer goes through the `createAndStripZerosToMatchScale` step that appears in the stack trace of the old API. This is the stack trace when using the old API: ``` java.math.MutableBigInteger.divideKnuth(MutableBigInteger.java:1203) java.math.MutableBigInteger.divideKnuth(MutableBigInteger.java:1163) java.math.BigInteger.divideAndRemainderKnuth(BigInteger.java:2235) java.math.BigInteger.divideAndRemainder(BigInteger.java:2223) java.math.BigDecimal.createAndStripZerosToMatchScale(BigDecimal.java:4404) java.math.BigDecimal.divideAndRound(BigDecimal.java:4294) java.math.BigDecimal.divide(BigDecimal.java:4660) java.math.BigDecimal.divide(BigDecimal.java:1753) ... ``` ### Why are the changes needed? Improve divide performance for decimal type. 
Benchmark code: ```scala import org.apache.spark.benchmark.Benchmark val valuesPerIteration = 2880404L val dir = "/tmp/spark/benchmark" spark.range(valuesPerIteration).selectExpr("CAST(id AS DECIMAL(9, 2)) AS d").write.mode("Overwrite").parquet(dir) val benchmark = new Benchmark("Benchmark decimal", valuesPerIteration, minNumIters = 5) benchmark.addCase("d * 2 > 0") { _ => spark.read.parquet(dir).where("d * 2 > 0").write.format("noop").mode("Overwrite").save() } benchmark.addCase("d / 2 > 0") { _ => spark.read.parquet(dir).where("d / 2 > 0").write.format("noop").mode("Overwrite").save() } benchmark.run() ``` Before this PR: ``` Java HotSpot(TM) 64-Bit Server VM 1.8.0_281-b09 on Mac OS X 10.15.7 Intel(R) Core(TM) i9-9980HK CPU 2.40GHz Benchmark decimal: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ d * 2 > 0 480 585 141 6.0 166.7 1.0X d / 2 > 0 4689 4920 243 0.6 1627.9 0.1X ``` After this PR: ``` Java HotSpot(TM) 64-Bit Server VM 1.8.0_281-b09 on Mac OS X 10.15.7 Intel(R) Core(TM) i9-9980HK CPU 2.40GHz Benchmark decimal: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ d * 2 > 0 529 580 35 5.4 183.6 1.0X d / 2 > 0 811 916 80 3.6 281.4 0.7X ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #36628 from wangyum/SPARK-39248. 
Authored-by: Yuming Wang <yumw...@ebay.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../scala/org/apache/spark/sql/types/Decimal.scala | 3 +- .../inputs/ansi/decimalArithmeticOperations.sql | 9 +++ .../inputs/decimalArithmeticOperations.sql | 9 +++ .../ansi/decimalArithmeticOperations.sql.out | 92 +++++++++++++++++++++- .../results/decimalArithmeticOperations.sql.out | 66 +++++++++++++++- 5 files changed, 175 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala index 7a43d01eb2f..43203e4f397 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -502,7 +502,8 @@ final class Decimal extends Ordered[Decimal] with Serializable { Decimal(toJavaBigDecimal.multiply(that.toJavaBigDecimal, MATH_CONTEXT)) def / (that: Decimal): Decimal = - if (that.isZero) null else Decimal(toJavaBigDecimal.divide(that.toJavaBigDecimal, MATH_CONTEXT)) + if (that.isZero) null else Decimal(toJavaBigDecimal.divide(that.toJavaBigDecimal, + DecimalType.MAX_SCALE, MATH_CONTEXT.getRoundingMode)) def % (that: Decimal): Decimal = if (that.isZero) null diff --git a/sql/core/src/test/resources/sql-tests/inputs/ansi/decimalArithmeticOperations.sql b/sql/core/src/test/resources/sql-tests/inputs/ansi/decimalArithmeticOperations.sql index d843847e6a1..c447511ba60 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/ansi/decimalArithmeticOperations.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/ansi/decimalArithmeticOperations.sql @@ -29,4 +29,13 @@ select 123456789123456789.1234567890 * 1.123456789123456789; select 123456789123456789.1234567890 * 1.123456789123456789; select 12345678912345.123456789123 / 0.000000012345678; +select 1.0123456789012345678901234567890123456e36BD / 0.1; +select 1.0123456789012345678901234567890123456e35BD / 1.0; 
+select 1.0123456789012345678901234567890123456e34BD / 1.0; +select 1.0123456789012345678901234567890123456e33BD / 1.0; +select 1.0123456789012345678901234567890123456e32BD / 1.0; +select 1.0123456789012345678901234567890123456e31BD / 1.0; +select 1.0123456789012345678901234567890123456e31BD / 0.1; +select 1.0123456789012345678901234567890123456e31BD / 10.0; + drop table decimals_test; diff --git a/sql/core/src/test/resources/sql-tests/inputs/decimalArithmeticOperations.sql b/sql/core/src/test/resources/sql-tests/inputs/decimalArithmeticOperations.sql index a3bc282cd6a..70bb9123994 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/decimalArithmeticOperations.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/decimalArithmeticOperations.sql @@ -83,4 +83,13 @@ select 12345678912345678912345678912.1234567 + 9999999999999999999999999999999.1 select 123456789123456789.1234567890 * 1.123456789123456789; select 12345678912345.123456789123 / 0.000000012345678; +select 1.0123456789012345678901234567890123456e36BD / 0.1; +select 1.0123456789012345678901234567890123456e35BD / 1.0; +select 1.0123456789012345678901234567890123456e34BD / 1.0; +select 1.0123456789012345678901234567890123456e33BD / 1.0; +select 1.0123456789012345678901234567890123456e32BD / 1.0; +select 1.0123456789012345678901234567890123456e31BD / 1.0; +select 1.0123456789012345678901234567890123456e31BD / 0.1; +select 1.0123456789012345678901234567890123456e31BD / 10.0; + drop table decimals_test; diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out index 9268a5f92d1..219b1e621e1 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 16 +-- 
Number of queries: 24 -- !query @@ -112,7 +112,7 @@ select 1e35BD / 0.1 struct<> -- !query output org.apache.spark.SparkArithmeticException -[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded, 1000000000000000000000000000000000000, 37, 0) cannot be represented as Decimal(38, 6). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded, 1000000000000000000000000000000000000.00000000000000000000000000000000000000, 75, 38) cannot be represented as Decimal(38, 6). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select 1e35BD / 0.1 ^^^^^^^^^^^^ @@ -142,6 +142,94 @@ struct<(12345678912345.123456789123 / 1.2345678E-8):decimal(38,9)> 1000000073899961059796.725866332 +-- !query +select 1.0123456789012345678901234567890123456e36BD / 0.1 +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkArithmeticException +[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded, 10123456789012345678901234567890123456.00000000000000000000000000000000000000, 76, 38) cannot be represented as Decimal(38, 6). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +== SQL(line 1, position 7) == +select 1.0123456789012345678901234567890123456e36BD / 0.1 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +-- !query +select 1.0123456789012345678901234567890123456e35BD / 1.0 +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkArithmeticException +[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded, 101234567890123456789012345678901234.56000000000000000000000000000000000000, 74, 38) cannot be represented as Decimal(38, 6). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. 
+== SQL(line 1, position 7) == +select 1.0123456789012345678901234567890123456e35BD / 1.0 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +-- !query +select 1.0123456789012345678901234567890123456e34BD / 1.0 +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkArithmeticException +[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded, 10123456789012345678901234567890123.45600000000000000000000000000000000000, 73, 38) cannot be represented as Decimal(38, 6). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +== SQL(line 1, position 7) == +select 1.0123456789012345678901234567890123456e34BD / 1.0 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +-- !query +select 1.0123456789012345678901234567890123456e33BD / 1.0 +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkArithmeticException +[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded, 1012345678901234567890123456789012.34560000000000000000000000000000000000, 72, 38) cannot be represented as Decimal(38, 6). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +== SQL(line 1, position 7) == +select 1.0123456789012345678901234567890123456e33BD / 1.0 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +-- !query +select 1.0123456789012345678901234567890123456e32BD / 1.0 +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkArithmeticException +[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded, 101234567890123456789012345678901.23456000000000000000000000000000000000, 71, 38) cannot be represented as Decimal(38, 6). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. 
+== SQL(line 1, position 7) == +select 1.0123456789012345678901234567890123456e32BD / 1.0 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +-- !query +select 1.0123456789012345678901234567890123456e31BD / 1.0 +-- !query schema +struct<(10123456789012345678901234567890.123456 / 1.0):decimal(38,6)> +-- !query output +10123456789012345678901234567890.123456 + + +-- !query +select 1.0123456789012345678901234567890123456e31BD / 0.1 +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkArithmeticException +[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded, 101234567890123456789012345678901.23456000000000000000000000000000000000, 71, 38) cannot be represented as Decimal(38, 6). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +== SQL(line 1, position 7) == +select 1.0123456789012345678901234567890123456e31BD / 0.1 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +-- !query +select 1.0123456789012345678901234567890123456e31BD / 10.0 +-- !query schema +struct<(10123456789012345678901234567890.123456 / 10.0):decimal(38,6)> +-- !query output +1012345678901234567890123456789.012346 + + -- !query drop table decimals_test -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/decimalArithmeticOperations.sql.out b/sql/core/src/test/resources/sql-tests/results/decimalArithmeticOperations.sql.out index 1d92dc35010..f58950e7071 100644 --- a/sql/core/src/test/resources/sql-tests/results/decimalArithmeticOperations.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/decimalArithmeticOperations.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 40 +-- Number of queries: 48 -- !query @@ -327,6 +327,70 @@ struct<(12345678912345.123456789123 / 1.2345678E-8):decimal(38,18)> NULL +-- !query +select 1.0123456789012345678901234567890123456e36BD / 0.1 +-- !query schema +struct<(1012345678901234567890123456789012345.6 / 0.1):decimal(38,2)> +-- !query output +NULL + 
+ +-- !query +select 1.0123456789012345678901234567890123456e35BD / 1.0 +-- !query schema +struct<(101234567890123456789012345678901234.56 / 1.0):decimal(38,3)> +-- !query output +NULL + + +-- !query +select 1.0123456789012345678901234567890123456e34BD / 1.0 +-- !query schema +struct<(10123456789012345678901234567890123.456 / 1.0):decimal(38,3)> +-- !query output +10123456789012345678901234567890123.456 + + +-- !query +select 1.0123456789012345678901234567890123456e33BD / 1.0 +-- !query schema +struct<(1012345678901234567890123456789012.3456 / 1.0):decimal(38,4)> +-- !query output +1012345678901234567890123456789012.3456 + + +-- !query +select 1.0123456789012345678901234567890123456e32BD / 1.0 +-- !query schema +struct<(101234567890123456789012345678901.23456 / 1.0):decimal(38,5)> +-- !query output +101234567890123456789012345678901.23456 + + +-- !query +select 1.0123456789012345678901234567890123456e31BD / 1.0 +-- !query schema +struct<(10123456789012345678901234567890.123456 / 1.0):decimal(38,6)> +-- !query output +10123456789012345678901234567890.123456 + + +-- !query +select 1.0123456789012345678901234567890123456e31BD / 0.1 +-- !query schema +struct<(10123456789012345678901234567890.123456 / 0.1):decimal(38,6)> +-- !query output +NULL + + +-- !query +select 1.0123456789012345678901234567890123456e31BD / 10.0 +-- !query schema +struct<(10123456789012345678901234567890.123456 / 10.0):decimal(38,7)> +-- !query output +1012345678901234567890123456789.0123456 + + -- !query drop table decimals_test -- !query schema --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org