This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new cc927edf770 [SPARK-39248][SQL] Improve divide performance for decimal type
cc927edf770 is described below

commit cc927edf770daa237993e076dc29b4793f4e2a84
Author: Yuming Wang <yumw...@ebay.com>
AuthorDate: Tue May 24 21:49:20 2022 +0800

    [SPARK-39248][SQL] Improve divide performance for decimal type
    
    ### What changes were proposed in this pull request?
    
    Switch decimal type divide from
    ```
    toJavaBigDecimal.divide(that.toJavaBigDecimal, MATH_CONTEXT)
    ```
    to
    ```
    toJavaBigDecimal.divide(that.toJavaBigDecimal, DecimalType.MAX_SCALE, MATH_CONTEXT.getRoundingMode)
    ```
    
    The difference is that with the new API, [`preferredScale != scale`](https://github.com/openjdk/jdk8u-dev/blob/jdk8u342-b01/jdk/src/share/classes/java/math/BigDecimal.java#L4288) is false, so `BigDecimal` skips the expensive `createAndStripZerosToMatchScale` rescaling step.
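
    A minimal standalone sketch of the difference between the two `java.math.BigDecimal.divide` overloads (plain Scala, no Spark required; the precision 38, the `HALF_UP` rounding mode, and the literal 38 standing in for `DecimalType.MAX_SCALE` are illustrative assumptions, not necessarily Spark's exact constants):
    ```scala
    import java.math.{MathContext, RoundingMode, BigDecimal => JBigDecimal}

    val a = new JBigDecimal("1")
    val b = new JBigDecimal("8")

    // MathContext overload: after dividing, the result is rescaled toward the
    // preferred scale by stripping trailing zeros.
    println(a.divide(b, new MathContext(38, RoundingMode.HALF_UP)))
    // 0.125

    // Fixed-scale overload: the result scale is pinned up front, so the
    // rescaling pass never runs.
    println(a.divide(b, 38, RoundingMode.HALF_UP))
    // 0.12500000000000000000000000000000000000
    ```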
    
    This is the stack trace if using the old API:
    ```
    java.math.MutableBigInteger.divideKnuth(MutableBigInteger.java:1203)
    java.math.MutableBigInteger.divideKnuth(MutableBigInteger.java:1163)
    java.math.BigInteger.divideAndRemainderKnuth(BigInteger.java:2235)
    java.math.BigInteger.divideAndRemainder(BigInteger.java:2223)
    java.math.BigDecimal.createAndStripZerosToMatchScale(BigDecimal.java:4404)
    java.math.BigDecimal.divideAndRound(BigDecimal.java:4294)
    java.math.BigDecimal.divide(BigDecimal.java:4660)
    java.math.BigDecimal.divide(BigDecimal.java:1753)
    ...
    ```
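
    The hot frame is `createAndStripZerosToMatchScale`. A rough conceptual sketch of its behavior (illustrative only, not the JDK source; the helper name is kept for clarity) shows why the old path is costly: every candidate trailing zero costs a complete `BigInteger` division.
    ```scala
    import java.math.BigInteger

    // Divide the unscaled value by ten while it still ends in zero and the
    // scale sits above the preferred scale. Each iteration is one full
    // BigInteger division: the divideAndRemainder frames in the trace above.
    def stripZerosToMatchScale(
        unscaled: BigInteger, scale: Int, preferredScale: Int): (BigInteger, Int) = {
      var value = unscaled
      var s = scale
      while (s > preferredScale && !value.testBit(0)) { // odd values cannot end in 0
        val Array(q, r) = value.divideAndRemainder(BigInteger.TEN)
        if (r.signum != 0) return (value, s) // last digit is non-zero; stop stripping
        value = q
        s -= 1
      }
      (value, s)
    }
    ```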
    
    ### Why are the changes needed?
    
    Improve divide performance for the decimal type: in the benchmark below, the best per-row time for decimal division drops from about 1628 ns to 281 ns (roughly 5.8x).
    
    Benchmark code:
    ```scala
    import org.apache.spark.benchmark.Benchmark
    
    val valuesPerIteration = 2880404L
    val dir = "/tmp/spark/benchmark"
    spark.range(valuesPerIteration).selectExpr("CAST(id AS DECIMAL(9, 2)) AS d").write.mode("Overwrite").parquet(dir)
    
    val benchmark = new Benchmark("Benchmark decimal", valuesPerIteration, minNumIters = 5)
    benchmark.addCase("d * 2 > 0") { _ =>
      spark.read.parquet(dir).where("d * 2 > 0").write.format("noop").mode("Overwrite").save()
    }
    
    benchmark.addCase("d / 2 > 0") { _ =>
      spark.read.parquet(dir).where("d / 2 > 0").write.format("noop").mode("Overwrite").save()
    }
    benchmark.run()
    ```
    
    Before this PR:
    ```
    Java HotSpot(TM) 64-Bit Server VM 1.8.0_281-b09 on Mac OS X 10.15.7
    Intel(R) Core(TM) i9-9980HK CPU  2.40GHz
    Benchmark decimal:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
    ------------------------------------------------------------------------------------------------------------------------
    d * 2 > 0                                           480            585         141          6.0         166.7       1.0X
    d / 2 > 0                                          4689           4920         243          0.6        1627.9       0.1X
    ```
    
    After this PR:
    ```
    Java HotSpot(TM) 64-Bit Server VM 1.8.0_281-b09 on Mac OS X 10.15.7
    Intel(R) Core(TM) i9-9980HK CPU  2.40GHz
    Benchmark decimal:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
    ------------------------------------------------------------------------------------------------------------------------
    d * 2 > 0                                           529            580          35          5.4         183.6       1.0X
    d / 2 > 0                                           811            916          80          3.6         281.4       0.7X
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Unit test.
    
    Closes #36628 from wangyum/SPARK-39248.
    
    Authored-by: Yuming Wang <yumw...@ebay.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../scala/org/apache/spark/sql/types/Decimal.scala |  3 +-
 .../inputs/ansi/decimalArithmeticOperations.sql    |  9 +++
 .../inputs/decimalArithmeticOperations.sql         |  9 +++
 .../ansi/decimalArithmeticOperations.sql.out       | 92 +++++++++++++++++++++-
 .../results/decimalArithmeticOperations.sql.out    | 66 +++++++++++++++-
 5 files changed, 175 insertions(+), 4 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
index 7a43d01eb2f..43203e4f397 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
@@ -502,7 +502,8 @@ final class Decimal extends Ordered[Decimal] with Serializable {
     Decimal(toJavaBigDecimal.multiply(that.toJavaBigDecimal, MATH_CONTEXT))
 
   def / (that: Decimal): Decimal =
-    if (that.isZero) null else Decimal(toJavaBigDecimal.divide(that.toJavaBigDecimal, MATH_CONTEXT))
+    if (that.isZero) null else Decimal(toJavaBigDecimal.divide(that.toJavaBigDecimal,
+      DecimalType.MAX_SCALE, MATH_CONTEXT.getRoundingMode))
 
   def % (that: Decimal): Decimal =
     if (that.isZero) null
diff --git a/sql/core/src/test/resources/sql-tests/inputs/ansi/decimalArithmeticOperations.sql b/sql/core/src/test/resources/sql-tests/inputs/ansi/decimalArithmeticOperations.sql
index d843847e6a1..c447511ba60 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/ansi/decimalArithmeticOperations.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/ansi/decimalArithmeticOperations.sql
@@ -29,4 +29,13 @@ select 123456789123456789.1234567890 * 1.123456789123456789;
 select 123456789123456789.1234567890 * 1.123456789123456789;
 select 12345678912345.123456789123 / 0.000000012345678;
 
+select 1.0123456789012345678901234567890123456e36BD / 0.1;
+select 1.0123456789012345678901234567890123456e35BD / 1.0;
+select 1.0123456789012345678901234567890123456e34BD / 1.0;
+select 1.0123456789012345678901234567890123456e33BD / 1.0;
+select 1.0123456789012345678901234567890123456e32BD / 1.0;
+select 1.0123456789012345678901234567890123456e31BD / 1.0;
+select 1.0123456789012345678901234567890123456e31BD / 0.1;
+select 1.0123456789012345678901234567890123456e31BD / 10.0;
+
 drop table decimals_test;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/decimalArithmeticOperations.sql b/sql/core/src/test/resources/sql-tests/inputs/decimalArithmeticOperations.sql
index a3bc282cd6a..70bb9123994 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/decimalArithmeticOperations.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/decimalArithmeticOperations.sql
@@ -83,4 +83,13 @@ select 12345678912345678912345678912.1234567 + 9999999999999999999999999999999.1
 select 123456789123456789.1234567890 * 1.123456789123456789;
 select 12345678912345.123456789123 / 0.000000012345678;
 
+select 1.0123456789012345678901234567890123456e36BD / 0.1;
+select 1.0123456789012345678901234567890123456e35BD / 1.0;
+select 1.0123456789012345678901234567890123456e34BD / 1.0;
+select 1.0123456789012345678901234567890123456e33BD / 1.0;
+select 1.0123456789012345678901234567890123456e32BD / 1.0;
+select 1.0123456789012345678901234567890123456e31BD / 1.0;
+select 1.0123456789012345678901234567890123456e31BD / 0.1;
+select 1.0123456789012345678901234567890123456e31BD / 10.0;
+
 drop table decimals_test;
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out
index 9268a5f92d1..219b1e621e1 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 16
+-- Number of queries: 24
 
 
 -- !query
@@ -112,7 +112,7 @@ select 1e35BD / 0.1
 struct<>
 -- !query output
 org.apache.spark.SparkArithmeticException
-[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded, 1000000000000000000000000000000000000, 37, 0) cannot be represented as Decimal(38, 6). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
+[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded, 1000000000000000000000000000000000000.00000000000000000000000000000000000000, 75, 38) cannot be represented as Decimal(38, 6). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
 == SQL(line 1, position 7) ==
 select 1e35BD / 0.1
        ^^^^^^^^^^^^
@@ -142,6 +142,94 @@ struct<(12345678912345.123456789123 / 1.2345678E-8):decimal(38,9)>
 1000000073899961059796.725866332
 
 
+-- !query
+select 1.0123456789012345678901234567890123456e36BD / 0.1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkArithmeticException
+[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded, 10123456789012345678901234567890123456.00000000000000000000000000000000000000, 76, 38) cannot be represented as Decimal(38, 6). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 7) ==
+select 1.0123456789012345678901234567890123456e36BD / 0.1
+       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e35BD / 1.0
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkArithmeticException
+[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded, 101234567890123456789012345678901234.56000000000000000000000000000000000000, 74, 38) cannot be represented as Decimal(38, 6). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 7) ==
+select 1.0123456789012345678901234567890123456e35BD / 1.0
+       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e34BD / 1.0
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkArithmeticException
+[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded, 10123456789012345678901234567890123.45600000000000000000000000000000000000, 73, 38) cannot be represented as Decimal(38, 6). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 7) ==
+select 1.0123456789012345678901234567890123456e34BD / 1.0
+       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e33BD / 1.0
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkArithmeticException
+[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded, 1012345678901234567890123456789012.34560000000000000000000000000000000000, 72, 38) cannot be represented as Decimal(38, 6). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 7) ==
+select 1.0123456789012345678901234567890123456e33BD / 1.0
+       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e32BD / 1.0
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkArithmeticException
+[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded, 101234567890123456789012345678901.23456000000000000000000000000000000000, 71, 38) cannot be represented as Decimal(38, 6). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 7) ==
+select 1.0123456789012345678901234567890123456e32BD / 1.0
+       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e31BD / 1.0
+-- !query schema
+struct<(10123456789012345678901234567890.123456 / 1.0):decimal(38,6)>
+-- !query output
+10123456789012345678901234567890.123456
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e31BD / 0.1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkArithmeticException
+[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded, 101234567890123456789012345678901.23456000000000000000000000000000000000, 71, 38) cannot be represented as Decimal(38, 6). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 7) ==
+select 1.0123456789012345678901234567890123456e31BD / 0.1
+       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e31BD / 10.0
+-- !query schema
+struct<(10123456789012345678901234567890.123456 / 10.0):decimal(38,6)>
+-- !query output
+1012345678901234567890123456789.012346
+
+
 -- !query
 drop table decimals_test
 -- !query schema
diff --git a/sql/core/src/test/resources/sql-tests/results/decimalArithmeticOperations.sql.out b/sql/core/src/test/resources/sql-tests/results/decimalArithmeticOperations.sql.out
index 1d92dc35010..f58950e7071 100644
--- a/sql/core/src/test/resources/sql-tests/results/decimalArithmeticOperations.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/decimalArithmeticOperations.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 40
+-- Number of queries: 48
 
 
 -- !query
@@ -327,6 +327,70 @@ struct<(12345678912345.123456789123 / 1.2345678E-8):decimal(38,18)>
 NULL
 
 
+-- !query
+select 1.0123456789012345678901234567890123456e36BD / 0.1
+-- !query schema
+struct<(1012345678901234567890123456789012345.6 / 0.1):decimal(38,2)>
+-- !query output
+NULL
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e35BD / 1.0
+-- !query schema
+struct<(101234567890123456789012345678901234.56 / 1.0):decimal(38,3)>
+-- !query output
+NULL
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e34BD / 1.0
+-- !query schema
+struct<(10123456789012345678901234567890123.456 / 1.0):decimal(38,3)>
+-- !query output
+10123456789012345678901234567890123.456
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e33BD / 1.0
+-- !query schema
+struct<(1012345678901234567890123456789012.3456 / 1.0):decimal(38,4)>
+-- !query output
+1012345678901234567890123456789012.3456
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e32BD / 1.0
+-- !query schema
+struct<(101234567890123456789012345678901.23456 / 1.0):decimal(38,5)>
+-- !query output
+101234567890123456789012345678901.23456
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e31BD / 1.0
+-- !query schema
+struct<(10123456789012345678901234567890.123456 / 1.0):decimal(38,6)>
+-- !query output
+10123456789012345678901234567890.123456
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e31BD / 0.1
+-- !query schema
+struct<(10123456789012345678901234567890.123456 / 0.1):decimal(38,6)>
+-- !query output
+NULL
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e31BD / 10.0
+-- !query schema
+struct<(10123456789012345678901234567890.123456 / 10.0):decimal(38,7)>
+-- !query output
+1012345678901234567890123456789.0123456
+
+
 -- !query
 drop table decimals_test
 -- !query schema

