[ https://issues.apache.org/jira/browse/SPARK-39248?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Yuming Wang updated SPARK-39248:
--------------------------------
Description:

How to reproduce this issue:
{code:scala}
import org.apache.spark.benchmark.Benchmark

val valuesPerIteration = 2880404L
val dir = "/tmp/spark/benchmark"
spark.range(2880404L).selectExpr("cast(id as DECIMAL(9,2)) as d").write.mode("Overwrite").parquet(dir)

val benchmark = new Benchmark("Benchmark decimal", valuesPerIteration, minNumIters = 5)
benchmark.addCase("d * 2 > 0") { _ =>
  spark.read.parquet(dir).where("d * 2 > 0").write.format("noop").mode("Overwrite").save()
}
benchmark.addCase("d / 2 > 0") { _ =>
  spark.read.parquet(dir).where("d / 2 > 0").write.format("noop").mode("Overwrite").save()
}
benchmark.run()
{code}
{noformat}
Java HotSpot(TM) 64-Bit Server VM 1.8.0_281-b09 on Mac OS X 10.15.7
Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
Benchmark decimal:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
d * 2 > 0                                           435            558         151          6.6         150.9       1.0X
d / 2 > 0                                          5569           6208         734          0.5        1933.2       0.1X
{noformat}
Current stack trace (sampled while the "d / 2 > 0" case is running):
{noformat}
java.math.MutableBigInteger.divideKnuth(MutableBigInteger.java:1203)
java.math.MutableBigInteger.divideKnuth(MutableBigInteger.java:1163)
java.math.BigInteger.divideAndRemainderKnuth(BigInteger.java:2235)
java.math.BigInteger.divideAndRemainder(BigInteger.java:2223)
java.math.BigDecimal.createAndStripZerosToMatchScale(BigDecimal.java:4404)
java.math.BigDecimal.divideAndRound(BigDecimal.java:4294)
java.math.BigDecimal.divide(BigDecimal.java:4660)
java.math.BigDecimal.divide(BigDecimal.java:1753)
org.apache.spark.sql.types.Decimal.$div(Decimal.scala:505)
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760)
org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.$anonfun$run$1(WriteToDataSourceV2Exec.scala:435)
org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$$$Lambda$2997/2025304705.apply(Unknown Source)
org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1538)
org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.run(WriteToDataSourceV2Exec.scala:480)
org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.$anonfun$writeWithV2$2(WriteToDataSourceV2Exec.scala:381)
org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec$$Lambda$2987/1586195133.apply(Unknown Source)
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
org.apache.spark.scheduler.Task.run(Task.scala:139)
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)
org.apache.spark.executor.Executor$TaskRunner$$Lambda$2921/365880128.apply(Unknown Source)
org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
java.lang.Thread.run(Thread.java:748)
{noformat}
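The trace shows the divide case spending its time inside {{java.math.BigDecimal.divide}}, which falls through to BigInteger Knuth division and {{createAndStripZerosToMatchScale}}, while multiply stays on BigDecimal's cheap compact-long path. A minimal JVM-only sketch, not part of the original report (the {{time}} helper is ad hoc), reproduces the gap without Spark; it mirrors what {{Decimal./}} appears to do per the trace, namely dividing at a large fixed result scale with HALF_UP rounding:
{code:scala}
import java.math.{BigDecimal => JBigDecimal, RoundingMode}

// Ad hoc timer: runs `body` once per row of the repro data set.
def time(label: String)(body: => Unit): Unit = {
  val start = System.nanoTime()
  var i = 0L
  while (i < 2880404L) { body; i += 1 }
  println(s"$label: ${(System.nanoTime() - start) / 1e6} ms")
}

val d = new JBigDecimal("1234567.89") // fits DECIMAL(9,2), like the repro column
val two = JBigDecimal.valueOf(2)

// Fast: 123456789 * 2 fits in a long, so BigDecimal stays on its compact path.
time("multiply")(d.multiply(two))
// Slow: dividing at scale 38 (DecimalType.MAX_SCALE, which Decimal./ uses per
// the stack trace) forces BigInteger division plus trailing-zero stripping.
time("divide")(d.divide(two, 38, RoundingMode.HALF_UP))
{code}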
d").write.mode("Overwrite").parquet(dir) val benchmark = new Benchmark("Benchmark decimal", valuesPerIteration, minNumIters = 5) benchmark.addCase("d * 2 > 0") { _ => spark.read.parquet(dir).where("d * 2 > 0").write.format("noop").mode("Overwrite").save() } benchmark.addCase("d / 2 > 0") { _ => spark.read.parquet(dir).where("d / 2 > 0").write.format("noop").mode("Overwrite").save() } benchmark.run() {code} {noformat} Java HotSpot(TM) 64-Bit Server VM 1.8.0_281-b09 on Mac OS X 10.15.7 Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz Benchmark decimal: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ d * 2 > 0 435 558 151 6.6 150.9 1.0X d / 2 > 0 5569 6208 734 0.5 1933.2 0.1X {noformat} > Decimal divide much slower than multiply > ---------------------------------------- > > Key: SPARK-39248 > URL: https://issues.apache.org/jira/browse/SPARK-39248 > Project: Spark > Issue Type: Improvement > Components: SQL > Affects Versions: 3.4.0 > Reporter: Yuming Wang > Priority: Major > > How to reproduce this issue: > {code:scala} > import org.apache.spark.benchmark.Benchmark > val valuesPerIteration = 2880404L > val dir = "/tmp/spark/benchmark" > spark.range(2880404L).selectExpr("cast(id as DECIMAL(9,2)) as > d").write.mode("Overwrite").parquet(dir) > val benchmark = new Benchmark("Benchmark decimal", valuesPerIteration, > minNumIters = 5) > benchmark.addCase("d * 2 > 0") { _ => > spark.read.parquet(dir).where("d * 2 > > 0").write.format("noop").mode("Overwrite").save() > } > benchmark.addCase("d / 2 > 0") { _ => > spark.read.parquet(dir).where("d / 2 > > 0").write.format("noop").mode("Overwrite").save() > } > benchmark.run() > {code} > {noformat} > Java HotSpot(TM) 64-Bit Server VM 1.8.0_281-b09 on Mac OS X 10.15.7 > Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz > Benchmark decimal: Best Time(ms) Avg Time(ms) > Stdev(ms) Rate(M/s) Per Row(ns) Relative > ------------------------------------------------------------------------------------------------------------------------ > d * 2 > 0 435 558 > 151 6.6 150.9 1.0X > d / 2 > 0 5569 6208 > 734 0.5 1933.2 0.1X > {noformat} > Current stack trace: > {noformat} > java.math.MutableBigInteger.divideKnuth(MutableBigInteger.java:1203) > java.math.MutableBigInteger.divideKnuth(MutableBigInteger.java:1163) > java.math.BigInteger.divideAndRemainderKnuth(BigInteger.java:2235) > java.math.BigInteger.divideAndRemainder(BigInteger.java:2223) > java.math.BigDecimal.createAndStripZerosToMatchScale(BigDecimal.java:4404) > java.math.BigDecimal.divideAndRound(BigDecimal.java:4294) > java.math.BigDecimal.divide(BigDecimal.java:4660) > java.math.BigDecimal.divide(BigDecimal.java:1753) > org.apache.spark.sql.types.Decimal.$div(Decimal.scala:505) > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown > Source) > org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) > org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760) > org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.$anonfun$run$1(WriteToDataSourceV2Exec.scala:435) > org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$$$Lambda$2997/2025304705.apply(Unknown > Source) > org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1538) > 
--
This message was sent by Atlassian Jira
(v8.20.7#820007)