Repository: spark
Updated Branches:
  refs/heads/master 19f4ac6dc -> 747d2f538

[SPARK-13790] Speed up ColumnVector's getDecimal

## What changes were proposed in this pull request?

We should reuse an object similar to the other non-primitive type getters. For
a query that computes averages over decimal columns, this shows a 10% speedup
on overall query times.

## How was this patch tested?

Existing tests and this benchmark

```
TPCDS Snappy:                       Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)
--------------------------------------------------------------------------------
q27-agg (master)                      10627 / 11057         10.8          92.3
q27-agg (this patch)                    9722 / 9832         11.8          84.4
```

Author: Nong Li <n...@databricks.com>

Closes #11624 from nongli/spark-13790.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/747d2f53
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/747d2f53
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/747d2f53

Branch: refs/heads/master
Commit: 747d2f5381b3ef0b2663ca7def42d3dc43ee13d7
Parents: 19f4ac6
Author: Nong Li <n...@databricks.com>
Authored: Thu Mar 10 13:31:19 2016 -0800
Committer: Davies Liu <davies....@gmail.com>
Committed: Thu Mar 10 13:31:19 2016 -0800

----------------------------------------------------------------------
 .../apache/spark/sql/catalyst/expressions/UnsafeRow.java |  2 +-
 .../main/scala/org/apache/spark/sql/types/Decimal.scala  | 11 +++++++++++
 .../spark/sql/execution/vectorized/ColumnVector.java     |  4 ++--
 3 files changed, 14 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/747d2f53/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
index a88bcbf..dd2f39e 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
@@ -400,7 +400,7 @@ public final class UnsafeRow extends MutableRow implements Externalizable, KryoS
       return null;
     }
     if (precision <= Decimal.MAX_LONG_DIGITS()) {
-      return Decimal.apply(getLong(ordinal), precision, scale);
+      return Decimal.createUnsafe(getLong(ordinal), precision, scale);
     } else {
       byte[] bytes = getBinary(ordinal);
       BigInteger bigInteger = new BigInteger(bytes);

http://git-wip-us.apache.org/repos/asf/spark/blob/747d2f53/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
index 6a59e97..f0e535b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
@@ -376,6 +376,17 @@ object Decimal {
 
   def apply(value: String): Decimal = new Decimal().set(BigDecimal(value))
 
+  /**
+   * Creates a decimal from unscaled, precision and scale without checking the bounds.
+   */
+  def createUnsafe(unscaled: Long, precision: Int, scale: Int): Decimal = {
+    val dec = new Decimal()
+    dec.longVal = unscaled
+    dec._precision = precision
+    dec._scale = scale
+    dec
+  }
+
   // Evidence parameters for Decimal considered either as Fractional or Integral. We provide two
   // parameters inheriting from a common trait since both traits define mkNumericOps.
   // See scala.math's Numeric.scala for examples for Scala's built-in types.

http://git-wip-us.apache.org/repos/asf/spark/blob/747d2f53/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java
----------------------------------------------------------------------
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java
index bb0247c..ffcc9c2 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java
@@ -536,9 +536,9 @@ public abstract class ColumnVector {
    */
   public final Decimal getDecimal(int rowId, int precision, int scale) {
     if (precision <= Decimal.MAX_INT_DIGITS()) {
-      return Decimal.apply(getInt(rowId), precision, scale);
+      return Decimal.createUnsafe(getInt(rowId), precision, scale);
     } else if (precision <= Decimal.MAX_LONG_DIGITS()) {
-      return Decimal.apply(getLong(rowId), precision, scale);
+      return Decimal.createUnsafe(getLong(rowId), precision, scale);
     } else {
       // TODO: best perf?
       byte[] bytes = getBinary(rowId);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org