This is an automated email from the ASF dual-hosted git repository. gengliang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new ddc77fb [SPARK-37986][SQL] Support TimestampNTZ in radix sort ddc77fb is described below commit ddc77fb906cb3ce1567d277c2d0850104c89ac25 Author: Gengliang Wang <gengli...@apache.org> AuthorDate: Sun Jan 23 11:51:27 2022 +0800 [SPARK-37986][SQL] Support TimestampNTZ in radix sort ### What changes were proposed in this pull request? Make the `TimestampNTZ` data type support radix sort in SQL ### Why are the changes needed? Better performance when sorting by a single TimestampNTZ column ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing test case in SortSuite Closes #35279 from gengliangwang/NTZRadixSort. Authored-by: Gengliang Wang <gengli...@apache.org> Signed-off-by: Gengliang Wang <gengli...@apache.org> --- .../org/apache/spark/sql/catalyst/expressions/SortOrder.scala | 8 +++++--- .../scala/org/apache/spark/sql/execution/SortPrefixUtils.scala | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala index 8e6f076..974d4b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala @@ -132,7 +132,8 @@ object SortOrder { case class SortPrefix(child: SortOrder) extends UnaryExpression { val nullValue = child.child.dataType match { - case BooleanType | DateType | TimestampType | _: IntegralType | _: AnsiIntervalType => + case BooleanType | DateType | TimestampType | TimestampNTZType | + _: IntegralType | _: AnsiIntervalType => if (nullAsSmallest) Long.MinValue else Long.MaxValue case dt: DecimalType if dt.precision - dt.scale <= Decimal.MAX_LONG_DIGITS => if (nullAsSmallest) Long.MinValue else Long.MaxValue @@ -154,7 +155,8 @@ case class 
SortPrefix(child: SortOrder) extends UnaryExpression { private lazy val calcPrefix: Any => Long = child.child.dataType match { case BooleanType => (raw) => if (raw.asInstanceOf[Boolean]) 1 else 0 - case DateType | TimestampType | _: IntegralType | _: AnsiIntervalType => (raw) => + case DateType | TimestampType | TimestampNTZType | + _: IntegralType | _: AnsiIntervalType => (raw) => raw.asInstanceOf[java.lang.Number].longValue() case FloatType | DoubleType => (raw) => { val dVal = raw.asInstanceOf[java.lang.Number].doubleValue() @@ -198,7 +200,7 @@ case class SortPrefix(child: SortOrder) extends UnaryExpression { s"$input ? 1L : 0L" case _: IntegralType => s"(long) $input" - case DateType | TimestampType | _: AnsiIntervalType => + case DateType | TimestampType | TimestampNTZType | _: AnsiIntervalType => s"(long) $input" case FloatType | DoubleType => s"$DoublePrefixCmp.computePrefix((double)$input)" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala index a1b093f..4b561b8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala @@ -43,7 +43,7 @@ object SortPrefixUtils { case StringType => stringPrefixComparator(sortOrder) case BinaryType => binaryPrefixComparator(sortOrder) case BooleanType | ByteType | ShortType | IntegerType | LongType | DateType | TimestampType | - _: AnsiIntervalType => + TimestampNTZType | _: AnsiIntervalType => longPrefixComparator(sortOrder) case dt: DecimalType if dt.precision - dt.scale <= Decimal.MAX_LONG_DIGITS => longPrefixComparator(sortOrder) @@ -123,7 +123,7 @@ object SortPrefixUtils { def canSortFullyWithPrefix(sortOrder: SortOrder): Boolean = { sortOrder.dataType match { case BooleanType | ByteType | ShortType | IntegerType | LongType | DateType | - TimestampType | FloatType | DoubleType | _: 
AnsiIntervalType => + TimestampType | TimestampNTZType | FloatType | DoubleType | _: AnsiIntervalType => true case dt: DecimalType if dt.precision <= Decimal.MAX_LONG_DIGITS => true --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org