This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new cc4463e [SPARK-36017][SQL] Support TimestampNTZType in expression ApproximatePercentile cc4463e is described below commit cc4463e818749faaf648ec71699d1e2fd3828c3f Author: gengjiaan <gengji...@360.cn> AuthorDate: Wed Jul 7 12:41:11 2021 +0300 [SPARK-36017][SQL] Support TimestampNTZType in expression ApproximatePercentile ### What changes were proposed in this pull request? The current `ApproximatePercentile` supports `TimestampType`, but does not yet support timestamps without time zone. This PR adds support for `TimestampNTZType`. ### Why are the changes needed? `ApproximatePercentile` needs to support `TimestampNTZType`. ### Does this PR introduce _any_ user-facing change? 'Yes'. `ApproximatePercentile` accepts `TimestampNTZType`. ### How was this patch tested? New tests. Closes #33241 from beliefer/SPARK-36017. Authored-by: gengjiaan <gengji...@360.cn> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../expressions/aggregate/ApproximatePercentile.scala | 10 +++++----- .../apache/spark/sql/ApproximatePercentileQuerySuite.scala | 14 +++++++++----- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala index 78e64bf..8cce79c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala @@ -92,9 +92,9 @@ case class ApproximatePercentile( private lazy val accuracy: Long = accuracyExpression.eval().asInstanceOf[Number].longValue override def inputTypes: Seq[AbstractDataType] = { - // Support NumericType, DateType and TimestampType since their internal types are all numeric, - // and can be easily cast to double for 
processing. - Seq(TypeCollection(NumericType, DateType, TimestampType), + // Support NumericType, DateType, TimestampType and TimestampNTZType since their internal types + // are all numeric, and can be easily cast to double for processing. + Seq(TypeCollection(NumericType, DateType, TimestampType, TimestampNTZType), TypeCollection(DoubleType, ArrayType(DoubleType, containsNull = false)), IntegralType) } @@ -139,7 +139,7 @@ case class ApproximatePercentile( // Convert the value to a double value val doubleValue = child.dataType match { case DateType => value.asInstanceOf[Int].toDouble - case TimestampType => value.asInstanceOf[Long].toDouble + case TimestampType | TimestampNTZType => value.asInstanceOf[Long].toDouble case n: NumericType => n.numeric.toDouble(value.asInstanceOf[n.InternalType]) case other: DataType => throw QueryExecutionErrors.dataTypeUnexpectedError(other) @@ -158,7 +158,7 @@ case class ApproximatePercentile( val doubleResult = buffer.getPercentiles(percentages) val result = child.dataType match { case DateType => doubleResult.map(_.toInt) - case TimestampType => doubleResult.map(_.toLong) + case TimestampType | TimestampNTZType => doubleResult.map(_.toLong) case ByteType => doubleResult.map(_.toByte) case ShortType => doubleResult.map(_.toShort) case IntegerType => doubleResult.map(_.toInt) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala index 4991e39..5ff15c9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql import java.sql.{Date, Timestamp} +import java.time.LocalDateTime import org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile import 
org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile.DEFAULT_PERCENTILE_ACCURACY @@ -89,23 +90,26 @@ class ApproximatePercentileQuerySuite extends QueryTest with SharedSparkSession test("percentile_approx, different column types") { withTempView(table) { val intSeq = 1 to 1000 - val data: Seq[(java.math.BigDecimal, Date, Timestamp)] = intSeq.map { i => - (new java.math.BigDecimal(i), DateTimeUtils.toJavaDate(i), DateTimeUtils.toJavaTimestamp(i)) + val data: Seq[(java.math.BigDecimal, Date, Timestamp, LocalDateTime)] = intSeq.map { i => + (new java.math.BigDecimal(i), DateTimeUtils.toJavaDate(i), + DateTimeUtils.toJavaTimestamp(i), DateTimeUtils.microsToLocalDateTime(i)) } - data.toDF("cdecimal", "cdate", "ctimestamp").createOrReplaceTempView(table) + data.toDF("cdecimal", "cdate", "ctimestamp", "ctimestampntz").createOrReplaceTempView(table) checkAnswer( spark.sql( s"""SELECT | percentile_approx(cdecimal, array(0.25, 0.5, 0.75D)), | percentile_approx(cdate, array(0.25, 0.5, 0.75D)), - | percentile_approx(ctimestamp, array(0.25, 0.5, 0.75D)) + | percentile_approx(ctimestamp, array(0.25, 0.5, 0.75D)), + | percentile_approx(ctimestampntz, array(0.25, 0.5, 0.75D)) |FROM $table """.stripMargin), Row( Seq("250.000000000000000000", "500.000000000000000000", "750.000000000000000000") .map(i => new java.math.BigDecimal(i)), Seq(250, 500, 750).map(DateTimeUtils.toJavaDate), - Seq(250, 500, 750).map(i => DateTimeUtils.toJavaTimestamp(i.toLong))) + Seq(250, 500, 750).map(i => DateTimeUtils.toJavaTimestamp(i.toLong)), + Seq(250, 500, 750).map(i => DateTimeUtils.microsToLocalDateTime(i.toLong))) ) } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org