This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new c3ac7824505  [SPARK-42777][SQL] Support converting TimestampNTZ catalog stats to plan stats
c3ac7824505 is described below

commit c3ac782450583e6073b88d940af60714eb4cdf44
Author: Gengliang Wang <gengli...@apache.org>
AuthorDate: Mon Mar 13 21:00:02 2023 -0700

    [SPARK-42777][SQL] Support converting TimestampNTZ catalog stats to plan stats

    ### What changes were proposed in this pull request?

    When `spark.sql.cbo.planStats.enabled` or `spark.sql.cbo.enabled` is enabled, the logical plan
    fetches row counts and column statistics from the catalog. This PR adds support for converting
    TimestampNTZ catalog stats to plan stats.

    ### Why are the changes needed?

    Implement a missing piece of the TimestampNTZ type.

    ### Does this PR introduce _any_ user-facing change?

    No, TimestampNTZ is not released yet.

    ### How was this patch tested?

    New UT

    Closes #40404 from gengliangwang/fromExternalString.

    Authored-by: Gengliang Wang <gengli...@apache.org>
    Signed-off-by: Gengliang Wang <gengli...@apache.org>
---
 .../spark/sql/catalyst/catalog/interface.scala    |  2 ++
 .../spark/sql/StatisticsCollectionSuite.scala     | 24 ++++++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index 6f4c4f27efc..08dd2dfd5bc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -681,6 +681,8 @@ object CatalogColumnStat extends Logging {
       case TimestampType if version == 1 =>
         DateTimeUtils.fromJavaTimestamp(java.sql.Timestamp.valueOf(s))
       case TimestampType => getTimestampFormatter(isParsing = true).parse(s)
+      case TimestampNTZType =>
+        getTimestampFormatter(isParsing = true, forTimestampNTZ = true).parse(s)
       case ByteType => s.toByte
       case ShortType => s.toShort
       case IntegerType => s.toInt
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala
index 2ab8bb25a8b..e6b74a328e5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala
@@ -571,6 +571,30 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared
     }
   }
 
+  test("SPARK-42777: describe column stats (min, max) for timestamp_ntz column") {
+    val table = "insert_desc_same_time_zone"
+    val tsCol = "timestamp_ntz_typed_col"
+    withTable(table) {
+      val minTimestamp = "make_timestamp_ntz(2022, 1, 1, 0, 0, 1.123456)"
+      val maxTimestamp = "make_timestamp_ntz(2022, 1, 3, 0, 0, 2.987654)"
+      sql(s"CREATE TABLE $table ($tsCol timestamp_ntz) USING parquet")
+      sql(s"INSERT INTO $table VALUES $minTimestamp, $maxTimestamp")
+      sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR ALL COLUMNS")
+
+      checkDescTimestampColStats(
+        tableName = table,
+        timestampColumn = tsCol,
+        expectedMinTimestamp = "2022-01-01 00:00:01.123456",
+        expectedMaxTimestamp = "2022-01-03 00:00:02.987654")
+
+      // Converting TimestampNTZ catalog stats to plan stats
+      val columnStat = getCatalogTable(table)
+        .stats.get.colStats(tsCol).toPlanStat(tsCol, TimestampNTZType)
+      assert(columnStat.min.contains(1640995201123456L))
+      assert(columnStat.max.contains(1641168002987654L))
+    }
+  }
+
   private def getStatAttrNames(tableName: String): Set[String] = {
     val queryStats = spark.table(tableName).queryExecution.optimizedPlan.stats.attributeStats
     queryStats.map(_._1.name).toSet
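
As a minimal illustration of the conversion path this patch enables (not part of the commit itself), the snippet below builds a CatalogColumnStat by hand and converts it with `toPlanStat`, which routes through `fromExternalString` and now parses TimestampNTZ values into microseconds since the epoch. The column name and the external min/max strings are hypothetical sample values chosen to match the test above; it should run as-is in spark-shell.

    // Hypothetical catalog stats for a timestamp_ntz column named "ts".
    import org.apache.spark.sql.catalyst.catalog.CatalogColumnStat
    import org.apache.spark.sql.types.TimestampNTZType

    val catalogStat = CatalogColumnStat(
      distinctCount = Some(BigInt(2)),
      min = Some("2022-01-01 00:00:01.123456"),
      max = Some("2022-01-03 00:00:02.987654"),
      nullCount = Some(BigInt(0)),
      avgLen = Some(8L),
      maxLen = Some(8L))

    // toPlanStat calls fromExternalString, which with this change handles
    // TimestampNTZType and returns microseconds since the epoch.
    val planStat = catalogStat.toPlanStat("ts", TimestampNTZType)
    assert(planStat.min.contains(1640995201123456L))
    assert(planStat.max.contains(1641168002987654L))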
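
And an end-to-end sketch of the behavior described in the PR description, assuming a spark-shell session (the table name `ntz_stats` is made up for the example): with plan stats enabled, the optimized plan should pick up the analyzed timestamp_ntz column statistics from the catalog.

    // Enable reading catalog stats into plan stats (spark.sql.cbo.enabled works too).
    spark.conf.set("spark.sql.cbo.planStats.enabled", "true")

    spark.sql("CREATE TABLE ntz_stats (ts timestamp_ntz) USING parquet")
    spark.sql("INSERT INTO ntz_stats VALUES " +
      "(make_timestamp_ntz(2022, 1, 1, 0, 0, 1.123456)), " +
      "(make_timestamp_ntz(2022, 1, 3, 0, 0, 2.987654))")
    spark.sql("ANALYZE TABLE ntz_stats COMPUTE STATISTICS FOR ALL COLUMNS")

    // The optimized plan's statistics should now carry the row count and the
    // min/max of the timestamp_ntz column read back from the catalog.
    val planStats = spark.table("ntz_stats").queryExecution.optimizedPlan.stats
    println(planStats.rowCount)       // expected: Some(2)
    println(planStats.attributeStats) // expected to include min/max for column ts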