This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 7d09991610f9 [SPARK-46544][SQL] Support v2 DESCRIBE TABLE EXTENDED with table stats 7d09991610f9 is described below commit 7d09991610f966fa42707ba7c8bb2575ec20d233 Author: zouxxyy <zouxinyu....@alibaba-inc.com> AuthorDate: Tue Jan 2 14:11:11 2024 +0300 [SPARK-46544][SQL] Support v2 DESCRIBE TABLE EXTENDED with table stats ### What changes were proposed in this pull request? Support v2 DESCRIBE TABLE EXTENDED with table stats ### Why are the changes needed? Similar to #40058, this brings the DS v1 and v2 commands to parity, e.g. DESC EXTENDED table | col_name | data_type | comment | |-------------------|---------------------------|------------| | ... | ... | ... | | Statistics | 864 bytes, 2 rows | | | ... | ... | ... | ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added the test `describe extended table with stats`. ### Was this patch authored or co-authored using generative AI tooling? No Closes #44535 from Zouxxyy/dev/desc-table-stats. 
Lead-authored-by: zouxxyy <zouxinyu....@alibaba-inc.com> Co-authored-by: Zouxxyy <zoux...@qq.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../datasources/v2/DescribeTableExec.scala | 22 +++++++++++++++++++++- .../spark/sql/connector/DataSourceV2SQLSuite.scala | 3 ++- .../execution/command/v2/DescribeTableSuite.scala | 19 ++++++++++++++++++- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala index 3d79a7113e0d..a225dffb075b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala @@ -24,8 +24,10 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.CatalogTableType import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, ResolveDefaultColumns} -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, Table, TableCatalog} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, SupportsRead, Table, TableCatalog} import org.apache.spark.sql.connector.expressions.IdentityTransform +import org.apache.spark.sql.connector.read.SupportsReportStatistics +import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.ArrayImplicits._ case class DescribeTableExec( @@ -40,6 +42,7 @@ case class DescribeTableExec( if (isExtended) { addMetadataColumns(rows) addTableDetails(rows) + addTableStats(rows) } rows.toSeq } @@ -96,6 +99,23 @@ case class DescribeTableExec( case _ => } + private def addTableStats(rows: ArrayBuffer[InternalRow]): Unit = table match { + case read: SupportsRead => + 
read.newScanBuilder(CaseInsensitiveStringMap.empty()).build() match { + case s: SupportsReportStatistics => + val stats = s.estimateStatistics() + val statsComponents = Seq( + Option.when(stats.sizeInBytes().isPresent)(s"${stats.sizeInBytes().getAsLong} bytes"), + Option.when(stats.numRows().isPresent)(s"${stats.numRows().getAsLong} rows") + ).flatten + if (statsComponents.nonEmpty) { + rows += toCatalystRow("Statistics", statsComponents.mkString(", "), null) + } + case _ => + } + case _ => + } + private def addPartitioning(rows: ArrayBuffer[InternalRow]): Unit = { if (table.partitioning.nonEmpty) { val partitionColumnsOnly = table.partitioning.forall(t => t.isInstanceOf[IdentityTransform]) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 589283a29b85..f92a9a827b1c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -3342,7 +3342,8 @@ class DataSourceV2SQLSuiteV1Filter Row("# Column Default Values", "", ""), Row("# Metadata Columns", "", ""), Row("id", "bigint", "42"), - Row("id", "bigint", null) + Row("id", "bigint", null), + Row("Statistics", "0 bytes, 0 rows", null) )) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala index a21baebe24d8..cfd26c09bf3e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala @@ -90,7 +90,8 @@ class DescribeTableSuite extends command.DescribeTableSuiteBase Row("Location", "file:/tmp/testcat/table_name", ""), Row("Provider", "_", ""), Row(TableCatalog.PROP_OWNER.capitalize, 
Utils.getCurrentUserName(), ""), - Row("Table Properties", "[bar=baz]", ""))) + Row("Table Properties", "[bar=baz]", ""), + Row("Statistics", "0 bytes, 0 rows", null))) } } @@ -196,4 +197,20 @@ class DescribeTableSuite extends command.DescribeTableSuiteBase Row("comment", "column_comment"))) } } + + test("describe extended table with stats") { + withNamespaceAndTable("ns", "tbl") { tbl => + sql( + s""" + |CREATE TABLE $tbl + |(key INT, col STRING) + |$defaultUsing""".stripMargin) + + sql(s"INSERT INTO $tbl values (1, 'aaa'), (2, 'bbb'), (3, 'ccc'), (null, 'ddd')") + val descriptionDf = sql(s"DESCRIBE TABLE EXTENDED $tbl") + val stats = descriptionDf.filter("col_name == 'Statistics'").head() + .getAs[String]("data_type") + assert("""\d+\s+bytes,\s+4\s+rows""".r.matches(stats)) + } + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org