This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new c5f72b3e208  [SPARK-42294][SQL] Include column default values in DESCRIBE output for V2 tables
c5f72b3e208 is described below

commit c5f72b3e2086d66c72699778915d2ab6ee64a6eb
Author: Daniel Tenedorio <daniel.tenedo...@databricks.com>
AuthorDate: Thu Feb 2 21:11:29 2023 -0800

    [SPARK-42294][SQL] Include column default values in DESCRIBE output for V2 tables

    ### What changes were proposed in this pull request?

    Include column default values in DESCRIBE output for V2 tables. This was
    previously implemented for V1 tables but was missing for V2 tables.

    ### Why are the changes needed?

    DESCRIBE commands make it easier to work with tables by inspecting their
    metadata.

    ### Does this PR introduce _any_ user-facing change?

    Yes, it adds more information to DESCRIBE commands for tables with default
    column values.

    ### How was this patch tested?

    This PR adds unit test coverage.

    Closes #39863 from dtenedor/descibe-defaults-v2.

    Authored-by: Daniel Tenedorio <daniel.tenedo...@databricks.com>
    Signed-off-by: Gengliang Wang <gengli...@apache.org>
---
 .../catalyst/util/ResolveDefaultColumnsUtil.scala  | 17 ++++++++++++++
 .../spark/sql/execution/command/tables.scala       |  9 ++------
 .../datasources/v2/DescribeTableExec.scala         |  7 +++++-
 .../spark/sql/connector/DataSourceV2SQLSuite.scala | 27 ++++++++++++++++++++++
 4 files changed, 52 insertions(+), 8 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
index 9a0d3a435b4..667c0988d0c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.catalyst.util
 
+import scala.collection.mutable.ArrayBuffer
+
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.analysis._
@@ -267,6 +269,21 @@ object ResolveDefaultColumns {
     }
   }
 
+  /** If any fields in a schema have default values, appends them to the result. */
+  def getDescribeMetadata(schema: StructType): Seq[(String, String, String)] = {
+    val rows = new ArrayBuffer[(String, String, String)]()
+    if (schema.fields.exists(_.metadata.contains(CURRENT_DEFAULT_COLUMN_METADATA_KEY))) {
+      rows.append(("", "", ""))
+      rows.append(("# Column Default Values", "", ""))
+      schema.foreach { column =>
+        column.getCurrentDefaultValue().map { value =>
+          rows.append((column.name, column.dataType.simpleString, value))
+        }
+      }
+    }
+    rows.toSeq
+  }
+
   /**
    * This is an Analyzer for processing default column values using built-in functions only.
    */
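[Editor's note: for readers skimming the patch, a minimal sketch of what the new
helper returns for a schema where one column carries a default. The schema below
is invented for this example; it assumes StructField.withCurrentDefaultValue, the
setter counterpart of the getCurrentDefaultValue() call used in the method above.]

    import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns
    import org.apache.spark.sql.types._

    // Invented schema: "id" carries a default expression, "name" does not.
    val schema = StructType(Seq(
      StructField("id", LongType).withCurrentDefaultValue("42"),
      StructField("name", StringType)))

    // Expected rows: a blank spacer, the section header, then one row per
    // column that actually has a default ("name" contributes nothing).
    assert(ResolveDefaultColumns.getDescribeMetadata(schema) == Seq(
      ("", "", ""),
      ("# Column Default Values", "", ""),
      ("id", "bigint", "42")))

[Schemas without the CURRENT_DEFAULT metadata key yield an empty Seq, which is
why DESCRIBE output is unchanged for tables that never declared a DEFAULT.]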
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 30f77b11ec0..f6266bcb33f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -648,13 +648,8 @@ case class DescribeTableCommand(
     }
 
     // If any columns have default values, append them to the result.
-    if (metadata.schema.fields.exists(_.metadata.contains(CURRENT_DEFAULT_COLUMN_METADATA_KEY))) {
-      append(result, "", "", "")
-      append(result, "# Column Default Values", "", "")
-      metadata.schema.foreach { column =>
-        column.getCurrentDefaultValue().map(
-          append(result, column.name, column.dataType.simpleString, _))
-      }
+    ResolveDefaultColumns.getDescribeMetadata(metadata.schema).foreach { row =>
+      append(result, row._1, row._2, row._3)
     }
   }
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
index acb861d7679..8b0098f14fe 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
@@ -23,7 +23,7 @@ import scala.collection.mutable.ArrayBuffer
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.catalog.CatalogTableType
 import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.util.quoteIfNeeded
+import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, ResolveDefaultColumns}
 import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, Table, TableCatalog}
 import org.apache.spark.sql.connector.expressions.IdentityTransform
 
@@ -68,6 +68,11 @@ case class DescribeTableExec(
       case (key, value) => key + "=" + value
     }.mkString("[", ",", "]")
     rows += toCatalystRow("Table Properties", properties, "")
+
+    // If any columns have default values, append them to the result.
+    ResolveDefaultColumns.getDescribeMetadata(table.schema).foreach { row =>
+      rows += toCatalystRow(row._1, row._2, row._3)
+    }
   }
 
   private def addSchema(rows: ArrayBuffer[InternalRow]): Unit = {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
index a4b7f762dba..58ed4b2a55c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
@@ -2890,6 +2890,33 @@ class DataSourceV2SQLSuiteV1Filter
     }
   }
 
+  test("DESCRIBE TABLE EXTENDED of a V2 table with a default column value") {
+    withSQLConf(SQLConf.DEFAULT_COLUMN_ALLOWED_PROVIDERS.key -> v2Source) {
+      withTable("t") {
+        spark.sql(s"CREATE TABLE t (id bigint default 42) USING $v2Source")
+        val descriptionDf = spark.sql(s"DESCRIBE TABLE EXTENDED t")
+        assert(descriptionDf.schema.map { field =>
+          (field.name, field.dataType)
+        } === Seq(
+          ("col_name", StringType),
+          ("data_type", StringType),
+          ("comment", StringType)))
+        QueryTest.checkAnswer(
+          descriptionDf.filter(
+            "!(col_name in ('Catalog', 'Created Time', 'Created By', 'Database', " +
+              "'index', 'Location', 'Name', 'Owner', 'Provider', 'Table', 'Table Properties', " +
+              "'Type', '_partition', ''))"),
+          Seq(
+            Row("# Detailed Table Information", "", ""),
+            Row("# Column Default Values", "", ""),
+            Row("# Metadata Columns", "", ""),
+            Row("id", "bigint", "42"),
+            Row("id", "bigint", null)
+          ))
+      }
+    }
+  }
+
   private def testNotSupportedV2Command(sqlCommand: String, sqlParams: String): Unit = {
     checkError(
       exception = intercept[AnalysisException] {
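[Editor's note: to make the user-facing change concrete, a hedged sketch of the
behavior the new test pins down. The provider name below is hypothetical, and,
as in the test, the data source must be listed under
spark.sql.defaultColumn.allowedProviders for DEFAULT clauses to be accepted.]

    // Assumes an active SparkSession `spark` and a hypothetical V2 provider
    // that is allowed to use DEFAULT clauses.
    val v2Source = "org.example.SomeV2Provider"
    spark.sql(s"CREATE TABLE t (id BIGINT DEFAULT 42) USING $v2Source")

    // With this patch, DESCRIBE TABLE EXTENDED on a V2 table surfaces
    // defaults in their own section, as it already did for V1 tables:
    spark.sql("DESCRIBE TABLE EXTENDED t").show(truncate = false)
    //   ...
    //   # Column Default Values
    //   id    bigint    42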
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org