This is an automated email from the ASF dual-hosted git repository. gengliang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new e9b12deff41 [SPARK-42123][SQL] Include column default values in DESCRIBE and SHOW CREATE TABLE output e9b12deff41 is described below commit e9b12deff41066dee28a7cdf971a3e7ef034d5f2 Author: Daniel Tenedorio <daniel.tenedo...@databricks.com> AuthorDate: Wed Jan 25 14:35:29 2023 -0800 [SPARK-42123][SQL] Include column default values in DESCRIBE and SHOW CREATE TABLE output ### What changes were proposed in this pull request? Include column default values in DESCRIBE and SHOW CREATE TABLE output. ### Why are the changes needed? This helps users work with tables and check their properties before querying or modifying. ### Does this PR introduce _any_ user-facing change? Yes, it changes the DESCRIBE and SHOW CREATE TABLE command outputs when the table has one or more columns with default values. ### How was this patch tested? This PR includes new unit and file-based query test coverage. Closes #39726 from dtenedor/show-create-table-and-describe-defaults. Authored-by: Daniel Tenedorio <daniel.tenedo...@databricks.com> Signed-off-by: Gengliang Wang <gengli...@apache.org> --- .../org/apache/spark/sql/types/StructField.scala | 6 +- .../spark/sql/execution/command/tables.scala | 17 +- .../test/resources/sql-tests/inputs/describe.sql | 22 +++ .../sql-tests/inputs/show-create-table.sql | 8 + .../resources/sql-tests/results/describe.sql.out | 192 +++++++++++++++++++++ .../sql-tests/results/show-create-table.sql.out | 30 ++++ .../execution/command/v1/DescribeTableSuite.scala | 49 ++++++ .../command/v1/ShowCreateTableSuite.scala | 24 +++ 8 files changed, 346 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala index aa038d05679..432e06a28a2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala @@ -141,6 +141,10 @@ case class StructField( } } + private def getDDLDefault = getCurrentDefaultValue() + .map(" DEFAULT " + _) + .getOrElse("") + private def getDDLComment = getComment() .map(escapeSingleQuotedString) .map(" COMMENT '" + _ + "'") @@ -160,7 +164,7 @@ case class StructField( */ def toDDL: String = { val nullString = if (nullable) "" else " NOT NULL" - s"${quoteIfNeeded(name)} ${dataType.sql}${nullString}$getDDLComment" + s"${quoteIfNeeded(name)} ${dataType.sql}${nullString}$getDDLDefault$getDDLComment" } private[sql] def toAttribute: AttributeReference = diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index b47f0e376ac..30f77b11ec0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIfNeeded, CaseInsensitiveMap, CharVarcharUtils, DateTimeUtils, ResolveDefaultColumns} +import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns.CURRENT_DEFAULT_COLUMN_METADATA_KEY import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.TableIdentifierHelper import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.execution.datasources.DataSource @@ -287,7 +288,7 @@ case class AlterTableAddColumnsCommand( private def constantFoldCurrentDefaultsToExistDefaults( sparkSession: SparkSession, tableProvider: Option[String]): Seq[StructField] = { colsToAdd.map { col: StructField => - if (col.metadata.contains(ResolveDefaultColumns.CURRENT_DEFAULT_COLUMN_METADATA_KEY)) { + if (col.metadata.contains(CURRENT_DEFAULT_COLUMN_METADATA_KEY)) { val foldedStructType = ResolveDefaultColumns.constantFoldCurrentDefaultsToExistDefaults( StructType(Array(col)), tableProvider, "ALTER TABLE ADD COLUMNS", true) foldedStructType.fields(0) @@ -645,6 +646,16 @@ case class DescribeTableCommand( } else if (isExtended) { describeFormattedTableInfo(metadata, result) } + + // If any columns have default values, append them to the result. + if (metadata.schema.fields.exists(_.metadata.contains(CURRENT_DEFAULT_COLUMN_METADATA_KEY))) { + append(result, "", "", "") + append(result, "# Column Default Values", "", "") + metadata.schema.foreach { column => + column.getCurrentDefaultValue().map( + append(result, column.name, column.dataType.simpleString, _)) + } + } } result.toSeq @@ -807,6 +818,10 @@ case class DescribeColumnCommand( hist <- c.histogram } yield histogramDescription(hist) buffer ++= histDesc.getOrElse(Seq(Row("histogram", "NULL"))) + val defaultKey = CURRENT_DEFAULT_COLUMN_METADATA_KEY + if (field.metadata.contains(defaultKey)) { + buffer += Row("default", field.metadata.getString(defaultKey)) + } } buffer.toSeq } diff --git a/sql/core/src/test/resources/sql-tests/inputs/describe.sql b/sql/core/src/test/resources/sql-tests/inputs/describe.sql index 493ea68baed..b37931456d0 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/describe.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/describe.sql @@ -97,3 +97,25 @@ DROP VIEW temp_v; DROP VIEW temp_Data_Source_View; DROP VIEW v; + +-- Show column default values +CREATE TABLE d (a STRING DEFAULT 'default-value', b INT DEFAULT 42) USING parquet COMMENT 'table_comment'; + +DESC d; + +DESC EXTENDED d; + +DESC TABLE EXTENDED d; + +DESC FORMATTED d; + +-- Show column default values with newlines in the string +CREATE TABLE e (a STRING DEFAULT CONCAT('a\n b\n ', 'c\n d'), b INT DEFAULT 42) USING parquet COMMENT 'table_comment'; + +DESC e; + +DESC EXTENDED e; + +DESC TABLE EXTENDED e; + +DESC FORMATTED e; diff --git a/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql b/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql index 98bc3c5cae0..5192d2dc6b5 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql @@ -45,6 +45,14 @@ SHOW CREATE TABLE tbl; DROP TABLE tbl; +-- default column values +CREATE TABLE tbl (a INT DEFAULT 42, b STRING DEFAULT 'abc, def', c INT DEFAULT 42) USING parquet +COMMENT 'This is a comment'; + +SHOW CREATE TABLE tbl; +DROP TABLE tbl; + + -- comment CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet COMMENT 'This is a comment'; diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out index 6193e1222a6..6c55bf40c9e 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out @@ -672,3 +672,195 @@ DROP VIEW v struct<> -- !query output + + +-- !query +CREATE TABLE d (a STRING DEFAULT 'default-value', b INT DEFAULT 42) USING parquet COMMENT 'table_comment' +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC d +-- !query schema +struct<col_name:string,data_type:string,comment:string> +-- !query output +a string +b int + +# Column Default Values +a string 'default-value' +b int 42 + + +-- !query +DESC EXTENDED d +-- !query schema +struct<col_name:string,data_type:string,comment:string> +-- !query output +a string +b int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table d +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type MANAGED +Provider parquet +Comment table_comment +Location [not included in comparison]/{warehouse_dir}/d + +# Column Default Values +a string 'default-value' +b int 42 + + +-- !query +DESC TABLE EXTENDED d +-- !query schema +struct<col_name:string,data_type:string,comment:string> +-- !query output +a string +b int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table d +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type MANAGED +Provider parquet +Comment table_comment +Location [not included in comparison]/{warehouse_dir}/d + +# Column Default Values +a string 'default-value' +b int 42 + + +-- !query +DESC FORMATTED d +-- !query schema +struct<col_name:string,data_type:string,comment:string> +-- !query output +a string +b int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table d +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type MANAGED +Provider parquet +Comment table_comment +Location [not included in comparison]/{warehouse_dir}/d + +# Column Default Values +a string 'default-value' +b int 42 + + +-- !query +CREATE TABLE e (a STRING DEFAULT CONCAT('a\n b\n ', 'c\n d'), b INT DEFAULT 42) USING parquet COMMENT 'table_comment' +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC e +-- !query schema +struct<col_name:string,data_type:string,comment:string> +-- !query output +a string +b int + +# Column Default Values +a string CONCAT('a\n b\n ', 'c\n d') +b int 42 + + +-- !query +DESC EXTENDED e +-- !query schema +struct<col_name:string,data_type:string,comment:string> +-- !query output +a string +b int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table e +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type MANAGED +Provider parquet +Comment table_comment +Location [not included in comparison]/{warehouse_dir}/e + +# Column Default Values +a string CONCAT('a\n b\n ', 'c\n d') +b int 42 + + +-- !query +DESC TABLE EXTENDED e +-- !query schema +struct<col_name:string,data_type:string,comment:string> +-- !query output +a string +b int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table e +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type MANAGED +Provider parquet +Comment table_comment +Location [not included in comparison]/{warehouse_dir}/e + +# Column Default Values +a string CONCAT('a\n b\n ', 'c\n d') +b int 42 + + +-- !query +DESC FORMATTED e +-- !query schema +struct<col_name:string,data_type:string,comment:string> +-- !query output +a string +b int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table e +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type MANAGED +Provider parquet +Comment table_comment +Location [not included in comparison]/{warehouse_dir}/e + +# Column Default Values +a string CONCAT('a\n b\n ', 'c\n d') +b int 42 diff --git a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out index 08c13ccc7e0..0d73960a6b3 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out @@ -181,6 +181,36 @@ struct<> +-- !query +CREATE TABLE tbl (a INT DEFAULT 42, b STRING DEFAULT 'abc, def', c INT DEFAULT 42) USING parquet +COMMENT 'This is a comment' +-- !query schema +struct<> +-- !query output + + + +-- !query +SHOW CREATE TABLE tbl +-- !query schema +struct<createtab_stmt:string> +-- !query output +CREATE TABLE default.tbl ( + a INT DEFAULT 42, + b STRING DEFAULT 'abc, def', + c INT DEFAULT 42) +USING parquet +COMMENT 'This is a comment' + + +-- !query +DROP TABLE tbl +-- !query schema +struct<> +-- !query output + + + -- !query CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet COMMENT 'This is a comment' diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala index 84da38f5097..02cf1958b94 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala @@ -182,6 +182,27 @@ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase } } } + + test("describe a column with a default value") { + withTable("t") { + sql(s"create table t(a int default 42) $defaultUsing") + val descriptionDf = sql("describe table extended t a") + QueryTest.checkAnswer( + descriptionDf, + Seq( + Row("col_name", "a"), + Row("data_type", "int"), + Row("comment", "NULL"), + Row("default", "42"), + Row("min", "NULL"), + Row("max", "NULL"), + Row("num_nulls", "NULL"), + Row("distinct_count", "NULL"), + Row("max_col_len", "NULL"), + Row("avg_col_len", "NULL"), + Row("histogram", "NULL"))) + } + } } /** @@ -225,4 +246,32 @@ class DescribeTableSuite extends DescribeTableSuiteBase with CommandSuiteBase { Row("Partition Provider", "Catalog", ""))) } } + + test("DESCRIBE TABLE EXTENDED of a table with a default column value") { + withTable("t") { + spark.sql(s"CREATE TABLE t (id bigint default 42) $defaultUsing") + val descriptionDf = spark.sql(s"DESCRIBE TABLE EXTENDED t") + assert(descriptionDf.schema.map { field => + (field.name, field.dataType) + } === Seq( + ("col_name", StringType), + ("data_type", StringType), + ("comment", StringType))) + QueryTest.checkAnswer( + descriptionDf.filter( + "!(col_name in ('Created Time', 'Created By', 'Database', 'Location', " + + "'Provider', 'Type'))"), + Seq( + Row("id", "bigint", null), + Row("", "", ""), + Row("# Detailed Table Information", "", ""), + Row("Catalog", SESSION_CATALOG_NAME, ""), + Row("Table", "t", ""), + Row("Last Access", "UNKNOWN", ""), + Row("", "", ""), + Row("# Column Default Values", "", ""), + Row("id", "bigint", "42") + )) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala index ee8aa424d5c..62e8f53d765 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala @@ -161,6 +161,30 @@ trait ShowCreateTableSuiteBase extends command.ShowCreateTableSuiteBase assert(cause.getMessage.contains("Use `SHOW CREATE TABLE` without `AS SERDE` instead")) } } + + test("show create table with default column values") { + withNamespaceAndTable(ns, table) { t => + sql( + s""" + |CREATE TABLE $t ( + | a bigint NOT NULL, + | b bigint DEFAULT 42, + | c string DEFAULT 'abc, "def"' COMMENT 'comment' + |) + |USING parquet + |COMMENT 'This is a comment' + """.stripMargin) + val showDDL = getShowCreateDDL(t) + assert(showDDL === Array( + s"CREATE TABLE $fullName (", + "a BIGINT,", + "b BIGINT DEFAULT 42,", + "c STRING DEFAULT 'abc, \"def\"' COMMENT 'comment')", + "USING parquet", + "COMMENT 'This is a comment'" + )) + } + } } /** --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org