This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new 7b68757  [SPARK-33588][SQL][2.4] Respect the `spark.sql.caseSensitive` config while resolving partition spec in v1 `SHOW TABLE EXTENDED`
7b68757 is described below

commit 7b6875797537ee18e8721a9e7efc70996a3635a9
Author: Max Gekk <max.g...@gmail.com>
AuthorDate: Mon Nov 30 08:39:31 2020 -0800

[SPARK-33588][SQL][2.4] Respect the `spark.sql.caseSensitive` config while resolving partition spec in v1 `SHOW TABLE EXTENDED`

### What changes were proposed in this pull request?
Perform partition spec normalization in `ShowTablesCommand` according to the table schema before getting partitions from the catalog. The normalization via `PartitioningUtils.normalizePartitionSpec()` adjusts the column names in the partition specification w.r.t. the real partition column names and case sensitivity; the sketch after this paragraph illustrates the idea.
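For illustration only, here is a minimal self-contained sketch of what such normalization does. The `normalizeSpec` helper below is hypothetical (the real logic lives in `PartitioningUtils.normalizePartitionSpec()`), but Spark's `Resolver` type really is just `(String, String) => Boolean`:

```scala
// Hypothetical sketch: matching user-supplied spec keys against the
// table's partition column names under a configurable resolver.
type Resolver = (String, String) => Boolean

val caseSensitiveResolution: Resolver = _ == _
val caseInsensitiveResolution: Resolver = _ equalsIgnoreCase _

def normalizeSpec(
    spec: Map[String, String],
    partColNames: Seq[String],
    resolver: Resolver): Map[String, String] =
  spec.map { case (key, value) =>
    // Replace the user-supplied key with the real partition column name,
    // failing if no partition column matches under the resolver.
    val normalizedKey = partColNames.find(resolver(_, key)).getOrElse {
      throw new IllegalArgumentException(s"$key is not a valid partition column")
    }
    normalizedKey -> value
  }

// With spark.sql.caseSensitive=false, YEAR and Month resolve to year and month:
// normalizeSpec(Map("YEAR" -> "2015", "Month" -> "1"), Seq("year", "month"),
//   caseInsensitiveResolution) == Map("year" -> "2015", "month" -> "1")
```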
### Why are the changes needed?
Even when `spark.sql.caseSensitive` is `false` (the default), v1 `SHOW TABLE EXTENDED` is case sensitive:
```sql
spark-sql> CREATE TABLE tbl1 (price int, qty int, year int, month int)
         > USING parquet
         > partitioned by (year, month);
spark-sql> INSERT INTO tbl1 PARTITION(year = 2015, month = 1) SELECT 1, 1;
spark-sql> SHOW TABLE EXTENDED LIKE 'tbl1' PARTITION(YEAR = 2015, Month = 1);
Error in query: Partition spec is invalid. The spec (YEAR, Month) must match the partition spec (year, month) defined in table '`default`.`tbl1`';
```

### Does this PR introduce _any_ user-facing change?
Yes. After the changes, the `SHOW TABLE EXTENDED` command respects the SQL config, and for the example above it returns the correct result:
```sql
spark-sql> SHOW TABLE EXTENDED LIKE 'tbl1' PARTITION(YEAR = 2015, Month = 1);
default	tbl1	false	Partition Values: [year=2015, month=1]
Location: file:/Users/maximgekk/spark-warehouse/tbl1/year=2015/month=1
Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
Storage Properties: [serialization.format=1, path=file:/Users/maximgekk/spark-warehouse/tbl1]
Partition Parameters: {transient_lastDdlTime=1606595118, totalSize=623, numFiles=1}
Created Time: Sat Nov 28 23:25:18 MSK 2020
Last Access: UNKNOWN
Partition Statistics: 623 bytes
```

### How was this patch tested?
By running the modified test suite:
```
$ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *DDLSuite"
```

Closes #30551 from MaxGekk/show-table-case-sensitive-spec-2.4.

Authored-by: Max Gekk <max.g...@gmail.com>
Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .../apache/spark/sql/execution/command/tables.scala | 17 +++++++++++------
 .../resources/sql-tests/results/show-tables.sql.out |  2 +-
 .../spark/sql/execution/command/DDLSuite.scala      | 21 +++++++++++++++++++++
 3 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 1abbc72..4a75bcb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -822,12 +822,17 @@ case class ShowTablesCommand(
       //
       // Note: tableIdentifierPattern should be non-empty, otherwise a [[ParseException]]
       // should have been thrown by the sql parser.
-      val tableIdent = TableIdentifier(tableIdentifierPattern.get, Some(db))
-      val table = catalog.getTableMetadata(tableIdent).identifier
-      val partition = catalog.getPartition(tableIdent, partitionSpec.get)
-      val database = table.database.getOrElse("")
-      val tableName = table.table
-      val isTemp = catalog.isTemporaryTable(table)
+      val table = catalog.getTableMetadata(TableIdentifier(tableIdentifierPattern.get, Some(db)))
+      val tableIdent = table.identifier
+      val normalizedSpec = PartitioningUtils.normalizePartitionSpec(
+        partitionSpec.get,
+        table.partitionColumnNames,
+        tableIdent.quotedString,
+        sparkSession.sessionState.conf.resolver)
+      val partition = catalog.getPartition(tableIdent, normalizedSpec)
+      val database = tableIdent.database.getOrElse("")
+      val tableName = tableIdent.table
+      val isTemp = catalog.isTemporaryTable(tableIdent)
       val information = partition.simpleString
       Seq(Row(database, tableName, isTemp, s"$information\n"))
     }
diff --git a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out
index abeb7e1..62f5521 100644
--- a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out
@@ -220,7 +220,7 @@ SHOW TABLE EXTENDED LIKE 'show_t1' PARTITION(a='Us', d=1)
 struct<>
 -- !query 18 output
 org.apache.spark.sql.AnalysisException
-Partition spec is invalid. The spec (a, d) must match the partition spec (c, d) defined in table '`showdb`.`show_t1`';
+a is not a valid partition column in table `showdb`.`show_t1`.;
 
 
 -- !query 19
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index 73565f2..9c55964 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -2846,6 +2846,27 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
       }
     }
   }
+
+  test("SPARK-33588: case sensitivity of partition spec") {
+    val t = "part_table"
+    withTable(t) {
+      sql(s"""
+        |CREATE TABLE $t (price int, qty int, year int, month int)
+        |USING $dataSource
+        |PARTITIONED BY (year, month)""".stripMargin)
+      sql(s"INSERT INTO $t PARTITION(year = 2015, month = 1) SELECT 1, 1")
+      Seq(
+        true -> "PARTITION(year = 2015, month = 1)",
+        false -> "PARTITION(YEAR = 2015, Month = 1)"
+      ).foreach { case (caseSensitive, partitionSpec) =>
+        withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) {
+          val df = sql(s"SHOW TABLE EXTENDED LIKE '$t' $partitionSpec")
+          val information = df.select("information").first().getString(0)
+          assert(information.contains("Partition Values: [year=2015, month=1]"))
+        }
+      }
+    }
+  }
 }
 
 object FakeLocalFsFileSystem {
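As a quick end-to-end sanity check of the fixed behavior, here is a sketch along the lines of the new test. It assumes a local `SparkSession`; the table name `part_table` simply mirrors the test above:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .master("local[*]")
  .appName("SPARK-33588 sanity check")
  .getOrCreate()

// false is the default; set it explicitly to make the scenario obvious.
spark.conf.set("spark.sql.caseSensitive", "false")

spark.sql(
  """CREATE TABLE part_table (price INT, qty INT, year INT, month INT)
    |USING parquet
    |PARTITIONED BY (year, month)""".stripMargin)
spark.sql("INSERT INTO part_table PARTITION(year = 2015, month = 1) SELECT 1, 1")

// Before this patch the mixed-case spec below failed with
// "Partition spec is invalid"; now YEAR/Month normalize to year/month.
val info = spark
  .sql("SHOW TABLE EXTENDED LIKE 'part_table' PARTITION(YEAR = 2015, Month = 1)")
  .select("information")
  .first()
  .getString(0)
assert(info.contains("Partition Values: [year=2015, month=1]"))

spark.stop()
```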