This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new b035bb177c08 [SPARK-46370][SQL] Fix bug when querying from table after changing column defaults
b035bb177c08 is described below

commit b035bb177c0875cfb7edb6d8672d4d2ac2813d1b
Author: Daniel Tenedorio <daniel.tenedo...@databricks.com>
AuthorDate: Tue Dec 12 14:44:27 2023 -0800

    [SPARK-46370][SQL] Fix bug when querying from table after changing column defaults

    ### What changes were proposed in this pull request?

    This PR fixes a bug when querying from a table after changing its column defaults:

    ```
    drop table if exists t;
    create table t(i int, s string default 'def') using parquet;
    insert into t select 1, default;
    alter table t alter column s drop default;
    insert into t select 2, default;
    select * from t;  -- Removing this line changes the following results!
    alter table t alter column s set default 'mno';
    insert into t select 3, default;
    select * from t;
    ```

    The bug is related to the table relation cache, and the fix adds a manual refresh of that
    cache to make sure we use the right table schema.

    ### Why are the changes needed?

    This PR fixes a correctness bug.

    ### Does this PR introduce _any_ user-facing change?

    Yes, see above.

    ### How was this patch tested?

    This PR adds test coverage.

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #44302 from dtenedor/fix-default-bug.

    Authored-by: Daniel Tenedorio <daniel.tenedo...@databricks.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../apache/spark/sql/execution/command/ddl.scala    |  3 +++
 .../org/apache/spark/sql/sources/InsertSuite.scala  | 24 ++++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
index 7e001803592f..dc1c5b3fd580 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
@@ -374,6 +374,9 @@ case class AlterTableChangeColumnCommand(
   // TODO: support change column name/dataType/metadata/position.
   override def run(sparkSession: SparkSession): Seq[Row] = {
     val catalog = sparkSession.sessionState.catalog
+    // This command may change column default values, so we need to refresh the table relation cache
+    // here so that DML commands can resolve these default values correctly.
+    catalog.refreshTable(tableName)
     val table = catalog.getTableRawMetadata(tableName)
     val resolver = sparkSession.sessionState.conf.resolver
     DDLUtils.verifyAlterTableType(catalog, table, isView = false)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
index 94535bc84a4c..76073a108a3c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
@@ -2608,6 +2608,30 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
     }
   }
 
+  test("SPARK-46370: Querying a table should not invalidate the column defaults") {
+    withTable("t") {
+      // Create a table and insert some rows into it, changing the default value of a column
+      // throughout.
+ spark.sql("CREATE TABLE t(i INT, s STRING DEFAULT 'def') USING CSV") + spark.sql("INSERT INTO t SELECT 1, DEFAULT") + spark.sql("ALTER TABLE t ALTER COLUMN s DROP DEFAULT") + spark.sql("INSERT INTO t SELECT 2, DEFAULT") + // Run a query to trigger the table relation cache. + val results = spark.table("t").collect() + assert(results.length == 2) + // Change the column default value and insert another row. Then query the table's contents + // and the results should be correct. + spark.sql("ALTER TABLE t ALTER COLUMN s SET DEFAULT 'mno'") + spark.sql("INSERT INTO t SELECT 3, DEFAULT").collect() + checkAnswer( + spark.table("t"), + Seq( + Row(1, "def"), + Row(2, null), + Row(3, "mno"))) + } + } + test("UNSUPPORTED_OVERWRITE.TABLE: Can't overwrite a table that is also being read from") { val tableName = "t1" withTable(tableName) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org