[spark] branch branch-3.2 updated: [SPARK-36011][SQL] Disallow altering permanent views based on temporary views or UDFs
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.2 by this push: new 176b055 [SPARK-36011][SQL] Disallow altering permanent views based on temporary views or UDFs 176b055 is described below commit 176b055c1245e3a12295038dccd02b1770fe4ad4 Author: RoryQi <1242949...@qq.com> AuthorDate: Tue Jul 6 14:56:12 2021 +0800 [SPARK-36011][SQL] Disallow altering permanent views based on temporary views or UDFs ### What changes were proposed in this pull request? PR #15764 disabled creating permanent views based on temporary views or UDFs. But AlterViewCommand didn't block temporary objects. ### Why are the changes needed? More robust view canonicalization. ### Does this PR introduce _any_ user-facing change? Yes, now if you alter a permanent view based on temporary views or UDFs, the operation will fail. ### How was this patch tested? Add new unit tests. Closes #33204 from jerqi/alter_view. Authored-by: RoryQi <1242949...@qq.com> Signed-off-by: Wenchen Fan (cherry picked from commit e0c6b2e9655331e308a78d972db4a18d24b73e37) Signed-off-by: Wenchen Fan --- .../apache/spark/sql/execution/command/views.scala | 6 +++-- .../spark/sql/execution/SQLViewTestSuite.scala | 28 ++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index ebcd277..5e92ce2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -259,7 +259,10 @@ case class AlterViewAsCommand( def markAsAnalyzed(): LogicalPlan = copy(isAnalyzed = true) override def run(session: SparkSession): Seq[Row] = { -if (session.sessionState.catalog.isTempView(name)) { +val isTemporary = session.sessionState.catalog.isTempView(name) +verifyTemporaryObjectsNotExists(session.sessionState.catalog, isTemporary, name, query) +verifyAutoGeneratedAliasesNotExists(query, isTemporary, name) +if (isTemporary) { alterTemporaryView(session, query) } else { alterPermanentView(session, query) @@ -286,7 +289,6 @@ case class AlterViewAsCommand( } private def alterPermanentView(session: SparkSession, analyzedPlan: LogicalPlan): Unit = { -verifyAutoGeneratedAliasesNotExists(analyzedPlan, isTemporary = false, name) val viewMeta = session.sessionState.catalog.getTableMetadata(name) // Detect cyclic view reference on ALTER VIEW. 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala index 80e9019..bc64f51 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala @@ -465,4 +465,32 @@ class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession { } } } + + test("SPARK-36011: Disallow altering permanent views based on temporary views or UDFs") { +import testImplicits._ +withTable("t") { + (1 to 10).toDF("id").write.saveAsTable("t") + withView("v1") { +withTempView("v2") { + sql("CREATE VIEW v1 AS SELECT * FROM t") + sql("CREATE TEMPORARY VIEW v2 AS SELECT * FROM t") + var e = intercept[AnalysisException] { +sql("ALTER VIEW v1 AS SELECT * FROM v2") + }.getMessage + assert(e.contains("Not allowed to create a permanent view `default`.`v1` by " + +"referencing a temporary view v2")) + val tempFunctionName = "temp_udf" + val functionClass = "test.org.apache.spark.sql.MyDoubleAvg" + withUserDefinedFunction(tempFunctionName -> true) { +sql(s"CREATE TEMPORARY FUNCTION $tempFunctionName AS '$functionClass'") +e = intercept[AnalysisException] { + sql(s"ALTER VIEW v1 AS SELECT $tempFunctionName(id) from t") +}.getMessage +assert(e.contains("Not allowed to create a permanent view `default`.`v1` by " + + s"referencing a temporary function `$tempFunctionName`")) + } +} + } +} + } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-36011][SQL] Disallow altering permanent views based on temporary views or UDFs
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new e0c6b2e [SPARK-36011][SQL] Disallow altering permanent views based on temporary views or UDFs e0c6b2e is described below commit e0c6b2e9655331e308a78d972db4a18d24b73e37 Author: RoryQi <1242949...@qq.com> AuthorDate: Tue Jul 6 14:56:12 2021 +0800 [SPARK-36011][SQL] Disallow altering permanent views based on temporary views or UDFs ### What changes were proposed in this pull request? PR #15764 disabled creating permanent views based on temporary views or UDFs. But AlterViewCommand didn't block temporary objects. ### Why are the changes needed? More robust view canonicalization. ### Does this PR introduce _any_ user-facing change? Yes, now if you alter a permanent view based on temporary views or UDFs, the operation will fail. ### How was this patch tested? Add new unit tests. Closes #33204 from jerqi/alter_view. Authored-by: RoryQi <1242949...@qq.com> Signed-off-by: Wenchen Fan --- .../apache/spark/sql/execution/command/views.scala | 6 +++-- .../spark/sql/execution/SQLViewTestSuite.scala | 28 ++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index ebcd277..5e92ce2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -259,7 +259,10 @@ case class AlterViewAsCommand( def markAsAnalyzed(): LogicalPlan = copy(isAnalyzed = true) override def run(session: SparkSession): Seq[Row] = { -if (session.sessionState.catalog.isTempView(name)) { +val isTemporary = session.sessionState.catalog.isTempView(name) +verifyTemporaryObjectsNotExists(session.sessionState.catalog, isTemporary, name, query) +verifyAutoGeneratedAliasesNotExists(query, isTemporary, name) +if (isTemporary) { alterTemporaryView(session, query) } else { alterPermanentView(session, query) @@ -286,7 +289,6 @@ case class AlterViewAsCommand( } private def alterPermanentView(session: SparkSession, analyzedPlan: LogicalPlan): Unit = { -verifyAutoGeneratedAliasesNotExists(analyzedPlan, isTemporary = false, name) val viewMeta = session.sessionState.catalog.getTableMetadata(name) // Detect cyclic view reference on ALTER VIEW. 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala index 80e9019..bc64f51 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala @@ -465,4 +465,32 @@ class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession { } } } + + test("SPARK-36011: Disallow altering permanent views based on temporary views or UDFs") { +import testImplicits._ +withTable("t") { + (1 to 10).toDF("id").write.saveAsTable("t") + withView("v1") { +withTempView("v2") { + sql("CREATE VIEW v1 AS SELECT * FROM t") + sql("CREATE TEMPORARY VIEW v2 AS SELECT * FROM t") + var e = intercept[AnalysisException] { +sql("ALTER VIEW v1 AS SELECT * FROM v2") + }.getMessage + assert(e.contains("Not allowed to create a permanent view `default`.`v1` by " + +"referencing a temporary view v2")) + val tempFunctionName = "temp_udf" + val functionClass = "test.org.apache.spark.sql.MyDoubleAvg" + withUserDefinedFunction(tempFunctionName -> true) { +sql(s"CREATE TEMPORARY FUNCTION $tempFunctionName AS '$functionClass'") +e = intercept[AnalysisException] { + sql(s"ALTER VIEW v1 AS SELECT $tempFunctionName(id) from t") +}.getMessage +assert(e.contains("Not allowed to create a permanent view `default`.`v1` by " + + s"referencing a temporary function `$tempFunctionName`")) + } +} + } +} + } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
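A minimal spark-shell sketch of the behavior this change enforces, mirroring the new test above; the object names and exception handling are illustrative, not taken from the patch:

```scala
// Hypothetical spark-shell session (Spark 3.2+); table/view names are illustrative.
spark.range(10).write.saveAsTable("t")
spark.sql("CREATE VIEW perm_v AS SELECT * FROM t")
spark.sql("CREATE TEMPORARY VIEW temp_v AS SELECT * FROM t")

// Previously this ALTER succeeded and left a permanent view silently depending on a
// session-scoped object; with this change it fails during analysis.
try {
  spark.sql("ALTER VIEW perm_v AS SELECT * FROM temp_v")
} catch {
  case e: org.apache.spark.sql.AnalysisException =>
    println(e.getMessage) // "Not allowed to create a permanent view ... by referencing a temporary view ..."
}
```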
[spark] branch branch-3.2 updated: [SPARK-35978][SQL] Support non-reserved keyword TIMESTAMP_LTZ
This is an automated email from the ASF dual-hosted git repository. gengliang pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.2 by this push: new e09feda [SPARK-35978][SQL] Support non-reserved keyword TIMESTAMP_LTZ e09feda is described below commit e09feda1d23a89a6f15a900f8001405f47b7e058 Author: Gengliang Wang AuthorDate: Tue Jul 6 14:33:22 2021 +0800 [SPARK-35978][SQL] Support non-reserved keyword TIMESTAMP_LTZ ### What changes were proposed in this pull request? Support new keyword `TIMESTAMP_LTZ`, which can be used for: - timestamp with local time zone data type in DDL - timestamp with local time zone data type in Cast clause. - timestamp with local time zone data type literal ### Why are the changes needed? Users can use `TIMESTAMP_LTZ` in DDL/Cast/Literals for the timestamp with local time zone type directly. The new keyword is independent of the SQL configuration `spark.sql.timestampType`. ### Does this PR introduce _any_ user-facing change? No, the new timestamp type is not released yet. ### How was this patch tested? Unit test Closes #33224 from gengliangwang/TIMESTAMP_LTZ. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit b0b9643cd76da48ed90e958e40717a664bc7494b) Signed-off-by: Gengliang Wang --- .../apache/spark/sql/catalyst/parser/AstBuilder.scala | 16 ++-- .../sql/catalyst/parser/DataTypeParserSuite.scala | 1 + .../sql/catalyst/parser/ExpressionParserSuite.scala | 19 +++ 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 680d781..d6363b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2119,6 +2119,13 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg throw QueryParsingErrors.cannotParseValueTypeError(valueType, value, ctx) } } + +def constructTimestampLTZLiteral(value: String): Literal = { + val zoneId = getZoneId(conf.sessionLocalTimeZone) + val specialTs = convertSpecialTimestamp(value, zoneId).map(Literal(_, TimestampType)) + specialTs.getOrElse(toLiteral(stringToTimestamp(_, zoneId), TimestampType)) +} + try { valueType match { case "DATE" => @@ -2128,13 +2135,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg case "TIMESTAMP_NTZ" => val specialTs = convertSpecialTimestampNTZ(value).map(Literal(_, TimestampNTZType)) specialTs.getOrElse(toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType)) +case "TIMESTAMP_LTZ" => + constructTimestampLTZLiteral(value) case "TIMESTAMP" => - def constructTimestampLTZLiteral(value: String): Literal = { -val zoneId = getZoneId(conf.sessionLocalTimeZone) -val specialTs = convertSpecialTimestamp(value, zoneId).map(Literal(_, TimestampType)) -specialTs.getOrElse(toLiteral(stringToTimestamp(_, zoneId), TimestampType)) - } - SQLConf.get.timestampType match { case TimestampNTZType => val specialTs = convertSpecialTimestampNTZ(value).map(Literal(_, TimestampNTZType)) @@ -2529,6 +2532,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg case ("date", Nil) => DateType case ("timestamp", Nil) => SQLConf.get.timestampType case ("timestamp_ntz", Nil) => TimestampNTZType + case 
("timestamp_ltz", Nil) => TimestampType case ("string", Nil) => StringType case ("character" | "char", length :: Nil) => CharType(length.getText.toInt) case ("varchar", length :: Nil) => VarcharType(length.getText.toInt) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala index d34..97dd0db 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala @@ -59,6 +59,7 @@ class DataTypeParserSuite extends SparkFunSuite with SQLHelper { checkDataType("DATE", DateType) checkDataType("timestamp", TimestampType) checkDataType("timestamp_ntz", TimestampNTZType) + checkDataType("timestamp_ltz", TimestampType) checkDataType("string", StringType) checkDataType("ChaR(5)", CharType(5)) chec
[spark] branch master updated (9544277 -> b0b9643)
This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

    from 9544277  [SPARK-35788][SS] Metrics support for RocksDB instance
     add b0b9643  [SPARK-35978][SQL] Support non-reserved keyword TIMESTAMP_LTZ

No new revisions were added by this update.

Summary of changes:
 .../apache/spark/sql/catalyst/parser/AstBuilder.scala | 16 ++--
 .../sql/catalyst/parser/DataTypeParserSuite.scala     |  1 +
 .../sql/catalyst/parser/ExpressionParserSuite.scala   | 19 +++
 3 files changed, 26 insertions(+), 10 deletions(-)

- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
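A short sketch of the three uses listed in the commit message (DDL, cast, literal), assuming a Spark 3.2+ spark-shell session; the table name is a placeholder:

```scala
// TIMESTAMP_LTZ maps to the existing "timestamp with local time zone" type,
// independent of spark.sql.timestampType. Table name is a placeholder.
spark.sql("CREATE TABLE ts_ltz_demo (ts TIMESTAMP_LTZ) USING parquet")          // DDL
spark.sql("SELECT CAST('2021-07-06 14:33:22' AS TIMESTAMP_LTZ)").printSchema()  // cast
spark.sql("SELECT TIMESTAMP_LTZ '2021-07-06 14:33:22'").printSchema()           // literal
```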
[spark] branch branch-3.2 updated: [SPARK-35788][SS] Metrics support for RocksDB instance
This is an automated email from the ASF dual-hosted git repository. kabhwan pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.2 by this push: new 22b303a [SPARK-35788][SS] Metrics support for RocksDB instance 22b303a is described below commit 22b303a64814d982e21b042cd12dad9b6499b5c6 Author: Yuanjian Li AuthorDate: Tue Jul 6 11:12:21 2021 +0900 [SPARK-35788][SS] Metrics support for RocksDB instance ### What changes were proposed in this pull request? Add more metrics for the RocksDB instance. We transform the native states from RocksDB. ### Why are the changes needed? Improve the usability with more metrics for RocksDB instance. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #32934 from xuanyuanking/SPARK-35788. Authored-by: Yuanjian Li Signed-off-by: Jungtaek Lim (cherry picked from commit 9544277b0a3ab4ed401092fcae751e20cfc62316) Signed-off-by: Jungtaek Lim --- .../sql/execution/streaming/state/RocksDB.scala| 72 +- .../streaming/state/RocksDBFileManager.scala | 47 ++ 2 files changed, 118 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala index f640018..9952d5d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala @@ -26,6 +26,8 @@ import scala.ref.WeakReference import scala.util.Try import org.apache.hadoop.conf.Configuration +import org.json4s.NoTypeHints +import org.json4s.jackson.Serialization import org.rocksdb.{RocksDB => NativeRocksDB, _} import org.apache.spark.TaskContext @@ -72,6 +74,7 @@ class RocksDB( dbOptions.setTableFormatConfig(tableFormatConfig) private val dbLogger = createLogger() // for forwarding RocksDB native logs to log4j dbOptions.setStatistics(new Statistics()) + private val nativeStats = dbOptions.statistics() private val workingDir = createTempDir("workingDir") private val fileManager = new RocksDBFileManager( @@ -84,6 +87,7 @@ class RocksDB( @volatile private var loadedVersion = -1L // -1 = nothing valid is loaded @volatile private var numKeysOnLoadedVersion = 0L @volatile private var numKeysOnWritingVersion = 0L + @volatile private var fileManagerMetrics = RocksDBFileManagerMetrics.EMPTY_METRICS @GuardedBy("acquireLock") @volatile private var acquiredThreadInfo: AcquiredThreadInfo = _ @@ -105,6 +109,7 @@ class RocksDB( numKeysOnWritingVersion = metadata.numKeys numKeysOnLoadedVersion = metadata.numKeys loadedVersion = version +fileManagerMetrics = fileManager.latestLoadCheckpointMetrics } writeBatch.clear() logInfo(s"Loaded $version") @@ -223,6 +228,7 @@ class RocksDB( } numKeysOnLoadedVersion = numKeysOnWritingVersion loadedVersion = newVersion + fileManagerMetrics = fileManager.latestSaveCheckpointMetrics commitLatencyMs ++= Map( "writeBatch" -> writeTimeMs, "flush" -> flushTimeMs, @@ -231,6 +237,7 @@ class RocksDB( "checkpoint" -> checkpointTimeMs, "fileSync" -> fileSyncTimeMs ) + logInfo(s"Committed $newVersion, stats = ${metrics.json}") loadedVersion } catch { case t: Throwable => @@ -283,6 +290,30 @@ class RocksDB( /** Get the latest version available in the DFS */ def getLatestVersion(): Long = fileManager.getLatestVersion() + /** Get current instantaneous statistics */ + def metrics: 
RocksDBMetrics = { +import HistogramType._ +val totalSSTFilesBytes = getDBProperty("rocksdb.total-sst-files-size") +val readerMemUsage = getDBProperty("rocksdb.estimate-table-readers-mem") +val memTableMemUsage = getDBProperty("rocksdb.size-all-mem-tables") +val nativeOps = Seq("get" -> DB_GET, "put" -> DB_WRITE).toMap +val nativeOpsLatencyMicros = nativeOps.mapValues { typ => + RocksDBNativeHistogram(nativeStats.getHistogramData(typ)) +} + +RocksDBMetrics( + numKeysOnLoadedVersion, + numKeysOnWritingVersion, + readerMemUsage + memTableMemUsage, + totalSSTFilesBytes, + nativeOpsLatencyMicros.toMap, + commitLatencyMs, + bytesCopied = fileManagerMetrics.bytesCopied, + filesCopied = fileManagerMetrics.filesCopied, + filesReused = fileManagerMetrics.filesReused, + zipFileBytesUncompressed = fileManagerMetrics.zipFileBytesUncompressed) + } + private def acquire(): Unit = acquireLock.synchronized { val newAcquiredThreadInfo = AcquiredThreadInfo() val waitStartTime = System.curre
[spark] branch master updated (8b46e26 -> 9544277)
This is an automated email from the ASF dual-hosted git repository.

kabhwan pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

    from 8b46e26  [SPARK-34302][SQL][FOLLOWUP] More code cleanup
     add 9544277  [SPARK-35788][SS] Metrics support for RocksDB instance

No new revisions were added by this update.

Summary of changes:
 .../sql/execution/streaming/state/RocksDB.scala | 72 +-
 .../streaming/state/RocksDBFileManager.scala    | 47 ++
 2 files changed, 118 insertions(+), 1 deletion(-)

- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
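The new metrics are reported by the RocksDB state store used in Structured Streaming. A hedged sketch of where they surface; the provider class name and the exact metric layout are assumptions, not taken from this diff:

```scala
// Assumes the RocksDB state store provider shipped with Spark 3.2; the class name
// and the shape of the reported metrics are assumptions rather than part of this diff.
import org.apache.spark.sql.streaming.Trigger

spark.conf.set(
  "spark.sql.streaming.stateStore.providerClass",
  "org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider")

val query = spark.readStream.format("rate").load()
  .groupBy("value").count()
  .writeStream
  .outputMode("update")
  .format("console")
  .trigger(Trigger.ProcessingTime("10 seconds"))
  .start()

// Stateful-operator metrics, including the RocksDB instance metrics added here,
// show up in the streaming query progress JSON.
query.recentProgress.foreach(p => println(p.prettyJson))
```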
[spark] branch branch-3.2 updated: [SPARK-34302][SQL][FOLLOWUP] More code cleanup
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.2 by this push: new 0df89d8 [SPARK-34302][SQL][FOLLOWUP] More code cleanup 0df89d8 is described below commit 0df89d8999516c692dc08a84b5787e66d1d6f3a6 Author: Wenchen Fan AuthorDate: Tue Jul 6 03:43:42 2021 +0800 [SPARK-34302][SQL][FOLLOWUP] More code cleanup ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/33113, to do some code cleanup: 1. `UnresolvedFieldPosition` doesn't need to include the field name. We can get it through "context" (`AlterTableAlterColumn.column.name`). 2. Run `ResolveAlterTableCommands` in the main resolution batch, so that the column/field resolution is also unified between v1 and v2 commands (same error message). 3. Fail immediately in `ResolveAlterTableCommands` if we can't resolve the field, instead of waiting until `CheckAnalysis`. We don't expect other rules to resolve fields in ALTER TABLE commands, so failing immediately is simpler and we can remove duplicated code in `CheckAnalysis`. ### Why are the changes needed? code simplification. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests Closes #33213 from cloud-fan/follow. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan (cherry picked from commit 8b46e26fc6770bd39d7fee070f88072b05d50df7) Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 7 ++- .../spark/sql/catalyst/analysis/Analyzer.scala | 61 -- .../sql/catalyst/analysis/CheckAnalysis.scala | 37 - .../sql/catalyst/analysis/v2ResolutionPlans.scala | 4 +- .../spark/sql/catalyst/parser/AstBuilder.scala | 9 ++-- .../spark/sql/errors/QueryCompilationErrors.scala | 14 + .../org/apache/spark/sql/types/StructType.scala| 2 +- .../spark/sql/catalyst/parser/DDLParserSuite.scala | 4 +- .../sql-tests/results/change-column.sql.out| 7 ++- .../spark/sql/connector/AlterTableTests.scala | 28 -- .../connector/V2CommandsCaseSensitivitySuite.scala | 10 ++-- .../execution/command/PlanResolutionSuite.scala| 17 +++--- .../v2/jdbc/JDBCTableCatalogSuite.scala| 16 +++--- 13 files changed, 95 insertions(+), 121 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index e13ce81..1afe26a 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -50,7 +50,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val msg = intercept[AnalysisException] { sql(s"ALTER TABLE $catalogName.alt_table ALTER COLUMN bad_column DROP NOT NULL") }.getMessage -assert(msg.contains("Cannot update missing field bad_column")) +assert(msg.contains("Missing field bad_column")) } def testRenameColumn(tbl: String): Unit = { @@ -103,8 +103,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val msg = intercept[AnalysisException] { sql(s"ALTER TABLE $catalogName.alt_table DROP COLUMN bad_column") }.getMessage - assert( -msg.contains(s"Cannot delete missing field bad_column in $catalogName.alt_table schema")) + assert(msg.contains(s"Missing field bad_column in table 
$catalogName.alt_table")) } // Drop a column from a not existing table val msg = intercept[AnalysisException] { @@ -120,7 +119,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val msg2 = intercept[AnalysisException] { sql(s"ALTER TABLE $catalogName.alt_table ALTER COLUMN bad_column TYPE DOUBLE") }.getMessage - assert(msg2.contains("Cannot update missing field bad_column")) + assert(msg2.contains("Missing field bad_column")) } // Update column type in not existing table val msg = intercept[AnalysisException] { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index a381658..2d747f7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -269,6 +269,7 @@ class Analyzer(override val catalogManager: CatalogManager) ResolveRela
[spark] branch master updated: [SPARK-34302][SQL][FOLLOWUP] More code cleanup
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 8b46e26 [SPARK-34302][SQL][FOLLOWUP] More code cleanup 8b46e26 is described below commit 8b46e26fc6770bd39d7fee070f88072b05d50df7 Author: Wenchen Fan AuthorDate: Tue Jul 6 03:43:42 2021 +0800 [SPARK-34302][SQL][FOLLOWUP] More code cleanup ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/33113, to do some code cleanup: 1. `UnresolvedFieldPosition` doesn't need to include the field name. We can get it through "context" (`AlterTableAlterColumn.column.name`). 2. Run `ResolveAlterTableCommands` in the main resolution batch, so that the column/field resolution is also unified between v1 and v2 commands (same error message). 3. Fail immediately in `ResolveAlterTableCommands` if we can't resolve the field, instead of waiting until `CheckAnalysis`. We don't expect other rules to resolve fields in ALTER TABLE commands, so failing immediately is simpler and we can remove duplicated code in `CheckAnalysis`. ### Why are the changes needed? code simplification. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests Closes #33213 from cloud-fan/follow. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 7 ++- .../spark/sql/catalyst/analysis/Analyzer.scala | 61 -- .../sql/catalyst/analysis/CheckAnalysis.scala | 37 - .../sql/catalyst/analysis/v2ResolutionPlans.scala | 4 +- .../spark/sql/catalyst/parser/AstBuilder.scala | 9 ++-- .../spark/sql/errors/QueryCompilationErrors.scala | 14 + .../org/apache/spark/sql/types/StructType.scala| 2 +- .../spark/sql/catalyst/parser/DDLParserSuite.scala | 4 +- .../sql-tests/results/change-column.sql.out| 7 ++- .../spark/sql/connector/AlterTableTests.scala | 28 -- .../connector/V2CommandsCaseSensitivitySuite.scala | 10 ++-- .../execution/command/PlanResolutionSuite.scala| 17 +++--- .../v2/jdbc/JDBCTableCatalogSuite.scala| 16 +++--- 13 files changed, 95 insertions(+), 121 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index e13ce81..1afe26a 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -50,7 +50,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val msg = intercept[AnalysisException] { sql(s"ALTER TABLE $catalogName.alt_table ALTER COLUMN bad_column DROP NOT NULL") }.getMessage -assert(msg.contains("Cannot update missing field bad_column")) +assert(msg.contains("Missing field bad_column")) } def testRenameColumn(tbl: String): Unit = { @@ -103,8 +103,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val msg = intercept[AnalysisException] { sql(s"ALTER TABLE $catalogName.alt_table DROP COLUMN bad_column") }.getMessage - assert( -msg.contains(s"Cannot delete missing field bad_column in $catalogName.alt_table schema")) + assert(msg.contains(s"Missing field bad_column in table $catalogName.alt_table")) } // Drop a column from a not existing table val msg = 
intercept[AnalysisException] { @@ -120,7 +119,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val msg2 = intercept[AnalysisException] { sql(s"ALTER TABLE $catalogName.alt_table ALTER COLUMN bad_column TYPE DOUBLE") }.getMessage - assert(msg2.contains("Cannot update missing field bad_column")) + assert(msg2.contains("Missing field bad_column")) } // Update column type in not existing table val msg = intercept[AnalysisException] { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index a381658..2d747f7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -269,6 +269,7 @@ class Analyzer(override val catalogManager: CatalogManager) ResolveRelations :: ResolveTables :: ResolvePartitionSpec :: + ResolveAlterTableCommands :: AddMet
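The user-visible effect of this cleanup is the unified analysis error. A hedged sketch; the catalog and table names below are placeholders and must resolve to an existing table that supports the command for field resolution to be reached:

```scala
// Placeholder names; assumes `testcat.ns.tbl` exists in a catalog that supports
// ALTER TABLE ... ALTER COLUMN. After this cleanup an unresolvable column fails
// analysis immediately with "Missing field bad_column ..." on both the v1 and v2
// command paths (previously "Cannot update missing field ...").
try {
  spark.sql("ALTER TABLE testcat.ns.tbl ALTER COLUMN bad_column TYPE DOUBLE")
} catch {
  case e: org.apache.spark.sql.AnalysisException => println(e.getMessage)
}
```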
[spark] branch branch-3.2 updated: [SPARK-35977][SQL] Support non-reserved keyword TIMESTAMP_NTZ
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.2 by this push: new 1ec37dd [SPARK-35977][SQL] Support non-reserved keyword TIMESTAMP_NTZ 1ec37dd is described below commit 1ec37dd164ae64c78bdccd8c0604b4013a692015 Author: Gengliang Wang AuthorDate: Mon Jul 5 22:30:44 2021 +0300 [SPARK-35977][SQL] Support non-reserved keyword TIMESTAMP_NTZ ### What changes were proposed in this pull request? Support new keyword TIMESTAMP_NTZ, which can be used for: - timestamp without time zone data type in DDL - timestamp without time zone data type in Cast clause. - timestamp without time zone data type literal ### Why are the changes needed? Users can use `TIMESTAMP_NTZ` in DDL/Cast/Literals for the timestamp without time zone type directly. ### Does this PR introduce _any_ user-facing change? No, the new timestamp type is not released yet. ### How was this patch tested? Unit test Closes #33221 from gengliangwang/timstamp_ntz. Authored-by: Gengliang Wang Signed-off-by: Max Gekk (cherry picked from commit 5f44acff3df51721fe891ea50c0a5bcf3a37a719) Signed-off-by: Max Gekk --- .../main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 4 .../org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala| 1 + .../org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala | 3 +++ 3 files changed, 8 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 5b9107f..c650cf0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2125,6 +2125,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg val zoneId = getZoneId(conf.sessionLocalTimeZone) val specialDate = convertSpecialDate(value, zoneId).map(Literal(_, DateType)) specialDate.getOrElse(toLiteral(stringToDate, DateType)) +case "TIMESTAMP_NTZ" => + val specialTs = convertSpecialTimestampNTZ(value).map(Literal(_, TimestampNTZType)) + specialTs.getOrElse(toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType)) case "TIMESTAMP" => def constructTimestampLTZLiteral(value: String): Literal = { val zoneId = getZoneId(conf.sessionLocalTimeZone) @@ -2525,6 +2528,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg case ("double", Nil) => DoubleType case ("date", Nil) => DateType case ("timestamp", Nil) => SQLConf.get.timestampType + case ("timestamp_ntz", Nil) => TimestampNTZType case ("string", Nil) => StringType case ("character" | "char", length :: Nil) => CharType(length.getText.toInt) case ("varchar", length :: Nil) => VarcharType(length.getText.toInt) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala index a6b78e0..d34 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala @@ -58,6 +58,7 @@ class DataTypeParserSuite extends SparkFunSuite with SQLHelper { checkDataType("deC", DecimalType.USER_DEFAULT) checkDataType("DATE", DateType) 
checkDataType("timestamp", TimestampType) + checkDataType("timestamp_ntz", TimestampNTZType) checkDataType("string", StringType) checkDataType("ChaR(5)", CharType(5)) checkDataType("ChaRacter(5)", CharType(5)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index 37e2d9b..7b13fa9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -465,6 +465,9 @@ class ExpressionParserSuite extends AnalysisTest { intercept("timestamP '2016-33-11 20:54:00.000'", "Cannot parse the TIMESTAMP value") // Timestamp without time zone +assertEqual("tImEstAmp_Ntz '2016-03-11 20:54:00.000'", + Literal(LocalDateTime.parse("2016-03-11T20:54:00.000"))) +intercept("tImEstAmp_Ntz '2016-33-11 20:54:00.000'", "Cannot parse the TIMESTAMP_NTZ value") withSQLConf(SQLConf.TIMESTAMP_TYPE.key -> Timest
[spark] branch master updated: [SPARK-35977][SQL] Support non-reserved keyword TIMESTAMP_NTZ
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 5f44acf [SPARK-35977][SQL] Support non-reserved keyword TIMESTAMP_NTZ 5f44acf is described below commit 5f44acff3df51721fe891ea50c0a5bcf3a37a719 Author: Gengliang Wang AuthorDate: Mon Jul 5 22:30:44 2021 +0300 [SPARK-35977][SQL] Support non-reserved keyword TIMESTAMP_NTZ ### What changes were proposed in this pull request? Support new keyword TIMESTAMP_NTZ, which can be used for: - timestamp without time zone data type in DDL - timestamp without time zone data type in Cast clause. - timestamp without time zone data type literal ### Why are the changes needed? Users can use `TIMESTAMP_NTZ` in DDL/Cast/Literals for the timestamp without time zone type directly. ### Does this PR introduce _any_ user-facing change? No, the new timestamp type is not released yet. ### How was this patch tested? Unit test Closes #33221 from gengliangwang/timstamp_ntz. Authored-by: Gengliang Wang Signed-off-by: Max Gekk --- .../main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 4 .../org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala| 1 + .../org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala | 3 +++ 3 files changed, 8 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 5b9107f..c650cf0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2125,6 +2125,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg val zoneId = getZoneId(conf.sessionLocalTimeZone) val specialDate = convertSpecialDate(value, zoneId).map(Literal(_, DateType)) specialDate.getOrElse(toLiteral(stringToDate, DateType)) +case "TIMESTAMP_NTZ" => + val specialTs = convertSpecialTimestampNTZ(value).map(Literal(_, TimestampNTZType)) + specialTs.getOrElse(toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType)) case "TIMESTAMP" => def constructTimestampLTZLiteral(value: String): Literal = { val zoneId = getZoneId(conf.sessionLocalTimeZone) @@ -2525,6 +2528,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg case ("double", Nil) => DoubleType case ("date", Nil) => DateType case ("timestamp", Nil) => SQLConf.get.timestampType + case ("timestamp_ntz", Nil) => TimestampNTZType case ("string", Nil) => StringType case ("character" | "char", length :: Nil) => CharType(length.getText.toInt) case ("varchar", length :: Nil) => VarcharType(length.getText.toInt) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala index a6b78e0..d34 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala @@ -58,6 +58,7 @@ class DataTypeParserSuite extends SparkFunSuite with SQLHelper { checkDataType("deC", DecimalType.USER_DEFAULT) checkDataType("DATE", DateType) checkDataType("timestamp", TimestampType) + checkDataType("timestamp_ntz", TimestampNTZType) 
checkDataType("string", StringType) checkDataType("ChaR(5)", CharType(5)) checkDataType("ChaRacter(5)", CharType(5)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index 37e2d9b..7b13fa9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -465,6 +465,9 @@ class ExpressionParserSuite extends AnalysisTest { intercept("timestamP '2016-33-11 20:54:00.000'", "Cannot parse the TIMESTAMP value") // Timestamp without time zone +assertEqual("tImEstAmp_Ntz '2016-03-11 20:54:00.000'", + Literal(LocalDateTime.parse("2016-03-11T20:54:00.000"))) +intercept("tImEstAmp_Ntz '2016-33-11 20:54:00.000'", "Cannot parse the TIMESTAMP_NTZ value") withSQLConf(SQLConf.TIMESTAMP_TYPE.key -> TimestampTypes.TIMESTAMP_NTZ.toString) { assertEqual("tImEstAmp '2016-03-11 20:54:00.000'", Literal(
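A sketch of the new keyword in the three positions the commit message lists. The commit itself notes the TIMESTAMP_NTZ type was not released yet, so treat this as an illustration of the parser change rather than a guaranteed end-to-end 3.2 feature; the table name is a placeholder:

```scala
spark.sql("SELECT TIMESTAMP_NTZ '2016-03-11 20:54:00.000'").printSchema()       // literal
spark.sql("SELECT CAST('2016-03-11 20:54:00' AS TIMESTAMP_NTZ)").printSchema()  // cast
spark.sql("CREATE TABLE ts_ntz_demo (ts TIMESTAMP_NTZ) USING parquet")          // DDL
```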
[spark] branch branch-3.2 updated: [SPARK-35979][SQL] Return different timestamp literals based on the default timestamp type
This is an automated email from the ASF dual-hosted git repository. gengliang pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.2 by this push: new a9947cb [SPARK-35979][SQL] Return different timestamp literals based on the default timestamp type a9947cb is described below commit a9947cbd716b83e2f65dfec035c7abf29ea40922 Author: Gengliang Wang AuthorDate: Tue Jul 6 00:54:58 2021 +0800 [SPARK-35979][SQL] Return different timestamp literals based on the default timestamp type ### What changes were proposed in this pull request? For the timestamp literal, it should have the following behavior. 1. When `spark.sql.timestampType` is TIMESTAMP_NTZ: if there is no time zone part, return timestamp without time zone literal; otherwise, return timestamp with local time zone literal 2. When `spark.sql.timestampType` is TIMESTAMP_LTZ: return timestamp with local time zone literal ### Why are the changes needed? When the default timestamp type is TIMESTAMP_NTZ, the result of type literal should return TIMESTAMP_NTZ when there is no time zone part in the string. From setion 5.3 "literal" of ANSI SQL standard 2011: ``` 27) The declared type of a that does not specify is TIMESTAMP(P) WITHOUT TIME ZONE, where P is the number of digits in , if specified, and 0 (zero) otherwise. The declared type of a that specifies is TIMESTAMP(P) WITH TIME ZONE, where P is the number of digits in , if specified, and 0 (zero) otherwise. ``` Since we don't have "timestamp with time zone", we use timestamp with local time zone instead. ### Does this PR introduce _any_ user-facing change? No, the new timestmap type and the default timestamp configuration is not released yet. ### How was this patch tested? Unit test Closes #33215 from gengliangwang/tsLiteral. 
Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit 2fffec7de8d31bd01c8acd8bca72acacaf189c97) Signed-off-by: Gengliang Wang --- .../spark/sql/catalyst/parser/AstBuilder.scala | 32 ++ .../spark/sql/catalyst/util/DateTimeUtils.scala| 31 + .../org/apache/spark/sql/internal/SQLConf.scala| 6 ++-- .../catalyst/parser/ExpressionParserSuite.scala| 12 .../sql/catalyst/util/DateTimeUtilsSuite.scala | 15 ++ 5 files changed, 83 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 361ecc1..5b9107f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -38,8 +38,8 @@ import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.CurrentOrigin -import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, IntervalUtils} -import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, getZoneId, stringToDate, stringToTimestamp} +import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, DateTimeUtils, IntervalUtils} +import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ, getZoneId, stringToDate, stringToTimestamp, stringToTimestampWithoutTimeZone} import org.apache.spark.sql.catalyst.util.IntervalUtils.IntervalUnit import org.apache.spark.sql.connector.catalog.{SupportsNamespaces, TableCatalog} import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition @@ -2126,9 +2126,31 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg val specialDate = convertSpecialDate(value, zoneId).map(Literal(_, DateType)) specialDate.getOrElse(toLiteral(stringToDate, DateType)) case "TIMESTAMP" => - val zoneId = getZoneId(conf.sessionLocalTimeZone) - val specialTs = convertSpecialTimestamp(value, zoneId).map(Literal(_, TimestampType)) - specialTs.getOrElse(toLiteral(stringToTimestamp(_, zoneId), TimestampType)) + def constructTimestampLTZLiteral(value: String): Literal = { +val zoneId = getZoneId(conf.sessionLocalTimeZone) +val specialTs = convertSpecialTimestamp(value, zoneId).map(Literal(_, TimestampType)) +specialTs.getOrElse(toLiteral(stringToTimestamp(_, zoneId), TimestampType)) + } + + SQLConf.get.timestampType match { +case TimestampNTZType => + val specialTs = convertSpecialTimestampNTZ(valu
[spark] branch master updated (c605ba2 -> 2fffec7)
This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

    from c605ba2  [SPARK-35664][SQL][FOLLOWUP] Fix incorrect comment for TimestampNTZType
     add 2fffec7  [SPARK-35979][SQL] Return different timestamp literals based on the default timestamp type

No new revisions were added by this update.

Summary of changes:
 .../spark/sql/catalyst/parser/AstBuilder.scala  | 32 ++
 .../spark/sql/catalyst/util/DateTimeUtils.scala | 31 +
 .../org/apache/spark/sql/internal/SQLConf.scala |  6 ++--
 .../catalyst/parser/ExpressionParserSuite.scala | 12
 .../sql/catalyst/util/DateTimeUtilsSuite.scala  | 15 ++
 5 files changed, 83 insertions(+), 13 deletions(-)

- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
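A sketch of the resolution rule described in the commit message; it assumes a build where spark.sql.timestampType and the TIMESTAMP_NTZ work are available, which the commit notes were not released yet:

```scala
spark.conf.set("spark.sql.timestampType", "TIMESTAMP_NTZ")
// No time zone part in the string -> timestamp without time zone literal.
spark.sql("SELECT TIMESTAMP '2021-07-06 00:54:58'").printSchema()
// Explicit zone offset -> timestamp with local time zone literal.
spark.sql("SELECT TIMESTAMP '2021-07-06 00:54:58+08:00'").printSchema()

spark.conf.set("spark.sql.timestampType", "TIMESTAMP_LTZ")
// Always a timestamp with local time zone literal.
spark.sql("SELECT TIMESTAMP '2021-07-06 00:54:58'").printSchema()
```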
[spark] branch branch-3.2 updated: [SPARK-35664][SQL][FOLLOWUP] Fix incorrect comment for TimestampNTZType
This is an automated email from the ASF dual-hosted git repository. gengliang pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.2 by this push: new d3e8c9c [SPARK-35664][SQL][FOLLOWUP] Fix incorrect comment for TimestampNTZType d3e8c9c is described below commit d3e8c9c78b364580523e3f915ee51369ca7df0bf Author: gengjiaan AuthorDate: Mon Jul 5 18:48:00 2021 +0800 [SPARK-35664][SQL][FOLLOWUP] Fix incorrect comment for TimestampNTZType ### What changes were proposed in this pull request? This PR fix the incorrect comment for `TimestampNTZType`. ### Why are the changes needed? Fix the incorrect comment ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? No need. Closes #33218 from beliefer/SPARK-35664-followup. Authored-by: gengjiaan Signed-off-by: Gengliang Wang (cherry picked from commit c605ba2d46742ca13db794ca1be136a4b10b652e) Signed-off-by: Gengliang Wang --- sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala index 15a93a7..f23f3c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala @@ -116,7 +116,7 @@ object Encoders { /** * Creates an encoder that serializes instances of the `java.time.LocalDateTime` class - * to the internal representation of nullable Catalyst's DateType. + * to the internal representation of nullable Catalyst's TimestampNTZType. * * @since 3.2.0 */ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-35664][SQL][FOLLOWUP] Fix incorrect comment for TimestampNTZType
This is an automated email from the ASF dual-hosted git repository. gengliang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new c605ba2 [SPARK-35664][SQL][FOLLOWUP] Fix incorrect comment for TimestampNTZType c605ba2 is described below commit c605ba2d46742ca13db794ca1be136a4b10b652e Author: gengjiaan AuthorDate: Mon Jul 5 18:48:00 2021 +0800 [SPARK-35664][SQL][FOLLOWUP] Fix incorrect comment for TimestampNTZType ### What changes were proposed in this pull request? This PR fix the incorrect comment for `TimestampNTZType`. ### Why are the changes needed? Fix the incorrect comment ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? No need. Closes #33218 from beliefer/SPARK-35664-followup. Authored-by: gengjiaan Signed-off-by: Gengliang Wang --- sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala index 15a93a7..f23f3c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala @@ -116,7 +116,7 @@ object Encoders { /** * Creates an encoder that serializes instances of the `java.time.LocalDateTime` class - * to the internal representation of nullable Catalyst's DateType. + * to the internal representation of nullable Catalyst's TimestampNTZType. * * @since 3.2.0 */ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
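A tiny sketch of the encoder whose documentation is corrected here; it assumes the Encoders.LOCALDATETIME accessor added for Spark 3.2:

```scala
import org.apache.spark.sql.Encoders

// Assumes Encoders.LOCALDATETIME (added for Spark 3.2). The corrected comment says this
// encoder maps java.time.LocalDateTime to TimestampNTZType, not DateType.
println(Encoders.LOCALDATETIME.schema)
```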
[spark] branch branch-3.2 updated: [SPARK-35998][SQL] Make from_csv/to_csv to handle year-month intervals properly
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.2 by this push: new 544b7e1 [SPARK-35998][SQL] Make from_csv/to_csv to handle year-month intervals properly 544b7e1 is described below commit 544b7e16acf51b7c2a9555fb4ebe7b19a00e Author: Kousuke Saruta AuthorDate: Mon Jul 5 13:10:50 2021 +0300 [SPARK-35998][SQL] Make from_csv/to_csv to handle year-month intervals properly ### What changes were proposed in this pull request? This PR fixes an issue that `from_csv/to_csv` doesn't handle year-month intervals properly. `from_csv` throws exception if year-month interval types are given. ``` spark-sql> select from_csv("interval '1-2' year to month", "a interval year to month"); 21/07/03 04:32:24 ERROR SparkSQLDriver: Failed in [select from_csv("interval '1-2' year to month", "a interval year to month")] java.lang.Exception: Unsupported type: interval year to month at org.apache.spark.sql.errors.QueryExecutionErrors$.unsupportedTypeError(QueryExecutionErrors.scala:775) at org.apache.spark.sql.catalyst.csv.UnivocityParser.makeConverter(UnivocityParser.scala:224) at org.apache.spark.sql.catalyst.csv.UnivocityParser.$anonfun$valueConverters$1(UnivocityParser.scala:134) ``` Also, `to_csv` doesn't handle year-month interval types properly though any exception is thrown. The result of `to_csv` for year-month interval types is not ANSI interval compliant form. ``` spark-sql> select to_csv(named_struct("a", interval '1-2' year to month)); 14 ``` The result above should be `INTERVAL '1-2' YEAR TO MONTH`. ### Why are the changes needed? Bug fix. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New tests. Closes #33210 from sarutak/csv-yminterval. 
Authored-by: Kousuke Saruta Signed-off-by: Max Gekk (cherry picked from commit f4237aff7ebece0b8d61e680ecbe759850f25af8) Signed-off-by: Max Gekk --- .../sql/catalyst/csv/UnivocityGenerator.scala | 7 +- .../spark/sql/catalyst/csv/UnivocityParser.scala | 7 +- .../org/apache/spark/sql/CsvFunctionsSuite.scala | 29 ++ 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala index 11b31ce..5d70ccb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala @@ -22,7 +22,7 @@ import java.io.Writer import com.univocity.parsers.csv.CsvWriter import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.util.{DateFormatter, TimestampFormatter} +import org.apache.spark.sql.catalyst.util.{DateFormatter, IntervalStringStyles, IntervalUtils, TimestampFormatter} import org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT import org.apache.spark.sql.types._ @@ -61,6 +61,11 @@ class UnivocityGenerator( case TimestampType => (row: InternalRow, ordinal: Int) => timestampFormatter.format(row.getLong(ordinal)) +case YearMonthIntervalType(start, end) => + (row: InternalRow, ordinal: Int) => +IntervalUtils.toYearMonthIntervalString( + row.getInt(ordinal), IntervalStringStyles.ANSI_STYLE, start, end) + case udt: UserDefinedType[_] => makeConverter(udt.sqlType) case dt: DataType => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala index 672d133..3ec1ea0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala @@ -26,7 +26,7 @@ import com.univocity.parsers.csv.CsvParser import org.apache.spark.SparkUpgradeException import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.{InternalRow, NoopFilters, OrderedFilters} -import org.apache.spark.sql.catalyst.expressions.{ExprUtils, GenericInternalRow} +import org.apache.spark.sql.catalyst.expressions.{Cast, EmptyRow, ExprUtils, GenericInternalRow, Literal} import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT import org.apache.spark.sql.errors.QueryExecutionErrors @@ -217,6 +217,11 @@ class UnivocityParser( IntervalUtils.safeStringToInterval(UTF8String.fromString(datum)) } +case ym: YearMonthInter
[spark] branch master updated (2fe6c94 -> f4237af)
This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

    from 2fe6c94  [SPARK-33996][BUILD][FOLLOW-UP] Match SBT's plugin checkstyle version to Maven's
     add f4237af  [SPARK-35998][SQL] Make from_csv/to_csv to handle year-month intervals properly

No new revisions were added by this update.

Summary of changes:
 .../sql/catalyst/csv/UnivocityGenerator.scala    |  7 +-
 .../spark/sql/catalyst/csv/UnivocityParser.scala |  7 +-
 .../org/apache/spark/sql/CsvFunctionsSuite.scala | 29 ++
 3 files changed, 41 insertions(+), 2 deletions(-)

- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
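The commit message's own examples, written as spark.sql calls so the before/after difference is easy to reproduce:

```scala
// After the fix this prints INTERVAL '1-2' YEAR TO MONTH instead of the raw month count (14).
spark.sql("""SELECT to_csv(named_struct("a", interval '1-2' year to month))""").show(false)

// Before the fix this threw "Unsupported type: interval year to month"; now it parses.
spark.sql(
  """SELECT from_csv("interval '1-2' year to month", "a interval year to month")""").show(false)
```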
[spark] branch branch-3.2 updated: [SPARK-33996][BUILD][FOLLOW-UP] Match SBT's plugin checkstyle version to Maven's
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.2 by this push: new 5261600 [SPARK-33996][BUILD][FOLLOW-UP] Match SBT's plugin checkstyle version to Maven's 5261600 is described below commit 52616009da9f37be6f2fd2285eb602b04df96dee Author: Hyukjin Kwon AuthorDate: Mon Jul 5 18:55:45 2021 +0900 [SPARK-33996][BUILD][FOLLOW-UP] Match SBT's plugin checkstyle version to Maven's ### What changes were proposed in this pull request? This PR is a followup of https://github.com/apache/spark/pull/31019 that forgot to update SBT's to match. ### Why are the changes needed? To use the same version in both Maven and SBT. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? CI should test them. Closes #33207 from HyukjinKwon/SPARK-33996. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 2fe6c9454465fbac58ecb90bde6af42a489d8bbf) Signed-off-by: Hyukjin Kwon --- project/plugins.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/plugins.sbt b/project/plugins.sbt index e733f86..9e86e32 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -18,7 +18,7 @@ addSbtPlugin("com.etsy" % "sbt-checkstyle-plugin" % "3.1.1") // sbt-checkstyle-plugin uses an old version of checkstyle. Match it to Maven's. -libraryDependencies += "com.puppycrawl.tools" % "checkstyle" % "8.25" +libraryDependencies += "com.puppycrawl.tools" % "checkstyle" % "8.39" // checkstyle uses guava 23.0. libraryDependencies += "com.google.guava" % "guava" % "23.0" - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (7fe4c4a -> 2fe6c94)
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

    from 7fe4c4a  [SPARK-35989][SQL] Only remove redundant shuffle if shuffle origin is REPARTITION_BY_COL in AQE
     add 2fe6c94  [SPARK-33996][BUILD][FOLLOW-UP] Match SBT's plugin checkstyle version to Maven's

No new revisions were added by this update.

Summary of changes:
 project/plugins.sbt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.2 updated: [SPARK-35989][SQL] Only remove redundant shuffle if shuffle origin is REPARTITION_BY_COL in AQE
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.2 by this push: new ed7c81d [SPARK-35989][SQL] Only remove redundant shuffle if shuffle origin is REPARTITION_BY_COL in AQE ed7c81d is described below commit ed7c81dfaa9b286ad4ad67cc7ad28ba814093405 Author: ulysses-you AuthorDate: Mon Jul 5 17:10:42 2021 +0800 [SPARK-35989][SQL] Only remove redundant shuffle if shuffle origin is REPARTITION_BY_COL in AQE ### What changes were proposed in this pull request? Skip remove shuffle if it's shuffle origin is not `REPARTITION_BY_COL` in AQE. ### Why are the changes needed? `REPARTITION_BY_COL` doesn't guarantee the output partitioning number so we can remove it safely in AQE. For `REPARTITION_BY_NUM`, we should retain the shuffle which partition number is specified by user. For `REBALANCE_PARTITIONS_BY_COL`, it is a special shuffle used to rebalance partitions so we should not remove it. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? add test Closes #33188 from ulysses-you/SPARK-35989. Lead-authored-by: ulysses-you Co-authored-by: ulysses Signed-off-by: Wenchen Fan (cherry picked from commit 7fe4c4a9ad92cedd398a13a1781649dff57ca4d9) Signed-off-by: Wenchen Fan --- .../execution/exchange/EnsureRequirements.scala| 7 +- .../apache/spark/sql/execution/PlannerSuite.scala | 5 +++-- .../adaptive/AdaptiveQueryExecSuite.scala | 19 .../exchange/EnsureRequirementsSuite.scala | 25 +- 4 files changed, 42 insertions(+), 14 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala index a990700..d71933a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala @@ -250,7 +250,12 @@ object EnsureRequirements extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = plan.transformUp { // TODO: remove this after we create a physical operator for `RepartitionByExpression`. -case operator @ ShuffleExchangeExec(upper: HashPartitioning, child, _) => +// SPARK-35989: AQE will change the partition number so we should retain the REPARTITION_BY_NUM +// shuffle which is specified by user. And also we can not remove REBALANCE_PARTITIONS_BY_COL, +// it is a special shuffle used to rebalance partitions. +// So, here we only remove REPARTITION_BY_COL in AQE. 
+case operator @ ShuffleExchangeExec(upper: HashPartitioning, child, shuffleOrigin) +if shuffleOrigin == REPARTITION_BY_COL || !conf.adaptiveExecutionEnabled => def hasSemanticEqualPartitioning(partitioning: Partitioning): Boolean = { partitioning match { case lower: HashPartitioning if upper.semanticEquals(lower) => true diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index 0ea7599..fad6ed1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, DisableAdaptiveExecution} import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec} import org.apache.spark.sql.execution.columnar.{InMemoryRelation, InMemoryTableScanExec} -import org.apache.spark.sql.execution.exchange.{EnsureRequirements, ReusedExchangeExec, ShuffleExchangeExec} +import org.apache.spark.sql.execution.exchange.{EnsureRequirements, REPARTITION_BY_COL, ReusedExchangeExec, ShuffleExchangeExec} import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, SortMergeJoinExec} import org.apache.spark.sql.execution.reuse.ReuseExchangeAndSubquery import org.apache.spark.sql.functions._ @@ -420,7 +420,8 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { val inputPlan = ShuffleExchangeExec( partitioning, - DummySparkPlan(outputPartitioning = partitioning)) + DummySparkPlan(outputPartitioning = partitioning), + REPARTITION_BY_COL) val outputPlan = EnsureRequirements.apply(inputPlan) assertDistributionRequirementsAreSatisfied(outputPlan) if (outputPlan.collect { case e: ShuffleExchangeExec => true }.size == 1) { diff --git a/sql/core/src/te
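To make the new behavior concrete, here is a minimal sketch (not part of the patch; the DataFrame and column name are made up) of the two repartition flavors that EnsureRequirements now distinguishes when AQE is enabled:

```scala
import org.apache.spark.sql.SparkSession

object RedundantShuffleExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("SPARK-35989-example")
      .config("spark.sql.adaptive.enabled", "true")
      .getOrCreate()
    import spark.implicits._

    val df = (1 to 100).toDF("id")

    // Shuffle origin REPARTITION_BY_COL: no partition count was requested, so the second,
    // redundant shuffle may still be removed by EnsureRequirements under AQE.
    val removable = df.repartition($"id").repartition($"id")

    // Shuffle origin REPARTITION_BY_NUM: the user asked for exactly 10 partitions, so after
    // this change the shuffle is kept even though its partitioning looks redundant.
    val retained = df.repartition($"id").repartition(10, $"id")

    removable.explain()
    retained.explain()
    spark.stop()
  }
}
```

If this reading of the change is right, `retained.explain()` should still show an exchange for the numbered repartition, while the plan for `removable` collapses to a single exchange.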
[spark] branch master updated (044dddf -> 7fe4c4a)
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 044dddf [SPARK-35794][SQL] Allow custom plugin for AQE cost evaluator add 7fe4c4a [SPARK-35989][SQL] Only remove redundant shuffle if shuffle origin is REPARTITION_BY_COL in AQE No new revisions were added by this update. Summary of changes: .../execution/exchange/EnsureRequirements.scala| 7 +- .../apache/spark/sql/execution/PlannerSuite.scala | 5 +++-- .../adaptive/AdaptiveQueryExecSuite.scala | 19 .../exchange/EnsureRequirementsSuite.scala | 25 +- 4 files changed, 42 insertions(+), 14 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.2 updated: [SPARK-35794][SQL] Allow custom plugin for AQE cost evaluator
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.2 by this push: new 39b3a04 [SPARK-35794][SQL] Allow custom plugin for AQE cost evaluator 39b3a04 is described below commit 39b3a04bfec3385395df4418a3794dd276ba3271 Author: Cheng Su AuthorDate: Mon Jul 5 09:06:38 2021 + [SPARK-35794][SQL] Allow custom plugin for AQE cost evaluator ### What changes were proposed in this pull request? Currently AQE has a cost evaluator to decide whether to use the new plan after replanning. The currently used evaluator is `SimpleCostEvaluator`, which makes the decision based on the number of shuffles in the query plan. This is not a perfect cost evaluator, and different production environments might want to use different custom evaluators. E.g., sometimes we might still want to do a skew join even though it might introduce an extra shuffle (trading off resources for better latency); sometimes we might want to take [...] The approach is to introduce a new config for defining the sub-class name of `CostEvaluator` - `spark.sql.adaptive.customCostEvaluatorClass` - and to add `CostEvaluator.instantiate` to instantiate the cost evaluator class in `AdaptiveSparkPlanExec.costEvaluator`. ### Why are the changes needed? Make AQE cost evaluation more flexible. ### Does this PR introduce _any_ user-facing change? No, but an internal config is introduced - `spark.sql.adaptive.customCostEvaluatorClass` - to allow a custom implementation of `CostEvaluator`. ### How was this patch tested? Added a unit test in `AdaptiveQueryExecSuite.scala`. Closes #32944 from c21/aqe-cost. Authored-by: Cheng Su Signed-off-by: Wenchen Fan (cherry picked from commit 044dddf28860cabb813d6ccf9489ea6ef21dd11e) Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/internal/SQLConf.scala| 8 .../execution/adaptive/AdaptiveSparkPlanExec.scala | 6 ++- .../spark/sql/execution/adaptive/costing.scala | 30 - .../adaptive/AdaptiveQueryExecSuite.scala | 50 ++ 4 files changed, 91 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 59a310d..0f4d1ae 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -678,6 +678,14 @@ object SQLConf { .booleanConf .createWithDefault(true) + val ADAPTIVE_CUSTOM_COST_EVALUATOR_CLASS = +buildConf("spark.sql.adaptive.customCostEvaluatorClass") + .doc("The custom cost evaluator class to be used for adaptive execution. 
If not being set," + +" Spark will use its own SimpleCostEvaluator by default.") + .version("3.2.0") + .stringConf + .createOptional + val SUBEXPRESSION_ELIMINATION_ENABLED = buildConf("spark.sql.subexpressionElimination.enabled") .internal() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index cbf70e3..18aaf5b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -130,7 +130,11 @@ case class AdaptiveSparkPlanExec( } } - @transient private val costEvaluator = SimpleCostEvaluator + @transient private val costEvaluator = +conf.getConf(SQLConf.ADAPTIVE_CUSTOM_COST_EVALUATOR_CLASS) match { + case Some(className) => CostEvaluator.instantiate(className, session.sparkContext.getConf) + case _ => SimpleCostEvaluator +} @transient val initialPlan = context.session.withActive { applyPhysicalRules( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/costing.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/costing.scala index 293e619..56f29b9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/costing.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/costing.scala @@ -17,16 +17,42 @@ package org.apache.spark.sql.execution.adaptive +import org.apache.spark.SparkConf +import org.apache.spark.annotation.Unstable +import org.apache.spark.internal.Logging import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.util.Utils /** - * Represents the cost of a plan. + * An interface to represent the cost of a plan. + * + * @note This class is subject to be changed and/or moved in the nea
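Assuming the `CostEvaluator` and `Cost` shapes hinted at in the `costing.scala` diff above (a single `evaluateCost(plan: SparkPlan): Cost` method plus a `SimpleCost(value: Long)` implementation; the trait is marked `@Unstable`, so the exact API may change), a custom evaluator plugged in through the new config could look roughly like this - the package and class names are made up:

```scala
package com.example

import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.adaptive.{Cost, CostEvaluator, SimpleCost}
import org.apache.spark.sql.execution.exchange.ShuffleExchangeLike

// Example policy: cost a plan purely by the number of shuffle exchanges it contains.
// This mirrors what SimpleCostEvaluator does; a real deployment could also weigh skew-join
// handling, broadcast usage, estimated sizes, etc.
class ShuffleCountCostEvaluator extends CostEvaluator {
  override def evaluateCost(plan: SparkPlan): Cost = {
    val numShuffles = plan.collect { case s: ShuffleExchangeLike => s }.size
    SimpleCost(numShuffles.toLong)
  }
}
```

It would then be selected with `--conf spark.sql.adaptive.customCostEvaluatorClass=com.example.ShuffleCountCostEvaluator`; judging from the `CostEvaluator.instantiate(className, session.sparkContext.getConf)` call above, the class is presumably expected to expose a no-arg or `SparkConf`-accepting constructor.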
[spark] branch master updated (7f70350 -> 044dddf)
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 7f70350 [SPARK-36013][BUILD] Upgrade Dropwizard Metrics to 4.2.2 add 044dddf [SPARK-35794][SQL] Allow custom plugin for AQE cost evaluator No new revisions were added by this update. Summary of changes: .../org/apache/spark/sql/internal/SQLConf.scala| 8 .../execution/adaptive/AdaptiveSparkPlanExec.scala | 6 ++- .../spark/sql/execution/adaptive/costing.scala | 30 - .../adaptive/AdaptiveQueryExecSuite.scala | 50 ++ 4 files changed, 91 insertions(+), 3 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (554d5fe -> 7f70350)
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 554d5fe [SPARK-36010][BUILD] Upgrade sbt-antlr4 from 0.8.2 to 0.8.3 add 7f70350 [SPARK-36013][BUILD] Upgrade Dropwizard Metrics to 4.2.2 No new revisions were added by this update. Summary of changes: dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 10 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 10 +- pom.xml | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (6474226 -> 554d5fe)
This is an automated email from the ASF dual-hosted git repository. sarutak pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 6474226 [SPARK-35982][SQL] Allow from_json/to_json for map types where value types are year-month intervals add 554d5fe [SPARK-36010][BUILD] Upgrade sbt-antlr4 from 0.8.2 to 0.8.3 No new revisions were added by this update. Summary of changes: project/plugins.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.2 updated: [SPARK-35982][SQL] Allow from_json/to_json for map types where value types are year-month intervals
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.2 by this push: new 26bcf02 [SPARK-35982][SQL] Allow from_json/to_json for map types where value types are year-month intervals 26bcf02 is described below commit 26bcf028339c02ca75af31ab8105f7dbe58c49a9 Author: Kousuke Saruta AuthorDate: Mon Jul 5 10:35:50 2021 +0300 [SPARK-35982][SQL] Allow from_json/to_json for map types where value types are year-month intervals ### What changes were proposed in this pull request? This PR fixes two issues. One is that `to_json` doesn't support `map` types where value types are `year-month` interval types like: ``` spark-sql> select to_json(map('a', interval '1-2' year to month)); 21/07/02 11:38:15 ERROR SparkSQLDriver: Failed in [select to_json(map('a', interval '1-2' year to month))] java.lang.RuntimeException: Failed to convert value 14 (class of class java.lang.Integer) with the type of YearMonthIntervalType(0,1) to JSON. ``` The other issue is that even if the `to_json` issue is resolved, `from_json` doesn't support parsing a `year-month` interval string from JSON. So the result of the following query will be `null`. ``` spark-sql> select from_json(to_json(map('a', interval '1-2' year to month)), 'a interval year to month'); {"a":null} ``` ### Why are the changes needed? There should be no reason why year-month intervals cannot be used as map value types; `CalendarIntervalType` values already can be. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New tests. Closes #33181 from sarutak/map-json-yminterval. Authored-by: Kousuke Saruta Signed-off-by: Max Gekk (cherry picked from commit 647422685292cd1a46766afa9b07b6fcfc181bbd) Signed-off-by: Max Gekk --- .../spark/sql/catalyst/json/JacksonGenerator.scala | 9 .../spark/sql/catalyst/json/JacksonParser.scala| 7 ++ .../org/apache/spark/sql/JsonFunctionsSuite.scala | 26 ++ 3 files changed, 42 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala index 2567438..9777d56 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala @@ -147,6 +147,15 @@ private[sql] class JacksonGenerator( (row: SpecializedGetters, ordinal: Int) => gen.writeString(row.getInterval(ordinal).toString) +case YearMonthIntervalType(start, end) => + (row: SpecializedGetters, ordinal: Int) => +val ymString = IntervalUtils.toYearMonthIntervalString( + row.getInt(ordinal), + IntervalStringStyles.ANSI_STYLE, + start, + end) +gen.writeString(ymString) + case BinaryType => (row: SpecializedGetters, ordinal: Int) => gen.writeBinary(row.getBinary(ordinal)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index 27e1411..2aa735d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -295,6 +295,13 @@ class JacksonParser( IntervalUtils.safeStringToInterval(UTF8String.fromString(parser.getText)) } +case ym: YearMonthIntervalType => (parser: JsonParser) => 
parseJsonToken[Integer](parser, dataType) { +case VALUE_STRING => + val expr = Cast(Literal(parser.getText), ym) + Integer.valueOf(expr.eval(EmptyRow).asInstanceOf[Int]) + } + case st: StructType => val fieldConverters = st.map(_.dataType).map(makeConverter).toArray (parser: JsonParser) => parseJsonToken[InternalRow](parser, dataType) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 5485cc1..c2bea8c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql import java.text.SimpleDateFormat +import java.time.Period import java.util.Locale import collection.JavaConverters._ @@ -27,6 +28,7 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types
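As a quick illustration of the user-visible effect, here is a spark-shell style sketch (not from the patch; the exact JSON string rendered for the interval is assumed from the ANSI-style formatter used in `JacksonGenerator` and may differ slightly):

```scala
import org.apache.spark.sql.functions.{from_json, to_json}
import spark.implicits._

val df = spark.sql("SELECT map('a', INTERVAL '1-2' YEAR TO MONTH) AS m")

// With this change, to_json no longer throws; the year-month interval is written as an
// ANSI interval string, e.g. something like {"a":"INTERVAL '1-2' YEAR TO MONTH"}.
val json = df.select(to_json($"m").as("j"))
json.show(false)

// And from_json can read it back as a year-month interval instead of returning null.
json.select(from_json($"j", "a INTERVAL YEAR TO MONTH", Map.empty[String, String]).as("parsed"))
  .show(false)
```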
[spark] branch master updated: [SPARK-35982][SQL] Allow from_json/to_json for map types where value types are year-month intervals
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 6474226 [SPARK-35982][SQL] Allow from_json/to_json for map types where value types are year-month intervals 6474226 is described below commit 647422685292cd1a46766afa9b07b6fcfc181bbd Author: Kousuke Saruta AuthorDate: Mon Jul 5 10:35:50 2021 +0300 [SPARK-35982][SQL] Allow from_json/to_json for map types where value types are year-month intervals ### What changes were proposed in this pull request? This PR fixes two issues. One is that `to_json` doesn't support `map` types where value types are `year-month` interval types like: ``` spark-sql> select to_json(map('a', interval '1-2' year to month)); 21/07/02 11:38:15 ERROR SparkSQLDriver: Failed in [select to_json(map('a', interval '1-2' year to month))] java.lang.RuntimeException: Failed to convert value 14 (class of class java.lang.Integer) with the type of YearMonthIntervalType(0,1) to JSON. ``` The other issue is that even if the `to_json` issue is resolved, `from_json` doesn't support parsing a `year-month` interval string from JSON. So the result of the following query will be `null`. ``` spark-sql> select from_json(to_json(map('a', interval '1-2' year to month)), 'a interval year to month'); {"a":null} ``` ### Why are the changes needed? There should be no reason why year-month intervals cannot be used as map value types; `CalendarIntervalType` values already can be. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New tests. Closes #33181 from sarutak/map-json-yminterval. Authored-by: Kousuke Saruta Signed-off-by: Max Gekk --- .../spark/sql/catalyst/json/JacksonGenerator.scala | 9 .../spark/sql/catalyst/json/JacksonParser.scala| 7 ++ .../org/apache/spark/sql/JsonFunctionsSuite.scala | 26 ++ 3 files changed, 42 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala index 2567438..9777d56 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala @@ -147,6 +147,15 @@ private[sql] class JacksonGenerator( (row: SpecializedGetters, ordinal: Int) => gen.writeString(row.getInterval(ordinal).toString) +case YearMonthIntervalType(start, end) => + (row: SpecializedGetters, ordinal: Int) => +val ymString = IntervalUtils.toYearMonthIntervalString( + row.getInt(ordinal), + IntervalStringStyles.ANSI_STYLE, + start, + end) +gen.writeString(ymString) + case BinaryType => (row: SpecializedGetters, ordinal: Int) => gen.writeBinary(row.getBinary(ordinal)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index 27e1411..2aa735d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -295,6 +295,13 @@ class JacksonParser( IntervalUtils.safeStringToInterval(UTF8String.fromString(parser.getText)) } +case ym: YearMonthIntervalType => (parser: JsonParser) => + parseJsonToken[Integer](parser, dataType) { +case VALUE_STRING => + val expr = Cast(Literal(parser.getText), 
ym) + Integer.valueOf(expr.eval(EmptyRow).asInstanceOf[Int]) + } + case st: StructType => val fieldConverters = st.map(_.dataType).map(makeConverter).toArray (parser: JsonParser) => parseJsonToken[InternalRow](parser, dataType) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 5485cc1..c2bea8c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql import java.text.SimpleDateFormat +import java.time.Period import java.util.Locale import collection.JavaConverters._ @@ -27,6 +28,7 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ +import org.apache.spark.sql.types.YearMonthIntervalType.{MONTH, YEAR} class JsonFunctionsSuite extends