[spark] branch branch-3.2 updated: [SPARK-36011][SQL] Disallow altering permanent views based on temporary views or UDFs

2021-07-05 Thread wenchen
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
 new 176b055  [SPARK-36011][SQL] Disallow altering permanent views based on 
temporary views or UDFs
176b055 is described below

commit 176b055c1245e3a12295038dccd02b1770fe4ad4
Author: RoryQi <1242949...@qq.com>
AuthorDate: Tue Jul 6 14:56:12 2021 +0800

[SPARK-36011][SQL] Disallow altering permanent views based on temporary 
views or UDFs

### What changes were proposed in this pull request?
PR #15764 disabled creating permanent views based on temporary views or UDFs, but AlterViewAsCommand didn't block temporary objects.

### Why are the changes needed?
More robust view canonicalization.

### Does this PR introduce _any_ user-facing change?
Yes, now if you alter a permanent view based on temporary views or UDFs, 
the operation will fail.
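
A minimal sketch of the new behavior (the `spark` session and the table/view names here are illustrative, not part of the patch):

```scala
import org.apache.spark.sql.{AnalysisException, SparkSession}

val spark = SparkSession.builder().master("local[1]").getOrCreate()
spark.sql("CREATE TABLE t(id INT) USING parquet")
spark.sql("CREATE VIEW v1 AS SELECT * FROM t")            // permanent view
spark.sql("CREATE TEMPORARY VIEW v2 AS SELECT * FROM t")  // temporary view

// Previously this succeeded and left v1 silently depending on the session-scoped v2;
// with this patch it fails analysis, matching the existing CREATE VIEW behavior.
try {
  spark.sql("ALTER VIEW v1 AS SELECT * FROM v2")
} catch {
  case e: AnalysisException => println(e.getMessage)
}
```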

### How was this patch tested?
Add new unit tests.

Closes #33204 from jerqi/alter_view.

Authored-by: RoryQi <1242949...@qq.com>
Signed-off-by: Wenchen Fan 
(cherry picked from commit e0c6b2e9655331e308a78d972db4a18d24b73e37)
Signed-off-by: Wenchen Fan 
---
 .../apache/spark/sql/execution/command/views.scala |  6 +++--
 .../spark/sql/execution/SQLViewTestSuite.scala | 28 ++
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
index ebcd277..5e92ce2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
@@ -259,7 +259,10 @@ case class AlterViewAsCommand(
   def markAsAnalyzed(): LogicalPlan = copy(isAnalyzed = true)
 
   override def run(session: SparkSession): Seq[Row] = {
-    if (session.sessionState.catalog.isTempView(name)) {
+    val isTemporary = session.sessionState.catalog.isTempView(name)
+    verifyTemporaryObjectsNotExists(session.sessionState.catalog, isTemporary, name, query)
+    verifyAutoGeneratedAliasesNotExists(query, isTemporary, name)
+    if (isTemporary) {
       alterTemporaryView(session, query)
     } else {
       alterPermanentView(session, query)
@@ -286,7 +289,6 @@ case class AlterViewAsCommand(
   }
 
   private def alterPermanentView(session: SparkSession, analyzedPlan: LogicalPlan): Unit = {
-    verifyAutoGeneratedAliasesNotExists(analyzedPlan, isTemporary = false, name)
     val viewMeta = session.sessionState.catalog.getTableMetadata(name)
 
     // Detect cyclic view reference on ALTER VIEW.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala
index 80e9019..bc64f51 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala
@@ -465,4 +465,32 @@ class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession {
       }
     }
   }
+
+  test("SPARK-36011: Disallow altering permanent views based on temporary views or UDFs") {
+    import testImplicits._
+    withTable("t") {
+      (1 to 10).toDF("id").write.saveAsTable("t")
+      withView("v1") {
+        withTempView("v2") {
+          sql("CREATE VIEW v1 AS SELECT * FROM t")
+          sql("CREATE TEMPORARY VIEW v2 AS  SELECT * FROM t")
+          var e = intercept[AnalysisException] {
+            sql("ALTER VIEW v1 AS SELECT * FROM v2")
+          }.getMessage
+          assert(e.contains("Not allowed to create a permanent view `default`.`v1` by " +
+            "referencing a temporary view v2"))
+          val tempFunctionName = "temp_udf"
+          val functionClass = "test.org.apache.spark.sql.MyDoubleAvg"
+          withUserDefinedFunction(tempFunctionName -> true) {
+            sql(s"CREATE TEMPORARY FUNCTION $tempFunctionName AS '$functionClass'")
+            e = intercept[AnalysisException] {
+              sql(s"ALTER VIEW v1 AS SELECT $tempFunctionName(id) from t")
+            }.getMessage
+            assert(e.contains("Not allowed to create a permanent view `default`.`v1` by " +
+              s"referencing a temporary function `$tempFunctionName`"))
+          }
+        }
+      }
+    }
+  }
 }




[spark] branch master updated: [SPARK-36011][SQL] Disallow altering permanent views based on temporary views or UDFs

2021-07-05 Thread wenchen
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new e0c6b2e  [SPARK-36011][SQL] Disallow altering permanent views based on 
temporary views or UDFs
e0c6b2e is described below

commit e0c6b2e9655331e308a78d972db4a18d24b73e37
Author: RoryQi <1242949...@qq.com>
AuthorDate: Tue Jul 6 14:56:12 2021 +0800

[SPARK-36011][SQL] Disallow altering permanent views based on temporary 
views or UDFs

### What changes were proposed in this pull request?
PR #15764 disabled creating permanent views based on temporary views or UDFs, but AlterViewAsCommand didn't block temporary objects.

### Why are the changes needed?
More robust view canonicalization.

### Does this PR introduce _any_ user-facing change?
Yes, now if you alter a permanent view based on temporary views or UDFs, 
the operation will fail.

### How was this patch tested?
Add new unit tests.

Closes #33204 from jerqi/alter_view.

Authored-by: RoryQi <1242949...@qq.com>
Signed-off-by: Wenchen Fan 
---
 .../apache/spark/sql/execution/command/views.scala |  6 +++--
 .../spark/sql/execution/SQLViewTestSuite.scala | 28 ++
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
index ebcd277..5e92ce2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
@@ -259,7 +259,10 @@ case class AlterViewAsCommand(
   def markAsAnalyzed(): LogicalPlan = copy(isAnalyzed = true)
 
   override def run(session: SparkSession): Seq[Row] = {
-    if (session.sessionState.catalog.isTempView(name)) {
+    val isTemporary = session.sessionState.catalog.isTempView(name)
+    verifyTemporaryObjectsNotExists(session.sessionState.catalog, isTemporary, name, query)
+    verifyAutoGeneratedAliasesNotExists(query, isTemporary, name)
+    if (isTemporary) {
       alterTemporaryView(session, query)
     } else {
       alterPermanentView(session, query)
@@ -286,7 +289,6 @@ case class AlterViewAsCommand(
   }
 
   private def alterPermanentView(session: SparkSession, analyzedPlan: LogicalPlan): Unit = {
-    verifyAutoGeneratedAliasesNotExists(analyzedPlan, isTemporary = false, name)
     val viewMeta = session.sessionState.catalog.getTableMetadata(name)
 
     // Detect cyclic view reference on ALTER VIEW.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala
index 80e9019..bc64f51 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala
@@ -465,4 +465,32 @@ class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession {
       }
     }
   }
+
+  test("SPARK-36011: Disallow altering permanent views based on temporary views or UDFs") {
+    import testImplicits._
+    withTable("t") {
+      (1 to 10).toDF("id").write.saveAsTable("t")
+      withView("v1") {
+        withTempView("v2") {
+          sql("CREATE VIEW v1 AS SELECT * FROM t")
+          sql("CREATE TEMPORARY VIEW v2 AS  SELECT * FROM t")
+          var e = intercept[AnalysisException] {
+            sql("ALTER VIEW v1 AS SELECT * FROM v2")
+          }.getMessage
+          assert(e.contains("Not allowed to create a permanent view `default`.`v1` by " +
+            "referencing a temporary view v2"))
+          val tempFunctionName = "temp_udf"
+          val functionClass = "test.org.apache.spark.sql.MyDoubleAvg"
+          withUserDefinedFunction(tempFunctionName -> true) {
+            sql(s"CREATE TEMPORARY FUNCTION $tempFunctionName AS '$functionClass'")
+            e = intercept[AnalysisException] {
+              sql(s"ALTER VIEW v1 AS SELECT $tempFunctionName(id) from t")
+            }.getMessage
+            assert(e.contains("Not allowed to create a permanent view `default`.`v1` by " +
+              s"referencing a temporary function `$tempFunctionName`"))
+          }
+        }
+      }
+    }
+  }
 }




[spark] branch branch-3.2 updated: [SPARK-35978][SQL] Support non-reserved keyword TIMESTAMP_LTZ

2021-07-05 Thread gengliang
This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
 new e09feda  [SPARK-35978][SQL] Support non-reserved keyword TIMESTAMP_LTZ
e09feda is described below

commit e09feda1d23a89a6f15a900f8001405f47b7e058
Author: Gengliang Wang 
AuthorDate: Tue Jul 6 14:33:22 2021 +0800

[SPARK-35978][SQL] Support non-reserved keyword TIMESTAMP_LTZ

### What changes were proposed in this pull request?

Support new keyword `TIMESTAMP_LTZ`, which can be used for:

- timestamp with local time zone data type in DDL
- timestamp with local time zone data type in Cast clause.
- timestamp with local time zone data type literal

### Why are the changes needed?

Users can use `TIMESTAMP_LTZ` in DDL/Cast/Literals for the timestamp with 
local time zone type directly. The new keyword is independent of the SQL 
configuration `spark.sql.timestampType`.
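
A rough usage sketch (assuming a Spark session named `spark` built from this branch):

```scala
// DDL: declare a column as timestamp with local time zone
spark.sql("CREATE TABLE events (ts TIMESTAMP_LTZ) USING parquet")

// Cast clause
spark.sql("SELECT CAST('2021-07-06 14:33:22' AS TIMESTAMP_LTZ) AS ts").printSchema()

// Typed literal; maps to TimestampType regardless of spark.sql.timestampType
spark.sql("SELECT TIMESTAMP_LTZ '2021-07-06 14:33:22'").show()
```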

### Does this PR introduce _any_ user-facing change?

No, the new timestamp type is not released yet.

### How was this patch tested?

Unit test

Closes #33224 from gengliangwang/TIMESTAMP_LTZ.

Authored-by: Gengliang Wang 
Signed-off-by: Gengliang Wang 
(cherry picked from commit b0b9643cd76da48ed90e958e40717a664bc7494b)
Signed-off-by: Gengliang Wang 
---
 .../apache/spark/sql/catalyst/parser/AstBuilder.scala | 16 ++--
 .../sql/catalyst/parser/DataTypeParserSuite.scala |  1 +
 .../sql/catalyst/parser/ExpressionParserSuite.scala   | 19 +++
 3 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 680d781..d6363b5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -2119,6 +2119,13 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
         throw QueryParsingErrors.cannotParseValueTypeError(valueType, value, ctx)
       }
     }
+
+    def constructTimestampLTZLiteral(value: String): Literal = {
+      val zoneId = getZoneId(conf.sessionLocalTimeZone)
+      val specialTs = convertSpecialTimestamp(value, zoneId).map(Literal(_, TimestampType))
+      specialTs.getOrElse(toLiteral(stringToTimestamp(_, zoneId), TimestampType))
+    }
+
     try {
       valueType match {
         case "DATE" =>
@@ -2128,13 +2135,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
         case "TIMESTAMP_NTZ" =>
           val specialTs = convertSpecialTimestampNTZ(value).map(Literal(_, TimestampNTZType))
           specialTs.getOrElse(toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType))
+        case "TIMESTAMP_LTZ" =>
+          constructTimestampLTZLiteral(value)
         case "TIMESTAMP" =>
-          def constructTimestampLTZLiteral(value: String): Literal = {
-            val zoneId = getZoneId(conf.sessionLocalTimeZone)
-            val specialTs = convertSpecialTimestamp(value, zoneId).map(Literal(_, TimestampType))
-            specialTs.getOrElse(toLiteral(stringToTimestamp(_, zoneId), TimestampType))
-          }
-
           SQLConf.get.timestampType match {
             case TimestampNTZType =>
               val specialTs = convertSpecialTimestampNTZ(value).map(Literal(_, TimestampNTZType))
@@ -2529,6 +2532,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
       case ("date", Nil) => DateType
       case ("timestamp", Nil) => SQLConf.get.timestampType
       case ("timestamp_ntz", Nil) => TimestampNTZType
+      case ("timestamp_ltz", Nil) => TimestampType
       case ("string", Nil) => StringType
       case ("character" | "char", length :: Nil) => CharType(length.getText.toInt)
       case ("varchar", length :: Nil) => VarcharType(length.getText.toInt)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
index d34..97dd0db 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
@@ -59,6 +59,7 @@ class DataTypeParserSuite extends SparkFunSuite with SQLHelper {
   checkDataType("DATE", DateType)
   checkDataType("timestamp", TimestampType)
   checkDataType("timestamp_ntz", TimestampNTZType)
+  checkDataType("timestamp_ltz", TimestampType)
   checkDataType("string", StringType)
   checkDataType("ChaR(5)", CharType(5))
   chec

[spark] branch master updated (9544277 -> b0b9643)

2021-07-05 Thread gengliang
This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.


from 9544277  [SPARK-35788][SS] Metrics support for RocksDB instance
 add b0b9643  [SPARK-35978][SQL] Support non-reserved keyword TIMESTAMP_LTZ

No new revisions were added by this update.

Summary of changes:
 .../apache/spark/sql/catalyst/parser/AstBuilder.scala | 16 ++--
 .../sql/catalyst/parser/DataTypeParserSuite.scala |  1 +
 .../sql/catalyst/parser/ExpressionParserSuite.scala   | 19 +++
 3 files changed, 26 insertions(+), 10 deletions(-)




[spark] branch branch-3.2 updated: [SPARK-35788][SS] Metrics support for RocksDB instance

2021-07-05 Thread kabhwan
This is an automated email from the ASF dual-hosted git repository.

kabhwan pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
 new 22b303a  [SPARK-35788][SS] Metrics support for RocksDB instance
22b303a is described below

commit 22b303a64814d982e21b042cd12dad9b6499b5c6
Author: Yuanjian Li 
AuthorDate: Tue Jul 6 11:12:21 2021 +0900

[SPARK-35788][SS] Metrics support for RocksDB instance

### What changes were proposed in this pull request?
Add more metrics for the RocksDB instance. We transform the native states 
from RocksDB.

### Why are the changes needed?
Improve the usability with more metrics for RocksDB instance.
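
The patch collects the native state into a metrics object and serializes it to JSON when a version is committed (see the `logInfo(s"Committed $newVersion, stats = ${metrics.json}")` line in the diff below). A self-contained sketch of that json4s pattern; the case class and its fields here are an illustrative subset, not the real `RocksDBMetrics`:

```scala
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization

// Illustrative subset of the state a RocksDB instance could report.
case class SimpleRocksDBMetrics(
    numCommittedKeys: Long,
    totalSSTFilesBytes: Long,
    nativeOpsLatencyMicros: Map[String, Long]) {
  // Serialize the metrics to a compact JSON string for logging.
  def json: String = {
    implicit val formats = Serialization.formats(NoTypeHints)
    Serialization.write(this)
  }
}

val metrics = SimpleRocksDBMetrics(42L, 1024L, Map("get" -> 5L, "put" -> 7L))
println(metrics.json)  // {"numCommittedKeys":42,"totalSSTFilesBytes":1024,...}
```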

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Existing tests.

Closes #32934 from xuanyuanking/SPARK-35788.

Authored-by: Yuanjian Li 
Signed-off-by: Jungtaek Lim 
(cherry picked from commit 9544277b0a3ab4ed401092fcae751e20cfc62316)
Signed-off-by: Jungtaek Lim 
---
 .../sql/execution/streaming/state/RocksDB.scala| 72 +-
 .../streaming/state/RocksDBFileManager.scala   | 47 ++
 2 files changed, 118 insertions(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala
index f640018..9952d5d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala
@@ -26,6 +26,8 @@ import scala.ref.WeakReference
 import scala.util.Try
 
 import org.apache.hadoop.conf.Configuration
+import org.json4s.NoTypeHints
+import org.json4s.jackson.Serialization
 import org.rocksdb.{RocksDB => NativeRocksDB, _}
 
 import org.apache.spark.TaskContext
@@ -72,6 +74,7 @@ class RocksDB(
   dbOptions.setTableFormatConfig(tableFormatConfig)
   private val dbLogger = createLogger() // for forwarding RocksDB native logs to log4j
   dbOptions.setStatistics(new Statistics())
+  private val nativeStats = dbOptions.statistics()
 
   private val workingDir = createTempDir("workingDir")
   private val fileManager = new RocksDBFileManager(
@@ -84,6 +87,7 @@ class RocksDB(
   @volatile private var loadedVersion = -1L   // -1 = nothing valid is loaded
   @volatile private var numKeysOnLoadedVersion = 0L
   @volatile private var numKeysOnWritingVersion = 0L
+  @volatile private var fileManagerMetrics = RocksDBFileManagerMetrics.EMPTY_METRICS
 
   @GuardedBy("acquireLock")
   @volatile private var acquiredThreadInfo: AcquiredThreadInfo = _
@@ -105,6 +109,7 @@ class RocksDB(
 numKeysOnWritingVersion = metadata.numKeys
 numKeysOnLoadedVersion = metadata.numKeys
 loadedVersion = version
+fileManagerMetrics = fileManager.latestLoadCheckpointMetrics
   }
   writeBatch.clear()
   logInfo(s"Loaded $version")
@@ -223,6 +228,7 @@ class RocksDB(
   }
   numKeysOnLoadedVersion = numKeysOnWritingVersion
   loadedVersion = newVersion
+  fileManagerMetrics = fileManager.latestSaveCheckpointMetrics
   commitLatencyMs ++= Map(
 "writeBatch" -> writeTimeMs,
 "flush" -> flushTimeMs,
@@ -231,6 +237,7 @@ class RocksDB(
 "checkpoint" -> checkpointTimeMs,
 "fileSync" -> fileSyncTimeMs
   )
+  logInfo(s"Committed $newVersion, stats = ${metrics.json}")
   loadedVersion
 } catch {
   case t: Throwable =>
@@ -283,6 +290,30 @@ class RocksDB(
   /** Get the latest version available in the DFS */
   def getLatestVersion(): Long = fileManager.getLatestVersion()
 
+  /** Get current instantaneous statistics */
+  def metrics: RocksDBMetrics = {
+import HistogramType._
+val totalSSTFilesBytes = getDBProperty("rocksdb.total-sst-files-size")
+val readerMemUsage = getDBProperty("rocksdb.estimate-table-readers-mem")
+val memTableMemUsage = getDBProperty("rocksdb.size-all-mem-tables")
+val nativeOps = Seq("get" -> DB_GET, "put" -> DB_WRITE).toMap
+val nativeOpsLatencyMicros = nativeOps.mapValues { typ =>
+  RocksDBNativeHistogram(nativeStats.getHistogramData(typ))
+}
+
+RocksDBMetrics(
+  numKeysOnLoadedVersion,
+  numKeysOnWritingVersion,
+  readerMemUsage + memTableMemUsage,
+  totalSSTFilesBytes,
+  nativeOpsLatencyMicros.toMap,
+  commitLatencyMs,
+  bytesCopied = fileManagerMetrics.bytesCopied,
+  filesCopied = fileManagerMetrics.filesCopied,
+  filesReused = fileManagerMetrics.filesReused,
+  zipFileBytesUncompressed = fileManagerMetrics.zipFileBytesUncompressed)
+  }
+
   private def acquire(): Unit = acquireLock.synchronized {
 val newAcquiredThreadInfo = AcquiredThreadInfo()
 val waitStartTime = System.curre

[spark] branch master updated (8b46e26 -> 9544277)

2021-07-05 Thread kabhwan
This is an automated email from the ASF dual-hosted git repository.

kabhwan pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.


from 8b46e26  [SPARK-34302][SQL][FOLLOWUP] More code cleanup
 add 9544277  [SPARK-35788][SS] Metrics support for RocksDB instance

No new revisions were added by this update.

Summary of changes:
 .../sql/execution/streaming/state/RocksDB.scala| 72 +-
 .../streaming/state/RocksDBFileManager.scala   | 47 ++
 2 files changed, 118 insertions(+), 1 deletion(-)




[spark] branch branch-3.2 updated: [SPARK-34302][SQL][FOLLOWUP] More code cleanup

2021-07-05 Thread wenchen
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
 new 0df89d8  [SPARK-34302][SQL][FOLLOWUP] More code cleanup
0df89d8 is described below

commit 0df89d8999516c692dc08a84b5787e66d1d6f3a6
Author: Wenchen Fan 
AuthorDate: Tue Jul 6 03:43:42 2021 +0800

[SPARK-34302][SQL][FOLLOWUP] More code cleanup

### What changes were proposed in this pull request?

This is a followup of https://github.com/apache/spark/pull/33113, to do 
some code cleanup:
1. `UnresolvedFieldPosition` doesn't need to include the field name. We can 
get it through "context" (`AlterTableAlterColumn.column.name`).
2. Run `ResolveAlterTableCommands` in the main resolution batch, so that 
the column/field resolution is also unified between v1 and v2 commands (same 
error message).
3. Fail immediately in `ResolveAlterTableCommands` if we can't resolve the 
field, instead of waiting until `CheckAnalysis`. We don't expect other rules to 
resolve fields in ALTER  TABLE commands, so failing immediately is simpler and 
we can remove duplicated code in `CheckAnalysis`.

### Why are the changes needed?

code simplification.

### Does this PR introduce _any_ user-facing change?

no

### How was this patch tested?

existing tests

Closes #33213 from cloud-fan/follow.

Authored-by: Wenchen Fan 
Signed-off-by: Wenchen Fan 
(cherry picked from commit 8b46e26fc6770bd39d7fee070f88072b05d50df7)
Signed-off-by: Wenchen Fan 
---
 .../org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala  |  7 ++-
 .../spark/sql/catalyst/analysis/Analyzer.scala | 61 --
 .../sql/catalyst/analysis/CheckAnalysis.scala  | 37 -
 .../sql/catalyst/analysis/v2ResolutionPlans.scala  |  4 +-
 .../spark/sql/catalyst/parser/AstBuilder.scala |  9 ++--
 .../spark/sql/errors/QueryCompilationErrors.scala  | 14 +
 .../org/apache/spark/sql/types/StructType.scala|  2 +-
 .../spark/sql/catalyst/parser/DDLParserSuite.scala |  4 +-
 .../sql-tests/results/change-column.sql.out|  7 ++-
 .../spark/sql/connector/AlterTableTests.scala  | 28 --
 .../connector/V2CommandsCaseSensitivitySuite.scala | 10 ++--
 .../execution/command/PlanResolutionSuite.scala| 17 +++---
 .../v2/jdbc/JDBCTableCatalogSuite.scala| 16 +++---
 13 files changed, 95 insertions(+), 121 deletions(-)

diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala
index e13ce81..1afe26a 100644
--- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala
+++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala
@@ -50,7 +50,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
     val msg = intercept[AnalysisException] {
       sql(s"ALTER TABLE $catalogName.alt_table ALTER COLUMN bad_column DROP NOT NULL")
     }.getMessage
-    assert(msg.contains("Cannot update missing field bad_column"))
+    assert(msg.contains("Missing field bad_column"))
   }
 
   def testRenameColumn(tbl: String): Unit = {
@@ -103,8 +103,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
       val msg = intercept[AnalysisException] {
         sql(s"ALTER TABLE $catalogName.alt_table DROP COLUMN bad_column")
       }.getMessage
-      assert(
-        msg.contains(s"Cannot delete missing field bad_column in $catalogName.alt_table schema"))
+      assert(msg.contains(s"Missing field bad_column in table $catalogName.alt_table"))
     }
     // Drop a column from a not existing table
     val msg = intercept[AnalysisException] {
@@ -120,7 +119,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
       val msg2 = intercept[AnalysisException] {
         sql(s"ALTER TABLE $catalogName.alt_table ALTER COLUMN bad_column TYPE DOUBLE")
       }.getMessage
-      assert(msg2.contains("Cannot update missing field bad_column"))
+      assert(msg2.contains("Missing field bad_column"))
     }
     // Update column type in not existing table
     val msg = intercept[AnalysisException] {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index a381658..2d747f7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -269,6 +269,7 @@ class Analyzer(override val catalogManager: CatalogManager)
   ResolveRela

[spark] branch master updated: [SPARK-34302][SQL][FOLLOWUP] More code cleanup

2021-07-05 Thread wenchen
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new 8b46e26  [SPARK-34302][SQL][FOLLOWUP] More code cleanup
8b46e26 is described below

commit 8b46e26fc6770bd39d7fee070f88072b05d50df7
Author: Wenchen Fan 
AuthorDate: Tue Jul 6 03:43:42 2021 +0800

[SPARK-34302][SQL][FOLLOWUP] More code cleanup

### What changes were proposed in this pull request?

This is a followup of https://github.com/apache/spark/pull/33113, to do 
some code cleanup:
1. `UnresolvedFieldPosition` doesn't need to include the field name. We can 
get it through "context" (`AlterTableAlterColumn.column.name`).
2. Run `ResolveAlterTableCommands` in the main resolution batch, so that 
the column/field resolution is also unified between v1 and v2 commands (same 
error message).
3. Fail immediately in `ResolveAlterTableCommands` if we can't resolve the 
field, instead of waiting until `CheckAnalysis`. We don't expect other rules to 
resolve fields in ALTER  TABLE commands, so failing immediately is simpler and 
we can remove duplicated code in `CheckAnalysis`.

### Why are the changes needed?

code simplification.

### Does this PR introduce _any_ user-facing change?

no

### How was this patch tested?

existing tests

Closes #33213 from cloud-fan/follow.

Authored-by: Wenchen Fan 
Signed-off-by: Wenchen Fan 
---
 .../org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala  |  7 ++-
 .../spark/sql/catalyst/analysis/Analyzer.scala | 61 --
 .../sql/catalyst/analysis/CheckAnalysis.scala  | 37 -
 .../sql/catalyst/analysis/v2ResolutionPlans.scala  |  4 +-
 .../spark/sql/catalyst/parser/AstBuilder.scala |  9 ++--
 .../spark/sql/errors/QueryCompilationErrors.scala  | 14 +
 .../org/apache/spark/sql/types/StructType.scala|  2 +-
 .../spark/sql/catalyst/parser/DDLParserSuite.scala |  4 +-
 .../sql-tests/results/change-column.sql.out|  7 ++-
 .../spark/sql/connector/AlterTableTests.scala  | 28 --
 .../connector/V2CommandsCaseSensitivitySuite.scala | 10 ++--
 .../execution/command/PlanResolutionSuite.scala| 17 +++---
 .../v2/jdbc/JDBCTableCatalogSuite.scala| 16 +++---
 13 files changed, 95 insertions(+), 121 deletions(-)

diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala
index e13ce81..1afe26a 100644
--- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala
+++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala
@@ -50,7 +50,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
     val msg = intercept[AnalysisException] {
       sql(s"ALTER TABLE $catalogName.alt_table ALTER COLUMN bad_column DROP NOT NULL")
     }.getMessage
-    assert(msg.contains("Cannot update missing field bad_column"))
+    assert(msg.contains("Missing field bad_column"))
   }
 
   def testRenameColumn(tbl: String): Unit = {
@@ -103,8 +103,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
       val msg = intercept[AnalysisException] {
         sql(s"ALTER TABLE $catalogName.alt_table DROP COLUMN bad_column")
       }.getMessage
-      assert(
-        msg.contains(s"Cannot delete missing field bad_column in $catalogName.alt_table schema"))
+      assert(msg.contains(s"Missing field bad_column in table $catalogName.alt_table"))
     }
     // Drop a column from a not existing table
     val msg = intercept[AnalysisException] {
@@ -120,7 +119,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
      val msg2 = intercept[AnalysisException] {
         sql(s"ALTER TABLE $catalogName.alt_table ALTER COLUMN bad_column TYPE DOUBLE")
       }.getMessage
-      assert(msg2.contains("Cannot update missing field bad_column"))
+      assert(msg2.contains("Missing field bad_column"))
     }
     // Update column type in not existing table
     val msg = intercept[AnalysisException] {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index a381658..2d747f7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -269,6 +269,7 @@ class Analyzer(override val catalogManager: CatalogManager)
   ResolveRelations ::
   ResolveTables ::
   ResolvePartitionSpec ::
+  ResolveAlterTableCommands ::
   AddMet

[spark] branch branch-3.2 updated: [SPARK-35977][SQL] Support non-reserved keyword TIMESTAMP_NTZ

2021-07-05 Thread maxgekk
This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
 new 1ec37dd  [SPARK-35977][SQL] Support non-reserved keyword TIMESTAMP_NTZ
1ec37dd is described below

commit 1ec37dd164ae64c78bdccd8c0604b4013a692015
Author: Gengliang Wang 
AuthorDate: Mon Jul 5 22:30:44 2021 +0300

[SPARK-35977][SQL] Support non-reserved keyword TIMESTAMP_NTZ

### What changes were proposed in this pull request?

Support new keyword TIMESTAMP_NTZ, which can be used for:

- timestamp without time zone data type in DDL
- timestamp without time zone data type in Cast clause.
- timestamp without time zone data type literal

### Why are the changes needed?

Users can use `TIMESTAMP_NTZ` in DDL/Cast/Literals for the timestamp 
without time zone type directly.
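
A rough usage sketch (assuming a Spark session named `spark` built from this branch):

```scala
// DDL: declare a column as timestamp without time zone
spark.sql("CREATE TABLE logs (ts TIMESTAMP_NTZ) USING parquet")

// Cast clause
spark.sql("SELECT CAST('2021-07-05 22:30:44' AS TIMESTAMP_NTZ) AS ts").printSchema()

// Typed literal
spark.sql("SELECT TIMESTAMP_NTZ '2021-07-05 22:30:44'").show()
```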

### Does this PR introduce _any_ user-facing change?

No, the new timestamp type is not released yet.

### How was this patch tested?

Unit test

Closes #33221 from gengliangwang/timstamp_ntz.

Authored-by: Gengliang Wang 
Signed-off-by: Max Gekk 
(cherry picked from commit 5f44acff3df51721fe891ea50c0a5bcf3a37a719)
Signed-off-by: Max Gekk 
---
 .../main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala  | 4 
 .../org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala| 1 +
 .../org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala  | 3 +++
 3 files changed, 8 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 5b9107f..c650cf0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -2125,6 +2125,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
           val zoneId = getZoneId(conf.sessionLocalTimeZone)
           val specialDate = convertSpecialDate(value, zoneId).map(Literal(_, DateType))
           specialDate.getOrElse(toLiteral(stringToDate, DateType))
+        case "TIMESTAMP_NTZ" =>
+          val specialTs = convertSpecialTimestampNTZ(value).map(Literal(_, TimestampNTZType))
+          specialTs.getOrElse(toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType))
         case "TIMESTAMP" =>
           def constructTimestampLTZLiteral(value: String): Literal = {
             val zoneId = getZoneId(conf.sessionLocalTimeZone)
@@ -2525,6 +2528,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
       case ("double", Nil) => DoubleType
       case ("date", Nil) => DateType
       case ("timestamp", Nil) => SQLConf.get.timestampType
+      case ("timestamp_ntz", Nil) => TimestampNTZType
       case ("string", Nil) => StringType
       case ("character" | "char", length :: Nil) => CharType(length.getText.toInt)
       case ("varchar", length :: Nil) => VarcharType(length.getText.toInt)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
index a6b78e0..d34 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
@@ -58,6 +58,7 @@ class DataTypeParserSuite extends SparkFunSuite with SQLHelper {
   checkDataType("deC", DecimalType.USER_DEFAULT)
   checkDataType("DATE", DateType)
   checkDataType("timestamp", TimestampType)
+  checkDataType("timestamp_ntz", TimestampNTZType)
   checkDataType("string", StringType)
   checkDataType("ChaR(5)", CharType(5))
   checkDataType("ChaRacter(5)", CharType(5))
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
index 37e2d9b..7b13fa9 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
@@ -465,6 +465,9 @@ class ExpressionParserSuite extends AnalysisTest {
     intercept("timestamP '2016-33-11 20:54:00.000'", "Cannot parse the TIMESTAMP value")
 
     // Timestamp without time zone
+    assertEqual("tImEstAmp_Ntz '2016-03-11 20:54:00.000'",
+      Literal(LocalDateTime.parse("2016-03-11T20:54:00.000")))
+    intercept("tImEstAmp_Ntz '2016-33-11 20:54:00.000'", "Cannot parse the TIMESTAMP_NTZ value")
 withSQLConf(SQLConf.TIMESTAMP_TYPE.key -> 
Timest

[spark] branch master updated: [SPARK-35977][SQL] Support non-reserved keyword TIMESTAMP_NTZ

2021-07-05 Thread maxgekk
This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new 5f44acf  [SPARK-35977][SQL] Support non-reserved keyword TIMESTAMP_NTZ
5f44acf is described below

commit 5f44acff3df51721fe891ea50c0a5bcf3a37a719
Author: Gengliang Wang 
AuthorDate: Mon Jul 5 22:30:44 2021 +0300

[SPARK-35977][SQL] Support non-reserved keyword TIMESTAMP_NTZ

### What changes were proposed in this pull request?

Support new keyword TIMESTAMP_NTZ, which can be used for:

- timestamp without time zone data type in DDL
- timestamp without time zone data type in Cast clause.
- timestamp without time zone data type literal

### Why are the changes needed?

Users can use `TIMESTAMP_NTZ` in DDL/Cast/Literals for the timestamp 
without time zone type directly.

### Does this PR introduce _any_ user-facing change?

No, the new timestamp type is not released yet.

### How was this patch tested?

Unit test

Closes #33221 from gengliangwang/timstamp_ntz.

Authored-by: Gengliang Wang 
Signed-off-by: Max Gekk 
---
 .../main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala  | 4 
 .../org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala| 1 +
 .../org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala  | 3 +++
 3 files changed, 8 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 5b9107f..c650cf0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -2125,6 +2125,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
           val zoneId = getZoneId(conf.sessionLocalTimeZone)
           val specialDate = convertSpecialDate(value, zoneId).map(Literal(_, DateType))
           specialDate.getOrElse(toLiteral(stringToDate, DateType))
+        case "TIMESTAMP_NTZ" =>
+          val specialTs = convertSpecialTimestampNTZ(value).map(Literal(_, TimestampNTZType))
+          specialTs.getOrElse(toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType))
         case "TIMESTAMP" =>
           def constructTimestampLTZLiteral(value: String): Literal = {
             val zoneId = getZoneId(conf.sessionLocalTimeZone)
@@ -2525,6 +2528,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
       case ("double", Nil) => DoubleType
       case ("date", Nil) => DateType
       case ("timestamp", Nil) => SQLConf.get.timestampType
+      case ("timestamp_ntz", Nil) => TimestampNTZType
       case ("string", Nil) => StringType
       case ("character" | "char", length :: Nil) => CharType(length.getText.toInt)
       case ("varchar", length :: Nil) => VarcharType(length.getText.toInt)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
index a6b78e0..d34 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
@@ -58,6 +58,7 @@ class DataTypeParserSuite extends SparkFunSuite with SQLHelper {
   checkDataType("deC", DecimalType.USER_DEFAULT)
   checkDataType("DATE", DateType)
   checkDataType("timestamp", TimestampType)
+  checkDataType("timestamp_ntz", TimestampNTZType)
   checkDataType("string", StringType)
   checkDataType("ChaR(5)", CharType(5))
   checkDataType("ChaRacter(5)", CharType(5))
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
index 37e2d9b..7b13fa9 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
@@ -465,6 +465,9 @@ class ExpressionParserSuite extends AnalysisTest {
     intercept("timestamP '2016-33-11 20:54:00.000'", "Cannot parse the TIMESTAMP value")
 
     // Timestamp without time zone
+    assertEqual("tImEstAmp_Ntz '2016-03-11 20:54:00.000'",
+      Literal(LocalDateTime.parse("2016-03-11T20:54:00.000")))
+    intercept("tImEstAmp_Ntz '2016-33-11 20:54:00.000'", "Cannot parse the TIMESTAMP_NTZ value")
     withSQLConf(SQLConf.TIMESTAMP_TYPE.key -> TimestampTypes.TIMESTAMP_NTZ.toString) {
   assertEqual("tImEstAmp '2016-03-11 20:54:00.000'",
 Literal(

[spark] branch branch-3.2 updated: [SPARK-35979][SQL] Return different timestamp literals based on the default timestamp type

2021-07-05 Thread gengliang
This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
 new a9947cb  [SPARK-35979][SQL] Return different timestamp literals based 
on the default timestamp type
a9947cb is described below

commit a9947cbd716b83e2f65dfec035c7abf29ea40922
Author: Gengliang Wang 
AuthorDate: Tue Jul 6 00:54:58 2021 +0800

[SPARK-35979][SQL] Return different timestamp literals based on the default 
timestamp type

### What changes were proposed in this pull request?

For the timestamp literal, it should have the following behavior.
1. When `spark.sql.timestampType` is TIMESTAMP_NTZ: if there is no time 
zone part, return timestamp without time zone literal; otherwise, return 
timestamp with local time zone literal

2. When `spark.sql.timestampType` is TIMESTAMP_LTZ: return timestamp with 
local time zone literal

### Why are the changes needed?

When the default timestamp type is TIMESTAMP_NTZ, a timestamp literal should be resolved as TIMESTAMP_NTZ when there is no time zone part in the string.

From section 5.3 "<literal>" of the ANSI SQL standard 2011:
```
27) The declared type of a <timestamp literal> that does not specify <time zone interval> is TIMESTAMP(P) WITHOUT TIME ZONE, where P is the number of digits in <seconds fraction>, if specified, and 0 (zero) otherwise. The declared type of a <timestamp literal> that specifies <time zone interval> is TIMESTAMP(P) WITH TIME ZONE, where P is the number of digits in <seconds fraction>, if specified, and 0 (zero) otherwise.
```
Since we don't have "timestamp with time zone", we use timestamp with local 
time zone instead.
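
A sketch of the intended behavior described above (the session and config values are illustrative; the default `spark.sql.timestampType` is TIMESTAMP_LTZ):

```scala
spark.conf.set("spark.sql.timestampType", "TIMESTAMP_NTZ")

// No time zone part in the string: expected to resolve to a timestamp without time zone literal
spark.sql("SELECT TIMESTAMP '2021-07-06 00:54:58'").printSchema()

// Explicit zone offset: expected to resolve to a timestamp with local time zone literal
spark.sql("SELECT TIMESTAMP '2021-07-06 00:54:58+08:00'").printSchema()

// With spark.sql.timestampType = TIMESTAMP_LTZ both literals stay timestamp with local time zone
```
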
### Does this PR introduce _any_ user-facing change?

No, the new timestmap type and the default timestamp configuration is not 
released yet.

### How was this patch tested?

Unit test

Closes #33215 from gengliangwang/tsLiteral.

Authored-by: Gengliang Wang 
Signed-off-by: Gengliang Wang 
(cherry picked from commit 2fffec7de8d31bd01c8acd8bca72acacaf189c97)
Signed-off-by: Gengliang Wang 
---
 .../spark/sql/catalyst/parser/AstBuilder.scala | 32 ++
 .../spark/sql/catalyst/util/DateTimeUtils.scala| 31 +
 .../org/apache/spark/sql/internal/SQLConf.scala|  6 ++--
 .../catalyst/parser/ExpressionParserSuite.scala| 12 
 .../sql/catalyst/util/DateTimeUtilsSuite.scala | 15 ++
 5 files changed, 83 insertions(+), 13 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 361ecc1..5b9107f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -38,8 +38,8 @@ import org.apache.spark.sql.catalyst.parser.SqlBaseParser._
 import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.trees.CurrentOrigin
-import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, IntervalUtils}
-import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, getZoneId, stringToDate, stringToTimestamp}
+import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, DateTimeUtils, IntervalUtils}
+import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ, getZoneId, stringToDate, stringToTimestamp, stringToTimestampWithoutTimeZone}
 import org.apache.spark.sql.catalyst.util.IntervalUtils.IntervalUnit
 import org.apache.spark.sql.connector.catalog.{SupportsNamespaces, TableCatalog}
 import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition
@@ -2126,9 +2126,31 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
           val specialDate = convertSpecialDate(value, zoneId).map(Literal(_, DateType))
           specialDate.getOrElse(toLiteral(stringToDate, DateType))
         case "TIMESTAMP" =>
-          val zoneId = getZoneId(conf.sessionLocalTimeZone)
-          val specialTs = convertSpecialTimestamp(value, zoneId).map(Literal(_, TimestampType))
-          specialTs.getOrElse(toLiteral(stringToTimestamp(_, zoneId), TimestampType))
+          def constructTimestampLTZLiteral(value: String): Literal = {
+            val zoneId = getZoneId(conf.sessionLocalTimeZone)
+            val specialTs = convertSpecialTimestamp(value, zoneId).map(Literal(_, TimestampType))
+            specialTs.getOrElse(toLiteral(stringToTimestamp(_, zoneId), TimestampType))
+          }
+
+          SQLConf.get.timestampType match {
+            case TimestampNTZType =>
+  val specialTs = convertSpecialTimestampNTZ(valu

[spark] branch master updated (c605ba2 -> 2fffec7)

2021-07-05 Thread gengliang
This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.


from c605ba2  [SPARK-35664][SQL][FOLLOWUP] Fix incorrect comment for 
TimestampNTZType
 add 2fffec7  [SPARK-35979][SQL] Return different timestamp literals based 
on the default timestamp type

No new revisions were added by this update.

Summary of changes:
 .../spark/sql/catalyst/parser/AstBuilder.scala | 32 ++
 .../spark/sql/catalyst/util/DateTimeUtils.scala| 31 +
 .../org/apache/spark/sql/internal/SQLConf.scala|  6 ++--
 .../catalyst/parser/ExpressionParserSuite.scala| 12 
 .../sql/catalyst/util/DateTimeUtilsSuite.scala | 15 ++
 5 files changed, 83 insertions(+), 13 deletions(-)




[spark] branch branch-3.2 updated: [SPARK-35664][SQL][FOLLOWUP] Fix incorrect comment for TimestampNTZType

2021-07-05 Thread gengliang
This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
 new d3e8c9c  [SPARK-35664][SQL][FOLLOWUP] Fix incorrect comment for 
TimestampNTZType
d3e8c9c is described below

commit d3e8c9c78b364580523e3f915ee51369ca7df0bf
Author: gengjiaan 
AuthorDate: Mon Jul 5 18:48:00 2021 +0800

[SPARK-35664][SQL][FOLLOWUP] Fix incorrect comment for TimestampNTZType

### What changes were proposed in this pull request?
This PR fixes the incorrect comment for `TimestampNTZType`.

### Why are the changes needed?
Fix the incorrect comment

### Does this PR introduce _any_ user-facing change?
'No'.

### How was this patch tested?
No need.

Closes #33218 from beliefer/SPARK-35664-followup.

Authored-by: gengjiaan 
Signed-off-by: Gengliang Wang 
(cherry picked from commit c605ba2d46742ca13db794ca1be136a4b10b652e)
Signed-off-by: Gengliang Wang 
---
 sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala
index 15a93a7..f23f3c6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala
@@ -116,7 +116,7 @@ object Encoders {
 
   /**
 * Creates an encoder that serializes instances of the `java.time.LocalDateTime` class
-   * to the internal representation of nullable Catalyst's DateType.
+   * to the internal representation of nullable Catalyst's TimestampNTZType.
*
* @since 3.2.0
*/




[spark] branch master updated: [SPARK-35664][SQL][FOLLOWUP] Fix incorrect comment for TimestampNTZType

2021-07-05 Thread gengliang
This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new c605ba2  [SPARK-35664][SQL][FOLLOWUP] Fix incorrect comment for 
TimestampNTZType
c605ba2 is described below

commit c605ba2d46742ca13db794ca1be136a4b10b652e
Author: gengjiaan 
AuthorDate: Mon Jul 5 18:48:00 2021 +0800

[SPARK-35664][SQL][FOLLOWUP] Fix incorrect comment for TimestampNTZType

### What changes were proposed in this pull request?
This PR fixes the incorrect comment for `TimestampNTZType`.

### Why are the changes needed?
Fix the incorrect comment

### Does this PR introduce _any_ user-facing change?
'No'.

### How was this patch tested?
No need.

Closes #33218 from beliefer/SPARK-35664-followup.

Authored-by: gengjiaan 
Signed-off-by: Gengliang Wang 
---
 sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala
index 15a93a7..f23f3c6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala
@@ -116,7 +116,7 @@ object Encoders {
 
   /**
 * Creates an encoder that serializes instances of the `java.time.LocalDateTime` class
-   * to the internal representation of nullable Catalyst's DateType.
+   * to the internal representation of nullable Catalyst's TimestampNTZType.
*
* @since 3.2.0
*/




[spark] branch branch-3.2 updated: [SPARK-35998][SQL] Make from_csv/to_csv to handle year-month intervals properly

2021-07-05 Thread maxgekk
This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
 new 544b7e1  [SPARK-35998][SQL] Make from_csv/to_csv to handle year-month 
intervals properly
544b7e1 is described below

commit 544b7e16acf51b7c2a9555fb4ebe7b19a00e
Author: Kousuke Saruta 
AuthorDate: Mon Jul 5 13:10:50 2021 +0300

[SPARK-35998][SQL] Make from_csv/to_csv to handle year-month intervals 
properly

### What changes were proposed in this pull request?

This PR fixes an issue that `from_csv/to_csv` doesn't handle year-month 
intervals properly.
`from_csv` throws exception if year-month interval types are given.
```
spark-sql> select from_csv("interval '1-2' year to month", "a interval year to month");
21/07/03 04:32:24 ERROR SparkSQLDriver: Failed in [select from_csv("interval '1-2' year to month", "a interval year to month")]
java.lang.Exception: Unsupported type: interval year to month
at org.apache.spark.sql.errors.QueryExecutionErrors$.unsupportedTypeError(QueryExecutionErrors.scala:775)
at org.apache.spark.sql.catalyst.csv.UnivocityParser.makeConverter(UnivocityParser.scala:224)
at org.apache.spark.sql.catalyst.csv.UnivocityParser.$anonfun$valueConverters$1(UnivocityParser.scala:134)
```

Also, `to_csv` doesn't handle year-month interval types properly, though no exception is thrown: the result of `to_csv` for a year-month interval is not in the ANSI interval compliant form.

```
spark-sql> select to_csv(named_struct("a", interval '1-2' year to month));
14
```
The result above should be `INTERVAL '1-2' YEAR TO MONTH`.
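
With the fix, a sketch of the expected behavior (run in a build containing this patch):

```scala
// to_csv now renders the value in ANSI form instead of the raw month count
spark.sql("SELECT to_csv(named_struct('a', INTERVAL '1-2' YEAR TO MONTH))").show(false)
// expected: INTERVAL '1-2' YEAR TO MONTH

// from_csv with a year-month interval field in the schema no longer throws "Unsupported type"
spark.sql("""SELECT from_csv("INTERVAL '1-2' YEAR TO MONTH", "a interval year to month")""").show(false)
```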

### Why are the changes needed?

Bug fix.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

New tests.

Closes #33210 from sarutak/csv-yminterval.

Authored-by: Kousuke Saruta 
Signed-off-by: Max Gekk 
(cherry picked from commit f4237aff7ebece0b8d61e680ecbe759850f25af8)
Signed-off-by: Max Gekk 
---
 .../sql/catalyst/csv/UnivocityGenerator.scala  |  7 +-
 .../spark/sql/catalyst/csv/UnivocityParser.scala   |  7 +-
 .../org/apache/spark/sql/CsvFunctionsSuite.scala   | 29 ++
 3 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala
index 11b31ce..5d70ccb 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala
@@ -22,7 +22,7 @@ import java.io.Writer
 import com.univocity.parsers.csv.CsvWriter
 
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.util.{DateFormatter, TimestampFormatter}
+import org.apache.spark.sql.catalyst.util.{DateFormatter, IntervalStringStyles, IntervalUtils, TimestampFormatter}
 import org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT
 import org.apache.spark.sql.types._
 
@@ -61,6 +61,11 @@ class UnivocityGenerator(
 case TimestampType =>
   (row: InternalRow, ordinal: Int) => 
timestampFormatter.format(row.getLong(ordinal))
 
+case YearMonthIntervalType(start, end) =>
+  (row: InternalRow, ordinal: Int) =>
+IntervalUtils.toYearMonthIntervalString(
+  row.getInt(ordinal), IntervalStringStyles.ANSI_STYLE, start, end)
+
 case udt: UserDefinedType[_] => makeConverter(udt.sqlType)
 
 case dt: DataType =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
index 672d133..3ec1ea0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
@@ -26,7 +26,7 @@ import com.univocity.parsers.csv.CsvParser
 import org.apache.spark.SparkUpgradeException
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.{InternalRow, NoopFilters, OrderedFilters}
-import org.apache.spark.sql.catalyst.expressions.{ExprUtils, GenericInternalRow}
+import org.apache.spark.sql.catalyst.expressions.{Cast, EmptyRow, ExprUtils, GenericInternalRow, Literal}
 import org.apache.spark.sql.catalyst.util._
 import org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT
 import org.apache.spark.sql.errors.QueryExecutionErrors
@@ -217,6 +217,11 @@ class UnivocityParser(
 IntervalUtils.safeStringToInterval(UTF8String.fromString(datum))
   }
 
+case ym: YearMonthInter

[spark] branch master updated (2fe6c94 -> f4237af)

2021-07-05 Thread maxgekk
This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.


from 2fe6c94  [SPARK-33996][BUILD][FOLLOW-UP] Match SBT's plugin checkstyle 
version to Maven's
 add f4237af  [SPARK-35998][SQL] Make from_csv/to_csv to handle year-month 
intervals properly

No new revisions were added by this update.

Summary of changes:
 .../sql/catalyst/csv/UnivocityGenerator.scala  |  7 +-
 .../spark/sql/catalyst/csv/UnivocityParser.scala   |  7 +-
 .../org/apache/spark/sql/CsvFunctionsSuite.scala   | 29 ++
 3 files changed, 41 insertions(+), 2 deletions(-)




[spark] branch branch-3.2 updated: [SPARK-33996][BUILD][FOLLOW-UP] Match SBT's plugin checkstyle version to Maven's

2021-07-05 Thread gurwls223
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
 new 5261600  [SPARK-33996][BUILD][FOLLOW-UP] Match SBT's plugin checkstyle 
version to Maven's
5261600 is described below

commit 52616009da9f37be6f2fd2285eb602b04df96dee
Author: Hyukjin Kwon 
AuthorDate: Mon Jul 5 18:55:45 2021 +0900

[SPARK-33996][BUILD][FOLLOW-UP] Match SBT's plugin checkstyle version to 
Maven's

### What changes were proposed in this pull request?

This PR is a followup of https://github.com/apache/spark/pull/31019 that 
forgot to update SBT's to match.

### Why are the changes needed?

To use the same version in both Maven and SBT.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

CI should test them.

Closes #33207 from HyukjinKwon/SPARK-33996.

Authored-by: Hyukjin Kwon 
Signed-off-by: Hyukjin Kwon 
(cherry picked from commit 2fe6c9454465fbac58ecb90bde6af42a489d8bbf)
Signed-off-by: Hyukjin Kwon 
---
 project/plugins.sbt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/project/plugins.sbt b/project/plugins.sbt
index e733f86..9e86e32 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -18,7 +18,7 @@
 addSbtPlugin("com.etsy" % "sbt-checkstyle-plugin" % "3.1.1")
 
 // sbt-checkstyle-plugin uses an old version of checkstyle. Match it to Maven's.
-libraryDependencies += "com.puppycrawl.tools" % "checkstyle" % "8.25"
+libraryDependencies += "com.puppycrawl.tools" % "checkstyle" % "8.39"
 
 // checkstyle uses guava 23.0.
 libraryDependencies += "com.google.guava" % "guava" % "23.0"




[spark] branch master updated (7fe4c4a -> 2fe6c94)

2021-07-05 Thread gurwls223
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.


from 7fe4c4a  [SPARK-35989][SQL] Only remove redundant shuffle if shuffle 
origin is REPARTITION_BY_COL in AQE
 add 2fe6c94  [SPARK-33996][BUILD][FOLLOW-UP] Match SBT's plugin checkstyle 
version to Maven's

No new revisions were added by this update.

Summary of changes:
 project/plugins.sbt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)




[spark] branch branch-3.2 updated: [SPARK-35989][SQL] Only remove redundant shuffle if shuffle origin is REPARTITION_BY_COL in AQE

2021-07-05 Thread wenchen
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
 new ed7c81d  [SPARK-35989][SQL] Only remove redundant shuffle if shuffle 
origin is REPARTITION_BY_COL in AQE
ed7c81d is described below

commit ed7c81dfaa9b286ad4ad67cc7ad28ba814093405
Author: ulysses-you 
AuthorDate: Mon Jul 5 17:10:42 2021 +0800

[SPARK-35989][SQL] Only remove redundant shuffle if shuffle origin is 
REPARTITION_BY_COL in AQE

### What changes were proposed in this pull request?

Skip removing a shuffle if its shuffle origin is not `REPARTITION_BY_COL` when 
AQE is enabled.

### Why are the changes needed?

`REPARTITION_BY_COL` doesn't guarantee any particular output partition number, 
so AQE can remove it safely.

For `REPARTITION_BY_NUM`, we should retain the shuffle whose partition number 
was explicitly specified by the user.
For `REBALANCE_PARTITIONS_BY_COL`, this is a special shuffle used to rebalance 
partitions, so we should not remove it either.
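
As a rough illustration (not part of this change), the sketch below shows where these shuffle origins typically come from in the user API; it assumes an active SparkSession `spark`, and the mapping of API calls to origins is inferred from the origin names rather than taken from this commit.

```scala
import org.apache.spark.sql.functions.col

// Hypothetical DataFrame with a single column "c".
val df = spark.range(100).toDF("c")
df.createOrReplaceTempView("t")

// REPARTITION_BY_COL: no partition number given, so AQE may remove the shuffle.
val byCol = df.repartition(col("c"))

// REPARTITION_BY_NUM: the user asked for exactly 10 partitions; the shuffle is retained.
val byNum = df.repartition(10, col("c"))

// REBALANCE_PARTITIONS_BY_COL: a rebalance shuffle, also retained.
val rebalanced = spark.sql("SELECT /*+ REBALANCE(c) */ * FROM t")
```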

### Does this PR introduce _any_ user-facing change?

no

### How was this patch tested?

add test

Closes #33188 from ulysses-you/SPARK-35989.

Lead-authored-by: ulysses-you 
Co-authored-by: ulysses 
Signed-off-by: Wenchen Fan 
(cherry picked from commit 7fe4c4a9ad92cedd398a13a1781649dff57ca4d9)
Signed-off-by: Wenchen Fan 
---
 .../execution/exchange/EnsureRequirements.scala|  7 +-
 .../apache/spark/sql/execution/PlannerSuite.scala  |  5 +++--
 .../adaptive/AdaptiveQueryExecSuite.scala  | 19 
 .../exchange/EnsureRequirementsSuite.scala | 25 +-
 4 files changed, 42 insertions(+), 14 deletions(-)

diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala
index a990700..d71933a 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala
@@ -250,7 +250,12 @@ object EnsureRequirements extends Rule[SparkPlan] {
 
   def apply(plan: SparkPlan): SparkPlan = plan.transformUp {
 // TODO: remove this after we create a physical operator for 
`RepartitionByExpression`.
-case operator @ ShuffleExchangeExec(upper: HashPartitioning, child, _) =>
+// SPARK-35989: AQE will change the partition number so we should retain 
the REPARTITION_BY_NUM
+// shuffle which is specified by user. And also we can not remove 
REBALANCE_PARTITIONS_BY_COL,
+// it is a special shuffle used to rebalance partitions.
+// So, here we only remove REPARTITION_BY_COL in AQE.
+case operator @ ShuffleExchangeExec(upper: HashPartitioning, child, 
shuffleOrigin)
+if shuffleOrigin == REPARTITION_BY_COL || 
!conf.adaptiveExecutionEnabled =>
   def hasSemanticEqualPartitioning(partitioning: Partitioning): Boolean = {
 partitioning match {
   case lower: HashPartitioning if upper.semanticEquals(lower) => true
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala
index 0ea7599..fad6ed1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala
@@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.plans.physical._
 import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, 
DisableAdaptiveExecution}
 import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, 
ObjectHashAggregateExec, SortAggregateExec}
 import org.apache.spark.sql.execution.columnar.{InMemoryRelation, 
InMemoryTableScanExec}
-import org.apache.spark.sql.execution.exchange.{EnsureRequirements, 
ReusedExchangeExec, ShuffleExchangeExec}
+import org.apache.spark.sql.execution.exchange.{EnsureRequirements, 
REPARTITION_BY_COL, ReusedExchangeExec, ShuffleExchangeExec}
 import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, 
SortMergeJoinExec}
 import org.apache.spark.sql.execution.reuse.ReuseExchangeAndSubquery
 import org.apache.spark.sql.functions._
@@ -420,7 +420,8 @@ class PlannerSuite extends SharedSparkSession with 
AdaptiveSparkPlanHelper {
 
 val inputPlan = ShuffleExchangeExec(
   partitioning,
-  DummySparkPlan(outputPartitioning = partitioning))
+  DummySparkPlan(outputPartitioning = partitioning),
+  REPARTITION_BY_COL)
 val outputPlan = EnsureRequirements.apply(inputPlan)
 assertDistributionRequirementsAreSatisfied(outputPlan)
 if (outputPlan.collect { case e: ShuffleExchangeExec => true }.size == 1) {
diff --git 
a/sql/core/src/te

[spark] branch master updated (044dddf -> 7fe4c4a)

2021-07-05 Thread wenchen
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.


from 044dddf  [SPARK-35794][SQL] Allow custom plugin for AQE cost evaluator
 add 7fe4c4a  [SPARK-35989][SQL] Only remove redundant shuffle if shuffle 
origin is REPARTITION_BY_COL in AQE

No new revisions were added by this update.

Summary of changes:
 .../execution/exchange/EnsureRequirements.scala|  7 +-
 .../apache/spark/sql/execution/PlannerSuite.scala  |  5 +++--
 .../adaptive/AdaptiveQueryExecSuite.scala  | 19 
 .../exchange/EnsureRequirementsSuite.scala | 25 +-
 4 files changed, 42 insertions(+), 14 deletions(-)




[spark] branch branch-3.2 updated: [SPARK-35794][SQL] Allow custom plugin for AQE cost evaluator

2021-07-05 Thread wenchen
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
 new 39b3a04  [SPARK-35794][SQL] Allow custom plugin for AQE cost evaluator
39b3a04 is described below

commit 39b3a04bfec3385395df4418a3794dd276ba3271
Author: Cheng Su 
AuthorDate: Mon Jul 5 09:06:38 2021 +

[SPARK-35794][SQL] Allow custom plugin for AQE cost evaluator

### What changes were proposed in this pull request?

Currently AQE has a cost evaluator that decides whether to use the new plan 
after replanning. The evaluator used today is `SimpleCostEvaluator`, which makes 
the decision based on the number of shuffles in the query plan. This is not a 
perfect cost evaluator, and different production environments might want to use 
different custom evaluators. E.g., sometimes we might still want to do a skew 
join even though it introduces an extra shuffle (trading resources for better 
latency), sometimes we might want to take  [...]

The approach is to introduce a new config, 
`spark.sql.adaptive.customCostEvaluatorClass`, that allows specifying the name 
of a `CostEvaluator` subclass, and to add `CostEvaluator.instantiate` to 
instantiate that cost evaluator class in `AdaptiveSparkPlanExec.costEvaluator`.
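
For illustration only, a minimal sketch of what such a plugin could look like; the exact shape of the `CostEvaluator` and `Cost` traits and the `SimpleCost` helper are assumptions inferred from the file and config names above, not quoted from this commit.

```scala
package com.example

import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.adaptive.{Cost, CostEvaluator, SimpleCost}
import org.apache.spark.sql.execution.exchange.ShuffleExchangeLike

// Hypothetical evaluator that simply counts shuffle nodes; assumes the trait
// exposes a single evaluateCost(plan) method returning a Cost.
class MyCostEvaluator extends CostEvaluator {
  override def evaluateCost(plan: SparkPlan): Cost = {
    val numShuffles = plan.collect { case s: ShuffleExchangeLike => s }.size
    SimpleCost(numShuffles)
  }
}
```

It would then be enabled by setting `spark.sql.adaptive.customCostEvaluatorClass` to `com.example.MyCostEvaluator` (for example via `spark.conf.set`), assuming the class is on the driver classpath.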

### Why are the changes needed?

Make AQE cost evaluation more flexible.

### Does this PR introduce _any_ user-facing change?

No, but an internal config is introduced - 
`spark.sql.adaptive.customCostEvaluatorClass` - to allow a custom implementation 
of `CostEvaluator`.

### How was this patch tested?

Added unit test in `AdaptiveQueryExecSuite.scala`.

Closes #32944 from c21/aqe-cost.

Authored-by: Cheng Su 
Signed-off-by: Wenchen Fan 
(cherry picked from commit 044dddf28860cabb813d6ccf9489ea6ef21dd11e)
Signed-off-by: Wenchen Fan 
---
 .../org/apache/spark/sql/internal/SQLConf.scala|  8 
 .../execution/adaptive/AdaptiveSparkPlanExec.scala |  6 ++-
 .../spark/sql/execution/adaptive/costing.scala | 30 -
 .../adaptive/AdaptiveQueryExecSuite.scala  | 50 ++
 4 files changed, 91 insertions(+), 3 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 59a310d..0f4d1ae 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -678,6 +678,14 @@ object SQLConf {
   .booleanConf
   .createWithDefault(true)
 
+  val ADAPTIVE_CUSTOM_COST_EVALUATOR_CLASS =
+buildConf("spark.sql.adaptive.customCostEvaluatorClass")
+  .doc("The custom cost evaluator class to be used for adaptive execution. 
If not being set," +
+" Spark will use its own SimpleCostEvaluator by default.")
+  .version("3.2.0")
+  .stringConf
+  .createOptional
+
   val SUBEXPRESSION_ELIMINATION_ENABLED =
 buildConf("spark.sql.subexpressionElimination.enabled")
   .internal()
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala
index cbf70e3..18aaf5b 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala
@@ -130,7 +130,11 @@ case class AdaptiveSparkPlanExec(
 }
   }
 
-  @transient private val costEvaluator = SimpleCostEvaluator
+  @transient private val costEvaluator =
+conf.getConf(SQLConf.ADAPTIVE_CUSTOM_COST_EVALUATOR_CLASS) match {
+  case Some(className) => CostEvaluator.instantiate(className, 
session.sparkContext.getConf)
+  case _ => SimpleCostEvaluator
+}
 
   @transient val initialPlan = context.session.withActive {
 applyPhysicalRules(
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/costing.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/costing.scala
index 293e619..56f29b9 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/costing.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/costing.scala
@@ -17,16 +17,42 @@
 
 package org.apache.spark.sql.execution.adaptive
 
+import org.apache.spark.SparkConf
+import org.apache.spark.annotation.Unstable
+import org.apache.spark.internal.Logging
 import org.apache.spark.sql.execution.SparkPlan
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.util.Utils
 
 /**
- * Represents the cost of a plan.
+ * An interface to represent the cost of a plan.
+ *
+ * @note This class is subject to be changed and/or moved in the nea

[spark] branch master updated (7f70350 -> 044dddf)

2021-07-05 Thread wenchen
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.


from 7f70350  [SPARK-36013][BUILD] Upgrade Dropwizard Metrics to 4.2.2
 add 044dddf  [SPARK-35794][SQL] Allow custom plugin for AQE cost evaluator

No new revisions were added by this update.

Summary of changes:
 .../org/apache/spark/sql/internal/SQLConf.scala|  8 
 .../execution/adaptive/AdaptiveSparkPlanExec.scala |  6 ++-
 .../spark/sql/execution/adaptive/costing.scala | 30 -
 .../adaptive/AdaptiveQueryExecSuite.scala  | 50 ++
 4 files changed, 91 insertions(+), 3 deletions(-)




[spark] branch master updated (554d5fe -> 7f70350)

2021-07-05 Thread gurwls223
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.


from 554d5fe  [SPARK-36010][BUILD] Upgrade sbt-antlr4 from 0.8.2 to 0.8.3
 add 7f70350  [SPARK-36013][BUILD] Upgrade Dropwizard Metrics to 4.2.2

No new revisions were added by this update.

Summary of changes:
 dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 10 +-
 dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 10 +-
 pom.xml |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)




[spark] branch master updated (6474226 -> 554d5fe)

2021-07-05 Thread sarutak
This is an automated email from the ASF dual-hosted git repository.

sarutak pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.


from 6474226  [SPARK-35982][SQL] Allow from_json/to_json for map types 
where value types are year-month intervals
 add 554d5fe  [SPARK-36010][BUILD] Upgrade sbt-antlr4 from 0.8.2 to 0.8.3

No new revisions were added by this update.

Summary of changes:
 project/plugins.sbt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)




[spark] branch branch-3.2 updated: [SPARK-35982][SQL] Allow from_json/to_json for map types where value types are year-month intervals

2021-07-05 Thread maxgekk
This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
 new 26bcf02  [SPARK-35982][SQL] Allow from_json/to_json for map types 
where value types are year-month intervals
26bcf02 is described below

commit 26bcf028339c02ca75af31ab8105f7dbe58c49a9
Author: Kousuke Saruta 
AuthorDate: Mon Jul 5 10:35:50 2021 +0300

[SPARK-35982][SQL] Allow from_json/to_json for map types where value types 
are year-month intervals

### What changes were proposed in this pull request?

This PR fixes two issues. One is that `to_json` doesn't support `map` types 
where value types are `year-month` interval types like:
```
spark-sql> select to_json(map('a', interval '1-2' year to  month));
21/07/02 11:38:15 ERROR SparkSQLDriver: Failed in [select to_json(map('a', 
interval '1-2' year to  month))]
java.lang.RuntimeException: Failed to convert value 14 (class of class 
java.lang.Integer) with the type of YearMonthIntervalType(0,1) to JSON.
```
The other issue is that, even once the `to_json` issue is resolved, `from_json` 
doesn't support converting a `year-month` interval string back from JSON. So 
the result of the following query will be `null`.
```
spark-sql> select from_json(to_json(map('a', interval '1-2' year to 
month)), 'a interval year to month');
{"a":null}
```

### Why are the changes needed?

There should be no reason why year-month intervals cannot be used as map value 
types; `CalendarIntervalType` values already can be.
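
For reference, a small sketch (not part of this commit) of the now-supported round trip from the Scala API; it assumes an active SparkSession `spark`, and the exact JSON string produced by `to_json` is an assumption based on the ANSI interval style used in the generator change below.

```scala
import java.time.Period

import org.apache.spark.sql.functions.{from_json, to_json}
import org.apache.spark.sql.types.{MapType, StringType, YearMonthIntervalType}

import spark.implicits._

// A map whose value is a year-month interval (1 year 2 months).
val df = Seq(Map("a" -> Period.ofYears(1).plusMonths(2))).toDF("m")

// to_json now renders the value, e.g. {"a":"INTERVAL '1-2' YEAR TO MONTH"}.
val json = df.select(to_json($"m").as("j"))

// ...and from_json parses it back into a map<string, interval year to month>.
json.select(from_json($"j", MapType(StringType, YearMonthIntervalType()))).show(false)
```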

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

New tests.

Closes #33181 from sarutak/map-json-yminterval.

Authored-by: Kousuke Saruta 
Signed-off-by: Max Gekk 
(cherry picked from commit 647422685292cd1a46766afa9b07b6fcfc181bbd)
Signed-off-by: Max Gekk 
---
 .../spark/sql/catalyst/json/JacksonGenerator.scala |  9 
 .../spark/sql/catalyst/json/JacksonParser.scala|  7 ++
 .../org/apache/spark/sql/JsonFunctionsSuite.scala  | 26 ++
 3 files changed, 42 insertions(+)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
index 2567438..9777d56 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
@@ -147,6 +147,15 @@ private[sql] class JacksonGenerator(
   (row: SpecializedGetters, ordinal: Int) =>
 gen.writeString(row.getInterval(ordinal).toString)
 
+case YearMonthIntervalType(start, end) =>
+  (row: SpecializedGetters, ordinal: Int) =>
+val ymString = IntervalUtils.toYearMonthIntervalString(
+  row.getInt(ordinal),
+  IntervalStringStyles.ANSI_STYLE,
+  start,
+  end)
+gen.writeString(ymString)
+
 case BinaryType =>
   (row: SpecializedGetters, ordinal: Int) =>
 gen.writeBinary(row.getBinary(ordinal))
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
index 27e1411..2aa735d 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
@@ -295,6 +295,13 @@ class JacksonParser(
   
IntervalUtils.safeStringToInterval(UTF8String.fromString(parser.getText))
   }
 
+case ym: YearMonthIntervalType => (parser: JsonParser) =>
+  parseJsonToken[Integer](parser, dataType) {
+case VALUE_STRING =>
+  val expr = Cast(Literal(parser.getText), ym)
+  Integer.valueOf(expr.eval(EmptyRow).asInstanceOf[Int])
+  }
+
 case st: StructType =>
   val fieldConverters = st.map(_.dataType).map(makeConverter).toArray
   (parser: JsonParser) => parseJsonToken[InternalRow](parser, dataType) {
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
index 5485cc1..c2bea8c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql
 
 import java.text.SimpleDateFormat
+import java.time.Period
 import java.util.Locale
 
 import collection.JavaConverters._
@@ -27,6 +28,7 @@ import org.apache.spark.sql.functions._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSparkSession
 import org.apache.spark.sql.types

[spark] branch master updated: [SPARK-35982][SQL] Allow from_json/to_json for map types where value types are year-month intervals

2021-07-05 Thread maxgekk
This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new 6474226  [SPARK-35982][SQL] Allow from_json/to_json for map types 
where value types are year-month intervals
6474226 is described below

commit 647422685292cd1a46766afa9b07b6fcfc181bbd
Author: Kousuke Saruta 
AuthorDate: Mon Jul 5 10:35:50 2021 +0300

[SPARK-35982][SQL] Allow from_json/to_json for map types where value types 
are year-month intervals

### What changes were proposed in this pull request?

This PR fixes two issues. One is that `to_json` doesn't support `map` types 
where value types are `year-month` interval types like:
```
spark-sql> select to_json(map('a', interval '1-2' year to  month));
21/07/02 11:38:15 ERROR SparkSQLDriver: Failed in [select to_json(map('a', 
interval '1-2' year to  month))]
java.lang.RuntimeException: Failed to convert value 14 (class of class 
java.lang.Integer) with the type of YearMonthIntervalType(0,1) to JSON.
```
The other issue is that, even once the `to_json` issue is resolved, `from_json` 
doesn't support converting a `year-month` interval string back from JSON. So 
the result of the following query will be `null`.
```
spark-sql> select from_json(to_json(map('a', interval '1-2' year to 
month)), 'a interval year to month');
{"a":null}
```

### Why are the changes needed?

There should be no reason why year-month intervals cannot be used as map value 
types; `CalendarIntervalType` values already can be.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

New tests.

Closes #33181 from sarutak/map-json-yminterval.

Authored-by: Kousuke Saruta 
Signed-off-by: Max Gekk 
---
 .../spark/sql/catalyst/json/JacksonGenerator.scala |  9 
 .../spark/sql/catalyst/json/JacksonParser.scala|  7 ++
 .../org/apache/spark/sql/JsonFunctionsSuite.scala  | 26 ++
 3 files changed, 42 insertions(+)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
index 2567438..9777d56 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
@@ -147,6 +147,15 @@ private[sql] class JacksonGenerator(
   (row: SpecializedGetters, ordinal: Int) =>
 gen.writeString(row.getInterval(ordinal).toString)
 
+case YearMonthIntervalType(start, end) =>
+  (row: SpecializedGetters, ordinal: Int) =>
+val ymString = IntervalUtils.toYearMonthIntervalString(
+  row.getInt(ordinal),
+  IntervalStringStyles.ANSI_STYLE,
+  start,
+  end)
+gen.writeString(ymString)
+
 case BinaryType =>
   (row: SpecializedGetters, ordinal: Int) =>
 gen.writeBinary(row.getBinary(ordinal))
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
index 27e1411..2aa735d 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
@@ -295,6 +295,13 @@ class JacksonParser(
   
IntervalUtils.safeStringToInterval(UTF8String.fromString(parser.getText))
   }
 
+case ym: YearMonthIntervalType => (parser: JsonParser) =>
+  parseJsonToken[Integer](parser, dataType) {
+case VALUE_STRING =>
+  val expr = Cast(Literal(parser.getText), ym)
+  Integer.valueOf(expr.eval(EmptyRow).asInstanceOf[Int])
+  }
+
 case st: StructType =>
   val fieldConverters = st.map(_.dataType).map(makeConverter).toArray
   (parser: JsonParser) => parseJsonToken[InternalRow](parser, dataType) {
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
index 5485cc1..c2bea8c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql
 
 import java.text.SimpleDateFormat
+import java.time.Period
 import java.util.Locale
 
 import collection.JavaConverters._
@@ -27,6 +28,7 @@ import org.apache.spark.sql.functions._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSparkSession
 import org.apache.spark.sql.types._
+import org.apache.spark.sql.types.YearMonthIntervalType.{MONTH, YEAR}
 
 class JsonFunctionsSuite extends