[spark] branch master updated (2e3adad -> 71c73d5)
This is an automated email from the ASF dual-hosted git repository.

yamamuro pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

from 2e3adad  [SPARK-31061][SQL] Provide ability to alter the provider of a table
 add 71c73d5  [SPARK-30279][SQL] Support 32 or more grouping attributes for GROUPING_ID

No new revisions were added by this update.

Summary of changes:
 docs/sql-migration-guide.md | 3 ++
 .../spark/sql/catalyst/analysis/Analyzer.scala | 10 ++--
 .../spark/sql/catalyst/expressions/grouping.scala | 28 +++
 .../plans/logical/basicLogicalOperators.scala | 13 --
 .../org/apache/spark/sql/internal/SQLConf.scala | 9
 .../analysis/ResolveGroupingAnalyticsSuite.scala | 54 --
 .../sql-tests/results/group-analytics.sql.out | 8 ++--
 .../sql-tests/results/grouping_set.sql.out | 4 +-
 .../results/postgreSQL/groupingsets.sql.out | 2 +-
 .../results/udf/udf-group-analytics.sql.out | 8 ++--
 .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 31 +
 11 files changed, 117 insertions(+), 53 deletions(-)

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
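As a small, hedged illustration of the SPARK-30279 change above: it lifts the previous limit of fewer than 32 grouping attributes when computing GROUPING_ID. The sketch below (table and column names made up, shown with only three columns rather than 32) is the kind of query whose grouping_id() value is affected once the GROUP BY list grows past that limit.

    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().master("local[*]").appName("grouping-id-sketch").getOrCreate()

    // Toy table; the SPARK-30279 change matters once the GROUP BY list reaches 32 columns.
    spark.range(12)
      .selectExpr("id % 2 AS a", "id % 3 AS b", "id AS v")
      .createOrReplaceTempView("t")

    spark.sql(
      """SELECT a, b, grouping_id() AS gid, sum(v) AS total
        |FROM t
        |GROUP BY a, b GROUPING SETS ((a, b), (a), ())
        |ORDER BY gid, a, b""".stripMargin)
      .show()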
[spark] branch branch-3.0 updated: [SPARK-31061][SQL] Provide ability to alter the provider of a table
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 35a8e1e [SPARK-31061][SQL] Provide ability to alter the provider of a table 35a8e1e is described below commit 35a8e1ed7bb2f9493742d7401e32eab4621fd2d0 Author: Burak Yavuz AuthorDate: Fri Mar 6 15:40:44 2020 +0800 [SPARK-31061][SQL] Provide ability to alter the provider of a table This PR adds functionality to HiveExternalCatalog to be able to change the provider of a table. This is useful for catalogs in Spark 3.0 to be able to use alterTable to change the provider of a table as part of an atomic REPLACE TABLE function. No Unit tests Closes #27822 from brkyvz/externalCat. Authored-by: Burak Yavuz Signed-off-by: Wenchen Fan (cherry picked from commit 2e3adadc6a53fe044b286a6a59529a94e7eeda6c) Signed-off-by: Wenchen Fan --- .../spark/sql/hive/HiveExternalCatalog.scala | 10 +- .../spark/sql/hive/HiveExternalCatalogSuite.scala | 40 ++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index ca292f6..be6d824 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -634,7 +634,15 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat k.startsWith(DATASOURCE_PREFIX) || k.startsWith(STATISTICS_PREFIX) || k.startsWith(CREATED_SPARK_VERSION) } - val newTableProps = propsFromOldTable ++ tableDefinition.properties + partitionProviderProp + val newFormatIfExists = tableDefinition.provider.flatMap { p => +if (DDLUtils.isDatasourceTable(tableDefinition)) { + Some(DATASOURCE_PROVIDER -> p) +} else { + None +} + } + val newTableProps = +propsFromOldTable ++ tableDefinition.properties + partitionProviderProp ++ newFormatIfExists // // Add old table's owner if we need to restore val owner = Option(tableDefinition.owner).filter(_.nonEmpty).getOrElse(oldTableDef.owner) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala index 0a88898..473a93b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.hive +import java.net.URI + import org.apache.hadoop.conf.Configuration import org.apache.spark.SparkConf @@ -178,4 +180,42 @@ class HiveExternalCatalogSuite extends ExternalCatalogSuite { assertThrows[QueryExecutionException](client.runSqlHive( "INSERT overwrite directory \"fs://localhost/tmp\" select 1 as a")) } + + test("SPARK-31061: alterTable should be able to change table provider") { +val catalog = newBasicCatalog() +val parquetTable = CatalogTable( + identifier = TableIdentifier("parq_tbl", Some("db1")), + tableType = CatalogTableType.MANAGED, + storage = storageFormat.copy(locationUri = Some(new URI("file:/some/path"))), + schema = new StructType().add("col1", "int").add("col2", "string"), + provider = Some("parquet")) +catalog.createTable(parquetTable, ignoreIfExists = false) + +val rawTable = externalCatalog.getTable("db1", "parq_tbl") +assert(rawTable.provider === 
Some("parquet")) + +val fooTable = parquetTable.copy(provider = Some("foo")) +catalog.alterTable(fooTable) +val alteredTable = externalCatalog.getTable("db1", "parq_tbl") +assert(alteredTable.provider === Some("foo")) + } + + test("SPARK-31061: alterTable should be able to change table provider from hive") { +val catalog = newBasicCatalog() +val hiveTable = CatalogTable( + identifier = TableIdentifier("parq_tbl", Some("db1")), + tableType = CatalogTableType.MANAGED, + storage = storageFormat, + schema = new StructType().add("col1", "int").add("col2", "string"), + provider = Some("hive")) +catalog.createTable(hiveTable, ignoreIfExists = false) + +val rawTable = externalCatalog.getTable("db1", "parq_tbl") +assert(rawTable.provider === Some("hive")) + +val fooTable = rawTable.copy(provider = Some("foo")) +catalog.alterTable(fooTable) +val alteredTable = externalCatalog.getTable("db1", "parq_tbl") +assert(alteredTable.provider === Some("foo")) + } } - To unsubs
[spark] branch master updated (6468d6f -> 2e3adad)
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

from 6468d6f  [SPARK-30776][ML] Support FValueSelector for continuous features and continuous labels
 add 2e3adad  [SPARK-31061][SQL] Provide ability to alter the provider of a table

No new revisions were added by this update.

Summary of changes:
 .../spark/sql/hive/HiveExternalCatalog.scala | 10 +-
 .../spark/sql/hive/HiveExternalCatalogSuite.scala | 40 ++
 2 files changed, 49 insertions(+), 1 deletion(-)

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated: [SPARK-31045][SQL][FOLLOWUP][3.0] Fix build due to divergence between master and 3.0
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 9b48f33 [SPARK-31045][SQL][FOLLOWUP][3.0] Fix build due to divergence between master and 3.0 9b48f33 is described below commit 9b48f3358d3efb523715a5f258e5ed83e28692f6 Author: Jungtaek Lim (HeartSaVioR) AuthorDate: Thu Mar 5 21:31:08 2020 -0800 [SPARK-31045][SQL][FOLLOWUP][3.0] Fix build due to divergence between master and 3.0 ### What changes were proposed in this pull request? This patch fixes the build failure in `branch-3.0` due to cherry-picking SPARK-31045 to branch-3.0, as `.version()` is not available in `branch-3.0` yet. ### Why are the changes needed? The build is failing in `branch-3.0`. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Jenkins build will verify. Closes #27826 from HeartSaVioR/SPARK-31045-branch-3.0-FOLLOWUP. Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Dongjoon Hyun --- sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index cd465bc..fdaf0ec 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -382,7 +382,6 @@ object SQLConf { .internal() .doc("Configures the log level for adaptive execution logging of plan changes. The value " + "can be 'trace', 'debug', 'info', 'warn', or 'error'. The default log level is 'debug'.") -.version("3.0.0") .stringConf .transform(_.toUpperCase(Locale.ROOT)) .checkValues(Set("TRACE", "DEBUG", "INFO", "WARN", "ERROR")) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (1426ad8 -> 6468d6f)
This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

from 1426ad8  [SPARK-23817][FOLLOWUP][TEST] Add OrcV2QuerySuite
 add 6468d6f  [SPARK-30776][ML] Support FValueSelector for continuous features and continuous labels

No new revisions were added by this update.

Summary of changes:
 .../apache/spark/ml/feature/ChiSqSelector.scala | 2 +-
 .../{ChiSqSelector.scala => FValueSelector.scala} | 290 -
 .../org/apache/spark/ml/stat/FValueTest.scala | 36 ++-
 .../apache/spark/ml/stat/SelectionTestResult.scala | 101 +++
 .../spark/ml/feature/FValueSelectorSuite.scala | 212 +++
 5 files changed, 512 insertions(+), 129 deletions(-)
 copy mllib/src/main/scala/org/apache/spark/ml/feature/{ChiSqSelector.scala => FValueSelector.scala} (56%)
 create mode 100644 mllib/src/main/scala/org/apache/spark/ml/stat/SelectionTestResult.scala
 create mode 100644 mllib/src/test/scala/org/apache/spark/ml/feature/FValueSelectorSuite.scala

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
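The diffstat shows FValueSelector.scala was created as a copy of ChiSqSelector.scala (56% similar), so its Estimator API presumably mirrors ChiSqSelector's. The sketch below rests entirely on that assumption: the setter names, data, and column names are illustrative and are not confirmed by this commit email.

    // Sketch only: assumes FValueSelector mirrors ChiSqSelector's API.
    import org.apache.spark.ml.feature.FValueSelector
    import org.apache.spark.ml.linalg.Vectors
    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().master("local[*]").appName("fvalue-selector-sketch").getOrCreate()

    val df = spark.createDataFrame(Seq(
      (Vectors.dense(1.0, 0.1, 3.2), 2.3),
      (Vectors.dense(0.9, 0.3, 3.1), 2.1),
      (Vectors.dense(5.0, 0.2, 0.4), 9.8)
    )).toDF("features", "label")

    // Continuous features, continuous label: pick the single strongest feature by F-value.
    val selector = new FValueSelector()
      .setNumTopFeatures(1)
      .setFeaturesCol("features")
      .setLabelCol("label")
      .setOutputCol("selected")

    selector.fit(df).transform(df).show(truncate = false)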
[spark] branch branch-3.0 updated (5375b40 -> 7c09c9f)
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a change to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git.

from 5375b40  [SPARK-31010][SQL][FOLLOW-UP] Deprecate untyped scala UDF
 add 7c09c9f  [SPARK-23817][FOLLOWUP][TEST] Add OrcV2QuerySuite

No new revisions were added by this update.

Summary of changes:
 .../spark/sql/execution/datasources/orc/OrcQuerySuite.scala | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (587266f -> 1426ad8)
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

from 587266f  [SPARK-31010][SQL][FOLLOW-UP] Deprecate untyped scala UDF
 add 1426ad8  [SPARK-23817][FOLLOWUP][TEST] Add OrcV2QuerySuite

No new revisions were added by this update.

Summary of changes:
 .../spark/sql/execution/datasources/orc/OrcQuerySuite.scala | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated: [SPARK-31010][SQL][FOLLOW-UP] Deprecate untyped scala UDF
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 5375b40 [SPARK-31010][SQL][FOLLOW-UP] Deprecate untyped scala UDF 5375b40 is described below commit 5375b40f37cefe6f18f9f943412b60c65fe2f844 Author: yi.wu AuthorDate: Fri Mar 6 13:00:04 2020 +0800 [SPARK-31010][SQL][FOLLOW-UP] Deprecate untyped scala UDF ### What changes were proposed in this pull request? Use scala annotation deprecate to deprecate untyped scala UDF. ### Why are the changes needed? After #27488, it's weird to see the untyped scala UDF will fail by default without deprecation. ### Does this PR introduce any user-facing change? Yes, user will see the warning: ``` :26: warning: method udf in object functions is deprecated (since 3.0.0): Untyped Scala UDF API is deprecated, please use typed Scala UDF API such as 'def udf[RT: TypeTag](f: Function0[RT]): UserDefinedFunction' instead. val myudf = udf(() => Math.random(), DoubleType) ^ ``` ### How was this patch tested? Tested manually. Closes #27794 from Ngone51/deprecate_untyped_scala_udf. Authored-by: yi.wu Signed-off-by: Wenchen Fan (cherry picked from commit 587266f887f85bfa2a73a77485ab4db522c6aca1) Signed-off-by: Wenchen Fan --- sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index c6e8cf7..7a58957 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -4732,6 +4732,8 @@ object functions { * @group udf_funcs * @since 2.0.0 */ + @deprecated("Untyped Scala UDF API is deprecated, please use typed Scala UDF API such as " + +"'def udf[RT: TypeTag](f: Function0[RT]): UserDefinedFunction' instead.", "3.0.0") def udf(f: AnyRef, dataType: DataType): UserDefinedFunction = { if (!SQLConf.get.getConf(SQLConf.LEGACY_ALLOW_UNTYPED_SCALA_UDF)) { val errorMsg = "You're using untyped Scala UDF, which does not have the input type " + - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
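A minimal sketch of the migration the new warning asks for, assuming an existing SparkSession; the alias name is made up. Per the diff above, the untyped overload is rejected by default in 3.0 unless SQLConf.LEGACY_ALLOW_UNTYPED_SCALA_UDF is enabled, so it is shown here only as a comment.

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.functions.udf

    val spark = SparkSession.builder().master("local[*]").appName("typed-udf-sketch").getOrCreate()

    // Deprecated (and rejected by default in 3.0 unless LEGACY_ALLOW_UNTYPED_SCALA_UDF is set):
    //   val untyped = udf(() => math.random, DoubleType)

    // Recommended: the typed overload, where the return type is inferred from the closure.
    val typed = udf(() => math.random)
    spark.range(3).select(typed().as("r")).show()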
[spark] branch master updated (59f1e76 -> 587266f)
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

from 59f1e76  [SPARK-31020][SPARK-31023][SPARK-31025][SPARK-31044][SQL] Support foldable args by `from_csv/json` and `schema_of_csv/json`
 add 587266f  [SPARK-31010][SQL][FOLLOW-UP] Deprecate untyped scala UDF

No new revisions were added by this update.

Summary of changes:
 sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 2 ++
 1 file changed, 2 insertions(+)

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (afb84e9 -> 59f1e76)
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

from afb84e9  [SPARK-30886][SQL] Deprecate two-parameter TRIM/LTRIM/RTRIM functions
 add 59f1e76  [SPARK-31020][SPARK-31023][SPARK-31025][SPARK-31044][SQL] Support foldable args by `from_csv/json` and `schema_of_csv/json`

No new revisions were added by this update.

Summary of changes:
 .../spark/sql/catalyst/expressions/ExprUtils.scala | 33 +-
 .../sql/catalyst/expressions/csvExpressions.scala | 12 +---
 .../sql/catalyst/expressions/jsonExpressions.scala | 12 +---
 .../sql-tests/results/csv-functions.sql.out | 6 ++--
 .../sql-tests/results/json-functions.sql.out | 6 ++--
 .../org/apache/spark/sql/CsvFunctionsSuite.scala | 26 +
 .../org/apache/spark/sql/JsonFunctionsSuite.scala | 23 ++-
 7 files changed, 83 insertions(+), 35 deletions(-)

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
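A hedged sketch of what "foldable args" buys, based only on the commit title above: the schema argument of from_json (and the input of schema_of_json) may now be any constant-foldable expression, not just a bare string literal. The JSON samples and column names below are made up.

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.functions.{col, concat, from_json, lit, schema_of_json}

    val spark = SparkSession.builder().master("local[*]").appName("foldable-args-sketch").getOrCreate()

    // A constant-foldable expression (concat of literals), not a bare literal,
    // feeding schema_of_json; the inferred schema is then reused by from_json.
    val schema = schema_of_json(concat(lit("""{"id": 1,"""), lit(""" "tags": ["a"]}""")))

    spark.range(1)
      .select(lit("""{"id": 7, "tags": ["x"]}""").as("value"))
      .select(from_json(col("value"), schema).as("parsed"))
      .show(truncate = false)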
[spark] branch branch-3.0 updated: [SPARK-30886][SQL] Deprecate two-parameter TRIM/LTRIM/RTRIM functions
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 1535b2b [SPARK-30886][SQL] Deprecate two-parameter TRIM/LTRIM/RTRIM functions 1535b2b is described below commit 1535b2bb51782a89b271b1ebe53273ab610b390b Author: Dongjoon Hyun AuthorDate: Thu Mar 5 20:09:39 2020 -0800 [SPARK-30886][SQL] Deprecate two-parameter TRIM/LTRIM/RTRIM functions ### What changes were proposed in this pull request? This PR aims to show a deprecation warning on two-parameter TRIM/LTRIM/RTRIM function usages based on the community decision. - https://lists.apache.org/thread.html/r48b6c2596ab06206b7b7fd4bbafd4099dccd4e2cf9801aaa9034c418%40%3Cdev.spark.apache.org%3E ### Why are the changes needed? For backward compatibility, SPARK-28093 is reverted. However, from Apache Spark 3.0.0, we should give a safe guideline to use SQL syntax instead of the esoteric function signatures. ### Does this PR introduce any user-facing change? Yes. This shows a directional warning. ### How was this patch tested? Pass the Jenkins with a newly added test case. Closes #27643 from dongjoon-hyun/SPARK-30886. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit afb84e9d378003c57cd01d21cdb1a977ba25454b) Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/analysis/Analyzer.scala | 20 ++--- .../sql/catalyst/analysis/AnalysisSuite.scala | 52 ++ 2 files changed, 66 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 3cb754d..eadcd0f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.analysis import java.util import java.util.Locale +import java.util.concurrent.atomic.AtomicBoolean import scala.collection.mutable import scala.collection.mutable.ArrayBuffer @@ -1795,6 +1796,7 @@ class Analyzer( * Replaces [[UnresolvedFunction]]s with concrete [[Expression]]s. */ object ResolveFunctions extends Rule[LogicalPlan] { +val trimWarningEnabled = new AtomicBoolean(true) def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { case q: LogicalPlan => q transformExpressions { @@ -1839,13 +1841,19 @@ class Analyzer( } AggregateExpression(agg, Complete, isDistinct, filter) // This function is not an aggregate function, just return the resolved one. -case other => - if (isDistinct || filter.isDefined) { -failAnalysis("DISTINCT or FILTER specified, " + - s"but ${other.prettyName} is not an aggregate function") - } else { -other +case other if (isDistinct || filter.isDefined) => + failAnalysis("DISTINCT or FILTER specified, " + +s"but ${other.prettyName} is not an aggregate function") +case e: String2TrimExpression if arguments.size == 2 => + if (trimWarningEnabled.get) { +log.warn("Two-parameter TRIM/LTRIM/RTRIM function signatures are deprecated." + + " Use SQL syntax `TRIM((BOTH | LEADING | TRAILING)? 
trimStr FROM str)`" + + " instead.") +trimWarningEnabled.set(false) } + e +case other => + other } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index d385133..8451b9b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -21,6 +21,7 @@ import java.util.{Locale, TimeZone} import scala.reflect.ClassTag +import org.apache.log4j.Level import org.scalatest.Matchers import org.apache.spark.api.python.PythonEvalType @@ -768,4 +769,55 @@ class AnalysisSuite extends AnalysisTest with Matchers { assert(message.startsWith(s"Max iterations ($maxIterations) reached for batch Resolution, " + s"please set '${SQLConf.ANALYZER_MAX_ITERATIONS.key}' to a larger value.")) } + + test("SPARK-30886 Deprecate two-parameter TRIM/LTRIM/RTRIM") { +Seq("trim
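As a usage sketch of the change above (string literals made up, assuming an existing SparkSession): the deprecated two-parameter call alongside the SQL-standard syntax that the new warning recommends.

    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().master("local[*]").appName("trim-syntax-sketch").getOrCreate()

    // Deprecated two-parameter form (its argument order is exactly the confusion
    // SPARK-28093 dealt with); from 3.0 this logs the deprecation warning once.
    spark.sql("SELECT trim('xy', 'yxSparkxx')").show()

    // Recommended, unambiguous SQL-standard syntax from the warning message.
    spark.sql("SELECT trim(BOTH 'xy' FROM 'yxSparkxx')").show()
    spark.sql("SELECT trim(LEADING 'xy' FROM 'yxSparkxx')").show()
    spark.sql("SELECT trim(TRAILING 'xy' FROM 'yxSparkxx')").show()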
[spark] branch master updated (d705d36 -> afb84e9)
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

from d705d36  [SPARK-31045][SQL] Add config for AQE logging level
 add afb84e9  [SPARK-30886][SQL] Deprecate two-parameter TRIM/LTRIM/RTRIM functions

No new revisions were added by this update.

Summary of changes:
 .../spark/sql/catalyst/analysis/Analyzer.scala | 20 ++---
 .../sql/catalyst/analysis/AnalysisSuite.scala | 52 ++
 2 files changed, 66 insertions(+), 6 deletions(-)

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated: [SPARK-31045][SQL] Add config for AQE logging level
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new db1c3fe [SPARK-31045][SQL] Add config for AQE logging level db1c3fe is described below commit db1c3feacdca04c1b72191269f4f3910ad05bcb4 Author: maryannxue AuthorDate: Fri Mar 6 11:41:45 2020 +0800 [SPARK-31045][SQL] Add config for AQE logging level ### What changes were proposed in this pull request? This PR adds an internal config for changing the logging level of adaptive execution query plan evolvement. ### Why are the changes needed? To make AQE debugging easier. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Added UT. Closes #27798 from maryannxue/aqe-log-level. Authored-by: maryannxue Signed-off-by: Wenchen Fan (cherry picked from commit d705d36c0c94d3a4684de6ca0f444557c3cec25e) Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/internal/SQLConf.scala| 12 ++ .../execution/adaptive/AdaptiveSparkPlanExec.scala | 12 +- .../adaptive/AdaptiveQueryExecSuite.scala | 47 ++ 3 files changed, 70 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index b2b3d12..cd465bc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -378,6 +378,16 @@ object SQLConf { .booleanConf .createWithDefault(false) + val ADAPTIVE_EXECUTION_LOG_LEVEL = buildConf("spark.sql.adaptive.logLevel") +.internal() +.doc("Configures the log level for adaptive execution logging of plan changes. The value " + + "can be 'trace', 'debug', 'info', 'warn', or 'error'. The default log level is 'debug'.") +.version("3.0.0") +.stringConf +.transform(_.toUpperCase(Locale.ROOT)) +.checkValues(Set("TRACE", "DEBUG", "INFO", "WARN", "ERROR")) +.createWithDefault("debug") + val ADVISORY_PARTITION_SIZE_IN_BYTES = buildConf("spark.sql.adaptive.advisoryPartitionSizeInBytes") .doc("The advisory size in bytes of the shuffle partition during adaptive optimization " + @@ -2428,6 +2438,8 @@ class SQLConf extends Serializable with Logging { def adaptiveExecutionEnabled: Boolean = getConf(ADAPTIVE_EXECUTION_ENABLED) + def adaptiveExecutionLogLevel: String = getConf(ADAPTIVE_EXECUTION_LOG_LEVEL) + def fetchShuffleBlocksInBatch: Boolean = getConf(FETCH_SHUFFLE_BLOCKS_IN_BATCH) def nonEmptyPartitionRatioForBroadcastJoin: Double = diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index b74401e..fc88a7f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -66,6 +66,15 @@ case class AdaptiveSparkPlanExec( @transient private val lock = new Object() + @transient private val logOnLevel: ( => String) => Unit = conf.adaptiveExecutionLogLevel match { +case "TRACE" => logTrace(_) +case "DEBUG" => logDebug(_) +case "INFO" => logInfo(_) +case "WARN" => logWarning(_) +case "ERROR" => logError(_) +case _ => logDebug(_) + } + // The logical plan optimizer for re-optimizing the current logical plan. 
@transient private val optimizer = new RuleExecutor[LogicalPlan] { // TODO add more optimization rules @@ -204,6 +213,7 @@ case class AdaptiveSparkPlanExec( val newCost = costEvaluator.evaluateCost(newPhysicalPlan) if (newCost < origCost || (newCost == origCost && currentPhysicalPlan != newPhysicalPlan)) { + logOnLevel(s"Plan changed from $currentPhysicalPlan to $newPhysicalPlan") cleanUpTempTags(newPhysicalPlan) currentPhysicalPlan = newPhysicalPlan currentLogicalPlan = newLogicalPlan @@ -217,7 +227,7 @@ case class AdaptiveSparkPlanExec( currentPhysicalPlan = applyPhysicalRules(result.newPlan, queryStageOptimizerRules) isFinalPlan = true executionId.foreach(onUpdatePlan(_, Seq(currentPhysicalPlan))) - logDebug(s"Final plan: $currentPhysicalPlan") + logOnLevel(s"Final plan: $currentPhysicalPlan") } currentPhysicalPlan } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index a7fa63d..25b1f89 100644 --- a/sql/core/src/t
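A short sketch of using the new internal config from above: raising spark.sql.adaptive.logLevel from its "debug" default makes the "Plan changed from ... to ..." and "Final plan: ..." messages visible under typical log4j settings. The join below is only a made-up way to give AQE something to re-plan.

    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().master("local[*]").appName("aqe-log-level-sketch").getOrCreate()

    spark.conf.set("spark.sql.adaptive.enabled", "true")
    spark.conf.set("spark.sql.adaptive.logLevel", "WARN")   // default: "debug"

    // Any query that AQE re-optimizes will now report its plan changes at WARN.
    val left = spark.range(1000).selectExpr("id AS k", "id AS v1")
    val right = spark.range(10).selectExpr("id AS k", "id AS v2")
    left.join(right, "k").count()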
[spark] branch master updated (d9254b2 -> d705d36)
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

from d9254b2  [SPARK-30841][SQL][DOC][FOLLOW-UP] Add version information to the configuration of SQL
 add d705d36  [SPARK-31045][SQL] Add config for AQE logging level

No new revisions were added by this update.

Summary of changes:
 .../org/apache/spark/sql/internal/SQLConf.scala | 12 ++
 .../execution/adaptive/AdaptiveSparkPlanExec.scala | 12 +-
 .../adaptive/AdaptiveQueryExecSuite.scala | 47 ++
 3 files changed, 70 insertions(+), 1 deletion(-)

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (e36227e -> d9254b2)
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

from e36227e  [SPARK-30914][CORE][DOC] Add version information to the configuration of UI
 add d9254b2  [SPARK-30841][SQL][DOC][FOLLOW-UP] Add version information to the configuration of SQL

No new revisions were added by this update.

Summary of changes:
 .../org/apache/spark/sql/internal/SQLConf.scala | 112 ++---
 1 file changed, 56 insertions(+), 56 deletions(-)

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (cf7c397 -> e36227e)
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

from cf7c397  [MINOR][SQL] Remove an ignored test from JsonSuite
 add e36227e  [SPARK-30914][CORE][DOC] Add version information to the configuration of UI

No new revisions were added by this update.

Summary of changes:
 .../org/apache/spark/internal/config/UI.scala | 26 ++
 docs/configuration.md | 10 +
 2 files changed, 36 insertions(+)

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-2.4 updated: [MINOR][SQL] Remove an ignored test from JsonSuite
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-2.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-2.4 by this push: new 7c237cc [MINOR][SQL] Remove an ignored test from JsonSuite 7c237cc is described below commit 7c237cc8f7d83d96aed282d799e8a42acdc9d623 Author: Maxim Gekk AuthorDate: Fri Mar 6 10:35:44 2020 +0900 [MINOR][SQL] Remove an ignored test from JsonSuite ### What changes were proposed in this pull request? Remove ignored and outdated test `Type conflict in primitive field values (Ignored)` from JsonSuite. ### Why are the changes needed? The test is not maintained for long time. It can be removed to reduce size of JsonSuite, and improve maintainability. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running the command `./build/sbt "test:testOnly *JsonV2Suite"` Closes #27795 from MaxGekk/remove-ignored-test-in-JsonSuite. Authored-by: Maxim Gekk Signed-off-by: HyukjinKwon (cherry picked from commit cf7c397ede05fd106697bd5cc8062f394623bf22) Signed-off-by: HyukjinKwon --- .../sql/execution/datasources/json/JsonSuite.scala | 53 -- 1 file changed, 53 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 5ca430a..9fcd67a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -468,59 +468,6 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { ) } - ignore("Type conflict in primitive field values (Ignored)") { -val jsonDF = spark.read.json(primitiveFieldValueTypeConflict) -jsonDF.createOrReplaceTempView("jsonTable") - -// Right now, the analyzer does not promote strings in a boolean expression. -// Number and Boolean conflict: resolve the type as boolean in this query. -checkAnswer( - sql("select num_bool from jsonTable where NOT num_bool"), - Row(false) -) - -checkAnswer( - sql("select str_bool from jsonTable where NOT str_bool"), - Row(false) -) - -// Right now, the analyzer does not know that num_bool should be treated as a boolean. -// Number and Boolean conflict: resolve the type as boolean in this query. -checkAnswer( - sql("select num_bool from jsonTable where num_bool"), - Row(true) -) - -checkAnswer( - sql("select str_bool from jsonTable where str_bool"), - Row(false) -) - -// The plan of the following DSL is -// Project [(CAST(num_str#65:4, DoubleType) + 1.2) AS num#78] -// Filter (CAST(CAST(num_str#65:4, DoubleType), DecimalType) > 92233720368547758060) -//ExistingRdd [num_bool#61,num_num_1#62L,num_num_2#63,num_num_3#64,num_str#65,str_bool#66] -// We should directly cast num_str to DecimalType and also need to do the right type promotion -// in the Project. -checkAnswer( - jsonDF. -where('num_str >= BigDecimal("92233720368547758060")). -select(('num_str + 1.2).as("num")), - Row(new java.math.BigDecimal("92233720368547758071.2").doubleValue()) -) - -// The following test will fail. The type of num_str is StringType. -// So, to evaluate num_str + 1.2, we first need to use Cast to convert the type. -// In our test data, one value of num_str is 13.1. -// The result of (CAST(num_str#65:4, DoubleType) + 1.2) for this value is 14.299, -// which is not 14.3. 
-// Number and String conflict: resolve the type as number in this query. -checkAnswer( - sql("select num_str + 1.2 from jsonTable where num_str > 13"), - Row(BigDecimal("14.3")) :: Row(BigDecimal("92233720368547758071.2")) :: Nil -) - } - test("Type conflict in complex field values") { val jsonDF = spark.read.json(complexFieldValueTypeConflict) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated: [MINOR][SQL] Remove an ignored test from JsonSuite
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 6d8ee15 [MINOR][SQL] Remove an ignored test from JsonSuite 6d8ee15 is described below commit 6d8ee156adcac8fd2a442ab70da750f950a83661 Author: Maxim Gekk AuthorDate: Fri Mar 6 10:35:44 2020 +0900 [MINOR][SQL] Remove an ignored test from JsonSuite ### What changes were proposed in this pull request? Remove ignored and outdated test `Type conflict in primitive field values (Ignored)` from JsonSuite. ### Why are the changes needed? The test is not maintained for long time. It can be removed to reduce size of JsonSuite, and improve maintainability. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running the command `./build/sbt "test:testOnly *JsonV2Suite"` Closes #27795 from MaxGekk/remove-ignored-test-in-JsonSuite. Authored-by: Maxim Gekk Signed-off-by: HyukjinKwon (cherry picked from commit cf7c397ede05fd106697bd5cc8062f394623bf22) Signed-off-by: HyukjinKwon --- .../sql/execution/datasources/json/JsonSuite.scala | 53 -- 1 file changed, 53 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index df0bca7..fb3328c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -481,59 +481,6 @@ abstract class JsonSuite extends QueryTest with SharedSparkSession with TestJson ) } - ignore("Type conflict in primitive field values (Ignored)") { -val jsonDF = spark.read.json(primitiveFieldValueTypeConflict) -jsonDF.createOrReplaceTempView("jsonTable") - -// Right now, the analyzer does not promote strings in a boolean expression. -// Number and Boolean conflict: resolve the type as boolean in this query. -checkAnswer( - sql("select num_bool from jsonTable where NOT num_bool"), - Row(false) -) - -checkAnswer( - sql("select str_bool from jsonTable where NOT str_bool"), - Row(false) -) - -// Right now, the analyzer does not know that num_bool should be treated as a boolean. -// Number and Boolean conflict: resolve the type as boolean in this query. -checkAnswer( - sql("select num_bool from jsonTable where num_bool"), - Row(true) -) - -checkAnswer( - sql("select str_bool from jsonTable where str_bool"), - Row(false) -) - -// The plan of the following DSL is -// Project [(CAST(num_str#65:4, DoubleType) + 1.2) AS num#78] -// Filter (CAST(CAST(num_str#65:4, DoubleType), DecimalType) > 92233720368547758060) -//ExistingRdd [num_bool#61,num_num_1#62L,num_num_2#63,num_num_3#64,num_str#65,str_bool#66] -// We should directly cast num_str to DecimalType and also need to do the right type promotion -// in the Project. -checkAnswer( - jsonDF. -where('num_str >= BigDecimal("92233720368547758060")). -select(('num_str + 1.2).as("num")), - Row(new java.math.BigDecimal("92233720368547758071.2").doubleValue()) -) - -// The following test will fail. The type of num_str is StringType. -// So, to evaluate num_str + 1.2, we first need to use Cast to convert the type. -// In our test data, one value of num_str is 13.1. -// The result of (CAST(num_str#65:4, DoubleType) + 1.2) for this value is 14.299, -// which is not 14.3. 
-// Number and String conflict: resolve the type as number in this query. -checkAnswer( - sql("select num_str + 1.2 from jsonTable where num_str > 13"), - Row(BigDecimal("14.3")) :: Row(BigDecimal("92233720368547758071.2")) :: Nil -) - } - test("Type conflict in complex field values") { val jsonDF = spark.read.json(complexFieldValueTypeConflict) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (fc12165 -> cf7c397)
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

from fc12165  [SPARK-31036][SQL] Use stringArgs in Expression.toString to respect hidden parameters
 add cf7c397  [MINOR][SQL] Remove an ignored test from JsonSuite

No new revisions were added by this update.

Summary of changes:
 .../sql/execution/datasources/json/JsonSuite.scala | 53 --
 1 file changed, 53 deletions(-)

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated: [SPARK-31036][SQL] Use stringArgs in Expression.toString to respect hidden parameters
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 5220a1c [SPARK-31036][SQL] Use stringArgs in Expression.toString to respect hidden parameters 5220a1c is described below commit 5220a1c8756a8ddcf015001c7ab5d8fa02ab8692 Author: HyukjinKwon AuthorDate: Fri Mar 6 10:33:20 2020 +0900 [SPARK-31036][SQL] Use stringArgs in Expression.toString to respect hidden parameters ### What changes were proposed in this pull request? This PR proposes to respect hidden parameters by using `stringArgs` in `Expression.toString `. By this, we can show the strings properly in some cases such as `NonSQLExpression`. ### Why are the changes needed? To respect "hidden" arguments in the string representation. ### Does this PR introduce any user-facing change? Yes, for example, on the top of https://github.com/apache/spark/pull/27657, ```scala val identify = udf((input: Seq[Int]) => input) spark.range(10).select(identify(array("id"))).show() ``` shows hidden parameter `useStringTypeWhenEmpty`. ``` +-+ |UDF(array(id, false))| +-+ | [0]| | [1]| ... ``` whereas: ```scala spark.range(10).select(array("id")).show() ``` ``` +-+ |array(id)| +-+ | [0]| | [1]| ... ``` ### How was this patch tested? Manually tested as below: ```scala val identify = udf((input: Boolean) => input) spark.range(10).select(identify(exists(array(col("id")), _ % 2 === 0))).show() ``` Before: ``` +-+ |UDF(exists(array(id), lambdafunction(((lambda 'x % 2) = 0), lambda 'x, false), true))| +-+ | true| | false| | true| ... ``` After: ``` +---+ |UDF(exists(array(id), lambdafunction(((lambda 'x % 2) = 0), lambda 'x, false)))| +---+ | true| | false| | true| ... ``` Closes #27788 from HyukjinKwon/arguments-str-repr. 
Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon (cherry picked from commit fc12165f48b2e1dfe04116ddaa6ff6e8650a18fb) Signed-off-by: HyukjinKwon --- .../scala/org/apache/spark/sql/catalyst/expressions/Expression.scala| 2 +- .../apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala| 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 4632957..1599321 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -260,7 +260,7 @@ abstract class Expression extends TreeNode[Expression] { */ def prettyName: String = nodeName.toLowerCase(Locale.ROOT) - protected def flatArguments: Iterator[Any] = productIterator.flatMap { + protected def flatArguments: Iterator[Any] = stringArgs.flatMap { case t: Iterable[_] => t case single => single :: Nil } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala index 9dd4263..e91bd0c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala @@ -533,6 +533,8 @@ case class ArrayExists( SQLConf.get.getConf(SQLConf.LEGACY_ARRAY_EXISTS_FOLLOWS_THREE_VALUED_LOGIC)) } + override def stringArgs: Iterator[Any] = super.stringArgs.take(2) + override def nullable: Boolean = if (followThreeValuedL
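The fix above boils down to toString drawing its argument list from stringArgs (which a subclass can narrow) instead of productIterator. Below is a standalone Scala sketch of that pattern; it is deliberately not Spark's real Expression/TreeNode hierarchy, and the class and field names are made up.

    // Standalone illustration of the stringArgs pattern.
    abstract class Expr extends Product {
      // By default every constructor argument shows up in toString...
      protected def stringArgs: Iterator[Any] = productIterator
      def prettyName: String = getClass.getSimpleName.toLowerCase
      override def toString: String = stringArgs.mkString(s"$prettyName(", ", ", ")")
    }

    // ...but a subclass can hide "internal" parameters by overriding stringArgs.
    case class ArrayExistsLike(arg: String, func: String, followThreeValuedLogic: Boolean)
        extends Expr {
      override protected def stringArgs: Iterator[Any] = super.stringArgs.take(2)
    }

    object StringArgsDemo {
      def main(args: Array[String]): Unit = {
        // Prints "arrayexistslike(ids, isEven)"; the boolean flag stays hidden.
        println(ArrayExistsLike("ids", "isEven", followThreeValuedLogic = true))
      }
    }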
[spark] branch master updated (72b52a3 -> fc12165)
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 72b52a3 [SPARK-30563][SQL] Disable using commit coordinator with NoopDataSource add fc12165 [SPARK-31036][SQL] Use stringArgs in Expression.toString to respect hidden parameters No new revisions were added by this update. Summary of changes: .../scala/org/apache/spark/sql/catalyst/expressions/Expression.scala| 2 +- .../apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala| 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-2.4 updated: [MINOR][SQL] Remove an ignored test from JsonSuite
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-2.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-2.4 by this push: new 7c237cc [MINOR][SQL] Remove an ignored test from JsonSuite 7c237cc is described below commit 7c237cc8f7d83d96aed282d799e8a42acdc9d623 Author: Maxim Gekk AuthorDate: Fri Mar 6 10:35:44 2020 +0900 [MINOR][SQL] Remove an ignored test from JsonSuite ### What changes were proposed in this pull request? Remove ignored and outdated test `Type conflict in primitive field values (Ignored)` from JsonSuite. ### Why are the changes needed? The test is not maintained for long time. It can be removed to reduce size of JsonSuite, and improve maintainability. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running the command `./build/sbt "test:testOnly *JsonV2Suite"` Closes #27795 from MaxGekk/remove-ignored-test-in-JsonSuite. Authored-by: Maxim Gekk Signed-off-by: HyukjinKwon (cherry picked from commit cf7c397ede05fd106697bd5cc8062f394623bf22) Signed-off-by: HyukjinKwon --- .../sql/execution/datasources/json/JsonSuite.scala | 53 -- 1 file changed, 53 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 5ca430a..9fcd67a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -468,59 +468,6 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { ) } - ignore("Type conflict in primitive field values (Ignored)") { -val jsonDF = spark.read.json(primitiveFieldValueTypeConflict) -jsonDF.createOrReplaceTempView("jsonTable") - -// Right now, the analyzer does not promote strings in a boolean expression. -// Number and Boolean conflict: resolve the type as boolean in this query. -checkAnswer( - sql("select num_bool from jsonTable where NOT num_bool"), - Row(false) -) - -checkAnswer( - sql("select str_bool from jsonTable where NOT str_bool"), - Row(false) -) - -// Right now, the analyzer does not know that num_bool should be treated as a boolean. -// Number and Boolean conflict: resolve the type as boolean in this query. -checkAnswer( - sql("select num_bool from jsonTable where num_bool"), - Row(true) -) - -checkAnswer( - sql("select str_bool from jsonTable where str_bool"), - Row(false) -) - -// The plan of the following DSL is -// Project [(CAST(num_str#65:4, DoubleType) + 1.2) AS num#78] -// Filter (CAST(CAST(num_str#65:4, DoubleType), DecimalType) > 92233720368547758060) -//ExistingRdd [num_bool#61,num_num_1#62L,num_num_2#63,num_num_3#64,num_str#65,str_bool#66] -// We should directly cast num_str to DecimalType and also need to do the right type promotion -// in the Project. -checkAnswer( - jsonDF. -where('num_str >= BigDecimal("92233720368547758060")). -select(('num_str + 1.2).as("num")), - Row(new java.math.BigDecimal("92233720368547758071.2").doubleValue()) -) - -// The following test will fail. The type of num_str is StringType. -// So, to evaluate num_str + 1.2, we first need to use Cast to convert the type. -// In our test data, one value of num_str is 13.1. -// The result of (CAST(num_str#65:4, DoubleType) + 1.2) for this value is 14.299, -// which is not 14.3. 
-// Number and String conflict: resolve the type as number in this query. -checkAnswer( - sql("select num_str + 1.2 from jsonTable where num_str > 13"), - Row(BigDecimal("14.3")) :: Row(BigDecimal("92233720368547758071.2")) :: Nil -) - } - test("Type conflict in complex field values") { val jsonDF = spark.read.json(complexFieldValueTypeConflict) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
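For context on what the removed (and long-ignored) test above was exercising, here is a hedged, minimal sketch of current behavior when a JSON field mixes numeric and string values: schema inference falls back to StringType for the conflicting field, which is part of why the old numeric-promotion expectations no longer applied. The dataset and session are illustrative, not the suite's TestJsonData fixtures.

```scala
import org.apache.spark.sql.SparkSession

object JsonTypeConflictSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("json-type-conflict")
      .getOrCreate()
    import spark.implicits._

    // One record stores num_str as a number, the other as a string.
    val conflicting = Seq(
      """{"num_str": 13.1}""",
      """{"num_str": "92233720368547758070"}"""
    ).toDS()

    val df = spark.read.json(conflicting)
    df.printSchema()            // num_str is inferred as string because the types conflict
    df.show(truncate = false)

    spark.stop()
  }
}
```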
[spark] branch branch-3.0 updated: [MINOR][SQL] Remove an ignored test from JsonSuite
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 6d8ee15 [MINOR][SQL] Remove an ignored test from JsonSuite 6d8ee15 is described below commit 6d8ee156adcac8fd2a442ab70da750f950a83661 Author: Maxim Gekk AuthorDate: Fri Mar 6 10:35:44 2020 +0900 [MINOR][SQL] Remove an ignored test from JsonSuite ### What changes were proposed in this pull request? Remove ignored and outdated test `Type conflict in primitive field values (Ignored)` from JsonSuite. ### Why are the changes needed? The test is not maintained for long time. It can be removed to reduce size of JsonSuite, and improve maintainability. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running the command `./build/sbt "test:testOnly *JsonV2Suite"` Closes #27795 from MaxGekk/remove-ignored-test-in-JsonSuite. Authored-by: Maxim Gekk Signed-off-by: HyukjinKwon (cherry picked from commit cf7c397ede05fd106697bd5cc8062f394623bf22) Signed-off-by: HyukjinKwon --- .../sql/execution/datasources/json/JsonSuite.scala | 53 -- 1 file changed, 53 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index df0bca7..fb3328c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -481,59 +481,6 @@ abstract class JsonSuite extends QueryTest with SharedSparkSession with TestJson ) } - ignore("Type conflict in primitive field values (Ignored)") { -val jsonDF = spark.read.json(primitiveFieldValueTypeConflict) -jsonDF.createOrReplaceTempView("jsonTable") - -// Right now, the analyzer does not promote strings in a boolean expression. -// Number and Boolean conflict: resolve the type as boolean in this query. -checkAnswer( - sql("select num_bool from jsonTable where NOT num_bool"), - Row(false) -) - -checkAnswer( - sql("select str_bool from jsonTable where NOT str_bool"), - Row(false) -) - -// Right now, the analyzer does not know that num_bool should be treated as a boolean. -// Number and Boolean conflict: resolve the type as boolean in this query. -checkAnswer( - sql("select num_bool from jsonTable where num_bool"), - Row(true) -) - -checkAnswer( - sql("select str_bool from jsonTable where str_bool"), - Row(false) -) - -// The plan of the following DSL is -// Project [(CAST(num_str#65:4, DoubleType) + 1.2) AS num#78] -// Filter (CAST(CAST(num_str#65:4, DoubleType), DecimalType) > 92233720368547758060) -//ExistingRdd [num_bool#61,num_num_1#62L,num_num_2#63,num_num_3#64,num_str#65,str_bool#66] -// We should directly cast num_str to DecimalType and also need to do the right type promotion -// in the Project. -checkAnswer( - jsonDF. -where('num_str >= BigDecimal("92233720368547758060")). -select(('num_str + 1.2).as("num")), - Row(new java.math.BigDecimal("92233720368547758071.2").doubleValue()) -) - -// The following test will fail. The type of num_str is StringType. -// So, to evaluate num_str + 1.2, we first need to use Cast to convert the type. -// In our test data, one value of num_str is 13.1. -// The result of (CAST(num_str#65:4, DoubleType) + 1.2) for this value is 14.299, -// which is not 14.3. 
-// Number and String conflict: resolve the type as number in this query. -checkAnswer( - sql("select num_str + 1.2 from jsonTable where num_str > 13"), - Row(BigDecimal("14.3")) :: Row(BigDecimal("92233720368547758071.2")) :: Nil -) - } - test("Type conflict in complex field values") { val jsonDF = spark.read.json(complexFieldValueTypeConflict) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (fc12165 -> cf7c397)
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from fc12165 [SPARK-31036][SQL] Use stringArgs in Expression.toString to respect hidden parameters add cf7c397 [MINOR][SQL] Remove an ignored test from JsonSuite No new revisions were added by this update. Summary of changes: .../sql/execution/datasources/json/JsonSuite.scala | 53 -- 1 file changed, 53 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated: [SPARK-30563][SQL] Disable using commit coordinator with NoopDataSource
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 6a7aa0d [SPARK-30563][SQL] Disable using commit coordinator with NoopDataSource 6a7aa0d is described below commit 6a7aa0d1666aa57213ef16cc858dbf105f0810f4 Author: Peter Toth AuthorDate: Fri Mar 6 10:30:59 2020 +0900 [SPARK-30563][SQL] Disable using commit coordinator with NoopDataSource ### What changes were proposed in this pull request? This PR disables using commit coordinator with `NoopDataSource`. ### Why are the changes needed? No need for a coordinator in benchmarks. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Existing UTs. Closes #27791 from peter-toth/SPARK-30563-disalbe-commit-coordinator. Authored-by: Peter Toth Signed-off-by: HyukjinKwon (cherry picked from commit 72b52a3cdf2b8661c0f82db645e805ff6b085dfc) Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/execution/datasources/noop/NoopDataSource.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/noop/NoopDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/noop/NoopDataSource.scala index 4fad0a2..851cc51 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/noop/NoopDataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/noop/NoopDataSource.scala @@ -61,6 +61,7 @@ private[noop] object NoopWriteBuilder extends WriteBuilder with SupportsTruncate private[noop] object NoopBatchWrite extends BatchWrite { override def createBatchWriterFactory(info: PhysicalWriteInfo): DataWriterFactory = NoopWriterFactory + override def useCommitCoordinator(): Boolean = false override def commit(messages: Array[WriterCommitMessage]): Unit = {} override def abort(messages: Array[WriterCommitMessage]): Unit = {} } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
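As a hedged usage sketch (not part of the patch itself): the noop sink exists mainly for benchmarking, where it forces full execution of the write-side plan while discarding every row, so the commit coordinator round trip buys nothing. Assuming a local SparkSession on Spark 3.0, a typical benchmark-style write looks like this:

```scala
import org.apache.spark.sql.SparkSession

object NoopSinkSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("noop-sink-demo")
      .getOrCreate()

    // The "noop" format runs the whole query but writes nothing, which is
    // exactly the benchmarking use case the commit coordinator was overhead for.
    spark.range(0, 10000000L)
      .selectExpr("id", "id % 7 AS bucket")
      .write
      .format("noop")
      .mode("overwrite")
      .save()

    spark.stop()
  }
}
```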
[spark] branch master updated (ffec7a1 -> 72b52a3)
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from ffec7a1 [SQL][DOCS][MINOR] Fix typos and wrong phrases in docs add 72b52a3 [SPARK-30563][SQL] Disable using commit coordinator with NoopDataSource No new revisions were added by this update. Summary of changes: .../org/apache/spark/sql/execution/datasources/noop/NoopDataSource.scala | 1 + 1 file changed, 1 insertion(+) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated: [SQL][DOCS][MINOR] Fix typos and wrong phrases in docs
This is an automated email from the ASF dual-hosted git repository. gengliang pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new ed7924a [SQL][DOCS][MINOR] Fix typos and wrong phrases in docs ed7924a is described below commit ed7924a86e2c4dd6b38075de68038e1997b2b30e Author: Takeshi Yamamuro AuthorDate: Thu Mar 5 16:54:59 2020 -0800 [SQL][DOCS][MINOR] Fix typos and wrong phrases in docs ### What changes were proposed in this pull request? This PR intends to fix typos and phrases in the `/docs` directory. To find them, I run the Intellij typo checker. ### Why are the changes needed? For better documents. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? N/A Closes #27819 from maropu/TypoFix-20200306. Authored-by: Takeshi Yamamuro Signed-off-by: Gengliang Wang (cherry picked from commit ffec7a1964984a7f911ac0da125134c098f273ba) Signed-off-by: Gengliang Wang --- docs/sql-pyspark-pandas-with-arrow.md| 2 +- docs/sql-ref-ansi-compliance.md | 2 +- docs/sql-ref-null-semantics.md | 2 +- docs/sql-ref-syntax-aux-show-functions.md| 2 +- docs/sql-ref-syntax-ddl-alter-table.md | 2 +- docs/sql-ref-syntax-ddl-create-table-like.md | 2 +- docs/sql-ref-syntax-qry-select-limit.md | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/sql-pyspark-pandas-with-arrow.md b/docs/sql-pyspark-pandas-with-arrow.md index 63ba0ba..e8abb9f 100644 --- a/docs/sql-pyspark-pandas-with-arrow.md +++ b/docs/sql-pyspark-pandas-with-arrow.md @@ -91,7 +91,7 @@ specify the type hints of `pandas.Series` and `pandas.DataFrame` as below: -In the following sections, it describes the cominations of the supported type hints. For simplicity, +In the following sections, it describes the combinations of the supported type hints. For simplicity, `pandas.DataFrame` variant is omitted. ### Series to Series diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 267184a..27e60b4 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -60,7 +60,7 @@ The following subsections present behaviour changes in arithmetic operations, ty ### Arithmetic Operations In Spark SQL, arithmetic operations performed on numeric types (with the exception of decimal) are not checked for overflows by default. -This means that in case an operation causes overflows, the result is the same that the same operation returns in a Java/Scala program (e.g., if the sum of 2 integers is higher than the maximum value representable, the result is a negative number). +This means that in case an operation causes overflows, the result is the same with the corresponding operation in a Java/Scala program (e.g., if the sum of 2 integers is higher than the maximum value representable, the result is a negative number). On the other hand, Spark SQL returns null for decimal overflows. When `spark.sql.ansi.enabled` is set to `true` and an overflow occurs in numeric and interval arithmetic operations, it throws an arithmetic exception at runtime. diff --git a/docs/sql-ref-null-semantics.md b/docs/sql-ref-null-semantics.md index 3cbc15c..37b4081 100644 --- a/docs/sql-ref-null-semantics.md +++ b/docs/sql-ref-null-semantics.md @@ -605,7 +605,7 @@ SELECT name, age FROM unknown_age; In Spark, EXISTS and NOT EXISTS expressions are allowed inside a WHERE clause. These are boolean expressions which return either `TRUE` or `FALSE`. 
In other words, EXISTS is a membership condition and returns `TRUE` -when the subquery it refers to returns one or more rows. Similary, NOT EXISTS +when the subquery it refers to returns one or more rows. Similarly, NOT EXISTS is a non-membership condition and returns TRUE when no rows or zero rows are returned from the subquery. diff --git a/docs/sql-ref-syntax-aux-show-functions.md b/docs/sql-ref-syntax-aux-show-functions.md index 701d427..d6f9df9 100644 --- a/docs/sql-ref-syntax-aux-show-functions.md +++ b/docs/sql-ref-syntax-aux-show-functions.md @@ -22,7 +22,7 @@ license: | ### Description Returns the list of functions after applying an optional regex pattern. Given number of functions supported by Spark is quite large, this statement -in conjuction with [describe function](sql-ref-syntax-aux-describe-function.html) +in conjunction with [describe function](sql-ref-syntax-aux-describe-function.html) may be used to quickly find the function and understand its usage. The `LIKE` clause is optional and supported only for compatibility with other systems. diff --git a/docs/sql-ref-syntax-ddl-alter-table.md b/docs/sql-ref-syntax-ddl-alter-table.md index a921478..373fa8d 100644 --- a/docs/sql-ref-syntax-ddl-alter-table.md +++ b/do
[spark] branch master updated: [SQL][DOCS][MINOR] Fix typos and wrong phrases in docs
This is an automated email from the ASF dual-hosted git repository. gengliang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new ffec7a1 [SQL][DOCS][MINOR] Fix typos and wrong phrases in docs ffec7a1 is described below commit ffec7a1964984a7f911ac0da125134c098f273ba Author: Takeshi Yamamuro AuthorDate: Thu Mar 5 16:54:59 2020 -0800 [SQL][DOCS][MINOR] Fix typos and wrong phrases in docs ### What changes were proposed in this pull request? This PR intends to fix typos and phrases in the `/docs` directory. To find them, I run the Intellij typo checker. ### Why are the changes needed? For better documents. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? N/A Closes #27819 from maropu/TypoFix-20200306. Authored-by: Takeshi Yamamuro Signed-off-by: Gengliang Wang --- docs/sql-pyspark-pandas-with-arrow.md| 2 +- docs/sql-ref-ansi-compliance.md | 2 +- docs/sql-ref-null-semantics.md | 2 +- docs/sql-ref-syntax-aux-show-functions.md| 2 +- docs/sql-ref-syntax-ddl-alter-table.md | 2 +- docs/sql-ref-syntax-ddl-create-table-like.md | 2 +- docs/sql-ref-syntax-qry-select-limit.md | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/sql-pyspark-pandas-with-arrow.md b/docs/sql-pyspark-pandas-with-arrow.md index 63ba0ba..e8abb9f 100644 --- a/docs/sql-pyspark-pandas-with-arrow.md +++ b/docs/sql-pyspark-pandas-with-arrow.md @@ -91,7 +91,7 @@ specify the type hints of `pandas.Series` and `pandas.DataFrame` as below: -In the following sections, it describes the cominations of the supported type hints. For simplicity, +In the following sections, it describes the combinations of the supported type hints. For simplicity, `pandas.DataFrame` variant is omitted. ### Series to Series diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 267184a..27e60b4 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -60,7 +60,7 @@ The following subsections present behaviour changes in arithmetic operations, ty ### Arithmetic Operations In Spark SQL, arithmetic operations performed on numeric types (with the exception of decimal) are not checked for overflows by default. -This means that in case an operation causes overflows, the result is the same that the same operation returns in a Java/Scala program (e.g., if the sum of 2 integers is higher than the maximum value representable, the result is a negative number). +This means that in case an operation causes overflows, the result is the same with the corresponding operation in a Java/Scala program (e.g., if the sum of 2 integers is higher than the maximum value representable, the result is a negative number). On the other hand, Spark SQL returns null for decimal overflows. When `spark.sql.ansi.enabled` is set to `true` and an overflow occurs in numeric and interval arithmetic operations, it throws an arithmetic exception at runtime. diff --git a/docs/sql-ref-null-semantics.md b/docs/sql-ref-null-semantics.md index 3cbc15c..37b4081 100644 --- a/docs/sql-ref-null-semantics.md +++ b/docs/sql-ref-null-semantics.md @@ -605,7 +605,7 @@ SELECT name, age FROM unknown_age; In Spark, EXISTS and NOT EXISTS expressions are allowed inside a WHERE clause. These are boolean expressions which return either `TRUE` or `FALSE`. In other words, EXISTS is a membership condition and returns `TRUE` -when the subquery it refers to returns one or more rows. 
Similary, NOT EXISTS +when the subquery it refers to returns one or more rows. Similarly, NOT EXISTS is a non-membership condition and returns TRUE when no rows or zero rows are returned from the subquery. diff --git a/docs/sql-ref-syntax-aux-show-functions.md b/docs/sql-ref-syntax-aux-show-functions.md index 701d427..d6f9df9 100644 --- a/docs/sql-ref-syntax-aux-show-functions.md +++ b/docs/sql-ref-syntax-aux-show-functions.md @@ -22,7 +22,7 @@ license: | ### Description Returns the list of functions after applying an optional regex pattern. Given number of functions supported by Spark is quite large, this statement -in conjuction with [describe function](sql-ref-syntax-aux-describe-function.html) +in conjunction with [describe function](sql-ref-syntax-aux-describe-function.html) may be used to quickly find the function and understand its usage. The `LIKE` clause is optional and supported only for compatibility with other systems. diff --git a/docs/sql-ref-syntax-ddl-alter-table.md b/docs/sql-ref-syntax-ddl-alter-table.md index a921478..373fa8d 100644 --- a/docs/sql-ref-syntax-ddl-alter-table.md +++ b/docs/sql-ref-syntax-ddl-alter-table.md @@ -260,7 +260,7 @@ ALTER TABLE dbx.tab1 PARTITION (a='1', b='2') SET LOCATION
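The arithmetic-overflow passage reworded in sql-ref-ansi-compliance.md above is easy to check interactively. A minimal sketch, assuming a running SparkSession named `spark` (the session name and the literal values are illustrative, not part of the patch):

```scala
// Non-ANSI default: integer arithmetic is not checked for overflow, so the result wraps
// around exactly as the corresponding operation would in a Java/Scala program.
spark.sql("SELECT 2147483647 + 1").show()          // -2147483648

// With ANSI mode enabled, the same overflow is expected to raise an arithmetic exception
// at runtime instead of returning a wrapped value.
spark.conf.set("spark.sql.ansi.enabled", "true")
spark.sql("SELECT 2147483647 + 1").show()          // throws at runtime
```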
[spark] branch branch-3.0 updated: [SPARK-30994][BUILD][FOLLOW-UP] Change scope of xml-apis to include it and add xerces in SBT as dependency override
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 32dc6ac [SPARK-30994][BUILD][FOLLOW-UP] Change scope of xml-apis to include it and add xerces in SBT as dependency override 32dc6ac is described below commit 32dc6acdf5615cdd668fa65249d6d170692acb19 Author: HyukjinKwon AuthorDate: Fri Mar 6 09:39:02 2020 +0900 [SPARK-30994][BUILD][FOLLOW-UP] Change scope of xml-apis to include it and add xerces in SBT as dependency override ### What changes were proposed in this pull request? This PR propose 1. Explicitly include xml-apis. xml-apis is already the part of xerces 2.12.0 (https://repo1.maven.org/maven2/xerces/xercesImpl/2.12.0/xercesImpl-2.12.0.pom). However, we're excluding it by setting `scope` to `test`. This seems causing `spark-shell`, built from Maven, to fail. Seems like previously xml-apis wasn't reached for some reasons but after we upgrade, it seems requiring. Therefore, this PR proposes to include it. 2. Pins `xerces` version in SBT as well. Seems this dependency is resolved differently from Maven. Note that Hadoop 3 does not looks requiring this as they replaced xerces as of [HDFS-12221](https://issues.apache.org/jira/browse/HDFS-12221). ### Why are the changes needed? To make `spark-shell` working from Maven build, and uses the same xerces version. ### Does this PR introduce any user-facing change? No, it's master only. ### How was this patch tested? **1.** ```bash ./build/mvn -DskipTests -Psparkr -Phive clean package ./bin/spark-shell ``` Before: ``` Exception in thread "main" java.lang.NoClassDefFoundError: org/w3c/dom/ElementTraversal at java.lang.ClassLoader.defineClass1(Native Method) at java.lang.ClassLoader.defineClass(ClassLoader.java:763) at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142) at java.net.URLClassLoader.defineClass(URLClassLoader.java:468) at java.net.URLClassLoader.access$100(URLClassLoader.java:74) at java.net.URLClassLoader$1.run(URLClassLoader.java:369) at java.net.URLClassLoader$1.run(URLClassLoader.java:363) at java.security.AccessController.doPrivileged(Native Method) at java.net.URLClassLoader.findClass(URLClassLoader.java:362) at java.lang.ClassLoader.loadClass(ClassLoader.java:424) at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:349) at java.lang.ClassLoader.loadClass(ClassLoader.java:357) at org.apache.xerces.parsers.AbstractDOMParser.startDocument(Unknown Source) at org.apache.xerces.xinclude.XIncludeHandler.startDocument(Unknown Source) at org.apache.xerces.impl.dtd.XMLDTDValidator.startDocument(Unknown Source) at org.apache.xerces.impl.XMLDocumentScannerImpl.startEntity(Unknown Source) at org.apache.xerces.impl.XMLVersionDetector.startDocumentParsing(Unknown Source) at org.apache.xerces.parsers.XML11Configuration.parse(Unknown Source) at org.apache.xerces.parsers.XML11Configuration.parse(Unknown Source) at org.apache.xerces.parsers.XMLParser.parse(Unknown Source) at org.apache.xerces.parsers.DOMParser.parse(Unknown Source) at org.apache.xerces.jaxp.DocumentBuilderImpl.parse(Unknown Source) at javax.xml.parsers.DocumentBuilder.parse(DocumentBuilder.java:150) at org.apache.hadoop.conf.Configuration.parse(Configuration.java:2482) at org.apache.hadoop.conf.Configuration.parse(Configuration.java:2470) at org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:2541) at 
org.apache.hadoop.conf.Configuration.loadResources(Configuration.java:2494) at org.apache.hadoop.conf.Configuration.getProps(Configuration.java:2407) at org.apache.hadoop.conf.Configuration.set(Configuration.java:1143) at org.apache.hadoop.conf.Configuration.set(Configuration.java:1115) at org.apache.spark.deploy.SparkHadoopUtil$.org$apache$spark$deploy$SparkHadoopUtil$$appendS3AndSparkHadoopHiveConfigurations(SparkHadoopUtil.scala:456) at org.apache.spark.deploy.SparkHadoopUtil$.newConfiguration(SparkHadoopUtil.scala:427) at org.apache.spark.deploy.SparkSubmit.$anonfun$prepareSubmitEnvironment$2(SparkSubmit.scala:342) at scala.Option.getOrElse(Option.scala:189) at org.apache.spark.deploy.SparkSubmit.prepareSubmitEnvironment(SparkSubmit.scala:342) at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:871) at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180) at org.apache.spark.deploy.SparkSubmit.su
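For reference, the SBT pinning described in point 2 above amounts to a dependency override along these lines. This is a rough sketch only, with coordinates taken from the xerces 2.12.0 POM mentioned in the description rather than copied from the actual SBT build:

```scala
// Hypothetical build.sbt fragment, not the real project/SparkBuild.scala change:
// force a single xerces version so SBT resolves the same xercesImpl (and the
// xml-apis it brings in) as the Maven build does.
dependencyOverrides += "xerces" % "xercesImpl" % "2.12.0"
```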
[spark] branch master updated (fe126a6 -> 5b3277f)
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from fe126a6 [SPARK-31058][SQL][TEST-HIVE1.2] Consolidate the implementation of `quoteIfNeeded` add 5b3277f [SPARK-30994][BUILD][FOLLOW-UP] Change scope of xml-apis to include it and add xerces in SBT as dependency override No new revisions were added by this update. Summary of changes: dev/deps/spark-deps-hadoop-2.7-hive-1.2 | 1 + dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 1 + pom.xml | 1 - project/SparkBuild.scala| 1 + 4 files changed, 3 insertions(+), 1 deletion(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated: [SPARK-31058][SQL][TEST-HIVE1.2] Consolidate the implementation of `quoteIfNeeded`
This is an automated email from the ASF dual-hosted git repository. dbtsai pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 853f69a [SPARK-31058][SQL][TEST-HIVE1.2] Consolidate the implementation of `quoteIfNeeded` 853f69a is described below commit 853f69a4cf4bf138afb9075325ad175bc5f2d334 Author: DB Tsai AuthorDate: Fri Mar 6 00:13:57 2020 + [SPARK-31058][SQL][TEST-HIVE1.2] Consolidate the implementation of `quoteIfNeeded` ### What changes were proposed in this pull request? There are two implementation of quoteIfNeeded. One is in `org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quote` and the other is in `OrcFiltersBase.quoteAttributeNameIfNeeded`. This PR will consolidate them into one. ### Why are the changes needed? Simplify the codebase. ### Does this PR introduce any user-facing change? No ### How was this patch tested? Existing UTs. Closes #27814 from dbtsai/SPARK-31058. Authored-by: DB Tsai Signed-off-by: DB Tsai (cherry picked from commit fe126a6a05b0de8db68c8f890e876ce96bf194ca) Signed-off-by: DB Tsai --- .../spark/sql/connector/catalog/IdentifierImpl.java | 2 +- .../sql/connector/catalog/CatalogV2Implicits.scala | 10 +- .../spark/sql/connector/catalog/V1Table.scala | 13 +++-- .../catalyst/analysis/ResolveSessionCatalog.scala | 2 +- .../execution/datasources/orc/OrcFiltersBase.scala | 10 -- .../sql/execution/datasources/orc/OrcFilters.scala | 21 - .../sql/execution/datasources/orc/OrcFilters.scala | 21 - 7 files changed, 34 insertions(+), 45 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java index a56007b..30596d9 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java @@ -55,7 +55,7 @@ class IdentifierImpl implements Identifier { @Override public String toString() { return Stream.concat(Stream.of(namespace), Stream.of(name)) - .map(CatalogV2Implicits::quote) + .map(CatalogV2Implicits::quoteIfNeeded) .collect(Collectors.joining(".")); } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala index 3478af8..71bab62 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala @@ -84,15 +84,15 @@ private[sql] object CatalogV2Implicits { } implicit class NamespaceHelper(namespace: Array[String]) { -def quoted: String = namespace.map(quote).mkString(".") +def quoted: String = namespace.map(quoteIfNeeded).mkString(".") } implicit class IdentifierHelper(ident: Identifier) { def quoted: String = { if (ident.namespace.nonEmpty) { -ident.namespace.map(quote).mkString(".") + "." + quote(ident.name) +ident.namespace.map(quoteIfNeeded).mkString(".") + "." 
+ quoteIfNeeded(ident.name) } else { -quote(ident.name) +quoteIfNeeded(ident.name) } } @@ -122,10 +122,10 @@ private[sql] object CatalogV2Implicits { s"$quoted is not a valid TableIdentifier as it has more than 2 name parts.") } -def quoted: String = parts.map(quote).mkString(".") +def quoted: String = parts.map(quoteIfNeeded).mkString(".") } - def quote(part: String): String = { + def quoteIfNeeded(part: String): String = { if (part.contains(".") || part.contains("`")) { s"`${part.replace("`", "``")}`" } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index 91e0c58..70fc968 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -24,6 +24,7 @@ import scala.collection.mutable import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded import org.apache.spark.sql.connector.expressions.{LogicalExpressions, Transform} import org.apache.spark.sql.types.StructType @@ -35,20 +36,12 @@ private[sql] case class V1Table(v1Table: CatalogTable) extends Table { def quoted: String = { identif
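The consolidated helper itself is small; below is a self-contained restatement of the quoting rule visible in the CatalogV2Implicits diff above, for readers who want to try it outside Spark:

```scala
// Quote a name part only when needed: parts containing a dot or a backtick are wrapped
// in backticks, with embedded backticks doubled; everything else is left untouched.
def quoteIfNeeded(part: String): String = {
  if (part.contains(".") || part.contains("`")) {
    s"`${part.replace("`", "``")}`"
  } else {
    part
  }
}

quoteIfNeeded("col1")                                     // col1
quoteIfNeeded("my.col")                                   // `my.col`
Seq("db", "weird`name").map(quoteIfNeeded).mkString(".")  // db.`weird``name`
```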
[spark] branch master updated (8d5ef2f -> fe126a6)
This is an automated email from the ASF dual-hosted git repository. dbtsai pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 8d5ef2f [SPARK-31052][TEST][CORE] Fix flaky test "DAGSchedulerSuite.shuffle fetch failed on speculative task, but original task succeed" add fe126a6 [SPARK-31058][SQL][TEST-HIVE1.2] Consolidate the implementation of `quoteIfNeeded` No new revisions were added by this update. Summary of changes: .../spark/sql/connector/catalog/IdentifierImpl.java | 2 +- .../sql/connector/catalog/CatalogV2Implicits.scala | 10 +- .../spark/sql/connector/catalog/V1Table.scala | 13 +++-- .../catalyst/analysis/ResolveSessionCatalog.scala | 2 +- .../execution/datasources/orc/OrcFiltersBase.scala | 10 -- .../sql/execution/datasources/orc/OrcFilters.scala | 21 - .../sql/execution/datasources/orc/OrcFilters.scala | 21 - 7 files changed, 34 insertions(+), 45 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated (80a8947 -> 515eb9d)
This is an automated email from the ASF dual-hosted git repository. gengliang pushed a change to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git. from 80a8947 [SPARK-31052][TEST][CORE] Fix flaky test "DAGSchedulerSuite.shuffle fetch failed on speculative task, but original task succeed" add 515eb9d [SPARK-31013][CORE][WEBUI] InMemoryStore: improve removeAllByIndexValues over natural key index No new revisions were added by this update. Summary of changes: .../org/apache/spark/util/kvstore/InMemoryStore.java | 16 +++- .../apache/spark/util/kvstore/InMemoryStoreSuite.java | 18 -- 2 files changed, 23 insertions(+), 11 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated: [SPARK-31052][TEST][CORE] Fix flaky test "DAGSchedulerSuite.shuffle fetch failed on speculative task, but original task succeed"
This is an automated email from the ASF dual-hosted git repository. jiangxb1987 pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 80a8947 [SPARK-31052][TEST][CORE] Fix flaky test "DAGSchedulerSuite.shuffle fetch failed on speculative task, but original task succeed" 80a8947 is described below commit 80a894785121777aedfe340d0acacbbd17630cb3 Author: yi.wu AuthorDate: Thu Mar 5 10:56:49 2020 -0800 [SPARK-31052][TEST][CORE] Fix flaky test "DAGSchedulerSuite.shuffle fetch failed on speculative task, but original task succeed" ### What changes were proposed in this pull request? This PR fix the flaky test in #27050. ### Why are the changes needed? `SparkListenerStageCompleted` is posted by `listenerBus` asynchronously. So, we should make sure listener has consumed the event before asserting completed stages. See [error message](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/119308/testReport/org.apache.spark.scheduler/DAGSchedulerSuite/shuffle_fetch_failed_on_speculative_task__but_original_task_succeed__SPARK_30388_/): ``` sbt.ForkMain$ForkError: org.scalatest.exceptions.TestFailedException: List(0, 1, 1) did not equal List(0, 1, 1, 0) at org.scalatest.Assertions.newAssertionFailedException(Assertions.scala:530) at org.scalatest.Assertions.newAssertionFailedException$(Assertions.scala:529) at org.scalatest.FunSuite.newAssertionFailedException(FunSuite.scala:1560) at org.scalatest.Assertions$AssertionsHelper.macroAssert(Assertions.scala:503) at org.apache.spark.scheduler.DAGSchedulerSuite.$anonfun$new$88(DAGSchedulerSuite.scala:1976) ``` ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Update test and test locally by no failure after running hundreds of times. Note, the failure is easy to reproduce when loop running the test for hundreds of times(e.g 200) Closes #27809 from Ngone51/fix_flaky_spark_30388. 
Authored-by: yi.wu Signed-off-by: Xingbo Jiang (cherry picked from commit 8d5ef2f766166cce3cc7a15a98ec016050ede4d8) Signed-off-by: Xingbo Jiang --- .../test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala| 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 72a2e4c..02bc216 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -1931,7 +1931,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi assertDataStructuresEmpty() } - test("shuffle fetch failed on speculative task, but original task succeed (SPARK-30388)") { + test("SPARK-30388: shuffle fetch failed on speculative task, but original task succeed") { var completedStage: List[Int] = Nil val listener = new SparkListener() { override def onStageCompleted(event: SparkListenerStageCompleted): Unit = { @@ -1945,6 +1945,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi val reduceRdd = new MyRDD(sc, 2, List(shuffleDep)) submit(reduceRdd, Array(0, 1)) completeShuffleMapStageSuccessfully(0, 0, 2) +sc.listenerBus.waitUntilEmpty() assert(completedStage === List(0)) // result task 0.0 succeed @@ -1960,6 +1961,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi info ) ) +sc.listenerBus.waitUntilEmpty() assert(completedStage === List(0, 1)) Thread.sleep(DAGScheduler.RESUBMIT_TIMEOUT * 2) @@ -1971,6 +1973,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi // original result task 1.0 succeed runEvent(makeCompletionEvent(taskSets(1).tasks(1), Success, 42)) +sc.listenerBus.waitUntilEmpty() assert(completedStage === List(0, 1, 1, 0)) assert(scheduler.activeJobs.isEmpty) } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-31052][TEST][CORE] Fix flaky test "DAGSchedulerSuite.shuffle fetch failed on speculative task, but original task succeed"
This is an automated email from the ASF dual-hosted git repository. jiangxb1987 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 8d5ef2f [SPARK-31052][TEST][CORE] Fix flaky test "DAGSchedulerSuite.shuffle fetch failed on speculative task, but original task succeed" 8d5ef2f is described below commit 8d5ef2f766166cce3cc7a15a98ec016050ede4d8 Author: yi.wu AuthorDate: Thu Mar 5 10:56:49 2020 -0800 [SPARK-31052][TEST][CORE] Fix flaky test "DAGSchedulerSuite.shuffle fetch failed on speculative task, but original task succeed" ### What changes were proposed in this pull request? This PR fix the flaky test in #27050. ### Why are the changes needed? `SparkListenerStageCompleted` is posted by `listenerBus` asynchronously. So, we should make sure listener has consumed the event before asserting completed stages. See [error message](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/119308/testReport/org.apache.spark.scheduler/DAGSchedulerSuite/shuffle_fetch_failed_on_speculative_task__but_original_task_succeed__SPARK_30388_/): ``` sbt.ForkMain$ForkError: org.scalatest.exceptions.TestFailedException: List(0, 1, 1) did not equal List(0, 1, 1, 0) at org.scalatest.Assertions.newAssertionFailedException(Assertions.scala:530) at org.scalatest.Assertions.newAssertionFailedException$(Assertions.scala:529) at org.scalatest.FunSuite.newAssertionFailedException(FunSuite.scala:1560) at org.scalatest.Assertions$AssertionsHelper.macroAssert(Assertions.scala:503) at org.apache.spark.scheduler.DAGSchedulerSuite.$anonfun$new$88(DAGSchedulerSuite.scala:1976) ``` ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Update test and test locally by no failure after running hundreds of times. Note, the failure is easy to reproduce when loop running the test for hundreds of times(e.g 200) Closes #27809 from Ngone51/fix_flaky_spark_30388. 
Authored-by: yi.wu Signed-off-by: Xingbo Jiang --- .../test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala| 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 2b2fd32..4486389 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -1933,7 +1933,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi assertDataStructuresEmpty() } - test("shuffle fetch failed on speculative task, but original task succeed (SPARK-30388)") { + test("SPARK-30388: shuffle fetch failed on speculative task, but original task succeed") { var completedStage: List[Int] = Nil val listener = new SparkListener() { override def onStageCompleted(event: SparkListenerStageCompleted): Unit = { @@ -1947,6 +1947,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi val reduceRdd = new MyRDD(sc, 2, List(shuffleDep)) submit(reduceRdd, Array(0, 1)) completeShuffleMapStageSuccessfully(0, 0, 2) +sc.listenerBus.waitUntilEmpty() assert(completedStage === List(0)) // result task 0.0 succeed @@ -1962,6 +1963,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi info ) ) +sc.listenerBus.waitUntilEmpty() assert(completedStage === List(0, 1)) Thread.sleep(DAGScheduler.RESUBMIT_TIMEOUT * 2) @@ -1973,6 +1975,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi // original result task 1.0 succeed runEvent(makeCompletionEvent(taskSets(1).tasks(1), Success, 42)) +sc.listenerBus.waitUntilEmpty() assert(completedStage === List(0, 1, 1, 0)) assert(scheduler.activeJobs.isEmpty) } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
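The pattern behind the fix, stripped of the DAGScheduler specifics, looks roughly like the following. This is a sketch that assumes a running SparkContext `sc` and, like the suite, code living in an `org.apache.spark` package (since `listenerBus` is `private[spark]`):

```scala
import org.apache.spark.scheduler.{SparkListener, SparkListenerStageCompleted}

// Record completed stage ids via a listener. Listener events are delivered asynchronously
// on the listener bus, so the bus must be drained before asserting on the recorded state.
var completedStages: List[Int] = Nil
sc.addSparkListener(new SparkListener {
  override def onStageCompleted(event: SparkListenerStageCompleted): Unit = {
    completedStages :+= event.stageInfo.stageId
  }
})

sc.parallelize(1 to 10).count()    // run a small job that completes at least one stage
sc.listenerBus.waitUntilEmpty()    // flush pending events -- the step the flaky test was missing
assert(completedStages.nonEmpty)
```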
[spark] branch branch-3.0 updated: [SPARK-31037][SQL] refine AQE config names
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 46b7f17 [SPARK-31037][SQL] refine AQE config names 46b7f17 is described below commit 46b7f1796bd0b96977ce9b473601033f397a3b18 Author: Wenchen Fan AuthorDate: Fri Mar 6 00:46:34 2020 +0800 [SPARK-31037][SQL] refine AQE config names When introducing AQE to others, I feel the config names are a bit incoherent and hard to use. This PR refines the config names: 1. remove the "shuffle" prefix. AQE is all about shuffle and we don't need to add the "shuffle" prefix everywhere. 2. `targetPostShuffleInputSize` is obscure, rename to `advisoryShufflePartitionSizeInBytes`. 3. `reducePostShufflePartitions` doesn't match the actual optimization, rename to `coalesceShufflePartitions` 4. `minNumPostShufflePartitions` is obscure, rename it `minPartitionNum` under the `coalesceShufflePartitions` namespace 5. `maxNumPostShufflePartitions` is confusing with the word "max", rename it `initialPartitionNum` 6. `skewedJoinOptimization` is too verbose. skew join is a well-known terminology in database area, we can just say `skewJoin` Make the config names easy to understand. deprecate the config `spark.sql.adaptive.shuffle.targetPostShuffleInputSize` N/A Closes #27793 from cloud-fan/aqe. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/internal/SQLConf.scala| 142 +++-- .../spark/sql/execution/ShuffledRowRDD.scala | 2 +- .../execution/adaptive/AdaptiveSparkPlanExec.scala | 2 +- ...tions.scala => CoalesceShufflePartitions.scala} | 14 +- .../execution/adaptive/OptimizeSkewedJoin.scala| 14 +- .../execution/exchange/EnsureRequirements.scala| 4 +- scala => CoalesceShufflePartitionsSuite.scala} | 12 +- .../adaptive/AdaptiveQueryExecSuite.scala | 8 +- .../apache/spark/sql/internal/SQLConfSuite.scala | 26 ++-- .../spark/sql/sources/BucketedReadSuite.scala | 2 +- 10 files changed, 117 insertions(+), 109 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 3dbfc65..b2b3d12 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -356,8 +356,16 @@ object SQLConf { .checkValue(_ > 0, "The value of spark.sql.shuffle.partitions must be positive") .createWithDefault(200) + val SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE = +buildConf("spark.sql.adaptive.shuffle.targetPostShuffleInputSize") + .internal() + .doc("(Deprecated since Spark 3.0)") + .bytesConf(ByteUnit.BYTE) + .createWithDefaultString("64MB") + val ADAPTIVE_EXECUTION_ENABLED = buildConf("spark.sql.adaptive.enabled") -.doc("When true, enable adaptive query execution.") +.doc("When true, enable adaptive query execution, which re-optimizes the query plan in the " + + "middle of query execution, based on accurate runtime statistics.") .booleanConf .createWithDefault(false) @@ -365,90 +373,90 @@ object SQLConf { .internal() .doc("Adaptive query execution is skipped when the query does not have exchanges or " + "sub-queries. 
By setting this config to true (together with " + - s"'${ADAPTIVE_EXECUTION_ENABLED.key}' enabled), Spark will force apply adaptive query " + + s"'${ADAPTIVE_EXECUTION_ENABLED.key}' set to true), Spark will force apply adaptive query " + "execution for all supported queries.") .booleanConf .createWithDefault(false) - val REDUCE_POST_SHUFFLE_PARTITIONS_ENABLED = -buildConf("spark.sql.adaptive.shuffle.reducePostShufflePartitions") - .doc(s"When true and '${ADAPTIVE_EXECUTION_ENABLED.key}' is enabled, this enables reducing " + -"the number of post-shuffle partitions based on map output statistics.") - .booleanConf - .createWithDefault(true) + val ADVISORY_PARTITION_SIZE_IN_BYTES = +buildConf("spark.sql.adaptive.advisoryPartitionSizeInBytes") + .doc("The advisory size in bytes of the shuffle partition during adaptive optimization " + +s"(when ${ADAPTIVE_EXECUTION_ENABLED.key} is true). It takes effect when Spark " + +"coalesces small shuffle partitions or splits skewed shuffle partition.") + .fallbackConf(SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE) - val FETCH_SHUFFLE_BLOCKS_IN_BATCH_ENABLED = -buildConf("spark.sql.adaptive.shuffle.fetchShuffleBlocksInBatch") - .doc("Whether to fetch the continuous shuffle blocks in batch. Instead of fetching blocks " + -"one by one, fetching continuous shuffle
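In user code the renaming shows up only through the config keys. A short sketch using just the names that appear in this commit, assuming a SparkSession `spark`:

```scala
// Enable adaptive execution and set the advisory partition size under its new,
// un-prefixed name.
spark.conf.set("spark.sql.adaptive.enabled", "true")
spark.conf.set("spark.sql.adaptive.advisoryPartitionSizeInBytes", "64MB")

// The old key is kept only as a deprecated fallback for the one above:
// spark.conf.set("spark.sql.adaptive.shuffle.targetPostShuffleInputSize", "64MB")
```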
[spark] branch master updated (2257ce2 -> ba86524)
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 2257ce2 [SPARK-31034][CORE] ShuffleBlockFetcherIterator should always create request for last block group add ba86524 [SPARK-31037][SQL] refine AQE config names No new revisions were added by this update. Summary of changes: .../org/apache/spark/sql/internal/SQLConf.scala| 149 +++-- .../spark/sql/execution/ShuffledRowRDD.scala | 2 +- .../execution/adaptive/AdaptiveSparkPlanExec.scala | 2 +- ...tions.scala => CoalesceShufflePartitions.scala} | 14 +- .../execution/adaptive/OptimizeSkewedJoin.scala| 14 +- .../execution/exchange/EnsureRequirements.scala| 4 +- scala => CoalesceShufflePartitionsSuite.scala} | 12 +- .../adaptive/AdaptiveQueryExecSuite.scala | 8 +- .../apache/spark/sql/internal/SQLConfSuite.scala | 26 ++-- .../spark/sql/sources/BucketedReadSuite.scala | 2 +- 10 files changed, 121 insertions(+), 112 deletions(-) rename sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/{ReduceNumShufflePartitions.scala => CoalesceShufflePartitions.scala} (90%) rename sql/core/src/test/scala/org/apache/spark/sql/execution/{ReduceNumShufflePartitionsSuite.scala => CoalesceShufflePartitionsSuite.scala} (96%) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated: [SPARK-31034][CORE] ShuffleBlockFetcherIterator should always create request for last block group
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new f34898c [SPARK-31034][CORE] ShuffleBlockFetcherIterator should always create request for last block group f34898c is described below commit f34898c5e19c9a35c091eded9652cd5e3d661d19 Author: yi.wu AuthorDate: Thu Mar 5 21:31:26 2020 +0800 [SPARK-31034][CORE] ShuffleBlockFetcherIterator should always create request for last block group ### What changes were proposed in this pull request? This is a bug fix of #27280. This PR fix the bug where `ShuffleBlockFetcherIterator` may forget to create request for the last block group. ### Why are the changes needed? When (all blocks).sum < `targetRemoteRequestSize` and (all blocks).length > `maxBlocksInFlightPerAddress` and (last block group).size < `maxBlocksInFlightPerAddress`, `ShuffleBlockFetcherIterator` will not create a request for the last group. Thus, it will lost data for the reduce task. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Updated test. Closes #27786 from Ngone51/fix_no_request_bug. Authored-by: yi.wu Signed-off-by: Wenchen Fan (cherry picked from commit 2257ce24437f05c417821c02e3e44c77c93f7211) Signed-off-by: Wenchen Fan --- .../storage/ShuffleBlockFetcherIterator.scala | 8 +- .../storage/ShuffleBlockFetcherIteratorSuite.scala | 91 ++ 2 files changed, 78 insertions(+), 21 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala index cd4c860..2a0447d 100644 --- a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala +++ b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala @@ -339,14 +339,14 @@ final class ShuffleBlockFetcherIterator( + s"with ${blocks.size} blocks") } -def createFetchRequests(): Unit = { +def createFetchRequests(isLast: Boolean): Unit = { val mergedBlocks = mergeContinuousShuffleBlockIdsIfNeeded(curBlocks) curBlocks = new ArrayBuffer[FetchBlockInfo] if (mergedBlocks.length <= maxBlocksInFlightPerAddress) { createFetchRequest(mergedBlocks) } else { mergedBlocks.grouped(maxBlocksInFlightPerAddress).foreach { blocks => - if (blocks.length == maxBlocksInFlightPerAddress) { + if (blocks.length == maxBlocksInFlightPerAddress || isLast) { createFetchRequest(blocks) } else { // The last group does not exceed `maxBlocksInFlightPerAddress`. Put it back @@ -367,12 +367,12 @@ final class ShuffleBlockFetcherIterator( // For batch fetch, the actual block in flight should count for merged block. 
val mayExceedsMaxBlocks = !doBatchFetch && curBlocks.size >= maxBlocksInFlightPerAddress if (curRequestSize >= targetRemoteRequestSize || mayExceedsMaxBlocks) { -createFetchRequests() +createFetchRequests(isLast = false) } } // Add in the final request if (curBlocks.nonEmpty) { - createFetchRequests() + createFetchRequests(isLast = true) } } diff --git a/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala b/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala index 2090a51..773629c 100644 --- a/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala @@ -433,32 +433,86 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT assert(blockManager.hostLocalDirManager.get.getCachedHostLocalDirs().size === 1) } - test("fetch continuous blocks in batch respects maxSize and maxBlocks") { + test("fetch continuous blocks in batch should respect maxBytesInFlight") { val blockManager = mock(classOf[BlockManager]) val localBmId = BlockManagerId("test-client", "test-local-host", 1) doReturn(localBmId).when(blockManager).blockManagerId // Make sure remote blocks would return the merged block -val remoteBmId = BlockManagerId("test-client-1", "test-client-1", 2) -val remoteBlocks = Seq[BlockId]( +val remoteBmId1 = BlockManagerId("test-client-1", "test-client-1", 1) +val remoteBmId2 = BlockManagerId("test-client-2", "test-client-2", 2) +val remoteBlocks1 = (0 until 15).map(ShuffleBlockId(0, 3, _)) +val remoteBlocks2 = Seq[BlockId](ShuffleBlockId(0, 4, 0), ShuffleBlockId(0, 4, 1)) +val mergedRemoteBlocks = Map[BlockId, ManagedBuffer]( + ShuffleBlockBatchId(0, 3, 0,
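The condition being fixed is easier to see in isolation. Below is a toy model of the per-address grouping decision, with made-up names, not the actual iterator code:

```scala
// Split one address's blocks into requests of at most maxPerAddress blocks. A trailing
// group smaller than the limit is normally deferred to a later round; on the final round
// (isLast) it must still be sent -- dropping it was the bug fixed above.
def splitIntoRequests[B](blocks: Seq[B], maxPerAddress: Int, isLast: Boolean): (Seq[Seq[B]], Seq[B]) = {
  val groups = blocks.grouped(maxPerAddress).toSeq
  val (send, defer) = groups.partition(g => g.length == maxPerAddress || isLast)
  (send, defer.flatten)
}

splitIntoRequests(1 to 15, maxPerAddress = 10, isLast = false) // 1 request sent, 5 blocks deferred
splitIntoRequests(1 to 15, maxPerAddress = 10, isLast = true)  // 2 requests sent, nothing deferred
```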
[spark] branch master updated (1fd9a91 -> 2257ce2)
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 1fd9a91 [SPARK-31005][SQL] Support time zone ids in casting strings to timestamps add 2257ce2 [SPARK-31034][CORE] ShuffleBlockFetcherIterator should always create request for last block group No new revisions were added by this update. Summary of changes: .../storage/ShuffleBlockFetcherIterator.scala | 8 +- .../storage/ShuffleBlockFetcherIteratorSuite.scala | 91 ++ 2 files changed, 78 insertions(+), 21 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated: [SPARK-31005][SQL] Support time zone ids in casting strings to timestamps
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 2ff711c [SPARK-31005][SQL] Support time zone ids in casting strings to timestamps 2ff711c is described below commit 2ff711ca4f17004abf44186d24fbb0f35fa6336d Author: Maxim Gekk AuthorDate: Thu Mar 5 20:49:43 2020 +0800 [SPARK-31005][SQL] Support time zone ids in casting strings to timestamps ### What changes were proposed in this pull request? In the PR, I propose to change `DateTimeUtils.stringToTimestamp` to support any valid time zone id at the end of input string. After the changes, the function accepts zone ids in the formats: - no zone id. In that case, the function uses the local session time zone from the SQL config `spark.sql.session.timeZone` - -[h]h:[m]m - +[h]h:[m]m - Z - Short zone id, see https://docs.oracle.com/javase/8/docs/api/java/time/ZoneId.html#SHORT_IDS - Zone ID starts with 'UTC+', 'UTC-', 'GMT+', 'GMT-', 'UT+' or 'UT-'. The ID is split in two, with a two or three letter prefix and a suffix starting with the sign. The suffix must be in the formats: - +|-h[h] - +|-hh[:]mm - +|-hh:mm:ss - +|-hhmmss - Region-based zone IDs in the form `{area}/{city}`, such as `Europe/Paris` or `America/New_York`. The default set of region ids is supplied by the IANA Time Zone Database (TZDB). ### Why are the changes needed? - To use `stringToTimestamp` as a substitution of removed `stringToTime`, see https://github.com/apache/spark/pull/27710#discussion_r385020173 - Improve UX of Spark SQL by allowing flexible formats of zone ids. Currently, Spark accepts only `Z` and zone offsets that can be inconvenient when a time zone offset is shifted due to daylight saving rules. For instance: ```sql spark-sql> select cast('2015-03-18T12:03:17.123456 Europe/Moscow' as timestamp); NULL ``` ### Does this PR introduce any user-facing change? Yes. After the changes, casting strings to timestamps allows time zone id at the end of the strings: ```sql spark-sql> select cast('2015-03-18T12:03:17.123456 Europe/Moscow' as timestamp); 2015-03-18 12:03:17.123456 ``` ### How was this patch tested? - Added new test cases to the `string to timestamp` test in `DateTimeUtilsSuite`. - Run `CastSuite` and `AnsiCastSuite`. Closes #27753 from MaxGekk/stringToTimestamp-uni-zoneId. 
Authored-by: Maxim Gekk Signed-off-by: Wenchen Fan (cherry picked from commit 1fd9a91c662a368e348ef96604a79929f814041c) Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/util/DateTimeUtils.scala| 57 +++--- .../sql/catalyst/util/DateTimeUtilsSuite.scala | 23 +++-- 2 files changed, 48 insertions(+), 32 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 448e4b3..6811bfc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -187,28 +187,28 @@ object DateTimeUtils { * `-[m]m` * `-[m]m-[d]d` * `-[m]m-[d]d ` - * `-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]` - * `-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z` - * `-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m` - * `-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m` - * `-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]` - * `-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z` - * `-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m` - * `-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m` - * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]` - * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z` - * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m` - * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m` - * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]` - * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z` - * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m` - * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m` + * `-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` + * `-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` + * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` + * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` + * + * where `zone_id` should have one of the forms: + * - Z - Zulu time zone UTC+0 + * - +|-[h]h:[m]m + * - A short id, see https://docs.oracle.com/javase/8/docs/api/java/time/ZoneId.html#SHORT_IDS + * - An id with one of the prefixes UT
[spark] branch master updated: [SPARK-31005][SQL] Support time zone ids in casting strings to timestamps
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 1fd9a91 [SPARK-31005][SQL] Support time zone ids in casting strings to timestamps 1fd9a91 is described below commit 1fd9a91c662a368e348ef96604a79929f814041c Author: Maxim Gekk AuthorDate: Thu Mar 5 20:49:43 2020 +0800 [SPARK-31005][SQL] Support time zone ids in casting strings to timestamps ### What changes were proposed in this pull request? In the PR, I propose to change `DateTimeUtils.stringToTimestamp` to support any valid time zone id at the end of input string. After the changes, the function accepts zone ids in the formats: - no zone id. In that case, the function uses the local session time zone from the SQL config `spark.sql.session.timeZone` - -[h]h:[m]m - +[h]h:[m]m - Z - Short zone id, see https://docs.oracle.com/javase/8/docs/api/java/time/ZoneId.html#SHORT_IDS - Zone ID starts with 'UTC+', 'UTC-', 'GMT+', 'GMT-', 'UT+' or 'UT-'. The ID is split in two, with a two or three letter prefix and a suffix starting with the sign. The suffix must be in the formats: - +|-h[h] - +|-hh[:]mm - +|-hh:mm:ss - +|-hhmmss - Region-based zone IDs in the form `{area}/{city}`, such as `Europe/Paris` or `America/New_York`. The default set of region ids is supplied by the IANA Time Zone Database (TZDB). ### Why are the changes needed? - To use `stringToTimestamp` as a substitution of removed `stringToTime`, see https://github.com/apache/spark/pull/27710#discussion_r385020173 - Improve UX of Spark SQL by allowing flexible formats of zone ids. Currently, Spark accepts only `Z` and zone offsets that can be inconvenient when a time zone offset is shifted due to daylight saving rules. For instance: ```sql spark-sql> select cast('2015-03-18T12:03:17.123456 Europe/Moscow' as timestamp); NULL ``` ### Does this PR introduce any user-facing change? Yes. After the changes, casting strings to timestamps allows time zone id at the end of the strings: ```sql spark-sql> select cast('2015-03-18T12:03:17.123456 Europe/Moscow' as timestamp); 2015-03-18 12:03:17.123456 ``` ### How was this patch tested? - Added new test cases to the `string to timestamp` test in `DateTimeUtilsSuite`. - Run `CastSuite` and `AnsiCastSuite`. Closes #27753 from MaxGekk/stringToTimestamp-uni-zoneId. 
Authored-by: Maxim Gekk Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/util/DateTimeUtils.scala| 57 +++--- .../sql/catalyst/util/DateTimeUtilsSuite.scala | 23 +++-- 2 files changed, 48 insertions(+), 32 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 6b252ec..3a038a4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -185,28 +185,28 @@ object DateTimeUtils { * `-[m]m` * `-[m]m-[d]d` * `-[m]m-[d]d ` - * `-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]` - * `-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z` - * `-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m` - * `-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m` - * `-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]` - * `-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z` - * `-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m` - * `-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m` - * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]` - * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z` - * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m` - * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m` - * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]` - * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z` - * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m` - * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m` + * `-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` + * `-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` + * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` + * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` + * + * where `zone_id` should have one of the forms: + * - Z - Zulu time zone UTC+0 + * - +|-[h]h:[m]m + * - A short id, see https://docs.oracle.com/javase/8/docs/api/java/time/ZoneId.html#SHORT_IDS + * - An id with one of the prefixes UTC+, UTC-, GMT+, GMT-, UT+ or UT-, + * and a suffix in the formats: + * - +|-h[h] + * - +|-hh[:]
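The same behavior from the Scala side, for completeness; a sketch assuming a SparkSession `spark` on a build with this change applied:

```scala
// Region-based zone ids at the end of the input string are now accepted when casting
// strings to timestamps.
spark.sql("SELECT CAST('2015-03-18T12:03:17.123456 Europe/Moscow' AS TIMESTAMP)").show(false)

// Zone offsets and 'Z' keep working as before.
spark.sql("SELECT CAST('2015-03-18T12:03:17.123456+03:00' AS TIMESTAMP)").show(false)
```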
[spark] branch branch-3.0 updated: fix merge mistakes
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 49c07b7 fix merge mistakes 49c07b7 is described below commit 49c07b7335a89533bc25d6ef45f7877b43b6a98d Author: Wenchen Fan AuthorDate: Thu Mar 5 20:26:14 2020 +0800 fix merge mistakes --- sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 1 - .../apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala| 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index cc9c2ae..3dbfc65 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2112,7 +2112,6 @@ object SQLConf { "MapFromEntries, StringToMap, MapConcat and TransformKeys. When EXCEPTION, the query " + "fails if duplicated map keys are detected. When LAST_WIN, the map key that is inserted " + "at last takes precedence.") -.version("3.0.0") .stringConf .transform(_.toUpperCase(Locale.ROOT)) .checkValues(MapKeyDedupPolicy.values.map(_.toString)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index e5f1fa6..500b6cc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -609,7 +609,7 @@ class AdaptiveQueryExecSuite withSQLConf( SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", - SQLConf.SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZEDateFormatter.key -> "2000") { + SQLConf.SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE.key -> "2000") { withTempView("skewData1", "skewData2") { spark .range(0, 1000, 1, 10) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated: [SPARK-31038][SQL] Add checkValue for spark.sql.session.timeZone
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 1c165ee [SPARK-31038][SQL] Add checkValue for spark.sql.session.timeZone 1c165ee is described below commit 1c165eecd3948dc12db19827dfd326d32eb4e475 Author: Kent Yao AuthorDate: Thu Mar 5 19:38:20 2020 +0800 [SPARK-31038][SQL] Add checkValue for spark.sql.session.timeZone ### What changes were proposed in this pull request? The `spark.sql.session.timeZone` config can accept any string value including invalid time zone ids, then it will fail other queries that rely on the time zone. We should do the value checking in the set phase and fail fast if the zone value is invalid. ### Why are the changes needed? improve configuration ### Does this PR introduce any user-facing change? yes, will fail fast if the value is a wrong timezone id ### How was this patch tested? add ut Closes #27792 from yaooqinn/SPARK-31038. Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../scala/org/apache/spark/sql/internal/SQLConf.scala | 8 .../org/apache/spark/sql/internal/SQLConfSuite.scala | 19 +++ 2 files changed, 27 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 68a89b2..cc9c2ae 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -24,6 +24,7 @@ import java.util.zip.Deflater import scala.collection.JavaConverters._ import scala.collection.immutable +import scala.util.Try import scala.util.matching.Regex import org.apache.hadoop.fs.Path @@ -38,6 +39,7 @@ import org.apache.spark.sql.catalyst.analysis.{HintErrorLogger, Resolver} import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator import org.apache.spark.sql.catalyst.plans.logical.HintErrorHandler +import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME import org.apache.spark.unsafe.array.ByteArrayMethods import org.apache.spark.util.Utils @@ -1483,10 +1485,16 @@ object SQLConf { .doubleConf .createWithDefault(0.9) + private def isValidTimezone(zone: String): Boolean = { +Try { DateTimeUtils.getZoneId(zone) }.isSuccess + } + val SESSION_LOCAL_TIMEZONE = buildConf("spark.sql.session.timeZone") .doc("""The ID of session local timezone, e.g. 
"GMT", "America/Los_Angeles", etc.""") .stringConf + .checkValue(isValidTimezone, s"Cannot resolve the given timezone with" + +" ZoneId.of(_, ZoneId.SHORT_IDS)") .createWithDefaultFunction(() => TimeZone.getDefault.getID) val WINDOW_EXEC_BUFFER_IN_MEMORY_THRESHOLD = diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala index 61be367..48be211 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala @@ -348,4 +348,23 @@ class SQLConfSuite extends QueryTest with SharedSparkSession { } check(config2) } + + test("spark.sql.session.timeZone should only accept valid zone id") { +spark.conf.set(SQLConf.SESSION_LOCAL_TIMEZONE.key, "MIT") +assert(sql(s"set ${SQLConf.SESSION_LOCAL_TIMEZONE.key}").head().getString(1) === "MIT") +spark.conf.set(SQLConf.SESSION_LOCAL_TIMEZONE.key, "America/Chicago") +assert(sql(s"set ${SQLConf.SESSION_LOCAL_TIMEZONE.key}").head().getString(1) === + "America/Chicago") + +intercept[IllegalArgumentException] { + spark.conf.set(SQLConf.SESSION_LOCAL_TIMEZONE.key, "pst") +} +intercept[IllegalArgumentException] { + spark.conf.set(SQLConf.SESSION_LOCAL_TIMEZONE.key, "GMT+8:00") +} +val e = intercept[IllegalArgumentException] { + spark.conf.set(SQLConf.SESSION_LOCAL_TIMEZONE.key, "Asia/shanghai") +} +assert(e.getMessage === "Cannot resolve the given timezone with ZoneId.of(_, ZoneId.SHORT_IDS)") + } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated: [SPARK-31019][SQL] make it clear that people can deduplicate map keys
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new daa140d [SPARK-31019][SQL] make it clear that people can deduplicate map keys daa140d is described below commit daa140df726f8521ae7bbaf4586277ae4f4aea7c Author: Wenchen Fan AuthorDate: Thu Mar 5 20:43:52 2020 +0900 [SPARK-31019][SQL] make it clear that people can deduplicate map keys rename the config and make it non-internal. Now we fail the query if duplicated map keys are detected, and provide a legacy config to deduplicate it. However, we must provide a way to get users out of this situation, instead of just rejecting to run the query. This exit strategy should always be there, while legacy config indicates that it may be removed someday. no, just rename a config which was added in 3.0 add more tests for the fail behavior. Closes #27772 from cloud-fan/map. Authored-by: Wenchen Fan Signed-off-by: HyukjinKwon --- docs/sql-migration-guide.md| 2 +- .../sql/catalyst/util/ArrayBasedMapBuilder.scala | 20 +++--- .../org/apache/spark/sql/internal/SQLConf.scala| 25 +--- .../expressions/CollectionExpressionsSuite.scala | 14 - .../catalyst/expressions/ComplexTypeSuite.scala| 15 - .../expressions/HigherOrderFunctionsSuite.scala| 5 +- .../catalyst/util/ArrayBasedMapBuilderSuite.scala | 71 ++ .../apache/spark/sql/DataFrameFunctionsSuite.scala | 13 +++- 8 files changed, 112 insertions(+), 53 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 0050061..6c73038 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -49,7 +49,7 @@ license: | - In Spark version 2.4 and earlier, float/double -0.0 is semantically equal to 0.0, but -0.0 and 0.0 are considered as different values when used in aggregate grouping keys, window partition keys and join keys. Since Spark 3.0, this bug is fixed. For example, `Seq(-0.0, 0.0).toDF("d").groupBy("d").count()` returns `[(0.0, 2)]` in Spark 3.0, and `[(0.0, 1), (-0.0, 1)]` in Spark 2.4 and earlier. - - In Spark version 2.4 and earlier, users can create a map with duplicated keys via built-in functions like `CreateMap`, `StringToMap`, etc. The behavior of map with duplicated keys is undefined, e.g. map look up respects the duplicated key appears first, `Dataset.collect` only keeps the duplicated key appears last, `MapKeys` returns duplicated keys, etc. Since Spark 3.0, new config `spark.sql.legacy.allowDuplicatedMapKeys` was added, with the default value `false`, Spark will throw Ru [...] + - In Spark version 2.4 and earlier, users can create a map with duplicated keys via built-in functions like `CreateMap`, `StringToMap`, etc. The behavior of map with duplicated keys is undefined, e.g. map look up respects the duplicated key appears first, `Dataset.collect` only keeps the duplicated key appears last, `MapKeys` returns duplicated keys, etc. Since Spark 3.0, Spark will throw RuntimeException while duplicated keys are found. Users can set `spark.sql.mapKeyDedupPolicy` to L [...] - In Spark version 2.4 and earlier, partition column value is converted as null if it can't be casted to corresponding user provided schema. Since 3.0, partition column value is validated with user provided schema. An exception is thrown if the validation fails. You can disable such validation by setting `spark.sql.sources.validatePartitionColumns` to `false`. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilder.scala index 40e75b5..0185b57 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilder.scala @@ -21,7 +21,6 @@ import scala.collection.mutable import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.internal.SQLConf.LEGACY_ALLOW_DUPLICATED_MAP_KEY import org.apache.spark.sql.types._ import org.apache.spark.unsafe.array.ByteArrayMethods @@ -49,8 +48,7 @@ class ArrayBasedMapBuilder(keyType: DataType, valueType: DataType) extends Seria private lazy val keyGetter = InternalRow.getAccessor(keyType) private lazy val valueGetter = InternalRow.getAccessor(valueType) - private val allowDuplicatedMapKey = -SQLConf.get.getConf(LEGACY_ALLOW_DUPLICATED_MAP_KEY) + private val mapKeyDedupPolicy = SQLConf.get.getConf(SQLConf.MAP_KEY_DEDUP_POLICY) def put(key: Any, value: Any): Unit = { if (key == null) { @@ -67,13 +65,17 @@ class ArrayBasedMapBuil
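As a quick illustration of the behavior change this commit describes, here is a minimal sketch. The session-builder boilerplate and the sample query are mine; the config name `spark.sql.mapKeyDedupPolicy` and the fail-by-default RuntimeException come from the migration-guide text above, while the `LAST_WIN` value is my reading of the 3.0 config and should be treated as an assumption.

```
// Sketch only: Spark 3.0 duplicate-map-key behavior described in SPARK-31019.
import org.apache.spark.sql.SparkSession

object MapKeyDedupSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("map-key-dedup").getOrCreate()

    // With the default policy, building a map with a duplicated key is expected to fail at
    // runtime rather than silently keeping the first or last value:
    //   spark.sql("SELECT map('a', 1, 'a', 2)").collect()   // RuntimeException about duplicate key 'a'

    // Opting into deduplication (assumed policy value LAST_WIN: later entries win).
    spark.conf.set("spark.sql.mapKeyDedupPolicy", "LAST_WIN")
    spark.sql("SELECT map('a', 1, 'a', 2) AS m").show(false)  // expect a single-entry map: 'a' -> 2

    spark.stop()
  }
}
```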
[spark] branch master updated (f45ae7f -> 807ea41)
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

    from f45ae7f  [SPARK-31038][SQL] Add checkValue for spark.sql.session.timeZone
     add 807ea41  [SPARK-31019][SQL] make it clear that people can deduplicate map keys

No new revisions were added by this update.

Summary of changes:
 docs/sql-migration-guide.md                        | 2 +-
 .../sql/catalyst/util/ArrayBasedMapBuilder.scala   | 20 +++---
 .../org/apache/spark/sql/internal/SQLConf.scala    | 26
 .../expressions/CollectionExpressionsSuite.scala   | 14 -
 .../catalyst/expressions/ComplexTypeSuite.scala    | 15 -
 .../expressions/HigherOrderFunctionsSuite.scala    | 5 +-
 .../catalyst/util/ArrayBasedMapBuilderSuite.scala  | 71 ++
 .../apache/spark/sql/DataFrameFunctionsSuite.scala | 13 +++-
 8 files changed, 112 insertions(+), 54 deletions(-)

- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (9b602e2 -> f45ae7f)
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

    from 9b602e2  [SPARK-31046][SQL] Make more efficient and clean up AQE update UI code
     add f45ae7f  [SPARK-31038][SQL] Add checkValue for spark.sql.session.timeZone

No new revisions were added by this update.

Summary of changes:
 .../scala/org/apache/spark/sql/internal/SQLConf.scala | 8
 .../org/apache/spark/sql/internal/SQLConfSuite.scala  | 19 +++
 2 files changed, 27 insertions(+)

- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
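A small sketch of what a value check on `spark.sql.session.timeZone` means in practice. The invalid zone ID and the exact exception type are assumptions on my part; the point is that a malformed value should now be rejected when the config is set rather than surfacing later in query execution.

```
// Sketch only: setting spark.sql.session.timeZone with valid and (assumed) invalid zone IDs.
import org.apache.spark.sql.SparkSession

object SessionTimeZoneSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("session-tz").getOrCreate()

    // Region-based zone IDs keep working as before.
    spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
    println(spark.conf.get("spark.sql.session.timeZone"))

    // With a checkValue in place, an unresolvable ID like this one should fail fast here
    // (exception type assumed; likely an IllegalArgumentException from the conf validation).
    try spark.conf.set("spark.sql.session.timeZone", "Definitely/Not_A_Zone")
    catch { case e: Exception => println(s"rejected: ${e.getMessage}") }

    spark.stop()
  }
}
```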
[spark] branch branch-3.0 updated: [SPARK-31046][SQL] Make more efficient and clean up AQE update UI code
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 2247239 [SPARK-31046][SQL] Make more efficient and clean up AQE update UI code 2247239 is described below commit 2247239dd5fef9506f025c993c43fb8365e57ffd Author: maryannxue AuthorDate: Thu Mar 5 18:53:01 2020 +0800 [SPARK-31046][SQL] Make more efficient and clean up AQE update UI code ### What changes were proposed in this pull request? This PR avoids sending redundant metrics (those that have been included in previous update) as well as useless metrics (those in future stages) to Spark UI in AQE UI metrics update. ### Why are the changes needed? This change will make UI metrics update more efficient. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Manual test in Spark UI. Closes #27799 from maryannxue/aqe-ui-cleanup. Authored-by: maryannxue Signed-off-by: Wenchen Fan (cherry picked from commit 9b602e26d2a5252623a2ed66a1a2b382665fdab4) Signed-off-by: Wenchen Fan --- .../execution/adaptive/AdaptiveSparkPlanExec.scala | 36 +- 1 file changed, 8 insertions(+), 28 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index c018ca4..f2ebe1a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -37,7 +37,6 @@ import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec._ import org.apache.spark.sql.execution.exchange._ -import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.sql.execution.ui.{SparkListenerSQLAdaptiveExecutionUpdate, SparkListenerSQLAdaptiveSQLMetricUpdates, SQLPlanMetric} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.util.ThreadUtils @@ -133,21 +132,6 @@ case class AdaptiveSparkPlanExec( executedPlan.resetMetrics() } - private def collectSQLMetrics(plan: SparkPlan): Seq[SQLMetric] = { -val metrics = new mutable.ArrayBuffer[SQLMetric]() -plan.foreach { - case p: ShuffleQueryStageExec if (p.resultOption.isEmpty) => -collectSQLMetrics(p.plan).foreach(metrics += _) - case p: BroadcastQueryStageExec if (p.resultOption.isEmpty) => -collectSQLMetrics(p.plan).foreach(metrics += _) - case p: SparkPlan => -p.metrics.foreach { case metric => - metrics += metric._2 -} -} -metrics - } - private def getFinalPhysicalPlan(): SparkPlan = lock.synchronized { if (!isFinalPlan) { // Subqueries do not have their own execution IDs and therefore rely on the main query to @@ -163,7 +147,7 @@ case class AdaptiveSparkPlanExec( currentPhysicalPlan = result.newPlan if (result.newStages.nonEmpty) { stagesToReplace = result.newStages ++ stagesToReplace - executionId.foreach(onUpdatePlan) + executionId.foreach(onUpdatePlan(_, result.newStages.map(_.plan))) // Start materialization of all new stages and fail fast if any stages failed eagerly result.newStages.foreach { stage => @@ -232,7 +216,7 @@ case class AdaptiveSparkPlanExec( // Run the final plan when there's no more unfinished stages. 
currentPhysicalPlan = applyPhysicalRules(result.newPlan, queryStageOptimizerRules) isFinalPlan = true - executionId.foreach(onUpdatePlan) + executionId.foreach(onUpdatePlan(_, Seq(currentPhysicalPlan))) logDebug(s"Final plan: $currentPhysicalPlan") } currentPhysicalPlan @@ -496,14 +480,18 @@ case class AdaptiveSparkPlanExec( /** * Notify the listeners of the physical plan change. */ - private def onUpdatePlan(executionId: Long): Unit = { + private def onUpdatePlan(executionId: Long, newSubPlans: Seq[SparkPlan]): Unit = { if (isSubquery) { // When executing subqueries, we can't update the query plan in the UI as the // UI doesn't support partial update yet. However, the subquery may have been // optimized into a different plan and we must let the UI know the SQL metrics // of the new plan nodes, so that it can track the valid accumulator updates later // and display SQL metrics correctly. - onUpdateSQLMetrics(collectSQLMetrics(currentPhysicalPlan), executionId) + val newMetrics = newSubPlans.flatMap { p => +p.flatMap(_.metrics.values.map(m => SQLPlanMetric(m.name.get, m.id, m.met
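A toy sketch of the incremental-update idea in this change, not Spark internals: the `Metric` and `PlanNode` types here are made up for illustration. On each plan update, only metrics from the sub-plans created by the latest re-optimization round are reported, instead of re-collecting every metric reachable from the current plan.

```
// Toy model of "report only the metrics of newly created sub-plans".
final case class Metric(id: Long, name: String)
final case class PlanNode(metrics: Seq[Metric], children: Seq[PlanNode]) {
  def selfAndDescendants: Seq[PlanNode] = this +: children.flatMap(_.selfAndDescendants)
}

object IncrementalMetricUpdates {
  // Old shape of the update: walk the whole current plan and resend everything.
  def allMetrics(currentPlan: PlanNode): Seq[Metric] =
    currentPlan.selfAndDescendants.flatMap(_.metrics)

  // New shape of the update: only sub-plans added by the latest round contribute, so metrics
  // already reported earlier and metrics of not-yet-created stages are skipped.
  def newMetrics(newSubPlans: Seq[PlanNode]): Seq[Metric] =
    newSubPlans.flatMap(_.selfAndDescendants.flatMap(_.metrics))
}
```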
[spark] branch master updated (66b4fd0 -> 9b602e2)
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

    from 66b4fd0  [SPARK-31024][SQL] Allow specifying session catalog name `spark_catalog` in qualified column names for v1 tables
     add 9b602e2  [SPARK-31046][SQL] Make more efficient and clean up AQE update UI code

No new revisions were added by this update.

Summary of changes:
 .../execution/adaptive/AdaptiveSparkPlanExec.scala | 36 +-
 1 file changed, 8 insertions(+), 28 deletions(-)

- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated: [SPARK-31024][SQL] Allow specifying session catalog name `spark_catalog` in qualified column names for v1 tables
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 4fcb5ae [SPARK-31024][SQL] Allow specifying session catalog name `spark_catalog` in qualified column names for v1 tables 4fcb5ae is described below commit 4fcb5ae22623de96247fcfe7341be0af7ed2471f Author: Terry Kim AuthorDate: Thu Mar 5 18:33:59 2020 +0800 [SPARK-31024][SQL] Allow specifying session catalog name `spark_catalog` in qualified column names for v1 tables ### What changes were proposed in this pull request? Currently, the user cannot specify the session catalog name (`spark_catalog`) in qualified column names for v1 tables: ``` SELECT spark_catalog.default.t.i FROM spark_catalog.default.t ``` fails with `cannot resolve 'spark_catalog.default.t.i`. This is inconsistent with v2 table behavior where catalog name can be used: ``` SELECT testcat.ns1.tbl.id FROM testcat.ns1.tbl.id ``` This PR proposes to fix the inconsistency and allow the user to specify session catalog name in column names for v1 tables. ### Why are the changes needed? Fixing an inconsistent behavior. ### Does this PR introduce any user-facing change? Yes, now the following query works: ``` SELECT spark_catalog.default.t.i FROM spark_catalog.default.t ``` ### How was this patch tested? Added new tests. Closes #27776 from imback82/spark_catalog_col_name_resolution. Authored-by: Terry Kim Signed-off-by: Wenchen Fan (cherry picked from commit 66b4fd040e97cb6de6536a5545017278879c98fb) Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/analysis/Analyzer.scala | 2 +- .../sql/catalyst/catalog/SessionCatalog.scala | 6 +- .../spark/sql/catalyst/expressions/package.scala | 102 - .../sql/catalyst/catalog/SessionCatalogSuite.scala | 6 +- .../results/columnresolution-negative.sql.out | 26 +++--- .../results/postgreSQL/create_view.sql.out | 2 +- .../sql-tests/results/postgreSQL/join.sql.out | 2 +- .../results/postgreSQL/select_having.sql.out | 2 +- .../results/postgreSQL/window_part3.sql.out| 4 +- .../results/udf/postgreSQL/udf-join.sql.out| 2 +- .../udf/postgreSQL/udf-select_having.sql.out | 2 +- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 4 +- .../spark/sql/connector/DataSourceV2SQLSuite.scala | 8 +- .../spark/sql/hive/HiveMetastoreCatalogSuite.scala | 2 +- 14 files changed, 111 insertions(+), 59 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 254dd44..3cb754d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -935,7 +935,7 @@ class Analyzer( v1SessionCatalog.getRelation(v1Table.v1Table) case table => SubqueryAlias( -ident.asMultipartIdentifier, +catalog.name +: ident.asMultipartIdentifier, DataSourceV2Relation.create(table, Some(catalog), Some(ident))) } val key = catalog.name +: ident.namespace :+ ident.name diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index c80d9d2..3a63aff 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -38,6 +38,7 @@ import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo, Im import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParserInterface} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias, View} import org.apache.spark.sql.catalyst.util.StringUtils +import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE import org.apache.spark.sql.types.StructType @@ -758,6 +759,7 @@ class SessionCatalog( val name = metadata.identifier val db = formatDatabaseName(name.database.getOrElse(currentDb)) val table = formatTableName(name.table) +val multiParts = Seq(CatalogManager.SESSION_CATALOG_NAME, db, table) if (metadata.tableType == CatalogTableType.VIEW) { val viewText = metadata.viewText.getOrElse(sys.error("Invalid view without text.")) @@ -769,9 +771,9 @@ class Session
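The SQL from the commit message above can be exercised end to end with a small sketch like the following. The table name, schema, and storage format are mine; the `spark_catalog`-qualified SELECT is taken from the PR description.

```
// Sketch only: qualify a v1 table's columns with the session catalog name spark_catalog.
import org.apache.spark.sql.SparkSession

object SparkCatalogQualifiedColumnSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("spark-catalog-columns").getOrCreate()

    spark.sql("CREATE TABLE IF NOT EXISTS default.t (i INT) USING parquet")
    spark.sql("INSERT INTO default.t VALUES (1)")

    // Before this change the fully qualified column failed with
    // "cannot resolve 'spark_catalog.default.t.i'"; afterwards both queries resolve.
    spark.sql("SELECT spark_catalog.default.t.i FROM spark_catalog.default.t").show()
    spark.sql("SELECT default.t.i FROM spark_catalog.default.t").show()

    spark.stop()
  }
}
```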
[spark] branch master updated (0a22f19 -> 66b4fd0)
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

    from 0a22f19  [SPARK-31050][TEST] Disable flaky `Roundtrip` test in KafkaDelegationTokenSuite
     add 66b4fd0  [SPARK-31024][SQL] Allow specifying session catalog name `spark_catalog` in qualified column names for v1 tables

No new revisions were added by this update.

Summary of changes:
 .../spark/sql/catalyst/analysis/Analyzer.scala     | 2 +-
 .../sql/catalyst/catalog/SessionCatalog.scala      | 6 +-
 .../spark/sql/catalyst/expressions/package.scala   | 102 -
 .../sql/catalyst/catalog/SessionCatalogSuite.scala | 6 +-
 .../results/columnresolution-negative.sql.out      | 26 +++---
 .../results/postgreSQL/create_view.sql.out         | 2 +-
 .../sql-tests/results/postgreSQL/join.sql.out      | 2 +-
 .../results/postgreSQL/select_having.sql.out       | 2 +-
 .../results/postgreSQL/window_part3.sql.out        | 4 +-
 .../results/udf/postgreSQL/udf-join.sql.out        | 2 +-
 .../udf/postgreSQL/udf-select_having.sql.out       | 2 +-
 .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 4 +-
 .../spark/sql/connector/DataSourceV2SQLSuite.scala | 8 +-
 .../spark/sql/hive/HiveMetastoreCatalogSuite.scala | 2 +-
 14 files changed, 111 insertions(+), 59 deletions(-)

- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated: [SPARK-31050][TEST] Disable flaky `Roundtrip` test in KafkaDelegationTokenSuite
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 9cea92b [SPARK-31050][TEST] Disable flaky `Roundtrip` test in KafkaDelegationTokenSuite 9cea92b is described below commit 9cea92b6f2c2fc8e0effcec710e6ff6e8a7c965f Author: yi.wu AuthorDate: Thu Mar 5 00:21:32 2020 -0800 [SPARK-31050][TEST] Disable flaky `Roundtrip` test in KafkaDelegationTokenSuite ### What changes were proposed in this pull request? Disable test `KafkaDelegationTokenSuite`. ### Why are the changes needed? `KafkaDelegationTokenSuite` is too flaky. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Pass Jenkins. Closes #27789 from Ngone51/retry_kafka. Authored-by: yi.wu Signed-off-by: Dongjoon Hyun (cherry picked from commit 0a22f1966466629cb745d000a0608d521fece093) Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/sql/kafka010/KafkaDelegationTokenSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDelegationTokenSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDelegationTokenSuite.scala index 3064838..79239e5 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDelegationTokenSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDelegationTokenSuite.scala @@ -62,7 +62,7 @@ class KafkaDelegationTokenSuite extends StreamTest with SharedSparkSession with } } - test("Roundtrip") { + ignore("Roundtrip") { val hadoopConf = new Configuration() val manager = new HadoopDelegationTokenManager(spark.sparkContext.conf, hadoopConf, null) val credentials = new Credentials() - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
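For readers unfamiliar with ScalaTest, `ignore` in the diff above is a drop-in replacement for `test` that keeps the body compiling but skips execution and reports the case as ignored. A minimal sketch outside Spark's own test helpers (the suite name and flaky assertion are invented; the import path is for ScalaTest 3.1+, older versions use `org.scalatest.FunSuite`):

```
// Sketch only: ScalaTest's ignore keeps a flaky test compiled but not executed.
import org.scalatest.funsuite.AnyFunSuite

class FlakyExampleSuite extends AnyFunSuite {
  test("stable behaviour") {
    assert(1 + 1 == 2)
  }

  // Reported as ignored in the test run instead of passing or failing.
  ignore("Roundtrip") {
    assert(scala.util.Random.nextInt(2) == 0) // stand-in for a flaky assertion
  }
}
```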
[spark] branch master updated: [SPARK-31050][TEST] Disable flaky `Roundtrip` test in KafkaDelegationTokenSuite
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 0a22f19 [SPARK-31050][TEST] Disable flaky `Roundtrip` test in KafkaDelegationTokenSuite 0a22f19 is described below commit 0a22f1966466629cb745d000a0608d521fece093 Author: yi.wu AuthorDate: Thu Mar 5 00:21:32 2020 -0800 [SPARK-31050][TEST] Disable flaky `Roundtrip` test in KafkaDelegationTokenSuite ### What changes were proposed in this pull request? Disable test `KafkaDelegationTokenSuite`. ### Why are the changes needed? `KafkaDelegationTokenSuite` is too flaky. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Pass Jenkins. Closes #27789 from Ngone51/retry_kafka. Authored-by: yi.wu Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/sql/kafka010/KafkaDelegationTokenSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDelegationTokenSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDelegationTokenSuite.scala index 3064838..79239e5 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDelegationTokenSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDelegationTokenSuite.scala @@ -62,7 +62,7 @@ class KafkaDelegationTokenSuite extends StreamTest with SharedSparkSession with } } - test("Roundtrip") { + ignore("Roundtrip") { val hadoopConf = new Configuration() val manager = new HadoopDelegationTokenManager(spark.sparkContext.conf, hadoopConf, null) val credentials = new Credentials() - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated: [SPARK-30668][SQL][FOLLOWUP] Raise exception instead of silent change for new DateFormatter
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 253fbd5 [SPARK-30668][SQL][FOLLOWUP] Raise exception instead of silent change for new DateFormatter 253fbd5 is described below commit 253fbd54e7fe3dda0672ebc7143b943af8387a2c Author: Yuanjian Li AuthorDate: Thu Mar 5 15:29:39 2020 +0800 [SPARK-30668][SQL][FOLLOWUP] Raise exception instead of silent change for new DateFormatter This is a follow-up work for #27441. For the cases of new TimestampFormatter return null while legacy formatter can return a value, we need to throw an exception instead of silent change. The legacy config will be referenced in the error message. Avoid silent result change for new behavior in 3.0. Yes, an exception is thrown when we detect legacy formatter can parse the string and the new formatter return null. Extend existing UT. Closes #27537 from xuanyuanking/SPARK-30668-follow. Authored-by: Yuanjian Li Signed-off-by: Wenchen Fan (cherry picked from commit 7db0af578585ecaeee9fd23f8189292289b52a97) Signed-off-by: Wenchen Fan --- .../scala/org/apache/spark/SparkException.scala| 7 +++ .../spark/sql/catalyst/csv/UnivocityParser.scala | 2 + .../catalyst/expressions/datetimeExpressions.scala | 3 + .../spark/sql/catalyst/json/JacksonParser.scala| 2 + .../spark/sql/catalyst/util/DateFormatter.scala| 60 +++--- .../catalyst/util/DateTimeFormatterHelper.scala| 26 +++- .../sql/catalyst/util/TimestampFormatter.scala | 73 +- .../org/apache/spark/sql/internal/SQLConf.scala| 16 - .../expressions/DateExpressionsSuite.scala | 39 +--- .../sql/catalyst/json/JsonInferSchemaSuite.scala | 16 ++--- .../org/apache/spark/sql/CsvFunctionsSuite.scala | 20 -- .../org/apache/spark/sql/DateFunctionsSuite.scala | 58 ++--- .../adaptive/AdaptiveQueryExecSuite.scala | 2 +- .../sql/execution/datasources/csv/CSVSuite.scala | 22 ++- .../sql/execution/datasources/json/JsonSuite.scala | 22 ++- 15 files changed, 271 insertions(+), 97 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkException.scala b/core/src/main/scala/org/apache/spark/SparkException.scala index 4ad9a0c..81c087e 100644 --- a/core/src/main/scala/org/apache/spark/SparkException.scala +++ b/core/src/main/scala/org/apache/spark/SparkException.scala @@ -43,3 +43,10 @@ private[spark] case class SparkUserAppException(exitCode: Int) */ private[spark] case class ExecutorDeadException(message: String) extends SparkException(message) + +/** + * Exception thrown when Spark returns different result after upgrading to a new version. 
+ */ +private[spark] class SparkUpgradeException(version: String, message: String, cause: Throwable) + extends SparkException("You may get a different result due to the upgrading of Spark" + +s" $version: $message", cause) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala index f829e6b..dd8537b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala @@ -23,6 +23,7 @@ import scala.util.control.NonFatal import com.univocity.parsers.csv.CsvParser +import org.apache.spark.SparkUpgradeException import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{ExprUtils, GenericInternalRow} @@ -285,6 +286,7 @@ class UnivocityParser( } } } catch { +case e: SparkUpgradeException => throw e case NonFatal(e) => badRecordException = badRecordException.orElse(Some(e)) row.setNullAt(i) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 05074d9..d92d700 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -26,6 +26,7 @@ import scala.util.control.NonFatal import org.apache.commons.text.StringEscapeUtils +import org.apache.spark.SparkUpgradeException import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.codegen._ @@ -787,6 +788,7 @@ abstract class ToTimestamp formatter.parse( t.asInsta
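A hedged sketch of the user-visible effect described above: when the new java.time-based formatter cannot parse a value that the legacy formatter could, Spark 3.0 raises an upgrade exception pointing at a legacy config instead of silently returning a different result. The `SparkUpgradeException` class is from the diff above; the config name `spark.sql.legacy.timeParserPolicy`, the sample input, and the pattern are my assumptions about a case the legacy parser accepts.

```
// Sketch only: restoring legacy datetime parsing when the 3.0 parser rejects an input.
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.to_timestamp

object LegacyTimeParserSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("legacy-time-parser").getOrCreate()
    import spark.implicits._

    // A single-digit month/day that the lenient legacy SimpleDateFormat accepted but the
    // strict java.time pattern "yyyy-MM-dd HH:mm:ss" does not.
    val df = Seq("2020-3-5 15:29:39").toDF("raw")

    // With the default policy this is expected to fail with the upgrade exception rather
    // than silently returning a different result:
    //   df.select(to_timestamp($"raw", "yyyy-MM-dd HH:mm:ss")).show()

    // Assumed escape hatch: fall back to the pre-3.0 parser behavior.
    spark.conf.set("spark.sql.legacy.timeParserPolicy", "LEGACY")
    df.select(to_timestamp($"raw", "yyyy-MM-dd HH:mm:ss")).show(false)

    spark.stop()
  }
}
```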