svn commit: r26580 - in /dev/spark/2.4.0-SNAPSHOT-2018_04_27_20_01-ad94e85-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Sat Apr 28 03:15:20 2018
New Revision: 26580

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_04_27_20_01-ad94e85 docs

[This commit notification would consist of 1460 parts, which exceeds the limit of 50, so it was shortened to this summary.]
spark git commit: [SPARK-23736][SQL][FOLLOWUP] Error message should contain SQL types
Repository: spark
Updated Branches:
  refs/heads/master 1fb46f30f -> ad94e8592


[SPARK-23736][SQL][FOLLOWUP] Error message should contain SQL types

## What changes were proposed in this pull request?

In the error messages we should return the SQL types (like `string`) rather than the internal types (like `StringType`).

## How was this patch tested?

Added UT.

Author: Marco Gaido

Closes #21181 from mgaido91/SPARK-23736_followup.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ad94e859
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ad94e859
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ad94e859

Branch: refs/heads/master
Commit: ad94e8592b2e8f4c1bdbd958e110797c6658af84
Parents: 1fb46f3
Author: Marco Gaido
Authored: Sat Apr 28 10:47:43 2018 +0800
Committer: hyukjinkwon
Committed: Sat Apr 28 10:47:43 2018 +0800

----------------------------------------------------------------------
 .../spark/sql/catalyst/expressions/collectionOperations.scala | 5 +++--
 .../scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala  | 5 +++++
 2 files changed, 8 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/ad94e859/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index 90223b9..6d63a53 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -863,8 +863,9 @@ case class Concat(children: Seq[Expression]) extends Expression {
     val childTypes = children.map(_.dataType)
     if (childTypes.exists(tpe => !allowedTypes.exists(_.acceptsType(tpe)))) {
       return TypeCheckResult.TypeCheckFailure(
-        s"input to function $prettyName should have been StringType, BinaryType or ArrayType," +
-          s" but it's " + childTypes.map(_.simpleString).mkString("[", ", ", "]"))
+        s"input to function $prettyName should have been ${StringType.simpleString}," +
+          s" ${BinaryType.simpleString} or ${ArrayType.simpleString}, but it's " +
+          childTypes.map(_.simpleString).mkString("[", ", ", "]"))
     }
     TypeUtils.checkForSameTypeInputExpr(childTypes, s"function $prettyName")
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/ad94e859/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index c216d13..470a1c8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -712,6 +712,11 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
     intercept[AnalysisException] {
       df.selectExpr("concat(i1, array(i1, i2))")
     }
+
+    val e = intercept[AnalysisException] {
+      df.selectExpr("concat(map(1, 2), map(3, 4))")
+    }
+    assert(e.getMessage.contains("string, binary or array"))
   }

   test("flatten function") {
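For readers skimming the patch: the change swaps internal Catalyst type names for the SQL-facing names that `simpleString` returns. A minimal sketch of the distinction, using the public `org.apache.spark.sql.types` API (the demo object itself is not part of the patch):

```scala
import org.apache.spark.sql.types.{ArrayType, BinaryType, IntegerType, StringType}

object SimpleStringDemo {
  def main(args: Array[String]): Unit = {
    // The old error message embedded internal names such as "StringType"
    // literally; the fix prints the SQL names that simpleString returns.
    println(StringType.simpleString)              // string
    println(BinaryType.simpleString)              // binary
    println(ArrayType(IntegerType).simpleString)  // array<int>
  }
}
```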
spark git commit: [SPARK-23688][SS] Refactor tests away from rate source
Repository: spark
Updated Branches:
  refs/heads/master 8614edd44 -> 1fb46f30f


[SPARK-23688][SS] Refactor tests away from rate source

## What changes were proposed in this pull request?

Replace the rate source with a memory source in the continuous mode test suite. Keep using the "rate" source where tests intend to put data periodically in the background, or need a short source name to load, since "memory" doesn't have a provider for the source.

## How was this patch tested?

Ran the relevant test suite from the IDE.

Author: Jungtaek Lim

Closes #21152 from HeartSaVioR/SPARK-23688.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1fb46f30
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1fb46f30
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1fb46f30

Branch: refs/heads/master
Commit: 1fb46f30f83e4751169ff288ad406f26b7c11f7e
Parents: 8614edd
Author: Jungtaek Lim
Authored: Sat Apr 28 09:55:56 2018 +0800
Committer: jerryshao
Committed: Sat Apr 28 09:55:56 2018 +0800

----------------------------------------------------------------------
 .../streaming/continuous/ContinuousSuite.scala | 163 +++
 1 file changed, 61 insertions(+), 102 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/1fb46f30/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala
index c318b95..5f222e7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala
@@ -75,73 +75,50 @@ class ContinuousSuite extends ContinuousSuiteBase {
   }

   test("map") {
-    val df = spark.readStream
-      .format("rate")
-      .option("numPartitions", "5")
-      .option("rowsPerSecond", "5")
-      .load()
-      .select('value)
-      .map(r => r.getLong(0) * 2)
+    val input = ContinuousMemoryStream[Int]
+    val df = input.toDF().map(_.getInt(0) * 2)

-    testStream(df, useV2Sink = true)(
-      StartStream(longContinuousTrigger),
-      AwaitEpoch(0),
-      Execute(waitForRateSourceTriggers(_, 2)),
-      IncrementEpoch(),
-      Execute(waitForRateSourceTriggers(_, 4)),
-      IncrementEpoch(),
-      CheckAnswerRowsContains(scala.Range(0, 40, 2).map(Row(_))))
+    testStream(df)(
+      AddData(input, 0, 1),
+      CheckAnswer(0, 2),
+      StopStream,
+      AddData(input, 2, 3, 4),
+      StartStream(),
+      CheckAnswer(0, 2, 4, 6, 8))
   }

   test("flatMap") {
-    val df = spark.readStream
-      .format("rate")
-      .option("numPartitions", "5")
-      .option("rowsPerSecond", "5")
-      .load()
-      .select('value)
-      .flatMap(r => Seq(0, r.getLong(0), r.getLong(0) * 2))
+    val input = ContinuousMemoryStream[Int]
+    val df = input.toDF().flatMap(r => Seq(0, r.getInt(0), r.getInt(0) * 2))

-    testStream(df, useV2Sink = true)(
-      StartStream(longContinuousTrigger),
-      AwaitEpoch(0),
-      Execute(waitForRateSourceTriggers(_, 2)),
-      IncrementEpoch(),
-      Execute(waitForRateSourceTriggers(_, 4)),
-      IncrementEpoch(),
-      CheckAnswerRowsContains(scala.Range(0, 20).flatMap(n => Seq(0, n, n * 2)).map(Row(_))))
+    testStream(df)(
+      AddData(input, 0, 1),
+      CheckAnswer((0 to 1).flatMap(n => Seq(0, n, n * 2)): _*),
+      StopStream,
+      AddData(input, 2, 3, 4),
+      StartStream(),
+      CheckAnswer((0 to 4).flatMap(n => Seq(0, n, n * 2)): _*))
   }

   test("filter") {
-    val df = spark.readStream
-      .format("rate")
-      .option("numPartitions", "5")
-      .option("rowsPerSecond", "5")
-      .load()
-      .select('value)
-      .where('value > 5)
+    val input = ContinuousMemoryStream[Int]
+    val df = input.toDF().where('value > 2)

-    testStream(df, useV2Sink = true)(
-      StartStream(longContinuousTrigger),
-      AwaitEpoch(0),
-      Execute(waitForRateSourceTriggers(_, 2)),
-      IncrementEpoch(),
-      Execute(waitForRateSourceTriggers(_, 4)),
-      IncrementEpoch(),
-      CheckAnswerRowsContains(scala.Range(6, 20).map(Row(_))))
+    testStream(df)(
+      AddData(input, 0, 1),
+      CheckAnswer(),
+      StopStream,
+      AddData(input, 2, 3, 4),
+      StartStream(),
+      CheckAnswer(3, 4))
   }

   test("deduplicate") {
-    val df = spark.readStream
-      .format("rate")
-      .option("numPartitions", "5")
-      .option("rowsPerSecond", "5")
-      .load()
-      .select('value)
-      .dropDuplicates()
+    val input = ContinuousMemoryStream[Int]
+    val df =
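The pattern the rewritten tests rely on is a memory-backed source whose rows the test adds explicitly. A self-contained sketch of the same idea using the micro-batch `MemoryStream` (the suite's `ContinuousMemoryStream` is its continuous-mode counterpart); the session setup and names here are illustrative, not from the patch:

```scala
import org.apache.spark.sql.{SQLContext, SparkSession}
import org.apache.spark.sql.execution.streaming.MemoryStream

object MemorySourceSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[2]").appName("sketch").getOrCreate()
    import spark.implicits._
    implicit val sqlCtx: SQLContext = spark.sqlContext

    // Unlike the time-driven "rate" source, the test decides exactly which
    // rows arrive and when, so assertions need no waiting on triggers.
    val input = MemoryStream[Int]
    val query = input.toDF().selectExpr("value * 2 AS doubled")
      .writeStream.format("memory").queryName("out").start()

    input.addData(0, 1, 2)
    query.processAllAvailable()
    spark.table("out").show() // doubled: 0, 2, 4

    query.stop()
    spark.stop()
  }
}
```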
svn commit: r26579 - in /dev/spark/2.3.1-SNAPSHOT-2018_04_27_18_01-df45ddb-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Sat Apr 28 01:15:06 2018
New Revision: 26579

Log:
Apache Spark 2.3.1-SNAPSHOT-2018_04_27_18_01-df45ddb docs

[This commit notification would consist of 1443 parts, which exceeds the limit of 50, so it was shortened to this summary.]
svn commit: r26578 - in /dev/spark/2.4.0-SNAPSHOT-2018_04_27_16_01-8614edd-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Fri Apr 27 23:15:14 2018
New Revision: 26578

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_04_27_16_01-8614edd docs

[This commit notification would consist of 1460 parts, which exceeds the limit of 50, so it was shortened to this summary.]
svn commit: r26576 - in /dev/spark/2.3.1-SNAPSHOT-2018_04_27_14_01-4a10df0-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Fri Apr 27 21:15:20 2018
New Revision: 26576

Log:
Apache Spark 2.3.1-SNAPSHOT-2018_04_27_14_01-4a10df0 docs

[This commit notification would consist of 1443 parts, which exceeds the limit of 50, so it was shortened to this summary.]
spark git commit: [SPARK-24104] SQLAppStatusListener overwrites metrics onDriverAccumUpdates instead of updating them
Repository: spark
Updated Branches:
  refs/heads/master 3fd297af6 -> 8614edd44


[SPARK-24104] SQLAppStatusListener overwrites metrics onDriverAccumUpdates instead of updating them

## What changes were proposed in this pull request?

Event `SparkListenerDriverAccumUpdates` may happen multiple times in a query - e.g. every `FileSourceScanExec` and `BroadcastExchangeExec` call `postDriverMetricUpdates`. In Spark 2.2 `SQLListener` updated the map with new values. `SQLAppStatusListener` overwrites it. Unless `update` preserved it in the KV store (dependent on `exec.lastWriteTime`), only the metrics from the last operator that does `postDriverMetricUpdates` are preserved.

## How was this patch tested?

Unit test added.

Author: Juliusz Sompolski

Closes #21171 from juliuszsompolski/SPARK-24104.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8614edd4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8614edd4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8614edd4

Branch: refs/heads/master
Commit: 8614edd445264007144caa6743a8c2ca2b5082e0
Parents: 3fd297a
Author: Juliusz Sompolski
Authored: Fri Apr 27 14:14:28 2018 -0700
Committer: Marcelo Vanzin
Committed: Fri Apr 27 14:14:28 2018 -0700

----------------------------------------------------------------------
 .../sql/execution/ui/SQLAppStatusListener.scala |  2 +-
 .../ui/SQLAppStatusListenerSuite.scala          | 24 ++++++++++++++++++++----
 2 files changed, 21 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/8614edd4/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala
index 2b6bb48..d254af4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala
@@ -289,7 +289,7 @@ class SQLAppStatusListener(
   private def onDriverAccumUpdates(event: SparkListenerDriverAccumUpdates): Unit = {
     val SparkListenerDriverAccumUpdates(executionId, accumUpdates) = event
     Option(liveExecutions.get(executionId)).foreach { exec =>
-      exec.driverAccumUpdates = accumUpdates.toMap
+      exec.driverAccumUpdates = exec.driverAccumUpdates ++ accumUpdates
       update(exec)
     }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/8614edd4/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala
index f3f0883..02df45d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala
@@ -443,7 +443,8 @@ class SQLAppStatusListenerSuite extends SparkFunSuite with SharedSQLContext with
     val oldCount = statusStore.executionsList().size

     val expectedAccumValue = 12345
-    val physicalPlan = MyPlan(sqlContext.sparkContext, expectedAccumValue)
+    val expectedAccumValue2 = 54321
+    val physicalPlan = MyPlan(sqlContext.sparkContext, expectedAccumValue, expectedAccumValue2)
     val dummyQueryExecution = new QueryExecution(spark, LocalRelation()) {
       override lazy val sparkPlan = physicalPlan
       override lazy val executedPlan = physicalPlan
@@ -466,10 +467,14 @@ class SQLAppStatusListenerSuite extends SparkFunSuite with SharedSQLContext with
     val execId = statusStore.executionsList().last.executionId
     val metrics = statusStore.executionMetrics(execId)
     val driverMetric = physicalPlan.metrics("dummy")
+    val driverMetric2 = physicalPlan.metrics("dummy2")
     val expectedValue = SQLMetrics.stringValue(driverMetric.metricType, Seq(expectedAccumValue))
+    val expectedValue2 = SQLMetrics.stringValue(driverMetric2.metricType, Seq(expectedAccumValue2))

     assert(metrics.contains(driverMetric.id))
     assert(metrics(driverMetric.id) === expectedValue)
+    assert(metrics.contains(driverMetric2.id))
+    assert(metrics(driverMetric2.id) === expectedValue2)
   }

   test("roundtripping SparkListenerDriverAccumUpdates through JsonProtocol (SPARK-18462)") {
@@ -562,20 +567,31 @@ class SQLAppStatusListenerSuite extends SparkFunSuite with SharedSQLContext with
  * A dummy [[org.apache.spark.sql.execution.SparkPlan]] that updates a
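The one-line fix hinges on map-merge versus map-replace semantics. A tiny standalone sketch of the two behaviors (plain Scala, hypothetical accumulator IDs and values):

```scala
object MergeVsOverwrite {
  def main(args: Array[String]): Unit = {
    // Driver metrics already recorded for a query (accumulator id -> value),
    // e.g. posted earlier by a FileSourceScanExec.
    var driverAccumUpdates: Map[Long, Long] = Map(1L -> 10L)

    // A second operator (e.g. a BroadcastExchangeExec) posts its own updates.
    val accumUpdates: Seq[(Long, Long)] = Seq(2L -> 20L)

    // Old behavior: replace the whole map -- the first operator's metric is lost.
    val overwritten = accumUpdates.toMap
    println(overwritten)            // Map(2 -> 20)

    // Fixed behavior: merge -- both operators' metrics are preserved.
    driverAccumUpdates = driverAccumUpdates ++ accumUpdates
    println(driverAccumUpdates)     // Map(1 -> 10, 2 -> 20)
  }
}
```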
spark git commit: [SPARK-24104] SQLAppStatusListener overwrites metrics onDriverAccumUpdates instead of updating them
Repository: spark
Updated Branches:
  refs/heads/branch-2.3 4a10df0f6 -> df45ddb9d


[SPARK-24104] SQLAppStatusListener overwrites metrics onDriverAccumUpdates instead of updating them

## What changes were proposed in this pull request?

Event `SparkListenerDriverAccumUpdates` may happen multiple times in a query - e.g. every `FileSourceScanExec` and `BroadcastExchangeExec` call `postDriverMetricUpdates`. In Spark 2.2 `SQLListener` updated the map with new values. `SQLAppStatusListener` overwrites it. Unless `update` preserved it in the KV store (dependent on `exec.lastWriteTime`), only the metrics from the last operator that does `postDriverMetricUpdates` are preserved.

## How was this patch tested?

Unit test added.

Author: Juliusz Sompolski

Closes #21171 from juliuszsompolski/SPARK-24104.

(cherry picked from commit 8614edd445264007144caa6743a8c2ca2b5082e0)
Signed-off-by: Marcelo Vanzin


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/df45ddb9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/df45ddb9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/df45ddb9

Branch: refs/heads/branch-2.3
Commit: df45ddb9dea9bf42d18c1164cf35067c7cac5d6f
Parents: 4a10df0
Author: Juliusz Sompolski
Authored: Fri Apr 27 14:14:28 2018 -0700
Committer: Marcelo Vanzin
Committed: Fri Apr 27 14:14:38 2018 -0700

----------------------------------------------------------------------
 .../sql/execution/ui/SQLAppStatusListener.scala |  2 +-
 .../ui/SQLAppStatusListenerSuite.scala          | 24 ++++++++++++++++++++----
 2 files changed, 21 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/df45ddb9/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala
index 2b6bb48..d254af4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala
@@ -289,7 +289,7 @@ class SQLAppStatusListener(
   private def onDriverAccumUpdates(event: SparkListenerDriverAccumUpdates): Unit = {
     val SparkListenerDriverAccumUpdates(executionId, accumUpdates) = event
     Option(liveExecutions.get(executionId)).foreach { exec =>
-      exec.driverAccumUpdates = accumUpdates.toMap
+      exec.driverAccumUpdates = exec.driverAccumUpdates ++ accumUpdates
       update(exec)
     }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/df45ddb9/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala
index f3f0883..02df45d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala
@@ -443,7 +443,8 @@ class SQLAppStatusListenerSuite extends SparkFunSuite with SharedSQLContext with
     val oldCount = statusStore.executionsList().size

     val expectedAccumValue = 12345
-    val physicalPlan = MyPlan(sqlContext.sparkContext, expectedAccumValue)
+    val expectedAccumValue2 = 54321
+    val physicalPlan = MyPlan(sqlContext.sparkContext, expectedAccumValue, expectedAccumValue2)
     val dummyQueryExecution = new QueryExecution(spark, LocalRelation()) {
       override lazy val sparkPlan = physicalPlan
       override lazy val executedPlan = physicalPlan
@@ -466,10 +467,14 @@ class SQLAppStatusListenerSuite extends SparkFunSuite with SharedSQLContext with
     val execId = statusStore.executionsList().last.executionId
     val metrics = statusStore.executionMetrics(execId)
     val driverMetric = physicalPlan.metrics("dummy")
+    val driverMetric2 = physicalPlan.metrics("dummy2")
     val expectedValue = SQLMetrics.stringValue(driverMetric.metricType, Seq(expectedAccumValue))
+    val expectedValue2 = SQLMetrics.stringValue(driverMetric2.metricType, Seq(expectedAccumValue2))

     assert(metrics.contains(driverMetric.id))
     assert(metrics(driverMetric.id) === expectedValue)
+    assert(metrics.contains(driverMetric2.id))
+    assert(metrics(driverMetric2.id) === expectedValue2)
   }

   test("roundtripping SparkListenerDriverAccumUpdates through JsonProtocol (SPARK-18462)") {
@@ -562,20 +567,31 @@ class
svn commit: r26572 - in /dev/spark/2.4.0-SNAPSHOT-2018_04_27_12_01-3fd297a-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Fri Apr 27 19:15:40 2018
New Revision: 26572

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_04_27_12_01-3fd297a docs

[This commit notification would consist of 1460 parts, which exceeds the limit of 50, so it was shortened to this summary.]
spark git commit: [SPARK-24085][SQL] Query returns UnsupportedOperationException when scalar subquery is present in partitioning expression
Repository: spark
Updated Branches:
  refs/heads/master 2824f12b8 -> 3fd297af6


[SPARK-24085][SQL] Query returns UnsupportedOperationException when scalar subquery is present in partitioning expression

## What changes were proposed in this pull request?

In this case, the partition pruning happens before the planning phase of scalar subquery expressions. For scalar subquery expressions, the planning occurs late in the cycle (after the physical planning) in "PlanSubqueries", just before execution. Currently we try to execute the scalar subquery expression as part of partition pruning and fail as it implements Unevaluable. The fix attempts to ignore the subquery expressions during the partition pruning computation. Another option could be to somehow plan the subqueries before the partition pruning. Since this may not be a commonly occurring expression, I am opting for the simpler fix.

Repro:
```sql
CREATE TABLE test_prc_bug (
  id_value string
)
partitioned by (id_type string)
location '/tmp/test_prc_bug'
stored as parquet;

insert into test_prc_bug values ('1','a');
insert into test_prc_bug values ('2','a');
insert into test_prc_bug values ('3','b');
insert into test_prc_bug values ('4','b');

select * from test_prc_bug where id_type = (select 'b');
```

## How was this patch tested?

Added tests in SubquerySuite and hive/SQLQuerySuite.

Author: Dilip Biswal

Closes #21174 from dilipbiswal/spark-24085.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3fd297af
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3fd297af
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3fd297af

Branch: refs/heads/master
Commit: 3fd297af6dc568357c97abf86760c570309d6597
Parents: 2824f12
Author: Dilip Biswal
Authored: Fri Apr 27 11:43:29 2018 -0700
Committer: gatorsmile
Committed: Fri Apr 27 11:43:29 2018 -0700

----------------------------------------------------------------------
 .../datasources/FileSourceStrategy.scala        |  5 +++-
 .../datasources/PruneFileSourcePartitions.scala |  4 +++-
 .../org/apache/spark/sql/SubquerySuite.scala    | 15 +++++++++++
 .../sql/hive/execution/SQLQuerySuite.scala      | 31 ++++++++++++++++++++
 4 files changed, 53 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/3fd297af/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
index 16b2271..0a568d6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
@@ -76,7 +76,10 @@ object FileSourceStrategy extends Strategy with Logging {
         fsRelation.partitionSchema, fsRelation.sparkSession.sessionState.analyzer.resolver)
       val partitionSet = AttributeSet(partitionColumns)
       val partitionKeyFilters =
-        ExpressionSet(normalizedFilters.filter(_.references.subsetOf(partitionSet)))
+        ExpressionSet(normalizedFilters
+          .filterNot(SubqueryExpression.hasSubquery(_))
+          .filter(_.references.subsetOf(partitionSet)))
+
       logInfo(s"Pruning directories with: ${partitionKeyFilters.mkString(",")}")

       val dataColumns =

http://git-wip-us.apache.org/repos/asf/spark/blob/3fd297af/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PruneFileSourcePartitions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PruneFileSourcePartitions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PruneFileSourcePartitions.scala
index 3b830ac..16b2367 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PruneFileSourcePartitions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PruneFileSourcePartitions.scala
@@ -55,7 +55,9 @@ private[sql] object PruneFileSourcePartitions extends Rule[LogicalPlan] {
         partitionSchema, sparkSession.sessionState.analyzer.resolver)
       val partitionSet = AttributeSet(partitionColumns)
       val partitionKeyFilters =
-        ExpressionSet(normalizedFilters.filter(_.references.subsetOf(partitionSet)))
+        ExpressionSet(normalizedFilters
+          .filterNot(SubqueryExpression.hasSubquery(_))
+          .filter(_.references.subsetOf(partitionSet)))

       if (partitionKeyFilters.nonEmpty) {
         val prunedFileIndex =
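To make the pruning decision concrete, here is a simplified, self-contained sketch of the logic the patch changes, with stand-in types in place of Catalyst's `Expression` and `AttributeSet` (everything below is illustrative, not Spark code):

```scala
// Simplified stand-ins for Catalyst expressions, for illustration only.
sealed trait Expr { def references: Set[String]; def hasSubquery: Boolean }
case class ColumnPredicate(column: String) extends Expr {
  val references = Set(column); val hasSubquery = false
}
case class SubqueryPredicate(column: String) extends Expr {
  val references = Set(column); val hasSubquery = true // Unevaluable until planned
}

object PruningDemo {
  def main(args: Array[String]): Unit = {
    val partitionCols = Set("id_type")
    val filters: Seq[Expr] = Seq(ColumnPredicate("id_value"), SubqueryPredicate("id_type"))

    // Mirrors the patch: drop subquery expressions first, then keep only
    // filters that reference nothing but partition columns, since those are
    // the ones that can be evaluated during directory pruning.
    val partitionKeyFilters = filters
      .filterNot(_.hasSubquery)
      .filter(_.references.subsetOf(partitionCols))

    println(partitionKeyFilters) // List() -- the subquery filter is deferred to execution
  }
}
```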
spark git commit: [SPARK-23565][SS] New error message for structured streaming sources assertion
Repository: spark
Updated Branches:
  refs/heads/master 109935fc5 -> 2824f12b8


[SPARK-23565][SS] New error message for structured streaming sources assertion

## What changes were proposed in this pull request?

A more informative message to tell you why a structured streaming query cannot continue if you have added more sources than there are in the existing checkpoint offsets.

## How was this patch tested?

I added a unit test.

Author: Patrick McGloin

Closes #20946 from patrickmcgloin/master.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2824f12b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2824f12b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2824f12b

Branch: refs/heads/master
Commit: 2824f12b8bac5d86a82339d4dfb4d2625e978a15
Parents: 109935f
Author: Patrick McGloin
Authored: Fri Apr 27 23:04:14 2018 +0800
Committer: Shixiong Zhu
Committed: Fri Apr 27 23:04:14 2018 +0800

----------------------------------------------------------------------
 .../org/apache/spark/sql/execution/streaming/OffsetSeq.scala | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2824f12b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala
index 73945b3..7871744 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala
@@ -39,7 +39,9 @@ case class OffsetSeq(offsets: Seq[Option[Offset]], metadata: Option[OffsetSeqMet
    * cannot be serialized).
    */
   def toStreamProgress(sources: Seq[BaseStreamingSource]): StreamProgress = {
-    assert(sources.size == offsets.size)
+    assert(sources.size == offsets.size, s"There are [${offsets.size}] sources in the " +
+      s"checkpoint offsets and now there are [${sources.size}] sources requested by the query. " +
+      s"Cannot continue.")
     new StreamProgress ++ sources.zip(offsets).collect { case (s, Some(o)) => (s, o) }
   }
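The assertion typically fires when a query restarts from a checkpoint that was written with a different number of sources. A hedged sketch of such a scenario (hypothetical checkpoint path; assumes a previous run checkpointed a single-source query at the same location):

```scala
import org.apache.spark.sql.SparkSession

// Sketch only: run 1 (not shown) checkpointed a query reading ONE source at
// /tmp/demo-ckpt. This restart presents TWO sources against the same checkpoint,
// so the recovered OffsetSeq holds 1 offset while sources.size is 2, and
// toStreamProgress now fails with the informative message instead of a bare assert.
object CheckpointMismatchSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[2]").appName("sketch").getOrCreate()

    val a = spark.readStream.format("rate").load()
    val b = spark.readStream.format("rate").load()

    a.union(b).writeStream
      .format("console")
      .option("checkpointLocation", "/tmp/demo-ckpt") // hypothetical path from run 1
      .start()
      .awaitTermination()
  }
}
```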
svn commit: r26567 - in /dev/spark/2.4.0-SNAPSHOT-2018_04_27_04_01-109935f-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Fri Apr 27 11:17:53 2018
New Revision: 26567

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_04_27_04_01-109935f docs

[This commit notification would consist of 1460 parts, which exceeds the limit of 50, so it was shortened to this summary.]
spark git commit: [SPARK-23830][YARN] added check to ensure main method is found
Repository: spark
Updated Branches:
  refs/heads/master 8aa1d7b0e -> 109935fc5


[SPARK-23830][YARN] added check to ensure main method is found

## What changes were proposed in this pull request?

When a user specifies the wrong class -- or, in fact, a class instead of an object -- Spark throws an NPE which is not useful for debugging. This was reported in [SPARK-23830](https://issues.apache.org/jira/browse/SPARK-23830). This PR adds a check to ensure the main method was found and logs a useful error in the event that it's null.

## How was this patch tested?

* Unit tests + manual testing
* The scope of the changes is very limited

Author: eric-maynard
Author: Eric Maynard

Closes #21168 from eric-maynard/feature/SPARK-23830.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/109935fc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/109935fc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/109935fc

Branch: refs/heads/master
Commit: 109935fc5d8b3d381bb1b09a4a570040a0a1846f
Parents: 8aa1d7b
Author: eric-maynard
Authored: Fri Apr 27 15:25:07 2018 +0800
Committer: jerryshao
Committed: Fri Apr 27 15:25:07 2018 +0800

----------------------------------------------------------------------
 .../apache/spark/deploy/yarn/ApplicationMaster.scala | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/109935fc/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
----------------------------------------------------------------------
diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 6508400..595077e 100644
--- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -18,7 +18,7 @@ package org.apache.spark.deploy.yarn

 import java.io.{File, IOException}
-import java.lang.reflect.InvocationTargetException
+import java.lang.reflect.{InvocationTargetException, Modifier}
 import java.net.{Socket, URI, URL}
 import java.security.PrivilegedExceptionAction
 import java.util.concurrent.{TimeoutException, TimeUnit}
@@ -675,9 +675,14 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends
     val userThread = new Thread {
       override def run() {
         try {
-          mainMethod.invoke(null, userArgs.toArray)
-          finish(FinalApplicationStatus.SUCCEEDED, ApplicationMaster.EXIT_SUCCESS)
-          logDebug("Done running users class")
+          if (!Modifier.isStatic(mainMethod.getModifiers)) {
+            logError(s"Could not find static main method in object ${args.userClass}")
+            finish(FinalApplicationStatus.FAILED, ApplicationMaster.EXIT_EXCEPTION_USER_CLASS)
+          } else {
+            mainMethod.invoke(null, userArgs.toArray)
+            finish(FinalApplicationStatus.SUCCEEDED, ApplicationMaster.EXIT_SUCCESS)
+            logDebug("Done running user class")
+          }
         } catch {
           case e: InvocationTargetException =>
             e.getCause match {
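The NPE arises because a Scala `class`'s `main` compiles to an instance method, so reflection finds it but invoking it with a `null` receiver fails; only a top-level `object` gets a static forwarder on its companion class. A minimal sketch of the check (plain Scala, hypothetical class names):

```scala
import java.lang.reflect.Modifier

class NotAnObject { def main(args: Array[String]): Unit = () }      // instance method, not static
object ProperEntryPoint { def main(args: Array[String]): Unit = () } // static forwarder emitted

object StaticMainCheckDemo {
  def main(args: Array[String]): Unit = {
    for (name <- Seq("NotAnObject", "ProperEntryPoint")) {
      val m = Class.forName(name).getMethod("main", classOf[Array[String]])
      // Mirrors the ApplicationMaster fix: refuse to invoke a non-static main.
      if (!Modifier.isStatic(m.getModifiers))
        println(s"$name: no static main -- fail fast instead of NPE on invoke(null, ...)")
      else
        println(s"$name: safe to invoke with a null receiver")
    }
  }
}
```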
svn commit: r26565 - in /dev/spark/2.4.0-SNAPSHOT-2018_04_27_00_01-8aa1d7b-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Fri Apr 27 07:16:54 2018
New Revision: 26565

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_04_27_00_01-8aa1d7b docs

[This commit notification would consist of 1460 parts, which exceeds the limit of 50, so it was shortened to this summary.]