[spark] branch master updated: [SPARK-28667][SQL] Support InsertInto through the V2SessionCatalog

2019-08-26 Thread wenchen
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new e31aec9  [SPARK-28667][SQL] Support InsertInto through the 
V2SessionCatalog
e31aec9 is described below

commit e31aec9be478c7b5ea45c1a27d570d82a0dfe30b
Author: Burak Yavuz 
AuthorDate: Tue Aug 27 12:59:53 2019 +0800

[SPARK-28667][SQL] Support InsertInto through the V2SessionCatalog

### What changes were proposed in this pull request?

This PR adds support for INSERT INTO through both the SQL and DataFrameWriter APIs, routed through the V2SessionCatalog.
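
A minimal sketch of the two code paths covered here (the table name, catalog setup, and data are hypothetical, for illustration only):

```scala
import org.apache.spark.sql.SparkSession

object InsertIntoV2SessionCatalogExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("insert-into-example")
      .getOrCreate()
    import spark.implicits._

    // Hypothetical target table; with a V2 session catalog plugged in, both
    // statements below resolve the table through the V2SessionCatalog.
    spark.sql("CREATE TABLE IF NOT EXISTS demo_table (id BIGINT) USING parquet")

    // SQL code path
    spark.sql("INSERT INTO demo_table VALUES (1), (2), (3)")

    // DataFrameWriter code path
    Seq(4L, 5L).toDF("id").write.insertInto("demo_table")

    spark.table("demo_table").show()
    spark.stop()
  }
}
```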

### Why are the changes needed?

This will allow V2 tables to be plugged in through the V2SessionCatalog, 
and be used seamlessly with existing APIs.

### Does this PR introduce any user-facing change?

No behavior changes.

### How was this patch tested?

Pulled out a lot of tests so that they can be shared across the 
DataFrameWriter and SQL code paths.

Closes #25507 from brkyvz/insertSesh.

Lead-authored-by: Burak Yavuz 
Co-authored-by: Burak Yavuz 
Signed-off-by: Wenchen Fan 
---
 .../spark/sql/catalyst/analysis/Analyzer.scala | 119 +++---
 .../sql/catalyst/analysis/CheckAnalysis.scala  |   1 -
 .../org/apache/spark/sql/DataFrameWriter.scala |  20 +-
 .../DataSourceV2DataFrameSessionCatalogSuite.scala |  42 +-
 .../sources/v2/DataSourceV2DataFrameSuite.scala| 117 +-
 .../v2/DataSourceV2SQLSessionCatalogSuite.scala|  46 ++
 .../sql/sources/v2/DataSourceV2SQLSuite.scala  | 311 +-
 .../spark/sql/sources/v2/InsertIntoTests.scala | 467 +
 .../sql/sources/v2/V1WriteFallbackSuite.scala  |  17 +-
 9 files changed, 681 insertions(+), 459 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 051783b..4a3f9c4 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -24,9 +24,8 @@ import scala.collection.mutable.ArrayBuffer
 import scala.util.Random
 
 import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalog.v2.{CatalogManager, 
CatalogNotFoundException, CatalogPlugin, LookupCatalog, TableChange}
+import org.apache.spark.sql.catalog.v2._
 import org.apache.spark.sql.catalog.v2.expressions.{FieldReference, 
IdentityTransform}
-import org.apache.spark.sql.catalog.v2.utils.CatalogV2Util.loadTable
 import org.apache.spark.sql.catalyst._
 import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.catalyst.encoders.OuterScopes
@@ -36,7 +35,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate._
 import org.apache.spark.sql.catalyst.expressions.objects._
 import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.logical._
-import 
org.apache.spark.sql.catalyst.plans.logical.sql.{AlterTableAddColumnsStatement, 
AlterTableAlterColumnStatement, AlterTableDropColumnsStatement, 
AlterTableRenameColumnStatement, AlterTableSetLocationStatement, 
AlterTableSetPropertiesStatement, AlterTableUnsetPropertiesStatement, 
DescribeTableStatement, InsertIntoStatement}
+import org.apache.spark.sql.catalyst.plans.logical.sql._
 import org.apache.spark.sql.catalyst.rules._
 import org.apache.spark.sql.catalyst.trees.TreeNodeRef
 import org.apache.spark.sql.catalyst.util.toPrettySQL
@@ -641,21 +640,13 @@ class Analyzer(
* [[ResolveRelations]] still resolves v1 tables.
*/
   object ResolveTables extends Rule[LogicalPlan] {
-import org.apache.spark.sql.catalog.v2.utils.CatalogV2Util._
-
 def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp {
-  case u @ UnresolvedRelation(AsTemporaryViewIdentifier(ident))
-  if catalog.isTemporaryTable(ident) =>
-u // temporary views take precedence over catalog table names
-
-  case u @ UnresolvedRelation(CatalogObjectIdentifier(maybeCatalog, 
ident)) =>
-maybeCatalog.orElse(sessionCatalog)
-  .flatMap(loadTable(_, ident))
-  .map {
-case unresolved: UnresolvedTable => u
-case resolved => DataSourceV2Relation.create(resolved)
-  }
-  .getOrElse(u)
+  case u: UnresolvedRelation =>
+val v2TableOpt = lookupV2Relation(u.multipartIdentifier) match {
+  case scala.Left((_, _, tableOpt)) => tableOpt
+  case scala.Right(tableOpt) => tableOpt
+}
+v2TableOpt.map(DataSourceV2Relation.create).getOrElse(u)
 }
   }
 
@@ -770,40 +761,41 @@ class Analyzer(
 
   object ResolveInsertInto extends Rule[LogicalPlan] {
 override def apply(plan: 

[spark] branch master updated (64032cb -> 13b1eb6)

2019-08-26 Thread srowen
This is an automated email from the ASF dual-hosted git repository.

srowen pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.


from 64032cb  [MINOR][SS] Reuse KafkaSourceInitialOffsetWriter to 
deduplicate
 add 13b1eb6  [SPARK-22955][DSTREAMS] - graceful shutdown shouldn't lead to 
job gen…

No new revisions were added by this update.

Summary of changes:
 .../spark/streaming/scheduler/JobGenerator.scala   |  2 +-
 .../spark/streaming/StreamingContextSuite.scala| 30 --
 2 files changed, 29 insertions(+), 3 deletions(-)





[spark] branch master updated (b205269 -> 64032cb)

2019-08-26 Thread vanzin
This is an automated email from the ASF dual-hosted git repository.

vanzin pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.


from b205269  [SPARK-28875][DSTREAMS][SS][TESTS] Add Task retry tests to 
make sure new consumer used
 add 64032cb  [MINOR][SS] Reuse KafkaSourceInitialOffsetWriter to 
deduplicate

No new revisions were added by this update.

Summary of changes:
 .../spark/sql/kafka010/KafkaMicroBatchStream.scala | 41 --
 .../apache/spark/sql/kafka010/KafkaSource.scala| 34 +---
 .../kafka010/KafkaSourceInitialOffsetWriter.scala  | 63 ++
 3 files changed, 64 insertions(+), 74 deletions(-)
 create mode 100644 
external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceInitialOffsetWriter.scala





[spark] branch master updated: [SPARK-28875][DSTREAMS][SS][TESTS] Add Task retry tests to make sure new consumer used

2019-08-26 Thread vanzin
This is an automated email from the ASF dual-hosted git repository.

vanzin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new b205269  [SPARK-28875][DSTREAMS][SS][TESTS] Add Task retry tests to 
make sure new consumer used
b205269 is described below

commit b205269ae09dc384de98ab027da4c17abf3a9dd9
Author: Gabor Somogyi 
AuthorDate: Mon Aug 26 13:12:14 2019 -0700

[SPARK-28875][DSTREAMS][SS][TESTS] Add Task retry tests to make sure new 
consumer used

### What changes were proposed in this pull request?
When a task retry happens with the Kafka source, it is not known whether the consumer is the issue, so the old consumer is removed from the cache and a new consumer is created. The feature works fine but was not covered by tests.

In this PR I've added such tests for both DStreams and Structured Streaming.
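
A minimal sketch of the retry policy these tests exercise (the cache and key types below are simplified placeholders, not the actual KafkaDataConsumer code):

```scala
import scala.collection.mutable
import org.apache.spark.TaskContext

// On a task retry we cannot tell whether the cached consumer caused the failure,
// so the safe policy is to evict it and hand the retried task a fresh consumer.
def acquireConsumer[C](cache: mutable.Map[String, C], key: String, create: () => C): C = {
  val ctx = TaskContext.get()
  val isRetry = ctx != null && ctx.attemptNumber() > 0
  if (isRetry) {
    cache.remove(key)   // drop the possibly-broken consumer from the cache
    create()            // use a brand-new, uncached consumer for this attempt
  } else {
    cache.getOrElseUpdate(key, create())
  }
}
```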

### Why are the changes needed?
No such tests exist yet.

### Does this PR introduce any user-facing change?
No.

### How was this patch tested?
Existing + new unit tests.

Closes #25582 from gaborgsomogyi/SPARK-28875.

Authored-by: Gabor Somogyi 
Signed-off-by: Marcelo Vanzin 
---
 .../spark/sql/kafka010/KafkaDataConsumer.scala |  6 +--
 .../sql/kafka010/KafkaDataConsumerSuite.scala  | 62 +++---
 .../kafka010/KafkaDataConsumerSuite.scala  | 24 +
 3 files changed, 71 insertions(+), 21 deletions(-)

diff --git 
a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala
 
b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala
index cbb99fd..af240dc 100644
--- 
a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala
+++ 
b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala
@@ -78,7 +78,7 @@ private[kafka010] sealed trait KafkaDataConsumer {
   def release(): Unit
 
   /** Reference to the internal implementation that this wrapper delegates to 
*/
-  protected def internalConsumer: InternalKafkaConsumer
+  def internalConsumer: InternalKafkaConsumer
 }
 
 
@@ -512,7 +512,7 @@ private[kafka010] object KafkaDataConsumer extends Logging {
 override def release(): Unit = { internalConsumer.close() }
   }
 
-  private case class CacheKey(groupId: String, topicPartition: TopicPartition) 
{
+  private[kafka010] case class CacheKey(groupId: String, topicPartition: 
TopicPartition) {
 def this(topicPartition: TopicPartition, kafkaParams: ju.Map[String, 
Object]) =
   
this(kafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG).asInstanceOf[String], 
topicPartition)
   }
@@ -521,7 +521,7 @@ private[kafka010] object KafkaDataConsumer extends Logging {
   // - We make a best-effort attempt to maintain the max size of the cache as 
configured capacity.
   //   The capacity is not guaranteed to be maintained, especially when there 
are more active
   //   tasks simultaneously using consumers than the capacity.
-  private lazy val cache = {
+  private[kafka010] lazy val cache = {
 val conf = SparkEnv.get.conf
 val capacity = conf.get(CONSUMER_CACHE_CAPACITY)
 new ju.LinkedHashMap[CacheKey, InternalKafkaConsumer](capacity, 0.75f, 
true) {
diff --git 
a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDataConsumerSuite.scala
 
b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDataConsumerSuite.scala
index 2aa869c..8aa7e06 100644
--- 
a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDataConsumerSuite.scala
+++ 
b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDataConsumerSuite.scala
@@ -20,22 +20,23 @@ package org.apache.spark.sql.kafka010
 import java.util.concurrent.{Executors, TimeUnit}
 
 import scala.collection.JavaConverters._
-import scala.concurrent.{ExecutionContext, Future}
-import scala.concurrent.duration.Duration
 import scala.util.Random
 
-import org.apache.kafka.clients.consumer.ConsumerConfig
+import org.apache.kafka.clients.consumer.ConsumerConfig._
 import org.apache.kafka.common.TopicPartition
 import org.apache.kafka.common.serialization.ByteArrayDeserializer
 import org.scalatest.PrivateMethodTester
 
 import org.apache.spark.{TaskContext, TaskContextImpl}
+import org.apache.spark.sql.kafka010.KafkaDataConsumer.CacheKey
 import org.apache.spark.sql.test.SharedSparkSession
-import org.apache.spark.util.ThreadUtils
 
 class KafkaDataConsumerSuite extends SharedSparkSession with 
PrivateMethodTester {
 
   protected var testUtils: KafkaTestUtils = _
+  private val topic = "topic" + Random.nextInt()
+  private val topicPartition = new TopicPartition(topic, 0)
+  private val groupId = "groupId"
 
   override def beforeAll(): Unit = {
 super.beforeAll()
@@ -51,6 +52,15 @@ class KafkaDataConsumerSuite extends 

[spark] branch master updated: [SPARK-28701][INFRA][FOLLOWUP] Fix the key error when looking in os.environ

2019-08-26 Thread shaneknapp
This is an automated email from the ASF dual-hosted git repository.

shaneknapp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new 84d4f94  [SPARK-28701][INFRA][FOLLOWUP] Fix the key error when looking 
in os.environ
84d4f94 is described below

commit 84d4f945969e199a5d3fb658864e494b88d15f3c
Author: shane knapp 
AuthorDate: Mon Aug 26 12:40:31 2019 -0700

[SPARK-28701][INFRA][FOLLOWUP] Fix the key error when looking in os.environ

### What changes were proposed in this pull request?

I broke run-tests.py for non-PRB builds in this PR:
https://github.com/apache/spark/pull/25423

### Why are the changes needed?

To fix what I broke.

### Does this PR introduce any user-facing change?
No.

### How was this patch tested?
The build system will test this.

Closes #25585 from shaneknapp/fix-run-tests.

Authored-by: shane knapp 
Signed-off-by: shane knapp 
---
 dev/run-tests.py | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/dev/run-tests.py b/dev/run-tests.py
index a338667..ea51570 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -405,10 +405,11 @@ def run_scala_tests(build_tool, hadoop_version, 
test_modules, excluded_tags):
 test_profiles += ['-Dtest.exclude.tags=' + ",".join(excluded_tags)]
 
 # set up java11 env if this is a pull request build with 'test-java11' in 
the title
-if "test-java11" in os.environ["ghprbPullTitle"].lower():
-os.environ["JAVA_HOME"] = "/usr/java/jdk-11.0.1"
-os.environ["PATH"] = "%s/bin:%s" % (os.environ["JAVA_HOME"], 
os.environ["PATH"])
-test_profiles += ['-Djava.version=11']
+if "ghprbPullTitle" in os.environ:
+if "test-java11" in os.environ["ghprbPullTitle"].lower():
+os.environ["JAVA_HOME"] = "/usr/java/jdk-11.0.1"
+os.environ["PATH"] = "%s/bin:%s" % (os.environ["JAVA_HOME"], 
os.environ["PATH"])
+test_profiles += ['-Djava.version=11']
 
 if build_tool == "maven":
 run_scala_tests_maven(test_profiles)





[spark] branch master updated: [SPARK-28679][YARN] changes to setResourceInformation to handle empty resources and reflection error handling

2019-08-26 Thread vanzin
This is an automated email from the ASF dual-hosted git repository.

vanzin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new dd0725d  [SPARK-28679][YARN] changes to setResourceInformation to 
handle empty resources and reflection error handling
dd0725d is described below

commit dd0725d7eaf2f0dc9fb3b13326b07c32812b0ad9
Author: Alessandro Bellina 
AuthorDate: Mon Aug 26 12:00:33 2019 -0700

[SPARK-28679][YARN] changes to setResourceInformation to handle empty 
resources and reflection error handling

## What changes were proposed in this pull request?

This fixes issues that can arise when the JARs for different Hadoop versions mix, and short-circuits the case where we are running with a Spark that was not built for YARN 3 (resource support).
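
A minimal sketch of the defensive-reflection pattern this change applies (the helper name and exception type below are illustrative, not the exact ResourceRequestHelper code):

```scala
// Look up a method reflectively and turn a missing method into a clear error,
// which typically indicates mismatched YARN JARs on the classpath.
def findMethod(target: AnyRef, name: String, paramTypes: Class[_]*): java.lang.reflect.Method = {
  try {
    target.getClass.getMethod(name, paramTypes: _*)
  } catch {
    case e: NoSuchMethodException =>
      throw new IllegalStateException(
        s"Cannot find $name in ${target.getClass}. " +
          "This is likely due to a JAR conflict between different YARN versions.", e)
  }
}
```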

## How was this patch tested?

I tested it manually.

Closes #25403 from abellina/SPARK-28679.

Authored-by: Alessandro Bellina 
Signed-off-by: Marcelo Vanzin 
---
 .../spark/deploy/yarn/ResourceRequestHelper.scala   | 21 -
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git 
a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ResourceRequestHelper.scala
 
b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ResourceRequestHelper.scala
index 522c16b..f524962 100644
--- 
a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ResourceRequestHelper.scala
+++ 
b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ResourceRequestHelper.scala
@@ -143,17 +143,28 @@ private object ResourceRequestHelper extends Logging {
 require(resource != null, "Resource parameter should not be null!")
 
 logDebug(s"Custom resources requested: $resources")
+if (resources.isEmpty) {
+  // no point in going forward, as we don't have anything to set
+  return
+}
+
 if (!isYarnResourceTypesAvailable()) {
-  if (resources.nonEmpty) {
-logWarning("Ignoring custom resource requests because " +
-"the version of YARN does not support it!")
-  }
+  logWarning("Ignoring custom resource requests because " +
+  "the version of YARN does not support it!")
   return
 }
 
 val resInfoClass = Utils.classForName(RESOURCE_INFO_CLASS)
 val setResourceInformationMethod =
-  resource.getClass.getMethod("setResourceInformation", classOf[String], 
resInfoClass)
+  try {
+resource.getClass.getMethod("setResourceInformation", classOf[String], 
resInfoClass)
+  } catch {
+case e: NoSuchMethodException =>
+  throw new SparkException(
+s"Cannot find setResourceInformation in ${resource.getClass}. " +
+  "This is likely due to a JAR conflict between different YARN 
versions.", e)
+  }
+
 resources.foreach { case (name, rawAmount) =>
   try {
 val AMOUNT_AND_UNIT_REGEX(amountPart, unitPart) = rawAmount





[spark] branch master updated (13fd32c -> 2efa6f5)

2019-08-26 Thread vanzin
This is an automated email from the ASF dual-hosted git repository.

vanzin pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.


from 13fd32c  [SPARK-28701][TEST-HADOOP3.2][TEST-JAVA11][K8S] adding java11 
support for pull request builds
 add 2efa6f5  [SPARK-28607][CORE][SHUFFLE] Don't store partition lengths 
twice

No new revisions were added by this update.

Summary of changes:
 .../spark/shuffle/api/ShuffleMapOutputWriter.java  |  8 ++-
 .../shuffle/sort/BypassMergeSortShuffleWriter.java | 58 ++
 .../sort/io/LocalDiskShuffleMapOutputWriter.java   |  3 +-
 .../io/LocalDiskShuffleMapOutputWriterSuite.scala  |  6 +--
 4 files changed, 35 insertions(+), 40 deletions(-)





svn commit: r35381 - /dev/spark/KEYS

2019-08-26 Thread kiszk
Author: kiszk
Date: Mon Aug 26 17:18:45 2019
New Revision: 35381

Log:
Update KEYS

Modified:
dev/spark/KEYS

Modified: dev/spark/KEYS
==
--- dev/spark/KEYS (original)
+++ dev/spark/KEYS Mon Aug 26 17:18:45 2019
@@ -993,12 +993,12 @@ ZTFPNYvCMMHM8A==
 =PEdD
 -END PGP PUBLIC KEY BLOCK-
 
-pub   rsa4096/7F0FEF75 2019-08-19 [SC]
-uid [ultimate] Kazuaki Ishizaki (CODE SIGNING KEY) 
-sub   rsa4096/7C3AEC68 2019-08-19 [E]
+pub   4096R/7F0FEF75 2019-08-19
+uid  Kazuaki Ishizaki (CODE SIGNING KEY) 
+sub   4096R/7C3AEC68 2019-08-19
 
 -BEGIN PGP PUBLIC KEY BLOCK-
-Version: GnuPG v2
+Version: GnuPG v1
 
 mQINBF1a3YcBEAC7I6f1jWpY9WlJBkbwvLneYBjnD2BRwG1eKjkz49aUXVKkx4Du
 XB7b+agbhWL7EIPjQHVJf0RVGochOujKfcPxOz5bZwAV078EbsJpiAYIAeVEimQF
@@ -1049,3 +1049,4 @@ au2shXGZFmo4V56uCJ5HqZTJJZaMceQx7u8uqZbh
 XJ5Dp1pqv9DC6cl9vLSHctRrM2kG
 =mQLW
 -END PGP PUBLIC KEY BLOCK-
+






[spark] branch master updated (9f8c7a2 -> 13fd32c)

2019-08-26 Thread gurwls223
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.


from 9f8c7a2  [SPARK-28709][DSTREAMS] Fix StreamingContext leak through 
Streaming
 add 13fd32c  [SPARK-28701][TEST-HADOOP3.2][TEST-JAVA11][K8S] adding java11 
support for pull request builds

No new revisions were added by this update.

Summary of changes:
 dev/run-tests-jenkins.py   | 2 +-
 dev/run-tests.py   | 7 +++
 .../kubernetes/integration-tests/dev/dev-run-integration-tests.sh  | 6 ++
 3 files changed, 14 insertions(+), 1 deletion(-)





[spark] branch master updated: [SPARK-28709][DSTREAMS] Fix StreamingContext leak through Streaming

2019-08-26 Thread srowen
This is an automated email from the ASF dual-hosted git repository.

srowen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new 9f8c7a2  [SPARK-28709][DSTREAMS] Fix StreamingContext leak through 
Streaming
9f8c7a2 is described below

commit 9f8c7a280476d37fb430da0adbde5d61e8a40714
Author: Nikita Gorbachevsky 
AuthorDate: Mon Aug 26 09:30:36 2019 -0500

[SPARK-28709][DSTREAMS] Fix StreamingContext leak through Streaming

## What changes were proposed in this pull request?

In my application, Spark Streaming is restarted programmatically by stopping the StreamingContext (without stopping the SparkContext) and creating/starting a new one. I use this for automatic detection of Kafka topic/partition changes and for automatic failover in case of non-fatal exceptions.

However, I noticed that after multiple restarts the driver fails with an OOM. While investigating a heap dump, I found that the StreamingContext object isn't cleared by GC after stopping.

Heap dump screenshot: https://user-images.githubusercontent.com/13151161/63010149-83f4c200-be8e-11e9-9f48-12b6e97839f4.png

There are several places which hold a reference to it (a short sketch of the listener cleanup follows this list):

1. StreamingTab registers StreamingJobProgressListener, which holds a reference to the StreamingContext, directly on the LiveListenerBus shared queue via the ssc.sc.addSparkListener(listener) method invocation. However, this listener isn't unregistered in the stop method.
2. The JSON handlers (/streaming/json and /streaming/batch/json) aren't unregistered in SparkUI, while they hold a reference to StreamingJobProgressListener. The same issue affects all the pages; I assume renderJsonHandler should be added to the pageToHandlers cache on attachPage invocation so that it can be unregistered as well on detachPage.
3. SparkUI holds a reference to StreamingJobProgressListener in the corresponding local variable, which isn't cleared after the StreamingContext is stopped.
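
A minimal sketch of the cleanup idea in point 1 (the class and method names below are simplified placeholders, not the actual StreamingTab code):

```scala
import org.apache.spark.SparkContext
import org.apache.spark.scheduler.SparkListener

// Any listener registered on behalf of a StreamingContext must also be removed
// when the StreamingContext stops; otherwise the listener bus keeps it (and,
// transitively, the whole StreamingContext) alive across restarts.
class LeakFreeTab(sc: SparkContext, listener: SparkListener) {
  def attach(): Unit = sc.addSparkListener(listener)

  def detach(): Unit = sc.removeSparkListener(listener)  // symmetric cleanup on stop
}
```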

## How was this patch tested?

Added tests to existing test suites.
After I applied these changes (via reflection) in my app, the OOM on the driver side was gone.

Closes #25439 from choojoyq/SPARK-28709-fix-streaming-context-leak-on-stop.

Authored-by: Nikita Gorbachevsky 
Signed-off-by: Sean Owen 
---
 .../main/scala/org/apache/spark/ui/SparkUI.scala   |  3 +++
 .../src/main/scala/org/apache/spark/ui/WebUI.scala |  1 +
 .../apache/spark/streaming/StreamingContext.scala  | 29 ++
 .../apache/spark/streaming/ui/StreamingTab.scala   | 28 ++---
 .../apache/spark/streaming/InputStreamsSuite.scala |  4 ---
 .../spark/streaming/StreamingContextSuite.scala| 24 ++
 .../apache/spark/streaming/UISeleniumSuite.scala   |  4 +++
 7 files changed, 63 insertions(+), 30 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala 
b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala
index 1175bc2..6fb8e45 100644
--- a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala
+++ b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala
@@ -138,6 +138,9 @@ private[spark] class SparkUI private (
 streamingJobProgressListener = Option(sparkListener)
   }
 
+  def clearStreamingJobProgressListener(): Unit = {
+streamingJobProgressListener = None
+  }
 }
 
 private[spark] abstract class SparkUITab(parent: SparkUI, prefix: String)
diff --git a/core/src/main/scala/org/apache/spark/ui/WebUI.scala 
b/core/src/main/scala/org/apache/spark/ui/WebUI.scala
index 54ae258..1fe822a 100644
--- a/core/src/main/scala/org/apache/spark/ui/WebUI.scala
+++ b/core/src/main/scala/org/apache/spark/ui/WebUI.scala
@@ -93,6 +93,7 @@ private[spark] abstract class WebUI(
 attachHandler(renderJsonHandler)
 val handlers = pageToHandlers.getOrElseUpdate(page, 
ArrayBuffer[ServletContextHandler]())
 handlers += renderHandler
+handlers += renderJsonHandler
   }
 
   /** Attaches a handler to this UI. */
diff --git 
a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala 
b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
index 15ebef2..48913ea 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
@@ -38,7 +38,6 @@ import org.apache.spark.annotation.{DeveloperApi, 
Experimental}
 import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.input.FixedLengthBinaryInputFormat
 import org.apache.spark.internal.Logging
-import org.apache.spark.internal.config.UI._
 import org.apache.spark.rdd.{RDD, RDDOperationScope}
 import org.apache.spark.scheduler.LiveListenerBus
 import org.apache.spark.serializer.SerializationDebugger
@@ -189,10 +188,9 @@ class StreamingContext private[streaming] (
   private[streaming] val progressListener = new 

[spark] branch master updated (c61270f -> 6e12b58)

2019-08-26 Thread gurwls223
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.


from c61270f  [SPARK-27395][SQL] Improve EXPLAIN command
 add 6e12b58  [SPARK-28527][SQL][TEST] Re-run all the tests in 
SQLQueryTestSuite via Thrift Server

No new revisions were added by this update.

Summary of changes:
 project/SparkBuild.scala   |   3 +-
 .../org/apache/spark/sql/SQLQueryTestSuite.scala   |  86 ++---
 sql/hive-thriftserver/pom.xml  |   7 +
 .../sql/hive/thriftserver/HiveThriftServer2.scala  |   3 +-
 .../thriftserver/ThriftServerQueryTestSuite.scala  | 358 +
 5 files changed, 416 insertions(+), 41 deletions(-)
 create mode 100644 
sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala





[spark] branch master updated (c353a84 -> c61270f)

2019-08-26 Thread wenchen
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.


from c353a84  [SPARK-28642][SQL][TEST][FOLLOW-UP] Test 
spark.sql.redaction.options.regex with and without default values
 add c61270f  [SPARK-27395][SQL] Improve EXPLAIN command

No new revisions were added by this update.

Summary of changes:
 .../sql/catalyst/expressions/Expression.scala  |   4 +
 .../catalyst/expressions/codegen/javaCode.scala|   3 +
 .../spark/sql/catalyst/plans/QueryPlan.scala   |  27 +-
 .../apache/spark/sql/catalyst/trees/TreeNode.scala |  44 +-
 .../apache/spark/sql/execution/ExplainUtils.scala  | 234 +++
 .../spark/sql/execution/QueryExecution.scala   |  11 +-
 .../org/apache/spark/sql/execution/SparkPlan.scala |  20 +
 .../spark/sql/execution/SparkSqlParser.scala   |   8 +-
 .../sql/execution/WholeStageCodegenExec.scala  |  15 +-
 .../execution/adaptive/AdaptiveSparkPlanExec.scala |  22 +-
 .../sql/execution/adaptive/QueryStageExec.scala|  16 +-
 .../sql/execution/basicPhysicalOperators.scala |  45 ++
 .../sql/execution/columnar/InMemoryRelation.scala  |   2 +-
 .../execution/columnar/InMemoryTableScanExec.scala |   2 +-
 .../command/InsertIntoDataSourceDirCommand.scala   |   2 +-
 .../apache/spark/sql/execution/command/cache.scala |   2 +-
 .../spark/sql/execution/command/commands.scala |   7 +-
 .../apache/spark/sql/execution/command/views.scala |   4 +-
 .../datasources/InsertIntoDataSourceCommand.scala  |   2 +-
 .../datasources/SaveIntoDataSourceCommand.scala|   2 +-
 .../spark/sql/execution/exchange/Exchange.scala|  11 +-
 .../sql/execution/joins/CartesianProductExec.scala |  14 +-
 .../spark/sql/execution/joins/HashJoin.scala   |  20 +-
 .../sql/execution/joins/SortMergeJoinExec.scala|  17 +
 .../test/resources/sql-tests/inputs/explain.sql|  96 +++
 .../resources/sql-tests/results/explain.sql.out| 738 +
 26 files changed, 1318 insertions(+), 50 deletions(-)
 create mode 100644 
sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala
 create mode 100644 sql/core/src/test/resources/sql-tests/inputs/explain.sql
 create mode 100644 
sql/core/src/test/resources/sql-tests/results/explain.sql.out





svn commit: r35371 - in /dev/spark/v2.3.4-rc1-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _site/api/java/org/apache/spark

2019-08-26 Thread kiszk
Author: kiszk
Date: Mon Aug 26 09:54:45 2019
New Revision: 35371

Log:
Apache Spark v2.3.4-rc1 docs


[This commit notification would consist of 1447 parts, which exceeds the limit of 50, so it was shortened to the summary.]




svn commit: r35370 - /dev/spark/v2.3.4-rc1-bin/

2019-08-26 Thread kiszk
Author: kiszk
Date: Mon Aug 26 09:00:20 2019
New Revision: 35370

Log:
Apache Spark v2.3.4-rc1

Added:
dev/spark/v2.3.4-rc1-bin/
dev/spark/v2.3.4-rc1-bin/SparkR_2.3.4.tar.gz   (with props)
dev/spark/v2.3.4-rc1-bin/SparkR_2.3.4.tar.gz.asc
dev/spark/v2.3.4-rc1-bin/SparkR_2.3.4.tar.gz.sha512
dev/spark/v2.3.4-rc1-bin/pyspark-2.3.4.tar.gz   (with props)
dev/spark/v2.3.4-rc1-bin/pyspark-2.3.4.tar.gz.asc
dev/spark/v2.3.4-rc1-bin/pyspark-2.3.4.tar.gz.sha512
dev/spark/v2.3.4-rc1-bin/spark-2.3.4-bin-hadoop2.6.tgz   (with props)
dev/spark/v2.3.4-rc1-bin/spark-2.3.4-bin-hadoop2.6.tgz.asc
dev/spark/v2.3.4-rc1-bin/spark-2.3.4-bin-hadoop2.6.tgz.sha512
dev/spark/v2.3.4-rc1-bin/spark-2.3.4-bin-hadoop2.7.tgz   (with props)
dev/spark/v2.3.4-rc1-bin/spark-2.3.4-bin-hadoop2.7.tgz.asc
dev/spark/v2.3.4-rc1-bin/spark-2.3.4-bin-hadoop2.7.tgz.sha512
dev/spark/v2.3.4-rc1-bin/spark-2.3.4-bin-without-hadoop.tgz   (with props)
dev/spark/v2.3.4-rc1-bin/spark-2.3.4-bin-without-hadoop.tgz.asc
dev/spark/v2.3.4-rc1-bin/spark-2.3.4-bin-without-hadoop.tgz.sha512
dev/spark/v2.3.4-rc1-bin/spark-2.3.4.tgz   (with props)
dev/spark/v2.3.4-rc1-bin/spark-2.3.4.tgz.asc
dev/spark/v2.3.4-rc1-bin/spark-2.3.4.tgz.sha512

Added: dev/spark/v2.3.4-rc1-bin/SparkR_2.3.4.tar.gz
==
Binary file - no diff available.

Propchange: dev/spark/v2.3.4-rc1-bin/SparkR_2.3.4.tar.gz
--
svn:mime-type = application/octet-stream

Added: dev/spark/v2.3.4-rc1-bin/SparkR_2.3.4.tar.gz.asc
==
--- dev/spark/v2.3.4-rc1-bin/SparkR_2.3.4.tar.gz.asc (added)
+++ dev/spark/v2.3.4-rc1-bin/SparkR_2.3.4.tar.gz.asc Mon Aug 26 09:00:20 2019
@@ -0,0 +1,17 @@
+-BEGIN PGP SIGNATURE-
+
+iQJFBAABCgAvFiEEgFK1nI3grK++CMoV5JoEbH8P73UFAl1jmEMRHGtpc3prQGFw
+YWNoZS5vcmcACgkQ5JoEbH8P73Xmog//Qj/814bac4xbMnvsmEQyA9RfIRfv2i2T
+jJNh2jHiwUefV4Wd+vXy+5YXSW/A9y8MOgBHXRRbdsv+wzuaccy+SayFCg8gWXOb
+CihXw5gc3sUswIRFlxSsjwL0xkcqsxLkmPQtg7eOjIlq1LS3ynLzRPbnOov71que
+45dHOnZi1PIEonhQiIgwWEVQiEyUQk0cBjiWDgprrZe4sZStHm0IbTsPJNAmJ3qX
+KUZddOfEwmzm4u44oVYR1Z88YrRT/F7LOB8cNvCT/JLGNkn0Sf1DNN42E8gcSUyJ
+EWU8cgjy0j2kBYLVdO123Qo/V/HJ8XJUrz9fd3p89ZX6z+q66lCHVypg9Chku/OI
+CZ3pnTcBbaUKTMjB0R+r8Yj6OuIyEx95oMABoOi8ye98xrRSw7kEZ1CVIPHUiiDu
+oZdP8XQyg5sLda4qFAs/6AGY9jXTDojk46zE+MqJ7jefXVn8lvdwWKVhVaIyZYDs
+bDm9lGFTlXyakX0qxeMC7dCNkINMuXgQBZpMb+HMlUWDurneWA3IjwtzvJd2AfiU
+ZvBo7Gzv6eBjbcJ9eaG3UXEv25dt3sK56fV7/7Jh+9LVLIZDIIdNwV+YDDmVX4HF
+f7KHtaWIfQpy9lbHQqLuf6DikxntT3jIV1NUg7UbkWKrKg1wuBUozmiX4aqRTAnQ
+4MKVJZuZmzU=
+=h0w6
+-END PGP SIGNATURE-

Added: dev/spark/v2.3.4-rc1-bin/SparkR_2.3.4.tar.gz.sha512
==
--- dev/spark/v2.3.4-rc1-bin/SparkR_2.3.4.tar.gz.sha512 (added)
+++ dev/spark/v2.3.4-rc1-bin/SparkR_2.3.4.tar.gz.sha512 Mon Aug 26 09:00:20 2019
@@ -0,0 +1,3 @@
+SparkR_2.3.4.tar.gz: 09173710 547AFB95 417F908E 8057C0FC C78C41E7 17F64233
+ 440B8E58 B43AEB9F 15B9F5CC 1972750B 5A60D3BA AA702D22
+ 7AEF3D79 495C323A 803F9F54 7EE5DB13

Added: dev/spark/v2.3.4-rc1-bin/pyspark-2.3.4.tar.gz
==
Binary file - no diff available.

Propchange: dev/spark/v2.3.4-rc1-bin/pyspark-2.3.4.tar.gz
--
svn:mime-type = application/octet-stream

Added: dev/spark/v2.3.4-rc1-bin/pyspark-2.3.4.tar.gz.asc
==
--- dev/spark/v2.3.4-rc1-bin/pyspark-2.3.4.tar.gz.asc (added)
+++ dev/spark/v2.3.4-rc1-bin/pyspark-2.3.4.tar.gz.asc Mon Aug 26 09:00:20 2019
@@ -0,0 +1,17 @@
+-BEGIN PGP SIGNATURE-
+
+iQJFBAABCgAvFiEEgFK1nI3grK++CMoV5JoEbH8P73UFAl1jmEURHGtpc3prQGFw
+YWNoZS5vcmcACgkQ5JoEbH8P73Vd5BAAmzqGMEWC50Eet0e8Jpl2IT77dRfY+6zz
+mj5Nf/4tAFZ8eys7rbr4qKkNoqV3+cfytmNQSC/va6hbb0ioOB19uhvQqUe+OXaF
+93enkUjV0FGFwUgh8dD6x+9V0hAQ8lFA6V0Y1NYBa53t5xJFAJSrpVcXv/Af4y0A
+p8vyZN9Fea15RQykBQBjszhaQuh8nMqZbZjd19Kmwk2Dfe+ABFRjljpwuZt/paaX
+qZaaRpgVj30JmxkbKtXfVeDW6IstcntBJdmCoA2wwcgZmn7vTu5Fu1dd4xXhLq/H
+LIlIJXTxzPEmZuHmt7kNMYrj/M1ulPj2GFI0Cm4zg0uw9wbA01VjQ79sFuS6n0HC
+cC2JGm8inG6CHmWrZ4peBM1BxefL7yhfWYROQm2jwhfRpeI5EcmHkUlhoK8w6+F6
+2i6H187IXizL0UQjMcQu8WiGHtlcvTPrMP3BHwKuALZlgnrXFfcIrXD+oE1AakK3
+vVwTSt48RxX7dp89pRGx3bxS8zaIsh5bG2GlgYVxx8EtAyq6hK9nzHulLAcY1hS9
+A/8j8lQKZlCtDmr+JkOhcGuZsiUtB2elMwsMJmFn+qBbu0R+AT08x5kAILBNDkp6
+iN8xRoOpgVvcqzHZvraz7a6OqxfoPtQ53A4xNtT8gFTDs1Kq7jLOvmjntZotreUs
+gJ3741FqslM=
+=hX41
+-END PGP SIGNATURE-

Added: dev/spark/v2.3.4-rc1-bin/pyspark-2.3.4.tar.gz.sha512
==
--- 

svn commit: r35366 - /dev/spark/v2.4.4-rc2-bin/

2019-08-26 Thread dongjoon
Author: dongjoon
Date: Mon Aug 26 07:34:58 2019
New Revision: 35366

Log:
Apache Spark v2.4.4-rc2

Added:
dev/spark/v2.4.4-rc2-bin/
dev/spark/v2.4.4-rc2-bin/SparkR_2.4.4.tar.gz   (with props)
dev/spark/v2.4.4-rc2-bin/SparkR_2.4.4.tar.gz.asc
dev/spark/v2.4.4-rc2-bin/SparkR_2.4.4.tar.gz.sha512
dev/spark/v2.4.4-rc2-bin/pyspark-2.4.4.tar.gz   (with props)
dev/spark/v2.4.4-rc2-bin/pyspark-2.4.4.tar.gz.asc
dev/spark/v2.4.4-rc2-bin/pyspark-2.4.4.tar.gz.sha512
dev/spark/v2.4.4-rc2-bin/spark-2.4.4-bin-hadoop2.6.tgz   (with props)
dev/spark/v2.4.4-rc2-bin/spark-2.4.4-bin-hadoop2.6.tgz.asc
dev/spark/v2.4.4-rc2-bin/spark-2.4.4-bin-hadoop2.6.tgz.sha512
dev/spark/v2.4.4-rc2-bin/spark-2.4.4-bin-hadoop2.7.tgz   (with props)
dev/spark/v2.4.4-rc2-bin/spark-2.4.4-bin-hadoop2.7.tgz.asc
dev/spark/v2.4.4-rc2-bin/spark-2.4.4-bin-hadoop2.7.tgz.sha512
dev/spark/v2.4.4-rc2-bin/spark-2.4.4-bin-without-hadoop-scala-2.12.tgz   
(with props)
dev/spark/v2.4.4-rc2-bin/spark-2.4.4-bin-without-hadoop-scala-2.12.tgz.asc

dev/spark/v2.4.4-rc2-bin/spark-2.4.4-bin-without-hadoop-scala-2.12.tgz.sha512
dev/spark/v2.4.4-rc2-bin/spark-2.4.4-bin-without-hadoop.tgz   (with props)
dev/spark/v2.4.4-rc2-bin/spark-2.4.4-bin-without-hadoop.tgz.asc
dev/spark/v2.4.4-rc2-bin/spark-2.4.4-bin-without-hadoop.tgz.sha512
dev/spark/v2.4.4-rc2-bin/spark-2.4.4.tgz   (with props)
dev/spark/v2.4.4-rc2-bin/spark-2.4.4.tgz.asc
dev/spark/v2.4.4-rc2-bin/spark-2.4.4.tgz.sha512

Added: dev/spark/v2.4.4-rc2-bin/SparkR_2.4.4.tar.gz
==
Binary file - no diff available.

Propchange: dev/spark/v2.4.4-rc2-bin/SparkR_2.4.4.tar.gz
--
svn:mime-type = application/octet-stream

Added: dev/spark/v2.4.4-rc2-bin/SparkR_2.4.4.tar.gz.asc
==
--- dev/spark/v2.4.4-rc2-bin/SparkR_2.4.4.tar.gz.asc (added)
+++ dev/spark/v2.4.4-rc2-bin/SparkR_2.4.4.tar.gz.asc Mon Aug 26 07:34:58 2019
@@ -0,0 +1,17 @@
+-BEGIN PGP SIGNATURE-
+Version: GnuPG v1
+
+iQIcBAABAgAGBQJdY3kpAAoJEO2gDOg08PxcdnIP/iF7VoCcCpTL8tIZOB5vH44P
+o8hwicPbz7k3Qv1bp/IedyjN2Jf8d0XcMwkoYyl9rgyfDl68X570k3nXPoWEC6Di
+YC8OT9WLF29aQwggA0EqaJ1CnBYDvX1LYg11aPqcZW6lhGqTCf/3NBTHu6iRm7sz
+qFdfnmZNWnuk7BwFttLiu1e0t321Bnt6wzxzy/phwc8hGgrR/iakv7ErJtIlggkk
+2PDlwcsUfdOCn/XUdv4AcrFPmJEaUz8Zz2w5K7fs9rFnad7DEbezwGpJHaUMrSei
+BrpilxYBQLCKnvgZs3V28lTr50wMHmqOm2KbyIUnJX4S61BbXZaLTM866kJnB++A
+kfpXuaNDSkOavy8loPvYFSAmVVUF7RCqCr59PIZy+NWNvT/NKRX/p4ZkzobjONsj
+L7dsiJzijSFgMwBC70FJ0oek9HgP1QfrQsfMdqUT0nnutN7jwQ/mukcRiItT7jSt
+KpE5pGD05StIB3qyjpBcOHAmhTll+iksT+o3gQhERwsnc+BnBRQIa+EEkZjMceWr
+Bd4FAzV9DLYvykOrtWMa1O/Z0C+sF1NBLs3NpLCL1A9faSL8Z/nmiO+iJyAuk34w
+JTmk2zXo5gqm8fEo2qi9mw5xewXWvpfyktN1mprxz4gHEbC/9k8mJLRBYhTf2Zku
+TkC/pOxz/jHtj5Pt8Bmq
+=aRGM
+-END PGP SIGNATURE-

Added: dev/spark/v2.4.4-rc2-bin/SparkR_2.4.4.tar.gz.sha512
==
--- dev/spark/v2.4.4-rc2-bin/SparkR_2.4.4.tar.gz.sha512 (added)
+++ dev/spark/v2.4.4-rc2-bin/SparkR_2.4.4.tar.gz.sha512 Mon Aug 26 07:34:58 2019
@@ -0,0 +1,3 @@
+SparkR_2.4.4.tar.gz: 8335958B D2D86C85 E6035E55 C39D05CE 2B7838AA 7DA9538F
+ C2A103D3 2287EDA2 B7052E4F C25EB346 059C2F47 1EDDEEB4
+ 546B01E6 7F39399D E414E602 FF91DEFB

Added: dev/spark/v2.4.4-rc2-bin/pyspark-2.4.4.tar.gz
==
Binary file - no diff available.

Propchange: dev/spark/v2.4.4-rc2-bin/pyspark-2.4.4.tar.gz
--
svn:mime-type = application/octet-stream

Added: dev/spark/v2.4.4-rc2-bin/pyspark-2.4.4.tar.gz.asc
==
--- dev/spark/v2.4.4-rc2-bin/pyspark-2.4.4.tar.gz.asc (added)
+++ dev/spark/v2.4.4-rc2-bin/pyspark-2.4.4.tar.gz.asc Mon Aug 26 07:34:58 2019
@@ -0,0 +1,17 @@
+-BEGIN PGP SIGNATURE-
+Version: GnuPG v1
+
+iQIcBAABAgAGBQJdY23+AAoJEO2gDOg08Pxc3H4P/jXlGMKoSRIYEU3SgAl3Lca3
+BYfSY9N9ic3L/iQj4Qm++lF9gbZQTq6mxaQC3G6Zi8qauuWsLfY7BmczeT3nDC4g
+aDaFnPY3SJqH175IvHJR6FibaF+ehIJk9kuIN7guTzFdXmSQK81MRsZcVf2DsAdl
+tlctGmpNc+uP+myQWygAUL0n9xEuv5wYdD9XMuw9O8Nt9JYQF/Fmzf7uJAdXHC12
+P3MVcWmBeWEdHLdsOCy+I+Ib4b4HEnvNxMPBJmjam+mU4RMtLYpGJhqWfl1/R1mN
+LU0TYNHPdOSYnIaqPNN4QyBZEytemYcEoxbrqteWZwE1N+u1UiltKBFl/013m/k8
+bhTND+8F288kqoNq30oIjtIuexnMatOJ+xH5/jRPB1oOXkX/il35KASJyXHHupHp
+XrnOD9sRsJG0MmYc8GtylsohR+2JI/4CrKRzFrP7uQgplUB387nGzPcPwvS0hfXY
+Lldo3RFKk7QDvsjIrxDrJ7G6859FfeF1KBwnLNJaQQPNRxw10AG1x+XgCqNKILtn
+wfcwASI8WzctgURlGUeeznZ1Bi2pqtpn5sVDhPFpYuWeNC7TamAGebP7F688GLh5
+e9K/vbYCpgfFHgA645NnPRcLItUGiXOuiUYwAYD/N9y4lmzP+DlUZiQ8q0fGVyo2
+A93xKkZUV2LHXJQ7XYF/
+=5Q6T
+-END PGP SIGNATURE-

Added: 

[spark] branch master updated: [SPARK-28642][SQL][TEST][FOLLOW-UP] Test spark.sql.redaction.options.regex with and without default values

2019-08-26 Thread lixiao
This is an automated email from the ASF dual-hosted git repository.

lixiao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new c353a84  [SPARK-28642][SQL][TEST][FOLLOW-UP] Test 
spark.sql.redaction.options.regex with and without default values
c353a84 is described below

commit c353a84d1a991797f255ec312e5935438727536c
Author: Yuming Wang 
AuthorDate: Sun Aug 25 23:12:16 2019 -0700

[SPARK-28642][SQL][TEST][FOLLOW-UP] Test spark.sql.redaction.options.regex 
with and without default values

### What changes were proposed in this pull request?

Test `spark.sql.redaction.options.regex` with and without default values.

### Why are the changes needed?

Normally, we do not rely on the default value of 
`spark.sql.redaction.options.regex`.
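
For context, a minimal, self-contained sketch of what the redaction regex does conceptually (an illustration only, not Spark's internal redaction implementation):

```scala
// Option keys matching the redaction regex have their values replaced before the
// options are printed in plans or SHOW CREATE TABLE output.
def redactOptions(options: Map[String, String], redactionRegex: String): Map[String, String] = {
  val pattern = redactionRegex.r
  options.map { case (key, value) =>
    if (pattern.findFirstIn(key).isDefined) key -> "*********(redacted)" else key -> value
  }
}

val opts = Map(
  "url" -> "jdbc:h2:mem:testdb0",
  "dbtable" -> "TEST.PEOPLE",
  "user" -> "testUser",
  "password" -> "testPass")

redactOptions(opts, "(?i)password")              // only the password value is replaced
redactOptions(opts, "(?i)dbtable|user|password") // dbtable and user are replaced as well
```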

### Does this PR introduce any user-facing change?
No

### How was this patch tested?
N/A

Closes #25579 from wangyum/SPARK-28642-f1.

Authored-by: Yuming Wang 
Signed-off-by: Xiao Li 
---
 .../scala/org/apache/spark/sql/jdbc/JDBCSuite.scala  | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
index 72a5645..7fe00ae 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -1031,8 +1031,10 @@ class JDBCSuite extends QueryTest
   }
 
   test("Hide credentials in show create table") {
+val userName = "testUser"
 val password = "testPass"
 val tableName = "tab1"
+val dbTable = "TEST.PEOPLE"
 withTable(tableName) {
   sql(
 s"""
@@ -1040,18 +1042,30 @@ class JDBCSuite extends QueryTest
|USING org.apache.spark.sql.jdbc
|OPTIONS (
| url '$urlWithUserAndPass',
-   | dbtable 'TEST.PEOPLE',
-   | user 'testUser',
+   | dbtable '$dbTable',
+   | user '$userName',
| password '$password')
  """.stripMargin)
 
   val show = ShowCreateTableCommand(TableIdentifier(tableName))
   
spark.sessionState.executePlan(show).executedPlan.executeCollect().foreach { r 
=>
 assert(!r.toString.contains(password))
+assert(r.toString.contains(dbTable))
+assert(r.toString.contains(userName))
   }
 
   sql(s"SHOW CREATE TABLE $tableName").collect().foreach { r =>
-assert(!r.toString().contains(password))
+assert(!r.toString.contains(password))
+assert(r.toString.contains(dbTable))
+assert(r.toString.contains(userName))
+  }
+
+  withSQLConf(SQLConf.SQL_OPTIONS_REDACTION_PATTERN.key -> 
"(?i)dbtable|user") {
+
spark.sessionState.executePlan(show).executedPlan.executeCollect().foreach { r 
=>
+  assert(!r.toString.contains(password))
+  assert(!r.toString.contains(dbTable))
+  assert(!r.toString.contains(userName))
+}
   }
 }
   }

