Repository: spark
Updated Branches:
  refs/heads/master 39b3f10dd -> 2014e7a78


[SPARK-22479][SQL] Exclude credentials from 
SaveIntoDataSourceCommand.simpleString

## What changes were proposed in this pull request?

Do not include JDBC properties, which may contain credentials, when logging a 
logical plan that contains a `SaveIntoDataSourceCommand`.

## How was this patch tested?

Built the project locally and tried to reproduce the issue (per the steps in 
https://issues.apache.org/jira/browse/SPARK-22479):
```
== Parsed Logical Plan ==
SaveIntoDataSourceCommand 
org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider570127fa, 
Map(dbtable -> test20, driver -> org.postgresql.Driver, url -> 
*********(redacted), password -> *********(redacted)), ErrorIfExists
   +- Range (0, 100, step=1, splits=Some(8))

== Analyzed Logical Plan ==
SaveIntoDataSourceCommand 
org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider570127fa, 
Map(dbtable -> test20, driver -> org.postgresql.Driver, url -> 
*********(redacted), password -> *********(redacted)), ErrorIfExists
   +- Range (0, 100, step=1, splits=Some(8))

== Optimized Logical Plan ==
SaveIntoDataSourceCommand 
org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider570127fa, 
Map(dbtable -> test20, driver -> org.postgresql.Driver, url -> 
*********(redacted), password -> *********(redacted)), ErrorIfExists
   +- Range (0, 100, step=1, splits=Some(8))

== Physical Plan ==
Execute SaveIntoDataSourceCommand
   +- SaveIntoDataSourceCommand 
org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider570127fa, 
Map(dbtable -> test20, driver -> org.postgresql.Driver, url -> 
*********(redacted), password -> *********(redacted)), ErrorIfExists
         +- Range (0, 100, step=1, splits=Some(8))
```

Author: osatici <osat...@palantir.com>

Closes #19708 from onursatici/os/redact-jdbc-creds.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2014e7a7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2014e7a7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2014e7a7

Branch: refs/heads/master
Commit: 2014e7a789d36e376ca62b1e24636d79c1b19745
Parents: 39b3f10
Author: osatici <osat...@palantir.com>
Authored: Wed Nov 15 14:08:51 2017 -0800
Committer: gatorsmile <gatorsm...@gmail.com>
Committed: Wed Nov 15 14:08:51 2017 -0800

----------------------------------------------------------------------
 .../apache/spark/internal/config/package.scala  |  2 +-
 .../datasources/SaveIntoDataSourceCommand.scala |  7 +++
 .../SaveIntoDataSourceCommandSuite.scala        | 48 ++++++++++++++++++++
 3 files changed, 56 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2014e7a7/core/src/main/scala/org/apache/spark/internal/config/package.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala 
b/core/src/main/scala/org/apache/spark/internal/config/package.scala
index 57e2da8..84315f5 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/package.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -307,7 +307,7 @@ package object config {
         "a property key or value, the value is redacted from the environment 
UI and various logs " +
         "like YARN and event logs.")
       .regexConf
-      .createWithDefault("(?i)secret|password".r)
+      .createWithDefault("(?i)secret|password|url|user|username".r)
 
   private[spark] val STRING_REDACTION_PATTERN =
     ConfigBuilder("spark.redaction.string.regex")

http://git-wip-us.apache.org/repos/asf/spark/blob/2014e7a7/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala
index 96c84ea..568e953 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala
@@ -17,11 +17,13 @@
 
 package org.apache.spark.sql.execution.datasources
 
+import org.apache.spark.SparkEnv
 import org.apache.spark.sql.{Dataset, Row, SaveMode, SparkSession}
 import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.execution.command.RunnableCommand
 import org.apache.spark.sql.sources.CreatableRelationProvider
+import org.apache.spark.util.Utils
 
 /**
  * Saves the results of `query` in to a data source.
@@ -46,4 +48,9 @@ case class SaveIntoDataSourceCommand(
 
     Seq.empty[Row]
   }
+
+  override def simpleString: String = {
+    val redacted = Utils.redact(SparkEnv.get.conf, options.toSeq).toMap
+    s"SaveIntoDataSourceCommand ${dataSource}, ${redacted}, ${mode}"
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/2014e7a7/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala
new file mode 100644
index 0000000..4b3ca8e
--- /dev/null
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.SaveMode
+import org.apache.spark.sql.test.SharedSQLContext
+
+class SaveIntoDataSourceCommandSuite extends SharedSQLContext {
+
+  override protected def sparkConf: SparkConf = super.sparkConf
+    .set("spark.redaction.regex", "(?i)password|url")
+
+  test("simpleString is redacted") {
+    val URL = "connection.url"
+    val PASS = "123"
+    val DRIVER = "mydriver"
+
+    val dataSource = DataSource(
+      sparkSession = spark,
+      className = "jdbc",
+      partitionColumns = Nil,
+      options = Map("password" -> PASS, "url" -> URL, "driver" -> DRIVER))
+
+    val logicalPlanString = dataSource
+      .planForWriting(SaveMode.ErrorIfExists, spark.range(1).logicalPlan)
+      .treeString(true)
+
+    assert(!logicalPlanString.contains(URL))
+    assert(!logicalPlanString.contains(PASS))
+    assert(logicalPlanString.contains(DRIVER))
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to