This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 49f94eb6e88a [SPARK-46579][SQL] Redact JDBC url in errors and logs
49f94eb6e88a is described below

commit 49f94eb6e88a9e5aaff675fb53125ce6091529fa
Author: Max Gekk <max.g...@gmail.com>
AuthorDate: Wed Jan 3 12:02:02 2024 -0800

    [SPARK-46579][SQL] Redact JDBC url in errors and logs
    
    ### What changes were proposed in this pull request?
    In the PR, I propose to redact the JDBC URL in error message parameters and logs.
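    
    As a minimal sketch (not part of the patch) of the redaction behavior, assuming spark.sql.redaction.string.regex is set to a pattern such as ".*password=.*"; the URL below is made up for illustration, and Utils is Spark-internal, shown here only to illustrate what the new helper does:
    ```scala
    import org.apache.spark.sql.internal.SQLConf
    import org.apache.spark.util.Utils
    
    // Hypothetical JDBC URL that embeds a secret.
    val url = "jdbc:h2:mem:testdb;user=sa;password=secret"
    
    // Utils.redact returns "*********(redacted)" when the configured
    // pattern matches the string, and the string unchanged otherwise.
    val redacted = Utils.redact(SQLConf.get.stringRedactionPattern, url)
    ```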
    
    ### Why are the changes needed?
    To avoid leaking user secrets.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes, it can: JDBC URLs in error messages and logs are now redacted.
    
    ### How was this patch tested?
    By running the modified test suites:
    ```
    $ build/sbt "test:testOnly *JDBCTableCatalogSuite"
    ```
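    
    To run just the modified test, a hedged variant (the -z substring filter is standard ScalaTest syntax passed through sbt after --):
    ```
    $ build/sbt "test:testOnly *JDBCTableCatalogSuite -- -z \"CREATE TABLE with table property\""
    ```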
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #44574 from MaxGekk/redact-jdbc-url.
    
    Authored-by: Max Gekk <max.g...@gmail.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../execution/datasources/jdbc/JDBCOptions.scala   |  3 +++
 .../jdbc/connection/BasicConnectionProvider.scala  |  3 ++-
 .../execution/datasources/v2/jdbc/JDBCTable.scala  |  4 ++--
 .../datasources/v2/jdbc/JDBCTableCatalog.scala     | 22 +++++++++++-----------
 .../org/apache/spark/sql/jdbc/JdbcDialects.scala   |  2 +-
 .../v2/jdbc/JDBCTableCatalogSuite.scala            | 15 +++++++++++----
 6 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala
index 28fa7b8bf561..43db0c6eef11 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala
@@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
 import org.apache.spark.sql.errors.QueryExecutionErrors
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.TimestampNTZType
+import org.apache.spark.util.Utils
 
 /**
  * Options for the JDBC data source.
@@ -248,6 +249,8 @@ class JDBCOptions(
       otherOption.parameters.equals(this.parameters)
     case _ => false
   }
+
+  def getRedactUrl(): String = Utils.redact(SQLConf.get.stringRedactionPattern, url)
 }
 
 class JdbcOptionsInWrite(
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala
index 369cf59e0599..57902336ebf2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala
@@ -45,7 +45,8 @@ private[jdbc] class BasicConnectionProvider extends JdbcConnectionProvider with
     jdbcOptions.asConnectionProperties.asScala.foreach { case(k, v) =>
       properties.put(k, v)
     }
-    logDebug(s"JDBC connection initiated with URL: ${jdbcOptions.url} and properties: $properties")
+    logDebug(s"JDBC connection initiated with URL: ${jdbcOptions.getRedactUrl()} " +
+      s"and properties: $properties")
     driver.connect(jdbcOptions.url, properties)
   }
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTable.scala
index c251010881f3..120a68075a8f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTable.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTable.scala
@@ -66,7 +66,7 @@ case class JDBCTable(ident: Identifier, schema: StructType, jdbcOptions: JDBCOpt
       JdbcUtils.classifyException(
         errorClass = "FAILED_JDBC.CREATE_INDEX",
         messageParameters = Map(
-          "url" -> jdbcOptions.url,
+          "url" -> jdbcOptions.getRedactUrl(),
           "indexName" -> toSQLId(indexName),
           "tableName" -> toSQLId(name)),
         dialect = JdbcDialects.get(jdbcOptions.url)) {
@@ -87,7 +87,7 @@ case class JDBCTable(ident: Identifier, schema: StructType, jdbcOptions: JDBCOpt
       JdbcUtils.classifyException(
         errorClass = "FAILED_JDBC.DROP_INDEX",
         messageParameters = Map(
-          "url" -> jdbcOptions.url,
+          "url" -> jdbcOptions.getRedactUrl(),
           "indexName" -> toSQLId(indexName),
           "tableName" -> toSQLId(name)),
         dialect = JdbcDialects.get(jdbcOptions.url)) {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala
index 976cd3f6e9aa..2eb61b3d01fc 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala
@@ -71,7 +71,7 @@ class JDBCTableCatalog extends TableCatalog
       val rs = JdbcUtils.classifyException(
         errorClass = "FAILED_JDBC.GET_TABLES",
         messageParameters = Map(
-          "url" -> options.url,
+          "url" -> options.getRedactUrl(),
           "namespace" -> toSQLId(namespace.toSeq)),
         dialect) {
         conn.getMetaData.getTables(null, schemaPattern, "%", Array("TABLE"))
@@ -90,7 +90,7 @@ class JDBCTableCatalog extends TableCatalog
     JdbcUtils.classifyException(
       errorClass = "FAILED_JDBC.TABLE_EXISTS",
       messageParameters = Map(
-        "url" -> options.url,
+        "url" -> options.getRedactUrl(),
         "tableName" -> toSQLId(ident)),
       dialect) {
       JdbcUtils.withConnection(options)(JdbcUtils.tableExists(_, writeOptions))
@@ -115,7 +115,7 @@ class JDBCTableCatalog extends TableCatalog
       JdbcUtils.classifyException(
         errorClass = "FAILED_JDBC.RENAME_TABLE",
         messageParameters = Map(
-          "url" -> options.url,
+          "url" -> options.getRedactUrl(),
           "oldName" -> toSQLId(oldIdent),
           "newName" -> toSQLId(newIdent)),
         dialect) {
@@ -181,7 +181,7 @@ class JDBCTableCatalog extends TableCatalog
       JdbcUtils.classifyException(
         errorClass = "FAILED_JDBC.CREATE_TABLE",
         messageParameters = Map(
-          "url" -> options.url,
+          "url" -> options.getRedactUrl(),
           "tableName" -> toSQLId(ident)),
         dialect) {
        JdbcUtils.createTable(conn, getTableName(ident), schema, caseSensitive, writeOptions)
@@ -197,7 +197,7 @@ class JDBCTableCatalog extends TableCatalog
       JdbcUtils.classifyException(
         errorClass = "FAILED_JDBC.ALTER_TABLE",
         messageParameters = Map(
-          "url" -> options.url,
+          "url" -> options.getRedactUrl(),
           "tableName" -> toSQLId(ident)),
         dialect) {
         JdbcUtils.alterTable(conn, getTableName(ident), changes, options)
@@ -212,7 +212,7 @@ class JDBCTableCatalog extends TableCatalog
         JdbcUtils.classifyException(
           errorClass = "FAILED_JDBC.NAMESPACE_EXISTS",
           messageParameters = Map(
-            "url" -> options.url,
+            "url" -> options.getRedactUrl(),
             "namespace" -> toSQLId(namespace.toSeq)),
           dialect) {
           JdbcUtils.schemaExists(conn, options, db)
@@ -225,7 +225,7 @@ class JDBCTableCatalog extends TableCatalog
     JdbcUtils.withConnection(options) { conn =>
       JdbcUtils.classifyException(
         errorClass = "FAILED_JDBC.LIST_NAMESPACES",
-        messageParameters = Map("url" -> options.url),
+        messageParameters = Map("url" -> options.getRedactUrl()),
         dialect) {
         JdbcUtils.listSchemas(conn, options)
       }
@@ -277,7 +277,7 @@ class JDBCTableCatalog extends TableCatalog
         JdbcUtils.classifyException(
           errorClass = "FAILED_JDBC.CREATE_NAMESPACE",
           messageParameters = Map(
-            "url" -> options.url,
+            "url" -> options.getRedactUrl(),
             "namespace" -> toSQLId(db)),
           dialect) {
           JdbcUtils.createSchema(conn, options, db, comment)
@@ -301,7 +301,7 @@ class JDBCTableCatalog extends TableCatalog
                 JdbcUtils.classifyException(
                   errorClass = "FAILED_JDBC.CREATE_NAMESPACE_COMMENT",
                   messageParameters = Map(
-                    "url" -> options.url,
+                    "url" -> options.getRedactUrl(),
                     "namespace" -> toSQLId(db)),
                   dialect) {
                   JdbcUtils.alterSchemaComment(conn, options, db, set.value)
@@ -317,7 +317,7 @@ class JDBCTableCatalog extends TableCatalog
                 JdbcUtils.classifyException(
                   errorClass = "FAILED_JDBC.REMOVE_NAMESPACE_COMMENT",
                   messageParameters = Map(
-                    "url" -> options.url,
+                    "url" -> options.getRedactUrl(),
                     "namespace" -> toSQLId(db)),
                   dialect) {
                   JdbcUtils.removeSchemaComment(conn, options, db)
@@ -344,7 +344,7 @@ class JDBCTableCatalog extends TableCatalog
         JdbcUtils.classifyException(
           errorClass = "FAILED_JDBC.DROP_NAMESPACE",
           messageParameters = Map(
-            "url" -> options.url,
+            "url" -> options.getRedactUrl(),
             "namespace" -> toSQLId(db)),
           dialect) {
           JdbcUtils.dropSchema(conn, options, db, cascade)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
index 4825568d88eb..87d3e2d97eba 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
@@ -159,7 +159,7 @@ abstract class JdbcDialect extends Serializable with Logging {
       val connection =
        ConnectionProvider.create(driver, options.parameters, options.connectionProviderName)
       require(connection != null,
-        s"The driver could not open a JDBC connection. Check the URL: 
${options.url}")
+        s"The driver could not open a JDBC connection. Check the URL: 
${options.getRedactUrl()}")
       connection
     }
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala
index 1cd4077b4ec1..fa35c0f33ced 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala
@@ -554,15 +554,22 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession {
 
   test("CREATE TABLE with table property") {
     withTable("h2.test.new_table") {
+      val sqlText = "CREATE TABLE h2.test.new_table(i INT, j STRING)" +
+        " TBLPROPERTIES('ENGINE'='tableEngineName')"
       checkError(
-        exception = intercept[AnalysisException] {
-          sql("CREATE TABLE h2.test.new_table(i INT, j STRING)" +
-            " TBLPROPERTIES('ENGINE'='tableEngineName')")
-        },
+        exception = intercept[AnalysisException] { sql(sqlText) },
         errorClass = "FAILED_JDBC.CREATE_TABLE",
         parameters = Map(
           "url" -> url,
           "tableName" -> "`test`.`new_table`"))
+      withSQLConf(SQLConf.SQL_STRING_REDACTION_PATTERN.key -> ".*password=.*") {
+        checkError(
+          exception = intercept[AnalysisException] { sql(sqlText) },
+          errorClass = "FAILED_JDBC.CREATE_TABLE",
+          parameters = Map(
+            "url" -> "*********(redacted)",
+            "tableName" -> "`test`.`new_table`"))
+      }
     }
   }
 

