This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 5ee6f727441 [SPARK-39221][SQL] Make sensitive information be redacted 
correctly for thrift server job/stage tab
5ee6f727441 is described below

commit 5ee6f72744143cc5e19aa058df209f7156e51cee
Author: Kent Yao <y...@apache.org>
AuthorDate: Fri May 20 10:10:07 2022 +0800

    [SPARK-39221][SQL] Make sensitive information be redacted correctly for 
thrift server job/stage tab
    
    ### What changes were proposed in this pull request?
    
    set redacted statement in job desc correctly
    
    #### http://localhost:4040/sqlserver/
    <img width="1759" alt="image" 
src="https://user-images.githubusercontent.com/8326978/168989638-d88ec124-fda7-4642-9c11-bad11c52fcea.png">
    
    #### http://localhost:4040/jobs/
    <img width="769" alt="image" 
src="https://user-images.githubusercontent.com/8326978/168989800-4b67c3cf-c4f7-482a-9479-47f8705d6e9f.png">
    
    ### Why are the changes needed?
    
    bugfix
    
    ### Does this PR introduce _any_ user-facing change?
    
    no
    
    ### How was this patch tested?
    
    verified locally
    
    Closes #36592 from yaooqinn/SPARK-39221.
    
    Authored-by: Kent Yao <y...@apache.org>
    Signed-off-by: Kent Yao <y...@apache.org>
---
 .../hive/thriftserver/SparkExecuteStatementOperation.scala | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
index 2c77e00c46c..090d741d9ee 100644
--- 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
+++ 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
@@ -62,8 +62,11 @@ private[hive] class SparkExecuteStatementOperation(
 
   private val forceCancel = 
sqlContext.conf.getConf(SQLConf.THRIFTSERVER_FORCE_CANCEL)
 
-  private val substitutorStatement = SQLConf.withExistingConf(sqlContext.conf) 
{
-    new VariableSubstitution().substitute(statement)
+  private val redactedStatement = {
+    val substitutorStatement = SQLConf.withExistingConf(sqlContext.conf) {
+      new VariableSubstitution().substitute(statement)
+    }
+    SparkUtils.redact(sqlContext.conf.stringRedactionPattern, 
substitutorStatement)
   }
 
   private var result: DataFrame = _
@@ -76,14 +79,14 @@ private[hive] class SparkExecuteStatementOperation(
       val sparkType = new StructType().add("Result", "string")
       SparkExecuteStatementOperation.toTTableSchema(sparkType)
     } else {
-      logInfo(s"Result Schema: ${result.schema}")
+      logInfo(s"Result Schema: ${result.schema.sql}")
       SparkExecuteStatementOperation.toTTableSchema(result.schema)
     }
   }
 
   def getNextRowSet(order: FetchOrientation, maxRowsL: Long): TRowSet = 
withLocalProperties {
     try {
-      sqlContext.sparkContext.setJobGroup(statementId, substitutorStatement, 
forceCancel)
+      sqlContext.sparkContext.setJobGroup(statementId, redactedStatement, 
forceCancel)
       getNextRowSetInternal(order, maxRowsL)
     } finally {
       sqlContext.sparkContext.clearJobGroup()
@@ -118,7 +121,6 @@ private[hive] class SparkExecuteStatementOperation(
 
   override def runInternal(): Unit = {
     setState(OperationState.PENDING)
-    val redactedStatement = 
SparkUtils.redact(sqlContext.conf.stringRedactionPattern, statement)
     logInfo(s"Submitting query '$redactedStatement' with $statementId")
     HiveThriftServer2.eventManager.onStatementStart(
       statementId,
@@ -220,7 +222,7 @@ private[hive] class SparkExecuteStatementOperation(
         
parentSession.getSessionState.getConf.setClassLoader(executionHiveClassLoader)
       }
 
-      sqlContext.sparkContext.setJobGroup(statementId, substitutorStatement, 
forceCancel)
+      sqlContext.sparkContext.setJobGroup(statementId, redactedStatement, 
forceCancel)
       result = sqlContext.sql(statement)
       logDebug(result.queryExecution.toString())
       HiveThriftServer2.eventManager.onStatementParsed(statementId,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to