This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 6391655  [SPARK-37266][SQL] View text can only be SELECT queries
6391655 is described below

commit 6391655152d15d32abebf3b0447b98d9391edf44
Author: Jiaan Geng <belie...@163.com>
AuthorDate: Mon Nov 15 22:47:38 2021 +0800

    [SPARK-37266][SQL] View text can only be SELECT queries
    
    ### What changes were proposed in this pull request?
    
    The current implementation of persistent views creates a Hive table that stores the view text.
    The view text is just a query string, so hackers may tamper with it through various means,
    for example replacing `select * from tab1` with `drop table tab1`.
    This PR parses the view text with `parser.parseQuery`, so only query statements are accepted and anything else fails with a clear error instead of being executed.
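
    As a rough, standalone sketch of the difference (the tampered string below is hypothetical and assumes a build that includes this change):

    ```scala
    import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException}

    // A hypothetical tampered view text: a DDL statement instead of a query.
    val tampered = "DROP TABLE tab1"

    // parsePlan accepts any statement, so the tampered text parses into a DROP TABLE plan.
    val plan = CatalystSqlParser.parsePlan(tampered)

    // parseQuery only accepts the query grammar, so the same text is rejected.
    try {
      CatalystSqlParser.parseQuery(tampered)
    } catch {
      case e: ParseException => println(s"rejected: ${e.getMessage}")
    }
    ```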
    
    ### Why are the changes needed?
    First, the view text is only a query string, so `parser.parsePlan(viewText)` incurs more overhead than `parser.parseQuery(viewText)`.
    Second, the view text can be tampered with by hackers, which opens up security vulnerabilities.
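
    At the catalog level, the change boils down to the pattern in the following simplified, standalone sketch (the helper name is hypothetical; the real change is in `SessionCatalog` in the diff below, which raises an `AnalysisException` via `QueryCompilationErrors`):

    ```scala
    import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException}
    import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

    // Hypothetical helper mirroring the SessionCatalog change: parse only query
    // text and flag anything else as possibly tampered instead of executing it.
    def parseViewText(viewText: String, viewName: String): LogicalPlan = {
      try {
        CatalystSqlParser.parseQuery(viewText)
      } catch {
        case _: ParseException =>
          throw new IllegalStateException(
            s"Invalid view text: $viewText. The view $viewName may have been tampered with")
      }
    }

    parseViewText("SELECT * FROM tab1", "default.v")    // returns a LogicalPlan
    // parseViewText("DROP TABLE tab1", "default.v")    // fails: not a query
    ```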
    
    ### Does this PR introduce _any_ user-facing change?
    'No'. Unless hackers tamper with the view text, users will not see any change.
    
    ### How was this patch tested?
    New tests.
    
    Closes #34543 from beliefer/SPARK-37266.
    
    Authored-by: Jiaan Geng <belie...@163.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../spark/sql/catalyst/catalog/SessionCatalog.scala      |  9 +++++++--
 .../apache/spark/sql/catalyst/parser/ParseDriver.scala   |  5 +++++
 .../spark/sql/catalyst/parser/ParserInterface.scala      |  6 ++++++
 .../apache/spark/sql/errors/QueryCompilationErrors.scala |  5 +++++
 .../apache/spark/sql/SparkSessionExtensionSuite.scala    |  3 +++
 .../apache/spark/sql/execution/SQLViewTestSuite.scala    | 16 ++++++++++++++++
 6 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 471e52d..f529b13 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -36,7 +36,7 @@ import org.apache.spark.sql.catalyst._
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
 import org.apache.spark.sql.catalyst.expressions.{Alias, Expression, ExpressionInfo, UpCast}
-import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParserInterface}
+import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException, ParserInterface}
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, SubqueryAlias, View}
 import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, StringUtils}
 import org.apache.spark.sql.connector.catalog.CatalogManager
@@ -877,7 +877,12 @@ class SessionCatalog(
     }
     val viewConfigs = metadata.viewSQLConfigs
     val parsedPlan = SQLConf.withExistingConf(View.effectiveSQLConf(viewConfigs, isTempView)) {
-      parser.parsePlan(viewText)
+      try {
+        parser.parseQuery(viewText)
+      } catch {
+        case _: ParseException =>
+          throw QueryCompilationErrors.invalidViewText(viewText, metadata.qualifiedName)
+      }
     }
     val projectList = if (!isHiveCreatedView(metadata)) {
       val viewColumnNames = if (metadata.viewQueryColumnNames.isEmpty) {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
index b459a2d..f53c0d3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
@@ -73,6 +73,11 @@ abstract class AbstractSqlParser extends ParserInterface with SQLConfHelper with
     astBuilder.visitSingleTableSchema(parser.singleTableSchema())
   }
 
+  /** Creates LogicalPlan for a given SQL string of query. */
+  override def parseQuery(sqlText: String): LogicalPlan = parse(sqlText) { parser =>
+    astBuilder.visitQuery(parser.query())
+  }
+
   /** Creates LogicalPlan for a given SQL string. */
   override def parsePlan(sqlText: String): LogicalPlan = parse(sqlText) { parser =>
     astBuilder.visitSingleStatement(parser.singleStatement()) match {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala
index 77e357a..46dfbf2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala
@@ -70,4 +70,10 @@ trait ParserInterface {
    */
   @throws[ParseException]("Text cannot be parsed to a DataType")
   def parseDataType(sqlText: String): DataType
+
+  /**
+   * Parse a query string to a [[LogicalPlan]].
+   */
+  @throws[ParseException]("Text cannot be parsed to a LogicalPlan")
+  def parseQuery(sqlText: String): LogicalPlan
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index b7f4cce..fe1c358 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -2366,4 +2366,9 @@ object QueryCompilationErrors {
   def tableIndexNotSupportedError(errorMessage: String): Throwable = {
     new AnalysisException(errorMessage)
   }
+
+  def invalidViewText(viewText: String, tableName: String): Throwable = {
+    new AnalysisException(
+      s"Invalid view text: $viewText. The view $tableName may have been tampered with")
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
index 046714c..4994968 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
@@ -441,6 +441,9 @@ case class MyParser(spark: SparkSession, delegate: ParserInterface) extends Pars
 
   override def parseDataType(sqlText: String): DataType =
     delegate.parseDataType(sqlText)
+
+  override def parseQuery(sqlText: String): LogicalPlan =
+    delegate.parseQuery(sqlText)
 }
 
 object MyExtensions {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala
index 6ed9798..091442d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala
@@ -550,4 +550,20 @@ class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession {
       spark.sessionState.conf.clear()
     }
   }
+
+  test("SPARK-37266: View text can only be SELECT queries") {
+    withView("v") {
+      sql("CREATE VIEW v AS SELECT 1")
+      val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("v"))
+      val dropView = "DROP VIEW v"
+      // Simulate the behavior of hackers
+      val tamperedTable = table.copy(viewText = Some(dropView))
+      spark.sessionState.catalog.alterTable(tamperedTable)
+      val message = intercept[AnalysisException] {
+        sql("SELECT * FROM v")
+      }.getMessage
+      assert(message.contains(s"Invalid view text: $dropView." +
+        s" The view ${table.qualifiedName} may have been tampered with"))
+    }
+  }
 }
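
Note for custom parsers: `ParserInterface` now declares an abstract `parseQuery` method, so third-party implementations (such as `MyParser` in SparkSessionExtensionSuite above) must provide it, typically by delegation. A minimal delegating sketch, with the class name assumed and the method set taken from the current trait:

```scala
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.parser.ParserInterface
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.types.{DataType, StructType}

// Hypothetical wrapper that forwards every ParserInterface method, including
// the new parseQuery, to an underlying parser.
class DelegatingParser(delegate: ParserInterface) extends ParserInterface {
  override def parsePlan(sqlText: String): LogicalPlan = delegate.parsePlan(sqlText)
  override def parseQuery(sqlText: String): LogicalPlan = delegate.parseQuery(sqlText)
  override def parseExpression(sqlText: String): Expression = delegate.parseExpression(sqlText)
  override def parseTableIdentifier(sqlText: String): TableIdentifier =
    delegate.parseTableIdentifier(sqlText)
  override def parseFunctionIdentifier(sqlText: String): FunctionIdentifier =
    delegate.parseFunctionIdentifier(sqlText)
  override def parseMultipartIdentifier(sqlText: String): Seq[String] =
    delegate.parseMultipartIdentifier(sqlText)
  override def parseTableSchema(sqlText: String): StructType = delegate.parseTableSchema(sqlText)
  override def parseDataType(sqlText: String): DataType = delegate.parseDataType(sqlText)
}
```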
