This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 6391655 [SPARK-37266][SQL] View text can only be SELECT queries 6391655 is described below commit 6391655152d15d32abebf3b0447b98d9391edf44 Author: Jiaan Geng <belie...@163.com> AuthorDate: Mon Nov 15 22:47:38 2021 +0800 [SPARK-37266][SQL] View text can only be SELECT queries ### What changes were proposed in this pull request? The current implementation of persistent view is create hive table with view text. The view text is just a query string, so the hackers may tamper with it through various means. Such as, `select * from tab1` tampered with `drop table tab1`. ### Why are the changes needed? First, the view text is query string, `parser.parsePlan(viewText)` occurs more overhead than `parser.parseQuery(viewText)`. Second, the view text can be tampered by hackers and issue security vulnerabilities. ### Does this PR introduce _any_ user-facing change? 'No'. Unless hackers tamper view text, user will not see any change. ### How was this patch tested? New tests. Closes #34543 from beliefer/SPARK-37266. Authored-by: Jiaan Geng <belie...@163.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../spark/sql/catalyst/catalog/SessionCatalog.scala | 9 +++++++-- .../apache/spark/sql/catalyst/parser/ParseDriver.scala | 5 +++++ .../spark/sql/catalyst/parser/ParserInterface.scala | 6 ++++++ .../apache/spark/sql/errors/QueryCompilationErrors.scala | 5 +++++ .../apache/spark/sql/SparkSessionExtensionSuite.scala | 3 +++ .../apache/spark/sql/execution/SQLViewTestSuite.scala | 16 ++++++++++++++++ 6 files changed, 42 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 471e52d..f529b13 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -36,7 +36,7 @@ import org.apache.spark.sql.catalyst._ import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder import org.apache.spark.sql.catalyst.expressions.{Alias, Expression, ExpressionInfo, UpCast} -import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParserInterface} +import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException, ParserInterface} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, SubqueryAlias, View} import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, StringUtils} import org.apache.spark.sql.connector.catalog.CatalogManager @@ -877,7 +877,12 @@ class SessionCatalog( } val viewConfigs = metadata.viewSQLConfigs val parsedPlan = SQLConf.withExistingConf(View.effectiveSQLConf(viewConfigs, isTempView)) { - parser.parsePlan(viewText) + try { + parser.parseQuery(viewText) + } catch { + case _: ParseException => + throw QueryCompilationErrors.invalidViewText(viewText, metadata.qualifiedName) + } } val projectList = if (!isHiveCreatedView(metadata)) { val viewColumnNames = if (metadata.viewQueryColumnNames.isEmpty) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index b459a2d..f53c0d3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -73,6 +73,11 @@ abstract class AbstractSqlParser extends ParserInterface with SQLConfHelper with astBuilder.visitSingleTableSchema(parser.singleTableSchema()) } + /** Creates LogicalPlan for a given SQL string of query. */ + override def parseQuery(sqlText: String): LogicalPlan = parse(sqlText) { parser => + astBuilder.visitQuery(parser.query()) + } + /** Creates LogicalPlan for a given SQL string. */ override def parsePlan(sqlText: String): LogicalPlan = parse(sqlText) { parser => astBuilder.visitSingleStatement(parser.singleStatement()) match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala index 77e357a..46dfbf2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala @@ -70,4 +70,10 @@ trait ParserInterface { */ @throws[ParseException]("Text cannot be parsed to a DataType") def parseDataType(sqlText: String): DataType + + /** + * Parse a query string to a [[LogicalPlan]]. + */ + @throws[ParseException]("Text cannot be parsed to a LogicalPlan") + def parseQuery(sqlText: String): LogicalPlan } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index b7f4cce..fe1c358 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -2366,4 +2366,9 @@ object QueryCompilationErrors { def tableIndexNotSupportedError(errorMessage: String): Throwable = { new AnalysisException(errorMessage) } + + def invalidViewText(viewText: String, tableName: String): Throwable = { + new AnalysisException( + s"Invalid view text: $viewText. The view $tableName may have been tampered with") + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala index 046714c..4994968 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala @@ -441,6 +441,9 @@ case class MyParser(spark: SparkSession, delegate: ParserInterface) extends Pars override def parseDataType(sqlText: String): DataType = delegate.parseDataType(sqlText) + + override def parseQuery(sqlText: String): LogicalPlan = + delegate.parseQuery(sqlText) } object MyExtensions { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala index 6ed9798..091442d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala @@ -550,4 +550,20 @@ class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession { spark.sessionState.conf.clear() } } + + test("SPARK-37266: View text can only be SELECT queries") { + withView("v") { + sql("CREATE VIEW v AS SELECT 1") + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("v")) + val dropView = "DROP VIEW v" + // Simulate the behavior of hackers + val tamperedTable = table.copy(viewText = Some(dropView)) + spark.sessionState.catalog.alterTable(tamperedTable) + val message = intercept[AnalysisException] { + sql("SELECT * FROM v") + }.getMessage + assert(message.contains(s"Invalid view text: $dropView." + + s" The view ${table.qualifiedName} may have been tampered with")) + } + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org