This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 9cc925c  [SPARK-27209][SQL] Split parsing of SELECT and INSERT into two top-level rules in the grammar file.
9cc925c is described below

commit 9cc925cda2c415deca8cf142a2a1774e81fda3fb
Author: Dilip Biswal <dbis...@us.ibm.com>
AuthorDate: Mon Mar 25 17:43:03 2019 -0700

    [SPARK-27209][SQL] Split parsing of SELECT and INSERT into two top-level rules in the grammar file.

    ## What changes were proposed in this pull request?
    Currently, the grammar rule `query` is responsible for parsing both SELECT and INSERT statements. As a result, we need additional semantic checks in the code to guard against invalid INSERT constructs inside a query; a couple of examples are in the `visitCreateView` and `visitAlterView` functions. Another issue is that we don't catch invalid INSERT constructs everywhere until checkAnalysis, and the errors we raise there can be confusing as well. Here are a couple of examples:

    ```SQL
    select * from (insert into bar values (2));
    ```
    ```
    Error in query: unresolved operator 'Project [*];
    'Project [*]
    +- SubqueryAlias `__auto_generated_subquery_name`
       +- InsertIntoHiveTable `default`.`bar`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, false, false, [c1]
          +- Project [cast(col1#18 as int) AS c1#20]
             +- LocalRelation [col1#18]
    ```
    ```SQL
    select * from foo where c1 in (insert into bar values (2))
    ```
    ```
    Error in query: cannot resolve '(default.foo.`c1` IN (listquery()))' due to data type mismatch: The number of columns in the left hand side of an IN subquery does not match the number of columns in the output of subquery. #columns in left hand side: 1. #columns in right hand side: 0. Left side columns: [default.foo.`c1`]. Right side columns: [].;;
    'Project [*]
    +- 'Filter c1#6 IN (list#5 [])
       :  +- InsertIntoHiveTable `default`.`bar`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, false, false, [c1]
       :     +- Project [cast(col1#7 as int) AS c1#9]
       :        +- LocalRelation [col1#7]
       +- SubqueryAlias `default`.`foo`
          +- HiveTableRelation `default`.`foo`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [c1#6]
    ```

    In both cases above, the syntax should be rejected at the parser level. In this PR, we create two top-level parser rules to parse `SELECT` and `INSERT` statements respectively. I will create a small follow-up PR to allow CTEs in DESCRIBE QUERY after this PR is in.

    ## How was this patch tested?
    Added tests to PlanParserSuite and removed the semantic-check tests from SparkSqlParserSuite.

    Closes #24150 from dilipbiswal/split-query-insert.

    Authored-by: Dilip Biswal <dbis...@us.ibm.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
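To see the intended effect concretely, here is a minimal sketch (not part of the patch) that drives the Catalyst parser directly; `bar` is the same hypothetical table as in the examples above, and the expected message shape comes from the tests added below:

```scala
import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException}

// Before this patch, the nested INSERT parsed fine and only failed later in
// checkAnalysis with an "unresolved operator" error. With INSERT split into
// its own top-level rule, the parser rejects it immediately.
try {
  CatalystSqlParser.parsePlan("SELECT * FROM (INSERT INTO bar VALUES (2))")
} catch {
  case e: ParseException =>
    // Per the tests added in PlanParserSuite, the message looks like
    // "mismatched input 'FROM' expecting ...".
    println(s"Rejected at parse time: ${e.getMessage}")
}
```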
---
 .../apache/spark/sql/catalyst/parser/SqlBase.g4    | 23 +++++--
 .../spark/sql/catalyst/parser/AstBuilder.scala     | 77 ++++++++++++++++------
 .../sql/catalyst/parser/PlanParserSuite.scala      | 49 +++++++++++++-
 .../spark/sql/execution/SparkSqlParser.scala       | 17 -----
 .../spark/sql/execution/SparkSqlParserSuite.scala  | 22 -------
 5 files changed, 124 insertions(+), 64 deletions(-)

diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 1f7da19..78dc60c 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -81,6 +81,8 @@ singleTableSchema
 statement
     : query                                                            #statementDefault
+    | insertStatement                                                  #insertStatementDefault
+    | multiSelectStatement                                             #multiSelectStatementDefault
     | USE db=identifier                                                #use
     | CREATE database (IF NOT EXISTS)? identifier
         (COMMENT comment=STRING)? locationSpec?
@@ -358,9 +360,14 @@ resource
     : identifier STRING
     ;
 
+insertStatement
+    : (ctes)? insertInto queryTerm queryOrganization                   #singleInsertQuery
+    | (ctes)? fromClause multiInsertQueryBody+                         #multiInsertQuery
+    ;
+
 queryNoWith
-    : insertInto? queryTerm queryOrganization                          #singleInsertQuery
-    | fromClause multiInsertQueryBody+                                 #multiInsertQuery
+    : queryTerm queryOrganization                                      #noWithQuery
+    | fromClause selectStatement                                       #queryWithFrom
     ;
 
 queryOrganization
@@ -373,9 +380,15 @@ queryOrganization
     ;
 
 multiInsertQueryBody
-    : insertInto?
-      querySpecification
-      queryOrganization
+    : insertInto selectStatement
+    ;
+
+multiSelectStatement
+    : (ctes)? fromClause selectStatement+                              #multiSelect
+    ;
+
+selectStatement
+    : querySpecification queryOrganization
     ;
 
 queryTerm
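Read together, the new rules route each statement shape to its own top-level alternative. A hedged sketch of that mapping (hypothetical tables `s`, `t1`, `t2` and column `id`; the labels in the comments are the #labels introduced above):

```scala
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser

// Plain query: statement -> query -> queryNoWith (#noWithQuery).
CatalystSqlParser.parsePlan("SELECT id FROM s")

// Single top-level insert: statement -> insertStatement (#singleInsertQuery).
CatalystSqlParser.parsePlan("INSERT INTO t1 SELECT id FROM s")

// Several inserts sharing one FROM: insertStatement (#multiInsertQuery).
CatalystSqlParser.parsePlan(
  "FROM s INSERT INTO t1 SELECT id WHERE id < 5 INSERT INTO t2 SELECT id WHERE id > 4")

// Several selects sharing one FROM: multiSelectStatement (#multiSelect).
CatalystSqlParser.parsePlan("FROM s SELECT id WHERE id < 5 SELECT id WHERE id > 4")
```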
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 52a5d2c..7164ad2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -112,21 +112,36 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
     val query = plan(ctx.queryNoWith)
 
     // Apply CTEs
-    query.optional(ctx.ctes) {
-      val ctes = ctx.ctes.namedQuery.asScala.map { nCtx =>
-        val namedQuery = visitNamedQuery(nCtx)
-        (namedQuery.alias, namedQuery)
-      }
-      // Check for duplicate names.
-      checkDuplicateKeys(ctes, ctx)
-      With(query, ctes)
+    query.optionalMap(ctx.ctes)(withCTE)
+  }
+
+  private def withCTE(ctx: CtesContext, plan: LogicalPlan): LogicalPlan = {
+    val ctes = ctx.namedQuery.asScala.map { nCtx =>
+      val namedQuery = visitNamedQuery(nCtx)
+      (namedQuery.alias, namedQuery)
     }
+    // Check for duplicate names.
+    checkDuplicateKeys(ctes, ctx)
+    With(plan, ctes)
   }
 
   override def visitQueryToDesc(ctx: QueryToDescContext): LogicalPlan = withOrigin(ctx) {
     plan(ctx.queryTerm).optionalMap(ctx.queryOrganization)(withQueryResultClauses)
   }
 
+  override def visitQueryWithFrom(ctx: QueryWithFromContext): LogicalPlan = withOrigin(ctx) {
+    val from = visitFromClause(ctx.fromClause)
+    validate(ctx.selectStatement.querySpecification.fromClause == null,
+      "Individual select statement can not have FROM cause as its already specified in the" +
+      " outer query block", ctx)
+    withQuerySpecification(ctx.selectStatement.querySpecification, from).
+      optionalMap(ctx.selectStatement.queryOrganization)(withQueryResultClauses)
+  }
+
+  override def visitNoWithQuery(ctx: NoWithQueryContext): LogicalPlan = withOrigin(ctx) {
+    plan(ctx.queryTerm).optionalMap(ctx.queryOrganization)(withQueryResultClauses)
+  }
+
   /**
    * Create a named logical plan.
    *
@@ -156,24 +171,49 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
     val from = visitFromClause(ctx.fromClause)
 
     // Build the insert clauses.
-    val inserts = ctx.multiInsertQueryBody.asScala.map {
+    val inserts = ctx.multiInsertQueryBody().asScala.map {
       body =>
-        validate(body.querySpecification.fromClause == null,
+        validate(body.selectStatement.querySpecification.fromClause == null,
           "Multi-Insert queries cannot have a FROM clause in their individual SELECT statements",
           body)
+        withInsertInto(body.insertInto,
+          withQuerySpecification(body.selectStatement.querySpecification, from).
+          // Add organization statements.
+          optionalMap(body.selectStatement.queryOrganization)(withQueryResultClauses))
+    }
+
+    // If there are multiple INSERTS just UNION them together into one query.
+    val insertPlan = inserts match {
+      case Seq(query) => query
+      case queries => Union(queries)
+    }
+    // Apply CTEs
+    insertPlan.optionalMap(ctx.ctes)(withCTE)
+  }
+
+  override def visitMultiSelect(ctx: MultiSelectContext): LogicalPlan = withOrigin(ctx) {
+    val from = visitFromClause(ctx.fromClause)
+
+    // Build the insert clauses.
+    val selects = ctx.selectStatement.asScala.map {
+      body =>
+        validate(body.querySpecification.fromClause == null,
+          "Multi-select queries cannot have a FROM clause in their individual SELECT statements",
+          body)
+        withQuerySpecification(body.querySpecification, from).
           // Add organization statements.
-        optionalMap(body.queryOrganization)(withQueryResultClauses).
-        // Add insert.
-        optionalMap(body.insertInto())(withInsertInto)
+          optionalMap(body.queryOrganization)(withQueryResultClauses)
     }
 
     // If there are multiple INSERTS just UNION them together into one query.
-    inserts match {
+    val selectUnionPlan = selects match {
       case Seq(query) => query
       case queries => Union(queries)
     }
+    // Apply CTEs
+    selectUnionPlan.optionalMap(ctx.ctes)(withCTE)
   }
 
   /**
@@ -181,11 +221,10 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
    */
   override def visitSingleInsertQuery(
       ctx: SingleInsertQueryContext): LogicalPlan = withOrigin(ctx) {
-    plan(ctx.queryTerm).
-      // Add organization statements.
-      optionalMap(ctx.queryOrganization)(withQueryResultClauses).
-      // Add insert.
-      optionalMap(ctx.insertInto())(withInsertInto)
+    val insertPlan = withInsertInto(ctx.insertInto(),
+      plan(ctx.queryTerm).optionalMap(ctx.queryOrganization)(withQueryResultClauses))
+    // Apply CTEs
+    insertPlan.optionalMap(ctx.ctes)(withCTE)
   }
 
   /**
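One user-visible consequence of moving the CTE handling into the shared `withCTE` helper: a WITH clause can now sit directly on top of an INSERT, and multiple inserts still collapse into a single Union plan. A small sketch under the same assumptions as above (hypothetical tables `t`, `t1`, `t2`, `s`):

```scala
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.catalyst.plans.logical.{Union, With}

// visitSingleInsertQuery builds the insert plan and then applies withCTE,
// so the parsed tree should be rooted at a With node.
val withInsert = CatalystSqlParser.parsePlan(
  "WITH cte AS (SELECT 1 AS c1) INSERT INTO t SELECT c1 FROM cte")
assert(withInsert.isInstanceOf[With])

// visitMultiInsertQuery unions the individual inserts, per the
// `case queries => Union(queries)` branch above.
val multiInsert = CatalystSqlParser.parsePlan(
  "FROM s INSERT INTO t1 SELECT id INSERT INTO t2 SELECT id WHERE id > 4")
assert(multiInsert.isInstanceOf[Union])
```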
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index d628150..9208d93 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -132,7 +132,11 @@ class PlanParserSuite extends AnalysisTest {
       table("a").select(star()).union(table("a").where('s < 10).select(star())))
     intercept(
       "from a select * select * from x where a.s < 10",
-      "Multi-Insert queries cannot have a FROM clause in their individual SELECT statements")
+      "Multi-select queries cannot have a FROM clause in their individual SELECT statements")
+    intercept(
+      "from a select * from b",
+      "Individual select statement can not have FROM cause as its already specified in " +
+        "the outer query block")
     assertEqual(
       "from a insert into tbl1 select * insert into tbl2 select * where s < 10",
       table("a").select(star()).insertInto("tbl1").union(
@@ -754,4 +758,47 @@ class PlanParserSuite extends AnalysisTest {
       assertEqual(query2, Distinct(a.union(b)).except(c.intersect(d, isAll = true), isAll = true))
     }
   }
+
+  test("create/alter view as insert into table") {
+    val m1 = intercept[ParseException] {
+      parsePlan("CREATE VIEW testView AS INSERT INTO jt VALUES(1, 1)")
+    }.getMessage
+    assert(m1.contains("mismatched input 'INSERT' expecting"))
+    // Multi insert query
+    val m2 = intercept[ParseException] {
+      parsePlan(
+        """
+          |CREATE VIEW testView AS FROM jt
+          |INSERT INTO tbl1 SELECT * WHERE jt.id < 5
+          |INSERT INTO tbl2 SELECT * WHERE jt.id > 4
+        """.stripMargin)
+    }.getMessage
+    assert(m2.contains("mismatched input 'INSERT' expecting"))
+    val m3 = intercept[ParseException] {
+      parsePlan("ALTER VIEW testView AS INSERT INTO jt VALUES(1, 1)")
+    }.getMessage
+    assert(m3.contains("mismatched input 'INSERT' expecting"))
+    // Multi insert query
+    val m4 = intercept[ParseException] {
+      parsePlan(
+        """
+          |ALTER VIEW testView AS FROM jt
+          |INSERT INTO tbl1 SELECT * WHERE jt.id < 5
+          |INSERT INTO tbl2 SELECT * WHERE jt.id > 4
+        """.stripMargin
+      )
+    }.getMessage
+    assert(m4.contains("mismatched input 'INSERT' expecting"))
+  }
+
+  test("Invalid insert constructs in the query") {
+    val m1 = intercept[ParseException] {
+      parsePlan("SELECT * FROM (INSERT INTO BAR VALUES (2))")
+    }.getMessage
+    assert(m1.contains("mismatched input 'FROM' expecting"))
+    val m2 = intercept[ParseException] {
+      parsePlan("SELECT * FROM S WHERE C1 IN (INSERT INTO T VALUES (2))")
+    }.getMessage
+    assert(m2.contains("mismatched input 'FROM' expecting"))
+  }
 }
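The `from a select * from b` intercept above exercises the validation added in `visitQueryWithFrom`. Outside ScalaTest, the same rejection can be observed like this (a sketch; `a` and `b` are hypothetical tables):

```scala
import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException}

// validate(...) in visitQueryWithFrom throws a ParseException carrying the
// message asserted by the intercept above.
try {
  CatalystSqlParser.parsePlan("FROM a SELECT * FROM b")
} catch {
  case e: ParseException =>
    // "Individual select statement can not have FROM cause as its already
    // specified in the outer query block"
    println(e.getMessage)
}
```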
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 735c0a5..8c7b2cb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -1257,15 +1257,6 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
     if (ctx.identifierList != null) {
       operationNotAllowed("CREATE VIEW ... PARTITIONED ON", ctx)
     } else {
-      // CREATE VIEW ... AS INSERT INTO is not allowed.
-      ctx.query.queryNoWith match {
-        case s: SingleInsertQueryContext if s.insertInto != null =>
-          operationNotAllowed("CREATE VIEW ... AS INSERT INTO", ctx)
-        case _: MultiInsertQueryContext =>
-          operationNotAllowed("CREATE VIEW ... AS FROM ... [INSERT INTO ...]+", ctx)
-        case _ => // OK
-      }
-
       val userSpecifiedColumns = Option(ctx.identifierCommentList).toSeq.flatMap { icl =>
         icl.identifierComment.asScala.map { ic =>
           ic.identifier.getText -> Option(ic.STRING).map(string)
@@ -1302,14 +1293,6 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
    * }}}
    */
   override def visitAlterViewQuery(ctx: AlterViewQueryContext): LogicalPlan = withOrigin(ctx) {
-    // ALTER VIEW ... AS INSERT INTO is not allowed.
-    ctx.query.queryNoWith match {
-      case s: SingleInsertQueryContext if s.insertInto != null =>
-        operationNotAllowed("ALTER VIEW ... AS INSERT INTO", ctx)
-      case _: MultiInsertQueryContext =>
-        operationNotAllowed("ALTER VIEW ... AS FROM ... [INSERT INTO ...]+", ctx)
-      case _ => // OK
-    }
     AlterViewAsCommand(
       name = visitTableIdentifier(ctx.tableIdentifier),
       originalText = source(ctx.query),
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala
index be3d079..fd60779 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala
@@ -215,17 +215,6 @@ class SparkSqlParserSuite extends AnalysisTest {
       "no viable alternative at input")
   }
 
-  test("create view as insert into table") {
-    // Single insert query
-    intercept("CREATE VIEW testView AS INSERT INTO jt VALUES(1, 1)",
-      "Operation not allowed: CREATE VIEW ... AS INSERT INTO")
-
-    // Multi insert query
-    intercept("CREATE VIEW testView AS FROM jt INSERT INTO tbl1 SELECT * WHERE jt.id < 5 " +
-      "INSERT INTO tbl2 SELECT * WHERE jt.id > 4",
-      "Operation not allowed: CREATE VIEW ... AS FROM ... [INSERT INTO ...]+")
-  }
-
   test("SPARK-17328 Fix NPE with EXPLAIN DESCRIBE TABLE") {
     assertEqual("describe t",
       DescribeTableCommand(TableIdentifier("t"), Map.empty, isExtended = false))
@@ -369,17 +358,6 @@ class SparkSqlParserSuite extends AnalysisTest {
       Project(UnresolvedAlias(concat) :: Nil, UnresolvedRelation(TableIdentifier("t"))))
   }
 
-  test("SPARK-25046 Fix Alter View ... As Insert Into Table") {
-    // Single insert query
-    intercept("ALTER VIEW testView AS INSERT INTO jt VALUES(1, 1)",
-      "Operation not allowed: ALTER VIEW ... AS INSERT INTO")
-
-    // Multi insert query
-    intercept("ALTER VIEW testView AS FROM jt INSERT INTO tbl1 SELECT * WHERE jt.id < 5 " +
-      "INSERT INTO tbl2 SELECT * WHERE jt.id > 4",
-      "Operation not allowed: ALTER VIEW ... AS FROM ... [INSERT INTO ...]+")
-  }
-
   test("database and schema tokens are interchangeable") {
     assertEqual("CREATE DATABASE foo", parser.parsePlan("CREATE SCHEMA foo"))
     assertEqual("DROP DATABASE foo", parser.parsePlan("DROP SCHEMA foo"))

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org