This is an automated email from the ASF dual-hosted git repository. tgraves pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 4be2a79 [SPARK-30049][SQL] SQL fails to parse when comment contains an unmatched quote character 4be2a79 is described below commit 4be2a79c7a9a1b1e3b0c3704e94da19c2b87ba47 Author: Javier <jfuen...@hortonworks.com> AuthorDate: Tue Mar 3 09:55:15 2020 -0600 [SPARK-30049][SQL] SQL fails to parse when comment contains an unmatched quote character ### What changes were proposed in this pull request? A SQL statement that contains a comment with an unmatched quote character can lead to a parse error: - Added an insideComment flag in the splitter method to avoid checking single and double quotes within a comment: ``` spark-sql> SELECT 1 -- someone's comment here > ; Error in query: extraneous input ';' expecting <EOF>(line 2, pos 0) == SQL == SELECT 1 -- someone's comment here ; ^^^ ``` ### Why are the changes needed? This misbehaviour was not present in previous Spark versions. ### Does this PR introduce any user-facing change? - No ### How was this patch tested? - New tests were added. Closes #27321 from javierivanov/SPARK-30049B. 
Lead-authored-by: Javier <jfuen...@hortonworks.com> Co-authored-by: Javier Fuentes <j.fuente...@icloud.com> Signed-off-by: Thomas Graves <tgra...@apache.org> (cherry picked from commit 3ff213568694e265466d8480b61fd38f4fd8fdff) Signed-off-by: Thomas Graves <tgra...@apache.org> --- .../sql/hive/thriftserver/SparkSQLCLIDriver.scala | 24 ++++++++++++++++++---- .../spark/sql/hive/thriftserver/CliSuite.scala | 22 ++++++++++++++++++++ 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index b665d4a..19f7ea8 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -509,24 +509,40 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { private def splitSemiColon(line: String): JList[String] = { var insideSingleQuote = false var insideDoubleQuote = false + var insideComment = false var escape = false var beginIndex = 0 + var endIndex = line.length val ret = new JArrayList[String] + for (index <- 0 until line.length) { - if (line.charAt(index) == '\'') { + if (line.charAt(index) == '\'' && !insideComment) { // take a look to see if it is escaped if (!escape) { // flip the boolean variable insideSingleQuote = !insideSingleQuote } - } else if (line.charAt(index) == '\"') { + } else if (line.charAt(index) == '\"' && !insideComment) { // take a look to see if it is escaped if (!escape) { // flip the boolean variable insideDoubleQuote = !insideDoubleQuote } + } else if (line.charAt(index) == '-') { + val hasNext = index + 1 < line.length + if (insideDoubleQuote || insideSingleQuote || insideComment) { + // Ignores '-' in any case of quotes or comment. 
+ // Avoids to start a comment(--) within a quoted segment or already in a comment. + // Sample query: select "quoted value --" + // ^^ avoids starting a comment if it's inside quotes. + } else if (hasNext && line.charAt(index + 1) == '-') { + // ignore quotes and ; + insideComment = true + // ignore eol + endIndex = index + } } else if (line.charAt(index) == ';') { - if (insideSingleQuote || insideDoubleQuote) { + if (insideSingleQuote || insideDoubleQuote || insideComment) { // do not split } else { // split, do not include ; itself @@ -543,7 +559,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { escape = true } } - ret.add(line.substring(beginIndex)) + ret.add(line.substring(beginIndex, endIndex)) ret } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 6609701..43aafc3 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -400,4 +400,26 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { -> "1.000000000000000000" ) } + + test("SPARK-30049 Should not complain for quotes in commented lines") { + runCliWithin(1.minute)( + """SELECT concat('test', 'comment') -- someone's comment here + |;""".stripMargin -> "testcomment" + ) + } + + test("SPARK-30049 Should not complain for quotes in commented with multi-lines") { + runCliWithin(1.minute)( + """SELECT concat('test', 'comment') -- someone's comment here \\ + | comment continues here with single ' quote \\ + | extra ' \\ + |;""".stripMargin -> "testcomment" + ) + runCliWithin(1.minute)( + """SELECT concat('test', 'comment') -- someone's comment here \\ + | comment continues here with single ' quote \\ + | extra ' \\ + | ;""".stripMargin -> "testcomment" + ) + } } 
--------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org