This is an automated email from the ASF dual-hosted git repository.

tgraves pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 4be2a79  [SPARK-30049][SQL] SQL fails to parse when comment contains an unmatched quote character
4be2a79 is described below

commit 4be2a79c7a9a1b1e3b0c3704e94da19c2b87ba47
Author: Javier <jfuen...@hortonworks.com>
AuthorDate: Tue Mar 3 09:55:15 2020 -0600

    [SPARK-30049][SQL] SQL fails to parse when comment contains an unmatched quote character
    
    ### What changes were proposed in this pull request?
    
    A SQL statement that contains a comment with an unmatched quote character can lead to a parse error:
    - Added an insideComment flag in the splitter method to avoid checking single and double quotes within a comment (a simplified sketch of this splitting logic is shown after the example below):
    ```
    spark-sql> SELECT 1 -- someone's comment here
             > ;
    Error in query:
    extraneous input ';' expecting <EOF>(line 2, pos 0)
    
    == SQL ==
    SELECT 1 -- someone's comment here
    ;
    ^^^
    ```
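    
    Below is a minimal, standalone sketch of that splitting idea. It uses a toy `splitStatements` helper (a hypothetical name, not the actual `splitSemiColon` method changed in the diff further down), omits backslash-escape handling, and models comment termination at a newline directly, so it only illustrates the approach under those assumptions:
    
    ```scala
    // Simplified illustration (not the SparkSQLCLIDriver code): a ';' splits
    // statements only when the scanner is outside quotes and outside a '--'
    // comment, so an unmatched quote inside a comment is ignored.
    def splitStatements(line: String): Seq[String] = {
      var insideSingleQuote = false
      var insideDoubleQuote = false
      var insideComment = false
      var begin = 0
      val parts = scala.collection.mutable.ArrayBuffer.empty[String]
      for (i <- 0 until line.length) {
        val c = line.charAt(i)
        if (c == '\'' && !insideDoubleQuote && !insideComment) {
          insideSingleQuote = !insideSingleQuote
        } else if (c == '"' && !insideSingleQuote && !insideComment) {
          insideDoubleQuote = !insideDoubleQuote
        } else if (c == '-' && i + 1 < line.length && line.charAt(i + 1) == '-' &&
            !insideSingleQuote && !insideDoubleQuote) {
          insideComment = true                   // '--' starts a comment ...
        } else if (c == '\n') {
          insideComment = false                  // ... which ends at the end of the line
        } else if (c == ';' && !insideSingleQuote && !insideDoubleQuote && !insideComment) {
          parts += line.substring(begin, i)      // split, dropping the ';' itself
          begin = i + 1
        }
      }
      parts += line.substring(begin)
      parts.toSeq
    }

    // splitStatements("SELECT 1 -- someone's comment here\n;")
    //   returns Seq("SELECT 1 -- someone's comment here\n", "")
    ```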
    
    ### Why are the changes needed?
    
    This misbehaviour was not present in previous Spark versions.
    
    ### Does this PR introduce any user-facing change?
    
    - No
    
    ### How was this patch tested?
    
    - New tests were added.
    
    Closes #27321 from javierivanov/SPARK-30049B.
    
    Lead-authored-by: Javier <jfuen...@hortonworks.com>
    Co-authored-by: Javier Fuentes <j.fuente...@icloud.com>
    Signed-off-by: Thomas Graves <tgra...@apache.org>
    (cherry picked from commit 3ff213568694e265466d8480b61fd38f4fd8fdff)
    Signed-off-by: Thomas Graves <tgra...@apache.org>
---
 .../sql/hive/thriftserver/SparkSQLCLIDriver.scala  | 24 ++++++++++++++++++----
 .../spark/sql/hive/thriftserver/CliSuite.scala     | 22 ++++++++++++++++++++
 2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
index b665d4a..19f7ea8 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
@@ -509,24 +509,40 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
   private def splitSemiColon(line: String): JList[String] = {
     var insideSingleQuote = false
     var insideDoubleQuote = false
+    var insideComment = false
     var escape = false
     var beginIndex = 0
+    var endIndex = line.length
     val ret = new JArrayList[String]
+
     for (index <- 0 until line.length) {
-      if (line.charAt(index) == '\'') {
+      if (line.charAt(index) == '\'' && !insideComment) {
         // take a look to see if it is escaped
         if (!escape) {
           // flip the boolean variable
           insideSingleQuote = !insideSingleQuote
         }
-      } else if (line.charAt(index) == '\"') {
+      } else if (line.charAt(index) == '\"' && !insideComment) {
         // take a look to see if it is escaped
         if (!escape) {
           // flip the boolean variable
           insideDoubleQuote = !insideDoubleQuote
         }
+      } else if (line.charAt(index) == '-') {
+        val hasNext = index + 1 < line.length
+        if (insideDoubleQuote || insideSingleQuote || insideComment) {
+          // Ignores '-' in any case of quotes or comment.
+          // Avoids to start a comment(--) within a quoted segment or already in a comment.
+          // Sample query: select "quoted value --"
+          //                                    ^^ avoids starting a comment if it's inside quotes.
+        } else if (hasNext && line.charAt(index + 1) == '-') {
+          // ignore quotes and ;
+          insideComment = true
+          // ignore eol
+          endIndex = index
+        }
       } else if (line.charAt(index) == ';') {
-        if (insideSingleQuote || insideDoubleQuote) {
+        if (insideSingleQuote || insideDoubleQuote || insideComment) {
           // do not split
         } else {
           // split, do not include ; itself
@@ -543,7 +559,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
         escape = true
       }
     }
-    ret.add(line.substring(beginIndex))
+    ret.add(line.substring(beginIndex, endIndex))
     ret
   }
 }
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index 6609701..43aafc3 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -400,4 +400,26 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
         -> "1.000000000000000000"
     )
   }
+
+  test("SPARK-30049 Should not complain for quotes in commented lines") {
+    runCliWithin(1.minute)(
+      """SELECT concat('test', 'comment') -- someone's comment here
+        |;""".stripMargin -> "testcomment"
+    )
+  }
+
+  test("SPARK-30049 Should not complain for quotes in commented with 
multi-lines") {
+    runCliWithin(1.minute)(
+      """SELECT concat('test', 'comment') -- someone's comment here \\
+        | comment continues here with single ' quote \\
+        | extra ' \\
+        |;""".stripMargin -> "testcomment"
+    )
+    runCliWithin(1.minute)(
+      """SELECT concat('test', 'comment') -- someone's comment here \\
+        |   comment continues here with single ' quote \\
+        |   extra ' \\
+        |   ;""".stripMargin -> "testcomment"
+    )
+  }
 }

