This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 3b37bfd  [SPARK-27949][SQL] Support SUBSTRING(str FROM n1 [FOR n2]) 
syntax
3b37bfd is described below

commit 3b37bfde2a9bef6f6f95138b1c399b36dbed99eb
Author: Zhu, Lipeng <lip...@ebay.com>
AuthorDate: Mon Jun 10 09:05:10 2019 -0700

    [SPARK-27949][SQL] Support SUBSTRING(str FROM n1 [FOR n2]) syntax
    
    ## What changes were proposed in this pull request?
    
    Currently, function `substr/substring`'s usage is like 
`substring(string_expression, n1 [,n2])`.
    
    But, the ANSI SQL defined the pattern for substr/substring is like 
`SUBSTRING(str FROM n1 [FOR n2])`. This gap makes some inconvenient when we 
switch to the SparkSQL.
    
    - ANSI SQL-92: http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt
    
    Below are the mainly DB engines to support the ANSI standard for substring.
    - PostgreSQL https://www.postgresql.org/docs/9.1/functions-string.html
    - MySQL 
https://dev.mysql.com/doc/refman/8.0/en/string-functions.html#function_substring
    - Redshift https://docs.aws.amazon.com/redshift/latest/dg/r_SUBSTRING.html
    - Teradata 
https://docs.teradata.com/reader/756LNiPSFdY~4JcCCcR5Cw/XnePye0Cwexw6Pny_qnxVA
    
    **Oracle, SQL Server, Hive, Presto don't have this additional syntax.**
    
    ## How was this patch tested?
    
    Pass the Jenkins with the updated test cases.
    
    Closes #24802 from lipzhu/SPARK-27949.
    
    Authored-by: Zhu, Lipeng <lip...@ebay.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 docs/sql-keywords.md                               |  2 +
 .../apache/spark/sql/catalyst/parser/SqlBase.g4    |  8 ++
 .../spark/sql/catalyst/parser/AstBuilder.scala     | 11 +++
 .../parser/TableIdentifierParserSuite.scala        |  2 +
 .../sql-tests/inputs/string-functions.sql          | 14 ++++
 .../sql-tests/results/string-functions.sql.out     | 98 +++++++++++++++++++++-
 6 files changed, 134 insertions(+), 1 deletion(-)

diff --git a/docs/sql-keywords.md b/docs/sql-keywords.md
index 62378d9..8442291 100644
--- a/docs/sql-keywords.md
+++ b/docs/sql-keywords.md
@@ -248,6 +248,8 @@ Below is a list of all the keywords in Spark SQL.
   
<tr><td>STORED</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
   
<tr><td>STRATIFY</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
   
<tr><td>STRUCT</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
+  
<tr><td>SUBSTR</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
+  
<tr><td>SUBSTRING</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
   
<tr><td>TABLE</td><td>reserved</td><td>non-reserved</td><td>reserved</td></tr>
   
<tr><td>TABLES</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
   
<tr><td>TABLESAMPLE</td><td>non-reserved</td><td>non-reserved</td><td>reserved</td></tr>
diff --git 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index d32f922..e919c90 100644
--- 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -671,6 +671,8 @@ primaryExpression
     | base=primaryExpression '.' fieldName=identifier                          
                #dereference
     | '(' expression ')'                                                       
                #parenthesizedExpression
     | EXTRACT '(' field=identifier FROM source=valueExpression ')'             
                #extract
+    | (SUBSTR | SUBSTRING) '(' str=valueExpression (FROM | ',') 
pos=valueExpression
+      ((FOR | ',') len=valueExpression)? ')'                                   
                #substring
     ;
 
 constant
@@ -1001,6 +1003,8 @@ ansiNonReserved
     | STORED
     | STRATIFY
     | STRUCT
+    | SUBSTR
+    | SUBSTRING
     | TABLES
     | TABLESAMPLE
     | TBLPROPERTIES
@@ -1255,6 +1259,8 @@ nonReserved
     | STORED
     | STRATIFY
     | STRUCT
+    | SUBSTR
+    | SUBSTRING
     | TABLE
     | TABLES
     | TABLESAMPLE
@@ -1511,6 +1517,8 @@ STATISTICS: 'STATISTICS';
 STORED: 'STORED';
 STRATIFY: 'STRATIFY';
 STRUCT: 'STRUCT';
+SUBSTR: 'SUBSTR';
+SUBSTRING: 'SUBSTRING';
 TABLE: 'TABLE';
 TABLES: 'TABLES';
 TABLESAMPLE: 'TABLESAMPLE';
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index ca5a567..fdef5f9 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1334,6 +1334,17 @@ class AstBuilder(conf: SQLConf) extends 
SqlBaseBaseVisitor[AnyRef] with Logging
   }
 
   /**
+   * Create a Substring/Substr expression.
+   */
+  override def visitSubstring(ctx: SubstringContext): Expression = 
withOrigin(ctx) {
+    if (ctx.len != null) {
+      Substring(expression(ctx.str), expression(ctx.pos), expression(ctx.len))
+    } else {
+      new Substring(expression(ctx.str), expression(ctx.pos))
+    }
+  }
+
+  /**
    * Create a (windowed) Function expression.
    */
   override def visitFunctionCall(ctx: FunctionCallContext): Expression = 
withOrigin(ctx) {
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
index f86ce16..551597e 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
@@ -511,6 +511,8 @@ class TableIdentifierParserSuite extends SparkFunSuite with 
SQLHelper {
     "stored",
     "stratify",
     "struct",
+    "substr",
+    "substring",
     "table",
     "tables",
     "tablesample",
diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql 
b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
index fbc2316..3fe571d 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
@@ -24,3 +24,17 @@ select right(null, -2), right("abcd", -2), right("abcd", 0), 
right("abcd", 'a');
 -- split function
 SELECT split('aa1cc2ee3', '[1-9]+');
 SELECT split('aa1cc2ee3', '[1-9]+', 2);
+
+-- substring function
+SELECT substr('Spark SQL', 5);
+SELECT substr('Spark SQL', -3);
+SELECT substr('Spark SQL', 5, 1);
+SELECT substr('Spark SQL' from 5);
+SELECT substr('Spark SQL' from -3);
+SELECT substr('Spark SQL' from 5 for 1);
+SELECT substring('Spark SQL', 5);
+SELECT substring('Spark SQL', -3);
+SELECT substring('Spark SQL', 5, 1);
+SELECT substring('Spark SQL' from 5);
+SELECT substring('Spark SQL' from -3);
+SELECT substring('Spark SQL' from 5 for 1);
diff --git 
a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
index 25d93b2..e6e0795 100644
--- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 13
+-- Number of queries: 25
 
 
 -- !query 0
@@ -106,3 +106,99 @@ SELECT split('aa1cc2ee3', '[1-9]+', 2)
 struct<split(aa1cc2ee3, [1-9]+, 2):array<string>>
 -- !query 12 output
 ["aa","cc2ee3"]
+
+
+-- !query 13
+SELECT substr('Spark SQL', 5)
+-- !query 13 schema
+struct<substring(Spark SQL, 5, 2147483647):string>
+-- !query 13 output
+k SQL
+
+
+-- !query 14
+SELECT substr('Spark SQL', -3)
+-- !query 14 schema
+struct<substring(Spark SQL, -3, 2147483647):string>
+-- !query 14 output
+SQL
+
+
+-- !query 15
+SELECT substr('Spark SQL', 5, 1)
+-- !query 15 schema
+struct<substring(Spark SQL, 5, 1):string>
+-- !query 15 output
+k
+
+
+-- !query 16
+SELECT substr('Spark SQL' from 5)
+-- !query 16 schema
+struct<substring(Spark SQL, 5, 2147483647):string>
+-- !query 16 output
+k SQL
+
+
+-- !query 17
+SELECT substr('Spark SQL' from -3)
+-- !query 17 schema
+struct<substring(Spark SQL, -3, 2147483647):string>
+-- !query 17 output
+SQL
+
+
+-- !query 18
+SELECT substr('Spark SQL' from 5 for 1)
+-- !query 18 schema
+struct<substring(Spark SQL, 5, 1):string>
+-- !query 18 output
+k
+
+
+-- !query 19
+SELECT substring('Spark SQL', 5)
+-- !query 19 schema
+struct<substring(Spark SQL, 5, 2147483647):string>
+-- !query 19 output
+k SQL
+
+
+-- !query 20
+SELECT substring('Spark SQL', -3)
+-- !query 20 schema
+struct<substring(Spark SQL, -3, 2147483647):string>
+-- !query 20 output
+SQL
+
+
+-- !query 21
+SELECT substring('Spark SQL', 5, 1)
+-- !query 21 schema
+struct<substring(Spark SQL, 5, 1):string>
+-- !query 21 output
+k
+
+
+-- !query 22
+SELECT substring('Spark SQL' from 5)
+-- !query 22 schema
+struct<substring(Spark SQL, 5, 2147483647):string>
+-- !query 22 output
+k SQL
+
+
+-- !query 23
+SELECT substring('Spark SQL' from -3)
+-- !query 23 schema
+struct<substring(Spark SQL, -3, 2147483647):string>
+-- !query 23 output
+SQL
+
+
+-- !query 24
+SELECT substring('Spark SQL' from 5 for 1)
+-- !query 24 schema
+struct<substring(Spark SQL, 5, 1):string>
+-- !query 24 output
+k


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to