This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
commit ed69190ce0762f3b741b8d175ef8d02da45f3183 Author: Takeshi Yamamuro <yamam...@apache.org> AuthorDate: Tue Jun 16 00:27:45 2020 +0900 [SPARK-26905][SQL] Follow the SQL:2016 reserved keywords ### What changes were proposed in this pull request? This PR intends to move keywords `ANTI`, `SEMI`, and `MINUS` from reserved to non-reserved. ### Why are the changes needed? To comply with the ANSI/SQL standard. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added tests. Closes #28807 from maropu/SPARK-26905-2. Authored-by: Takeshi Yamamuro <yamam...@apache.org> Signed-off-by: Takeshi Yamamuro <yamam...@apache.org> --- docs/sql-ref-ansi-compliance.md | 6 +- .../apache/spark/sql/catalyst/parser/SqlBase.g4 | 3 + .../resources/ansi-sql-2016-reserved-keywords.txt | 401 +++++++++++++++++++++ .../parser/TableIdentifierParserSuite.scala | 24 +- 4 files changed, 429 insertions(+), 5 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index eab194c..e5ca7e9d 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -135,7 +135,7 @@ Below is a list of all the keywords in Spark SQL. |ALTER|non-reserved|non-reserved|reserved| |ANALYZE|non-reserved|non-reserved|non-reserved| |AND|reserved|non-reserved|reserved| -|ANTI|reserved|strict-non-reserved|non-reserved| +|ANTI|non-reserved|strict-non-reserved|non-reserved| |ANY|reserved|non-reserved|reserved| |ARCHIVE|non-reserved|non-reserved|non-reserved| |ARRAY|non-reserved|non-reserved|reserved| @@ -264,7 +264,7 @@ Below is a list of all the keywords in Spark SQL. 
|MAP|non-reserved|non-reserved|non-reserved| |MATCHED|non-reserved|non-reserved|non-reserved| |MERGE|non-reserved|non-reserved|non-reserved| -|MINUS|reserved|strict-non-reserved|non-reserved| +|MINUS|non-reserved|strict-non-reserved|non-reserved| |MINUTE|reserved|non-reserved|reserved| |MONTH|reserved|non-reserved|reserved| |MSCK|non-reserved|non-reserved|non-reserved| @@ -325,7 +325,7 @@ Below is a list of all the keywords in Spark SQL. |SCHEMA|non-reserved|non-reserved|non-reserved| |SECOND|reserved|non-reserved|reserved| |SELECT|reserved|non-reserved|reserved| -|SEMI|reserved|strict-non-reserved|non-reserved| +|SEMI|non-reserved|strict-non-reserved|non-reserved| |SEPARATED|non-reserved|non-reserved|non-reserved| |SERDE|non-reserved|non-reserved|non-reserved| |SERDEPROPERTIES|non-reserved|non-reserved|non-reserved| diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 14a6687..5821a74 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -994,6 +994,7 @@ ansiNonReserved | AFTER | ALTER | ANALYZE + | ANTI | ARCHIVE | ARRAY | ASC @@ -1126,10 +1127,12 @@ ansiNonReserved | ROW | ROWS | SCHEMA + | SEMI | SEPARATED | SERDE | SERDEPROPERTIES | SET + | SETMINUS | SETS | SHOW | SKEWED diff --git a/sql/catalyst/src/test/resources/ansi-sql-2016-reserved-keywords.txt b/sql/catalyst/src/test/resources/ansi-sql-2016-reserved-keywords.txt new file mode 100644 index 0000000..921491a --- /dev/null +++ b/sql/catalyst/src/test/resources/ansi-sql-2016-reserved-keywords.txt @@ -0,0 +1,401 @@ +-- This file comes from: https://github.com/postgres/postgres/tree/master/doc/src/sgml/keywords +ABS +ACOS +ALL +ALLOCATE +ALTER +AND +ANY +ARE +ARRAY +ARRAY_AGG +ARRAY_MAX_CARDINALITY +AS +ASENSITIVE +ASIN +ASYMMETRIC +AT +ATAN +ATOMIC +AUTHORIZATION +AVG
+BEGIN +BEGIN_FRAME +BEGIN_PARTITION +BETWEEN +BIGINT +BINARY +BLOB +BOOLEAN +BOTH +BY +CALL +CALLED +CARDINALITY +CASCADED +CASE +CAST +CEIL +CEILING +CHAR +CHAR_LENGTH +CHARACTER +CHARACTER_LENGTH +CHECK +CLASSIFIER +CLOB +CLOSE +COALESCE +COLLATE +COLLECT +COLUMN +COMMIT +CONDITION +CONNECT +CONSTRAINT +CONTAINS +CONVERT +COPY +CORR +CORRESPONDING +COS +COSH +COUNT +COVAR_POP +COVAR_SAMP +CREATE +CROSS +CUBE +CUME_DIST +CURRENT +CURRENT_CATALOG +CURRENT_DATE +CURRENT_DEFAULT_TRANSFORM_GROUP +CURRENT_PATH +CURRENT_ROLE +CURRENT_ROW +CURRENT_SCHEMA +CURRENT_TIME +CURRENT_TIMESTAMP +CURRENT_TRANSFORM_GROUP_FOR_TYPE +CURRENT_USER +CURSOR +CYCLE +DATE +DAY +DEALLOCATE +DEC +DECIMAL +DECFLOAT +DECLARE +DEFAULT +DEFINE +DELETE +DENSE_RANK +DEREF +DESCRIBE +DETERMINISTIC +DISCONNECT +DISTINCT +DOUBLE +DROP +DYNAMIC +EACH +ELEMENT +ELSE +EMPTY +END +END_FRAME +END_PARTITION +END-EXEC +EQUALS +ESCAPE +EVERY +EXCEPT +EXEC +EXECUTE +EXISTS +EXP +EXTERNAL +EXTRACT +FALSE +FETCH +FILTER +FIRST_VALUE +FLOAT +FLOOR +FOR +FOREIGN +FRAME_ROW +FREE +FROM +FULL +FUNCTION +FUSION +GET +GLOBAL +GRANT +GROUP +GROUPING +GROUPS +HAVING +HOLD +HOUR +IDENTITY +IN +INDICATOR +INITIAL +INNER +INOUT +INSENSITIVE +INSERT +INT +INTEGER +INTERSECT +INTERSECTION +INTERVAL +INTO +IS +JOIN +JSON_ARRAY +JSON_ARRAYAGG +JSON_EXISTS +JSON_OBJECT +JSON_OBJECTAGG +JSON_QUERY +JSON_TABLE +JSON_TABLE_PRIMITIVE +JSON_VALUE +LAG +LANGUAGE +LARGE +LAST_VALUE +LATERAL +LEAD +LEADING +LEFT +LIKE +LIKE_REGEX +LISTAGG +LN +LOCAL +LOCALTIME +LOCALTIMESTAMP +LOG +LOG10 +LOWER +MATCH +MATCH_NUMBER +MATCH_RECOGNIZE +MATCHES +MAX +MEASURES +MEMBER +MERGE +METHOD +MIN +MINUTE +MOD +MODIFIES +MODULE +MONTH +MULTISET +NATIONAL +NATURAL +NCHAR +NCLOB +NEW +NO +NONE +NORMALIZE +NOT +NTH_VALUE +NTILE +NULL +NULLIF +NUMERIC +OCTET_LENGTH +OCCURRENCES_REGEX +OF +OFFSET +OLD +OMIT +ON +ONE +ONLY +OPEN +OR +ORDER +OUT +OUTER +OVER +OVERLAPS +OVERLAY +PARAMETER +PARTITION +PATTERN +PER +PERCENT +PERCENT_RANK +PERCENTILE_CONT 
+PERCENTILE_DISC +PERIOD +PERMUTE +PORTION +POSITION +POSITION_REGEX +POWER +PRECEDES +PRECISION +PREPARE +PRIMARY +PROCEDURE +PTF +RANGE +RANK +READS +REAL +RECURSIVE +REF +REFERENCES +REFERENCING +REGR_AVGX +REGR_AVGY +REGR_COUNT +REGR_INTERCEPT +REGR_R2 +REGR_SLOPE +REGR_SXX +REGR_SXY +REGR_SYY +RELEASE +RESULT +RETURN +RETURNS +REVOKE +RIGHT +ROLLBACK +ROLLUP +ROW +ROW_NUMBER +ROWS +RUNNING +SAVEPOINT +SCOPE +SCROLL +SEARCH +SECOND +SEEK +SELECT +SENSITIVE +SESSION_USER +SET +SHOW +SIMILAR +SIN +SINH +SKIP +SMALLINT +SOME +SPECIFIC +SPECIFICTYPE +SQL +SQLEXCEPTION +SQLSTATE +SQLWARNING +SQRT +START +STATIC +STDDEV_POP +STDDEV_SAMP +SUBMULTISET +SUBSET +SUBSTRING +SUBSTRING_REGEX +SUCCEEDS +SUM +SYMMETRIC +SYSTEM +SYSTEM_TIME +SYSTEM_USER +TABLE +TABLESAMPLE +TAN +TANH +THEN +TIME +TIMESTAMP +TIMEZONE_HOUR +TIMEZONE_MINUTE +TO +TRAILING +TRANSLATE +TRANSLATE_REGEX +TRANSLATION +TREAT +TRIGGER +TRIM +TRIM_ARRAY +TRUE +TRUNCATE +UESCAPE +UNION +UNIQUE +UNKNOWN +UNMATCHED +UNNEST +UPDATE +UPPER +USER +USING +VALUE +VALUES +VALUE_OF +VAR_POP +VAR_SAMP +VARBINARY +VARCHAR +VARYING +VERSIONING +WHEN +WHENEVER +WHERE +WIDTH_BUCKET +WINDOW +WITH +WITHIN +WITHOUT +YEAR +DATALINK +DLNEWCOPY +DLPREVIOUSCOPY +DLURLCOMPLETE +DLURLCOMPLETEWRITE +DLURLCOMPLETEONLY +DLURLPATH +DLURLPATHWRITE +DLURLPATHONLY +DLURLSCHEME +DLURLSERVER +DLVALUE +IMPORT +XML +XMLAGG +XMLATTRIBUTES +XMLBINARY +XMLCAST +XMLCOMMENT +XMLCONCAT +XMLDOCUMENT +XMLELEMENT +XMLEXISTS +XMLFOREST +XMLITERATE +XMLNAMESPACES +XMLPARSE +XMLPI +XMLQUERY +XMLSERIALIZE +XMLTABLE +XMLTEXT +XMLVALIDATE diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala index 04969e3..04c427d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala @@ -16,8 +16,11 @@ */ package org.apache.spark.sql.catalyst.parser +import java.io.File +import java.nio.file.Files import java.util.Locale +import scala.collection.JavaConverters._ import scala.collection.mutable import org.apache.spark.SparkFunSuite @@ -340,7 +343,12 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper { // The case where a symbol has multiple literal definitions, // e.g., `DATABASES: 'DATABASES' | 'SCHEMAS';`. if (hasMultipleLiterals) { - val literals = splitDefs.map(_.replaceAll("'", "").trim).toSeq + // Filters out inappropriate entries, e.g., `!` in `NOT: 'NOT' | '!';` + val litDef = """([A-Z_]+)""".r + val literals = splitDefs.map(_.replaceAll("'", "").trim).toSeq.flatMap { + case litDef(lit) => Some(lit) + case _ => None + } (symbol, literals) :: Nil } else { val literal = literalDef.replaceAll("'", "").trim @@ -388,12 +396,24 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper { val reservedKeywordsInAnsiMode = allCandidateKeywords -- nonReservedKeywordsInAnsiMode test("check # of reserved keywords") { - val numReservedKeywords = 78 + val numReservedKeywords = 74 assert(reservedKeywordsInAnsiMode.size == numReservedKeywords, s"The expected number of reserved keywords is $numReservedKeywords, but " + s"${reservedKeywordsInAnsiMode.size} found.") } + test("reserved keywords in Spark are also reserved in SQL 2016") { + withTempDir { dir => + val tmpFile = new File(dir, "tmp") + val is = Thread.currentThread().getContextClassLoader + .getResourceAsStream("ansi-sql-2016-reserved-keywords.txt") + Files.copy(is, tmpFile.toPath) + val reservedKeywordsInSql2016 = Files.readAllLines(tmpFile.toPath) + .asScala.filterNot(_.startsWith("--")).map(_.trim).toSet + assert((reservedKeywordsInAnsiMode -- reservedKeywordsInSql2016).isEmpty) + } + } + test("table identifier") { // Regular names. 
assert(TableIdentifier("q") === parseTableIdentifier("q")) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org