This is an automated email from the ASF dual-hosted git repository.
jiajunxie pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new be2146679e [CALCITE-5962] Support parse Spark-style syntax LEFT ANTI
JOIN in Babel parser
be2146679e is described below
commit be2146679e3be0d4ce1a7578b93237c30b1a1709
Author: jiefei <[email protected]>
AuthorDate: Mon Aug 28 20:44:12 2023 +0800
[CALCITE-5962] Support parse Spark-style syntax LEFT ANTI JOIN in Babel
parser
---
babel/src/main/codegen/config.fmpp | 3 ++
babel/src/main/codegen/includes/parserImpls.ftl | 7 +++++
.../org/apache/calcite/test/BabelParserTest.java | 34 ++++++++++++++++++++++
.../java/org/apache/calcite/test/BabelTest.java | 15 ++++++++++
babel/src/test/resources/sql/select.iq | 22 ++++++++++++++
core/src/main/codegen/templates/Parser.jj | 2 +-
.../main/java/org/apache/calcite/sql/JoinType.java | 7 +++++
.../main/java/org/apache/calcite/sql/SqlJoin.java | 4 +++
.../apache/calcite/sql/validate/JoinNamespace.java | 3 +-
.../org/apache/calcite/sql/validate/JoinScope.java | 6 ++--
.../calcite/sql/validate/SqlValidatorImpl.java | 3 +-
11 files changed, 101 insertions(+), 5 deletions(-)
diff --git a/babel/src/main/codegen/config.fmpp
b/babel/src/main/codegen/config.fmpp
index dc7b8fb695..4e1f9c36d4 100644
--- a/babel/src/main/codegen/config.fmpp
+++ b/babel/src/main/codegen/config.fmpp
@@ -42,6 +42,7 @@ data: {
# List of new keywords. Example: "DATABASES", "TABLES". If the keyword is
# not a reserved keyword, add it to the 'nonReservedKeywords' section.
keywords: [
+ "ANTI"
"DISCARD"
"IF"
"PLANS"
@@ -56,6 +57,7 @@ data: {
# items in this list become non-reserved
nonReservedKeywordsToAdd: [
# not in core, added in babel
+ "ANTI"
"DISCARD"
"IF"
"PLANS"
@@ -545,6 +547,7 @@ data: {
# List of additional join types. Each is a method with no arguments.
# Example: "LeftSemiJoin".
joinTypes: [
+ "LeftAntiJoin",
"LeftSemiJoin"
]
diff --git a/babel/src/main/codegen/includes/parserImpls.ftl
b/babel/src/main/codegen/includes/parserImpls.ftl
index fb169fe1e8..b481a29f0f 100644
--- a/babel/src/main/codegen/includes/parserImpls.ftl
+++ b/babel/src/main/codegen/includes/parserImpls.ftl
@@ -22,6 +22,13 @@ JoinType LeftSemiJoin() :
<LEFT> <SEMI> <JOIN> { return JoinType.LEFT_SEMI_JOIN; }
}
+JoinType LeftAntiJoin() :
+{
+}
+{
+ <LEFT> <ANTI> <JOIN> { return JoinType.LEFT_ANTI_JOIN; }
+}
+
SqlNode DateaddFunctionCall() :
{
final Span s;
diff --git a/babel/src/test/java/org/apache/calcite/test/BabelParserTest.java
b/babel/src/test/java/org/apache/calcite/test/BabelParserTest.java
index 69c6465127..14df0a38a6 100644
--- a/babel/src/test/java/org/apache/calcite/test/BabelParserTest.java
+++ b/babel/src/test/java/org/apache/calcite/test/BabelParserTest.java
@@ -18,6 +18,7 @@ package org.apache.calcite.test;
import org.apache.calcite.sql.SqlDialect;
import org.apache.calcite.sql.dialect.MysqlSqlDialect;
import org.apache.calcite.sql.dialect.PostgresqlSqlDialect;
+import org.apache.calcite.sql.dialect.SparkSqlDialect;
import org.apache.calcite.sql.parser.SqlAbstractParserImpl;
import org.apache.calcite.sql.parser.SqlParser;
import org.apache.calcite.sql.parser.SqlParserFixture;
@@ -430,6 +431,39 @@ class BabelParserTest extends SqlParserTest {
f.sql("DISCARD TEMP").same();
}
+ @Test void testSparkLeftAntiJoin() {
+ final SqlParserFixture f = fixture().withDialect(SparkSqlDialect.DEFAULT);
+ final String sql = "select a.cid, a.cname, count(1) as amount\n"
+ + "from geo.area1 as a\n"
+ + "left anti join (select distinct cid, cname\n"
+ + "from geo.area2\n"
+ + "where cname = 'cityA') as b on a.cid = b.cid\n"
+ + "group by a.cid, a.cname";
+ final String expected = "SELECT A.CID, A.CNAME, COUNT(1) AMOUNT\n"
+ + "FROM GEO.AREA1 A\n"
+ + "LEFT ANTI JOIN (SELECT DISTINCT CID, CNAME\n"
+ + "FROM GEO.AREA2\n"
+ + "WHERE (CNAME = 'cityA')) B ON (A.CID = B.CID)\n"
+ + "GROUP BY A.CID, A.CNAME";
+ f.sql(sql).ok(expected);
+ }
+
+ @Test void testLeftAntiJoin() {
+ final String sql = "select a.cid, a.cname, count(1) as amount\n"
+ + "from geo.area1 as a\n"
+ + "left anti join (select distinct cid, cname\n"
+ + "from geo.area2\n"
+ + "where cname = 'cityA') as b on a.cid = b.cid\n"
+ + "group by a.cid, a.cname";
+ final String expected = "SELECT `A`.`CID`, `A`.`CNAME`, COUNT(1) AS
`AMOUNT`\n"
+ + "FROM `GEO`.`AREA1` AS `A`\n"
+ + "LEFT ANTI JOIN (SELECT DISTINCT `CID`, `CNAME`\n"
+ + "FROM `GEO`.`AREA2`\n"
+ + "WHERE (`CNAME` = 'cityA')) AS `B` ON (`A`.`CID` = `B`.`CID`)\n"
+ + "GROUP BY `A`.`CID`, `A`.`CNAME`";
+ sql(sql).ok(expected);
+ }
+
/** Similar to {@link #testHoist()} but using custom parser. */
@Test void testHoistMySql() {
// SQL contains back-ticks, which require MySQL's quoting,
diff --git a/babel/src/test/java/org/apache/calcite/test/BabelTest.java
b/babel/src/test/java/org/apache/calcite/test/BabelTest.java
index da463df30c..42166a1d0d 100644
--- a/babel/src/test/java/org/apache/calcite/test/BabelTest.java
+++ b/babel/src/test/java/org/apache/calcite/test/BabelTest.java
@@ -247,6 +247,21 @@ class BabelTest {
.type("RecordType(INTEGER NOT NULL DEPTNO) NOT NULL");
}
+ /** Test case for
+ * <a
href="https://issues.apache.org/jira/browse/CALCITE-5962">[CALCITE-5962]
+ * Support parse Spark-style syntax "LEFT ANTI JOIN" in Babel parser</a>. */
+ @Test public void testLeftAntiJoin() {
+ final SqlValidatorFixture v = Fixtures.forValidator()
+ .withParserConfig(c -> c.withParserFactory(SqlBabelParserImpl.FACTORY))
+ .withConformance(SqlConformanceEnum.BABEL);
+
+ v.withSql("SELECT * FROM dept LEFT ANTI JOIN emp ON emp.deptno =
dept.deptno")
+ .type("RecordType(INTEGER NOT NULL DEPTNO, VARCHAR(10) NOT NULL NAME)
NOT NULL");
+
+ v.withSql("SELECT name FROM dept LEFT ANTI JOIN emp ON emp.deptno =
dept.deptno")
+ .type("RecordType(VARCHAR(10) NOT NULL NAME) NOT NULL");
+ }
+
private void checkSqlResult(String funLibrary, String query, String result) {
CalciteAssert.that()
.with(CalciteConnectionProperty.PARSER_FACTORY,
diff --git a/babel/src/test/resources/sql/select.iq
b/babel/src/test/resources/sql/select.iq
index 0d665540c8..9c234d1a04 100755
--- a/babel/src/test/resources/sql/select.iq
+++ b/babel/src/test/resources/sql/select.iq
@@ -51,6 +51,28 @@ FROM emp LEFT SEMI JOIN dept ON emp.deptno = dept.deptno
where dept.deptno = 100
Table 'DEPT' not found
!error
+# LEFT ANTI JOIN (Spark only)
+# Only LHS columns are referenced in SELECT clause with LEFT ANTI JOIN syntax
+SELECT *
+FROM emp LEFT ANTI JOIN dept ON emp.deptno = dept.deptno;
+
+SELECT "EMP"."EMPNO", "EMP"."ENAME", "EMP"."JOB", "EMP"."MGR",
"EMP"."HIREDATE", "EMP"."SAL", "EMP"."COMM", "EMP"."DEPTNO"
+FROM "scott"."EMP" AS "EMP"
+ LEFT ANTI JOIN "scott"."DEPT" AS "DEPT" ON "EMP"."DEPTNO" = "DEPT"."DEPTNO"
+!explain-validated-on spark
+
+# Can not reference RHS columns in SELECT clause with LEFT ANTI JOIN syntax
+SELECT emp.ename
+FROM dept LEFT ANTI JOIN emp ON dept.deptno = emp.deptno;
+Table 'EMP' not found
+!error
+
+# Can not reference RHS columns in WHERE clause with LEFT ANTI JOIN syntax
+SELECT *
+FROM dept LEFT ANTI JOIN emp ON dept.deptno = emp.deptno where emp.empno = 30;
+Table 'EMP' not found
+!error
+
# Test CONNECT BY (Oracle only)
!if (false) {
SELECT *
diff --git a/core/src/main/codegen/templates/Parser.jj
b/core/src/main/codegen/templates/Parser.jj
index fffe2a4dca..269e11d2ba 100644
--- a/core/src/main/codegen/templates/Parser.jj
+++ b/core/src/main/codegen/templates/Parser.jj
@@ -1988,8 +1988,8 @@ SqlLiteral JoinType() :
}
{
(
- LOOKAHEAD(3) // required for "LEFT SEMI JOIN" in Babel
<#list (parser.joinTypes!default.parser.joinTypes) as method>
+ LOOKAHEAD(3) // required for "LEFT SEMI JOIN" and "LEFT ANTI JOIN" in
Babel
joinType = ${method}()
|
</#list>
diff --git a/core/src/main/java/org/apache/calcite/sql/JoinType.java
b/core/src/main/java/org/apache/calcite/sql/JoinType.java
index fa689c611b..bb75755a35 100644
--- a/core/src/main/java/org/apache/calcite/sql/JoinType.java
+++ b/core/src/main/java/org/apache/calcite/sql/JoinType.java
@@ -54,6 +54,13 @@ public enum JoinType implements Symbolizable {
*/
LEFT_SEMI_JOIN,
+ /**
+ * Left anti join.
+ *
+ * <p>Not used by Calcite; only in Babel's Spark dialect.
+ */
+ LEFT_ANTI_JOIN,
+
/**
* Comma join: the good old-fashioned SQL <code>FROM</code> clause,
* where table expressions are specified with commas between them, and
diff --git a/core/src/main/java/org/apache/calcite/sql/SqlJoin.java
b/core/src/main/java/org/apache/calcite/sql/SqlJoin.java
index 94887f23f2..75706adaa1 100644
--- a/core/src/main/java/org/apache/calcite/sql/SqlJoin.java
+++ b/core/src/main/java/org/apache/calcite/sql/SqlJoin.java
@@ -236,6 +236,10 @@ public class SqlJoin extends SqlCall {
writer.sep(join.isNatural() ? "NATURAL LEFT SEMI JOIN"
: "LEFT SEMI JOIN");
break;
+ case LEFT_ANTI_JOIN:
+ writer.sep(join.isNatural() ? "NATURAL LEFT ANTI JOIN"
+ : "LEFT ANTI JOIN");
+ break;
case RIGHT:
writer.sep(join.isNatural() ? "NATURAL RIGHT JOIN" : "RIGHT JOIN");
break;
diff --git
a/core/src/main/java/org/apache/calcite/sql/validate/JoinNamespace.java
b/core/src/main/java/org/apache/calcite/sql/validate/JoinNamespace.java
index a869f1167c..f816d6deb5 100644
--- a/core/src/main/java/org/apache/calcite/sql/validate/JoinNamespace.java
+++ b/core/src/main/java/org/apache/calcite/sql/validate/JoinNamespace.java
@@ -57,8 +57,9 @@ class JoinNamespace extends AbstractNamespace {
leftType = typeFactory.createTypeWithNullability(leftType, true);
rightType = typeFactory.createTypeWithNullability(rightType, true);
break;
- // LEFT SEMI JOIN can only come from Babel.
+ // LEFT SEMI JOIN and LEFT ANTI JOIN can only come from Babel.
case LEFT_SEMI_JOIN:
+ case LEFT_ANTI_JOIN:
return typeFactory.createJoinType(leftType);
default:
break;
diff --git a/core/src/main/java/org/apache/calcite/sql/validate/JoinScope.java
b/core/src/main/java/org/apache/calcite/sql/validate/JoinScope.java
index deabdbfcbb..f03f11201c 100644
--- a/core/src/main/java/org/apache/calcite/sql/validate/JoinScope.java
+++ b/core/src/main/java/org/apache/calcite/sql/validate/JoinScope.java
@@ -22,6 +22,7 @@ import org.apache.calcite.sql.SqlWindow;
import org.checkerframework.checker.nullness.qual.Nullable;
+import static org.apache.calcite.sql.JoinType.LEFT_ANTI_JOIN;
import static org.apache.calcite.sql.JoinType.LEFT_SEMI_JOIN;
import static org.apache.calcite.sql.SqlUtil.stripAs;
@@ -70,8 +71,9 @@ public class JoinScope extends ListScope {
boolean nullable) {
super.addChild(ns, alias, nullable);
- // LEFT SEMI JOIN can only come from Babel.
- if (join.getJoinType() == LEFT_SEMI_JOIN
+ // LEFT SEMI JOIN and LEFT ANTI JOIN can only come from Babel.
+ if ((join.getJoinType() == LEFT_SEMI_JOIN
+ || join.getJoinType() == LEFT_ANTI_JOIN)
&& stripAs(join.getRight()) == ns.getNode()) {
// Ignore the right hand side.
return;
diff --git
a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java
b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java
index 75eb3637ed..1e649a1f7f 100644
--- a/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java
+++ b/core/src/main/java/org/apache/calcite/sql/validate/SqlValidatorImpl.java
@@ -3619,10 +3619,11 @@ public class SqlValidatorImpl implements
SqlValidatorWithHints {
// Which join types require/allow a ON/USING condition, or allow
// a NATURAL keyword?
switch (joinType) {
+ case LEFT_ANTI_JOIN:
case LEFT_SEMI_JOIN:
if (!this.config.conformance().isLiberal()) {
throw newValidationError(join.getJoinTypeNode(),
- RESOURCE.dialectDoesNotSupportFeature("LEFT SEMI JOIN"));
+ RESOURCE.dialectDoesNotSupportFeature(joinType.name()));
}
// fall through
case INNER: