[
https://issues.apache.org/jira/browse/DRILL-4232?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17684213#comment-17684213
]
ASF GitHub Bot commented on DRILL-4232:
---------------------------------------
vvysotskyi commented on code in PR #2599:
URL: https://github.com/apache/drill/pull/2599#discussion_r1096581680
##########
exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillAddAggForExceptRule.java:
##########
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.planner.logical;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptRuleOperand;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.trace.CalciteTrace;
+import org.apache.curator.shaded.com.google.common.collect.ImmutableList;
+import org.apache.drill.exec.planner.physical.PrelUtil;
+import org.slf4j.Logger;
+
+import static org.apache.drill.exec.ExecConstants.EXCEPT_ADD_AGG_BELOW;
+
+/**
+ * Rule that try to add agg for Except set op.
+ */
+public class DrillAddAggForExceptRule extends RelOptRule {
+ public static final RelOptRule INSTANCE = new
DrillAddAggForExceptRule(RelOptHelper.any(DrillExceptRel.class),
"DrillAddAggForExceptRule");
+ protected static final Logger tracer = CalciteTrace.getPlannerTracer();
+
+ public DrillAddAggForExceptRule(RelOptRuleOperand operand, String
description) {
+ super(operand, description);
+ }
+
+ @Override
+ public boolean matches(RelOptRuleCall call) {
+ DrillExceptRel drillExceptRel = call.rel(0);
+ return !drillExceptRel.all && !drillExceptRel.isAggAdded() &&
!findAggRel(drillExceptRel.getInput(0));
Review Comment:
I'm not sure whether this check would work properly in some cases. For
example, the volcano planner will use RelSet to wrap nodes, and perhaps there
are some other cases. Instead, I propose using
`RelMetadataQuery.getUniqueKeys()` to ensure that input columns have unique
values, and if it is so, do not add aggregate. It calls methods from
`org.apache.calcite.rel.metadata.RelMdUniqueKeys` for specific node types and
should handle more cases than existing checks. In this case, the `isAggAdded`
field wouldn't be required.
##########
exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillAddAggForExceptRule.java:
##########
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.planner.logical;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptRuleOperand;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.trace.CalciteTrace;
+import org.apache.curator.shaded.com.google.common.collect.ImmutableList;
+import org.apache.drill.exec.planner.physical.PrelUtil;
+import org.slf4j.Logger;
+
+import static org.apache.drill.exec.ExecConstants.EXCEPT_ADD_AGG_BELOW;
+
+/**
+ * Rule that try to add agg for Except set op.
+ */
+public class DrillAddAggForExceptRule extends RelOptRule {
+ public static final RelOptRule INSTANCE = new
DrillAddAggForExceptRule(RelOptHelper.any(DrillExceptRel.class),
"DrillAddAggForExceptRule");
+ protected static final Logger tracer = CalciteTrace.getPlannerTracer();
+
+ public DrillAddAggForExceptRule(RelOptRuleOperand operand, String
description) {
+ super(operand, description);
+ }
+
+ @Override
+ public boolean matches(RelOptRuleCall call) {
+ DrillExceptRel drillExceptRel = call.rel(0);
+ return !drillExceptRel.all && !drillExceptRel.isAggAdded() &&
!findAggRel(drillExceptRel.getInput(0));
+ }
+
+ private boolean findAggRel(RelNode relNode) {
+ if (relNode instanceof HepRelVertex) {
+ return findAggRel(((HepRelVertex) relNode).getCurrentRel());
+ }
+ if (relNode instanceof DrillAggregateRel) {
+ return true;
+ }
+ if (relNode.getInputs().size() == 1 && relNode.getInput(0) != null) {
+ return findAggRel(relNode.getInput(0));
+ }
+ return false;
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ final DrillExceptRel drillExceptRel = call.rel(0);
+ boolean addAggBelow =
PrelUtil.getPlannerSettings(call.getPlanner()).getOptions().getOption(EXCEPT_ADD_AGG_BELOW);
+ if (addAggBelow) {
+ RelNode aggNode = new DrillAggregateRel(drillExceptRel.getCluster(),
drillExceptRel.getTraitSet(), drillExceptRel.getInput(0),
+ ImmutableBitSet.range(0,
drillExceptRel.getInput(0).getRowType().getFieldList().size()),
ImmutableList.of(), ImmutableList.of());
+ call.transformTo(drillExceptRel.copy(ImmutableList.of(aggNode,
drillExceptRel.getInput(1)), true));
+ } else {
+ call.transformTo(new DrillAggregateRel(drillExceptRel.getCluster(),
drillExceptRel.getTraitSet(), drillExceptRel.copy(true),
Review Comment:
Do we need to add aggregate on top of except?
##########
exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillSetOpRule.java:
##########
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.planner.logical;
+
+import org.apache.calcite.plan.Convention;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptRuleOperand;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.InvalidRelException;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.SetOp;
+import org.apache.calcite.rel.logical.LogicalIntersect;
+import org.apache.calcite.rel.logical.LogicalMinus;
+import org.apache.calcite.util.trace.CalciteTrace;
+import org.slf4j.Logger;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Rule that converts {@link LogicalIntersect} or {@link LogicalMinus} to
+ * {@link DrillIntersectRel} or {@link DrillExceptRel}.
+ */
+public class DrillSetOpRule extends RelOptRule {
+ public static final List<RelOptRule> INSTANCES = Arrays.asList(
+ new DrillSetOpRule(RelOptHelper.any(LogicalIntersect.class,
Convention.NONE), "DrillIntersectRelRule"),
+ new DrillSetOpRule(RelOptHelper.any(LogicalMinus.class,
Convention.NONE), "DrillExceptRelRule")
+ );
+ protected static final Logger tracer = CalciteTrace.getPlannerTracer();
+
+ public DrillSetOpRule(RelOptRuleOperand operand, String description) {
+ super(operand, description);
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ final SetOp setOp = call.rel(0);
+ final RelTraitSet traits =
setOp.getTraitSet().plus(DrillRel.DRILL_LOGICAL);
+ final List<RelNode> convertedInputs = new ArrayList<>();
+ for (RelNode input : setOp.getInputs()) {
+ RelNode convertedInput = convert(input,
input.getTraitSet().plus(DrillRel.DRILL_LOGICAL).simplify());
+ convertedInputs.add(convertedInput);
+ }
+ try {
+ if (setOp instanceof LogicalMinus) {
Review Comment:
We could create and pass a specific rel factory to the place where the rule
is created and use it here instead of checking which type of node is.
##########
exec/java-exec/src/test/java/org/apache/drill/TestSetOp.java:
##########
@@ -0,0 +1,1183 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill;
+
+import org.apache.drill.exec.planner.physical.PlannerSettings;
+import org.apache.drill.exec.record.BatchSchema;
+import org.apache.drill.exec.record.BatchSchemaBuilder;
+import org.apache.drill.exec.record.metadata.SchemaBuilder;
+import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.drill.categories.OperatorTest;
+import org.apache.drill.categories.SqlTest;
+import org.apache.drill.categories.UnlikelyTest;
+import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.exec.ExecConstants;
+import org.apache.drill.test.ClusterFixture;
+import org.apache.drill.test.ClusterTest;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.nio.file.Paths;
+import java.util.List;
+
+import static org.junit.Assert.assertTrue;
+
+@Category({SqlTest.class, OperatorTest.class})
+public class TestSetOp extends ClusterTest {
+ private static final String EMPTY_DIR_NAME = "empty_directory";
+ private static final String SLICE_TARGET_DEFAULT = "alter session reset
`planner.slice_target`";
+
+ @BeforeClass
+ public static void setupTestFiles() throws Exception {
+ startCluster(ClusterFixture.builder(dirTestWatcher));
+ dirTestWatcher.copyResourceToRoot(Paths.get("multilevel", "parquet"));
+ dirTestWatcher.makeTestTmpSubDir(Paths.get(EMPTY_DIR_NAME));
+ }
+
+ @Test
+ public void TestExceptionWithSchemaLessDataSource() {
+ boolean exceptionEncountered = true;
+ String root = "/multilevel/csv/1994/Q1/orders_94_q1.csv";
+ try {
+ testBuilder()
+ .sqlQuery("select * from cp.`%s` intersect select * from cp.`%s`",
root, root)
+ .unOrdered()
+ .baselineColumns("a", "b")
+ .baselineValues(1, 1)
+ .go();
+ exceptionEncountered = false;
Review Comment:
instead of the flag please use the `Assert.fail("reason")` method here.
##########
logical/src/main/java/org/apache/drill/common/logical/data/visitors/LogicalVisitor.java:
##########
@@ -62,7 +64,10 @@
public RETURN visitRunningAggregate(RunningAggregate runningAggregate,
EXTRA value) throws EXCEP;
public RETURN visitTransform(Transform transform, EXTRA value) throws
EXCEP;
public RETURN visitUnion(Union union, EXTRA value) throws EXCEP;
- public RETURN visitWindow(Window window, EXTRA value) throws EXCEP;
+ public RETURN visitExcept(Except except, EXTRA value) throws EXCEP;
+ public RETURN visitIntersect(Intersect intersect, EXTRA value) throws
EXCEP;
+
+ public RETURN visitWindow(Window window, EXTRA value) throws EXCEP;
Review Comment:
Please fix the indentation here.
##########
exec/java-exec/src/test/java/org/apache/drill/TestSetOp.java:
##########
@@ -0,0 +1,1183 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill;
+
+import org.apache.drill.exec.planner.physical.PlannerSettings;
+import org.apache.drill.exec.record.BatchSchema;
+import org.apache.drill.exec.record.BatchSchemaBuilder;
+import org.apache.drill.exec.record.metadata.SchemaBuilder;
+import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.drill.categories.OperatorTest;
+import org.apache.drill.categories.SqlTest;
+import org.apache.drill.categories.UnlikelyTest;
+import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.exec.ExecConstants;
+import org.apache.drill.test.ClusterFixture;
+import org.apache.drill.test.ClusterTest;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.nio.file.Paths;
+import java.util.List;
+
+import static org.junit.Assert.assertTrue;
+
+@Category({SqlTest.class, OperatorTest.class})
+public class TestSetOp extends ClusterTest {
+ private static final String EMPTY_DIR_NAME = "empty_directory";
+ private static final String SLICE_TARGET_DEFAULT = "alter session reset
`planner.slice_target`";
+
+ @BeforeClass
+ public static void setupTestFiles() throws Exception {
+ startCluster(ClusterFixture.builder(dirTestWatcher));
+ dirTestWatcher.copyResourceToRoot(Paths.get("multilevel", "parquet"));
+ dirTestWatcher.makeTestTmpSubDir(Paths.get(EMPTY_DIR_NAME));
+ }
+
+ @Test
+ public void TestExceptionWithSchemaLessDataSource() {
+ boolean exceptionEncountered = true;
+ String root = "/multilevel/csv/1994/Q1/orders_94_q1.csv";
+ try {
+ testBuilder()
+ .sqlQuery("select * from cp.`%s` intersect select * from cp.`%s`",
root, root)
+ .unOrdered()
+ .baselineColumns("a", "b")
+ .baselineValues(1, 1)
+ .go();
+ exceptionEncountered = false;
+ } catch (Exception ex) {
+ assertTrue(ex.getMessage(),
Review Comment:
`assertThat` + `containsString` will show more informative messages in the
case of failures.
> Support for EXCEPT set operator
> -------------------------------
>
> Key: DRILL-4232
> URL: https://issues.apache.org/jira/browse/DRILL-4232
> Project: Apache Drill
> Issue Type: New Feature
> Components: Query Planning & Optimization
> Reporter: Victoria Markman
> Assignee: Tengfei Wang
> Priority: Major
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)