>From Wail Alkowaileet <[email protected]>:
Wail Alkowaileet has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17377 )
Change subject: [WIP] Allow to use hash-join for redundant variables
......................................................................
[WIP] Allow to use hash-join for redundant variables
Change-Id: I3226457e4c9352fae0433cfeb39a158ec6562955
---
A
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/InlineAndRemoveRedundantBooleanExpressionsRule.java
A
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/AbstractConditionExpressionRule.java
M
hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/properties/IPartitioningRequirementsCoordinator.java
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/base/RuleCollections.java
A
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/ExtractRedundantVariablesInJoinRule.java
A
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/NormalizeAndRemoveRedundantBooleanExpressionsInJoinRule.java
M
asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/FunctionUtil.java
7 files changed, 591 insertions(+), 3 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/77/17377/1
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/base/RuleCollections.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/base/RuleCollections.java
index 473c8ec..594cf31 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/base/RuleCollections.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/base/RuleCollections.java
@@ -44,6 +44,7 @@
import
org.apache.asterix.optimizer.rules.ExtractBatchableExternalFunctionCallsRule;
import org.apache.asterix.optimizer.rules.ExtractDistinctByExpressionsRule;
import org.apache.asterix.optimizer.rules.ExtractOrderExpressionsRule;
+import org.apache.asterix.optimizer.rules.ExtractRedundantVariablesInJoinRule;
import org.apache.asterix.optimizer.rules.ExtractWindowExpressionsRule;
import org.apache.asterix.optimizer.rules.FeedScanCollectionToUnnest;
import
org.apache.asterix.optimizer.rules.FilterRefineSpatialJoinRuleForSTDistanceFunction;
@@ -69,6 +70,7 @@
import org.apache.asterix.optimizer.rules.LoadRecordFieldsRule;
import org.apache.asterix.optimizer.rules.MetaFunctionToMetaVariableRule;
import org.apache.asterix.optimizer.rules.NestGroupByRule;
+import
org.apache.asterix.optimizer.rules.NormalizeAndRemoveRedundantBooleanExpressionsInJoinRule;
import org.apache.asterix.optimizer.rules.PullSelectOutOfSpatialJoin;
import
org.apache.asterix.optimizer.rules.PushAggFuncIntoStandaloneAggregateRule;
import org.apache.asterix.optimizer.rules.PushAggregateIntoNestedSubplanRule;
@@ -350,6 +352,10 @@
planCleanupRules.add(new
RemoveUnknownCheckForKnownTypeExpressionRule());
// relies on RemoveOrReplaceDefaultNullCastRule AND
RemoveUnknownCheckForKnownTypeExpressionRule
planCleanupRules.add(new RemoveRedundantSelectRule());
+ planCleanupRules.add(new
NormalizeAndRemoveRedundantBooleanExpressionsInJoinRule());
+ // NormalizeAndRemoveRedundantBooleanExpressionsInJoinRule has to run
first to probably eliminate the need for
+ // introducing an assign operator in ExtractSimilarVariablesInJoinRule
+ planCleanupRules.add(new ExtractRedundantVariablesInJoinRule());
// Needs to invoke ByNameToByIndexFieldAccessRule as the last logical
optimization rule because
// some rules can push a FieldAccessByName to a place where the name
it tries to access is in the closed part.
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/AbstractConditionExpressionRule.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/AbstractConditionExpressionRule.java
new file mode 100644
index 0000000..2dded4a
--- /dev/null
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/AbstractConditionExpressionRule.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.optimizer.rules;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import
org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import org.apache.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
+import
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
+import
org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
+import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+public abstract class AbstractConditionExpressionRule implements
IAlgebraicRewriteRule {
+ private IOptimizationContext context;
+
+ @Override
+ public boolean rewritePost(Mutable<ILogicalOperator> opRef,
IOptimizationContext context)
+ throws AlgebricksException {
+ final ILogicalOperator op = opRef.getValue();
+ final Mutable<ILogicalExpression> condRef;
+ switch (op.getOperatorTag()) {
+ case SELECT:
+ final SelectOperator select = (SelectOperator) op;
+ condRef = select.getCondition();
+ break;
+ case INNERJOIN:
+ case LEFTOUTERJOIN:
+ final AbstractBinaryJoinOperator join =
(AbstractBinaryJoinOperator) op;
+ condRef = join.getCondition();
+ break;
+ default:
+ return false;
+ }
+
+ this.context = context;
+
+ boolean changed = transform(condRef);
+ if (changed) {
+ context.computeAndSetTypeEnvironmentForOperator(op);
+ }
+
+ return changed;
+ }
+
+ protected final AbstractFunctionCallExpression
getFunctionExpression(ILogicalExpression expression) {
+ if (expression.getExpressionTag() !=
LogicalExpressionTag.FUNCTION_CALL) {
+ return null;
+ }
+
+ return (AbstractFunctionCallExpression) expression;
+ }
+
+ protected final IFunctionInfo getFunctionInfo(FunctionIdentifier fid) {
+ return context.getMetadataProvider().lookupFunction(fid);
+ }
+
+ /**
+ * Transform condition expression
+ *
+ * @param condRef SELECT or join condition reference
+ * @return {@code <code>true</code>} condition has been modified
+ * {@code <code>false</code>} otherwise.
+ */
+ protected abstract boolean transform(Mutable<ILogicalExpression> condRef);
+}
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/ExtractRedundantVariablesInJoinRule.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/ExtractRedundantVariablesInJoinRule.java
new file mode 100644
index 0000000..b723009
--- /dev/null
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/ExtractRedundantVariablesInJoinRule.java
@@ -0,0 +1,179 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.optimizer.rules;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import
org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import
org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import
org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
+import
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
+import
org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
+import
org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+public class ExtractRedundantVariablesInJoinRule implements
IAlgebraicRewriteRule {
+ private final Map<LogicalVariable, List<Mutable<ILogicalExpression>>>
variableToExpressiosnMap = new HashMap<>();
+ private final Set<LogicalVariable> leftLiveVars = new HashSet<>();
+
+ @Override
+ public boolean rewritePost(Mutable<ILogicalOperator> opRef,
IOptimizationContext context)
+ throws AlgebricksException {
+ ILogicalOperator op = opRef.getValue();
+ if (op.getOperatorTag() != LogicalOperatorTag.INNERJOIN
+ && op.getOperatorTag() != LogicalOperatorTag.LEFTOUTERJOIN) {
+ return false;
+ }
+
+ AbstractBinaryJoinOperator joinOp = (AbstractBinaryJoinOperator) op;
+ if (!ensureAndExtractVarAndExpr(joinOp.getCondition().getValue())) {
+ return false;
+ }
+
+ setLeftLiveVariables(joinOp);
+
+ List<LogicalVariable> leftAssignVars = new ArrayList<>();
+ List<Mutable<ILogicalExpression>> leftAssignExprs = new ArrayList<>();
+
+ List<LogicalVariable> rightAssignVars = new ArrayList<>();
+ List<Mutable<ILogicalExpression>> rightAssignExprs = new ArrayList<>();
+
+ for (Map.Entry<LogicalVariable, List<Mutable<ILogicalExpression>>> kv
: variableToExpressiosnMap.entrySet()) {
+ LogicalVariable repeatedVariable = kv.getKey();
+ List<Mutable<ILogicalExpression>> repeatedReferences =
kv.getValue();
+
+ if (leftLiveVars.contains(repeatedVariable)) {
+ reassignRepeatedVariables(context, repeatedVariable,
repeatedReferences, leftAssignVars,
+ leftAssignExprs);
+ } else {
+ reassignRepeatedVariables(context, repeatedVariable,
repeatedReferences, rightAssignVars,
+ rightAssignExprs);
+ }
+ }
+
+ if (!leftAssignVars.isEmpty()) {
+ createAndSetAssign(context, joinOp.getInputs().get(0),
leftAssignVars, leftAssignExprs);
+ }
+
+ if (!rightAssignVars.isEmpty()) {
+ createAndSetAssign(context, joinOp.getInputs().get(1),
rightAssignVars, rightAssignExprs);
+ }
+
+ return true;
+ }
+
+ private void createAndSetAssign(IOptimizationContext context,
Mutable<ILogicalOperator> joinInputRef,
+ List<LogicalVariable> assignVars,
List<Mutable<ILogicalExpression>> assignExprs)
+ throws AlgebricksException {
+ AssignOperator assignOp = new AssignOperator(assignVars, assignExprs);
+ assignOp.getInputs().add(new MutableObject<>(joinInputRef.getValue()));
+ joinInputRef.setValue(assignOp);
+ context.computeAndSetTypeEnvironmentForOperator(assignOp);
+ }
+
+ private void setLeftLiveVariables(AbstractBinaryJoinOperator op) throws
AlgebricksException {
+ ILogicalOperator leftOp = op.getInputs().get(0).getValue();
+ leftLiveVars.clear();
+ VariableUtilities.getLiveVariables(leftOp, leftLiveVars);
+ }
+
+ private void reassignRepeatedVariables(IOptimizationContext context,
LogicalVariable repeatedVariable,
+ List<Mutable<ILogicalExpression>> repeatedReferences,
List<LogicalVariable> assignVars,
+ List<Mutable<ILogicalExpression>> assignExprs) {
+
+ // keep one of the repeated references and reassign the others
+ for (int i = 1; i < repeatedReferences.size(); i++) {
+ Mutable<ILogicalExpression> exprRef = repeatedReferences.get(i);
+ LogicalVariable newVar = context.newVar();
+ exprRef.setValue(new VariableReferenceExpression(newVar));
+
+ assignVars.add(newVar);
+ assignExprs.add(new MutableObject<>(new
VariableReferenceExpression(repeatedVariable)));
+ // Prevent inlining the variable
+ context.addNotToBeInlinedVar(newVar);
+ }
+ }
+
+ private boolean ensureAndExtractVarAndExpr(ILogicalExpression expr) {
+ if (expr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
+ return false;
+ }
+
+ AbstractFunctionCallExpression funcExpr =
(AbstractFunctionCallExpression) expr;
+ if
(!AlgebricksBuiltinFunctions.AND.equals(funcExpr.getFunctionIdentifier())) {
+ return false;
+ }
+
+ variableToExpressiosnMap.clear();
+ boolean containsRepeatedReferences = false;
+ for (Mutable<ILogicalExpression> argRef : funcExpr.getArguments()) {
+ ILogicalExpression arg = argRef.getValue();
+ if (arg.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
+ return false;
+ }
+
+ AbstractFunctionCallExpression argFuncExpr =
(AbstractFunctionCallExpression) arg;
+ if
(!AlgebricksBuiltinFunctions.EQ.equals(argFuncExpr.getFunctionIdentifier())) {
+ return false;
+ }
+
+ List<Mutable<ILogicalExpression>> eqArgs =
argFuncExpr.getArguments();
+ Mutable<ILogicalExpression> leftRef = eqArgs.get(0);
+ Mutable<ILogicalExpression> rightRef = eqArgs.get(1);
+
+ ILogicalExpression left = leftRef.getValue();
+ ILogicalExpression right = rightRef.getValue();
+
+ LogicalVariable leftVar = VariableUtilities.getVariable(left);
+ LogicalVariable rightVar = VariableUtilities.getVariable(right);
+
+ // shouldn't be possible. But here for sanity check
+ if (leftVar == null || rightVar == null) {
+ return false;
+ }
+
+ List<Mutable<ILogicalExpression>> leftList =
+ variableToExpressiosnMap.computeIfAbsent(leftVar, k -> new
ArrayList<>());
+ leftList.add(leftRef);
+
+ List<Mutable<ILogicalExpression>> rightList =
+ variableToExpressiosnMap.computeIfAbsent(rightVar, k ->
new ArrayList<>());
+ rightList.add(rightRef);
+
+ containsRepeatedReferences |= leftList.size() > 1 ||
rightList.size() > 1;
+ }
+
+ // return true only if there's a repeated reference to a variable
+ return containsRepeatedReferences;
+ }
+}
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/InlineAndRemoveRedundantBooleanExpressionsRule.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/InlineAndRemoveRedundantBooleanExpressionsRule.java
new file mode 100644
index 0000000..7eebf1c
--- /dev/null
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/InlineAndRemoveRedundantBooleanExpressionsRule.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.optimizer.rules;
+
+import java.util.List;
+
+import org.apache.asterix.lang.common.util.FunctionUtil;
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import
org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import
org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
+import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+
+/**
+ * Inline and remove redundant boolean expressions
+ * <p>
+ * Inline Example:
+ * and(x, and(y, and(z, w))) -> and(x, y, z, w)
+ * <p>
+ * Remove redundant example:
+ * or(x, y, y) -> or(x, y)
+ * TODO(wyk) include this rule in {@link
org.apache.asterix.optimizer.base.RuleCollections}
+ */
+public class InlineAndRemoveRedundantBooleanExpressionsRule extends
AbstractConditionExpressionRule {
+
+ @Override
+ protected boolean transform(Mutable<ILogicalExpression> condRef) {
+ AbstractFunctionCallExpression function =
getFunctionExpression(condRef.getValue());
+ if (function == null) {
+ return false;
+ }
+
+ boolean changed = false;
+ for (Mutable<ILogicalExpression> argRef : function.getArguments()) {
+ changed |= transform(argRef);
+ }
+
+ final FunctionIdentifier fid = function.getFunctionIdentifier();
+ if (AlgebricksBuiltinFunctions.AND.equals(fid) ||
AlgebricksBuiltinFunctions.OR.equals(fid)) {
+ changed |= inlineCondition(function);
+ changed |= removeRedundantExpressions(function.getArguments());
+
+ //Special case: disjuncts/conjuncts have been factored out into a
single (non-disjunct/conjunct) expression
+ if (function.getArguments().size() == 1) {
+ final ILogicalExpression newCond =
function.getArguments().get(0).getValue();
+ condRef.setValue(newCond);
+ }
+ }
+
+ return changed;
+ }
+
+ private boolean inlineCondition(AbstractFunctionCallExpression function) {
+ final FunctionIdentifier fid = function.getFunctionIdentifier();
+ final List<Mutable<ILogicalExpression>> args = function.getArguments();
+
+ int i = 0;
+ boolean changed = false;
+ while (i < args.size()) {
+ final AbstractFunctionCallExpression argFunction =
getFunctionExpression(args.get(i).getValue());
+ if (argFunction != null &&
fid.equals(argFunction.getFunctionIdentifier())) {
+ args.remove(i);
+ args.addAll(i, argFunction.getArguments());
+ changed = true;
+ } else {
+ i++;
+ }
+ }
+
+ return changed;
+ }
+
+ private boolean
removeRedundantExpressions(List<Mutable<ILogicalExpression>> exprs) {
+ final int originalSize = exprs.size();
+ int i = 0;
+ while (i < exprs.size()) {
+ int j = i + 1;
+ while (j < exprs.size()) {
+ if (FunctionUtil.commutativeEquals(exprs.get(i).getValue(),
exprs.get(j).getValue())) {
+ exprs.remove(j);
+ } else {
+ j++;
+ }
+ }
+ i++;
+ }
+
+ return exprs.size() != originalSize;
+ }
+
+}
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/NormalizeAndRemoveRedundantBooleanExpressionsInJoinRule.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/NormalizeAndRemoveRedundantBooleanExpressionsInJoinRule.java
new file mode 100644
index 0000000..e91394d
--- /dev/null
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/NormalizeAndRemoveRedundantBooleanExpressionsInJoinRule.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.optimizer.rules;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.core.algebra.base.EquivalenceClass;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import
org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import
org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
+import
org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.FDsAndEquivClassesVisitor;
+import
org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+
+public class NormalizeAndRemoveRedundantBooleanExpressionsInJoinRule
+ extends InlineAndRemoveRedundantBooleanExpressionsRule {
+ private final FDsAndEquivClassesVisitor visitor = new
FDsAndEquivClassesVisitor();
+ private final Map<LogicalVariable, LogicalVariable> normalizedVariables =
new HashMap<>();
+
+ @Override
+ public boolean rewritePre(Mutable<ILogicalOperator> opRef,
IOptimizationContext context)
+ throws AlgebricksException {
+ return false;
+ }
+
+ @Override
+ public boolean rewritePost(Mutable<ILogicalOperator> opRef,
IOptimizationContext context)
+ throws AlgebricksException {
+ ILogicalOperator op = opRef.getValue();
+ LogicalOperatorTag opTag = op.getOperatorTag();
+
+ if (context.checkIfInDontApplySet(this, op)) {
+ return false;
+ }
+
+ if (opTag != LogicalOperatorTag.INNERJOIN && opTag !=
LogicalOperatorTag.LEFTOUTERJOIN) {
+ // TODO FDsAndEquivClassesVisitor alters the distinct variables?
We have seen bugs with distinct
+ // not sure if that related
+ if (op.getOperatorTag() != LogicalOperatorTag.DISTINCT) {
+ // Compute the equivalent classes for op
+ op.accept(visitor, context);
+ }
+ context.addToDontApplySet(this, op);
+ return false;
+ }
+
+ boolean changed = normalize(context, op);
+ // compute equivalent classes for the join op
+ op.accept(visitor, context);
+ context.addToDontApplySet(this, op);
+ return changed;
+ }
+
+ private boolean normalize(IOptimizationContext context, ILogicalOperator
op) {
+ AbstractBinaryJoinOperator joinOp = (AbstractBinaryJoinOperator) op;
+ ILogicalOperator leftOp = joinOp.getInputs().get(0).getValue();
+ ILogicalOperator rightOp = joinOp.getInputs().get(1).getValue();
+
+ Map<LogicalVariable, EquivalenceClass> leftEqMap =
context.getEquivalenceClassMap(leftOp);
+ Map<LogicalVariable, EquivalenceClass> rightEqMap =
context.getEquivalenceClassMap(rightOp);
+
+ normalizedVariables.clear();
+
+ Mutable<ILogicalExpression> joinCondRef = joinOp.getCondition();
+ Mutable<ILogicalExpression> clonedCondition = new
MutableObject<>(joinCondRef.getValue().cloneExpression());
+
+ if (normalizeVariables(leftEqMap, rightEqMap, clonedCondition) &&
transform(clonedCondition)) {
+ // replace the join condition iff the normalization led to a
minimized circuit
+ joinCondRef.setValue(clonedCondition.getValue());
+ return true;
+ }
+
+ return false;
+ }
+
+ private boolean normalizeVariables(Map<LogicalVariable, EquivalenceClass>
leftEqMap,
+ Map<LogicalVariable, EquivalenceClass> rightEqMap,
Mutable<ILogicalExpression> exprRef) {
+ ILogicalExpression expr = exprRef.getValue();
+ if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
+ return processFunction(leftEqMap, rightEqMap,
(AbstractFunctionCallExpression) expr);
+ } else if (expr.getExpressionTag() == LogicalExpressionTag.CONSTANT) {
+ // TODO is this possible in joins?
+ return false;
+ }
+
+ LogicalVariable toNormalizeVariable =
VariableUtilities.getVariable(expr);
+ LogicalVariable normalized =
+ getNormalizedVariableAndSetEquivalentsIfAny(leftEqMap,
rightEqMap, toNormalizeVariable);
+
+ if (normalized == toNormalizeVariable) {
+ // both are the same, do nothing
+ return false;
+ }
+
+ // we need to replace the variable expression using the normalized
expression
+ exprRef.setValue(new VariableReferenceExpression(normalized));
+ return true;
+ }
+
+ private LogicalVariable getNormalizedVariableAndSetEquivalentsIfAny(
+ Map<LogicalVariable, EquivalenceClass> leftEqMap,
Map<LogicalVariable, EquivalenceClass> rightEqMap,
+ LogicalVariable toNormalizeVariable) {
+ if (normalizedVariables.containsKey(toNormalizeVariable)) {
+ // get the normalized variable
+ return normalizedVariables.get(toNormalizeVariable);
+ } else if (leftEqMap.containsKey(toNormalizeVariable)) {
+ setNormalizedVariables(toNormalizeVariable,
leftEqMap.get(toNormalizeVariable));
+ } else if (rightEqMap.containsKey(toNormalizeVariable)) {
+ setNormalizedVariables(toNormalizeVariable,
rightEqMap.get(toNormalizeVariable));
+ }
+
+ return toNormalizeVariable;
+ }
+
+ private void setNormalizedVariables(LogicalVariable toNormalizeVariable,
EquivalenceClass equivalenceClass) {
+ for (LogicalVariable eqVar : equivalenceClass.getMembers()) {
+ normalizedVariables.put(eqVar, toNormalizeVariable);
+ }
+ }
+
+ private boolean processFunction(Map<LogicalVariable, EquivalenceClass>
leftEqMap,
+ Map<LogicalVariable, EquivalenceClass> rightEqMap,
AbstractFunctionCallExpression funcExpr) {
+
+ boolean changed = false;
+ for (Mutable<ILogicalExpression> argRef : funcExpr.getArguments()) {
+ changed |= normalizeVariables(leftEqMap, rightEqMap, argRef);
+ }
+
+ return changed;
+ }
+}
diff --git
a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/FunctionUtil.java
b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/FunctionUtil.java
index 1cf88c2..45aec68 100644
---
a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/FunctionUtil.java
+++
b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/FunctionUtil.java
@@ -26,6 +26,7 @@
import java.util.List;
import java.util.Map;
import java.util.Objects;
+import java.util.Set;
import java.util.function.BiFunction;
import org.apache.asterix.common.exceptions.AsterixException;
@@ -67,6 +68,8 @@
public class FunctionUtil {
public static final String IMPORT_PRIVATE_FUNCTIONS =
"import-private-functions";
+ private static final Set<FunctionIdentifier> COMMUTATIVE_FUNCTIONS =
+ Set.of(BuiltinFunctions.EQ, BuiltinFunctions.NUMERIC_ADD,
BuiltinFunctions.NUMERIC_MULTIPLY);
private static final DataverseName FN_DATASET_DATAVERSE_NAME =
FunctionSignature.getDataverseName(BuiltinFunctions.DATASET);
@@ -316,4 +319,38 @@
return BuiltinFunctions.FIELD_ACCESS_BY_INDEX.equals(fid) ||
BuiltinFunctions.FIELD_ACCESS_BY_NAME.equals(fid)
|| BuiltinFunctions.FIELD_ACCESS_NESTED.equals(fid);
}
+
+ public static boolean commutativeEquals(ILogicalExpression expr1,
ILogicalExpression expr2) {
+ if (expr1.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL
+ || expr2.getExpressionTag() !=
LogicalExpressionTag.FUNCTION_CALL) {
+ return expr1.equals(expr2);
+ }
+
+ AbstractFunctionCallExpression funcExpr1 =
(AbstractFunctionCallExpression) expr1;
+ AbstractFunctionCallExpression funcExpr2 =
(AbstractFunctionCallExpression) expr2;
+
+ FunctionIdentifier fid1 = funcExpr1.getFunctionIdentifier();
+ FunctionIdentifier fid2 = funcExpr2.getFunctionIdentifier();
+
+ List<Mutable<ILogicalExpression>> args1 = funcExpr1.getArguments();
+ List<Mutable<ILogicalExpression>> args2 = funcExpr2.getArguments();
+
+ if (!fid1.equals(fid2) || args1.size() != args2.size()) {
+ return false;
+ } else if (!COMMUTATIVE_FUNCTIONS.contains(fid1)) {
+ return expr1.equals(expr2);
+ }
+
+ int numberOfEqualities = 0;
+ for (Mutable<ILogicalExpression> arg1 : funcExpr1.getArguments()) {
+ for (Mutable<ILogicalExpression> arg2 : funcExpr2.getArguments()) {
+ if (commutativeEquals(arg1.getValue(), arg2.getValue())) {
+ numberOfEqualities++;
+ break;
+ }
+ }
+ }
+
+ return numberOfEqualities == args1.size();
+ }
}
diff --git
a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/properties/IPartitioningRequirementsCoordinator.java
b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/properties/IPartitioningRequirementsCoordinator.java
index d515fcf..06f4e0b 100644
---
a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/properties/IPartitioningRequirementsCoordinator.java
+++
b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/properties/IPartitioningRequirementsCoordinator.java
@@ -29,6 +29,7 @@
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import org.apache.hyracks.api.exceptions.ErrorCode;
/**
* Implements constraints in between requirements for the children of the same
@@ -80,10 +81,17 @@
}
if (!covered.equals(set1)) {
- throw new AlgebricksException("Could not
modify " + rqdpp
- + " to agree with partitioning
property " + firstDeliveredPartitioning
- + " delivered by previous input
operator.");
+ throw new
AlgebricksException(ErrorCode.ILLEGAL_STATE,
+ "Could not modify " + rqdpp + " to
agree with partitioning property "
+ +
firstDeliveredPartitioning
+ + " delivered by previous
input operator.");
}
+
+ if (modifuppreq.size() != set1.size()) {
+ throw new
AlgebricksException(ErrorCode.ILLEGAL_STATE,
+ "The number of variables are not
equal in both partitioning sides");
+ }
+
UnorderedPartitionedProperty upp2 =
new
UnorderedPartitionedProperty(modifuppreq, rqdpp.getNodeDomain());
return new Pair<Boolean,
IPartitioningProperty>(false, upp2);
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17377
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: neo
Gerrit-Change-Id: I3226457e4c9352fae0433cfeb39a158ec6562955
Gerrit-Change-Number: 17377
Gerrit-PatchSet: 1
Gerrit-Owner: Wail Alkowaileet <[email protected]>
Gerrit-MessageType: newchange