>From Wail Alkowaileet <[email protected]>:

Wail Alkowaileet has uploaded this change for review. ( 
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17377 )


Change subject: [WIP] Allow to use hash-join for redundant variables
......................................................................

[WIP] Allow to use hash-join for redundant variables

Change-Id: I3226457e4c9352fae0433cfeb39a158ec6562955
---
A 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/InlineAndRemoveRedundantBooleanExpressionsRule.java
A 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/AbstractConditionExpressionRule.java
M 
hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/properties/IPartitioningRequirementsCoordinator.java
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/base/RuleCollections.java
A 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/ExtractRedundantVariablesInJoinRule.java
A 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/NormalizeAndRemoveRedundantBooleanExpressionsInJoinRule.java
M 
asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/FunctionUtil.java
7 files changed, 591 insertions(+), 3 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb 
refs/changes/77/17377/1

diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/base/RuleCollections.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/base/RuleCollections.java
index 473c8ec..594cf31 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/base/RuleCollections.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/base/RuleCollections.java
@@ -44,6 +44,7 @@
 import 
org.apache.asterix.optimizer.rules.ExtractBatchableExternalFunctionCallsRule;
 import org.apache.asterix.optimizer.rules.ExtractDistinctByExpressionsRule;
 import org.apache.asterix.optimizer.rules.ExtractOrderExpressionsRule;
+import org.apache.asterix.optimizer.rules.ExtractRedundantVariablesInJoinRule;
 import org.apache.asterix.optimizer.rules.ExtractWindowExpressionsRule;
 import org.apache.asterix.optimizer.rules.FeedScanCollectionToUnnest;
 import 
org.apache.asterix.optimizer.rules.FilterRefineSpatialJoinRuleForSTDistanceFunction;
@@ -69,6 +70,7 @@
 import org.apache.asterix.optimizer.rules.LoadRecordFieldsRule;
 import org.apache.asterix.optimizer.rules.MetaFunctionToMetaVariableRule;
 import org.apache.asterix.optimizer.rules.NestGroupByRule;
+import 
org.apache.asterix.optimizer.rules.NormalizeAndRemoveRedundantBooleanExpressionsInJoinRule;
 import org.apache.asterix.optimizer.rules.PullSelectOutOfSpatialJoin;
 import 
org.apache.asterix.optimizer.rules.PushAggFuncIntoStandaloneAggregateRule;
 import org.apache.asterix.optimizer.rules.PushAggregateIntoNestedSubplanRule;
@@ -350,6 +352,10 @@
         planCleanupRules.add(new 
RemoveUnknownCheckForKnownTypeExpressionRule());
         // relies on RemoveOrReplaceDefaultNullCastRule AND 
RemoveUnknownCheckForKnownTypeExpressionRule
         planCleanupRules.add(new RemoveRedundantSelectRule());
+        planCleanupRules.add(new 
NormalizeAndRemoveRedundantBooleanExpressionsInJoinRule());
+        // NormalizeAndRemoveRedundantBooleanExpressionsInJoinRule has to run 
first to probably eliminate the need for
+        // introducing an assign operator in ExtractSimilarVariablesInJoinRule
+        planCleanupRules.add(new ExtractRedundantVariablesInJoinRule());

         // Needs to invoke ByNameToByIndexFieldAccessRule as the last logical 
optimization rule because
         // some rules can push a FieldAccessByName to a place where the name 
it tries to access is in the closed part.
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/AbstractConditionExpressionRule.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/AbstractConditionExpressionRule.java
new file mode 100644
index 0000000..2dded4a
--- /dev/null
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/AbstractConditionExpressionRule.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.optimizer.rules;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import 
org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import org.apache.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
+import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
+import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
+import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+public abstract class AbstractConditionExpressionRule implements 
IAlgebraicRewriteRule {
+    private IOptimizationContext context;
+
+    @Override
+    public boolean rewritePost(Mutable<ILogicalOperator> opRef, 
IOptimizationContext context)
+            throws AlgebricksException {
+        final ILogicalOperator op = opRef.getValue();
+        final Mutable<ILogicalExpression> condRef;
+        switch (op.getOperatorTag()) {
+            case SELECT:
+                final SelectOperator select = (SelectOperator) op;
+                condRef = select.getCondition();
+                break;
+            case INNERJOIN:
+            case LEFTOUTERJOIN:
+                final AbstractBinaryJoinOperator join = 
(AbstractBinaryJoinOperator) op;
+                condRef = join.getCondition();
+                break;
+            default:
+                return false;
+        }
+
+        this.context = context;
+
+        boolean changed = transform(condRef);
+        if (changed) {
+            context.computeAndSetTypeEnvironmentForOperator(op);
+        }
+
+        return changed;
+    }
+
+    protected final AbstractFunctionCallExpression 
getFunctionExpression(ILogicalExpression expression) {
+        if (expression.getExpressionTag() != 
LogicalExpressionTag.FUNCTION_CALL) {
+            return null;
+        }
+
+        return (AbstractFunctionCallExpression) expression;
+    }
+
+    protected final IFunctionInfo getFunctionInfo(FunctionIdentifier fid) {
+        return context.getMetadataProvider().lookupFunction(fid);
+    }
+
+    /**
+     * Transform condition expression
+     *
+     * @param condRef SELECT or join condition reference
+     * @return {@code <code>true</code>} condition has been modified
+     * {@code <code>false</code>} otherwise.
+     */
+    protected abstract boolean transform(Mutable<ILogicalExpression> condRef);
+}
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/ExtractRedundantVariablesInJoinRule.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/ExtractRedundantVariablesInJoinRule.java
new file mode 100644
index 0000000..b723009
--- /dev/null
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/ExtractRedundantVariablesInJoinRule.java
@@ -0,0 +1,179 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.optimizer.rules;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import 
org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import 
org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import 
org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
+import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
+import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
+import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+public class ExtractRedundantVariablesInJoinRule implements 
IAlgebraicRewriteRule {
+    private final Map<LogicalVariable, List<Mutable<ILogicalExpression>>> 
variableToExpressiosnMap = new HashMap<>();
+    private final Set<LogicalVariable> leftLiveVars = new HashSet<>();
+
+    @Override
+    public boolean rewritePost(Mutable<ILogicalOperator> opRef, 
IOptimizationContext context)
+            throws AlgebricksException {
+        ILogicalOperator op = opRef.getValue();
+        if (op.getOperatorTag() != LogicalOperatorTag.INNERJOIN
+                && op.getOperatorTag() != LogicalOperatorTag.LEFTOUTERJOIN) {
+            return false;
+        }
+
+        AbstractBinaryJoinOperator joinOp = (AbstractBinaryJoinOperator) op;
+        if (!ensureAndExtractVarAndExpr(joinOp.getCondition().getValue())) {
+            return false;
+        }
+
+        setLeftLiveVariables(joinOp);
+
+        List<LogicalVariable> leftAssignVars = new ArrayList<>();
+        List<Mutable<ILogicalExpression>> leftAssignExprs = new ArrayList<>();
+
+        List<LogicalVariable> rightAssignVars = new ArrayList<>();
+        List<Mutable<ILogicalExpression>> rightAssignExprs = new ArrayList<>();
+
+        for (Map.Entry<LogicalVariable, List<Mutable<ILogicalExpression>>> kv 
: variableToExpressiosnMap.entrySet()) {
+            LogicalVariable repeatedVariable = kv.getKey();
+            List<Mutable<ILogicalExpression>> repeatedReferences = 
kv.getValue();
+
+            if (leftLiveVars.contains(repeatedVariable)) {
+                reassignRepeatedVariables(context, repeatedVariable, 
repeatedReferences, leftAssignVars,
+                        leftAssignExprs);
+            } else {
+                reassignRepeatedVariables(context, repeatedVariable, 
repeatedReferences, rightAssignVars,
+                        rightAssignExprs);
+            }
+        }
+
+        if (!leftAssignVars.isEmpty()) {
+            createAndSetAssign(context, joinOp.getInputs().get(0), 
leftAssignVars, leftAssignExprs);
+        }
+
+        if (!rightAssignVars.isEmpty()) {
+            createAndSetAssign(context, joinOp.getInputs().get(1), 
rightAssignVars, rightAssignExprs);
+        }
+
+        return true;
+    }
+
+    private void createAndSetAssign(IOptimizationContext context, 
Mutable<ILogicalOperator> joinInputRef,
+            List<LogicalVariable> assignVars, 
List<Mutable<ILogicalExpression>> assignExprs)
+            throws AlgebricksException {
+        AssignOperator assignOp = new AssignOperator(assignVars, assignExprs);
+        assignOp.getInputs().add(new MutableObject<>(joinInputRef.getValue()));
+        joinInputRef.setValue(assignOp);
+        context.computeAndSetTypeEnvironmentForOperator(assignOp);
+    }
+
+    private void setLeftLiveVariables(AbstractBinaryJoinOperator op) throws 
AlgebricksException {
+        ILogicalOperator leftOp = op.getInputs().get(0).getValue();
+        leftLiveVars.clear();
+        VariableUtilities.getLiveVariables(leftOp, leftLiveVars);
+    }
+
+    private void reassignRepeatedVariables(IOptimizationContext context, 
LogicalVariable repeatedVariable,
+            List<Mutable<ILogicalExpression>> repeatedReferences, 
List<LogicalVariable> assignVars,
+            List<Mutable<ILogicalExpression>> assignExprs) {
+
+        // keep one of the repeated references and reassign the others
+        for (int i = 1; i < repeatedReferences.size(); i++) {
+            Mutable<ILogicalExpression> exprRef = repeatedReferences.get(i);
+            LogicalVariable newVar = context.newVar();
+            exprRef.setValue(new VariableReferenceExpression(newVar));
+
+            assignVars.add(newVar);
+            assignExprs.add(new MutableObject<>(new 
VariableReferenceExpression(repeatedVariable)));
+            // Prevent inlining the variable
+            context.addNotToBeInlinedVar(newVar);
+        }
+    }
+
+    private boolean ensureAndExtractVarAndExpr(ILogicalExpression expr) {
+        if (expr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
+            return false;
+        }
+
+        AbstractFunctionCallExpression funcExpr = 
(AbstractFunctionCallExpression) expr;
+        if 
(!AlgebricksBuiltinFunctions.AND.equals(funcExpr.getFunctionIdentifier())) {
+            return false;
+        }
+
+        variableToExpressiosnMap.clear();
+        boolean containsRepeatedReferences = false;
+        for (Mutable<ILogicalExpression> argRef : funcExpr.getArguments()) {
+            ILogicalExpression arg = argRef.getValue();
+            if (arg.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
+                return false;
+            }
+
+            AbstractFunctionCallExpression argFuncExpr = 
(AbstractFunctionCallExpression) arg;
+            if 
(!AlgebricksBuiltinFunctions.EQ.equals(argFuncExpr.getFunctionIdentifier())) {
+                return false;
+            }
+
+            List<Mutable<ILogicalExpression>> eqArgs = 
argFuncExpr.getArguments();
+            Mutable<ILogicalExpression> leftRef = eqArgs.get(0);
+            Mutable<ILogicalExpression> rightRef = eqArgs.get(1);
+
+            ILogicalExpression left = leftRef.getValue();
+            ILogicalExpression right = rightRef.getValue();
+
+            LogicalVariable leftVar = VariableUtilities.getVariable(left);
+            LogicalVariable rightVar = VariableUtilities.getVariable(right);
+
+            // shouldn't be possible. But here for sanity check
+            if (leftVar == null || rightVar == null) {
+                return false;
+            }
+
+            List<Mutable<ILogicalExpression>> leftList =
+                    variableToExpressiosnMap.computeIfAbsent(leftVar, k -> new 
ArrayList<>());
+            leftList.add(leftRef);
+
+            List<Mutable<ILogicalExpression>> rightList =
+                    variableToExpressiosnMap.computeIfAbsent(rightVar, k -> 
new ArrayList<>());
+            rightList.add(rightRef);
+
+            containsRepeatedReferences |= leftList.size() > 1 || 
rightList.size() > 1;
+        }
+
+        // return true only if there's a repeated reference to a variable
+        return containsRepeatedReferences;
+    }
+}
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/InlineAndRemoveRedundantBooleanExpressionsRule.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/InlineAndRemoveRedundantBooleanExpressionsRule.java
new file mode 100644
index 0000000..7eebf1c
--- /dev/null
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/InlineAndRemoveRedundantBooleanExpressionsRule.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.optimizer.rules;
+
+import java.util.List;
+
+import org.apache.asterix.lang.common.util.FunctionUtil;
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import 
org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import 
org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
+import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+
+/**
+ * Inline and remove redundant boolean expressions
+ * <p>
+ * Inline Example:
+ * and(x, and(y, and(z, w))) -> and(x, y, z, w)
+ * <p>
+ * Remove redundant example:
+ * or(x, y, y) -> or(x, y)
+ * TODO(wyk) include this rule in {@link 
org.apache.asterix.optimizer.base.RuleCollections}
+ */
+public class InlineAndRemoveRedundantBooleanExpressionsRule extends 
AbstractConditionExpressionRule {
+
+    @Override
+    protected boolean transform(Mutable<ILogicalExpression> condRef) {
+        AbstractFunctionCallExpression function = 
getFunctionExpression(condRef.getValue());
+        if (function == null) {
+            return false;
+        }
+
+        boolean changed = false;
+        for (Mutable<ILogicalExpression> argRef : function.getArguments()) {
+            changed |= transform(argRef);
+        }
+
+        final FunctionIdentifier fid = function.getFunctionIdentifier();
+        if (AlgebricksBuiltinFunctions.AND.equals(fid) || 
AlgebricksBuiltinFunctions.OR.equals(fid)) {
+            changed |= inlineCondition(function);
+            changed |= removeRedundantExpressions(function.getArguments());
+
+            //Special case: disjuncts/conjuncts have been factored out into a 
single (non-disjunct/conjunct) expression
+            if (function.getArguments().size() == 1) {
+                final ILogicalExpression newCond = 
function.getArguments().get(0).getValue();
+                condRef.setValue(newCond);
+            }
+        }
+
+        return changed;
+    }
+
+    private boolean inlineCondition(AbstractFunctionCallExpression function) {
+        final FunctionIdentifier fid = function.getFunctionIdentifier();
+        final List<Mutable<ILogicalExpression>> args = function.getArguments();
+
+        int i = 0;
+        boolean changed = false;
+        while (i < args.size()) {
+            final AbstractFunctionCallExpression argFunction = 
getFunctionExpression(args.get(i).getValue());
+            if (argFunction != null && 
fid.equals(argFunction.getFunctionIdentifier())) {
+                args.remove(i);
+                args.addAll(i, argFunction.getArguments());
+                changed = true;
+            } else {
+                i++;
+            }
+        }
+
+        return changed;
+    }
+
+    private boolean 
removeRedundantExpressions(List<Mutable<ILogicalExpression>> exprs) {
+        final int originalSize = exprs.size();
+        int i = 0;
+        while (i < exprs.size()) {
+            int j = i + 1;
+            while (j < exprs.size()) {
+                if (FunctionUtil.commutativeEquals(exprs.get(i).getValue(), 
exprs.get(j).getValue())) {
+                    exprs.remove(j);
+                } else {
+                    j++;
+                }
+            }
+            i++;
+        }
+
+        return exprs.size() != originalSize;
+    }
+
+}
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/NormalizeAndRemoveRedundantBooleanExpressionsInJoinRule.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/NormalizeAndRemoveRedundantBooleanExpressionsInJoinRule.java
new file mode 100644
index 0000000..e91394d
--- /dev/null
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/NormalizeAndRemoveRedundantBooleanExpressionsInJoinRule.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.optimizer.rules;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.core.algebra.base.EquivalenceClass;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import 
org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import 
org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
+import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.FDsAndEquivClassesVisitor;
+import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+
+public class NormalizeAndRemoveRedundantBooleanExpressionsInJoinRule
+        extends InlineAndRemoveRedundantBooleanExpressionsRule {
+    private final FDsAndEquivClassesVisitor visitor = new 
FDsAndEquivClassesVisitor();
+    private final Map<LogicalVariable, LogicalVariable> normalizedVariables = 
new HashMap<>();
+
+    @Override
+    public boolean rewritePre(Mutable<ILogicalOperator> opRef, 
IOptimizationContext context)
+            throws AlgebricksException {
+        return false;
+    }
+
+    @Override
+    public boolean rewritePost(Mutable<ILogicalOperator> opRef, 
IOptimizationContext context)
+            throws AlgebricksException {
+        ILogicalOperator op = opRef.getValue();
+        LogicalOperatorTag opTag = op.getOperatorTag();
+
+        if (context.checkIfInDontApplySet(this, op)) {
+            return false;
+        }
+
+        if (opTag != LogicalOperatorTag.INNERJOIN && opTag != 
LogicalOperatorTag.LEFTOUTERJOIN) {
+            // TODO FDsAndEquivClassesVisitor alters the distinct variables? 
We have seen bugs with distinct
+            // not sure if that related
+            if (op.getOperatorTag() != LogicalOperatorTag.DISTINCT) {
+                // Compute the equivalent classes for op
+                op.accept(visitor, context);
+            }
+            context.addToDontApplySet(this, op);
+            return false;
+        }
+
+        boolean changed = normalize(context, op);
+        // compute equivalent classes for the join op
+        op.accept(visitor, context);
+        context.addToDontApplySet(this, op);
+        return changed;
+    }
+
+    private boolean normalize(IOptimizationContext context, ILogicalOperator 
op) {
+        AbstractBinaryJoinOperator joinOp = (AbstractBinaryJoinOperator) op;
+        ILogicalOperator leftOp = joinOp.getInputs().get(0).getValue();
+        ILogicalOperator rightOp = joinOp.getInputs().get(1).getValue();
+
+        Map<LogicalVariable, EquivalenceClass> leftEqMap = 
context.getEquivalenceClassMap(leftOp);
+        Map<LogicalVariable, EquivalenceClass> rightEqMap = 
context.getEquivalenceClassMap(rightOp);
+
+        normalizedVariables.clear();
+
+        Mutable<ILogicalExpression> joinCondRef = joinOp.getCondition();
+        Mutable<ILogicalExpression> clonedCondition = new 
MutableObject<>(joinCondRef.getValue().cloneExpression());
+
+        if (normalizeVariables(leftEqMap, rightEqMap, clonedCondition) && 
transform(clonedCondition)) {
+            // replace the join condition iff the normalization led to a 
minimized circuit
+            joinCondRef.setValue(clonedCondition.getValue());
+            return true;
+        }
+
+        return false;
+    }
+
+    private boolean normalizeVariables(Map<LogicalVariable, EquivalenceClass> 
leftEqMap,
+            Map<LogicalVariable, EquivalenceClass> rightEqMap, 
Mutable<ILogicalExpression> exprRef) {
+        ILogicalExpression expr = exprRef.getValue();
+        if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
+            return processFunction(leftEqMap, rightEqMap, 
(AbstractFunctionCallExpression) expr);
+        } else if (expr.getExpressionTag() == LogicalExpressionTag.CONSTANT) {
+            // TODO is this possible in joins?
+            return false;
+        }
+
+        LogicalVariable toNormalizeVariable = 
VariableUtilities.getVariable(expr);
+        LogicalVariable normalized =
+                getNormalizedVariableAndSetEquivalentsIfAny(leftEqMap, 
rightEqMap, toNormalizeVariable);
+
+        if (normalized == toNormalizeVariable) {
+            // both are the same, do nothing
+            return false;
+        }
+
+        // we need to replace the variable expression using the normalized 
expression
+        exprRef.setValue(new VariableReferenceExpression(normalized));
+        return true;
+    }
+
+    private LogicalVariable getNormalizedVariableAndSetEquivalentsIfAny(
+            Map<LogicalVariable, EquivalenceClass> leftEqMap, 
Map<LogicalVariable, EquivalenceClass> rightEqMap,
+            LogicalVariable toNormalizeVariable) {
+        if (normalizedVariables.containsKey(toNormalizeVariable)) {
+            // get the normalized variable
+            return normalizedVariables.get(toNormalizeVariable);
+        } else if (leftEqMap.containsKey(toNormalizeVariable)) {
+            setNormalizedVariables(toNormalizeVariable, 
leftEqMap.get(toNormalizeVariable));
+        } else if (rightEqMap.containsKey(toNormalizeVariable)) {
+            setNormalizedVariables(toNormalizeVariable, 
rightEqMap.get(toNormalizeVariable));
+        }
+
+        return toNormalizeVariable;
+    }
+
+    private void setNormalizedVariables(LogicalVariable toNormalizeVariable, 
EquivalenceClass equivalenceClass) {
+        for (LogicalVariable eqVar : equivalenceClass.getMembers()) {
+            normalizedVariables.put(eqVar, toNormalizeVariable);
+        }
+    }
+
+    private boolean processFunction(Map<LogicalVariable, EquivalenceClass> 
leftEqMap,
+            Map<LogicalVariable, EquivalenceClass> rightEqMap, 
AbstractFunctionCallExpression funcExpr) {
+
+        boolean changed = false;
+        for (Mutable<ILogicalExpression> argRef : funcExpr.getArguments()) {
+            changed |= normalizeVariables(leftEqMap, rightEqMap, argRef);
+        }
+
+        return changed;
+    }
+}
diff --git 
a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/FunctionUtil.java
 
b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/FunctionUtil.java
index 1cf88c2..45aec68 100644
--- 
a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/FunctionUtil.java
+++ 
b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/FunctionUtil.java
@@ -26,6 +26,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
+import java.util.Set;
 import java.util.function.BiFunction;

 import org.apache.asterix.common.exceptions.AsterixException;
@@ -67,6 +68,8 @@
 public class FunctionUtil {

     public static final String IMPORT_PRIVATE_FUNCTIONS = 
"import-private-functions";
+    private static final Set<FunctionIdentifier> COMMUTATIVE_FUNCTIONS =
+            Set.of(BuiltinFunctions.EQ, BuiltinFunctions.NUMERIC_ADD, 
BuiltinFunctions.NUMERIC_MULTIPLY);
 
     private static final DataverseName FN_DATASET_DATAVERSE_NAME =
             FunctionSignature.getDataverseName(BuiltinFunctions.DATASET);
@@ -316,4 +319,38 @@
         return BuiltinFunctions.FIELD_ACCESS_BY_INDEX.equals(fid) || 
BuiltinFunctions.FIELD_ACCESS_BY_NAME.equals(fid)
                 || BuiltinFunctions.FIELD_ACCESS_NESTED.equals(fid);
     }
+
+    public static boolean commutativeEquals(ILogicalExpression expr1, 
ILogicalExpression expr2) {
+        if (expr1.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL
+                || expr2.getExpressionTag() != 
LogicalExpressionTag.FUNCTION_CALL) {
+            return expr1.equals(expr2);
+        }
+
+        AbstractFunctionCallExpression funcExpr1 = 
(AbstractFunctionCallExpression) expr1;
+        AbstractFunctionCallExpression funcExpr2 = 
(AbstractFunctionCallExpression) expr2;
+
+        FunctionIdentifier fid1 = funcExpr1.getFunctionIdentifier();
+        FunctionIdentifier fid2 = funcExpr2.getFunctionIdentifier();
+
+        List<Mutable<ILogicalExpression>> args1 = funcExpr1.getArguments();
+        List<Mutable<ILogicalExpression>> args2 = funcExpr2.getArguments();
+
+        if (!fid1.equals(fid2) || args1.size() != args2.size()) {
+            return false;
+        } else if (!COMMUTATIVE_FUNCTIONS.contains(fid1)) {
+            return expr1.equals(expr2);
+        }
+
+        int numberOfEqualities = 0;
+        for (Mutable<ILogicalExpression> arg1 : funcExpr1.getArguments()) {
+            for (Mutable<ILogicalExpression> arg2 : funcExpr2.getArguments()) {
+                if (commutativeEquals(arg1.getValue(), arg2.getValue())) {
+                    numberOfEqualities++;
+                    break;
+                }
+            }
+        }
+
+        return numberOfEqualities == args1.size();
+    }
 }
diff --git 
a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/properties/IPartitioningRequirementsCoordinator.java
 
b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/properties/IPartitioningRequirementsCoordinator.java
index d515fcf..06f4e0b 100644
--- 
a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/properties/IPartitioningRequirementsCoordinator.java
+++ 
b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/properties/IPartitioningRequirementsCoordinator.java
@@ -29,6 +29,7 @@
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
 import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
 import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import org.apache.hyracks.api.exceptions.ErrorCode;

 /**
  * Implements constraints in between requirements for the children of the same
@@ -80,10 +81,17 @@
                                 }

                                 if (!covered.equals(set1)) {
-                                    throw new AlgebricksException("Could not 
modify " + rqdpp
-                                            + " to agree with partitioning 
property " + firstDeliveredPartitioning
-                                            + " delivered by previous input 
operator.");
+                                    throw new 
AlgebricksException(ErrorCode.ILLEGAL_STATE,
+                                            "Could not modify " + rqdpp + " to 
agree with partitioning property "
+                                                    + 
firstDeliveredPartitioning
+                                                    + " delivered by previous 
input operator.");
                                 }
+
+                                if (modifuppreq.size() != set1.size()) {
+                                    throw new 
AlgebricksException(ErrorCode.ILLEGAL_STATE,
+                                            "The number of variables are not 
equal in both partitioning sides");
+                                }
+
                                 UnorderedPartitionedProperty upp2 =
                                         new 
UnorderedPartitionedProperty(modifuppreq, rqdpp.getNodeDomain());
                                 return new Pair<Boolean, 
IPartitioningProperty>(false, upp2);

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17377
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: neo
Gerrit-Change-Id: I3226457e4c9352fae0433cfeb39a158ec6562955
Gerrit-Change-Number: 17377
Gerrit-PatchSet: 1
Gerrit-Owner: Wail Alkowaileet <[email protected]>
Gerrit-MessageType: newchange

Reply via email to