>From <[email protected]>:

[email protected] has uploaded this change for review. ( 
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19550 )


Change subject: PLEASE EDIT to provide a meaningful commit message!
......................................................................

PLEASE EDIT to provide a meaningful commit message!

The following commits from your working branch will be included:

commit aa98f96037c1b47bcc06bef13ad83cb6de13307d
Author: murali4104 <[email protected]>
Date:   Tue Mar 25 15:15:10 2025 -0700

    [ASTERIXDB-3568][COMP] Add transitive closure INNER join predicates

Change-Id: I3d8dca03ece22435da6225cb50a4630dce8326bb
---
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinCondition.java
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
3 files changed, 323 insertions(+), 41 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb 
refs/changes/50/19550/1

diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinCondition.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinCondition.java
index 3ef1bea..0cf5bff 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinCondition.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinCondition.java
@@ -19,7 +19,10 @@

 package org.apache.asterix.optimizer.rules.cbo;

+import java.util.List;
+
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;

 public class JoinCondition {

@@ -27,7 +30,7 @@

     protected ILogicalExpression joinCondition;
     protected boolean outerJoin;
-    private boolean derived = false;
+    protected boolean derived = false;
     protected boolean partOfComposite = false;
     protected boolean deleted = false;
     protected int numberOfVars = 0; // how many variables
@@ -35,11 +38,14 @@
     protected int datasetBits;
     // used for triangle detection; we strictly do not mean left and right 
here.
     // first and second sides would be more appropriate
+    protected int leftSide;
+    protected int rightSide;
     protected int leftSideBits;
     protected int rightSideBits;
     protected double selectivity;
     protected comparisonOp comparisonType;
-    protected JoinOperator joinOp;
+    protected JoinOperator joinOp = null;
+    protected List<LogicalVariable> usedVars = null;

     protected enum comparisonOp {
         OP_EQ,
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
index 0670fb8..7f53cf1 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
@@ -33,6 +33,7 @@
 import 
org.apache.asterix.common.annotations.SkipSecondaryIndexSearchExpressionAnnotation;
 import org.apache.asterix.common.exceptions.AsterixException;
 import org.apache.asterix.common.exceptions.ErrorCode;
+import org.apache.asterix.lang.common.util.FunctionUtil;
 import org.apache.asterix.metadata.declared.DataSource;
 import org.apache.asterix.metadata.declared.DataSourceId;
 import org.apache.asterix.metadata.declared.DatasetDataSource;
@@ -68,7 +69,9 @@
 import 
org.apache.hyracks.algebricks.core.algebra.expressions.PredicateCardinalityAnnotation;
 import 
org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
 import 
org.apache.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
+import 
org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
 import 
org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
+import org.apache.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
@@ -284,10 +287,19 @@
         ScalarFunctionCallExpression andExpr = new 
ScalarFunctionCallExpression(
                 
BuiltinFunctions.getBuiltinFunctionInfo(AlgebricksBuiltinFunctions.AND));
         for (int joinNum : newJoinConditions) {
-            // need to AND all the expressions.
+            // need to AND all the expressions. skip derived exprs for now.
             JoinCondition jc = joinConditions.get(joinNum);
+            if (jc.derived) {
+                continue;
+            }
             andExpr.getArguments().add(new MutableObject<>(jc.joinCondition));
         }
+
+        if (andExpr.getArguments().size() == 1) {
+            return andExpr.getArguments().get(0).getValue(); // remove the AND 
if there is only one argument
+        } else if (andExpr.getArguments().size() > 1) {
+            return null; // the nested loops code expects only one predicate 
of the type R.a op S.a
+        }
         return andExpr;
     }

@@ -443,8 +455,8 @@
     }

     // This finds all the join Conditions in the whole query. This is a global 
list of all join predicates.
-    // It also fills in the dataset Bits for each join predicate.
-    private void findJoinConditionsAndAssignSels() throws AlgebricksException {
+    // It also fills in the dataset Bits for each join predicate. Add 
Transitive Join Predicates also.
+    private void findJoinConditionsAndDoTC() throws AlgebricksException {
         List<Mutable<ILogicalExpression>> conjs = new ArrayList<>();
         for (JoinOperator jOp : allJoinOps) {
             AbstractBinaryJoinOperator joinOp = jOp.getAbstractJoinOp();
@@ -458,7 +470,9 @@
                     if (jc.outerJoin) {
                         outerJoin = true;
                     }
-                    jc.joinCondition = conj.getValue().cloneExpression();
+                    jc.joinCondition = conj.getValue();
+                    LOGGER.info("adding JC " + jc.joinCondition);
+                    jc.usedVars = getUsedVars(jc);
                     joinConditions.add(jc);
                     jc.joinOp = jOp;
                 }
@@ -469,14 +483,15 @@
                     if (jc.outerJoin) {
                         outerJoin = true;
                     }
-                    // change to not a true condition
-                    jc.joinCondition = expr.cloneExpression();
+                    jc.joinCondition = expr;
+                    LOGGER.info("adding JC " + jc.joinCondition);
+                    jc.usedVars = getUsedVars(jc);
                     joinConditions.add(jc);
                     jc.joinOp = jOp;
                 }
             }
         }
-
+        //addTCJoinPreds(); // transitive close of join predicates
         // now patch up any join conditions that have variables referenced in 
any internal assign statements.
         List<LogicalVariable> usedVars = new ArrayList<>();
         List<AssignOperator> erase = new ArrayList<>();
@@ -494,7 +509,6 @@
                     }
                 }
             }
-            jc.selectivity = 
stats.getSelectivityFromAnnotationMain(jc.joinCondition, true, false, 
jc.joinOp);
         }
         for (int i = erase.size() - 1; i >= 0; i--) {
             assignOps.remove(erase.get(i));
@@ -515,22 +529,131 @@
             }
             jc.numberOfVars = usedVars.size();

+            List<Integer> Indexes = new ArrayList<>(jc.numberOfVars);
             for (int i = 0; i < jc.numberOfVars; i++) {
-                int bits = 1 << (varLeafInputIds.get(usedVars.get(i)) - 1);
-                if (bits != JoinCondition.NO_JC) {
-                    if (i == 0) {
-                        jc.leftSideBits = bits;
-                    } else if (i == 1) {
-                        jc.rightSideBits = bits;
-                    } else {
-                        // have to deal with preds such as r.a + s.a = 5 OR 
r.a + s.a = t.a
+                int idx = varLeafInputIds.get(usedVars.get(i));
+                if (!Indexes.contains(idx)) {
+                    Indexes.add(idx);
+                }
+            }
+
+            for (int i = 0; i < jc.numberOfVars; i++) {
+                if (i < Indexes.size()) {
+                    int side = Indexes.get(i);
+                    int bits = 1 << (Indexes.get(i) - 1);
+                    if (bits != JoinCondition.NO_JC) {
+                        if (i == 0) {
+                            jc.leftSide = side;
+                            jc.leftSideBits = bits;
+                        } else if (i == 1) {
+                            jc.rightSide = side;
+                            jc.rightSideBits = bits;
+                        } else {
+                            // have to deal with preds such as r.a + s.a = 5 
OR r.a + s.a = t.a
+                        }
+                        jc.datasetBits |= bits;
                     }
-                    jc.datasetBits |= bits;
                 }
             }
         }
     }

+    // transitive close of join predicates; add only if they are not already 
present; user may have added them in the query
+    private void addTCJoinPreds() {
+        boolean changes = true;
+        while (changes) {
+            changes = false;
+            int size = joinConditions.size(); // store the size here. We will 
add more join conditions.
+            for (int i = 0; i < size - 1; i++) {
+                List<LogicalVariable> vars1 = joinConditions.get(i).usedVars; 
// see if the predicate just added will yield any TC preds.
+                if (vars1 != null) {
+                    for (int j = i + 1; j < size; j++) {
+                        ILogicalExpression newExpr = null;
+                        List<LogicalVariable> vars2 = 
joinConditions.get(j).usedVars;
+                        if (vars2 != null) {
+                            if (vars1.get(0) == vars2.get(0)) {
+                                if (notFound(vars1.get(1), vars2.get(1))) {
+                                    newExpr = makeNewEQJoinExpr(vars1.get(1), 
vars2.get(1));
+                                }
+                            } else if (vars1.get(0) == vars2.get(1)) {
+                                if (notFound(vars1.get(1), vars2.get(0))) {
+                                    newExpr = makeNewEQJoinExpr(vars1.get(1), 
vars2.get(0));
+                                }
+                            } else if (vars1.get(1) == vars2.get(1)) {
+                                if (notFound(vars1.get(0), vars2.get(0))) {
+                                    newExpr = makeNewEQJoinExpr(vars1.get(0), 
vars2.get(0));
+                                }
+                            } else if (vars1.get(1) == vars2.get(0)) {
+                                if (notFound(vars1.get(0), vars2.get(1))) {
+                                    newExpr = makeNewEQJoinExpr(vars1.get(0), 
vars2.get(1));
+                                }
+                            }
+                        }
+                        if (newExpr != null) {
+                            changes = true;
+                            LOGGER.info("vars1 " + vars1 + "; vars2 " + vars2 
+ " = " + "newExpr " + newExpr);
+                            JoinCondition jc = new JoinCondition();
+                            jc.outerJoin = false;
+                            jc.derived = true; // useful to exclude for NL 
Joins since NL joins can take only one pred
+                            jc.joinCondition = newExpr;
+                            jc.usedVars = getUsedVars(jc);
+                            joinConditions.add(jc);
+                            jc.joinOp = joinConditions.get(i).joinOp; // 
borrowing the joinOp here as this does not have a joinOp of its own.
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    protected ILogicalExpression makeNewEQJoinExpr(LogicalVariable var1, 
LogicalVariable var2) {
+        if (varLeafInputIds.get(var1) == varLeafInputIds.get(var2)) {
+            return null; // must be from different datasets to make a join 
expression
+        }
+        List<Mutable<ILogicalExpression>> arguments = new ArrayList<>();
+        VariableReferenceExpression e1 = new VariableReferenceExpression(var1);
+        arguments.add(new MutableObject<>(e1));
+        VariableReferenceExpression e2 = new VariableReferenceExpression(var2);
+        arguments.add(new MutableObject<>(e2));
+        ScalarFunctionCallExpression expr = new ScalarFunctionCallExpression(
+                FunctionUtil.getFunctionInfo(AlgebricksBuiltinFunctions.EQ), 
arguments);
+        return expr;
+    }
+
+    private boolean notFound(LogicalVariable var1, LogicalVariable var2) {
+        for (int i = 0; i < joinConditions.size(); i++) {
+            List<LogicalVariable> vars = joinConditions.get(i).usedVars;
+            if (vars != null) {
+                if (vars.get(0) == var1 && vars.get(1) == var2) {
+                    return false;
+                }
+                if (vars.get(1) == var1 && vars.get(0) == var2) {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
+    // This routine is collecting information about the variables in the 
predicates to see if we can do a Transitive closure.
+    // Only considering equi join predicates for now.
+    private List<LogicalVariable> getUsedVars(JoinCondition jc) {
+        ILogicalExpression exp = jc.joinCondition;
+        if (!jc.outerJoin) {
+            if 
(exp.getExpressionTag().equals(LogicalExpressionTag.FUNCTION_CALL)) {
+                AbstractFunctionCallExpression afcexpr = 
(AbstractFunctionCallExpression) exp;
+                if 
(afcexpr.getFunctionIdentifier().equals(AlgebricksBuiltinFunctions.EQ)) {
+                    List<LogicalVariable> usedVars = new ArrayList<>();
+                    exp.getUsedVariables(usedVars);
+                    if (usedVars.size() == 2) {
+                        return usedVars;
+                    }
+                }
+            }
+        }
+        return null;
+    }
+
     // in case we have l.partkey = ps.partkey and l.suppkey = ps.suppkey, we 
will only use the first one for cardinality computations.
     // treat it like a Pk-Fk join; simplifies cardinality computation
     private void markCompositeJoinPredicates() {
@@ -1057,8 +1180,12 @@
     // Find the join conditions. Assign selectivities to the join conditions 
from any user provided annotation hints.
     // If there are no annotation hints, use samples to find the selectivities 
of the single table predicates
     // found inside of complex join predicates (as in q7). A lot of extra code 
has gone into making q7 work.
-    private void findJoinConditions() throws AlgebricksException {
-        findJoinConditionsAndAssignSels();
+    // With this routine we can compute the cardinality of the join predicate 
between n1 and n2 correctly (2 in this case).
+    //The predicate in Q7 between n1 and n2 is
+    //(n1.name = INDIA AND n2.name = JAPAN) OR
+    //(n1.name = JAPAN AND n2.name = INDIA)
+    // So this appears as a join predicate but we have to compute the 
selectivities of the selection predicates inside the join. MESSY
+    private void findSelectionPredsInsideJoins() throws AlgebricksException {
         // for all the singleVarExprs, we need to issue a sample query. These 
exprs did not get assigned a selectivity.
         for (ILogicalExpression exp : this.singleDatasetPreds) {
             if (isPredicateCardinalityAnnotationPresent(exp)) {
@@ -1071,7 +1198,7 @@
                 ILogicalOperator leafInput = findLeafInput(vars);
                 SelectOperator selOp;
                 if 
(leafInput.getOperatorTag().equals(LogicalOperatorTag.SELECT)) {
-                    selOp = (SelectOperator) 
getStatsHandle().findSelectOpWithExpr(leafInput, exp);
+                    selOp = getStatsHandle().findSelectOpWithExpr(leafInput, 
exp);
                     if (selOp == null) {
                         selOp = (SelectOperator) leafInput;
                     }
@@ -1091,10 +1218,10 @@
             }
         }

-        if (this.singleDatasetPreds.size() > 0) { // We did not have 
selectivities for these before. Now we do.
+        if (this.singleDatasetPreds.size() > 0) {
             for (JoinCondition jc : joinConditions) {
                 // we may be repeating some work here, but that is ok. This 
will rarely happen (happens in q7 tpch)
-                double sel = 
stats.getSelectivityFromAnnotationMain(jc.getJoinCondition(), false, true, 
null);
+                double sel = stats.getSelectivityFromAnnotationMain(jc, null, 
false, true, null);
                 if (sel != -1) {
                     jc.selectivity = sel;
                 }
@@ -1109,6 +1236,8 @@
         localJoinOp = new InnerJoinOperator(new 
MutableObject<>(ConstantExpression.TRUE),
                 new MutableObject<>(dummyInput), new 
MutableObject<>(dummyInput));

+        findJoinConditionsAndDoTC();
+        addTCSelectionPredicates();
         int lastBaseLevelJnNum = enumerateBaseLevelJoinNodes();
         if (lastBaseLevelJnNum == PlanNode.NO_PLAN) {
             return PlanNode.NO_PLAN;
@@ -1119,7 +1248,11 @@
             EnumerateJoinsRule.printPlan(pp, op, "Original Whole plan in JN 
1");
         }

-        findJoinConditions();
+        for (JoinCondition jc : joinConditions) {
+            jc.selectivity = stats.getSelectivityFromAnnotationMain(jc, null, 
true, false, jc.joinOp);
+        }
+
+        findSelectionPredsInsideJoins(); // This was added to make TPCH Q7 
work.
         findIfJoinGraphIsConnected();

         if (LOGGER.isTraceEnabled()) {
@@ -1138,6 +1271,119 @@
         return lastJn.cheapestPlanIndex;
     }

+    // R.a = S.a and R.a op operand ==> S.a op operand
+    private void addTCSelectionPredicates() throws AlgebricksException {
+        List<SelectOperator> existingSelOps = new ArrayList<>();
+        for (ILogicalOperator leafInput : this.leafInputs) {
+            ILogicalOperator li = leafInput.getInputs().get(0).getValue(); // 
skip the true on the top
+            List<SelectOperator> selOps = findAllSimpleSelOps(li); // variable 
op operand
+            existingSelOps.addAll(selOps);
+        }
+        addTCSelectionPredicatesHelper(existingSelOps);
+    }
+
+    private void addTCSelectionPredicatesHelper(List<SelectOperator> 
existingSelOps) throws AlgebricksException {
+        for (SelectOperator selOp : existingSelOps) {
+            AbstractFunctionCallExpression exp = 
(AbstractFunctionCallExpression) selOp.getCondition().getValue();
+            Mutable<ILogicalExpression> x = exp.getArguments().get(0);
+            VariableReferenceExpression varRef = (VariableReferenceExpression) 
x.getValue();
+            LogicalVariable var = varRef.getVariableReference();
+            SelectOperator newSelOp;
+            List<JoinCondition> jcs = findVarinJoinPreds(var);
+            for (JoinCondition jc : jcs) { // join predicate can be R.a = S.a 
or S.a = R.a. Check for both cases
+                if (var == jc.usedVars.get(0)) { // R.a
+                    newSelOp = makeNewSelOper(existingSelOps, 
jc.usedVars.get(1), // == S.a
+                            ((AbstractFunctionCallExpression) 
selOp.getCondition().getValue()).getFunctionInfo(), // op
+                            exp.getArguments().get(1)); // operand
+                    if (newSelOp != null) { // does not already exist
+                        addSelOpToLeafInput(jc.usedVars.get(1), newSelOp);
+                    }
+                } else if (var == jc.usedVars.get(1)) { // R.a
+                    newSelOp = makeNewSelOper(existingSelOps, 
jc.usedVars.get(0), // == S.a
+                            ((AbstractFunctionCallExpression) 
selOp.getCondition().getValue()).getFunctionInfo(), // op
+                            exp.getArguments().get(1)); // operand
+                    if (newSelOp != null) {
+                        addSelOpToLeafInput(jc.usedVars.get(0), newSelOp);
+                    }
+                }
+            }
+        }
+    }
+
+    private SelectOperator makeNewSelOper(List<SelectOperator> existingSelOps, 
LogicalVariable var, IFunctionInfo tag,
+            Mutable<ILogicalExpression> arg) throws AlgebricksException {
+        List<Mutable<ILogicalExpression>> arguments = new ArrayList<>();
+        VariableReferenceExpression e1 = new VariableReferenceExpression(var);
+        arguments.add(new MutableObject<>(e1)); // S.a
+        arguments.add(new MutableObject<>(arg.getValue())); // this will be 
the operand
+        ScalarFunctionCallExpression expr = new 
ScalarFunctionCallExpression(tag, arguments); //S.a op operand
+        SelectOperator newsel = new SelectOperator(new MutableObject<>(expr), 
null, null);
+        if (newSelNotPresent(newsel, existingSelOps)) {
+            LOGGER.info("adding newsel " + newsel.getCondition());
+            return newsel; // add since it does not exist
+        } else {
+            return null; // already exists, no need to add again
+        }
+    }
+
+    private boolean newSelNotPresent(SelectOperator newsel, 
List<SelectOperator> existingSelOps) {
+        for (SelectOperator existingSelOp : existingSelOps) {
+            if (newsel.getCondition().equals(existingSelOp.getCondition())) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    private void addSelOpToLeafInput(LogicalVariable var, SelectOperator 
newSelOp) throws AlgebricksException {
+        int l = varLeafInputIds.get(var); // get the corresponding leafInput 
using the map
+        ILogicalOperator parent = leafInputs.get(l - 1);
+        ILogicalOperator child = parent.getInputs().get(0).getValue();
+        parent.getInputs().get(0).setValue(newSelOp);
+        newSelOp.getInputs().add(new MutableObject<>(child));
+        optCtx.computeAndSetTypeEnvironmentForOperator(newSelOp);
+    }
+
+    private List<JoinCondition> findVarinJoinPreds(LogicalVariable var) {
+        List<JoinCondition> jcs = new ArrayList<>();
+        for (JoinCondition jc : joinConditions) {
+            if (jc.usedVars != null && jc.usedVars.contains(var)) { // this 
will only search inner join predicates
+                jcs.add(jc);
+            }
+        }
+        return jcs;
+    }
+
+    private List<SelectOperator> findAllSimpleSelOps(ILogicalOperator li) {
+        List<SelectOperator> selOps = new ArrayList<>();
+        while (li != null && li.getOperatorTag() != 
LogicalOperatorTag.EMPTYTUPLESOURCE) {
+            if (li.getOperatorTag().equals(LogicalOperatorTag.SELECT)) {
+                SelectOperator selOp = (SelectOperator) li;
+                ILogicalExpression condition = selOp.getCondition().getValue();
+                if (simpleCondition(condition)) {
+                    selOps.add(selOp);
+                }
+            }
+            li = li.getInputs().get(0).getValue();
+        }
+        return selOps;
+    }
+
+    private boolean simpleCondition(ILogicalExpression condition) {
+        if (condition.getExpressionTag() == 
LogicalExpressionTag.FUNCTION_CALL) {
+            AbstractFunctionCallExpression exp = 
(AbstractFunctionCallExpression) condition;
+            if (exp.getArguments().size() == 2) {
+                Mutable<ILogicalExpression> arg0 = exp.getArguments().get(0);
+                Mutable<ILogicalExpression> arg1 = exp.getArguments().get(1);
+                if (arg0.getValue().getExpressionTag() == 
LogicalExpressionTag.VARIABLE
+                        && arg1.getValue().getExpressionTag() == 
LogicalExpressionTag.CONSTANT) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
     private String dumpJoinNodes(int numJoinNodes) {
         StringBuilder sb = new StringBuilder(128);
         sb.append(LocalDateTime.now());
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index 52566ef..e60ba39 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -114,16 +114,20 @@
         return mdp.findSampleIndex(dsid.getDatabaseName(), 
dsid.getDataverseName(), dsid.getDatasourceName());
     }

-    private double findJoinSelectivity(JoinProductivityAnnotation anno, 
AbstractFunctionCallExpression joinExpr,
-            JoinOperator jOp) throws AlgebricksException {
+    private double findJoinSelectivity(JoinCondition jc, 
JoinProductivityAnnotation anno,
+            AbstractFunctionCallExpression joinExpr, JoinOperator jOp) throws 
AlgebricksException {
         List<LogicalVariable> exprUsedVars = new ArrayList<>();
         joinExpr.getUsedVariables(exprUsedVars);
+        /*
         if (exprUsedVars.size() != 2) {
             // Since there is a left and right dataset here, expecting only 
two variables.
             return 1.0;
         }
+        */

-        int idx1, idx2;
+        int idx1 = jc.leftSide;
+        int idx2 = jc.rightSide;
+        /*
         if (joinEnum.varLeafInputIds.containsKey(exprUsedVars.get(0))) {
             idx1 = joinEnum.varLeafInputIds.get(exprUsedVars.get(0));
         } else
@@ -132,7 +136,7 @@
             idx2 = joinEnum.varLeafInputIds.get(exprUsedVars.get(1));
         } else
             return 1.0;
-
+        */
         if (joinEnum.jnArray[idx1].getFake()) {
             return 1.0;
         }
@@ -190,12 +194,14 @@
             Index.SampleIndexDetails idxDetails1 = (Index.SampleIndexDetails) 
index1.getIndexDetails();
             Index.SampleIndexDetails idxDetails2 = (Index.SampleIndexDetails) 
index2.getIndexDetails();
             if ((idxDetails1.getSourceCardinality() < 
idxDetails1.getSampleCardinalityTarget())
-                    || (idxDetails2.getSourceCardinality() < 
idxDetails2.getSampleCardinalityTarget())) {
+                    || (idxDetails2.getSourceCardinality() < 
idxDetails2.getSampleCardinalityTarget())
+                    || exprUsedVars.size() > 2) { //* if there are more than 2 
variables, it is not a simple join like r.a op s.a
                 double sel = 
findJoinSelFromSamples(joinEnum.leafInputs.get(idx1 - 1),
                         joinEnum.leafInputs.get(idx2 - 1), index1, index2, 
joinExpr, jOp);
-                if (sel > 0.0) { // if sel is 0.0 we call naiveJoinSelectivity
-                    return sel;
+                if (sel == 0.0) { // if sel is 0.0 we call naiveJoinSelectivity
+                    sel = 1 / Math.max(card1, card2);
                 }
+                return sel;
             }
             // Now we can handle only equi joins. We make all the uniform and 
independence assumptions here.
             double sel = naiveJoinSelectivity(exprUsedVars, card1, card2, 
idx1, idx2);
@@ -286,7 +292,7 @@

     // The expression we get may not be a base condition. It could be 
comprised of ors and ands and nots. So have to
     //recursively find the overall selectivity.
-    private double getSelectivityFromAnnotation(AbstractFunctionCallExpression 
afcExpr, boolean join,
+    private double getSelectivityFromAnnotation(JoinCondition jc, 
AbstractFunctionCallExpression afcExpr, boolean join,
             boolean singleDatasetPreds, JoinOperator jOp) throws 
AlgebricksException {
         double sel = 1.0;
         if 
(afcExpr.getFunctionIdentifier().equals(AlgebricksBuiltinFunctions.OR)) {
@@ -294,7 +300,7 @@
             for (int i = 0; i < afcExpr.getArguments().size(); i++) {
                 ILogicalExpression lexpr = 
afcExpr.getArguments().get(i).getValue();
                 if 
(lexpr.getExpressionTag().equals(LogicalExpressionTag.FUNCTION_CALL)) {
-                    sel = getSelectivityFromAnnotation(
+                    sel = getSelectivityFromAnnotation(jc,
                             (AbstractFunctionCallExpression) 
afcExpr.getArguments().get(i).getValue(), join,
                             singleDatasetPreds, jOp);
                     orSel = orSel + sel - orSel * sel;
@@ -306,7 +312,7 @@
             for (int i = 0; i < afcExpr.getArguments().size(); i++) {
                 ILogicalExpression lexpr = 
afcExpr.getArguments().get(i).getValue();
                 if 
(lexpr.getExpressionTag().equals(LogicalExpressionTag.FUNCTION_CALL)) {
-                    sel = getSelectivityFromAnnotation(
+                    sel = getSelectivityFromAnnotation(jc,
                             (AbstractFunctionCallExpression) 
afcExpr.getArguments().get(i).getValue(), join,
                             singleDatasetPreds, jOp);
                     andSel *= sel;
@@ -316,7 +322,7 @@
         } else if 
(afcExpr.getFunctionIdentifier().equals(AlgebricksBuiltinFunctions.NOT)) {
             ILogicalExpression lexpr = 
afcExpr.getArguments().get(0).getValue();
             if 
(lexpr.getExpressionTag().equals(LogicalExpressionTag.FUNCTION_CALL)) {
-                sel = getSelectivityFromAnnotation(
+                sel = getSelectivityFromAnnotation(jc,
                         (AbstractFunctionCallExpression) 
afcExpr.getArguments().get(0).getValue(), join,
                         singleDatasetPreds, jOp);
                 // We want to return 1.0 and not 0.0 if there was no annotation
@@ -347,7 +353,7 @@
             }
         } else {
             JoinProductivityAnnotation jpa = 
afcExpr.getAnnotation(JoinProductivityAnnotation.class);
-            s = findJoinSelectivity(jpa, afcExpr, jOp);
+            s = findJoinSelectivity(jc, jpa, afcExpr, jOp);
             sel *= s;
         }

@@ -361,13 +367,20 @@
         return sel;
     }

-    protected double getSelectivityFromAnnotationMain(ILogicalExpression 
leExpr, boolean join,
+    protected double getSelectivityFromAnnotationMain(JoinCondition jc, 
ILogicalExpression expr, boolean join,
             boolean singleDatasetPreds, JoinOperator jOp) throws 
AlgebricksException {
         double sel = 1.0;

+        ILogicalExpression leExpr;
+
+        if (jc != null)
+            leExpr = jc.joinCondition;
+        else
+            leExpr = expr;
+
         if 
(leExpr.getExpressionTag().equals(LogicalExpressionTag.FUNCTION_CALL)) {
             AbstractFunctionCallExpression afcExpr = 
(AbstractFunctionCallExpression) leExpr;
-            sel = getSelectivityFromAnnotation(afcExpr, join, 
singleDatasetPreds, jOp);
+            sel = getSelectivityFromAnnotation(jc, afcExpr, join, 
singleDatasetPreds, jOp);
         }

         return sel;
@@ -385,7 +398,7 @@
         while (op.getOperatorTag() != LogicalOperatorTag.EMPTYTUPLESOURCE) {
             if (op.getOperatorTag() == LogicalOperatorTag.SELECT) {
                 SelectOperator selOper = (SelectOperator) op;
-                sel *= 
getSelectivityFromAnnotationMain(selOper.getCondition().getValue(), join, 
false, null);
+                sel *= getSelectivityFromAnnotationMain(null, 
selOper.getCondition().getValue(), join, false, null);
             }
             if (op.getOperatorTag() == LogicalOperatorTag.SUBPLAN) {
                 sel *= getSelectivity((SubplanOperator) op);
@@ -402,7 +415,7 @@
         while (true) {
             if (op.getOperatorTag() == LogicalOperatorTag.SELECT) {
                 SelectOperator selOper = (SelectOperator) op;
-                sel *= 
getSelectivityFromAnnotationMain(selOper.getCondition().getValue(), false, 
false, null);
+                sel *= getSelectivityFromAnnotationMain(null, 
selOper.getCondition().getValue(), false, false, null);
             }
             if (op.getInputs().size() > 0) {
                 op = op.getInputs().get(0).getValue();

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19550
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: I3d8dca03ece22435da6225cb50a4630dce8326bb
Gerrit-Change-Number: 19550
Gerrit-PatchSet: 1
Gerrit-Owner: [email protected]
Gerrit-MessageType: newchange

Reply via email to