>From <[email protected]>:

[email protected] has uploaded this change for review. ( 
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19358 )


Change subject: [ASTERIXDB-3555][COMP] Use Join Samples to get Join Selectivity
......................................................................

[ASTERIXDB-3555][COMP] Use Join Samples to get Join Selectivity

Change-Id: I2822d42c64752414309b09c05832b5c8a593fd1f
---
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
1 file changed, 57 insertions(+), 38 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb 
refs/changes/58/19358/1

diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index 1cc643a..139a90e 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -46,6 +46,7 @@
 import org.apache.commons.lang3.mutable.Mutable;
 import org.apache.commons.lang3.mutable.MutableObject;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.common.utils.Pair;
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
 import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
@@ -61,6 +62,7 @@
 import 
org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
 import 
org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
 import 
org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
+import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
@@ -172,54 +174,62 @@
                 return productivity / card1;
             }
         } else {
+            /*
             ILogicalOperator leafInput;
             LogicalVariable var;
             if 
(!(joinExpr.getFunctionIdentifier().equals(AlgebricksBuiltinFunctions.EQ))) {
                 return 0.5; // we will assume half; rest of the code assumes 
EQ joins
             }
-            // choose the smaller side sample; better results this way for 
sure!
-            if (card1 < card2) {
-                leafInput = joinEnum.leafInputs.get(idx1 - 1);
-                var = exprUsedVars.get(0);
-            } else {
-                leafInput = joinEnum.leafInputs.get(idx2 - 1);
-                var = exprUsedVars.get(1);
-            }
-            Index index = findIndex(leafInput);
-            if (index == null) {
-                return 1.0;
-            }
-            List<List<IAObject>> result = 
runSamplingQueryDistinct(this.optCtx, leafInput, var, index);
-            if (result == null) {
-                return 1.0;
-            }
+
+             */

-            double estDistinctCardinalityFromSample = 
findPredicateCardinality(result, true);
-            if (estDistinctCardinalityFromSample == 0) {
-                estDistinctCardinalityFromSample = 1; // just in case
+            double sel = findJoinSelFromSamples(joinEnum.leafInputs.get(idx1 - 
1), joinEnum.leafInputs.get(idx2 - 1),
+                    joinExpr);
+            if (sel == 0.0) {
+                sel = 0.0001;
             }
-            Index.SampleIndexDetails details = (Index.SampleIndexDetails) 
index.getIndexDetails();
-            double numDistincts;
-            // if the table is smaller than the sample size, there is no need 
to use the estimator
-            //                                            
getSampleCardinalityTarget() equals 1063 or 4252 or 17008
-            if (details.getSourceCardinality() <= 
details.getSampleCardinalityTarget()) {
-                numDistincts = estDistinctCardinalityFromSample;
-            } else { // when the number of distincts is smaller than approx 
25% of the sample size, then we do not
-                         // then we do not need to call the estimator. This is 
a good heuristic. This was obtained by looking at the graph
-                     // of d = D ( 1 - e^(-getSampleCardinalityTarget/D) ; d = 
estDistinctCardinalityFromSample; D = actual number of distincts
-                if (estDistinctCardinalityFromSample <= 0.25 * 
details.getSampleCardinalityTarget()) {
-                    numDistincts = estDistinctCardinalityFromSample;
-                } else {
-                    numDistincts = 
secondDistinctEstimator(estDistinctCardinalityFromSample, index);
-                }
-            }
-            if (numDistincts > details.getSourceCardinality()) {
-                numDistincts = details.getSourceCardinality(); // cannot 
exceed table cardinality
-            }
-            return 1.0 / numDistincts; // this is the expected selectivity for 
joins for Fk-PK and Fk-Fk joins
+            return sel;
+
         }
     }

+    private double findJoinSelFromSamples(ILogicalOperator left, 
ILogicalOperator right,
+            AbstractFunctionCallExpression joinExpr) throws 
AlgebricksException {
+        JoinOperator join = joinEnum.allJoinOps.get(0);
+        AbstractBinaryJoinOperator abjoin = join.getAbstractJoinOp();
+        Pair<ILogicalOperator, Double> leftOutput = 
replaceDataSourceWithSample(left);
+        abjoin.getInputs().get(0).setValue(leftOutput.getFirst());
+        Pair<ILogicalOperator, Double> rightOutput = 
replaceDataSourceWithSample(right);
+        abjoin.getInputs().get(1).setValue(rightOutput.getFirst());
+        abjoin.getCondition().setValue(joinExpr);
+        List<List<IAObject>> result = runSamplingQuery(optCtx, abjoin);
+        double estCardSample = findPredicateCardinality(result, false);
+        return estCardSample / leftOutput.getSecond() / 
rightOutput.getSecond();
+    }
+
+    private Pair<ILogicalOperator, Double> 
replaceDataSourceWithSample(ILogicalOperator op) throws AlgebricksException {
+        ILogicalOperator selOp = 
OperatorManipulationUtil.bottomUpCopyOperators(op);
+        // must set all the Sel operators to be true, otherwise we will be 
multiplying the single table sels also here.
+        List<ILogicalExpression> ignore = 
storeSelectConditionsAndMakeThemTrue(selOp, null);
+        ILogicalOperator parent = 
joinEnum.findDataSourceScanOperatorParent(selOp);
+        DataSourceScanOperator scanOp = (DataSourceScanOperator) 
parent.getInputs().get(0).getValue();
+        Index index = findIndex(op);
+        Index.SampleIndexDetails idxDetails = (Index.SampleIndexDetails) 
index.getIndexDetails();
+        double origDatasetCard = idxDetails.getSourceCardinality();
+        double sampleCard = Math.min(idxDetails.getSampleCardinalityTarget(), 
origDatasetCard);
+        issueWarning(sampleCard, scanOp);
+
+        // replace the dataScanSourceOperator with the sampling source
+        SampleDataSource sampledatasource = 
joinEnum.getSampleDataSource(scanOp);
+        DataSourceScanOperator deepCopyofScan =
+                (DataSourceScanOperator) 
OperatorManipulationUtil.bottomUpCopyOperators(scanOp);
+        deepCopyofScan.setDataSource(sampledatasource);
+        parent.getInputs().get(0).setValue(deepCopyofScan);
+        Pair<ILogicalOperator, Double> retVal = new Pair<>(selOp, sampleCard);
+
+        return retVal;
+    }
+
     // The expression we get may not be a base condition. It could be 
comprised of ors and ands and nots. So have to
     //recursively find the overall selectivity.
     private double getSelectivityFromAnnotation(AbstractFunctionCallExpression 
afcExpr, boolean join,

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19358
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: I2822d42c64752414309b09c05832b5c8a593fd1f
Gerrit-Change-Number: 19358
Gerrit-PatchSet: 1
Gerrit-Owner: [email protected]
Gerrit-MessageType: newchange

Reply via email to