>From <[email protected]>:
[email protected] has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19358 )
Change subject: [ASTERIXDB-3555][COMP] Use Join Samples to get Join Selectivity
......................................................................
[ASTERIXDB-3555][COMP] Use Join Samples to get Join Selectivity
Change-Id: I2822d42c64752414309b09c05832b5c8a593fd1f
---
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
1 file changed, 57 insertions(+), 38 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/58/19358/1
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index 1cc643a..139a90e 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -46,6 +46,7 @@
import org.apache.commons.lang3.mutable.Mutable;
import org.apache.commons.lang3.mutable.MutableObject;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.common.utils.Pair;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
@@ -61,6 +62,7 @@
import
org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
import
org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
import
org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
+import
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
@@ -172,54 +174,62 @@
return productivity / card1;
}
} else {
+ /*
ILogicalOperator leafInput;
LogicalVariable var;
if
(!(joinExpr.getFunctionIdentifier().equals(AlgebricksBuiltinFunctions.EQ))) {
return 0.5; // we will assume half; rest of the code assumes
EQ joins
}
- // choose the smaller side sample; better results this way for
sure!
- if (card1 < card2) {
- leafInput = joinEnum.leafInputs.get(idx1 - 1);
- var = exprUsedVars.get(0);
- } else {
- leafInput = joinEnum.leafInputs.get(idx2 - 1);
- var = exprUsedVars.get(1);
- }
- Index index = findIndex(leafInput);
- if (index == null) {
- return 1.0;
- }
- List<List<IAObject>> result =
runSamplingQueryDistinct(this.optCtx, leafInput, var, index);
- if (result == null) {
- return 1.0;
- }
+
+ */
- double estDistinctCardinalityFromSample =
findPredicateCardinality(result, true);
- if (estDistinctCardinalityFromSample == 0) {
- estDistinctCardinalityFromSample = 1; // just in case
+ double sel = findJoinSelFromSamples(joinEnum.leafInputs.get(idx1 -
1), joinEnum.leafInputs.get(idx2 - 1),
+ joinExpr);
+ if (sel == 0.0) {
+ sel = 0.0001;
}
- Index.SampleIndexDetails details = (Index.SampleIndexDetails)
index.getIndexDetails();
- double numDistincts;
- // if the table is smaller than the sample size, there is no need
to use the estimator
- //
getSampleCardinalityTarget() equals 1063 or 4252 or 17008
- if (details.getSourceCardinality() <=
details.getSampleCardinalityTarget()) {
- numDistincts = estDistinctCardinalityFromSample;
- } else { // when the number of distincts is smaller than approx
25% of the sample size, then we do not
- // then we do not need to call the estimator. This is
a good heuristic. This was obtained by looking at the graph
- // of d = D ( 1 - e^(-getSampleCardinalityTarget/D) ; d =
estDistinctCardinalityFromSample; D = actual number of distincts
- if (estDistinctCardinalityFromSample <= 0.25 *
details.getSampleCardinalityTarget()) {
- numDistincts = estDistinctCardinalityFromSample;
- } else {
- numDistincts =
secondDistinctEstimator(estDistinctCardinalityFromSample, index);
- }
- }
- if (numDistincts > details.getSourceCardinality()) {
- numDistincts = details.getSourceCardinality(); // cannot
exceed table cardinality
- }
- return 1.0 / numDistincts; // this is the expected selectivity for
joins for Fk-PK and Fk-Fk joins
+ return sel;
+
}
}
+ private double findJoinSelFromSamples(ILogicalOperator left,
ILogicalOperator right,
+ AbstractFunctionCallExpression joinExpr) throws
AlgebricksException {
+ JoinOperator join = joinEnum.allJoinOps.get(0);
+ AbstractBinaryJoinOperator abjoin = join.getAbstractJoinOp();
+ Pair<ILogicalOperator, Double> leftOutput =
replaceDataSourceWithSample(left);
+ abjoin.getInputs().get(0).setValue(leftOutput.getFirst());
+ Pair<ILogicalOperator, Double> rightOutput =
replaceDataSourceWithSample(right);
+ abjoin.getInputs().get(1).setValue(rightOutput.getFirst());
+ abjoin.getCondition().setValue(joinExpr);
+ List<List<IAObject>> result = runSamplingQuery(optCtx, abjoin);
+ double estCardSample = findPredicateCardinality(result, false);
+ return estCardSample / leftOutput.getSecond() /
rightOutput.getSecond();
+ }
+
+ private Pair<ILogicalOperator, Double>
replaceDataSourceWithSample(ILogicalOperator op) throws AlgebricksException {
+ ILogicalOperator selOp =
OperatorManipulationUtil.bottomUpCopyOperators(op);
+ // must set all the Sel operators to be true, otherwise we will be
multiplying the single table sels also here.
+ List<ILogicalExpression> ignore =
storeSelectConditionsAndMakeThemTrue(selOp, null);
+ ILogicalOperator parent =
joinEnum.findDataSourceScanOperatorParent(selOp);
+ DataSourceScanOperator scanOp = (DataSourceScanOperator)
parent.getInputs().get(0).getValue();
+ Index index = findIndex(op);
+ Index.SampleIndexDetails idxDetails = (Index.SampleIndexDetails)
index.getIndexDetails();
+ double origDatasetCard = idxDetails.getSourceCardinality();
+ double sampleCard = Math.min(idxDetails.getSampleCardinalityTarget(),
origDatasetCard);
+ issueWarning(sampleCard, scanOp);
+
+ // replace the dataScanSourceOperator with the sampling source
+ SampleDataSource sampledatasource =
joinEnum.getSampleDataSource(scanOp);
+ DataSourceScanOperator deepCopyofScan =
+ (DataSourceScanOperator)
OperatorManipulationUtil.bottomUpCopyOperators(scanOp);
+ deepCopyofScan.setDataSource(sampledatasource);
+ parent.getInputs().get(0).setValue(deepCopyofScan);
+ Pair<ILogicalOperator, Double> retVal = new Pair<>(selOp, sampleCard);
+
+ return retVal;
+ }
+
// The expression we get may not be a base condition. It could be
comprised of ors and ands and nots. So have to
//recursively find the overall selectivity.
private double getSelectivityFromAnnotation(AbstractFunctionCallExpression
afcExpr, boolean join,
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19358
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: I2822d42c64752414309b09c05832b5c8a593fd1f
Gerrit-Change-Number: 19358
Gerrit-PatchSet: 1
Gerrit-Owner: [email protected]
Gerrit-MessageType: newchange