>From <[email protected]>: [email protected] has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19496 )
Change subject: PLEASE EDIT to provide a meaningful commit message! ...................................................................... PLEASE EDIT to provide a meaningful commit message! The following commits from your working branch will be included: commit 01de28af30d70094b2835bb59e345c0721ef1c71 Author: murali4104 <[email protected]> Date: Fri Mar 7 07:46:42 2025 -0800 [ASTERIXDB-3555][COMP] Use Join Samples to get Join Selectivity Change-Id: Iacf111e2d6ed5307b747045a57107eccb872d3f0 --- M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java M asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.015.plan 2 files changed, 34 insertions(+), 10 deletions(-) git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/96/19496/1 diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java index 52566ef..b4a926c 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java @@ -40,6 +40,7 @@ import org.apache.asterix.om.functions.BuiltinFunctionInfo; import org.apache.asterix.om.functions.BuiltinFunctions; import org.apache.asterix.optimizer.base.AnalysisUtil; +import org.apache.asterix.optimizer.cost.Cost; import org.apache.asterix.optimizer.rules.am.array.AbstractOperatorFromSubplanRewrite; import org.apache.asterix.translator.ConstantHelper; import org.apache.commons.collections.CollectionUtils; @@ -187,18 +188,24 @@ return 0.5; // this may not be accurate obviously! } // we can do all relops here and other joins such as interval joins and spatial joins, the compile time might increase a lot + //If one of the tables is smaller than the target sample size, we can join the samples directly + // to get a good estimate of the join selectivity. Index.SampleIndexDetails idxDetails1 = (Index.SampleIndexDetails) index1.getIndexDetails(); Index.SampleIndexDetails idxDetails2 = (Index.SampleIndexDetails) index2.getIndexDetails(); if ((idxDetails1.getSourceCardinality() < idxDetails1.getSampleCardinalityTarget()) || (idxDetails2.getSourceCardinality() < idxDetails2.getSampleCardinalityTarget())) { double sel = findJoinSelFromSamples(joinEnum.leafInputs.get(idx1 - 1), joinEnum.leafInputs.get(idx2 - 1), index1, index2, joinExpr, jOp); - if (sel > 0.0) { // if sel is 0.0 we call naiveJoinSelectivity + + if (sel > 0.0) { return sel; + } else { + return 1.0/Math.max(card1, card2); // R.uniq = S.uniq is nicely modelled here. Best we can do so far. } } // Now we can handle only equi joins. We make all the uniform and independence assumptions here. double sel = naiveJoinSelectivity(exprUsedVars, card1, card2, idx1, idx2); + return sel; } } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.015.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.015.plan index 8aae0e3..29e8edc 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.015.plan +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.015.plan @@ -1,20 +1,20 @@ -distribute result [$$52] [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0] +distribute result [$$52] [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0] -- DISTRIBUTE_RESULT |UNPARTITIONED| - exchange [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0] + exchange [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0] -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED| - assign [$$52] <- [{"$1": $$57}] project: [$$52] [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0] + assign [$$52] <- [{"$1": $$57}] project: [$$52] [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0] -- ASSIGN |UNPARTITIONED| - aggregate [$$57] <- [agg-sql-sum($$60)] [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0] + aggregate [$$57] <- [agg-sql-sum($$60)] [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0] -- AGGREGATE |UNPARTITIONED| - exchange [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0] + exchange [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0] -- RANDOM_MERGE_EXCHANGE |PARTITIONED| - aggregate [$$60] <- [agg-sql-count($$50)] [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0] + aggregate [$$60] <- [agg-sql-count($$50)] [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0] -- AGGREGATE |PARTITIONED| - project ([$$50]) [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0] + project ([$$50]) [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0] -- STREAM_PROJECT |PARTITIONED| - exchange [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0] + exchange [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0] -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - join (eq($$55, $$56)) [cardinality: 4.0, doc-size: 2.0, op-cost: 4.0, total-cost: 12.0] + join (eq($$55, $$56)) [cardinality: 2.1, doc-size: 2.0, op-cost: 4.0, total-cost: 12.0] -- HYBRID_HASH_JOIN [$$55][$$56] |PARTITIONED| exchange [cardinality: 2.0, doc-size: 1.0, op-cost: 0.0, total-cost: 2.0] -- HASH_PARTITION_EXCHANGE [$$55] |PARTITIONED| -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19496 To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Change-Id: Iacf111e2d6ed5307b747045a57107eccb872d3f0 Gerrit-Change-Number: 19496 Gerrit-PatchSet: 1 Gerrit-Owner: [email protected] Gerrit-MessageType: newchange
