From <[email protected]>:

[email protected] has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19496 )


Change subject: PLEASE EDIT to provide a meaningful commit message!
......................................................................

PLEASE EDIT to provide a meaningful commit message!

The following commits from your working branch will be included:

commit 01de28af30d70094b2835bb59e345c0721ef1c71
Author: murali4104 <[email protected]>
Date:   Fri Mar 7 07:46:42 2025 -0800

    [ASTERIXDB-3555][COMP] Use Join Samples to get Join Selectivity

Change-Id: Iacf111e2d6ed5307b747045a57107eccb872d3f0
---
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
M asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.015.plan
2 files changed, 34 insertions(+), 10 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/96/19496/1

diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index 52566ef..b4a926c 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -40,6 +40,7 @@
 import org.apache.asterix.om.functions.BuiltinFunctionInfo;
 import org.apache.asterix.om.functions.BuiltinFunctions;
 import org.apache.asterix.optimizer.base.AnalysisUtil;
+import org.apache.asterix.optimizer.cost.Cost;
 import 
org.apache.asterix.optimizer.rules.am.array.AbstractOperatorFromSubplanRewrite;
 import org.apache.asterix.translator.ConstantHelper;
 import org.apache.commons.collections.CollectionUtils;
@@ -187,18 +188,24 @@
                 return 0.5; // this may not be accurate obviously!
             } // we can do all relops here and other joins such as interval 
joins and spatial joins, the compile time might increase a lot

+            //If one of the tables is smaller than the target sample size, we can join the samples directly
+            // to get a good estimate of the join selectivity.
             Index.SampleIndexDetails idxDetails1 = (Index.SampleIndexDetails) 
index1.getIndexDetails();
             Index.SampleIndexDetails idxDetails2 = (Index.SampleIndexDetails) 
index2.getIndexDetails();
             if ((idxDetails1.getSourceCardinality() < 
idxDetails1.getSampleCardinalityTarget())
                     || (idxDetails2.getSourceCardinality() < 
idxDetails2.getSampleCardinalityTarget())) {
                 double sel = 
findJoinSelFromSamples(joinEnum.leafInputs.get(idx1 - 1),
                         joinEnum.leafInputs.get(idx2 - 1), index1, index2, 
joinExpr, jOp);
-                if (sel > 0.0) { // if sel is 0.0 we call naiveJoinSelectivity
+
+                if (sel > 0.0) {
                     return sel;
+                } else {
+                    return 1.0/Math.max(card1, card2); // R.uniq = S.uniq is 
nicely modelled here. Best we can do so far.
                 }
             }
             // Now we can handle only equi joins. We make all the uniform and 
independence assumptions here.
             double sel = naiveJoinSelectivity(exprUsedVars, card1, card2, 
idx1, idx2);
+
             return sel;
         }
     }
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.015.plan
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.015.plan
index 8aae0e3..29e8edc 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.015.plan
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.015.plan
@@ -1,20 +1,20 @@
-distribute result [$$52] [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, 
total-cost: 12.0]
+distribute result [$$52] [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, 
total-cost: 12.0]
 -- DISTRIBUTE_RESULT  |UNPARTITIONED|
-  exchange [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
+  exchange [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
   -- ONE_TO_ONE_EXCHANGE  |UNPARTITIONED|
-    assign [$$52] <- [{"$1": $$57}] project: [$$52] [cardinality: 4.0, 
doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
+    assign [$$52] <- [{"$1": $$57}] project: [$$52] [cardinality: 2.1, 
doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
     -- ASSIGN  |UNPARTITIONED|
-      aggregate [$$57] <- [agg-sql-sum($$60)] [cardinality: 4.0, doc-size: 
2.0, op-cost: 0.0, total-cost: 12.0]
+      aggregate [$$57] <- [agg-sql-sum($$60)] [cardinality: 2.1, doc-size: 
2.0, op-cost: 0.0, total-cost: 12.0]
       -- AGGREGATE  |UNPARTITIONED|
-        exchange [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 
12.0]
+        exchange [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 
12.0]
         -- RANDOM_MERGE_EXCHANGE  |PARTITIONED|
-          aggregate [$$60] <- [agg-sql-count($$50)] [cardinality: 4.0, 
doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
+          aggregate [$$60] <- [agg-sql-count($$50)] [cardinality: 2.1, 
doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
           -- AGGREGATE  |PARTITIONED|
-            project ([$$50]) [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, 
total-cost: 12.0]
+            project ([$$50]) [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, 
total-cost: 12.0]
             -- STREAM_PROJECT  |PARTITIONED|
-              exchange [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, 
total-cost: 12.0]
+              exchange [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, 
total-cost: 12.0]
               -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                join (eq($$55, $$56)) [cardinality: 4.0, doc-size: 2.0, 
op-cost: 4.0, total-cost: 12.0]
+                join (eq($$55, $$56)) [cardinality: 2.1, doc-size: 2.0, 
op-cost: 4.0, total-cost: 12.0]
                 -- HYBRID_HASH_JOIN [$$55][$$56]  |PARTITIONED|
                   exchange [cardinality: 2.0, doc-size: 1.0, op-cost: 0.0, 
total-cost: 2.0]
                   -- HASH_PARTITION_EXCHANGE [$$55]  |PARTITIONED|

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19496
To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: Iacf111e2d6ed5307b747045a57107eccb872d3f0
Gerrit-Change-Number: 19496
Gerrit-PatchSet: 1
Gerrit-Owner: [email protected]
Gerrit-MessageType: newchange

Reply via email to