This is an automated email from the ASF dual-hosted git repository.
luozenglin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 00727e8c11 [fix](in-bitmap) fix result may be wrong if the left side
of the in bitmap predicate is a constant (#17570)
00727e8c11 is described below
commit 00727e8c113575252f9b5b1f6503966cb8c334c0
Author: luozenglin <[email protected]>
AuthorDate: Thu Mar 9 10:59:05 2023 +0800
[fix](in-bitmap) fix result may be wrong if the left side of the in bitmap
predicate is a constant (#17570)
---
be/src/vec/exec/join/vnested_loop_join_node.cpp | 8 ++++-
.../org/apache/doris/analysis/StmtRewriter.java | 3 ++
.../java/org/apache/doris/analysis/TableRef.java | 9 +++++
.../apache/doris/planner/NestedLoopJoinNode.java | 14 +++++++-
.../apache/doris/planner/SingleNodePlanner.java | 1 +
.../data/query_p0/join/test_bitmap_filter.out | 40 ++++++++++++++++++++++
.../suites/query_p0/join/test_bitmap_filter.groovy | 8 +++++
7 files changed, 81 insertions(+), 2 deletions(-)
diff --git a/be/src/vec/exec/join/vnested_loop_join_node.cpp
b/be/src/vec/exec/join/vnested_loop_join_node.cpp
index 2555cea209..3a26ef165c 100644
--- a/be/src/vec/exec/join/vnested_loop_join_node.cpp
+++ b/be/src/vec/exec/join/vnested_loop_join_node.cpp
@@ -96,7 +96,6 @@ Status VNestedLoopJoinNode::init(const TPlanNode& tnode,
RuntimeState* state) {
}
RETURN_IF_ERROR(
vectorized::VExpr::create_expr_trees(_pool, filter_src_exprs,
&_filter_src_expr_ctxs));
- DCHECK(!filter_src_exprs.empty() == _is_output_left_side_only);
return Status::OK();
}
@@ -189,6 +188,13 @@ Status VNestedLoopJoinNode::sink(doris::RuntimeState*
state, vectorized::Block*
if (eos) {
COUNTER_UPDATE(_build_rows_counter, _build_rows);
RuntimeFilterBuild(this)(state);
+
+ // optimize `in bitmap`, see
https://github.com/apache/doris/issues/14338
+ if (_is_output_left_side_only &&
+ ((_join_op == TJoinOp::type::LEFT_SEMI_JOIN &&
_build_blocks.empty()) ||
+ (_join_op == TJoinOp::type::LEFT_ANTI_JOIN &&
!_build_blocks.empty()))) {
+ _left_side_eos = true;
+ }
}
return Status::OK();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java
index 5b4fce1f0f..0184bf1b64 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java
@@ -816,6 +816,7 @@ public class StmtRewriter {
break;
}
+ boolean isInBitmap = false;
if (!hasEqJoinPred && !inlineView.isCorrelated()) {
// Join with InPredicate is actually an equal join, so we choose
HashJoin.
if (expr instanceof ExistsPredicate) {
@@ -825,6 +826,7 @@ public class StmtRewriter {
&& (((FunctionCallExpr)
joinConjunct).getFnName().getFunction()
.equalsIgnoreCase(BITMAP_CONTAINS))) {
joinOp = ((InPredicate) expr).isNotIn() ?
JoinOperator.LEFT_ANTI_JOIN : JoinOperator.LEFT_SEMI_JOIN;
+ isInBitmap = true;
} else {
joinOp = JoinOperator.CROSS_JOIN;
// We can equal the aggregate subquery using a cross join. All
conjuncts
@@ -843,6 +845,7 @@ public class StmtRewriter {
inlineView.setMark(markTuple);
inlineView.setJoinOp(joinOp);
+ inlineView.setInBitmap(isInBitmap);
if (joinOp != JoinOperator.CROSS_JOIN) {
inlineView.setOnClause(onClausePredicate);
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java
index e081540ec9..90d3460e85 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java
@@ -94,6 +94,7 @@ public class TableRef implements ParseNode, Writable {
// Indicates whether this table ref is given an explicit alias,
protected boolean hasExplicitAlias;
protected JoinOperator joinOp;
+ protected boolean isInBitmap;
// for mark join
protected boolean isMark;
// we must record mark tuple name for re-analyze
@@ -280,6 +281,14 @@ public class TableRef implements ParseNode, Writable {
this.joinOp = op;
}
+ public boolean isInBitmap() {
+ return isInBitmap;
+ }
+
+ public void setInBitmap(boolean inBitmap) {
+ isInBitmap = inBitmap;
+ }
+
public boolean isMark() {
return isMark;
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java
b/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java
index 33611ff64e..fffadd7f58 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java
@@ -49,6 +49,18 @@ import java.util.List;
public class NestedLoopJoinNode extends JoinNodeBase {
private static final Logger LOG =
LogManager.getLogger(NestedLoopJoinNode.class);
+ // If isOutputLeftSideOnly=true, the data from the left table is returned
directly without a join operation.
+ // This is used to optimize `in bitmap`, because bitmap will make a lot of
copies when doing Nested Loop Join,
+ // which is very resource intensive.
+ // `in bitmap` has two cases:
+ // 1. select * from tbl1 where k1 in (select bitmap_col from tbl2);
+ // This will generate a bitmap runtime filter to filter the left table,
because the bitmap is an exact filter
+ // and does not need to be filtered again in the NestedLoopJoinNode, so
it returns the left table data directly.
+ // 2. select * from tbl1 where 1 in (select bitmap_col from tbl2);
+ // This sql will be rewritten to
+ // "select * from tbl1 left semi join tbl2 where
bitmap_contains(tbl2.bitmap_col, 1);"
+ // return all data in the left table to parent node when there is data
on the build side, and return empty when
+ // there is no data on the build side.
private boolean isOutputLeftSideOnly = false;
private List<Expr> runtimeFilterExpr = Lists.newArrayList();
@@ -245,8 +257,8 @@ public class NestedLoopJoinNode extends JoinNodeBase {
if (!runtimeFilters.isEmpty()) {
output.append(detailPrefix).append("runtime filters: ");
output.append(getRuntimeFilterExplainString(true));
- output.append(detailPrefix).append("is output left side only:
").append(isOutputLeftSideOnly).append("\n");
}
+ output.append(detailPrefix).append("is output left side only:
").append(isOutputLeftSideOnly).append("\n");
output.append(detailPrefix).append(String.format("cardinality=%,d",
cardinality)).append("\n");
// todo unify in plan node
if (vOutputTupleDesc != null) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
index cab354b0be..a9d8cdc603 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
@@ -2115,6 +2115,7 @@ public class SingleNodePlanner {
result.setJoinConjuncts(joinConjuncts);
result.addConjuncts(analyzer.getMarkConjuncts(innerRef));
result.init(analyzer);
+ result.setOutputLeftSideOnly(innerRef.isInBitmap() &&
joinConjuncts.isEmpty());
return result;
}
diff --git a/regression-test/data/query_p0/join/test_bitmap_filter.out
b/regression-test/data/query_p0/join/test_bitmap_filter.out
index f44900b0e0..1ad7849124 100644
--- a/regression-test/data/query_p0/join/test_bitmap_filter.out
+++ b/regression-test/data/query_p0/join/test_bitmap_filter.out
@@ -98,3 +98,43 @@
1 1989
10 1991
+-- !sql15 --
+\N \N
+1 1989
+2 1986
+3 1989
+4 1991
+5 1985
+6 32767
+7 -32767
+8 255
+9 1991
+10 1991
+11 1989
+12 32767
+13 -32767
+14 255
+15 1992
+
+-- !sql16 --
+
+-- !sql17 --
+
+-- !sql18 --
+\N \N
+1 1989
+2 1986
+3 1989
+4 1991
+5 1985
+6 32767
+7 -32767
+8 255
+9 1991
+10 1991
+11 1989
+12 32767
+13 -32767
+14 255
+15 1992
+
diff --git a/regression-test/suites/query_p0/join/test_bitmap_filter.groovy
b/regression-test/suites/query_p0/join/test_bitmap_filter.groovy
index 022307f9e0..fe18451978 100644
--- a/regression-test/suites/query_p0/join/test_bitmap_filter.groovy
+++ b/regression-test/suites/query_p0/join/test_bitmap_filter.groovy
@@ -69,6 +69,14 @@ suite("test_bitmap_filter", "query_p0") {
qt_sql14 "select k1, k2 from ${tbl1} where k1 in (select
bitmap_from_string('1,10')) order by 1, 2"
+ qt_sql15 "select k1, k2 from ${tbl1} t where 11 in (select k2 from
${tbl2}) order by 1, 2;"
+
+ qt_sql16 "select k1, k2 from ${tbl1} t where 100 in (select k2 from
${tbl2}) order by 1, 2;"
+
+ qt_sql17 "select k1, k2 from ${tbl1} t where 10 not in (select k2 from
${tbl2}) order by 1, 2;"
+
+ qt_sql18 "select k1, k2 from ${tbl1} t where 100 not in (select k2 from
${tbl2}) order by 1, 2;"
+
test {
sql "select k1, k2 from ${tbl1} b1 where k1 in (select k2 from ${tbl2}
b2 where b1.k2 = b2.k1) order by k1;"
exception "In bitmap does not support correlated subquery"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]