xiaokang commented on code in PR #32620:
URL: https://github.com/apache/doris/pull/32620#discussion_r1561930236
##########
be/src/vec/exprs/vcompound_pred.h:
##########
@@ -53,6 +53,75 @@ class VCompoundPred : public VectorizedFnCall {
const std::string& expr_name() const override { return _expr_name; }
+ bool is_all_ones(const roaring::Roaring& r) {
+ return r.contains(0);
+ for (roaring::RoaringSetBitForwardIterator i = r.begin(); i !=
r.end(); ++i) {
+ if (*i == 0) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // 1. when meet 'or' conjunct: a or b, if b can apply index, return all
rows, so b should not be extracted
+ // 2. when meet 'and' conjunct, function with column b can not apply
inverted index
+ // eg. a and hash(b)=1, if b can apply index, but hash(b)=1 is not
for index, so b should not be extracted
+ // but a and array_contains(b, 1), b can be applied inverted
index, which b can be extracted
+ Status eval_inverted_index(
+ VExprContext* context,
+ const std::unordered_map<ColumnId,
std::pair<vectorized::NameAndTypePair,
+
segment_v2::InvertedIndexIterator*>>&
+ colId_to_inverted_index_iter,
+ uint32_t num_rows, roaring::Roaring* bitmap) const override {
+ if (_op == TExprOpcode::COMPOUND_OR) {
+ for (auto child : _children) {
+ std::shared_ptr<roaring::Roaring> child_roaring =
+ std::make_shared<roaring::Roaring>();
+ Status st = child->eval_inverted_index(context,
colId_to_inverted_index_iter,
+ num_rows,
child_roaring.get());
+ if (!st.ok()) {
+ continue;
+ }
+ *bitmap |= *child_roaring;
+ if (!child_roaring->isEmpty()) {
Review Comment:
I think the check for NOT empty is wrong. Furthermore, short-circuiting for OR
is different from AND: you should check that there are no zeros in the total
bitmap and then skip the remaining exprs.
##########
be/src/vec/exprs/vcompound_pred.h:
##########
@@ -53,6 +53,75 @@ class VCompoundPred : public VectorizedFnCall {
const std::string& expr_name() const override { return _expr_name; }
+ bool is_all_ones(const roaring::Roaring& r) {
+ return r.contains(0);
+ for (roaring::RoaringSetBitForwardIterator i = r.begin(); i !=
r.end(); ++i) {
+ if (*i == 0) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // 1. when meet 'or' conjunct: a or b, if b can apply index, return all
rows, so b should not be extracted
+ // 2. when meet 'and' conjunct, function with column b can not apply
inverted index
+ // eg. a and hash(b)=1, if b can apply index, but hash(b)=1 is not
for index, so b should not be extracted
+ // but a and array_contains(b, 1), b can be applied inverted
index, which b can be extracted
+ Status eval_inverted_index(
+ VExprContext* context,
+ const std::unordered_map<ColumnId,
std::pair<vectorized::NameAndTypePair,
+
segment_v2::InvertedIndexIterator*>>&
+ colId_to_inverted_index_iter,
+ uint32_t num_rows, roaring::Roaring* bitmap) const override {
+ if (_op == TExprOpcode::COMPOUND_OR) {
+ for (auto child : _children) {
+ std::shared_ptr<roaring::Roaring> child_roaring =
+ std::make_shared<roaring::Roaring>();
+ Status st = child->eval_inverted_index(context,
colId_to_inverted_index_iter,
+ num_rows,
child_roaring.get());
+ if (!st.ok()) {
+ continue;
+ }
+ *bitmap |= *child_roaring;
+ if (!child_roaring->isEmpty()) {
+ // means inverted index filter do not reduce any rows
+ // the left expr no need to be extracted by inverted index,
+ // and cur roaring is all rows which means this inverted
index is not useful,
+ // do not need to calculate with res bitmap
+ return Status::OK();
+ }
+ }
+ } else if (_op == TExprOpcode::COMPOUND_AND) {
+ for (auto child : _children) {
+ std::shared_ptr<roaring::Roaring> child_roaring =
+ std::make_shared<roaring::Roaring>();
+ Status st = child->eval_inverted_index(context,
colId_to_inverted_index_iter,
+ num_rows,
child_roaring.get());
+ if (!st.ok()) {
+ continue;
+ }
+ *bitmap &= *child_roaring;
+ if (child_roaring->isEmpty()) {
Review Comment:
It's OK to check child_roaring here, but it's more accurate to check bitmap.
##########
be/src/vec/exprs/vcompound_pred.h:
##########
@@ -53,6 +53,75 @@ class VCompoundPred : public VectorizedFnCall {
const std::string& expr_name() const override { return _expr_name; }
+ bool is_all_ones(const roaring::Roaring& r) {
+ return r.contains(0);
+ for (roaring::RoaringSetBitForwardIterator i = r.begin(); i !=
r.end(); ++i) {
+ if (*i == 0) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // 1. when meet 'or' conjunct: a or b, if b can apply index, return all
rows, so b should not be extracted
+ // 2. when meet 'and' conjunct, function with column b can not apply
inverted index
+ // eg. a and hash(b)=1, if b can apply index, but hash(b)=1 is not
for index, so b should not be extracted
+ // but a and array_contains(b, 1), b can be applied inverted
index, which b can be extracted
+ Status eval_inverted_index(
+ VExprContext* context,
+ const std::unordered_map<ColumnId,
std::pair<vectorized::NameAndTypePair,
+
segment_v2::InvertedIndexIterator*>>&
+ colId_to_inverted_index_iter,
+ uint32_t num_rows, roaring::Roaring* bitmap) const override {
+ if (_op == TExprOpcode::COMPOUND_OR) {
+ for (auto child : _children) {
+ std::shared_ptr<roaring::Roaring> child_roaring =
+ std::make_shared<roaring::Roaring>();
+ Status st = child->eval_inverted_index(context,
colId_to_inverted_index_iter,
+ num_rows,
child_roaring.get());
+ if (!st.ok()) {
+ continue;
+ }
+ *bitmap |= *child_roaring;
+ if (!child_roaring->isEmpty()) {
+ // means inverted index filter do not reduce any rows
+ // the left expr no need to be extracted by inverted index,
+ // and cur roaring is all rows which means this inverted
index is not useful,
+ // do not need to calculate with res bitmap
+ return Status::OK();
+ }
+ }
+ } else if (_op == TExprOpcode::COMPOUND_AND) {
+ for (auto child : _children) {
+ std::shared_ptr<roaring::Roaring> child_roaring =
+ std::make_shared<roaring::Roaring>();
+ Status st = child->eval_inverted_index(context,
colId_to_inverted_index_iter,
+ num_rows,
child_roaring.get());
+ if (!st.ok()) {
+ continue;
+ }
+ *bitmap &= *child_roaring;
+ if (child_roaring->isEmpty()) {
+ // the left expr no need to be extracted by inverted
index, just return 0 rows
+ // res bitmap will be zero
+ return Status::OK();
+ }
+ }
+ } else if (_op == TExprOpcode::COMPOUND_NOT) {
+ std::shared_ptr<roaring::Roaring> child_roaring =
std::make_shared<roaring::Roaring>();
+ Status st = _children[0]->eval_inverted_index(context,
colId_to_inverted_index_iter,
+ num_rows,
child_roaring.get());
+ if (!st.ok()) {
+ return st;
+ }
+ *bitmap -= *child_roaring;
Review Comment:
Should this be `-= child_roaring` or `-child_roaring`?
##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1209,6 +1226,34 @@ Status SegmentIterator::_apply_inverted_index() {
}
}
+ // support expr to evaluate inverted index
+ std::unordered_map<ColumnId, std::pair<vectorized::NameAndTypePair,
InvertedIndexIterator*>>
+ iter_map;
+
+ for (auto col_id : _common_expr_columns) {
+ if (_check_apply_by_inverted_index(col_id)) {
+ iter_map[col_id] = std::make_pair(_storage_name_and_type[col_id],
+
_inverted_index_iterators[col_id].get());
+ }
+ }
+ for (auto expr_ctx : _common_expr_ctxs_push_down) {
+ // _inverted_index_iterators has all column ids which has inverted
index
+ // _common_expr_columns has all column ids from
_common_expr_ctxs_push_down
+ // if current bitmap is already empty just return
+ if (_row_bitmap.isEmpty()) {
+ break;
+ }
+ roaring::Roaring bitmap = _row_bitmap;
Review Comment:
It's expensive to copy a large bitmap.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]