xiaokang commented on code in PR #32620:
URL: https://github.com/apache/doris/pull/32620#discussion_r1548824626
##########
be/src/vec/exprs/vcompound_pred.h:
##########
@@ -53,6 +53,56 @@ class VCompoundPred : public VectorizedFnCall {
const std::string& expr_name() const override { return _expr_name; }
+ bool is_all_ones(const roaring::Roaring& r) {
+ return r.contains(0);
+ for (roaring::RoaringSetBitForwardIterator i = r.begin(); i !=
r.end(); ++i) {
+ if (*i == 0) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // 1. when meet 'or' conjunct: a or b, if b can apply index, return all
rows, so b should not be extracted
+ // 2. when meet 'and' conjunct, function with column b can not apply
inverted index
+ // eg. a and hash(b)=1, if b can apply index, but hash(b)=1 is not
for index, so b should not be extracted
+ // but a and array_contains(b, 1), b can be applied inverted
index, which b can be extracted
+ Status eval_inverted_index(
+ VExprContext* context,
+ const std::unordered_map<ColumnId,
std::pair<vectorized::NameAndTypePair,
+
segment_v2::InvertedIndexIterator*>>&
+ colId_invertedIndexIter_mapping,
+ uint32_t num_rows, roaring::Roaring* bitmap) const override {
+ if (_op == TExprOpcode::COMPOUND_OR) {
+ for (auto child : _children) {
+ Status st = child->eval_inverted_index(context,
colId_invertedIndexIter_mapping,
Review Comment:
It implies AND
##########
be/src/vec/functions/array/function_array_index.h:
##########
@@ -70,6 +74,11 @@ struct ArrayCountEqual {
static constexpr void apply(ResultType& current, size_t j) noexcept {
++current; }
};
+struct ParamValue {
+ PrimitiveType type;
+ Field query_value;
Review Comment:
Name this field just `value` (instead of `query_value`) to be more generic.
##########
be/src/vec/exprs/vexpr_context.h:
##########
@@ -69,6 +70,21 @@ class VExprContext {
return _fn_contexts[i].get();
}
+ // execute expr with inverted index which column a, b has inverted indexes
+ // but some situation although column b has indexes, but apply index is
not useful, we should
+ // skip this expr, just do not apply index anymore.
+ /**
+ * @param colId_invertedIndexIter_mapping contains all column id to
inverted index iterator mapping from segmentIterator
+ * @param num_rows number of rows in one segment.
+ * @param bitmap roaring bitmap to store the result. 0 is present filed by
index.
+ * @return status not ok means execute failed.
+ */
+ [[nodiscard]] Status eval_inverted_indexs(
+ const std::unordered_map<ColumnId,
std::pair<vectorized::NameAndTypePair,
+
segment_v2::InvertedIndexIterator*>>&
+ colId_invertedIndexIter_mapping,
Review Comment:
use consistent variable name style: colid_to_inverted_index_iter
##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1209,6 +1226,33 @@ Status SegmentIterator::_apply_inverted_index() {
}
}
+ // support expr to evaluate inverted index
+ std::unordered_map<ColumnId, std::pair<vectorized::NameAndTypePair,
InvertedIndexIterator*>>
+ iter_map;
+
+ for (auto col_id : _common_expr_columns) {
+ if (_check_apply_by_inverted_index(col_id)) {
+ iter_map[col_id] = std::make_pair(_storage_name_and_type[col_id],
+
_inverted_index_iterators[col_id].get());
+ }
+ }
+ for (auto exprCtx : _common_expr_ctxs_push_down) {
+ // _inverted_index_iterators has all column ids which has inverted
index
+ // _common_expr_columns has all column ids from
_common_expr_ctxs_push_down
+ // if current bitmap is already empty just return
+ if (_row_bitmap.isEmpty()) {
+ break;
+ }
+ roaring::Roaring bitmap = _row_bitmap;
+ const Status st = exprCtx->eval_inverted_indexs(iter_map, num_rows(),
&bitmap);
+ if (!st.ok()) {
+ LOG(WARNING) << "failed to evaluate index in expr" <<
exprCtx->root()->debug_string()
+ << ", error msg: " << st;
+ } else {
+ _row_bitmap &= bitmap;
Review Comment:
`_row_bitmap &= bitmap` implies that there is AND logic between
_common_expr_ctxs_push_down and predicates. It's not always true.
##########
be/src/vec/functions/array/function_array_index.h:
##########
@@ -87,6 +96,58 @@ class FunctionArrayIndex : public IFunction {
bool use_default_implementation_for_nulls() const override { return false;
}
+ Status open(FunctionContext* context, FunctionContext::FunctionStateScope
scope) override {
+ if (scope == FunctionContext::THREAD_LOCAL) {
+ return Status::OK();
Review Comment:
Why return early when the scope is `THREAD_LOCAL`?
##########
be/src/vec/exprs/vcompound_pred.h:
##########
@@ -53,6 +53,56 @@ class VCompoundPred : public VectorizedFnCall {
const std::string& expr_name() const override { return _expr_name; }
+ bool is_all_ones(const roaring::Roaring& r) {
+ return r.contains(0);
+ for (roaring::RoaringSetBitForwardIterator i = r.begin(); i !=
r.end(); ++i) {
+ if (*i == 0) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // 1. when meet 'or' conjunct: a or b, if b can apply index, return all
rows, so b should not be extracted
+ // 2. when meet 'and' conjunct, function with column b can not apply
inverted index
+ // eg. a and hash(b)=1, if b can apply index, but hash(b)=1 is not
for index, so b should not be extracted
+ // but a and array_contains(b, 1), b can be applied inverted
index, which b can be extracted
+ Status eval_inverted_index(
+ VExprContext* context,
+ const std::unordered_map<ColumnId,
std::pair<vectorized::NameAndTypePair,
+
segment_v2::InvertedIndexIterator*>>&
+ colId_invertedIndexIter_mapping,
+ uint32_t num_rows, roaring::Roaring* bitmap) const override {
+ if (_op == TExprOpcode::COMPOUND_OR) {
+ for (auto child : _children) {
+ Status st = child->eval_inverted_index(context,
colId_invertedIndexIter_mapping,
+ num_rows, bitmap);
+ if (!st.ok()) {
+ return st;
+ }
+ if (!bitmap->contains(
+ 0)) { // the left expr no need to be extracted by
inverted index
+ return Status::OK();
+ }
+ }
+ } else if (_op == TExprOpcode::COMPOUND_AND) {
+ for (auto child : _children) {
+ Status st = child->eval_inverted_index(context,
colId_invertedIndexIter_mapping,
+ num_rows, bitmap);
+ if (!st.ok()) {
+ return st;
+ }
+ if (bitmap->isEmpty()) { // the left expr no need to be
extracted by inverted index
+ return Status::OK();
+ }
+ }
+ } else {
+ return Status::InternalError(
Review Comment:
Please also handle the NOT operator here.
##########
be/src/olap/rowset/segment_v2/inverted_index_reader.h:
##########
@@ -72,6 +72,75 @@ class InvertedIndexIterator;
class InvertedIndexQueryCacheHandle;
class InvertedIndexFileReader;
+struct PrimitiveTypeConvertorHelper {
Review Comment:
It can be moved to a more generic file, like PrimitiveTypeTraits.
##########
be/src/olap/rowset/segment_v2/inverted_index_reader.cpp:
##########
@@ -252,11 +252,12 @@ Status
FullTextIndexReader::new_iterator(OlapReaderStatistics* stats, RuntimeSta
Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState*
runtime_state,
const std::string& column_name, const void*
query_value,
- InvertedIndexQueryType query_type,
+ PrimitiveType primitiveType,
InvertedIndexQueryType query_type,
std::shared_ptr<roaring::Roaring>& bit_map) {
SCOPED_RAW_TIMER(&stats->inverted_index_query_timer);
-
- std::string search_str = reinterpret_cast<const
StringRef*>(query_value)->to_string();
+ auto&& storage_value =
Review Comment:
The duplicate code can be moved to
`InvertedIndexIterator::read_from_inverted_index`
##########
be/src/vec/functions/function.h:
##########
@@ -395,6 +404,14 @@ class IFunction : public
std::enable_shared_from_this<IFunction>,
return Status::OK();
}
+ Status eval_inverted_index(FunctionContext* context,
+ const vectorized::NameAndTypePair&
data_type_with_name,
+ segment_v2::InvertedIndexIterator* iter,
uint32_t num_rows,
+ roaring::Roaring* bitmap) const override {
+ LOG(FATAL) << "eval_inverted_index is not implemented for IFunction";
Review Comment:
Why not return an error Status instead of calling `LOG(FATAL)`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]