This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 462efea [Performance Optimization and Refactor] (#5358) (#5364)
462efea is described below
commit 462efeaf396ffdc135c4a19a121a917c72e23524
Author: HappenLee <[email protected]>
AuthorDate: Sun Feb 7 22:41:33 2021 +0800
[Performance Optimization and Refactor] (#5358) (#5364)
1. Add BlockColumnPredicate to support OR and AND column predicates in
RowBlockV2
2. For SegmentV2, support evaluating vectorized delete predicates in the
storage engine instead of in Reader
---
be/src/exec/olap_scanner.cpp | 3 +-
be/src/olap/CMakeLists.txt | 1 +
be/src/olap/block_column_predicate.cpp | 116 +++++++++
be/src/olap/block_column_predicate.h | 116 +++++++++
be/src/olap/collect_iterator.cpp | 35 ++-
be/src/olap/collect_iterator.h | 5 +-
be/src/olap/column_predicate.h | 9 +-
be/src/olap/comparison_predicate.cpp | 102 ++++++--
be/src/olap/comparison_predicate.h | 5 +-
be/src/olap/delete_handler.cpp | 51 +++-
be/src/olap/delete_handler.h | 11 +-
be/src/olap/in_list_predicate.cpp | 90 +++++--
be/src/olap/in_list_predicate.h | 5 +-
be/src/olap/iterators.h | 4 +
be/src/olap/null_predicate.cpp | 30 ++-
be/src/olap/null_predicate.h | 7 +-
be/src/olap/olap_common.h | 1 +
be/src/olap/reader.cpp | 91 +++----
be/src/olap/reader.h | 15 +-
be/src/olap/rowset/beta_rowset_reader.cpp | 4 +-
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 17 +-
be/src/olap/rowset/segment_v2/segment_iterator.h | 2 +
be/src/olap/schema_change.cpp | 9 +-
be/src/olap/schema_change.h | 2 +-
be/test/olap/CMakeLists.txt | 1 +
be/test/olap/block_column_predicate_test.cpp | 288 +++++++++++++++++++++
be/test/olap/delete_handler_test.cpp | 2 +-
27 files changed, 896 insertions(+), 126 deletions(-)
diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp
index 50e5430..e3fc004 100644
--- a/be/src/exec/olap_scanner.cpp
+++ b/be/src/exec/olap_scanner.cpp
@@ -479,9 +479,10 @@ void OlapScanner::update_counter() {
COUNTER_UPDATE(_parent->_stats_filtered_counter,
_reader->stats().rows_stats_filtered);
COUNTER_UPDATE(_parent->_bf_filtered_counter,
_reader->stats().rows_bf_filtered);
COUNTER_UPDATE(_parent->_del_filtered_counter,
_reader->stats().rows_del_filtered);
+ COUNTER_UPDATE(_parent->_del_filtered_counter,
_reader->stats().rows_vec_del_cond_filtered);
+
COUNTER_UPDATE(_parent->_conditions_filtered_counter,
_reader->stats().rows_conditions_filtered);
-
COUNTER_UPDATE(_parent->_key_range_filtered_counter,
_reader->stats().rows_key_range_filtered);
COUNTER_UPDATE(_parent->_index_load_timer, _reader->stats().index_load_ns);
diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt
index f9827f8..ffa4112 100644
--- a/be/src/olap/CMakeLists.txt
+++ b/be/src/olap/CMakeLists.txt
@@ -32,6 +32,7 @@ add_library(Olap STATIC
bloom_filter.hpp
bloom_filter_reader.cpp
bloom_filter_writer.cpp
+ block_column_predicate.cpp
byte_buffer.cpp
collect_iterator.cpp
compaction.cpp
diff --git a/be/src/olap/block_column_predicate.cpp
b/be/src/olap/block_column_predicate.cpp
new file mode 100644
index 0000000..f460be5
--- /dev/null
+++ b/be/src/olap/block_column_predicate.cpp
@@ -0,0 +1,116 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "block_column_predicate.h"
+
+#include "olap/row_block2.h"
+
+namespace doris {
+
+void SingleColumnBlockPredicate::evaluate(RowBlockV2* block, uint16_t*
selected_size) const {
+ auto column_id = _predicate->column_id();
+ auto column_block = block->column_block(column_id);
+ _predicate->evaluate(&column_block, block->selection_vector(),
selected_size);
+}
+
+void SingleColumnBlockPredicate::evaluate_and(RowBlockV2 *block, uint16_t
selected_size, bool *flags) const {
+ auto column_id = _predicate->column_id();
+ auto column_block = block->column_block(column_id);
+ _predicate->evaluate_and(&column_block, block->selection_vector(),
selected_size, flags);
+}
+
+void SingleColumnBlockPredicate::evaluate_or(RowBlockV2 *block, uint16_t
selected_size, bool *flags) const {
+ auto column_id = _predicate->column_id();
+ auto column_block = block->column_block(column_id);
+ _predicate->evaluate_or(&column_block, block->selection_vector(),
selected_size, flags);
+}
+
+void OrBlockColumnPredicate::evaluate(RowBlockV2* block, uint16_t*
selected_size) const {
+ if (num_of_column_predicate() == 1) {
+ _block_column_predicate_vec[0]->evaluate(block, selected_size);
+ } else {
+ bool flags[*selected_size];
+ memset(flags, false, *selected_size);
+ for (int i = 0; i < num_of_column_predicate(); ++i) {
+ auto column_predicate = _block_column_predicate_vec[i];
+ column_predicate->evaluate_or(block, *selected_size, flags);
+ }
+
+ uint16_t new_size = 0;
+ for (int i = 0; i < *selected_size; ++i) {
+ if (flags[i]) {
+ block->selection_vector()[new_size++] =
block->selection_vector()[i];
+ }
+ }
+ *selected_size = new_size;
+ }
+}
+
+void OrBlockColumnPredicate::evaluate_or(RowBlockV2 *block, uint16_t
selected_size, bool* flags) const {
+ for (auto block_column_predicate : _block_column_predicate_vec) {
+ block_column_predicate->evaluate_or(block, selected_size, flags);
+ }
+}
+
+void OrBlockColumnPredicate::evaluate_and(RowBlockV2 *block, uint16_t
selected_size, bool* flags) const {
+ if (num_of_column_predicate() == 1) {
+ _block_column_predicate_vec[0]->evaluate_and(block, selected_size,
flags);
+ } else {
+ bool new_flags[selected_size];
+ memset(new_flags, false, selected_size);
+ for (int i = 0; i < num_of_column_predicate(); ++i) {
+ auto column_predicate = _block_column_predicate_vec[i];
+ column_predicate->evaluate_or(block, selected_size, new_flags);
+ }
+
+ for (int i = 0; i < selected_size; ++i) {
+ flags[i] &= new_flags[i];
+ }
+ }
+}
+
+void AndBlockColumnPredicate::evaluate(RowBlockV2* block, uint16_t*
selected_size) const {
+ for (auto block_column_predicate : _block_column_predicate_vec) {
+ block_column_predicate->evaluate(block, selected_size);
+ }
+}
+
+void AndBlockColumnPredicate::evaluate_and(RowBlockV2 *block, uint16_t
selected_size, bool* flags) const {
+ for (auto block_column_predicate : _block_column_predicate_vec) {
+ block_column_predicate->evaluate_and(block, selected_size, flags);
+ }
+}
+
+void AndBlockColumnPredicate::evaluate_or(RowBlockV2 *block, uint16_t
selected_size, bool* flags) const {
+ if (num_of_column_predicate() == 1) {
+ _block_column_predicate_vec[0]->evaluate_or(block, selected_size,
flags);
+ } else {
+ bool new_flags[selected_size];
+ memset(new_flags, true, selected_size);
+
+ for (int i = 0; i < num_of_column_predicate(); ++i) {
+ auto column_predicate = _block_column_predicate_vec[i];
+ column_predicate->evaluate_and(block, selected_size, new_flags);
+ }
+
+ for (int i = 0; i < selected_size; ++i) {
+ flags[i] |= new_flags[i];
+ }
+ }
+}
+
+} // namespace doris
diff --git a/be/src/olap/block_column_predicate.h
b/be/src/olap/block_column_predicate.h
new file mode 100644
index 0000000..d20408b
--- /dev/null
+++ b/be/src/olap/block_column_predicate.h
@@ -0,0 +1,116 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef DORIS_BE_SRC_OLAP_BLOCK_COLUMN_PREDICATE_H
+#define DORIS_BE_SRC_OLAP_BLOCK_COLUMN_PREDICATE_H
+
+#include <vector>
+
+#include "olap/column_predicate.h"
+
+namespace doris {
+
+// BlockColumnPredicate evaluates column predicates on a RowBlockV2 and
supports combining them with OR and AND.
+// BlockColumnPredicate will replace ColumnPredicate as the unified external
vectorization interface
+// in the future.
+// TODO: support evaluating predicates on Bitmap and ZoneMap, so that column
indexes can be used to filter
+// pages and segments.
+
+class BlockColumnPredicate {
+public:
+ BlockColumnPredicate() = default;
+ virtual ~BlockColumnPredicate() = default;
+
+ // evaluate all predicate on Block
+ virtual void evaluate(RowBlockV2* block, uint16_t* selected_size) const =
0;
+ // Evaluate all child block column predicates with AND semantics; flags is a
temporary array
+ // that marks whether each entry of the selection vector is still selected
after evaluating the column predicates
+ virtual void evaluate_and(RowBlockV2* block, uint16_t selected_size, bool*
flags) const = 0;
+ // evaluate or semantics in all child block column predicate
+ virtual void evaluate_or(RowBlockV2* block, uint16_t selected_size, bool*
flags) const = 0;
+
+ virtual void get_all_column_ids(std::set<ColumnId>& column_id_set) const =
0;
+};
+
+class SingleColumnBlockPredicate : public BlockColumnPredicate {
+public:
+ explicit SingleColumnBlockPredicate(const ColumnPredicate*
pre):_predicate(pre) {};
+
+ void evaluate(RowBlockV2* block, uint16_t* selected_size) const override;
+ void evaluate_and(RowBlockV2* block, uint16_t selected_size, bool* flags)
const override;
+ void evaluate_or(RowBlockV2* block, uint16_t selected_size, bool* flags)
const override;
+
+ void get_all_column_ids(std::set<ColumnId>& column_id_set) const override {
+ column_id_set.insert(_predicate->column_id());
+ };
+private:
+ const ColumnPredicate* _predicate;
+};
+
+class MutilColumnBlockPredicate : public BlockColumnPredicate {
+public:
+ MutilColumnBlockPredicate() = default;
+
+ ~MutilColumnBlockPredicate() override {
+ for (auto ptr : _block_column_predicate_vec) {
+ delete ptr;
+ }
+ }
+
+ void add_column_predicate(const BlockColumnPredicate* column_predicate) {
+ _block_column_predicate_vec.push_back(column_predicate);
+ }
+
+ size_t num_of_column_predicate() const {
+ return _block_column_predicate_vec.size();
+ }
+
+ void get_all_column_ids(std::set<ColumnId>& column_id_set) const override {
+ for (auto child_block_predicate : _block_column_predicate_vec) {
+ child_block_predicate->get_all_column_ids(column_id_set);
+ }
+ };
+
+protected:
+ std::vector<const BlockColumnPredicate*> _block_column_predicate_vec;
+};
+
+class OrBlockColumnPredicate : public MutilColumnBlockPredicate {
+public:
+ void evaluate(RowBlockV2* block, uint16_t* selected_size) const override;
+
+ // This can be confusing: when OrBlockColumnPredicate is a child of
AndBlockColumnPredicate:
+ // 1. OrBlockColumnPredicate first evaluates all of its child BlockColumnPredicates
with OR semantics
+ // 2. The result of step 1 is then ANDed into flags to get the proper select flags
+ void evaluate_and(RowBlockV2* block, uint16_t selected_size, bool* flags)
const override;
+ void evaluate_or(RowBlockV2* block, uint16_t selected_size, bool* flags)
const override;
+};
+
+class AndBlockColumnPredicate : public MutilColumnBlockPredicate {
+public:
+ void evaluate(RowBlockV2* block, uint16_t* selected_size) const override;
+ void evaluate_and(RowBlockV2* block, uint16_t selected_size, bool* flags)
const override;
+
+ // This can be confusing: when AndBlockColumnPredicate is a child of
OrBlockColumnPredicate:
+ // 1. AndBlockColumnPredicate first evaluates all of its child BlockColumnPredicates
with AND semantics
+ // 2. The result of step 1 is then ORed into flags to get the proper select flags
+ void evaluate_or(RowBlockV2* block, uint16_t selected_size, bool* flags)
const override;
+};
+
+} //namespace doris
+
+#endif //DORIS_BE_SRC_OLAP_BLOCK_COLUMN_PREDICATE_H
diff --git a/be/src/olap/collect_iterator.cpp b/be/src/olap/collect_iterator.cpp
index 9212fc0..cf9fdbe 100644
--- a/be/src/olap/collect_iterator.cpp
+++ b/be/src/olap/collect_iterator.cpp
@@ -21,6 +21,7 @@
#include "olap/row.h"
#include "olap/row_block.h"
#include "olap/row_cursor.h"
+#include "olap/rowset/beta_rowset_reader.h"
namespace doris {
@@ -139,7 +140,14 @@ OLAPStatus CollectIterator::next(const RowCursor** row,
bool* delete_flag) {
}
CollectIterator::Level0Iterator::Level0Iterator(RowsetReaderSharedPtr
rs_reader, Reader* reader)
- : _rs_reader(rs_reader), _is_delete(rs_reader->delete_flag()),
_reader(reader) {}
+ : _rs_reader(rs_reader), _is_delete(rs_reader->delete_flag()),
_reader(reader) {
+ auto* ans = dynamic_cast<BetaRowsetReader*>(rs_reader.get());
+ if (LIKELY(ans != nullptr)) {
+ _refresh_current_row = &Level0Iterator::_refresh_current_row_v2;
+ } else {
+ _refresh_current_row = &Level0Iterator::_refresh_current_row_v1;
+ }
+}
CollectIterator::Level0Iterator::~Level0Iterator() {}
@@ -149,7 +157,7 @@ OLAPStatus CollectIterator::Level0Iterator::init() {
LOG(WARNING) << "failed to init row cursor, res=" << res;
return res;
}
- RETURN_NOT_OK(_refresh_current_row());
+ RETURN_NOT_OK((this->*_refresh_current_row)());
return OLAP_SUCCESS;
}
@@ -166,7 +174,7 @@ int64_t CollectIterator::Level0Iterator::version() const {
return _rs_reader->version().second;
}
-OLAPStatus CollectIterator::Level0Iterator::_refresh_current_row() {
+OLAPStatus CollectIterator::Level0Iterator::_refresh_current_row_v1() {
do {
if (_row_block != nullptr && _row_block->has_remaining()) {
size_t pos = _row_block->pos();
@@ -191,9 +199,28 @@ OLAPStatus
CollectIterator::Level0Iterator::_refresh_current_row() {
return OLAP_ERR_DATA_EOF;
}
+OLAPStatus CollectIterator::Level0Iterator::_refresh_current_row_v2() {
+ do {
+ if (_row_block != nullptr && _row_block->has_remaining()) {
+ size_t pos = _row_block->pos();
+ _row_block->get_row(pos, &_row_cursor);
+ _current_row = &_row_cursor;
+ return OLAP_SUCCESS;
+ } else {
+ auto res = _rs_reader->next_block(&_row_block);
+ if (res != OLAP_SUCCESS) {
+ _current_row = nullptr;
+ return res;
+ }
+ }
+ } while (_row_block != nullptr);
+ _current_row = nullptr;
+ return OLAP_ERR_DATA_EOF;
+}
+
OLAPStatus CollectIterator::Level0Iterator::next(const RowCursor** row, bool*
delete_flag) {
_row_block->pos_inc();
- auto res = _refresh_current_row();
+ auto res = (this->*_refresh_current_row)();
*row = _current_row;
*delete_flag = _is_delete;
if (_current_row != nullptr) {
diff --git a/be/src/olap/collect_iterator.h b/be/src/olap/collect_iterator.h
index c8e712b..1ed0831 100644
--- a/be/src/olap/collect_iterator.h
+++ b/be/src/olap/collect_iterator.h
@@ -103,7 +103,10 @@ private:
~Level0Iterator();
private:
- OLAPStatus _refresh_current_row();
+ OLAPStatus (Level0Iterator::*_refresh_current_row)() = nullptr;
+
+ OLAPStatus _refresh_current_row_v1();
+ OLAPStatus _refresh_current_row_v2();
RowsetReaderSharedPtr _rs_reader;
const RowCursor* _current_row = nullptr;
diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h
index c62a2cb..1456566 100644
--- a/be/src/olap/column_predicate.h
+++ b/be/src/olap/column_predicate.h
@@ -30,18 +30,22 @@ namespace doris {
class VectorizedRowBatch;
class Schema;
+class RowBlockV2;
class ColumnPredicate {
public:
- explicit ColumnPredicate(uint32_t column_id) : _column_id(column_id) {}
+ explicit ColumnPredicate(uint32_t column_id, bool opposite = false)
+ : _column_id(column_id), _opposite(opposite) {}
- virtual ~ColumnPredicate() {}
+ virtual ~ColumnPredicate() = default;
//evaluate predicate on VectorizedRowBatch
virtual void evaluate(VectorizedRowBatch* batch) const = 0;
// evaluate predicate on ColumnBlock
virtual void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size)
const = 0;
+ virtual void evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t size,
bool* flags) const = 0;
+ virtual void evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t
size, bool* flags) const = 0;
//evaluate predicate on Bitmap
virtual Status evaluate(const Schema& schema,
@@ -52,6 +56,7 @@ public:
protected:
uint32_t _column_id;
+ bool _opposite;
};
} //namespace doris
diff --git a/be/src/olap/comparison_predicate.cpp
b/be/src/olap/comparison_predicate.cpp
index cb8d1b2..c097e78 100644
--- a/be/src/olap/comparison_predicate.cpp
+++ b/be/src/olap/comparison_predicate.cpp
@@ -26,8 +26,8 @@ namespace doris {
#define COMPARISON_PRED_CONSTRUCTOR(CLASS) \
template <class type> \
- CLASS<type>::CLASS(uint32_t column_id, const type& value) \
- : ColumnPredicate(column_id), _value(value) {}
+ CLASS<type>::CLASS(uint32_t column_id, const type& value, bool opposite) \
+ : ColumnPredicate(column_id, opposite), _value(value) {}
COMPARISON_PRED_CONSTRUCTOR(EqualPredicate)
COMPARISON_PRED_CONSTRUCTOR(NotEqualPredicate)
@@ -38,8 +38,8 @@ COMPARISON_PRED_CONSTRUCTOR(GreaterEqualPredicate)
#define COMPARISON_PRED_CONSTRUCTOR_STRING(CLASS) \
template <> \
- CLASS<StringValue>::CLASS(uint32_t column_id, const StringValue& value) \
- : ColumnPredicate(column_id) { \
+ CLASS<StringValue>::CLASS(uint32_t column_id, const StringValue& value,
bool opposite) \
+ : ColumnPredicate(column_id, opposite) {
\
_value.len = value.len; \
_value.ptr = value.ptr; \
}
@@ -119,7 +119,8 @@ COMPARISON_PRED_EVALUATE(GreaterEqualPredicate, >=)
sel[new_size] = idx;
\
const type* cell_value =
\
reinterpret_cast<const
type*>(block->cell(idx).cell_ptr()); \
- new_size += (!block->cell(idx).is_null() && (*cell_value OP
_value)); \
+ auto result = (!block->cell(idx).is_null() && (*cell_value OP
_value)); \
+ new_size += _opposite ? !result : result;
\
}
\
} else {
\
for (uint16_t i = 0; i < *size; ++i) {
\
@@ -127,7 +128,8 @@ COMPARISON_PRED_EVALUATE(GreaterEqualPredicate, >=)
sel[new_size] = idx;
\
const type* cell_value =
\
reinterpret_cast<const
type*>(block->cell(idx).cell_ptr()); \
- new_size += (*cell_value OP _value);
\
+ auto result = (*cell_value OP _value);
\
+ new_size += _opposite ? !result : result;
\
}
\
}
\
*size = new_size;
\
@@ -140,6 +142,68 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(LessEqualPredicate,
<=)
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterPredicate, >)
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
+#define COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(CLASS, OP)
\
+ template <class type>
\
+ void CLASS<type>::evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t
size, bool* flags) const { \
+ if (block->is_nullable()) {
\
+ for (uint16_t i = 0; i < size; ++i) {
\
+ if (flags[i]) continue;
\
+ uint16_t idx = sel[i];
\
+ const type* cell_value =
\
+ reinterpret_cast<const
type*>(block->cell(idx).cell_ptr()); \
+ auto result = (!block->cell(idx).is_null() && (*cell_value OP
_value)); \
+ flags[i] |= _opposite ? !result : result;
\
+ }
\
+ } else {
\
+ for (uint16_t i = 0; i < size; ++i) {
\
+ if (flags[i]) continue;
\
+ uint16_t idx = sel[i];
\
+ const type* cell_value =
\
+ reinterpret_cast<const
type*>(block->cell(idx).cell_ptr()); \
+ auto result = (*cell_value OP _value);
\
+ flags[i] |= _opposite ? !result : result;
\
+ }
\
+ }
\
+ }
+
+COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(EqualPredicate, ==)
+COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(NotEqualPredicate, !=)
+COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(LessPredicate, <)
+COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(LessEqualPredicate, <=)
+COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(GreaterPredicate, >)
+COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(GreaterEqualPredicate, >=)
+
+#define COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(CLASS, OP)
\
+ template <class type>
\
+ void CLASS<type>::evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t
size, bool* flags) const { \
+ if (block->is_nullable()) {
\
+ for (uint16_t i = 0; i < size; ++i) {
\
+ if (!flags[i]) continue;
\
+ uint16_t idx = sel[i];
\
+ const type* cell_value =
\
+ reinterpret_cast<const
type*>(block->cell(idx).cell_ptr()); \
+ auto result = (!block->cell(idx).is_null() && (*cell_value OP
_value)); \
+ flags[i] &= _opposite ? !result : result;
\
+ }
\
+ } else {
\
+ for (uint16_t i = 0; i < size; ++i) {
\
+ if (!flags[i]) continue;
\
+ uint16_t idx = sel[i];
\
+ const type* cell_value =
\
+ reinterpret_cast<const
type*>(block->cell(idx).cell_ptr()); \
+ auto result = (*cell_value OP _value);
\
+ flags[i] &= _opposite ? !result : result;
\
+ }
\
+ }
\
+ }
+
+COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(EqualPredicate, ==)
+COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(NotEqualPredicate, !=)
+COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(LessPredicate, <)
+COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(LessEqualPredicate, <=)
+COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(GreaterPredicate, >)
+COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(GreaterEqualPredicate, >=)
+
#define BITMAP_COMPARE_EqualPredicate(s, exact_match, seeked_ordinal,
iterator, bitmap, roaring) \
do {
\
if (!s.is_not_found()) {
\
@@ -254,19 +318,19 @@ COMPARISON_PRED_BITMAP_EVALUATE(LessEqualPredicate, <=)
COMPARISON_PRED_BITMAP_EVALUATE(GreaterPredicate, >)
COMPARISON_PRED_BITMAP_EVALUATE(GreaterEqualPredicate, >=)
-#define COMPARISON_PRED_CONSTRUCTOR_DECLARATION(CLASS)
\
- template CLASS<int8_t>::CLASS(uint32_t column_id, const int8_t& value);
\
- template CLASS<int16_t>::CLASS(uint32_t column_id, const int16_t& value);
\
- template CLASS<int32_t>::CLASS(uint32_t column_id, const int32_t& value);
\
- template CLASS<int64_t>::CLASS(uint32_t column_id, const int64_t& value);
\
- template CLASS<int128_t>::CLASS(uint32_t column_id, const int128_t&
value); \
- template CLASS<float>::CLASS(uint32_t column_id, const float& value);
\
- template CLASS<double>::CLASS(uint32_t column_id, const double& value);
\
- template CLASS<decimal12_t>::CLASS(uint32_t column_id, const decimal12_t&
value); \
- template CLASS<StringValue>::CLASS(uint32_t column_id, const StringValue&
value); \
- template CLASS<uint24_t>::CLASS(uint32_t column_id, const uint24_t&
value); \
- template CLASS<uint64_t>::CLASS(uint32_t column_id, const uint64_t&
value); \
- template CLASS<bool>::CLASS(uint32_t column_id, const bool& value);
+#define COMPARISON_PRED_CONSTRUCTOR_DECLARATION(CLASS)
\
+ template CLASS<int8_t>::CLASS(uint32_t column_id, const int8_t& value,
bool opposite); \
+ template CLASS<int16_t>::CLASS(uint32_t column_id, const int16_t& value,
bool opposite); \
+ template CLASS<int32_t>::CLASS(uint32_t column_id, const int32_t& value,
bool opposite); \
+ template CLASS<int64_t>::CLASS(uint32_t column_id, const int64_t& value,
bool opposite); \
+ template CLASS<int128_t>::CLASS(uint32_t column_id, const int128_t& value,
bool opposite); \
+ template CLASS<float>::CLASS(uint32_t column_id, const float& value, bool
opposite); \
+ template CLASS<double>::CLASS(uint32_t column_id, const double& value,
bool opposite); \
+ template CLASS<decimal12_t>::CLASS(uint32_t column_id, const decimal12_t&
value, bool opposite); \
+ template CLASS<StringValue>::CLASS(uint32_t column_id, const StringValue&
value, bool opposite); \
+ template CLASS<uint24_t>::CLASS(uint32_t column_id, const uint24_t& value,
bool opposite); \
+ template CLASS<uint64_t>::CLASS(uint32_t column_id, const uint64_t& value,
bool opposite); \
+ template CLASS<bool>::CLASS(uint32_t column_id, const bool& value, bool
opposite);
COMPARISON_PRED_CONSTRUCTOR_DECLARATION(EqualPredicate)
COMPARISON_PRED_CONSTRUCTOR_DECLARATION(NotEqualPredicate)
diff --git a/be/src/olap/comparison_predicate.h
b/be/src/olap/comparison_predicate.h
index d5574a5..542073e 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -30,10 +30,11 @@ class VectorizedRowBatch;
template <class type>
\
class CLASS : public ColumnPredicate {
\
public:
\
- CLASS(uint32_t column_id, const type& value);
\
- virtual ~CLASS() {}
\
+ CLASS(uint32_t column_id, const type& value, bool opposite = false);
\
virtual void evaluate(VectorizedRowBatch* batch) const override;
\
void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const
override; \
+ void evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t size,
bool* flags) const override;\
+ void evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t size,
bool* flags) const override;\
virtual Status evaluate(const Schema& schema,
\
const std::vector<BitmapIndexIterator*>&
iterators, \
uint32_t num_rows, Roaring* roaring) const
override; \
diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp
index 94fca29..5778021 100644
--- a/be/src/olap/delete_handler.cpp
+++ b/be/src/olap/delete_handler.cpp
@@ -16,6 +16,7 @@
// under the License.
#include "olap/delete_handler.h"
+#include "olap/reader.h"
#include <errno.h>
#include <json2pb/pb_to_json.h>
@@ -95,7 +96,7 @@ std::string
DeleteConditionHandler::construct_sub_predicates(const TCondition& c
} else if (op == ">") {
op += ">";
}
- string condition_str;
+ string condition_str = "";
if ("IS" == op) {
condition_str = condition.column_name + " " + op + " " +
condition.condition_values[0];
} else {
@@ -164,6 +165,7 @@ OLAPStatus
DeleteConditionHandler::check_condition_valid(const TabletSchema& sch
// Delete condition should only applied on key columns or duplicate key
table, and
// the condition column type should not be float or double.
const TabletColumn& column = schema.column(field_index);
+
if ((!column.is_key() && schema.keys_type() != KeysType::DUP_KEYS) ||
column.type() == OLAP_FIELD_TYPE_DOUBLE || column.type() ==
OLAP_FIELD_TYPE_FLOAT) {
LOG(WARNING) << "field is not key column, or storage model is not
duplicate, or data type "
@@ -225,7 +227,7 @@ bool DeleteHandler::_parse_condition(const std::string&
condition_str, TConditio
}
OLAPStatus DeleteHandler::init(const TabletSchema& schema,
- const DelPredicateArray& delete_conditions,
int64_t version) {
+ const DelPredicateArray& delete_conditions, int64_t version, const
Reader* reader) {
DCHECK(!_is_inited) << "reinitialize delete handler.";
DCHECK(version >= 0) << "invalid parameters. version=" << version;
@@ -238,6 +240,7 @@ OLAPStatus DeleteHandler::init(const TabletSchema& schema,
DeleteConditions temp;
temp.filter_version = delete_condition.version();
temp.del_cond = new (std::nothrow) Conditions();
+
if (temp.del_cond == nullptr) {
LOG(FATAL) << "fail to malloc Conditions. size=" <<
sizeof(Conditions);
return OLAP_ERR_MALLOC_ERROR;
@@ -256,6 +259,13 @@ OLAPStatus DeleteHandler::init(const TabletSchema& schema,
OLAP_LOG_WARNING("fail to append condition.[res=%d]", res);
return res;
}
+
+ if (reader != nullptr) {
+ auto predicate = reader->_parse_to_predicate(condition, true);
+ if (predicate != nullptr) {
+ temp.column_predicate_vec.push_back(predicate);
+ }
+ }
}
for (const auto& in_predicate : delete_condition.in_predicates()) {
@@ -274,9 +284,14 @@ OLAPStatus DeleteHandler::init(const TabletSchema& schema,
OLAP_LOG_WARNING("fail to append condition.[res=%d]", res);
return res;
}
+
+ if (reader != nullptr) {
+
temp.column_predicate_vec.push_back(reader->_parse_to_predicate(condition,
true));
+ }
+
}
- _del_conds.push_back(temp);
+ _del_conds.emplace_back(std::move(temp));
}
_is_inited = true;
@@ -301,6 +316,7 @@ std::vector<int64_t> DeleteHandler::get_conds_version() {
for (const auto& cond : _del_conds) {
conds_version.push_back(cond.filter_version);
}
+
return conds_version;
}
@@ -312,16 +328,41 @@ void DeleteHandler::finalize() {
for (auto& cond : _del_conds) {
cond.del_cond->finalize();
delete cond.del_cond;
+
+ for (auto pred : cond.column_predicate_vec) {
+ delete pred;
+ }
}
+
_del_conds.clear();
_is_inited = false;
}
-void DeleteHandler::get_delete_conditions_after_version(
- int64_t version, std::vector<const Conditions*>* delete_conditions)
const {
+void DeleteHandler::get_delete_conditions_after_version(int64_t version,
+ std::vector<const
Conditions *>* delete_conditions,
+
AndBlockColumnPredicate* and_block_column_predicate_ptr) const {
for (auto& del_cond : _del_conds) {
if (del_cond.filter_version > version) {
delete_conditions->emplace_back(del_cond.del_cond);
+
+ // now, only query support delete column predicate operator
+ if (!del_cond.column_predicate_vec.empty()) {
+ if (del_cond.column_predicate_vec.size() == 1) {
+ auto single_column_block_predicate = new
SingleColumnBlockPredicate(
+ del_cond.column_predicate_vec[0]);
+
and_block_column_predicate_ptr->add_column_predicate(single_column_block_predicate);
+ } else {
+ auto or_column_predicate = new OrBlockColumnPredicate();
+
+ // build or_column_predicate
+ std::for_each(del_cond.column_predicate_vec.cbegin(),
del_cond.column_predicate_vec.cend(), \
+ [&or_column_predicate](const ColumnPredicate *predicate) {
+
or_column_predicate->add_column_predicate(new
SingleColumnBlockPredicate(predicate));
+ }
+ );
+
and_block_column_predicate_ptr->add_column_predicate(or_column_predicate);
+ }
+ }
}
}
}
diff --git a/be/src/olap/delete_handler.h b/be/src/olap/delete_handler.h
index aea828d..c595d9b 100644
--- a/be/src/olap/delete_handler.h
+++ b/be/src/olap/delete_handler.h
@@ -23,6 +23,8 @@
#include "gen_cpp/AgentService_types.h"
#include "gen_cpp/olap_file.pb.h"
+#include "olap/block_column_predicate.h"
+#include "olap/column_predicate.h"
#include "olap/olap_define.h"
#include "olap/tablet_schema.h"
@@ -31,6 +33,7 @@ namespace doris {
typedef google::protobuf::RepeatedPtrField<DeletePredicatePB>
DelPredicateArray;
class Conditions;
class RowCursor;
+class Reader;
class DeleteConditionHandler {
public:
@@ -65,6 +68,7 @@ private:
struct DeleteConditions {
int64_t filter_version = 0; // The version of this condition
Conditions* del_cond = nullptr; // The delete condition
+ std::vector<const ColumnPredicate*> column_predicate_vec;
};
// This class is used for checking whether a row should be deleted.
@@ -101,7 +105,7 @@ public:
// * OLAP_ERR_DELETE_INVALID_PARAMETERS: input parameters are not valid
// * OLAP_ERR_MALLOC_ERROR: alloc memory failed
OLAPStatus init(const TabletSchema& schema, const DelPredicateArray&
delete_conditions,
- int64_t version);
+ int64_t version, const doris::Reader* = nullptr);
// Check whether a row should be deleted.
//
@@ -127,8 +131,9 @@ public:
// Return all the delete conditions.
const std::vector<DeleteConditions>& get_delete_conditions() const {
return _del_conds; }
- void get_delete_conditions_after_version(
- int64_t version, std::vector<const Conditions*>*
delete_conditions) const;
+ void get_delete_conditions_after_version(int64_t version,
+ std::vector<const Conditions *>*
delete_conditions,
+ AndBlockColumnPredicate*
and_block_column_predicate_ptr) const;
private:
// Use regular expression to extract 'column_name', 'op' and 'operands'
diff --git a/be/src/olap/in_list_predicate.cpp
b/be/src/olap/in_list_predicate.cpp
index 41b306e..f4d2d98 100644
--- a/be/src/olap/in_list_predicate.cpp
+++ b/be/src/olap/in_list_predicate.cpp
@@ -25,8 +25,8 @@ namespace doris {
#define IN_LIST_PRED_CONSTRUCTOR(CLASS) \
template <class type> \
- CLASS<type>::CLASS(uint32_t column_id, std::set<type>&& values) \
- : ColumnPredicate(column_id), _values(std::move(values)) {}
+ CLASS<type>::CLASS(uint32_t column_id, std::set<type>&& values, bool
opposite) \
+ : ColumnPredicate(column_id, opposite), _values(std::move(values))
{}
IN_LIST_PRED_CONSTRUCTOR(InListPredicate)
IN_LIST_PRED_CONSTRUCTOR(NotInListPredicate)
@@ -95,8 +95,9 @@ IN_LIST_PRED_EVALUATE(NotInListPredicate, ==)
sel[new_size] = idx;
\
const type* cell_value =
\
reinterpret_cast<const
type*>(block->cell(idx).cell_ptr()); \
- new_size += (!block->cell(idx).is_null() &&
_values.find(*cell_value) \
+ auto result = (!block->cell(idx).is_null() &&
_values.find(*cell_value) \
OP
_values.end()); \
+ new_size += _opposite ? !result : result;
\
}
\
} else {
\
for (uint16_t i = 0; i < *size; ++i) {
\
@@ -104,7 +105,8 @@ IN_LIST_PRED_EVALUATE(NotInListPredicate, ==)
sel[new_size] = idx;
\
const type* cell_value =
\
reinterpret_cast<const
type*>(block->cell(idx).cell_ptr()); \
- new_size += (_values.find(*cell_value) OP _values.end());
\
+ auto result = (_values.find(*cell_value) OP _values.end());
\
+ new_size += _opposite ? !result : result;
\
}
\
}
\
*size = new_size;
\
@@ -113,6 +115,62 @@ IN_LIST_PRED_EVALUATE(NotInListPredicate, ==)
IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(InListPredicate, !=)
IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==)
+// Generates CLASS<type>::evaluate_or(block, sel, size, flags).
+// For every i in [0, size) whose flags[i] is still false, the cell at row
+// sel[i] is looked up in _values and the outcome is OR-ed into flags[i].
+// OP compares the find() iterator with _values.end(): `!=` means "value is in
+// the set", `==` means "value is not in the set". The raw result is inverted
+// when _opposite is set. In the nullable branch a NULL cell yields a raw
+// result of false (before _opposite is applied) and the cell value is never
+// dereferenced thanks to the short-circuit `&&`.
+#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_OR(CLASS, OP)
\
+ template <class type>
\
+ void CLASS<type>::evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t
size, bool* flags) const { \
+ if (block->is_nullable()) {
\
+ for (uint16_t i = 0; i < size; ++i) {
\
+ if (flags[i]) continue;
\
+ uint16_t idx = sel[i];
\
+ const type* cell_value =
\
+ reinterpret_cast<const
type*>(block->cell(idx).cell_ptr()); \
+ auto result = (!block->cell(idx).is_null() &&
_values.find(*cell_value) \
+ OP
_values.end()); \
+ flags[i] |= _opposite ? !result : result;
\
+ }
\
+ } else {
\
+ for (uint16_t i = 0; i < size; ++i) {
\
+ if (flags[i]) continue;
\
+ uint16_t idx = sel[i];
\
+ const type* cell_value =
\
+ reinterpret_cast<const
type*>(block->cell(idx).cell_ptr()); \
+ auto result = (_values.find(*cell_value) OP _values.end());
\
+ flags[i] |= _opposite ? !result : result;
\
+ }
\
+ }
\
+ }
+
+IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_OR(InListPredicate, !=)
+IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_OR(NotInListPredicate, ==)
+
+// Generates CLASS<type>::evaluate_and(block, sel, size, flags).
+// For every i in [0, size) whose flags[i] is still true, the cell at row
+// sel[i] is looked up in _values and the outcome is AND-ed into flags[i];
+// entries that are already false are skipped.
+// OP compares the find() iterator with _values.end(): `!=` means "value is in
+// the set", `==` means "value is not in the set". The raw result is inverted
+// when _opposite is set. In the nullable branch a NULL cell yields a raw
+// result of false (before _opposite is applied) and the cell value is never
+// dereferenced thanks to the short-circuit `&&`.
+#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(CLASS, OP)
\
+ template <class type>
\
+ void CLASS<type>::evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t
size, bool* flags) const { \
+ if (block->is_nullable()) {
\
+ for (uint16_t i = 0; i < size; ++i) {
\
+ if (!flags[i]) continue;
\
+ uint16_t idx = sel[i];
\
+ const type* cell_value =
\
+ reinterpret_cast<const
type*>(block->cell(idx).cell_ptr()); \
+ auto result = (!block->cell(idx).is_null() &&
_values.find(*cell_value) \
+ OP
_values.end()); \
+ flags[i] &= _opposite ? !result : result;
\
+ }
\
+ } else {
\
+ for (uint16_t i = 0; i < size; ++i) {
\
+ if (!flags[i]) continue;
\
+ uint16_t idx = sel[i];
\
+ const type* cell_value =
\
+ reinterpret_cast<const
type*>(block->cell(idx).cell_ptr()); \
+ auto result = (_values.find(*cell_value) OP _values.end());
\
+ flags[i] &= _opposite ? !result : result;
\
+ }
\
+ }
\
+ }
+
+IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(InListPredicate, !=)
+IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(NotInListPredicate, ==)
+
#define IN_LIST_PRED_BITMAP_EVALUATE(CLASS, OP)
\
template <class type>
\
Status CLASS<type>::evaluate(const Schema& schema,
\
@@ -150,18 +208,18 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==)
IN_LIST_PRED_BITMAP_EVALUATE(InListPredicate, &=)
IN_LIST_PRED_BITMAP_EVALUATE(NotInListPredicate, -=)
-#define IN_LIST_PRED_CONSTRUCTOR_DECLARATION(CLASS)
\
- template CLASS<int8_t>::CLASS(uint32_t column_id, std::set<int8_t>&&
values); \
- template CLASS<int16_t>::CLASS(uint32_t column_id, std::set<int16_t>&&
values); \
- template CLASS<int32_t>::CLASS(uint32_t column_id, std::set<int32_t>&&
values); \
- template CLASS<int64_t>::CLASS(uint32_t column_id, std::set<int64_t>&&
values); \
- template CLASS<int128_t>::CLASS(uint32_t column_id, std::set<int128_t>&&
values); \
- template CLASS<float>::CLASS(uint32_t column_id, std::set<float>&&
values); \
- template CLASS<double>::CLASS(uint32_t column_id, std::set<double>&&
values); \
- template CLASS<decimal12_t>::CLASS(uint32_t column_id,
std::set<decimal12_t>&& values); \
- template CLASS<StringValue>::CLASS(uint32_t column_id,
std::set<StringValue>&& values); \
- template CLASS<uint24_t>::CLASS(uint32_t column_id, std::set<uint24_t>&&
values); \
- template CLASS<uint64_t>::CLASS(uint32_t column_id, std::set<uint64_t>&&
values);
+// Explicitly instantiates the three-argument constructor
+// CLASS<T>::CLASS(column_id, values, opposite) for each value type listed
+// below, one `template ...;` statement per type.
+#define IN_LIST_PRED_CONSTRUCTOR_DECLARATION(CLASS)
\
+ template CLASS<int8_t>::CLASS(uint32_t column_id, std::set<int8_t>&&
values, bool opposite); \
+ template CLASS<int16_t>::CLASS(uint32_t column_id, std::set<int16_t>&&
values, bool opposite); \
+ template CLASS<int32_t>::CLASS(uint32_t column_id, std::set<int32_t>&&
values, bool opposite); \
+ template CLASS<int64_t>::CLASS(uint32_t column_id, std::set<int64_t>&&
values, bool opposite); \
+ template CLASS<int128_t>::CLASS(uint32_t column_id, std::set<int128_t>&&
values, bool opposite); \
+ template CLASS<float>::CLASS(uint32_t column_id, std::set<float>&& values,
bool opposite); \
+ template CLASS<double>::CLASS(uint32_t column_id, std::set<double>&&
values, bool opposite); \
+ template CLASS<decimal12_t>::CLASS(uint32_t column_id,
std::set<decimal12_t>&& values, bool opposite); \
+ template CLASS<StringValue>::CLASS(uint32_t column_id,
std::set<StringValue>&& values, bool opposite); \
+ template CLASS<uint24_t>::CLASS(uint32_t column_id, std::set<uint24_t>&&
values, bool opposite); \
+ template CLASS<uint64_t>::CLASS(uint32_t column_id, std::set<uint64_t>&&
values, bool opposite);
IN_LIST_PRED_CONSTRUCTOR_DECLARATION(InListPredicate)
IN_LIST_PRED_CONSTRUCTOR_DECLARATION(NotInListPredicate)
diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index ecca65d..8f96a04 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -33,10 +33,11 @@ class VectorizedRowBatch;
template <class type>
\
class CLASS : public ColumnPredicate {
\
public:
\
- CLASS(uint32_t column_id, std::set<type>&& values);
\
- virtual ~CLASS() {}
\
+ CLASS(uint32_t column_id, std::set<type>&& values, bool is_opposite =
false); \
virtual void evaluate(VectorizedRowBatch* batch) const override;
\
void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const
override; \
+ void evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t size,
bool* flags) const override;\
+ void evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t size,
bool* flags) const override;\
virtual Status evaluate(const Schema& schema,
\
const std::vector<BitmapIndexIterator*>&
iterators, \
uint32_t num_rows, Roaring* bitmap) const
override; \
diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h
index d3a74db..78a5a04 100644
--- a/be/src/olap/iterators.h
+++ b/be/src/olap/iterators.h
@@ -21,6 +21,8 @@
#include "common/status.h"
#include "olap/olap_common.h"
+#include "olap/column_predicate.h"
+#include "olap/block_column_predicate.h"
namespace doris {
@@ -67,6 +69,8 @@ public:
// delete conditions used by column index to filter pages
std::vector<const Conditions*> delete_conditions;
+
+ std::shared_ptr<AndBlockColumnPredicate> delete_condition_predicates =
std::make_shared<AndBlockColumnPredicate>();
// reader's column predicate, nullptr if not existed
// used to fiter rows in row block
// TODO(hkp): refactor the column predicate framework
diff --git a/be/src/olap/null_predicate.cpp b/be/src/olap/null_predicate.cpp
index 4d25b51..443c854 100644
--- a/be/src/olap/null_predicate.cpp
+++ b/be/src/olap/null_predicate.cpp
@@ -23,10 +23,8 @@
namespace doris {
-NullPredicate::NullPredicate(uint32_t column_id, bool is_null)
- : ColumnPredicate(column_id), _is_null(is_null) {}
-
-NullPredicate::~NullPredicate() {}
+// `opposite` is folded directly into `_is_null` (opposite != is_null), so the
+// stored flag already describes the effective test. It is therefore not
+// forwarded to the ColumnPredicate base, which is built from column_id only.
+NullPredicate::NullPredicate(uint32_t column_id, bool is_null, bool opposite)
+ : ColumnPredicate(column_id), _is_null(opposite != is_null) {}
void NullPredicate::evaluate(VectorizedRowBatch* batch) const {
uint16_t n = batch->size();
@@ -79,6 +77,30 @@ void NullPredicate::evaluate(ColumnBlock* block, uint16_t*
sel, uint16_t* size)
*size = new_size;
}
+// OR this predicate into `flags` for the `size` rows listed in `sel`:
+// flags[i] becomes true when row sel[i] satisfies "is_null() == _is_null".
+// Entries that are already true are left untouched.
+void NullPredicate::evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t size,
+                                bool* flags) const {
+    // A non-nullable column holds no NULL cell, so an IS NULL test matches no
+    // row. OR-ing a predicate that is false everywhere must not change any
+    // flag (the previous memset(flags, true, size) marked every row as a hit).
+    if (!block->is_nullable() && _is_null) {
+        return;
+    }
+    for (uint16_t i = 0; i < size; ++i) {
+        if (flags[i]) continue;
+        uint16_t idx = sel[i];
+        flags[i] |= (block->cell(idx).is_null() == _is_null);
+    }
+}
+
+// AND this predicate into `flags` for the `size` rows listed in `sel`:
+// flags[i] stays true only when row sel[i] satisfies "is_null() == _is_null".
+// Entries that are already false are left untouched.
+void NullPredicate::evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t size,
+                                 bool* flags) const {
+    // A non-nullable column holds no NULL cell, so an IS NULL test matches no
+    // row. AND-ing a predicate that is false everywhere must clear every flag
+    // (the previous early return left all flags set, i.e. every row passed).
+    if (!block->is_nullable() && _is_null) {
+        memset(flags, false, size);
+        return;
+    }
+    for (uint16_t i = 0; i < size; ++i) {
+        if (!flags[i]) continue;
+        uint16_t idx = sel[i];
+        flags[i] &= (block->cell(idx).is_null() == _is_null);
+    }
+}
+
Status NullPredicate::evaluate(const Schema& schema,
const std::vector<BitmapIndexIterator*>&
iterators,
uint32_t num_rows, Roaring* roaring) const {
diff --git a/be/src/olap/null_predicate.h b/be/src/olap/null_predicate.h
index 3636a9c..9aca70c 100644
--- a/be/src/olap/null_predicate.h
+++ b/be/src/olap/null_predicate.h
@@ -30,13 +30,16 @@ class VectorizedRowBatch;
class NullPredicate : public ColumnPredicate {
public:
- NullPredicate(uint32_t column_id, bool is_null);
- virtual ~NullPredicate();
+ NullPredicate(uint32_t column_id, bool is_null,bool opposite = false);
virtual void evaluate(VectorizedRowBatch* batch) const override;
void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const
override;
+ void evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t size, bool*
flags) const override;
+
+ void evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t size, bool*
flags) const override;
+
virtual Status evaluate(const Schema& schema, const
vector<BitmapIndexIterator*>& iterators,
uint32_t num_rows, Roaring* roaring) const
override;
diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h
index 1fd83d6..48cba42 100644
--- a/be/src/olap/olap_common.h
+++ b/be/src/olap/olap_common.h
@@ -253,6 +253,7 @@ struct OlapReaderStatistics {
int64_t raw_rows_read = 0;
int64_t rows_vec_cond_filtered = 0;
+ int64_t rows_vec_del_cond_filtered = 0;
int64_t vec_cond_ns = 0;
int64_t rows_key_range_filtered = 0;
diff --git a/be/src/olap/reader.cpp b/be/src/olap/reader.cpp
index bfceff9..c56160b 100644
--- a/be/src/olap/reader.cpp
+++ b/be/src/olap/reader.cpp
@@ -17,6 +17,7 @@
#include "olap/reader.h"
+#include <boost/algorithm/string/case_conv.hpp>
#include <sstream>
#include "olap/collect_iterator.h"
@@ -27,6 +28,7 @@
#include "olap/row_block.h"
#include "olap/row_cursor.h"
#include "olap/rowset/column_data.h"
+#include "olap/rowset/beta_rowset_reader.h"
#include "olap/storage_engine.h"
#include "olap/tablet.h"
#include "runtime/mem_pool.h"
@@ -601,48 +603,48 @@ void Reader::_init_conditions_param(const ReaderParams&
read_params) {
#define COMPARISON_PREDICATE_CONDITION_VALUE(NAME, PREDICATE)
\
ColumnPredicate* Reader::_new_##NAME##_pred(const TabletColumn& column,
int index, \
- const std::string& cond) {
\
+ const std::string& cond, bool
opposite) const { \
ColumnPredicate* predicate = nullptr;
\
switch (column.type()) {
\
case OLAP_FIELD_TYPE_TINYINT: {
\
std::stringstream ss(cond);
\
int32_t value = 0;
\
ss >> value;
\
- predicate = new PREDICATE<int8_t>(index, value);
\
+ predicate = new PREDICATE<int8_t>(index, value, opposite);
\
break;
\
}
\
case OLAP_FIELD_TYPE_SMALLINT: {
\
std::stringstream ss(cond);
\
int16_t value = 0;
\
ss >> value;
\
- predicate = new PREDICATE<int16_t>(index, value);
\
+ predicate = new PREDICATE<int16_t>(index, value, opposite);
\
break;
\
}
\
case OLAP_FIELD_TYPE_INT: {
\
std::stringstream ss(cond);
\
int32_t value = 0;
\
ss >> value;
\
- predicate = new PREDICATE<int32_t>(index, value);
\
+ predicate = new PREDICATE<int32_t>(index, value, opposite);
\
break;
\
}
\
case OLAP_FIELD_TYPE_BIGINT: {
\
std::stringstream ss(cond);
\
int64_t value = 0;
\
ss >> value;
\
- predicate = new PREDICATE<int64_t>(index, value);
\
+ predicate = new PREDICATE<int64_t>(index, value, opposite);
\
break;
\
}
\
case OLAP_FIELD_TYPE_LARGEINT: {
\
std::stringstream ss(cond);
\
int128_t value = 0;
\
ss >> value;
\
- predicate = new PREDICATE<int128_t>(index, value);
\
+ predicate = new PREDICATE<int128_t>(index, value, opposite);
\
break;
\
}
\
case OLAP_FIELD_TYPE_DECIMAL: {
\
decimal12_t value(0, 0);
\
value.from_string(cond);
\
- predicate = new PREDICATE<decimal12_t>(index, value);
\
+ predicate = new PREDICATE<decimal12_t>(index, value, opposite);
\
break;
\
}
\
case OLAP_FIELD_TYPE_CHAR: {
\
@@ -653,7 +655,7 @@ void Reader::_init_conditions_param(const ReaderParams&
read_params) {
memory_copy(buffer, cond.c_str(), cond.length());
\
value.len = length;
\
value.ptr = buffer;
\
- predicate = new PREDICATE<StringValue>(index, value);
\
+ predicate = new PREDICATE<StringValue>(index, value, opposite);
\
break;
\
}
\
case OLAP_FIELD_TYPE_VARCHAR: {
\
@@ -663,24 +665,24 @@ void Reader::_init_conditions_param(const ReaderParams&
read_params) {
memory_copy(buffer, cond.c_str(), length);
\
value.len = length;
\
value.ptr = buffer;
\
- predicate = new PREDICATE<StringValue>(index, value);
\
+ predicate = new PREDICATE<StringValue>(index, value, opposite);
\
break;
\
}
\
case OLAP_FIELD_TYPE_DATE: {
\
uint24_t value = timestamp_from_date(cond);
\
- predicate = new PREDICATE<uint24_t>(index, value);
\
+ predicate = new PREDICATE<uint24_t>(index, value, opposite);
\
break;
\
}
\
case OLAP_FIELD_TYPE_DATETIME: {
\
uint64_t value = timestamp_from_datetime(cond);
\
- predicate = new PREDICATE<uint64_t>(index, value);
\
+ predicate = new PREDICATE<uint64_t>(index, value, opposite);
\
break;
\
}
\
case OLAP_FIELD_TYPE_BOOL: {
\
std::stringstream ss(cond);
\
bool value = false;
\
ss >> value;
\
- predicate = new PREDICATE<bool>(index, value);
\
+ predicate = new PREDICATE<bool>(index, value, opposite);
\
break;
\
}
\
default:
\
@@ -697,7 +699,8 @@ COMPARISON_PREDICATE_CONDITION_VALUE(le, LessEqualPredicate)
COMPARISON_PREDICATE_CONDITION_VALUE(gt, GreaterPredicate)
COMPARISON_PREDICATE_CONDITION_VALUE(ge, GreaterEqualPredicate)
-ColumnPredicate* Reader::_parse_to_predicate(const TCondition& condition) {
+ColumnPredicate* Reader::_parse_to_predicate(const TCondition& condition, bool
opposite) const {
+ // TODO: not equal and not in predicate is not pushed down
int32_t index = _tablet->field_index(condition.column_name);
if (index < 0) {
return nullptr;
@@ -705,18 +708,18 @@ ColumnPredicate* Reader::_parse_to_predicate(const
TCondition& condition) {
const TabletColumn& column = _tablet->tablet_schema().column(index);
ColumnPredicate* predicate = nullptr;
- if ((condition.condition_op == "*=" || condition.condition_op == "!*=" ||
condition.condition_op == "=" || condition.condition_op == "!=")
- && condition.condition_values.size() == 1) {
- predicate = condition.condition_op == "*=" || condition.condition_op
== "=" ? _new_eq_pred(column, index, condition.condition_values[0]) :
- _new_ne_pred(column, index, condition.condition_values[0]);
+
+ if ((condition.condition_op == "*=" || condition.condition_op == "!*=" ||
condition.condition_op == "=" || condition.condition_op == "!=") &&
condition.condition_values.size() == 1) {
+ predicate = condition.condition_op == "*=" || condition.condition_op
== "=" ? _new_eq_pred(column, index, condition.condition_values[0], opposite) :
+ _new_ne_pred(column, index, condition.condition_values[0],
opposite);
} else if (condition.condition_op == "<<") {
- predicate = _new_lt_pred(column, index, condition.condition_values[0]);
+ predicate = _new_lt_pred(column, index, condition.condition_values[0],
opposite);
} else if (condition.condition_op == "<=") {
- predicate = _new_le_pred(column, index, condition.condition_values[0]);
+ predicate = _new_le_pred(column, index, condition.condition_values[0],
opposite);
} else if (condition.condition_op == ">>") {
- predicate = _new_gt_pred(column, index, condition.condition_values[0]);
+ predicate = _new_gt_pred(column, index, condition.condition_values[0],
opposite);
} else if (condition.condition_op == ">=") {
- predicate = _new_ge_pred(column, index, condition.condition_values[0]);
+ predicate = _new_ge_pred(column, index, condition.condition_values[0],
opposite);
} else if ((condition.condition_op == "*=" || condition.condition_op ==
"!*=") && condition.condition_values.size() > 1) {
switch (column.type()) {
case OLAP_FIELD_TYPE_TINYINT: {
@@ -728,9 +731,9 @@ ColumnPredicate* Reader::_parse_to_predicate(const
TCondition& condition) {
values.insert(value);
}
if (condition.condition_op == "*=") {
- predicate = new InListPredicate<int8_t>(index,
std::move(values));
+ predicate = new InListPredicate<int8_t>(index,
std::move(values), opposite);
} else {
- predicate = new NotInListPredicate<int8_t>(index,
std::move(values));
+ predicate = new NotInListPredicate<int8_t>(index,
std::move(values),opposite);
}
break;
}
@@ -743,9 +746,9 @@ ColumnPredicate* Reader::_parse_to_predicate(const
TCondition& condition) {
values.insert(value);
}
if (condition.condition_op == "*=") {
- predicate = new InListPredicate<int16_t>(index,
std::move(values));
+ predicate = new InListPredicate<int16_t>(index,
std::move(values), opposite);
} else {
- predicate = new NotInListPredicate<int16_t>(index,
std::move(values));
+ predicate = new NotInListPredicate<int16_t>(index,
std::move(values), opposite);
}
break;
}
@@ -758,9 +761,9 @@ ColumnPredicate* Reader::_parse_to_predicate(const
TCondition& condition) {
values.insert(value);
}
if (condition.condition_op == "*=") {
- predicate = new InListPredicate<int32_t>(index,
std::move(values));
+ predicate = new InListPredicate<int32_t>(index,
std::move(values), opposite);
} else {
- predicate = new NotInListPredicate<int32_t>(index,
std::move(values));
+ predicate = new NotInListPredicate<int32_t>(index,
std::move(values), opposite);
}
break;
}
@@ -773,9 +776,9 @@ ColumnPredicate* Reader::_parse_to_predicate(const
TCondition& condition) {
values.insert(value);
}
if (condition.condition_op == "*=") {
- predicate = new InListPredicate<int64_t>(index,
std::move(values));
+ predicate = new InListPredicate<int64_t>(index,
std::move(values), opposite);
} else {
- predicate = new NotInListPredicate<int64_t>(index,
std::move(values));
+ predicate = new NotInListPredicate<int64_t>(index,
std::move(values), opposite);
}
break;
}
@@ -788,9 +791,9 @@ ColumnPredicate* Reader::_parse_to_predicate(const
TCondition& condition) {
values.insert(value);
}
if (condition.condition_op == "*=") {
- predicate = new InListPredicate<int128_t>(index,
std::move(values));
+ predicate = new InListPredicate<int128_t>(index,
std::move(values), opposite);
} else {
- predicate = new NotInListPredicate<int128_t>(index,
std::move(values));
+ predicate = new NotInListPredicate<int128_t>(index,
std::move(values), opposite);
}
break;
}
@@ -802,9 +805,9 @@ ColumnPredicate* Reader::_parse_to_predicate(const
TCondition& condition) {
values.insert(value);
}
if (condition.condition_op == "*=") {
- predicate = new InListPredicate<decimal12_t>(index,
std::move(values));
+ predicate = new InListPredicate<decimal12_t>(index,
std::move(values), opposite);
} else {
- predicate = new NotInListPredicate<decimal12_t>(index,
std::move(values));
+ predicate = new NotInListPredicate<decimal12_t>(index,
std::move(values), opposite);
}
break;
}
@@ -821,9 +824,9 @@ ColumnPredicate* Reader::_parse_to_predicate(const
TCondition& condition) {
values.insert(value);
}
if (condition.condition_op == "*=") {
- predicate = new InListPredicate<StringValue>(index,
std::move(values));
+ predicate = new InListPredicate<StringValue>(index,
std::move(values), opposite);
} else {
- predicate = new NotInListPredicate<StringValue>(index,
std::move(values));
+ predicate = new NotInListPredicate<StringValue>(index,
std::move(values), opposite);
}
break;
}
@@ -839,9 +842,9 @@ ColumnPredicate* Reader::_parse_to_predicate(const
TCondition& condition) {
values.insert(value);
}
if (condition.condition_op == "*=") {
- predicate = new InListPredicate<StringValue>(index,
std::move(values));
+ predicate = new InListPredicate<StringValue>(index,
std::move(values), opposite);
} else {
- predicate = new NotInListPredicate<StringValue>(index,
std::move(values));
+ predicate = new NotInListPredicate<StringValue>(index,
std::move(values), opposite);
}
break;
}
@@ -852,9 +855,9 @@ ColumnPredicate* Reader::_parse_to_predicate(const
TCondition& condition) {
values.insert(value);
}
if (condition.condition_op == "*=") {
- predicate = new InListPredicate<uint24_t>(index,
std::move(values));
+ predicate = new InListPredicate<uint24_t>(index,
std::move(values), opposite);
} else {
- predicate = new NotInListPredicate<uint24_t>(index,
std::move(values));
+ predicate = new NotInListPredicate<uint24_t>(index,
std::move(values), opposite);
}
break;
}
@@ -865,9 +868,9 @@ ColumnPredicate* Reader::_parse_to_predicate(const
TCondition& condition) {
values.insert(value);
}
if (condition.condition_op == "*=") {
- predicate = new InListPredicate<uint64_t>(index,
std::move(values));
+ predicate = new InListPredicate<uint64_t>(index,
std::move(values), opposite);
} else {
- predicate = new NotInListPredicate<uint64_t>(index,
std::move(values));
+ predicate = new NotInListPredicate<uint64_t>(index,
std::move(values), opposite);
}
break;
}
@@ -875,8 +878,8 @@ ColumnPredicate* Reader::_parse_to_predicate(const
TCondition& condition) {
default:
break;
}
- } else if (condition.condition_op == "is") {
- predicate = new NullPredicate(index, condition.condition_values[0] ==
"null");
+ } else if (boost::to_lower_copy(condition.condition_op) == "is") {
+ predicate = new NullPredicate(index,
boost::to_lower_copy(condition.condition_values[0]) == "null", opposite);
}
return predicate;
}
@@ -940,7 +943,7 @@ OLAPStatus Reader::_init_delete_condition(const
ReaderParams& read_params) {
_tablet->obtain_header_rdlock();
OLAPStatus ret = _delete_handler.init(
- _tablet->tablet_schema(), _tablet->delete_predicates(),
read_params.version.second);
+ _tablet->tablet_schema(), _tablet->delete_predicates(),
read_params.version.second, this);
_tablet->release_header_lock();
if (read_params.reader_type == READER_BASE_COMPACTION) {
diff --git a/be/src/olap/reader.h b/be/src/olap/reader.h
index b3fff22..785a53a 100644
--- a/be/src/olap/reader.h
+++ b/be/src/olap/reader.h
@@ -120,6 +120,7 @@ private:
};
friend class CollectIterator;
+ friend class DeleteHandler;
OLAPStatus _init_params(const ReaderParams& read_params);
@@ -129,14 +130,14 @@ private:
void _init_conditions_param(const ReaderParams& read_params);
- ColumnPredicate* _new_eq_pred(const TabletColumn& column, int index, const
std::string& cond);
- ColumnPredicate* _new_ne_pred(const TabletColumn& column, int index, const
std::string& cond);
- ColumnPredicate* _new_lt_pred(const TabletColumn& column, int index, const
std::string& cond);
- ColumnPredicate* _new_le_pred(const TabletColumn& column, int index, const
std::string& cond);
- ColumnPredicate* _new_gt_pred(const TabletColumn& column, int index, const
std::string& cond);
- ColumnPredicate* _new_ge_pred(const TabletColumn& column, int index, const
std::string& cond);
+ ColumnPredicate* _new_eq_pred(const TabletColumn& column, int index, const
std::string& cond, bool opposite) const;
+ ColumnPredicate* _new_ne_pred(const TabletColumn& column, int index, const
std::string& cond, bool opposite) const;
+ ColumnPredicate* _new_lt_pred(const TabletColumn& column, int index, const
std::string& cond, bool opposite) const;
+ ColumnPredicate* _new_le_pred(const TabletColumn& column, int index, const
std::string& cond, bool opposite) const;
+ ColumnPredicate* _new_gt_pred(const TabletColumn& column, int index, const
std::string& cond, bool opposite) const;
+ ColumnPredicate* _new_ge_pred(const TabletColumn& column, int index, const
std::string& cond, bool opposite) const;
- ColumnPredicate* _parse_to_predicate(const TCondition& condition);
+ ColumnPredicate* _parse_to_predicate(const TCondition& condition, bool
opposite = false) const;
OLAPStatus _init_delete_condition(const ReaderParams& read_params);
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp
b/be/src/olap/rowset/beta_rowset_reader.cpp
index 52062d5..852e675 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -58,8 +58,8 @@ OLAPStatus BetaRowsetReader::init(RowsetReaderContext*
read_context) {
}
}
if (read_context->delete_handler != nullptr) {
- read_context->delete_handler->get_delete_conditions_after_version(
- _rowset->end_version(), &read_options.delete_conditions);
+
read_context->delete_handler->get_delete_conditions_after_version(_rowset->end_version(),
+ &read_options.delete_conditions,
read_options.delete_condition_predicates.get());
}
if (read_context->predicates != nullptr) {
read_options.column_predicates.insert(read_options.column_predicates.end(),
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 1519f62..9792221 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -433,6 +433,8 @@ void SegmentIterator::_init_lazy_materialization() {
for (auto predicate : _col_predicates) {
predicate_columns.insert(predicate->column_id());
}
+
_opts.delete_condition_predicates.get()->get_all_column_ids(predicate_columns);
+
// when all return columns have predicates, disable lazy
materialization to avoid its overhead
if (_schema.column_ids().size() > predicate_columns.size()) {
_lazy_materialization_read = true;
@@ -521,18 +523,27 @@ Status SegmentIterator::next_batch(RowBlockV2* block) {
// phase 2: run vectorization evaluation on remaining predicates to prune
rows.
// block's selection vector will be set to indicate which rows have passed
predicates.
// TODO(hkp): optimize column predicate to check column block once for one
column
- if (!_col_predicates.empty()) {
+ if (!_col_predicates.empty() || _opts.delete_condition_predicates.get() !=
nullptr) {
// init selection position index
uint16_t selected_size = block->selected_size();
uint16_t original_size = selected_size;
+
SCOPED_RAW_TIMER(&_opts.stats->vec_cond_ns);
for (auto column_predicate : _col_predicates) {
- auto column_block =
block->column_block(column_predicate->column_id());
+ auto column_id = column_predicate->column_id();
+ auto column_block = block->column_block(column_id);
column_predicate->evaluate(&column_block,
block->selection_vector(), &selected_size);
}
+ _opts.stats->rows_vec_cond_filtered += original_size - selected_size;
+
+ // set original_size again to check delete condition predicates
+ // filter how many data
+ original_size = selected_size;
+ _opts.delete_condition_predicates->evaluate(block, &selected_size);
+ _opts.stats->rows_vec_del_cond_filtered += original_size -
selected_size;
+
block->set_selected_size(selected_size);
block->set_num_rows(selected_size);
- _opts.stats->rows_vec_cond_filtered += original_size - selected_size;
}
// phase 3: read non-predicate columns of rows that have passed predicates
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h
b/be/src/olap/rowset/segment_v2/segment_iterator.h
index bdf3898..295b157 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -119,6 +119,8 @@ private:
// make a copy of `_opts.column_predicates` in order to make local changes
std::vector<ColumnPredicate*> _col_predicates;
+ int16_t** _select_vec;
+
// row schema of the key to seek
// only used in `_get_row_ranges_by_keys`
std::unique_ptr<Schema> _seek_schema;
diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index ad8b8d3..7567ae6 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -449,18 +449,13 @@ OLAPStatus RowBlockChanger::change_row_block(const
RowBlock* ref_block, int32_t
if (is_data_left_vec[row_index] == 1) {
if (_delete_handler != nullptr &&
_delete_handler->is_filter_data(data_version, read_helper)) {
is_data_left_vec[row_index] = 0;
+ (*filtered_rows)++;
}
}
}
// a.2 计算留下的row num
- uint32_t new_row_num = 0;
- for (uint32_t i = 0; i < row_num; ++i) {
- if (is_data_left_vec[i] != 0) {
- ++new_row_num;
- }
- }
- *filtered_rows = row_num - new_row_num;
+ uint32_t new_row_num = row_num - *filtered_rows;
const bool need_filter_data = (new_row_num != row_num);
const bool filter_all = (new_row_num == 0);
diff --git a/be/src/olap/schema_change.h b/be/src/olap/schema_change.h
index 610afd0..0959470 100644
--- a/be/src/olap/schema_change.h
+++ b/be/src/olap/schema_change.h
@@ -57,7 +57,7 @@ public:
ColumnMapping* get_mutable_column_mapping(size_t column_index);
- SchemaMapping get_schema_mapping() const { return _schema_mapping; }
+ const SchemaMapping& get_schema_mapping() const { return _schema_mapping; }
OLAPStatus change_row_block(const RowBlock* ref_block, int32_t
data_version,
RowBlock* mutable_block, uint64_t*
filtered_rows) const;
diff --git a/be/test/olap/CMakeLists.txt b/be/test/olap/CMakeLists.txt
index c01e925..34152e9 100644
--- a/be/test/olap/CMakeLists.txt
+++ b/be/test/olap/CMakeLists.txt
@@ -85,6 +85,7 @@ ADD_BE_TEST(page_cache_test)
ADD_BE_TEST(hll_test)
# ADD_BE_TEST(memtable_flush_executor_test)
ADD_BE_TEST(selection_vector_test)
+ADD_BE_TEST(block_column_predicate_test)
ADD_BE_TEST(options_test)
ADD_BE_TEST(fs/file_block_manager_test)
ADD_BE_TEST(memory/hash_index_test)
diff --git a/be/test/olap/block_column_predicate_test.cpp
b/be/test/olap/block_column_predicate_test.cpp
new file mode 100644
index 0000000..87aa34f
--- /dev/null
+++ b/be/test/olap/block_column_predicate_test.cpp
@@ -0,0 +1,288 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/block_column_predicate.h"
+
+#include <google/protobuf/stubs/common.h>
+#include <gtest/gtest.h>
+
+#include "olap/comparison_predicate.h"
+#include "olap/column_predicate.h"
+#include "olap/field.h"
+#include "olap/row_block2.h"
+#include "olap/wrapper_field.h"
+#include "runtime/mem_pool.h"
+#include "runtime/string_value.hpp"
+#include "runtime/vectorized_row_batch.h"
+#include "util/logging.h"
+
+namespace doris {
+
+// Test fixture for the block-level column predicates. It owns a memory
+// tracker/pool pair and the RowBlockV2 that each test fills and evaluates.
+class BlockColumnPredicateTest : public testing::Test {
+public:
+    // Members are initialized in declaration order, so the tracker exists
+    // before the pool that is constructed from it.
+    BlockColumnPredicateTest()
+            : _mem_tracker(new MemTracker(-1)), _mem_pool(new MemPool(_mem_tracker.get())) {}
+
+    ~BlockColumnPredicateTest() = default;
+
+    // Initializes `tablet_schema` from a protobuf schema that contains exactly
+    // one column described by the remaining arguments. Every call hands out a
+    // fresh unique column id from a function-local static counter.
+    void SetTabletSchema(std::string name, const std::string& type,
+                         const std::string& aggregation, uint32_t length, bool is_allow_null,
+                         bool is_key, TabletSchema* tablet_schema) {
+        static int id = 0;
+        TabletSchemaPB schema_pb;
+        ColumnPB* col_pb = schema_pb.add_column();
+        col_pb->set_unique_id(++id);
+        col_pb->set_name(name);
+        col_pb->set_type(type);
+        col_pb->set_is_key(is_key);
+        col_pb->set_is_nullable(is_allow_null);
+        col_pb->set_length(length);
+        col_pb->set_aggregation(aggregation);
+        col_pb->set_precision(1000);
+        col_pb->set_frac(1000);
+        col_pb->set_is_bf_column(false);
+        tablet_schema->init_from_pb(schema_pb);
+    }
+
+    // Replaces `_row_block` with a new RowBlockV2 built from `tablet_schema`
+    // and constructed with `size` as its second argument.
+    void init_row_block(const TabletSchema* tablet_schema, int size) {
+        Schema block_schema(*tablet_schema);
+        _row_block.reset(new RowBlockV2(block_schema, size));
+    }
+
+    std::shared_ptr<MemTracker> _mem_tracker;
+    std::unique_ptr<MemPool> _mem_pool;
+    std::unique_ptr<RowBlockV2> _row_block;
+};
+
+// A single equality predicate (value == 5.0) evaluated over a FLOAT column
+// holding 0..9 must leave exactly one selected row, whose value is 5.
+TEST_F(BlockColumnPredicateTest, SINGLE_COLUMN) {
+    TabletSchema tablet_schema;
+    SetTabletSchema(std::string("FLOAT_COLUMN"), "FLOAT", "REPLACE", 1, true, true,
+                    &tablet_schema);
+    const int size = 10;
+    const float value = 5.0f;
+
+    std::unique_ptr<ColumnPredicate> pred(new EqualPredicate<float>(0, value));
+    SingleColumnBlockPredicate single_column_block_pred(pred.get());
+
+    init_row_block(&tablet_schema, size);
+    ColumnBlock col_block = _row_block->column_block(0);
+    auto select_size = _row_block->selected_size();
+
+    // Write the row index into every row and clear its null bit.
+    ColumnBlockView col_block_view(&col_block);
+    for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
+        col_block_view.set_null_bits(1, false);
+        *reinterpret_cast<float*>(col_block_view.data()) = i;
+    }
+
+    single_column_block_pred.evaluate(_row_block.get(), &select_size);
+    ASSERT_EQ(select_size, 1);
+    ASSERT_FLOAT_EQ(*reinterpret_cast<const float*>(
+                            col_block.cell(_row_block->selection_vector()[0]).cell_ptr()),
+                    5.0);
+}
+
+
+// (value < 5.0) AND (value > 3.0) evaluated over a DOUBLE column holding 0..9
+// must leave exactly one selected row, whose value is 4.
+TEST_F(BlockColumnPredicateTest, AND_MUTI_COLUMN) {
+    TabletSchema tablet_schema;
+    SetTabletSchema(std::string("DOUBLE_COLUMN"), "DOUBLE", "REPLACE", 1, true, true,
+                    &tablet_schema);
+    const int size = 10;
+    const double less_value = 5.0;
+    const double great_value = 3.0;
+    std::unique_ptr<ColumnPredicate> less_pred(new LessPredicate<double>(0, less_value));
+    std::unique_ptr<ColumnPredicate> great_pred(new GreaterPredicate<double>(0, great_value));
+    auto single_less_pred = new SingleColumnBlockPredicate(less_pred.get());
+    auto single_great_pred = new SingleColumnBlockPredicate(great_pred.get());
+
+    // NOTE(review): the raw pointers are handed to the composite predicate and
+    // never deleted here - presumably it takes ownership; confirm against
+    // block_column_predicate.h.
+    AndBlockColumnPredicate and_block_column_pred;
+    and_block_column_pred.add_column_predicate(single_less_pred);
+    and_block_column_pred.add_column_predicate(single_great_pred);
+
+    init_row_block(&tablet_schema, size);
+    ColumnBlock col_block = _row_block->column_block(0);
+    auto select_size = _row_block->selected_size();
+
+    // Write the row index into every row and clear its null bit.
+    ColumnBlockView col_block_view(&col_block);
+    for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
+        col_block_view.set_null_bits(1, false);
+        *reinterpret_cast<double*>(col_block_view.data()) = i;
+    }
+
+    and_block_column_pred.evaluate(_row_block.get(), &select_size);
+    ASSERT_EQ(select_size, 1);
+    ASSERT_DOUBLE_EQ(*reinterpret_cast<const double*>(
+                             col_block.cell(_row_block->selection_vector()[0]).cell_ptr()),
+                     4.0);
+}
+
+// (value < 5.0) OR (value > 3.0) evaluated over a DOUBLE column holding 0..9
+// matches every row, so all 10 rows stay selected and the first one is 0.
+TEST_F(BlockColumnPredicateTest, OR_MUTI_COLUMN) {
+    TabletSchema tablet_schema;
+    SetTabletSchema(std::string("DOUBLE_COLUMN"), "DOUBLE", "REPLACE", 1, true, true,
+                    &tablet_schema);
+    const int size = 10;
+    const double less_value = 5.0;
+    const double great_value = 3.0;
+    std::unique_ptr<ColumnPredicate> less_pred(new LessPredicate<double>(0, less_value));
+    std::unique_ptr<ColumnPredicate> great_pred(new GreaterPredicate<double>(0, great_value));
+    auto single_less_pred = new SingleColumnBlockPredicate(less_pred.get());
+    auto single_great_pred = new SingleColumnBlockPredicate(great_pred.get());
+
+    // NOTE(review): the raw pointers are handed to the composite predicate and
+    // never deleted here - presumably it takes ownership; confirm against
+    // block_column_predicate.h.
+    OrBlockColumnPredicate or_block_column_pred;
+    or_block_column_pred.add_column_predicate(single_less_pred);
+    or_block_column_pred.add_column_predicate(single_great_pred);
+
+    init_row_block(&tablet_schema, size);
+    ColumnBlock col_block = _row_block->column_block(0);
+    auto select_size = _row_block->selected_size();
+
+    // Write the row index into every row and clear its null bit.
+    ColumnBlockView col_block_view(&col_block);
+    for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
+        col_block_view.set_null_bits(1, false);
+        *reinterpret_cast<double*>(col_block_view.data()) = i;
+    }
+
+    or_block_column_pred.evaluate(_row_block.get(), &select_size);
+    ASSERT_EQ(select_size, 10);
+    ASSERT_DOUBLE_EQ(*reinterpret_cast<const double*>(
+                             col_block.cell(_row_block->selection_vector()[0]).cell_ptr()),
+                     0.0);
+}
+
+// An OR predicate whose children are an AND predicate and a single predicate:
+//     ((value < 5.0) AND (value > 3.0)) OR (value < 3.0)
+// Over a DOUBLE column holding 0..9 this selects the rows 0, 1, 2 and 4. The
+// same expression is evaluated twice with the children added in both orders.
+TEST_F(BlockColumnPredicateTest, OR_AND_MUTI_COLUMN) {
+    TabletSchema tablet_schema;
+    SetTabletSchema(std::string("DOUBLE_COLUMN"), "DOUBLE", "REPLACE", 1, true, true,
+                    &tablet_schema);
+    const int size = 10;
+    const double less_value = 5.0;
+    const double great_value = 3.0;
+    std::unique_ptr<ColumnPredicate> less_pred(new LessPredicate<double>(0, less_value));
+    std::unique_ptr<ColumnPredicate> great_pred(new GreaterPredicate<double>(0, great_value));
+    std::unique_ptr<ColumnPredicate> less_pred1(new LessPredicate<double>(0, great_value));
+
+    init_row_block(&tablet_schema, size);
+    ColumnBlock col_block = _row_block->column_block(0);
+    auto select_size = _row_block->selected_size();
+
+    // Write the row index into every row and clear its null bit.
+    ColumnBlockView col_block_view(&col_block);
+    for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
+        col_block_view.set_null_bits(1, false);
+        *reinterpret_cast<double*>(col_block_view.data()) = i;
+    }
+
+    // Reads the column value of the idx-th entry of the selection vector.
+    const auto selected_value = [&](size_t idx) {
+        return *reinterpret_cast<const double*>(
+                col_block.cell(_row_block->selection_vector()[idx]).cell_ptr());
+    };
+
+    // Case 1: OR(AND(less, great), less1) - the AND child is added first.
+    // NOTE(review): child predicates are allocated with new and never deleted
+    // here - presumably the composite takes ownership; confirm against
+    // block_column_predicate.h.
+    auto and_block_column_pred = new AndBlockColumnPredicate();
+    and_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get()));
+    and_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(great_pred.get()));
+
+    OrBlockColumnPredicate or_block_column_pred;
+    or_block_column_pred.add_column_predicate(and_block_column_pred);
+    or_block_column_pred.add_column_predicate(new SingleColumnBlockPredicate(less_pred1.get()));
+
+    or_block_column_pred.evaluate(_row_block.get(), &select_size);
+    ASSERT_EQ(select_size, 4);
+    ASSERT_DOUBLE_EQ(selected_value(0), 0.0);
+    ASSERT_DOUBLE_EQ(selected_value(1), 1.0);
+    ASSERT_DOUBLE_EQ(selected_value(2), 2.0);
+    ASSERT_DOUBLE_EQ(selected_value(3), 4.0);
+
+    // Clear the block's selection state before the second evaluation.
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+
+    // Case 2: OR(less1, AND(less, great)) - the single child is added first.
+    auto and_block_column_pred1 = new AndBlockColumnPredicate();
+    and_block_column_pred1->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get()));
+    and_block_column_pred1->add_column_predicate(
+            new SingleColumnBlockPredicate(great_pred.get()));
+
+    OrBlockColumnPredicate or_block_column_pred1;
+    or_block_column_pred1.add_column_predicate(new SingleColumnBlockPredicate(less_pred1.get()));
+    or_block_column_pred1.add_column_predicate(and_block_column_pred1);
+
+    or_block_column_pred1.evaluate(_row_block.get(), &select_size);
+    ASSERT_EQ(select_size, 4);
+    ASSERT_DOUBLE_EQ(selected_value(0), 0.0);
+    ASSERT_DOUBLE_EQ(selected_value(1), 1.0);
+    ASSERT_DOUBLE_EQ(selected_value(2), 2.0);
+    ASSERT_DOUBLE_EQ(selected_value(3), 4.0);
+}
+
+// An AND predicate whose children are an OR predicate and a single predicate:
+//     ((value < 5.0) OR (value < 3.0)) AND (value > 3.0)
+// Over a DOUBLE column holding 0..9 this selects only the row 4. The same
+// expression is evaluated twice with the children added in both orders.
+TEST_F(BlockColumnPredicateTest, AND_OR_MUTI_COLUMN) {
+    TabletSchema tablet_schema;
+    SetTabletSchema(std::string("DOUBLE_COLUMN"), "DOUBLE", "REPLACE", 1, true, true,
+                    &tablet_schema);
+    const int size = 10;
+    const double less_value = 5.0;
+    const double great_value = 3.0;
+    std::unique_ptr<ColumnPredicate> less_pred(new LessPredicate<double>(0, less_value));
+    std::unique_ptr<ColumnPredicate> great_pred(new GreaterPredicate<double>(0, great_value));
+    std::unique_ptr<ColumnPredicate> less_pred1(new LessPredicate<double>(0, great_value));
+
+    init_row_block(&tablet_schema, size);
+    ColumnBlock col_block = _row_block->column_block(0);
+    auto select_size = _row_block->selected_size();
+
+    // Write the row index into every row and clear its null bit.
+    ColumnBlockView col_block_view(&col_block);
+    for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
+        col_block_view.set_null_bits(1, false);
+        *reinterpret_cast<double*>(col_block_view.data()) = i;
+    }
+
+    // Reads the column value of the idx-th entry of the selection vector.
+    const auto selected_value = [&](size_t idx) {
+        return *reinterpret_cast<const double*>(
+                col_block.cell(_row_block->selection_vector()[idx]).cell_ptr());
+    };
+
+    // Case 1: AND(OR(less, less1), great) - the OR child is added first.
+    // NOTE(review): child predicates are allocated with new and never deleted
+    // here - presumably the composite takes ownership; confirm against
+    // block_column_predicate.h.
+    auto or_block_column_pred = new OrBlockColumnPredicate();
+    or_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get()));
+    or_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(less_pred1.get()));
+
+    AndBlockColumnPredicate and_block_column_pred;
+    and_block_column_pred.add_column_predicate(or_block_column_pred);
+    and_block_column_pred.add_column_predicate(new SingleColumnBlockPredicate(great_pred.get()));
+
+    and_block_column_pred.evaluate(_row_block.get(), &select_size);
+    ASSERT_EQ(select_size, 1);
+    ASSERT_DOUBLE_EQ(selected_value(0), 4.0);
+
+    // Clear the block's selection state before the second evaluation.
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+
+    // Case 2: AND(great, OR(less, less1)) - the single child is added first.
+    auto or_block_column_pred1 = new OrBlockColumnPredicate();
+    or_block_column_pred1->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get()));
+    or_block_column_pred1->add_column_predicate(new SingleColumnBlockPredicate(less_pred1.get()));
+
+    AndBlockColumnPredicate and_block_column_pred1;
+    and_block_column_pred1.add_column_predicate(new SingleColumnBlockPredicate(great_pred.get()));
+    and_block_column_pred1.add_column_predicate(or_block_column_pred1);
+
+    and_block_column_pred1.evaluate(_row_block.get(), &select_size);
+    ASSERT_EQ(select_size, 1);
+    ASSERT_DOUBLE_EQ(selected_value(0), 4.0);
+}
+
+}
+
+// Test binary entry point: initializes GoogleTest and CpuInfo, runs every
+// registered test, shuts the protobuf library down and returns the gtest
+// result as the process exit code.
+int main(int argc, char** argv) {
+    testing::InitGoogleTest(&argc, argv);
+    doris::CpuInfo::init();
+    const int exit_code = RUN_ALL_TESTS();
+    google::protobuf::ShutdownProtobufLibrary();
+    return exit_code;
+}
diff --git a/be/test/olap/delete_handler_test.cpp
b/be/test/olap/delete_handler_test.cpp
index c0e9cbf..b6bb0cf 100644
--- a/be/test/olap/delete_handler_test.cpp
+++ b/be/test/olap/delete_handler_test.cpp
@@ -1117,7 +1117,7 @@ TEST_F(TestDeleteHandler, FilterDataVersion) {
_delete_handler.finalize();
}
-} // namespace doris
+} // namespace doris
int main(int argc, char** argv) {
doris::init_glog("be-test");
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]