This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new b9dabc3  [Enhance] Push down predicate on value column of unique table 
 to base rowset (#5022)
b9dabc3 is described below

commit b9dabc3b5b61845f25f2040f529232767af6bb6b
Author: Zhengguo Yang <[email protected]>
AuthorDate: Sun Dec 6 08:50:37 2020 +0800

    [Enhance] Push down predicate on value column of unique table  to base 
rowset (#5022)
---
 be/src/exec/olap_scanner.cpp                       | 12 ++++++++++--
 be/src/olap/iterators.h                            |  2 +-
 be/src/olap/reader.cpp                             | 16 ++++++++++++----
 be/src/olap/reader.h                               |  1 +
 be/src/olap/rowset/alpha_rowset.cpp                | 14 ++++++++------
 be/src/olap/rowset/alpha_rowset_reader.cpp         | 16 ++++++++++++++--
 be/src/olap/rowset/beta_rowset_reader.cpp          | 14 +++++++++++++-
 be/src/olap/rowset/rowset.h                        |  3 ++-
 be/src/olap/rowset/rowset_reader_context.h         |  2 ++
 be/src/olap/rowset/segment_group.cpp               |  6 +++---
 be/src/olap/rowset/segment_v2/column_writer.h      |  9 +++++++++
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  4 ++--
 be/src/olap/rowset/segment_v2/segment_writer.cpp   |  5 ++++-
 be/test/olap/rowset/segment_v2/segment_test.cpp    | 16 ++++++++--------
 14 files changed, 89 insertions(+), 31 deletions(-)

diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp
index 4cb7ca0..0007f85 100644
--- a/be/src/exec/olap_scanner.cpp
+++ b/be/src/exec/olap_scanner.cpp
@@ -164,8 +164,16 @@ Status OlapScanner::_init_params(const 
std::vector<OlapScanRange*>& key_ranges,
     // TODO(zc)
     _params.profile = _profile;
     _params.runtime_state = _runtime_state;
-
-    if (_aggregation) {
+    // if the table with rowset [0-x] or [0-1] [2-y], and [0-1] is empty
+    bool single_version =
+            (_params.rs_readers.size() == 1 &&
+             _params.rs_readers[0]->rowset()->start_version() == 0 &&
+             
!_params.rs_readers[0]->rowset()->rowset_meta()->is_segments_overlapping()) ||
+            (_params.rs_readers.size() == 2 &&
+             _params.rs_readers[1]->rowset()->rowset_meta()->num_rows() == 0 &&
+             _params.rs_readers[1]->rowset()->start_version() == 2 &&
+             
!_params.rs_readers[1]->rowset()->rowset_meta()->is_segments_overlapping());
+    if (_aggregation || single_version) {
         _params.return_columns = _return_columns;
     } else {
         for (size_t i = 0; i < _tablet->num_key_columns(); ++i) {
diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h
index 670819a..d3a74db 100644
--- a/be/src/olap/iterators.h
+++ b/be/src/olap/iterators.h
@@ -71,7 +71,7 @@ public:
     // used to fiter rows in row block
     // TODO(hkp): refactor the column predicate framework
     // to unify Conditions and ColumnPredicate
-    const std::vector<ColumnPredicate*>* column_predicates = nullptr;
+    std::vector<ColumnPredicate*> column_predicates;
 
     // REQUIRED (null is not allowed)
     OlapReaderStatistics* stats = nullptr;
diff --git a/be/src/olap/reader.cpp b/be/src/olap/reader.cpp
index 75f9789..aaa44cd 100644
--- a/be/src/olap/reader.cpp
+++ b/be/src/olap/reader.cpp
@@ -284,6 +284,9 @@ void Reader::close() {
     for (auto pred : _col_predicates) {
         delete pred;
     }
+    for (auto pred : _value_col_predicates) {
+        delete pred;
+    }
 }
 
 OLAPStatus Reader::_capture_rs_readers(const ReaderParams& read_params) {
@@ -365,6 +368,7 @@ OLAPStatus Reader::_capture_rs_readers(const ReaderParams& 
read_params) {
     _reader_context.load_bf_columns = &_load_bf_columns;
     _reader_context.conditions = &_conditions;
     _reader_context.predicates = &_col_predicates;
+    _reader_context.value_predicates = &_value_col_predicates;
     _reader_context.lower_bound_keys = &_keys_param.start_keys;
     _reader_context.is_lower_keys_included = &_is_lower_keys_included;
     _reader_context.upper_bound_keys = &_keys_param.end_keys;
@@ -575,7 +579,13 @@ void Reader::_init_conditions_param(const ReaderParams& 
read_params) {
         DCHECK_EQ(OLAP_SUCCESS, _conditions.append_condition(condition));
         ColumnPredicate* predicate = _parse_to_predicate(condition);
         if (predicate != nullptr) {
-            _col_predicates.push_back(predicate);
+            if (_tablet->tablet_schema()
+                        .column(_tablet->field_index(condition.column_name))
+                        .aggregation() != 
FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE) {
+                _value_col_predicates.push_back(predicate);
+            } else {
+                _col_predicates.push_back(predicate);
+            }
         }
     }
 }
@@ -685,9 +695,6 @@ ColumnPredicate* Reader::_parse_to_predicate(const 
TCondition& condition) {
         return nullptr;
     }
     const TabletColumn& column = _tablet->tablet_schema().column(index);
-    if (column.aggregation() != 
FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE) {
-        return nullptr;
-    }
     ColumnPredicate* predicate = nullptr;
     if (condition.condition_op == "*=" && condition.condition_values.size() == 
1) {
         predicate = _new_eq_pred(column, index, condition.condition_values[0]);
@@ -820,6 +827,7 @@ ColumnPredicate* Reader::_parse_to_predicate(const 
TCondition& condition) {
     } else if (condition.condition_op == "is") {
         predicate = new NullPredicate(index, condition.condition_values[0] == 
"null");
     }
+
     return predicate;
 }
 
diff --git a/be/src/olap/reader.h b/be/src/olap/reader.h
index 9b2c911..0bb2430 100644
--- a/be/src/olap/reader.h
+++ b/be/src/olap/reader.h
@@ -171,6 +171,7 @@ private:
     std::vector<bool> _is_upper_keys_included;
     Conditions _conditions;
     std::vector<ColumnPredicate*> _col_predicates;
+    std::vector<ColumnPredicate*> _value_col_predicates;
     DeleteHandler _delete_handler;
 
     OLAPStatus (Reader::*_next_row_func)(RowCursor* row_cursor, MemPool* 
mem_pool,
diff --git a/be/src/olap/rowset/alpha_rowset.cpp 
b/be/src/olap/rowset/alpha_rowset.cpp
index 157c124..f1e7af3 100644
--- a/be/src/olap/rowset/alpha_rowset.cpp
+++ b/be/src/olap/rowset/alpha_rowset.cpp
@@ -310,22 +310,24 @@ OLAPStatus AlphaRowset::init() {
         if (segment_group_meta.zone_maps_size() != 0) {
             size_t zone_maps_size = segment_group_meta.zone_maps_size();
             // after 0.12.10 the value column in duplicate table also has zone 
map.
-            size_t expect_zone_maps_num = _schema->keys_type() == 
KeysType::DUP_KEYS
+            // after 0.14 the value column in duplicate table also has zone 
map.
+            size_t expect_zone_maps_num = _schema->keys_type() != 
KeysType::AGG_KEYS
                                                   ? _schema->num_columns()
                                                   : _schema->num_key_columns();
-            if ((_schema->keys_type() != KeysType::DUP_KEYS &&
+            if ((_schema->keys_type() == KeysType::AGG_KEYS &&
                  expect_zone_maps_num != zone_maps_size) ||
-                (_schema->keys_type() == KeysType::DUP_KEYS &&
+                (_schema->keys_type() != KeysType::AGG_KEYS &&
                  expect_zone_maps_num < zone_maps_size)) {
-                LOG(ERROR) << "column pruning size is error."
+                LOG(ERROR) << "column pruning size is error. "
                            << "KeysType=" << 
KeysType_Name(_schema->keys_type()) << ", "
                            << "zone_maps_size=" << zone_maps_size << ", "
                            << "num_key_columns=" << _schema->num_key_columns() 
<< ", "
                            << "num_columns=" << _schema->num_columns();
                 return OLAP_ERR_TABLE_INDEX_VALIDATE_ERROR;
             }
-            // Before 0.12.10, the zone map columns number in duplicate table 
is the same with the key column numbers,
-            // but after 0.12.10 we build zone map for the value column, so 
when first start the two number is not the same,
+            // Before 0.12.10, the zone map columns number in duplicate/unique 
table is the same with the key column numbers,
+            // but after 0.12.10 we build zone map for duplicate table value 
column, after 0.14 we build zone map for unique
+            // table value column, so when first start the two number is not 
the same,
             // it causes start failed. When `expect_zone_maps_num > 
zone_maps_size` it may be the first start after upgrade
             if (expect_zone_maps_num > zone_maps_size) {
                 LOG(WARNING)
diff --git a/be/src/olap/rowset/alpha_rowset_reader.cpp 
b/be/src/olap/rowset/alpha_rowset_reader.cpp
index dae2461..24e897b 100644
--- a/be/src/olap/rowset/alpha_rowset_reader.cpp
+++ b/be/src/olap/rowset/alpha_rowset_reader.cpp
@@ -354,10 +354,22 @@ OLAPStatus 
AlphaRowsetReader::_init_merge_ctxs(RowsetReaderContext* read_context
                 continue;
             }
         } else {
+            std::vector<ColumnPredicate*> predicates;
+            if (read_context->predicates != nullptr) {
+                predicates.insert(predicates.end(), 
read_context->predicates->begin(),
+                                  read_context->predicates->end());
+            }
+            // if unique table with rowset [0-x] or [0-1] [2-y] [...],
+            // value column predicates can be pushdown on rowset [0-x] or [2-y]
+            if (read_context->value_predicates != nullptr && 
_rowset->keys_type() == UNIQUE_KEYS &&
+                (_rowset->start_version() == 0 || _rowset->start_version() == 
2)) {
+                predicates.insert(predicates.end(), 
read_context->value_predicates->begin(),
+                                  read_context->value_predicates->end());
+            }
             new_column_data->set_read_params(
                     *read_context->return_columns, *read_context->seek_columns,
-                    *read_context->load_bf_columns, *read_context->conditions,
-                    *read_context->predicates, use_index_stream_cache, 
read_context->runtime_state);
+                    *read_context->load_bf_columns, *read_context->conditions, 
predicates,
+                    use_index_stream_cache, read_context->runtime_state);
             // filter
             if (new_column_data->rowset_pruning_filter()) {
                 _stats->rows_stats_filtered += new_column_data->num_rows();
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp 
b/be/src/olap/rowset/beta_rowset_reader.cpp
index bb00420..52062d5 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -61,7 +61,19 @@ OLAPStatus BetaRowsetReader::init(RowsetReaderContext* 
read_context) {
         read_context->delete_handler->get_delete_conditions_after_version(
                 _rowset->end_version(), &read_options.delete_conditions);
     }
-    read_options.column_predicates = read_context->predicates;
+    if (read_context->predicates != nullptr) {
+        
read_options.column_predicates.insert(read_options.column_predicates.end(),
+                                              
read_context->predicates->begin(),
+                                              read_context->predicates->end());
+    }
+    // if unique table with rowset [0-x] or [0-1] [2-y] [...],
+    // value column predicates can be pushdown on rowset [0-x] or [2-y]
+    if (read_context->value_predicates != nullptr && _rowset->keys_type() == 
UNIQUE_KEYS &&
+        (_rowset->start_version() == 0 || _rowset->start_version() == 2)) {
+        
read_options.column_predicates.insert(read_options.column_predicates.end(),
+                                              
read_context->value_predicates->begin(),
+                                              
read_context->value_predicates->end());
+    }
     read_options.use_page_cache = read_context->use_page_cache;
 
     // create iterator for each segment
diff --git a/be/src/olap/rowset/rowset.h b/be/src/olap/rowset/rowset.h
index c4a64d0..d9fc804 100644
--- a/be/src/olap/rowset/rowset.h
+++ b/be/src/olap/rowset/rowset.h
@@ -25,6 +25,7 @@
 #include "gen_cpp/olap_file.pb.h"
 #include "gutil/macros.h"
 #include "olap/rowset/rowset_meta.h"
+#include "olap/tablet_schema.h"
 
 namespace doris {
 
@@ -36,7 +37,6 @@ class Rowset;
 using RowsetSharedPtr = std::shared_ptr<Rowset>;
 class RowsetFactory;
 class RowsetReader;
-class TabletSchema;
 
 // the rowset state transfer graph:
 //    ROWSET_UNLOADED    <--|
@@ -159,6 +159,7 @@ public:
     bool delete_flag() const { return rowset_meta()->delete_flag(); }
     int64_t num_segments() const { return rowset_meta()->num_segments(); }
     void to_rowset_pb(RowsetMetaPB* rs_meta) { return 
rowset_meta()->to_rowset_pb(rs_meta); }
+    inline KeysType keys_type() { return _schema->keys_type(); }
 
     // remove all files in this rowset
     // TODO should we rename the method to remove_files() to be more specific?
diff --git a/be/src/olap/rowset/rowset_reader_context.h 
b/be/src/olap/rowset/rowset_reader_context.h
index 48d3e46..9d757dd 100644
--- a/be/src/olap/rowset/rowset_reader_context.h
+++ b/be/src/olap/rowset/rowset_reader_context.h
@@ -47,6 +47,8 @@ struct RowsetReaderContext {
     // column name -> column predicate
     // adding column_name for predicate to make use of column selectivity
     const std::vector<ColumnPredicate*>* predicates = nullptr;
+    // value column predicate in UNIQUE table
+    const std::vector<ColumnPredicate*>* value_predicates = nullptr;
     const std::vector<RowCursor*>* lower_bound_keys = nullptr;
     const std::vector<bool>* is_lower_keys_included = nullptr;
     const std::vector<RowCursor*>* upper_bound_keys = nullptr;
diff --git a/be/src/olap/rowset/segment_group.cpp 
b/be/src/olap/rowset/segment_group.cpp
index ecca3bf..aaa1528 100644
--- a/be/src/olap/rowset/segment_group.cpp
+++ b/be/src/olap/rowset/segment_group.cpp
@@ -263,8 +263,8 @@ OLAPStatus 
SegmentGroup::add_zone_maps_for_linked_schema_change(
             << zonemap_col_num << " vs. " << schema_mapping.size();
 
     for (size_t i = 0; i < zonemap_col_num; ++i) {
-        // in duplicated table update from 0.11 to 0.12, zone map index may be 
missed and may not a new column.
-        if (_schema->keys_type() == DUP_KEYS && schema_mapping[i].ref_column 
!= -1 &&
+        // in duplicate/unique table update from 0.11 to 0.12, zone map index 
may be missed and may not a new column.
+        if (_schema->keys_type() != AGG_KEYS && schema_mapping[i].ref_column 
!= -1 &&
             schema_mapping[i].ref_column >= zone_map_fields.size()) {
             // the sequence of columns in _zone_maps and _schema must be 
consistent, so here
             // process should not add missed zonemap and we break the loop.
@@ -729,7 +729,7 @@ const TabletSchema& SegmentGroup::get_tablet_schema() {
 }
 
 int SegmentGroup::get_num_zone_map_columns() {
-    if (_schema->keys_type() == KeysType::DUP_KEYS) {
+    if (_schema->keys_type() != KeysType::AGG_KEYS) {
         return _schema->num_columns();
     }
     return _schema->num_key_columns();
diff --git a/be/src/olap/rowset/segment_v2/column_writer.h 
b/be/src/olap/rowset/segment_v2/column_writer.h
index 000cadd..38ff3dd 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.h
+++ b/be/src/olap/rowset/segment_v2/column_writer.h
@@ -50,6 +50,15 @@ struct ColumnWriterOptions {
     bool need_zone_map = false;
     bool need_bitmap_index = false;
     bool need_bloom_filter = false;
+    std::string to_string() {
+        std::stringstream ss;
+        ss << std::boolalpha << "meta=" << meta->DebugString()
+           << ", data_page_size=" << data_page_size
+           << ", compression_min_space_saving = " << 
compression_min_space_saving
+           << ", need_zone_map=" << need_zone_map << ", need_bitmap_index=" << 
need_bitmap_index
+           << ", need_bloom_filter" << need_bloom_filter;
+        return ss.str();
+    }
 };
 
 class BitmapIndexWriter;
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index ef9971a..1519f62 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -109,8 +109,8 @@ SegmentIterator::~SegmentIterator() {
 
 Status SegmentIterator::init(const StorageReadOptions& opts) {
     _opts = opts;
-    if (opts.column_predicates != nullptr) {
-        _col_predicates = *(opts.column_predicates);
+    if (!opts.column_predicates.empty()) {
+        _col_predicates = opts.column_predicates;
     }
     return Status::OK();
 }
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 9bfb158..a7fb66d 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -69,11 +69,14 @@ Status SegmentWriter::init(uint32_t write_mbytes_per_sec 
__attribute__((unused))
 
         _init_column_meta(opts.meta, &column_id, column);
 
-        // now we create zone map for key columns
+        // now we create zone map for key columns in AGG_KEYS or all column in 
UNIQUE_KEYS or DUP_KEYS
         // and not support zone map for array type.
         opts.need_zone_map = column.is_key() || _tablet_schema->keys_type() == 
KeysType::DUP_KEYS;
         if (column.type() == FieldType::OLAP_FIELD_TYPE_ARRAY) {
             opts.need_zone_map = false;
+        } else {
+            opts.need_zone_map =
+                    column.is_key() || _tablet_schema->keys_type() != 
KeysType::AGG_KEYS;
         }
         opts.need_bloom_filter = column.is_bf_column();
         opts.need_bitmap_index = column.has_bitmap_index();
diff --git a/be/test/olap/rowset/segment_v2/segment_test.cpp 
b/be/test/olap/rowset/segment_v2/segment_test.cpp
index e7d0417..fc480f6 100644
--- a/be/test/olap/rowset/segment_v2/segment_test.cpp
+++ b/be/test/olap/rowset/segment_v2/segment_test.cpp
@@ -307,7 +307,7 @@ TEST_F(SegmentReaderWriterTest, LazyMaterialization) {
 
             OlapReaderStatistics stats;
             StorageReadOptions read_opts;
-            read_opts.column_predicates = &predicates;
+            read_opts.column_predicates = predicates;
             read_opts.stats = &stats;
 
             std::unique_ptr<RowwiseIterator> iter;
@@ -331,7 +331,7 @@ TEST_F(SegmentReaderWriterTest, LazyMaterialization) {
 
             OlapReaderStatistics stats;
             StorageReadOptions read_opts;
-            read_opts.column_predicates = &predicates;
+            read_opts.column_predicates = predicates;
             read_opts.stats = &stats;
 
             std::unique_ptr<RowwiseIterator> iter;
@@ -383,7 +383,7 @@ TEST_F(SegmentReaderWriterTest, LazyMaterialization) {
 
             OlapReaderStatistics stats;
             StorageReadOptions read_opts;
-            read_opts.column_predicates = &predicates;
+            read_opts.column_predicates = predicates;
             read_opts.stats = &stats;
 
             std::unique_ptr<RowwiseIterator> iter;
@@ -1026,7 +1026,7 @@ TEST_F(SegmentReaderWriterTest, TestBitmapPredicate) {
 
             StorageReadOptions read_opts;
             OlapReaderStatistics stats;
-            read_opts.column_predicates = &column_predicates;
+            read_opts.column_predicates = column_predicates;
             read_opts.stats = &stats;
 
             std::unique_ptr<RowwiseIterator> iter;
@@ -1048,7 +1048,7 @@ TEST_F(SegmentReaderWriterTest, TestBitmapPredicate) {
 
             StorageReadOptions read_opts;
             OlapReaderStatistics stats;
-            read_opts.column_predicates = &column_predicates;
+            read_opts.column_predicates = column_predicates;
             read_opts.stats = &stats;
 
             std::unique_ptr<RowwiseIterator> iter;
@@ -1070,7 +1070,7 @@ TEST_F(SegmentReaderWriterTest, TestBitmapPredicate) {
 
             StorageReadOptions read_opts;
             OlapReaderStatistics stats;
-            read_opts.column_predicates = &column_predicates;
+            read_opts.column_predicates = column_predicates;
             read_opts.stats = &stats;
 
             std::unique_ptr<RowwiseIterator> iter;
@@ -1094,7 +1094,7 @@ TEST_F(SegmentReaderWriterTest, TestBitmapPredicate) {
 
             StorageReadOptions read_opts;
             OlapReaderStatistics stats;
-            read_opts.column_predicates = &column_predicates;
+            read_opts.column_predicates = column_predicates;
             read_opts.stats = &stats;
 
             std::unique_ptr<RowwiseIterator> iter;
@@ -1117,7 +1117,7 @@ TEST_F(SegmentReaderWriterTest, TestBitmapPredicate) {
 
             StorageReadOptions read_opts;
             OlapReaderStatistics stats;
-            read_opts.column_predicates = &column_predicates;
+            read_opts.column_predicates = column_predicates;
             read_opts.stats = &stats;
 
             std::unique_ptr<RowwiseIterator> iter;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to