This is an automated email from the ASF dual-hosted git repository.
jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 5f844486e3b [enhancement](invert index) read columns by index reduce
seek time (#24735)
5f844486e3b is described below
commit 5f844486e3b1a8cf394641adf525a612cc013b1c
Author: zzzxl <[email protected]>
AuthorDate: Tue Oct 17 10:34:33 2023 +0800
[enhancement](invert index) read columns by index reduce seek time (#24735)
---
be/src/olap/rowset/segment_v2/binary_dict_page.cpp | 1 +
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 41 +++++++++++++---------
be/src/vec/columns/column_fixed_length_object.h | 17 +++++++++
3 files changed, 43 insertions(+), 16 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
index f2984603a0c..69d92f5ede9 100644
--- a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
+++ b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
@@ -283,6 +283,7 @@ Status BinaryDictPageDecoder::next_batch(size_t* n,
vectorized::MutableColumnPtr
Status BinaryDictPageDecoder::read_by_rowids(const rowid_t* rowids, ordinal_t
page_first_ordinal,
size_t* n,
vectorized::MutableColumnPtr& dst) {
if (_encoding_type == PLAIN_ENCODING) {
+ dst = dst->convert_to_predicate_column_if_dictionary();
return _data_page_decoder->read_by_rowids(rowids, page_first_ordinal,
n, dst);
}
DCHECK(_parsed);
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 37d3e77a49f..42de242fe5a 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1593,28 +1593,19 @@ void
SegmentIterator::_output_non_pred_columns(vectorized::Block* block) {
Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit,
uint32_t& nrows_read,
bool set_block_rowid) {
SCOPED_RAW_TIMER(&_opts.stats->first_read_ns);
+
do {
- uint32_t range_from;
- uint32_t range_to;
+ uint32_t range_from = 0;
+ uint32_t range_to = 0;
bool has_next_range =
_range_iter->next_range(nrows_read_limit - nrows_read,
&range_from, &range_to);
if (!has_next_range) {
break;
}
- if (_cur_rowid == 0 || _cur_rowid != range_from) {
- _cur_rowid = range_from;
- _opts.stats->block_first_read_seek_num += 1;
- if (_opts.runtime_state && _opts.runtime_state->enable_profile()) {
- SCOPED_RAW_TIMER(&_opts.stats->block_first_read_seek_ns);
- RETURN_IF_ERROR(_seek_columns(_first_read_column_ids,
_cur_rowid));
- } else {
- RETURN_IF_ERROR(_seek_columns(_first_read_column_ids,
_cur_rowid));
- }
- }
+
size_t rows_to_read = range_to - range_from;
- RETURN_IF_ERROR(
- _read_columns(_first_read_column_ids, _current_return_columns,
rows_to_read));
- _cur_rowid += rows_to_read;
+ _cur_rowid = range_to;
+
if (set_block_rowid) {
// Here use std::iota is better performance than for-loop, maybe
for-loop is not vectorized
auto start = _block_rowids.data() + nrows_read;
@@ -1626,8 +1617,26 @@ Status SegmentIterator::_read_columns_by_index(uint32_t
nrows_read_limit, uint32
}
_split_row_ranges.emplace_back(std::pair {range_from, range_to});
- // if _opts.read_orderby_key_reverse is true, only read one range for
fast reverse purpose
} while (nrows_read < nrows_read_limit && !_opts.read_orderby_key_reverse);
+
+ for (auto cid : _first_read_column_ids) {
+ auto& column = _current_return_columns[cid];
+ if (_prune_column(cid, column, true, nrows_read)) {
+ continue;
+ }
+ for (auto& range : _split_row_ranges) {
+ size_t nrows = range.second - range.first;
+
+
RETURN_IF_ERROR(_column_iterators[cid]->seek_to_ordinal(range.first));
+ size_t rows_read = nrows;
+ RETURN_IF_ERROR(_column_iterators[cid]->next_batch(&rows_read,
column));
+ if (rows_read != nrows) {
+ return Status::Error<ErrorCode::INTERNAL_ERROR>("nrows({}) !=
rows_read({})", nrows,
+ rows_read);
+ }
+ }
+ }
+
return Status::OK();
}
diff --git a/be/src/vec/columns/column_fixed_length_object.h
b/be/src/vec/columns/column_fixed_length_object.h
index 0536ab0219c..dce6666f132 100644
--- a/be/src/vec/columns/column_fixed_length_object.h
+++ b/be/src/vec/columns/column_fixed_length_object.h
@@ -280,6 +280,23 @@ public:
memcpy(_data.data() + old_size, data + begin_offset, total_mem_size);
}
+ // Appends `num` fixed-length items to this column.
+ // Item i is read from `data_array + start_offset_array[i]` and copies
+ // `len_array[i]` bytes into the i-th newly added slot.
+ // NOTE(review): assumes len_array[i] <= _item_size for every i; a larger
+ // length would spill into the following slot -- confirm against callers.
+ void insert_many_binary_data(char* data_array, uint32_t* len_array,
+                              uint32_t* start_offset_array, size_t num) override {
+     if (UNLIKELY(num == 0)) {
+         return;
+     }
+
+     size_t old_count = _item_count;
+     resize(old_count + num);
+     auto dst = _data.data() + old_count * _item_size;
+     for (size_t i = 0; i < num; i++) {
+         auto src = data_array + start_offset_array[i];
+         uint32_t len = len_array[i];
+         memcpy(dst, src, len);
+         // Step exactly one slot per item so that item i lands in slot
+         // old_count + i. Adding i * _item_size to a running pointer would
+         // compound the offset and write past the slots requested above.
+         dst += _item_size;
+     }
+ }
+
+
protected:
size_t _item_size;
size_t _item_count;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]