yiguolei commented on code in PR #28103:
URL: https://github.com/apache/doris/pull/28103#discussion_r1435918560
##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -430,6 +456,53 @@ Status SegmentIterator::_get_row_ranges_by_keys() {
return Status::OK();
}
+Status SegmentIterator::_prepare_seek(const StorageReadOptions::SplitKeyRange&
key_range) {
+ std::vector<const Field*> key_fields;
+ std::set<uint32_t> column_set;
+ for (auto cid : key_range.lower_key.schema->column_ids()) {
+ column_set.emplace(cid);
+ key_fields.emplace_back(key_range.lower_key.schema->column(cid));
+ }
+
+ for (auto cid : key_range.upper_key.schema->column_ids()) {
+ if (!column_set.contains(cid)) {
+ key_fields.emplace_back(key_range.upper_key.schema->column(cid));
+ column_set.emplace(cid);
+ }
+ }
+
+ if (!_seek_schema) {
+ _seek_schema = std::make_unique<Schema>(key_fields, key_fields.size());
+ }
+ // todo(wb) need refactor here, when using pk to search, _seek_block is
useless
+ if (_seek_block.empty()) {
+ _seek_block.resize(_seek_schema->num_column_ids());
+ int i = 0;
+ for (auto cid : _seek_schema->column_ids()) {
+ const auto* column_desc = _seek_schema->column(cid);
+ _seek_block[i] = Schema::get_column_by_field(*column_desc);
+ i++;
+ }
+ }
+
+ // create used column iterator
+ for (auto cid : _seek_schema->column_ids()) {
+ if (_column_iterators[cid] == nullptr) {
+
RETURN_IF_ERROR(_segment->new_column_iterator(_opts.tablet_schema->column(cid),
+
&_column_iterators[cid], &_opts));
+ ColumnIteratorOptions iter_opts {
+ .use_page_cache = _opts.use_page_cache,
+ .file_reader = _file_reader.get(),
+ .stats = _opts.stats,
+ .io_ctx = _opts.io_ctx,
+ };
+ RETURN_IF_ERROR(_column_iterators[cid]->init(iter_opts));
+ }
+ }
+
+ return Status::OK();
+}
Review Comment:
parallel build
1. 拿到 where 条件里的 key range。
2. 我们对 key range 进行拆分，拆分成多个，此时多个 key range == 原来的 key range。
3. key range、row range 下推到 rowset reader 和 segment iterator。
---
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]