This is an automated email from the ASF dual-hosted git repository.
zhangchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new b602369482 [Refactor](merge-on-write) extract common logic into
`Tablet::_get_segment_column_iterator` (#24048)
b602369482 is described below
commit b602369482cefc2d36cece656f4046ea15b3a3d1
Author: bobhan1 <[email protected]>
AuthorDate: Tue Sep 12 11:37:36 2023 +0800
[Refactor](merge-on-write) extract common logic into
`Tablet::_get_segment_column_iterator` (#24048)
---
be/src/olap/tablet.cpp | 116 +++++++++++++++++--------------------------------
be/src/olap/tablet.h | 4 ++
2 files changed, 44 insertions(+), 76 deletions(-)
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 7617c80334..6d806b1a94 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -2638,16 +2638,9 @@ void Tablet::update_max_version_schema(const
TabletSchemaSPtr& tablet_schema) {
}
}
-// fetch value by row column
-Status Tablet::fetch_value_through_row_column(RowsetSharedPtr input_rowset,
uint32_t segid,
- const std::vector<uint32_t>&
rowids,
- const std::vector<uint32_t>&
cids,
- vectorized::Block& block) {
- // read row data
- BetaRowsetSharedPtr rowset =
std::static_pointer_cast<BetaRowset>(input_rowset);
- CHECK(rowset);
-
- const TabletSchemaSPtr tablet_schema = rowset->tablet_schema();
+Status Tablet::_get_segment_column_iterator(
+ const BetaRowsetSharedPtr& rowset, uint32_t segid, const TabletColumn&
target_column,
+ std::unique_ptr<segment_v2::ColumnIterator>* column_iterator,
OlapReaderStatistics* stats) {
SegmentCacheHandle segment_cache;
RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(rowset,
&segment_cache, true));
// find segment
@@ -2658,26 +2651,38 @@ Status
Tablet::fetch_value_through_row_column(RowsetSharedPtr input_rowset, uint
return Status::NotFound(fmt::format("rowset {} 's segemnt not found,
seg_id {}",
rowset->rowset_id().to_string(),
segid));
}
- // read from segment column by column, row by row
segment_v2::SegmentSharedPtr segment = *it;
+ RETURN_IF_ERROR(segment->new_column_iterator(target_column,
column_iterator));
+ segment_v2::ColumnIteratorOptions opt;
+ opt.file_reader = segment->file_reader().get();
+ opt.stats = stats;
+ opt.use_page_cache = !config::disable_storage_page_cache;
+ opt.io_ctx.reader_type = ReaderType::READER_QUERY;
+ (*column_iterator)->init(opt);
+ return Status::OK();
+}
+
+// fetch value by row column
+Status Tablet::fetch_value_through_row_column(RowsetSharedPtr input_rowset,
uint32_t segid,
+ const std::vector<uint32_t>&
rowids,
+ const std::vector<uint32_t>&
cids,
+ vectorized::Block& block) {
MonotonicStopWatch watch;
watch.start();
Defer _defer([&]() {
LOG_EVERY_N(INFO, 500) << "fetch_value_by_rowids, cost(us):" <<
watch.elapsed_time() / 1000
<< ", row_batch_size:" << rowids.size();
});
+
+ BetaRowsetSharedPtr rowset =
std::static_pointer_cast<BetaRowset>(input_rowset);
+ CHECK(rowset);
+ const TabletSchemaSPtr tablet_schema = rowset->tablet_schema();
CHECK(tablet_schema->store_row_column());
- // create _source column
std::unique_ptr<segment_v2::ColumnIterator> column_iterator;
-
RETURN_IF_ERROR(segment->new_column_iterator(tablet_schema->column(BeConsts::ROW_STORE_COL),
- &column_iterator));
- segment_v2::ColumnIteratorOptions opt;
OlapReaderStatistics stats;
- opt.file_reader = segment->file_reader().get();
- opt.stats = &stats;
- opt.use_page_cache = !config::disable_storage_page_cache;
- opt.io_ctx.reader_type = ReaderType::READER_QUERY;
- column_iterator->init(opt);
+ RETURN_IF_ERROR(_get_segment_column_iterator(rowset, segid,
+
tablet_schema->column(BeConsts::ROW_STORE_COL),
+ &column_iterator, &stats));
// get and parse tuple row
vectorized::MutableColumnPtr column_ptr =
vectorized::ColumnString::create();
RETURN_IF_ERROR(column_iterator->read_by_rowids(rowids.data(),
rowids.size(), column_ptr));
@@ -2701,38 +2706,20 @@ Status Tablet::fetch_value_by_rowids(RowsetSharedPtr
input_rowset, uint32_t segi
const std::vector<uint32_t>& rowids,
const TabletColumn& tablet_column,
vectorized::MutableColumnPtr& dst) {
- // read row data
- BetaRowsetSharedPtr rowset =
std::static_pointer_cast<BetaRowset>(input_rowset);
- CHECK(rowset);
-
- SegmentCacheHandle segment_cache;
- RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(rowset,
&segment_cache, true));
- // find segment
- auto it = std::find_if(
- segment_cache.get_segments().begin(),
segment_cache.get_segments().end(),
- [&segid](const segment_v2::SegmentSharedPtr& seg) { return
seg->id() == segid; });
- if (it == segment_cache.get_segments().end()) {
- return Status::NotFound(fmt::format("rowset {} 's segemnt not found,
seg_id {}",
- rowset->rowset_id().to_string(),
segid));
- }
- // read from segment column by column, row by row
- segment_v2::SegmentSharedPtr segment = *it;
MonotonicStopWatch watch;
watch.start();
Defer _defer([&]() {
LOG_EVERY_N(INFO, 500) << "fetch_value_by_rowids, cost(us):" <<
watch.elapsed_time() / 1000
<< ", row_batch_size:" << rowids.size();
});
- // create _source column
- std::unique_ptr<segment_v2::ColumnIterator> column_iterator = nullptr;
- RETURN_IF_ERROR(segment->new_column_iterator(tablet_column,
&column_iterator));
- segment_v2::ColumnIteratorOptions opt;
+
+ // read row data
+ BetaRowsetSharedPtr rowset =
std::static_pointer_cast<BetaRowset>(input_rowset);
+ CHECK(rowset);
+ std::unique_ptr<segment_v2::ColumnIterator> column_iterator;
OlapReaderStatistics stats;
- opt.file_reader = segment->file_reader().get();
- opt.stats = &stats;
- opt.use_page_cache = !config::disable_storage_page_cache;
- opt.io_ctx.reader_type = ReaderType::READER_QUERY;
- column_iterator->init(opt);
+ RETURN_IF_ERROR(
+ _get_segment_column_iterator(rowset, segid, tablet_column,
&column_iterator, &stats));
RETURN_IF_ERROR(column_iterator->read_by_rowids(rowids.data(),
rowids.size(), dst));
return Status::OK();
}
@@ -2741,45 +2728,22 @@ Status Tablet::lookup_row_data(const Slice&
encoded_key, const RowLocation& row_
RowsetSharedPtr input_rowset, const
TupleDescriptor* desc,
OlapReaderStatistics& stats, std::string&
values,
bool write_to_cache) {
- // read row data
- BetaRowsetSharedPtr rowset =
std::static_pointer_cast<BetaRowset>(input_rowset);
- if (!rowset) {
- return Status::NotFound(
- fmt::format("rowset {} not found",
row_location.rowset_id.to_string()));
- }
-
- const TabletSchemaSPtr tablet_schema = rowset->tablet_schema();
- SegmentCacheHandle segment_cache;
- RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(rowset,
&segment_cache, true));
- // find segment
- auto it = std::find_if(segment_cache.get_segments().begin(),
segment_cache.get_segments().end(),
- [&row_location](const segment_v2::SegmentSharedPtr&
seg) {
- return seg->id() == row_location.segment_id;
- });
- if (it == segment_cache.get_segments().end()) {
- return Status::NotFound(fmt::format("rowset {} 's segemnt not found,
seg_id {}",
- row_location.rowset_id.to_string(),
- row_location.segment_id));
- }
- // read from segment column by column, row by row
- segment_v2::SegmentSharedPtr segment = *it;
- size_t row_size = 0;
MonotonicStopWatch watch;
+ size_t row_size = 1;
watch.start();
Defer _defer([&]() {
LOG_EVERY_N(INFO, 500) << "get a single_row, cost(us):" <<
watch.elapsed_time() / 1000
<< ", row_size:" << row_size;
});
+
+ BetaRowsetSharedPtr rowset =
std::static_pointer_cast<BetaRowset>(input_rowset);
+ CHECK(rowset);
+ const TabletSchemaSPtr tablet_schema = rowset->tablet_schema();
CHECK(tablet_schema->store_row_column());
- // create _source column
std::unique_ptr<segment_v2::ColumnIterator> column_iterator;
-
RETURN_IF_ERROR(segment->new_column_iterator(tablet_schema->column(BeConsts::ROW_STORE_COL),
- &column_iterator));
- segment_v2::ColumnIteratorOptions opt;
- opt.file_reader = segment->file_reader().get();
- opt.stats = &stats;
- opt.use_page_cache = !config::disable_storage_page_cache;
- column_iterator->init(opt);
+ RETURN_IF_ERROR(_get_segment_column_iterator(rowset,
row_location.segment_id,
+
tablet_schema->column(BeConsts::ROW_STORE_COL),
+ &column_iterator, &stats));
// get and parse tuple row
vectorized::MutableColumnPtr column_ptr =
vectorized::ColumnString::create();
std::vector<segment_v2::rowid_t> rowids
{static_cast<segment_v2::rowid_t>(row_location.row_id)};
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index a1684fe9e6..99dfd445b1 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -555,6 +555,10 @@ public:
Status check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap,
int64_t max_version,
int64_t txn_id, const
RowsetIdUnorderedSet& rowset_ids,
std::vector<RowsetSharedPtr>*
rowsets = nullptr);
+ Status _get_segment_column_iterator(
+ const BetaRowsetSharedPtr& rowset, uint32_t segid, const
TabletColumn& target_column,
+ std::unique_ptr<segment_v2::ColumnIterator>* column_iterator,
+ OlapReaderStatistics* stats);
private:
Status _init_once_action();
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org