This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
new f78ca00064 [Enchancement](merge-on-write) check the correctness of
rowid conversion after compaction (#16689) (#17006)
f78ca00064 is described below
commit f78ca0006489a2e7d43eb8b9408debc705d34554
Author: Xin Liao <[email protected]>
AuthorDate: Wed Feb 22 19:15:52 2023 +0800
[Enchancement](merge-on-write) check the correctness of rowid conversion
after compaction (#16689) (#17006)
MoW updates the delete bitmap of the imported data during the compaction by
rowid conversion.
The correctness of rowid conversion is very important to the result of
delete bitmap.
So I add a rowid conversion result check.
cherry-pick from: #16689
---
be/src/olap/compaction.cpp | 12 ++++--
be/src/olap/primary_key_index.cpp | 2 +-
be/src/olap/rowset/segment_v2/segment.cpp | 16 ++++++++
be/src/olap/rowset/segment_v2/segment.h | 2 +
be/src/olap/tablet.cpp | 65 ++++++++++++++++++++++++++++++-
be/src/olap/tablet.h | 5 +++
6 files changed, 96 insertions(+), 6 deletions(-)
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index e56738b3e1..6441749536 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -439,22 +439,26 @@ Status Compaction::modify_rowsets() {
_tablet->enable_unique_key_merge_on_write()) {
Version version = _tablet->max_version();
DeleteBitmap output_rowset_delete_bitmap(_tablet->tablet_id());
+ std::map<RowsetSharedPtr, std::list<std::pair<RowLocation,
RowLocation>>> location_map;
// Convert the delete bitmap of the input rowsets to output rowset.
// New loads are not blocked, so some keys of input rowsets might
// be deleted during the time. We need to deal with delete bitmap
// of incremental data later.
_tablet->calc_compaction_output_rowset_delete_bitmap(_input_rowsets,
_rowid_conversion, 0,
- version.second +
1,
+ version.second +
1, &location_map,
&output_rowset_delete_bitmap);
+ RETURN_IF_ERROR(_tablet->check_rowid_conversion(_output_rowset,
location_map));
+ location_map.clear();
{
std::lock_guard<std::mutex>
wrlock_(_tablet->get_rowset_update_lock());
std::lock_guard<std::shared_mutex>
wrlock(_tablet->get_header_lock());
// Convert the delete bitmap of the input rowsets to output rowset
for
// incremental data.
-
_tablet->calc_compaction_output_rowset_delete_bitmap(_input_rowsets,
_rowid_conversion,
-
version.second, UINT64_MAX,
-
&output_rowset_delete_bitmap);
+ _tablet->calc_compaction_output_rowset_delete_bitmap(
+ _input_rowsets, _rowid_conversion, version.second,
UINT64_MAX, &location_map,
+ &output_rowset_delete_bitmap);
+ RETURN_IF_ERROR(_tablet->check_rowid_conversion(_output_rowset,
location_map));
_tablet->merge_delete_bitmap(output_rowset_delete_bitmap);
RETURN_NOT_OK(_tablet->modify_rowsets(output_rowsets,
_input_rowsets, true));
diff --git a/be/src/olap/primary_key_index.cpp
b/be/src/olap/primary_key_index.cpp
index 6e2c3d954d..7c2c5fe16a 100644
--- a/be/src/olap/primary_key_index.cpp
+++ b/be/src/olap/primary_key_index.cpp
@@ -27,7 +27,7 @@ Status PrimaryKeyIndexBuilder::init() {
// TODO(liaoxin) using the column type directly if there's only one column
in unique key columns
const auto* type_info = get_scalar_type_info<OLAP_FIELD_TYPE_VARCHAR>();
segment_v2::IndexedColumnWriterOptions options;
- options.write_ordinal_index = false;
+ options.write_ordinal_index = true;
options.write_value_index = true;
options.encoding =
segment_v2::EncodingInfo::get_default_encoding(type_info, true);
// TODO(liaoxin) test to confirm whether it needs to be compressed
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp
b/be/src/olap/rowset/segment_v2/segment.cpp
index 24af866655..d4442bc4b9 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -341,5 +341,21 @@ Status Segment::lookup_row_key(const Slice& key,
RowLocation* row_location) {
return Status::OK();
}
+Status Segment::read_key_by_rowid(uint32_t row_id, std::string* key) {
+ RETURN_IF_ERROR(load_pk_index_and_bf());
+ std::unique_ptr<segment_v2::IndexedColumnIterator> iter;
+ RETURN_IF_ERROR(_pk_index_reader->new_iterator(&iter));
+
+ auto index_type = vectorized::DataTypeFactory::instance().create_data_type(
+ _pk_index_reader->type_info()->type(), 1, 0);
+ auto index_column = index_type->create_column();
+ RETURN_IF_ERROR(iter->seek_to_ordinal(row_id));
+ size_t num_read = 1;
+ RETURN_IF_ERROR(iter->next_batch(&num_read, index_column));
+ CHECK(num_read == 1);
+ *key = index_column->get_data_at(0).to_string();
+ return Status::OK();
+}
+
} // namespace segment_v2
} // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/segment.h
b/be/src/olap/rowset/segment_v2/segment.h
index b0dea5aeb8..c2dd12508e 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -93,6 +93,8 @@ public:
Status lookup_row_key(const Slice& key, RowLocation* row_location);
+ Status read_key_by_rowid(uint32_t row_id, std::string* key);
+
// only used by UT
const SegmentFooterPB& footer() const { return _footer; }
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 691658bb49..196d8f7cdd 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -2213,7 +2213,9 @@ Status Tablet::update_delete_bitmap(const
RowsetSharedPtr& rowset, DeleteBitmapP
void Tablet::calc_compaction_output_rowset_delete_bitmap(
const std::vector<RowsetSharedPtr>& input_rowsets, const
RowIdConversion& rowid_conversion,
- uint64_t start_version, uint64_t end_version, DeleteBitmap*
output_rowset_delete_bitmap) {
+ uint64_t start_version, uint64_t end_version,
+ std::map<RowsetSharedPtr, std::list<std::pair<RowLocation,
RowLocation>>>* location_map,
+ DeleteBitmap* output_rowset_delete_bitmap) {
RowLocation src;
RowLocation dst;
for (auto& rowset : input_rowsets) {
@@ -2242,6 +2244,7 @@ void Tablet::calc_compaction_output_rowset_delete_bitmap(
<< " src location: |" << src.rowset_id << "|"
<< src.segment_id
<< "|" << src.row_id << " start version: " <<
start_version
<< "end version" << end_version;
+ (*location_map)[rowset].emplace_back(src, dst);
output_rowset_delete_bitmap->add({dst.rowset_id,
dst.segment_id, cur_version},
dst.row_id);
}
@@ -2254,6 +2257,66 @@ void Tablet::merge_delete_bitmap(const DeleteBitmap&
delete_bitmap) {
_tablet_meta->delete_bitmap().merge(delete_bitmap);
}
+Status Tablet::check_rowid_conversion(
+ RowsetSharedPtr dst_rowset,
+ const std::map<RowsetSharedPtr, std::list<std::pair<RowLocation,
RowLocation>>>&
+ location_map) {
+ if (location_map.empty()) {
+ VLOG_DEBUG << "check_rowid_conversion, location_map is empty";
+ return Status::OK();
+ }
+ std::vector<segment_v2::SegmentSharedPtr> dst_segments;
+ _load_rowset_segments(dst_rowset, &dst_segments);
+ std::unordered_map<RowsetId, std::vector<segment_v2::SegmentSharedPtr>,
HashOfRowsetId>
+ input_rowsets_segment;
+
+ VLOG_DEBUG << "check_rowid_conversion, dst_segments size: " <<
dst_segments.size();
+ for (auto [src_rowset, locations] : location_map) {
+ std::vector<segment_v2::SegmentSharedPtr>& segments =
+ input_rowsets_segment[src_rowset->rowset_id()];
+ if (segments.empty()) {
+ _load_rowset_segments(src_rowset, &segments);
+ }
+ for (auto& [src, dst] : locations) {
+ std::string src_key;
+ std::string dst_key;
+ Status s = segments[src.segment_id]->read_key_by_rowid(src.row_id,
&src_key);
+ if (UNLIKELY(s.code() == TStatusCode::NOT_IMPLEMENTED_ERROR)) {
+ LOG(INFO) << "primary key index of old version does not "
+ "support reading key by rowid";
+ break;
+ }
+ if (UNLIKELY(!s)) {
+ LOG(WARNING) << "failed to get src key: |" << src.rowset_id <<
"|" << src.segment_id
+ << "|" << src.row_id << " status: " << s;
+ DCHECK(false);
+ return s;
+ }
+
+ s = dst_segments[dst.segment_id]->read_key_by_rowid(dst.row_id,
&dst_key);
+ if (UNLIKELY(!s)) {
+ LOG(WARNING) << "failed to get dst key: |" << dst.rowset_id <<
"|" << dst.segment_id
+ << "|" << dst.row_id << " status: " << s;
+ DCHECK(false);
+ return s;
+ }
+
+ VLOG_DEBUG << "check_rowid_conversion, src: |" << src.rowset_id <<
"|" << src.segment_id
+ << "|" << src.row_id << "|" << src_key << " dst: |" <<
dst.rowset_id << "|"
+ << dst.segment_id << "|" << dst.row_id << "|" <<
dst_key;
+ if (UNLIKELY(src_key.compare(dst_key) != 0)) {
+ LOG(WARNING) << "failed to check key, src key: |" <<
src.rowset_id << "|"
+ << src.segment_id << "|" << src.row_id << "|" <<
src_key
+ << " dst key: |" << dst.rowset_id << "|" <<
dst.segment_id << "|"
+ << dst.row_id << "|" << dst_key;
+ DCHECK(false);
+ return Status::InternalError("failed to check rowid
conversion");
+ }
+ }
+ }
+ return Status::OK();
+}
+
RowsetIdUnorderedSet Tablet::all_rs_id(int64_t max_version) const {
RowsetIdUnorderedSet rowset_ids;
for (const auto& rs_it : _rs_version_map) {
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index a9293bfd54..e439427661 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -356,8 +356,13 @@ public:
void calc_compaction_output_rowset_delete_bitmap(
const std::vector<RowsetSharedPtr>& input_rowsets,
const RowIdConversion& rowid_conversion, uint64_t start_version,
uint64_t end_version,
+ std::map<RowsetSharedPtr, std::list<std::pair<RowLocation,
RowLocation>>>* location_map,
DeleteBitmap* output_rowset_delete_bitmap);
void merge_delete_bitmap(const DeleteBitmap& delete_bitmap);
+ Status check_rowid_conversion(
+ RowsetSharedPtr dst_rowset,
+ const std::map<RowsetSharedPtr, std::list<std::pair<RowLocation,
RowLocation>>>&
+ location_map);
RowsetIdUnorderedSet all_rs_id(int64_t max_version) const;
void remove_self_owned_remote_rowsets();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]