This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new a5c07f0ae4 [fix](merge-on-write) Add delete bitmap correctness check in commit phase #23316 (#23911)
a5c07f0ae4 is described below
commit a5c07f0ae43f048ba7d5c71eb9605a14f4d22e4c
Author: bobhan1 <[email protected]>
AuthorDate: Tue Sep 5 17:44:33 2023 +0800
[fix](merge-on-write) Add delete bitmap correctness check in commit phase #23316 (#23911)
---
be/src/olap/delta_writer.cpp | 14 ++++++++++++++
be/src/olap/tablet.cpp | 26 +++++++++++++++++---------
be/src/olap/tablet.h | 6 +++---
3 files changed, 34 insertions(+), 12 deletions(-)
diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index ee9d003e92..22ed65e0fc 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -510,6 +510,20 @@ Status DeltaWriter::wait_calc_delete_bitmap() {
Status DeltaWriter::commit_txn(const PSlaveTabletNodes& slave_tablet_nodes,
const bool write_single_replica) {
+ if (_tablet->enable_unique_key_merge_on_write() &&
+ config::enable_merge_on_write_correctness_check &&
_cur_rowset->num_rows() != 0) {
+ auto st = _tablet->check_delete_bitmap_correctness(
+ _delete_bitmap, _cur_rowset->end_version() - 1, _req.txn_id,
_rowset_ids);
+ if (!st.ok()) {
+ LOG(WARNING) << fmt::format(
+ "[tablet_id:{}][txn_id:{}][load_id:{}][partition_id:{}] "
+ "delete bitmap correctness check failed in commit phase!",
+ _req.tablet_id, _req.txn_id,
UniqueId(_req.load_id).to_string(),
+ _req.partition_id);
+ return st;
+ }
+ }
+
std::lock_guard<std::mutex> l(_lock);
SCOPED_TIMER(_close_wait_timer);
Status res = _storage_engine->txn_manager()->commit_txn(_req.partition_id,
_tablet, _req.txn_id,
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 5e81291a43..24e8747740 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -3257,8 +3257,11 @@ Status Tablet::update_delete_bitmap_without_lock(const
RowsetSharedPtr& rowset)
<< "(us), total rows: " << total_rows;
if (config::enable_merge_on_write_correctness_check) {
// check if all the rowset has ROWSET_SENTINEL_MARK
- RETURN_IF_ERROR(_check_delete_bitmap_correctness(delete_bitmap,
cur_version - 1, -1,
- cur_rowset_ids,
&specified_rowsets));
+ auto st = check_delete_bitmap_correctness(delete_bitmap, cur_version -
1, -1,
+ cur_rowset_ids,
&specified_rowsets);
+ if (!st.ok()) {
+ LOG(WARNING) << fmt::format("delete bitmap correctness check
failed in publish phase!");
+ }
_remove_sentinel_mark_from_delete_bitmap(delete_bitmap);
}
for (auto iter = delete_bitmap->delete_bitmap.begin();
@@ -3361,8 +3364,11 @@ Status Tablet::update_delete_bitmap(const
RowsetSharedPtr& rowset,
if (config::enable_merge_on_write_correctness_check && rowset->num_rows()
!= 0) {
// only do correctness check if the rowset has at least one row written
// check if all the rowset has ROWSET_SENTINEL_MARK
- RETURN_IF_ERROR(_check_delete_bitmap_correctness(delete_bitmap,
cur_version - 1, txn_id,
- cur_rowset_ids));
+ auto st = check_delete_bitmap_correctness(delete_bitmap, cur_version -
1, -1,
+ cur_rowset_ids,
&specified_rowsets);
+ if (!st.ok()) {
+ LOG(WARNING) << fmt::format("delete bitmap correctness check
failed in publish phase!");
+ }
_remove_sentinel_mark_from_delete_bitmap(delete_bitmap);
}
@@ -3718,10 +3724,10 @@ void
Tablet::_remove_sentinel_mark_from_delete_bitmap(DeleteBitmapPtr delete_bit
}
}
-Status Tablet::_check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap,
int64_t max_version,
- int64_t txn_id,
- const RowsetIdUnorderedSet&
rowset_ids,
- std::vector<RowsetSharedPtr>*
rowsets) {
+Status Tablet::check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap,
int64_t max_version,
+ int64_t txn_id,
+ const RowsetIdUnorderedSet&
rowset_ids,
+ std::vector<RowsetSharedPtr>*
rowsets) {
RowsetIdUnorderedSet missing_ids;
for (const auto& rowsetid : rowset_ids) {
if (!delete_bitmap->delete_bitmap.contains(
@@ -3780,7 +3786,9 @@ Status
Tablet::_check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, i
root.Accept(writer);
std::string rowset_status_string = std::string(strbuf.GetString());
LOG_EVERY_SECOND(WARNING) << rowset_status_string;
- DCHECK(false) << "check delete bitmap correctness failed!";
+ // let it crash if correctness check failed in Debug mode
+ DCHECK(false) << "delete bitmap correctness check failed in publish
phase!";
+ return Status::InternalError("check delete bitmap failed!");
}
return Status::OK();
}
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index 5ea64cce78..a1684fe9e6 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -552,6 +552,9 @@ public:
void set_binlog_config(BinlogConfig binlog_config);
void add_sentinel_mark_to_delete_bitmap(DeleteBitmap* delete_bitmap,
const RowsetIdUnorderedSet&
rowsetids);
+ Status check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap,
int64_t max_version,
+ int64_t txn_id, const
RowsetIdUnorderedSet& rowset_ids,
+ std::vector<RowsetSharedPtr>*
rowsets = nullptr);
private:
Status _init_once_action();
@@ -597,9 +600,6 @@ private:
////////////////////////////////////////////////////////////////////////////
void _remove_sentinel_mark_from_delete_bitmap(DeleteBitmapPtr
delete_bitmap);
- Status _check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap,
int64_t max_version,
- int64_t txn_id, const
RowsetIdUnorderedSet& rowset_ids,
- std::vector<RowsetSharedPtr>*
rowsets = nullptr);
std::string _get_rowset_info_str(RowsetSharedPtr rowset, bool delete_flag);
public:
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]