This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new a5c07f0ae4 [fix](merge-on-write) Add delete bitmap correctness check in commit phase #23316  (#23911)
a5c07f0ae4 is described below

commit a5c07f0ae43f048ba7d5c71eb9605a14f4d22e4c
Author: bobhan1 <[email protected]>
AuthorDate: Tue Sep 5 17:44:33 2023 +0800

    [fix](merge-on-write) Add delete bitmap correctness check in commit phase #23316  (#23911)
---
 be/src/olap/delta_writer.cpp | 14 ++++++++++++++
 be/src/olap/tablet.cpp       | 26 +++++++++++++++++---------
 be/src/olap/tablet.h         |  6 +++---
 3 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index ee9d003e92..22ed65e0fc 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -510,6 +510,20 @@ Status DeltaWriter::wait_calc_delete_bitmap() {
 
 Status DeltaWriter::commit_txn(const PSlaveTabletNodes& slave_tablet_nodes,
                                const bool write_single_replica) {
+    if (_tablet->enable_unique_key_merge_on_write() &&
+        config::enable_merge_on_write_correctness_check && 
_cur_rowset->num_rows() != 0) {
+        auto st = _tablet->check_delete_bitmap_correctness(
+                _delete_bitmap, _cur_rowset->end_version() - 1, _req.txn_id, 
_rowset_ids);
+        if (!st.ok()) {
+            LOG(WARNING) << fmt::format(
+                    "[tablet_id:{}][txn_id:{}][load_id:{}][partition_id:{}] "
+                    "delete bitmap correctness check failed in commit phase!",
+                    _req.tablet_id, _req.txn_id, 
UniqueId(_req.load_id).to_string(),
+                    _req.partition_id);
+            return st;
+        }
+    }
+
     std::lock_guard<std::mutex> l(_lock);
     SCOPED_TIMER(_close_wait_timer);
     Status res = _storage_engine->txn_manager()->commit_txn(_req.partition_id, 
_tablet, _req.txn_id,
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 5e81291a43..24e8747740 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -3257,8 +3257,11 @@ Status Tablet::update_delete_bitmap_without_lock(const 
RowsetSharedPtr& rowset)
               << "(us), total rows: " << total_rows;
     if (config::enable_merge_on_write_correctness_check) {
         // check if all the rowset has ROWSET_SENTINEL_MARK
-        RETURN_IF_ERROR(_check_delete_bitmap_correctness(delete_bitmap, 
cur_version - 1, -1,
-                                                         cur_rowset_ids, 
&specified_rowsets));
+        auto st = check_delete_bitmap_correctness(delete_bitmap, cur_version - 
1, -1,
+                                                  cur_rowset_ids, 
&specified_rowsets);
+        if (!st.ok()) {
+            LOG(WARNING) << fmt::format("delete bitmap correctness check 
failed in publish phase!");
+        }
         _remove_sentinel_mark_from_delete_bitmap(delete_bitmap);
     }
     for (auto iter = delete_bitmap->delete_bitmap.begin();
@@ -3361,8 +3364,11 @@ Status Tablet::update_delete_bitmap(const 
RowsetSharedPtr& rowset,
     if (config::enable_merge_on_write_correctness_check && rowset->num_rows() 
!= 0) {
         // only do correctness check if the rowset has at least one row written
         // check if all the rowset has ROWSET_SENTINEL_MARK
-        RETURN_IF_ERROR(_check_delete_bitmap_correctness(delete_bitmap, 
cur_version - 1, txn_id,
-                                                         cur_rowset_ids));
+        auto st = check_delete_bitmap_correctness(delete_bitmap, cur_version - 
1, -1,
+                                                  cur_rowset_ids, 
&specified_rowsets);
+        if (!st.ok()) {
+            LOG(WARNING) << fmt::format("delete bitmap correctness check 
failed in publish phase!");
+        }
         _remove_sentinel_mark_from_delete_bitmap(delete_bitmap);
     }
 
@@ -3718,10 +3724,10 @@ void 
Tablet::_remove_sentinel_mark_from_delete_bitmap(DeleteBitmapPtr delete_bit
     }
 }
 
-Status Tablet::_check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, 
int64_t max_version,
-                                                int64_t txn_id,
-                                                const RowsetIdUnorderedSet& 
rowset_ids,
-                                                std::vector<RowsetSharedPtr>* 
rowsets) {
+Status Tablet::check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, 
int64_t max_version,
+                                               int64_t txn_id,
+                                               const RowsetIdUnorderedSet& 
rowset_ids,
+                                               std::vector<RowsetSharedPtr>* 
rowsets) {
     RowsetIdUnorderedSet missing_ids;
     for (const auto& rowsetid : rowset_ids) {
         if (!delete_bitmap->delete_bitmap.contains(
@@ -3780,7 +3786,9 @@ Status 
Tablet::_check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, i
         root.Accept(writer);
         std::string rowset_status_string = std::string(strbuf.GetString());
         LOG_EVERY_SECOND(WARNING) << rowset_status_string;
-        DCHECK(false) << "check delete bitmap correctness failed!";
+        // let it crash if correctness check failed in Debug mode
+        DCHECK(false) << "delete bitmap correctness check failed in publish 
phase!";
+        return Status::InternalError("check delete bitmap failed!");
     }
     return Status::OK();
 }
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index 5ea64cce78..a1684fe9e6 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -552,6 +552,9 @@ public:
     void set_binlog_config(BinlogConfig binlog_config);
     void add_sentinel_mark_to_delete_bitmap(DeleteBitmap* delete_bitmap,
                                             const RowsetIdUnorderedSet& 
rowsetids);
+    Status check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, 
int64_t max_version,
+                                           int64_t txn_id, const 
RowsetIdUnorderedSet& rowset_ids,
+                                           std::vector<RowsetSharedPtr>* 
rowsets = nullptr);
 
 private:
     Status _init_once_action();
@@ -597,9 +600,6 @@ private:
     
////////////////////////////////////////////////////////////////////////////
 
     void _remove_sentinel_mark_from_delete_bitmap(DeleteBitmapPtr 
delete_bitmap);
-    Status _check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, 
int64_t max_version,
-                                            int64_t txn_id, const 
RowsetIdUnorderedSet& rowset_ids,
-                                            std::vector<RowsetSharedPtr>* 
rowsets = nullptr);
     std::string _get_rowset_info_str(RowsetSharedPtr rowset, bool delete_flag);
 
 public:


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to