This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 052b7f90eec branch-3.0: [Opt](checker) Add checker for delete bitmaps 
#44154 (#44952)
052b7f90eec is described below

commit 052b7f90eecc265ffb50942add552d2621888a93
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Dec 4 09:57:08 2024 +0800

    branch-3.0: [Opt](checker) Add checker for delete bitmaps #44154 (#44952)
    
    Cherry-picked from #44154
    
    Co-authored-by: bobhan1 <bao...@selectdb.com>
---
 cloud/src/common/bvars.cpp     |   7 +
 cloud/src/common/bvars.h       |   4 +
 cloud/src/common/config.h      |   5 +
 cloud/src/recycler/checker.cpp | 457 ++++++++++++++++++++++++++++++++++++++++-
 cloud/src/recycler/checker.h   |  28 +++
 cloud/src/recycler/util.cpp    |  66 ++++++
 cloud/src/recycler/util.h      |   5 +
 cloud/test/recycler_test.cpp   | 398 ++++++++++++++++++++++++++++++++++-
 8 files changed, 965 insertions(+), 5 deletions(-)

diff --git a/cloud/src/common/bvars.cpp b/cloud/src/common/bvars.cpp
index 507acb00dff..746f109ac6d 100644
--- a/cloud/src/common/bvars.cpp
+++ b/cloud/src/common/bvars.cpp
@@ -198,3 +198,10 @@ BvarStatusWithTag<long> 
g_bvar_checker_instance_volume("checker", "instance_volu
 BvarStatusWithTag<long> g_bvar_inverted_checker_num_scanned("checker", 
"num_inverted_scanned");
 BvarStatusWithTag<long> g_bvar_inverted_checker_num_check_failed("checker",
                                                                  
"num_inverted_check_failed");
+
+BvarStatusWithTag<int64_t> 
g_bvar_inverted_checker_leaked_delete_bitmaps("checker",
+                                                                         
"leaked_delete_bitmaps");
+BvarStatusWithTag<int64_t> g_bvar_inverted_checker_abnormal_delete_bitmaps(
+        "checker", "abnormal_delete_bitmaps");
+BvarStatusWithTag<int64_t> g_bvar_inverted_checker_delete_bitmaps_scanned(
+        "checker", "delete_bitmap_keys_scanned");
\ No newline at end of file
diff --git a/cloud/src/common/bvars.h b/cloud/src/common/bvars.h
index 373a3a63ff2..d0ad2e97957 100644
--- a/cloud/src/common/bvars.h
+++ b/cloud/src/common/bvars.h
@@ -250,3 +250,7 @@ extern BvarStatusWithTag<long> 
g_bvar_checker_last_success_time_ms;
 extern BvarStatusWithTag<long> g_bvar_checker_instance_volume;
 extern BvarStatusWithTag<long> g_bvar_inverted_checker_num_scanned;
 extern BvarStatusWithTag<long> g_bvar_inverted_checker_num_check_failed;
+
+extern BvarStatusWithTag<int64_t> 
g_bvar_inverted_checker_leaked_delete_bitmaps;
+extern BvarStatusWithTag<int64_t> 
g_bvar_inverted_checker_abnormal_delete_bitmaps;
+extern BvarStatusWithTag<int64_t> 
g_bvar_inverted_checker_delete_bitmaps_scanned;
diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h
index f6e0073b924..c6b6e1ef290 100644
--- a/cloud/src/common/config.h
+++ b/cloud/src/common/config.h
@@ -72,6 +72,11 @@ CONF_Bool(enable_checker, "false");
 CONF_Int32(recycle_pool_parallelism, "40");
 // Currently only used for recycler test
 CONF_Bool(enable_inverted_check, "false");
+// Currently only used for recycler test
+CONF_Bool(enable_delete_bitmap_inverted_check, "false");
+// checks if https://github.com/apache/doris/pull/40204 works as expected
+CONF_Bool(enable_delete_bitmap_storage_optimize_check, "false");
+CONF_mInt64(delete_bitmap_storage_optimize_check_version_gap, "1000");
 // interval for scanning instances to do checks and inspections
 CONF_mInt32(scan_instances_interval_seconds, "60"); // 1min
 // interval for check object
diff --git a/cloud/src/recycler/checker.cpp b/cloud/src/recycler/checker.cpp
index 19a10d61c12..60b6b7fc5ee 100644
--- a/cloud/src/recycler/checker.cpp
+++ b/cloud/src/recycler/checker.cpp
@@ -168,17 +168,35 @@ int Checker::start() {
             auto ctime_ms =
                     
duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
             g_bvar_checker_enqueue_cost_s.put(instance_id, ctime_ms / 1000 - 
enqueue_time_s);
-            int ret1 = checker->do_check();
 
-            int ret2 = 0;
+            bool success {true};
+
+            if (int ret = checker->do_check(); ret != 0) {
+                success = false;
+            }
+
             if (config::enable_inverted_check) {
-                ret2 = checker->do_inverted_check();
+                if (int ret = checker->do_inverted_check(); ret != 0) {
+                    success = false;
+                }
+            }
+
+            if (config::enable_delete_bitmap_inverted_check) {
+                if (int ret = checker->do_delete_bitmap_inverted_check(); ret 
!= 0) {
+                    success = false;
+                }
+            }
+
+            if (config::enable_delete_bitmap_storage_optimize_check) {
+                if (int ret = 
checker->do_delete_bitmap_storage_optimize_check(); ret != 0) {
+                    success = false;
+                }
             }
 
             // If instance checker has been aborted, don't finish this job
             if (!checker->stopped()) {
                 finish_instance_recycle_job(txn_kv_.get(), check_job_key, 
instance.instance_id(),
-                                            ip_port_, ret1 == 0 && ret2 == 0, 
ctime_ms);
+                                            ip_port_, success, ctime_ms);
             }
             {
                 std::lock_guard lock(mtx_);
@@ -740,4 +758,435 @@ int InstanceChecker::do_inverted_check() {
     return num_file_leak > 0 ? 1 : check_ret;
 }
 
+int InstanceChecker::traverse_mow_tablet(const std::function<int(int64_t)>& 
check_func) { // Scans rowset meta keys tablet by tablet and calls check_func(tablet_id) for each merge-on-write tablet; returns 0, or the first negative code encountered.
+    std::unique_ptr<RangeGetIterator> it;
+    auto begin = meta_rowset_key({instance_id_, 0, 0}); // the scan starts at tablet_id 0
+    auto end = meta_rowset_key({instance_id_, 
std::numeric_limits<int64_t>::max(), 0});
+    do {
+        std::unique_ptr<Transaction> txn; // a new txn is created for every range read
+        TxnErrorCode err = txn_kv_->create_txn(&txn);
+        if (err != TxnErrorCode::TXN_OK) {
+            LOG(WARNING) << "failed to create txn";
+            return -1;
+        }
+        err = txn->get(begin, end, &it, false, 1); // NOTE(review): the trailing 1 looks like a limit of one KV per read -- confirm against Transaction::get
+        if (err != TxnErrorCode::TXN_OK) {
+            LOG(WARNING) << "failed to get rowset kv, err=" << err;
+            return -1;
+        }
+        if (!it->has_next()) {
+            break;
+        }
+        while (it->has_next() && !stopped()) {
+            auto [k, v] = it->next();
+            std::string_view k1 = k;
+            k1.remove_prefix(1);
+            std::vector<std::tuple<std::variant<int64_t, std::string>, int, 
int>> out;
+            decode_key(&k1, &out); // NOTE(review): the result is not checked; out[3] below assumes decoding succeeded
+            // 0x01 "meta" ${instance_id} "rowset" ${tablet_id} ${version} -> 
RowsetMetaCloudPB
+            auto tablet_id = std::get<int64_t>(std::get<0>(out[3]));
+
+            if (!it->has_next()) {
+                // Last key of this batch: restart the next range read
+                // at the first rowset key of the following tablet id
+                begin = meta_rowset_key({instance_id_, tablet_id + 1, 0});
+            }
+
+            TabletMetaCloudPB tablet_meta;
+            int ret = get_tablet_meta(txn_kv_.get(), instance_id_, tablet_id, 
tablet_meta);
+            if (ret < 0) { // NOTE(review): the message below names do_delete_bitmap_integrity_check(), not this function
+                LOG(WARNING) << fmt::format(
+                        "failed to get_tablet_meta in 
do_delete_bitmap_integrity_check(), "
+                        "instance_id={}, tablet_id={}",
+                        instance_id_, tablet_id);
+                return ret;
+            }
+
+            if (tablet_meta.enable_unique_key_merge_on_write()) {
+                // only check merge-on-write table
+                int ret = check_func(tablet_id); // this ret shadows the one declared above
+                if (ret < 0) {
+                    // return immediately when encounter unexpected error,
+                    // otherwise, we continue to check the next tablet
+                    return ret;
+                }
+            }
+        }
+    } while (it->more() && !stopped());
+    return 0; // also reached when stopped() cut the scan short
+}
+
+int InstanceChecker::traverse_rowset_delete_bitmaps( // Calls callback(tablet_id, rowset_id, version, segment_id) for each delete bitmap key of one rowset; returns the first non-zero callback result, -1 on txn failure, else 0.
+        int64_t tablet_id, std::string rowset_id,
+        const std::function<int(int64_t, std::string_view, int64_t, int64_t)>& 
callback) {
+    std::unique_ptr<RangeGetIterator> it;
+    auto begin = meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 
0, 0});
+    auto end = meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id,
+                                       std::numeric_limits<int64_t>::max(),
+                                       std::numeric_limits<int64_t>::max()});
+    do {
+        std::unique_ptr<Transaction> txn; // a new txn is created for every range read
+        TxnErrorCode err = txn_kv_->create_txn(&txn);
+        if (err != TxnErrorCode::TXN_OK) {
+            LOG(WARNING) << "failed to create txn";
+            return -1;
+        }
+        err = txn->get(begin, end, &it);
+        if (err != TxnErrorCode::TXN_OK) { // NOTE(review): the message says "rowset kv" although delete bitmap keys are being read
+            LOG(WARNING) << "failed to get rowset kv, err=" << err;
+            return -1;
+        }
+        if (!it->has_next()) {
+            break;
+        }
+        while (it->has_next() && !stopped()) {
+            auto [k, v] = it->next();
+            std::string_view k1 = k;
+            k1.remove_prefix(1);
+            std::vector<std::tuple<std::variant<int64_t, std::string>, int, 
int>> out;
+            decode_key(&k1, &out); // NOTE(review): the result is not checked; out[5]/out[6] below assume decoding succeeded
+            // 0x01 "meta" ${instance_id} "delete_bitmap" ${tablet_id} 
${rowset_id} ${version} ${segment_id} -> roaringbitmap
+            auto version = std::get<std::int64_t>(std::get<0>(out[5]));
+            auto segment_id = std::get<std::int64_t>(std::get<0>(out[6]));
+
+            int ret = callback(tablet_id, rowset_id, version, segment_id);
+            if (ret != 0) { // any non-zero result, positive or negative, ends the traversal
+                return ret;
+            }
+
+            if (!it->has_next()) {
+                begin = k;
+                begin.push_back('\x00'); // Update to next smallest key for 
iteration
+                break;
+            }
+        }
+    } while (it->more() && !stopped());
+
+    return 0; // also reached when stopped() cut the scan short
+}
+
+int InstanceChecker::collect_tablet_rowsets( // Passes every rowset meta stored for tablet_id to collect_cb; returns 0 on success and -1 on txn or parse failure.
+        int64_t tablet_id, const std::function<void(const 
doris::RowsetMetaCloudPB&)>& collect_cb) {
+    std::unique_ptr<Transaction> txn; // unlike the traverse_* helpers, one txn serves all range reads below
+    TxnErrorCode err = txn_kv_->create_txn(&txn);
+    if (err != TxnErrorCode::TXN_OK) {
+        LOG(WARNING) << "failed to create txn";
+        return -1;
+    }
+    std::unique_ptr<RangeGetIterator> it;
+    auto begin = meta_rowset_key({instance_id_, tablet_id, 0});
+    auto end = meta_rowset_key({instance_id_, tablet_id + 1, 0}); // upper bound is the first rowset key of tablet_id + 1
+
+    int64_t rowsets_num {0};
+    do {
+        TxnErrorCode err = txn->get(begin, end, &it); // this err shadows the one declared above
+        if (err != TxnErrorCode::TXN_OK) {
+            LOG(WARNING) << "failed to get rowset kv, err=" << err;
+            return -1;
+        }
+        if (!it->has_next()) {
+            break;
+        }
+        while (it->has_next() && !stopped()) {
+            auto [k, v] = it->next();
+            doris::RowsetMetaCloudPB rowset;
+            if (!rowset.ParseFromArray(v.data(), v.size())) {
+                LOG(WARNING) << "malformed rowset meta value, key=" << hex(k);
+                return -1;
+            }
+
+            ++rowsets_num;
+            collect_cb(rowset);
+
+            if (!it->has_next()) {
+                begin = k;
+                begin.push_back('\x00'); // Update to next smallest key for 
iteration
+                break;
+            }
+        }
+    } while (it->more() && !stopped());
+
+    LOG(INFO) << fmt::format(
+            "[delete bitmap checker] successfully collect rowsets for 
instance_id={}, "
+            "tablet_id={}, rowsets_num={}",
+            instance_id_, tablet_id, rowsets_num);
+    return 0; // also reached (and logged as success) when stopped() cut the scan short
+}
+
+int InstanceChecker::do_delete_bitmap_inverted_check() { // Scans every delete bitmap key of the instance; returns 1 if any leaked or abnormal bitmap was seen, a negative code on error, else 0.
+    LOG(INFO) << fmt::format(
+            "[delete bitmap checker] begin to do_delete_bitmap_inverted_check 
for instance_id={}",
+            instance_id_);
+
+    // number of delete bitmap keys scanned so far
+    int64_t total_delete_bitmap_keys {0};
+    // number of delete bitmaps that belong to a non merge-on-write tablet
+    int64_t abnormal_delete_bitmaps {0};
+    // number of delete bitmaps that have no corresponding rowset in MS
+    int64_t leaked_delete_bitmaps {0};
+
+    auto start_time = std::chrono::steady_clock::now();
+    std::unique_ptr<int, std::function<void(int*)>> 
defer_log_statistics((int*)0x01, [&](int*) { // dummy non-null pointer so the deleter runs at scope exit: stats are published on every return path
+        g_bvar_inverted_checker_leaked_delete_bitmaps.put(instance_id_, 
leaked_delete_bitmaps);
+        g_bvar_inverted_checker_abnormal_delete_bitmaps.put(instance_id_, 
abnormal_delete_bitmaps);
+        g_bvar_inverted_checker_delete_bitmaps_scanned.put(instance_id_, 
total_delete_bitmap_keys);
+
+        auto cost = std::chrono::duration_cast<std::chrono::milliseconds>(
+                            std::chrono::steady_clock::now() - start_time)
+                            .count();
+        if (leaked_delete_bitmaps > 0 || abnormal_delete_bitmaps > 0) {
+            LOG(WARNING) << fmt::format(
+                    "[delete bitmap check fails] delete bitmap inverted check 
for instance_id={}, "
+                    "cost={} ms, total_delete_bitmap_keys={}, 
leaked_delete_bitmaps={}, "
+                    "abnormal_delete_bitmaps={}",
+                    instance_id_, cost, total_delete_bitmap_keys, 
leaked_delete_bitmaps,
+                    abnormal_delete_bitmaps);
+        } else {
+            LOG(INFO) << fmt::format(
+                    "[delete bitmap checker] delete bitmap inverted check for 
instance_id={}, "
+                    "passed. cost={} ms, total_delete_bitmap_keys={}",
+                    instance_id_, cost, total_delete_bitmap_keys);
+        }
+    });
+
+    struct TabletsRowsetsCache { // merge-on-write flag and rowset ids of the tablet seen most recently, reused while keys stay on that tablet
+        int64_t tablet_id {-1};
+        bool enable_merge_on_write {false};
+        std::unordered_set<std::string> rowsets {};
+    } tablet_rowsets_cache {};
+
+    std::unique_ptr<RangeGetIterator> it;
+    auto begin = meta_delete_bitmap_key({instance_id_, 0, "", 0, 0});
+    auto end =
+            meta_delete_bitmap_key({instance_id_, 
std::numeric_limits<int64_t>::max(), "", 0, 0});
+    do {
+        std::unique_ptr<Transaction> txn; // a new txn is created for every range read
+        TxnErrorCode err = txn_kv_->create_txn(&txn);
+        if (err != TxnErrorCode::TXN_OK) {
+            LOG(WARNING) << "failed to create txn";
+            return -1;
+        }
+        err = txn->get(begin, end, &it);
+        if (err != TxnErrorCode::TXN_OK) { // NOTE(review): the message says "rowset kv" although delete bitmap keys are being read
+            LOG(WARNING) << "failed to get rowset kv, err=" << err;
+            return -1;
+        }
+        if (!it->has_next()) {
+            break;
+        }
+        while (it->has_next() && !stopped()) {
+            auto [k, v] = it->next();
+            std::string_view k1 = k;
+            k1.remove_prefix(1);
+            std::vector<std::tuple<std::variant<int64_t, std::string>, int, 
int>> out;
+            decode_key(&k1, &out); // NOTE(review): the result is not checked; out[3..6] below assume decoding succeeded
+            // 0x01 "meta" ${instance_id} "delete_bitmap" ${tablet_id} 
${rowset_id} ${version} ${segment_id} -> roaringbitmap
+            auto tablet_id = std::get<int64_t>(std::get<0>(out[3]));
+            auto rowset_id = std::get<std::string>(std::get<0>(out[4]));
+            auto version = std::get<std::int64_t>(std::get<0>(out[5]));
+            auto segment_id = std::get<std::int64_t>(std::get<0>(out[6]));
+
+            ++total_delete_bitmap_keys;
+
+            if (!it->has_next()) {
+                begin = k;
+                begin.push_back('\x00'); // Update to next smallest key for 
iteration
+            }
+
+            if (tablet_rowsets_cache.tablet_id == -1 ||
+                tablet_rowsets_cache.tablet_id != tablet_id) { // cache miss: reload the tablet meta and, for merge-on-write tablets, its rowset ids
+                TabletMetaCloudPB tablet_meta;
+                int ret = get_tablet_meta(txn_kv_.get(), instance_id_, 
tablet_id, tablet_meta);
+                if (ret < 0) {
+                    LOG(WARNING) << fmt::format(
+                            "[delete bitmap checker] failed to get_tablet_meta 
in "
+                            "do_delete_bitmap_inverted_check(), 
instance_id={}, tablet_id={}",
+                            instance_id_, tablet_id);
+                    return ret;
+                }
+
+                tablet_rowsets_cache.tablet_id = tablet_id;
+                tablet_rowsets_cache.enable_merge_on_write =
+                        tablet_meta.enable_unique_key_merge_on_write();
+                tablet_rowsets_cache.rowsets.clear();
+
+                if (tablet_rowsets_cache.enable_merge_on_write) {
+                    // only collect rowsets for merge-on-write tablet
+                    auto collect_cb =
+                            [&tablet_rowsets_cache](const 
doris::RowsetMetaCloudPB& rowset) {
+                                
tablet_rowsets_cache.rowsets.insert(rowset.rowset_id_v2());
+                            };
+                    ret = collect_tablet_rowsets(tablet_id, collect_cb);
+                    if (ret < 0) {
+                        return ret;
+                    }
+                }
+            }
+            DCHECK_EQ(tablet_id, tablet_rowsets_cache.tablet_id);
+
+            if (!tablet_rowsets_cache.enable_merge_on_write) {
+                // clang-format off
+                TEST_SYNC_POINT_CALLBACK(
+                        
"InstanceChecker::do_delete_bitmap_inverted_check.get_abnormal_delete_bitmap",
+                        &tablet_id, &rowset_id, &version, &segment_id);
+                // clang-format on
+                ++abnormal_delete_bitmaps;
+                // log an error and continue to check the next delete bitmap
+                LOG(WARNING) << fmt::format(
+                        "[delete bitmap check fails] find a delete bitmap 
belongs to tablet "
+                        "which is not a merge-on-write table! instance_id={}, 
tablet_id={}, "
+                        "version={}, segment_id={}",
+                        instance_id_, tablet_id, version, segment_id);
+                continue; // the rowset-existence test below is skipped for non merge-on-write tablets
+            }
+
+            if (!tablet_rowsets_cache.rowsets.contains(rowset_id)) {
+                TEST_SYNC_POINT_CALLBACK(
+                        
"InstanceChecker::do_delete_bitmap_inverted_check.get_leaked_delete_bitmap",
+                        &tablet_id, &rowset_id, &version, &segment_id);
+                ++leaked_delete_bitmaps;
+                // log an error and continue to check the next delete bitmap
+                LOG(WARNING) << fmt::format(
+                        "[delete bitmap check fails] can't find corresponding 
rowset for delete "
+                        "bitmap instance_id={}, tablet_id={}, rowset_id={}, 
version={}, "
+                        "segment_id={}",
+                        instance_id_, tablet_id, rowset_id, version, 
segment_id);
+            }
+        }
+    } while (it->more() && !stopped());
+
+    return (leaked_delete_bitmaps > 0 || abnormal_delete_bitmaps > 0) ? 1 : 0;
+}
+
+int InstanceChecker::check_delete_bitmap_storage_optimize(int64_t tablet_id) {
+    using Version = std::pair<int64_t, int64_t>;
+    struct RowsetDigest {
+        std::string rowset_id;
+        Version version;
+        doris::SegmentsOverlapPB segments_overlap;
+
+        bool operator<(const RowsetDigest& other) const {
+            return version.first < other.version.first;
+        }
+
+        bool produced_by_compaction() const {
+            return (version.first < version.second) ||
+                   ((version.first == version.second) && segments_overlap == 
NONOVERLAPPING);
+        }
+    };
+
+    // number of rowsets which may have problems
+    int64_t abnormal_rowsets_num {0};
+
+    std::vector<RowsetDigest> tablet_rowsets {};
+    // Get all visible rowsets of this tablet
+    auto collect_cb = [&tablet_rowsets](const doris::RowsetMetaCloudPB& 
rowset) {
+        if (rowset.start_version() == 0 && rowset.end_version() == 1) {
+            // ignore dummy rowset [0-1]
+            return;
+        }
+        tablet_rowsets.emplace_back(
+                rowset.rowset_id_v2(),
+                std::make_pair<int64_t, int64_t>(rowset.start_version(), 
rowset.end_version()),
+                rowset.segments_overlap_pb());
+    };
+    if (int ret = collect_tablet_rowsets(tablet_id, collect_cb); ret != 0) {
+        return ret;
+    }
+
+    std::sort(tablet_rowsets.begin(), tablet_rowsets.end());
+
+    // find right-most rowset which is produced by compaction
+    auto it = std::find_if(
+            tablet_rowsets.crbegin(), tablet_rowsets.crend(),
+            [](const RowsetDigest& rowset) { return 
rowset.produced_by_compaction(); });
+    if (it == tablet_rowsets.crend()) {
+        LOG(INFO) << fmt::format(
+                "[delete bitmap checker] skip to check delete bitmap storage 
optimize for "
+                "tablet_id={} because it doesn't have compacted rowsets.",
+                tablet_id);
+        return 0;
+    }
+
+    int64_t start_version = it->version.first;
+    int64_t pre_min_version = it->version.second;
+
+    // after BE sweeping stale rowsets, all rowsets in this tablet before
+    // should not have delete bitmaps with versions lower than 
`pre_min_version`
+    if (config::delete_bitmap_storage_optimize_check_version_gap > 0) {
+        pre_min_version -= 
config::delete_bitmap_storage_optimize_check_version_gap;
+        if (pre_min_version <= 1) {
+            LOG(INFO) << fmt::format(
+                    "[delete bitmap checker] skip to check delete bitmap 
storage optimize for "
+                    "tablet_id={} because pre_min_version is too small.",
+                    tablet_id);
+            return 0;
+        }
+    }
+
+    auto check_func = [pre_min_version, instance_id = instance_id_](
+                              int64_t tablet_id, std::string_view rowset_id, 
int64_t version,
+                              int64_t segment_id) -> int {
+        if (version < pre_min_version) {
+            LOG(WARNING) << fmt::format(
+                    "[delete bitmap check fails] delete bitmap storage 
optimize check fail for "
+                    "instance_id={}, tablet_id={}, rowset_id={}, found delete 
bitmap with "
+                    "version={} < pre_min_version={}",
+                    instance_id, tablet_id, rowset_id, version, 
pre_min_version);
+            return 1;
+        }
+        return 0;
+    };
+
+    for (const auto& rowset : tablet_rowsets) {
+        // check for all rowsets before the max compacted rowset
+        if (rowset.version.second < start_version) {
+            auto rowset_id = rowset.rowset_id;
+            int ret = traverse_rowset_delete_bitmaps(tablet_id, rowset_id, 
check_func);
+            if (ret < 0) {
+                return ret;
+            }
+
+            if (ret != 0) {
+                ++abnormal_rowsets_num;
+                TEST_SYNC_POINT_CALLBACK(
+                        
"InstanceChecker::check_delete_bitmap_storage_optimize.get_abnormal_rowset",
+                        &tablet_id, &rowset_id);
+            }
+        }
+    }
+
+    LOG(INFO) << fmt::format(
+            "[delete bitmap checker] finish check delete bitmap storage 
optimize for "
+            "instance_id={}, tablet_id={}, rowsets_num={}, 
abnormal_rowsets_num={}, "
+            "pre_min_version={}",
+            instance_id_, tablet_id, tablet_rowsets.size(), 
abnormal_rowsets_num, pre_min_version);
+
+    return (abnormal_rowsets_num > 1 ? 1 : 0);
+}
+
+int InstanceChecker::do_delete_bitmap_storage_optimize_check() { // Returns 1 if at least one merge-on-write tablet fails the storage optimize check, a negative code on error, else 0.
+    int64_t total_tablets_num {0};
+    int64_t failed_tablets_num {0};
+
+    // check, for every merge-on-write tablet, that pre-compaction rowsets keep no stale-version delete 
bitmap in MS
+    int ret = traverse_mow_tablet([&](int64_t tablet_id) {
+        ++total_tablets_num;
+        int res = check_delete_bitmap_storage_optimize(tablet_id);
+        failed_tablets_num += (res != 0);
+        return res; // only a negative res aborts traverse_mow_tablet; a positive one lets the scan continue
+    });
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    LOG(INFO) << fmt::format(
+            "[delete bitmap checker] check delete bitmap storage optimize for 
instance_id={}, "
+            "total_tablets_num={}, failed_tablets_num={}",
+            instance_id_, total_tablets_num, failed_tablets_num);
+
+    return (failed_tablets_num > 0) ? 1 : 0;
+}
+
 } // namespace doris::cloud
diff --git a/cloud/src/recycler/checker.h b/cloud/src/recycler/checker.h
index 03717a69b5e..7f87e90f7cb 100644
--- a/cloud/src/recycler/checker.h
+++ b/cloud/src/recycler/checker.h
@@ -23,6 +23,7 @@
 #include <atomic>
 #include <condition_variable>
 #include <deque>
+#include <functional>
 #include <thread>
 #include <unordered_map>
 #include <unordered_set>
@@ -30,6 +31,10 @@
 #include "recycler/storage_vault_accessor.h"
 #include "recycler/white_black_list.h"
 
+namespace doris {
+class RowsetMetaCloudPB;
+} // namespace doris
+
 namespace doris::cloud {
 class StorageVaultAccessor;
 class InstanceChecker;
@@ -86,6 +91,18 @@ public:
     // Return 1 if data loss is identified.
     // Return negative if a temporary error occurred during the check process.
     int do_check();
+
+    // Return 0 if success.
+    // Return 1 if delete bitmap leak is identified.
+    // Return negative if a temporary error occurred during the check process.
+    int do_delete_bitmap_inverted_check();
+
+    // Checks whether https://github.com/apache/doris/pull/40204 works as expected:
+    // stale delete bitmaps should be cleared in MS when BE deletes expired stale rowsets.
+    // NOTE: stale rowsets are lost after BE restarts, so some stale delete bitmaps
+    // may never be cleared.
+    int do_delete_bitmap_storage_optimize_check();
+
     // If there are multiple buckets, return the minimum lifecycle; if there 
are no buckets (i.e.
     // all accessors are HdfsAccessor), return INT64_MAX.
     // Return 0 if success, otherwise error
@@ -100,6 +117,17 @@ private:
     // returns 0 for success otherwise error
     int init_storage_vault_accessors(const InstanceInfoPB& instance);
 
+    int traverse_mow_tablet(const std::function<int(int64_t)>& check_func);
+    int traverse_rowset_delete_bitmaps(
+            int64_t tablet_id, std::string rowset_id,
+            const std::function<int(int64_t, std::string_view, int64_t, 
int64_t)>& callback);
+    int collect_tablet_rowsets(
+            int64_t tablet_id,
+            const std::function<void(const doris::RowsetMetaCloudPB&)>& 
collect_cb);
+    int traverse_delete_bitmaps(const std::function<int(int64_t)>& check_func);
+
+    int check_delete_bitmap_storage_optimize(int64_t tablet_id);
+
     std::atomic_bool stopped_ {false};
     std::shared_ptr<TxnKv> txn_kv_;
     std::string instance_id_;
diff --git a/cloud/src/recycler/util.cpp b/cloud/src/recycler/util.cpp
index d6c63ea752f..6797782d5d6 100644
--- a/cloud/src/recycler/util.cpp
+++ b/cloud/src/recycler/util.cpp
@@ -23,6 +23,7 @@
 
 #include "common/util.h"
 #include "meta-service/keys.h"
+#include "meta-service/meta_service_schema.h"
 #include "meta-service/txn_kv.h"
 #include "meta-service/txn_kv_error.h"
 
@@ -233,4 +234,69 @@ int lease_instance_recycle_job(TxnKv* txn_kv, 
std::string_view key, const std::s
     return 0;
 }
 
+int get_tablet_idx(TxnKv* txn_kv, const std::string& instance_id, int64_t 
tablet_id,
+                   TabletIndexPB& tablet_idx) { // Reads and parses the tablet index record of tablet_id into tablet_idx; returns 0 on success, -1 on any failure.
+    std::unique_ptr<Transaction> txn;
+    TxnErrorCode err = txn_kv->create_txn(&txn);
+    if (err != TxnErrorCode::TXN_OK) {
+        LOG(WARNING) << "failed to create txn";
+        return -1;
+    }
+
+    std::string key, val;
+    meta_tablet_idx_key({instance_id, tablet_id}, &key);
+    err = txn->get(key, &val);
+    if (err != TxnErrorCode::TXN_OK) { // every non-OK code, whatever its cause, is reported as -1
+        LOG(WARNING) << fmt::format("failed to get tablet_idx, err={} 
tablet_id={} key={}", err,
+                                    tablet_id, hex(key));
+        return -1;
+    }
+    if (!tablet_idx.ParseFromString(val)) [[unlikely]] {
+        LOG(WARNING) << fmt::format("malformed tablet index value, 
tablet_id={} key={}", tablet_id,
+                                    hex(key));
+        return -1;
+    }
+    if (tablet_id != tablet_idx.tablet_id()) [[unlikely]] { // sanity check: the stored record must describe the requested tablet
+        LOG(WARNING) << "unexpected error given_tablet_id=" << tablet_id
+                     << " idx_pb_tablet_id=" << tablet_idx.tablet_id() << " 
key=" << hex(key);
+        return -1;
+    }
+    return 0;
+}
+
+int get_tablet_meta(TxnKv* txn_kv, const std::string& instance_id, int64_t 
tablet_id,
+                    TabletMetaCloudPB& tablet_meta) { // Looks up the tablet index, then reads and parses the tablet meta into tablet_meta; returns 0 on success, negative on failure.
+    TabletIndexPB tablet_idx;
+    int ret = get_tablet_idx(txn_kv, instance_id, tablet_id, tablet_idx); // the index supplies the table/index/partition ids needed for the meta key
+    if (ret < 0) {
+        return ret;
+    }
+
+    std::unique_ptr<Transaction> txn; // a second txn, separate from the one used inside get_tablet_idx
+    TxnErrorCode err = txn_kv->create_txn(&txn);
+    if (err != TxnErrorCode::TXN_OK) {
+        LOG(WARNING) << "failed to create txn";
+        return -1;
+    }
+
+    std::string key, val;
+    meta_tablet_key({instance_id, tablet_idx.table_id(), tablet_idx.index_id(),
+                     tablet_idx.partition_id(), tablet_id},
+                    &key);
+    err = txn->get(key, &val);
+    if (err != TxnErrorCode::TXN_OK) { // every non-OK code, whatever its cause, is reported as -1
+        LOG(WARNING) << fmt::format(
+                "failed to get tablet, err={}, table_id={}, index_id={}, 
partition_id={}, "
+                "tablet_id={} key={}",
+                err, tablet_idx.table_id(), tablet_idx.index_id(), 
tablet_idx.partition_id(),
+                tablet_id, hex(key));
+        return -1;
+    }
+    if (!tablet_meta.ParseFromString(val)) [[unlikely]] {
+        LOG(WARNING) << fmt::format("malformed tablet meta, tablet_id={} 
key={}", tablet_id,
+                                    hex(key));
+        return -1;
+    }
+    return 0;
+}
 } // namespace doris::cloud
diff --git a/cloud/src/recycler/util.h b/cloud/src/recycler/util.h
index b6d4d3299b5..5aa929c2398 100644
--- a/cloud/src/recycler/util.h
+++ b/cloud/src/recycler/util.h
@@ -85,4 +85,9 @@ inline std::string tablet_path_prefix(int64_t tablet_id) {
     return fmt::format("data/{}/", tablet_id);
 }
 
+int get_tablet_idx(TxnKv* txn_kv, const std::string& instance_id, int64_t 
tablet_id,
+                   TabletIndexPB& tablet_idx);
+
+int get_tablet_meta(TxnKv* txn_kv, const std::string& instance_id, int64_t 
tablet_id,
+                    TabletMetaCloudPB& tablet_meta);
 } // namespace doris::cloud
diff --git a/cloud/test/recycler_test.cpp b/cloud/test/recycler_test.cpp
index 14687354839..feecf9552f9 100644
--- a/cloud/test/recycler_test.cpp
+++ b/cloud/test/recycler_test.cpp
@@ -255,14 +255,63 @@ static int create_committed_rowset(TxnKv* txn_kv, 
StorageVaultAccessor* accessor
     return 0;
 }
 
+static int create_committed_rowset_with_rowset_id(TxnKv* txn_kv, 
StorageVaultAccessor* accessor,
+                                                  const std::string& 
resource_id, int64_t tablet_id,
+                                                  int64_t start_version, 
int64_t end_version,
+                                                  std::string rowset_id, bool 
segments_overlap,
+                                                  int num_segments) { // test helper: commits one rowset meta KV, then puts one empty file per segment; returns 0, or -1 if the txn cannot be created or committed
+    std::string key;
+    std::string val;
+
+    MetaRowsetKeyInfo key_info {instance_id, tablet_id, end_version}; // the key is built from end_version, not start_version
+    meta_rowset_key(key_info, &key);
+
+    doris::RowsetMetaCloudPB rowset_pb;
+    rowset_pb.set_rowset_id(0); // value is unused here, but the field still has to be set
+    rowset_pb.set_rowset_id_v2(rowset_id);
+    rowset_pb.set_num_segments(num_segments);
+    rowset_pb.set_tablet_id(tablet_id);
+    rowset_pb.set_resource_id(resource_id);
+    rowset_pb.set_creation_time(current_time);
+    rowset_pb.set_start_version(start_version);
+    rowset_pb.set_end_version(end_version);
+    rowset_pb.set_segments_overlap_pb(segments_overlap ? OVERLAPPING : 
NONOVERLAPPING);
+    rowset_pb.SerializeToString(&val); // NOTE(review): the result of SerializeToString is not checked
+
+    std::unique_ptr<Transaction> txn;
+    if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) {
+        return -1;
+    }
+    txn->put(key, val);
+    if (txn->commit() != TxnErrorCode::TXN_OK) {
+        return -1;
+    }
+
+    for (int i = 0; i < num_segments; ++i) { // runs only after the meta KV has been committed
+        auto path = segment_path(tablet_id, rowset_id, i);
+        accessor->put_file(path, ""); // segment files are created with empty content
+    }
+    return 0;
+}
+
+static void create_delete_bitmaps(Transaction* txn, int64_t tablet_id, 
std::string rowset_id,
+                                  int64_t start_version, int64_t end_version) { // test helper: puts one delete-bitmap KV per version in [start_version, end_version] into txn; it does not commit
+    for (int64_t ver {start_version}; ver <= end_version; ver++) { // both bounds are inclusive
+        auto key = meta_delete_bitmap_key({instance_id, tablet_id, rowset_id, 
ver, 0});
+        std::string val {"test_data"}; // placeholder payload
+        txn->put(key, val);
+    }
+}
+
 static int create_tablet(TxnKv* txn_kv, int64_t table_id, int64_t index_id, 
int64_t partition_id,
-                         int64_t tablet_id) {
+                         int64_t tablet_id, bool is_mow = false) {
     std::unique_ptr<Transaction> txn;
     if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) {
         return -1;
     }
     doris::TabletMetaCloudPB tablet_meta;
     tablet_meta.set_tablet_id(tablet_id);
+    tablet_meta.set_enable_unique_key_merge_on_write(is_mow);
     auto val = tablet_meta.SerializeAsString();
     auto key = meta_tablet_key({instance_id, table_id, index_id, partition_id, 
tablet_id});
     txn->put(key, val);
@@ -275,6 +324,7 @@ static int create_tablet(TxnKv* txn_kv, int64_t table_id, 
int64_t index_id, int6
     TabletIndexPB tablet_idx_pb;
     tablet_idx_pb.set_db_id(db_id);
     tablet_idx_pb.set_table_id(table_id);
+    tablet_idx_pb.set_index_id(index_id);
     tablet_idx_pb.set_partition_id(partition_id);
     tablet_idx_pb.set_tablet_id(tablet_id);
     auto idx_val = tablet_idx_pb.SerializeAsString();
@@ -2576,6 +2626,352 @@ TEST(CheckerTest, do_inspect) {
     }
 }
 
+TEST(CheckerTest, delete_bitmap_inverted_check_normal) {
+    // normal case: every delete bitmap written below belongs to a rowset that is also created
+    auto txn_kv = std::make_shared<MemTxnKv>();
+    ASSERT_EQ(txn_kv->init(), 0);
+
+    InstanceInfoPB instance;
+    instance.set_instance_id(instance_id);
+    auto obj_info = instance.add_obj_info();
+    obj_info->set_id("1");
+
+    InstanceChecker checker(txn_kv, instance_id);
+    ASSERT_EQ(checker.init(instance), 0);
+    auto accessor = checker.accessor_map_.begin()->second; // use the first entry of accessor_map_
+
+    std::unique_ptr<Transaction> txn;
+    ASSERT_EQ(TxnErrorCode::TXN_OK, txn_kv->create_txn(&txn)); // all delete-bitmap KVs below go into this txn, committed once at the end
+
+    constexpr int table_id = 10000, index_id = 10001, partition_id = 10002;
+    // create some rowsets with delete bitmaps in merge-on-write tablets
+    for (int tablet_id = 600001; tablet_id <= 600010; ++tablet_id) {
+        ASSERT_EQ(0,
+                  create_tablet(txn_kv.get(), table_id, index_id, 
partition_id, tablet_id, true));
+        int64_t rowset_start_id = 400;
+        for (int ver = 2; ver <= 10; ++ver) { // one single-version rowset per version 2..10
+            std::string rowset_id = std::to_string(rowset_start_id++);
+            create_committed_rowset_with_rowset_id(txn_kv.get(), 
accessor.get(), "1", tablet_id,
+                                                   ver, ver, rowset_id, false, 
1); // NOTE(review): the helper's int result is not checked
+            if (ver >= 5) { // versions 5..10: small value written with a plain put
+                auto delete_bitmap_key =
+                        meta_delete_bitmap_key({instance_id, tablet_id, 
rowset_id, ver, 0});
+                std::string delete_bitmap_val {"test"};
+                txn->put(delete_bitmap_key, delete_bitmap_val);
+            } else {
+                // delete bitmaps may be split into multiple KVs if too 
large
+                auto delete_bitmap_key =
+                        meta_delete_bitmap_key({instance_id, tablet_id, 
rowset_id, ver, 0});
+                std::string delete_bitmap_val(1000, 'A');
+                cloud::put(txn.get(), delete_bitmap_key, delete_bitmap_val, 0, 
300); // NOTE(review): presumably 300 is the split size, so the 1000-byte value spans several KVs; confirm
+            }
+        }
+    }
+
+    // also create some rowsets without delete bitmaps in non merge-on-write 
tablet
+    for (int tablet_id = 700001; tablet_id <= 700010; ++tablet_id) {
+        ASSERT_EQ(0,
+                  create_tablet(txn_kv.get(), table_id, index_id, 
partition_id, tablet_id, false));
+        int64_t rowset_start_id = 500;
+        for (int ver = 2; ver < 10; ++ver) { // versions 2..9
+            std::string rowset_id = std::to_string(rowset_start_id++);
+            create_committed_rowset_with_rowset_id(txn_kv.get(), 
accessor.get(), "1", tablet_id,
+                                                   ver, ver, rowset_id, false, 
1);
+        }
+    }
+
+    ASSERT_EQ(TxnErrorCode::TXN_OK, txn->commit());
+
+    ASSERT_EQ(checker.do_delete_bitmap_inverted_check(), 0); // the test expects 0 when nothing is leaked or abnormal
+}
+
+TEST(CheckerTest, delete_bitmap_inverted_check_abnormal) {
+    // abnormal case: some delete bitmaps are leaked, others sit in non merge-on-write tablets
+    auto txn_kv = std::make_shared<MemTxnKv>();
+    ASSERT_EQ(txn_kv->init(), 0);
+
+    InstanceInfoPB instance;
+    instance.set_instance_id(instance_id);
+    auto obj_info = instance.add_obj_info();
+    obj_info->set_id("1");
+
+    InstanceChecker checker(txn_kv, instance_id);
+    ASSERT_EQ(checker.init(instance), 0);
+    auto accessor = checker.accessor_map_.begin()->second; // use the first entry of accessor_map_
+
+    // tablet_id -> [rowset_id, version, segment_id]
+    std::map<std::int64_t, std::set<std::tuple<std::string, int64_t, int64_t>>>
+            expected_abnormal_delete_bitmaps {}, real_abnormal_delete_bitmaps 
{};
+    std::map<std::int64_t, std::set<std::tuple<std::string, int64_t, int64_t>>>
+            expected_leaked_delete_bitmaps {}, real_leaked_delete_bitmaps {};
+    auto sp = SyncPoint::get_instance();
+    std::unique_ptr<int, std::function<void(int*)>> defer(
+            (int*)0x01, [](int*) { 
SyncPoint::get_instance()->clear_all_call_backs(); }); // scope guard: the dummy non-null pointer makes the deleter run when defer is destroyed
+    sp->set_call_back(
+            
"InstanceChecker::do_delete_bitmap_inverted_check.get_abnormal_delete_bitmap",
+            [&real_abnormal_delete_bitmaps](auto&& args) { // records every bitmap reported through this sync point
+                int64_t tablet_id = *try_any_cast<int64_t*>(args[0]);
+                std::string rowset_id = *try_any_cast<std::string*>(args[1]);
+                int64_t version = *try_any_cast<int64_t*>(args[2]);
+                int64_t segment_id = *try_any_cast<int64_t*>(args[3]);
+                real_abnormal_delete_bitmaps[tablet_id].insert({rowset_id, 
version, segment_id});
+            });
+    sp->set_call_back(
+            
"InstanceChecker::do_delete_bitmap_inverted_check.get_leaked_delete_bitmap",
+            [&real_leaked_delete_bitmaps](auto&& args) { // same shape as above, collected into the leaked map
+                int64_t tablet_id = *try_any_cast<int64_t*>(args[0]);
+                std::string rowset_id = *try_any_cast<std::string*>(args[1]);
+                int64_t version = *try_any_cast<int64_t*>(args[2]);
+                int64_t segment_id = *try_any_cast<int64_t*>(args[3]);
+                real_leaked_delete_bitmaps[tablet_id].insert({rowset_id, 
version, segment_id});
+            });
+    sp->enable_processing(); // NOTE(review): no matching disable call is visible in this test
+
+    std::unique_ptr<Transaction> txn;
+    ASSERT_EQ(TxnErrorCode::TXN_OK, txn_kv->create_txn(&txn));
+
+    constexpr int table_id = 10000, index_id = 10001, partition_id = 10002;
+    // create some rowsets with delete bitmaps in merge-on-write tablets
+    for (int tablet_id = 800001; tablet_id <= 800010; ++tablet_id) {
+        ASSERT_EQ(0,
+                  create_tablet(txn_kv.get(), table_id, index_id, 
partition_id, tablet_id, true));
+        int64_t rowset_start_id = 600;
+        for (int ver = 2; ver <= 20; ++ver) { // a delete bitmap is written for every version 2..20
+            std::string rowset_id = std::to_string(rowset_start_id++);
+
+            if (ver >= 10) {
+                // only versions 10..20 get a rowset
+                create_committed_rowset_with_rowset_id(txn_kv.get(), 
accessor.get(), "1", tablet_id,
+                                                       ver, ver, rowset_id, 
false, 1);
+            } else { // versions 2..9 have a bitmap but no rowset, so they are expected to be reported as leaked
+                expected_leaked_delete_bitmaps[tablet_id].insert({rowset_id, 
ver, 0});
+            }
+
+            if (ver >= 5) {
+                auto delete_bitmap_key =
+                        meta_delete_bitmap_key({instance_id, tablet_id, 
rowset_id, ver, 0});
+                std::string delete_bitmap_val {"test"};
+                txn->put(delete_bitmap_key, delete_bitmap_val);
+            } else {
+                // delete bitmaps may be split into multiple KVs if too 
large
+                auto delete_bitmap_key =
+                        meta_delete_bitmap_key({instance_id, tablet_id, 
rowset_id, ver, 0});
+                std::string delete_bitmap_val(1000, 'A');
+                cloud::put(txn.get(), delete_bitmap_key, delete_bitmap_val, 0, 
300); // NOTE(review): presumably 300 is the split size; confirm
+            }
+        }
+    }
+
+    // create some rowsets with delete bitmaps in non merge-on-write tablets (expected to be reported as abnormal)
+    for (int tablet_id = 900001; tablet_id <= 900010; ++tablet_id) {
+        ASSERT_EQ(0,
+                  create_tablet(txn_kv.get(), table_id, index_id, 
partition_id, tablet_id, false));
+        int64_t rowset_start_id = 700;
+        for (int ver = 2; ver < 6; ++ver) { // versions 2..5
+            std::string rowset_id = std::to_string(rowset_start_id++);
+            create_committed_rowset_with_rowset_id(txn_kv.get(), 
accessor.get(), "1", tablet_id,
+                                                   ver, ver, rowset_id, false, 
1);
+            auto delete_bitmap_key =
+                    meta_delete_bitmap_key({instance_id, tablet_id, rowset_id, 
ver, 0});
+            std::string delete_bitmap_val {"test2"};
+            txn->put(delete_bitmap_key, delete_bitmap_val);
+
+            expected_abnormal_delete_bitmaps[tablet_id].insert({rowset_id, 
ver, 0});
+        }
+    }
+
+    // create some rowsets without delete bitmaps in non merge-on-write tablet
+    for (int tablet_id = 700001; tablet_id <= 700010; ++tablet_id) {
+        ASSERT_EQ(0,
+                  create_tablet(txn_kv.get(), table_id, index_id, 
partition_id, tablet_id, false));
+        int64_t rowset_start_id = 500;
+        for (int ver = 2; ver < 10; ++ver) {
+            std::string rowset_id = std::to_string(rowset_start_id++);
+            create_committed_rowset_with_rowset_id(txn_kv.get(), 
accessor.get(), "1", tablet_id,
+                                                   ver, ver, rowset_id, false, 
1);
+        }
+    }
+
+    ASSERT_EQ(TxnErrorCode::TXN_OK, txn->commit());
+
+    ASSERT_EQ(checker.do_delete_bitmap_inverted_check(), 1); // the test expects 1 when leaked or abnormal bitmaps exist
+    ASSERT_EQ(expected_leaked_delete_bitmaps, real_leaked_delete_bitmaps);
+    ASSERT_EQ(expected_abnormal_delete_bitmaps, real_abnormal_delete_bitmaps);
+}
+
+TEST(CheckerTest, delete_bitmap_storage_optimize_check_normal) {
+    config::delete_bitmap_storage_optimize_check_version_gap = 0; // NOTE(review): global config is overwritten and not restored in this test
+
+    auto txn_kv = std::make_shared<MemTxnKv>();
+    ASSERT_EQ(txn_kv->init(), 0);
+
+    InstanceInfoPB instance;
+    instance.set_instance_id(instance_id);
+    auto obj_info = instance.add_obj_info();
+    obj_info->set_id("1");
+
+    InstanceChecker checker(txn_kv, instance_id);
+    ASSERT_EQ(checker.init(instance), 0);
+    auto accessor = checker.accessor_map_.begin()->second; // use the first entry of accessor_map_
+
+    std::unique_ptr<Transaction> txn;
+    ASSERT_EQ(TxnErrorCode::TXN_OK, txn_kv->create_txn(&txn));
+
+    constexpr int table_id = 10000, index_id = 10001, partition_id = 10002;
+    int64_t rowset_start_id = 600; // shared counter, so rowset ids stay unique across the merge-on-write tablets below
+
+    for (int tablet_id = 800001; tablet_id <= 800005; ++tablet_id) { // rowset [6-7] is non-overlapping; no bitmap below version 7 is written
+        ASSERT_EQ(0,
+                  create_tablet(txn_kv.get(), table_id, index_id, 
partition_id, tablet_id, true));
+        std::vector<std::pair<int64_t, int64_t>> rowset_vers {{2, 2}, {3, 3}, 
{4, 4}, {5, 5},
+                                                              {6, 7}, {8, 8}, 
{9, 9}};
+        std::vector<std::pair<int64_t, int64_t>> delete_bitmaps_vers {
+                {7, 9}, {8, 9}, {7, 9}, {7, 9}, {7, 9}, {8, 9}, {9, 9}};
+        std::vector<bool> segments_overlap {true, true, true, true, false, 
true, true};
+        for (size_t i {0}; i < 7; i++) { // entry i of the three vectors above describes one rowset
+            std::string rowset_id = std::to_string(rowset_start_id++);
+            create_committed_rowset_with_rowset_id(txn_kv.get(), 
accessor.get(), "1", tablet_id,
+                                                   rowset_vers[i].first, 
rowset_vers[i].second,
+                                                   rowset_id, 
segments_overlap[i], 1);
+            create_delete_bitmaps(txn.get(), tablet_id, rowset_id, 
delete_bitmaps_vers[i].first,
+                                  delete_bitmaps_vers[i].second);
+        }
+    }
+
+    for (int tablet_id = 800006; tablet_id <= 800010; ++tablet_id) {
+        // [7-7] cumu compaction output rowset start_version == end_version
+        ASSERT_EQ(0,
+                  create_tablet(txn_kv.get(), table_id, index_id, 
partition_id, tablet_id, true));
+        std::vector<std::pair<int64_t, int64_t>> rowset_vers {{2, 2}, {3, 3}, 
{4, 4}, {5, 5},
+                                                              {6, 6}, {7, 7}, 
{8, 8}, {9, 9}};
+        std::vector<std::pair<int64_t, int64_t>> delete_bitmaps_vers {
+                {7, 9}, {8, 9}, {7, 9}, {7, 9}, {7, 9}, {7, 9}, {8, 9}, {9, 
9}};
+        std::vector<bool> segments_overlap {true, true, false, true, false, 
true, true, true}; // NOTE(review): false for [4-4] and [6-6] but true for [7-7]; confirm this matches the [7-7] comment above
+        for (size_t i {0}; i < 8; i++) {
+            std::string rowset_id = std::to_string(rowset_start_id++);
+            create_committed_rowset_with_rowset_id(txn_kv.get(), 
accessor.get(), "1", tablet_id,
+                                                   rowset_vers[i].first, 
rowset_vers[i].second,
+                                                   rowset_id, 
segments_overlap[i], 1);
+            create_delete_bitmaps(txn.get(), tablet_id, rowset_id, 
delete_bitmaps_vers[i].first,
+                                  delete_bitmaps_vers[i].second);
+        }
+    }
+
+    for (int tablet_id = 800011; tablet_id <= 800015; ++tablet_id) {
+        // no rowsets are compacted: each rowset's bitmaps start at its own version
+        ASSERT_EQ(0,
+                  create_tablet(txn_kv.get(), table_id, index_id, 
partition_id, tablet_id, true));
+        std::vector<std::pair<int64_t, int64_t>> rowset_vers {{2, 2}, {3, 3}, 
{4, 4}, {5, 5},
+                                                              {6, 6}, {7, 7}, 
{8, 8}, {9, 9}};
+        std::vector<std::pair<int64_t, int64_t>> delete_bitmaps_vers {
+                {2, 9}, {3, 9}, {4, 9}, {5, 9}, {6, 9}, {7, 9}, {8, 9}, {9, 
9}};
+        std::vector<bool> segments_overlap {true, true, true, true, true, 
true, true, true};
+        for (size_t i {0}; i < 8; i++) {
+            std::string rowset_id = std::to_string(rowset_start_id++);
+            create_committed_rowset_with_rowset_id(txn_kv.get(), 
accessor.get(), "1", tablet_id,
+                                                   rowset_vers[i].first, 
rowset_vers[i].second,
+                                                   rowset_id, 
segments_overlap[i], 1);
+            create_delete_bitmaps(txn.get(), tablet_id, rowset_id, 
delete_bitmaps_vers[i].first,
+                                  delete_bitmaps_vers[i].second);
+        }
+    }
+
+    for (int tablet_id = 800016; tablet_id <= 800020; ++tablet_id) { // first rowset spans versions [2-5] and is non-overlapping; its bitmaps start at version 5
+        ASSERT_EQ(0,
+                  create_tablet(txn_kv.get(), table_id, index_id, 
partition_id, tablet_id, true));
+        std::vector<std::pair<int64_t, int64_t>> rowset_vers {
+                {2, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}};
+        std::vector<std::pair<int64_t, int64_t>> delete_bitmaps_vers {
+                {5, 9}, {6, 9}, {7, 9}, {8, 9}, {9, 9}};
+        std::vector<bool> segments_overlap {false, true, true, true, true};
+        for (size_t i {0}; i < 5; i++) {
+            std::string rowset_id = std::to_string(rowset_start_id++);
+            create_committed_rowset_with_rowset_id(txn_kv.get(), 
accessor.get(), "1", tablet_id,
+                                                   rowset_vers[i].first, 
rowset_vers[i].second,
+                                                   rowset_id, 
segments_overlap[i], 1);
+            create_delete_bitmaps(txn.get(), tablet_id, rowset_id, 
delete_bitmaps_vers[i].first,
+                                  delete_bitmaps_vers[i].second);
+        }
+    }
+
+    // also create some rowsets without delete bitmaps in non merge-on-write 
tablet
+    for (int tablet_id = 700001; tablet_id <= 700010; ++tablet_id) {
+        ASSERT_EQ(0,
+                  create_tablet(txn_kv.get(), table_id, index_id, 
partition_id, tablet_id, false));
+        int64_t rowset_start_id = 500; // shadows the outer counter; ids restart at 500 for every tablet in this loop
+        for (int ver = 2; ver < 10; ++ver) {
+            std::string rowset_id = std::to_string(rowset_start_id++);
+            create_committed_rowset_with_rowset_id(txn_kv.get(), 
accessor.get(), "1", tablet_id,
+                                                   ver, ver, rowset_id, false, 
1);
+        }
+    }
+
+    ASSERT_EQ(TxnErrorCode::TXN_OK, txn->commit());
+    ASSERT_EQ(checker.do_delete_bitmap_storage_optimize_check(), 0); // the test expects 0: no rowset should be flagged
+}
+
+TEST(CheckerTest, delete_bitmap_storage_optimize_check_abnormal) {
+    config::delete_bitmap_storage_optimize_check_version_gap = 0; // NOTE(review): global config is overwritten and not restored in this test
+    // abnormal case: some rowsets still carry delete bitmaps that were expected to be deleted
+    auto txn_kv = std::make_shared<MemTxnKv>();
+    ASSERT_EQ(txn_kv->init(), 0);
+
+    InstanceInfoPB instance;
+    instance.set_instance_id(instance_id);
+    auto obj_info = instance.add_obj_info();
+    obj_info->set_id("1");
+
+    InstanceChecker checker(txn_kv, instance_id);
+    ASSERT_EQ(checker.init(instance), 0);
+    auto accessor = checker.accessor_map_.begin()->second; // use the first entry of accessor_map_
+
+    // tablet_id -> [rowset_id]
+    std::map<std::int64_t, std::set<std::string>> expected_abnormal_rowsets {};
+    std::map<std::int64_t, std::set<std::string>> real_abnormal_rowsets {};
+    auto sp = SyncPoint::get_instance();
+    std::unique_ptr<int, std::function<void(int*)>> defer(
+            (int*)0x01, [](int*) { 
SyncPoint::get_instance()->clear_all_call_backs(); }); // scope guard: the dummy non-null pointer makes the deleter run when defer is destroyed
+    
sp->set_call_back("InstanceChecker::check_delete_bitmap_storage_optimize.get_abnormal_rowset",
+                      [&real_abnormal_rowsets](auto&& args) { // records every rowset reported through this sync point
+                          int64_t tablet_id = *try_any_cast<int64_t*>(args[0]);
+                          std::string rowset_id = 
*try_any_cast<std::string*>(args[1]);
+                          real_abnormal_rowsets[tablet_id].insert(rowset_id);
+                      });
+    sp->enable_processing(); // NOTE(review): no matching disable call is visible in this test
+
+    std::unique_ptr<Transaction> txn;
+    ASSERT_EQ(TxnErrorCode::TXN_OK, txn_kv->create_txn(&txn));
+
+    constexpr int table_id = 10000, index_id = 10001, partition_id = 10002;
+
+    int64_t rowset_start_id = 700; // shared counter, so rowset ids stay unique across tablets
+    for (int tablet_id = 900001; tablet_id <= 900005; ++tablet_id) {
+        ASSERT_EQ(0,
+                  create_tablet(txn_kv.get(), table_id, index_id, 
partition_id, tablet_id, true));
+        std::vector<std::pair<int64_t, int64_t>> rowset_vers {{2, 2}, {3, 3}, 
{4, 4}, {5, 5},
+                                                              {6, 7}, {8, 8}, 
{9, 9}};
+        std::vector<std::pair<int64_t, int64_t>> delete_bitmaps_vers {
+                {2, 9}, {7, 9}, {4, 9}, {7, 9}, {7, 9}, {8, 9}, {9, 9}};
+        std::vector<bool> segments_overlap {true, true, true, true, false, 
true, true};
+        for (size_t i {0}; i < 7; i++) { // entry i of the three vectors above describes one rowset
+            std::string rowset_id = std::to_string(rowset_start_id++);
+            create_committed_rowset_with_rowset_id(txn_kv.get(), 
accessor.get(), "1", tablet_id,
+                                                   rowset_vers[i].first, 
rowset_vers[i].second,
+                                                   rowset_id, 
segments_overlap[i], 1);
+            create_delete_bitmaps(txn.get(), tablet_id, rowset_id, 
delete_bitmaps_vers[i].first,
+                                  delete_bitmaps_vers[i].second);
+            if (delete_bitmaps_vers[i].first < 7) { // rowsets whose bitmaps start before version 7 are the ones expected to be flagged
+                expected_abnormal_rowsets[tablet_id].insert(rowset_id);
+            }
+        }
+    }
+
+    ASSERT_EQ(TxnErrorCode::TXN_OK, txn->commit());
+
+    ASSERT_EQ(checker.do_delete_bitmap_storage_optimize_check(), 1); // the test expects 1 when abnormal rowsets exist
+    ASSERT_EQ(expected_abnormal_rowsets, real_abnormal_rowsets);
+}
+
 TEST(RecyclerTest, delete_rowset_data) {
     auto txn_kv = std::make_shared<MemTxnKv>();
     ASSERT_EQ(txn_kv->init(), 0);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to