This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 052b7f90eec branch-3.0: [Opt](checker) Add checker for delete bitmaps
#44154 (#44952)
052b7f90eec is described below
commit 052b7f90eecc265ffb50942add552d2621888a93
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Dec 4 09:57:08 2024 +0800
branch-3.0: [Opt](checker) Add checker for delete bitmaps #44154 (#44952)
Cherry-picked from #44154
Co-authored-by: bobhan1 <[email protected]>
---
cloud/src/common/bvars.cpp | 7 +
cloud/src/common/bvars.h | 4 +
cloud/src/common/config.h | 5 +
cloud/src/recycler/checker.cpp | 457 ++++++++++++++++++++++++++++++++++++++++-
cloud/src/recycler/checker.h | 28 +++
cloud/src/recycler/util.cpp | 66 ++++++
cloud/src/recycler/util.h | 5 +
cloud/test/recycler_test.cpp | 398 ++++++++++++++++++++++++++++++++++-
8 files changed, 965 insertions(+), 5 deletions(-)
diff --git a/cloud/src/common/bvars.cpp b/cloud/src/common/bvars.cpp
index 507acb00dff..746f109ac6d 100644
--- a/cloud/src/common/bvars.cpp
+++ b/cloud/src/common/bvars.cpp
@@ -198,3 +198,10 @@ BvarStatusWithTag<long>
g_bvar_checker_instance_volume("checker", "instance_volu
BvarStatusWithTag<long> g_bvar_inverted_checker_num_scanned("checker",
"num_inverted_scanned");
BvarStatusWithTag<long> g_bvar_inverted_checker_num_check_failed("checker",
"num_inverted_check_failed");
+
+BvarStatusWithTag<int64_t>
g_bvar_inverted_checker_leaked_delete_bitmaps("checker",
+
"leaked_delete_bitmaps");
+BvarStatusWithTag<int64_t> g_bvar_inverted_checker_abnormal_delete_bitmaps(
+ "checker", "abnormal_delete_bitmaps");
+BvarStatusWithTag<int64_t> g_bvar_inverted_checker_delete_bitmaps_scanned(
+ "checker", "delete_bitmap_keys_scanned");
\ No newline at end of file
diff --git a/cloud/src/common/bvars.h b/cloud/src/common/bvars.h
index 373a3a63ff2..d0ad2e97957 100644
--- a/cloud/src/common/bvars.h
+++ b/cloud/src/common/bvars.h
@@ -250,3 +250,7 @@ extern BvarStatusWithTag<long>
g_bvar_checker_last_success_time_ms;
extern BvarStatusWithTag<long> g_bvar_checker_instance_volume;
extern BvarStatusWithTag<long> g_bvar_inverted_checker_num_scanned;
extern BvarStatusWithTag<long> g_bvar_inverted_checker_num_check_failed;
+
+extern BvarStatusWithTag<int64_t>
g_bvar_inverted_checker_leaked_delete_bitmaps;
+extern BvarStatusWithTag<int64_t>
g_bvar_inverted_checker_abnormal_delete_bitmaps;
+extern BvarStatusWithTag<int64_t>
g_bvar_inverted_checker_delete_bitmaps_scanned;
diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h
index f6e0073b924..c6b6e1ef290 100644
--- a/cloud/src/common/config.h
+++ b/cloud/src/common/config.h
@@ -72,6 +72,11 @@ CONF_Bool(enable_checker, "false");
CONF_Int32(recycle_pool_parallelism, "40");
// Currently only used for recycler test
CONF_Bool(enable_inverted_check, "false");
+// Currently only used for recycler test
+CONF_Bool(enable_delete_bitmap_inverted_check, "false");
+// checks if https://github.com/apache/doris/pull/40204 works as expected
+CONF_Bool(enable_delete_bitmap_storage_optimize_check, "false");
+CONF_mInt64(delete_bitmap_storage_optimize_check_version_gap, "1000");
// interval for scanning instances to do checks and inspections
CONF_mInt32(scan_instances_interval_seconds, "60"); // 1min
// interval for check object
diff --git a/cloud/src/recycler/checker.cpp b/cloud/src/recycler/checker.cpp
index 19a10d61c12..60b6b7fc5ee 100644
--- a/cloud/src/recycler/checker.cpp
+++ b/cloud/src/recycler/checker.cpp
@@ -168,17 +168,35 @@ int Checker::start() {
auto ctime_ms =
duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
g_bvar_checker_enqueue_cost_s.put(instance_id, ctime_ms / 1000 -
enqueue_time_s);
- int ret1 = checker->do_check();
- int ret2 = 0;
+ bool success {true};
+
+ if (int ret = checker->do_check(); ret != 0) {
+ success = false;
+ }
+
if (config::enable_inverted_check) {
- ret2 = checker->do_inverted_check();
+ if (int ret = checker->do_inverted_check(); ret != 0) {
+ success = false;
+ }
+ }
+
+ if (config::enable_delete_bitmap_inverted_check) {
+ if (int ret = checker->do_delete_bitmap_inverted_check(); ret
!= 0) {
+ success = false;
+ }
+ }
+
+ if (config::enable_delete_bitmap_storage_optimize_check) {
+ if (int ret =
checker->do_delete_bitmap_storage_optimize_check(); ret != 0) {
+ success = false;
+ }
}
// If instance checker has been aborted, don't finish this job
if (!checker->stopped()) {
finish_instance_recycle_job(txn_kv_.get(), check_job_key,
instance.instance_id(),
- ip_port_, ret1 == 0 && ret2 == 0,
ctime_ms);
+ ip_port_, success, ctime_ms);
}
{
std::lock_guard lock(mtx_);
@@ -740,4 +758,435 @@ int InstanceChecker::do_inverted_check() {
return num_file_leak > 0 ? 1 : check_ret;
}
+// Enumerates the tablets of this instance by scanning its rowset meta keys and calls
+// `check_func(tablet_id)` for every tablet whose meta has merge-on-write enabled.
+//
+// After the last key of a batch is read, the scan restarts from the first rowset key of
+// `tablet_id + 1`, so the remaining rowsets of that tablet are skipped. (The trailing `1`
+// passed to the range get is presumably a limit of one KV per request -- TODO confirm.)
+//
+// Returns 0 when the traversal completes or the checker is stopped, -1 if a txn cannot be
+// created or the range get fails, and otherwise the first negative value returned by
+// `get_tablet_meta()` or `check_func`. Non-negative results of `check_func` are ignored here.
+int InstanceChecker::traverse_mow_tablet(const std::function<int(int64_t)>& check_func) {
+    std::unique_ptr<RangeGetIterator> it;
+    auto begin = meta_rowset_key({instance_id_, 0, 0});
+    auto end = meta_rowset_key({instance_id_, std::numeric_limits<int64_t>::max(), 0});
+    do {
+        std::unique_ptr<Transaction> txn;
+        TxnErrorCode err = txn_kv_->create_txn(&txn);
+        if (err != TxnErrorCode::TXN_OK) {
+            LOG(WARNING) << "failed to create txn";
+            return -1;
+        }
+        err = txn->get(begin, end, &it, false, 1);
+        if (err != TxnErrorCode::TXN_OK) {
+            LOG(WARNING) << "failed to get rowset kv, err=" << err;
+            return -1;
+        }
+        if (!it->has_next()) {
+            break;
+        }
+        while (it->has_next() && !stopped()) {
+            auto [k, v] = it->next();
+            std::string_view k1 = k;
+            k1.remove_prefix(1);
+            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
+            decode_key(&k1, &out);
+            // 0x01 "meta" ${instance_id} "rowset" ${tablet_id} ${version} -> RowsetMetaCloudPB
+            auto tablet_id = std::get<int64_t>(std::get<0>(out[3]));
+
+            if (!it->has_next()) {
+                // Update to next smallest key for iteration:
+                // continue the scan from the next tablet in this instance
+                begin = meta_rowset_key({instance_id_, tablet_id + 1, 0});
+            }
+
+            TabletMetaCloudPB tablet_meta;
+            int ret = get_tablet_meta(txn_kv_.get(), instance_id_, tablet_id, tablet_meta);
+            if (ret < 0) {
+                LOG(WARNING) << fmt::format(
+                        "failed to get_tablet_meta in traverse_mow_tablet(), "
+                        "instance_id={}, tablet_id={}",
+                        instance_id_, tablet_id);
+                return ret;
+            }
+
+            if (tablet_meta.enable_unique_key_merge_on_write()) {
+                // only check merge-on-write table
+                ret = check_func(tablet_id);
+                if (ret < 0) {
+                    // return immediately when encounter unexpected error,
+                    // otherwise, we continue to check the next tablet
+                    return ret;
+                }
+            }
+        }
+    } while (it->more() && !stopped());
+    return 0;
+}
+
+// Iterates over all delete bitmap keys of rowset `rowset_id` in tablet `tablet_id` and calls
+// `callback(tablet_id, rowset_id, version, segment_id)` for each key.
+//
+// Returns 0 if every callback invocation returned 0 (or the checker was stopped), -1 if a txn
+// cannot be created or the range get fails, and otherwise the first non-zero value returned by
+// `callback`, which also ends the traversal.
+int InstanceChecker::traverse_rowset_delete_bitmaps(
+        int64_t tablet_id, std::string rowset_id,
+        const std::function<int(int64_t, std::string_view, int64_t, int64_t)>& callback) {
+    std::unique_ptr<RangeGetIterator> it;
+    auto begin = meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
+    auto end = meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id,
+                                       std::numeric_limits<int64_t>::max(),
+                                       std::numeric_limits<int64_t>::max()});
+    do {
+        std::unique_ptr<Transaction> txn;
+        TxnErrorCode err = txn_kv_->create_txn(&txn);
+        if (err != TxnErrorCode::TXN_OK) {
+            LOG(WARNING) << "failed to create txn";
+            return -1;
+        }
+        err = txn->get(begin, end, &it);
+        if (err != TxnErrorCode::TXN_OK) {
+            // this range covers delete bitmap KVs, not rowset KVs
+            LOG(WARNING) << "failed to get delete bitmap kv, err=" << err;
+            return -1;
+        }
+        if (!it->has_next()) {
+            break;
+        }
+        while (it->has_next() && !stopped()) {
+            auto [k, v] = it->next();
+            std::string_view k1 = k;
+            k1.remove_prefix(1);
+            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
+            decode_key(&k1, &out);
+            // 0x01 "meta" ${instance_id} "delete_bitmap" ${tablet_id} ${rowset_id} ${version} ${segment_id} -> roaringbitmap
+            auto version = std::get<std::int64_t>(std::get<0>(out[5]));
+            auto segment_id = std::get<std::int64_t>(std::get<0>(out[6]));
+
+            int ret = callback(tablet_id, rowset_id, version, segment_id);
+            if (ret != 0) {
+                return ret;
+            }
+
+            if (!it->has_next()) {
+                begin = k;
+                begin.push_back('\x00'); // Update to next smallest key for iteration
+                break;
+            }
+        }
+    } while (it->more() && !stopped());
+
+    return 0;
+}
+
+// Reads every rowset meta KV of `tablet_id` within a single txn and hands each parsed
+// RowsetMetaCloudPB to `collect_cb`.
+//
+// Returns 0 on success (also when the checker is stopped part-way through), and -1 if the
+// txn cannot be created, a range get fails, or a rowset meta value cannot be parsed.
+int InstanceChecker::collect_tablet_rowsets(
+        int64_t tablet_id, const std::function<void(const doris::RowsetMetaCloudPB&)>& collect_cb) {
+    std::unique_ptr<Transaction> txn;
+    TxnErrorCode err = txn_kv_->create_txn(&txn);
+    if (err != TxnErrorCode::TXN_OK) {
+        LOG(WARNING) << "failed to create txn";
+        return -1;
+    }
+
+    // key range [first rowset key of tablet_id, first rowset key of tablet_id + 1)
+    auto range_begin = meta_rowset_key({instance_id_, tablet_id, 0});
+    auto range_end = meta_rowset_key({instance_id_, tablet_id + 1, 0});
+    std::unique_ptr<RangeGetIterator> iter;
+    int64_t collected {0};
+
+    for (;;) {
+        err = txn->get(range_begin, range_end, &iter);
+        if (err != TxnErrorCode::TXN_OK) {
+            LOG(WARNING) << "failed to get rowset kv, err=" << err;
+            return -1;
+        }
+        if (!iter->has_next()) {
+            break;
+        }
+        while (iter->has_next() && !stopped()) {
+            auto [k, v] = iter->next();
+            doris::RowsetMetaCloudPB rowset_meta;
+            if (!rowset_meta.ParseFromArray(v.data(), v.size())) {
+                LOG(WARNING) << "malformed rowset meta value, key=" << hex(k);
+                return -1;
+            }
+
+            ++collected;
+            collect_cb(rowset_meta);
+
+            if (iter->has_next()) {
+                continue;
+            }
+            // last key of this batch: resume right after it on the next range get
+            range_begin = k;
+            range_begin.push_back('\x00');
+            break;
+        }
+        if (!iter->more() || stopped()) {
+            break;
+        }
+    }
+
+    LOG(INFO) << fmt::format(
+            "[delete bitmap checker] successfully collect rowsets for instance_id={}, "
+            "tablet_id={}, rowsets_num={}",
+            instance_id_, tablet_id, collected);
+    return 0;
+}
+
+// Scans every delete bitmap key of this instance and verifies that each one belongs to a
+// merge-on-write tablet and to a rowset that still has a rowset meta KV in MS.
+//
+// The tablet meta and the rowset ids of the most recently seen tablet are cached, so they are
+// loaded once per run of consecutive keys that share a tablet id.
+//
+// Returns 0 if no problem is found, 1 if at least one leaked or abnormal delete bitmap is
+// found, and a negative value if creating a txn, the range get, `get_tablet_meta()` or
+// `collect_tablet_rowsets()` fails. The counters are published to bvars and a summary is
+// logged when the function exits, on every return path.
+int InstanceChecker::do_delete_bitmap_inverted_check() {
+    LOG(INFO) << fmt::format(
+            "[delete bitmap checker] begin to do_delete_bitmap_inverted_check for instance_id={}",
+            instance_id_);
+
+    // number of delete bitmap keys being scanned
+    int64_t total_delete_bitmap_keys {0};
+    // number of delete bitmaps which belong to a non merge-on-write tablet
+    int64_t abnormal_delete_bitmaps {0};
+    // number of delete bitmaps which don't have a corresponding rowset in MS
+    int64_t leaked_delete_bitmaps {0};
+
+    auto start_time = std::chrono::steady_clock::now();
+    // The dummy non-null pointer only exists so that the deleter runs at scope exit.
+    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
+        g_bvar_inverted_checker_leaked_delete_bitmaps.put(instance_id_, leaked_delete_bitmaps);
+        g_bvar_inverted_checker_abnormal_delete_bitmaps.put(instance_id_, abnormal_delete_bitmaps);
+        g_bvar_inverted_checker_delete_bitmaps_scanned.put(instance_id_, total_delete_bitmap_keys);
+
+        auto cost = std::chrono::duration_cast<std::chrono::milliseconds>(
+                            std::chrono::steady_clock::now() - start_time)
+                            .count();
+        if (leaked_delete_bitmaps > 0 || abnormal_delete_bitmaps > 0) {
+            LOG(WARNING) << fmt::format(
+                    "[delete bitmap check fails] delete bitmap inverted check for instance_id={}, "
+                    "cost={} ms, total_delete_bitmap_keys={}, leaked_delete_bitmaps={}, "
+                    "abnormal_delete_bitmaps={}",
+                    instance_id_, cost, total_delete_bitmap_keys, leaked_delete_bitmaps,
+                    abnormal_delete_bitmaps);
+        } else {
+            LOG(INFO) << fmt::format(
+                    "[delete bitmap checker] delete bitmap inverted check for instance_id={}, "
+                    "passed. cost={} ms, total_delete_bitmap_keys={}",
+                    instance_id_, cost, total_delete_bitmap_keys);
+        }
+    });
+
+    struct TabletsRowsetsCache {
+        int64_t tablet_id {-1};
+        bool enable_merge_on_write {false};
+        std::unordered_set<std::string> rowsets {};
+    } tablet_rowsets_cache {};
+
+    std::unique_ptr<RangeGetIterator> it;
+    auto begin = meta_delete_bitmap_key({instance_id_, 0, "", 0, 0});
+    auto end =
+            meta_delete_bitmap_key({instance_id_, std::numeric_limits<int64_t>::max(), "", 0, 0});
+    do {
+        std::unique_ptr<Transaction> txn;
+        TxnErrorCode err = txn_kv_->create_txn(&txn);
+        if (err != TxnErrorCode::TXN_OK) {
+            LOG(WARNING) << "failed to create txn";
+            return -1;
+        }
+        err = txn->get(begin, end, &it);
+        if (err != TxnErrorCode::TXN_OK) {
+            // this range covers delete bitmap KVs, not rowset KVs
+            LOG(WARNING) << "failed to get delete bitmap kv, err=" << err;
+            return -1;
+        }
+        if (!it->has_next()) {
+            break;
+        }
+        while (it->has_next() && !stopped()) {
+            auto [k, v] = it->next();
+            std::string_view k1 = k;
+            k1.remove_prefix(1);
+            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
+            decode_key(&k1, &out);
+            // 0x01 "meta" ${instance_id} "delete_bitmap" ${tablet_id} ${rowset_id} ${version} ${segment_id} -> roaringbitmap
+            auto tablet_id = std::get<int64_t>(std::get<0>(out[3]));
+            auto rowset_id = std::get<std::string>(std::get<0>(out[4]));
+            auto version = std::get<std::int64_t>(std::get<0>(out[5]));
+            auto segment_id = std::get<std::int64_t>(std::get<0>(out[6]));
+
+            ++total_delete_bitmap_keys;
+
+            if (!it->has_next()) {
+                begin = k;
+                begin.push_back('\x00'); // Update to next smallest key for iteration
+            }
+
+            if (tablet_rowsets_cache.tablet_id == -1 ||
+                tablet_rowsets_cache.tablet_id != tablet_id) {
+                // first key of a new tablet: refresh the cached tablet info
+                TabletMetaCloudPB tablet_meta;
+                int ret = get_tablet_meta(txn_kv_.get(), instance_id_, tablet_id, tablet_meta);
+                if (ret < 0) {
+                    LOG(WARNING) << fmt::format(
+                            "[delete bitmap checker] failed to get_tablet_meta in "
+                            "do_delete_bitmap_inverted_check(), instance_id={}, tablet_id={}",
+                            instance_id_, tablet_id);
+                    return ret;
+                }
+
+                tablet_rowsets_cache.tablet_id = tablet_id;
+                tablet_rowsets_cache.enable_merge_on_write =
+                        tablet_meta.enable_unique_key_merge_on_write();
+                tablet_rowsets_cache.rowsets.clear();
+
+                if (tablet_rowsets_cache.enable_merge_on_write) {
+                    // only collect rowsets for merge-on-write tablet
+                    auto collect_cb =
+                            [&tablet_rowsets_cache](const doris::RowsetMetaCloudPB& rowset) {
+                                tablet_rowsets_cache.rowsets.insert(rowset.rowset_id_v2());
+                            };
+                    ret = collect_tablet_rowsets(tablet_id, collect_cb);
+                    if (ret < 0) {
+                        return ret;
+                    }
+                }
+            }
+            DCHECK_EQ(tablet_id, tablet_rowsets_cache.tablet_id);
+
+            if (!tablet_rowsets_cache.enable_merge_on_write) {
+                // clang-format off
+                TEST_SYNC_POINT_CALLBACK(
+                        "InstanceChecker::do_delete_bitmap_inverted_check.get_abnormal_delete_bitmap",
+                        &tablet_id, &rowset_id, &version, &segment_id);
+                // clang-format on
+                ++abnormal_delete_bitmaps;
+                // log an error and continue to check the next delete bitmap
+                LOG(WARNING) << fmt::format(
+                        "[delete bitmap check fails] find a delete bitmap belongs to tablet "
+                        "which is not a merge-on-write table! instance_id={}, tablet_id={}, "
+                        "rowset_id={}, version={}, segment_id={}",
+                        instance_id_, tablet_id, rowset_id, version, segment_id);
+                continue;
+            }
+
+            if (!tablet_rowsets_cache.rowsets.contains(rowset_id)) {
+                TEST_SYNC_POINT_CALLBACK(
+                        "InstanceChecker::do_delete_bitmap_inverted_check.get_leaked_delete_bitmap",
+                        &tablet_id, &rowset_id, &version, &segment_id);
+                ++leaked_delete_bitmaps;
+                // log an error and continue to check the next delete bitmap
+                LOG(WARNING) << fmt::format(
+                        "[delete bitmap check fails] can't find corresponding rowset for delete "
+                        "bitmap instance_id={}, tablet_id={}, rowset_id={}, version={}, "
+                        "segment_id={}",
+                        instance_id_, tablet_id, rowset_id, version, segment_id);
+            }
+        }
+    } while (it->more() && !stopped());
+
+    return (leaked_delete_bitmaps > 0 || abnormal_delete_bitmaps > 0) ? 1 : 0;
+}
+
+// Checks one merge-on-write tablet for stale delete bitmaps: every rowset that lies entirely
+// before the right-most compacted rowset must not own delete bitmaps whose version is lower
+// than `pre_min_version`.
+//
+// `pre_min_version` is the end version of the right-most compacted rowset, lowered by
+// `config::delete_bitmap_storage_optimize_check_version_gap` when that gap is positive.
+//
+// Returns 0 if the check passes or is skipped (no compacted rowset, or `pre_min_version`
+// too small), 1 if at least one rowset still owns such a stale delete bitmap, and a non-zero
+// error code propagated from `collect_tablet_rowsets()` or a negative one from
+// `traverse_rowset_delete_bitmaps()`.
+int InstanceChecker::check_delete_bitmap_storage_optimize(int64_t tablet_id) {
+    using Version = std::pair<int64_t, int64_t>;
+    struct RowsetDigest {
+        std::string rowset_id;
+        Version version;
+        doris::SegmentsOverlapPB segments_overlap;
+
+        // order rowsets by start version
+        bool operator<(const RowsetDigest& other) const {
+            return version.first < other.version.first;
+        }
+
+        // a rowset spanning several versions, or a single-version rowset with
+        // non-overlapping segments, is treated as compaction output
+        bool produced_by_compaction() const {
+            return (version.first < version.second) ||
+                   ((version.first == version.second) && segments_overlap == NONOVERLAPPING);
+        }
+    };
+
+    // number of rowsets which may have problems
+    int64_t abnormal_rowsets_num {0};
+
+    std::vector<RowsetDigest> tablet_rowsets {};
+    // Get all visible rowsets of this tablet
+    auto collect_cb = [&tablet_rowsets](const doris::RowsetMetaCloudPB& rowset) {
+        if (rowset.start_version() == 0 && rowset.end_version() == 1) {
+            // ignore dummy rowset [0-1]
+            return;
+        }
+        tablet_rowsets.emplace_back(
+                rowset.rowset_id_v2(),
+                std::make_pair<int64_t, int64_t>(rowset.start_version(), rowset.end_version()),
+                rowset.segments_overlap_pb());
+    };
+    if (int ret = collect_tablet_rowsets(tablet_id, collect_cb); ret != 0) {
+        return ret;
+    }
+
+    std::sort(tablet_rowsets.begin(), tablet_rowsets.end());
+
+    // find right-most rowset which is produced by compaction
+    auto it = std::find_if(
+            tablet_rowsets.crbegin(), tablet_rowsets.crend(),
+            [](const RowsetDigest& rowset) { return rowset.produced_by_compaction(); });
+    if (it == tablet_rowsets.crend()) {
+        LOG(INFO) << fmt::format(
+                "[delete bitmap checker] skip to check delete bitmap storage optimize for "
+                "tablet_id={} because it doesn't have compacted rowsets.",
+                tablet_id);
+        return 0;
+    }
+
+    int64_t start_version = it->version.first;
+    int64_t pre_min_version = it->version.second;
+
+    // After BE sweeps stale rowsets, the rowsets before the right-most compacted rowset
+    // should not have delete bitmaps with versions lower than `pre_min_version`.
+    // A positive version gap relaxes this bound to tolerate bitmaps not yet cleaned up.
+    if (config::delete_bitmap_storage_optimize_check_version_gap > 0) {
+        pre_min_version -= config::delete_bitmap_storage_optimize_check_version_gap;
+        if (pre_min_version <= 1) {
+            LOG(INFO) << fmt::format(
+                    "[delete bitmap checker] skip to check delete bitmap storage optimize for "
+                    "tablet_id={} because pre_min_version is too small.",
+                    tablet_id);
+            return 0;
+        }
+    }
+
+    // returns 1 on the first stale delete bitmap, which ends the traversal of that rowset
+    auto check_func = [pre_min_version, instance_id = instance_id_](
+                              int64_t tablet_id, std::string_view rowset_id, int64_t version,
+                              int64_t segment_id) -> int {
+        if (version < pre_min_version) {
+            LOG(WARNING) << fmt::format(
+                    "[delete bitmap check fails] delete bitmap storage optimize check fail for "
+                    "instance_id={}, tablet_id={}, rowset_id={}, found delete bitmap with "
+                    "version={} < pre_min_version={}",
+                    instance_id, tablet_id, rowset_id, version, pre_min_version);
+            return 1;
+        }
+        return 0;
+    };
+
+    for (const auto& rowset : tablet_rowsets) {
+        // check for all rowsets before the max compacted rowset
+        if (rowset.version.second < start_version) {
+            auto rowset_id = rowset.rowset_id;
+            int ret = traverse_rowset_delete_bitmaps(tablet_id, rowset_id, check_func);
+            if (ret < 0) {
+                return ret;
+            }
+
+            if (ret != 0) {
+                ++abnormal_rowsets_num;
+                TEST_SYNC_POINT_CALLBACK(
+                        "InstanceChecker::check_delete_bitmap_storage_optimize.get_abnormal_rowset",
+                        &tablet_id, &rowset_id);
+            }
+        }
+    }
+
+    LOG(INFO) << fmt::format(
+            "[delete bitmap checker] finish check delete bitmap storage optimize for "
+            "instance_id={}, tablet_id={}, rowsets_num={}, abnormal_rowsets_num={}, "
+            "pre_min_version={}",
+            instance_id_, tablet_id, tablet_rowsets.size(), abnormal_rowsets_num, pre_min_version);
+
+    // a single abnormal rowset is already a failure (the original `> 1` let it pass)
+    return (abnormal_rowsets_num > 0 ? 1 : 0);
+}
+
+// Runs `check_delete_bitmap_storage_optimize()` on every merge-on-write tablet of this
+// instance and logs how many tablets were checked and how many failed.
+//
+// Returns 0 if no tablet failed, 1 if at least one tablet failed, and the negative value
+// reported by `traverse_mow_tablet()` if the traversal itself hit an error.
+int InstanceChecker::do_delete_bitmap_storage_optimize_check() {
+    int64_t tablets_checked {0};
+    int64_t tablets_failed {0};
+
+    // any non-zero result marks the tablet as failed; only a negative one stops the traversal
+    const auto check_one_tablet = [this, &tablets_checked, &tablets_failed](int64_t tablet_id) {
+        ++tablets_checked;
+        const int res = check_delete_bitmap_storage_optimize(tablet_id);
+        if (res != 0) {
+            ++tablets_failed;
+        }
+        return res;
+    };
+
+    if (const int ret = traverse_mow_tablet(check_one_tablet); ret < 0) {
+        return ret;
+    }
+
+    LOG(INFO) << fmt::format(
+            "[delete bitmap checker] check delete bitmap storage optimize for instance_id={}, "
+            "total_tablets_num={}, failed_tablets_num={}",
+            instance_id_, tablets_checked, tablets_failed);
+
+    if (tablets_failed > 0) {
+        return 1;
+    }
+    return 0;
+}
+
} // namespace doris::cloud
diff --git a/cloud/src/recycler/checker.h b/cloud/src/recycler/checker.h
index 03717a69b5e..7f87e90f7cb 100644
--- a/cloud/src/recycler/checker.h
+++ b/cloud/src/recycler/checker.h
@@ -23,6 +23,7 @@
#include <atomic>
#include <condition_variable>
#include <deque>
+#include <functional>
#include <thread>
#include <unordered_map>
#include <unordered_set>
@@ -30,6 +31,10 @@
#include "recycler/storage_vault_accessor.h"
#include "recycler/white_black_list.h"
+namespace doris {
+class RowsetMetaCloudPB;
+} // namespace doris
+
namespace doris::cloud {
class StorageVaultAccessor;
class InstanceChecker;
@@ -86,6 +91,18 @@ public:
// Return 1 if data loss is identified.
// Return negative if a temporary error occurred during the check process.
int do_check();
+
+ // Return 0 if success.
+ // Return 1 if delete bitmap leak is identified.
+ // Return negative if a temporary error occurred during the check process.
+ int do_delete_bitmap_inverted_check();
+
+ // Checks whether https://github.com/apache/doris/pull/40204 works as expected:
+ // stale delete bitmaps should be cleared in MS when BE deletes expired stale rowsets.
+ // NOTE: stale rowsets are lost after BE restarts, so some stale delete bitmaps
+ // may never be cleared.
+ int do_delete_bitmap_storage_optimize_check();
+
// If there are multiple buckets, return the minimum lifecycle; if there
are no buckets (i.e.
// all accessors are HdfsAccessor), return INT64_MAX.
// Return 0 if success, otherwise error
@@ -100,6 +117,17 @@ private:
// returns 0 for success otherwise error
int init_storage_vault_accessors(const InstanceInfoPB& instance);
+ int traverse_mow_tablet(const std::function<int(int64_t)>& check_func);
+ int traverse_rowset_delete_bitmaps(
+ int64_t tablet_id, std::string rowset_id,
+ const std::function<int(int64_t, std::string_view, int64_t,
int64_t)>& callback);
+ int collect_tablet_rowsets(
+ int64_t tablet_id,
+ const std::function<void(const doris::RowsetMetaCloudPB&)>&
collect_cb);
+ int traverse_delete_bitmaps(const std::function<int(int64_t)>& check_func);
+
+ int check_delete_bitmap_storage_optimize(int64_t tablet_id);
+
std::atomic_bool stopped_ {false};
std::shared_ptr<TxnKv> txn_kv_;
std::string instance_id_;
diff --git a/cloud/src/recycler/util.cpp b/cloud/src/recycler/util.cpp
index d6c63ea752f..6797782d5d6 100644
--- a/cloud/src/recycler/util.cpp
+++ b/cloud/src/recycler/util.cpp
@@ -23,6 +23,7 @@
#include "common/util.h"
#include "meta-service/keys.h"
+#include "meta-service/meta_service_schema.h"
#include "meta-service/txn_kv.h"
#include "meta-service/txn_kv_error.h"
@@ -233,4 +234,69 @@ int lease_instance_recycle_job(TxnKv* txn_kv,
std::string_view key, const std::s
return 0;
}
+// Loads the TabletIndexPB of `tablet_id` in `instance_id` into `tablet_idx`.
+// Returns 0 on success; returns -1 if the txn cannot be created, the key cannot be read,
+// the value cannot be parsed, or the stored tablet id differs from `tablet_id`.
+int get_tablet_idx(TxnKv* txn_kv, const std::string& instance_id, int64_t tablet_id,
+                   TabletIndexPB& tablet_idx) {
+    std::unique_ptr<Transaction> txn;
+    if (TxnErrorCode create_err = txn_kv->create_txn(&txn);
+        create_err != TxnErrorCode::TXN_OK) {
+        LOG(WARNING) << "failed to create txn";
+        return -1;
+    }
+
+    std::string idx_key;
+    std::string idx_val;
+    meta_tablet_idx_key({instance_id, tablet_id}, &idx_key);
+    if (TxnErrorCode get_err = txn->get(idx_key, &idx_val); get_err != TxnErrorCode::TXN_OK) {
+        LOG(WARNING) << fmt::format("failed to get tablet_idx, err={} tablet_id={} key={}",
+                                    get_err, tablet_id, hex(idx_key));
+        return -1;
+    }
+
+    const bool parsed = tablet_idx.ParseFromString(idx_val);
+    if (!parsed) [[unlikely]] {
+        LOG(WARNING) << fmt::format("malformed tablet index value, tablet_id={} key={}",
+                                    tablet_id, hex(idx_key));
+        return -1;
+    }
+
+    // the index value must describe the tablet that was asked for
+    const bool same_tablet = (tablet_idx.tablet_id() == tablet_id);
+    if (!same_tablet) [[unlikely]] {
+        LOG(WARNING) << "unexpected error given_tablet_id=" << tablet_id
+                     << " idx_pb_tablet_id=" << tablet_idx.tablet_id()
+                     << " key=" << hex(idx_key);
+        return -1;
+    }
+    return 0;
+}
+
+// Loads the TabletMetaCloudPB of `tablet_id` in `instance_id` into `tablet_meta`.
+// The tablet index is resolved first to obtain the table, index and partition ids that
+// make up the tablet meta key.
+// Returns 0 on success, the negative result of `get_tablet_idx()` if that lookup fails,
+// and -1 if the txn cannot be created, the key cannot be read or the value cannot be parsed.
+int get_tablet_meta(TxnKv* txn_kv, const std::string& instance_id, int64_t tablet_id,
+                    TabletMetaCloudPB& tablet_meta) {
+    TabletIndexPB tablet_idx;
+    if (int idx_ret = get_tablet_idx(txn_kv, instance_id, tablet_id, tablet_idx); idx_ret < 0) {
+        return idx_ret;
+    }
+
+    std::unique_ptr<Transaction> txn;
+    if (TxnErrorCode create_err = txn_kv->create_txn(&txn);
+        create_err != TxnErrorCode::TXN_OK) {
+        LOG(WARNING) << "failed to create txn";
+        return -1;
+    }
+
+    std::string meta_key;
+    std::string meta_val;
+    meta_tablet_key({instance_id, tablet_idx.table_id(), tablet_idx.index_id(),
+                     tablet_idx.partition_id(), tablet_id},
+                    &meta_key);
+    if (TxnErrorCode get_err = txn->get(meta_key, &meta_val); get_err != TxnErrorCode::TXN_OK) {
+        LOG(WARNING) << fmt::format(
+                "failed to get tablet, err={}, table_id={}, index_id={}, partition_id={}, "
+                "tablet_id={} key={}",
+                get_err, tablet_idx.table_id(), tablet_idx.index_id(), tablet_idx.partition_id(),
+                tablet_id, hex(meta_key));
+        return -1;
+    }
+
+    const bool parsed = tablet_meta.ParseFromString(meta_val);
+    if (!parsed) [[unlikely]] {
+        LOG(WARNING) << fmt::format("malformed tablet meta, tablet_id={} key={}", tablet_id,
+                                    hex(meta_key));
+        return -1;
+    }
+    return 0;
+}
} // namespace doris::cloud
diff --git a/cloud/src/recycler/util.h b/cloud/src/recycler/util.h
index b6d4d3299b5..5aa929c2398 100644
--- a/cloud/src/recycler/util.h
+++ b/cloud/src/recycler/util.h
@@ -85,4 +85,9 @@ inline std::string tablet_path_prefix(int64_t tablet_id) {
return fmt::format("data/{}/", tablet_id);
}
+int get_tablet_idx(TxnKv* txn_kv, const std::string& instance_id, int64_t
tablet_id,
+ TabletIndexPB& tablet_idx);
+
+int get_tablet_meta(TxnKv* txn_kv, const std::string& instance_id, int64_t
tablet_id,
+ TabletMetaCloudPB& tablet_meta);
} // namespace doris::cloud
diff --git a/cloud/test/recycler_test.cpp b/cloud/test/recycler_test.cpp
index 14687354839..feecf9552f9 100644
--- a/cloud/test/recycler_test.cpp
+++ b/cloud/test/recycler_test.cpp
@@ -255,14 +255,63 @@ static int create_committed_rowset(TxnKv* txn_kv,
StorageVaultAccessor* accessor
return 0;
}
+// Test helper: writes one rowset meta KV (keyed by `end_version`) that carries the given
+// `rowset_id`, then puts `num_segments` empty segment files through `accessor`.
+// Returns 0 on success and -1 if the txn cannot be created or committed.
+static int create_committed_rowset_with_rowset_id(TxnKv* txn_kv, StorageVaultAccessor* accessor,
+                                                  const std::string& resource_id, int64_t tablet_id,
+                                                  int64_t start_version, int64_t end_version,
+                                                  std::string rowset_id, bool segments_overlap,
+                                                  int num_segments) {
+    // build the rowset meta value
+    doris::RowsetMetaCloudPB rowset_meta;
+    rowset_meta.set_rowset_id(0); // useless but required
+    rowset_meta.set_rowset_id_v2(rowset_id);
+    rowset_meta.set_num_segments(num_segments);
+    rowset_meta.set_tablet_id(tablet_id);
+    rowset_meta.set_resource_id(resource_id);
+    rowset_meta.set_creation_time(current_time);
+    rowset_meta.set_start_version(start_version);
+    rowset_meta.set_end_version(end_version);
+    if (segments_overlap) {
+        rowset_meta.set_segments_overlap_pb(OVERLAPPING);
+    } else {
+        rowset_meta.set_segments_overlap_pb(NONOVERLAPPING);
+    }
+    std::string meta_val;
+    rowset_meta.SerializeToString(&meta_val);
+
+    // build the rowset meta key
+    MetaRowsetKeyInfo rowset_key_info {instance_id, tablet_id, end_version};
+    std::string meta_key;
+    meta_rowset_key(rowset_key_info, &meta_key);
+
+    std::unique_ptr<Transaction> txn;
+    if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) {
+        return -1;
+    }
+    txn->put(meta_key, meta_val);
+    if (txn->commit() != TxnErrorCode::TXN_OK) {
+        return -1;
+    }
+
+    // one empty file per segment
+    int seg = 0;
+    while (seg < num_segments) {
+        auto seg_path = segment_path(tablet_id, rowset_id, seg);
+        accessor->put_file(seg_path, "");
+        ++seg;
+    }
+    return 0;
+}
+
+// Test helper: puts one delete bitmap KV (segment 0, value "test_data") into `txn` for every
+// version in [start_version, end_version] of the given rowset. The txn is not committed here.
+static void create_delete_bitmaps(Transaction* txn, int64_t tablet_id, std::string rowset_id,
+                                  int64_t start_version, int64_t end_version) {
+    int64_t version = start_version;
+    while (version <= end_version) {
+        std::string bitmap_val {"test_data"};
+        auto bitmap_key = meta_delete_bitmap_key({instance_id, tablet_id, rowset_id, version, 0});
+        txn->put(bitmap_key, bitmap_val);
+        version++;
+    }
+}
+
static int create_tablet(TxnKv* txn_kv, int64_t table_id, int64_t index_id,
int64_t partition_id,
- int64_t tablet_id) {
+ int64_t tablet_id, bool is_mow = false) {
std::unique_ptr<Transaction> txn;
if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) {
return -1;
}
doris::TabletMetaCloudPB tablet_meta;
tablet_meta.set_tablet_id(tablet_id);
+ tablet_meta.set_enable_unique_key_merge_on_write(is_mow);
auto val = tablet_meta.SerializeAsString();
auto key = meta_tablet_key({instance_id, table_id, index_id, partition_id,
tablet_id});
txn->put(key, val);
@@ -275,6 +324,7 @@ static int create_tablet(TxnKv* txn_kv, int64_t table_id,
int64_t index_id, int6
TabletIndexPB tablet_idx_pb;
tablet_idx_pb.set_db_id(db_id);
tablet_idx_pb.set_table_id(table_id);
+ tablet_idx_pb.set_index_id(index_id);
tablet_idx_pb.set_partition_id(partition_id);
tablet_idx_pb.set_tablet_id(tablet_id);
auto idx_val = tablet_idx_pb.SerializeAsString();
@@ -2576,6 +2626,352 @@ TEST(CheckerTest, do_inspect) {
}
}
+TEST(CheckerTest, delete_bitmap_inverted_check_normal) {
+ // normal case: every delete bitmap belongs to a rowset of a merge-on-write tablet, so the check at the end is expected to return 0
+ auto txn_kv = std::make_shared<MemTxnKv>();
+ ASSERT_EQ(txn_kv->init(), 0);
+
+ InstanceInfoPB instance;
+ instance.set_instance_id(instance_id);
+ auto obj_info = instance.add_obj_info();
+ obj_info->set_id("1");
+
+ InstanceChecker checker(txn_kv, instance_id);
+ ASSERT_EQ(checker.init(instance), 0);
+ auto accessor = checker.accessor_map_.begin()->second;
+
+ std::unique_ptr<Transaction> txn;
+ ASSERT_EQ(TxnErrorCode::TXN_OK, txn_kv->create_txn(&txn));
+
+ constexpr int table_id = 10000, index_id = 10001, partition_id = 10002;
+ // create some rowsets with delete bitmaps in merge-on-write tablets: one single-version rowset per version 2..10
+ for (int tablet_id = 600001; tablet_id <= 600010; ++tablet_id) {
+ ASSERT_EQ(0,
+ create_tablet(txn_kv.get(), table_id, index_id,
partition_id, tablet_id, true));
+ int64_t rowset_start_id = 400;
+ for (int ver = 2; ver <= 10; ++ver) {
+ std::string rowset_id = std::to_string(rowset_start_id++);
+ create_committed_rowset_with_rowset_id(txn_kv.get(),
accessor.get(), "1", tablet_id,
+ ver, ver, rowset_id, false,
1);
+ if (ver >= 5) { // versions >= 5: the delete bitmap is written as a single plain KV
+ auto delete_bitmap_key =
+ meta_delete_bitmap_key({instance_id, tablet_id,
rowset_id, ver, 0});
+ std::string delete_bitmap_val {"test"};
+ txn->put(delete_bitmap_key, delete_bitmap_val);
+ } else {
+ // delete bitmaps may be split into multiple KVs if too
large
+ auto delete_bitmap_key =
+ meta_delete_bitmap_key({instance_id, tablet_id,
rowset_id, ver, 0});
+ std::string delete_bitmap_val(1000, 'A');
+ cloud::put(txn.get(), delete_bitmap_key, delete_bitmap_val, 0,
300); // NOTE(review): the trailing 300 is presumably the per-KV split size, so this 1000-byte value should span several KVs; confirm against cloud::put
+ }
+ }
+ }
+
+ // also create some rowsets without delete bitmaps in non merge-on-write
tablet
+ for (int tablet_id = 700001; tablet_id <= 700010; ++tablet_id) {
+ ASSERT_EQ(0,
+ create_tablet(txn_kv.get(), table_id, index_id,
partition_id, tablet_id, false));
+ int64_t rowset_start_id = 500;
+ for (int ver = 2; ver < 10; ++ver) {
+ std::string rowset_id = std::to_string(rowset_start_id++);
+ create_committed_rowset_with_rowset_id(txn_kv.get(),
accessor.get(), "1", tablet_id,
+ ver, ver, rowset_id, false,
1);
+ }
+ }
+
+ ASSERT_EQ(TxnErrorCode::TXN_OK, txn->commit()); // commits the delete bitmap KVs staged in `txn` above
+
+ ASSERT_EQ(checker.do_delete_bitmap_inverted_check(), 0);
+}
+
+TEST(CheckerTest, delete_bitmap_inverted_check_abnormal) {
+ // abnormal case: some delete bitmaps are leaked (their rowset was never created) and some belong to non merge-on-write tablets
+ auto txn_kv = std::make_shared<MemTxnKv>();
+ ASSERT_EQ(txn_kv->init(), 0);
+
+ InstanceInfoPB instance;
+ instance.set_instance_id(instance_id);
+ auto obj_info = instance.add_obj_info();
+ obj_info->set_id("1");
+
+ InstanceChecker checker(txn_kv, instance_id);
+ ASSERT_EQ(checker.init(instance), 0);
+ auto accessor = checker.accessor_map_.begin()->second;
+
+ // tablet_id -> [rowset_id, version, segment_id]; the expected_* maps are filled while building the fixture, the real_* maps by the sync point callbacks below
+ std::map<std::int64_t, std::set<std::tuple<std::string, int64_t, int64_t>>>
+ expected_abnormal_delete_bitmaps {}, real_abnormal_delete_bitmaps
{};
+ std::map<std::int64_t, std::set<std::tuple<std::string, int64_t, int64_t>>>
+ expected_leaked_delete_bitmaps {}, real_leaked_delete_bitmaps {};
+ auto sp = SyncPoint::get_instance();
+ std::unique_ptr<int, std::function<void(int*)>> defer(
+ (int*)0x01, [](int*) {
SyncPoint::get_instance()->clear_all_call_backs(); }); // scope guard: 0x01 is only a non-null dummy so the deleter runs and clears all sync point callbacks when the test scope exits
+ sp->set_call_back(
+
"InstanceChecker::do_delete_bitmap_inverted_check.get_abnormal_delete_bitmap",
+ [&real_abnormal_delete_bitmaps](auto&& args) {
+ int64_t tablet_id = *try_any_cast<int64_t*>(args[0]);
+ std::string rowset_id = *try_any_cast<std::string*>(args[1]);
+ int64_t version = *try_any_cast<int64_t*>(args[2]);
+ int64_t segment_id = *try_any_cast<int64_t*>(args[3]);
+ real_abnormal_delete_bitmaps[tablet_id].insert({rowset_id,
version, segment_id});
+ });
+ sp->set_call_back(
+
"InstanceChecker::do_delete_bitmap_inverted_check.get_leaked_delete_bitmap",
+ [&real_leaked_delete_bitmaps](auto&& args) {
+ int64_t tablet_id = *try_any_cast<int64_t*>(args[0]);
+ std::string rowset_id = *try_any_cast<std::string*>(args[1]);
+ int64_t version = *try_any_cast<int64_t*>(args[2]);
+ int64_t segment_id = *try_any_cast<int64_t*>(args[3]);
+ real_leaked_delete_bitmaps[tablet_id].insert({rowset_id,
version, segment_id});
+ });
+ sp->enable_processing();
+
+ std::unique_ptr<Transaction> txn;
+ ASSERT_EQ(TxnErrorCode::TXN_OK, txn_kv->create_txn(&txn));
+
+ constexpr int table_id = 10000, index_id = 10001, partition_id = 10002;
+ // create delete bitmaps for versions 2..20 in merge-on-write tablets, but rowsets only for versions >= 10
+ for (int tablet_id = 800001; tablet_id <= 800010; ++tablet_id) {
+ ASSERT_EQ(0,
+ create_tablet(txn_kv.get(), table_id, index_id,
partition_id, tablet_id, true));
+ int64_t rowset_start_id = 600;
+ for (int ver = 2; ver <= 20; ++ver) {
+ std::string rowset_id = std::to_string(rowset_start_id++);
+
+ if (ver >= 10) {
+ // only create rowsets for some versions
+ create_committed_rowset_with_rowset_id(txn_kv.get(),
accessor.get(), "1", tablet_id,
+ ver, ver, rowset_id,
false, 1);
+ } else {
+ expected_leaked_delete_bitmaps[tablet_id].insert({rowset_id,
ver, 0}); // no rowset is created for this version, so its delete bitmap is expected to be reported as leaked
+ }
+
+ if (ver >= 5) {
+ auto delete_bitmap_key =
+ meta_delete_bitmap_key({instance_id, tablet_id,
rowset_id, ver, 0});
+ std::string delete_bitmap_val {"test"};
+ txn->put(delete_bitmap_key, delete_bitmap_val);
+ } else {
+ // delete bitmaps may be split into multiple KVs if too
large
+ auto delete_bitmap_key =
+ meta_delete_bitmap_key({instance_id, tablet_id,
rowset_id, ver, 0});
+ std::string delete_bitmap_val(1000, 'A');
+ cloud::put(txn.get(), delete_bitmap_key, delete_bitmap_val, 0,
300);
+ }
+ }
+ }
+
+ // create some rowsets with delete bitmaps in non merge-on-write tablet; each of these bitmaps is expected to be reported as abnormal
+ for (int tablet_id = 900001; tablet_id <= 900010; ++tablet_id) {
+ ASSERT_EQ(0,
+ create_tablet(txn_kv.get(), table_id, index_id,
partition_id, tablet_id, false));
+ int64_t rowset_start_id = 700;
+ for (int ver = 2; ver < 6; ++ver) {
+ std::string rowset_id = std::to_string(rowset_start_id++);
+ create_committed_rowset_with_rowset_id(txn_kv.get(),
accessor.get(), "1", tablet_id,
+ ver, ver, rowset_id, false,
1);
+ auto delete_bitmap_key =
+ meta_delete_bitmap_key({instance_id, tablet_id, rowset_id,
ver, 0});
+ std::string delete_bitmap_val {"test2"};
+ txn->put(delete_bitmap_key, delete_bitmap_val);
+
+ expected_abnormal_delete_bitmaps[tablet_id].insert({rowset_id,
ver, 0});
+ }
+ }
+
+ // create some rowsets without delete bitmaps in non merge-on-write tablet
+ for (int tablet_id = 700001; tablet_id <= 700010; ++tablet_id) {
+ ASSERT_EQ(0,
+ create_tablet(txn_kv.get(), table_id, index_id,
partition_id, tablet_id, false));
+ int64_t rowset_start_id = 500;
+ for (int ver = 2; ver < 10; ++ver) {
+ std::string rowset_id = std::to_string(rowset_start_id++);
+ create_committed_rowset_with_rowset_id(txn_kv.get(),
accessor.get(), "1", tablet_id,
+ ver, ver, rowset_id, false,
1);
+ }
+ }
+
+ ASSERT_EQ(TxnErrorCode::TXN_OK, txn->commit());
+
+ ASSERT_EQ(checker.do_delete_bitmap_inverted_check(), 1); // this fixture is expected to yield 1 (the normal-case test expects 0)
+ ASSERT_EQ(expected_leaked_delete_bitmaps, real_leaked_delete_bitmaps);
+ ASSERT_EQ(expected_abnormal_delete_bitmaps, real_abnormal_delete_bitmaps);
+}
+
+TEST(CheckerTest, delete_bitmap_storage_optimize_check_normal) { // normal case: the storage optimize check at the end is expected to return 0
+ config::delete_bitmap_storage_optimize_check_version_gap = 0; // NOTE(review): global config is overwritten here and not restored in this test; confirm no later test relies on the previous value
+
+ auto txn_kv = std::make_shared<MemTxnKv>();
+ ASSERT_EQ(txn_kv->init(), 0);
+
+ InstanceInfoPB instance;
+ instance.set_instance_id(instance_id);
+ auto obj_info = instance.add_obj_info();
+ obj_info->set_id("1");
+
+ InstanceChecker checker(txn_kv, instance_id);
+ ASSERT_EQ(checker.init(instance), 0);
+ auto accessor = checker.accessor_map_.begin()->second;
+
+ std::unique_ptr<Transaction> txn;
+ ASSERT_EQ(TxnErrorCode::TXN_OK, txn_kv->create_txn(&txn));
+
+ constexpr int table_id = 10000, index_id = 10001, partition_id = 10002;
+ int64_t rowset_start_id = 600; // shared by the merge-on-write tablet groups below, so each of their rowsets gets a distinct id
+
+ for (int tablet_id = 800001; tablet_id <= 800005; ++tablet_id) { // rowset [6-7] is non-overlapping; every delete bitmap range in this group starts at version 7 or later
+ ASSERT_EQ(0,
+ create_tablet(txn_kv.get(), table_id, index_id,
partition_id, tablet_id, true));
+ std::vector<std::pair<int64_t, int64_t>> rowset_vers {{2, 2}, {3, 3},
{4, 4}, {5, 5},
+ {6, 7}, {8, 8},
{9, 9}};
+ std::vector<std::pair<int64_t, int64_t>> delete_bitmaps_vers {
+ {7, 9}, {8, 9}, {7, 9}, {7, 9}, {7, 9}, {8, 9}, {9, 9}};
+ std::vector<bool> segments_overlap {true, true, true, true, false,
true, true};
+ for (size_t i {0}; i < 7; i++) {
+ std::string rowset_id = std::to_string(rowset_start_id++);
+ create_committed_rowset_with_rowset_id(txn_kv.get(),
accessor.get(), "1", tablet_id,
+ rowset_vers[i].first,
rowset_vers[i].second,
+ rowset_id,
segments_overlap[i], 1);
+ create_delete_bitmaps(txn.get(), tablet_id, rowset_id,
delete_bitmaps_vers[i].first,
+ delete_bitmaps_vers[i].second);
+ }
+ }
+
+ for (int tablet_id = 800006; tablet_id <= 800010; ++tablet_id) {
+ // [7-7] cumu compaction output rowset start_version == end_version
+ ASSERT_EQ(0,
+ create_tablet(txn_kv.get(), table_id, index_id,
partition_id, tablet_id, true));
+ std::vector<std::pair<int64_t, int64_t>> rowset_vers {{2, 2}, {3, 3},
{4, 4}, {5, 5},
+ {6, 6}, {7, 7},
{8, 8}, {9, 9}};
+ std::vector<std::pair<int64_t, int64_t>> delete_bitmaps_vers {
+ {7, 9}, {8, 9}, {7, 9}, {7, 9}, {7, 9}, {7, 9}, {8, 9}, {9,
9}};
+ std::vector<bool> segments_overlap {true, true, false, true, false,
true, true, true};
+ for (size_t i {0}; i < 8; i++) {
+ std::string rowset_id = std::to_string(rowset_start_id++);
+ create_committed_rowset_with_rowset_id(txn_kv.get(),
accessor.get(), "1", tablet_id,
+ rowset_vers[i].first,
rowset_vers[i].second,
+ rowset_id,
segments_overlap[i], 1);
+ create_delete_bitmaps(txn.get(), tablet_id, rowset_id,
delete_bitmaps_vers[i].first,
+ delete_bitmaps_vers[i].second);
+ }
+ }
+
+ for (int tablet_id = 800011; tablet_id <= 800015; ++tablet_id) {
+ // no rowsets are compacted: every rowset is single-version and overlapping, and its delete bitmaps start at its own version
+ ASSERT_EQ(0,
+ create_tablet(txn_kv.get(), table_id, index_id,
partition_id, tablet_id, true));
+ std::vector<std::pair<int64_t, int64_t>> rowset_vers {{2, 2}, {3, 3},
{4, 4}, {5, 5},
+ {6, 6}, {7, 7},
{8, 8}, {9, 9}};
+ std::vector<std::pair<int64_t, int64_t>> delete_bitmaps_vers {
+ {2, 9}, {3, 9}, {4, 9}, {5, 9}, {6, 9}, {7, 9}, {8, 9}, {9,
9}};
+ std::vector<bool> segments_overlap {true, true, true, true, true,
true, true, true};
+ for (size_t i {0}; i < 8; i++) {
+ std::string rowset_id = std::to_string(rowset_start_id++);
+ create_committed_rowset_with_rowset_id(txn_kv.get(),
accessor.get(), "1", tablet_id,
+ rowset_vers[i].first,
rowset_vers[i].second,
+ rowset_id,
segments_overlap[i], 1);
+ create_delete_bitmaps(txn.get(), tablet_id, rowset_id,
delete_bitmaps_vers[i].first,
+ delete_bitmaps_vers[i].second);
+ }
+ }
+
+ for (int tablet_id = 800016; tablet_id <= 800020; ++tablet_id) { // first rowset spans versions [2-5] and is non-overlapping; its delete bitmaps start at version 5
+ ASSERT_EQ(0,
+ create_tablet(txn_kv.get(), table_id, index_id,
partition_id, tablet_id, true));
+ std::vector<std::pair<int64_t, int64_t>> rowset_vers {
+ {2, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}};
+ std::vector<std::pair<int64_t, int64_t>> delete_bitmaps_vers {
+ {5, 9}, {6, 9}, {7, 9}, {8, 9}, {9, 9}};
+ std::vector<bool> segments_overlap {false, true, true, true, true};
+ for (size_t i {0}; i < 5; i++) {
+ std::string rowset_id = std::to_string(rowset_start_id++);
+ create_committed_rowset_with_rowset_id(txn_kv.get(),
accessor.get(), "1", tablet_id,
+ rowset_vers[i].first,
rowset_vers[i].second,
+ rowset_id,
segments_overlap[i], 1);
+ create_delete_bitmaps(txn.get(), tablet_id, rowset_id,
delete_bitmaps_vers[i].first,
+ delete_bitmaps_vers[i].second);
+ }
+ }
+
+ // also create some rowsets without delete bitmaps in non merge-on-write
tablet
+ for (int tablet_id = 700001; tablet_id <= 700010; ++tablet_id) {
+ ASSERT_EQ(0,
+ create_tablet(txn_kv.get(), table_id, index_id,
partition_id, tablet_id, false));
+ int64_t rowset_start_id = 500; // shadows the outer counter; ids restart at 500 for every tablet in this group
+ for (int ver = 2; ver < 10; ++ver) {
+ std::string rowset_id = std::to_string(rowset_start_id++);
+ create_committed_rowset_with_rowset_id(txn_kv.get(),
accessor.get(), "1", tablet_id,
+ ver, ver, rowset_id, false,
1);
+ }
+ }
+
+ ASSERT_EQ(TxnErrorCode::TXN_OK, txn->commit());
+ ASSERT_EQ(checker.do_delete_bitmap_storage_optimize_check(), 0);
+}
+
+TEST(CheckerTest, delete_bitmap_storage_optimize_check_abnormal) {
+ config::delete_bitmap_storage_optimize_check_version_gap = 0; // NOTE(review): global config is overwritten here and not restored in this test; confirm no later test relies on the previous value
+ // abnormal case, some rowsets' delete bitmaps are not deleted as expected
+ auto txn_kv = std::make_shared<MemTxnKv>();
+ ASSERT_EQ(txn_kv->init(), 0);
+
+ InstanceInfoPB instance;
+ instance.set_instance_id(instance_id);
+ auto obj_info = instance.add_obj_info();
+ obj_info->set_id("1");
+
+ InstanceChecker checker(txn_kv, instance_id);
+ ASSERT_EQ(checker.init(instance), 0);
+ auto accessor = checker.accessor_map_.begin()->second;
+
+ // tablet_id -> [rowset_id]; expected_* is filled while building the fixture, real_* by the sync point callback below
+ std::map<std::int64_t, std::set<std::string>> expected_abnormal_rowsets {};
+ std::map<std::int64_t, std::set<std::string>> real_abnormal_rowsets {};
+ auto sp = SyncPoint::get_instance();
+ std::unique_ptr<int, std::function<void(int*)>> defer(
+ (int*)0x01, [](int*) {
SyncPoint::get_instance()->clear_all_call_backs(); }); // scope guard: 0x01 is only a non-null dummy so the deleter runs and clears all sync point callbacks when the test scope exits
+
sp->set_call_back("InstanceChecker::check_delete_bitmap_storage_optimize.get_abnormal_rowset",
+ [&real_abnormal_rowsets](auto&& args) {
+ int64_t tablet_id = *try_any_cast<int64_t*>(args[0]);
+ std::string rowset_id =
*try_any_cast<std::string*>(args[1]);
+ real_abnormal_rowsets[tablet_id].insert(rowset_id);
+ });
+ sp->enable_processing();
+
+ std::unique_ptr<Transaction> txn;
+ ASSERT_EQ(TxnErrorCode::TXN_OK, txn_kv->create_txn(&txn));
+
+ constexpr int table_id = 10000, index_id = 10001, partition_id = 10002;
+
+ int64_t rowset_start_id = 700;
+ for (int tablet_id = 900001; tablet_id <= 900005; ++tablet_id) {
+ ASSERT_EQ(0,
+ create_tablet(txn_kv.get(), table_id, index_id,
partition_id, tablet_id, true));
+ std::vector<std::pair<int64_t, int64_t>> rowset_vers {{2, 2}, {3, 3},
{4, 4}, {5, 5},
+ {6, 7}, {8, 8},
{9, 9}};
+ std::vector<std::pair<int64_t, int64_t>> delete_bitmaps_vers {
+ {2, 9}, {7, 9}, {4, 9}, {7, 9}, {7, 9}, {8, 9}, {9, 9}};
+ std::vector<bool> segments_overlap {true, true, true, true, false,
true, true};
+ for (size_t i {0}; i < 7; i++) {
+ std::string rowset_id = std::to_string(rowset_start_id++);
+ create_committed_rowset_with_rowset_id(txn_kv.get(),
accessor.get(), "1", tablet_id,
+ rowset_vers[i].first,
rowset_vers[i].second,
+ rowset_id,
segments_overlap[i], 1);
+ create_delete_bitmaps(txn.get(), tablet_id, rowset_id,
delete_bitmaps_vers[i].first,
+ delete_bitmaps_vers[i].second);
+ if (delete_bitmaps_vers[i].first < 7) { // with the data above this holds for i == 0 and i == 2; those rowsets are the ones expected to be reported
+ expected_abnormal_rowsets[tablet_id].insert(rowset_id);
+ }
+ }
+ }
+
+ ASSERT_EQ(TxnErrorCode::TXN_OK, txn->commit());
+
+ ASSERT_EQ(checker.do_delete_bitmap_storage_optimize_check(), 1);
+ ASSERT_EQ(expected_abnormal_rowsets, real_abnormal_rowsets);
+}
+
TEST(RecyclerTest, delete_rowset_data) {
auto txn_kv = std::make_shared<MemTxnKv>();
ASSERT_EQ(txn_kv->init(), 0);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]