This is an automated email from the ASF dual-hosted git repository.
zhangchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 415aa23e25e branch-3.0: [fix](cloud-mow) Fix the issue of inaccurate
estimation of txn size when updating delete bitmap #46969 (#47046)
415aa23e25e is described below
commit 415aa23e25e65e477c43c00f99f2ed4b62ac331b
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Jan 17 15:12:17 2025 +0800
branch-3.0: [fix](cloud-mow) Fix the issue of inaccurate estimation of txn
size when updating delete bitmap #46969 (#47046)
Cherry-picked from #46969
Co-authored-by: huanghaibin <[email protected]>
Co-authored-by: Calvin Kirs <[email protected]>
---
cloud/src/common/config.h | 9 ++++++
cloud/src/meta-service/meta_service.cpp | 53 ++++++++++++++++++++++---------
cloud/test/meta_service_test.cpp | 55 +++++++++++++++++++++++++++++++++
3 files changed, 103 insertions(+), 14 deletions(-)
diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h
index a5cfef70ff5..ac4064c8d92 100644
--- a/cloud/src/common/config.h
+++ b/cloud/src/common/config.h
@@ -232,6 +232,15 @@ CONF_mInt64(max_num_aborted_txn, "100");
// Max byte getting delete bitmap can return, default is 1GB
CONF_mInt64(max_get_delete_bitmap_byte, "1073741824");
+// Max byte txn commit when updating delete bitmap, default is 7MB.
+// Because the size of one fdb transaction can't exceed 10MB, and
+// fdb does not have an accurate way to estimate the size of txn.
+// In my test, when txn->approximate_bytes() bigger than 8MB,
+// it may meet Transaction exceeds byte limit error. We'd better
+// reserve 1MB of buffer, so setting the default value to 7MB is
+// more reasonable.
+CONF_mInt64(max_txn_commit_byte, "7340032");
+
CONF_Bool(enable_cloud_txn_lazy_commit, "true");
CONF_Int32(txn_lazy_commit_rowsets_thresold, "1000");
CONF_Int32(txn_lazy_commit_num_threads, "8");
diff --git a/cloud/src/meta-service/meta_service.cpp b/cloud/src/meta-service/meta_service.cpp
index 2ca8379f0ba..70fb53b7fb3 100644
--- a/cloud/src/meta-service/meta_service.cpp
+++ b/cloud/src/meta-service/meta_service.cpp
@@ -1851,25 +1851,40 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont
}
// 4. Update delete bitmap for curent txn
- size_t total_key = 0;
- size_t total_size = 0;
+ size_t current_key_count = 0;
+ size_t current_value_count = 0;
+ size_t total_key_count = 0;
+ size_t total_value_count = 0;
+ size_t total_txn_put_keys = 0;
+ size_t total_txn_put_bytes = 0;
+ size_t total_txn_size = 0;
for (size_t i = 0; i < request->rowset_ids_size(); ++i) {
auto& key = delete_bitmap_keys.delete_bitmap_keys(i);
auto& val = request->segment_delete_bitmaps(i);
// Split into multiple fdb transactions, because the size of one fdb
// transaction can't exceed 10MB.
- if (fdb_txn_size + key.size() + val.size() > 9 * 1024 * 1024) {
- LOG(INFO) << "fdb txn size more than 9MB, current size: " << fdb_txn_size
- << " lock_id=" << request->lock_id();
+ if (txn->approximate_bytes() + key.size() * 3 + val.size() > config::max_txn_commit_byte) {
+ LOG(INFO) << "fdb txn size more than " << config::max_txn_commit_byte
+ << ", current size: " << txn->approximate_bytes()
+ << " lock_id=" << request->lock_id() << ", need to commit";
err = txn->commit();
+ total_txn_put_keys += txn->num_put_keys();
+ total_txn_put_bytes += txn->put_bytes();
+ total_txn_size += txn->approximate_bytes();
if (err != TxnErrorCode::TXN_OK) {
code = cast_as<ErrCategory::COMMIT>(err);
- ss << "failed to update delete bitmap, err=" << err;
+ ss << "failed to update delete bitmap, err=" << err << " tablet_id=" << tablet_id
+ << " lock_id=" << request->lock_id()
+ << " delete_bitmap_key=" << current_key_count
+ << " delete_bitmap_value=" << current_value_count
+ << " put_size=" << txn->put_bytes() << " num_put_keys=" << txn->num_put_keys()
+ << " txn_size=" << txn->approximate_bytes();
msg = ss.str();
return;
}
- fdb_txn_size = 0;
+ current_key_count = 0;
+ current_value_count = 0;
TxnErrorCode err = txn_kv_->create_txn(&txn);
if (err != TxnErrorCode::TXN_OK) {
code = cast_as<ErrCategory::CREATE>(err);
@@ -1888,24 +1903,34 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont
}
// splitting large values (>90*1000) into multiple KVs
cloud::put(txn.get(), key, val, 0);
- fdb_txn_size = fdb_txn_size + key.size() + val.size();
- total_key++;
- total_size += key.size() + val.size();
+ current_key_count++;
+ current_value_count += val.size();
+ total_key_count++;
+ total_value_count += val.size();
VLOG_DEBUG << "xxx update delete bitmap put delete_bitmap_key=" << hex(key)
<< " lock_id=" << request->lock_id() << " key_size: " << key.size()
<< " value_size: " << val.size();
}
-
err = txn->commit();
+ total_txn_put_keys += txn->num_put_keys();
+ total_txn_put_bytes += txn->put_bytes();
+ total_txn_size += txn->approximate_bytes();
if (err != TxnErrorCode::TXN_OK) {
code = cast_as<ErrCategory::COMMIT>(err);
- ss << "failed to update delete bitmap, err=" << err;
+ ss << "failed to update delete bitmap, err=" << err << " tablet_id=" << tablet_id
+ << " lock_id=" << request->lock_id() << " delete_bitmap_key=" << current_key_count
+ << " delete_bitmap_value=" << current_value_count << " put_size=" << txn->put_bytes()
+ << " num_put_keys=" << txn->num_put_keys() << " txn_size=" << txn->approximate_bytes();
msg = ss.str();
return;
}
LOG(INFO) << "update_delete_bitmap tablet_id=" << tablet_id << " lock_id=" << request->lock_id()
- << " rowset_num=" << request->rowset_ids_size() << " total_key=" << total_key
- << " total_size=" << total_size << " unlock=" << unlock;
+ << " rowset_num=" << request->rowset_ids_size()
+ << " total_key_count=" << total_key_count
+ << " total_value_count=" << total_value_count << " unlock=" << unlock
+ << " total_txn_put_keys=" << total_txn_put_keys
+ << " total_txn_put_bytes=" << total_txn_put_bytes
+ << " total_txn_size=" << total_txn_size;
}
void MetaServiceImpl::get_delete_bitmap(google::protobuf::RpcController* controller,
diff --git a/cloud/test/meta_service_test.cpp b/cloud/test/meta_service_test.cpp
index b7004716035..fb17c29629b 100644
--- a/cloud/test/meta_service_test.cpp
+++ b/cloud/test/meta_service_test.cpp
@@ -104,6 +104,24 @@ std::unique_ptr<MetaServiceProxy> get_meta_service() {
return get_meta_service(true);
}
+std::unique_ptr<MetaServiceProxy> get_fdb_meta_service() {
+ config::fdb_cluster_file_path = "fdb.cluster";
+ static auto txn_kv = std::dynamic_pointer_cast<TxnKv>(std::make_shared<FdbTxnKv>());
+ static std::atomic<bool> init {false};
+ bool tmp = false;
+ if (init.compare_exchange_strong(tmp, true)) {
+ int ret = txn_kv->init();
+ [&] {
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(txn_kv.get(), nullptr);
+ }();
+ }
+ auto rs = std::make_shared<MockResourceManager>(txn_kv);
+ auto rl = std::make_shared<RateLimiter>();
+ auto meta_service = std::make_unique<MetaServiceImpl>(txn_kv, rs, rl);
+ return std::make_unique<MetaServiceProxy>(std::move(meta_service));
+}
+
static std::string next_rowset_id() {
static int cnt = 0;
return std::to_string(++cnt);
@@ -4857,6 +4875,43 @@ static std::string generate_random_string(int length) {
return randomString;
}
+TEST(MetaServiceTest, UpdateDeleteBitmapWithBigKeys) {
+ auto meta_service = get_fdb_meta_service();
+ // get delete bitmap update lock
+ brpc::Controller cntl;
+ GetDeleteBitmapUpdateLockRequest get_lock_req;
+ GetDeleteBitmapUpdateLockResponse get_lock_res;
+ get_lock_req.set_cloud_unique_id("test_cloud_unique_id");
+ get_lock_req.set_table_id(1999);
+ get_lock_req.add_partition_ids(123);
+ get_lock_req.set_expiration(5);
+ get_lock_req.set_lock_id(-1);
+ get_lock_req.set_initiator(100);
+ meta_service->get_delete_bitmap_update_lock(
+ reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &get_lock_req,
+ &get_lock_res, nullptr);
+ ASSERT_EQ(get_lock_res.status().code(), MetaServiceCode::OK);
+ UpdateDeleteBitmapRequest update_delete_bitmap_req;
+ UpdateDeleteBitmapResponse update_delete_bitmap_res;
+ update_delete_bitmap_req.set_cloud_unique_id("test_cloud_unique_id");
+ update_delete_bitmap_req.set_table_id(1999);
+ update_delete_bitmap_req.set_partition_id(123);
+ update_delete_bitmap_req.set_lock_id(-1);
+ update_delete_bitmap_req.set_initiator(100);
+ update_delete_bitmap_req.set_tablet_id(333);
+ std::string large_value = generate_random_string(300 * 1000 * 3);
+ for (int i = 0; i < 100000; i++) {
+ update_delete_bitmap_req.add_rowset_ids("0200000003ea308a3647dbea83220ed4b8897f2288244a91");
+ update_delete_bitmap_req.add_segment_ids(0);
+ update_delete_bitmap_req.add_versions(i);
+ update_delete_bitmap_req.add_segment_delete_bitmaps("1");
+ }
+ meta_service->update_delete_bitmap(reinterpret_cast<google::protobuf::RpcController*>(&cntl),
+ &update_delete_bitmap_req, &update_delete_bitmap_res,
+ nullptr);
+ ASSERT_EQ(update_delete_bitmap_res.status().code(), MetaServiceCode::OK);
+}
+
TEST(MetaServiceTest, UpdateDeleteBitmap) {
auto meta_service = get_meta_service();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]