This is an automated email from the ASF dual-hosted git repository.
gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 64df172ca85 [fix](cloud) use snapshot read for table version to avoid
txn conflict (#64647)
64df172ca85 is described below
commit 64df172ca8582442770ae9edb5a5db09b80e723f
Author: deardeng <[email protected]>
AuthorDate: Tue Jun 30 12:45:57 2026 +0800
[fix](cloud) use snapshot read for table version to avoid txn conflict
(#64647)
PR #60467 added a non-snapshot txn->get(table_version_key) in
commit_txn, commit_partition, commit_index and drop_partition to return
table_version+1 to FE as a version-cache hint. A non-snapshot read adds
table_version_key to the transaction's read-conflict range, while
update_table_version() bumps the same key via atomic_add, which on its
own is conflict-free. As a result, concurrent commits/imports on the
same table now conflict on the version key and fail with
KV_TXN_CONFLICT, causing retries and a throughput regression.
Change these reads to snapshot reads, which do not add the key to the
transaction's read-conflict range. The returned value is only a hint
for FE's version cache (reconciled by
CloudTableAndPartitionVersionChecker), and the real increment is still
done by atomic_add, so correctness is preserved while the read conflict
on the version key is removed.
---
cloud/src/meta-service/meta_service_partition.cpp | 18 +++++++++++++++---
cloud/src/meta-service/meta_service_txn.cpp | 18 +++++++++++++++---
2 files changed, 30 insertions(+), 6 deletions(-)
diff --git a/cloud/src/meta-service/meta_service_partition.cpp
b/cloud/src/meta-service/meta_service_partition.cpp
index 5f66ca4216e..8936ba32e96 100644
--- a/cloud/src/meta-service/meta_service_partition.cpp
+++ b/cloud/src/meta-service/meta_service_partition.cpp
@@ -342,7 +342,11 @@ void
MetaServiceImpl::commit_index(::google::protobuf::RpcController* controller
int64_t table_id = request->table_id();
std::string ver_key = table_version_key({instance_id,
request->db_id(), table_id});
std::string ver_val;
- err = txn->get(ver_key, &ver_val);
+ // snapshot read: the returned table version is only a hint for
FE's version
+ // cache; the real increment is done by update_table_version() via
atomic_add.
+ // A non-snapshot read would add ver_key to the read-conflict set
and make
+ // concurrent commits on the same table conflict (KV_TXN_CONFLICT).
+ err = txn->get(ver_key, &ver_val, true);
int64_t table_version = 0;
if (err == TxnErrorCode::TXN_OK) {
if (!txn->decode_atomic_int(ver_val, &table_version)) {
@@ -856,7 +860,11 @@ void MetaServiceImpl::commit_partition_internal(const
PartitionRequest* request,
std::string ver_key =
table_version_key({instance_id, request->db_id(),
request->table_id()});
std::string ver_val;
- err = txn->get(ver_key, &ver_val);
+ // snapshot read: the returned table version is only a hint for FE's
version
+ // cache; the real increment is done by update_table_version() via
atomic_add.
+ // A non-snapshot read would add ver_key to the read-conflict set and
make
+ // concurrent commits on the same table conflict (KV_TXN_CONFLICT).
+ err = txn->get(ver_key, &ver_val, true);
int64_t table_version = 0;
if (err == TxnErrorCode::TXN_OK) {
if (!txn->decode_atomic_int(ver_val, &table_version)) {
@@ -1045,7 +1053,11 @@ void
MetaServiceImpl::drop_partition(::google::protobuf::RpcController* controll
std::string ver_key =
table_version_key({instance_id, request->db_id(),
request->table_id()});
std::string ver_val;
- err = txn->get(ver_key, &ver_val);
+ // snapshot read: the returned table version is only a hint for
FE's version
+ // cache; the real increment is done by update_table_version() via
atomic_add.
+ // A non-snapshot read would add ver_key to the read-conflict set
and make
+ // concurrent commits on the same table conflict (KV_TXN_CONFLICT).
+ err = txn->get(ver_key, &ver_val, true);
int64_t table_version = 0;
if (err == TxnErrorCode::TXN_OK) {
if (!txn->decode_atomic_int(ver_val, &table_version)) {
diff --git a/cloud/src/meta-service/meta_service_txn.cpp
b/cloud/src/meta-service/meta_service_txn.cpp
index 18986625b7d..5c35958b1f0 100644
--- a/cloud/src/meta-service/meta_service_txn.cpp
+++ b/cloud/src/meta-service/meta_service_txn.cpp
@@ -1853,7 +1853,11 @@ void MetaServiceImpl::commit_txn_immediately(
int64_t table_id = i.first;
std::string ver_key = table_version_key({instance_id, db_id,
table_id});
std::string ver_val;
- err = txn->get(ver_key, &ver_val);
+ // snapshot read: the returned table version is only a hint
for FE's version
+ // cache; the real increment is done by update_table_version()
via atomic_add.
+ // A non-snapshot read would add ver_key to the read-conflict
set and make
+ // concurrent commits on the same table conflict
(KV_TXN_CONFLICT).
+ err = txn->get(ver_key, &ver_val, true);
int64_t table_version = 0;
if (err == TxnErrorCode::TXN_OK) {
if (!txn->decode_atomic_int(ver_val, &table_version)) {
@@ -2556,7 +2560,11 @@ void MetaServiceImpl::commit_txn_eventually(
int64_t table_id = i.first;
std::string ver_key = table_version_key({instance_id, db_id,
table_id});
std::string ver_val;
- err = txn->get(ver_key, &ver_val);
+ // snapshot read: the returned table version is only a hint
for FE's version
+ // cache; the real increment is done by update_table_version()
via atomic_add.
+ // A non-snapshot read would add ver_key to the read-conflict
set and make
+ // concurrent commits on the same table conflict
(KV_TXN_CONFLICT).
+ err = txn->get(ver_key, &ver_val, true);
int64_t table_version = 0;
if (err == TxnErrorCode::TXN_OK) {
if (!txn->decode_atomic_int(ver_val, &table_version)) {
@@ -3050,7 +3058,11 @@ void MetaServiceImpl::commit_txn_with_sub_txn(const
CommitTxnRequest* request,
int64_t table_id = i.first;
std::string ver_key = table_version_key({instance_id, db_id,
table_id});
std::string ver_val;
- err = txn->get(ver_key, &ver_val);
+ // snapshot read: the returned table version is only a hint
for FE's version
+ // cache; the real increment is done by update_table_version()
via atomic_add.
+ // A non-snapshot read would add ver_key to the read-conflict
set and make
+ // concurrent commits on the same table conflict
(KV_TXN_CONFLICT).
+ err = txn->get(ver_key, &ver_val, true);
int64_t table_version = 0;
if (err == TxnErrorCode::TXN_OK) {
if (!txn->decode_atomic_int(ver_val, &table_version)) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]