This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new b1a563a5c1b branch-4.1: [fix](cloud) skip stale tablet cache check for STOP_TOKEN #63520 (#63786)
b1a563a5c1b is described below

commit b1a563a5c1b163c40de548c53f1ed89592deda74
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri May 29 14:26:19 2026 +0800

    branch-4.1: [fix](cloud) skip stale tablet cache check for STOP_TOKEN #63520 (#63786)
    
    Cherry-picked from #63520
    
    Co-authored-by: Gavin Chou <[email protected]>
    Co-authored-by: Siyang Tang <[email protected]>
---
 cloud/src/meta-service/meta_service_job.cpp | 12 ++++--
 cloud/test/meta_service_job_test.cpp        | 65 +++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+), 3 deletions(-)

diff --git a/cloud/src/meta-service/meta_service_job.cpp b/cloud/src/meta-service/meta_service_job.cpp
index e335c0a9b40..dad84f6a36a 100644
--- a/cloud/src/meta-service/meta_service_job.cpp
+++ b/cloud/src/meta-service/meta_service_job.cpp
@@ -162,9 +162,15 @@ void start_compaction_job(MetaServiceCode& code, std::string& msg, std::stringst
             return;
         }
     }
-
-    if (compaction.base_compaction_cnt() < stats.base_compaction_cnt() ||
-        compaction.cumulative_compaction_cnt() < stats.cumulative_compaction_cnt()) {
+    // STOP_TOKEN is a lock marker used by schema change to block concurrent compactions during
+    // delete bitmap recalculation on MOW tables. It does not perform actual compaction, so the
+    // stale tablet cache check (which guards against compacting on outdated rowset metadata) is
+    // not meaningful for it and must be skipped to avoid spurious failures when the BE's cached
+    // compaction counts lag behind the meta-service due to a concurrent compaction completing
+    // on another BE node (see CORE-5964).
+    if (compaction.type() != TabletCompactionJobPB::STOP_TOKEN &&
+        (compaction.base_compaction_cnt() < stats.base_compaction_cnt() ||
+         compaction.cumulative_compaction_cnt() < stats.cumulative_compaction_cnt())) {
         code = MetaServiceCode::STALE_TABLET_CACHE;
         SS << "could not perform compaction on expired tablet cache."
            << " req_base_compaction_cnt=" << compaction.base_compaction_cnt()
diff --git a/cloud/test/meta_service_job_test.cpp b/cloud/test/meta_service_job_test.cpp
index d5c837e8711..1926f6c600a 100644
--- a/cloud/test/meta_service_job_test.cpp
+++ b/cloud/test/meta_service_job_test.cpp
@@ -1645,6 +1645,71 @@ void check_job_key(MetaServiceProxy* meta_service, std::string instance_id, int6
     }
 }
 
+// Regression test for CORE-5964: STOP_TOKEN should not be rejected by the stale tablet
+// cache check even when the BE's cached compaction counts lag behind the meta-service.
+// STOP_TOKEN is a lock marker used by schema change (MOW table) to block concurrent
+// compactions during delete bitmap recalculation -- it does not perform actual compaction
+// work, so verifying compaction count freshness is meaningless for it.
+TEST(MetaServiceJobTest, StopTokenSkipsStaleTabletCacheCheck) {
+    auto meta_service = get_meta_service();
+
+    auto sp = SyncPoint::get_instance();
+    DORIS_CLOUD_DEFER {
+        SyncPoint::get_instance()->clear_all_call_backs();
+    };
+    sp->set_call_back("get_instance_id", [&](auto&& args) {
+        auto* ret = try_any_cast_ret<std::string>(args);
+        ret->first = instance_id;
+        ret->second = true;
+    });
+    sp->enable_processing();
+
+    int64_t table_id = 1, index_id = 2, partition_id = 3, tablet_id = 101;
+
+    // Set up tablet index
+    auto index_key = meta_tablet_idx_key({instance_id, tablet_id});
+    TabletIndexPB idx_pb;
+    idx_pb.set_table_id(table_id);
+    idx_pb.set_index_id(index_id);
+    idx_pb.set_partition_id(partition_id);
+    idx_pb.set_tablet_id(tablet_id);
+    std::unique_ptr<Transaction> txn;
+    ASSERT_EQ(meta_service->txn_kv()->create_txn(&txn), TxnErrorCode::TXN_OK);
+    txn->put(index_key, idx_pb.SerializeAsString());
+
+    // Simulate meta-service state where cumulative_compaction_cnt=9 (advanced by another BE)
+    std::string stats_key =
+            stats_tablet_key({instance_id, table_id, index_id, partition_id, tablet_id});
+    TabletStatsPB stats;
+    stats.set_base_compaction_cnt(0);
+    stats.set_cumulative_compaction_cnt(9);
+    txn->put(stats_key, stats.SerializeAsString());
+    ASSERT_EQ(txn->commit(), TxnErrorCode::TXN_OK);
+
+    // A regular CUMULATIVE compaction with stale counts (req=8 < actual=9) must be rejected.
+    {
+        StartTabletJobResponse res;
+        start_compaction_job(meta_service.get(), tablet_id, "cumu_job", "ip:port",
+                             /*base_cnt=*/0, /*cumu_cnt=*/8, TabletCompactionJobPB::CUMULATIVE,
+                             res);
+        ASSERT_EQ(res.status().code(), MetaServiceCode::STALE_TABLET_CACHE)
+                << "CUMULATIVE with stale counts should be rejected";
+    }
+
+    // A STOP_TOKEN with the same stale counts must NOT be rejected (CORE-5964 regression).
+    // The BE's cached cumulative_compaction_cnt=8 lags behind the actual value=9 on the
+    // meta-service side, but STOP_TOKEN registration must still succeed.
+    {
+        StartTabletJobResponse res;
+        start_compaction_job(meta_service.get(), tablet_id, "stop_token_job", "ip:port",
+                             /*base_cnt=*/0, /*cumu_cnt=*/8, TabletCompactionJobPB::STOP_TOKEN,
+                             res);
+        ASSERT_EQ(res.status().code(), MetaServiceCode::OK)
+                << "STOP_TOKEN with stale counts should NOT be rejected; got: "
+                << res.status().msg();
+    }
+}
+
 TEST(MetaServiceJobTest, DeleteBitmapUpdateLockCompatibilityTest) {
     auto meta_service = get_meta_service();
     auto sp = SyncPoint::get_instance();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to