This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/branch-1.17.x by this push:
     new 50202b107 KUDU-3486 Periodically send tombstoned replica report to 
delete it.
50202b107 is described below

commit 50202b10729f2c9b1b3992d08ec796619c406a88
Author: 宋家成 <songjiach...@thinkingdata.cn>
AuthorDate: Fri Oct 20 19:09:28 2023 +0800

    KUDU-3486 Periodically send tombstoned replica report to delete it.
    
    A tombstoned tablet replica might never be deleted since the
    delete-type deletion could only occur when the tablet is deleted.
    And the requests will only be sent to the voters, not including
    the tombstoned ones.
    
    The data of tombstone replica is deleted, but metadata is persisted
    in memory, which will occupy a lot of memory, especially for the
    tablets with big SchemaPB.
    
    Tombstoned replicas can be deleted while processing heartbeats. So
    making the tservers periodically send some reports with tombstoned
    replicas will delete the useless tombstoned replicas. See KUDU-3486
    for details.
    
    Change-Id: I92d3b3e44f49ad24a95fa10f351c6bd55c6eca7b
    Reviewed-on: http://gerrit.cloudera.org:8080/20600
    Tested-by: Kudu Jenkins
    Reviewed-by: Yifan Zhang <chinazhangyi...@163.com>
    (cherry picked from commit fab3569f074954f9ba0a69dcc00f96bf2217ef60)
    Reviewed-on: http://gerrit.cloudera.org:8080/21176
    Tested-by: Alexey Serbin <ale...@apache.org>
---
 src/kudu/integration-tests/delete_table-itest.cc | 67 +++++++++++++++++++++++-
 src/kudu/tserver/heartbeater.cc                  | 34 +++++++++---
 src/kudu/tserver/ts_tablet_manager.cc            | 10 +++-
 src/kudu/tserver/ts_tablet_manager.h             |  3 +-
 4 files changed, 104 insertions(+), 10 deletions(-)

diff --git a/src/kudu/integration-tests/delete_table-itest.cc 
b/src/kudu/integration-tests/delete_table-itest.cc
index 943fce2fe..8de238a82 100644
--- a/src/kudu/integration-tests/delete_table-itest.cc
+++ b/src/kudu/integration-tests/delete_table-itest.cc
@@ -28,6 +28,7 @@
 #include <set>
 #include <string>
 #include <tuple>  // IWYU pragma: keep
+#include <type_traits>
 #include <unordered_map>
 #include <utility>
 #include <vector>
@@ -58,7 +59,7 @@
 #include "kudu/integration-tests/mini_cluster_fs_inspector.h"
 #include "kudu/integration-tests/test_workload.h"
 #include "kudu/master/master.pb.h"
-#include "kudu/master/master.proxy.h"
+#include "kudu/master/master.proxy.h" // IWYU pragma: keep
 #include "kudu/mini-cluster/external_mini_cluster.h"
 #include "kudu/rpc/rpc_controller.h"
 #include "kudu/tablet/metadata.pb.h"
@@ -95,6 +96,7 @@ using kudu::itest::AddServer;
 using kudu::itest::DeleteTablet;
 using kudu::itest::DeleteTabletWithRetries;
 using kudu::itest::GetInt64Metric;
+using kudu::itest::ListTablesWithInfo;
 using kudu::itest::RemoveServer;
 using kudu::itest::TServerDetails;
 using kudu::pb_util::SecureDebugString;
@@ -1293,6 +1295,69 @@ TEST_F(DeleteTableITest, TestNoDeleteTombstonedTablets) {
   }
 }
 
+// Test if the tombstoned tablets could be deleted by sending incremental 
reports with
+// tombstoned replicas.
+TEST_F(DeleteTableITest, TombstonedTabletsDeletedByReport) {
+  SKIP_IF_SLOW_NOT_ALLOWED();
+  constexpr int kNumTablets = 3;
+  constexpr int kNumTabletServers = 4;
+
+  vector<string> ts_flags;
+  vector<string> master_flags;
+  NO_FATALS(StartCluster(ts_flags, master_flags, kNumTabletServers));
+
+  TestWorkload workload(cluster_.get());
+  workload.set_num_tablets(kNumTablets);
+  workload.Setup();
+  ASSERT_EVENTUALLY([&] {
+    ASSERT_OK(ClusterVerifier(cluster_.get()).RunKsck());
+  });
+
+  // Choose a random tablet replica and set it replaced, and then it will be 
tombstoned.
+  ConsensusStatePB cstate;
+  string tablet_id = inspect_->ListTabletsOnTS(0)[0];
+  
NO_FATALS(itest::GetConsensusState(ts_map_[cluster_->tablet_server(0)->uuid()],
+                                    tablet_id, MonoDelta::FromSeconds(10),
+                                    EXCLUDE_HEALTH_REPORT, &cstate));
+  RaftPeerPB follower;
+  for (const auto& peer : cstate.committed_config().peers()) {
+    if (peer.permanent_uuid() != cstate.leader_uuid()) {
+      follower = peer;
+    }
+  }
+
+  vector<consensus::BulkChangeConfigRequestPB::ConfigChangeItemPB> changes_pb;
+  consensus::BulkChangeConfigRequestPB::ConfigChangeItemPB change_pb;
+  change_pb.set_type(consensus::MODIFY_PEER);
+  RaftPeerPB* peer = change_pb.mutable_peer();
+  peer->set_permanent_uuid(follower.permanent_uuid());
+  peer->mutable_attrs()->set_replace(true);
+  changes_pb.emplace_back(std::move(change_pb));
+  NO_FATALS(BulkChangeConfig(ts_map_[cstate.leader_uuid()], tablet_id, 
changes_pb,
+                            MonoDelta::FromSeconds(30)));
+
+  uint32_t index = 
cluster_->tablet_server_index_by_uuid(follower.permanent_uuid());
+  NO_FATALS(WaitForTabletTombstonedOnTS(index, tablet_id, CMETA_NOT_EXPECTED));
+
+  // Now delete the table and the tombstoned tablet should remain.
+  NO_FATALS(DeleteTable(TestWorkload::kDefaultTableName));
+  ASSERT_EVENTUALLY([&] {
+    master::ListTablesResponsePB table;
+    ListTablesWithInfo(cluster_->master_proxy(), 
TestWorkload::kDefaultTableName,
+                       MonoDelta::FromSeconds(10), &table);
+    ASSERT_EQ(0, table.tables_size());
+  });
+  ASSERT_OK(CheckTabletTombstonedOnTS(index, tablet_id, CMETA_NOT_EXPECTED));
+
+  // Set the newly added flag to make the next incremental report of the 
tserver include
+  // the tombstoned replicas.
+  ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(index),
+                              
"tserver_send_tombstoned_tablets_report_inteval_secs", "0"));
+  AssertEventually([&] {
+    ASSERT_OK(CheckTabletDeletedOnTS(index, tablet_id, 
SUPERBLOCK_NOT_EXPECTED));
+  });
+}
+
 // Parameterized test case for TABLET_DATA_DELETED deletions.
 class DeleteTableDeletedParamTest : public DeleteTableITest,
                                     public ::testing::WithParamInterface<const 
char*> {
diff --git a/src/kudu/tserver/heartbeater.cc b/src/kudu/tserver/heartbeater.cc
index 4828a38ce..94757699b 100644
--- a/src/kudu/tserver/heartbeater.cc
+++ b/src/kudu/tserver/heartbeater.cc
@@ -107,6 +107,18 @@ DEFINE_uint32(heartbeat_inject_required_feature_flag, 0,
 TAG_FLAG(heartbeat_inject_required_feature_flag, runtime);
 TAG_FLAG(heartbeat_inject_required_feature_flag, unsafe);
 
+DEFINE_int32(tserver_send_tombstoned_tablets_report_inteval_secs, 1800,
+             "Time interval in seconds of sending a incremental tablets report 
"
+             "to delete the tombstoned replicas whose tablets had already been 
"
+             "deleted. Turn off this by setting it to a value less than 0. If "
+             "this interval is set to less than the heartbeat interval, the "
+             "tablets report will only be sent at the heartbeat interval. And "
+             "also, please set this interval less than the flag "
+             "metadata_for_deleted_table_and_tablet_reserved_secs of master if 
"
+             "enable_metadata_cleanup_for_deleted_tables_and_tablets is set 
true. "
+             "Otherwise, the tombstoned tablets probably could not be 
deleted.");
+TAG_FLAG(tserver_send_tombstoned_tablets_report_inteval_secs, runtime);
+
 DECLARE_bool(raft_prepare_replacement_before_eviction);
 
 using kudu::consensus::ReplicaManagementInfoPB;
@@ -139,7 +151,7 @@ class Heartbeater::Thread {
   Status Stop();
   void TriggerASAP();
   void MarkTabletsDirty(const vector<string>& tablet_ids, const string& 
reason);
-  void GenerateIncrementalTabletReport(TabletReportPB* report);
+  void GenerateIncrementalTabletReport(TabletReportPB* report, bool 
including_tombstoned);
   void GenerateFullTabletReport(TabletReportPB* report);
 
   // Mark that the master successfully received and processed the given
@@ -216,6 +228,8 @@ class Heartbeater::Thread {
   // Indicates that the thread should send a full tablet report. Set when
   // the thread detects that the master has been elected leader.
   bool send_full_tablet_report_;
+  // Time of sending last report with tombstoned tablets.
+  MonoTime last_tombstoned_report_time_;
 
   DISALLOW_COPY_AND_ASSIGN(Thread);
 };
@@ -279,7 +293,7 @@ vector<TabletReportPB> 
Heartbeater::GenerateIncrementalTabletReportsForTests() {
   vector<TabletReportPB> results;
   for (const auto& thread : threads_) {
     TabletReportPB report;
-    thread->GenerateIncrementalTabletReport(&report);
+    thread->GenerateIncrementalTabletReport(&report, false);
     results.emplace_back(std::move(report));
   }
   return results;
@@ -316,7 +330,8 @@ Heartbeater::Thread::Thread(HostPort master_address, 
TabletServer* server)
     cond_(&mutex_),
     should_run_(false),
     heartbeat_asap_(true),
-    send_full_tablet_report_(false) {
+    send_full_tablet_report_(false),
+    last_tombstoned_report_time_(MonoTime::Now()) {
 }
 
 Status Heartbeater::Thread::ConnectToMaster() {
@@ -483,7 +498,7 @@ Status Heartbeater::Thread::DoHeartbeat(MasterErrorPB* 
error,
   // Send the most recently known TSK sequence number so that the master can
   // send us knew ones if they exist.
   
req.set_latest_tsk_seq_num(server_->token_verifier().GetMaxKnownKeySequenceNumber());
-
+  bool including_tombstoned = false;
   if (send_full_tablet_report_) {
     LOG(INFO) << Substitute(
         "Master $0 was elected leader, sending a full tablet report...",
@@ -500,7 +515,11 @@ Status Heartbeater::Thread::DoHeartbeat(MasterErrorPB* 
error,
   } else {
     VLOG(2) << Substitute("Sending an incremental tablet report to master 
$0...",
                           master_address_.ToString());
-    GenerateIncrementalTabletReport(req.mutable_tablet_report());
+    // Check if it is time to send a report with tombstoned replicas.
+    including_tombstoned = 
FLAGS_tserver_send_tombstoned_tablets_report_inteval_secs >= 0 &&
+        (MonoTime::Now() - last_tombstoned_report_time_).ToSeconds() >=
+        FLAGS_tserver_send_tombstoned_tablets_report_inteval_secs;
+    GenerateIncrementalTabletReport(req.mutable_tablet_report(), 
including_tombstoned);
   }
 
   req.set_num_live_tablets(server_->tablet_manager()->GetNumLiveTablets());
@@ -754,7 +773,8 @@ void Heartbeater::Thread::MarkTabletsDirty(const 
vector<string>& tablet_ids,
   }
 }
 
-void Heartbeater::Thread::GenerateIncrementalTabletReport(TabletReportPB* 
report) {
+void Heartbeater::Thread::GenerateIncrementalTabletReport(TabletReportPB* 
report,
+                                                          bool 
including_tombstoned) {
   report->Clear();
   report->set_sequence_number(next_report_seq_.fetch_add(1));
   report->set_is_incremental(true);
@@ -764,7 +784,7 @@ void 
Heartbeater::Thread::GenerateIncrementalTabletReport(TabletReportPB* report
     AppendKeysFromMap(dirty_tablets_, &dirty_tablet_ids);
   }
   server_->tablet_manager()->PopulateIncrementalTabletReport(
-      report, dirty_tablet_ids);
+      report, dirty_tablet_ids, including_tombstoned);
 }
 
 void Heartbeater::Thread::GenerateFullTabletReport(TabletReportPB* report) {
diff --git a/src/kudu/tserver/ts_tablet_manager.cc 
b/src/kudu/tserver/ts_tablet_manager.cc
index fa5c9c9bf..e1acb3143 100644
--- a/src/kudu/tserver/ts_tablet_manager.cc
+++ b/src/kudu/tserver/ts_tablet_manager.cc
@@ -1771,7 +1771,8 @@ void 
TSTabletManager::PopulateFullTabletReport(TabletReportPB* report) const {
 }
 
 void TSTabletManager::PopulateIncrementalTabletReport(TabletReportPB* report,
-                                                      const vector<string>& 
tablet_ids) const {
+                                                      const vector<string>& 
tablet_ids,
+                                                      bool 
including_tombstoned) const {
   // See comment in PopulateFullTabletReport for rationale on making a local
   // copy of the set of tablets to report.
   vector<scoped_refptr<tablet::TabletReplica>> to_report;
@@ -1789,6 +1790,13 @@ void 
TSTabletManager::PopulateIncrementalTabletReport(TabletReportPB* report,
         report->add_removed_tablet_ids(id);
       }
     }
+    if (including_tombstoned) {
+      for (const auto& entry : tablet_map_) {
+        if (entry.second->data_state() == tablet::TABLET_DATA_TOMBSTONED) {
+          to_report.push_back(entry.second);
+        }
+      }
+    }
   }
   for (const auto& replica : to_report) {
     CreateReportedTabletPB(replica, report->add_updated_tablets());
diff --git a/src/kudu/tserver/ts_tablet_manager.h 
b/src/kudu/tserver/ts_tablet_manager.h
index 865441f7f..18ece2b63 100644
--- a/src/kudu/tserver/ts_tablet_manager.h
+++ b/src/kudu/tserver/ts_tablet_manager.h
@@ -196,7 +196,8 @@ class TSTabletManager : public 
tserver::TabletReplicaLookupIf {
   // Adds updated tablet information to 'report'. Only tablets in 'tablet_ids'
   // are included.
   void PopulateIncrementalTabletReport(master::TabletReportPB* report,
-                                       const std::vector<std::string>& 
tablet_ids) const;
+                                       const std::vector<std::string>& 
tablet_ids,
+                                       bool including_tombstoned) const;
 
   // Get all the tablets currently hosted on this server.
   void GetTabletReplicas(

Reply via email to