This is an automated email from the ASF dual-hosted git repository.

awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 9d01e10  KUDU-2915: add tool to unregister a tablet server
9d01e10 is described below

commit 9d01e1046249a815f26c7b5ebb1ceb2b67f72b9e
Author: zhangyifan27 <chinazhangyi...@163.com>
AuthorDate: Wed Dec 29 15:38:40 2021 +0800

    KUDU-2915: add tool to unregister a tablet server
    
    Add a 'kudu tserver unregister' tool to unregister a tserver from the
    master. This tool will be useful when we want to decommission a tserver
    without restarting masters.
    
    By default, this tool unregisters a dead tserver from the master's
    in-memory map and removes its persisted state from the catalog table.
    It's also possible to unregister a tserver that is not presumed dead by
    adding '-force_unregister_live_tserver', or to keep the tserver's
    persisted state by adding '-remove_tserver_state=false'.
    
    Change-Id: If1f5c2979a8d14428f4bcc8e850c57ce228c793a
    Reviewed-on: http://gerrit.cloudera.org:8080/18124
    Reviewed-by: Alexey Serbin <aser...@cloudera.com>
    Reviewed-by: Andrew Wong <aw...@cloudera.com>
    Tested-by: Kudu Jenkins
---
 src/kudu/master/catalog_manager.cc    |   1 +
 src/kudu/master/master.proto          |  20 +++++
 src/kudu/master/master_service.cc     |  16 ++++
 src/kudu/master/master_service.h      |   6 ++
 src/kudu/master/ts_manager.cc         |  15 ++++
 src/kudu/master/ts_manager.h          |   3 +
 src/kudu/tools/kudu-tool-test.cc      | 137 ++++++++++++++++++++++++++++++++++
 src/kudu/tools/tool_action_common.cc  |  62 +++++++++++++++
 src/kudu/tools/tool_action_common.h   |   5 ++
 src/kudu/tools/tool_action_master.cc  |  63 ----------------
 src/kudu/tools/tool_action_tserver.cc |  54 ++++++++++++++
 11 files changed, 319 insertions(+), 63 deletions(-)

diff --git a/src/kudu/master/catalog_manager.cc 
b/src/kudu/master/catalog_manager.cc
index 94fb6ac..0a2a830 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -6231,6 +6231,7 @@ bool 
CatalogManager::ScopedLeaderSharedLock::CheckIsInitializedAndIsLeaderOrResp
 
 INITTED_OR_RESPOND(ConnectToMasterResponsePB);
 INITTED_OR_RESPOND(GetMasterRegistrationResponsePB);
+INITTED_OR_RESPOND(UnregisterTServerResponsePB);
 INITTED_OR_RESPOND(TSHeartbeatResponsePB);
 INITTED_AND_LEADER_OR_RESPOND(AddMasterResponsePB);
 INITTED_AND_LEADER_OR_RESPOND(AlterTableResponsePB);
diff --git a/src/kudu/master/master.proto b/src/kudu/master/master.proto
index b9ded52..2967f24 100644
--- a/src/kudu/master/master.proto
+++ b/src/kudu/master/master.proto
@@ -1070,6 +1070,22 @@ message RefreshAuthzCacheResponsePB {
   optional MasterErrorPB error = 1;
 }
 
+// UnregisterTServerRequestPB/ResponsePB: remove a tablet server from the
+// master's in-memory map; its persisted state (if any) is left untouched.
+message UnregisterTServerRequestPB {
+  // The tserver UUID to be unregistered.
+  optional string uuid = 1;
+
+  // Whether to unregister the tserver even if it is not presumed to be dead,
+  // per --tserver_unresponsive_timeout_ms; if false, an error is returned instead.
+  // Disabled by default to make sure the tserver has been brought down.
+  optional bool force_unregister_live_tserver = 2 [default = false];
+}
+
+message UnregisterTServerResponsePB {
+  optional MasterErrorPB error = 1;
+}
+
 enum MasterFeatures {
   UNKNOWN_FEATURE = 0;
   // The master supports creating tables with non-covering range partitions.
@@ -1184,6 +1200,10 @@ service MasterService {
     option (kudu.rpc.authz_method) = "AuthorizeSuperUser";
   }
 
+  rpc UnregisterTServer(UnregisterTServerRequestPB) returns 
(UnregisterTServerResponsePB) {
+    option (kudu.rpc.authz_method) = "AuthorizeSuperUser";
+  }
+
   // Master->Master RPCs
   // ------------------------------------------------------------
 
diff --git a/src/kudu/master/master_service.cc 
b/src/kudu/master/master_service.cc
index 71d84f2..0d0305d 100644
--- a/src/kudu/master/master_service.cc
+++ b/src/kudu/master/master_service.cc
@@ -331,6 +331,22 @@ void MasterServiceImpl::RemoveMaster(const 
RemoveMasterRequestPB* req,
   // See completion_cb in CatalogManager::InitiateMasterChangeConfig().
 }
 
+void MasterServiceImpl::UnregisterTServer(const UnregisterTServerRequestPB* 
req,
+                                          UnregisterTServerResponsePB* resp,
+                                          rpc::RpcContext* rpc) {
+  const auto& ts_uuid = req->uuid();
+  bool force_unregister_live_tserver = req->force_unregister_live_tserver();
+
+  Status s = server_->ts_manager()->UnregisterTServer(ts_uuid, 
force_unregister_live_tserver);
+  if (PREDICT_FALSE(!s.ok() && !s.IsNotFound())) {
+    // Ignore the NotFound error to make this RPC retriable and effectively 
idempotent.
+    StatusToPB(s, resp->mutable_error()->mutable_status());
+    resp->mutable_error()->set_code(MasterErrorPB::UNKNOWN_ERROR);
+  }
+
+  rpc->RespondSuccess();
+}
+
 void MasterServiceImpl::TSHeartbeat(const TSHeartbeatRequestPB* req,
                                     TSHeartbeatResponsePB* resp,
                                     rpc::RpcContext* rpc) {
diff --git a/src/kudu/master/master_service.h b/src/kudu/master/master_service.h
index 011a4bb..f86a225 100644
--- a/src/kudu/master/master_service.h
+++ b/src/kudu/master/master_service.h
@@ -75,6 +75,8 @@ class RefreshAuthzCacheRequestPB;
 class RefreshAuthzCacheResponsePB;
 class RemoveMasterRequestPB;
 class RemoveMasterResponsePB;
+class UnregisterTServerRequestPB;
+class UnregisterTServerResponsePB;
 class ReplaceTabletRequestPB;
 class ReplaceTabletResponsePB;
 class TSHeartbeatRequestPB;
@@ -114,6 +116,10 @@ class MasterServiceImpl : public MasterServiceIf {
   void RemoveMaster(const RemoveMasterRequestPB* req,
                     RemoveMasterResponsePB* resp, rpc::RpcContext* rpc) 
override;
 
+  void UnregisterTServer(const UnregisterTServerRequestPB* req,
+                         UnregisterTServerResponsePB* resp,
+                         rpc::RpcContext* rpc) override;
+
   void Ping(const PingRequestPB* req,
             PingResponsePB* resp,
             rpc::RpcContext* rpc) override;
diff --git a/src/kudu/master/ts_manager.cc b/src/kudu/master/ts_manager.cc
index 928e352..78e9930 100644
--- a/src/kudu/master/ts_manager.cc
+++ b/src/kudu/master/ts_manager.cc
@@ -316,6 +316,21 @@ void TSManager::SetAllTServersNeedFullTabletReports() {
   }
 }
 
+Status TSManager::UnregisterTServer(const std::string& ts_uuid,
+                                    bool force_unregister_live_tserver) {
+  lock_guard<rw_spinlock> l(lock_);
+  shared_ptr<TSDescriptor> ts_desc;
+  if (!FindCopy(servers_by_id_, ts_uuid, &ts_desc)) {
+    return Status::NotFound(Substitute("Requested tserver $0 has not been 
registered", ts_uuid));
+  }
+
+  if (!force_unregister_live_tserver && !ts_desc->PresumedDead()) {
+    return Status::IllegalState(Substitute("TServer $0 is not presumed dead.", 
ts_uuid));
+  }
+  servers_by_id_.erase(ts_uuid);
+  return Status::OK();
+}
+
 int TSManager::ClusterSkew() const {
   int min_count = std::numeric_limits<int>::max();
   int max_count = 0;
diff --git a/src/kudu/master/ts_manager.h b/src/kudu/master/ts_manager.h
index 464dfa6..9b9fe53 100644
--- a/src/kudu/master/ts_manager.h
+++ b/src/kudu/master/ts_manager.h
@@ -125,6 +125,9 @@ class TSManager {
   // Resets the tserver states and reloads them from disk.
   Status ReloadTServerStates(SysCatalogTable* sys_catalog);
 
+  // Remove the tserver from 'servers_by_id_'.
+  Status UnregisterTServer(const std::string& ts_uuid, bool 
force_unregister_live_tserver);
+
  private:
   friend class TServerStateLoader;
 
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index c34bdd0..69b5471 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -143,6 +143,8 @@ DECLARE_bool(hive_metastore_sasl_enabled);
 DECLARE_bool(show_values);
 DECLARE_bool(show_attributes);
 DECLARE_int32(catalog_manager_inject_latency_load_ca_info_ms);
+DECLARE_int32(heartbeat_interval_ms);
+DECLARE_int32(tserver_unresponsive_timeout_ms);
 DECLARE_int32(rpc_negotiation_inject_delay_ms);
 DECLARE_string(block_manager);
 DECLARE_string(hive_metastore_uris);
@@ -7447,6 +7449,141 @@ TEST_F(ToolTest, TestNonDefaultPrincipal) {
                          "--sasl_protocol_name=oryx",
                          
HostPort::ToCommaSeparatedString(cluster_->master_rpc_addrs())}));
 }
+class UnregisterTServerTest : public ToolTest, public 
::testing::WithParamInterface<bool> {
+ public:
+  void StartCluster() {
+    // Test on a multi-master cluster.
+    InternalMiniClusterOptions opts;
+    opts.num_masters = 3;
+    StartMiniCluster(std::move(opts));
+  }
+
+  string GetMasterAddrsStr() {
+    vector<string> master_addrs;
+    for (const auto& hp : mini_cluster_->master_rpc_addrs()) {
+      master_addrs.emplace_back(hp.ToString());
+    }
+    return JoinStrings(master_addrs, ",");
+  }
+};
+
+INSTANTIATE_TEST_SUITE_P(, UnregisterTServerTest, ::testing::Bool());
+
+TEST_P(UnregisterTServerTest, TestUnregisterTServer) {
+  bool remove_tserver_state = GetParam();
+
+  // Set a short timeout after which masters consider a tserver dead.
+  FLAGS_tserver_unresponsive_timeout_ms = 3000;
+  NO_FATALS(StartCluster());
+  const string master_addrs_str = GetMasterAddrsStr();
+  MiniTabletServer* ts = mini_cluster_->mini_tablet_server(0);
+  const string ts_uuid = ts->uuid();
+  const string ts_hostport = ts->bound_rpc_addr().ToString();
+
+  // Enter maintenance mode on the tserver and shut it down.
+  ASSERT_OK(RunKuduTool({"tserver", "state", "enter_maintenance", 
master_addrs_str, ts_uuid}));
+  ts->Shutdown();
+
+  {
+    string out;
+    string err;
+    // Running ksck returns an error and the output contains the dead tserver.
+    Status s =
+        RunActionStdoutStderrString(Substitute("cluster ksck $0", 
master_addrs_str), &out, &err);
+    ASSERT_TRUE(s.IsRuntimeError());
+    ASSERT_STR_CONTAINS(out, Substitute("$0 | $1 | UNAVAILABLE", ts_uuid, 
ts_hostport));
+  }
+  // Wait for the tserver to become dead.
+  ASSERT_EVENTUALLY(
+      [&] { ASSERT_EQ(0, 
mini_cluster_->mini_master(0)->master()->ts_manager()->GetLiveCount()); });
+
+  // Unregister the tserver.
+  ASSERT_OK(RunKuduTool({"tserver",
+                         "unregister",
+                         master_addrs_str,
+                         ts_uuid,
+                         Substitute("-remove_tserver_state=$0", 
remove_tserver_state)}));
+  {
+    // Run ksck and get no error.
+    string out;
+    NO_FATALS(RunActionStdoutString(Substitute("cluster ksck $0", 
master_addrs_str), &out));
+    if (remove_tserver_state) {
+      // Both the persisted state and registration of the tserver were removed.
+      ASSERT_STR_NOT_CONTAINS(out, Substitute(" $0 | MAINTENANCE_MODE", 
ts_uuid));
+      ASSERT_STR_NOT_CONTAINS(out, ts_uuid);
+    } else {
+      // Only the registration of the tserver was removed.
+      ASSERT_STR_CONTAINS(out, Substitute(" $0 | MAINTENANCE_MODE", ts_uuid));
+      ASSERT_STR_NOT_CONTAINS(out, Substitute("$0 | $1 | UNAVAILABLE", 
ts_uuid, ts_hostport));
+    }
+  }
+
+  // Restart the tserver and re-register it on masters.
+  ts->Start();
+  {
+    string out;
+    ASSERT_EVENTUALLY([&]() {
+      NO_FATALS(RunActionStdoutString(Substitute("cluster ksck $0", 
master_addrs_str), &out));
+    });
+    if (remove_tserver_state) {
+      // The tserver came back as a brand new tserver.
+      ASSERT_STR_NOT_CONTAINS(out, Substitute(" $0 | MAINTENANCE_MODE", 
ts_uuid));
+      ASSERT_STR_CONTAINS(out, ts_uuid);
+    } else {
+      // The tserver got its original maintenance state.
+      ASSERT_STR_CONTAINS(out, Substitute(" $0 | MAINTENANCE_MODE", ts_uuid));
+      ASSERT_STR_CONTAINS(out, ts_uuid);
+    }
+  }
+}
+
+TEST_F(UnregisterTServerTest, TestUnregisterTServerNotPresumedDead) {
+  // Reduce the TS<->Master heartbeat interval to speed up testing.
+  FLAGS_heartbeat_interval_ms = 100;
+  NO_FATALS(StartCluster());
+  const string master_addrs_str = GetMasterAddrsStr();
+  MiniTabletServer* ts = mini_cluster_->mini_tablet_server(0);
+  const string ts_uuid = ts->uuid();
+  const string ts_hostport = ts->bound_rpc_addr().ToString();
+
+  // Shut down the tserver.
+  ts->Shutdown();
+  // Get an error because the tserver is not presumed dead by masters.
+  {
+    string out;
+    string err;
+    Status s = RunActionStdoutStderrString(
+        Substitute("tserver unregister $0 $1", master_addrs_str, ts_uuid), 
&out, &err);
+    ASSERT_TRUE(s.IsRuntimeError());
+    ASSERT_STR_CONTAINS(err, ts_uuid);
+  }
+  // The ksck output contains the dead tserver.
+  {
+    string out;
+    string err;
+    Status s =
+        RunActionStdoutStderrString(Substitute("cluster ksck $0", 
master_addrs_str), &out, &err);
+    ASSERT_TRUE(s.IsRuntimeError());
+    ASSERT_STR_CONTAINS(out, Substitute("$0 | $1 | UNAVAILABLE", ts_uuid, 
ts_hostport));
+  }
+
+  // We can still force-unregister the tserver.
+  ASSERT_OK(RunKuduTool(
+      {"tserver", "unregister", master_addrs_str, ts_uuid, 
"-force_unregister_live_tserver"}));
+  {
+    string out;
+    NO_FATALS(RunActionStdoutString(Substitute("cluster ksck $0", 
master_addrs_str), &out));
+    ASSERT_STR_NOT_CONTAINS(out, ts_uuid);
+  }
+
+  // After several heartbeat intervals, the tserver still does not appear in ksck output.
+  SleepFor(MonoDelta::FromMilliseconds(3 * FLAGS_heartbeat_interval_ms));
+  {
+    string out;
+    NO_FATALS(RunActionStdoutString(Substitute("cluster ksck $0", 
master_addrs_str), &out));
+    ASSERT_STR_NOT_CONTAINS(out, ts_uuid);
+  }
+}
 
 } // namespace tools
 } // namespace kudu
diff --git a/src/kudu/tools/tool_action_common.cc 
b/src/kudu/tools/tool_action_common.cc
index a8cfb03..a89e0b4 100644
--- a/src/kudu/tools/tool_action_common.cc
+++ b/src/kudu/tools/tool_action_common.cc
@@ -23,8 +23,10 @@
 #include <iomanip>
 #include <iostream>
 #include <iterator>
+#include <map>
 #include <memory>
 #include <numeric>
+#include <set>
 #include <stack>
 #include <string>
 #include <unordered_map>
@@ -60,6 +62,7 @@
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/gutil/strings/util.h"
 #include "kudu/master/master.h"
+#include "kudu/master/master.pb.h"
 #include "kudu/master/master.proxy.h" // IWYU pragma: keep
 #include "kudu/rpc/messenger.h"
 #include "kudu/rpc/response_callback.h"
@@ -196,6 +199,8 @@ using kudu::consensus::ReplicateMsg;
 using kudu::log::LogEntryPB;
 using kudu::log::LogEntryReader;
 using kudu::log::ReadableLogSegment;
+using kudu::master::ConnectToMasterRequestPB;
+using kudu::master::ConnectToMasterResponsePB;
 using kudu::master::MasterServiceProxy;
 using kudu::pb_util::SecureDebugString;
 using kudu::pb_util::SecureShortDebugString;
@@ -222,7 +227,9 @@ using kudu::tserver::TabletServerServiceProxy; // NOLINT
 using kudu::tserver::WriteRequestPB;
 using std::cout;
 using std::endl;
+using std::map;
 using std::ostream;
+using std::set;
 using std::setfill;
 using std::setw;
 using std::shared_ptr;
@@ -744,6 +751,61 @@ Status MasterAddressesToSet(
   return Status::OK();
 }
 
+Status VerifyMasterAddressList(const vector<string>& master_addresses) {
+  map<string, set<string>> addresses_per_master;
+  for (const auto& address : master_addresses) {
+    unique_ptr<MasterServiceProxy> proxy;
+    RETURN_NOT_OK(BuildProxy(address, master::Master::kDefaultPort, &proxy));
+
+    RpcController ctl;
+    ctl.set_timeout(MonoDelta::FromMilliseconds(FLAGS_timeout_ms));
+    ConnectToMasterRequestPB req;
+    ConnectToMasterResponsePB resp;
+    RETURN_NOT_OK(proxy->ConnectToMaster(req, &resp, &ctl));
+    const auto& resp_master_addrs = resp.master_addrs();
+    if (resp_master_addrs.size() != master_addresses.size()) {
+      const auto addresses_provided = JoinStrings(master_addresses, ",");
+      const auto addresses_cluster_config =
+          JoinMapped(resp_master_addrs,
+                     [](const HostPortPB& pb) { return Substitute("$0:$1", 
pb.host(), pb.port()); },
+                     ",");
+      return Status::InvalidArgument(
+          Substitute("list of master addresses provided ($0) "
+                     "does not match the actual cluster configuration ($1) ",
+                     addresses_provided,
+                     addresses_cluster_config));
+    }
+    set<string> addr_set;
+    for (const auto& hp : resp_master_addrs) {
+      addr_set.emplace(Substitute("$0:$1", hp.host(), hp.port()));
+    }
+    addresses_per_master.emplace(address, std::move(addr_set));
+  }
+
+  bool mismatch = false;
+  if (addresses_per_master.size() > 1) {
+    const auto it_0 = addresses_per_master.cbegin();
+    auto it_1 = addresses_per_master.begin();
+    ++it_1;
+    for (auto it = it_1; it != addresses_per_master.end(); ++it) {
+      if (it->second != it_0->second) {
+        mismatch = true;
+        break;
+      }
+    }
+  }
+
+  if (mismatch) {
+    string err_msg = Substitute("specified: ($0);", 
JoinStrings(master_addresses, ","));
+    for (const auto& e : addresses_per_master) {
+      err_msg += Substitute(" from master $0: ($1);", e.first, 
JoinStrings(e.second, ","));
+    }
+    return Status::ConfigurationError(Substitute("master address lists 
mismatch: $0", err_msg));
+  }
+
+  return Status::OK();
+}
+
 Status PrintServerStatus(const string& address, uint16_t default_port) {
   ServerStatusPB status;
   RETURN_NOT_OK(GetServerStatus(address, default_port, &status));
diff --git a/src/kudu/tools/tool_action_common.h 
b/src/kudu/tools/tool_action_common.h
index 562d426..d89b55c 100644
--- a/src/kudu/tools/tool_action_common.h
+++ b/src/kudu/tools/tool_action_common.h
@@ -236,6 +236,11 @@ Status MasterAddressesToSet(
     const std::string& master_addresses_arg,
     kudu::UnorderedHostPortSet* res);
 
+// Make sure the list of master addresses specified in 'master_addresses'
+// corresponds to the actual list of master addresses in the cluster,
+// as reported in ConnectToMasterResponsePB::master_addrs.
+Status VerifyMasterAddressList(const std::vector<std::string>& 
master_addresses);
+
 // A table of data to present to the user.
 //
 // Supports formatting based on the --format flag.
diff --git a/src/kudu/tools/tool_action_master.cc 
b/src/kudu/tools/tool_action_master.cc
index 4764be1..48cb94a 100644
--- a/src/kudu/tools/tool_action_master.cc
+++ b/src/kudu/tools/tool_action_master.cc
@@ -89,8 +89,6 @@ DEFINE_string(kudu_abs_path, "", "Absolute file path of the 
'kudu' executable us
 
 using kudu::master::AddMasterRequestPB;
 using kudu::master::AddMasterResponsePB;
-using kudu::master::ConnectToMasterRequestPB;
-using kudu::master::ConnectToMasterResponsePB;
 using kudu::master::ListMastersRequestPB;
 using kudu::master::ListMastersResponsePB;
 using kudu::master::Master;
@@ -605,67 +603,6 @@ Status MasterDumpMemTrackers(const RunnerContext& context) 
{
   return DumpMemTrackers(address, Master::kDefaultPort);
 }
 
-// Make sure the list of master addresses specified in 'master_addresses'
-// corresponds to the actual list of masters addresses in the cluster,
-// as reported in ConnectToMasterResponsePB::master_addrs.
-Status VerifyMasterAddressList(const vector<string>& master_addresses) {
-  map<string, set<string>> addresses_per_master;
-  for (const auto& address : master_addresses) {
-    unique_ptr<MasterServiceProxy> proxy;
-    RETURN_NOT_OK(BuildProxy(address, Master::kDefaultPort, &proxy));
-
-    RpcController ctl;
-    ctl.set_timeout(MonoDelta::FromMilliseconds(FLAGS_timeout_ms));
-    ConnectToMasterRequestPB req;
-    ConnectToMasterResponsePB resp;
-    RETURN_NOT_OK(proxy->ConnectToMaster(req, &resp, &ctl));
-    const auto& resp_master_addrs = resp.master_addrs();
-    if (resp_master_addrs.size() != master_addresses.size()) {
-      const auto addresses_provided = JoinStrings(master_addresses, ",");
-      const auto addresses_cluster_config = JoinMapped(
-          resp_master_addrs,
-          [](const HostPortPB& pb) {
-            return Substitute("$0:$1", pb.host(), pb.port());
-          }, ",");
-      return Status::InvalidArgument(Substitute(
-          "list of master addresses provided ($0) "
-          "does not match the actual cluster configuration ($1) ",
-          addresses_provided, addresses_cluster_config));
-    }
-    set<string> addr_set;
-    for (const auto& hp : resp_master_addrs) {
-      addr_set.emplace(Substitute("$0:$1", hp.host(), hp.port()));
-    }
-    addresses_per_master.emplace(address, std::move(addr_set));
-  }
-
-  bool mismatch = false;
-  if (addresses_per_master.size() > 1) {
-    const auto it_0 = addresses_per_master.cbegin();
-    auto it_1 = addresses_per_master.begin();
-    ++it_1;
-    for (auto it = it_1; it != addresses_per_master.end(); ++it) {
-      if (it->second != it_0->second) {
-        mismatch = true;
-        break;
-      }
-    }
-  }
-
-  if (mismatch) {
-    string err_msg = Substitute("specified: ($0);",
-                                JoinStrings(master_addresses, ","));
-    for (const auto& e : addresses_per_master) {
-      err_msg += Substitute(" from master $0: ($1);",
-                            e.first, JoinStrings(e.second, ","));
-    }
-    return Status::ConfigurationError(
-        Substitute("master address lists mismatch: $0", err_msg));
-  }
-
-  return Status::OK();
-}
-
 Status PrintRebuildReport(const RebuildReport& rebuild_report) {
   cout << "Rebuild Report" << endl;
   cout << "Tablet Servers" << endl;
diff --git a/src/kudu/tools/tool_action_tserver.cc 
b/src/kudu/tools/tool_action_tserver.cc
index 1b3ab8e..7947395 100644
--- a/src/kudu/tools/tool_action_tserver.cc
+++ b/src/kudu/tools/tool_action_tserver.cc
@@ -34,6 +34,7 @@
 #include "kudu/gutil/strings/numbers.h"
 #include "kudu/gutil/strings/split.h"
 #include "kudu/gutil/strings/substitute.h"
+#include "kudu/master/master.h"
 #include "kudu/master/master.pb.h"
 #include "kudu/master/master.proxy.h"
 #include "kudu/rpc/response_callback.h"
@@ -57,6 +58,14 @@ DEFINE_bool(error_if_not_fully_quiesced, false, "If true, 
the command to start "
     "quiescing will return an error if the tserver is not fully quiesced, i.e. 
"
     "there are still tablet leaders or active scanners on it.");
 
+DEFINE_bool(force_unregister_live_tserver, false,
+            "If true, force the unregistration of the tserver even if it is 
not presumed dead "
+            "by the master. Make sure the tserver has been shut down before 
setting this true.");
+DEFINE_bool(remove_tserver_state, true,
+            "If false, remove the tserver from the master's in-memory map but 
keep its persisted "
+            "state (if any). If the same tserver re-registers on the master it 
will get its "
+            "original state");
+
 DECLARE_string(columns);
 
 using std::cout;
@@ -72,6 +81,8 @@ using master::ChangeTServerStateResponsePB;
 using master::ListTabletServersRequestPB;
 using master::ListTabletServersResponsePB;
 using master::MasterServiceProxy;
+using master::UnregisterTServerRequestPB;
+using master::UnregisterTServerResponsePB;
 using master::TServerStateChangePB;
 using rpc::RpcController;
 using tserver::QuiesceTabletServerRequestPB;
@@ -296,6 +307,39 @@ Status QuiescingStatus(const RunnerContext& context) {
   return table.PrintTo(cout);
 }
 
+Status UnregisterTServer(const RunnerContext& context) {
+  const auto& ts_uuid = FindOrDie(context.required_args, kTServerIdArg);
+  vector<string> master_addresses;
+  RETURN_NOT_OK(ParseMasterAddresses(context, &master_addresses));
+  RETURN_NOT_OK(VerifyMasterAddressList(master_addresses));
+  if (FLAGS_remove_tserver_state) {
+    // We don't care about FLAGS_allow_missing_tserver because it doesn't
+    // make sense for ExitMaintenance.
+    RETURN_NOT_OK(ExitMaintenance(context));
+  }
+
+  string err_str;
+  for (const auto& address : master_addresses) {
+    unique_ptr<MasterServiceProxy> proxy;
+    RETURN_NOT_OK(BuildProxy(address, master::Master::kDefaultPort, &proxy));
+    UnregisterTServerRequestPB req;
+    req.set_uuid(ts_uuid);
+    req.set_force_unregister_live_tserver(FLAGS_force_unregister_live_tserver);
+    UnregisterTServerResponsePB resp;
+    RpcController rpc;
+    Status s = proxy->UnregisterTServer(req, &resp, &rpc);
+    if (!s.ok() || resp.has_error()) {
+      err_str += Substitute(" Unable to unregister the tserver from master $0, 
status: $1",
+                            address,
+                            StatusFromPB(resp.error().status()).ToString());
+    }
+  }
+  if (err_str.empty()) {
+    return Status::OK();
+  }
+  return Status::RemoteError(err_str);
+}
+
 } // anonymous namespace
 
 unique_ptr<Mode> BuildTServerMode() {
@@ -412,6 +456,15 @@ unique_ptr<Mode> BuildTServerMode() {
       .AddAction(std::move(exit_maintenance))
       .Build();
 
+  unique_ptr<Action> unregister_tserver =
+      ClusterActionBuilder("unregister", &UnregisterTServer)
+          .Description(
+              "Unregister a tablet server from the master's in-memory state 
and system catalog.")
+          .AddRequiredParameter({kTServerIdArg, kTServerIdDesc})
+          .AddOptionalParameter("force_unregister_live_tserver")
+          .AddOptionalParameter("remove_tserver_state")
+          .Build();
+
   return ModeBuilder("tserver")
       .Description("Operate on a Kudu Tablet Server")
       .AddAction(std::move(dump_memtrackers))
@@ -421,6 +474,7 @@ unique_ptr<Mode> BuildTServerMode() {
       .AddAction(std::move(status))
       .AddAction(std::move(timestamp))
       .AddAction(std::move(list_tservers))
+      .AddAction(std::move(unregister_tserver))
       .AddMode(std::move(quiesce))
       .AddMode(std::move(state))
       .Build();

Reply via email to