This is an automated email from the ASF dual-hosted git repository. laiyingchun pushed a commit to branch branch-1.17.x in repository https://gitbox.apache.org/repos/asf/kudu.git
commit b3e64b07f73ed1ee756c6a338c2fa4e76e1e7265 Author: duyuqi <shenxingwuy...@gmail.com> AuthorDate: Fri Mar 10 19:44:08 2023 +0800 [master] Exclude tservers in MAINTENANCE_MODE when leader rebalancing Currently, auto leader rebalancing may transfer new leaders to tservers which have entered MAINTENANCE_MODE. Because a tserver in MAINTENANCE_MODE is being maintained by administrators, maybe it is decommissioning. Since it might be decommissioned/decommissioning, it doesn't make sense for the leader rebalancer to transfer a leader to a tserver in MAINTENANCE_MODE as leaders serve many users' requests. For this reason, we should exclude such tservers. This patch fixes the problem and adds an unit test for this. Change-Id: I2f85a675e69fd02a62e2625881dad2ca5e27acd9 Reviewed-on: http://gerrit.cloudera.org:8080/19608 Tested-by: Kudu Jenkins Reviewed-by: Yingchun Lai <laiyingc...@apache.org> Reviewed-by: Yuqi Du <shenxingwuy...@gmail.com> Reviewed-on: http://gerrit.cloudera.org:8080/19923 Tested-by: Yingchun Lai <laiyingc...@apache.org> --- src/kudu/master/auto_leader_rebalancer-test.cc | 75 +++++++++++++++++++++++++- src/kudu/master/auto_leader_rebalancer.cc | 22 +++++++- src/kudu/master/auto_leader_rebalancer.h | 2 + 3 files changed, 97 insertions(+), 2 deletions(-) diff --git a/src/kudu/master/auto_leader_rebalancer-test.cc b/src/kudu/master/auto_leader_rebalancer-test.cc index 8ff1d0d0e..5357bdd15 100644 --- a/src/kudu/master/auto_leader_rebalancer-test.cc +++ b/src/kudu/master/auto_leader_rebalancer-test.cc @@ -16,6 +16,7 @@ // under the License. #include "kudu/master/auto_leader_rebalancer.h" +#include <algorithm> #include <cstdint> #include <memory> #include <ostream> @@ -27,15 +28,20 @@ #include <glog/logging.h> #include <gtest/gtest.h> +#include "kudu/consensus/metadata.pb.h" #include "kudu/gutil/ref_counted.h" #include "kudu/integration-tests/test_workload.h" #include "kudu/master/catalog_manager.h" #include "kudu/master/master.h" +#include "kudu/master/master.pb.h" #include "kudu/master/mini_master.h" #include "kudu/master/ts_descriptor.h" #include "kudu/master/ts_manager.h" #include "kudu/mini-cluster/internal_mini_cluster.h" +#include "kudu/rpc/rpc_controller.h" #include "kudu/tserver/mini_tablet_server.h" +#include "kudu/tserver/tserver.pb.h" +#include "kudu/tserver/tserver_service.proxy.h" // IWYU pragma: keep #include "kudu/util/monotime.h" #include "kudu/util/status.h" #include "kudu/util/test_macros.h" @@ -50,8 +56,10 @@ class AutoRebalancerTask; using kudu::cluster::InternalMiniCluster; using kudu::cluster::InternalMiniClusterOptions; +using kudu::tserver::ListTabletsResponsePB; using std::string; using std::unique_ptr; +using std::vector; DECLARE_bool(auto_leader_rebalancing_enabled); DECLARE_bool(auto_rebalancing_enabled); @@ -118,7 +126,7 @@ class LeaderRebalancerTest : public KuduTest { } return catalog_manager->auto_leader_rebalancer()->RunLeaderRebalanceForTable( - table_info, tserver_uuids, AutoLeaderRebalancerTask::ExecuteMode::TEST); + table_info, tserver_uuids, {}, AutoLeaderRebalancerTask::ExecuteMode::TEST); } protected: @@ -203,5 +211,70 @@ TEST_F(LeaderRebalancerTest, RestartTserver) { ASSERT_OK(CheckLeaderBalance()); } +TEST_F(LeaderRebalancerTest, TestMaintenanceMode) { + SKIP_IF_SLOW_NOT_ALLOWED(); + constexpr const int kNumTServers = 3; + constexpr const int kNumTablets = 10; + cluster_opts_.num_tablet_servers = kNumTServers; + FLAGS_leader_rebalancing_max_moves_per_round = 5; + ASSERT_OK(CreateAndStartCluster()); + + CreateWorkloadTable(kNumTablets, /*num_replicas*/ 3); + + // Leader master + master::Master* master = cluster_->mini_master()->master(); + + master::AutoLeaderRebalancerTask* leader_rebalancer = + master->catalog_manager()->auto_leader_rebalancer(); + ASSERT_NE(leader_rebalancer, nullptr); + + constexpr const int kCurrentTserverIndex = 0; + tserver::MiniTabletServer* mini_tserver = cluster_->mini_tablet_server(kCurrentTserverIndex); + // Sets the tserver state for a tserver to 'MAINTENANCE_MODE'. + ASSERT_OK( + master->ts_manager()->SetTServerState(mini_tserver->uuid(), + TServerStatePB::MAINTENANCE_MODE, + ChangeTServerStateRequestPB::ALLOW_MISSING_TSERVER, + master->catalog_manager()->sys_catalog())); + ASSERT_EQ(TServerStatePB::MAINTENANCE_MODE, + master->ts_manager()->GetTServerState(mini_tserver->uuid())); + // Restart the tserver to force transferring all leaders on it to make leaders not balanced. + mini_tserver->Shutdown(); + SleepFor(MonoDelta::FromMilliseconds(5 * FLAGS_heartbeat_interval_ms)); + ASSERT_OK(mini_tserver->Start()); + + // Try to run some 'leader rebalance' iterations. If mini_tserver is not in MAINTENANCE_MODE, + // it's enough to reach leader balanced, more tries is not necessary and less tries + // may not reach leader rebalanced. + constexpr const int32_t retries = std::max(kNumTablets / 2, 3); + for (int i = 0; i < retries; i++) { + ASSERT_OK(leader_rebalancer->RunLeaderRebalancer()); + if (CheckLeaderBalance().ok()) { + break; + } + SleepFor(MonoDelta::FromMilliseconds(FLAGS_heartbeat_interval_ms)); + } + // This cluster cannot reach the state of rebalanced leadership distribution since 1 of the 3 + // tservers is in maintenance mode. + Status status = CheckLeaderBalance(); + ASSERT_TRUE(status.IsIllegalState()) << status.ToString(); + + { + // The tserver in maintenance mode should have no leaders. + std::shared_ptr<tserver::TabletServerServiceProxy> proxy = + cluster_->tserver_proxy(kCurrentTserverIndex); + tserver::ListTabletsRequestPB req; + ListTabletsResponsePB resp; + rpc::RpcController rpc; + rpc.set_timeout(MonoDelta::FromMilliseconds(1000)); + ASSERT_OK(proxy->ListTablets(req, &resp, &rpc)); + ASSERT_FALSE(resp.has_error()); + ASSERT_FALSE(resp.status_and_schema().empty()); + for (const auto& replica : resp.status_and_schema()) { + ASSERT_NE(consensus::RaftPeerPB::LEADER, replica.role()); + } + } +} + } // namespace master } // namespace kudu diff --git a/src/kudu/master/auto_leader_rebalancer.cc b/src/kudu/master/auto_leader_rebalancer.cc index 1a5adf143..3f9030c9c 100644 --- a/src/kudu/master/auto_leader_rebalancer.cc +++ b/src/kudu/master/auto_leader_rebalancer.cc @@ -25,6 +25,8 @@ #include <ostream> #include <string> #include <type_traits> +#include <unordered_map> +#include <unordered_set> #include <utility> #include <vector> @@ -37,6 +39,7 @@ #include "kudu/consensus/metadata.pb.h" #include "kudu/gutil/macros.h" #include "kudu/gutil/map-util.h" +#include "kudu/gutil/port.h" #include "kudu/gutil/strings/substitute.h" #include "kudu/master/catalog_manager.h" #include "kudu/master/master.pb.h" @@ -62,6 +65,7 @@ using kudu::rpc::RpcController; using std::map; using std::nullopt; using std::string; +using std::unordered_set; using std::vector; using strings::Substitute; @@ -116,6 +120,7 @@ void AutoLeaderRebalancerTask::Shutdown() { Status AutoLeaderRebalancerTask::RunLeaderRebalanceForTable( const scoped_refptr<TableInfo>& table_info, const vector<string>& tserver_uuids, + const unordered_set<string>& exclude_dest_uuids, AutoLeaderRebalancerTask::ExecuteMode mode) { LOG(INFO) << Substitute("leader rebalance for table $0", table_info->table_name()); TableMetadataLock table_l(table_info.get(), LockMode::READ); @@ -243,6 +248,9 @@ Status AutoLeaderRebalancerTask::RunLeaderRebalanceForTable( } double min_score = 1; for (int j = 0; j < uuid_followers.size(); j++) { + if (ContainsKey(exclude_dest_uuids, uuid_followers[j])) { + continue; + } std::pair<int32_t, int32_t>& replica_and_leader_count = replica_and_leader_count_by_ts_uuid[uuid_followers[j]]; int32_t replica_count = replica_and_leader_count.first; @@ -328,6 +336,10 @@ Status AutoLeaderRebalancerTask::RunLeaderRebalanceForTable( if (!ts_manager_->LookupTSByUUID(leader_uuid, &leader_desc)) { continue; } + if (PREDICT_FALSE(TServerStatePB::MAINTENANCE_MODE == + ts_manager_->GetTServerState(task.second.second))) { + continue; + } vector<Sockaddr> resolved; RETURN_NOT_OK(host_port->ResolveAddresses(&resolved)); @@ -380,6 +392,14 @@ Status AutoLeaderRebalancerTask::RunLeaderRebalancer() { tserver_uuids.emplace_back(e->permanent_uuid()); } + // Avoid transferring leaders to tservers that are in MAINTENANCE_MODE. + auto tserver_state_by_uuid = ts_manager_->GetTServerStates(); + unordered_set<string> exclude_dest_uuids; + for (const auto& uuid_state : tserver_state_by_uuid) { + if (uuid_state.second.first == TServerStatePB::MAINTENANCE_MODE) { + exclude_dest_uuids.insert(uuid_state.first); + } + } vector<scoped_refptr<TableInfo>> table_infos; { CatalogManager::ScopedLeaderSharedLock leader_lock(catalog_manager_); @@ -387,7 +407,7 @@ Status AutoLeaderRebalancerTask::RunLeaderRebalancer() { catalog_manager_->GetAllTables(&table_infos); } for (const auto& table_info : table_infos) { - RunLeaderRebalanceForTable(table_info, tserver_uuids); + RunLeaderRebalanceForTable(table_info, tserver_uuids, exclude_dest_uuids); } // @TODO(duyuqi) // Enrich the log and add metrics for leader rebalancer. diff --git a/src/kudu/master/auto_leader_rebalancer.h b/src/kudu/master/auto_leader_rebalancer.h index 004e19b1d..4dd19ab64 100644 --- a/src/kudu/master/auto_leader_rebalancer.h +++ b/src/kudu/master/auto_leader_rebalancer.h @@ -21,6 +21,7 @@ #include <memory> #include <random> #include <string> +#include <unordered_set> #include <vector> #include "kudu/gutil/ref_counted.h" @@ -75,6 +76,7 @@ class AutoLeaderRebalancerTask { Status RunLeaderRebalanceForTable( const scoped_refptr<TableInfo>& table_info, const std::vector<std::string>& tserver_uuids, + const std::unordered_set<std::string>& exclude_dest_uuids, AutoLeaderRebalancerTask::ExecuteMode mode = AutoLeaderRebalancerTask::ExecuteMode::NORMAL); // Only one task can be scheduled at a time.