This is an automated email from the ASF dual-hosted git repository. laiyingchun pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push: new d7fb77f4c Leader rebalance ignores soft deleted tables d7fb77f4c is described below commit d7fb77f4cfbf6d638c6d11009ce3728ae30e01e3 Author: xinghuayu007 <1450306...@qq.com> AuthorDate: Wed May 8 14:51:20 2024 +0800 Leader rebalance ignores soft deleted tables Soft deleted tables no longer provide read/write service. Therefore there is no need to run leader rebalancing for soft deleted tables, which may take a long time. This patch adds a flag 'leader_rebalancing_ignore_soft_deleted_tables' to decide whether to ignore soft deleted tables during leader rebalancing. Change-Id: I1e2f37b004ed7d394e18d21cce97a2c9702adba3 Reviewed-on: http://gerrit.cloudera.org:8080/21411 Tested-by: Kudu Jenkins Reviewed-by: Marton Greber <greber...@gmail.com> Reviewed-by: Yingchun Lai <laiyingc...@apache.org> --- src/kudu/master/auto_leader_rebalancer-test.cc | 114 +++++++++++++++++++++---- src/kudu/master/auto_leader_rebalancer.cc | 11 ++- src/kudu/master/catalog_manager.cc | 8 ++ src/kudu/master/catalog_manager.h | 8 ++ 4 files changed, 122 insertions(+), 19 deletions(-) diff --git a/src/kudu/master/auto_leader_rebalancer-test.cc b/src/kudu/master/auto_leader_rebalancer-test.cc index 234a6a8da..d9f5949c0 100644 --- a/src/kudu/master/auto_leader_rebalancer-test.cc +++ b/src/kudu/master/auto_leader_rebalancer-test.cc @@ -60,6 +60,7 @@ class AutoRebalancerTask; } // namespace master } // namespace kudu +using kudu::client::KuduTable; using kudu::cluster::InternalMiniCluster; using kudu::cluster::InternalMiniClusterOptions; using kudu::tserver::ListTabletsResponsePB; @@ -69,10 +70,12 @@ using kudu::consensus::LeaderStepDownResponsePB; using kudu::rpc::RpcController; using std::string; using std::unique_ptr; +using kudu::client::sp::shared_ptr; using std::vector; DECLARE_bool(auto_leader_rebalancing_enabled); DECLARE_bool(auto_rebalancing_enabled); +DECLARE_bool(leader_rebalancing_ignore_soft_deleted_tables); 
DECLARE_int32(heartbeat_interval_ms); DECLARE_uint32(auto_leader_rebalancing_interval_seconds); DECLARE_uint32(auto_rebalancing_interval_seconds); @@ -140,17 +143,21 @@ class LeaderRebalancerTest : public KuduTest { } // Get the leader numbers of each tablet server. - void GetLeaderDistribution(std::map<string, int32_t>* leader_map) { + void GetLeaderDistribution(std::map<string, int32_t>* leader_map, + const string& table_name) { leader_map->clear(); - scoped_refptr<TableInfo> table; + shared_ptr<KuduTable> table; + workload_->client()->OpenTable(table_name, &table); + + scoped_refptr<TableInfo> table_info; master::Master* master = cluster_->mini_master()->master(); master::CatalogManager* catalog_manager = master->catalog_manager(); { CatalogManager::ScopedLeaderSharedLock leaderlock(catalog_manager); - catalog_manager->GetTableInfoByName(table_name(), &table); + catalog_manager->GetTableInfo(table->id(), &table_info); } std::vector<string> leader_list; - for (const auto& tablet : table->tablet_map()) { + for (const auto& tablet : table_info->tablet_map()) { client::KuduTablet* ptr; workload_->client()->GetTablet(tablet.second->id(), &ptr); unique_ptr<client::KuduTablet> tablet_ptr(ptr); @@ -172,7 +179,8 @@ class LeaderRebalancerTest : public KuduTest { } // Make the leader distribution as the vector passed in. 
- Status MakeLeaderDistribution(std::vector<int32_t> leader_distribution) { + Status MakeLeaderDistribution(std::vector<int32_t> leader_distribution, + const string table_name) { master::Master* master = cluster_->mini_master()->master(); TSDescriptorVector descriptors; master->ts_manager()->GetAllDescriptors(&descriptors); @@ -185,7 +193,7 @@ class LeaderRebalancerTest : public KuduTest { master::CatalogManager* catalog_manager = master->catalog_manager(); { CatalogManager::ScopedLeaderSharedLock leaderlock(catalog_manager); - catalog_manager->GetTableInfoByName(table_name(), &table); + catalog_manager->GetTableInfoByName(table_name, &table); } if (std::accumulate(leader_distribution.begin(), leader_distribution.end(), 0) != @@ -253,11 +261,11 @@ TEST_F(LeaderRebalancerTest, FunctionalTestForDivided) { // Simulate the leader distribution. std::vector<int32_t> leader_distribution = {4, 4, 1}; - MakeLeaderDistribution(leader_distribution); + MakeLeaderDistribution(leader_distribution, table_name()); SleepFor(MonoDelta::FromMilliseconds(3000)); std::map<string, int32_t> leader_map; - GetLeaderDistribution(&leader_map); + GetLeaderDistribution(&leader_map, table_name()); LOG(INFO) << "The leader distribution is " << '\n'; for (const auto& leader : leader_map) { std::cout << leader.first << " " << leader.second << '\n'; @@ -276,7 +284,7 @@ TEST_F(LeaderRebalancerTest, FunctionalTestForDivided) { // Check the leader numbers of each tablet server. It should always be floor(avg) // or ceil(avg), where the parameter avg is (tablet num) / (tablet server num). 
double expected_leader_num = static_cast<double>(kNumTablets) / 3; - GetLeaderDistribution(&leader_map); + GetLeaderDistribution(&leader_map, table_name()); LOG(INFO) << "The leader distribution is " << '\n'; for (const auto& leader : leader_map) { std::cout << leader.first << " " << leader.second << '\n'; @@ -288,10 +296,10 @@ TEST_F(LeaderRebalancerTest, FunctionalTestForDivided) { // Try different leader distribution. std::vector<int32_t> leader_distribution2 = {0, 8, 1}; - MakeLeaderDistribution(leader_distribution2); + MakeLeaderDistribution(leader_distribution2, table_name()); SleepFor(MonoDelta::FromMilliseconds(3000)); - GetLeaderDistribution(&leader_map); + GetLeaderDistribution(&leader_map, table_name()); LOG(INFO) << "The leader distribution is " << '\n'; for (const auto& leader : leader_map) { std::cout << leader.first << " " << leader.second << '\n'; @@ -302,7 +310,7 @@ TEST_F(LeaderRebalancerTest, FunctionalTestForDivided) { SleepFor(MonoDelta::FromMilliseconds(FLAGS_heartbeat_interval_ms)); } - GetLeaderDistribution(&leader_map); + GetLeaderDistribution(&leader_map, table_name()); LOG(INFO) << "The leader distribution is " << '\n'; for (const auto& leader : leader_map) { std::cout << leader.first << " " << leader.second << '\n'; @@ -323,11 +331,11 @@ TEST_F(LeaderRebalancerTest, FunctionalTestForNotDivided) { // Simulate the leader distribution. std::vector<int32_t> leader_distribution = {5, 4, 1}; - MakeLeaderDistribution(leader_distribution); + MakeLeaderDistribution(leader_distribution, table_name()); SleepFor(MonoDelta::FromMilliseconds(3000)); std::map<string, int32_t> leader_map; - GetLeaderDistribution(&leader_map); + GetLeaderDistribution(&leader_map, table_name()); LOG(INFO) << "The leader distribution is " << '\n'; for (const auto& leader : leader_map) { std::cout << leader.first << " " << leader.second << '\n'; @@ -346,7 +354,7 @@ TEST_F(LeaderRebalancerTest, FunctionalTestForNotDivided) { // Check the leader numbers of each tablet server. 
It should always be floor(avg) // or ceil(avg), where the parameter avg is (tablet num) / (tablet server num). double expected_leader_num = static_cast<double>(kNumTablets) / 3; - GetLeaderDistribution(&leader_map); + GetLeaderDistribution(&leader_map, table_name()); LOG(INFO) << "The leader distribution is " << '\n'; for (const auto& leader : leader_map) { std::cout << leader.first << " " << leader.second << '\n'; @@ -358,10 +366,10 @@ TEST_F(LeaderRebalancerTest, FunctionalTestForNotDivided) { // Try different leader distribution. std::vector<int32_t> leader_distribution2 = {8, 1, 1}; - MakeLeaderDistribution(leader_distribution2); + MakeLeaderDistribution(leader_distribution2, table_name()); SleepFor(MonoDelta::FromMilliseconds(3000)); - GetLeaderDistribution(&leader_map); + GetLeaderDistribution(&leader_map, table_name()); LOG(INFO) << "The leader distribution is " << '\n'; for (const auto& leader : leader_map) { std::cout << leader.first << " " << leader.second << '\n'; @@ -372,7 +380,7 @@ TEST_F(LeaderRebalancerTest, FunctionalTestForNotDivided) { SleepFor(MonoDelta::FromMilliseconds(FLAGS_heartbeat_interval_ms)); } - GetLeaderDistribution(&leader_map); + GetLeaderDistribution(&leader_map, table_name()); LOG(INFO) << "The leader distribution is " << '\n'; for (const auto& leader : leader_map) { std::cout << leader.first << " " << leader.second << '\n'; @@ -524,5 +532,75 @@ TEST_F(LeaderRebalancerTest, TestMaintenanceMode) { } } +class FilterSoftDeletedTableTest : + public LeaderRebalancerTest, + public ::testing::WithParamInterface<bool> { +}; + +INSTANTIATE_TEST_SUITE_P(, FilterSoftDeletedTableTest, ::testing::Bool()); +TEST_P(FilterSoftDeletedTableTest, TestFilterSofteDeletedTable) { + FLAGS_leader_rebalancing_ignore_soft_deleted_tables = GetParam(); + + constexpr const int kNumTServers = 3; + constexpr const int kNumTablets = 9; + constexpr const int kNumReplicas = 3; + constexpr const char* const soft_deleted_table = "soft_deleted_table"; + + 
cluster_opts_.num_tablet_servers = kNumTServers; + ASSERT_OK(CreateAndStartCluster()); + + CreateWorkloadTable(kNumTablets, /*num_replicas*/ kNumReplicas); + + // Simulate the leader distribution. + std::vector<int32_t> leader_distribution = {4, 4, 1}; + MakeLeaderDistribution(leader_distribution, table_name()); + SleepFor(MonoDelta::FromMilliseconds(3000)); + + string first_table = table_name(); + + // Create a new table. + workload_.reset(new TestWorkload(cluster_.get())); + workload_->set_table_name(soft_deleted_table); + workload_->set_num_tablets(kNumTablets); + workload_->set_num_replicas(kNumReplicas); + workload_->Setup(); + + // Simulate the leader distribution. + MakeLeaderDistribution(leader_distribution, soft_deleted_table); + SleepFor(MonoDelta::FromMilliseconds(3000)); + + // Delete the table 'soft_deleted_table'. + workload_->client()->SoftDeleteTable(soft_deleted_table, 3600); + + // Try to run leader rebalance for 10 times. + int32_t retries = 10; + master::Master* master = cluster_->mini_master()->master(); + master::AutoLeaderRebalancerTask* leader_rebalancer = + master->catalog_manager()->auto_leader_rebalancer(); + for (int i = 0; i < retries; i++) { + leader_rebalancer->RunLeaderRebalancer(); + SleepFor(MonoDelta::FromMilliseconds(FLAGS_heartbeat_interval_ms)); + } + + std::map<string, int32_t> leader_map; + // The first table is leader rebalanced. + GetLeaderDistribution(&leader_map, first_table); + for (const auto& leader: leader_map) { + ASSERT_EQ(leader.second, 3); + } + + GetLeaderDistribution(&leader_map, soft_deleted_table); + // The soft deleted table is not leader rebalanced. 
+ if (FLAGS_leader_rebalancing_ignore_soft_deleted_tables) { + for (const auto& leader: leader_map) { + ASSERT_NE(leader.second, 3); + } + } else { + for (const auto& leader: leader_map) { + ASSERT_EQ(leader.second, 3); + } + } +} + } // namespace master } // namespace kudu diff --git a/src/kudu/master/auto_leader_rebalancer.cc b/src/kudu/master/auto_leader_rebalancer.cc index b6bd1268d..6d2c6091f 100644 --- a/src/kudu/master/auto_leader_rebalancer.cc +++ b/src/kudu/master/auto_leader_rebalancer.cc @@ -87,6 +87,11 @@ DEFINE_uint32(leader_rebalancing_max_moves_per_round, 10, TAG_FLAG(leader_rebalancing_max_moves_per_round, advanced); TAG_FLAG(leader_rebalancing_max_moves_per_round, runtime); +DEFINE_bool(leader_rebalancing_ignore_soft_deleted_tables, false, + "Whether to ignore rebalancing the soft deleted tables"); +TAG_FLAG(leader_rebalancing_ignore_soft_deleted_tables, advanced); +TAG_FLAG(leader_rebalancing_ignore_soft_deleted_tables, runtime); + DECLARE_bool(auto_leader_rebalancing_enabled); namespace kudu { @@ -431,7 +436,11 @@ Status AutoLeaderRebalancerTask::RunLeaderRebalancer() { { CatalogManager::ScopedLeaderSharedLock leader_lock(catalog_manager_); RETURN_NOT_OK(leader_lock.first_failed_status()); - catalog_manager_->GetAllTables(&table_infos); + if (FLAGS_leader_rebalancing_ignore_soft_deleted_tables) { + catalog_manager_->GetNormalizedTables(&table_infos); + } else { + catalog_manager_->GetAllTables(&table_infos); + } } for (const auto& table_info : table_infos) { RunLeaderRebalanceForTable(table_info, tserver_uuids, exclude_dest_uuids); diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc index 1ab7bc85c..da9881c22 100644 --- a/src/kudu/master/catalog_manager.cc +++ b/src/kudu/master/catalog_manager.cc @@ -4275,6 +4275,14 @@ void CatalogManager::GetAllTables(vector<scoped_refptr<TableInfo>>* tables) { AppendValuesFromMap(table_ids_map_, tables); } +void 
CatalogManager::GetNormalizedTables(vector<scoped_refptr<TableInfo>>* tables) { + leader_lock_.AssertAcquiredForReading(); + + tables->clear(); + shared_lock<LockType> l(lock_); + AppendValuesFromMap(normalized_table_names_map_, tables); +} + void CatalogManager::GetAllTabletsForTests(vector<scoped_refptr<TabletInfo>>* tablets) { leader_lock_.AssertAcquiredForReading(); diff --git a/src/kudu/master/catalog_manager.h b/src/kudu/master/catalog_manager.h index 136ccbaab..a7bf477df 100644 --- a/src/kudu/master/catalog_manager.h +++ b/src/kudu/master/catalog_manager.h @@ -872,6 +872,14 @@ class CatalogManager : public tserver::TabletReplicaLookupIf { // NOTE: This should only be used by tests or web-ui void GetAllTables(std::vector<scoped_refptr<TableInfo>>* tables); + // Retrieve all tables from the normalized_table_names_map_. + // The normalized_table_names_map_ inherently does not contain soft-deleted + // or not running tables, so no manual exclusion is needed. May fail if + // the catalog manager is not yet running. Caller must hold leader_lock_. + // + // NOTE: Used by the auto leader rebalancer to skip soft-deleted tables. + void GetNormalizedTables(std::vector<scoped_refptr<TableInfo>>* tables); + // Check if a table exists by name, setting 'exist' appropriately. May fail // if the catalog manager is not yet running. Caller must hold leader_lock_. //