This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch branch-1.17.x in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/branch-1.17.x by this push: new 6030b2948 KUDU-3532: Fix range aware replica placement bug 6030b2948 is described below commit 6030b2948116a072f21c1b8b0d728146ba716d50 Author: Mahesh Reddy <mre...@cloudera.com> AuthorDate: Tue Dec 12 18:38:57 2023 -0500 KUDU-3532: Fix range aware replica placement bug An implicit conversion from unsigned long to int caused an std::length_error to be thrown when a vector tried to reserve a a size greater than the max size. This happens when a negative number is converted. This bug was introduced by changelist [1]. I also added tests with tablet servers in multiple locations. This omission caused this bug to go unnoticed until now. [1] https://github.com/apache/kudu/commit/10fdaf6a9 Change-Id: Id5d696d58965590a9f91f8b1b59f23225bbad8ee Reviewed-on: http://gerrit.cloudera.org:8080/20781 Tested-by: Kudu Jenkins Reviewed-by: Alexey Serbin <ale...@apache.org> (cherry picked from commit a84c8376a78b3a86be75d434e0f7ff2853c0c880) Reviewed-on: http://gerrit.cloudera.org:8080/20788 Reviewed-by: Abhishek Chennaka <achenn...@cloudera.com> Tested-by: Alexey Serbin <ale...@apache.org> --- src/kudu/master/placement_policy-test.cc | 219 +++++++++++++++++++++++-------- src/kudu/master/placement_policy.cc | 7 +- 2 files changed, 170 insertions(+), 56 deletions(-) diff --git a/src/kudu/master/placement_policy-test.cc b/src/kudu/master/placement_policy-test.cc index 84449ca7a..191f81d74 100644 --- a/src/kudu/master/placement_policy-test.cc +++ b/src/kudu/master/placement_policy-test.cc @@ -1085,6 +1085,17 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasWithRangesExtremeCase) { ASSERT_EQ(500, stats["ts0"]); ASSERT_EQ(500, stats["ts1"]); } + + { + ASSERT_OK(Prepare(cluster_info)); + const auto& all = descriptors(); + PlacementPolicy policy(all, rng()); + const auto existing = GetDescriptors({"ts1"}); + shared_ptr<TSDescriptor> extra_ts; + ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1", &extra_ts)); + ASSERT_TRUE(extra_ts); + ASSERT_TRUE(extra_ts->permanent_uuid() == "ts0") << extra_ts->permanent_uuid(); + } } // RF is equal to the number of tablet servers, so replica placement will be same with ranges. @@ -1142,6 +1153,18 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasWithRanges) { ASSERT_EQ(1000, stats["ts1"]); ASSERT_EQ(1000, stats["ts2"]); } + + { + ASSERT_OK(Prepare(cluster_info)); + const auto& all = descriptors(); + PlacementPolicy policy(all, rng()); + const auto existing = GetDescriptors({"ts2"}); + shared_ptr<TSDescriptor> extra_ts; + ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1", &extra_ts)); + ASSERT_TRUE(extra_ts); + const auto& ts_uuid = extra_ts->permanent_uuid(); + ASSERT_TRUE(ts_uuid == "ts0" || ts_uuid == "ts1") << extra_ts->permanent_uuid(); + } } // RF is 3 while number of tablet servers is 4 with 2 of them empty and the other 2 loaded with @@ -1209,6 +1232,18 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasWithRangesAndTables) { ASSERT_LE(368, stats["ts3"]); ASSERT_GE(378, stats["ts3"]); } + + { + ASSERT_OK(Prepare(cluster_info)); + const auto& all = descriptors(); + PlacementPolicy policy(all, rng()); + const auto existing = GetDescriptors({"ts3"}); + shared_ptr<TSDescriptor> extra_ts; + ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1", &extra_ts)); + ASSERT_TRUE(extra_ts); + const auto& ts_uuid = extra_ts->permanent_uuid(); + ASSERT_TRUE(ts_uuid == "ts0" || ts_uuid == "ts1") << extra_ts->permanent_uuid(); + } } // RF is 3 while number of tablet servers is 6. Half of them are empty while the other half contain @@ -1288,6 +1323,20 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasWithRangesAndTablesWithDoubleTSer ASSERT_LE(137, stats["ts5"]); ASSERT_GE(147, stats["ts5"]); } + + { + ASSERT_OK(Prepare(cluster_info)); + const auto& all = descriptors(); + PlacementPolicy policy(all, rng()); + const auto existing = GetDescriptors({"ts4", "ts5"}); + shared_ptr<TSDescriptor> extra_ts; + ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1", &extra_ts)); + ASSERT_TRUE(extra_ts); + const auto& ts_uuid = extra_ts->permanent_uuid(); + ASSERT_TRUE(ts_uuid == "ts0" || + ts_uuid == "ts1" || + ts_uuid == "ts2") << extra_ts->permanent_uuid(); + } } // With RF = 3, 6 tablet servers, and a well-balanced cluster initially, this test case adds @@ -1377,68 +1426,132 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasWithImbalanceByTable) { }, }; - ASSERT_OK(Prepare(cluster_info)); - const auto& all = descriptors(); - PlacementPolicy policy(all, rng()); - typedef std::unordered_map<string, std::unordered_map<string, int>> replicas_per_range_per_table; - std::unordered_map<string, replicas_per_range_per_table> stats; - vector<string> ranges = {"a1", "b1", "c1", "d1", "e1", "f1"}; - vector<string> tables = {"t1", "t2"}; - for (const auto& table : tables) { - for (const auto& range : ranges) { - for (auto i = 0; i < 500; ++i) { - TSDescriptorVector result; - ASSERT_OK(policy.PlaceTabletReplicas(nReplicationFactor, nullopt, range, table, &result)); - ASSERT_EQ(nReplicationFactor, result.size()); - for (const auto& ts : result) { - const auto& ts_uuid = ts->permanent_uuid(); - ++stats[ts_uuid][table][range]; + { + ASSERT_OK(Prepare(cluster_info)); + const auto& all = descriptors(); + PlacementPolicy policy(all, rng()); + typedef std::unordered_map<string, std::unordered_map<string, int>> + replicas_per_range_per_table; + std::unordered_map<string, replicas_per_range_per_table> stats; + vector<string> ranges = {"a1", "b1", "c1", "d1", "e1", "f1"}; + vector<string> tables = {"t1", "t2"}; + for (const auto& table : tables) { + for (const auto& range : ranges) { + for (auto i = 0; i < 500; ++i) { + TSDescriptorVector result; + ASSERT_OK(policy.PlaceTabletReplicas(nReplicationFactor, nullopt, range, table, &result)); + ASSERT_EQ(nReplicationFactor, result.size()); + for (const auto& ts : result) { + const auto& ts_uuid = ts->permanent_uuid(); + ++stats[ts_uuid][table][range]; + } } } } - } - ASSERT_EQ(kNumServers, stats.size()); - vector<string> balanced_ranges = {"c1", "d1", "e1", "f1"}; - vector<string> tservers1 = {"ts0", "ts2", "ts4"}; - for (const auto& stat : stats) { - int tserver_replicas = 0; - // Verifies that two tables exist on each tserver. - ASSERT_EQ(2, stat.second.size()); - for (const auto& table : stat.second) { - // Verifies that the six ranges exist for each table on each tserver. - ASSERT_EQ(6, table.second.size()); - for (const auto& ranges : table.second) { - if (std::find(balanced_ranges.begin(), - balanced_ranges.end(), ranges.first) != balanced_ranges.end()) { - ASSERT_LE(245, ranges.second); - ASSERT_GE(255, ranges.second); - } else if (std::find(tservers1.begin(), - tservers1.end(), stat.first) != tservers1.end()) { - // Only range of "a1" or "b1" now. Checks which tservers and for table "t1" or "t2". - if (table.first == "t1") { - ASSERT_LE(120, ranges.second); - ASSERT_GE(130, ranges.second); - } else { - ASSERT_LE(370, ranges.second); - ASSERT_GE(380, ranges.second); - } - } else { - // Ranges "a1" or "b1" for tservers "ts1", "ts3", "ts5". Checks for table "t1" or "t2". - if (table.first == "t1") { - ASSERT_LE(370, ranges.second); - ASSERT_GE(380, ranges.second); + ASSERT_EQ(kNumServers, stats.size()); + vector<string> balanced_ranges = {"c1", "d1", "e1", "f1"}; + vector<string> tservers1 = {"ts0", "ts2", "ts4"}; + for (const auto& stat : stats) { + int tserver_replicas = 0; + // Verifies that two tables exist on each tserver. + ASSERT_EQ(2, stat.second.size()); + for (const auto& table : stat.second) { + // Verifies that the six ranges exist for each table on each tserver. + ASSERT_EQ(6, table.second.size()); + for (const auto& ranges : table.second) { + if (std::find(balanced_ranges.begin(), + balanced_ranges.end(), ranges.first) != balanced_ranges.end()) { + ASSERT_LE(245, ranges.second); + ASSERT_GE(255, ranges.second); + } else if (std::find(tservers1.begin(), + tservers1.end(), stat.first) != tservers1.end()) { + // Only range of "a1" or "b1" now. Checks which tservers and for table "t1" or "t2". + if (table.first == "t1") { + ASSERT_LE(120, ranges.second); + ASSERT_GE(130, ranges.second); + } else { + ASSERT_LE(370, ranges.second); + ASSERT_GE(380, ranges.second); + } } else { - ASSERT_LE(120, ranges.second); - ASSERT_GE(130, ranges.second); + // Ranges "a1" or "b1" for tservers "ts1", "ts3", "ts5". Checks for table "t1" or "t2". + if (table.first == "t1") { + ASSERT_LE(370, ranges.second); + ASSERT_GE(380, ranges.second); + } else { + ASSERT_LE(120, ranges.second); + ASSERT_GE(130, ranges.second); + } } + tserver_replicas += ranges.second; } - tserver_replicas += ranges.second; } + // Verifies that around 3000 replicas are placed on each tserver. + ASSERT_LE(2990, tserver_replicas); + ASSERT_GE(3110, tserver_replicas); } - // Verifies that around 3000 replicas are placed on each tserver. - ASSERT_LE(2990, tserver_replicas); - ASSERT_GE(3110, tserver_replicas); + } + + { + ASSERT_OK(Prepare(cluster_info)); + const auto& all = descriptors(); + PlacementPolicy policy(all, rng()); + const auto existing = GetDescriptors({"ts0", "ts2", "ts4"}); + shared_ptr<TSDescriptor> extra_ts; + ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1", &extra_ts)); + ASSERT_TRUE(extra_ts); + const auto& ts_uuid = extra_ts->permanent_uuid(); + ASSERT_TRUE(ts_uuid == "ts1" || + ts_uuid == "ts3" || + ts_uuid == "ts5") << extra_ts->permanent_uuid(); + } +} + +TEST_F(PlacementPolicyTest, PlaceRangeAwareTabletReplicasWithLocations) { + TabletNumByRangeMap range_a({ {"a1", 2}}); + TabletNumByRangeMap range_double_a({ {"a1", 4}}); + TabletNumByRangeMap range_b({ {"b1", 2}}); + TabletNumByRangeMap range_double_b({ {"b1", 2}}); + TabletNumByRangeMap ranges_a_and_b({ {"a1", 2}, {"b1", 2} }); + const vector<LocationInfo> cluster_info = { + { "A", { { "A_ts0", 4, { }, { {"t1", range_double_a }, } }, + { "A_ts1", 4, { }, { {"t1", range_double_a }, } }, + { "A_ts2", 4, { }, { {"t1", range_double_b }, } }, } }, + { "B", { { "B_ts0", 4, { }, { {"t1", ranges_a_and_b }, } }, + { "B_ts1", 4, { }, { {"t1", range_a}, {"t2", range_a} } }, } }, + { "C", { { "C_ts0", 6, { }, { {"t1", range_a }, {"t2", ranges_a_and_b }, } }, + { "C_ts1", 4, { }, { {"t1", range_a }, {"t2", range_b} } }, } }, + }; + + // To avoid placing a majority of replicas in one location, + // exactly one replica will be placed in each location. + // "A_ts2" is chosen because it has the least replicas per range "a1" in table "t1". + // "B_ts1" is chosen because it wins the replicas per table tiebreaker with "B_ts0". + // "C_ts1" is chosen because it wins the total replicas tiebreaker. + { + ASSERT_OK(Prepare(cluster_info)); + const auto& all = descriptors(); + PlacementPolicy policy(all, rng()); + TSDescriptorVector result; + ASSERT_OK(policy.PlaceTabletReplicas(3, nullopt, "a1", "t1", &result)); + ASSERT_EQ(3, result.size()); + TSDescriptorsMap m; + ASSERT_OK(TSDescriptorVectorToMap(result, &m)); + ASSERT_EQ(1, m.count("A_ts2")); + ASSERT_EQ(1, m.count("B_ts1")); + ASSERT_EQ(1, m.count("C_ts1")); + } + + { + ASSERT_OK(Prepare(cluster_info)); + const auto& all = descriptors(); + PlacementPolicy policy(all, rng()); + const auto existing = GetDescriptors({"A_ts0", "A_ts1", "B_ts0", "B_ts1", "C_ts0", "C_ts1"}); + shared_ptr<TSDescriptor> extra_ts; + ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1", &extra_ts)); + ASSERT_TRUE(extra_ts); + ASSERT_TRUE(extra_ts->permanent_uuid() == "A_ts2") << extra_ts->permanent_uuid(); } } diff --git a/src/kudu/master/placement_policy.cc b/src/kudu/master/placement_policy.cc index 44c0cb292..d2d95ea16 100644 --- a/src/kudu/master/placement_policy.cc +++ b/src/kudu/master/placement_policy.cc @@ -381,9 +381,10 @@ shared_ptr<TSDescriptor> PlacementPolicy::SelectReplica( const set<shared_ptr<TSDescriptor>>& excluded) const { if (range_key_start && table_id) { TSDescriptorVector ts_choices; - auto choices_size = ts_descs.size() - excluded.size(); - ReservoirSample(ts_descs, choices_size, excluded, rng_, &ts_choices); - DCHECK_EQ(ts_choices.size(), choices_size); + const int ts_size = static_cast<int>(ts_descs.size()); + CHECK_GE(ts_size, 0); + ReservoirSample(ts_descs, ts_size, excluded, rng_, &ts_choices); + DCHECK_LE(ts_choices.size(), ts_size); if (ts_choices.size() > 1) { return PickTabletServer( ts_choices, range_key_start.value(), table_id.value(), dimension, rng_);