This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push: new 2810e635c KUDU-3252: Follow up to replica placement bug 2810e635c is described below commit 2810e635cc72aa5279a9dc787b6becad6e0098a9 Author: Mahesh Reddy <mre...@cloudera.com> AuthorDate: Thu Dec 21 01:17:32 2023 -0500 KUDU-3252: Follow up to replica placement bug This patch addresses the feedback left on patch [1]. ReservoirSample is refactored to accept an unsigned integer rather than a signed integer. A new test is added to verify the functionality of the tiebreaker scenarios of PlacementPolicy::SelectReplica(). [1] https://github.com/apache/kudu/commit/a84c8376a Change-Id: I28582cae538ca27fa26fbfae84460b3b264ddb86 Reviewed-on: http://gerrit.cloudera.org:8080/20827 Tested-by: Alexey Serbin <ale...@apache.org> Reviewed-by: Alexey Serbin <ale...@apache.org> --- src/kudu/master/placement_policy-test.cc | 43 ++++++++++++++++++++++++++++++++ src/kudu/master/placement_policy.cc | 40 ++++++++++++++++------------- src/kudu/util/random_util.h | 6 ++--- 3 files changed, 68 insertions(+), 21 deletions(-) diff --git a/src/kudu/master/placement_policy-test.cc b/src/kudu/master/placement_policy-test.cc index 191f81d74..7691e1036 100644 --- a/src/kudu/master/placement_policy-test.cc +++ b/src/kudu/master/placement_policy-test.cc @@ -34,6 +34,7 @@ #include <glog/logging.h> #include <gtest/gtest.h> +#include "kudu/gutil/map-util.h" #include "kudu/gutil/strings/substitute.h" #include "kudu/master/ts_descriptor.h" #include "kudu/util/random.h" @@ -142,6 +143,15 @@ class PlacementPolicyTest : public ::testing::Test { return Status::OK(); } + shared_ptr<TSDescriptor> SelectReplica(const TSDescriptorVector& source_ts_descs, + const optional<string>& dimension, + const optional<string>& range_key_start, + const optional<string>& table_id, + const std::set<shared_ptr<TSDescriptor>>& excluded) { + PlacementPolicy policy(descriptors_, &rng_); + return policy.SelectReplica(source_ts_descs, dimension, range_key_start, table_id, excluded); + } + static Status TSDescriptorVectorToMap(const TSDescriptorVector& v_descs, TSDescriptorsMap* m_descs) { for (const auto& desc : v_descs) { @@ -1555,5 +1565,38 @@ TEST_F(PlacementPolicyTest, PlaceRangeAwareTabletReplicasWithLocations) { } } +// This test is more granular and specifically tests SelectReplica() and its +// various tiebreaker scenarios when choosing a tablet server to place a replica on. +TEST_F(PlacementPolicyTest, TestSelectReplicaTieBreakers) { + constexpr int nReplicationFactor = 3; + TabletNumByRangeMap range({ {"a1", 100} }); + TabletNumByRangeMap ranges({ {"a1", 100}, {"b1", 100} }); + const vector<LocationInfo> cluster_info = { + { + "", + { + { "ts0", 200, { }, { {"t1", range }, {"t2", range }, } }, + { "ts1", 100, { }, { {"t1", range }, } }, + { "ts2", 200, { }, { {"t1", ranges }, } }, + } + }, + }; + + { + ASSERT_OK(Prepare(cluster_info)); + std::set<shared_ptr<TSDescriptor>> selected; + // All tablet servers have the same number of replicas per range "a1" of table "t1". + // "ts1" is chosen first since it has the least number of replicas overall. + // "ts0" is chosen next since it has the least number of replicas of table "t1". + vector<string> chosen_tservers_in_order = {"ts1", "ts0", "ts2"}; + for (int i = 0; i < nReplicationFactor; ++i) { + auto ts = SelectReplica(descriptors(), nullopt, "a1", "t1", selected); + ASSERT_TRUE(ts); + ASSERT_TRUE(ts->permanent_uuid() == chosen_tservers_in_order[i]) << ts->permanent_uuid(); + EmplaceOrDie(&selected, std::move(ts)); + } + } +} + } // namespace master } // namespace kudu diff --git a/src/kudu/master/placement_policy.cc b/src/kudu/master/placement_policy.cc index d2d95ea16..d78c67902 100644 --- a/src/kudu/master/placement_policy.cc +++ b/src/kudu/master/placement_policy.cc @@ -350,6 +350,12 @@ Status PlacementPolicy::SelectReplicas(const TSDescriptorVector& source_ts_descs // multiple tablet servers, the total number of replicas will be considered. If // there's still a tie, a tablet server will be randomly chosen. // +// When choosing a tablet server to place a replica on from the set of tablet +// servers "ts_descs", any tablet servers from the set "excluded" that are also +// in the set "ts_descs" will not be chosen. It's possible that there's no +// overlap between these two sets as "excluded" can contain tservers from +// other locations and thus have no relation to the set "ts_descs". +// // The old replica selection algorithm follows the idea from // "Power of Two Choices in Randomized Load Balancing"[1]. For each replica, // we randomly select two tablet servers, and then assign the replica to the @@ -381,8 +387,7 @@ shared_ptr<TSDescriptor> PlacementPolicy::SelectReplica( const set<shared_ptr<TSDescriptor>>& excluded) const { if (range_key_start && table_id) { TSDescriptorVector ts_choices; - const int ts_size = static_cast<int>(ts_descs.size()); - CHECK_GE(ts_size, 0); + const auto ts_size = ts_descs.size(); ReservoirSample(ts_descs, ts_size, excluded, rng_, &ts_choices); DCHECK_LE(ts_choices.size(), ts_size); if (ts_choices.size() > 1) { @@ -393,23 +398,22 @@ shared_ptr<TSDescriptor> PlacementPolicy::SelectReplica( return ts_choices.front(); } return nullptr; - } else { - // Pick two random servers, excluding those we've already picked. - // If we've only got one server left, 'two_choices' will actually - // just contain one element. - TSDescriptorVector two_choices; - ReservoirSample(ts_descs, 2, excluded, rng_, &two_choices); - DCHECK_LE(two_choices.size(), 2); - - if (two_choices.size() == 2) { - // Pick the better of the two. - return PickBetterTabletServer(two_choices, dimension, rng_); - } - if (two_choices.size() == 1) { - return two_choices.front(); - } - return nullptr; } + // Pick two random servers, excluding those we've already picked. + // If we've only got one server left, 'two_choices' will actually + // just contain one element. + TSDescriptorVector two_choices; + ReservoirSample(ts_descs, 2, excluded, rng_, &two_choices); + DCHECK_LE(two_choices.size(), 2); + + if (two_choices.size() == 2) { + // Pick the better of the two. + return PickBetterTabletServer(two_choices, dimension, rng_); + } + if (two_choices.size() == 1) { + return two_choices.front(); + } + return nullptr; } Status PlacementPolicy::SelectLocation( diff --git a/src/kudu/util/random_util.h b/src/kudu/util/random_util.h index abeef3ee1..0e2631910 100644 --- a/src/kudu/util/random_util.h +++ b/src/kudu/util/random_util.h @@ -79,11 +79,11 @@ std::vector<T> SelectRandomSubset(const Container& c, int min_to_return, Rand* r // The results are not stored in a randomized order: the order of results will // match their order in the input collection. template<class Collection, class Set, class T, typename Rand> -void ReservoirSample(const Collection& c, int k, const Set& avoid, +void ReservoirSample(const Collection& c, uint32_t k, const Set& avoid, Rand* r, std::vector<T>* result) { result->clear(); result->reserve(k); - int i = 0; + uint32_t i = 0; for (const T& elem : c) { if (ContainsKey(avoid, elem)) { continue; @@ -95,7 +95,7 @@ void ReservoirSample(const Collection& c, int k, const Set& avoid, continue; } // Otherwise replace existing elements with decreasing probability. - int j = r->Uniform(i); + uint32_t j = r->Uniform(i); if (j < k) { (*result)[j] = elem; }