This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new a84c8376a KUDU-3532: Fix range aware replica placement bug
a84c8376a is described below

commit a84c8376a78b3a86be75d434e0f7ff2853c0c880
Author: Mahesh Reddy <mre...@cloudera.com>
AuthorDate: Tue Dec 12 18:38:57 2023 -0500

    KUDU-3532: Fix range aware replica placement bug
    
    An implicit conversion from unsigned long to int caused
    an std::length_error to be thrown when a vector tried to
    reserve a a size greater than the max size. This happens
    when a negative number is converted. This bug was
    introduced by changelist [1].
    
    I also added tests with tablet servers in multiple
    locations. This omission caused this bug to go
    unnoticed until now.
    
    [1] https://github.com/apache/kudu/commit/10fdaf6a9
    
    Change-Id: Id5d696d58965590a9f91f8b1b59f23225bbad8ee
    Reviewed-on: http://gerrit.cloudera.org:8080/20781
    Tested-by: Kudu Jenkins
    Reviewed-by: Alexey Serbin <ale...@apache.org>
---
 src/kudu/master/placement_policy-test.cc | 219 +++++++++++++++++++++++--------
 src/kudu/master/placement_policy.cc      |   7 +-
 2 files changed, 170 insertions(+), 56 deletions(-)

diff --git a/src/kudu/master/placement_policy-test.cc 
b/src/kudu/master/placement_policy-test.cc
index 84449ca7a..191f81d74 100644
--- a/src/kudu/master/placement_policy-test.cc
+++ b/src/kudu/master/placement_policy-test.cc
@@ -1085,6 +1085,17 @@ TEST_F(PlacementPolicyTest, 
PlaceTabletReplicasWithRangesExtremeCase) {
     ASSERT_EQ(500, stats["ts0"]);
     ASSERT_EQ(500, stats["ts1"]);
   }
+
+  {
+    ASSERT_OK(Prepare(cluster_info));
+    const auto& all = descriptors();
+    PlacementPolicy policy(all, rng());
+    const auto existing = GetDescriptors({"ts1"});
+    shared_ptr<TSDescriptor> extra_ts;
+    ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1", 
&extra_ts));
+    ASSERT_TRUE(extra_ts);
+    ASSERT_TRUE(extra_ts->permanent_uuid() == "ts0") << 
extra_ts->permanent_uuid();
+  }
 }
 
 // RF is equal to the number of tablet servers, so replica placement will be 
same with ranges.
@@ -1142,6 +1153,18 @@ TEST_F(PlacementPolicyTest, 
PlaceTabletReplicasWithRanges) {
     ASSERT_EQ(1000, stats["ts1"]);
     ASSERT_EQ(1000, stats["ts2"]);
   }
+
+  {
+    ASSERT_OK(Prepare(cluster_info));
+    const auto& all = descriptors();
+    PlacementPolicy policy(all, rng());
+    const auto existing = GetDescriptors({"ts2"});
+    shared_ptr<TSDescriptor> extra_ts;
+    ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1", 
&extra_ts));
+    ASSERT_TRUE(extra_ts);
+    const auto& ts_uuid = extra_ts->permanent_uuid();
+    ASSERT_TRUE(ts_uuid == "ts0" || ts_uuid == "ts1") << 
extra_ts->permanent_uuid();
+  }
 }
 
 // RF is 3 while number of tablet servers is 4 with 2 of them empty and the 
other 2 loaded with
@@ -1209,6 +1232,18 @@ TEST_F(PlacementPolicyTest, 
PlaceTabletReplicasWithRangesAndTables) {
     ASSERT_LE(368, stats["ts3"]);
     ASSERT_GE(378, stats["ts3"]);
   }
+
+  {
+    ASSERT_OK(Prepare(cluster_info));
+    const auto& all = descriptors();
+    PlacementPolicy policy(all, rng());
+    const auto existing = GetDescriptors({"ts3"});
+    shared_ptr<TSDescriptor> extra_ts;
+    ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1", 
&extra_ts));
+    ASSERT_TRUE(extra_ts);
+    const auto& ts_uuid = extra_ts->permanent_uuid();
+    ASSERT_TRUE(ts_uuid == "ts0" || ts_uuid == "ts1") << 
extra_ts->permanent_uuid();
+  }
 }
 
 // RF is 3 while number of tablet servers is 6. Half of them are empty while 
the other half contain
@@ -1288,6 +1323,20 @@ TEST_F(PlacementPolicyTest, 
PlaceTabletReplicasWithRangesAndTablesWithDoubleTSer
     ASSERT_LE(137, stats["ts5"]);
     ASSERT_GE(147, stats["ts5"]);
   }
+
+  {
+    ASSERT_OK(Prepare(cluster_info));
+    const auto& all = descriptors();
+    PlacementPolicy policy(all, rng());
+    const auto existing = GetDescriptors({"ts4", "ts5"});
+    shared_ptr<TSDescriptor> extra_ts;
+    ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1", 
&extra_ts));
+    ASSERT_TRUE(extra_ts);
+    const auto& ts_uuid = extra_ts->permanent_uuid();
+    ASSERT_TRUE(ts_uuid == "ts0" ||
+                ts_uuid == "ts1" ||
+                ts_uuid == "ts2") << extra_ts->permanent_uuid();
+  }
 }
 
 // With RF = 3, 6 tablet servers, and a well-balanced cluster initially, this 
test case adds
@@ -1377,68 +1426,132 @@ TEST_F(PlacementPolicyTest, 
PlaceTabletReplicasWithImbalanceByTable) {
       },
   };
 
-  ASSERT_OK(Prepare(cluster_info));
-  const auto& all = descriptors();
-  PlacementPolicy policy(all, rng());
-  typedef std::unordered_map<string, std::unordered_map<string, int>> 
replicas_per_range_per_table;
-  std::unordered_map<string, replicas_per_range_per_table> stats;
-  vector<string> ranges = {"a1", "b1", "c1", "d1", "e1", "f1"};
-  vector<string> tables = {"t1", "t2"};
-  for (const auto& table : tables) {
-    for (const auto& range : ranges) {
-      for (auto i = 0; i < 500; ++i) {
-        TSDescriptorVector result;
-        ASSERT_OK(policy.PlaceTabletReplicas(nReplicationFactor, nullopt, 
range, table, &result));
-        ASSERT_EQ(nReplicationFactor, result.size());
-        for (const auto& ts : result) {
-          const auto& ts_uuid = ts->permanent_uuid();
-          ++stats[ts_uuid][table][range];
+  {
+    ASSERT_OK(Prepare(cluster_info));
+    const auto& all = descriptors();
+    PlacementPolicy policy(all, rng());
+    typedef std::unordered_map<string, std::unordered_map<string, int>>
+    replicas_per_range_per_table;
+    std::unordered_map<string, replicas_per_range_per_table> stats;
+    vector<string> ranges = {"a1", "b1", "c1", "d1", "e1", "f1"};
+    vector<string> tables = {"t1", "t2"};
+    for (const auto& table : tables) {
+      for (const auto& range : ranges) {
+        for (auto i = 0; i < 500; ++i) {
+          TSDescriptorVector result;
+          ASSERT_OK(policy.PlaceTabletReplicas(nReplicationFactor, nullopt, 
range, table, &result));
+          ASSERT_EQ(nReplicationFactor, result.size());
+          for (const auto& ts : result) {
+            const auto& ts_uuid = ts->permanent_uuid();
+            ++stats[ts_uuid][table][range];
+          }
         }
       }
     }
-  }
 
-  ASSERT_EQ(kNumServers, stats.size());
-  vector<string> balanced_ranges = {"c1", "d1", "e1", "f1"};
-  vector<string> tservers1 = {"ts0", "ts2", "ts4"};
-  for (const auto& stat : stats) {
-    int tserver_replicas = 0;
-    // Verifies that two tables exist on each tserver.
-    ASSERT_EQ(2, stat.second.size());
-    for (const auto& table : stat.second) {
-      // Verifies that the six ranges exist for each table on each tserver.
-      ASSERT_EQ(6, table.second.size());
-      for (const auto& ranges : table.second) {
-        if (std::find(balanced_ranges.begin(),
-                      balanced_ranges.end(), ranges.first) != 
balanced_ranges.end()) {
-          ASSERT_LE(245, ranges.second);
-          ASSERT_GE(255, ranges.second);
-        } else if (std::find(tservers1.begin(),
-                           tservers1.end(), stat.first) != tservers1.end()) {
-          // Only range of "a1" or "b1" now. Checks which tservers and for 
table "t1" or "t2".
-          if (table.first == "t1") {
-            ASSERT_LE(120, ranges.second);
-            ASSERT_GE(130, ranges.second);
-          } else {
-            ASSERT_LE(370, ranges.second);
-            ASSERT_GE(380, ranges.second);
-          }
-        } else {
-          // Ranges "a1" or "b1" for tservers "ts1", "ts3", "ts5". Checks for 
table "t1" or "t2".
-          if (table.first == "t1") {
-            ASSERT_LE(370, ranges.second);
-            ASSERT_GE(380, ranges.second);
+    ASSERT_EQ(kNumServers, stats.size());
+    vector<string> balanced_ranges = {"c1", "d1", "e1", "f1"};
+    vector<string> tservers1 = {"ts0", "ts2", "ts4"};
+    for (const auto& stat : stats) {
+      int tserver_replicas = 0;
+      // Verifies that two tables exist on each tserver.
+      ASSERT_EQ(2, stat.second.size());
+      for (const auto& table : stat.second) {
+        // Verifies that the six ranges exist for each table on each tserver.
+        ASSERT_EQ(6, table.second.size());
+        for (const auto& ranges : table.second) {
+          if (std::find(balanced_ranges.begin(),
+                        balanced_ranges.end(), ranges.first) != 
balanced_ranges.end()) {
+            ASSERT_LE(245, ranges.second);
+            ASSERT_GE(255, ranges.second);
+          } else if (std::find(tservers1.begin(),
+                               tservers1.end(), stat.first) != 
tservers1.end()) {
+            // Only range of "a1" or "b1" now. Checks which tservers and for 
table "t1" or "t2".
+            if (table.first == "t1") {
+              ASSERT_LE(120, ranges.second);
+              ASSERT_GE(130, ranges.second);
+            } else {
+              ASSERT_LE(370, ranges.second);
+              ASSERT_GE(380, ranges.second);
+            }
           } else {
-            ASSERT_LE(120, ranges.second);
-            ASSERT_GE(130, ranges.second);
+            // Ranges "a1" or "b1" for tservers "ts1", "ts3", "ts5". Checks 
for table "t1" or "t2".
+            if (table.first == "t1") {
+              ASSERT_LE(370, ranges.second);
+              ASSERT_GE(380, ranges.second);
+            } else {
+              ASSERT_LE(120, ranges.second);
+              ASSERT_GE(130, ranges.second);
+            }
           }
+          tserver_replicas += ranges.second;
         }
-        tserver_replicas += ranges.second;
       }
+      // Verifies that around 3000 replicas are placed on each tserver.
+      ASSERT_LE(2990, tserver_replicas);
+      ASSERT_GE(3110, tserver_replicas);
     }
-    // Verifies that around 3000 replicas are placed on each tserver.
-    ASSERT_LE(2990, tserver_replicas);
-    ASSERT_GE(3110, tserver_replicas);
+  }
+
+  {
+    ASSERT_OK(Prepare(cluster_info));
+    const auto& all = descriptors();
+    PlacementPolicy policy(all, rng());
+    const auto existing = GetDescriptors({"ts0", "ts2", "ts4"});
+    shared_ptr<TSDescriptor> extra_ts;
+    ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1", 
&extra_ts));
+    ASSERT_TRUE(extra_ts);
+    const auto& ts_uuid = extra_ts->permanent_uuid();
+    ASSERT_TRUE(ts_uuid == "ts1" ||
+                ts_uuid == "ts3" ||
+                ts_uuid == "ts5") << extra_ts->permanent_uuid();
+  }
+}
+
+TEST_F(PlacementPolicyTest, PlaceRangeAwareTabletReplicasWithLocations) {
+  TabletNumByRangeMap range_a({ {"a1", 2}});
+  TabletNumByRangeMap range_double_a({ {"a1", 4}});
+  TabletNumByRangeMap range_b({ {"b1", 2}});
+  TabletNumByRangeMap range_double_b({ {"b1", 2}});
+  TabletNumByRangeMap ranges_a_and_b({ {"a1", 2}, {"b1", 2} });
+  const vector<LocationInfo> cluster_info = {
+      { "A", { { "A_ts0", 4, { }, { {"t1", range_double_a }, } },
+               { "A_ts1", 4, { }, { {"t1", range_double_a }, } },
+               { "A_ts2", 4, { }, { {"t1", range_double_b }, } }, } },
+      { "B", { { "B_ts0", 4, { }, { {"t1", ranges_a_and_b }, } },
+               { "B_ts1", 4, { }, { {"t1", range_a}, {"t2", range_a} } }, } },
+      { "C", { { "C_ts0", 6, { }, { {"t1", range_a }, {"t2", ranges_a_and_b }, 
} },
+               { "C_ts1", 4, { }, { {"t1", range_a }, {"t2", range_b} } }, } },
+  };
+
+  // To avoid placing a majority of replicas in one location,
+  // exactly one replica will be placed in each location.
+  // "A_ts2" is chosen because it has the least replicas per range "a1" in 
table "t1".
+  // "B_ts1" is chosen because it wins the replicas per table tiebreaker with 
"B_ts0".
+  // "C_ts1" is chosen because it wins the total replicas tiebreaker.
+  {
+    ASSERT_OK(Prepare(cluster_info));
+    const auto& all = descriptors();
+    PlacementPolicy policy(all, rng());
+    TSDescriptorVector result;
+    ASSERT_OK(policy.PlaceTabletReplicas(3, nullopt, "a1", "t1", &result));
+    ASSERT_EQ(3, result.size());
+    TSDescriptorsMap m;
+    ASSERT_OK(TSDescriptorVectorToMap(result, &m));
+    ASSERT_EQ(1, m.count("A_ts2"));
+    ASSERT_EQ(1, m.count("B_ts1"));
+    ASSERT_EQ(1, m.count("C_ts1"));
+  }
+
+  {
+    ASSERT_OK(Prepare(cluster_info));
+    const auto& all = descriptors();
+    PlacementPolicy policy(all, rng());
+    const auto existing = GetDescriptors({"A_ts0", "A_ts1", "B_ts0", "B_ts1", 
"C_ts0", "C_ts1"});
+    shared_ptr<TSDescriptor> extra_ts;
+    ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1", 
&extra_ts));
+    ASSERT_TRUE(extra_ts);
+    ASSERT_TRUE(extra_ts->permanent_uuid() == "A_ts2") << 
extra_ts->permanent_uuid();
   }
 }
 
diff --git a/src/kudu/master/placement_policy.cc 
b/src/kudu/master/placement_policy.cc
index 44c0cb292..d2d95ea16 100644
--- a/src/kudu/master/placement_policy.cc
+++ b/src/kudu/master/placement_policy.cc
@@ -381,9 +381,10 @@ shared_ptr<TSDescriptor> PlacementPolicy::SelectReplica(
     const set<shared_ptr<TSDescriptor>>& excluded) const {
   if (range_key_start && table_id) {
     TSDescriptorVector ts_choices;
-    auto choices_size = ts_descs.size() - excluded.size();
-    ReservoirSample(ts_descs, choices_size, excluded, rng_, &ts_choices);
-    DCHECK_EQ(ts_choices.size(), choices_size);
+    const int ts_size = static_cast<int>(ts_descs.size());
+    CHECK_GE(ts_size, 0);
+    ReservoirSample(ts_descs, ts_size, excluded, rng_, &ts_choices);
+    DCHECK_LE(ts_choices.size(), ts_size);
     if (ts_choices.size() > 1) {
       return PickTabletServer(
           ts_choices, range_key_start.value(), table_id.value(), dimension, 
rng_);

Reply via email to