This is an automated email from the ASF dual-hosted git repository.

wchevreuil pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/master by this push:
     new 08621f94d4b HBASE-28962 Meta replication is inconsistent after startup 
when reusing hbase storage location (#6448)
08621f94d4b is described below

commit 08621f94d4bd9b6a180e7dcf5d4a530cf643258d
Author: richardantal <[email protected]>
AuthorDate: Tue May 27 16:01:12 2025 +0200

    HBASE-28962 Meta replication is inconsistent after startup when reusing 
hbase storage location (#6448)
    
    Signed-off-by: Andor Molnár <[email protected]>
    Signed-off-by: Wellington Chevreuil <[email protected]>
    Reviewed-by: Aman Poonia <[email protected]>
---
 .../org/apache/hadoop/hbase/master/HMaster.java    |  43 ++++-----
 .../master/procedure/ModifyTableProcedure.java     |   9 +-
 .../hbase/client/TestMetaReplicaAssignment.java    | 100 +++++++++++++++++++++
 3 files changed, 129 insertions(+), 23 deletions(-)

diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 18b85404840..7ccbb4d614a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -1171,30 +1171,33 @@ public class HMaster extends 
HBaseServerBase<MasterRpcServices> implements Maste
       int replicasNumInConf =
         conf.getInt(HConstants.META_REPLICAS_NUM, 
HConstants.DEFAULT_META_REPLICA_NUM);
       TableDescriptor metaDesc = 
tableDescriptors.get(TableName.META_TABLE_NAME);
+      int existingReplicasCount =
+        
assignmentManager.getRegionStates().getRegionsOfTable(TableName.META_TABLE_NAME).size();
+
       if (metaDesc.getRegionReplication() != replicasNumInConf) {
         // it is possible that we already have some replicas before upgrading, 
so we must set the
         // region replication number in meta TableDescriptor directly first, 
without creating a
         // ModifyTableProcedure, otherwise it may cause a double assign for 
the meta replicas.
-        int existingReplicasCount =
-          
assignmentManager.getRegionStates().getRegionsOfTable(TableName.META_TABLE_NAME).size();
-        if (existingReplicasCount > metaDesc.getRegionReplication()) {
-          LOG.info("Update replica count of hbase:meta from {}(in 
TableDescriptor)"
-            + " to {}(existing ZNodes)", metaDesc.getRegionReplication(), 
existingReplicasCount);
-          metaDesc = TableDescriptorBuilder.newBuilder(metaDesc)
-            .setRegionReplication(existingReplicasCount).build();
-          tableDescriptors.update(metaDesc);
-        }
-        // check again, and issue a ModifyTableProcedure if needed
-        if (metaDesc.getRegionReplication() != replicasNumInConf) {
-          LOG.info(
-            "The {} config is {} while the replica count in TableDescriptor is 
{}"
-              + " for hbase:meta, altering...",
-            HConstants.META_REPLICAS_NUM, replicasNumInConf, 
metaDesc.getRegionReplication());
-          procedureExecutor.submitProcedure(new ModifyTableProcedure(
-            procedureExecutor.getEnvironment(), 
TableDescriptorBuilder.newBuilder(metaDesc)
-              .setRegionReplication(replicasNumInConf).build(),
-            null, metaDesc, false, true));
-        }
+        LOG.info("Update replica count of hbase:meta from {}(in 
TableDescriptor)"
+          + " to {}(existing ZNodes)", metaDesc.getRegionReplication(), 
existingReplicasCount);
+        metaDesc = TableDescriptorBuilder.newBuilder(metaDesc)
+          .setRegionReplication(existingReplicasCount).build();
+        tableDescriptors.update(metaDesc);
+      }
+      // check again, and issue a ModifyTableProcedure if needed
+      if (
+        metaDesc.getRegionReplication() != replicasNumInConf
+          || existingReplicasCount != metaDesc.getRegionReplication()
+      ) {
+        LOG.info(
+          "The {} config is {} while the replica count in TableDescriptor is 
{},"
+            + " The number of replicas seen on ZK {} for hbase:meta, 
altering...",
+          HConstants.META_REPLICAS_NUM, replicasNumInConf, 
metaDesc.getRegionReplication(),
+          existingReplicasCount);
+        procedureExecutor.submitProcedure(new ModifyTableProcedure(
+          procedureExecutor.getEnvironment(), 
TableDescriptorBuilder.newBuilder(metaDesc)
+            .setRegionReplication(replicasNumInConf).build(),
+          null, metaDesc, false, true));
       }
     }
     // Initialize after meta is up as below scans meta
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java
index 45153612259..1040604727f 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java
@@ -506,14 +506,17 @@ public class ModifyTableProcedure extends 
AbstractStateMachineTableProcedure<Mod
   private void assignNewReplicasIfNeeded(MasterProcedureEnv env) throws 
IOException {
     final int oldReplicaCount = 
unmodifiedTableDescriptor.getRegionReplication();
     final int newReplicaCount = modifiedTableDescriptor.getRegionReplication();
-    if (newReplicaCount <= oldReplicaCount) {
+    int existingReplicasCount = env.getAssignmentManager().getRegionStates()
+      .getRegionsOfTable(modifiedTableDescriptor.getTableName()).size();
+    if (newReplicaCount <= Math.min(oldReplicaCount, existingReplicasCount)) {
       return;
     }
     if (isTableEnabled(env)) {
       List<RegionInfo> newReplicas = 
env.getAssignmentManager().getRegionStates()
         
.getRegionsOfTable(getTableName()).stream().filter(RegionReplicaUtil::isDefaultReplica)
-        .flatMap(primaryRegion -> IntStream.range(oldReplicaCount, 
newReplicaCount).mapToObj(
-          replicaId -> 
RegionReplicaUtil.getRegionInfoForReplica(primaryRegion, replicaId)))
+        .flatMap(primaryRegion -> IntStream
+          .range(Math.min(oldReplicaCount, existingReplicasCount), 
newReplicaCount).mapToObj(
+            replicaId -> 
RegionReplicaUtil.getRegionInfoForReplica(primaryRegion, replicaId)))
         .collect(Collectors.toList());
       
addChildProcedure(env.getAssignmentManager().createAssignProcedures(newReplicas));
     }
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaReplicaAssignment.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaReplicaAssignment.java
new file mode 100644
index 00000000000..8c41f30a068
--- /dev/null
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaReplicaAssignment.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtil;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.StartTestingClusterOption;
+import org.apache.hadoop.hbase.TableDescriptors;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.TableNameTestRule;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.regionserver.StorefileRefresherChore;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.testclassification.MiscTests;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category({ MiscTests.class, MediumTests.class })
+public class TestMetaReplicaAssignment {
+  protected static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
+
+  protected static final int REGIONSERVERS_COUNT = 3;
+
+  @Rule
+  public TableNameTestRule name = new TableNameTestRule();
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+    HBaseClassTestRule.forClass(TestMetaReplicaAssignment.class);
+
+  @AfterClass
+  public static void tearDown() throws Exception {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    TEST_UTIL.getConfiguration().setInt("zookeeper.session.timeout", 30000);
+    TEST_UTIL.getConfiguration()
+      .setInt(StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD, 
1000);
+    StartTestingClusterOption option = StartTestingClusterOption.builder()
+      
.numAlwaysStandByMasters(1).numMasters(1).numRegionServers(REGIONSERVERS_COUNT).build();
+    TEST_UTIL.startMiniCluster(option);
+    Admin admin = TEST_UTIL.getAdmin();
+    TEST_UTIL.waitFor(30000, () -> TEST_UTIL.getMiniHBaseCluster().getMaster() 
!= null);
+    HBaseTestingUtil.setReplicas(admin, TableName.META_TABLE_NAME, 1);
+  }
+
+  @Test
+  public void testUpgradeSameReplicaCount() throws Exception {
+    HMaster oldMaster = TEST_UTIL.getMiniHBaseCluster().getMaster();
+    TableDescriptors oldTds = oldMaster.getTableDescriptors();
+    TableDescriptor oldMetaTd = oldTds.get(TableName.META_TABLE_NAME);
+
+    assertEquals(1, oldMaster.getAssignmentManager().getRegionStates()
+      .getRegionsOfTable(TableName.META_TABLE_NAME).size());
+    assertEquals(1, oldMetaTd.getRegionReplication());
+    // force update the replica count to 3 and then kill the master, to 
simulate that
+    // HBase storage location is reused, resulting a sane-looking meta and 
only the
+    // RegionInfoBuilder.FIRST_META_REGIONINFO gets assigned in 
InitMetaProcedure.
+    // Meta region replicas are not assigned, but we have replication in meta 
table descriptor.
+
+    
oldTds.update(TableDescriptorBuilder.newBuilder(oldMetaTd).setRegionReplication(3).build());
+    oldMaster.stop("Restarting");
+    TEST_UTIL.waitFor(30000, () -> oldMaster.isStopped());
+
+    // increase replica count to 3 through Configuration
+    
TEST_UTIL.getMiniHBaseCluster().getConfiguration().setInt(HConstants.META_REPLICAS_NUM,
 3);
+    TEST_UTIL.getMiniHBaseCluster().startMaster();
+    TEST_UTIL.waitFor(30000,
+      () -> TEST_UTIL.getZooKeeperWatcher().getMetaReplicaNodes().size() == 3);
+    HMaster newMaster = TEST_UTIL.getMiniHBaseCluster().getMaster();
+    assertEquals(3, newMaster.getAssignmentManager().getRegionStates()
+      .getRegionsOfTable(TableName.META_TABLE_NAME).size());
+    assertEquals(3,
+      
newMaster.getTableDescriptors().get(TableName.META_TABLE_NAME).getRegionReplication());
+  }
+}

Reply via email to