This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch branch-3
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-3 by this push:
     new debc0e2b773 HBASE-29315 Fail Split/MergeTableRegionProcedure if table 
modification in progress (#6991)
debc0e2b773 is described below

commit debc0e2b7731392df1f63e4b2f08a29ea2b9c906
Author: Charles Connell <[email protected]>
AuthorDate: Fri May 23 10:56:45 2025 -0400

    HBASE-29315 Fail Split/MergeTableRegionProcedure if table modification in 
progress (#6991)
    
    Signed-off-by: Duo Zhang <[email protected]>
    (cherry picked from commit c4d8b00267adb8246df991397c0748cc625f756d)
---
 .../assignment/MergeTableRegionsProcedure.java     | 13 ++++
 .../assignment/SplitTableRegionProcedure.java      | 16 +++++
 .../AbstractStateMachineTableProcedure.java        | 11 +++
 ...ionServerHostingReplicaSlowOpenCoprocessor.java | 59 +++++++++++++++
 .../assignment/TestMergeTableRegionsProcedure.java | 29 ++++++++
 .../assignment/TestSplitTableRegionProcedure.java  | 84 ++++++++++------------
 .../procedure/TestTruncateRegionProcedure.java     |  3 -
 7 files changed, 166 insertions(+), 49 deletions(-)

diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java
index 4ad45757ad5..38eae148004 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java
@@ -450,6 +450,19 @@ public class MergeTableRegionsProcedure
         "Skip merging regions " + regionNamesToLog + ", because we are 
snapshotting " + tn);
     }
 
+    /*
+     * Sometimes a ModifyTableProcedure has edited a table descriptor to 
change the number of region
+     * replicas for a table, but it has not yet opened/closed the new 
replicas. The
+     * ModifyTableProcedure assumes that nobody else will do the 
opening/closing of the new
+     * replicas, but a concurrent MergeTableRegionProcedure would violate that 
assumption.
+     */
+    if (isTableModificationInProgress(env)) {
+      setFailure(getClass().getSimpleName(),
+        new IOException("Skip merging regions " + regionNamesToLog
+          + ", because there is an active procedure that is modifying the 
table " + tn));
+      return false;
+    }
+
     // Mostly the below two checks are not used because we already check the 
switches before
     // submitting the merge procedure. Just for safety, we are checking the 
switch again here.
     // Also, in case the switch was set to false after submission, this 
procedure can be rollbacked,
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java
index 1dc6768cb05..3d3d3d18de2 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java
@@ -103,6 +103,8 @@ public class SplitTableRegionProcedure
   private RegionInfo daughterTwoRI;
   private byte[] bestSplitRow;
   private RegionSplitPolicy splitPolicy;
+  // exposed for unit testing
+  boolean checkTableModifyInProgress = true;
 
   public SplitTableRegionProcedure() {
     // Required by the Procedure framework to create the procedure on replay
@@ -514,6 +516,20 @@ public class SplitTableRegionProcedure
         + ", because we are taking snapshot for the table " + 
getParentRegion().getTable()));
       return false;
     }
+
+    /*
+     * Sometimes a ModifyTableProcedure has edited a table descriptor to 
change the number of region
+     * replicas for a table, but it has not yet opened/closed the new 
replicas. The
+     * ModifyTableProcedure assumes that nobody else will do the 
opening/closing of the new
+     * replicas, but a concurrent SplitTableRegionProcedure would violate that 
assumption.
+     */
+    if (checkTableModifyInProgress && isTableModificationInProgress(env)) {
+      setFailure(new IOException("Skip splitting region " + 
getParentRegion().getShortNameToLog()
+        + ", because there is an active procedure that is modifying the table "
+        + getParentRegion().getTable()));
+      return false;
+    }
+
     // Check whether the region is splittable
     RegionStateNode node =
       
env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion());
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AbstractStateMachineTableProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AbstractStateMachineTableProcedure.java
index b6dce5198d4..67076507496 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AbstractStateMachineTableProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AbstractStateMachineTableProcedure.java
@@ -192,4 +192,15 @@ public abstract class 
AbstractStateMachineTableProcedure<TState>
     }
     regionNode.checkOnline();
   }
+
+  /**
+   * Some procedures cannot safely run while a table is being modified. Helper 
method to allow them
+   * to check if such a procedure is running on a table.
+   */
+  protected final boolean isTableModificationInProgress(MasterProcedureEnv 
env) {
+    return 
env.getMasterServices().getMasterProcedureExecutor().getProcedures().stream()
+      .filter(p -> !p.isFinished()).filter(p -> p instanceof 
TableProcedureInterface)
+      .map(p -> (TableProcedureInterface) p).filter(p -> 
getTableName().equals(p.getTableName()))
+      .anyMatch(TableQueue::requireTableExclusiveLock);
+  }
 }
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/RegionServerHostingReplicaSlowOpenCoprocessor.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/RegionServerHostingReplicaSlowOpenCoprocessor.java
new file mode 100644
index 00000000000..67fafbbe4be
--- /dev/null
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/RegionServerHostingReplicaSlowOpenCoprocessor.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.assignment;
+
+import java.util.Optional;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.coprocessor.ObserverContext;
+import org.apache.hadoop.hbase.coprocessor.RegionCoprocessor;
+import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
+import org.apache.hadoop.hbase.coprocessor.RegionObserver;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This coprocessor is used to slow down opening of the replica regions.
+ */
+public class RegionServerHostingReplicaSlowOpenCoprocessor
+  implements RegionCoprocessor, RegionObserver {
+
+  private static final Logger LOG =
+    
LoggerFactory.getLogger(RegionServerHostingReplicaSlowOpenCoprocessor.class);
+
+  static volatile boolean slowDownReplicaOpen = false;
+
+  @Override
+  public Optional<RegionObserver> getRegionObserver() {
+    return Optional.of(this);
+  }
+
+  @Override
+  public void preOpen(ObserverContext<? extends RegionCoprocessorEnvironment> 
c) {
+    int replicaId = 
c.getEnvironment().getRegion().getRegionInfo().getReplicaId();
+    if (replicaId != RegionInfo.DEFAULT_REPLICA_ID) {
+      while (slowDownReplicaOpen) {
+        LOG.info("Slow down replica region open a bit");
+        try {
+          Thread.sleep(250);
+        } catch (InterruptedException ignored) {
+          return;
+        }
+      }
+    }
+  }
+}
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java
index c0c4e355f2b..936cdc032fb 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java
@@ -42,6 +42,7 @@ import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility;
+import org.apache.hadoop.hbase.master.procedure.ModifyTableProcedure;
 import org.apache.hadoop.hbase.master.procedure.TestSnapshotProcedure;
 import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
 import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
@@ -100,6 +101,8 @@ public class TestMergeTableRegionsProcedure {
     conf.setInt("hbase.master.maximum.ping.server.attempts", 3);
     conf.setInt("hbase.master.ping.server.retry.sleep.interval", 1);
     conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1);
+    conf.set("hbase.coprocessor.region.classes",
+      RegionServerHostingReplicaSlowOpenCoprocessor.class.getName());
   }
 
   @BeforeClass
@@ -391,6 +394,32 @@ public class TestMergeTableRegionsProcedure {
     assertEquals(initialRegionCount, 
UTIL.getAdmin().getRegions(tableName).size());
   }
 
+  @Test
+  public void testMergeDetectsModifyTableProcedure() throws Exception {
+    final TableName tableName = TableName.valueOf(name.getMethodName());
+    final ProcedureExecutor<MasterProcedureEnv> procExec = 
getMasterProcedureExecutor();
+
+    List<RegionInfo> regions = createTable(tableName);
+
+    RegionServerHostingReplicaSlowOpenCoprocessor.slowDownReplicaOpen = true;
+    TableDescriptor td = 
TableDescriptorBuilder.newBuilder(admin.getDescriptor(tableName))
+      .setRegionReplication(2).build();
+    long modifyProcId =
+      procExec.submitProcedure(new 
ModifyTableProcedure(procExec.getEnvironment(), td));
+
+    // Merge regions of the table, the MergeTableRegionsProcedure will fail 
because there is a
+    // ModifyTableProcedure in progress
+    MergeTableRegionsProcedure mergeProcedure = new MergeTableRegionsProcedure(
+      procExec.getEnvironment(), regions.toArray(new RegionInfo[0]), false);
+    long mergeProcId = procExec.submitProcedure(mergeProcedure);
+    ProcedureTestingUtility.waitProcedure(procExec, mergeProcId);
+    ProcedureTestingUtility.assertProcFailed(procExec, mergeProcId);
+
+    RegionServerHostingReplicaSlowOpenCoprocessor.slowDownReplicaOpen = false;
+    ProcedureTestingUtility.waitProcedure(procExec, modifyProcId);
+    ProcedureTestingUtility.assertProcNotFailed(procExec, modifyProcId);
+  }
+
   private List<RegionInfo> createTable(final TableName tableName) throws 
Exception {
     TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
       .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).build();
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java
index 3d79fe208cf..e9dd0d3ac98 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java
@@ -25,7 +25,6 @@ import static org.junit.Assert.fail;
 
 import java.io.IOException;
 import java.util.List;
-import java.util.Optional;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellUtil;
@@ -45,14 +44,12 @@ import org.apache.hadoop.hbase.client.SnapshotDescription;
 import org.apache.hadoop.hbase.client.SnapshotType;
 import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.client.TableDescriptor;
-import org.apache.hadoop.hbase.coprocessor.ObserverContext;
-import org.apache.hadoop.hbase.coprocessor.RegionCoprocessor;
-import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
-import org.apache.hadoop.hbase.coprocessor.RegionObserver;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
 import org.apache.hadoop.hbase.master.RegionState;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility;
+import org.apache.hadoop.hbase.master.procedure.ModifyTableProcedure;
 import org.apache.hadoop.hbase.master.procedure.TestSnapshotProcedure;
 import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
 import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
@@ -114,39 +111,7 @@ public class TestSplitTableRegionProcedure {
     conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1);
     conf.setLong(HConstants.MAJOR_COMPACTION_PERIOD, 0);
     conf.set("hbase.coprocessor.region.classes",
-      RegionServerHostingReplicaSlowOpenCopro.class.getName());
-    conf.setInt("hbase.client.sync.wait.timeout.msec", 1500);
-  }
-
-  /**
-   * This copro is used to slow down opening of the replica regions.
-   */
-  public static class RegionServerHostingReplicaSlowOpenCopro
-    implements RegionCoprocessor, RegionObserver {
-    static int countForReplica = 0;
-    static boolean slowDownReplicaOpen = false;
-
-    @Override
-    public Optional<RegionObserver> getRegionObserver() {
-      return Optional.of(this);
-    }
-
-    @Override
-    public void preOpen(ObserverContext<? extends 
RegionCoprocessorEnvironment> c)
-      throws IOException {
-      int replicaId = 
c.getEnvironment().getRegion().getRegionInfo().getReplicaId();
-      if ((replicaId != RegionInfo.DEFAULT_REPLICA_ID) && (countForReplica == 
0)) {
-        countForReplica++;
-        while (slowDownReplicaOpen) {
-          LOG.info("Slow down replica region open a bit");
-          try {
-            Thread.sleep(100);
-          } catch (InterruptedException ie) {
-            // Ingore
-          }
-        }
-      }
-    }
+      RegionServerHostingReplicaSlowOpenCoprocessor.class.getName());
   }
 
   @BeforeClass
@@ -191,15 +156,13 @@ public class TestSplitTableRegionProcedure {
     final TableName tableName = TableName.valueOf(name.getMethodName());
     final ProcedureExecutor<MasterProcedureEnv> procExec = 
getMasterProcedureExecutor();
 
-    RegionServerHostingReplicaSlowOpenCopro.slowDownReplicaOpen = true;
+    RegionServerHostingReplicaSlowOpenCoprocessor.slowDownReplicaOpen = true;
     RegionInfo[] regions =
       MasterProcedureTestingUtility.createTable(procExec, tableName, null, 
columnFamilyName1);
 
-    try {
-      HBaseTestingUtil.setReplicas(UTIL.getAdmin(), tableName, 2);
-    } catch (IOException ioe) {
-
-    }
+    TableDescriptor td = 
TableDescriptorBuilder.newBuilder(UTIL.getAdmin().getDescriptor(tableName))
+      .setRegionReplication(2).build();
+    procExec.submitProcedure(new 
ModifyTableProcedure(procExec.getEnvironment(), td));
 
     // wait until the primary region is online.
     HBaseTestingUtil.await(2000, () -> {
@@ -223,7 +186,7 @@ public class TestSplitTableRegionProcedure {
     ProcedureTestingUtility.waitProcedure(procExec, procId);
 
     // Let replica parent region open.
-    RegionServerHostingReplicaSlowOpenCopro.slowDownReplicaOpen = false;
+    RegionServerHostingReplicaSlowOpenCoprocessor.slowDownReplicaOpen = false;
 
     // wait until the replica region is online.
     HBaseTestingUtil.await(2000, () -> {
@@ -231,7 +194,10 @@ public class TestSplitTableRegionProcedure {
         AssignmentManager am = 
UTIL.getHBaseCluster().getMaster().getAssignmentManager();
         if (am == null) return false;
         RegionInfo replicaRegion = 
RegionReplicaUtil.getRegionInfoForReplica(regions[0], 1);
-        if (am.getRegionStates().getRegionState(replicaRegion).isOpened()) {
+        if (
+          am.getRegionStates().getRegionState(replicaRegion) != null
+            && am.getRegionStates().getRegionState(replicaRegion).isOpened()
+        ) {
           return true;
         }
         return false;
@@ -610,6 +576,32 @@ public class TestSplitTableRegionProcedure {
     assertEquals(splitFailedCount + 1, 
splitProcMetrics.getFailedCounter().getCount());
   }
 
+  @Test
+  public void testSplitDetectsModifyTableProcedure() throws Exception {
+    final TableName tableName = TableName.valueOf(name.getMethodName());
+    final ProcedureExecutor<MasterProcedureEnv> procExec = 
getMasterProcedureExecutor();
+
+    RegionInfo[] regions =
+      MasterProcedureTestingUtility.createTable(procExec, tableName, null, 
columnFamilyName1);
+    RegionServerHostingReplicaSlowOpenCoprocessor.slowDownReplicaOpen = true;
+    TableDescriptor td = 
TableDescriptorBuilder.newBuilder(UTIL.getAdmin().getDescriptor(tableName))
+      .setRegionReplication(2).build();
+    long modifyProcId =
+      procExec.submitProcedure(new 
ModifyTableProcedure(procExec.getEnvironment(), td));
+
+    // Split region of the table, the SplitTableRegionProcedure will fail 
because there is a
+    // ModifyTableProcedure in progress
+    SplitTableRegionProcedure splitProcedure = new SplitTableRegionProcedure(
+      procExec.getEnvironment(), regions[0], HConstants.CATALOG_FAMILY);
+    long splitProcId = procExec.submitProcedure(splitProcedure);
+    ProcedureTestingUtility.waitProcedure(procExec, splitProcId);
+    ProcedureTestingUtility.assertProcFailed(procExec, splitProcId);
+
+    RegionServerHostingReplicaSlowOpenCoprocessor.slowDownReplicaOpen = false;
+    ProcedureTestingUtility.waitProcedure(procExec, modifyProcId);
+    ProcedureTestingUtility.assertProcNotFailed(procExec, modifyProcId);
+  }
+
   private void deleteData(final TableName tableName, final int 
startDeleteRowNum)
     throws IOException, InterruptedException {
     Table t = UTIL.getConnection().getTable(tableName);
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateRegionProcedure.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateRegionProcedure.java
index 8ce60ee5550..8ea493984e6 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateRegionProcedure.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateRegionProcedure.java
@@ -35,7 +35,6 @@ import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
 import org.apache.hadoop.hbase.client.TableDescriptor;
 import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
-import org.apache.hadoop.hbase.master.assignment.TestSplitTableRegionProcedure;
 import org.apache.hadoop.hbase.procedure2.Procedure;
 import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
@@ -70,8 +69,6 @@ public class TestTruncateRegionProcedure extends 
TestTableDDLProcedureBase {
   private static void setupConf(Configuration conf) {
     conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1);
     conf.setLong(HConstants.MAJOR_COMPACTION_PERIOD, 0);
-    conf.set("hbase.coprocessor.region.classes",
-      
TestSplitTableRegionProcedure.RegionServerHostingReplicaSlowOpenCopro.class.getName());
     conf.setInt("hbase.client.sync.wait.timeout.msec", 60000);
   }
 

Reply via email to