[hbase] 07/10: HBASE-27218 Support rolling upgrading (#4808)

2023-03-08 Thread zhangduo
This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch HBASE-27109/table_based_rqs
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 2442166da2cd957b46a6c7b09f0330a09cb63044
Author: Duo Zhang 
AuthorDate: Sun Nov 6 16:57:11 2022 +0800

HBASE-27218 Support rolling upgrading (#4808)

Signed-off-by: Yu Li 
---
 .../apache/hadoop/hbase/zookeeper/ZNodePaths.java  |   8 +-
 .../apache/hadoop/hbase/procedure2/Procedure.java  |  15 +
 .../protobuf/server/master/MasterProcedure.proto   |  12 +
 hbase-replication/pom.xml  |  10 +
 .../hbase/replication/ReplicationQueueStorage.java |  19 ++
 .../replication/TableReplicationQueueStorage.java  |  65 +++-
 .../ZKReplicationQueueStorageForMigration.java | 351 +
 .../replication/TestZKReplicationQueueStorage.java | 317 +++
 hbase-server/pom.xml   |   6 +
 .../org/apache/hadoop/hbase/master/HMaster.java|  13 +
 .../master/procedure/ServerCrashProcedure.java |  19 ++
 .../replication/AbstractPeerNoLockProcedure.java   |   5 +-
 ...rateReplicationQueueFromZkToTableProcedure.java | 244 ++
 .../master/replication/ModifyPeerProcedure.java|  26 ++
 .../master/replication/ReplicationPeerManager.java | 104 +-
 .../TransitPeerSyncReplicationStateProcedure.java  |  14 +
 .../replication/TestMigrateReplicationQueue.java   | 126 
 ...rateReplicationQueueFromZkToTableProcedure.java | 226 +
 ...icationQueueFromZkToTableProcedureRecovery.java | 128 
 ...tReplicationPeerManagerMigrateQueuesFromZk.java | 216 +
 .../hbase/replication/TestReplicationBase.java |   2 +-
 pom.xml|   7 +-
 22 files changed, 1917 insertions(+), 16 deletions(-)

diff --git 
a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZNodePaths.java 
b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZNodePaths.java
index d19d2100466..3f66c7cdc0c 100644
--- 
a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZNodePaths.java
+++ 
b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZNodePaths.java
@@ -220,7 +220,11 @@ public class ZNodePaths {
* @param suffix ending of znode name
* @return result of properly joining prefix with suffix
*/
-  public static String joinZNode(String prefix, String suffix) {
-    return prefix + ZNodePaths.ZNODE_PATH_SEPARATOR + suffix;
+  public static String joinZNode(String prefix, String... suffix) {
+    StringBuilder sb = new StringBuilder(prefix);
+    for (String s : suffix) {
+      sb.append(ZNodePaths.ZNODE_PATH_SEPARATOR).append(s);
+    }
+    return sb.toString();
   }
 }
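For context, the varargs overload above appends every suffix with the znode path separator, so a multi-level path can be built in one call. A minimal usage sketch (illustrative only, not part of the commit; assumes the ZooKeeper separator "/"):

    // Before: nested calls such as joinZNode(joinZNode("/hbase", "replication"), "rs")
    // After: a single varargs call
    String rsNode = ZNodePaths.joinZNode("/hbase", "replication", "rs");
    // rsNode == "/hbase/replication/rs"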
diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java
index 34c74d92c16..43adba2bc21 100644
--- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java
+++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.ThreadLocalRandom;
 import org.apache.hadoop.hbase.exceptions.TimeoutIOException;
 import org.apache.hadoop.hbase.metrics.Counter;
 import org.apache.hadoop.hbase.metrics.Histogram;
@@ -33,6 +34,7 @@ import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState;
 
 /**
@@ -1011,6 +1013,19 @@ public abstract class Procedure<TEnvironment> implements
   public int compareTo(final Procedure<TEnvironment> other) {
     return Long.compare(getProcId(), other.getProcId());
diff --git a/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto b/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto
index 76a1d676487..b6f5d7e50bb 100644
--- a/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto
@@ -722,3 +722,15 @@ enum AssignReplicationQueuesState {
 message AssignReplicationQueuesStateData {
   required ServerName crashed_server = 1;
 }
+
+enum MigrateReplicationQueueFromZkToTableState {
+  MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_PREPARE = 1;
+  MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_DISABLE_PEER = 2;
+  MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_MIGRATE = 3;
+  MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_WAIT_UPGRADING = 4;
+  MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_ENABLE_PEER = 5;
+}
+
+message MigrateReplicationQueueFromZkToTableStateData {
+  repeated string disabled_peer_id = 1;
+}
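The new state-data message carries the peer ids that the migration procedure disabled before moving the queue data, so they can be re-enabled in the ENABLE_PEER step even after a master restart. A minimal sketch of how the generated (shaded) protobuf class might be used, assuming standard protobuf-java builder names for the repeated disabled_peer_id field (the class name and peer ids below are hypothetical; only the message and field come from the commit):

    import java.util.List;
    import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.MigrateReplicationQueueFromZkToTableStateData;

    public class MigrateStateDataSketch {
      public static void main(String[] args) {
        // Record which peers were disabled before migrating the queue data.
        MigrateReplicationQueueFromZkToTableStateData data =
          MigrateReplicationQueueFromZkToTableStateData.newBuilder()
            .addDisabledPeerId("peer_1") // repeated string disabled_peer_id = 1
            .addDisabledPeerId("peer_2")
            .build();
        // On recovery, the procedure can read the list back and re-enable each peer.
        List<String> disabledPeers = data.getDisabledPeerIdList();
        System.out.println(disabledPeers);
      }
    }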
diff --git a/hbase-replication/pom.xml 

[hbase] branch HBASE-27109/table_based_rqs updated (6128eb476af -> 2e4e2951364)

2023-03-08 Thread zhangduo
This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a change to branch HBASE-27109/table_based_rqs
in repository https://gitbox.apache.org/repos/asf/hbase.git


omit 6128eb476af HBASE-27430 Should disable replication log cleaner when migrating replication queue data (#4901)
omit 32fd20812ed HBASE-27429 Add exponential retry backoff support for MigrateReplicationQueueFromZkToTableProcedure
omit 2e2ec6c90af HBASE-27217 Revisit the DumpReplicationQueues tool (#4810)
omit 6321c964eef HBASE-27218 Support rolling upgrading (#4808)
omit c82ebf796c0 HBASE-27405 Fix the replication hfile/log cleaner report that the replication table does not exist (#4811)
omit c1c4ef09fbc HBASE-27392 Add a new procedure type for implementing some global operations such as migration (#4803)
omit 76d65fe7993 HBASE-27215 Add support for sync replication (#4762)
omit f161c17eb92 HBASE-27214 Implement the new replication hfile/log cleaner (#4722)
omit 2d21c3d3b30 HBASE-27213 Add support for claim queue operation (#4708)
omit b218c14e9a8 HBASE-27212 Implement a new table based replication queue storage and make the minimum replication system work (#4672)
 add 3eedc0987a7 HBASE-27672 Read RPC threads may BLOCKED at the Configuration.get when using java compression (#5075)
 add 37858bb6b05 HBASE-25709 Close region may stuck when region is compacting and skipped most cells read (#4536)
 add 586073d0c09 HBASE-27689 Update README.md about how to request a jira account (#5088)
 add 16864c705c7 HBASE-27681 Refactor Table Latency Metrics (#5072)
 add 13e11a46c6e HBASE-27681 Addendum delete old metric classes (#5092)
 add 8bdabed85ca HBASE-27690 Fix a misspell in TestRegionStateStore (#5090)
 add bc8b13e468a HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078)
 new 058b449032d HBASE-27212 Implement a new table based replication queue storage and make the minimum replication system work (#4672)
 new c6a272bc0d8 HBASE-27213 Add support for claim queue operation (#4708)
 new 8099d454eba HBASE-27214 Implement the new replication hfile/log cleaner (#4722)
 new 330042cc3d2 HBASE-27215 Add support for sync replication (#4762)
 new 83f9769ab03 HBASE-27392 Add a new procedure type for implementing some global operations such as migration (#4803)
 new 54a722e1e75 HBASE-27405 Fix the replication hfile/log cleaner report that the replication table does not exist (#4811)
 new 2442166da2c HBASE-27218 Support rolling upgrading (#4808)
 new e5d10b0d22d HBASE-27217 Revisit the DumpReplicationQueues tool (#4810)
 new 3c54d9c27a9 HBASE-27429 Add exponential retry backoff support for MigrateReplicationQueueFromZkToTableProcedure
 new 2e4e2951364 HBASE-27430 Should disable replication log cleaner when migrating replication queue data (#4901)

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (6128eb476af)
            \
             N -- N -- N   refs/heads/HBASE-27109/table_based_rqs (2e4e2951364)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 10 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 README.md  |  13 +-
 bin/chaos-daemon.sh|  26 +-
 bin/hbase  |   4 +
 .../java/org/apache/hadoop/hbase/HConstants.java   |   5 +
 .../hbase/io/compress/aircompressor/Lz4Codec.java  |   6 +-
 .../hbase/io/compress/aircompressor/LzoCodec.java  |   6 +-
 .../io/compress/aircompressor/SnappyCodec.java |   6 +-
 .../hbase/io/compress/aircompressor/ZstdCodec.java |   6 +-
 .../hbase/io/compress/brotli/BrotliCodec.java  |  16 +-
 .../hadoop/hbase/io/compress/lz4/Lz4Codec.java |  10 +-
 .../hbase/io/compress/xerial/SnappyCodec.java  |  10 +-
 .../hadoop/hbase/io/compress/xz/LzmaCodec.java |  13 +-
 .../hadoop/hbase/io/compress/zstd/ZstdCodec.java   |  17 +-
 .../impl/GlobalMetricRegistriesAdapter.java|   1 +
 .../hbase/regionserver/MetricsTableLatencies.java  | 145 -
 .../regionserver/MetricsTableLatenciesImpl.java| 216 --
 .../hbase/regionserver/MetricsTableQueryMeter.java |  57 
 

[hbase] 02/10: HBASE-27213 Add support for claim queue operation (#4708)

2023-03-08 Thread zhangduo
This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch HBASE-27109/table_based_rqs
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit c6a272bc0d86f8587e5939246882419f66cd7976
Author: Duo Zhang 
AuthorDate: Sat Aug 20 23:10:58 2022 +0800

HBASE-27213 Add support for claim queue operation (#4708)

Signed-off-by: Xin Sun 
---
 .../protobuf/server/master/MasterProcedure.proto   |  6 +--
 .../AssignReplicationQueuesProcedure.java  | 13 ++---
 .../master/replication/ModifyPeerProcedure.java|  2 +-
 .../master/replication/RemovePeerProcedure.java| 41 +-
 .../regionserver/ReplicationSourceManager.java | 37 +
 .../replication/TestClaimReplicationQueue.java |  2 +-
 ...java => TestRemovePeerProcedureWaitForSCP.java} | 63 +-
 .../replication/TestSerialReplicationFailover.java |  3 --
 8 files changed, 116 insertions(+), 51 deletions(-)

diff --git 
a/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto 
b/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto
index 2e0da0deb84..76a1d676487 100644
--- 
a/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto
+++ 
b/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto
@@ -515,6 +515,7 @@ message UpdatePeerConfigStateData {
 
 message RemovePeerStateData {
   optional ReplicationPeer peer_config = 1;
+  repeated int64 ongoing_assign_replication_queues_proc_ids = 2;
 }
 
 message EnablePeerStateData {
@@ -714,9 +715,8 @@ message ModifyColumnFamilyStoreFileTrackerStateData {
 }
 
 enum AssignReplicationQueuesState {
-  ASSIGN_REPLICATION_QUEUES_PRE_CHECK = 1;
-  ASSIGN_REPLICATION_QUEUES_ADD_MISSING_QUEUES = 2;
-  ASSIGN_REPLICATION_QUEUES_CLAIM = 3;
+  ASSIGN_REPLICATION_QUEUES_ADD_MISSING_QUEUES = 1;
+  ASSIGN_REPLICATION_QUEUES_CLAIM = 2;
 }
 
 message AssignReplicationQueuesStateData {
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AssignReplicationQueuesProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AssignReplicationQueuesProcedure.java
index e7fb5e51715..d33259dd436 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AssignReplicationQueuesProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AssignReplicationQueuesProcedure.java
@@ -23,6 +23,7 @@ import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
+import java.util.stream.Collectors;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
 import org.apache.hadoop.hbase.master.procedure.ServerProcedureInterface;
@@ -102,8 +103,12 @@ public class AssignReplicationQueuesProcedure
   }
 
   private Flow claimQueues(MasterProcedureEnv env) throws ReplicationException {
+    Set<String> existingPeerIds = env.getReplicationPeerManager().listPeers(null).stream()
+      .map(ReplicationPeerDescription::getPeerId).collect(Collectors.toSet());
     ReplicationQueueStorage storage = env.getReplicationPeerManager().getQueueStorage();
-    List<ReplicationQueueId> queueIds = storage.listAllQueueIds(crashedServer);
+    // filter out replication queue for deleted peers
+    List<ReplicationQueueId> queueIds = storage.listAllQueueIds(crashedServer).stream()
+      .filter(q -> existingPeerIds.contains(q.getPeerId())).collect(Collectors.toList());
 if (queueIds.isEmpty()) {
   LOG.debug("Finish claiming replication queues for {}", crashedServer);
   // we are done
@@ -130,10 +135,6 @@ public class AssignReplicationQueuesProcedure
     throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
     try {
       switch (state) {
-        case ASSIGN_REPLICATION_QUEUES_PRE_CHECK:
-          // TODO: reserved for implementing the fencing logic with Add/Remove/UpdatePeerProcedure
-          setNextState(AssignReplicationQueuesState.ASSIGN_REPLICATION_QUEUES_ADD_MISSING_QUEUES);
-          return Flow.HAS_MORE_STATE;
 case ASSIGN_REPLICATION_QUEUES_ADD_MISSING_QUEUES:
   addMissingQueues(env);
   retryCounter = null;
@@ -183,7 +184,7 @@ public class AssignReplicationQueuesProcedure
 
   @Override
   protected AssignReplicationQueuesState getInitialState() {
-    return AssignReplicationQueuesState.ASSIGN_REPLICATION_QUEUES_PRE_CHECK;
+    return AssignReplicationQueuesState.ASSIGN_REPLICATION_QUEUES_ADD_MISSING_QUEUES;
   }
 
   @Override
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/ModifyPeerProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/ModifyPeerProcedure.java
index 67d70a166be..78b97620c01 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/ModifyPeerProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/ModifyPeerProcedure.java

[hbase] 10/10: HBASE-27430 Should disable replication log cleaner when migrating replication queue data (#4901)

2023-03-08 Thread zhangduo
This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch HBASE-27109/table_based_rqs
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 2e4e2951364519e0a9187d58b5ca9ff8c3f61574
Author: Duo Zhang 
AuthorDate: Sat Dec 3 20:51:40 2022 +0800

HBASE-27430 Should disable replication log cleaner when migrating replication queue data (#4901)

Signed-off-by: Liangjun He 
---
 .../protobuf/server/master/MasterProcedure.proto   | 12 +++---
 ...rateReplicationQueueFromZkToTableProcedure.java | 47 +-
 ...rateReplicationQueueFromZkToTableProcedure.java | 29 -
 3 files changed, 80 insertions(+), 8 deletions(-)

diff --git 
a/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto 
b/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto
index b6f5d7e50bb..14d07c17c88 100644
--- 
a/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto
+++ 
b/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto
@@ -724,11 +724,13 @@ message AssignReplicationQueuesStateData {
 }
 
 enum MigrateReplicationQueueFromZkToTableState {
-  MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_PREPARE = 1;
-  MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_DISABLE_PEER = 2;
-  MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_MIGRATE = 3;
-  MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_WAIT_UPGRADING = 4;
-  MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_ENABLE_PEER = 5;
+  MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_DISABLE_CLEANER = 1;
+  MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_PREPARE = 2;
+  MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_DISABLE_PEER = 3;
+  MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_MIGRATE = 4;
+  MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_WAIT_UPGRADING = 5;
+  MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_ENABLE_PEER = 6;
+  MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_ENABLE_CLEANER = 7;
 }
 
 message MigrateReplicationQueueFromZkToTableStateData {
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/MigrateReplicationQueueFromZkToTableProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/MigrateReplicationQueueFromZkToTableProcedure.java
index 93ff27db3f7..b7c4e33ef85 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/MigrateReplicationQueueFromZkToTableProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/MigrateReplicationQueueFromZkToTableProcedure.java
@@ -17,7 +17,9 @@
  */
 package org.apache.hadoop.hbase.master.replication;
 
+import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.MigrateReplicationQueueFromZkToTableState.MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_DISABLE_CLEANER;
 import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.MigrateReplicationQueueFromZkToTableState.MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_DISABLE_PEER;
+import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.MigrateReplicationQueueFromZkToTableState.MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_ENABLE_CLEANER;
 import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.MigrateReplicationQueueFromZkToTableState.MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_ENABLE_PEER;
 import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.MigrateReplicationQueueFromZkToTableState.MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_MIGRATE;
 import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.MigrateReplicationQueueFromZkToTableState.MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_PREPARE;
@@ -111,6 +113,26 @@ public class MigrateReplicationQueueFromZkToTableProcedure
 }
   }
 
+  private void disableReplicationLogCleaner(MasterProcedureEnv env)
+    throws ProcedureSuspendedException {
+    if (!env.getReplicationPeerManager().getReplicationLogCleanerBarrier().disable()) {
+      // it is not likely that we can reach here as we will schedule this procedure immediately
+      // after master restarting, where ReplicationLogCleaner should have not started its first run
+      // yet. But anyway, let's make the code more robust. And it is safe to wait a bit here since
+      // there will be no data in the new replication queue storage before we execute this procedure
+      // so ReplicationLogCleaner will quit immediately without doing anything.
+      throw suspend(env.getMasterConfiguration(),
+        backoff -> LOG.info(
+          "Can not disable replication log cleaner, sleep {} secs and retry later",
+          backoff / 1000));
+    }
+    resetRetry();
+  }
+
+  private void enableReplicationLogCleaner(MasterProcedureEnv env) {
+    env.getReplicationPeerManager().getReplicationLogCleanerBarrier().enable();
+  }
+
   private void waitUntilNoPeerProcedure(MasterProcedureEnv 

[hbase] 08/10: HBASE-27217 Revisit the DumpReplicationQueues tool (#4810)

2023-03-08 Thread zhangduo
This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch HBASE-27109/table_based_rqs
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit e5d10b0d22dcdee6d21045375a96a6b73e5813a3
Author: LiangJun He <2005hit...@163.com>
AuthorDate: Sun Nov 13 22:03:36 2022 +0800

HBASE-27217 Revisit the DumpReplicationQueues tool (#4810)

Signed-off-by: Duo Zhang 
---
 .../regionserver/DumpReplicationQueues.java| 240 +
 .../hadoop/hbase/wal/AbstractFSWALProvider.java|  20 ++
 .../regionserver/TestDumpReplicationQueues.java| 159 +-
 3 files changed, 284 insertions(+), 135 deletions(-)

diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/DumpReplicationQueues.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/DumpReplicationQueues.java
index 98d0a55fbc4..b284e3f6837 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/DumpReplicationQueues.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/DumpReplicationQueues.java
@@ -19,8 +19,12 @@ package org.apache.hadoop.hbase.replication.regionserver;
 
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@@ -31,7 +35,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.hbase.Abortable;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.client.Admin;
@@ -40,28 +44,33 @@ import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.client.replication.TableCFs;
 import org.apache.hadoop.hbase.io.WALLink;
 import org.apache.hadoop.hbase.procedure2.util.StringUtils;
+import org.apache.hadoop.hbase.replication.ReplicationException;
+import org.apache.hadoop.hbase.replication.ReplicationGroupOffset;
+import org.apache.hadoop.hbase.replication.ReplicationOffsetUtil;
 import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
 import org.apache.hadoop.hbase.replication.ReplicationPeerDescription;
-import org.apache.hadoop.hbase.replication.ReplicationQueueInfo;
+import org.apache.hadoop.hbase.replication.ReplicationQueueData;
+import org.apache.hadoop.hbase.replication.ReplicationQueueId;
 import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
-import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
-import org.apache.hadoop.hbase.zookeeper.ZKDump;
-import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
+import org.apache.hadoop.hbase.replication.ReplicationStorageFactory;
+import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMap;
 import org.apache.hbase.thirdparty.com.google.common.util.concurrent.AtomicLongMap;
 
 /**
- * TODO: reimplement this tool
  * 
  * Provides information about the existing states of replication, replication peers and queues.
  * Usage: hbase org.apache.hadoop.hbase.replication.regionserver.DumpReplicationQueues [args]
  * Arguments: --distributed Polls each RS to dump information about the queue --hdfs Reports HDFS
- * usage by the replication queues (note: can be overestimated).
+ * usage by the replication queues (note: can be overestimated). In the new version, we
+ * reimplemented the DumpReplicationQueues tool to support obtaining information from replication
+ * table.
  */
 @InterfaceAudience.Private
 public class DumpReplicationQueues extends Configured implements Tool {
@@ -185,7 +194,7 @@ public class DumpReplicationQueues extends Configured implements Tool {
 System.err.println("General Options:");
 System.err.println(" -h|--h|--help  Show this help and exit.");
 System.err.println(" --distributed  Poll each RS and print its own 
replication queue. "
-  + "Default only polls ZooKeeper");
+  + "Default only polls replication table.");
 System.err.println(" --hdfs Use HDFS to calculate usage of WALs by 
replication."
   + " It could be overestimated if replicating to multiple peers."
   + " --distributed flag is also needed.");
@@ -201,13 +210,7 @@ public class DumpReplicationQueues extends Configured implements Tool {
 Connection connection = ConnectionFactory.createConnection(conf);
 Admin 

[hbase] 09/10: HBASE-27429 Add exponential retry backoff support for MigrateReplicationQueueFromZkToTableProcedure

2023-03-08 Thread zhangduo
This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch HBASE-27109/table_based_rqs
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 3c54d9c27a9a6e147580cbeab67186b18019785b
Author: Duo Zhang 
AuthorDate: Tue Oct 18 16:46:03 2022 +0800

HBASE-27429 Add exponential retry backoff support for MigrateReplicationQueueFromZkToTableProcedure

Signed-off-by: Liangjun He 
---
 .../hbase/procedure2/TimeoutExecutorThread.java|  10 +-
 ...rateReplicationQueueFromZkToTableProcedure.java | 131 ++---
 .../master/replication/ReplicationPeerManager.java |  45 ---
 ...tReplicationPeerManagerMigrateQueuesFromZk.java |   9 +-
 4 files changed, 125 insertions(+), 70 deletions(-)

diff --git 
a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/TimeoutExecutorThread.java
 
b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/TimeoutExecutorThread.java
index 3b99781a558..c0287a99435 100644
--- 
a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/TimeoutExecutorThread.java
+++ 
b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/TimeoutExecutorThread.java
@@ -78,9 +78,13 @@ class TimeoutExecutorThread<TEnvironment> extends StoppableThread {
   }
 
   public void add(Procedure<TEnvironment> procedure) {
-    LOG.info("ADDED {}; timeout={}, timestamp={}", procedure, procedure.getTimeout(),
-      procedure.getTimeoutTimestamp());
-    queue.add(new DelayedProcedure<>(procedure));
+    if (procedure.getTimeout() > 0) {
+      LOG.info("ADDED {}; timeout={}, timestamp={}", procedure, procedure.getTimeout(),
+        procedure.getTimeoutTimestamp());
+      queue.add(new DelayedProcedure<>(procedure));
+    } else {
+      LOG.info("Got negative timeout {} for {}, skip adding", procedure.getTimeout(), procedure);
+    }
   }
 
   public boolean remove(Procedure procedure) {
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/MigrateReplicationQueueFromZkToTableProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/MigrateReplicationQueueFromZkToTableProcedure.java
index 536f232338e..93ff27db3f7 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/MigrateReplicationQueueFromZkToTableProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/MigrateReplicationQueueFromZkToTableProcedure.java
@@ -25,19 +25,25 @@ import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureP
 
 import java.io.IOException;
 import java.util.List;
+import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
+import java.util.function.LongConsumer;
 import java.util.stream.Collectors;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.master.procedure.GlobalProcedureInterface;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
 import org.apache.hadoop.hbase.master.procedure.PeerProcedureInterface;
 import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
 import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
+import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
 import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
 import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
 import org.apache.hadoop.hbase.replication.ReplicationPeerDescription;
 import org.apache.hadoop.hbase.replication.ZKReplicationQueueStorageForMigration;
+import org.apache.hadoop.hbase.util.FutureUtils;
+import org.apache.hadoop.hbase.util.IdLock;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.VersionInfo;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.zookeeper.KeeperException;
@@ -65,18 +71,34 @@ public class MigrateReplicationQueueFromZkToTableProcedure
 
   private List<String> disabledPeerIds;
 
-  private List> futures;
+  private CompletableFuture future;
 
   private ExecutorService executor;
 
+  private RetryCounter retryCounter;
+
   @Override
   public String getGlobalId() {
 return getClass().getSimpleName();
   }
 
+  private ProcedureSuspendedException suspend(Configuration conf, LongConsumer backoffConsumer)
+throws ProcedureSuspendedException {
+if (retryCounter == null) {
+  retryCounter = ProcedureUtil.createRetryCounter(conf);
+}
+long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
+backoffConsumer.accept(backoff);
+throw suspend(Math.toIntExact(backoff), true);
+  }
+
+  private void resetRetry() {
+retryCounter = null;
+  }
+
   private ExecutorService getExecutorService() {
 if (executor == null) {
-  executor = Executors.newFixedThreadPool(3, new ThreadFactoryBuilder()
+  executor = Executors.newCachedThreadPool(new ThreadFactoryBuilder()
 

[hbase] 05/10: HBASE-27392 Add a new procedure type for implementing some global operations such as migration (#4803)

2023-03-08 Thread zhangduo
This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch HBASE-27109/table_based_rqs
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 83f9769ab03fc70663732a418eb112bb8916346f
Author: Duo Zhang 
AuthorDate: Thu Sep 29 10:08:02 2022 +0800

HBASE-27392 Add a new procedure type for implementing some global operations such as migration (#4803)

Signed-off-by: Xin Sun 
---
 .../hbase/procedure2/LockedResourceType.java   |   3 +-
 .../master/procedure/GlobalProcedureInterface.java |  15 ++-
 .../hadoop/hbase/master/procedure/GlobalQueue.java |  21 ++--
 .../master/procedure/MasterProcedureScheduler.java | 119 -
 .../hbase/master/procedure/SchemaLocking.java  |  18 +++-
 .../procedure/TestMasterProcedureScheduler.java|  48 +
 6 files changed, 202 insertions(+), 22 deletions(-)

diff --git 
a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java
 
b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java
index 12f899d7565..40141017009 100644
--- 
a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java
+++ 
b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java
@@ -26,5 +26,6 @@ public enum LockedResourceType {
   TABLE,
   REGION,
   PEER,
-  META
+  META,
+  GLOBAL
 }
diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/GlobalProcedureInterface.java
similarity index 82%
copy from hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java
copy to hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/GlobalProcedureInterface.java
index 12f899d7565..1ef168abfd8 100644
--- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/GlobalProcedureInterface.java
@@ -15,16 +15,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.hadoop.hbase.procedure2;
+package org.apache.hadoop.hbase.master.procedure;
 
 import org.apache.yetus.audience.InterfaceAudience;
 
+/**
+ * Procedure interface for global operations, such as migration.
+ */
 @InterfaceAudience.Private
-public enum LockedResourceType {
-  SERVER,
-  NAMESPACE,
-  TABLE,
-  REGION,
-  PEER,
-  META
+public interface GlobalProcedureInterface {
+
+  String getGlobalId();
 }
diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/GlobalQueue.java
similarity index 69%
copy from hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java
copy to hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/GlobalQueue.java
index 12f899d7565..1633dc4856e 100644
--- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/GlobalQueue.java
@@ -15,16 +15,21 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.hadoop.hbase.procedure2;
+package org.apache.hadoop.hbase.master.procedure;
 
+import org.apache.hadoop.hbase.procedure2.LockStatus;
+import org.apache.hadoop.hbase.procedure2.Procedure;
 import org.apache.yetus.audience.InterfaceAudience;
 
 @InterfaceAudience.Private
-public enum LockedResourceType {
-  SERVER,
-  NAMESPACE,
-  TABLE,
-  REGION,
-  PEER,
-  META
+public class GlobalQueue extends Queue<String> {
+
+  public GlobalQueue(String globalId, LockStatus lockStatus) {
+super(globalId, lockStatus);
+  }
+
+  @Override
+  boolean requireExclusiveLock(Procedure<?> proc) {
+    return true;
+  }
 }
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureScheduler.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureScheduler.java
index 866f2f6f403..fbf0eb8abf3 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureScheduler.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureScheduler.java
@@ -22,6 +22,7 @@ import java.util.Arrays;
 import java.util.List;
 import java.util.function.Function;
 import java.util.function.Supplier;
+import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableExistsException;
 import org.apache.hadoop.hbase.TableName;
@@ -95,16 +96,20 @@ public class MasterProcedureScheduler extends AbstractProcedureScheduler {
 (n, k) -> n.compareKey((String) k);
   private 

[hbase] 06/10: HBASE-27405 Fix the replication hfile/log cleaner report that the replication table does not exist (#4811)

2023-03-08 Thread zhangduo
This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch HBASE-27109/table_based_rqs
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 54a722e1e7591587ef1ee3a125663935c311f1ec
Author: LiangJun He <2005hit...@163.com>
AuthorDate: Wed Oct 12 14:40:05 2022 +0800

HBASE-27405 Fix the replication hfile/log cleaner report that the replication table does not exist (#4811)

Signed-off-by: Duo Zhang 
---
 .../apache/hadoop/hbase/replication/ReplicationQueueStorage.java | 6 ++
 .../hadoop/hbase/replication/TableReplicationQueueStorage.java   | 9 +
 .../hadoop/hbase/replication/master/ReplicationLogCleaner.java   | 8 
 .../hbase/replication/master/TestReplicationLogCleaner.java  | 1 +
 4 files changed, 24 insertions(+)

diff --git 
a/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ReplicationQueueStorage.java
 
b/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ReplicationQueueStorage.java
index c4204f0e8c4..6f6aee38cc8 100644
--- 
a/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ReplicationQueueStorage.java
+++ 
b/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ReplicationQueueStorage.java
@@ -178,4 +178,10 @@ public interface ReplicationQueueStorage {
* created hfile references during the call may not be included.
*/
   Set<String> getAllHFileRefs() throws ReplicationException;
+
+  /**
+   * Whether the replication queue table exists.
+   * @return Whether the replication queue table exists
+   */
+  boolean hasData() throws ReplicationException;
 }
diff --git 
a/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/TableReplicationQueueStorage.java
 
b/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/TableReplicationQueueStorage.java
index 0c9553f4fd8..392a3692d66 100644
--- 
a/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/TableReplicationQueueStorage.java
+++ 
b/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/TableReplicationQueueStorage.java
@@ -532,4 +532,13 @@ public class TableReplicationQueueStorage implements ReplicationQueueStorage {
   throw new ReplicationException("failed to getAllHFileRefs", e);
 }
   }
+
+  @Override
+  public boolean hasData() throws ReplicationException {
+try {
+  return conn.getAdmin().getDescriptor(tableName) != null;
+} catch (IOException e) {
+  throw new ReplicationException("failed to get replication queue table", 
e);
+}
+  }
 }
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java
index f1fd8f8d6b3..3ab52da6158 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java
@@ -76,6 +76,14 @@ public class ReplicationLogCleaner extends BaseLogCleanerDelegate {
 if (this.getConf() == null) {
   return;
 }
+try {
+  if (!rpm.getQueueStorage().hasData()) {
+return;
+  }
+} catch (ReplicationException e) {
+  LOG.error("Error occurred while executing queueStorage.hasData()", e);
+  return;
+}
 canFilter = rpm.getReplicationLogCleanerBarrier().start();
 if (canFilter) {
   notFullyDeadServers = getNotFullyDeadServers.get();
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/master/TestReplicationLogCleaner.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/master/TestReplicationLogCleaner.java
index 7a227fb0603..7edadae03b1 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/master/TestReplicationLogCleaner.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/master/TestReplicationLogCleaner.java
@@ -86,6 +86,7 @@ public class TestReplicationLogCleaner {
 when(rpm.listPeers(null)).thenReturn(new ArrayList<>());
 ReplicationQueueStorage rqs = mock(ReplicationQueueStorage.class);
 when(rpm.getQueueStorage()).thenReturn(rqs);
+when(rpm.getQueueStorage().hasData()).thenReturn(true);
 when(rqs.listAllQueues()).thenReturn(new ArrayList<>());
 ServerManager sm = mock(ServerManager.class);
 when(services.getServerManager()).thenReturn(sm);



[hbase] 04/10: HBASE-27215 Add support for sync replication (#4762)

2023-03-08 Thread zhangduo
This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch HBASE-27109/table_based_rqs
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 330042cc3d2939c584b638245a7bbd22fa294e6d
Author: Duo Zhang 
AuthorDate: Thu Sep 15 22:58:29 2022 +0800

HBASE-27215 Add support for sync replication (#4762)

Signed-off-by: Xiaolin Ha 
---
 .../regionserver/ReplicationSource.java|  2 +-
 .../regionserver/ReplicationSourceManager.java | 53 +++---
 .../TestDrainReplicationQueuesForStandBy.java  |  3 --
 3 files changed, 28 insertions(+), 30 deletions(-)

diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
index e078722b157..0784a87711b 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
@@ -465,7 +465,7 @@ public class ReplicationSource implements ReplicationSourceInterface {
 t.getName());
   manager.refreshSources(peerId);
   break;
-} catch (IOException e1) {
+} catch (IOException | ReplicationException e1) {
   LOG.error("Replication sources refresh failed.", e1);
   sleepForRetries("Sleeping before try refreshing sources again", 
maxRetriesMultiplier);
 }
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
index 03569be86fc..f3d07315240 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
@@ -403,38 +403,44 @@ public class ReplicationSourceManager {
     // TODO: use empty initial offsets for now, revisit when adding support for sync replication
 ReplicationSourceInterface src =
   createSource(new ReplicationQueueData(queueId, ImmutableMap.of()), peer);
-    // synchronized here to avoid race with preLogRoll where we add new log to source and also
+    // synchronized here to avoid race with postLogRoll where we add new log to source and also
 // walsById.
 ReplicationSourceInterface toRemove;
-    Map<String, NavigableSet<String>> wals = new HashMap<>();
+ReplicationQueueData queueData;
 synchronized (latestPaths) {
+      // Here we make a copy of all the remaining wal files and then delete them from the
+      // replication queue storage after releasing the lock. It is not safe to just remove the old
+      // map from walsById since later we may fail to update the replication queue storage, and when
+      // we retry next time, we can not know the wal files that needs to be set to the replication
+      // queue storage
+      ImmutableMap.Builder<String, ReplicationGroupOffset> builder = ImmutableMap.builder();
+  synchronized (walsById) {
+walsById.get(queueId).forEach((group, wals) -> {
+  if (!wals.isEmpty()) {
+builder.put(group, new ReplicationGroupOffset(wals.last(), -1));
+  }
+});
+  }
+  queueData = new ReplicationQueueData(queueId, builder.build());
+  src = createSource(queueData, peer);
   toRemove = sources.put(peerId, src);
   if (toRemove != null) {
 LOG.info("Terminate replication source for " + toRemove.getPeerId());
 toRemove.terminate(terminateMessage);
 toRemove.getSourceMetrics().clear();
   }
-      // Here we make a copy of all the remaining wal files and then delete them from the
-      // replication queue storage after releasing the lock. It is not safe to just remove the old
-      // map from walsById since later we may fail to delete them from the replication queue
-      // storage, and when we retry next time, we can not know the wal files that need to be deleted
-      // from the replication queue storage.
-  walsById.get(queueId).forEach((k, v) -> wals.put(k, new TreeSet<>(v)));
+}
+    for (Map.Entry<String, ReplicationGroupOffset> entry : queueData.getOffsets().entrySet()) {
+      queueStorage.setOffset(queueId, entry.getKey(), entry.getValue(), Collections.emptyMap());
 }
 LOG.info("Startup replication source for " + src.getPeerId());
 src.startup();
-    for (NavigableSet<String> walsByGroup : wals.values()) {
-  // TODO: just need to reset the replication offset
-  // for (String wal : walsByGroup) {
-  // queueStorage.removeWAL(server.getServerName(), peerId, wal);
-  // }
-}
 synchronized (walsById) {
-      Map<String, NavigableSet<String>> oldWals = walsById.get(queueId);
-      wals.forEach((k, v) -> {
-        NavigableSet<String> walsByGroup = oldWals.get(k);
+  Map> 

[hbase] 03/10: HBASE-27214 Implement the new replication hfile/log cleaner (#4722)

2023-03-08 Thread zhangduo
This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch HBASE-27109/table_based_rqs
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 8099d454eba625157f83b9ffc01503b4383b7fef
Author: Duo Zhang 
AuthorDate: Wed Aug 31 21:24:09 2022 +0800

HBASE-27214 Implement the new replication hfile/log cleaner (#4722)

Signed-off-by: Xin Sun 
---
 .../org/apache/hadoop/hbase/master/HMaster.java|   1 -
 .../hbase/master/cleaner/FileCleanerDelegate.java  |   2 +-
 .../hadoop/hbase/master/region/MasterRegion.java   |   2 +-
 .../hbase/master/replication/AddPeerProcedure.java |  15 +-
 .../master/replication/ReplicationPeerManager.java |   8 +
 .../hadoop/hbase/regionserver/HRegionServer.java   |   2 +-
 .../hbase/replication/ReplicationOffsetUtil.java   |  47 +++
 .../replication/master/ReplicationLogCleaner.java  | 234 +
 .../master/ReplicationLogCleanerBarrier.java   |  85 +
 .../regionserver/ReplicationSourceManager.java |  18 +-
 .../regionserver/ReplicationSyncUp.java|   5 +-
 .../hadoop/hbase/wal/AbstractFSWALProvider.java|  29 ++
 .../org/apache/hadoop/hbase/wal/WALFactory.java|  29 +-
 .../hbase/master/cleaner/TestLogsCleaner.java  | 227 +---
 .../cleaner/TestReplicationHFileCleaner.java   |  43 ++-
 .../replication/TestReplicationOffsetUtil.java |  52 +++
 .../replication/master/TestLogCleanerBarrier.java  |  60 
 .../master/TestReplicationLogCleaner.java  | 385 +
 .../regionserver/TestReplicationSourceManager.java |   2 +-
 .../apache/hadoop/hbase/wal/TestWALFactory.java|   2 +-
 .../apache/hadoop/hbase/wal/TestWALMethods.java|  14 +
 21 files changed, 1008 insertions(+), 254 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index ce3e81ad04b..118457648de 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -4289,5 +4289,4 @@ public class HMaster extends HBaseServerBase<MasterRpcServices> implements Maste
 // initialize master side coprocessors before we start handling requests
 this.cpHost = new MasterCoprocessorHost(this, conf);
   }
-
 }
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java
index d37bb620273..e08f5329433 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java
@@ -50,7 +50,7 @@ public interface FileCleanerDelegate extends Configurable, Stoppable {
   }
 
   /**
-   * Used to do some cleanup work
+   * Will be called after cleaner run.
*/
   default void postClean() {
   }
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/region/MasterRegion.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/region/MasterRegion.java
index 177e161c32e..45f049723c7 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/region/MasterRegion.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/region/MasterRegion.java
@@ -377,7 +377,7 @@ public final class MasterRegion {
   params.archivedWalSuffix(), params.rollPeriodMs(), params.flushSize());
 walRoller.start();
 
-    WALFactory walFactory = new WALFactory(conf, server.getServerName().toString(), server, false);
+    WALFactory walFactory = new WALFactory(conf, server.getServerName(), server, false);
 Path tableDir = CommonFSUtils.getTableDir(rootDir, td.getTableName());
 Path initializingFlag = new Path(tableDir, INITIALIZING_FLAG);
 Path initializedFlag = new Path(tableDir, INITIALIZED_FLAG);
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AddPeerProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AddPeerProcedure.java
index 6d0acee76ca..25a4cd4b08e 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AddPeerProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AddPeerProcedure.java
@@ -21,7 +21,6 @@ import java.io.IOException;
 import org.apache.hadoop.hbase.client.replication.ReplicationPeerConfigUtil;
 import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
-import org.apache.hadoop.hbase.master.procedure.ProcedurePrepareLatch;
 import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
 import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
 import org.apache.hadoop.hbase.replication.ReplicationException;
@@ -84,15 +83,21 @@ public 

[hbase] branch branch-2.5 updated: HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078)

2023-03-08 Thread zhangduo
This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch branch-2.5
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.5 by this push:
 new ab8aa620302 HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078)
ab8aa620302 is described below

commit ab8aa62030264eed588fb78315b2bf2d57b8a3fa
Author: Rajeshbabu Chintaguntla 
AuthorDate: Thu Mar 9 08:16:25 2023 +0530

HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078)

Co-authored-by: Rajeshbabu Chintaguntla 
Signed-off-by: Duo Zhang 
(cherry picked from commit bc8b13e468a258b4ee47e40cf3645bf27d66471d)
---
 bin/chaos-daemon.sh| 26 +++---
 bin/hbase  |  4 
 .../apache/hadoop/hbase/HBaseClusterManager.java   |  2 +-
 3 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/bin/chaos-daemon.sh b/bin/chaos-daemon.sh
index 084e519321a..8e27f4a5d9f 100644
--- a/bin/chaos-daemon.sh
+++ b/bin/chaos-daemon.sh
@@ -19,7 +19,7 @@
 # */
 #
 
-usage="Usage: chaos-daemon.sh (start|stop) chaosagent"
+usage="Usage: chaos-daemon.sh (start|stop) (chaosagent|chaosmonkeyrunner)"
 
 # if no args specified, show usage
 if [ $# -le 1 ]; then
@@ -51,11 +51,6 @@ bin=$(cd "$bin">/dev/null || exit; pwd)
 . "$bin"/hbase-config.sh
 . "$bin"/hbase-common.sh
 
-CLASSPATH=$HBASE_CONF_DIR
-for f in ../lib/*.jar; do
-  CLASSPATH=${CLASSPATH}:$f
-done
-
 # get log directory
 if [ "$HBASE_LOG_DIR" = "" ]; then
   export HBASE_LOG_DIR="$HBASE_HOME/logs"
@@ -79,7 +74,7 @@ if [ "$JAVA_HOME" = "" ]; then
 fi
 
 export HBASE_LOG_PREFIX=hbase-$HBASE_IDENT_STRING-$command-$HOSTNAME
-export CHAOS_LOGFILE=$HBASE_LOG_PREFIX.log
+export HBASE_LOGFILE=$HBASE_LOG_PREFIX.log
 
 if [ -z "${HBASE_ROOT_LOGGER}" ]; then
 export HBASE_ROOT_LOGGER=${HBASE_ROOT_LOGGER:-"INFO,RFA"}
@@ -89,7 +84,7 @@ if [ -z "${HBASE_SECURITY_LOGGER}" ]; then
 export HBASE_SECURITY_LOGGER=${HBASE_SECURITY_LOGGER:-"INFO,RFAS"}
 fi
 
-CHAOS_LOGLOG=${CHAOS_LOGLOG:-"${HBASE_LOG_DIR}/${CHAOS_LOGFILE}"}
+CHAOS_LOGLOG=${CHAOS_LOGLOG:-"${HBASE_LOG_DIR}/${HBASE_LOGFILE}"}
 CHAOS_PID=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.pid
 
 if [ -z "$CHAOS_JAVA_OPTS" ]; then
@@ -101,15 +96,20 @@ case $startStop in
 (start)
 check_before_start
 echo running $command
-CMD="${JAVA_HOME}/bin/java -Dapp.home=${HBASE_CONF_DIR}/../  
${CHAOS_JAVA_OPTS} -cp ${CLASSPATH} org.apache.hadoop.hbase.chaos.ChaosService 
-$command start &>> ${CHAOS_LOGLOG} &"
-
-eval $CMD
+command_args=""
+if [ "$command" = "chaosagent" ]; then
+  command_args=" -${command} start"
+elif [ "$command" = "chaosmonkeyrunner" ]; then
+  command_args="-c $HBASE_CONF_DIR $@"
+fi
+HBASE_OPTS="$HBASE_OPTS $CHAOS_JAVA_OPTS" . $bin/hbase --config 
"${HBASE_CONF_DIR}" $command $command_args >> ${CHAOS_LOGLOG} 2>&1 &
 PID=$(echo $!)
+disown -h -r
 echo ${PID} >${CHAOS_PID}
 
-echo "Chaos ${1} process Started with ${PID} !"
+echo "Chaos ${command} process Started with ${PID} !"
 now=$(date)
-echo "${now} Chaos ${1} process Started with ${PID} !" >>${CHAOS_LOGLOG}
+echo "${now} Chaos ${command} process Started with ${PID} !" 
>>${CHAOS_LOGLOG}
 ;;
 
 (stop)
diff --git a/bin/hbase b/bin/hbase
index 02a021ec04f..6833ebfe760 100755
--- a/bin/hbase
+++ b/bin/hbase
@@ -710,6 +710,10 @@ elif [ "$COMMAND" = "pre-upgrade" ] ; then
   CLASS='org.apache.hadoop.hbase.tool.PreUpgradeValidator'
 elif [ "$COMMAND" = "completebulkload" ] ; then
   CLASS='org.apache.hadoop.hbase.tool.BulkLoadHFilesTool'
+elif [ "$COMMAND" = "chaosagent" ] ; then
+  CLASS='org.apache.hadoop.hbase.chaos.ChaosService'
+elif [ "$COMMAND" = "chaosmonkeyrunner" ] ; then
+  CLASS='org.apache.hadoop.hbase.chaos.util.ChaosMonkeyRunner'
 elif [ "$COMMAND" = "hbtop" ] ; then
   CLASS='org.apache.hadoop.hbase.hbtop.HBTop'
   if [ -n "${shaded_jar}" ] ; then
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
index cd1c6773634..a73748d5c4f 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
@@ -217,7 +217,7 @@ public class HBaseClusterManager extends Configured implements ClusterManager {
 }
 
 public String signalCommand(ServiceType service, String signal) {
-  return String.format("%s | xargs sudo kill -s %s", 
findPidCommand(service), signal);
+  return String.format("%s | xargs kill -s %s", findPidCommand(service), 
signal);
 }
   }
 



[hbase] branch branch-2.4 updated: HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078)

2023-03-08 Thread zhangduo
This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
 new ad90a7b0466 HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078)
ad90a7b0466 is described below

commit ad90a7b0466b6cc4265f61aae962369f1b00ba83
Author: Rajeshbabu Chintaguntla 
AuthorDate: Thu Mar 9 08:16:25 2023 +0530

HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078)

Co-authored-by: Rajeshbabu Chintaguntla 
Signed-off-by: Duo Zhang 
(cherry picked from commit bc8b13e468a258b4ee47e40cf3645bf27d66471d)
---
 bin/chaos-daemon.sh| 26 +++---
 bin/hbase  |  4 
 .../apache/hadoop/hbase/HBaseClusterManager.java   |  2 +-
 3 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/bin/chaos-daemon.sh b/bin/chaos-daemon.sh
index 084e519321a..8e27f4a5d9f 100644
--- a/bin/chaos-daemon.sh
+++ b/bin/chaos-daemon.sh
@@ -19,7 +19,7 @@
 # */
 #
 
-usage="Usage: chaos-daemon.sh (start|stop) chaosagent"
+usage="Usage: chaos-daemon.sh (start|stop) (chaosagent|chaosmonkeyrunner)"
 
 # if no args specified, show usage
 if [ $# -le 1 ]; then
@@ -51,11 +51,6 @@ bin=$(cd "$bin">/dev/null || exit; pwd)
 . "$bin"/hbase-config.sh
 . "$bin"/hbase-common.sh
 
-CLASSPATH=$HBASE_CONF_DIR
-for f in ../lib/*.jar; do
-  CLASSPATH=${CLASSPATH}:$f
-done
-
 # get log directory
 if [ "$HBASE_LOG_DIR" = "" ]; then
   export HBASE_LOG_DIR="$HBASE_HOME/logs"
@@ -79,7 +74,7 @@ if [ "$JAVA_HOME" = "" ]; then
 fi
 
 export HBASE_LOG_PREFIX=hbase-$HBASE_IDENT_STRING-$command-$HOSTNAME
-export CHAOS_LOGFILE=$HBASE_LOG_PREFIX.log
+export HBASE_LOGFILE=$HBASE_LOG_PREFIX.log
 
 if [ -z "${HBASE_ROOT_LOGGER}" ]; then
 export HBASE_ROOT_LOGGER=${HBASE_ROOT_LOGGER:-"INFO,RFA"}
@@ -89,7 +84,7 @@ if [ -z "${HBASE_SECURITY_LOGGER}" ]; then
 export HBASE_SECURITY_LOGGER=${HBASE_SECURITY_LOGGER:-"INFO,RFAS"}
 fi
 
-CHAOS_LOGLOG=${CHAOS_LOGLOG:-"${HBASE_LOG_DIR}/${CHAOS_LOGFILE}"}
+CHAOS_LOGLOG=${CHAOS_LOGLOG:-"${HBASE_LOG_DIR}/${HBASE_LOGFILE}"}
 CHAOS_PID=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.pid
 
 if [ -z "$CHAOS_JAVA_OPTS" ]; then
@@ -101,15 +96,20 @@ case $startStop in
 (start)
 check_before_start
 echo running $command
-CMD="${JAVA_HOME}/bin/java -Dapp.home=${HBASE_CONF_DIR}/../  
${CHAOS_JAVA_OPTS} -cp ${CLASSPATH} org.apache.hadoop.hbase.chaos.ChaosService 
-$command start &>> ${CHAOS_LOGLOG} &"
-
-eval $CMD
+command_args=""
+if [ "$command" = "chaosagent" ]; then
+  command_args=" -${command} start"
+elif [ "$command" = "chaosmonkeyrunner" ]; then
+  command_args="-c $HBASE_CONF_DIR $@"
+fi
+HBASE_OPTS="$HBASE_OPTS $CHAOS_JAVA_OPTS" . $bin/hbase --config 
"${HBASE_CONF_DIR}" $command $command_args >> ${CHAOS_LOGLOG} 2>&1 &
 PID=$(echo $!)
+disown -h -r
 echo ${PID} >${CHAOS_PID}
 
-echo "Chaos ${1} process Started with ${PID} !"
+echo "Chaos ${command} process Started with ${PID} !"
 now=$(date)
-echo "${now} Chaos ${1} process Started with ${PID} !" >>${CHAOS_LOGLOG}
+echo "${now} Chaos ${command} process Started with ${PID} !" 
>>${CHAOS_LOGLOG}
 ;;
 
 (stop)
diff --git a/bin/hbase b/bin/hbase
index 3c8f80bf4e2..41ac11f4016 100755
--- a/bin/hbase
+++ b/bin/hbase
@@ -673,6 +673,10 @@ elif [ "$COMMAND" = "pre-upgrade" ] ; then
   CLASS='org.apache.hadoop.hbase.tool.PreUpgradeValidator'
 elif [ "$COMMAND" = "completebulkload" ] ; then
   CLASS='org.apache.hadoop.hbase.tool.BulkLoadHFilesTool'
+elif [ "$COMMAND" = "chaosagent" ] ; then
+  CLASS='org.apache.hadoop.hbase.chaos.ChaosService'
+elif [ "$COMMAND" = "chaosmonkeyrunner" ] ; then
+  CLASS='org.apache.hadoop.hbase.chaos.util.ChaosMonkeyRunner'
 elif [ "$COMMAND" = "hbtop" ] ; then
   CLASS='org.apache.hadoop.hbase.hbtop.HBTop'
   if [ -n "${shaded_jar}" ] ; then
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
index d620684a37b..9380e1dfe94 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
@@ -217,7 +217,7 @@ public class HBaseClusterManager extends Configured implements ClusterManager {
 }
 
 public String signalCommand(ServiceType service, String signal) {
-  return String.format("%s | xargs sudo kill -s %s", findPidCommand(service), signal);
+  return String.format("%s | xargs kill -s %s", findPidCommand(service), signal);
 }
   }
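
With this patch chaos-daemon.sh no longer builds its own java command line; it defers to the regular bin/hbase launcher, which now understands the chaosagent and chaosmonkeyrunner command words. A brief usage sketch based on the new usage string and dispatch logic above (run from the HBase install root; everything beyond the command word is illustrative, not taken from the patch):

# Start the chaos agent; the script now effectively runs
# "hbase chaosagent -chaosagent start" and records the PID file.
./bin/chaos-daemon.sh start chaosagent

# Start the chaos monkey runner; the script forwards "-c $HBASE_CONF_DIR"
# plus any remaining arguments to ChaosMonkeyRunner.
./bin/chaos-daemon.sh start chaosmonkeyrunner

# Stop either daemon the same way.
./bin/chaos-daemon.sh stop chaosagent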
 



[hbase] branch branch-2 updated: HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078)

2023-03-08 Thread zhangduo
This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
 new e180aa10df8 HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078)
e180aa10df8 is described below

commit e180aa10df835f4e2995ea74a4a9fec2b5e5118f
Author: Rajeshbabu Chintaguntla 
AuthorDate: Thu Mar 9 08:16:25 2023 +0530

HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078)

Co-authored-by: Rajeshbabu Chintaguntla 
Signed-off-by: Duo Zhang 
(cherry picked from commit bc8b13e468a258b4ee47e40cf3645bf27d66471d)
---
 bin/chaos-daemon.sh| 26 +++---
 bin/hbase  |  4 
 .../apache/hadoop/hbase/HBaseClusterManager.java   |  2 +-
 3 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/bin/chaos-daemon.sh b/bin/chaos-daemon.sh
index 084e519321a..8e27f4a5d9f 100644
--- a/bin/chaos-daemon.sh
+++ b/bin/chaos-daemon.sh
@@ -19,7 +19,7 @@
 # */
 #
 
-usage="Usage: chaos-daemon.sh (start|stop) chaosagent"
+usage="Usage: chaos-daemon.sh (start|stop) (chaosagent|chaosmonkeyrunner)"
 
 # if no args specified, show usage
 if [ $# -le 1 ]; then
@@ -51,11 +51,6 @@ bin=$(cd "$bin">/dev/null || exit; pwd)
 . "$bin"/hbase-config.sh
 . "$bin"/hbase-common.sh
 
-CLASSPATH=$HBASE_CONF_DIR
-for f in ../lib/*.jar; do
-  CLASSPATH=${CLASSPATH}:$f
-done
-
 # get log directory
 if [ "$HBASE_LOG_DIR" = "" ]; then
   export HBASE_LOG_DIR="$HBASE_HOME/logs"
@@ -79,7 +74,7 @@ if [ "$JAVA_HOME" = "" ]; then
 fi
 
 export HBASE_LOG_PREFIX=hbase-$HBASE_IDENT_STRING-$command-$HOSTNAME
-export CHAOS_LOGFILE=$HBASE_LOG_PREFIX.log
+export HBASE_LOGFILE=$HBASE_LOG_PREFIX.log
 
 if [ -z "${HBASE_ROOT_LOGGER}" ]; then
 export HBASE_ROOT_LOGGER=${HBASE_ROOT_LOGGER:-"INFO,RFA"}
@@ -89,7 +84,7 @@ if [ -z "${HBASE_SECURITY_LOGGER}" ]; then
 export HBASE_SECURITY_LOGGER=${HBASE_SECURITY_LOGGER:-"INFO,RFAS"}
 fi
 
-CHAOS_LOGLOG=${CHAOS_LOGLOG:-"${HBASE_LOG_DIR}/${CHAOS_LOGFILE}"}
+CHAOS_LOGLOG=${CHAOS_LOGLOG:-"${HBASE_LOG_DIR}/${HBASE_LOGFILE}"}
 CHAOS_PID=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.pid
 
 if [ -z "$CHAOS_JAVA_OPTS" ]; then
@@ -101,15 +96,20 @@ case $startStop in
 (start)
 check_before_start
 echo running $command
-CMD="${JAVA_HOME}/bin/java -Dapp.home=${HBASE_CONF_DIR}/../  
${CHAOS_JAVA_OPTS} -cp ${CLASSPATH} org.apache.hadoop.hbase.chaos.ChaosService 
-$command start &>> ${CHAOS_LOGLOG} &"
-
-eval $CMD
+command_args=""
+if [ "$command" = "chaosagent" ]; then
+  command_args=" -${command} start"
+elif [ "$command" = "chaosmonkeyrunner" ]; then
+  command_args="-c $HBASE_CONF_DIR $@"
+fi
+HBASE_OPTS="$HBASE_OPTS $CHAOS_JAVA_OPTS" . $bin/hbase --config "${HBASE_CONF_DIR}" $command $command_args >> ${CHAOS_LOGLOG} 2>&1 &
 PID=$(echo $!)
+disown -h -r
 echo ${PID} >${CHAOS_PID}
 
-echo "Chaos ${1} process Started with ${PID} !"
+echo "Chaos ${command} process Started with ${PID} !"
 now=$(date)
-echo "${now} Chaos ${1} process Started with ${PID} !" >>${CHAOS_LOGLOG}
+echo "${now} Chaos ${command} process Started with ${PID} !" 
>>${CHAOS_LOGLOG}
 ;;
 
 (stop)
diff --git a/bin/hbase b/bin/hbase
index b1369ef576a..31547b1ab51 100755
--- a/bin/hbase
+++ b/bin/hbase
@@ -742,6 +742,10 @@ elif [ "$COMMAND" = "pre-upgrade" ] ; then
   CLASS='org.apache.hadoop.hbase.tool.PreUpgradeValidator'
 elif [ "$COMMAND" = "completebulkload" ] ; then
   CLASS='org.apache.hadoop.hbase.tool.BulkLoadHFilesTool'
+elif [ "$COMMAND" = "chaosagent" ] ; then
+  CLASS='org.apache.hadoop.hbase.chaos.ChaosService'
+elif [ "$COMMAND" = "chaosmonkeyrunner" ] ; then
+  CLASS='org.apache.hadoop.hbase.chaos.util.ChaosMonkeyRunner'
 elif [ "$COMMAND" = "hbtop" ] ; then
   CLASS='org.apache.hadoop.hbase.hbtop.HBTop'
   if [ -n "${shaded_jar}" ] ; then
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
index cd1c6773634..a73748d5c4f 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
@@ -217,7 +217,7 @@ public class HBaseClusterManager extends Configured implements ClusterManager {
 }
 
 public String signalCommand(ServiceType service, String signal) {
-  return String.format("%s | xargs sudo kill -s %s", findPidCommand(service), signal);
+  return String.format("%s | xargs kill -s %s", findPidCommand(service), signal);
 }
   }
 



[hbase] branch master updated: HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078)

2023-03-08 Thread zhangduo
This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/master by this push:
 new bc8b13e468a HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078)
bc8b13e468a is described below

commit bc8b13e468a258b4ee47e40cf3645bf27d66471d
Author: Rajeshbabu Chintaguntla 
AuthorDate: Thu Mar 9 08:16:25 2023 +0530

HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078)

Co-authored-by: Rajeshbabu Chintaguntla 
Signed-off-by: Duo Zhang 
---
 bin/chaos-daemon.sh| 26 +++---
 bin/hbase  |  4 
 .../apache/hadoop/hbase/HBaseClusterManager.java   |  2 +-
 3 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/bin/chaos-daemon.sh b/bin/chaos-daemon.sh
index 084e519321a..8e27f4a5d9f 100644
--- a/bin/chaos-daemon.sh
+++ b/bin/chaos-daemon.sh
@@ -19,7 +19,7 @@
 # */
 #
 
-usage="Usage: chaos-daemon.sh (start|stop) chaosagent"
+usage="Usage: chaos-daemon.sh (start|stop) (chaosagent|chaosmonkeyrunner)"
 
 # if no args specified, show usage
 if [ $# -le 1 ]; then
@@ -51,11 +51,6 @@ bin=$(cd "$bin">/dev/null || exit; pwd)
 . "$bin"/hbase-config.sh
 . "$bin"/hbase-common.sh
 
-CLASSPATH=$HBASE_CONF_DIR
-for f in ../lib/*.jar; do
-  CLASSPATH=${CLASSPATH}:$f
-done
-
 # get log directory
 if [ "$HBASE_LOG_DIR" = "" ]; then
   export HBASE_LOG_DIR="$HBASE_HOME/logs"
@@ -79,7 +74,7 @@ if [ "$JAVA_HOME" = "" ]; then
 fi
 
 export HBASE_LOG_PREFIX=hbase-$HBASE_IDENT_STRING-$command-$HOSTNAME
-export CHAOS_LOGFILE=$HBASE_LOG_PREFIX.log
+export HBASE_LOGFILE=$HBASE_LOG_PREFIX.log
 
 if [ -z "${HBASE_ROOT_LOGGER}" ]; then
 export HBASE_ROOT_LOGGER=${HBASE_ROOT_LOGGER:-"INFO,RFA"}
@@ -89,7 +84,7 @@ if [ -z "${HBASE_SECURITY_LOGGER}" ]; then
 export HBASE_SECURITY_LOGGER=${HBASE_SECURITY_LOGGER:-"INFO,RFAS"}
 fi
 
-CHAOS_LOGLOG=${CHAOS_LOGLOG:-"${HBASE_LOG_DIR}/${CHAOS_LOGFILE}"}
+CHAOS_LOGLOG=${CHAOS_LOGLOG:-"${HBASE_LOG_DIR}/${HBASE_LOGFILE}"}
 CHAOS_PID=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.pid
 
 if [ -z "$CHAOS_JAVA_OPTS" ]; then
@@ -101,15 +96,20 @@ case $startStop in
 (start)
 check_before_start
 echo running $command
-CMD="${JAVA_HOME}/bin/java -Dapp.home=${HBASE_CONF_DIR}/../  ${CHAOS_JAVA_OPTS} -cp ${CLASSPATH} org.apache.hadoop.hbase.chaos.ChaosService -$command start &>> ${CHAOS_LOGLOG} &"
-
-eval $CMD
+command_args=""
+if [ "$command" = "chaosagent" ]; then
+  command_args=" -${command} start"
+elif [ "$command" = "chaosmonkeyrunner" ]; then
+  command_args="-c $HBASE_CONF_DIR $@"
+fi
+HBASE_OPTS="$HBASE_OPTS $CHAOS_JAVA_OPTS" . $bin/hbase --config "${HBASE_CONF_DIR}" $command $command_args >> ${CHAOS_LOGLOG} 2>&1 &
 PID=$(echo $!)
+disown -h -r
 echo ${PID} >${CHAOS_PID}
 
-echo "Chaos ${1} process Started with ${PID} !"
+echo "Chaos ${command} process Started with ${PID} !"
 now=$(date)
-echo "${now} Chaos ${1} process Started with ${PID} !" >>${CHAOS_LOGLOG}
+echo "${now} Chaos ${command} process Started with ${PID} !" 
>>${CHAOS_LOGLOG}
 ;;
 
 (stop)
diff --git a/bin/hbase b/bin/hbase
index f81c0551e57..b5329795c40 100755
--- a/bin/hbase
+++ b/bin/hbase
@@ -742,6 +742,10 @@ elif [ "$COMMAND" = "pre-upgrade" ] ; then
   CLASS='org.apache.hadoop.hbase.tool.PreUpgradeValidator'
 elif [ "$COMMAND" = "completebulkload" ] ; then
   CLASS='org.apache.hadoop.hbase.tool.BulkLoadHFilesTool'
+elif [ "$COMMAND" = "chaosagent" ] ; then
+  CLASS='org.apache.hadoop.hbase.chaos.ChaosService'
+elif [ "$COMMAND" = "chaosmonkeyrunner" ] ; then
+  CLASS='org.apache.hadoop.hbase.chaos.util.ChaosMonkeyRunner'
 elif [ "$COMMAND" = "hbtop" ] ; then
   CLASS='org.apache.hadoop.hbase.hbtop.HBTop'
   if [ -n "${shaded_jar}" ] ; then
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
index a09a690c89a..b16ac52b696 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
@@ -217,7 +217,7 @@ public class HBaseClusterManager extends Configured implements ClusterManager {
 }
 
 public String signalCommand(ServiceType service, String signal) {
-  return String.format("%s | xargs sudo kill -s %s", findPidCommand(service), signal);
+  return String.format("%s | xargs kill -s %s", findPidCommand(service), signal);
 }
   }
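
Since bin/hbase now maps these command words straight onto their main classes, the same services can also be launched through the launcher directly, without the daemon wrapper. A hedged sketch, using only the flags chaos-daemon.sh itself passes (other ChaosService or ChaosMonkeyRunner options are not covered here):

# Run the chaos agent in the foreground via the standard launcher.
./bin/hbase --config "$HBASE_CONF_DIR" chaosagent -chaosagent start

# Run the chaos monkey runner with the configuration directory argument
# that chaos-daemon.sh supplies.
./bin/hbase --config "$HBASE_CONF_DIR" chaosmonkeyrunner -c "$HBASE_CONF_DIR"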
 



[hbase] branch master updated: HBASE-27690 Fix a misspell in TestRegionStateStore (#5090)

2023-03-08 Thread zhangduo
This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/master by this push:
 new 8bdabed85ca HBASE-27690 Fix a misspell in TestRegionStateStore (#5090)
8bdabed85ca is described below

commit 8bdabed85cad0b0e4dc6c724b4174a8b5c7ccc64
Author: tianhang 
AuthorDate: Wed Mar 8 23:23:50 2023 +0800

HBASE-27690 Fix a misspell in TestRegionStateStore (#5090)

Signed-off-by: Duo Zhang 
---
 .../org/apache/hadoop/hbase/master/assignment/TestRegionStateStore.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionStateStore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionStateStore.java
index 0004d7665e7..0e6251a 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionStateStore.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionStateStore.java
@@ -215,7 +215,7 @@ public class TestRegionStateStore {
   assertNull(serverCellA);
   assertNull(startCodeCellA);
 
-  Get get2 = new Get(splitA.getRegionName());
+  Get get2 = new Get(splitB.getRegionName());
   Result resultB = meta.get(get2);
   Cell serverCellB = resultB.getColumnLatestCell(HConstants.CATALOG_FAMILY,
 CatalogFamilyFormat.getServerColumn(splitB.getReplicaId()));



[hbase] branch branch-2 updated (9a69a69648e -> 43fc4bfcd82)

2023-03-08 Thread bbeaudreault
This is an automated email from the ASF dual-hosted git repository.

bbeaudreault pushed a change to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


 from 9a69a69648e HBASE-27250 MasterRpcService#setRegionStateInMeta does not support replica region encodedNames or region names
 add 43fc4bfcd82 HBASE-27681 Refactor Table Latency Metrics  (#5093)

No new revisions were added by this update.

Summary of changes:
 .../hbase/regionserver/MetricsTableLatencies.java  | 145 -
 .../hbase/regionserver/MetricsTableQueryMeter.java |  57 
 .../impl/GlobalMetricRegistriesAdapter.java|   1 +
 .../regionserver/MetricsTableLatenciesImpl.java| 216 --
 .../regionserver/MetricsTableQueryMeterImpl.java   |  99 --
 ...hadoop.hbase.regionserver.MetricsTableLatencies |  17 --
 .../metrics/impl/TestMetricRegistriesImpl.java |  62 
 .../apache/hadoop/hbase/regionserver/HRegion.java  |  28 +-
 .../hbase/regionserver/MetricsRegionServer.java| 117 +++-
 .../hadoop/hbase/regionserver/RSRpcServices.java   |  38 +--
 .../regionserver/RegionServerTableMetrics.java | 108 ---
 .../regionserver/metrics/MetricsTableRequests.java | 331 +
 .../regionserver/TestMetricsRegionServer.java  |  57 ++--
 .../regionserver/TestMetricsTableLatencies.java| 123 
 .../regionserver/TestMetricsTableRequests.java | 128 
 .../regionserver/TestMetricsUserAggregate.java |  27 +-
 16 files changed, 661 insertions(+), 893 deletions(-)
 delete mode 100644 hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsTableLatencies.java
 delete mode 100644 hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsTableQueryMeter.java
 delete mode 100644 hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsTableLatenciesImpl.java
 delete mode 100644 hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsTableQueryMeterImpl.java
 delete mode 100644 hbase-hadoop2-compat/src/main/resources/META-INF/services/org.apache.hadoop.hbase.regionserver.MetricsTableLatencies
 create mode 100644 hbase-metrics/src/test/java/org/apache/hadoop/hbase/metrics/impl/TestMetricRegistriesImpl.java
 delete mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerTableMetrics.java
 create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/MetricsTableRequests.java
 delete mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsTableLatencies.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsTableRequests.java



[hbase-site] branch asf-site updated: INFRA-10751 Empty commit

2023-03-08 Thread git-site-role
This is an automated email from the ASF dual-hosted git repository.

git-site-role pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/hbase-site.git


The following commit(s) were added to refs/heads/asf-site by this push:
 new 183af9772b9 INFRA-10751 Empty commit
183af9772b9 is described below

commit 183af9772b9299b335975c0a03a4dacf0736e305
Author: jenkins 
AuthorDate: Wed Mar 8 14:46:10 2023 +

INFRA-10751 Empty commit



[hbase] branch master updated: HBASE-27681 Addendum delete old metric classes (#5092)

2023-03-08 Thread bbeaudreault
This is an automated email from the ASF dual-hosted git repository.

bbeaudreault pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/master by this push:
 new 13e11a46c6e HBASE-27681 Addendum delete old metric classes (#5092)
13e11a46c6e is described below

commit 13e11a46c6e37cba63d951e2739f3e8a42aa7ee6
Author: tianhang 
AuthorDate: Wed Mar 8 22:30:22 2023 +0800

HBASE-27681 Addendum delete old metric classes (#5092)

Signed-off-by: Duo Zhang 
Signed-off-by: Bryan Beaudreault 
---
 .../hbase/regionserver/MetricsTableLatencies.java  | 145 --
 .../regionserver/MetricsTableLatenciesImpl.java| 216 -
 ...hadoop.hbase.regionserver.MetricsTableLatencies |  17 --
 3 files changed, 378 deletions(-)

diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsTableLatencies.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsTableLatencies.java
deleted file mode 100644
index e7d447aef49..000
--- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsTableLatencies.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.regionserver;
-
-import org.apache.yetus.audience.InterfaceAudience;
-
-/**
- * Latency metrics for a specific table in a RegionServer.
- */
-@InterfaceAudience.Private
-public interface MetricsTableLatencies {
-
-  /**
-   * The name of the metrics
-   */
-  String METRICS_NAME = "TableLatencies";
-
-  /**
-   * The name of the metrics context that metrics will be under.
-   */
-  String METRICS_CONTEXT = "regionserver";
-
-  /**
-   * Description
-   */
-  String METRICS_DESCRIPTION = "Metrics about Tables on a single HBase RegionServer";
-
-  /**
-   * The name of the metrics context that metrics will be under in jmx
-   */
-  String METRICS_JMX_CONTEXT = "RegionServer,sub=" + METRICS_NAME;
-
-  String GET_TIME = "getTime";
-  String SCAN_TIME = "scanTime";
-  String SCAN_SIZE = "scanSize";
-  String PUT_TIME = "putTime";
-  String PUT_BATCH_TIME = "putBatchTime";
-  String DELETE_TIME = "deleteTime";
-  String DELETE_BATCH_TIME = "deleteBatchTime";
-  String INCREMENT_TIME = "incrementTime";
-  String APPEND_TIME = "appendTime";
-  String CHECK_AND_DELETE_TIME = "checkAndDeleteTime";
-  String CHECK_AND_PUT_TIME = "checkAndPutTime";
-  String CHECK_AND_MUTATE_TIME = "checkAndMutateTime";
-
-  /**
-   * Update the Put time histogram
-   * @param tableName The table the metric is for
-   * @param t time it took
-   */
-  void updatePut(String tableName, long t);
-
-  /**
-   * Update the batch Put time histogram
-   * @param tableName The table the metric is for
-   * @param t time it took
-   */
-  void updatePutBatch(String tableName, long t);
-
-  /**
-   * Update the Delete time histogram
-   * @param tableName The table the metric is for
-   * @param t time it took
-   */
-  void updateDelete(String tableName, long t);
-
-  /**
-   * Update the batch Delete time histogram
-   * @param tableName The table the metric is for
-   * @param t time it took
-   */
-  void updateDeleteBatch(String tableName, long t);
-
-  /**
-   * Update the Get time histogram .
-   * @param tableName The table the metric is for
-   * @param t time it took
-   */
-  void updateGet(String tableName, long t);
-
-  /**
-   * Update the Increment time histogram.
-   * @param tableName The table the metric is for
-   * @param t time it took
-   */
-  void updateIncrement(String tableName, long t);
-
-  /**
-   * Update the Append time histogram.
-   * @param tableName The table the metric is for
-   * @param t time it took
-   */
-  void updateAppend(String tableName, long t);
-
-  /**
-   * Update the scan size.
-   * @param tableName The table the metric is for
-   * @param scanSize  size of the scan
-   */
-  void updateScanSize(String tableName, long scanSize);
-
-  /**
-   * Update the scan time.
-   * @param tableName The table the metric is for
-   * @param t time it took
-   */
-  

[hbase] branch branch-2.5 updated: HBASE-27250 MasterRpcService#setRegionStateInMeta does not support replica region encodedNames or region names

2023-03-08 Thread ndimiduk
This is an automated email from the ASF dual-hosted git repository.

ndimiduk pushed a commit to branch branch-2.5
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.5 by this push:
 new 6e31c18bf26 HBASE-27250 MasterRpcService#setRegionStateInMeta does not support replica region encodedNames or region names
6e31c18bf26 is described below

commit 6e31c18bf26cb23daea10a47179f9114eb9d94e4
Author: huaxiangsun 
AuthorDate: Tue Feb 21 04:47:27 2023 -0800

HBASE-27250 MasterRpcService#setRegionStateInMeta does not support replica region encodedNames or region names

- Added sanity check to make sure input region encoded name or region name is valid
- Assignment improvements pertaining to read replica regions
- make several MetaTableAccessor methods more precise in their handling of replica regions
- hbck2 setRegionStateInMeta and HBCKServerCrashProcedure handle read replicas
- separate AM helper methods -- loading RegionInfo from cache vs. refreshing cache from meta
- AM helper method support loading RegionInfo from cache via either region name and encoded region name (both caches are maintained, and under lock)
- consolidate, extend tests to cover read replica regions

Co-authored-by: Huaxiang Sun 
Co-authored-by: Nick Dimiduk 
Signed-off-by: Peter Somogyi 
---
 .../org/apache/hadoop/hbase/MetaTableAccessor.java |  70 +---
 .../hadoop/hbase/master/MasterRpcServices.java | 157 +-
 .../hbase/master/assignment/AssignmentManager.java |  54 ---
 .../GCMultipleMergedRegionsProcedure.java  |   4 +-
 .../assignment/MergeTableRegionsProcedure.java |   5 +-
 .../hbase/master/assignment/RegionStateStore.java  |   4 +-
 .../hbase/master/assignment/RegionStates.java  |   6 +-
 .../master/procedure/DeleteTableProcedure.java |   4 +-
 .../org/apache/hadoop/hbase/util/HBaseFsck.java|   2 +-
 .../apache/hadoop/hbase/util/HBaseFsckRepair.java  |   2 +-
 .../apache/hadoop/hbase/TestMetaTableAccessor.java |   9 +-
 .../org/apache/hadoop/hbase/TestSplitMerge.java|   2 +-
 .../org/apache/hadoop/hbase/client/TestHbck.java   |  93 ++-
 .../hbase/master/TestRegionsRecoveryChore.java |   6 +-
 .../master/assignment/TestAssignmentManager.java   |  20 +--
 .../assignment/TestMergeTableRegionsProcedure.java |   4 +-
 .../hadoop/hbase/master/janitor/TestMetaFixer.java |   8 +-
 .../hadoop/hbase/master/procedure/TestHBCKSCP.java | 178 +
 .../hbase/master/procedure/TestHBCKSCPUnknown.java |  54 ---
 .../TestRegionMergeTransactionOnCluster.java   |   6 +-
 20 files changed, 394 insertions(+), 294 deletions(-)

diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
index 48476e4bb3a..f29104df3c0 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
@@ -289,7 +289,8 @@ public class MetaTableAccessor {
 RegionLocations locations = getRegionLocations(r);
 return locations == null
   ? null
-  : locations.getRegionLocation(parsedInfo == null ? 0 : parsedInfo.getReplicaId());
+  : locations.getRegionLocation(
+parsedInfo == null ? RegionInfo.DEFAULT_REPLICA_ID : parsedInfo.getReplicaId());
   }
 
   /**
@@ -333,12 +334,12 @@ public class MetaTableAccessor {
   /**
* Gets the result in hbase:meta for the specified region.
* @param connection connection we're using
-   * @param regionName region we're looking for
+   * @param regionInfo region we're looking for
* @return result of the specified region
*/
-  public static Result getRegionResult(Connection connection, byte[] regionName)
+  public static Result getRegionResult(Connection connection, RegionInfo regionInfo)
 throws IOException {
-Get get = new Get(regionName);
+Get get = new Get(getMetaKeyForRegion(regionInfo));
 get.addFamily(HConstants.CATALOG_FAMILY);
 return get(getMetaHTable(connection), get);
   }
@@ -364,20 +365,20 @@ public class MetaTableAccessor {
   }
 
   /**
-   * Returns Return all regioninfos listed in the 'info:merge*' columns of the
-   * regionName row.
+   * Returns Return all regioninfos listed in the 'info:merge*' columns of the {@code regionInfo}
+   * row.
*/
   @Nullable
-  public static List getMergeRegions(Connection connection, byte[] regionName)
+  public static List getMergeRegions(Connection connection, RegionInfo regionInfo)
 throws IOException {
-return getMergeRegions(getRegionResult(connection, regionName).rawCells());
+return getMergeRegions(getRegionResult(connection, regionInfo).rawCells());
   }
 
   /**
-   * Check whether the given {@code regionName} has any 'info:merge*' columns.
+   * Check whether the given