[hbase] 07/10: HBASE-27218 Support rolling upgrading (#4808)
This is an automated email from the ASF dual-hosted git repository. zhangduo pushed a commit to branch HBASE-27109/table_based_rqs in repository https://gitbox.apache.org/repos/asf/hbase.git commit 2442166da2cd957b46a6c7b09f0330a09cb63044 Author: Duo Zhang AuthorDate: Sun Nov 6 16:57:11 2022 +0800 HBASE-27218 Support rolling upgrading (#4808) Signed-off-by: Yu Li --- .../apache/hadoop/hbase/zookeeper/ZNodePaths.java | 8 +- .../apache/hadoop/hbase/procedure2/Procedure.java | 15 + .../protobuf/server/master/MasterProcedure.proto | 12 + hbase-replication/pom.xml | 10 + .../hbase/replication/ReplicationQueueStorage.java | 19 ++ .../replication/TableReplicationQueueStorage.java | 65 +++- .../ZKReplicationQueueStorageForMigration.java | 351 + .../replication/TestZKReplicationQueueStorage.java | 317 +++ hbase-server/pom.xml | 6 + .../org/apache/hadoop/hbase/master/HMaster.java| 13 + .../master/procedure/ServerCrashProcedure.java | 19 ++ .../replication/AbstractPeerNoLockProcedure.java | 5 +- ...rateReplicationQueueFromZkToTableProcedure.java | 244 ++ .../master/replication/ModifyPeerProcedure.java| 26 ++ .../master/replication/ReplicationPeerManager.java | 104 +- .../TransitPeerSyncReplicationStateProcedure.java | 14 + .../replication/TestMigrateReplicationQueue.java | 126 ...rateReplicationQueueFromZkToTableProcedure.java | 226 + ...icationQueueFromZkToTableProcedureRecovery.java | 128 ...tReplicationPeerManagerMigrateQueuesFromZk.java | 216 + .../hbase/replication/TestReplicationBase.java | 2 +- pom.xml| 7 +- 22 files changed, 1917 insertions(+), 16 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZNodePaths.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZNodePaths.java index d19d2100466..3f66c7cdc0c 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZNodePaths.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZNodePaths.java @@ -220,7 +220,11 @@ public class 
ZNodePaths { * @param suffix ending of znode name * @return result of properly joining prefix with suffix */ - public static String joinZNode(String prefix, String suffix) { -return prefix + ZNodePaths.ZNODE_PATH_SEPARATOR + suffix; + public static String joinZNode(String prefix, String... suffix) { +StringBuilder sb = new StringBuilder(prefix); +for (String s : suffix) { + sb.append(ZNodePaths.ZNODE_PATH_SEPARATOR).append(s); +} +return sb.toString(); } } diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java index 34c74d92c16..43adba2bc21 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.concurrent.ThreadLocalRandom; import org.apache.hadoop.hbase.exceptions.TimeoutIOException; import org.apache.hadoop.hbase.metrics.Counter; import org.apache.hadoop.hbase.metrics.Histogram; @@ -33,6 +34,7 @@ import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos; import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState; /** @@ -1011,6 +1013,19 @@ public abstract class Procedure implements Comparable other) { return Long.compare(getProcId(), other.getProcId()); diff --git a/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto b/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto index 76a1d676487..b6f5d7e50bb 100644 --- a/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto +++ b/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto @@ -722,3 +722,15 @@ enum 
AssignReplicationQueuesState { message AssignReplicationQueuesStateData { required ServerName crashed_server = 1; } + +enum MigrateReplicationQueueFromZkToTableState { + MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_PREPARE = 1; + MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_DISABLE_PEER = 2; + MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_MIGRATE = 3; + MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_WAIT_UPGRADING = 4; + MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_ENABLE_PEER = 5; +} + +message MigrateReplicationQueueFromZkToTableStateData { + repeated string disabled_peer_id = 1; +} diff --git a/hbase-replication/pom.xml
[hbase] branch HBASE-27109/table_based_rqs updated (6128eb476af -> 2e4e2951364)
This is an automated email from the ASF dual-hosted git repository. zhangduo pushed a change to branch HBASE-27109/table_based_rqs in repository https://gitbox.apache.org/repos/asf/hbase.git omit 6128eb476af HBASE-27430 Should disable replication log cleaner when migrating replication queue data (#4901) omit 32fd20812ed HBASE-27429 Add exponential retry backoff support for MigrateReplicationQueueFromZkToTableProcedure omit 2e2ec6c90af HBASE-27217 Revisit the DumpReplicationQueues tool (#4810) omit 6321c964eef HBASE-27218 Support rolling upgrading (#4808) omit c82ebf796c0 HBASE-27405 Fix the replication hfile/log cleaner report that the replication table does not exist (#4811) omit c1c4ef09fbc HBASE-27392 Add a new procedure type for implementing some global operations such as migration (#4803) omit 76d65fe7993 HBASE-27215 Add support for sync replication (#4762) omit f161c17eb92 HBASE-27214 Implement the new replication hfile/log cleaner (#4722) omit 2d21c3d3b30 HBASE-27213 Add support for claim queue operation (#4708) omit b218c14e9a8 HBASE-27212 Implement a new table based replication queue storage and make the minimum replication system work (#4672) add 3eedc0987a7 HBASE-27672 Read RPC threads may BLOCKED at the Configuration.get when using java compression (#5075) add 37858bb6b05 HBASE-25709 Close region may stuck when region is compacting and skipped most cells read (#4536) add 586073d0c09 HBASE-27689 Update README.md about how to request a jira account (#5088) add 16864c705c7 HBASE-27681 Refactor Table Latency Metrics (#5072) add 13e11a46c6e HBASE-27681 Addendum delete old metric classes (#5092) add 8bdabed85ca HBASE-27690 Fix a misspell in TestRegionStateStore (#5090) add bc8b13e468a HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078) new 058b449032d HBASE-27212 Implement a new table based replication queue storage and make the minimum replication system work (#4672) new c6a272bc0d8 HBASE-27213 Add 
support for claim queue operation (#4708) new 8099d454eba HBASE-27214 Implement the new replication hfile/log cleaner (#4722) new 330042cc3d2 HBASE-27215 Add support for sync replication (#4762) new 83f9769ab03 HBASE-27392 Add a new procedure type for implementing some global operations such as migration (#4803) new 54a722e1e75 HBASE-27405 Fix the replication hfile/log cleaner report that the replication table does not exist (#4811) new 2442166da2c HBASE-27218 Support rolling upgrading (#4808) new e5d10b0d22d HBASE-27217 Revisit the DumpReplicationQueues tool (#4810) new 3c54d9c27a9 HBASE-27429 Add exponential retry backoff support for MigrateReplicationQueueFromZkToTableProcedure new 2e4e2951364 HBASE-27430 Should disable replication log cleaner when migrating replication queue data (#4901) This update added new revisions after undoing existing revisions. That is to say, some revisions that were in the old version of the branch are not in the new version. This situation occurs when a user --force pushes a change and generates a repository containing something like this: * -- * -- B -- O -- O -- O (6128eb476af) \ N -- N -- N refs/heads/HBASE-27109/table_based_rqs (2e4e2951364) You should already have received notification emails for all of the O revisions, and so the following emails describe only the N revisions from the common base, B. Any revisions marked "omit" are not gone; other references still refer to them. Any revisions marked "discard" are gone forever. The 10 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. 
Summary of changes: README.md | 13 +- bin/chaos-daemon.sh| 26 +- bin/hbase | 4 + .../java/org/apache/hadoop/hbase/HConstants.java | 5 + .../hbase/io/compress/aircompressor/Lz4Codec.java | 6 +- .../hbase/io/compress/aircompressor/LzoCodec.java | 6 +- .../io/compress/aircompressor/SnappyCodec.java | 6 +- .../hbase/io/compress/aircompressor/ZstdCodec.java | 6 +- .../hbase/io/compress/brotli/BrotliCodec.java | 16 +- .../hadoop/hbase/io/compress/lz4/Lz4Codec.java | 10 +- .../hbase/io/compress/xerial/SnappyCodec.java | 10 +- .../hadoop/hbase/io/compress/xz/LzmaCodec.java | 13 +- .../hadoop/hbase/io/compress/zstd/ZstdCodec.java | 17 +- .../impl/GlobalMetricRegistriesAdapter.java| 1 + .../hbase/regionserver/MetricsTableLatencies.java | 145 - .../regionserver/MetricsTableLatenciesImpl.java| 216 -- .../hbase/regionserver/MetricsTableQueryMeter.java | 57
[hbase] 02/10: HBASE-27213 Add support for claim queue operation (#4708)
This is an automated email from the ASF dual-hosted git repository. zhangduo pushed a commit to branch HBASE-27109/table_based_rqs in repository https://gitbox.apache.org/repos/asf/hbase.git commit c6a272bc0d86f8587e5939246882419f66cd7976 Author: Duo Zhang AuthorDate: Sat Aug 20 23:10:58 2022 +0800 HBASE-27213 Add support for claim queue operation (#4708) Signed-off-by: Xin Sun --- .../protobuf/server/master/MasterProcedure.proto | 6 +-- .../AssignReplicationQueuesProcedure.java | 13 ++--- .../master/replication/ModifyPeerProcedure.java| 2 +- .../master/replication/RemovePeerProcedure.java| 41 +- .../regionserver/ReplicationSourceManager.java | 37 + .../replication/TestClaimReplicationQueue.java | 2 +- ...java => TestRemovePeerProcedureWaitForSCP.java} | 63 +- .../replication/TestSerialReplicationFailover.java | 3 -- 8 files changed, 116 insertions(+), 51 deletions(-) diff --git a/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto b/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto index 2e0da0deb84..76a1d676487 100644 --- a/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto +++ b/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto @@ -515,6 +515,7 @@ message UpdatePeerConfigStateData { message RemovePeerStateData { optional ReplicationPeer peer_config = 1; + repeated int64 ongoing_assign_replication_queues_proc_ids = 2; } message EnablePeerStateData { @@ -714,9 +715,8 @@ message ModifyColumnFamilyStoreFileTrackerStateData { } enum AssignReplicationQueuesState { - ASSIGN_REPLICATION_QUEUES_PRE_CHECK = 1; - ASSIGN_REPLICATION_QUEUES_ADD_MISSING_QUEUES = 2; - ASSIGN_REPLICATION_QUEUES_CLAIM = 3; + ASSIGN_REPLICATION_QUEUES_ADD_MISSING_QUEUES = 1; + ASSIGN_REPLICATION_QUEUES_CLAIM = 2; } message AssignReplicationQueuesStateData { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AssignReplicationQueuesProcedure.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AssignReplicationQueuesProcedure.java index e7fb5e51715..d33259dd436 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AssignReplicationQueuesProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AssignReplicationQueuesProcedure.java @@ -23,6 +23,7 @@ import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Set; +import java.util.stream.Collectors; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.ServerProcedureInterface; @@ -102,8 +103,12 @@ public class AssignReplicationQueuesProcedure } private Flow claimQueues(MasterProcedureEnv env) throws ReplicationException { +Set existingPeerIds = env.getReplicationPeerManager().listPeers(null).stream() + .map(ReplicationPeerDescription::getPeerId).collect(Collectors.toSet()); ReplicationQueueStorage storage = env.getReplicationPeerManager().getQueueStorage(); -List queueIds = storage.listAllQueueIds(crashedServer); +// filter out replication queue for deleted peers +List queueIds = storage.listAllQueueIds(crashedServer).stream() + .filter(q -> existingPeerIds.contains(q.getPeerId())).collect(Collectors.toList()); if (queueIds.isEmpty()) { LOG.debug("Finish claiming replication queues for {}", crashedServer); // we are done @@ -130,10 +135,6 @@ public class AssignReplicationQueuesProcedure throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException { try { switch (state) { -case ASSIGN_REPLICATION_QUEUES_PRE_CHECK: - // TODO: reserved for implementing the fencing logic with Add/Remove/UpdatePeerProcedure - setNextState(AssignReplicationQueuesState.ASSIGN_REPLICATION_QUEUES_ADD_MISSING_QUEUES); - return Flow.HAS_MORE_STATE; case ASSIGN_REPLICATION_QUEUES_ADD_MISSING_QUEUES: addMissingQueues(env); retryCounter = null; @@ 
-183,7 +184,7 @@ public class AssignReplicationQueuesProcedure @Override protected AssignReplicationQueuesState getInitialState() { -return AssignReplicationQueuesState.ASSIGN_REPLICATION_QUEUES_PRE_CHECK; +return AssignReplicationQueuesState.ASSIGN_REPLICATION_QUEUES_ADD_MISSING_QUEUES; } @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/ModifyPeerProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/ModifyPeerProcedure.java index 67d70a166be..78b97620c01 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/ModifyPeerProcedure.java +++
[hbase] 10/10: HBASE-27430 Should disable replication log cleaner when migrating replication queue data (#4901)
This is an automated email from the ASF dual-hosted git repository. zhangduo pushed a commit to branch HBASE-27109/table_based_rqs in repository https://gitbox.apache.org/repos/asf/hbase.git commit 2e4e2951364519e0a9187d58b5ca9ff8c3f61574 Author: Duo Zhang AuthorDate: Sat Dec 3 20:51:40 2022 +0800 HBASE-27430 Should disable replication log cleaner when migrating replication queue data (#4901) Signed-off-by: Liangjun He --- .../protobuf/server/master/MasterProcedure.proto | 12 +++--- ...rateReplicationQueueFromZkToTableProcedure.java | 47 +- ...rateReplicationQueueFromZkToTableProcedure.java | 29 - 3 files changed, 80 insertions(+), 8 deletions(-) diff --git a/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto b/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto index b6f5d7e50bb..14d07c17c88 100644 --- a/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto +++ b/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto @@ -724,11 +724,13 @@ message AssignReplicationQueuesStateData { } enum MigrateReplicationQueueFromZkToTableState { - MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_PREPARE = 1; - MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_DISABLE_PEER = 2; - MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_MIGRATE = 3; - MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_WAIT_UPGRADING = 4; - MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_ENABLE_PEER = 5; + MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_DISABLE_CLEANER = 1; + MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_PREPARE = 2; + MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_DISABLE_PEER = 3; + MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_MIGRATE = 4; + MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_WAIT_UPGRADING = 5; + MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_ENABLE_PEER = 6; + MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_ENABLE_CLEANER = 7; } message MigrateReplicationQueueFromZkToTableStateData { diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/MigrateReplicationQueueFromZkToTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/MigrateReplicationQueueFromZkToTableProcedure.java index 93ff27db3f7..b7c4e33ef85 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/MigrateReplicationQueueFromZkToTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/MigrateReplicationQueueFromZkToTableProcedure.java @@ -17,7 +17,9 @@ */ package org.apache.hadoop.hbase.master.replication; +import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.MigrateReplicationQueueFromZkToTableState.MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_DISABLE_CLEANER; import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.MigrateReplicationQueueFromZkToTableState.MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_DISABLE_PEER; +import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.MigrateReplicationQueueFromZkToTableState.MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_ENABLE_CLEANER; import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.MigrateReplicationQueueFromZkToTableState.MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_ENABLE_PEER; import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.MigrateReplicationQueueFromZkToTableState.MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_MIGRATE; import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.MigrateReplicationQueueFromZkToTableState.MIGRATE_REPLICATION_QUEUE_FROM_ZK_TO_TABLE_PREPARE; @@ -111,6 +113,26 @@ public class MigrateReplicationQueueFromZkToTableProcedure } } + private void disableReplicationLogCleaner(MasterProcedureEnv env) +throws ProcedureSuspendedException { +if (!env.getReplicationPeerManager().getReplicationLogCleanerBarrier().disable()) { + // it 
is not likely that we can reach here as we will schedule this procedure immediately + // after master restarting, where ReplicationLogCleaner should have not started its first run + // yet. But anyway, let's make the code more robust. And it is safe to wait a bit here since + // there will be no data in the new replication queue storage before we execute this procedure + // so ReplicationLogCleaner will quit immediately without doing anything. + throw suspend(env.getMasterConfiguration(), +backoff -> LOG.info( + "Can not disable replication log cleaner, sleep {} secs and retry later", + backoff / 1000)); +} +resetRetry(); + } + + private void enableReplicationLogCleaner(MasterProcedureEnv env) { +env.getReplicationPeerManager().getReplicationLogCleanerBarrier().enable(); + } + private void waitUntilNoPeerProcedure(MasterProcedureEnv
[hbase] 08/10: HBASE-27217 Revisit the DumpReplicationQueues tool (#4810)
This is an automated email from the ASF dual-hosted git repository. zhangduo pushed a commit to branch HBASE-27109/table_based_rqs in repository https://gitbox.apache.org/repos/asf/hbase.git commit e5d10b0d22dcdee6d21045375a96a6b73e5813a3 Author: LiangJun He <2005hit...@163.com> AuthorDate: Sun Nov 13 22:03:36 2022 +0800 HBASE-27217 Revisit the DumpReplicationQueues tool (#4810) Signed-off-by: Duo Zhang --- .../regionserver/DumpReplicationQueues.java| 240 + .../hadoop/hbase/wal/AbstractFSWALProvider.java| 20 ++ .../regionserver/TestDumpReplicationQueues.java| 159 +- 3 files changed, 284 insertions(+), 135 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/DumpReplicationQueues.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/DumpReplicationQueues.java index 98d0a55fbc4..b284e3f6837 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/DumpReplicationQueues.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/DumpReplicationQueues.java @@ -19,8 +19,12 @@ package org.apache.hadoop.hbase.replication.regionserver; import java.io.FileNotFoundException; import java.io.IOException; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -31,7 +35,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.hbase.Abortable; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.Admin; @@ -40,28 +44,33 @@ import org.apache.hadoop.hbase.client.ConnectionFactory; import 
org.apache.hadoop.hbase.client.replication.TableCFs; import org.apache.hadoop.hbase.io.WALLink; import org.apache.hadoop.hbase.procedure2.util.StringUtils; +import org.apache.hadoop.hbase.replication.ReplicationException; +import org.apache.hadoop.hbase.replication.ReplicationGroupOffset; +import org.apache.hadoop.hbase.replication.ReplicationOffsetUtil; import org.apache.hadoop.hbase.replication.ReplicationPeerConfig; import org.apache.hadoop.hbase.replication.ReplicationPeerDescription; -import org.apache.hadoop.hbase.replication.ReplicationQueueInfo; +import org.apache.hadoop.hbase.replication.ReplicationQueueData; +import org.apache.hadoop.hbase.replication.ReplicationQueueId; import org.apache.hadoop.hbase.replication.ReplicationQueueStorage; -import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; -import org.apache.hadoop.hbase.zookeeper.ZKDump; -import org.apache.hadoop.hbase.zookeeper.ZKWatcher; +import org.apache.hadoop.hbase.replication.ReplicationStorageFactory; +import org.apache.hadoop.hbase.wal.AbstractFSWALProvider; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMap; import org.apache.hbase.thirdparty.com.google.common.util.concurrent.AtomicLongMap; /** - * TODO: reimplement this tool * * Provides information about the existing states of replication, replication peers and queues. * Usage: hbase org.apache.hadoop.hbase.replication.regionserver.DumpReplicationQueues [args] * Arguments: --distributed Polls each RS to dump information about the queue --hdfs Reports HDFS - * usage by the replication queues (note: can be overestimated). + * usage by the replication queues (note: can be overestimated). In the new version, we + * reimplemented the DumpReplicationQueues tool to support obtaining information from replication + * table. 
*/ @InterfaceAudience.Private public class DumpReplicationQueues extends Configured implements Tool { @@ -185,7 +194,7 @@ public class DumpReplicationQueues extends Configured implements Tool { System.err.println("General Options:"); System.err.println(" -h|--h|--help Show this help and exit."); System.err.println(" --distributed Poll each RS and print its own replication queue. " - + "Default only polls ZooKeeper"); + + "Default only polls replication table."); System.err.println(" --hdfs Use HDFS to calculate usage of WALs by replication." + " It could be overestimated if replicating to multiple peers." + " --distributed flag is also needed."); @@ -201,13 +210,7 @@ public class DumpReplicationQueues extends Configured implements Tool { Connection connection = ConnectionFactory.createConnection(conf); Admin
[hbase] 09/10: HBASE-27429 Add exponential retry backoff support for MigrateReplicationQueueFromZkToTableProcedure
This is an automated email from the ASF dual-hosted git repository. zhangduo pushed a commit to branch HBASE-27109/table_based_rqs in repository https://gitbox.apache.org/repos/asf/hbase.git commit 3c54d9c27a9a6e147580cbeab67186b18019785b Author: Duo Zhang AuthorDate: Tue Oct 18 16:46:03 2022 +0800 HBASE-27429 Add exponential retry backoff support for MigrateReplicationQueueFromZkToTableProcedure Signed-off-by: Liangjun He --- .../hbase/procedure2/TimeoutExecutorThread.java| 10 +- ...rateReplicationQueueFromZkToTableProcedure.java | 131 ++--- .../master/replication/ReplicationPeerManager.java | 45 --- ...tReplicationPeerManagerMigrateQueuesFromZk.java | 9 +- 4 files changed, 125 insertions(+), 70 deletions(-) diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/TimeoutExecutorThread.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/TimeoutExecutorThread.java index 3b99781a558..c0287a99435 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/TimeoutExecutorThread.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/TimeoutExecutorThread.java @@ -78,9 +78,13 @@ class TimeoutExecutorThread extends StoppableThread { } public void add(Procedure procedure) { -LOG.info("ADDED {}; timeout={}, timestamp={}", procedure, procedure.getTimeout(), - procedure.getTimeoutTimestamp()); -queue.add(new DelayedProcedure<>(procedure)); +if (procedure.getTimeout() > 0) { + LOG.info("ADDED {}; timeout={}, timestamp={}", procedure, procedure.getTimeout(), +procedure.getTimeoutTimestamp()); + queue.add(new DelayedProcedure<>(procedure)); +} else { + LOG.info("Got negative timeout {} for {}, skip adding", procedure.getTimeout(), procedure); +} } public boolean remove(Procedure procedure) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/MigrateReplicationQueueFromZkToTableProcedure.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/MigrateReplicationQueueFromZkToTableProcedure.java index 536f232338e..93ff27db3f7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/MigrateReplicationQueueFromZkToTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/MigrateReplicationQueueFromZkToTableProcedure.java @@ -25,19 +25,25 @@ import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureP import java.io.IOException; import java.util.List; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.Future; +import java.util.function.LongConsumer; import java.util.stream.Collectors; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.master.procedure.GlobalProcedureInterface; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.PeerProcedureInterface; import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; +import org.apache.hadoop.hbase.procedure2.ProcedureUtil; import org.apache.hadoop.hbase.procedure2.ProcedureYieldException; import org.apache.hadoop.hbase.procedure2.StateMachineProcedure; import org.apache.hadoop.hbase.replication.ReplicationPeerDescription; import org.apache.hadoop.hbase.replication.ZKReplicationQueueStorageForMigration; +import org.apache.hadoop.hbase.util.FutureUtils; +import org.apache.hadoop.hbase.util.IdLock; +import org.apache.hadoop.hbase.util.RetryCounter; import org.apache.hadoop.hbase.util.VersionInfo; import org.apache.yetus.audience.InterfaceAudience; import org.apache.zookeeper.KeeperException; @@ -65,18 +71,34 @@ public class MigrateReplicationQueueFromZkToTableProcedure private List disabledPeerIds; - private List> futures; + private 
CompletableFuture future; private ExecutorService executor; + private RetryCounter retryCounter; + @Override public String getGlobalId() { return getClass().getSimpleName(); } + private ProcedureSuspendedException suspend(Configuration conf, LongConsumer backoffConsumer) +throws ProcedureSuspendedException { +if (retryCounter == null) { + retryCounter = ProcedureUtil.createRetryCounter(conf); +} +long backoff = retryCounter.getBackoffTimeAndIncrementAttempts(); +backoffConsumer.accept(backoff); +throw suspend(Math.toIntExact(backoff), true); + } + + private void resetRetry() { +retryCounter = null; + } + private ExecutorService getExecutorService() { if (executor == null) { - executor = Executors.newFixedThreadPool(3, new ThreadFactoryBuilder() + executor = Executors.newCachedThreadPool(new ThreadFactoryBuilder()
[hbase] 05/10: HBASE-27392 Add a new procedure type for implementing some global operations such as migration (#4803)
This is an automated email from the ASF dual-hosted git repository. zhangduo pushed a commit to branch HBASE-27109/table_based_rqs in repository https://gitbox.apache.org/repos/asf/hbase.git commit 83f9769ab03fc70663732a418eb112bb8916346f Author: Duo Zhang AuthorDate: Thu Sep 29 10:08:02 2022 +0800 HBASE-27392 Add a new procedure type for implementing some global operations such as migration (#4803) Signed-off-by: Xin Sun --- .../hbase/procedure2/LockedResourceType.java | 3 +- .../master/procedure/GlobalProcedureInterface.java | 15 ++- .../hadoop/hbase/master/procedure/GlobalQueue.java | 21 ++-- .../master/procedure/MasterProcedureScheduler.java | 119 - .../hbase/master/procedure/SchemaLocking.java | 18 +++- .../procedure/TestMasterProcedureScheduler.java| 48 + 6 files changed, 202 insertions(+), 22 deletions(-) diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java index 12f899d7565..40141017009 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java @@ -26,5 +26,6 @@ public enum LockedResourceType { TABLE, REGION, PEER, - META + META, + GLOBAL } diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/GlobalProcedureInterface.java similarity index 82% copy from hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java copy to hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/GlobalProcedureInterface.java index 12f899d7565..1ef168abfd8 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java +++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/GlobalProcedureInterface.java @@ -15,16 +15,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.hbase.procedure2; +package org.apache.hadoop.hbase.master.procedure; import org.apache.yetus.audience.InterfaceAudience; +/** + * Procedure interface for global operations, such as migration. + */ @InterfaceAudience.Private -public enum LockedResourceType { - SERVER, - NAMESPACE, - TABLE, - REGION, - PEER, - META +public interface GlobalProcedureInterface { + + String getGlobalId(); } diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/GlobalQueue.java similarity index 69% copy from hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java copy to hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/GlobalQueue.java index 12f899d7565..1633dc4856e 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockedResourceType.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/GlobalQueue.java @@ -15,16 +15,21 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.hadoop.hbase.procedure2; +package org.apache.hadoop.hbase.master.procedure; +import org.apache.hadoop.hbase.procedure2.LockStatus; +import org.apache.hadoop.hbase.procedure2.Procedure; import org.apache.yetus.audience.InterfaceAudience; @InterfaceAudience.Private -public enum LockedResourceType { - SERVER, - NAMESPACE, - TABLE, - REGION, - PEER, - META +public class GlobalQueue extends Queue { + + public GlobalQueue(String globalId, LockStatus lockStatus) { +super(globalId, lockStatus); + } + + @Override + boolean requireExclusiveLock(Procedure proc) { +return true; + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureScheduler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureScheduler.java index 866f2f6f403..fbf0eb8abf3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureScheduler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureScheduler.java @@ -22,6 +22,7 @@ import java.util.Arrays; import java.util.List; import java.util.function.Function; import java.util.function.Supplier; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableExistsException; import org.apache.hadoop.hbase.TableName; @@ -95,16 +96,20 @@ public class MasterProcedureScheduler extends AbstractProcedureScheduler { (n, k) -> n.compareKey((String) k); private
[hbase] 06/10: HBASE-27405 Fix the replication hfile/log cleaner report that the replication table does not exist (#4811)
This is an automated email from the ASF dual-hosted git repository. zhangduo pushed a commit to branch HBASE-27109/table_based_rqs in repository https://gitbox.apache.org/repos/asf/hbase.git commit 54a722e1e7591587ef1ee3a125663935c311f1ec Author: LiangJun He <2005hit...@163.com> AuthorDate: Wed Oct 12 14:40:05 2022 +0800 HBASE-27405 Fix the replication hfile/log cleaner report that the replication table does not exist (#4811) Signed-off-by: Duo Zhang --- .../apache/hadoop/hbase/replication/ReplicationQueueStorage.java | 6 ++ .../hadoop/hbase/replication/TableReplicationQueueStorage.java | 9 + .../hadoop/hbase/replication/master/ReplicationLogCleaner.java | 8 .../hbase/replication/master/TestReplicationLogCleaner.java | 1 + 4 files changed, 24 insertions(+) diff --git a/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ReplicationQueueStorage.java b/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ReplicationQueueStorage.java index c4204f0e8c4..6f6aee38cc8 100644 --- a/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ReplicationQueueStorage.java +++ b/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ReplicationQueueStorage.java @@ -178,4 +178,10 @@ public interface ReplicationQueueStorage { * created hfile references during the call may not be included. */ Set getAllHFileRefs() throws ReplicationException; + + /** + * Whether the replication queue table exists. 
+ * @return Whether the replication queue table exists + */ + boolean hasData() throws ReplicationException; } diff --git a/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/TableReplicationQueueStorage.java b/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/TableReplicationQueueStorage.java index 0c9553f4fd8..392a3692d66 100644 --- a/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/TableReplicationQueueStorage.java +++ b/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/TableReplicationQueueStorage.java @@ -532,4 +532,13 @@ public class TableReplicationQueueStorage implements ReplicationQueueStorage { throw new ReplicationException("failed to getAllHFileRefs", e); } } + + @Override + public boolean hasData() throws ReplicationException { +try { + return conn.getAdmin().getDescriptor(tableName) != null; +} catch (IOException e) { + throw new ReplicationException("failed to get replication queue table", e); +} + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java index f1fd8f8d6b3..3ab52da6158 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java @@ -76,6 +76,14 @@ public class ReplicationLogCleaner extends BaseLogCleanerDelegate { if (this.getConf() == null) { return; } +try { + if (!rpm.getQueueStorage().hasData()) { +return; + } +} catch (ReplicationException e) { + LOG.error("Error occurred while executing queueStorage.hasData()", e); + return; +} canFilter = rpm.getReplicationLogCleanerBarrier().start(); if (canFilter) { notFullyDeadServers = getNotFullyDeadServers.get(); diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/master/TestReplicationLogCleaner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/master/TestReplicationLogCleaner.java index 7a227fb0603..7edadae03b1 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/master/TestReplicationLogCleaner.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/master/TestReplicationLogCleaner.java @@ -86,6 +86,7 @@ public class TestReplicationLogCleaner { when(rpm.listPeers(null)).thenReturn(new ArrayList<>()); ReplicationQueueStorage rqs = mock(ReplicationQueueStorage.class); when(rpm.getQueueStorage()).thenReturn(rqs); +when(rpm.getQueueStorage().hasData()).thenReturn(true); when(rqs.listAllQueues()).thenReturn(new ArrayList<>()); ServerManager sm = mock(ServerManager.class); when(services.getServerManager()).thenReturn(sm);
[hbase] 04/10: HBASE-27215 Add support for sync replication (#4762)
This is an automated email from the ASF dual-hosted git repository. zhangduo pushed a commit to branch HBASE-27109/table_based_rqs in repository https://gitbox.apache.org/repos/asf/hbase.git commit 330042cc3d2939c584b638245a7bbd22fa294e6d Author: Duo Zhang AuthorDate: Thu Sep 15 22:58:29 2022 +0800 HBASE-27215 Add support for sync replication (#4762) Signed-off-by: Xiaolin Ha --- .../regionserver/ReplicationSource.java| 2 +- .../regionserver/ReplicationSourceManager.java | 53 +++--- .../TestDrainReplicationQueuesForStandBy.java | 3 -- 3 files changed, 28 insertions(+), 30 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java index e078722b157..0784a87711b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java @@ -465,7 +465,7 @@ public class ReplicationSource implements ReplicationSourceInterface { t.getName()); manager.refreshSources(peerId); break; -} catch (IOException e1) { +} catch (IOException | ReplicationException e1) { LOG.error("Replication sources refresh failed.", e1); sleepForRetries("Sleeping before try refreshing sources again", maxRetriesMultiplier); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java index 03569be86fc..f3d07315240 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java @@ -403,38 +403,44 @@ public class ReplicationSourceManager { // TODO: use empty initial 
offsets for now, revisit when adding support for sync replication ReplicationSourceInterface src = createSource(new ReplicationQueueData(queueId, ImmutableMap.of()), peer); -// synchronized here to avoid race with preLogRoll where we add new log to source and also +// synchronized here to avoid race with postLogRoll where we add new log to source and also // walsById. ReplicationSourceInterface toRemove; -Map<String, NavigableSet<String>> wals = new HashMap<>(); +ReplicationQueueData queueData; synchronized (latestPaths) { + // Here we make a copy of all the remaining wal files and then delete them from the + // replication queue storage after releasing the lock. It is not safe to just remove the old + // map from walsById since later we may fail to update the replication queue storage, and when + // we retry next time, we can not know the wal files that needs to be set to the replication + // queue storage + ImmutableMap.Builder<String, ReplicationGroupOffset> builder = ImmutableMap.builder(); + synchronized (walsById) { +walsById.get(queueId).forEach((group, wals) -> { + if (!wals.isEmpty()) { +builder.put(group, new ReplicationGroupOffset(wals.last(), -1)); + } +}); + } + queueData = new ReplicationQueueData(queueId, builder.build()); + src = createSource(queueData, peer); toRemove = sources.put(peerId, src); if (toRemove != null) { LOG.info("Terminate replication source for " + toRemove.getPeerId()); toRemove.terminate(terminateMessage); toRemove.getSourceMetrics().clear(); } - // Here we make a copy of all the remaining wal files and then delete them from the - // replication queue storage after releasing the lock. It is not safe to just remove the old - // map from walsById since later we may fail to delete them from the replication queue - // storage, and when we retry next time, we can not know the wal files that need to be deleted - // from the replication queue storage.
- walsById.get(queueId).forEach((k, v) -> wals.put(k, new TreeSet<>(v))); +} +for (Map.Entry<String, ReplicationGroupOffset> entry : queueData.getOffsets().entrySet()) { + queueStorage.setOffset(queueId, entry.getKey(), entry.getValue(), Collections.emptyMap()); } LOG.info("Startup replication source for " + src.getPeerId()); src.startup(); -for (NavigableSet<String> walsByGroup : wals.values()) { - // TODO: just need to reset the replication offset - // for (String wal : walsByGroup) { - // queueStorage.removeWAL(server.getServerName(), peerId, wal); - // } -} synchronized (walsById) { - Map<String, NavigableSet<String>> oldWals = walsById.get(queueId); - wals.forEach((k, v) -> { -NavigableSet<String> walsByGroup = oldWals.get(k); + Map>
[hbase] 03/10: HBASE-27214 Implement the new replication hfile/log cleaner (#4722)
This is an automated email from the ASF dual-hosted git repository. zhangduo pushed a commit to branch HBASE-27109/table_based_rqs in repository https://gitbox.apache.org/repos/asf/hbase.git commit 8099d454eba625157f83b9ffc01503b4383b7fef Author: Duo Zhang AuthorDate: Wed Aug 31 21:24:09 2022 +0800 HBASE-27214 Implement the new replication hfile/log cleaner (#4722) Signed-off-by: Xin Sun --- .../org/apache/hadoop/hbase/master/HMaster.java| 1 - .../hbase/master/cleaner/FileCleanerDelegate.java | 2 +- .../hadoop/hbase/master/region/MasterRegion.java | 2 +- .../hbase/master/replication/AddPeerProcedure.java | 15 +- .../master/replication/ReplicationPeerManager.java | 8 + .../hadoop/hbase/regionserver/HRegionServer.java | 2 +- .../hbase/replication/ReplicationOffsetUtil.java | 47 +++ .../replication/master/ReplicationLogCleaner.java | 234 + .../master/ReplicationLogCleanerBarrier.java | 85 + .../regionserver/ReplicationSourceManager.java | 18 +- .../regionserver/ReplicationSyncUp.java| 5 +- .../hadoop/hbase/wal/AbstractFSWALProvider.java| 29 ++ .../org/apache/hadoop/hbase/wal/WALFactory.java| 29 +- .../hbase/master/cleaner/TestLogsCleaner.java | 227 +--- .../cleaner/TestReplicationHFileCleaner.java | 43 ++- .../replication/TestReplicationOffsetUtil.java | 52 +++ .../replication/master/TestLogCleanerBarrier.java | 60 .../master/TestReplicationLogCleaner.java | 385 + .../regionserver/TestReplicationSourceManager.java | 2 +- .../apache/hadoop/hbase/wal/TestWALFactory.java| 2 +- .../apache/hadoop/hbase/wal/TestWALMethods.java| 14 + 21 files changed, 1008 insertions(+), 254 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index ce3e81ad04b..118457648de 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -4289,5 +4289,4 @@ public class HMaster 
extends HBaseServerBase implements Maste // initialize master side coprocessors before we start handling requests this.cpHost = new MasterCoprocessorHost(this, conf); } - } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java index d37bb620273..e08f5329433 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java @@ -50,7 +50,7 @@ public interface FileCleanerDelegate extends Configurable, Stoppable { } /** - * Used to do some cleanup work + * Will be called after cleaner run. */ default void postClean() { } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/region/MasterRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/region/MasterRegion.java index 177e161c32e..45f049723c7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/region/MasterRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/region/MasterRegion.java @@ -377,7 +377,7 @@ public final class MasterRegion { params.archivedWalSuffix(), params.rollPeriodMs(), params.flushSize()); walRoller.start(); -WALFactory walFactory = new WALFactory(conf, server.getServerName().toString(), server, false); +WALFactory walFactory = new WALFactory(conf, server.getServerName(), server, false); Path tableDir = CommonFSUtils.getTableDir(rootDir, td.getTableName()); Path initializingFlag = new Path(tableDir, INITIALIZING_FLAG); Path initializedFlag = new Path(tableDir, INITIALIZED_FLAG); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AddPeerProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AddPeerProcedure.java index 6d0acee76ca..25a4cd4b08e 100644 --- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AddPeerProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AddPeerProcedure.java @@ -21,7 +21,6 @@ import java.io.IOException; import org.apache.hadoop.hbase.client.replication.ReplicationPeerConfigUtil; import org.apache.hadoop.hbase.master.MasterCoprocessorHost; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; -import org.apache.hadoop.hbase.master.procedure.ProcedurePrepareLatch; import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; import org.apache.hadoop.hbase.replication.ReplicationException; @@ -84,15 +83,21 @@ public
[hbase] branch branch-2.5 updated: HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078)
This is an automated email from the ASF dual-hosted git repository. zhangduo pushed a commit to branch branch-2.5 in repository https://gitbox.apache.org/repos/asf/hbase.git The following commit(s) were added to refs/heads/branch-2.5 by this push: new ab8aa620302 HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078) ab8aa620302 is described below commit ab8aa62030264eed588fb78315b2bf2d57b8a3fa Author: Rajeshbabu Chintaguntla AuthorDate: Thu Mar 9 08:16:25 2023 +0530 HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078) Co-authored-by: Rajeshbabu Chintaguntla Signed-off-by: Duo Zhang (cherry picked from commit bc8b13e468a258b4ee47e40cf3645bf27d66471d) --- bin/chaos-daemon.sh| 26 +++--- bin/hbase | 4 .../apache/hadoop/hbase/HBaseClusterManager.java | 2 +- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/bin/chaos-daemon.sh b/bin/chaos-daemon.sh index 084e519321a..8e27f4a5d9f 100644 --- a/bin/chaos-daemon.sh +++ b/bin/chaos-daemon.sh @@ -19,7 +19,7 @@ # */ # -usage="Usage: chaos-daemon.sh (start|stop) chaosagent" +usage="Usage: chaos-daemon.sh (start|stop) (chaosagent|chaosmonkeyrunner)" # if no args specified, show usage if [ $# -le 1 ]; then @@ -51,11 +51,6 @@ bin=$(cd "$bin">/dev/null || exit; pwd) . "$bin"/hbase-config.sh . 
"$bin"/hbase-common.sh -CLASSPATH=$HBASE_CONF_DIR -for f in ../lib/*.jar; do - CLASSPATH=${CLASSPATH}:$f -done - # get log directory if [ "$HBASE_LOG_DIR" = "" ]; then export HBASE_LOG_DIR="$HBASE_HOME/logs" @@ -79,7 +74,7 @@ if [ "$JAVA_HOME" = "" ]; then fi export HBASE_LOG_PREFIX=hbase-$HBASE_IDENT_STRING-$command-$HOSTNAME -export CHAOS_LOGFILE=$HBASE_LOG_PREFIX.log +export HBASE_LOGFILE=$HBASE_LOG_PREFIX.log if [ -z "${HBASE_ROOT_LOGGER}" ]; then export HBASE_ROOT_LOGGER=${HBASE_ROOT_LOGGER:-"INFO,RFA"} @@ -89,7 +84,7 @@ if [ -z "${HBASE_SECURITY_LOGGER}" ]; then export HBASE_SECURITY_LOGGER=${HBASE_SECURITY_LOGGER:-"INFO,RFAS"} fi -CHAOS_LOGLOG=${CHAOS_LOGLOG:-"${HBASE_LOG_DIR}/${CHAOS_LOGFILE}"} +CHAOS_LOGLOG=${CHAOS_LOGLOG:-"${HBASE_LOG_DIR}/${HBASE_LOGFILE}"} CHAOS_PID=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.pid if [ -z "$CHAOS_JAVA_OPTS" ]; then @@ -101,15 +96,20 @@ case $startStop in (start) check_before_start echo running $command -CMD="${JAVA_HOME}/bin/java -Dapp.home=${HBASE_CONF_DIR}/../ ${CHAOS_JAVA_OPTS} -cp ${CLASSPATH} org.apache.hadoop.hbase.chaos.ChaosService -$command start &>> ${CHAOS_LOGLOG} &" - -eval $CMD +command_args="" +if [ "$command" = "chaosagent" ]; then + command_args=" -${command} start" +elif [ "$command" = "chaosmonkeyrunner" ]; then + command_args="-c $HBASE_CONF_DIR $@" +fi +HBASE_OPTS="$HBASE_OPTS $CHAOS_JAVA_OPTS" . $bin/hbase --config "${HBASE_CONF_DIR}" $command $command_args >> ${CHAOS_LOGLOG} 2>&1 & PID=$(echo $!) +disown -h -r echo ${PID} >${CHAOS_PID} -echo "Chaos ${1} process Started with ${PID} !" +echo "Chaos ${command} process Started with ${PID} !" now=$(date) -echo "${now} Chaos ${1} process Started with ${PID} !" >>${CHAOS_LOGLOG} +echo "${now} Chaos ${command} process Started with ${PID} !" 
>>${CHAOS_LOGLOG} ;; (stop) diff --git a/bin/hbase b/bin/hbase index 02a021ec04f..6833ebfe760 100755 --- a/bin/hbase +++ b/bin/hbase @@ -710,6 +710,10 @@ elif [ "$COMMAND" = "pre-upgrade" ] ; then CLASS='org.apache.hadoop.hbase.tool.PreUpgradeValidator' elif [ "$COMMAND" = "completebulkload" ] ; then CLASS='org.apache.hadoop.hbase.tool.BulkLoadHFilesTool' +elif [ "$COMMAND" = "chaosagent" ] ; then + CLASS='org.apache.hadoop.hbase.chaos.ChaosService' +elif [ "$COMMAND" = "chaosmonkeyrunner" ] ; then + CLASS='org.apache.hadoop.hbase.chaos.util.ChaosMonkeyRunner' elif [ "$COMMAND" = "hbtop" ] ; then CLASS='org.apache.hadoop.hbase.hbtop.HBTop' if [ -n "${shaded_jar}" ] ; then diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java index cd1c6773634..a73748d5c4f 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java @@ -217,7 +217,7 @@ public class HBaseClusterManager extends Configured implements ClusterManager { } public String signalCommand(ServiceType service, String signal) { - return String.format("%s | xargs sudo kill -s %s", findPidCommand(service), signal); + return String.format("%s | xargs kill -s %s", findPidCommand(service), signal); } }
[hbase] branch branch-2.4 updated: HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078)
This is an automated email from the ASF dual-hosted git repository. zhangduo pushed a commit to branch branch-2.4 in repository https://gitbox.apache.org/repos/asf/hbase.git The following commit(s) were added to refs/heads/branch-2.4 by this push: new ad90a7b0466 HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078) ad90a7b0466 is described below commit ad90a7b0466b6cc4265f61aae962369f1b00ba83 Author: Rajeshbabu Chintaguntla AuthorDate: Thu Mar 9 08:16:25 2023 +0530 HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078) Co-authored-by: Rajeshbabu Chintaguntla Signed-off-by: Duo Zhang (cherry picked from commit bc8b13e468a258b4ee47e40cf3645bf27d66471d) --- bin/chaos-daemon.sh| 26 +++--- bin/hbase | 4 .../apache/hadoop/hbase/HBaseClusterManager.java | 2 +- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/bin/chaos-daemon.sh b/bin/chaos-daemon.sh index 084e519321a..8e27f4a5d9f 100644 --- a/bin/chaos-daemon.sh +++ b/bin/chaos-daemon.sh @@ -19,7 +19,7 @@ # */ # -usage="Usage: chaos-daemon.sh (start|stop) chaosagent" +usage="Usage: chaos-daemon.sh (start|stop) (chaosagent|chaosmonkeyrunner)" # if no args specified, show usage if [ $# -le 1 ]; then @@ -51,11 +51,6 @@ bin=$(cd "$bin">/dev/null || exit; pwd) . "$bin"/hbase-config.sh . 
"$bin"/hbase-common.sh -CLASSPATH=$HBASE_CONF_DIR -for f in ../lib/*.jar; do - CLASSPATH=${CLASSPATH}:$f -done - # get log directory if [ "$HBASE_LOG_DIR" = "" ]; then export HBASE_LOG_DIR="$HBASE_HOME/logs" @@ -79,7 +74,7 @@ if [ "$JAVA_HOME" = "" ]; then fi export HBASE_LOG_PREFIX=hbase-$HBASE_IDENT_STRING-$command-$HOSTNAME -export CHAOS_LOGFILE=$HBASE_LOG_PREFIX.log +export HBASE_LOGFILE=$HBASE_LOG_PREFIX.log if [ -z "${HBASE_ROOT_LOGGER}" ]; then export HBASE_ROOT_LOGGER=${HBASE_ROOT_LOGGER:-"INFO,RFA"} @@ -89,7 +84,7 @@ if [ -z "${HBASE_SECURITY_LOGGER}" ]; then export HBASE_SECURITY_LOGGER=${HBASE_SECURITY_LOGGER:-"INFO,RFAS"} fi -CHAOS_LOGLOG=${CHAOS_LOGLOG:-"${HBASE_LOG_DIR}/${CHAOS_LOGFILE}"} +CHAOS_LOGLOG=${CHAOS_LOGLOG:-"${HBASE_LOG_DIR}/${HBASE_LOGFILE}"} CHAOS_PID=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.pid if [ -z "$CHAOS_JAVA_OPTS" ]; then @@ -101,15 +96,20 @@ case $startStop in (start) check_before_start echo running $command -CMD="${JAVA_HOME}/bin/java -Dapp.home=${HBASE_CONF_DIR}/../ ${CHAOS_JAVA_OPTS} -cp ${CLASSPATH} org.apache.hadoop.hbase.chaos.ChaosService -$command start &>> ${CHAOS_LOGLOG} &" - -eval $CMD +command_args="" +if [ "$command" = "chaosagent" ]; then + command_args=" -${command} start" +elif [ "$command" = "chaosmonkeyrunner" ]; then + command_args="-c $HBASE_CONF_DIR $@" +fi +HBASE_OPTS="$HBASE_OPTS $CHAOS_JAVA_OPTS" . $bin/hbase --config "${HBASE_CONF_DIR}" $command $command_args >> ${CHAOS_LOGLOG} 2>&1 & PID=$(echo $!) +disown -h -r echo ${PID} >${CHAOS_PID} -echo "Chaos ${1} process Started with ${PID} !" +echo "Chaos ${command} process Started with ${PID} !" now=$(date) -echo "${now} Chaos ${1} process Started with ${PID} !" >>${CHAOS_LOGLOG} +echo "${now} Chaos ${command} process Started with ${PID} !" 
>>${CHAOS_LOGLOG} ;; (stop) diff --git a/bin/hbase b/bin/hbase index 3c8f80bf4e2..41ac11f4016 100755 --- a/bin/hbase +++ b/bin/hbase @@ -673,6 +673,10 @@ elif [ "$COMMAND" = "pre-upgrade" ] ; then CLASS='org.apache.hadoop.hbase.tool.PreUpgradeValidator' elif [ "$COMMAND" = "completebulkload" ] ; then CLASS='org.apache.hadoop.hbase.tool.BulkLoadHFilesTool' +elif [ "$COMMAND" = "chaosagent" ] ; then + CLASS='org.apache.hadoop.hbase.chaos.ChaosService' +elif [ "$COMMAND" = "chaosmonkeyrunner" ] ; then + CLASS='org.apache.hadoop.hbase.chaos.util.ChaosMonkeyRunner' elif [ "$COMMAND" = "hbtop" ] ; then CLASS='org.apache.hadoop.hbase.hbtop.HBTop' if [ -n "${shaded_jar}" ] ; then diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java index d620684a37b..9380e1dfe94 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java @@ -217,7 +217,7 @@ public class HBaseClusterManager extends Configured implements ClusterManager { } public String signalCommand(ServiceType service, String signal) { - return String.format("%s | xargs sudo kill -s %s", findPidCommand(service), signal); + return String.format("%s | xargs kill -s %s", findPidCommand(service), signal); } }
[hbase] branch branch-2 updated: HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078)
This is an automated email from the ASF dual-hosted git repository. zhangduo pushed a commit to branch branch-2 in repository https://gitbox.apache.org/repos/asf/hbase.git The following commit(s) were added to refs/heads/branch-2 by this push: new e180aa10df8 HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078) e180aa10df8 is described below commit e180aa10df835f4e2995ea74a4a9fec2b5e5118f Author: Rajeshbabu Chintaguntla AuthorDate: Thu Mar 9 08:16:25 2023 +0530 HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078) Co-authored-by: Rajeshbabu Chintaguntla Signed-off-by: Duo Zhang (cherry picked from commit bc8b13e468a258b4ee47e40cf3645bf27d66471d) --- bin/chaos-daemon.sh| 26 +++--- bin/hbase | 4 .../apache/hadoop/hbase/HBaseClusterManager.java | 2 +- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/bin/chaos-daemon.sh b/bin/chaos-daemon.sh index 084e519321a..8e27f4a5d9f 100644 --- a/bin/chaos-daemon.sh +++ b/bin/chaos-daemon.sh @@ -19,7 +19,7 @@ # */ # -usage="Usage: chaos-daemon.sh (start|stop) chaosagent" +usage="Usage: chaos-daemon.sh (start|stop) (chaosagent|chaosmonkeyrunner)" # if no args specified, show usage if [ $# -le 1 ]; then @@ -51,11 +51,6 @@ bin=$(cd "$bin">/dev/null || exit; pwd) . "$bin"/hbase-config.sh . 
"$bin"/hbase-common.sh -CLASSPATH=$HBASE_CONF_DIR -for f in ../lib/*.jar; do - CLASSPATH=${CLASSPATH}:$f -done - # get log directory if [ "$HBASE_LOG_DIR" = "" ]; then export HBASE_LOG_DIR="$HBASE_HOME/logs" @@ -79,7 +74,7 @@ if [ "$JAVA_HOME" = "" ]; then fi export HBASE_LOG_PREFIX=hbase-$HBASE_IDENT_STRING-$command-$HOSTNAME -export CHAOS_LOGFILE=$HBASE_LOG_PREFIX.log +export HBASE_LOGFILE=$HBASE_LOG_PREFIX.log if [ -z "${HBASE_ROOT_LOGGER}" ]; then export HBASE_ROOT_LOGGER=${HBASE_ROOT_LOGGER:-"INFO,RFA"} @@ -89,7 +84,7 @@ if [ -z "${HBASE_SECURITY_LOGGER}" ]; then export HBASE_SECURITY_LOGGER=${HBASE_SECURITY_LOGGER:-"INFO,RFAS"} fi -CHAOS_LOGLOG=${CHAOS_LOGLOG:-"${HBASE_LOG_DIR}/${CHAOS_LOGFILE}"} +CHAOS_LOGLOG=${CHAOS_LOGLOG:-"${HBASE_LOG_DIR}/${HBASE_LOGFILE}"} CHAOS_PID=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.pid if [ -z "$CHAOS_JAVA_OPTS" ]; then @@ -101,15 +96,20 @@ case $startStop in (start) check_before_start echo running $command -CMD="${JAVA_HOME}/bin/java -Dapp.home=${HBASE_CONF_DIR}/../ ${CHAOS_JAVA_OPTS} -cp ${CLASSPATH} org.apache.hadoop.hbase.chaos.ChaosService -$command start &>> ${CHAOS_LOGLOG} &" - -eval $CMD +command_args="" +if [ "$command" = "chaosagent" ]; then + command_args=" -${command} start" +elif [ "$command" = "chaosmonkeyrunner" ]; then + command_args="-c $HBASE_CONF_DIR $@" +fi +HBASE_OPTS="$HBASE_OPTS $CHAOS_JAVA_OPTS" . $bin/hbase --config "${HBASE_CONF_DIR}" $command $command_args >> ${CHAOS_LOGLOG} 2>&1 & PID=$(echo $!) +disown -h -r echo ${PID} >${CHAOS_PID} -echo "Chaos ${1} process Started with ${PID} !" +echo "Chaos ${command} process Started with ${PID} !" now=$(date) -echo "${now} Chaos ${1} process Started with ${PID} !" >>${CHAOS_LOGLOG} +echo "${now} Chaos ${command} process Started with ${PID} !" 
>>${CHAOS_LOGLOG} ;; (stop) diff --git a/bin/hbase b/bin/hbase index b1369ef576a..31547b1ab51 100755 --- a/bin/hbase +++ b/bin/hbase @@ -742,6 +742,10 @@ elif [ "$COMMAND" = "pre-upgrade" ] ; then CLASS='org.apache.hadoop.hbase.tool.PreUpgradeValidator' elif [ "$COMMAND" = "completebulkload" ] ; then CLASS='org.apache.hadoop.hbase.tool.BulkLoadHFilesTool' +elif [ "$COMMAND" = "chaosagent" ] ; then + CLASS='org.apache.hadoop.hbase.chaos.ChaosService' +elif [ "$COMMAND" = "chaosmonkeyrunner" ] ; then + CLASS='org.apache.hadoop.hbase.chaos.util.ChaosMonkeyRunner' elif [ "$COMMAND" = "hbtop" ] ; then CLASS='org.apache.hadoop.hbase.hbtop.HBTop' if [ -n "${shaded_jar}" ] ; then diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java index cd1c6773634..a73748d5c4f 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java @@ -217,7 +217,7 @@ public class HBaseClusterManager extends Configured implements ClusterManager { } public String signalCommand(ServiceType service, String signal) { - return String.format("%s | xargs sudo kill -s %s", findPidCommand(service), signal); + return String.format("%s | xargs kill -s %s", findPidCommand(service), signal); } }
[hbase] branch master updated: HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078)
This is an automated email from the ASF dual-hosted git repository. zhangduo pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hbase.git The following commit(s) were added to refs/heads/master by this push: new bc8b13e468a HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078) bc8b13e468a is described below commit bc8b13e468a258b4ee47e40cf3645bf27d66471d Author: Rajeshbabu Chintaguntla AuthorDate: Thu Mar 9 08:16:25 2023 +0530 HBASE-27669 chaos-daemon.sh should make use hbase script start/stop chaosagent and chaos monkey runner (#5078) Co-authored-by: Rajeshbabu Chintaguntla Signed-off-by: Duo Zhang --- bin/chaos-daemon.sh| 26 +++--- bin/hbase | 4 .../apache/hadoop/hbase/HBaseClusterManager.java | 2 +- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/bin/chaos-daemon.sh b/bin/chaos-daemon.sh index 084e519321a..8e27f4a5d9f 100644 --- a/bin/chaos-daemon.sh +++ b/bin/chaos-daemon.sh @@ -19,7 +19,7 @@ # */ # -usage="Usage: chaos-daemon.sh (start|stop) chaosagent" +usage="Usage: chaos-daemon.sh (start|stop) (chaosagent|chaosmonkeyrunner)" # if no args specified, show usage if [ $# -le 1 ]; then @@ -51,11 +51,6 @@ bin=$(cd "$bin">/dev/null || exit; pwd) . "$bin"/hbase-config.sh . 
"$bin"/hbase-common.sh -CLASSPATH=$HBASE_CONF_DIR -for f in ../lib/*.jar; do - CLASSPATH=${CLASSPATH}:$f -done - # get log directory if [ "$HBASE_LOG_DIR" = "" ]; then export HBASE_LOG_DIR="$HBASE_HOME/logs" @@ -79,7 +74,7 @@ if [ "$JAVA_HOME" = "" ]; then fi export HBASE_LOG_PREFIX=hbase-$HBASE_IDENT_STRING-$command-$HOSTNAME -export CHAOS_LOGFILE=$HBASE_LOG_PREFIX.log +export HBASE_LOGFILE=$HBASE_LOG_PREFIX.log if [ -z "${HBASE_ROOT_LOGGER}" ]; then export HBASE_ROOT_LOGGER=${HBASE_ROOT_LOGGER:-"INFO,RFA"} @@ -89,7 +84,7 @@ if [ -z "${HBASE_SECURITY_LOGGER}" ]; then export HBASE_SECURITY_LOGGER=${HBASE_SECURITY_LOGGER:-"INFO,RFAS"} fi -CHAOS_LOGLOG=${CHAOS_LOGLOG:-"${HBASE_LOG_DIR}/${CHAOS_LOGFILE}"} +CHAOS_LOGLOG=${CHAOS_LOGLOG:-"${HBASE_LOG_DIR}/${HBASE_LOGFILE}"} CHAOS_PID=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.pid if [ -z "$CHAOS_JAVA_OPTS" ]; then @@ -101,15 +96,20 @@ case $startStop in (start) check_before_start echo running $command -CMD="${JAVA_HOME}/bin/java -Dapp.home=${HBASE_CONF_DIR}/../ ${CHAOS_JAVA_OPTS} -cp ${CLASSPATH} org.apache.hadoop.hbase.chaos.ChaosService -$command start &>> ${CHAOS_LOGLOG} &" - -eval $CMD +command_args="" +if [ "$command" = "chaosagent" ]; then + command_args=" -${command} start" +elif [ "$command" = "chaosmonkeyrunner" ]; then + command_args="-c $HBASE_CONF_DIR $@" +fi +HBASE_OPTS="$HBASE_OPTS $CHAOS_JAVA_OPTS" . $bin/hbase --config "${HBASE_CONF_DIR}" $command $command_args >> ${CHAOS_LOGLOG} 2>&1 & PID=$(echo $!) +disown -h -r echo ${PID} >${CHAOS_PID} -echo "Chaos ${1} process Started with ${PID} !" +echo "Chaos ${command} process Started with ${PID} !" now=$(date) -echo "${now} Chaos ${1} process Started with ${PID} !" >>${CHAOS_LOGLOG} +echo "${now} Chaos ${command} process Started with ${PID} !" 
>>${CHAOS_LOGLOG} ;; (stop) diff --git a/bin/hbase b/bin/hbase index f81c0551e57..b5329795c40 100755 --- a/bin/hbase +++ b/bin/hbase @@ -742,6 +742,10 @@ elif [ "$COMMAND" = "pre-upgrade" ] ; then CLASS='org.apache.hadoop.hbase.tool.PreUpgradeValidator' elif [ "$COMMAND" = "completebulkload" ] ; then CLASS='org.apache.hadoop.hbase.tool.BulkLoadHFilesTool' +elif [ "$COMMAND" = "chaosagent" ] ; then + CLASS='org.apache.hadoop.hbase.chaos.ChaosService' +elif [ "$COMMAND" = "chaosmonkeyrunner" ] ; then + CLASS='org.apache.hadoop.hbase.chaos.util.ChaosMonkeyRunner' elif [ "$COMMAND" = "hbtop" ] ; then CLASS='org.apache.hadoop.hbase.hbtop.HBTop' if [ -n "${shaded_jar}" ] ; then diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java index a09a690c89a..b16ac52b696 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java @@ -217,7 +217,7 @@ public class HBaseClusterManager extends Configured implements ClusterManager { } public String signalCommand(ServiceType service, String signal) { - return String.format("%s | xargs sudo kill -s %s", findPidCommand(service), signal); + return String.format("%s | xargs kill -s %s", findPidCommand(service), signal); } }
[hbase] branch master updated: HBASE-27690 Fix a misspell in TestRegionStateStore (#5090)
This is an automated email from the ASF dual-hosted git repository. zhangduo pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hbase.git The following commit(s) were added to refs/heads/master by this push: new 8bdabed85ca HBASE-27690 Fix a misspell in TestRegionStateStore (#5090) 8bdabed85ca is described below commit 8bdabed85cad0b0e4dc6c724b4174a8b5c7ccc64 Author: tianhang AuthorDate: Wed Mar 8 23:23:50 2023 +0800 HBASE-27690 Fix a misspell in TestRegionStateStore (#5090) Signed-off-by: Duo Zhang --- .../org/apache/hadoop/hbase/master/assignment/TestRegionStateStore.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionStateStore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionStateStore.java index 0004d7665e7..0e6251a 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionStateStore.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionStateStore.java @@ -215,7 +215,7 @@ public class TestRegionStateStore { assertNull(serverCellA); assertNull(startCodeCellA); - Get get2 = new Get(splitA.getRegionName()); + Get get2 = new Get(splitB.getRegionName()); Result resultB = meta.get(get2); Cell serverCellB = resultB.getColumnLatestCell(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getServerColumn(splitB.getReplicaId()));
[hbase] branch branch-2 updated (9a69a69648e -> 43fc4bfcd82)
This is an automated email from the ASF dual-hosted git repository. bbeaudreault pushed a change to branch branch-2 in repository https://gitbox.apache.org/repos/asf/hbase.git from 9a69a69648e HBASE-27250 MasterRpcService#setRegionStateInMeta does not support replica region encodedNames or region names add 43fc4bfcd82 HBASE-27681 Refactor Table Latency Metrics (#5093) No new revisions were added by this update. Summary of changes: .../hbase/regionserver/MetricsTableLatencies.java | 145 - .../hbase/regionserver/MetricsTableQueryMeter.java | 57 .../impl/GlobalMetricRegistriesAdapter.java| 1 + .../regionserver/MetricsTableLatenciesImpl.java| 216 -- .../regionserver/MetricsTableQueryMeterImpl.java | 99 -- ...hadoop.hbase.regionserver.MetricsTableLatencies | 17 -- .../metrics/impl/TestMetricRegistriesImpl.java | 62 .../apache/hadoop/hbase/regionserver/HRegion.java | 28 +- .../hbase/regionserver/MetricsRegionServer.java| 117 +++- .../hadoop/hbase/regionserver/RSRpcServices.java | 38 +-- .../regionserver/RegionServerTableMetrics.java | 108 --- .../regionserver/metrics/MetricsTableRequests.java | 331 + .../regionserver/TestMetricsRegionServer.java | 57 ++-- .../regionserver/TestMetricsTableLatencies.java| 123 .../regionserver/TestMetricsTableRequests.java | 128 .../regionserver/TestMetricsUserAggregate.java | 27 +- 16 files changed, 661 insertions(+), 893 deletions(-) delete mode 100644 hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsTableLatencies.java delete mode 100644 hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsTableQueryMeter.java delete mode 100644 hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsTableLatenciesImpl.java delete mode 100644 hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsTableQueryMeterImpl.java delete mode 100644 hbase-hadoop2-compat/src/main/resources/META-INF/services/org.apache.hadoop.hbase.regionserver.MetricsTableLatencies 
create mode 100644 hbase-metrics/src/test/java/org/apache/hadoop/hbase/metrics/impl/TestMetricRegistriesImpl.java delete mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerTableMetrics.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/MetricsTableRequests.java delete mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsTableLatencies.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsTableRequests.java
[hbase-site] branch asf-site updated: INFRA-10751 Empty commit
This is an automated email from the ASF dual-hosted git repository. git-site-role pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/hbase-site.git The following commit(s) were added to refs/heads/asf-site by this push: new 183af9772b9 INFRA-10751 Empty commit 183af9772b9 is described below commit 183af9772b9299b335975c0a03a4dacf0736e305 Author: jenkins AuthorDate: Wed Mar 8 14:46:10 2023 +0000 INFRA-10751 Empty commit
[hbase] branch master updated: HBASE-27681 Addendum delete old metric classes (#5092)
This is an automated email from the ASF dual-hosted git repository. bbeaudreault pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hbase.git The following commit(s) were added to refs/heads/master by this push: new 13e11a46c6e HBASE-27681 Addendum delete old metric classes (#5092) 13e11a46c6e is described below commit 13e11a46c6e37cba63d951e2739f3e8a42aa7ee6 Author: tianhang AuthorDate: Wed Mar 8 22:30:22 2023 +0800 HBASE-27681 Addendum delete old metric classes (#5092) Signed-off-by: Duo Zhang Signed-off-by: Bryan Beaudreault --- .../hbase/regionserver/MetricsTableLatencies.java | 145 -- .../regionserver/MetricsTableLatenciesImpl.java| 216 - ...hadoop.hbase.regionserver.MetricsTableLatencies | 17 -- 3 files changed, 378 deletions(-) diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsTableLatencies.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsTableLatencies.java deleted file mode 100644 index e7d447aef49..000 --- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsTableLatencies.java +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hbase.regionserver; - -import org.apache.yetus.audience.InterfaceAudience; - -/** - * Latency metrics for a specific table in a RegionServer. - */ -@InterfaceAudience.Private -public interface MetricsTableLatencies { - - /** - * The name of the metrics - */ - String METRICS_NAME = "TableLatencies"; - - /** - * The name of the metrics context that metrics will be under. - */ - String METRICS_CONTEXT = "regionserver"; - - /** - * Description - */ - String METRICS_DESCRIPTION = "Metrics about Tables on a single HBase RegionServer"; - - /** - * The name of the metrics context that metrics will be under in jmx - */ - String METRICS_JMX_CONTEXT = "RegionServer,sub=" + METRICS_NAME; - - String GET_TIME = "getTime"; - String SCAN_TIME = "scanTime"; - String SCAN_SIZE = "scanSize"; - String PUT_TIME = "putTime"; - String PUT_BATCH_TIME = "putBatchTime"; - String DELETE_TIME = "deleteTime"; - String DELETE_BATCH_TIME = "deleteBatchTime"; - String INCREMENT_TIME = "incrementTime"; - String APPEND_TIME = "appendTime"; - String CHECK_AND_DELETE_TIME = "checkAndDeleteTime"; - String CHECK_AND_PUT_TIME = "checkAndPutTime"; - String CHECK_AND_MUTATE_TIME = "checkAndMutateTime"; - - /** - * Update the Put time histogram - * @param tableName The table the metric is for - * @param t time it took - */ - void updatePut(String tableName, long t); - - /** - * Update the batch Put time histogram - * @param tableName The table the metric is for - * @param t time it took - */ - void updatePutBatch(String tableName, long t); - - /** - * Update the Delete time histogram - * @param tableName The table the metric is for - * @param t time it took - */ - void updateDelete(String tableName, long t); - - /** - * Update the batch Delete time histogram - * @param tableName The table the metric is for - * @param t time it took - */ - void updateDeleteBatch(String tableName, long t); - - /** - * Update the Get time histogram . 
- * @param tableName The table the metric is for - * @param t time it took - */ - void updateGet(String tableName, long t); - - /** - * Update the Increment time histogram. - * @param tableName The table the metric is for - * @param t time it took - */ - void updateIncrement(String tableName, long t); - - /** - * Update the Append time histogram. - * @param tableName The table the metric is for - * @param t time it took - */ - void updateAppend(String tableName, long t); - - /** - * Update the scan size. - * @param tableName The table the metric is for - * @param scanSize size of the scan - */ - void updateScanSize(String tableName, long scanSize); - - /** - * Update the scan time. - * @param tableName The table the metric is for - * @param t time it took - */ -
[hbase] branch branch-2.5 updated: HBASE-27250 MasterRpcService#setRegionStateInMeta does not support replica region encodedNames or region names
This is an automated email from the ASF dual-hosted git repository. ndimiduk pushed a commit to branch branch-2.5 in repository https://gitbox.apache.org/repos/asf/hbase.git The following commit(s) were added to refs/heads/branch-2.5 by this push: new 6e31c18bf26 HBASE-27250 MasterRpcService#setRegionStateInMeta does not support replica region encodedNames or region names 6e31c18bf26 is described below commit 6e31c18bf26cb23daea10a47179f9114eb9d94e4 Author: huaxiangsun AuthorDate: Tue Feb 21 04:47:27 2023 -0800 HBASE-27250 MasterRpcService#setRegionStateInMeta does not support replica region encodedNames or region names - Added sanity check to make sure input region encoded name or region name is valid - Assignment improvements pertaining to read replica regions - make several MetaTableAccessor methods more precise in their handling of replica regions - hbck2 setRegionStateInMeta and HBCKServerCrashProcedure handle read replicas - separate AM helper methods -- loading RegionInfo from cache vs. 
refreshing cache from meta - AM helper method support loading RegionInfo from cache via either region name and encoded region name (both caches are maintained, and under lock) - consolidate, extend tests to cover read replica regions Co-authored-by: Huaxiang Sun Co-authored-by: Nick Dimiduk Signed-off-by: Peter Somogyi --- .../org/apache/hadoop/hbase/MetaTableAccessor.java | 70 +--- .../hadoop/hbase/master/MasterRpcServices.java | 157 +- .../hbase/master/assignment/AssignmentManager.java | 54 --- .../GCMultipleMergedRegionsProcedure.java | 4 +- .../assignment/MergeTableRegionsProcedure.java | 5 +- .../hbase/master/assignment/RegionStateStore.java | 4 +- .../hbase/master/assignment/RegionStates.java | 6 +- .../master/procedure/DeleteTableProcedure.java | 4 +- .../org/apache/hadoop/hbase/util/HBaseFsck.java| 2 +- .../apache/hadoop/hbase/util/HBaseFsckRepair.java | 2 +- .../apache/hadoop/hbase/TestMetaTableAccessor.java | 9 +- .../org/apache/hadoop/hbase/TestSplitMerge.java| 2 +- .../org/apache/hadoop/hbase/client/TestHbck.java | 93 ++- .../hbase/master/TestRegionsRecoveryChore.java | 6 +- .../master/assignment/TestAssignmentManager.java | 20 +-- .../assignment/TestMergeTableRegionsProcedure.java | 4 +- .../hadoop/hbase/master/janitor/TestMetaFixer.java | 8 +- .../hadoop/hbase/master/procedure/TestHBCKSCP.java | 178 + .../hbase/master/procedure/TestHBCKSCPUnknown.java | 54 --- .../TestRegionMergeTransactionOnCluster.java | 6 +- 20 files changed, 394 insertions(+), 294 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java index 48476e4bb3a..f29104df3c0 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java @@ -289,7 +289,8 @@ public class MetaTableAccessor { RegionLocations locations = getRegionLocations(r); return locations == null ? 
null - : locations.getRegionLocation(parsedInfo == null ? 0 : parsedInfo.getReplicaId()); + : locations.getRegionLocation( +parsedInfo == null ? RegionInfo.DEFAULT_REPLICA_ID : parsedInfo.getReplicaId()); } /** @@ -333,12 +334,12 @@ public class MetaTableAccessor { /** * Gets the result in hbase:meta for the specified region. * @param connection connection we're using - * @param regionName region we're looking for + * @param regionInfo region we're looking for * @return result of the specified region */ - public static Result getRegionResult(Connection connection, byte[] regionName) + public static Result getRegionResult(Connection connection, RegionInfo regionInfo) throws IOException { -Get get = new Get(regionName); +Get get = new Get(getMetaKeyForRegion(regionInfo)); get.addFamily(HConstants.CATALOG_FAMILY); return get(getMetaHTable(connection), get); } @@ -364,20 +365,20 @@ public class MetaTableAccessor { } /** - * Returns Return all regioninfos listed in the 'info:merge*' columns of the - * regionName row. + * Returns Return all regioninfos listed in the 'info:merge*' columns of the {@code regionInfo} + * row. */ @Nullable - public static List getMergeRegions(Connection connection, byte[] regionName) + public static List getMergeRegions(Connection connection, RegionInfo regionInfo) throws IOException { -return getMergeRegions(getRegionResult(connection, regionName).rawCells()); +return getMergeRegions(getRegionResult(connection, regionInfo).rawCells()); } /** - * Check whether the given {@code regionName} has any 'info:merge*' columns. + * Check whether the given