This is an automated email from the ASF dual-hosted git repository. zhangduo pushed a commit to branch branch-2.5 in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2.5 by this push: new 78f587f5596 HBASE-27104 Add a tool command list_unknownservers (#4714) 78f587f5596 is described below commit 78f587f5596df5558c56e49c7654a5612fd67245 Author: LiangJun He <2005hit...@163.com> AuthorDate: Mon Aug 22 21:15:47 2022 +0800 HBASE-27104 Add a tool command list_unknownservers (#4714) Signed-off-by: Duo Zhang <zhang...@apache.org> (cherry picked from commit 1bd0b581cb68341d6087f9fda0e4f3efe289a3a0) --- .../org/apache/hadoop/hbase/ClusterMetrics.java | 7 ++ .../apache/hadoop/hbase/ClusterMetricsBuilder.java | 46 ++++++++-- .../org/apache/hadoop/hbase/ClusterStatus.java | 5 ++ .../java/org/apache/hadoop/hbase/client/Admin.java | 8 ++ .../org/apache/hadoop/hbase/client/AsyncAdmin.java | 8 ++ .../hadoop/hbase/client/AsyncHBaseAdmin.java | 5 ++ .../src/main/protobuf/ClusterStatus.proto | 2 + .../org/apache/hadoop/hbase/master/HMaster.java | 17 ++++ .../hbase/master/TestRegionsRecoveryChore.java | 5 ++ .../hadoop/hbase/master/TestUnknownServers.java | 98 ++++++++++++++++++++++ hbase-shell/src/main/ruby/hbase/admin.rb | 8 +- hbase-shell/src/main/ruby/shell.rb | 1 + .../ruby/shell/commands/list_unknownservers.rb | 44 ++++++++++ 13 files changed, 245 insertions(+), 9 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterMetrics.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterMetrics.java index 8be97fa1fda..a8a1493c349 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterMetrics.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterMetrics.java @@ -76,6 +76,9 @@ public interface ClusterMetrics { /** Returns the names of region servers on the dead list */ List<ServerName> getDeadServerNames(); + /** Returns the names of region servers on the unknown list */ + List<ServerName> getUnknownServerNames(); + /** Returns the names of region servers on the live list */ Map<ServerName, ServerMetrics> getLiveServerMetrics(); @@ -176,6 +179,10 @@ public interface ClusterMetrics { * metrics about dead region servers */ DEAD_SERVERS, + /** + * metrics about unknown region servers + */ + UNKNOWN_SERVERS, /** * metrics about master name */ diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterMetricsBuilder.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterMetricsBuilder.java index 630e3620a67..9ca65463e02 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterMetricsBuilder.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterMetricsBuilder.java @@ -47,6 +47,8 @@ public final class ClusterMetricsBuilder { .collect(Collectors.toList())) .addAllDeadServers(metrics.getDeadServerNames().stream().map(ProtobufUtil::toServerName) .collect(Collectors.toList())) + .addAllUnknownServers(metrics.getUnknownServerNames().stream() + .map(ProtobufUtil::toServerName).collect(Collectors.toList())) .addAllLiveServers(metrics.getLiveServerMetrics().entrySet().stream() .map(s -> ClusterStatusProtos.LiveServerInfo.newBuilder() .setServer(ProtobufUtil.toServerName(s.getKey())) @@ -98,6 +100,8 @@ public final class ClusterMetricsBuilder { ServerMetricsBuilder::toServerMetrics))) .setDeadServerNames(proto.getDeadServersList().stream().map(ProtobufUtil::toServerName) .collect(Collectors.toList())) + .setUnknownServerNames(proto.getUnknownServersList().stream().map(ProtobufUtil::toServerName) + .collect(Collectors.toList())) .setBackerMasterNames(proto.getBackupMastersList().stream().map(ProtobufUtil::toServerName) .collect(Collectors.toList())) .setRegionsInTransition(proto.getRegionsInTransitionList().stream() @@ -147,6 +151,8 @@ public final class ClusterMetricsBuilder { return ClusterMetrics.Option.LIVE_SERVERS; case DEAD_SERVERS: return ClusterMetrics.Option.DEAD_SERVERS; + case UNKNOWN_SERVERS: + return ClusterMetrics.Option.UNKNOWN_SERVERS; case REGIONS_IN_TRANSITION: return ClusterMetrics.Option.REGIONS_IN_TRANSITION; case CLUSTER_ID: @@ -186,6 +192,8 @@ public final class ClusterMetricsBuilder { return ClusterStatusProtos.Option.LIVE_SERVERS; case DEAD_SERVERS: return ClusterStatusProtos.Option.DEAD_SERVERS; + case UNKNOWN_SERVERS: + return ClusterStatusProtos.Option.UNKNOWN_SERVERS; case REGIONS_IN_TRANSITION: return ClusterStatusProtos.Option.REGIONS_IN_TRANSITION; case CLUSTER_ID: @@ -238,6 +246,7 @@ public final class ClusterMetricsBuilder { @Nullable private String hbaseVersion; private List<ServerName> deadServerNames = Collections.emptyList(); + private List<ServerName> unknownServerNames = Collections.emptyList(); private Map<ServerName, ServerMetrics> liveServerMetrics = new TreeMap<>(); @Nullable private ServerName masterName; @@ -267,6 +276,11 @@ public final class ClusterMetricsBuilder { return this; } + public ClusterMetricsBuilder setUnknownServerNames(List<ServerName> value) { + this.unknownServerNames = value; + return this; + } + public ClusterMetricsBuilder setLiveServerMetrics(Map<ServerName, ServerMetrics> value) { liveServerMetrics.putAll(value); return this; @@ -324,9 +338,10 @@ public final class ClusterMetricsBuilder { } public ClusterMetrics build() { - return new ClusterMetricsImpl(hbaseVersion, deadServerNames, liveServerMetrics, masterName, - backupMasterNames, regionsInTransition, clusterId, masterCoprocessorNames, balancerOn, - masterInfoPort, serversName, tableRegionStatesCount, masterTasks); + return new ClusterMetricsImpl(hbaseVersion, deadServerNames, unknownServerNames, + liveServerMetrics, masterName, backupMasterNames, regionsInTransition, clusterId, + masterCoprocessorNames, balancerOn, masterInfoPort, serversName, tableRegionStatesCount, + masterTasks); } private static class ClusterMetricsImpl implements ClusterMetrics { @@ -334,6 +349,7 @@ public final class ClusterMetricsBuilder { private final String hbaseVersion; private final List<ServerName> deadServerNames; private final Map<ServerName, ServerMetrics> liveServerMetrics; + private final List<ServerName> unknownServerNames; @Nullable private final ServerName masterName; private final List<ServerName> backupMasterNames; @@ -349,13 +365,14 @@ public final class ClusterMetricsBuilder { private final List<ServerTask> masterTasks; ClusterMetricsImpl(String hbaseVersion, List<ServerName> deadServerNames, - Map<ServerName, ServerMetrics> liveServerMetrics, ServerName masterName, - List<ServerName> backupMasterNames, List<RegionState> regionsInTransition, String clusterId, - List<String> masterCoprocessorNames, Boolean balancerOn, int masterInfoPort, - List<ServerName> serversName, Map<TableName, RegionStatesCount> tableRegionStatesCount, - List<ServerTask> masterTasks) { + List<ServerName> unknownServerNames, Map<ServerName, ServerMetrics> liveServerMetrics, + ServerName masterName, List<ServerName> backupMasterNames, + List<RegionState> regionsInTransition, String clusterId, List<String> masterCoprocessorNames, + Boolean balancerOn, int masterInfoPort, List<ServerName> serversName, + Map<TableName, RegionStatesCount> tableRegionStatesCount, List<ServerTask> masterTasks) { this.hbaseVersion = hbaseVersion; this.deadServerNames = Preconditions.checkNotNull(deadServerNames); + this.unknownServerNames = Preconditions.checkNotNull(unknownServerNames); this.liveServerMetrics = Preconditions.checkNotNull(liveServerMetrics); this.masterName = masterName; this.backupMasterNames = Preconditions.checkNotNull(backupMasterNames); @@ -379,6 +396,11 @@ public final class ClusterMetricsBuilder { return Collections.unmodifiableList(deadServerNames); } + @Override + public List<ServerName> getUnknownServerNames() { + return Collections.unmodifiableList(unknownServerNames); + } + @Override public Map<ServerName, ServerMetrics> getLiveServerMetrics() { return Collections.unmodifiableMap(liveServerMetrics); @@ -469,6 +491,14 @@ public final class ClusterMetricsBuilder { } } + int unknownServerSize = getUnknownServerNames().size(); + sb.append("\nNumber of unknown region servers: " + unknownServerSize); + if (unknownServerSize > 0) { + for (ServerName serverName : getUnknownServerNames()) { + sb.append("\n " + serverName); + } + } + sb.append("\nAverage load: " + getAverageLoad()); sb.append("\nNumber of requests: " + getRequestCount()); sb.append("\nNumber of regions: " + getRegionCount()); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java index e3b1a8ab662..f1e8d4bf147 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java @@ -113,6 +113,11 @@ public class ClusterStatus implements ClusterMetrics { return metrics.getDeadServerNames(); } + @Override + public List<ServerName> getUnknownServerNames() { + return metrics.getUnknownServerNames(); + } + @Override public Map<ServerName, ServerMetrics> getLiveServerMetrics() { return metrics.getLiveServerMetrics(); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java index 77601475ab3..caedb5cbbd9 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java @@ -3024,6 +3024,14 @@ public interface Admin extends Abortable, Closeable { return getClusterMetrics(EnumSet.of(Option.DEAD_SERVERS)).getDeadServerNames(); } + /** + * List unknown region servers. + * @return List of unknown region servers. + */ + default List<ServerName> listUnknownServers() throws IOException { + return getClusterMetrics(EnumSet.of(Option.UNKNOWN_SERVERS)).getUnknownServerNames(); + } + /** * Clear dead region servers from master. * @param servers list of dead region servers. diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java index ca08c9dfc10..138b3b8d238 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java @@ -1426,6 +1426,14 @@ public interface AsyncAdmin { .thenApply(ClusterMetrics::getDeadServerNames); } + /** + * List all the unknown region servers. + */ + default CompletableFuture<List<ServerName>> listUnknownServers() { + return this.getClusterMetrics(EnumSet.of(Option.UNKNOWN_SERVERS)) + .thenApply(ClusterMetrics::getUnknownServerNames); + } + /** * Clear dead region servers from master. * @param servers list of dead region servers. diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java index 9ccdeebad7a..f6f302463bd 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java @@ -764,6 +764,11 @@ class AsyncHBaseAdmin implements AsyncAdmin { return wrap(rawAdmin.listDeadServers()); } + @Override + public CompletableFuture<List<ServerName>> listUnknownServers() { + return wrap(rawAdmin.listUnknownServers()); + } + @Override public CompletableFuture<List<ServerName>> clearDeadServers(List<ServerName> servers) { return wrap(rawAdmin.clearDeadServers(servers)); diff --git a/hbase-protocol-shaded/src/main/protobuf/ClusterStatus.proto b/hbase-protocol-shaded/src/main/protobuf/ClusterStatus.proto index 4957d7ca801..3f9f92c9da6 100644 --- a/hbase-protocol-shaded/src/main/protobuf/ClusterStatus.proto +++ b/hbase-protocol-shaded/src/main/protobuf/ClusterStatus.proto @@ -339,6 +339,7 @@ message ClusterStatus { repeated ServerName servers_name = 11; repeated TableRegionStatesCount table_region_states_count = 12; repeated ServerTask master_tasks = 13; + repeated ServerName unknown_servers = 14; } enum Option { @@ -355,4 +356,5 @@ enum Option { SERVERS_NAME = 10; TABLE_TO_REGIONS_COUNT = 11; TASKS = 12; + UNKNOWN_SERVERS = 13; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 06502e8a541..1a1ecdadc70 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -2846,6 +2846,12 @@ public class HMaster extends HRegionServer implements MasterServices { } break; } + case UNKNOWN_SERVERS: { + if (serverManager != null) { + builder.setUnknownServerNames(getUnknownServers()); + } + break; + } case MASTER_COPROCESSORS: { if (cpHost != null) { builder.setMasterCoprocessorNames(Arrays.asList(getMasterCoprocessors())); @@ -2905,6 +2911,17 @@ public class HMaster extends HRegionServer implements MasterServices { return builder.build(); } + private List<ServerName> getUnknownServers() { + if (serverManager != null) { + final Set<ServerName> serverNames = getAssignmentManager().getRegionStates().getRegionStates() + .stream().map(RegionState::getServerName).collect(Collectors.toSet()); + final List<ServerName> unknownServerNames = serverNames.stream() + .filter(sn -> sn != null && serverManager.isServerUnknown(sn)).collect(Collectors.toList()); + return unknownServerNames; + } + return null; + } + private Map<ServerName, ServerMetrics> getOnlineServers() { if (serverManager != null) { final Map<ServerName, ServerMetrics> map = new HashMap<>(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java index cab757739ed..d2c34a55a7d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java @@ -227,6 +227,11 @@ public class TestRegionsRecoveryChore { return null; } + @Override + public List<ServerName> getUnknownServerNames() { + return null; + } + @Override public Map<ServerName, ServerMetrics> getLiveServerMetrics() { Map<ServerName, ServerMetrics> liveServerMetrics = new HashMap<>(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestUnknownServers.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestUnknownServers.java new file mode 100644 index 00000000000..253a9c144c7 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestUnknownServers.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master; + +import java.io.IOException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category({ MasterTests.class, MediumTests.class }) +public class TestUnknownServers { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestUnknownServers.class); + + private static HBaseTestingUtility UTIL; + private static Admin ADMIN; + private final static int SLAVES = 2; + private static boolean IS_UNKNOWN_SERVER = true; + + @BeforeClass + public static void setUpBeforeClass() throws Exception { + UTIL = new HBaseTestingUtility(); + UTIL.getConfiguration().setClass(HConstants.MASTER_IMPL, + TestUnknownServers.HMasterForTest.class, HMaster.class); + UTIL.startMiniCluster(SLAVES); + ADMIN = UTIL.getAdmin(); + } + + @Test + public void testListUnknownServers() throws Exception { + Assert.assertEquals(ADMIN.listUnknownServers().size(), SLAVES); + IS_UNKNOWN_SERVER = false; + Assert.assertEquals(ADMIN.listUnknownServers().size(), 0); + } + + @AfterClass + public static void tearDownAfterClass() throws Exception { + if (ADMIN != null) { + ADMIN.close(); + } + if (UTIL != null) { + UTIL.shutdownMiniCluster(); + } + } + + public static final class HMasterForTest extends HMaster { + + public HMasterForTest(Configuration conf) throws IOException { + super(conf); + } + + @Override + protected ServerManager createServerManager(MasterServices master, RegionServerList storage) + throws IOException { + setupClusterConnection(); + return new TestUnknownServers.ServerManagerForTest(master, storage); + } + } + + private static final class ServerManagerForTest extends ServerManager { + + public ServerManagerForTest(MasterServices master, RegionServerList storage) { + super(master, storage); + } + + @Override + public boolean isServerUnknown(ServerName serverName) { + return IS_UNKNOWN_SERVER; + } + } +} diff --git a/hbase-shell/src/main/ruby/hbase/admin.rb b/hbase-shell/src/main/ruby/hbase/admin.rb index 8580f1378ab..42f25111c1a 100644 --- a/hbase-shell/src/main/ruby/hbase/admin.rb +++ b/hbase-shell/src/main/ruby/hbase/admin.rb @@ -1537,7 +1537,7 @@ module Hbase end #---------------------------------------------------------------------------------------------- - # clear dead region servers + # list dead region servers def list_deadservers @admin.listDeadServers.to_a end @@ -1558,6 +1558,12 @@ module Hbase @admin.clearDeadServers(servers).to_a end + #---------------------------------------------------------------------------------------------- + # list unknown region servers + def list_unknownservers + @admin.listUnknownServers.to_a + end + #---------------------------------------------------------------------------------------------- # List live region servers def list_liveservers diff --git a/hbase-shell/src/main/ruby/shell.rb b/hbase-shell/src/main/ruby/shell.rb index e1c4d42b426..f6b3efd945f 100644 --- a/hbase-shell/src/main/ruby/shell.rb +++ b/hbase-shell/src/main/ruby/shell.rb @@ -475,6 +475,7 @@ Shell.load_command_group( clear_compaction_queues list_deadservers list_liveservers + list_unknownservers clear_deadservers clear_block_cache stop_master diff --git a/hbase-shell/src/main/ruby/shell/commands/list_unknownservers.rb b/hbase-shell/src/main/ruby/shell/commands/list_unknownservers.rb new file mode 100644 index 00000000000..29f5e54acfb --- /dev/null +++ b/hbase-shell/src/main/ruby/shell/commands/list_unknownservers.rb @@ -0,0 +1,44 @@ +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +module Shell + module Commands + class ListUnknownservers < Command + def help + <<~EOF + List all unknown region servers + Examples: + hbase> list_unknownservers + EOF + end + + def command + now = Time.now + formatter.header(['SERVERNAME']) + + servers = admin.list_unknownservers + servers.each do |server| + formatter.row([server.toString]) + end + + formatter.footer(servers.size) + end + end + end +end