HBASE-10367 RegionServer graceful stop / decommissioning

Signed-off-by: Jerry He <jerry...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/a43a00e8 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/a43a00e8 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/a43a00e8 Branch: refs/heads/HBASE-18410 Commit: a43a00e89c5c99968a205208ab9a5307c89730b3 Parents: af479c5 Author: Jerry He <jerry...@apache.org> Authored: Thu Oct 19 21:44:38 2017 -0700 Committer: Jerry He <jerry...@apache.org> Committed: Thu Oct 19 21:54:45 2017 -0700 ---------------------------------------------------------------------- bin/draining_servers.rb | 2 + .../org/apache/hadoop/hbase/client/Admin.java | 26 +++-- .../apache/hadoop/hbase/client/AsyncAdmin.java | 25 ++-- .../hadoop/hbase/client/AsyncHBaseAdmin.java | 14 ++- .../hbase/client/ConnectionImplementation.java | 30 ++--- .../apache/hadoop/hbase/client/HBaseAdmin.java | 23 ++-- .../hadoop/hbase/client/RawAsyncHBaseAdmin.java | 66 +++++------ .../client/ShortCircuitMasterConnection.java | 30 ++--- .../hbase/shaded/protobuf/RequestConverter.java | 24 ++-- .../src/main/protobuf/Master.proto | 38 +++--- .../hbase/coprocessor/MasterObserver.java | 36 ++++++ .../org/apache/hadoop/hbase/master/HMaster.java | 117 +++++++++++++------ .../hbase/master/MasterCoprocessorHost.java | 56 +++++++++ .../hadoop/hbase/master/MasterRpcServices.java | 71 ++++++----- .../hadoop/hbase/master/MasterServices.java | 19 +-- .../hadoop/hbase/master/ServerManager.java | 14 ++- .../hbase/security/access/AccessController.java | 17 +++ .../hbase/zookeeper/DrainingServerTracker.java | 3 + .../apache/hadoop/hbase/client/TestAdmin2.java | 103 ++++++++-------- .../client/TestAsyncDecommissionAdminApi.java | 95 +++++++++++++++ .../hbase/client/TestAsyncDrainAdminApi.java | 101 ---------------- .../hbase/master/MockNoopMasterServices.java | 15 --- .../hbase/zookeeper/TestZooKeeperACL.java | 18 +-- 23 files changed, 556 insertions(+), 387 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/a43a00e8/bin/draining_servers.rb ---------------------------------------------------------------------- diff --git a/bin/draining_servers.rb b/bin/draining_servers.rb index ea74c30..588bac4 100644 --- a/bin/draining_servers.rb +++ b/bin/draining_servers.rb @@ -17,6 +17,8 @@ # # Add or remove servers from draining mode via zookeeper +# Deprecated in 2.0, and will be removed in 3.0. Use Admin decommission +# API instead. require 'optparse' include Java http://git-wip-us.apache.org/repos/asf/hbase/blob/a43a00e8/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java ---------------------------------------------------------------------- diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java index 64d5e53..540b7c8 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java @@ -2425,22 +2425,30 @@ public interface Admin extends Abortable, Closeable { } /** - * Mark a region server as draining to prevent additional regions from getting assigned to it. - * @param servers List of region servers to drain. + * Mark region server(s) as decommissioned to prevent additional regions from getting + * assigned to them. Optionally unload the regions on the servers. 
If there are multiple servers + * to be decommissioned, decommissioning them at the same time can prevent wasteful region + * movements. Region unloading is asynchronous. + * @param servers The list of servers to decommission. + * @param offload True to offload the regions from the decommissioned servers */ - void drainRegionServers(List<ServerName> servers) throws IOException; + void decommissionRegionServers(List<ServerName> servers, boolean offload) throws IOException; /** - * List region servers marked as draining to not get additional regions assigned to them. - * @return List of draining region servers. + * List region servers marked as decommissioned, which can not be assigned regions. + * @return List of decommissioned region servers. */ - List<ServerName> listDrainingRegionServers() throws IOException; + List<ServerName> listDecommissionedRegionServers() throws IOException; /** - * Remove drain from a region server to allow additional regions assignments. - * @param servers List of region servers to remove drain from. + * Remove decommission marker from a region server to allow regions assignments. + * Load regions onto the server if a list of regions is given. Region loading is + * asynchronous. + * @param server The server to recommission. + * @param encodedRegionNames Regions to load onto the server. */ - void removeDrainFromRegionServers(List<ServerName> servers) throws IOException; + void recommissionRegionServer(ServerName server, List<byte[]> encodedRegionNames) + throws IOException; /** * Find all table and column families that are replicated from this cluster http://git-wip-us.apache.org/repos/asf/hbase/blob/a43a00e8/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java ---------------------------------------------------------------------- diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java index 0a88138..b671a9b 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java @@ -759,22 +759,29 @@ public interface AsyncAdmin { CompletableFuture<String> getLocks(); /** - * Mark a region server as draining to prevent additional regions from getting assigned to it. - * @param servers + * Mark region server(s) as decommissioned to prevent additional regions from getting + * assigned to them. Optionally unload the regions on the servers. If there are multiple servers + * to be decommissioned, decommissioning them at the same time can prevent wasteful region + * movements. Region unloading is asynchronous. + * @param servers The list of servers to decommission. + * @param offload True to offload the regions from the decommissioned servers */ - CompletableFuture<Void> drainRegionServers(List<ServerName> servers); + CompletableFuture<Void> decommissionRegionServers(List<ServerName> servers, boolean offload); /** - * List region servers marked as draining to not get additional regions assigned to them. - * @return List of draining region servers wrapped by {@link CompletableFuture} + * List region servers marked as decommissioned, which can not be assigned regions. 
+ * @return List of decommissioned region servers wrapped by {@link CompletableFuture} */ - CompletableFuture<List<ServerName>> listDrainingRegionServers(); + CompletableFuture<List<ServerName>> listDecommissionedRegionServers(); /** - * Remove drain from a region server to allow additional regions assignments. - * @param servers List of region servers to remove drain from. + * Remove decommission marker from a region server to allow regions assignments. Load regions onto + * the server if a list of regions is given. Region loading is asynchronous. + * @param server The server to recommission. + * @param encodedRegionNames Regions to load onto the server. */ - CompletableFuture<Void> removeDrainFromRegionServers(List<ServerName> servers); + CompletableFuture<Void> recommissionRegionServer(ServerName server, + List<byte[]> encodedRegionNames); /** * @return cluster status wrapped by {@link CompletableFuture} http://git-wip-us.apache.org/repos/asf/hbase/blob/a43a00e8/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java ---------------------------------------------------------------------- diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java index f60f7ea..23dea81 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java @@ -446,18 +446,20 @@ public class AsyncHBaseAdmin implements AsyncAdmin { } @Override - public CompletableFuture<Void> drainRegionServers(List<ServerName> servers) { - return wrap(rawAdmin.drainRegionServers(servers)); + public CompletableFuture<Void> decommissionRegionServers(List<ServerName> servers, + boolean offload) { + return wrap(rawAdmin.decommissionRegionServers(servers, offload)); } @Override - public CompletableFuture<List<ServerName>> listDrainingRegionServers() { - return wrap(rawAdmin.listDrainingRegionServers()); + public CompletableFuture<List<ServerName>> listDecommissionedRegionServers() { + return wrap(rawAdmin.listDecommissionedRegionServers()); } @Override - public CompletableFuture<Void> removeDrainFromRegionServers(List<ServerName> servers) { - return wrap(rawAdmin.removeDrainFromRegionServers(servers)); + public CompletableFuture<Void> recommissionRegionServer(ServerName server, + List<byte[]> encodedRegionNames) { + return wrap(rawAdmin.recommissionRegionServer(server, encodedRegionNames)); } @Override http://git-wip-us.apache.org/repos/asf/hbase/blob/a43a00e8/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java ---------------------------------------------------------------------- diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java index 9d888e6..aa69612 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java @@ -89,18 +89,18 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos; import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos; import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.ClientService.BlockingInterface; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos; -import 
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DrainRegionServersRequest; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DrainRegionServersResponse; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DecommissionRegionServersRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DecommissionRegionServersResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsBalancerEnabledRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsBalancerEnabledResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsNormalizerEnabledRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsNormalizerEnabledResponse; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDrainingRegionServersRequest; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDrainingRegionServersResponse; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDecommissionedRegionServersRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDecommissionedRegionServersResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.NormalizeRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.NormalizeResponse; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RemoveDrainFromRegionServersRequest; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RemoveDrainFromRegionServersResponse; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RecommissionRegionServerRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RecommissionRegionServerResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SecurityCapabilitiesRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SecurityCapabilitiesResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SetNormalizerRunningRequest; @@ -1727,22 +1727,22 @@ class ConnectionImplementation implements ClusterConnection, Closeable { } @Override - public ListDrainingRegionServersResponse listDrainingRegionServers(RpcController controller, - ListDrainingRegionServersRequest request) throws ServiceException { - return stub.listDrainingRegionServers(controller, request); + public ListDecommissionedRegionServersResponse listDecommissionedRegionServers(RpcController controller, + ListDecommissionedRegionServersRequest request) throws ServiceException { + return stub.listDecommissionedRegionServers(controller, request); } @Override - public DrainRegionServersResponse drainRegionServers(RpcController controller, - DrainRegionServersRequest request) throws ServiceException { - return stub.drainRegionServers(controller, request); + public DecommissionRegionServersResponse decommissionRegionServers(RpcController controller, + DecommissionRegionServersRequest request) throws ServiceException { + return stub.decommissionRegionServers(controller, request); } @Override - public RemoveDrainFromRegionServersResponse removeDrainFromRegionServers( - RpcController controller, RemoveDrainFromRegionServersRequest request) + public RecommissionRegionServerResponse recommissionRegionServer( + RpcController controller, RecommissionRegionServerRequest request) throws ServiceException { - return stub.removeDrainFromRegionServers(controller, request); + return 
stub.recommissionRegionServer(controller, request); } @Override http://git-wip-us.apache.org/repos/asf/hbase/blob/a43a00e8/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java ---------------------------------------------------------------------- diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java index 8665e84..47b7a55 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java @@ -171,7 +171,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsProcedur import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDeadServersRequest; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDrainingRegionServersRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDecommissionedRegionServersRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListNamespaceDescriptorsRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListTableDescriptorsByNamespaceRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListTableNamesByNamespaceRequest; @@ -4030,27 +4030,28 @@ public class HBaseAdmin implements Admin { } @Override - public void drainRegionServers(List<ServerName> servers) throws IOException { + public void decommissionRegionServers(List<ServerName> servers, boolean offload) + throws IOException { executeCallable(new MasterCallable<Void>(getConnection(), getRpcControllerFactory()) { @Override public Void rpcCall() throws ServiceException { - master.drainRegionServers(getRpcController(), - RequestConverter.buildDrainRegionServersRequest(servers)); + master.decommissionRegionServers(getRpcController(), + RequestConverter.buildDecommissionRegionServersRequest(servers, offload)); return null; } }); } @Override - public List<ServerName> listDrainingRegionServers() throws IOException { + public List<ServerName> listDecommissionedRegionServers() throws IOException { return executeCallable(new MasterCallable<List<ServerName>>(getConnection(), getRpcControllerFactory()) { @Override public List<ServerName> rpcCall() throws ServiceException { - ListDrainingRegionServersRequest req = ListDrainingRegionServersRequest.newBuilder().build(); + ListDecommissionedRegionServersRequest req = ListDecommissionedRegionServersRequest.newBuilder().build(); List<ServerName> servers = new ArrayList<>(); - for (HBaseProtos.ServerName server : master.listDrainingRegionServers(null, req) - .getServerNameList()) { + for (HBaseProtos.ServerName server : master + .listDecommissionedRegionServers(getRpcController(), req).getServerNameList()) { servers.add(ProtobufUtil.toServerName(server)); } return servers; @@ -4059,11 +4060,13 @@ public class HBaseAdmin implements Admin { } @Override - public void removeDrainFromRegionServers(List<ServerName> servers) throws IOException { + public void recommissionRegionServer(ServerName server, List<byte[]> encodedRegionNames) + throws IOException { executeCallable(new MasterCallable<Void>(getConnection(), getRpcControllerFactory()) { @Override public Void rpcCall() throws ServiceException { - 
master.removeDrainFromRegionServers(getRpcController(), RequestConverter.buildRemoveDrainFromRegionServersRequest(servers)); + master.recommissionRegionServer(getRpcController(), + RequestConverter.buildRecommissionRegionServerRequest(server, encodedRegionNames)); return null; } }); http://git-wip-us.apache.org/repos/asf/hbase/blob/a43a00e8/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RawAsyncHBaseAdmin.java ---------------------------------------------------------------------- diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RawAsyncHBaseAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RawAsyncHBaseAdmin.java index 597acd3..0835a9b 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RawAsyncHBaseAdmin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RawAsyncHBaseAdmin.java @@ -126,6 +126,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.CreateName import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.CreateNamespaceResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.CreateTableRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.CreateTableResponse; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DecommissionRegionServersRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DecommissionRegionServersResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DeleteColumnRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DeleteColumnResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DeleteNamespaceRequest; @@ -136,8 +138,6 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DeleteTabl import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DeleteTableResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DisableTableRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DisableTableResponse; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DrainRegionServersRequest; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DrainRegionServersResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.EnableCatalogJanitorRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.EnableCatalogJanitorResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.EnableTableRequest; @@ -180,8 +180,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSplitOrM import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSplitOrMergeEnabledResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDeadServersRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDeadServersResponse; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDrainingRegionServersRequest; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDrainingRegionServersResponse; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDecommissionedRegionServersRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDecommissionedRegionServersResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListNamespaceDescriptorsRequest; import 
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListNamespaceDescriptorsResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.MajorCompactionTimestampForRegionRequest; @@ -200,8 +200,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.NormalizeR import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.NormalizeResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.OfflineRegionRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.OfflineRegionResponse; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RemoveDrainFromRegionServersRequest; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RemoveDrainFromRegionServersResponse; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RecommissionRegionServerRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RecommissionRegionServerResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RestoreSnapshotRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RestoreSnapshotResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunCatalogScanRequest; @@ -1935,41 +1935,37 @@ public class RawAsyncHBaseAdmin implements AsyncAdmin { } @Override - public CompletableFuture<Void> drainRegionServers(List<ServerName> servers) { - return this - .<Void> newMasterCaller() - .action( - (controller, stub) -> this - .<DrainRegionServersRequest, DrainRegionServersResponse, Void> call(controller, stub, - RequestConverter.buildDrainRegionServersRequest(servers), - (s, c, req, done) -> s.drainRegionServers(c, req, done), resp -> null)).call(); + public CompletableFuture<Void> decommissionRegionServers(List<ServerName> servers, boolean offload) { + return this.<Void> newMasterCaller() + .action((controller, stub) -> this + .<DecommissionRegionServersRequest, DecommissionRegionServersResponse, Void> call( + controller, stub, RequestConverter.buildDecommissionRegionServersRequest(servers, offload), + (s, c, req, done) -> s.decommissionRegionServers(c, req, done), resp -> null)) + .call(); } @Override - public CompletableFuture<List<ServerName>> listDrainingRegionServers() { - return this - .<List<ServerName>> newMasterCaller() - .action( - (controller, stub) -> this - .<ListDrainingRegionServersRequest, ListDrainingRegionServersResponse, List<ServerName>> call( - controller, - stub, - ListDrainingRegionServersRequest.newBuilder().build(), - (s, c, req, done) -> s.listDrainingRegionServers(c, req, done), - resp -> resp.getServerNameList().stream().map(ProtobufUtil::toServerName) - .collect(Collectors.toList()))).call(); + public CompletableFuture<List<ServerName>> listDecommissionedRegionServers() { + return this.<List<ServerName>> newMasterCaller() + .action((controller, stub) -> this + .<ListDecommissionedRegionServersRequest, ListDecommissionedRegionServersResponse, + List<ServerName>> call( + controller, stub, ListDecommissionedRegionServersRequest.newBuilder().build(), + (s, c, req, done) -> s.listDecommissionedRegionServers(c, req, done), + resp -> resp.getServerNameList().stream().map(ProtobufUtil::toServerName) + .collect(Collectors.toList()))) + .call(); } @Override - public CompletableFuture<Void> removeDrainFromRegionServers(List<ServerName> servers) { - return this - .<Void> newMasterCaller() - .action( - (controller, stub) -> this - .<RemoveDrainFromRegionServersRequest, 
RemoveDrainFromRegionServersResponse, Void> call( - controller, stub, RequestConverter - .buildRemoveDrainFromRegionServersRequest(servers), (s, c, req, done) -> s - .removeDrainFromRegionServers(c, req, done), resp -> null)).call(); + public CompletableFuture<Void> recommissionRegionServer(ServerName server, + List<byte[]> encodedRegionNames) { + return this.<Void> newMasterCaller() + .action((controller, stub) -> this + .<RecommissionRegionServerRequest, RecommissionRegionServerResponse, Void> call(controller, + stub, RequestConverter.buildRecommissionRegionServerRequest(server, encodedRegionNames), + (s, c, req, done) -> s.recommissionRegionServer(c, req, done), resp -> null)) + .call(); } /** http://git-wip-us.apache.org/repos/asf/hbase/blob/a43a00e8/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ShortCircuitMasterConnection.java ---------------------------------------------------------------------- diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ShortCircuitMasterConnection.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ShortCircuitMasterConnection.java index 826e6de..b2eebf6 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ShortCircuitMasterConnection.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ShortCircuitMasterConnection.java @@ -36,6 +36,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.CreateName import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.CreateNamespaceResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.CreateTableRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.CreateTableResponse; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DecommissionRegionServersRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DecommissionRegionServersResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DeleteColumnRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DeleteColumnResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DeleteNamespaceRequest; @@ -46,8 +48,6 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DeleteTabl import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DeleteTableResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DisableTableRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DisableTableResponse; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DrainRegionServersRequest; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DrainRegionServersResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.EnableCatalogJanitorRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.EnableCatalogJanitorResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.EnableTableRequest; @@ -94,8 +94,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSplitOrM import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSplitOrMergeEnabledResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDeadServersRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDeadServersResponse; -import 
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDrainingRegionServersRequest; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDrainingRegionServersResponse; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDecommissionedRegionServersRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDecommissionedRegionServersResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListNamespaceDescriptorsRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListNamespaceDescriptorsResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListTableDescriptorsByNamespaceRequest; @@ -120,8 +120,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.NormalizeR import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.NormalizeResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.OfflineRegionRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.OfflineRegionResponse; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RemoveDrainFromRegionServersRequest; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RemoveDrainFromRegionServersResponse; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RecommissionRegionServerRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RecommissionRegionServerResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RestoreSnapshotRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RestoreSnapshotResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunCatalogScanRequest; @@ -257,9 +257,9 @@ public class ShortCircuitMasterConnection implements MasterKeepAliveConnection { } @Override - public RemoveDrainFromRegionServersResponse removeDrainFromRegionServers(RpcController controller, - RemoveDrainFromRegionServersRequest request) throws ServiceException { - return stub.removeDrainFromRegionServers(controller, request); + public RecommissionRegionServerResponse recommissionRegionServer(RpcController controller, + RecommissionRegionServerRequest request) throws ServiceException { + return stub.recommissionRegionServer(controller, request); } @Override @@ -336,9 +336,9 @@ public class ShortCircuitMasterConnection implements MasterKeepAliveConnection { } @Override - public ListDrainingRegionServersResponse listDrainingRegionServers(RpcController controller, - ListDrainingRegionServersRequest request) throws ServiceException { - return stub.listDrainingRegionServers(controller, request); + public ListDecommissionedRegionServersResponse listDecommissionedRegionServers(RpcController controller, + ListDecommissionedRegionServersRequest request) throws ServiceException { + return stub.listDecommissionedRegionServers(controller, request); } @Override @@ -493,9 +493,9 @@ public class ShortCircuitMasterConnection implements MasterKeepAliveConnection { } @Override - public DrainRegionServersResponse drainRegionServers(RpcController controller, - DrainRegionServersRequest request) throws ServiceException { - return stub.drainRegionServers(controller, request); + public DecommissionRegionServersResponse decommissionRegionServers(RpcController controller, + DecommissionRegionServersRequest request) throws ServiceException { + return stub.decommissionRegionServers(controller, request); 
} @Override http://git-wip-us.apache.org/repos/asf/hbase/blob/a43a00e8/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java ---------------------------------------------------------------------- diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java index 2fbbc3f..924a36a 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java @@ -96,11 +96,11 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.BalanceReq import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ClearDeadServersRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.CreateNamespaceRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.CreateTableRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DecommissionRegionServersRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DeleteColumnRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DeleteNamespaceRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DeleteTableRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DisableTableRequest; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DrainRegionServersRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.EnableCatalogJanitorRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.EnableTableRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetClusterStatusRequest; @@ -122,7 +122,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ModifyTabl import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.MoveRegionRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.NormalizeRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.OfflineRegionRequest; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RemoveDrainFromRegionServersRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RecommissionRegionServerRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunCatalogScanRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunCleanerChoreRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SetBalancerRunningRequest; @@ -1851,15 +1851,21 @@ public final class RequestConverter { return GET_QUOTA_STATES_REQUEST; } - public static DrainRegionServersRequest buildDrainRegionServersRequest(List<ServerName> servers) { - return DrainRegionServersRequest.newBuilder().addAllServerName(toProtoServerNames(servers)) - .build(); + public static DecommissionRegionServersRequest + buildDecommissionRegionServersRequest(List<ServerName> servers, boolean offload) { + return DecommissionRegionServersRequest.newBuilder() + .addAllServerName(toProtoServerNames(servers)).setOffload(offload).build(); } - public static RemoveDrainFromRegionServersRequest buildRemoveDrainFromRegionServersRequest( - List<ServerName> servers) { - return RemoveDrainFromRegionServersRequest.newBuilder() - 
.addAllServerName(toProtoServerNames(servers)).build(); + public static RecommissionRegionServerRequest + buildRecommissionRegionServerRequest(ServerName server, List<byte[]> encodedRegionNames) { + RecommissionRegionServerRequest.Builder builder = RecommissionRegionServerRequest.newBuilder(); + if (encodedRegionNames != null) { + for (byte[] name : encodedRegionNames) { + builder.addRegion(buildRegionSpecifier(RegionSpecifierType.ENCODED_REGION_NAME, name)); + } + } + return builder.setServerName(ProtobufUtil.toServerName(server)).build(); } private static List<HBaseProtos.ServerName> toProtoServerNames(List<ServerName> servers) { http://git-wip-us.apache.org/repos/asf/hbase/blob/a43a00e8/hbase-protocol-shaded/src/main/protobuf/Master.proto ---------------------------------------------------------------------- diff --git a/hbase-protocol-shaded/src/main/protobuf/Master.proto b/hbase-protocol-shaded/src/main/protobuf/Master.proto index 0a13e22..7cdd143 100644 --- a/hbase-protocol-shaded/src/main/protobuf/Master.proto +++ b/hbase-protocol-shaded/src/main/protobuf/Master.proto @@ -602,25 +602,27 @@ message SecurityCapabilitiesResponse { repeated Capability capabilities = 1; } -message ListDrainingRegionServersRequest { +message ListDecommissionedRegionServersRequest { } -message ListDrainingRegionServersResponse { - repeated ServerName server_name = 1; +message ListDecommissionedRegionServersResponse { + repeated ServerName server_name = 1; } -message DrainRegionServersRequest { - repeated ServerName server_name = 1; +message DecommissionRegionServersRequest { + repeated ServerName server_name = 1; + required bool offload = 2; } -message DrainRegionServersResponse { +message DecommissionRegionServersResponse { } -message RemoveDrainFromRegionServersRequest { - repeated ServerName server_name = 1; +message RecommissionRegionServerRequest { + required ServerName server_name = 1; + repeated RegionSpecifier region = 2; } -message RemoveDrainFromRegionServersResponse { +message RecommissionRegionServerResponse { } message ListDeadServersRequest { @@ -967,17 +969,17 @@ service MasterService { rpc ListReplicationPeers(ListReplicationPeersRequest) returns(ListReplicationPeersResponse); - /** Returns a list of ServerNames marked as draining. */ - rpc listDrainingRegionServers(ListDrainingRegionServersRequest) - returns(ListDrainingRegionServersResponse); + /** Returns a list of ServerNames marked as decommissioned. */ + rpc ListDecommissionedRegionServers(ListDecommissionedRegionServersRequest) + returns(ListDecommissionedRegionServersResponse); - /** Mark a list of ServerNames as draining. */ - rpc drainRegionServers(DrainRegionServersRequest) - returns(DrainRegionServersResponse); + /** Decommission region servers. */ + rpc DecommissionRegionServers(DecommissionRegionServersRequest) + returns(DecommissionRegionServersResponse); - /** Unmark a list of ServerNames marked as draining. */ - rpc removeDrainFromRegionServers(RemoveDrainFromRegionServersRequest) - returns(RemoveDrainFromRegionServersResponse); + /** Re-commission region server. 
*/ + rpc RecommissionRegionServer(RecommissionRegionServerRequest) + returns(RecommissionRegionServerResponse); /** Fetches the Master's view of space utilization */ rpc GetSpaceQuotaRegionSizes(GetSpaceQuotaRegionSizesRequest) http://git-wip-us.apache.org/repos/asf/hbase/blob/a43a00e8/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java index 85da610..29f0f9f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java @@ -1461,4 +1461,40 @@ public interface MasterObserver { */ default void postClearDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx) throws IOException {} + + /** + * Called before decommission region servers. + */ + default void preDecommissionRegionServers(ObserverContext<MasterCoprocessorEnvironment> ctx, + List<ServerName> servers, boolean offload) throws IOException {} + + /** + * Called after decommission region servers. + */ + default void postDecommissionRegionServers(ObserverContext<MasterCoprocessorEnvironment> ctx, + List<ServerName> servers, boolean offload) throws IOException {} + + /** + * Called before list decommissioned region servers. + */ + default void preListDecommissionedRegionServers(ObserverContext<MasterCoprocessorEnvironment> ctx) + throws IOException {} + + /** + * Called after list decommissioned region servers. + */ + default void postListDecommissionedRegionServers(ObserverContext<MasterCoprocessorEnvironment> ctx) + throws IOException {} + + /** + * Called before recommission region server. + */ + default void preRecommissionRegionServer(ObserverContext<MasterCoprocessorEnvironment> ctx, + ServerName server, List<byte[]> encodedRegionNames) throws IOException {} + + /** + * Called after recommission region server. 
+ */ + default void postRecommissionRegionServer(ObserverContext<MasterCoprocessorEnvironment> ctx, + ServerName server, List<byte[]> encodedRegionNames) throws IOException {} } http://git-wip-us.apache.org/repos/asf/hbase/blob/a43a00e8/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 36bcb60..747ce5a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -98,6 +98,7 @@ import org.apache.hadoop.hbase.master.assignment.AssignmentManager; import org.apache.hadoop.hbase.master.assignment.MergeTableRegionsProcedure; import org.apache.hadoop.hbase.master.assignment.RegionStates; import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode; +import org.apache.hadoop.hbase.master.assignment.RegionStates.ServerStateNode; import org.apache.hadoop.hbase.master.balancer.BalancerChore; import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer; import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore; @@ -3435,53 +3436,97 @@ public class HMaster extends HRegionServer implements MasterServices { return peers; } - @Override - public void drainRegionServer(final ServerName server) { - String parentZnode = getZooKeeper().znodePaths.drainingZNode; - try { - String node = ZKUtil.joinZNode(parentZnode, server.getServerName()); - ZKUtil.createAndFailSilent(getZooKeeper(), node); - } catch (KeeperException ke) { - LOG.warn(this.zooKeeper.prefix("Unable to add drain for '" + server.getServerName() + "'."), - ke); - } - } - - @Override - public List<ServerName> listDrainingRegionServers() { + /** + * Mark region server(s) as decommissioned (previously called 'draining') to prevent additional + * regions from getting assigned to them. Also unload the regions on the servers asynchronously.0 + * @param servers Region servers to decommission. + * @throws HBaseIOException + */ + public void decommissionRegionServers(final List<ServerName> servers, final boolean offload) + throws HBaseIOException { + List<ServerName> serversAdded = new ArrayList<>(servers.size()); + // Place the decommission marker first. String parentZnode = getZooKeeper().znodePaths.drainingZNode; - List<ServerName> serverNames = new ArrayList<>(); - List<String> serverStrs = null; - try { - serverStrs = ZKUtil.listChildrenNoWatch(getZooKeeper(), parentZnode); - } catch (KeeperException ke) { - LOG.warn(this.zooKeeper.prefix("Unable to list draining servers."), ke); - } - // No nodes is empty draining list or ZK connectivity issues. 
- if (serverStrs == null) { - return serverNames; - } - - // Skip invalid ServerNames in result - for (String serverStr : serverStrs) { + for (ServerName server : servers) { try { - serverNames.add(ServerName.parseServerName(serverStr)); - } catch (IllegalArgumentException iae) { - LOG.warn("Unable to cast '" + serverStr + "' to ServerName.", iae); + String node = ZKUtil.joinZNode(parentZnode, server.getServerName()); + ZKUtil.createAndFailSilent(getZooKeeper(), node); + } catch (KeeperException ke) { + throw new HBaseIOException( + this.zooKeeper.prefix("Unable to decommission '" + server.getServerName() + "'."), ke); + } + if (this.serverManager.addServerToDrainList(server)) { + serversAdded.add(server); + }; + } + // Move the regions off the decommissioned servers. + if (offload) { + final List<ServerName> destServers = this.serverManager.createDestinationServersList(); + for (ServerName server : serversAdded) { + final List<RegionInfo> regionsOnServer = + this.assignmentManager.getRegionStates().getServerRegionInfoSet(server); + for (RegionInfo hri : regionsOnServer) { + ServerName dest = balancer.randomAssignment(hri, destServers); + if (dest == null) { + throw new HBaseIOException("Unable to determine a plan to move " + hri); + } + RegionPlan rp = new RegionPlan(hri, server, dest); + this.assignmentManager.moveAsync(rp); + } } } - return serverNames; } - @Override - public void removeDrainFromRegionServer(ServerName server) { + /** + * List region servers marked as decommissioned (previously called 'draining') to not get regions + * assigned to them. + * @return List of decommissioned servers. + */ + public List<ServerName> listDecommissionedRegionServers() { + return this.serverManager.getDrainingServersList(); + } + + /** + * Remove decommission marker (previously called 'draining') from a region server to allow regions + * assignments. Load regions onto the server asynchronously if a list of regions is given + * @param server Region server to remove decommission marker from. + * @throws HBaseIOException + */ + public void recommissionRegionServer(final ServerName server, + final List<byte[]> encodedRegionNames) throws HBaseIOException { + // Remove the server from decommissioned (draining) server list. String parentZnode = getZooKeeper().znodePaths.drainingZNode; String node = ZKUtil.joinZNode(parentZnode, server.getServerName()); try { ZKUtil.deleteNodeFailSilent(getZooKeeper(), node); } catch (KeeperException ke) { - LOG.warn( - this.zooKeeper.prefix("Unable to remove drain for '" + server.getServerName() + "'."), ke); + throw new HBaseIOException( + this.zooKeeper.prefix("Unable to recommission '" + server.getServerName() + "'."), ke); + } + this.serverManager.removeServerFromDrainList(server); + + // Load the regions onto the server if we are given a list of regions. 
+ if (encodedRegionNames == null || encodedRegionNames.isEmpty()) { + return; + } + if (!this.serverManager.isServerOnline(server)) { + return; + } + for (byte[] encodedRegionName : encodedRegionNames) { + RegionState regionState = + assignmentManager.getRegionStates().getRegionState(Bytes.toString(encodedRegionName)); + if (regionState == null) { + LOG.warn("Unknown region " + Bytes.toStringBinary(encodedRegionName)); + continue; + } + RegionInfo hri = regionState.getRegion(); + if (server.equals(regionState.getServerName())) { + LOG.info("Skipping move of region " + hri.getRegionNameAsString() + + " because region already assigned to the same server " + server + "."); + continue; + } + RegionPlan rp = new RegionPlan(hri, regionState.getServerName(), server); + this.assignmentManager.moveAsync(rp); } } http://git-wip-us.apache.org/repos/asf/hbase/blob/a43a00e8/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java index 72ba5ae..f4e89b5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java @@ -1684,4 +1684,60 @@ public class MasterCoprocessorHost } }); } + + public void preDecommissionRegionServers(List<ServerName> servers, boolean offload) throws IOException { + execOperation(coprocEnvironments.isEmpty() ? null : new MasterObserverOperation() { + @Override + public void call(MasterObserver observer) throws IOException { + observer.preDecommissionRegionServers(this, servers, offload); + } + }); + } + + public void postDecommissionRegionServers(List<ServerName> servers, boolean offload) throws IOException { + execOperation(coprocEnvironments.isEmpty() ? null : new MasterObserverOperation() { + @Override + public void call(MasterObserver observer) throws IOException { + observer.postDecommissionRegionServers(this, servers, offload); + } + }); + } + + public void preListDecommissionedRegionServers() throws IOException { + execOperation(coprocEnvironments.isEmpty() ? null : new MasterObserverOperation() { + @Override + public void call(MasterObserver observer) throws IOException { + observer.preListDecommissionedRegionServers(this); + } + }); + } + + public void postListDecommissionedRegionServers() throws IOException { + execOperation(coprocEnvironments.isEmpty() ? null : new MasterObserverOperation() { + @Override + public void call(MasterObserver observer) throws IOException { + observer.postListDecommissionedRegionServers(this); + } + }); + } + + public void preRecommissionRegionServer(ServerName server, List<byte[]> encodedRegionNames) + throws IOException { + execOperation(coprocEnvironments.isEmpty() ? null : new MasterObserverOperation() { + @Override + public void call(MasterObserver observer) throws IOException { + observer.preRecommissionRegionServer(this, server, encodedRegionNames); + } + }); + } + + public void postRecommissionRegionServer(ServerName server, List<byte[]> encodedRegionNames) + throws IOException { + execOperation(coprocEnvironments.isEmpty() ? 
null : new MasterObserverOperation() { + @Override + public void call(MasterObserver observer) throws IOException { + observer.postRecommissionRegionServer(this, server, encodedRegionNames); + } + }); + } } http://git-wip-us.apache.org/repos/asf/hbase/blob/a43a00e8/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java index 740edec..1bd6487 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java @@ -27,6 +27,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; +import java.util.stream.Collectors; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -119,6 +120,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.CreateName import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.CreateNamespaceResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.CreateTableRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.CreateTableResponse; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DecommissionRegionServersRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DecommissionRegionServersResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DeleteColumnRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DeleteColumnResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DeleteNamespaceRequest; @@ -129,8 +132,6 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DeleteTabl import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DeleteTableResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DisableTableRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DisableTableResponse; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DrainRegionServersRequest; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DrainRegionServersResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.EnableCatalogJanitorRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.EnableCatalogJanitorResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.EnableTableRequest; @@ -177,8 +178,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSplitOrM import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSplitOrMergeEnabledResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDeadServersRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDeadServersResponse; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDrainingRegionServersRequest; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDrainingRegionServersResponse; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDecommissionedRegionServersRequest; +import 
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListDecommissionedRegionServersResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListNamespaceDescriptorsRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListNamespaceDescriptorsResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListTableDescriptorsByNamespaceRequest; @@ -203,8 +204,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.NormalizeR import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.NormalizeResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.OfflineRegionRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.OfflineRegionResponse; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RemoveDrainFromRegionServersRequest; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RemoveDrainFromRegionServersResponse; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RecommissionRegionServerRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RecommissionRegionServerResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RestoreSnapshotRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RestoreSnapshotResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunCatalogScanRequest; @@ -1902,15 +1903,21 @@ public class MasterRpcServices extends RSRpcServices } @Override - public ListDrainingRegionServersResponse listDrainingRegionServers(RpcController controller, - ListDrainingRegionServersRequest request) throws ServiceException { - ListDrainingRegionServersResponse.Builder response = - ListDrainingRegionServersResponse.newBuilder(); + public ListDecommissionedRegionServersResponse listDecommissionedRegionServers( + RpcController controller, ListDecommissionedRegionServersRequest request) + throws ServiceException { + ListDecommissionedRegionServersResponse.Builder response = + ListDecommissionedRegionServersResponse.newBuilder(); try { master.checkInitialized(); - List<ServerName> servers = master.listDrainingRegionServers(); - for (ServerName server : servers) { - response.addServerName(ProtobufUtil.toServerName(server)); + if (master.cpHost != null) { + master.cpHost.preListDecommissionedRegionServers(); + } + List<ServerName> servers = master.listDecommissionedRegionServers(); + response.addAllServerName((servers.stream().map(server -> ProtobufUtil.toServerName(server))) + .collect(Collectors.toList())); + if (master.cpHost != null) { + master.cpHost.postListDecommissionedRegionServers(); } } catch (IOException io) { throw new ServiceException(io); @@ -1920,36 +1927,48 @@ public class MasterRpcServices extends RSRpcServices } @Override - public DrainRegionServersResponse drainRegionServers(RpcController controller, - DrainRegionServersRequest request) throws ServiceException { - DrainRegionServersResponse.Builder response = DrainRegionServersResponse.newBuilder(); + public DecommissionRegionServersResponse decommissionRegionServers(RpcController controller, + DecommissionRegionServersRequest request) throws ServiceException { try { master.checkInitialized(); - for (HBaseProtos.ServerName pbServer : request.getServerNameList()) { - master.drainRegionServer(ProtobufUtil.toServerName(pbServer)); + List<ServerName> servers = request.getServerNameList().stream() + .map(pbServer -> 
ProtobufUtil.toServerName(pbServer)).collect(Collectors.toList()); + boolean offload = request.getOffload(); + if (master.cpHost != null) { + master.cpHost.preDecommissionRegionServers(servers, offload); + } + master.decommissionRegionServers(servers, offload); + if (master.cpHost != null) { + master.cpHost.postDecommissionRegionServers(servers, offload); } } catch (IOException io) { throw new ServiceException(io); } - return response.build(); + return DecommissionRegionServersResponse.newBuilder().build(); } @Override - public RemoveDrainFromRegionServersResponse removeDrainFromRegionServers(RpcController controller, - RemoveDrainFromRegionServersRequest request) throws ServiceException { - RemoveDrainFromRegionServersResponse.Builder response = - RemoveDrainFromRegionServersResponse.newBuilder(); + public RecommissionRegionServerResponse recommissionRegionServer(RpcController controller, + RecommissionRegionServerRequest request) throws ServiceException { try { master.checkInitialized(); - for (HBaseProtos.ServerName pbServer : request.getServerNameList()) { - master.removeDrainFromRegionServer(ProtobufUtil.toServerName(pbServer)); + ServerName server = ProtobufUtil.toServerName(request.getServerName()); + List<byte[]> encodedRegionNames = request.getRegionList().stream() + .map(regionSpecifier -> regionSpecifier.getValue().toByteArray()) + .collect(Collectors.toList()); + if (master.cpHost != null) { + master.cpHost.preRecommissionRegionServer(server, encodedRegionNames); + } + master.recommissionRegionServer(server, encodedRegionNames); + if (master.cpHost != null) { + master.cpHost.postRecommissionRegionServer(server, encodedRegionNames); } } catch (IOException io) { throw new ServiceException(io); } - return response.build(); + return RecommissionRegionServerResponse.newBuilder().build(); } @Override http://git-wip-us.apache.org/repos/asf/hbase/blob/a43a00e8/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java index e815950..055a480 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java @@ -21,6 +21,7 @@ package org.apache.hadoop.hbase.master; import java.io.IOException; import java.util.List; +import org.apache.hadoop.hbase.HBaseIOException; import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableDescriptors; @@ -476,24 +477,6 @@ public interface MasterServices extends Server { IOException; /** - * Mark a region server as draining to prevent additional regions from getting assigned to it. - * @param server Region servers to drain. - */ - void drainRegionServer(final ServerName server); - - /** - * List region servers marked as draining to not get additional regions assigned to them. - * @return List of draining servers. - */ - List<ServerName> listDrainingRegionServers(); - - /** - * Remove drain from a region server to allow additional regions assignments. - * @param server Region server to remove drain from. - */ - void removeDrainFromRegionServer(final ServerName server); - - /** * @return {@link LockManager} to lock namespaces/tables/regions. 
*/ LockManager getLockManager(); http://git-wip-us.apache.org/repos/asf/hbase/blob/a43a00e8/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index b63d55a..c014303 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -42,6 +42,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ClockOutOfSyncException; +import org.apache.hadoop.hbase.HBaseIOException; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.RegionLoad; @@ -92,6 +93,9 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProto * A server is fully processed only after the handler is fully enabled * and has completed the handling. */ +/** + * + */ @InterfaceAudience.Private public class ServerManager { public static final String WAIT_ON_REGIONSERVERS_MAXTOSTART = @@ -664,7 +668,7 @@ public class ServerManager { /* * Remove the server from the drain list. */ - public boolean removeServerFromDrainList(final ServerName sn) { + public synchronized boolean removeServerFromDrainList(final ServerName sn) { // Warn if the server (sn) is not online. ServerName is of the form: // <hostname> , <port> , <startcode> @@ -676,10 +680,12 @@ public class ServerManager { return this.drainingServers.remove(sn); } - /* + /** * Add the server to the drain list. + * @param sn + * @return True if the server is added or the server is already on the drain list. */ - public boolean addServerToDrainList(final ServerName sn) { + public synchronized boolean addServerToDrainList(final ServerName sn) { // Warn if the server (sn) is not online. ServerName is of the form: // <hostname> , <port> , <startcode> @@ -693,7 +699,7 @@ public class ServerManager { if (this.drainingServers.contains(sn)) { LOG.warn("Server " + sn + " is already in the draining server list." 
+ "Ignoring request to add it again."); - return false; + return true; } LOG.info("Server " + sn + " added to draining server list."); return this.drainingServers.add(sn); http://git-wip-us.apache.org/repos/asf/hbase/blob/a43a00e8/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/AccessController.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/AccessController.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/AccessController.java index 6da09cd..1478991 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/AccessController.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/AccessController.java @@ -1504,6 +1504,23 @@ public class AccessController implements MasterCoprocessor, RegionCoprocessor, requirePermission(getActiveUser(ctx), "clearDeadServers", Action.ADMIN); } + @Override + public void preDecommissionRegionServers(ObserverContext<MasterCoprocessorEnvironment> ctx, + List<ServerName> servers, boolean offload) throws IOException { + requirePermission(getActiveUser(ctx), "decommissionRegionServers", Action.ADMIN); + } + + @Override + public void preListDecommissionedRegionServers(ObserverContext<MasterCoprocessorEnvironment> ctx) throws IOException { + requirePermission(getActiveUser(ctx), "listDecommissionedRegionServers", Action.ADMIN); + } + + @Override + public void preRecommissionRegionServer(ObserverContext<MasterCoprocessorEnvironment> ctx, + ServerName server, List<byte[]> encodedRegionNames) throws IOException { + requirePermission(getActiveUser(ctx), "recommissionRegionServers", Action.ADMIN); + } + /* ---- RegionObserver implementation ---- */ @Override http://git-wip-us.apache.org/repos/asf/hbase/blob/a43a00e8/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/DrainingServerTracker.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/DrainingServerTracker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/DrainingServerTracker.java index fb9485f..cdc6794 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/DrainingServerTracker.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/DrainingServerTracker.java @@ -44,6 +44,9 @@ import org.apache.zookeeper.KeeperException; * <p>If an RS gets added to the draining list, we add a watcher to it and call * {@link ServerManager#addServerToDrainList(ServerName)} * + * <p>This class is deprecated in 2.0 because decommission/draining API goes through + * master in 2.0. Can remove this class in 3.0. 
+ * */ @InterfaceAudience.Private public class DrainingServerTracker extends ZooKeeperListener { http://git-wip-us.apache.org/repos/asf/hbase/blob/a43a00e8/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin2.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin2.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin2.java index 6814f97..953fae0 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin2.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin2.java @@ -27,6 +27,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.EnumSet; +import java.util.HashMap; import java.util.List; import java.util.Random; import java.util.concurrent.ExecutorService; @@ -34,6 +35,7 @@ import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -695,65 +697,62 @@ public class TestAdmin2 { assertTrue(lockList.startsWith("[")); } - /* - * This test drains all regions so cannot be run in parallel with other tests. - */ - @Ignore @Test(timeout = 30000) - public void testDrainRegionServers() throws Exception { - List<ServerName> drainingServers = admin.listDrainingRegionServers(); - assertTrue(drainingServers.isEmpty()); - - // Drain all region servers. - Collection<ServerName> clusterServers = - admin.getClusterStatus(EnumSet.of(Option.LIVE_SERVERS)).getServers(); - drainingServers = new ArrayList<>(); - for (ServerName server : clusterServers) { - drainingServers.add(server); - } - admin.drainRegionServers(drainingServers); + @Test(timeout = 30000) + public void testDecommissionRegionServers() throws Exception { + List<ServerName> decommissionedRegionServers = admin.listDecommissionedRegionServers(); + assertTrue(decommissionedRegionServers.isEmpty()); - // Check that drain lists all region servers. - drainingServers = admin.listDrainingRegionServers(); - assertEquals(clusterServers.size(), drainingServers.size()); - for (ServerName server : clusterServers) { - assertTrue(drainingServers.contains(server)); + final TableName tableName = TableName.valueOf(name.getMethodName()); + TEST_UTIL.createMultiRegionTable(tableName, "f".getBytes(), 6); + + ArrayList<ServerName> clusterRegionServers = + new ArrayList<>(admin.getClusterStatus(EnumSet.of(Option.LIVE_SERVERS)).getServers()); + + assertEquals(clusterRegionServers.size(), 3); + + HashMap<ServerName, List<RegionInfo>> serversToDecommssion = new HashMap<>(); + // Get a server that has regions. We will decommission two of the servers, + // leaving one online. + int i; + for (i = 0; i < clusterRegionServers.size(); i++) { + List<RegionInfo> regionsOnServer = admin.getRegions(clusterRegionServers.get(i)); + if (regionsOnServer.size() > 0) { + serversToDecommssion.put(clusterRegionServers.get(i), regionsOnServer); + break; + } } - // Try for 20 seconds to create table (new region). Will not complete because all RSs draining. - final TableName hTable = TableName.valueOf(name.getMethodName()); - final HTableDescriptor htd = new HTableDescriptor(hTable); - htd.addFamily(new HColumnDescriptor("cf")); + clusterRegionServers.remove(i); + // Get another server to decommission. 
+ serversToDecommssion.put(clusterRegionServers.get(0), + admin.getRegions(clusterRegionServers.get(0))); - final Runnable createTable = new Thread() { - @Override - public void run() { - try { - admin.createTable(htd); - } catch (IOException ioe) { - assertTrue(false); // Should not get IOException. - } - } - }; + ServerName remainingServer = clusterRegionServers.get(1); - final ExecutorService executor = Executors.newSingleThreadExecutor(); - final java.util.concurrent.Future<?> future = executor.submit(createTable); - executor.shutdown(); - try { - future.get(20, TimeUnit.SECONDS); - } catch (TimeoutException ie) { - assertTrue(true); // Expecting timeout to happen. - } + // Decommission + admin.decommissionRegionServers(new ArrayList<ServerName>(serversToDecommssion.keySet()), true); + assertEquals(2, admin.listDecommissionedRegionServers().size()); - // Kill executor if still processing. - if (!executor.isTerminated()) { - executor.shutdownNow(); - assertTrue(true); + // Verify the regions have been off the decommissioned servers, all on the one + // remaining server. + for (ServerName server : serversToDecommssion.keySet()) { + for (RegionInfo region : serversToDecommssion.get(server)) { + TEST_UTIL.assertRegionOnServer(region, remainingServer, 10000); + } } - // Remove drain list. - admin.removeDrainFromRegionServers(drainingServers); - drainingServers = admin.listDrainingRegionServers(); - assertTrue(drainingServers.isEmpty()); - + // Recommission and load the regions. + for (ServerName server : serversToDecommssion.keySet()) { + List<byte[]> encodedRegionNames = serversToDecommssion.get(server).stream() + .map(region -> region.getEncodedNameAsBytes()).collect(Collectors.toList()); + admin.recommissionRegionServer(server, encodedRegionNames); + } + assertTrue(admin.listDecommissionedRegionServers().isEmpty()); + // Verify the regions have been moved to the recommissioned servers + for (ServerName server : serversToDecommssion.keySet()) { + for (RegionInfo region : serversToDecommssion.get(server)) { + TEST_UTIL.assertRegionOnServer(region, server, 10000); + } + } } } http://git-wip-us.apache.org/repos/asf/hbase/blob/a43a00e8/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncDecommissionAdminApi.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncDecommissionAdminApi.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncDecommissionAdminApi.java new file mode 100644 index 0000000..466ff7f --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncDecommissionAdminApi.java @@ -0,0 +1,95 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.client; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.List; +import java.util.stream.Collectors; + +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.ClusterStatus.Option; +import org.apache.hadoop.hbase.testclassification.ClientTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +@Category({ ClientTests.class, MediumTests.class }) +public class TestAsyncDecommissionAdminApi extends TestAsyncAdminBase { + + @Test(timeout = 30000) + public void testAsyncDecommissionRegionServers() throws Exception { + List<ServerName> decommissionedRegionServers = admin.listDecommissionedRegionServers().get(); + assertTrue(decommissionedRegionServers.isEmpty()); + + TEST_UTIL.createMultiRegionTable(tableName, FAMILY, 4); + + ArrayList<ServerName> clusterRegionServers = + new ArrayList<>(admin.getClusterStatus(EnumSet.of(Option.LIVE_SERVERS)).get().getServers()); + + assertEquals(clusterRegionServers.size(), 2); + + HashMap<ServerName, List<RegionInfo>> serversToDecommssion = new HashMap<>(); + // Get a server that has regions. We will decommission one of the servers, + // leaving one online. + int i; + for (i = 0; i < clusterRegionServers.size(); i++) { + List<RegionInfo> regionsOnServer = admin.getOnlineRegions(clusterRegionServers.get(i)).get(); + if (regionsOnServer.size() > 0) { + serversToDecommssion.put(clusterRegionServers.get(i), regionsOnServer); + break; + } + } + + clusterRegionServers.remove(i); + ServerName remainingServer = clusterRegionServers.get(0); + + // Decommission + admin.decommissionRegionServers(new ArrayList<ServerName>(serversToDecommssion.keySet()), + true).get(); + assertEquals(1, admin.listDecommissionedRegionServers().get().size()); + + // Verify the regions have been off the decommissioned servers, all on the remaining server. 
+ for (ServerName server : serversToDecommssion.keySet()) { + for (RegionInfo region : serversToDecommssion.get(server)) { + TEST_UTIL.assertRegionOnServer(region, remainingServer, 10000); + } + } + + // Recommission and load regions + for (ServerName server : serversToDecommssion.keySet()) { + List<byte[]> encodedRegionNames = serversToDecommssion.get(server).stream() + .map(region -> region.getEncodedNameAsBytes()).collect(Collectors.toList()); + admin.recommissionRegionServer(server, encodedRegionNames).get(); + } + assertTrue(admin.listDecommissionedRegionServers().get().isEmpty()); + // Verify the regions have been moved to the recommissioned servers + for (ServerName server : serversToDecommssion.keySet()) { + for (RegionInfo region : serversToDecommssion.get(server)) { + TEST_UTIL.assertRegionOnServer(region, server, 10000); + } + } + } +} http://git-wip-us.apache.org/repos/asf/hbase/blob/a43a00e8/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncDrainAdminApi.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncDrainAdminApi.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncDrainAdminApi.java deleted file mode 100644 index c31775e..0000000 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncDrainAdminApi.java +++ /dev/null @@ -1,101 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase.client; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; - -import org.apache.hadoop.hbase.ServerName; -import org.apache.hadoop.hbase.testclassification.ClientTests; -import org.apache.hadoop.hbase.testclassification.MediumTests; -import org.junit.Ignore; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) -@Category({ ClientTests.class, MediumTests.class }) -public class TestAsyncDrainAdminApi extends TestAsyncAdminBase { - - /* - * This test drains all regions so cannot be run in parallel with other tests. - */ - @Ignore @Test(timeout = 30000) - public void testDrainRegionServers() throws Exception { - List<ServerName> drainingServers = admin.listDrainingRegionServers().get(); - assertTrue(drainingServers.isEmpty()); - - // Drain all region servers. 
- Collection<ServerName> clusterServers = admin.getRegionServers().get(); - drainingServers = new ArrayList<>(); - for (ServerName server : clusterServers) { - drainingServers.add(server); - } - admin.drainRegionServers(drainingServers).join(); - - // Check that drain lists all region servers. - drainingServers = admin.listDrainingRegionServers().get(); - assertEquals(clusterServers.size(), drainingServers.size()); - for (ServerName server : clusterServers) { - assertTrue(drainingServers.contains(server)); - } - - // Try for 20 seconds to create table (new region). Will not complete because all RSs draining. - TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName); - builder.addColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)); - final Runnable createTable = new Thread() { - @Override - public void run() { - try { - admin.createTable(builder.build()).join(); - } catch (Exception ioe) { - assertTrue(false); // Should not get IOException. - } - } - }; - - final ExecutorService executor = Executors.newSingleThreadExecutor(); - final java.util.concurrent.Future<?> future = executor.submit(createTable); - executor.shutdown(); - try { - future.get(20, TimeUnit.SECONDS); - } catch (TimeoutException ie) { - assertTrue(true); // Expecting timeout to happen. - } - - // Kill executor if still processing. - if (!executor.isTerminated()) { - executor.shutdownNow(); - assertTrue(true); - } - - // Remove drain list. - admin.removeDrainFromRegionServers(drainingServers); - drainingServers = admin.listDrainingRegionServers().get(); - assertTrue(drainingServers.isEmpty()); - } -} http://git-wip-us.apache.org/repos/asf/hbase/blob/a43a00e8/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java index fda3563..4f544e4 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java @@ -409,21 +409,6 @@ public class MockNoopMasterServices implements MasterServices, Server { } @Override - public void drainRegionServer(ServerName server) { - return; - } - - @Override - public List<ServerName> listDrainingRegionServers() { - return null; - } - - @Override - public void removeDrainFromRegionServer(ServerName servers) { - return; - } - - @Override public ReplicationPeerConfig getReplicationPeerConfig(String peerId) throws ReplicationException, IOException { return null; http://git-wip-us.apache.org/repos/asf/hbase/blob/a43a00e8/hbase-server/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperACL.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperACL.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperACL.java index 89164f4..05ad73e 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperACL.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperACL.java @@ -325,19 +325,19 @@ public class TestZooKeeperACL { if (!secureZKAvailable) { return; } - List<ServerName> drainingServers = new ArrayList<>(1); - drainingServers.add(ServerName.parseServerName("ZZZ,123,123")); + 
List<ServerName> decommissionedServers = new ArrayList<>(1); + decommissionedServers.add(ServerName.parseServerName("ZZZ,123,123")); // If unable to connect to secure ZK cluster then this operation would fail. - TEST_UTIL.getAdmin().drainRegionServers(drainingServers); + TEST_UTIL.getAdmin().decommissionRegionServers(decommissionedServers, false); - drainingServers = TEST_UTIL.getAdmin().listDrainingRegionServers(); - assertEquals(1, drainingServers.size()); - assertEquals(ServerName.parseServerName("ZZZ,123,123"), drainingServers.get(0)); + decommissionedServers = TEST_UTIL.getAdmin().listDecommissionedRegionServers(); + assertEquals(1, decommissionedServers.size()); + assertEquals(ServerName.parseServerName("ZZZ,123,123"), decommissionedServers.get(0)); - TEST_UTIL.getAdmin().removeDrainFromRegionServers(drainingServers); - drainingServers = TEST_UTIL.getAdmin().listDrainingRegionServers(); - assertEquals(0, drainingServers.size()); + TEST_UTIL.getAdmin().recommissionRegionServer(decommissionedServers.get(0), null); + decommissionedServers = TEST_UTIL.getAdmin().listDecommissionedRegionServers(); + assertEquals(0, decommissionedServers.size()); } }
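----------------------------------------------------------------------
For reviewers who want to see the renamed API end to end: a minimal synchronous usage
sketch distilled from the TestAdmin2 changes above. The class name, configuration
bootstrap, and the choice of a single target server are illustrative assumptions; the
Admin calls themselves (decommissionRegionServers, listDecommissionedRegionServers,
recommissionRegionServer, getRegions) are the ones introduced or exercised by this patch.

import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.stream.Collectors;

import org.apache.hadoop.hbase.ClusterStatus.Option;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionInfo;

// Hypothetical driver class, not part of this patch.
public class DecommissionSketch {
  public static void main(String[] args) throws Exception {
    try (Connection conn = ConnectionFactory.createConnection(HBaseConfiguration.create());
         Admin admin = conn.getAdmin()) {
      // Pick one live region server to take out of rotation (illustrative only).
      ServerName target = admin.getClusterStatus(EnumSet.of(Option.LIVE_SERVERS))
          .getServers().iterator().next();
      // Remember its regions so they can be moved back on recommission.
      List<RegionInfo> regions = admin.getRegions(target);

      // Decommission the server and offload its regions to the rest of the cluster.
      admin.decommissionRegionServers(Collections.singletonList(target), true);
      System.out.println("Decommissioned: " + admin.listDecommissionedRegionServers());

      // Put the server back in rotation and move its old regions back onto it.
      List<byte[]> encodedRegionNames = regions.stream()
          .map(RegionInfo::getEncodedNameAsBytes).collect(Collectors.toList());
      admin.recommissionRegionServer(target, encodedRegionNames);
    }
  }
}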
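The asynchronous path covered by the new TestAsyncDecommissionAdminApi mirrors the same
three calls, each returning a CompletableFuture. Below is a sketch under the assumption
that an AsyncAdmin handle and a target ServerName are already available (the
AsyncConnection bootstrap is outside this patch, so it is omitted); getOnlineRegions is
the AsyncAdmin counterpart used by the new test.

import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;

import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.AsyncAdmin;
import org.apache.hadoop.hbase.client.RegionInfo;

// Hypothetical helper, not part of this patch; callers supply 'admin' and 'target'.
public final class AsyncDecommissionSketch {
  static void drainAndRestore(AsyncAdmin admin, ServerName target) throws Exception {
    // Capture the regions currently on the target server.
    List<RegionInfo> regions = admin.getOnlineRegions(target).get();

    // Decommission with offload=true, then confirm the server is listed as decommissioned.
    admin.decommissionRegionServers(Collections.singletonList(target), true).get();
    System.out.println("Decommissioned: " + admin.listDecommissionedRegionServers().get());

    // Recommission and ask for the old regions to be loaded back onto the server.
    List<byte[]> encodedRegionNames = regions.stream()
        .map(RegionInfo::getEncodedNameAsBytes).collect(Collectors.toList());
    admin.recommissionRegionServer(target, encodedRegionNames).get();
  }

  private AsyncDecommissionSketch() {
  }
}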