Repository: hbase Updated Branches: refs/heads/branch-2 0409c54ba -> 1a5aedab3
HBASE-20741 Split of a region with replicas creates all daughter regions and its replica in same server (Ram) Signed-off-by: Huaxiang Sun, Michael Stack, Duo Zhang Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/1a5aedab Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/1a5aedab Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/1a5aedab Branch: refs/heads/branch-2 Commit: 1a5aedab3428abf6a2d66fec342a99177562a87d Parents: 0409c54 Author: Vasudevan <ramkrishna.s.vasude...@intel.com> Authored: Tue Sep 4 16:26:11 2018 +0530 Committer: Vasudevan <ramkrishna.s.vasude...@intel.com> Committed: Tue Sep 4 16:26:11 2018 +0530 ---------------------------------------------------------------------- .../master/assignment/AssignmentManager.java | 23 ++- .../assignment/AssignmentManagerUtil.java | 69 +++++--- .../assignment/MergeTableRegionsProcedure.java | 5 +- .../assignment/SplitTableRegionProcedure.java | 14 +- .../hbase/master/balancer/BaseLoadBalancer.java | 21 ++- .../assignment/TestRegionReplicaSplit.java | 158 +++++++++++++++++++ 6 files changed, 255 insertions(+), 35 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/1a5aedab/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index 745703f..cf75d73 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -663,16 +663,22 @@ public class AssignmentManager implements ServerListener { * scheme). If at assign-time, the target chosen is no longer up, thats fine, the * AssignProcedure will ask the balancer for a new target, and so on. */ - public TransitRegionStateProcedure[] createRoundRobinAssignProcedures( - List<RegionInfo> hris) { + public TransitRegionStateProcedure[] createRoundRobinAssignProcedures(List<RegionInfo> hris, + List<ServerName> serversToExclude) { if (hris.isEmpty()) { return new TransitRegionStateProcedure[0]; } + + if (serversToExclude != null + && this.master.getServerManager().getOnlineServersList().size() == 1) { + LOG.debug("Only one region server found and hence going ahead with the assignment"); + serversToExclude = null; + } try { // Ask the balancer to assign our regions. Pass the regions en masse. The balancer can do // a better job if it has all the assignments in the one lump. Map<ServerName, List<RegionInfo>> assignments = getBalancer().roundRobinAssignment(hris, - this.master.getServerManager().createDestinationServersList(null)); + this.master.getServerManager().createDestinationServersList(serversToExclude)); // Return mid-method! return createAssignProcedures(assignments); } catch (HBaseIOException hioe) { @@ -682,6 +688,17 @@ public class AssignmentManager implements ServerListener { return createAssignProcedures(hris); } + /** + * Create round-robin assigns. Use on table creation to distribute out regions across cluster. + * @return AssignProcedures made out of the passed in <code>hris</code> and a call to the balancer + * to populate the assigns with targets chosen using round-robin (default balancer + * scheme). If at assign-time, the target chosen is no longer up, thats fine, the + * AssignProcedure will ask the balancer for a new target, and so on. + */ + public TransitRegionStateProcedure[] createRoundRobinAssignProcedures(List<RegionInfo> hris) { + return createRoundRobinAssignProcedures(hris, null); + } + @VisibleForTesting static int compare(TransitRegionStateProcedure left, TransitRegionStateProcedure right) { if (left.getRegion().isMetaRegion()) { http://git-wip-us.apache.org/repos/asf/hbase/blob/1a5aedab/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManagerUtil.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManagerUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManagerUtil.java index d0dca09..7f2d11a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManagerUtil.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManagerUtil.java @@ -18,12 +18,15 @@ package org.apache.hadoop.hbase.master.assignment; import java.io.IOException; +import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.ListIterator; import java.util.stream.Collectors; import java.util.stream.IntStream; import java.util.stream.Stream; + +import org.apache.commons.lang3.ArrayUtils; import org.apache.hadoop.hbase.HBaseIOException; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.RegionInfo; @@ -47,6 +50,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionIn */ @InterfaceAudience.Private final class AssignmentManagerUtil { + private static final int DEFAULT_REGION_REPLICA = 1; + private AssignmentManagerUtil() { } @@ -136,38 +141,56 @@ final class AssignmentManagerUtil { } private static TransitRegionStateProcedure[] createAssignProcedures(MasterProcedureEnv env, - Stream<RegionInfo> regions, int regionReplication, ServerName targetServer) { - return regions - .flatMap(hri -> IntStream.range(0, regionReplication) - .mapToObj(i -> RegionReplicaUtil.getRegionInfoForReplica(hri, i))) - .map(env.getAssignmentManager().getRegionStates()::getOrCreateRegionStateNode) - .map(regionNode -> { - TransitRegionStateProcedure proc = - TransitRegionStateProcedure.assign(env, regionNode.getRegionInfo(), targetServer); - regionNode.lock(); - try { - // should never fail, as we have the exclusive region lock, and the region is newly - // created, or has been successfully closed so should not be on any servers, so SCP will - // not process it either. - assert !regionNode.isInTransition(); - regionNode.setProcedure(proc); - } finally { - regionNode.unlock(); - } - return proc; - }).toArray(TransitRegionStateProcedure[]::new); + List<RegionInfo> regions, int regionReplication, ServerName targetServer) { + // create the assign procs only for the primary region using the targetServer + TransitRegionStateProcedure[] primaryRegionProcs = regions.stream() + .map(env.getAssignmentManager().getRegionStates()::getOrCreateRegionStateNode) + .map(regionNode -> { + TransitRegionStateProcedure proc = + TransitRegionStateProcedure.assign(env, regionNode.getRegionInfo(), targetServer); + regionNode.lock(); + try { + // should never fail, as we have the exclusive region lock, and the region is newly + // created, or has been successfully closed so should not be on any servers, so SCP will + // not process it either. + assert !regionNode.isInTransition(); + regionNode.setProcedure(proc); + } finally { + regionNode.unlock(); + } + return proc; + }).toArray(TransitRegionStateProcedure[]::new); + if (regionReplication == DEFAULT_REGION_REPLICA) { + // this is the default case + return primaryRegionProcs; + } + // collect the replica region infos + List<RegionInfo> replicaRegionInfos = + new ArrayList<RegionInfo>(regions.size() * (regionReplication - 1)); + for (RegionInfo hri : regions) { + // start the index from 1 + for (int i = 1; i < regionReplication; i++) { + replicaRegionInfos.add(RegionReplicaUtil.getRegionInfoForReplica(hri, i)); + } + } + // create round robin procs. Note that we exclude the primary region's target server + TransitRegionStateProcedure[] replicaRegionAssignProcs = + env.getAssignmentManager().createRoundRobinAssignProcedures(replicaRegionInfos, + Collections.singletonList(targetServer)); + // combine both the procs and return the result + return ArrayUtils.addAll(primaryRegionProcs, replicaRegionAssignProcs); } static TransitRegionStateProcedure[] createAssignProceduresForOpeningNewRegions( - MasterProcedureEnv env, Stream<RegionInfo> regions, int regionReplication, + MasterProcedureEnv env, List<RegionInfo> regions, int regionReplication, ServerName targetServer) { return createAssignProcedures(env, regions, regionReplication, targetServer); } - static void reopenRegionsForRollback(MasterProcedureEnv env, Stream<RegionInfo> regions, + static void reopenRegionsForRollback(MasterProcedureEnv env, List<RegionInfo> regions, int regionReplication, ServerName targetServer) { TransitRegionStateProcedure[] procs = - createAssignProcedures(env, regions, regionReplication, targetServer); + createAssignProcedures(env, regions, regionReplication, targetServer); env.getMasterServices().getMasterProcedureExecutor().submitProcedures(procs); } http://git-wip-us.apache.org/repos/asf/hbase/blob/1a5aedab/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java index ff2ba5b..efeea59 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.stream.Stream; import org.apache.hadoop.conf.Configuration; @@ -663,7 +664,7 @@ public class MergeTableRegionsProcedure * Rollback close regions **/ private void rollbackCloseRegionsForMerge(MasterProcedureEnv env) throws IOException { - AssignmentManagerUtil.reopenRegionsForRollback(env, Stream.of(regionsToMerge), + AssignmentManagerUtil.reopenRegionsForRollback(env, Arrays.asList(regionsToMerge), getRegionReplication(env), getServerName(env)); } @@ -676,7 +677,7 @@ public class MergeTableRegionsProcedure private TransitRegionStateProcedure[] createAssignProcedures(MasterProcedureEnv env) throws IOException { return AssignmentManagerUtil.createAssignProceduresForOpeningNewRegions(env, - Stream.of(mergedRegion), getRegionReplication(env), getServerName(env)); + Collections.singletonList(mergedRegion), getRegionReplication(env), getServerName(env)); } private int getRegionReplication(final MasterProcedureEnv env) throws IOException { http://git-wip-us.apache.org/repos/asf/hbase/blob/1a5aedab/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java index 4e292c5..0994c7a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java @@ -22,6 +22,7 @@ import java.io.InterruptedIOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -539,8 +540,9 @@ public class SplitTableRegionProcedure * Rollback close parent region */ private void openParentRegion(MasterProcedureEnv env) throws IOException { - AssignmentManagerUtil.reopenRegionsForRollback(env, Stream.of(getParentRegion()), - getRegionReplication(env), getParentRegionServerName(env)); + AssignmentManagerUtil.reopenRegionsForRollback(env, + Collections.singletonList((getParentRegion())), getRegionReplication(env), + getParentRegionServerName(env)); } /** @@ -813,9 +815,11 @@ public class SplitTableRegionProcedure private TransitRegionStateProcedure[] createAssignProcedures(MasterProcedureEnv env) throws IOException { - return AssignmentManagerUtil.createAssignProceduresForOpeningNewRegions(env, - Stream.of(daughter_1_RI, daughter_2_RI), getRegionReplication(env), - getParentRegionServerName(env)); + List<RegionInfo> hris = new ArrayList<RegionInfo>(2); + hris.add(daughter_1_RI); + hris.add(daughter_2_RI); + return AssignmentManagerUtil.createAssignProceduresForOpeningNewRegions(env, hris, + getRegionReplication(env), getParentRegionServerName(env)); } private int getRegionReplication(final MasterProcedureEnv env) throws IOException { http://git-wip-us.apache.org/repos/asf/hbase/blob/1a5aedab/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java index 51c2758..0f6e348 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java @@ -1271,13 +1271,30 @@ public abstract class BaseLoadBalancer implements LoadBalancer { List<RegionInfo> lastFewRegions = new ArrayList<>(); // assign the remaining by going through the list and try to assign to servers one-by-one int serverIdx = RANDOM.nextInt(numServers); - for (RegionInfo region : unassignedRegions) { + OUTER : for (RegionInfo region : unassignedRegions) { boolean assigned = false; - for (int j = 0; j < numServers; j++) { // try all servers one by one + INNER : for (int j = 0; j < numServers; j++) { // try all servers one by one ServerName serverName = servers.get((j + serverIdx) % numServers); if (!cluster.wouldLowerAvailability(region, serverName)) { List<RegionInfo> serverRegions = assignments.computeIfAbsent(serverName, k -> new ArrayList<>()); + if (!RegionReplicaUtil.isDefaultReplica(region.getReplicaId())) { + // if the region is not a default replica + // check if the assignments map has the other replica region on this server + for (RegionInfo hri : serverRegions) { + if (RegionReplicaUtil.isReplicasForSameRegion(region, hri)) { + if (LOG.isTraceEnabled()) { + LOG.trace("Skipping the server, " + serverName + + " , got the same server for the region " + region); + } + // do not allow this case. The unassignedRegions we got because the + // replica region in this list was not assigned because of lower availablity issue. + // So when we assign here we should ensure that as far as possible the server being + // selected does not have the server where the replica region was not assigned. + continue INNER; // continue the inner loop, ie go to the next server + } + } + } serverRegions.add(region); cluster.doAssignRegion(region, serverName); serverIdx = (j + serverIdx + 1) % numServers; //remain from next server http://git-wip-us.apache.org/repos/asf/hbase/blob/1a5aedab/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionReplicaSplit.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionReplicaSplit.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionReplicaSplit.java new file mode 100644 index 0000000..e28b3ff --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionReplicaSplit.java @@ -0,0 +1,158 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.assignment; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionReplicaUtil; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.hadoop.hbase.regionserver.Region; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; +import org.apache.hadoop.hbase.util.RegionSplitter; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.TestName; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@Category({ RegionServerTests.class, LargeTests.class }) +public class TestRegionReplicaSplit { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestRegionReplicaSplit.class); + private static final Logger LOG = LoggerFactory.getLogger(TestRegionReplicaSplit.class); + + private static final int NB_SERVERS = 4; + private static Table table; + + private static final HBaseTestingUtility HTU = new HBaseTestingUtility(); + private static final byte[] f = HConstants.CATALOG_FAMILY; + + @BeforeClass + public static void beforeClass() throws Exception { + HTU.getConfiguration().setInt("hbase.master.wait.on.regionservers.mintostart", 3); + HTU.startMiniCluster(NB_SERVERS); + final TableName tableName = TableName.valueOf(TestRegionReplicaSplit.class.getSimpleName()); + + // Create table then get the single region for our new table. + createTable(tableName); + } + + @Rule + public TestName name = new TestName(); + + private static void createTable(final TableName tableName) throws IOException { + TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName); + builder.setRegionReplication(3); + // create a table with 3 replication + table = HTU.createTable(builder.build(), new byte[][] { f }, getSplits(2), + new Configuration(HTU.getConfiguration())); + } + + private static byte[][] getSplits(int numRegions) { + RegionSplitter.UniformSplit split = new RegionSplitter.UniformSplit(); + split.setFirstRow(Bytes.toBytes(0L)); + split.setLastRow(Bytes.toBytes(Long.MAX_VALUE)); + return split.split(numRegions); + } + + @AfterClass + public static void afterClass() throws Exception { + HRegionServer.TEST_SKIP_REPORTING_TRANSITION = false; + table.close(); + HTU.shutdownMiniCluster(); + } + + public void testRegionReplicaSplitRegionAssignment() throws Exception { + HTU.loadNumericRows(table, f, 0, 3); + // split the table + List<RegionInfo> regions = new ArrayList<RegionInfo>(); + for (RegionServerThread rs : HTU.getMiniHBaseCluster().getRegionServerThreads()) { + for (Region r : rs.getRegionServer().getRegions(table.getName())) { + System.out.println("the region before split is is " + r.getRegionInfo() + + rs.getRegionServer().getServerName()); + regions.add(r.getRegionInfo()); + } + } + HTU.getAdmin().split(table.getName(), Bytes.toBytes(1)); + int count = 0; + while (true) { + for (RegionServerThread rs : HTU.getMiniHBaseCluster().getRegionServerThreads()) { + for (Region r : rs.getRegionServer().getRegions(table.getName())) { + count++; + } + } + if (count >= 9) { + break; + } + count = 0; + } + List<ServerName> newRegionLocations = new ArrayList<ServerName>(); + for (RegionServerThread rs : HTU.getMiniHBaseCluster().getRegionServerThreads()) { + RegionInfo prevInfo = null; + for (Region r : rs.getRegionServer().getRegions(table.getName())) { + if (!regions.contains(r.getRegionInfo()) + && !RegionReplicaUtil.isDefaultReplica(r.getRegionInfo())) { + LOG.info("The region is " + r.getRegionInfo() + " the location is " + + rs.getRegionServer().getServerName()); + if (!RegionReplicaUtil.isDefaultReplica(r.getRegionInfo()) + && newRegionLocations.contains(rs.getRegionServer().getServerName()) + && prevInfo != null + && Bytes.equals(prevInfo.getStartKey(), r.getRegionInfo().getStartKey()) + && Bytes.equals(prevInfo.getEndKey(), r.getRegionInfo().getEndKey())) { + fail("Splitted regions should not be assigned to same region server"); + } else { + prevInfo = r.getRegionInfo(); + if (!RegionReplicaUtil.isDefaultReplica(r.getRegionInfo()) + && !newRegionLocations.contains(rs.getRegionServer().getServerName())) { + newRegionLocations.add(rs.getRegionServer().getServerName()); + } + } + } + } + } + // since we assign the daughter regions in round robin fashion, both the daugther region + // replicas will be assigned to two unique servers. + assertEquals("The new regions should be assigned to 3 unique servers ", 3, + newRegionLocations.size()); + } +}