HBASE-14867 SimpleRegionNormalizer needs to have better heuristics to trigger merge operation
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/1e4992c6 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/1e4992c6 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/1e4992c6 Branch: refs/heads/hbase-12439 Commit: 1e4992c6eccb81166cdda842a68644fa962a3fdc Parents: 6868c63 Author: tedyu <yuzhih...@gmail.com> Authored: Tue Dec 29 06:21:57 2015 -0800 Committer: tedyu <yuzhih...@gmail.com> Committed: Tue Dec 29 06:21:57 2015 -0800 ---------------------------------------------------------------------- .../normalizer/SimpleRegionNormalizer.java | 106 +++++++++---------- .../normalizer/TestSimpleRegionNormalizer.java | 43 +++++++- 2 files changed, 90 insertions(+), 59 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/1e4992c6/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java index 659b3dc..fe6034b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java @@ -27,8 +27,11 @@ import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.master.MasterServices; -import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.hbase.util.Triple; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; import java.util.List; /** @@ -66,6 +69,22 @@ public class SimpleRegionNormalizer implements RegionNormalizer { this.masterServices = masterServices; } + /* + * This comparator compares the region size. + * The second element in the triple is region size while the 3rd element + * is the index of the region in the underlying List + */ + private Comparator<Triple<HRegionInfo, Long, Integer>> regionSizeComparator = + new Comparator<Triple<HRegionInfo, Long, Integer>>() { + @Override + public int compare(Triple<HRegionInfo, Long, Integer> pair, + Triple<HRegionInfo, Long, Integer> pair2) { + long sz = pair.getSecond(); + long sz2 = pair2.getSecond(); + return (sz < sz2) ? -1 : ((sz == sz2) ? 0 : 1); + } + }; + /** * Computes next most "urgent" normalization action on the table. * Action may be either a split, or a merge, or no action. @@ -76,7 +95,7 @@ public class SimpleRegionNormalizer implements RegionNormalizer { @Override public NormalizationPlan computePlanForTable(TableName table) throws HBaseIOException { if (table == null || table.isSystemTable()) { - LOG.debug("Normalization of table " + table + " isn't allowed"); + LOG.debug("Normalization of system table " + table + " isn't allowed"); return EmptyNormalizationPlan.getInstance(); } @@ -95,57 +114,18 @@ public class SimpleRegionNormalizer implements RegionNormalizer { ", number of regions: " + tableRegions.size()); long totalSizeMb = 0; - Pair<HRegionInfo, Long> largestRegion = new Pair<>(); - - // A is a smallest region, B is it's smallest neighbor - Pair<HRegionInfo, Long> smallestRegion = new Pair<>(); - int smallestRegionIndex = 0; + ArrayList<Triple<HRegionInfo, Long, Integer>> regionsWithSize = + new ArrayList<Triple<HRegionInfo, Long, Integer>>(tableRegions.size()); for (int i = 0; i < tableRegions.size(); i++) { HRegionInfo hri = tableRegions.get(i); long regionSize = getRegionSize(hri); + regionsWithSize.add(new Triple<HRegionInfo, Long, Integer>(hri, regionSize, i)); totalSizeMb += regionSize; - - if (largestRegion.getFirst() == null || regionSize > largestRegion.getSecond()) { - largestRegion.setFirst(hri); - largestRegion.setSecond(regionSize); - } - - if (smallestRegion.getFirst() == null || regionSize < smallestRegion.getSecond()) { - smallestRegion.setFirst(hri); - smallestRegion.setSecond(regionSize); - smallestRegionIndex = i; - } } + Collections.sort(regionsWithSize, regionSizeComparator); - // now get smallest neighbor of smallest region - long leftNeighborSize = -1; - - if (smallestRegionIndex > 0) { - leftNeighborSize = getRegionSize(tableRegions.get(smallestRegionIndex - 1)); - } - - long rightNeighborSize = -1; - if (smallestRegionIndex < tableRegions.size() - 1) { - rightNeighborSize = getRegionSize(tableRegions.get(smallestRegionIndex + 1)); - } - - Pair<HRegionInfo, Long> smallestNeighborOfSmallestRegion; - if (leftNeighborSize == -1) { - smallestNeighborOfSmallestRegion = - new Pair<>(tableRegions.get(smallestRegionIndex + 1), rightNeighborSize); - } else if (rightNeighborSize == -1) { - smallestNeighborOfSmallestRegion = - new Pair<>(tableRegions.get(smallestRegionIndex - 1), leftNeighborSize); - } else { - if (leftNeighborSize < rightNeighborSize) { - smallestNeighborOfSmallestRegion = - new Pair<>(tableRegions.get(smallestRegionIndex - 1), leftNeighborSize); - } else { - smallestNeighborOfSmallestRegion = - new Pair<>(tableRegions.get(smallestRegionIndex + 1), rightNeighborSize); - } - } + Triple<HRegionInfo, Long, Integer> largestRegion = regionsWithSize.get(tableRegions.size()-1); double avgRegionSize = totalSizeMb / (double) tableRegions.size(); @@ -159,19 +139,31 @@ public class SimpleRegionNormalizer implements RegionNormalizer { + largestRegion.getFirst().getRegionNameAsString() + " has size " + largestRegion.getSecond() + ", more than 2 times than avg size, splitting"); return new SplitNormalizationPlan(largestRegion.getFirst(), null); - } else { - if (smallestRegion.getSecond() + smallestNeighborOfSmallestRegion.getSecond() - < avgRegionSize) { - LOG.debug("Table " + table + ", smallest region size: " + smallestRegion.getSecond() - + " and its smallest neighbor size: " + smallestNeighborOfSmallestRegion.getSecond() - + ", less than the avg size, merging them"); - return new MergeNormalizationPlan(smallestRegion.getFirst(), - smallestNeighborOfSmallestRegion.getFirst()); - } else { - LOG.debug("No normalization needed, regions look good for table: " + table); - return EmptyNormalizationPlan.getInstance(); + } + int candidateIdx = 0; + // look for two successive entries whose indices are adjacent + while (candidateIdx < tableRegions.size()-1) { + if (Math.abs(regionsWithSize.get(candidateIdx).getThird() - + regionsWithSize.get(candidateIdx + 1).getThird()) == 1) { + break; } + candidateIdx++; + } + if (candidateIdx == tableRegions.size()-1) { + LOG.debug("No neighboring regions found for table: " + table); + return EmptyNormalizationPlan.getInstance(); + } + Triple<HRegionInfo, Long, Integer> candidateRegion = regionsWithSize.get(candidateIdx); + Triple<HRegionInfo, Long, Integer> candidateRegion2 = regionsWithSize.get(candidateIdx+1); + if (candidateRegion.getSecond() + candidateRegion2.getSecond() < avgRegionSize) { + LOG.debug("Table " + table + ", smallest region size: " + candidateRegion.getSecond() + + " and its smallest neighbor size: " + candidateRegion2.getSecond() + + ", less than the avg size, merging them"); + return new MergeNormalizationPlan(candidateRegion.getFirst(), + candidateRegion2.getFirst()); } + LOG.debug("No normalization needed, regions look good for table: " + table); + return EmptyNormalizationPlan.getInstance(); } private long getRegionSize(HRegionInfo hri) { http://git-wip-us.apache.org/repos/asf/hbase/blob/1e4992c6/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizer.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizer.java index 211911e..970af43 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/normalizer/TestSimpleRegionNormalizer.java @@ -142,8 +142,8 @@ public class TestSimpleRegionNormalizer { hris.add(hri4); regionSizes.put(hri4.getRegionName(), 15); - HRegionInfo hri5 = new HRegionInfo(testTable, Bytes.toBytes("ddd"), Bytes.toBytes("eee")); - hris.add(hri4); + HRegionInfo hri5 = new HRegionInfo(testTable, Bytes.toBytes("eee"), Bytes.toBytes("fff")); + hris.add(hri5); regionSizes.put(hri5.getRegionName(), 16); setupMocksForNormalizer(regionSizes, hris); @@ -154,6 +154,45 @@ public class TestSimpleRegionNormalizer { assertEquals(hri3, ((MergeNormalizationPlan) plan).getSecondRegion()); } + // Test for situation illustrated in HBASE-14867 + @Test + public void testMergeOfSecondSmallestRegions() throws HBaseIOException { + TableName testTable = TableName.valueOf("testMergeOfSmallRegions"); + List<HRegionInfo> hris = new ArrayList<>(); + Map<byte[], Integer> regionSizes = new HashMap<>(); + + HRegionInfo hri1 = new HRegionInfo(testTable, Bytes.toBytes("aaa"), Bytes.toBytes("bbb")); + hris.add(hri1); + regionSizes.put(hri1.getRegionName(), 1); + + HRegionInfo hri2 = new HRegionInfo(testTable, Bytes.toBytes("bbb"), Bytes.toBytes("ccc")); + hris.add(hri2); + regionSizes.put(hri2.getRegionName(), 10000); + + HRegionInfo hri3 = new HRegionInfo(testTable, Bytes.toBytes("ccc"), Bytes.toBytes("ddd")); + hris.add(hri3); + regionSizes.put(hri3.getRegionName(), 10000); + + HRegionInfo hri4 = new HRegionInfo(testTable, Bytes.toBytes("ddd"), Bytes.toBytes("eee")); + hris.add(hri4); + regionSizes.put(hri4.getRegionName(), 10000); + + HRegionInfo hri5 = new HRegionInfo(testTable, Bytes.toBytes("eee"), Bytes.toBytes("fff")); + hris.add(hri5); + regionSizes.put(hri5.getRegionName(), 2700); + + HRegionInfo hri6 = new HRegionInfo(testTable, Bytes.toBytes("fff"), Bytes.toBytes("ggg")); + hris.add(hri6); + regionSizes.put(hri6.getRegionName(), 2700); + + setupMocksForNormalizer(regionSizes, hris); + NormalizationPlan plan = normalizer.computePlanForTable(testTable); + + assertTrue(plan instanceof MergeNormalizationPlan); + assertEquals(hri5, ((MergeNormalizationPlan) plan).getFirstRegion()); + assertEquals(hri6, ((MergeNormalizationPlan) plan).getSecondRegion()); + } + @Test public void testMergeOfSmallNonAdjacentRegions() throws HBaseIOException { TableName testTable = TableName.valueOf("testMergeOfSmallRegions");