ndimiduk commented on a change in pull request #2490: URL: https://github.com/apache/hbase/pull/2490#discussion_r509692111
########## File path: hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java ########## @@ -315,35 +316,60 @@ private boolean skipForMerge(final RegionStates regionStates, final RegionInfo r * towards target average or target region count. */ private List<NormalizationPlan> computeMergeNormalizationPlans(final NormalizeContext ctx) { - if (ctx.getTableRegions().size() < minRegionCount) { + if (isEmpty(ctx.getTableRegions()) || ctx.getTableRegions().size() < minRegionCount) { LOG.debug("Table {} has {} regions, required min number of regions for normalizer to run" + " is {}, not computing merge plans.", ctx.getTableName(), ctx.getTableRegions().size(), minRegionCount); return Collections.emptyList(); } - final double avgRegionSizeMb = ctx.getAverageRegionSizeMb(); + final long avgRegionSizeMb = (long) ctx.getAverageRegionSizeMb(); + if (avgRegionSizeMb < mergeMinRegionSizeMb) { + return Collections.emptyList(); + } LOG.debug("Computing normalization plan for table {}. average region size: {}, number of" + " regions: {}.", ctx.getTableName(), avgRegionSizeMb, ctx.getTableRegions().size()); - final List<NormalizationPlan> plans = new ArrayList<>(); - for (int candidateIdx = 0; candidateIdx < ctx.getTableRegions().size() - 1; candidateIdx++) { - final RegionInfo current = ctx.getTableRegions().get(candidateIdx); - final RegionInfo next = ctx.getTableRegions().get(candidateIdx + 1); - if (skipForMerge(ctx.getRegionStates(), current) - || skipForMerge(ctx.getRegionStates(), next)) { - continue; + // this nested loop walks the table's region chain once, looking for contiguous sequences of + // regions that meet the criteria for merge. The outer loop tracks the starting point of the + // next sequence, the inner loop looks for the end of that sequence. A single sequence becomes + // an instance of MergeNormalizationPlan. + + final List<NormalizationPlan> plans = new LinkedList<>(); + final List<NormalizationTarget> rangeMembers = new LinkedList<>(); + long sumRangeMembersSizeMb; + int current = 0; + for (int rangeStart = 0; + rangeStart < ctx.getTableRegions().size() - 1 && current < ctx.getTableRegions().size();) { + // walk the region chain looking for contiguous sequences of regions that can be merged. + rangeMembers.clear(); + sumRangeMembersSizeMb = 0; + for (current = rangeStart; current < ctx.getTableRegions().size(); current++) { + final RegionInfo regionInfo = ctx.getTableRegions().get(current); + final long regionSizeMb = getRegionSizeMB(regionInfo); + if (skipForMerge(ctx.getRegionStates(), regionInfo)) { + // this region cannot participate in a range. resume the outer loop. + rangeStart = Math.max(current, rangeStart + 1); Review comment: This line of questions convinces me you really read the code. Thank you for the thoughtful review! ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org