ndimiduk commented on a change in pull request #1786: URL: https://github.com/apache/hbase/pull/1786#discussion_r434088560
########## File path: hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java ########## @@ -18,126 +17,474 @@ */ package org.apache.hadoop.hbase.master.normalizer; +import java.io.IOException; +import java.time.Instant; +import java.time.Period; import java.util.ArrayList; import java.util.Collections; -import java.util.Comparator; import java.util.List; - -import org.apache.hadoop.hbase.HBaseIOException; +import java.util.Objects; +import java.util.function.BooleanSupplier; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseInterfaceAudience; +import org.apache.hadoop.hbase.RegionMetrics; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.Size; import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.MasterSwitchType; import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.master.RegionState; +import org.apache.hadoop.hbase.master.assignment.RegionStates; import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan.PlanType; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils; /** * Simple implementation of region normalizer. Logic in use: * <ol> - * <li>Get all regions of a given table - * <li>Get avg size S of each region (by total size of store files reported in RegionMetrics) - * <li>Seek every single region one by one. If a region R0 is bigger than S * 2, it is kindly - * requested to split. Thereon evaluate the next region R1 - * <li>Otherwise, if R0 + R1 is smaller than S, R0 and R1 are kindly requested to merge. 
Thereon - * evaluate the next region R2 - * <li>Otherwise, R1 is evaluated + * <li>Get all regions of a given table</li> + * <li>Get avg size S of the regions in the table (by total size of store files reported in + * RegionMetrics)</li> + * <li>For each region R0, if R0 is bigger than S * 2, it is kindly requested to split.</li> + * <li>Otherwise, for the next region in the chain R1, if R0 + R1 is smaller than S, R0 and R1 + * are kindly requested to merge.</li> Review comment: Rather than slop, my recommendation is that we add the concept of a "minimum region size" for deciding when to split. I filed [HBASE-24464](https://issues.apache.org/jira/browse/HBASE-24464) for that discussion. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org