virajjasani commented on a change in pull request #931: HBASE-22285 A normalizer which merges small size regions with adjacen… URL: https://github.com/apache/hbase/pull/931#discussion_r357673815
########## File path: hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/MergeNormalizer.java ########## @@ -0,0 +1,154 @@ +package org.apache.hadoop.hbase.master.normalizer; + +import com.google.protobuf.ServiceException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.HBaseIOException; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.RegionLoad; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.master.MasterRpcServices; +import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.protobuf.RequestConverter; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; + + + +/** + * Implementation of MergeNormalizer + * + * Logic in use: + * + * <ol> + * <li> get all regions of a given table + * <li> get avg size S of each region (by total size of store files reported in RegionLoad) + * <li> Otherwise, two region R1 and its smallest neighbor R2 are merged, + * if R1 + R1 < S, and all such regions are returned to be merged + * <li> Otherwise, no action is performed + * </ol> + * <p> + * Region sizes are coarse and approximate on the order of megabytes. 
Also, + * empty regions (less than 1MB) are also merged if the age of region is > MIN_DURATION_FOR_MERGE (default 2) + */ + +@InterfaceAudience.Private +public class MergeNormalizer implements RegionNormalizer { + private static final Log LOG = LogFactory.getLog(MergeNormalizer.class); + private static final int MIN_REGION_COUNT = 3; + private static final int MIN_DURATION_FOR_MERGE=2; + private MasterServices masterServices; + private MasterRpcServices masterRpcServices; + + @Override public void setMasterServices(MasterServices masterServices) { + this.masterServices = masterServices; + } + + @Override public void setMasterRpcServices(MasterRpcServices masterRpcServices) { + this.masterRpcServices = masterRpcServices; + } + + @Override public List<NormalizationPlan> computePlanForTable(TableName table) + throws HBaseIOException { + if (table == null || table.isSystemTable()) { + LOG.debug("Normalization of system table " + table + " isn't allowed"); + return null; + } + boolean mergeEnabled = true; + try { + mergeEnabled = masterRpcServices.isSplitOrMergeEnabled(null, + RequestConverter.buildIsSplitOrMergeEnabledRequest(Admin.MasterSwitchType.MERGE)).getEnabled(); + } catch (ServiceException se) { + LOG.debug("Unable to determine whether merge is enabled", se); + } + if (!mergeEnabled) { + LOG.debug("Merge disabled for table: " + table); + return null; + } + List<NormalizationPlan> plans = new ArrayList<NormalizationPlan>(); + List<HRegionInfo> tableRegions = masterServices.getAssignmentManager().getRegionStates(). + getRegionsOfTable(table); + if (tableRegions == null || tableRegions.size() < MIN_REGION_COUNT) { + int nrRegions = tableRegions == null ? 
0 : tableRegions.size(); + LOG.debug("Table " + table + " has " + nrRegions + " regions, required min number" Review comment: nit: we can use parameterized logging here, i.e. put {} placeholders in the message and pass the values as arguments instead of concatenating strings ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services