HBASE-14906 Improvements on FlushLargeStoresPolicy (Yu Li)

Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/c15e0af8
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/c15e0af8
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/c15e0af8

Branch: refs/heads/hbase-12439
Commit: c15e0af84aeb4ab992482a957c2b242d2ab57d76
Parents: bebcc09
Author: stack <st...@apache.org>
Authored: Thu Dec 10 16:49:23 2015 -0800
Committer: stack <st...@apache.org>
Committed: Thu Dec 10 16:49:23 2015 -0800

----------------------------------------------------------------------
 .../src/main/resources/hbase-default.xml        | 17 ++++----
 .../regionserver/FlushLargeStoresPolicy.java    | 44 ++++++++++++++------
 .../hadoop/hbase/regionserver/HRegion.java      |  4 ++
 .../regionserver/TestPerColumnFamilyFlush.java  | 10 +++--
 4 files changed, 51 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/c15e0af8/hbase-common/src/main/resources/hbase-default.xml
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/resources/hbase-default.xml 
b/hbase-common/src/main/resources/hbase-default.xml
index fa5d522..37a6298 100644
--- a/hbase-common/src/main/resources/hbase-default.xml
+++ b/hbase-common/src/main/resources/hbase-default.xml
@@ -612,16 +612,17 @@ possible configurations would overwhelm and obscure the important.
     every hbase.server.thread.wakefrequency.</description>
   </property>
   <property>
-    <name>hbase.hregion.percolumnfamilyflush.size.lower.bound</name>
+    <name>hbase.hregion.percolumnfamilyflush.size.lower.bound.min</name>
     <value>16777216</value>
     <description>
-    If FlushLargeStoresPolicy is used, then every time that we hit the
-    total memstore limit, we find out all the column families whose memstores
-    exceed this value, and only flush them, while retaining the others whose
-    memstores are lower than this limit. If none of the families have their
-    memstore size more than this, all the memstores will be flushed
-    (just as usual). This value should be less than half of the total memstore
-    threshold (hbase.hregion.memstore.flush.size).
+    If FlushLargeStoresPolicy is used and there are multiple column families,
+    then every time we hit the total memstore limit, we find all the column
+    families whose memstores exceed a "lower bound" and flush only those,
+    while retaining the others in memory. The "lower bound" is
+    "hbase.hregion.memstore.flush.size / column_family_number" by default,
+    unless the value of this property is larger than that. If none of the
+    families has a memstore larger than the lower bound, all the memstores
+    will be flushed (just as usual).
     </description>
   </property>
   <property>

http://git-wip-us.apache.org/repos/asf/hbase/blob/c15e0af8/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushLargeStoresPolicy.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushLargeStoresPolicy.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushLargeStoresPolicy.java
index 328e890..b4d47c7 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushLargeStoresPolicy.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushLargeStoresPolicy.java
@@ -38,35 +38,50 @@ public class FlushLargeStoresPolicy extends FlushPolicy {
   public static final String HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND =
       "hbase.hregion.percolumnfamilyflush.size.lower.bound";
 
-  private static final long 
DEFAULT_HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND = 1024 * 1024 * 16L;
+  public static final String HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND_MIN =
+      "hbase.hregion.percolumnfamilyflush.size.lower.bound.min";
 
-  private long flushSizeLowerBound;
+  private static final long 
DEFAULT_HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND_MIN =
+      1024 * 1024 * 16L;
+
+  private long flushSizeLowerBound = -1;
 
   @Override
   protected void configureForRegion(HRegion region) {
     super.configureForRegion(region);
-    long flushSizeLowerBound;
+    int familyNumber = region.getTableDesc().getFamilies().size();
+    if (familyNumber <= 1) {
+      // No need to parse and set the flush size lower bound if there is only one family.
+      // The family number might also be zero in some of our unit test cases.
+      return;
+    }
+    // For multiple families, the lower bound is the "average flush size" by default
+    // unless the setting in the configuration is larger.
+    long flushSizeLowerBound = region.getMemstoreFlushSize() / familyNumber;
+    long minimumLowerBound =
+        getConf().getLong(HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND_MIN,
+          DEFAULT_HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND_MIN);
+    if (minimumLowerBound > flushSizeLowerBound) {
+      flushSizeLowerBound = minimumLowerBound;
+    }
+    // use the setting in table description if any
     String flushedSizeLowerBoundString =
         
region.getTableDesc().getValue(HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND);
     if (flushedSizeLowerBoundString == null) {
-      flushSizeLowerBound =
-          getConf().getLong(HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND,
-            DEFAULT_HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND);
       if (LOG.isDebugEnabled()) {
-        LOG.debug(HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND
-            + " is not specified, use global config(" + flushSizeLowerBound + 
") instead");
+        LOG.debug("No " + HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND
+            + " set in description of table " + 
region.getTableDesc().getTableName()
+            + ", use config (" + flushSizeLowerBound + ") instead");
       }
     } else {
       try {
         flushSizeLowerBound = Long.parseLong(flushedSizeLowerBoundString);
       } catch (NumberFormatException nfe) {
-        flushSizeLowerBound =
-            getConf().getLong(HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND,
-              DEFAULT_HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND);
+        // fall back to the value computed above if the table setting is malformed
         LOG.warn("Number format exception when parsing "
             + HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND + " for table "
             + region.getTableDesc().getTableName() + ":" + 
flushedSizeLowerBoundString + ". " + nfe
-            + ", use global config(" + flushSizeLowerBound + ") instead");
+            + ", use config (" + flushSizeLowerBound + ") instead");
 
       }
     }
@@ -87,6 +102,11 @@ public class FlushLargeStoresPolicy extends FlushPolicy {
 
   @Override
   public Collection<Store> selectStoresToFlush() {
+    // no need to select stores if only one family
+    if (region.getTableDesc().getFamilies().size() == 1) {
+      return region.stores.values();
+    }
+    // start selection
     Collection<Store> stores = region.stores.values();
     Set<Store> specificStoresToFlush = new HashSet<Store>();
     for (Store store : stores) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/c15e0af8/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index 484d5ee..9549a13 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -8183,4 +8183,8 @@ public class HRegion implements HeapSize, 
PropagatingConfigurationObserver, Regi
     return this.getRegionInfo().isMetaRegion() ? CellComparator.META_COMPARATOR
         : CellComparator.COMPARATOR;
   }
+
+  public long getMemstoreFlushSize() {
+    return this.memstoreFlushSize;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/c15e0af8/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestPerColumnFamilyFlush.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestPerColumnFamilyFlush.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestPerColumnFamilyFlush.java
index 0df2799..624f4a5 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestPerColumnFamilyFlush.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestPerColumnFamilyFlush.java
@@ -128,7 +128,8 @@ public class TestPerColumnFamilyFlush {
     Configuration conf = HBaseConfiguration.create();
     conf.setLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 200 * 1024);
     conf.set(FlushPolicyFactory.HBASE_FLUSH_POLICY_KEY, 
FlushLargeStoresPolicy.class.getName());
-    
conf.setLong(FlushLargeStoresPolicy.HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND,
 100 * 1024);
+    
conf.setLong(FlushLargeStoresPolicy.HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND_MIN,
+      100 * 1024);
     // Intialize the region
     Region region = initHRegion("testSelectiveFlushWhenEnabled", conf);
     // Add 1200 entries for CF1, 100 for CF2 and 50 for CF3
@@ -336,7 +337,7 @@ public class TestPerColumnFamilyFlush {
     conf.setLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 20000);
     // Carefully chosen limits so that the memstore just flushes when we're 
done
     conf.set(FlushPolicyFactory.HBASE_FLUSH_POLICY_KEY, 
FlushLargeStoresPolicy.class.getName());
-    
conf.setLong(FlushLargeStoresPolicy.HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND,
 10000);
+    
conf.setLong(FlushLargeStoresPolicy.HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND_MIN,
 10000);
     final int numRegionServers = 4;
     try {
       TEST_UTIL.startMiniCluster(numRegionServers);
@@ -451,7 +452,7 @@ public class TestPerColumnFamilyFlush {
     conf.setLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 128 * 1024 * 1024);
     conf.set(FlushPolicyFactory.HBASE_FLUSH_POLICY_KEY, 
FlushLargeStoresPolicy.class.getName());
     long cfFlushSizeLowerBound = 2048;
-    
conf.setLong(FlushLargeStoresPolicy.HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND,
+    
conf.setLong(FlushLargeStoresPolicy.HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND_MIN,
       cfFlushSizeLowerBound);
 
     // One hour, prevent periodic rolling
@@ -568,7 +569,6 @@ public class TestPerColumnFamilyFlush {
     Configuration conf = TEST_UTIL.getConfiguration();
     conf.setLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, memstoreFlushSize);
     conf.set(FlushPolicyFactory.HBASE_FLUSH_POLICY_KEY, 
FlushAllStoresPolicy.class.getName());
-    
conf.setLong(FlushLargeStoresPolicy.HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND,
 400 * 1024);
     conf.setInt(HStore.BLOCKING_STOREFILES_KEY, 10000);
     conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
       ConstantSizeRegionSplitPolicy.class.getName());
@@ -608,6 +608,8 @@ public class TestPerColumnFamilyFlush {
 
     LOG.info("==============Test with selective flush enabled===============");
     conf.set(FlushPolicyFactory.HBASE_FLUSH_POLICY_KEY, 
FlushLargeStoresPolicy.class.getName());
+    // the default per-cf flush lower bound is too big, so set it to a small enough value
+    
conf.setLong(FlushLargeStoresPolicy.HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND_MIN,
 0);
     try {
       TEST_UTIL.startMiniCluster(1);
       TEST_UTIL.getHBaseAdmin().createNamespace(

Reply via email to