Author: liyin Date: Thu May 9 18:18:23 2013 New Revision: 1480733 URL: http://svn.apache.org/r1480733 Log: [HBASE-8307] Improving the Histogram class to work correctly in the underloaded case
Author: manukranthk Summary: Add a list to store and serve the percentile metrics when the load is under a default threshold. This will help in avoiding the roundoff errors in the cases with low samples. Test Plan: Unit test. Test on Shadow. Reviewers: liyintang, rshroff Reviewed By: liyintang CC: hbase-eng@ Differential Revision: https://phabricator.fb.com/D802945 Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/PercentileMetric.java hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Histogram.java hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/HistogramTest.java Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/PercentileMetric.java URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/PercentileMetric.java?rev=1480733&r1=1480732&r2=1480733&view=diff ============================================================================== --- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/PercentileMetric.java (original) +++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/PercentileMetric.java Thu May 9 18:18:23 2013 @@ -87,7 +87,7 @@ public class PercentileMetric extends Me } public void updateMetric() { - this.set(underlyingHistogram.getPercentileEstimate(percentile).longValue()); + this.set((long)underlyingHistogram.getPercentileEstimate(percentile)); } public void refresh() { Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Histogram.java URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Histogram.java?rev=1480733&r1=1480732&r2=1480733&view=diff ============================================================================== --- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Histogram.java (original) +++ 
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Histogram.java Thu May 9 18:18:23 2013 @@ -19,7 +19,10 @@ package org.apache.hadoop.hbase.util; import org.apache.hadoop.hbase.regionserver.metrics.PercentileMetric; import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.commons.logging.LogFactory; @@ -47,9 +50,12 @@ import org.apache.commons.logging.Log; public class Histogram { private List<Bucket> buckets; private int numBuckets; - private Double minValue; - private Double maxValue; + private double minValue; + private double maxValue; + private List<Double> underloadSampleList; // We serve the under loaded cases + // from this list. final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + private boolean underload = true; public static final Log LOG = LogFactory.getLog(Histogram.class.getName()); /** @@ -61,45 +67,50 @@ public class Histogram { PercentileMetric.HISTOGRAM_MINVALUE_DEFAULT, PercentileMetric.HISTOGRAM_MAXVALUE_DEFAULT); } + // We will consider the case when we have more than 100 samples as + // overloaded case and use the Histogram only in such scenarios. 
In other + // cases, we can serve the percentile queries using the underloadSampleList + private static final int DEFAULT_MINIMUM_LOAD = 100; // Bucket indexing is from 1 to N - public Histogram(int numBuckets, Double minValue, Double maxValue) { + public Histogram(int numBuckets, double minValue, double maxValue) { if (numBuckets < 1 || minValue >= maxValue) { throw new UnsupportedOperationException(); } buckets = new ArrayList<Bucket>(numBuckets); + underloadSampleList = Collections.synchronizedList(new ArrayList<Double>()); refresh(numBuckets, minValue, maxValue); } // This is included in the bucket - private Double getBucketStartValue(int bucketIndex) { + private double getBucketStartValue(int bucketIndex) { if (bucketIndex < 1 || bucketIndex > this.numBuckets) { throw new IndexOutOfBoundsException(); } - Double gap = this.maxValue - this.minValue; - Double slice = gap/this.numBuckets; + double gap = this.maxValue - this.minValue; + double slice = gap/this.numBuckets; return this.minValue + (bucketIndex - 1.0)*slice; } //This is not included in the bucket - private Double getBucketEndValue(int bucketIndex) { + private double getBucketEndValue(int bucketIndex) { if (bucketIndex < 1 || bucketIndex > this.numBuckets) { throw new IndexOutOfBoundsException(); } - Double gap = this.maxValue - this.minValue; - Double slice = gap/this.numBuckets; + double gap = this.maxValue - this.minValue; + double slice = gap/this.numBuckets; return this.minValue + (bucketIndex)*slice; } - private int getBucketIndex(Double value) { + private int getBucketIndex(double value) { if (value < this.minValue) { return 0; } else if (value >= this.maxValue) { return this.numBuckets + 1; } else { - Double gap = value - this.minValue; - Double idx = this.numBuckets * gap / (this.maxValue - this.minValue); - int i = idx.intValue() + 1; + double gap = value - this.minValue; + double idx = this.numBuckets * gap / (this.maxValue - this.minValue); + int i = (int)idx + 1; // Check if the index falls 
in the margin somehow. if (value < this.getBucketStartValue(i)) i--; else if (value >= this.getBucketEndValue(i)) i++; @@ -110,8 +121,8 @@ public class Histogram { public void refresh(int numBuckets) { this.lock.writeLock().lock(); try { - Double minValue = this.minValue; - Double maxValue = this.maxValue; + double minValue = this.minValue; + double maxValue = this.maxValue; for (Bucket bucket : this.buckets) { if (bucket.count > 0) { minValue = bucket.getMinValue(); @@ -133,11 +144,13 @@ public class Histogram { } } - private void refresh(int numBuckets, Double minValue, Double maxValue) { + private void refresh(int numBuckets, double minValue, double maxValue) { this.numBuckets = numBuckets; this.minValue = minValue; this.maxValue = maxValue; this.buckets.clear(); + underloadSampleList.clear(); + underload = true; buckets.add(new Bucket(Double.MIN_VALUE, this.getBucketStartValue(1))); for (int i = 1; i<=this.numBuckets; i++) { this.buckets.add(new Bucket(this.getBucketStartValue(i), @@ -147,26 +160,35 @@ public class Histogram { Double.MAX_VALUE)); } - public Double getPercentileEstimate(Double prcntyl) { + public double getPercentileEstimate(double prcntyl) { // We scan from the end of the table since our use case is to find the // p99, p95 latencies. + double originalPrcntyl = prcntyl; if (prcntyl > 100.0 || prcntyl < 0.0) { throw new IllegalArgumentException("Percentile input value not in range."); } else { prcntyl = 100.0 - prcntyl; } - Double ret = 0.0; + double ret = 0.0; this.lock.writeLock().lock(); try { + if (underloadSampleList.size() == 0) { + LOG.warn("Too few data points. Consider increasing the sampling time."); + return ret; + } else if (underload) { + Collections.sort(underloadSampleList); + // Since the use case is to clear the list after a call to this + // function, we can afford to sort this list here and enjoy O(1) the + // rest of the time. 
+ return underloadSampleList.get( + (int)(originalPrcntyl * underloadSampleList.size()/100.0)); + } int totalCount = 0; for (Bucket bucket : this.buckets) { totalCount += bucket.count; } - if (totalCount == 0) { - throw new UnsupportedOperationException("Too few data points."); - } - Double countToCoverDouble = (totalCount * prcntyl / 100.0); - int countToCover = countToCoverDouble.intValue(); + double countToCoverdouble = (totalCount * prcntyl / 100.0); + int countToCover = (int)countToCoverdouble; for (int i=this.buckets.size() - 1; i >= 0; i--) { Bucket bucket = this.buckets.get(i); if (bucket.getCount() >= countToCover) { @@ -184,11 +206,26 @@ public class Histogram { return ret; } - public void addValue(Double value) { + public void addValue(double value) { this.lock.readLock().lock(); try { - Bucket bucket = buckets.get(this.getBucketIndex(value)); - bucket.addValue(value); + if (underloadSampleList.size() >= Histogram.DEFAULT_MINIMUM_LOAD) { + if (underload) { + synchronized (underloadSampleList) { + if (underload) { + for (double val : underloadSampleList) { + Bucket bucket = buckets.get(this.getBucketIndex(value)); + bucket.addValue(val); + } + } + underload = false; + } + } + Bucket bucket = buckets.get(this.getBucketIndex(value)); + bucket.addValue(value); + } else { + underloadSampleList.add(value); + } } catch (Exception e) { LOG.error("Unknown Exception : " + e.getMessage()); } finally { @@ -201,13 +238,13 @@ public class Histogram { } public class Bucket { - private Double sum; + private double sum; private int count; - private Double minValue; - private Double maxValue; - private Double startValue; - private Double endValue; - public Bucket(Double startValue, Double endValue) { + private double minValue; + private double maxValue; + private double startValue; + private double endValue; + public Bucket(double startValue, double endValue) { this.sum = 0.0; this.count = 0; this.minValue = endValue; @@ -216,7 +253,7 @@ public class Histogram { 
this.endValue = endValue; } - public void addValue(Double value) { + public void addValue(double value) { this.sum = this.sum + value; count++; this.minValue = Math.min(this.minValue, value); @@ -229,7 +266,7 @@ public class Histogram { * For the purpose of this calculation, the distribution over the bucket * is assumed to be uniformly distributed between minValue and maxValue */ - public int getGreaterCount(Double value) { + public int getGreaterCount(double value) { if (!(this.minValue < value && this.maxValue >= value)) { throw new IllegalArgumentException(); } @@ -238,26 +275,26 @@ public class Histogram { if (this.minValue > value) return 0; else return 1; } - Double gap = value - this.minValue; - Double ret = this.count * gap / (this.maxValue - this.minValue); - return ret.intValue(); + double gap = value - this.minValue; + double ret = this.count * gap / (this.maxValue - this.minValue); + return (int)ret; } /** * This function gives the value which is more than a certain count in this * bucket. 
* */ - public Double getGreaterValue(int count) { + public double getGreaterValue(int count) { if (count > this.count) { throw new IllegalArgumentException(); } if (count == 0) return this.endValue; - Double gap = this.maxValue - this.minValue; - Double ret = this.minValue + gap * count / this.count; + double gap = this.maxValue - this.minValue; + double ret = this.minValue + gap * count / this.count; return ret; } - public Double getSum() { + public double getSum() { return this.sum; } @@ -265,11 +302,11 @@ public class Histogram { return this.count; } - public Double getMinValue() { + public double getMinValue() { return this.minValue; } - public Double getMaxValue() { + public double getMaxValue() { return this.maxValue; } Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/HistogramTest.java URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/HistogramTest.java?rev=1480733&r1=1480732&r2=1480733&view=diff ============================================================================== --- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/HistogramTest.java (original) +++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/HistogramTest.java Thu May 9 18:18:23 2013 @@ -27,22 +27,22 @@ public class HistogramTest extends TestC public void testAboveMaxValue() { Double hi = 10000.0; Histogram hist = new Histogram(1000, 100.0, hi); - for (int i=0; i<100; i++) { + for (int i=0; i<1000; i++) { Double tmp = hi + i * 1.0; hist.addValue(tmp); } Double prcntyl = hist.getPercentileEstimate(95.0); - assertTrue(prcntyl >= (hi + 94) && prcntyl <= (hi + 96)); + assertTrue(prcntyl >= (hi + 949) && prcntyl <= (hi + 950)); } @Test public void testBelowMinValue() { - Histogram hist = new Histogram(1000, 100.0, 10000.0); - for (int i=0; i<100; i++) { + Histogram hist = new Histogram(1000, 1000.0, 10000.0); + for (int i=0; i<1000; i++) { Double tmp = i * 1.0; hist.addValue(tmp); } 
Double prcntyl = hist.getPercentileEstimate(95.0); - assertTrue(prcntyl >= 94 && prcntyl <= 96); + assertTrue(prcntyl >= 949 && prcntyl <= 950); } @Test @@ -61,18 +61,18 @@ public class HistogramTest extends TestC @Test public void testHistogramExtremeValues() { - Histogram hist = new Histogram(100, 0.0, 100.0); - for (int i=1; i<100; i++) { + Histogram hist = new Histogram(100, 0.0, 1000.0); + for (int i=1; i<1000; i++) { Double tmp = i - 0.1; hist.addValue(tmp); } - hist.addValue(100.1); - hist.addValue(100.2); + hist.addValue(1000.1); + hist.addValue(1000.2); Double prcntyl = hist.getPercentileEstimate(99.9999999999); - assertTrue(prcntyl <= 100.2); + assertTrue(prcntyl <= 1000.2); hist = new Histogram(100, 10.0, 100.0); - for (int i=11; i<100; i++) { + for (int i=10; i<1100; i++) { Double tmp = i - 0.1; hist.addValue(tmp); } @@ -81,4 +81,15 @@ public class HistogramTest extends TestC prcntyl = hist.getPercentileEstimate(0.0); assertTrue(prcntyl >= 5.1); } + + public void testFewDataPoints() { + Histogram hist = new Histogram(100, 0.0, 100.0); + for (int i=10; i>=1; i--) { + hist.addValue((double)i); + } + Double prcntyl = hist.getPercentileEstimate(99.0); + assertTrue(prcntyl >= 9 && prcntyl <= 10); + prcntyl = hist.getPercentileEstimate(0.0); + assertTrue(prcntyl >= 1 && prcntyl <= 2); + } }
