d-c-manning commented on a change in pull request #1962:
URL: https://github.com/apache/hbase/pull/1962#discussion_r451224609



##########
File path: 
hbase-hadoop-compat/src/test/java/org/apache/hadoop/metrics2/impl/TestMutableRangeHistogram.java
##########
@@ -0,0 +1,66 @@
+package org.apache.hadoop.metrics2.impl;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.testclassification.MetricsTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.metrics2.AbstractMetric;
+import org.apache.hadoop.metrics2.MetricsRecordBuilder;
+import org.apache.hadoop.metrics2.lib.MutableRangeHistogram;
+import org.apache.hadoop.metrics2.lib.MutableSizeHistogram;
+import org.apache.hadoop.metrics2.lib.MutableTimeHistogram;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.util.Collection;
+
+@Category({ MetricsTests.class, SmallTests.class})
+public class TestMutableRangeHistogram {
+
+    @ClassRule
+    public static final HBaseClassTestRule CLASS_RULE =
+            HBaseClassTestRule.forClass(TestMutableRangeHistogram.class);
+
+    private static final String RECORD_NAME = "test";
+    private static final String SIZE_METRIC_NAME = "TestSize";
+    private static final String TIME_METRIC_NAME = "TestTime";
+
+    @Test
+    public void testMutableSizeHistogram() {
+        testMutableRangeHistogram(SIZE_METRIC_NAME, new 
MutableSizeHistogram(SIZE_METRIC_NAME, ""));
+    }
+
+    @Test
+    public void testMutableTimeHistogram() {
+        testMutableRangeHistogram(TIME_METRIC_NAME, new 
MutableTimeHistogram(TIME_METRIC_NAME, ""));
+    }
+
+    private static void testMutableRangeHistogram(String metricName, 
MutableRangeHistogram histogram) {
+        // fill up values
+        long[] ranges = histogram.getRanges();
+        for (long val : ranges) {
+            histogram.add(val - 1);
+        }
+        histogram.add(ranges[ranges.length - 1] + 1);
+        // put into metrics collector
+        MetricsCollectorImpl collector = new MetricsCollectorImpl();
+        MetricsRecordBuilder builder = collector.addRecord(RECORD_NAME);
+        histogram.snapshot(builder, true);

Review comment:
       after you call `snapshot` once, the internal bins of `FastLongHistogram` 
will reallocate based on the min and max values seen so far. So after this, you 
can repeat the calls to `histogram.add` one more time, and then call `snapshot` 
a second time. Then the bins will be more accurate (but poor precision for 
lower values, due to linear distribution.)

##########
File path: 
hbase-hadoop-compat/src/test/java/org/apache/hadoop/metrics2/impl/TestMutableRangeHistogram.java
##########
@@ -0,0 +1,66 @@
+package org.apache.hadoop.metrics2.impl;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.testclassification.MetricsTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.metrics2.AbstractMetric;
+import org.apache.hadoop.metrics2.MetricsRecordBuilder;
+import org.apache.hadoop.metrics2.lib.MutableRangeHistogram;
+import org.apache.hadoop.metrics2.lib.MutableSizeHistogram;
+import org.apache.hadoop.metrics2.lib.MutableTimeHistogram;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.util.Collection;
+
+@Category({ MetricsTests.class, SmallTests.class})
+public class TestMutableRangeHistogram {
+
+    @ClassRule
+    public static final HBaseClassTestRule CLASS_RULE =
+            HBaseClassTestRule.forClass(TestMutableRangeHistogram.class);
+
+    private static final String RECORD_NAME = "test";
+    private static final String SIZE_METRIC_NAME = "TestSize";
+    private static final String TIME_METRIC_NAME = "TestTime";
+
+    @Test
+    public void testMutableSizeHistogram() {
+        testMutableRangeHistogram(SIZE_METRIC_NAME, new 
MutableSizeHistogram(SIZE_METRIC_NAME, ""));
+    }
+
+    @Test
+    public void testMutableTimeHistogram() {
+        testMutableRangeHistogram(TIME_METRIC_NAME, new 
MutableTimeHistogram(TIME_METRIC_NAME, ""));
+    }
+
+    private static void testMutableRangeHistogram(String metricName, 
MutableRangeHistogram histogram) {
+        // fill up values
+        long[] ranges = histogram.getRanges();
+        for (long val : ranges) {
+            histogram.add(val - 1);
+        }
+        histogram.add(ranges[ranges.length - 1] + 1);

Review comment:
       an attempt to choose a value well above the max to ensure it ends up in 
the highest bin.
   ```suggestion
           histogram.add(ranges[ranges.length - 1] + ranges[ranges.length - 2] 
/ 2);
   ```

##########
File path: 
hbase-hadoop-compat/src/test/java/org/apache/hadoop/metrics2/impl/TestMutableRangeHistogram.java
##########
@@ -0,0 +1,66 @@
+package org.apache.hadoop.metrics2.impl;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.testclassification.MetricsTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.metrics2.AbstractMetric;
+import org.apache.hadoop.metrics2.MetricsRecordBuilder;
+import org.apache.hadoop.metrics2.lib.MutableRangeHistogram;
+import org.apache.hadoop.metrics2.lib.MutableSizeHistogram;
+import org.apache.hadoop.metrics2.lib.MutableTimeHistogram;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.util.Collection;
+
+@Category({ MetricsTests.class, SmallTests.class})
+public class TestMutableRangeHistogram {
+
+    @ClassRule
+    public static final HBaseClassTestRule CLASS_RULE =
+            HBaseClassTestRule.forClass(TestMutableRangeHistogram.class);
+
+    private static final String RECORD_NAME = "test";
+    private static final String SIZE_METRIC_NAME = "TestSize";
+    private static final String TIME_METRIC_NAME = "TestTime";
+
+    @Test
+    public void testMutableSizeHistogram() {
+        testMutableRangeHistogram(SIZE_METRIC_NAME, new 
MutableSizeHistogram(SIZE_METRIC_NAME, ""));
+    }
+
+    @Test
+    public void testMutableTimeHistogram() {
+        testMutableRangeHistogram(TIME_METRIC_NAME, new 
MutableTimeHistogram(TIME_METRIC_NAME, ""));
+    }
+
+    private static void testMutableRangeHistogram(String metricName, 
MutableRangeHistogram histogram) {
+        // fill up values
+        long[] ranges = histogram.getRanges();
+        for (long val : ranges) {
+            histogram.add(val - 1);

Review comment:
       instead of `val - 1`, maybe we can do the average/midpoint of bin 
boundaries? (For `MutableTimeHistogram` that would be `450000`, `210000`, 
`90000`, `45000`, `20000`, `6500`, etc.

##########
File path: 
hbase-hadoop-compat/src/test/java/org/apache/hadoop/metrics2/impl/TestMutableRangeHistogram.java
##########
@@ -0,0 +1,66 @@
+package org.apache.hadoop.metrics2.impl;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.testclassification.MetricsTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.metrics2.AbstractMetric;
+import org.apache.hadoop.metrics2.MetricsRecordBuilder;
+import org.apache.hadoop.metrics2.lib.MutableRangeHistogram;
+import org.apache.hadoop.metrics2.lib.MutableSizeHistogram;
+import org.apache.hadoop.metrics2.lib.MutableTimeHistogram;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.util.Collection;
+
+@Category({ MetricsTests.class, SmallTests.class})
+public class TestMutableRangeHistogram {
+
+    @ClassRule
+    public static final HBaseClassTestRule CLASS_RULE =
+            HBaseClassTestRule.forClass(TestMutableRangeHistogram.class);
+
+    private static final String RECORD_NAME = "test";
+    private static final String SIZE_METRIC_NAME = "TestSize";
+    private static final String TIME_METRIC_NAME = "TestTime";
+
+    @Test
+    public void testMutableSizeHistogram() {
+        testMutableRangeHistogram(SIZE_METRIC_NAME, new 
MutableSizeHistogram(SIZE_METRIC_NAME, ""));
+    }
+
+    @Test
+    public void testMutableTimeHistogram() {
+        testMutableRangeHistogram(TIME_METRIC_NAME, new 
MutableTimeHistogram(TIME_METRIC_NAME, ""));
+    }
+
+    private static void testMutableRangeHistogram(String metricName, 
MutableRangeHistogram histogram) {

Review comment:
       @WenFeiYi yes, unfortunately, I don't think this test case with full 
validation will ever pass due to the bugs and general design of 
FastLongHistogram. (That is a shame, because it would be great to have your 
test.) [HBASE-18151](https://issues.apache.org/jira/browse/HBASE-18151) calls 
out some of the problems. I think we need a major overhaul, and probably should 
use something like `HdrHistogram` to get more accurate metrics.
   
   As for this change, though, the reason why we won't be able to make it work 
is because `FastLongHistogram` divides its buckets linearly. So it's impossible 
for it to accurately measure items at `300000-600000` while also being able to 
measure between `3-10` and `10-30`. The internal bins of `FastLongHistogram` 
will have a width of `600000 / 255` = ~2000. So it will only accurately measure 
the higher buckets.
   
   You could probably make the test kind of work by adding the values, do a 
snapshot to allow the bins to dynamically grow, and then only test the higher 
bin values where the precision is adequate. Even then, you may run into 
boundary issues... so you may have to change to choose values that are far 
within the ranges, instead of close to the boundaries.
   
   I'll try to make some example comments. I think with these comments, you can 
make the tests pass, even though you can't make it do exactly what you want it 
to do.

##########
File path: 
hbase-hadoop-compat/src/test/java/org/apache/hadoop/metrics2/impl/TestMutableRangeHistogram.java
##########
@@ -0,0 +1,66 @@
+package org.apache.hadoop.metrics2.impl;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.testclassification.MetricsTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.metrics2.AbstractMetric;
+import org.apache.hadoop.metrics2.MetricsRecordBuilder;
+import org.apache.hadoop.metrics2.lib.MutableRangeHistogram;
+import org.apache.hadoop.metrics2.lib.MutableSizeHistogram;
+import org.apache.hadoop.metrics2.lib.MutableTimeHistogram;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.util.Collection;
+
+@Category({ MetricsTests.class, SmallTests.class})
+public class TestMutableRangeHistogram {
+
+    @ClassRule
+    public static final HBaseClassTestRule CLASS_RULE =
+            HBaseClassTestRule.forClass(TestMutableRangeHistogram.class);
+
+    private static final String RECORD_NAME = "test";
+    private static final String SIZE_METRIC_NAME = "TestSize";
+    private static final String TIME_METRIC_NAME = "TestTime";
+
+    @Test
+    public void testMutableSizeHistogram() {
+        testMutableRangeHistogram(SIZE_METRIC_NAME, new 
MutableSizeHistogram(SIZE_METRIC_NAME, ""));
+    }
+
+    @Test
+    public void testMutableTimeHistogram() {
+        testMutableRangeHistogram(TIME_METRIC_NAME, new 
MutableTimeHistogram(TIME_METRIC_NAME, ""));
+    }
+
+    private static void testMutableRangeHistogram(String metricName, 
MutableRangeHistogram histogram) {
+        // fill up values
+        long[] ranges = histogram.getRanges();
+        for (long val : ranges) {
+            histogram.add(val - 1);
+        }
+        histogram.add(ranges[ranges.length - 1] + 1);
+        // put into metrics collector
+        MetricsCollectorImpl collector = new MetricsCollectorImpl();
+        MetricsRecordBuilder builder = collector.addRecord(RECORD_NAME);
+        histogram.snapshot(builder, true);
+        // get and print
+        Collection<MetricsRecordImpl> records = collector.getRecords();
+        assertEquals(records.size(), 1);
+        MetricsRecordImpl record = records.iterator().next();
+        assertEquals(record.name(), RECORD_NAME);
+        String prefix = metricName + "_" + histogram.getRangeType();
+        int count = 0;
+        for (AbstractMetric metric : record.metrics()) {

Review comment:
       here you could only validate the highest bins... depending on the 
precision. For `MutableTimeHistogram` with a precision of ~2000, you could 
probably validate these buckets: `10000, 30000, 60000, 120000, 300000, 600000`. 
The lower counts will eventually be incorrect due to the lack of precision.
   
   The other option is to have several tests - each test could validate only 
some of the bins. For example, if you only fill in the lower bins, then the 
linear distribution of the internal bins of `FastLongHistogram` will be of 
smaller width, and the precision will be better for the lower buckets. You 
still need to `add` the values, take a `snapshot`, then `add` the values again, 
and take a `snapshot` gain, before doing the validation.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to