ableegoldman commented on a change in pull request #8697: URL: https://github.com/apache/kafka/pull/8697#discussion_r429461550
########## File path: clients/src/test/java/org/apache/kafka/common/metrics/MetricsTest.java ########## @@ -492,6 +493,52 @@ public void testPercentiles() { assertEquals(75, (Double) p75.metricValue(), 1.0); } + @Test + public void testPercentilesWithRandomNumbersAndLinearBucketing() { + long seed = new Random().nextLong(); + int sizeInBytes = 1000 * 1000; // 1MB + long maximumValue = 1000 * 24 * 60 * 60 * 1000L; // if values are ms, max is 1000 days + + try { + Random prng = new Random(seed); + int numberOfValues = 5000 + prng.nextInt(10_000); // ranges is [5000, 15000] + + Percentiles percs = new Percentiles(sizeInBytes, + maximumValue, + BucketSizing.LINEAR, + new Percentile(metrics.metricName("test.p90", "grp1"), 90), + new Percentile(metrics.metricName("test.p99", "grp1"), 99)); + MetricConfig config = new MetricConfig().eventWindow(50).samples(2); + Sensor sensor = metrics.sensor("test", config); + sensor.add(percs); + Metric p90 = this.metrics.metrics().get(metrics.metricName("test.p90", "grp1")); + Metric p99 = this.metrics.metrics().get(metrics.metricName("test.p99", "grp1")); + + final List<Long> values = new ArrayList<>(numberOfValues); + // record two windows worth of sequential values + for (int i = 0; i < numberOfValues; ++i) { + long value = Math.abs(prng.nextLong()) % maximumValue; + values.add(value); + sensor.record(value); + } + + Collections.sort(values); + + int p90Index = (int)Math.ceil(((double)(90 * numberOfValues)) / 100); + int p99Index = (int)Math.ceil(((double)(99 * numberOfValues)) / 100); + + double expectedP90 = values.get(p90Index - 1); + double expectedP99 = values.get(p99Index - 1); + + assertEquals(expectedP90, (Double) p90.metricValue(), expectedP90 / 10); Review comment: Trying to build confidence in the `Percentiles` implementation and gauge the accuracy with a more complicated test. I found it was accurate to within 5% maybe 2/3 or 3/4 of the time, but it seems reasonable to expect it to be accurate to within 10% ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org