JayajP commented on code in PR #29445:
URL: https://github.com/apache/beam/pull/29445#discussion_r1396180448
##########
sdks/java/core/src/main/java/org/apache/beam/sdk/util/HistogramData.java:
##########
@@ -47,6 +47,9 @@ public class HistogramData implements Serializable {
private long numTopRecords;
private long numBottomRecords;
+ private double sum_of_squared_deviations;
Review Comment:
done
##########
sdks/java/core/src/main/java/org/apache/beam/sdk/util/HistogramData.java:
##########
@@ -184,6 +193,26 @@ public synchronized void record(double value) {
buckets[bucketType.getBucketIndex(value)]++;
numBoundedBucketRecords++;
}
+ updateStatistics(value);
+ }
+
+ /**
+ * Update 'mean' and 'sum of squared deviations' statistics with the newly recorded value, using
+ * <a href="https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm">
+ * Welford's method</a>.
+ *
+ * @param value the newly recorded value
+ */
+ private void updateStatistics(double value) {
+ long count = getTotalCount();
+ if (count == 1) {
+ mean = value;
+ return;
+ }
+
+ double old_mean = mean;
+ mean = old_mean + (value - old_mean) / count;
+ sum_of_squared_deviations += (value - mean) * (value - old_mean);
Review Comment:
done
##########
sdks/java/core/src/test/java/org/apache/beam/sdk/util/HistogramDataTest.java:
##########
@@ -332,4 +332,19 @@ public void testExponentialBuckets_NumBuckets() {
HistogramData negativeScaleBucket = HistogramData.exponential(-3, 500);
assertThat(negativeScaleBucket.getBucketType().getNumBuckets(),
equalTo(4));
}
+
+ @Test
+ public void testStatistics() {
+ HistogramData histogram1 = HistogramData.linear(0, 10, 10);
+ for (int i = 0; i < 10; i++) {
+ histogram1.record(i * 10.0);
+ }
+
+ assertThat(histogram1.getMean(), equalTo(45.0));
+ double sum_of_squared_deviations = 0;
Review Comment:
done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]