arunpandianp commented on code in PR #38458:
URL: https://github.com/apache/beam/pull/38458#discussion_r3225490580


##########
runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/state/WindmillStateCache.java:
##########
@@ -99,22 +108,48 @@ public interface Builder {
 
     Builder setSupportMapViaMultimap(boolean supportMapViaMultimap);
 
+    Builder setMaxCachedValueBytes(long maxCachedValueBytes);
+
+    Builder setEnableHistogram(boolean enableHistogram);
+
     WindmillStateCache build();
   }
 
   public static Builder builder() {
-    return new 
AutoBuilder_WindmillStateCache_Builder().setSupportMapViaMultimap(false);
+    return new AutoBuilder_WindmillStateCache_Builder()
+        .setSupportMapViaMultimap(false)
+        .setMaxCachedValueBytes(Long.MAX_VALUE)
+        .setEnableHistogram(true);
+  }
+
+  public void setMaxCachedValueBytesOverride(long limit) {
+    this.maxCachedValueBytesOverride = limit;
+  }
+
+  private long getMaxCachedValueBytesLimit() {
+    long override = maxCachedValueBytesOverride;
+    return override >= 0 ? override : defaultMaxCachedValueBytes;
   }
 
   private EntryStats calculateEntryStats() {
     EntryStats stats = new EntryStats();
     BiConsumer<StateId, StateCacheEntry> consumer =
         (stateId, stateCacheEntry) -> {
           stats.entries++;
-          stats.idWeight += stateId.getWeight();
-          stats.entryWeight += stateCacheEntry.getWeight();
+          long idWeight = stateId.getWeight();
+          stats.idWeight += idWeight;
+          long entryWeight = stateCacheEntry.getWeight();
+          stats.entryWeight += entryWeight;
           stats.entryValues += stateCacheEntry.values.size();
           stats.maxEntryValues = Math.max(stats.maxEntryValues, 
stateCacheEntry.values.size());
+          if (enableHistogram) {
+            stats.addKeyWeight(idWeight);
+            stats.addEntryWeight(entryWeight);
+            stateCacheEntry.values.forEach(
+                (encodedAddress, weightedValue) -> {

Review Comment:
   This is executed once per debug capture report (every 10 minutes). I think 
computing the histograms here is better than incurring the overhead of keeping 
them in sync during processing. I added an experiment to disable the histogram 
stats if needed. Are there any other options? 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to