robertwb commented on a change in pull request #11184: [WIP][BEAM-4374] Update 
protos related to MonitoringInfo.
URL: https://github.com/apache/beam/pull/11184#discussion_r395933271
 
 

 ##########
 File path: model/pipeline/src/main/proto/metrics.proto
 ##########
 @@ -229,101 +215,127 @@ message MonitoringInfo {
     NAMESPACE = 5 [(label_props) = { name: "NAMESPACE" }];
     NAME = 6 [(label_props) = { name: "NAME" }];
   }
+
   // A set of key+value labels which define the scope of the metric.
   // Either a well defined entity id for matching the enum names in
   // the MonitoringInfoLabels enum or any arbitrary label
   // set by a custom metric or user metric.
+  //
   // A monitoring system is expected to be able to aggregate the metrics
   // together for all updates having the same URN and labels. Some systems such
   // as Stackdriver will be able to aggregate the metrics using a subset of the
   // provided labels
-  map<string, string> labels = 5;
-
-  // The walltime of the most recent update.
-  // Useful for aggregation for latest types such as LatestInt64.
-  google.protobuf.Timestamp timestamp = 6;
+  map<string, string> labels = 4;
 }
 
 message MonitoringInfoTypeUrns {
   enum Enum {
+    // Represents an integer counter where values are summed across bundles.
+    //
+    // Encoding: <value>
+    //   - value: beam:coder:varint:v1
     SUM_INT64_TYPE = 0 [(org.apache.beam.model.pipeline.v1.beam_urn) =
-                            "beam:metrics:sum_int_64"];
-
-    DISTRIBUTION_INT64_TYPE = 1 [(org.apache.beam.model.pipeline.v1.beam_urn) =
-                                     "beam:metrics:distribution_int_64"];
-
-    LATEST_INT64_TYPE = 2 [(org.apache.beam.model.pipeline.v1.beam_urn) =
-                               "beam:metrics:latest_int_64"];
+                            "beam:metrics:sum_int64:v1"];
+
+    // Represents a double counter where values are summed across bundles.
+    //
+    // Encoding: <value>
+    //   - value: beam:coder:double:v1
+    SUM_DOUBLE_TYPE = 1 [(org.apache.beam.model.pipeline.v1.beam_urn) =
+                        "beam:metrics:sum_int64:v1"];
+
+    // Represents a distribution of an integer value where:
+    //   - count: represents the number of values seen across all bundles
+    //   - sum: represents the total of the value across all bundles
+    //   - min: represents the smallest value seen across all bundles
+    //   - max: represents the largest value seen across all bundles
+    //
+    // Encoding: <count><sum><min><max>
+    //   - count: beam:coder:varint:v1
+    //   - sum:   beam:coder:varint:v1
+    //   - min:   beam:coder:varint:v1
+    //   - max:   beam:coder:varint:v1
+    DISTRIBUTION_INT64_TYPE = 2 [(org.apache.beam.model.pipeline.v1.beam_urn) =
+                                     "beam:metrics:distribution_int64:v1"];
+
+    // Represents a distribution of a double value where:
+    //   - count: represents the number of values seen across all bundles
+    //   - sum: represents the total of the value across all bundles
+    //   - min: represents the smallest value seen across all bundles
+    //   - max: represents the largest value seen across all bundles
+    //
+    // Encoding: <count><sum><min><max>
+    //   - count: beam:coder:varint:v1
+    //   - sum:   beam:coder:double:v1
+    //   - min:   beam:coder:double:v1
+    //   - max:   beam:coder:double:v1
+    DISTRIBUTION_DOUBLE_TYPE = 3 [(org.apache.beam.model.pipeline.v1.beam_urn) =
+                                 "beam:metrics:distribution_int64:v1"];
+
+    // Represents the latest seen integer value. The timestamp is used to
+    // provide an "ordering" over multiple values to determine which is the
+    // latest.
+    //
+    // Encoding: <timestamp><value>
+    //   - timestamp: beam:coder:varint:v1     (milliseconds since epoch)
+    //   - value:     beam:coder:varint:v1
+    LATEST_INT64_TYPE = 4 [(org.apache.beam.model.pipeline.v1.beam_urn) =
+                               "beam:metrics:latest_int64:v1"];
+
+    // Represents the latest seen double value. The timestamp is used to
+    // provide an "ordering" over multiple values to determine which is the
+    // latest.
+    //
+    // Encoding: <timestamp><value>
+    //   - timestamp: beam:coder:varint:v1     (milliseconds since epoch)
+    //   - value:     beam:coder:double:v1
+    LATEST_DOUBLE_TYPE = 5 [(org.apache.beam.model.pipeline.v1.beam_urn) =
+                           "beam:metrics:latest_int64:v1"];
+
+    // Represents the largest set of integer values seen across bundles.
+    //
+    // Encoding: <value1><value2>...<valueN>
+    //   - valueX: beam:coder:varint:v1
+    TOP_N_INT64_TYPE = 6 [(org.apache.beam.model.pipeline.v1.beam_urn) =
+                           "beam:metrics:top_n_int64:v1"];
+
+    // Represents the largest set of double values seen across bundles.
+    //
+    // Encoding: <value1><value2>...<valueN>
+    //   - valueX: beam:coder:double:v1
+    TOP_N_DOUBLE_TYPE = 7 [(org.apache.beam.model.pipeline.v1.beam_urn) =
+                            "beam:metrics:top_n_int64:v1"];
+
+    // Represents the smallest set of integer values seen across bundles.
+    //
+    // Encoding: <value1><value2>...<valueN>
+    //   - valueX: beam:coder:varint:v1
+    BOTTOM_N_INT64_TYPE = 8 [(org.apache.beam.model.pipeline.v1.beam_urn) =
+                          "beam:metrics:bottom_n_int64:v1"];
+
+    // Represents the smallest set of double values seen across bundles.
+    //
+    // Encoding: <value1><value2>...<valueN>
+    //   - valueX: beam:coder:double:v1
+    BOTTOM_N_DOUBLE_TYPE = 9 [(org.apache.beam.model.pipeline.v1.beam_urn) =
+                           "beam:metrics:bottom_n_int64:v1"];
+
+    // Encoding: <value1><value2>...<valueN>
+    //   - valueX: beam:coder:double:v1
+    PROGRESS_TYPE = 10 [(org.apache.beam.model.pipeline.v1.beam_urn) =
 
 Review comment:
  We don't want to report progress for each processed element, as this would
become an unreasonably large value. We could report the aggregated progress of
all finished elements, plus the partial progress of the in-flight ones, in which
case the sum becomes a refinement of the done-element count. There's still no
useful way to define cross-bundle aggregation (without resulting in an
absolutely huge value), so I would lean towards this being its own type.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to