[ 
https://issues.apache.org/jira/browse/BEAM-3741?focusedWorklogId=164203&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-164203
 ]

ASF GitHub Bot logged work on BEAM-3741:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 09/Nov/18 02:01
            Start Date: 09/Nov/18 02:01
    Worklog Time Spent: 10m 
      Work Description: lukecwik commented on a change in pull request #6963: 
[BEAM-3741] Proto changes for reporting backlog/splitting/finalizing bundles.
URL: https://github.com/apache/beam/pull/6963#discussion_r232123242
 
 

 ##########
 File path: model/fn-execution/src/main/proto/beam_fn_api.proto
 ##########
 @@ -184,55 +187,72 @@ message ProcessBundleDescriptor {
   org.apache.beam.model.pipeline.v1.ApiServiceDescriptor 
state_api_service_descriptor = 7;
 }
 
-// Represents a partition of the bundle into two bundles: a "primary" and
-// a "residual", with the following properties:
-// - The work in primary and residual doesn't overlap, and combined, adds up
-//   to the work in the current bundle if the split hadn't happened.
-// - The current bundle, if it keeps executing, will have done none of the
-//   work under residual roots.
-// - The current bundle, if no further splits happen, will have done exactly
-//   the work under primary_roots.
-// For more rigorous definitions see https://s.apache.org/beam-breaking-fusion
-message BundleSplit {
-  // One of the root applications specifying the scope of work for a bundle.
-  message Application {
-    // (Required) The primitive transform to which to pass the element
-    string ptransform_id = 1;
+// One of the applications specifying the scope of work for a bundle.
+message BundleApplication {
+  // (Required) The primitive transform to which to pass the element
+  string ptransform_id = 1;
+
+  // (Required) Name of the transform's input to which to pass the element.
+  string input_id = 2;
 
-    // (Required) Name of the transform's input to which to pass the element.
-    string input_id = 2;
+  // (Required) The encoded element to pass to the transform.
+  bytes element = 3;
 
-    // (Required) The encoded element to pass to the transform.
-    bytes element = 3;
+  // Lower bound on timestamps of elements that this PTransform
+  // will produce into each of its output PCollections when invoked on this
+  // element and input. The map is keyed by the local output name of the
+  // PTransform.
+  map<string, google.protobuf.Timestamp> output_watermarks = 4;
 
-    // Approximate lower bounds on timestamps of elements that this PTransform
-    // will produce into each of its output PCollections, when invoked on this
-    // element. Keyed by the transform's local output name.
-    map<string, int64> output_watermarks = 4;
+  // Represents an estimate for the amount of currently outstanding work.
+  message Backlog {
+    // This informs Runners on how to aggregate the backlog
+    // being reported across multiple active bundles. Backlogs
+    // are aggregated using the set of partitions.
+    //
+    // For example SplittableDoFn's which consume elements from:
+    //  * a globally shared resource such as a Pubsub queue should set this
+    //    to “”.
+    //  * a shared partitioned resource should use the partition identifier.
+    //  * a uniquely partitioned resource such as a file range should set this 
to
+    //    file name + start offset.
+    bytes partition = 1;
+
+    // The estimate for the backlog.
+    oneof value {
+      // Represents an estimate for the amount of outstanding work. Values
+      // compare lexicographically.
+      bytes bytes = 1000;
 
-    // Approximate fraction of all work of the current bundle (before split)
-    // represented by invoking this Application and its downstream 
applications.
-    // The sum of fraction_of_work between all primary_roots and residual_roots
-    // must add up to approximately 1.0.
-    google.protobuf.DoubleValue fraction_of_work = 5;
+      // Whether the backlog is unknown.
+      bool is_unknown = 1001;
+    }
   }
 
-  // An an Application should be scheduled after a delay.
-  message DelayedApplication {
-    // The delay in seconds (lower bound).
-    double delay_sec = 1;
+  // (Required) An estimate for the amount outstanding work related to
+  // this application.
+  Backlog backlog = 5;
 
-    // (Required) The application that should be scheduled.
-    Application application = 2;
-  }
+  // (Required) Whether this application potentially produces an unbounded
+  // amount of data. Note that this should only be set to BOUNDED if and
+  // only if the application is known to produce a finite amount of output.
+  //
+  // Note that this is different from the backlog as the backlog represents
+  // how much work there is currently outstanding.
+  org.apache.beam.model.pipeline.v1.IsBounded.Enum is_bounded = 6;
 
-  // Root applications that should replace the current bundle.
-  repeated Application primary_roots = 1;
+  // Contains additional monitoring information related to this application.
 
 Review comment:
   Added a link to a doc that describes these additional signals.
   
   These monitoring infos give insights into a specific application, while 
the ProcessBundleProgressResponse contains monitoring info that is the 
aggregate of many applications.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 164203)
    Time Spent: 7h 50m  (was: 7h 40m)

> Proto changes for splitting over Fn API
> ---------------------------------------
>
>                 Key: BEAM-3741
>                 URL: https://issues.apache.org/jira/browse/BEAM-3741
>             Project: Beam
>          Issue Type: Sub-task
>          Components: beam-model
>            Reporter: Eugene Kirpichov
>            Assignee: Eugene Kirpichov
>            Priority: Major
>             Fix For: 2.5.0
>
>          Time Spent: 7h 50m
>  Remaining Estimate: 0h
>




--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to