This is an automated email from the ASF dual-hosted git repository.
altay pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new f0a7ee5 Update python Dataflow API, and add FlexRS pipeline option. (#7932)
f0a7ee5 is described below
commit f0a7ee5f65068260f85f3b469db25f2f8fdc1c0d
Author: Eric Roshan-Eisner <[email protected]>
AuthorDate: Wed Feb 27 16:20:26 2019 -0800
Update python Dataflow API, and add FlexRS pipeline option. (#7932)
* Update python dataflow API.
* Add FlexRS pipeline option in Dataflow python runner
---
.../python/apache_beam/options/pipeline_options.py | 4 +
.../runners/dataflow/internal/apiclient.py | 9 +
.../runners/dataflow/internal/apiclient_test.py | 38 ++
.../clients/dataflow/dataflow_v1b3_client.py | 136 ++++-
.../clients/dataflow/dataflow_v1b3_messages.py | 584 +++++++++++++++++----
5 files changed, 660 insertions(+), 111 deletions(-)
diff --git a/sdks/python/apache_beam/options/pipeline_options.py b/sdks/python/apache_beam/options/pipeline_options.py
index 8c60b6b..22a0c0a 100644
--- a/sdks/python/apache_beam/options/pipeline_options.py
+++ b/sdks/python/apache_beam/options/pipeline_options.py
@@ -418,6 +418,10 @@ class GoogleCloudOptions(PipelineOptions):
default=None,
help='Set a Google Cloud KMS key name to be used in '
'Dataflow state operations (GBK, Streaming).')
+ parser.add_argument('--flexrs_goal',
+ default=None,
+ choices=['COST_OPTIMIZED', 'SPEED_OPTIMIZED'],
+ help='Set the Flexible Resource Scheduling mode')
def validate(self, validator):
errors = []
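For reference, a minimal sketch of how a pipeline could opt into FlexRS once this option is available; the project and bucket names below are placeholders:

    from apache_beam.options.pipeline_options import GoogleCloudOptions, PipelineOptions

    options = PipelineOptions([
        '--project', 'my-project',                  # placeholder project
        '--temp_location', 'gs://my-bucket/temp',   # placeholder bucket
        '--flexrs_goal', 'COST_OPTIMIZED',          # or SPEED_OPTIMIZED; omit for default scheduling
    ])
    print(options.view_as(GoogleCloudOptions).flexrs_goal)  # -> 'COST_OPTIMIZED'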
diff --git a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
index 9533dd4..1903982 100644
--- a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
+++ b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
@@ -190,6 +190,15 @@ class Environment(object):
if ('use_multiple_sdk_containers' not in debug_options_experiments and
'no_use_multiple_sdk_containers' not in debug_options_experiments):
self.debug_options.experiments.append('use_multiple_sdk_containers')
+ # FlexRS
+ if self.google_cloud_options.flexrs_goal == 'COST_OPTIMIZED':
+ self.proto.flexResourceSchedulingGoal = (
+ dataflow.Environment.FlexResourceSchedulingGoalValueValuesEnum.
+ FLEXRS_COST_OPTIMIZED)
+ elif self.google_cloud_options.flexrs_goal == 'SPEED_OPTIMIZED':
+ self.proto.flexResourceSchedulingGoal = (
+ dataflow.Environment.FlexResourceSchedulingGoalValueValuesEnum.
+ FLEXRS_SPEED_OPTIMIZED)
# Experiments
if self.debug_options.experiments:
for experiment in self.debug_options.experiments:
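The branch added above translates the string option into the generated enum. An illustrative stand-alone sketch of that mapping (the runner itself keeps the explicit if/elif shown in the hunk):

    from apache_beam.runners.dataflow.internal.clients import dataflow

    _goal = dataflow.Environment.FlexResourceSchedulingGoalValueValuesEnum
    FLEXRS_GOALS = {
        'COST_OPTIMIZED': _goal.FLEXRS_COST_OPTIMIZED,
        'SPEED_OPTIMIZED': _goal.FLEXRS_SPEED_OPTIMIZED,
    }
    print(FLEXRS_GOALS['COST_OPTIMIZED'])  # leaving --flexrs_goal unset leaves the proto field as None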
diff --git a/sdks/python/apache_beam/runners/dataflow/internal/apiclient_test.py b/sdks/python/apache_beam/runners/dataflow/internal/apiclient_test.py
index b3fe941..0d8fc43 100644
--- a/sdks/python/apache_beam/runners/dataflow/internal/apiclient_test.py
+++ b/sdks/python/apache_beam/runners/dataflow/internal/apiclient_test.py
@@ -99,6 +99,44 @@ class UtilTest(unittest.TestCase):
self.assertEqual(env.proto.workerPools[0].subnetwork,
'/regions/MY/subnetworks/SUBNETWORK')
+ def test_flexrs_blank(self):
+ pipeline_options = PipelineOptions(
+ ['--temp_location', 'gs://any-location/temp'])
+
+ env = apiclient.Environment([], #packages
+ pipeline_options,
+ '2.0.0', #any environment version
+ FAKE_PIPELINE_URL)
+ self.assertEqual(env.proto.flexResourceSchedulingGoal, None)
+
+ def test_flexrs_cost(self):
+ pipeline_options = PipelineOptions(
+ ['--flexrs_goal', 'COST_OPTIMIZED',
+ '--temp_location', 'gs://any-location/temp'])
+
+ env = apiclient.Environment([], #packages
+ pipeline_options,
+ '2.0.0', #any environment version
+ FAKE_PIPELINE_URL)
+ self.assertEqual(
+ env.proto.flexResourceSchedulingGoal,
+ (dataflow.Environment.FlexResourceSchedulingGoalValueValuesEnum.
+ FLEXRS_COST_OPTIMIZED))
+
+ def test_flexrs_speed(self):
+ pipeline_options = PipelineOptions(
+ ['--flexrs_goal', 'SPEED_OPTIMIZED',
+ '--temp_location', 'gs://any-location/temp'])
+
+ env = apiclient.Environment([], #packages
+ pipeline_options,
+ '2.0.0', #any environment version
+ FAKE_PIPELINE_URL)
+ self.assertEqual(
+ env.proto.flexResourceSchedulingGoal,
+ (dataflow.Environment.FlexResourceSchedulingGoalValueValuesEnum.
+ FLEXRS_SPEED_OPTIMIZED))
+
def test_invalid_default_job_name(self):
# Regexp for job names in dataflow.
regexp = '^[a-z]([-a-z0-9]{0,61}[a-z0-9])?$'
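Outside the test suite, the same check can be reproduced interactively; this sketch mirrors the tests above, with an arbitrary placeholder standing in for FAKE_PIPELINE_URL:

    from apache_beam.options.pipeline_options import PipelineOptions
    from apache_beam.runners.dataflow.internal import apiclient

    pipeline_options = PipelineOptions(
        ['--flexrs_goal', 'SPEED_OPTIMIZED',
         '--temp_location', 'gs://any-location/temp'])
    env = apiclient.Environment([],                # packages
                                pipeline_options,
                                '2.0.0',           # any environment version
                                'fake-pipeline-url')
    print(env.proto.flexResourceSchedulingGoal)    # FLEXRS_SPEED_OPTIMIZED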
diff --git a/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_client.py b/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_client.py
index 6be79a6..c830be8 100644
--- a/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_client.py
+++ b/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_client.py
@@ -15,7 +15,7 @@
# limitations under the License.
#
-"""Generated client library for dataflow version v1b3."""
+"Generated client library for dataflow version v1b3."""
# NOTE: This file is autogenerated and should not be edited by hand.
from __future__ import absolute_import
@@ -144,6 +144,12 @@ class DataflowV1b3(base_api.BaseApiClient):
def List(self, request, global_params=None):
r"""Request the job status.
+To request the status of a job, we recommend using
+`projects.locations.jobs.messages.list` with a [regional endpoint]
+(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
+`projects.jobs.messages.list` is not recommended, as you can only request
+the status of jobs that are running in `us-central1`.
+
Args:
request: (DataflowProjectsJobsMessagesListRequest) input message
global_params: (StandardQueryParameters, default: None) global
arguments
@@ -268,6 +274,12 @@ class DataflowV1b3(base_api.BaseApiClient):
def Create(self, request, global_params=None):
r"""Creates a Cloud Dataflow job.
+To create a job, we recommend using `projects.locations.jobs.create` with a
+[regional endpoint]
+(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
+`projects.jobs.create` is not recommended, as your job will always start
+in `us-central1`.
+
Args:
request: (DataflowProjectsJobsCreateRequest) input message
global_params: (StandardQueryParameters, default: None) global
arguments
@@ -294,6 +306,12 @@ class DataflowV1b3(base_api.BaseApiClient):
def Get(self, request, global_params=None):
r"""Gets the state of the specified Cloud Dataflow job.
+To get the state of a job, we recommend using `projects.locations.jobs.get`
+with a [regional endpoint]
+(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
+`projects.jobs.get` is not recommended, as you can only get the state of
+jobs that are running in `us-central1`.
+
Args:
request: (DataflowProjectsJobsGetRequest) input message
global_params: (StandardQueryParameters, default: None) global
arguments
@@ -320,6 +338,12 @@ class DataflowV1b3(base_api.BaseApiClient):
def GetMetrics(self, request, global_params=None):
r"""Request the job status.
+To request the status of a job, we recommend using
+`projects.locations.jobs.getMetrics` with a [regional endpoint]
+(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
+`projects.jobs.getMetrics` is not recommended, as you can only request the
+status of jobs that are running in `us-central1`.
+
Args:
request: (DataflowProjectsJobsGetMetricsRequest) input message
global_params: (StandardQueryParameters, default: None) global
arguments
@@ -344,7 +368,14 @@ class DataflowV1b3(base_api.BaseApiClient):
)
def List(self, request, global_params=None):
- r"""List the jobs of a project in a given region.
+ r"""List the jobs of a project.
+
+To list the jobs of a project in a region, we recommend using
+`projects.locations.jobs.get` with a [regional endpoint]
+(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). To
list all jobs across all regions, use `projects.jobs.aggregated`. Using
+`projects.jobs.list` is not recommended, as you can only get the list of
+jobs that are running in `us-central1`.
Args:
request: (DataflowProjectsJobsListRequest) input message
@@ -369,9 +400,41 @@ class DataflowV1b3(base_api.BaseApiClient):
supports_download=False,
)
+ def Snapshot(self, request, global_params=None):
+ r"""Snapshot the state of a streaming job.
+
+ Args:
+ request: (DataflowProjectsJobsSnapshotRequest) input message
+ global_params: (StandardQueryParameters, default: None) global
arguments
+ Returns:
+ (Snapshot) The response message.
+ """
+ config = self.GetMethodConfig('Snapshot')
+ return self._RunMethod(
+ config, request, global_params=global_params)
+
+ Snapshot.method_config = lambda: base_api.ApiMethodInfo(
+ http_method=u'POST',
+ method_id=u'dataflow.projects.jobs.snapshot',
+ ordered_params=[u'projectId', u'jobId'],
+ path_params=[u'jobId', u'projectId'],
+ query_params=[],
+ relative_path=u'v1b3/projects/{projectId}/jobs/{jobId}:snapshot',
+ request_field=u'snapshotJobRequest',
+ request_type_name=u'DataflowProjectsJobsSnapshotRequest',
+ response_type_name=u'Snapshot',
+ supports_download=False,
+ )
+
def Update(self, request, global_params=None):
r"""Updates the state of an existing Cloud Dataflow job.
+To update the state of an existing job, we recommend using
+`projects.locations.jobs.update` with a [regional endpoint]
+(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
+`projects.jobs.update` is not recommended, as you can only update the state
+of jobs that are running in `us-central1`.
+
Args:
request: (DataflowProjectsJobsUpdateRequest) input message
global_params: (StandardQueryParameters, default: None) global
arguments
@@ -470,6 +533,12 @@ class DataflowV1b3(base_api.BaseApiClient):
def List(self, request, global_params=None):
r"""Request the job status.
+To request the status of a job, we recommend using
+`projects.locations.jobs.messages.list` with a [regional endpoint]
+(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
+`projects.jobs.messages.list` is not recommended, as you can only request
+the status of jobs that are running in `us-central1`.
+
Args:
request: (DataflowProjectsLocationsJobsMessagesListRequest) input
message
global_params: (StandardQueryParameters, default: None) global
arguments
@@ -568,6 +637,12 @@ class DataflowV1b3(base_api.BaseApiClient):
def Create(self, request, global_params=None):
r"""Creates a Cloud Dataflow job.
+To create a job, we recommend using `projects.locations.jobs.create` with a
+[regional endpoint]
+(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
+`projects.jobs.create` is not recommended, as your job will always start
+in `us-central1`.
+
Args:
request: (DataflowProjectsLocationsJobsCreateRequest) input message
global_params: (StandardQueryParameters, default: None) global
arguments
@@ -594,6 +669,12 @@ class DataflowV1b3(base_api.BaseApiClient):
def Get(self, request, global_params=None):
r"""Gets the state of the specified Cloud Dataflow job.
+To get the state of a job, we recommend using `projects.locations.jobs.get`
+with a [regional endpoint]
+(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
+`projects.jobs.get` is not recommended, as you can only get the state of
+jobs that are running in `us-central1`.
+
Args:
request: (DataflowProjectsLocationsJobsGetRequest) input message
global_params: (StandardQueryParameters, default: None) global
arguments
@@ -620,6 +701,12 @@ class DataflowV1b3(base_api.BaseApiClient):
def GetMetrics(self, request, global_params=None):
r"""Request the job status.
+To request the status of a job, we recommend using
+`projects.locations.jobs.getMetrics` with a [regional endpoint]
+(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
+`projects.jobs.getMetrics` is not recommended, as you can only request the
+status of jobs that are running in `us-central1`.
+
Args:
request: (DataflowProjectsLocationsJobsGetMetricsRequest) input message
global_params: (StandardQueryParameters, default: None) global
arguments
@@ -644,7 +731,14 @@ class DataflowV1b3(base_api.BaseApiClient):
)
def List(self, request, global_params=None):
- r"""List the jobs of a project in a given region.
+ r"""List the jobs of a project.
+
+To list the jobs of a project in a region, we recommend using
+`projects.locations.jobs.get` with a [regional endpoint]
+(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). To
list all jobs across all regions, use `projects.jobs.aggregated`. Using
+`projects.jobs.list` is not recommended, as you can only get the list of
+jobs that are running in `us-central1`.
Args:
request: (DataflowProjectsLocationsJobsListRequest) input message
@@ -669,9 +763,41 @@ class DataflowV1b3(base_api.BaseApiClient):
supports_download=False,
)
+ def Snapshot(self, request, global_params=None):
+ r"""Snapshot the state of a streaming job.
+
+ Args:
+ request: (DataflowProjectsLocationsJobsSnapshotRequest) input message
+ global_params: (StandardQueryParameters, default: None) global
arguments
+ Returns:
+ (Snapshot) The response message.
+ """
+ config = self.GetMethodConfig('Snapshot')
+ return self._RunMethod(
+ config, request, global_params=global_params)
+
+ Snapshot.method_config = lambda: base_api.ApiMethodInfo(
+ http_method=u'POST',
+ method_id=u'dataflow.projects.locations.jobs.snapshot',
+ ordered_params=[u'projectId', u'location', u'jobId'],
+ path_params=[u'jobId', u'location', u'projectId'],
+ query_params=[],
+ relative_path=u'v1b3/projects/{projectId}/locations/{location}/jobs/{jobId}:snapshot',
+ request_field=u'snapshotJobRequest',
+ request_type_name=u'DataflowProjectsLocationsJobsSnapshotRequest',
+ response_type_name=u'Snapshot',
+ supports_download=False,
+ )
+
def Update(self, request, global_params=None):
r"""Updates the state of an existing Cloud Dataflow job.
+To update the state of an existing job, we recommend using
+`projects.locations.jobs.update` with a [regional endpoint]
+(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
+`projects.jobs.update` is not recommended, as you can only update the state
+of jobs that are running in `us-central1`.
+
Args:
request: (DataflowProjectsLocationsJobsUpdateRequest) input message
global_params: (StandardQueryParameters, default: None) global
arguments
@@ -775,7 +901,7 @@ class DataflowV1b3(base_api.BaseApiClient):
method_id=u'dataflow.projects.locations.templates.launch',
ordered_params=[u'projectId', u'location'],
path_params=[u'location', u'projectId'],
- query_params=[u'gcsPath', u'validateOnly'],
+ query_params=[u'dynamicTemplate_gcsPath', u'dynamicTemplate_stagingLocation', u'gcsPath', u'validateOnly'],
relative_path=u'v1b3/projects/{projectId}/locations/{location}/templates:launch',
request_field=u'launchTemplateParameters',
request_type_name=u'DataflowProjectsLocationsTemplatesLaunchRequest',
@@ -899,7 +1025,7 @@ class DataflowV1b3(base_api.BaseApiClient):
method_id=u'dataflow.projects.templates.launch',
ordered_params=[u'projectId'],
path_params=[u'projectId'],
- query_params=[u'gcsPath', u'location', u'validateOnly'],
+ query_params=[u'dynamicTemplate_gcsPath', u'dynamicTemplate_stagingLocation', u'gcsPath', u'location', u'validateOnly'],
relative_path=u'v1b3/projects/{projectId}/templates:launch',
request_field=u'launchTemplateParameters',
request_type_name=u'DataflowProjectsTemplatesLaunchRequest',
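The regenerated client also adds the Snapshot methods shown above. A hedged sketch of a direct call; the service attribute name follows apitools conventions, credentials/endpoint setup is omitted, and all IDs are placeholders (Beam normally goes through apiclient.DataflowApplicationClient instead of calling this directly):

    from apache_beam.runners.dataflow.internal.clients import dataflow

    client = dataflow.DataflowV1b3()  # would need real credentials to execute
    request = dataflow.DataflowProjectsLocationsJobsSnapshotRequest(
        projectId='my-project',                     # placeholder
        location='us-central1',                     # placeholder
        jobId='2019-02-27_00_00_00-1234567890',     # placeholder
        snapshotJobRequest=dataflow.SnapshotJobRequest(ttl='3600s'))
    snapshot = client.projects_locations_jobs.Snapshot(request)
    print(snapshot.id, snapshot.creationTime)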
diff --git a/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_messages.py b/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_messages.py
index 279a140..bfd80c2 100644
--- a/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_messages.py
+++ b/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_messages.py
@@ -95,11 +95,15 @@ class ApproximateSplitRequest(_messages.Message):
Fields:
fractionConsumed: A fraction at which to split the work item, from 0.0
(beginning of the input) to 1.0 (end of the input).
+ fractionOfRemainder: The fraction of the remainder of work to split the
+ work item at, from 0.0 (split at the current position) to 1.0 (end of
+ the input).
position: A Position at which to split the work item.
"""
fractionConsumed = _messages.FloatField(1)
- position = _messages.MessageField('Position', 2)
+ fractionOfRemainder = _messages.FloatField(2)
+ position = _messages.MessageField('Position', 3)
class AutoscalingEvent(_messages.Message):
@@ -507,7 +511,9 @@ class CreateJobFromTemplateRequest(_messages.Message):
create the job. Must be a valid Cloud Storage URL, beginning with
`gs://`.
jobName: Required. The job name to use for the created job.
- location: The location to which to direct the request.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
+ which to direct the request.
parameters: The runtime parameters to pass to the job.
"""
@@ -579,7 +585,9 @@ class
DataflowProjectsJobsAggregatedRequest(_messages.Message):
Fields:
filter: The kind of filter to use.
- location: The location that contains this job.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains this job.
pageSize: If there are many jobs, limit response to at most this many. The
actual number of jobs returned will be the lesser of max_responses and
an unspecified server-defined limit.
@@ -635,7 +643,9 @@ class DataflowProjectsJobsCreateRequest(_messages.Message):
Fields:
job: A Job resource to be passed as the request body.
- location: The location that contains this job.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains this job.
projectId: The ID of the Cloud Platform project that the job belongs to.
replaceJobId: Deprecated. This field is now in the Job message.
view: The level of information requested in response.
@@ -697,7 +707,9 @@ class
DataflowProjectsJobsGetMetricsRequest(_messages.Message):
Fields:
jobId: The job to get messages for.
- location: The location which contains the job specified by job_id.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains the job specified by job_id.
projectId: A project id.
startTime: Return only metric data that has changed since this time.
Default is to return all information about all metrics for the job.
@@ -717,7 +729,9 @@ class DataflowProjectsJobsGetRequest(_messages.Message):
Fields:
jobId: The job ID.
- location: The location that contains this job.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains this job.
projectId: The ID of the Cloud Platform project that the job belongs to.
view: The level of information requested in response.
"""
@@ -752,7 +766,9 @@ class DataflowProjectsJobsListRequest(_messages.Message):
Fields:
filter: The kind of filter to use.
- location: The location that contains this job.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains this job.
pageSize: If there are many jobs, limit response to at most this many. The
actual number of jobs returned will be the lesser of max_responses and
an unspecified server-defined limit.
@@ -811,7 +827,9 @@ class
DataflowProjectsJobsMessagesListRequest(_messages.Message):
endTime: Return only messages with timestamps < end_time. The default is
now (i.e. return up to the latest messages available).
jobId: The job to get messages about.
- location: The location which contains the job specified by job_id.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains the job specified by job_id.
minimumImportance: Filter to only get messages with importance >= level
pageSize: If specified, determines the maximum number of messages to
return. If unspecified, the service may choose an appropriate default,
@@ -853,13 +871,30 @@ class
DataflowProjectsJobsMessagesListRequest(_messages.Message):
startTime = _messages.StringField(8)
+class DataflowProjectsJobsSnapshotRequest(_messages.Message):
+ r"""A DataflowProjectsJobsSnapshotRequest object.
+
+ Fields:
+ jobId: The job to be snapshotted.
+ projectId: The project which owns the job to be snapshotted.
+ snapshotJobRequest: A SnapshotJobRequest resource to be passed as the
+ request body.
+ """
+
+ jobId = _messages.StringField(1, required=True)
+ projectId = _messages.StringField(2, required=True)
+ snapshotJobRequest = _messages.MessageField('SnapshotJobRequest', 3)
+
+
class DataflowProjectsJobsUpdateRequest(_messages.Message):
r"""A DataflowProjectsJobsUpdateRequest object.
Fields:
job: A Job resource to be passed as the request body.
jobId: The job ID.
- location: The location that contains this job.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains this job.
projectId: The ID of the Cloud Platform project that the job belongs to.
"""
@@ -907,7 +942,9 @@ class
DataflowProjectsLocationsJobsCreateRequest(_messages.Message):
Fields:
job: A Job resource to be passed as the request body.
- location: The location that contains this job.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains this job.
projectId: The ID of the Cloud Platform project that the job belongs to.
replaceJobId: Deprecated. This field is now in the Job message.
view: The level of information requested in response.
@@ -941,7 +978,9 @@ class
DataflowProjectsLocationsJobsDebugGetConfigRequest(_messages.Message):
getDebugConfigRequest: A GetDebugConfigRequest resource to be passed as
the request body.
jobId: The job id.
- location: The location which contains the job specified by job_id.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains the job specified by job_id.
projectId: The project id.
"""
@@ -956,7 +995,9 @@ class
DataflowProjectsLocationsJobsDebugSendCaptureRequest(_messages.Message):
Fields:
jobId: The job id.
- location: The location which contains the job specified by job_id.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains the job specified by job_id.
projectId: The project id.
sendDebugCaptureRequest: A SendDebugCaptureRequest resource to be passed
as the request body.
@@ -973,7 +1014,9 @@ class
DataflowProjectsLocationsJobsGetMetricsRequest(_messages.Message):
Fields:
jobId: The job to get messages for.
- location: The location which contains the job specified by job_id.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains the job specified by job_id.
projectId: A project id.
startTime: Return only metric data that has changed since this time.
Default is to return all information about all metrics for the job.
@@ -993,7 +1036,9 @@ class
DataflowProjectsLocationsJobsGetRequest(_messages.Message):
Fields:
jobId: The job ID.
- location: The location that contains this job.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains this job.
projectId: The ID of the Cloud Platform project that the job belongs to.
view: The level of information requested in response.
"""
@@ -1028,7 +1073,9 @@ class
DataflowProjectsLocationsJobsListRequest(_messages.Message):
Fields:
filter: The kind of filter to use.
- location: The location that contains this job.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains this job.
pageSize: If there are many jobs, limit response to at most this many. The
actual number of jobs returned will be the lesser of max_responses and
an unspecified server-defined limit.
@@ -1087,7 +1134,9 @@ class
DataflowProjectsLocationsJobsMessagesListRequest(_messages.Message):
endTime: Return only messages with timestamps < end_time. The default is
now (i.e. return up to the latest messages available).
jobId: The job to get messages about.
- location: The location which contains the job specified by job_id.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains the job specified by job_id.
minimumImportance: Filter to only get messages with importance >= level
pageSize: If specified, determines the maximum number of messages to
return. If unspecified, the service may choose an appropriate default,
@@ -1129,13 +1178,32 @@ class
DataflowProjectsLocationsJobsMessagesListRequest(_messages.Message):
startTime = _messages.StringField(8)
+class DataflowProjectsLocationsJobsSnapshotRequest(_messages.Message):
+ r"""A DataflowProjectsLocationsJobsSnapshotRequest object.
+
+ Fields:
+ jobId: The job to be snapshotted.
+ location: The location that contains this job.
+ projectId: The project which owns the job to be snapshotted.
+ snapshotJobRequest: A SnapshotJobRequest resource to be passed as the
+ request body.
+ """
+
+ jobId = _messages.StringField(1, required=True)
+ location = _messages.StringField(2, required=True)
+ projectId = _messages.StringField(3, required=True)
+ snapshotJobRequest = _messages.MessageField('SnapshotJobRequest', 4)
+
+
class DataflowProjectsLocationsJobsUpdateRequest(_messages.Message):
r"""A DataflowProjectsLocationsJobsUpdateRequest object.
Fields:
job: A Job resource to be passed as the request body.
jobId: The job ID.
- location: The location that contains this job.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains this job.
projectId: The ID of the Cloud Platform project that the job belongs to.
"""
@@ -1152,7 +1220,9 @@ class
DataflowProjectsLocationsJobsWorkItemsLeaseRequest(_messages.Message):
jobId: Identifies the workflow job this worker belongs to.
leaseWorkItemRequest: A LeaseWorkItemRequest resource to be passed as the
request body.
- location: The location which contains the WorkItem's job.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains the WorkItem's job.
projectId: Identifies the project this worker belongs to.
"""
@@ -1167,7 +1237,9 @@ class
DataflowProjectsLocationsJobsWorkItemsReportStatusRequest(_messages.Messag
Fields:
jobId: The job which the WorkItem is part of.
- location: The location which contains the WorkItem's job.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains the WorkItem's job.
projectId: The project which owns the WorkItem's job.
reportWorkItemStatusRequest: A ReportWorkItemStatusRequest resource to be
passed as the request body.
@@ -1185,7 +1257,9 @@ class
DataflowProjectsLocationsTemplatesCreateRequest(_messages.Message):
Fields:
createJobFromTemplateRequest: A CreateJobFromTemplateRequest resource to
be passed as the request body.
- location: The location to which to direct the request.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
+ which to direct the request.
projectId: Required. The ID of the Cloud Platform project that the job
belongs to.
"""
@@ -1203,9 +1277,10 @@ class
DataflowProjectsLocationsTemplatesGetRequest(_messages.Message):
Fields:
gcsPath: Required. A Cloud Storage path to the template from which to
- create the job. Must be a valid Cloud Storage URL, beginning with
- `gs://`.
- location: The location to which to direct the request.
+ create the job. Must be valid Cloud Storage URL, beginning with 'gs://'.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
+ which to direct the request.
projectId: Required. The ID of the Cloud Platform project that the job
belongs to.
view: The view to retrieve. Defaults to METADATA_ONLY.
@@ -1229,29 +1304,39 @@ class
DataflowProjectsLocationsTemplatesLaunchRequest(_messages.Message):
r"""A DataflowProjectsLocationsTemplatesLaunchRequest object.
Fields:
- gcsPath: Required. A Cloud Storage path to the template from which to
- create the job. Must be valid Cloud Storage URL, beginning with 'gs://'.
+ dynamicTemplate_gcsPath: Path to dynamic template spec file on GCS. The
+ file must be a Json serialized DynamicTemplateFieSpec object.
+ dynamicTemplate_stagingLocation: Cloud Storage path for staging
+ dependencies. Must be a valid Cloud Storage URL, beginning with `gs://`.
+ gcsPath: A Cloud Storage path to the template from which to create the
+ job. Must be valid Cloud Storage URL, beginning with 'gs://'.
launchTemplateParameters: A LaunchTemplateParameters resource to be passed
as the request body.
- location: The location to which to direct the request.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
+ which to direct the request.
projectId: Required. The ID of the Cloud Platform project that the job
belongs to.
validateOnly: If true, the request is validated but not actually executed.
Defaults to false.
"""
- gcsPath = _messages.StringField(1)
- launchTemplateParameters = _messages.MessageField('LaunchTemplateParameters', 2)
- location = _messages.StringField(3, required=True)
- projectId = _messages.StringField(4, required=True)
- validateOnly = _messages.BooleanField(5)
+ dynamicTemplate_gcsPath = _messages.StringField(1)
+ dynamicTemplate_stagingLocation = _messages.StringField(2)
+ gcsPath = _messages.StringField(3)
+ launchTemplateParameters = _messages.MessageField('LaunchTemplateParameters', 4)
+ location = _messages.StringField(5, required=True)
+ projectId = _messages.StringField(6, required=True)
+ validateOnly = _messages.BooleanField(7)
class DataflowProjectsLocationsWorkerMessagesRequest(_messages.Message):
r"""A DataflowProjectsLocationsWorkerMessagesRequest object.
Fields:
- location: The location which contains the job
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains the job.
projectId: The project to send the WorkerMessages to.
sendWorkerMessagesRequest: A SendWorkerMessagesRequest resource to be
passed as the request body.
@@ -1284,9 +1369,10 @@ class
DataflowProjectsTemplatesGetRequest(_messages.Message):
Fields:
gcsPath: Required. A Cloud Storage path to the template from which to
- create the job. Must be a valid Cloud Storage URL, beginning with
- `gs://`.
- location: The location to which to direct the request.
+ create the job. Must be valid Cloud Storage URL, beginning with 'gs://'.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
+ which to direct the request.
projectId: Required. The ID of the Cloud Platform project that the job
belongs to.
view: The view to retrieve. Defaults to METADATA_ONLY.
@@ -1310,22 +1396,30 @@ class
DataflowProjectsTemplatesLaunchRequest(_messages.Message):
r"""A DataflowProjectsTemplatesLaunchRequest object.
Fields:
- gcsPath: Required. A Cloud Storage path to the template from which to
- create the job. Must be valid Cloud Storage URL, beginning with 'gs://'.
+ dynamicTemplate_gcsPath: Path to dynamic template spec file on GCS. The
+ file must be a Json serialized DynamicTemplateFieSpec object.
+ dynamicTemplate_stagingLocation: Cloud Storage path for staging
+ dependencies. Must be a valid Cloud Storage URL, beginning with `gs://`.
+ gcsPath: A Cloud Storage path to the template from which to create the
+ job. Must be valid Cloud Storage URL, beginning with 'gs://'.
launchTemplateParameters: A LaunchTemplateParameters resource to be passed
as the request body.
- location: The location to which to direct the request.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
+ which to direct the request.
projectId: Required. The ID of the Cloud Platform project that the job
belongs to.
validateOnly: If true, the request is validated but not actually executed.
Defaults to false.
"""
- gcsPath = _messages.StringField(1)
- launchTemplateParameters = _messages.MessageField('LaunchTemplateParameters', 2)
- location = _messages.StringField(3)
- projectId = _messages.StringField(4, required=True)
- validateOnly = _messages.BooleanField(5)
+ dynamicTemplate_gcsPath = _messages.StringField(1)
+ dynamicTemplate_stagingLocation = _messages.StringField(2)
+ gcsPath = _messages.StringField(3)
+ launchTemplateParameters = _messages.MessageField('LaunchTemplateParameters', 4)
+ location = _messages.StringField(5)
+ projectId = _messages.StringField(6, required=True)
+ validateOnly = _messages.BooleanField(7)
class DataflowProjectsWorkerMessagesRequest(_messages.Message):
@@ -1499,6 +1593,10 @@ class DynamicSourceSplit(_messages.Message):
class Environment(_messages.Message):
r"""Describes the environment in which a Dataflow Job runs.
+ Enums:
+ FlexResourceSchedulingGoalValueValuesEnum: Which Flexible Resource
+ Scheduling mode to run in.
+
Messages:
InternalExperimentsValue: Experimental settings.
SdkPipelineOptionsValue: The Cloud Dataflow SDK pipeline options specified
@@ -1518,6 +1616,8 @@ class Environment(_messages.Message):
related tables are stored. The supported resource type is: Google
BigQuery: bigquery.googleapis.com/{dataset}
experiments: The list of experiments to enable.
+ flexResourceSchedulingGoal: Which Flexible Resource Scheduling mode to run
+ in.
internalExperiments: Experimental settings.
sdkPipelineOptions: The Cloud Dataflow SDK pipeline options specified by
the user. These options are passed through the service and are used to
@@ -1541,6 +1641,18 @@ class Environment(_messages.Message):
specified in order for the job to have workers.
"""
+ class FlexResourceSchedulingGoalValueValuesEnum(_messages.Enum):
+ r"""Which Flexible Resource Scheduling mode to run in.
+
+ Values:
+ FLEXRS_UNSPECIFIED: Run in the default mode.
+ FLEXRS_SPEED_OPTIMIZED: Optimize for lower execution time.
+ FLEXRS_COST_OPTIMIZED: Optimize for lower cost.
+ """
+ FLEXRS_UNSPECIFIED = 0
+ FLEXRS_SPEED_OPTIMIZED = 1
+ FLEXRS_COST_OPTIMIZED = 2
+
@encoding.MapUnrecognizedFields('additionalProperties')
class InternalExperimentsValue(_messages.Message):
r"""Experimental settings.
@@ -1647,13 +1759,14 @@ class Environment(_messages.Message):
clusterManagerApiService = _messages.StringField(1)
dataset = _messages.StringField(2)
experiments = _messages.StringField(3, repeated=True)
- internalExperiments = _messages.MessageField('InternalExperimentsValue', 4)
- sdkPipelineOptions = _messages.MessageField('SdkPipelineOptionsValue', 5)
- serviceAccountEmail = _messages.StringField(6)
- tempStoragePrefix = _messages.StringField(7)
- userAgent = _messages.MessageField('UserAgentValue', 8)
- version = _messages.MessageField('VersionValue', 9)
- workerPools = _messages.MessageField('WorkerPool', 10, repeated=True)
+ flexResourceSchedulingGoal = _messages.EnumField('FlexResourceSchedulingGoalValueValuesEnum', 4)
+ internalExperiments = _messages.MessageField('InternalExperimentsValue', 5)
+ sdkPipelineOptions = _messages.MessageField('SdkPipelineOptionsValue', 6)
+ serviceAccountEmail = _messages.StringField(7)
+ tempStoragePrefix = _messages.StringField(8)
+ userAgent = _messages.MessageField('UserAgentValue', 9)
+ version = _messages.MessageField('VersionValue', 10)
+ workerPools = _messages.MessageField('WorkerPool', 11, repeated=True)
class ExecutionStageState(_messages.Message):
@@ -1792,10 +1905,14 @@ class ExecutionStageSummary(_messages.Message):
class FailedLocation(_messages.Message):
- r"""Indicates which location failed to respond to a request for data.
+ r"""Indicates which [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) failed
+ to respond to a request for data.
Fields:
- name: The name of the failed location.
+ name: The name of the [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that failed to respond.
"""
name = _messages.StringField(1)
@@ -1850,7 +1967,9 @@ class GetDebugConfigRequest(_messages.Message):
Fields:
componentId: The internal component id for which debug configuration is
requested.
- location: The location which contains the job specified by job_id.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains the job specified by job_id.
workerId: The worker id, i.e., VM hostname.
"""
@@ -2045,6 +2164,8 @@ class Job(_messages.Message):
is empty and, in that case, the service ignores it.
createTime: The timestamp when the job was initially created. Immutable
and set by the Cloud Dataflow service.
+ createdFromSnapshotId: If this is specified, the job's initial state is
+ populated from the given snapshot.
currentState: The current state of the job. Jobs are created in the
`JOB_STATE_STOPPED` state unless otherwise specified. A job in the
`JOB_STATE_RUNNING` state may asynchronously enter a terminal state.
@@ -2066,7 +2187,9 @@ class Job(_messages.Message):
\p{Ll}\p{Lo}{0,62} * Values must conform to regexp:
[\p{Ll}\p{Lo}\p{N}_-]{0,63} * Both keys and values are additionally
constrained to be <= 128 bytes in size.
- location: The location that contains this job.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains this job.
name: The user-specified Cloud Dataflow job name. Only one Job with a
given name may exist in a project at any given time. If a caller
attempts to create a Job with the same name as an already-existing Job,
@@ -2097,7 +2220,9 @@ class Job(_messages.Message):
is updated when the job is started by the Cloud Dataflow service. For
other jobs, start_time always equals to create_time and is immutable and
set by the Cloud Dataflow service.
- steps: The top-level steps that constitute the entire job.
+ steps: Exactly one of step or steps_location should be specified. The
+ top-level steps that constitute the entire job.
+ stepsLocation: The GCS location where the steps are stored.
tempFiles: A set of files the system should be aware of that are used for
temporary storage. These temporary files will be removed on job
completion. No duplicates are allowed. No file patterns are supported.
@@ -2316,26 +2441,28 @@ class Job(_messages.Message):
clientRequestId = _messages.StringField(1)
createTime = _messages.StringField(2)
- currentState = _messages.EnumField('CurrentStateValueValuesEnum', 3)
- currentStateTime = _messages.StringField(4)
- environment = _messages.MessageField('Environment', 5)
- executionInfo = _messages.MessageField('JobExecutionInfo', 6)
- id = _messages.StringField(7)
- jobMetadata = _messages.MessageField('JobMetadata', 8)
- labels = _messages.MessageField('LabelsValue', 9)
- location = _messages.StringField(10)
- name = _messages.StringField(11)
- pipelineDescription = _messages.MessageField('PipelineDescription', 12)
- projectId = _messages.StringField(13)
- replaceJobId = _messages.StringField(14)
- replacedByJobId = _messages.StringField(15)
- requestedState = _messages.EnumField('RequestedStateValueValuesEnum', 16)
- stageStates = _messages.MessageField('ExecutionStageState', 17,
repeated=True)
- startTime = _messages.StringField(18)
- steps = _messages.MessageField('Step', 19, repeated=True)
- tempFiles = _messages.StringField(20, repeated=True)
- transformNameMapping = _messages.MessageField('TransformNameMappingValue',
21)
- type = _messages.EnumField('TypeValueValuesEnum', 22)
+ createdFromSnapshotId = _messages.StringField(3)
+ currentState = _messages.EnumField('CurrentStateValueValuesEnum', 4)
+ currentStateTime = _messages.StringField(5)
+ environment = _messages.MessageField('Environment', 6)
+ executionInfo = _messages.MessageField('JobExecutionInfo', 7)
+ id = _messages.StringField(8)
+ jobMetadata = _messages.MessageField('JobMetadata', 9)
+ labels = _messages.MessageField('LabelsValue', 10)
+ location = _messages.StringField(11)
+ name = _messages.StringField(12)
+ pipelineDescription = _messages.MessageField('PipelineDescription', 13)
+ projectId = _messages.StringField(14)
+ replaceJobId = _messages.StringField(15)
+ replacedByJobId = _messages.StringField(16)
+ requestedState = _messages.EnumField('RequestedStateValueValuesEnum', 17)
+ stageStates = _messages.MessageField('ExecutionStageState', 18,
repeated=True)
+ startTime = _messages.StringField(19)
+ steps = _messages.MessageField('Step', 20, repeated=True)
+ stepsLocation = _messages.StringField(21)
+ tempFiles = _messages.StringField(22, repeated=True)
+ transformNameMapping = _messages.MessageField('TransformNameMappingValue',
23)
+ type = _messages.EnumField('TypeValueValuesEnum', 24)
class JobExecutionInfo(_messages.Message):
@@ -2447,7 +2574,7 @@ class JobMessage(_messages.Message):
class JobMetadata(_messages.Message):
r"""Metadata available primarily for filtering jobs. Will be included in the
- ListJob response and Job SUMMARY view+.
+ ListJob response and Job SUMMARY view.
Fields:
bigTableDetails: Identification of a BigTable source used in the Dataflow
@@ -2473,7 +2600,7 @@ class JobMetadata(_messages.Message):
class JobMetrics(_messages.Message):
- r"""JobMetrics contains a collection of metrics descibing the detailed
+ r"""JobMetrics contains a collection of metrics describing the detailed
progress of a Dataflow job. Metrics correspond to user-defined and system-
defined metrics in the job. This resource captures only the most recent
values of each metric; time-series data can be queried for them (under the
@@ -2588,10 +2715,18 @@ class LaunchTemplateResponse(_messages.Message):
class LeaseWorkItemRequest(_messages.Message):
r"""Request to lease WorkItems.
+ Messages:
+ UnifiedWorkerRequestValue: Untranslated bag-of-bytes WorkRequest from
+ UnifiedWorker.
+
Fields:
currentWorkerTime: The current timestamp at the worker.
- location: The location which contains the WorkItem's job.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains the WorkItem's job.
requestedLeaseDuration: The initial lease period.
+ unifiedWorkerRequest: Untranslated bag-of-bytes WorkRequest from
+ UnifiedWorker.
workItemTypes: Filter for WorkItem type.
workerCapabilities: Worker capabilities. WorkItems might be limited to
workers with specific capabilities.
@@ -2599,22 +2734,82 @@ class LeaseWorkItemRequest(_messages.Message):
virtual machine running the worker.
"""
+ @encoding.MapUnrecognizedFields('additionalProperties')
+ class UnifiedWorkerRequestValue(_messages.Message):
+ r"""Untranslated bag-of-bytes WorkRequest from UnifiedWorker.
+
+ Messages:
+ AdditionalProperty: An additional property for a
+ UnifiedWorkerRequestValue object.
+
+ Fields:
+ additionalProperties: Properties of the object. Contains field @type
+ with type URL.
+ """
+
+ class AdditionalProperty(_messages.Message):
+ r"""An additional property for a UnifiedWorkerRequestValue object.
+
+ Fields:
+ key: Name of the additional property.
+ value: A extra_types.JsonValue attribute.
+ """
+
+ key = _messages.StringField(1)
+ value = _messages.MessageField('extra_types.JsonValue', 2)
+
+ additionalProperties = _messages.MessageField('AdditionalProperty', 1,
repeated=True)
+
currentWorkerTime = _messages.StringField(1)
location = _messages.StringField(2)
requestedLeaseDuration = _messages.StringField(3)
- workItemTypes = _messages.StringField(4, repeated=True)
- workerCapabilities = _messages.StringField(5, repeated=True)
- workerId = _messages.StringField(6)
+ unifiedWorkerRequest = _messages.MessageField('UnifiedWorkerRequestValue', 4)
+ workItemTypes = _messages.StringField(5, repeated=True)
+ workerCapabilities = _messages.StringField(6, repeated=True)
+ workerId = _messages.StringField(7)
class LeaseWorkItemResponse(_messages.Message):
r"""Response to a request to lease WorkItems.
+ Messages:
+ UnifiedWorkerResponseValue: Untranslated bag-of-bytes WorkResponse for
+ UnifiedWorker.
+
Fields:
+ unifiedWorkerResponse: Untranslated bag-of-bytes WorkResponse for
+ UnifiedWorker.
workItems: A list of the leased WorkItems.
"""
- workItems = _messages.MessageField('WorkItem', 1, repeated=True)
+ @encoding.MapUnrecognizedFields('additionalProperties')
+ class UnifiedWorkerResponseValue(_messages.Message):
+ r"""Untranslated bag-of-bytes WorkResponse for UnifiedWorker.
+
+ Messages:
+ AdditionalProperty: An additional property for a
+ UnifiedWorkerResponseValue object.
+
+ Fields:
+ additionalProperties: Properties of the object. Contains field @type
+ with type URL.
+ """
+
+ class AdditionalProperty(_messages.Message):
+ r"""An additional property for a UnifiedWorkerResponseValue object.
+
+ Fields:
+ key: Name of the additional property.
+ value: A extra_types.JsonValue attribute.
+ """
+
+ key = _messages.StringField(1)
+ value = _messages.MessageField('extra_types.JsonValue', 2)
+
+ additionalProperties = _messages.MessageField('AdditionalProperty', 1,
repeated=True)
+
+ unifiedWorkerResponse = _messages.MessageField('UnifiedWorkerResponseValue',
1)
+ workItems = _messages.MessageField('WorkItem', 2, repeated=True)
class ListJobMessagesResponse(_messages.Message):
@@ -2637,8 +2832,9 @@ class ListJobsResponse(_messages.Message):
partial response, depending on the page size in the ListJobsRequest.
Fields:
- failedLocation: Zero or more messages describing locations that failed to
- respond.
+ failedLocation: Zero or more messages describing the [regional endpoints]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that failed to respond.
jobs: A subset of the requested job information.
nextPageToken: Set if there may be more results than fit in this response.
"""
@@ -3072,7 +3268,7 @@ class PartialGroupByKeyInstruction(_messages.Message):
class PipelineDescription(_messages.Message):
r"""A descriptive representation of submitted pipeline as well as the
executed form. This data is provided by the Dataflow service for ease of
- visualizing the pipeline and interpretting Dataflow provided metrics.
+ visualizing the pipeline and interpreting Dataflow provided metrics.
Fields:
displayData: Pipeline level display data.
@@ -3166,9 +3362,17 @@ class ReadInstruction(_messages.Message):
class ReportWorkItemStatusRequest(_messages.Message):
r"""Request to report the status of WorkItems.
+ Messages:
+ UnifiedWorkerRequestValue: Untranslated bag-of-bytes
+ WorkProgressUpdateRequest from UnifiedWorker.
+
Fields:
currentWorkerTime: The current timestamp at the worker.
- location: The location which contains the WorkItem's job.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains the WorkItem's job.
+ unifiedWorkerRequest: Untranslated bag-of-bytes WorkProgressUpdateRequest
+ from UnifiedWorker.
workItemStatuses: The order is unimportant, except that the order of the
WorkItemServiceState messages in the ReportWorkItemStatusResponse
corresponds to the order of WorkItemStatus messages here.
@@ -3178,23 +3382,85 @@ class ReportWorkItemStatusRequest(_messages.Message):
(with an error response).
"""
+ @encoding.MapUnrecognizedFields('additionalProperties')
+ class UnifiedWorkerRequestValue(_messages.Message):
+ r"""Untranslated bag-of-bytes WorkProgressUpdateRequest from
+ UnifiedWorker.
+
+ Messages:
+ AdditionalProperty: An additional property for a
+ UnifiedWorkerRequestValue object.
+
+ Fields:
+ additionalProperties: Properties of the object. Contains field @type
+ with type URL.
+ """
+
+ class AdditionalProperty(_messages.Message):
+ r"""An additional property for a UnifiedWorkerRequestValue object.
+
+ Fields:
+ key: Name of the additional property.
+ value: A extra_types.JsonValue attribute.
+ """
+
+ key = _messages.StringField(1)
+ value = _messages.MessageField('extra_types.JsonValue', 2)
+
+ additionalProperties = _messages.MessageField('AdditionalProperty', 1,
repeated=True)
+
currentWorkerTime = _messages.StringField(1)
location = _messages.StringField(2)
- workItemStatuses = _messages.MessageField('WorkItemStatus', 3, repeated=True)
- workerId = _messages.StringField(4)
+ unifiedWorkerRequest = _messages.MessageField('UnifiedWorkerRequestValue', 3)
+ workItemStatuses = _messages.MessageField('WorkItemStatus', 4, repeated=True)
+ workerId = _messages.StringField(5)
class ReportWorkItemStatusResponse(_messages.Message):
r"""Response from a request to report the status of WorkItems.
+ Messages:
+ UnifiedWorkerResponseValue: Untranslated bag-of-bytes
+ WorkProgressUpdateResponse for UnifiedWorker.
+
Fields:
+ unifiedWorkerResponse: Untranslated bag-of-bytes
+ WorkProgressUpdateResponse for UnifiedWorker.
workItemServiceStates: A set of messages indicating the service-side state
for each WorkItem whose status was reported, in the same order as the
WorkItemStatus messages in the ReportWorkItemStatusRequest which
resulting in this response.
"""
- workItemServiceStates = _messages.MessageField('WorkItemServiceState', 1,
repeated=True)
+ @encoding.MapUnrecognizedFields('additionalProperties')
+ class UnifiedWorkerResponseValue(_messages.Message):
+ r"""Untranslated bag-of-bytes WorkProgressUpdateResponse for
+ UnifiedWorker.
+
+ Messages:
+ AdditionalProperty: An additional property for a
+ UnifiedWorkerResponseValue object.
+
+ Fields:
+ additionalProperties: Properties of the object. Contains field @type
+ with type URL.
+ """
+
+ class AdditionalProperty(_messages.Message):
+ r"""An additional property for a UnifiedWorkerResponseValue object.
+
+ Fields:
+ key: Name of the additional property.
+ value: A extra_types.JsonValue attribute.
+ """
+
+ key = _messages.StringField(1)
+ value = _messages.MessageField('extra_types.JsonValue', 2)
+
+ additionalProperties = _messages.MessageField('AdditionalProperty', 1,
repeated=True)
+
+ unifiedWorkerResponse = _messages.MessageField('UnifiedWorkerResponseValue',
1)
+ workItemServiceStates = _messages.MessageField('WorkItemServiceState', 2,
repeated=True)
class ReportedParallelism(_messages.Message):
@@ -3235,8 +3501,12 @@ class
ResourceUtilizationReportResponse(_messages.Message):
class RuntimeEnvironment(_messages.Message):
r"""The environment values to set at runtime.
+ Messages:
+ AdditionalUserLabelsValue: Additional user labels attached to the job.
+
Fields:
additionalExperiments: Additional experiment flags for the job.
+ additionalUserLabels: Additional user labels attached to the job.
bypassTempDirValidation: Whether to bypass the safety checks for the job's
temporary directory. Use with caution.
machineType: The machine type to use for the job. Defaults to the value
@@ -3245,6 +3515,8 @@ class RuntimeEnvironment(_messages.Message):
made available to your pipeline during execution, from 1 to 1000.
network: Network to which VMs will be assigned. If empty or unspecified,
the service will use the network "default".
+ numWorkers: The initial number of Google Compute Engine instances for the
+ job.
serviceAccountEmail: The email address of the service account to run the
job as.
subnetwork: Subnetwork to which VMs will be assigned, if desired.
@@ -3256,19 +3528,47 @@ class RuntimeEnvironment(_messages.Message):
for launching worker instances to run your pipeline.
"""
+ @encoding.MapUnrecognizedFields('additionalProperties')
+ class AdditionalUserLabelsValue(_messages.Message):
+ r"""Additional user labels attached to the job.
+
+ Messages:
+ AdditionalProperty: An additional property for a
+ AdditionalUserLabelsValue object.
+
+ Fields:
+ additionalProperties: Additional properties of type
+ AdditionalUserLabelsValue
+ """
+
+ class AdditionalProperty(_messages.Message):
+ r"""An additional property for a AdditionalUserLabelsValue object.
+
+ Fields:
+ key: Name of the additional property.
+ value: A string attribute.
+ """
+
+ key = _messages.StringField(1)
+ value = _messages.StringField(2)
+
+ additionalProperties = _messages.MessageField('AdditionalProperty', 1,
repeated=True)
+
additionalExperiments = _messages.StringField(1, repeated=True)
- bypassTempDirValidation = _messages.BooleanField(2)
- machineType = _messages.StringField(3)
- maxWorkers = _messages.IntegerField(4, variant=_messages.Variant.INT32)
- network = _messages.StringField(5)
- serviceAccountEmail = _messages.StringField(6)
- subnetwork = _messages.StringField(7)
- tempLocation = _messages.StringField(8)
- zone = _messages.StringField(9)
+ additionalUserLabels = _messages.MessageField('AdditionalUserLabelsValue', 2)
+ bypassTempDirValidation = _messages.BooleanField(3)
+ machineType = _messages.StringField(4)
+ maxWorkers = _messages.IntegerField(5, variant=_messages.Variant.INT32)
+ network = _messages.StringField(6)
+ numWorkers = _messages.IntegerField(7, variant=_messages.Variant.INT32)
+ serviceAccountEmail = _messages.StringField(8)
+ subnetwork = _messages.StringField(9)
+ tempLocation = _messages.StringField(10)
+ zone = _messages.StringField(11)
class SdkVersion(_messages.Message):
- r"""The version of the SDK used to run the jobl
+ r"""The version of the SDK used to run the job.
Enums:
SdkSupportStatusValueValuesEnum: The support status for this SDK version.
@@ -3276,7 +3576,7 @@ class SdkVersion(_messages.Message):
Fields:
sdkSupportStatus: The support status for this SDK version.
version: The version of the SDK used to run the job.
- versionDisplayName: A readable string describing the version of the sdk.
+ versionDisplayName: A readable string describing the version of the SDK.
"""
class SdkSupportStatusValueValuesEnum(_messages.Enum):
@@ -3310,7 +3610,9 @@ class SendDebugCaptureRequest(_messages.Message):
componentId: The internal component id for which debug information is
sent.
data: The encoded debug information.
- location: The location which contains the job specified by job_id.
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains the job specified by job_id.
workerId: The worker id, i.e., VM hostname.
"""
@@ -3329,7 +3631,9 @@ class SendWorkerMessagesRequest(_messages.Message):
r"""A request for sending worker messages to the service.
Fields:
- location: The location which contains the job
+ location: The [regional endpoint]
+ (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints)
+ that contains the job.
workerMessages: The WorkerMessages to send.
"""
@@ -3529,6 +3833,36 @@ class Sink(_messages.Message):
spec = _messages.MessageField('SpecValue', 2)
+class Snapshot(_messages.Message):
+ r"""Represents a snapshot of a job.
+
+ Fields:
+ creationTime: The time this snapshot was created.
+ id: The unique ID of this snapshot.
+ projectId: The project this snapshot belongs to.
+ sourceJobId: The job this snapshot was created from.
+ ttl: The time after which this snapshot will be automatically deleted.
+ """
+
+ creationTime = _messages.StringField(1)
+ id = _messages.StringField(2)
+ projectId = _messages.StringField(3)
+ sourceJobId = _messages.StringField(4)
+ ttl = _messages.StringField(5)
+
+
+class SnapshotJobRequest(_messages.Message):
+ r"""Request to create a snapshot of a job.
+
+ Fields:
+ location: The location that contains this job.
+ ttl: TTL for the snapshot.
+ """
+
+ location = _messages.StringField(1)
+ ttl = _messages.StringField(2)
+
+
class Source(_messages.Message):
r"""A source that records can be read and decoded from.
@@ -4147,17 +4481,52 @@ class
StreamingApplianceSnapshotConfig(_messages.Message):
class StreamingComputationConfig(_messages.Message):
r"""Configuration information for a single streaming computation.
+ Messages:
+ TransformUserNameToStateFamilyValue: Map from user name of stateful
+ transforms in this stage to their state family.
+
Fields:
computationId: Unique identifier for this computation.
instructions: Instructions that comprise the computation.
stageName: Stage name of this computation.
systemName: System defined name for this computation.
+ transformUserNameToStateFamily: Map from user name of stateful transforms
+ in this stage to their state family.
"""
+ @encoding.MapUnrecognizedFields('additionalProperties')
+ class TransformUserNameToStateFamilyValue(_messages.Message):
+ r"""Map from user name of stateful transforms in this stage to their state
+ family.
+
+ Messages:
+ AdditionalProperty: An additional property for a
+ TransformUserNameToStateFamilyValue object.
+
+ Fields:
+ additionalProperties: Additional properties of type
+ TransformUserNameToStateFamilyValue
+ """
+
+ class AdditionalProperty(_messages.Message):
+ r"""An additional property for a TransformUserNameToStateFamilyValue
+ object.
+
+ Fields:
+ key: Name of the additional property.
+ value: A string attribute.
+ """
+
+ key = _messages.StringField(1)
+ value = _messages.StringField(2)
+
+ additionalProperties = _messages.MessageField('AdditionalProperty', 1,
repeated=True)
+
computationId = _messages.StringField(1)
instructions = _messages.MessageField('ParallelInstruction', 2,
repeated=True)
stageName = _messages.StringField(3)
systemName = _messages.StringField(4)
+ transformUserNameToStateFamily =
_messages.MessageField('TransformUserNameToStateFamilyValue', 5)
class StreamingComputationRanges(_messages.Message):
@@ -4216,6 +4585,8 @@ class StreamingConfigTask(_messages.Message):
families.
Fields:
+ maxWorkItemCommitBytes: Maximum size for work item commit supported by the
+ windmill storage layer.
streamingComputationConfigs: Set of computation configuration information.
userStepToStateFamilyNameMap: Map from user step names to state families.
windmillServiceEndpoint: If present, the worker must use this endpoint to
@@ -4253,10 +4624,11 @@ class StreamingConfigTask(_messages.Message):
additionalProperties = _messages.MessageField('AdditionalProperty', 1,
repeated=True)
- streamingComputationConfigs =
_messages.MessageField('StreamingComputationConfig', 1, repeated=True)
- userStepToStateFamilyNameMap =
_messages.MessageField('UserStepToStateFamilyNameMapValue', 2)
- windmillServiceEndpoint = _messages.StringField(3)
- windmillServicePort = _messages.IntegerField(4)
+ maxWorkItemCommitBytes = _messages.IntegerField(1)
+ streamingComputationConfigs =
_messages.MessageField('StreamingComputationConfig', 2, repeated=True)
+ userStepToStateFamilyNameMap =
_messages.MessageField('UserStepToStateFamilyNameMapValue', 3)
+ windmillServiceEndpoint = _messages.StringField(4)
+ windmillServicePort = _messages.IntegerField(5)
class StreamingSetupTask(_messages.Message):
@@ -4321,7 +4693,7 @@ class StructuredMessage(_messages.Message):
programmatic consumption.
Fields:
- messageKey: Idenfier for this message type. Used by external systems to
+ messageKey: Identifier for this message type. Used by external systems to
internationalize or personalize message.
messageText: Human-readable version of message.
parameters: The structured data associated with this message.
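Relatedly, the new dynamicTemplate_* launch fields added in this messages file could be exercised roughly as follows; every path, project, and job name is a placeholder, and the commented service call assumes the usual apitools attribute naming:

    from apache_beam.runners.dataflow.internal.clients import dataflow

    launch_request = dataflow.DataflowProjectsLocationsTemplatesLaunchRequest(
        projectId='my-project',                                         # placeholder
        location='us-central1',                                         # placeholder
        dynamicTemplate_gcsPath='gs://my-bucket/templates/spec.json',   # placeholder spec file
        dynamicTemplate_stagingLocation='gs://my-bucket/staging',       # placeholder staging dir
        validateOnly=True,  # validate the request without launching a job
        launchTemplateParameters=dataflow.LaunchTemplateParameters(jobName='my-flexrs-job'))
    # client.projects_locations_templates.Launch(launch_request) would submit it.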