This is an automated email from the ASF dual-hosted git repository.
altay pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 24e9ced [BEAM-7833] warn user when --region flag is not explicitly set (#9173)
24e9ced is described below
commit 24e9cedcc768d901de795477fa78c7f357635671
Author: Kyle Weaver <[email protected]>
AuthorDate: Tue Aug 6 09:40:05 2019 -0700
[BEAM-7833] warn user when --region flag is not explicitly set (#9173)
* [BEAM-7833] warn user when --region flag is not explicitly set
---
.../apache/beam/runners/dataflow/DataflowRunner.java | 8 ++++++++
.../dataflow/options/DataflowPipelineOptions.java | 4 ++--
.../dataflow/worker/DataflowWorkUnitClientTest.java | 1 +
sdks/go/pkg/beam/runners/dataflow/dataflow.go | 8 +++++++-
sdks/python/apache_beam/options/pipeline_options.py | 18 +++++++++++++-----
5 files changed, 31 insertions(+), 8 deletions(-)
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
index 05b055b..4c48cb3 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
@@ -238,6 +238,14 @@ public class DataflowRunner extends PipelineRunner<DataflowPipelineJob> {
"Missing required values: " + Joiner.on(',').join(missing));
}
+ if (dataflowOptions.getRegion() == null) {
+ dataflowOptions.setRegion("us-central1");
+ LOG.warn(
+ "--region not set; will default to us-central1. Future releases of Beam will "
+ + "require the user to set the region explicitly. "
+ + "https://cloud.google.com/compute/docs/regions-zones/regions-zones");
+ }
+
PathValidator validator = dataflowOptions.getPathValidator();
String gcpTempLocation;
try {
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java
index 35df563..c035839 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java
@@ -126,8 +126,8 @@ public interface DataflowPipelineOptions
@Description(
"The Google Compute Engine region for creating Dataflow jobs. See "
+ "https://cloud.google.com/compute/docs/regions-zones/regions-zones for a list of valid "
- + "options. Default is up to the Dataflow service.")
- @Default.String("us-central1")
+ + "options. Currently defaults to us-central1, but future releases of Beam will "
+ + "require the user to set the region explicitly.")
String getRegion();
void setRegion(String region);
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkUnitClientTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkUnitClientTest.java
index 2355071..947d290 100644
--- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkUnitClientTest.java
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkUnitClientTest.java
@@ -86,6 +86,7 @@ public class DataflowWorkUnitClientTest {
pipelineOptions.setWorkerId(WORKER_ID);
pipelineOptions.setGcpCredential(new TestCredential());
pipelineOptions.setDataflowClient(service);
+ pipelineOptions.setRegion("us-central1");
}
@Test
diff --git a/sdks/go/pkg/beam/runners/dataflow/dataflow.go b/sdks/go/pkg/beam/runners/dataflow/dataflow.go
index b709664..921d829 100644
--- a/sdks/go/pkg/beam/runners/dataflow/dataflow.go
+++ b/sdks/go/pkg/beam/runners/dataflow/dataflow.go
@@ -54,7 +54,7 @@ var (
maxNumWorkers = flag.Int64("max_num_workers", 0, "Maximum number of workers during scaling (optional).")
autoscalingAlgorithm = flag.String("autoscaling_algorithm", "", "Autoscaling mode to use (optional).")
zone = flag.String("zone", "", "GCP zone (optional)")
- region = flag.String("region", "us-central1", "GCP Region (optional)")
+ region = flag.String("region", "", "GCP Region (optional but encouraged)")
network = flag.String("network", "", "GCP network (optional)")
tempLocation = flag.String("temp_location", "", "Temp location (optional)")
machineType = flag.String("worker_machine_type", "", "GCE machine type (optional)")
@@ -90,6 +90,12 @@ func Execute(ctx context.Context, p *beam.Pipeline) error {
if *stagingLocation == "" {
return errors.New("no GCS staging location specified. Use --staging_location=gs://<bucket>/<path>")
}
+ if *region == "" {
+ *region = "us-central1"
+ log.Warn(ctx, "--region not set; will default to us-central1. Future releases of Beam will "+
+ "require the user to set the region explicitly. "+
+ "https://cloud.google.com/compute/docs/regions-zones/regions-zones")
+ }
if *image == "" {
*image = getContainerImage(ctx)
}
diff --git a/sdks/python/apache_beam/options/pipeline_options.py b/sdks/python/apache_beam/options/pipeline_options.py
index de92519..658978f 100644
--- a/sdks/python/apache_beam/options/pipeline_options.py
+++ b/sdks/python/apache_beam/options/pipeline_options.py
@@ -448,13 +448,12 @@ class GoogleCloudOptions(PipelineOptions):
parser.add_argument('--temp_location',
default=None,
help='GCS path for saving temporary workflow jobs.')
- # The Cloud Dataflow service does not yet honor this setting. However, once
- # service support is added then users of this SDK will be able to control
- # the region. Default is up to the Dataflow service. See
+ # The Google Compute Engine region for creating Dataflow jobs. See
# https://cloud.google.com/compute/docs/regions-zones/regions-zones for a
- # list of valid options/
+ # list of valid options. Currently defaults to us-central1, but future
+ # releases of Beam will require the user to set the region explicitly.
parser.add_argument('--region',
- default='us-central1',
+ default=None,
help='The Google Compute Engine region for creating '
'Dataflow job.')
parser.add_argument('--service_account_email',
@@ -515,6 +514,15 @@ class GoogleCloudOptions(PipelineOptions):
errors.append('--dataflow_job_file and --template_location '
'are mutually exclusive.')
+ if self.view_as(GoogleCloudOptions).region is None:
+ self.view_as(GoogleCloudOptions).region = 'us-central1'
+ runner = self.view_as(StandardOptions).runner
+ if runner == 'DataflowRunner' or runner == 'TestDataflowRunner':
+ logging.warning(
+ '--region not set; will default to us-central1. Future releases of '
+ 'Beam will require the user to set the region explicitly. '
+ 'https://cloud.google.com/compute/docs/regions-zones/regions-zones')
+
return errors