This is an automated email from the ASF dual-hosted git repository.

altay pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git

The following commit(s) were added to refs/heads/master by this push:
     new 24e9ced  [BEAM-7833] warn user when --region flag is not explicitly set (#9173)
24e9ced is described below

commit 24e9cedcc768d901de795477fa78c7f357635671
Author: Kyle Weaver <kcwea...@google.com>
AuthorDate: Tue Aug 6 09:40:05 2019 -0700

    [BEAM-7833] warn user when --region flag is not explicitly set (#9173)

    * [BEAM-7833] warn user when --region flag is not explicitly set
---
 .../apache/beam/runners/dataflow/DataflowRunner.java   |  8 ++++++++
 .../dataflow/options/DataflowPipelineOptions.java      |  4 ++--
 .../dataflow/worker/DataflowWorkUnitClientTest.java    |  1 +
 sdks/go/pkg/beam/runners/dataflow/dataflow.go          |  8 +++++++-
 sdks/python/apache_beam/options/pipeline_options.py    | 18 +++++++++++++-----
 5 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
index 05b055b..4c48cb3 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
@@ -238,6 +238,14 @@ public class DataflowRunner extends PipelineRunner<DataflowPipelineJob> {
           "Missing required values: " + Joiner.on(',').join(missing));
     }
 
+    if (dataflowOptions.getRegion() == null) {
+      dataflowOptions.setRegion("us-central1");
+      LOG.warn(
+          "--region not set; will default to us-central1. Future releases of Beam will "
+              + "require the user to set the region explicitly. "
+              + "https://cloud.google.com/compute/docs/regions-zones/regions-zones");
+    }
+
     PathValidator validator = dataflowOptions.getPathValidator();
     String gcpTempLocation;
     try {
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java
index 35df563..c035839 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java
@@ -126,8 +126,8 @@ public interface DataflowPipelineOptions
   @Description(
       "The Google Compute Engine region for creating Dataflow jobs. See "
           + "https://cloud.google.com/compute/docs/regions-zones/regions-zones for a list of valid "
-          + "options. Default is up to the Dataflow service.")
-  @Default.String("us-central1")
+          + "options. Currently defaults to us-central1, but future releases of Beam will "
+          + "require the user to set the region explicitly.")
   String getRegion();
 
   void setRegion(String region);
diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkUnitClientTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkUnitClientTest.java
index 2355071..947d290 100644
--- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkUnitClientTest.java
+++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkUnitClientTest.java
@@ -86,6 +86,7 @@ public class DataflowWorkUnitClientTest {
     pipelineOptions.setWorkerId(WORKER_ID);
     pipelineOptions.setGcpCredential(new TestCredential());
     pipelineOptions.setDataflowClient(service);
+    pipelineOptions.setRegion("us-central1");
   }
 
   @Test
diff --git a/sdks/go/pkg/beam/runners/dataflow/dataflow.go b/sdks/go/pkg/beam/runners/dataflow/dataflow.go
index b709664..921d829 100644
--- a/sdks/go/pkg/beam/runners/dataflow/dataflow.go
+++ b/sdks/go/pkg/beam/runners/dataflow/dataflow.go
@@ -54,7 +54,7 @@ var (
 	maxNumWorkers = flag.Int64("max_num_workers", 0, "Maximum number of workers during scaling (optional).")
 	autoscalingAlgorithm = flag.String("autoscaling_algorithm", "", "Autoscaling mode to use (optional).")
 	zone = flag.String("zone", "", "GCP zone (optional)")
-	region = flag.String("region", "us-central1", "GCP Region (optional)")
+	region = flag.String("region", "", "GCP Region (optional but encouraged)")
 	network = flag.String("network", "", "GCP network (optional)")
 	tempLocation = flag.String("temp_location", "", "Temp location (optional)")
 	machineType = flag.String("worker_machine_type", "", "GCE machine type (optional)")
@@ -90,6 +90,12 @@ func Execute(ctx context.Context, p *beam.Pipeline) error {
 	if *stagingLocation == "" {
 		return errors.New("no GCS staging location specified. Use --staging_location=gs://<bucket>/<path>")
 	}
+	if *region == "" {
+		*region = "us-central1"
+		log.Warn(ctx, "--region not set; will default to us-central1. Future releases of Beam will "+
+			"require the user to set the region explicitly. "+
+			"https://cloud.google.com/compute/docs/regions-zones/regions-zones")
+	}
 	if *image == "" {
 		*image = getContainerImage(ctx)
 	}
diff --git a/sdks/python/apache_beam/options/pipeline_options.py b/sdks/python/apache_beam/options/pipeline_options.py
index de92519..658978f 100644
--- a/sdks/python/apache_beam/options/pipeline_options.py
+++ b/sdks/python/apache_beam/options/pipeline_options.py
@@ -448,13 +448,12 @@ class GoogleCloudOptions(PipelineOptions):
     parser.add_argument('--temp_location',
                         default=None,
                         help='GCS path for saving temporary workflow jobs.')
-    # The Cloud Dataflow service does not yet honor this setting. However, once
-    # service support is added then users of this SDK will be able to control
-    # the region. Default is up to the Dataflow service. See
+    # The Google Compute Engine region for creating Dataflow jobs. See
     # https://cloud.google.com/compute/docs/regions-zones/regions-zones for a
-    # list of valid options/
+    # list of valid options. Currently defaults to us-central1, but future
+    # releases of Beam will require the user to set the region explicitly.
     parser.add_argument('--region',
-                        default='us-central1',
+                        default=None,
                         help='The Google Compute Engine region for creating '
                         'Dataflow job.')
     parser.add_argument('--service_account_email',
@@ -515,6 +514,15 @@ class GoogleCloudOptions(PipelineOptions):
       errors.append('--dataflow_job_file and --template_location '
                     'are mutually exclusive.')
 
+    if self.view_as(GoogleCloudOptions).region is None:
+      self.view_as(GoogleCloudOptions).region = 'us-central1'
+      runner = self.view_as(StandardOptions).runner
+      if runner == 'DataflowRunner' or runner == 'TestDataflowRunner':
+        logging.warning(
+            '--region not set; will default to us-central1. Future releases of '
+            'Beam will require the user to set the region explicitly. '
+            'https://cloud.google.com/compute/docs/regions-zones/regions-zones')
+
     return errors