angoenka commented on code in PR #17323:
URL: https://github.com/apache/beam/pull/17323#discussion_r848925424


##########
sdks/python/apache_beam/runners/dataflow/dataflow_runner.py:
##########
@@ -458,20 +525,46 @@ def run_pipeline(self, pipeline, options, 
pipeline_proto=None):
         pipeline.replace_all(DataflowRunner._JRH_PTRANSFORM_OVERRIDES)
 
     from apache_beam.transforms import environments
-    if options.view_as(SetupOptions).prebuild_sdk_container_engine:
-      # if prebuild_sdk_container_engine is specified we will build a new sdk
-      # container image with dependencies pre-installed and use that image,
-      # instead of using the inferred default container image.
-      self._default_environment = (
-          environments.DockerEnvironment.from_options(options))
-      options.view_as(WorkerOptions).sdk_container_image = (
-          self._default_environment.container_image)
-    else:
-      self._default_environment = (
-          environments.DockerEnvironment.from_container_image(
-              apiclient.get_container_image_from_options(options),
-              artifacts=environments.python_sdk_dependencies(options),
-              
resource_hints=environments.resource_hints_from_options(options)))
+    original_options_contain_prebuild = options.view_as(
+        SetupOptions).prebuild_sdk_container_engine is not None
+    self._enable_sdk_prebuild_if_applicable(options)
+    should_submit_job_without_prebuilding = True
+    try:
+      if options.view_as(SetupOptions).prebuild_sdk_container_engine:
+        # if prebuild_sdk_container_engine is specified we will build a new sdk
+        # container image with dependencies pre-installed and use that image,
+        # instead of using the inferred default container image.
+        _LOGGER.info(
+            "SDK container image pre-build workflow started. Check dataflow "
+            "website https://cloud.google.com/dataflow/docs/guides/";
+            "using-custom-containers#prebuild for more info.")
+        self._default_environment = (
+            environments.DockerEnvironment.from_options(options))
+        options.view_as(WorkerOptions).sdk_container_image = (
+            self._default_environment.container_image)
+        should_submit_job_without_prebuilding = False
+        _LOGGER.warning(
+            "Please delete prebuilt SDK container image %s after the "
+            "job is done. Failing to do so may incur additional "
+            "storage costs." % self._default_environment.container_image)
+    except RuntimeError as e:
+      _LOGGER.error("SDK image build failed: %s" % e, exc_info=True)
+      if original_options_contain_prebuild:
+        # if the options is explicitly specified other than auto opted-in, stop
+        # the submission.
+        raise

Review Comment:
   Shall we add a some error to the raise?



##########
sdks/python/apache_beam/runners/dataflow/dataflow_runner.py:
##########
@@ -458,20 +525,46 @@ def run_pipeline(self, pipeline, options, 
pipeline_proto=None):
         pipeline.replace_all(DataflowRunner._JRH_PTRANSFORM_OVERRIDES)
 
     from apache_beam.transforms import environments
-    if options.view_as(SetupOptions).prebuild_sdk_container_engine:
-      # if prebuild_sdk_container_engine is specified we will build a new sdk
-      # container image with dependencies pre-installed and use that image,
-      # instead of using the inferred default container image.
-      self._default_environment = (
-          environments.DockerEnvironment.from_options(options))
-      options.view_as(WorkerOptions).sdk_container_image = (
-          self._default_environment.container_image)
-    else:
-      self._default_environment = (
-          environments.DockerEnvironment.from_container_image(
-              apiclient.get_container_image_from_options(options),
-              artifacts=environments.python_sdk_dependencies(options),
-              
resource_hints=environments.resource_hints_from_options(options)))
+    original_options_contain_prebuild = options.view_as(
+        SetupOptions).prebuild_sdk_container_engine is not None
+    self._enable_sdk_prebuild_if_applicable(options)
+    should_submit_job_without_prebuilding = True
+    try:
+      if options.view_as(SetupOptions).prebuild_sdk_container_engine:
+        # if prebuild_sdk_container_engine is specified we will build a new sdk
+        # container image with dependencies pre-installed and use that image,
+        # instead of using the inferred default container image.
+        _LOGGER.info(
+            "SDK container image pre-build workflow started. Check dataflow "
+            "website https://cloud.google.com/dataflow/docs/guides/";
+            "using-custom-containers#prebuild for more info.")
+        self._default_environment = (
+            environments.DockerEnvironment.from_options(options))
+        options.view_as(WorkerOptions).sdk_container_image = (
+            self._default_environment.container_image)
+        should_submit_job_without_prebuilding = False
+        _LOGGER.warning(
+            "Please delete prebuilt SDK container image %s after the "
+            "job is done. Failing to do so may incur additional "
+            "storage costs." % self._default_environment.container_image)
+    except RuntimeError as e:
+      _LOGGER.error("SDK image build failed: %s" % e, exc_info=True)
+      if original_options_contain_prebuild:
+        # if the options is explicitly specified other than auto opted-in, stop
+        # the submission.
+        raise
+      _LOGGER.info(
+          "Falling back to job submission without SDK container image "
+          "pre-building.")
+      options.view_as(SetupOptions).prebuild_sdk_container_engine = None
+    finally:
+      if should_submit_job_without_prebuilding:

Review Comment:
   Shall we remove `should_submit_job_without_prebuilding` and make finally 
part of exception block?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to