VictorPlusC commented on a change in pull request #16601:
URL: https://github.com/apache/beam/pull/16601#discussion_r794960220
##########
File path:
sdks/python/apache_beam/runners/interactive/interactive_environment.py
##########
@@ -357,19 +363,47 @@ def get_cache_manager(self, pipeline,
create_if_absent=False):
given pipeline. If the pipeline is absent from the environment while
create_if_absent is True, creates and returns a new file based cache
manager for the pipeline."""
+ if self._is_in_ipython:
+ warnings.filterwarnings(
+ 'ignore',
+ 'options is deprecated since First stable release. References to '
+ '<pipeline>.options will not be supported',
+ category=DeprecationWarning)
+
cache_manager = self._cache_managers.get(str(id(pipeline)), None)
+ if isinstance(pipeline, Pipeline):
+ if hasattr(pipeline.runner, '_underlying_runner'):
+ pipeline_runner = pipeline.runner._underlying_runner
+ else:
+ pipeline_runner = pipeline.runner
+ else:
+ pipeline_runner = None
if not cache_manager and create_if_absent:
- from apache_beam.runners.interactive import interactive_beam as ib
- if ib.options.cache_root:
- #TODO(victorhc): Handle the case when the path starts with "gs://"
- if ib.options.cache_root.startswith("gs://"):
- raise ValueError("GCS paths are not currently supported.")
- cache_dir = tempfile.mkdtemp(dir=ib.options.cache_root)
+ cache_root = self.options.cache_root
+ if cache_root:
+ if cache_root.startswith('gs://'):
+ cache_dir = self._get_gcs_cache_dir(pipeline, cache_root)
+ else:
+ cache_dir = tempfile.mkdtemp(dir=cache_root)
+ if not isinstance(pipeline_runner, direct_runner.DirectRunner):
+ _LOGGER.warning(
+ 'A local cache directory has been specified while '
+ 'not using DirectRunner. It is recommended to cache into a '
+ 'GCS bucket instead.')
else:
- cache_dir = tempfile.mkdtemp(
- suffix=str(id(pipeline)),
- prefix='it-',
- dir=os.environ.get('TEST_TMPDIR', None))
+ staging_location = pipeline.options.get_all_options(
+ )['staging_location']
+ if isinstance(pipeline_runner, DataflowRunner) and staging_location:
+ cache_dir = self._get_gcs_cache_dir(pipeline, staging_location)
+ _LOGGER.warning(
+ 'No cache_root detected. '
+ 'Defaulting to temp_location %s for cache location.',
Review comment:
Resolved. Thanks for the catch!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]