Hello all,

I am running a pipeline that used to execute on Dataflow with no
issues. I am using the Datalab environment. See the error below. To my
understanding, it happens before the pipeline code itself is executed.
Any idea what went wrong?

Thanks,
Eila


Executing the pipeline:

*p.run().wait_until_finish()*
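
For context, the pipeline is constructed along these lines before that
call (a minimal sketch; the project id and option values here are
placeholders rather than my actual settings):

import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions

# Placeholder options -- the real job stages to the archs4 bucket
# shown in the log below.
options = PipelineOptions(
    runner='DataflowRunner',
    project='my-project-id',
    job_name='label-archs4-annotation-15',
    staging_location='gs://archs4/staging',
    temp_location='gs://archs4/temp')

p = beam.Pipeline(options=options)
# ... transforms elided; the failure happens before any of them execute ...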

The following error is raised:

INFO:root:Executing command: ['/usr/local/envs/py2env/bin/python', 'setup.py', 'sdist', '--dist-dir', '/tmp/tmp_B0gnK']
INFO:root:Starting GCS upload to gs://archs4/staging/label-archs4-annotation-15.1534948236.075799/workflow.tar.gz...
INFO:oauth2client.client:Attempting refresh to obtain initial access_token
INFO:root:Completed GCS upload to gs://archs4/staging/label-archs4-annotation-15.1534948236.075799/workflow.tar.gz
INFO:root:Staging the SDK tarball from PyPI to gs://archs4/staging/label-archs4-annotation-15.1534948236.075799/dataflow_python_sdk.tar
INFO:root:Executing command: ['/usr/local/envs/py2env/bin/python', '-m', 'pip', 'install', '--download', '/tmp/tmp_B0gnK', 'google-cloud-dataflow==2.0.0', '--no-binary', ':all:', '--no-deps']

CalledProcessErrorTraceback (most recent call last)
<ipython-input-27-1e5aeb8b7d9b> in <module>()
----> 1 p.run().wait_until_finish()

/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/pipeline.pyc in run(self, test_runner_api)
    174       finally:
    175         shutil.rmtree(tmpdir)
--> 176     return self.runner.run(self)
    177
    178   def __enter__(self):

/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/dataflow_runner.pyc in run(self, pipeline)
    250     # Create the job
    251     result = DataflowPipelineResult(
--> 252         self.dataflow_client.create_job(self.job), self)
    253
    254     self._metrics = DataflowMetrics(self.dataflow_client, result, self.job)

/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/utils/retry.pyc in wrapper(*args, **kwargs)
    166       while True:
    167         try:
--> 168           return fun(*args, **kwargs)
    169         except Exception as exn:  # pylint: disable=broad-except
    170           if not retry_filter(exn):

/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/apiclient.pyc in create_job(self, job)
    423   def create_job(self, job):
    424     """Creates job description. May stage and/or submit for remote execution."""
--> 425     self.create_job_description(job)
    426
    427     # Stage and submit the job when necessary

/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/apiclient.pyc in create_job_description(self, job)
    446     """Creates a job described by the workflow proto."""
    447     resources = dependency.stage_job_resources(
--> 448         job.options, file_copy=self._gcs_file_copy)
    449     job.proto.environment = Environment(
    450         packages=resources, options=job.options,

/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/dependency.pyc in stage_job_resources(options, file_copy, build_setup_args, temp_dir, populate_requirements_cache)
    377       else:
    378         sdk_remote_location = setup_options.sdk_location
--> 379       _stage_beam_sdk_tarball(sdk_remote_location, staged_path, temp_dir)
    380       resources.append(names.DATAFLOW_SDK_TARBALL_FILE)
    381     else:

/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/dependency.pyc in _stage_beam_sdk_tarball(sdk_remote_location, staged_path, temp_dir)
    462   elif sdk_remote_location == 'pypi':
    463     logging.info('Staging the SDK tarball from PyPI to %s', staged_path)
--> 464     _dependency_file_copy(_download_pypi_sdk_package(temp_dir), staged_path)
    465   else:
    466     raise RuntimeError(

/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/dependency.pyc in _download_pypi_sdk_package(temp_dir)
    525       '--no-binary', ':all:', '--no-deps']
    526   logging.info('Executing command: %s', cmd_args)
--> 527   processes.check_call(cmd_args)
    528   zip_expected = os.path.join(
    529       temp_dir, '%s-%s.zip' % (package_name, version))

/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/utils/processes.pyc in check_call(*args, **kwargs)
     42   if force_shell:
     43     kwargs['shell'] = True
---> 44   return subprocess.check_call(*args, **kwargs)
     45
     46

/usr/local/envs/py2env/lib/python2.7/subprocess.pyc in check_call(*popenargs, **kwargs)
    188         if cmd is None:
    189             cmd = popenargs[0]
--> 190         raise CalledProcessError(retcode, cmd)
    191     return 0
    192

CalledProcessError: Command '['/usr/local/envs/py2env/bin/python', '-m', 'pip', 'install', '--download', '/tmp/tmp_B0gnK', 'google-cloud-dataflow==2.0.0', '--no-binary', ':all:', '--no-deps']' returned non-zero exit status 2
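
To dig further, I can re-run the exact staging command from a Datalab
cell and look at pip's own output (a diagnostic sketch; the download
directory is just an example, Beam generated /tmp/tmp_B0gnK):

import os
import subprocess

# Same interpreter and arguments as in the log above; only the
# download dir differs.
dl_dir = '/tmp/sdk_download_test'
if not os.path.exists(dl_dir):
    os.makedirs(dl_dir)

cmd = ['/usr/local/envs/py2env/bin/python', '-m', 'pip', 'install',
       '--download', dl_dir,
       'google-cloud-dataflow==2.0.0', '--no-binary', ':all:', '--no-deps']
print(subprocess.call(cmd))  # exit status; the trace above shows 2

One thing I notice is that the '--download' flag was removed in newer
pip releases, so I wonder whether the pip version inside py2env could
be the issue here.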



-- 
Eila
www.orielresearch.org
https://www.meetup.com/Deep-Learning-In-Production/
