Hi Beam,
I'm trying to prebuild a container image for my Beam job (thanks for the
tip, Cham!), but my job running on Dataflow eventually fails while
unpickling a DoFn, with a ModuleNotFoundError:
Traceback (most recent call last):
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/sdk_worker.py",
line 267, in _execute
response = task()
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/sdk_worker.py",
line 340, in <lambda>
lambda: self.create_worker().do_instruction(request), request)
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/sdk_worker.py",
line 580, in do_instruction
return getattr(self, request_type)(
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/sdk_worker.py",
line 611, in process_bundle
bundle_processor = self.bundle_processor_cache.get(
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/sdk_worker.py",
line 441, in get
processor = bundle_processor.BundleProcessor(
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 863, in __init__
self.ops = self.create_execution_tree(self.process_bundle_descriptor)
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 916, in create_execution_tree
return collections.OrderedDict([(
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 919, in <listcomp>
get_operation(transform_id))) for transform_id in sorted(
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 806, in wrapper
result = cache[args] = func(*args)
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 898, in get_operation
transform_consumers = {
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 899, in <dictcomp>
tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]]
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 899, in <listcomp>
tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]]
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 806, in wrapper
result = cache[args] = func(*args)
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 898, in get_operation
transform_consumers = {
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 899, in <dictcomp>
tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]]
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 899, in <listcomp>
tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]]
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 806, in wrapper
result = cache[args] = func(*args)
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 898, in get_operation
transform_consumers = {
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 899, in <dictcomp>
tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]]
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 899, in <listcomp>
tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]]
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 806, in wrapper
result = cache[args] = func(*args)
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 898, in get_operation
transform_consumers = {
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 899, in <dictcomp>
tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]]
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 899, in <listcomp>
tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]]
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 806, in wrapper
result = cache[args] = func(*args)
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 903, in get_operation
return transform_factory.create_operation(
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 1198, in create_operation
return creator(self, transform_id, transform_proto, payload, consumers)
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 1545, in create_par_do
return _create_pardo_operation(
File
"/usr/local/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 1588, in _create_pardo_operation
dofn_data = pickler.loads(serialized_fn)
File
"/usr/local/lib/python3.9/site-packages/apache_beam/internal/pickler.py",
line 51, in loads
return desired_pickle_lib.loads(
File
"/usr/local/lib/python3.9/site-packages/apache_beam/internal/dill_pickler.py",
line 289, in loads
return dill.loads(s)
File "/usr/local/lib/python3.9/site-packages/dill/_dill.py", line 275, in
loads
return load(file, ignore, **kwds)
File "/usr/local/lib/python3.9/site-packages/dill/_dill.py", line 270, in
load
return Unpickler(file, ignore=ignore, **kwds).load()
File "/usr/local/lib/python3.9/site-packages/dill/_dill.py", line 472, in
load
obj = StockUnpickler.load(self)
File "/usr/local/lib/python3.9/site-packages/dill/_dill.py", line 462, in
find_class
return StockUnpickler.find_class(self, module, name)
ModuleNotFoundError: No module named 'canonicalization'
So, my local dependencies are not found. They worked fine before I tried
prebuilding the container image.
We're using Bazel, so I could see that being part of the problem. My build
rule is as follows:
py_binary(
name = "beam_prebuild",
main = "beam_ami_parser.py",
srcs = ["beam_ami_parser.py"],
deps = [
requirement("apache-beam"),
"//canonicalization:bigtable",
"//canonicalization:constants",
],
target_compatible_with = ["//build:python3_9"],
)
I'd appreciate any help on figuring out why my modules aren't found.
Thanks!
-Lina