Stephan Hoyer created BEAM-4805:
-----------------------------------

             Summary: beam.Map doesn't work on functions defined with *args
                 Key: BEAM-4805
                 URL: https://issues.apache.org/jira/browse/BEAM-4805
             Project: Beam
          Issue Type: Bug
          Components: sdk-py-core
            Reporter: Stephan Hoyer
            Assignee: Ahmet Altay


Consider the following example:
{code:python}
import apache_beam as beam
def f(*args, **kwargs):
    return args, kwargs
[1, 2, 3] | beam.Map(f)
{code}

When I run this code using the latest released version of Beam (2.5.0), I see 
the following error:
{noformat}
TypeError                                 Traceback (most recent call last)
<ipython-input-20-9003b3f5887a> in <module>()
----> 1 range(3) | beam.Map(f)

/usr/local/lib/python2.7/dist-packages/apache_beam/transforms/ptransform.pyc in 
__ror__(self, left, label)
    491     _allocate_materialized_pipeline(p)
    492     materialized_result = _AddMaterializationTransforms().visit(result)
--> 493     p.run().wait_until_finish()
    494     _release_materialized_pipeline(p)
    495     return _FinalizeMaterialization().visit(materialized_result)

/usr/local/lib/python2.7/dist-packages/apache_beam/pipeline.pyc in run(self, 
test_runner_api)
    388     if test_runner_api and self._verify_runner_api_compatible():
    389       return Pipeline.from_runner_api(
--> 390           self.to_runner_api(), self.runner, self._options).run(False)
    391 
    392     if self._options.view_as(TypeOptions).runtime_type_check:

/usr/local/lib/python2.7/dist-packages/apache_beam/pipeline.pyc in run(self, 
test_runner_api)
    401       finally:
    402         shutil.rmtree(tmpdir)
--> 403     return self.runner.run_pipeline(self)
    404 
    405   def __enter__(self):

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/direct/direct_runner.pyc
 in run_pipeline(self, pipeline)
    132       runner = BundleBasedDirectRunner()
    133 
--> 134     return runner.run_pipeline(pipeline)
    135 
    136 

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/portability/fn_api_runner.pyc
 in run_pipeline(self, pipeline)
    216     from apache_beam.runners.dataflow.dataflow_runner import DataflowRunner
    217     pipeline.visit(DataflowRunner.group_by_key_input_visitor())
--> 218     return self.run_via_runner_api(pipeline.to_runner_api())
    219 
    220   def run_via_runner_api(self, pipeline_proto):

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/portability/fn_api_runner.pyc
 in run_via_runner_api(self, pipeline_proto)
    219 
    220   def run_via_runner_api(self, pipeline_proto):
--> 221     return self.run_stages(*self.create_stages(pipeline_proto))
    222 
    223   def create_stages(self, pipeline_proto):

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/portability/fn_api_runner.pyc
 in run_stages(self, pipeline_components, stages, safe_coders)
    857         metrics_by_stage[stage.name] = self.run_stage(
    858             controller, pipeline_components, stage,
--> 859             pcoll_buffers, safe_coders).process_bundle.metrics
    860     finally:
    861       controller.close()

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/portability/fn_api_runner.pyc
 in run_stage(self, controller, pipeline_components, stage, pcoll_buffers, 
safe_coders)
    968     return BundleManager(
    969         controller, get_buffer, process_bundle_descriptor,
--> 970         self._progress_frequency).process_bundle(data_input, data_output)
    971 
    972   # These classes are used to interact with the worker.

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/portability/fn_api_runner.pyc
 in process_bundle(self, inputs, expected_outputs)
   1172         process_bundle=beam_fn_api_pb2.ProcessBundleRequest(
   1173             process_bundle_descriptor_reference=self._bundle_descriptor.id))
-> 1174     result_future = self._controller.control_handler.push(process_bundle)
   1175 
   1176     with ProgressRequester(

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/portability/fn_api_runner.pyc
 in push(self, request)
   1052         request.instruction_id = 'control_%s' % self._uid_counter
   1053       logging.debug('CONTROL REQUEST %s', request)
-> 1054       response = self.worker.do_instruction(request)
   1055       logging.debug('CONTROL RESPONSE %s', response)
   1056       return ControlFuture(request.instruction_id, response)

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/sdk_worker.pyc
 in do_instruction(self, request)
    206       # E.g. if register is set, this will call self.register(request.register))
    207       return getattr(self, request_type)(getattr(request, request_type),
--> 208                                          request.instruction_id)
    209     else:
    210       raise NotImplementedError

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/sdk_worker.pyc
 in process_bundle(self, request, instruction_id)
    228     try:
    229       with state_handler.process_instruction_id(instruction_id):
--> 230         processor.process_bundle(instruction_id)
    231     finally:
    232       del self.bundle_processors[instruction_id]

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/bundle_processor.pyc
 in process_bundle(self, instruction_id)
    287       for op in reversed(self.ops.values()):
    288         logging.info('start %s', op)
--> 289         op.start()
    290 
    291       # Inject inputs from data plane.

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/operations.so 
in apache_beam.runners.worker.operations.ReadOperation.start()

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/operations.so 
in apache_beam.runners.worker.operations.ReadOperation.start()

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/operations.so 
in apache_beam.runners.worker.operations.ReadOperation.start()

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/operations.so 
in apache_beam.runners.worker.operations.Operation.output()

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/operations.so 
in apache_beam.runners.worker.operations.ConsumerSet.receive()

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/operations.so 
in apache_beam.runners.worker.operations.DoOperation.process()

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/operations.so 
in apache_beam.runners.worker.operations.DoOperation.process()

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/common.so in 
apache_beam.runners.common.DoFnRunner.receive()

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/common.so in 
apache_beam.runners.common.DoFnRunner.process()

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/common.so in 
apache_beam.runners.common.DoFnRunner._reraise_augmented()

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/common.so in 
apache_beam.runners.common.DoFnRunner.process()

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/common.so in 
apache_beam.runners.common.SimpleInvoker.invoke_process()

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/common.so in 
apache_beam.runners.common._OutputProcessor.process_outputs()

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/operations.so 
in apache_beam.runners.worker.operations.ConsumerSet.receive()

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/operations.so 
in apache_beam.runners.worker.operations.ConsumerSet.update_counters_start()

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/opcounters.so 
in apache_beam.runners.worker.opcounters.OperationCounters.update_from()

/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/opcounters.so 
in apache_beam.runners.worker.opcounters.OperationCounters.do_sample()

/usr/local/lib/python2.7/dist-packages/apache_beam/coders/coder_impl.so in 
apache_beam.coders.coder_impl.WindowedValueCoderImpl.get_estimated_size_and_observables()

/usr/local/lib/python2.7/dist-packages/apache_beam/coders/coder_impl.so in 
apache_beam.coders.coder_impl.WindowedValueCoderImpl.get_estimated_size_and_observables()

/usr/local/lib/python2.7/dist-packages/apache_beam/coders/coder_impl.so in 
apache_beam.coders.coder_impl.AbstractComponentCoderImpl.get_estimated_size_and_observables()

/usr/local/lib/python2.7/dist-packages/apache_beam/coders/coder_impl.so in 
apache_beam.coders.coder_impl.CoderImpl.get_estimated_size_and_observables()

/usr/local/lib/python2.7/dist-packages/apache_beam/coders/coder_impl.so in 
apache_beam.coders.coder_impl.VarIntCoderImpl.estimate_size()

/usr/local/lib/python2.7/dist-packages/apache_beam/coders/stream.pyx in 
apache_beam.coders.stream.get_varint_size()
    220     return (<double*><char*>&as_long)[0]
    221 
--> 222 cpdef libc.stdint.int64_t get_varint_size(libc.stdint.int64_t value):
    223   """Returns the size of the given integer value when encode as a VarInt."""
    224   cdef libc.stdint.int64_t varint_size = 0

TypeError: an integer is required [while running 'Map(f)']
{noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to