[ https://issues.apache.org/jira/browse/BEAM-6522?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Work on BEAM-6522 stopped by Valentyn Tymofieiev. ------------------------------------------------- > Dill fails to pickle avro.RecordSchema classes on Python 3. > ------------------------------------------------------------ > > Key: BEAM-6522 > URL: https://issues.apache.org/jira/browse/BEAM-6522 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core > Reporter: Robbe > Assignee: Valentyn Tymofieiev > Priority: Major > Time Spent: 7.5h > Remaining Estimate: 0h > > The avroio module still has 4 failing tests. This is actually 2 times the > same 2 tests, both for Avro and Fastavro. > *apache_beam.io.avroio_test.TestAvro.test_sink_transform* > *apache_beam.io.avroio_test.TestFastAvro.test_sink_transform* > fail with: > {code:java} > Traceback (most recent call last): > File "/home/robbe/workspace/beam/sdks/python/apache_beam/io/avroio_test.py", > line 432, in test_sink_transform > | avroio.WriteToAvro(path, self.SCHEMA, use_fastavro=self.use_fastavro) > File "/home/robbe/workspace/beam/sdks/python/apache_beam/pvalue.py", line > 112, in __or__ > return self.pipeline.apply(ptransform, self) > File "/home/robbe/workspace/beam/sdks/python/apache_beam/pipeline.py", line > 515, in apply > pvalueish_result = self.runner.apply(transform, pvalueish, self._options) > File "/home/robbe/workspace/beam/sdks/python/apache_beam/runners/runner.py", > line 193, in apply > return m(transform, input, options) > File "/home/robbe/workspace/beam/sdks/python/apache_beam/runners/runner.py", > line 199, in apply_PTransform > return transform.expand(input) > File "/home/robbe/workspace/beam/sdks/python/apache_beam/io/avroio.py", line > 528, in expand > return pcoll | beam.io.iobase.Write(self._sink) > File "/home/robbe/workspace/beam/sdks/python/apache_beam/pvalue.py", line > 112, in __or__ > return self.pipeline.apply(ptransform, self) > File "/home/robbe/workspace/beam/sdks/python/apache_beam/pipeline.py", line > 515, in apply > pvalueish_result = self.runner.apply(transform, pvalueish, self._options) > File "/home/robbe/workspace/beam/sdks/python/apache_beam/runners/runner.py", > line 193, in apply > return m(transform, input, options) > File "/home/robbe/workspace/beam/sdks/python/apache_beam/runners/runner.py", > line 199, in apply_PTransform > return transform.expand(input) > File "/home/robbe/workspace/beam/sdks/python/apache_beam/io/iobase.py", line > 960, in expand > return pcoll | WriteImpl(self.sink) > File "/home/robbe/workspace/beam/sdks/python/apache_beam/pvalue.py", line > 112, in __or__ > return self.pipeline.apply(ptransform, self) > File "/home/robbe/workspace/beam/sdks/python/apache_beam/pipeline.py", line > 515, in apply > pvalueish_result = self.runner.apply(transform, pvalueish, self._options) > File "/home/robbe/workspace/beam/sdks/python/apache_beam/runners/runner.py", > line 193, in apply > return m(transform, input, options) > File "/home/robbe/workspace/beam/sdks/python/apache_beam/runners/runner.py", > line 199, in apply_PTransform > return transform.expand(input) > File "/home/robbe/workspace/beam/sdks/python/apache_beam/io/iobase.py", line > 979, in expand > lambda _, sink: sink.initialize_write(), self.sink) > File "/home/robbe/workspace/beam/sdks/python/apache_beam/transforms/core.py", > line 1103, in Map > pardo = FlatMap(wrapper, *args, **kwargs) > File "/home/robbe/workspace/beam/sdks/python/apache_beam/transforms/core.py", > line 1054, in FlatMap > pardo = ParDo(CallableWrapperDoFn(fn), *args, **kwargs) > File "/home/robbe/workspace/beam/sdks/python/apache_beam/transforms/core.py", > line 864, in __init__ > super(ParDo, self).__init__(fn, *args, **kwargs) > File > "/home/robbe/workspace/beam/sdks/python/apache_beam/transforms/ptransform.py", > line 646, in __init__ > self.args = pickler.loads(pickler.dumps(self.args)) > File > "/home/robbe/workspace/beam/sdks/python/apache_beam/internal/pickler.py", > line 247, in loads > return dill.loads(s) > File > "/home/robbe/workspace/beam/sdks/python/.eggs/dill-0.2.9-py3.5.egg/dill/_dill.py", > line 317, in loads > return load(file, ignore) > File > "/home/robbe/workspace/beam/sdks/python/.eggs/dill-0.2.9-py3.5.egg/dill/_dill.py", > line 305, in load > obj = pik.load() > File > "/home/robbe/workspace/beam/sdks/python/target/.tox/py3/lib/python3.5/site-packages/avro/schema.py", > line 173, in __setitem__ > % (key, value, self)) > Exception: Attempting to map key 'favorite_color' to value <avro.schema.Field > object at 0x7f8f72d0d0b8> in ImmutableDict {} > {code} > > *apache_beam.io.avroio_test.TestAvro.test_split_points* > *apache_beam.io.avroio_test.TestFastAvro.test_split_points* > fail with: > > {code:java} > Traceback (most recent call last): > File "/home/robbe/workspace/beam/sdks/python/apache_beam/io/avroio_test.py", > line 308, in test_split_points > self.assertEquals(split_points_report[-10:], [(2, 1)] * 10) > AssertionError: Lists differ: [(10, 1), (10, 1), (10, 1), (10, 1), (10, 1[42 > chars], 1)] != [(2, 1), (2, 1), (2, 1), (2, 1), (2, 1), (2[32 chars], 1)] > First differing element 0: > (10, 1) > (2, 1) > + [(2, 1), (2, 1), (2, 1), (2, 1), (2, 1), (2, 1), (2, 1), (2, 1), (2, 1), > (2, 1)] > - [(10, 1), > - (10, 1), > - (10, 1), > - (10, 1), > - (10, 1), > - (10, 1), > - (10, 1), > - (10, 1), > - (10, 1), > - (10, 1)] > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)