This is an automated email from the ASF dual-hosted git repository.

altay pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push: new c34c367 Finish Python 3 porting for coders subpackage (#6310) c34c367 is described below commit c34c367f5da6f9bef8a46471195470923a201af9 Author: Robbe Sneyders <robbe.sneyd...@gmail.com> AuthorDate: Fri Sep 7 03:44:46 2018 +0200 Finish Python 3 porting for coders subpackage (#6310) --- sdks/python/apache_beam/coders/coders.py | 5 +++-- .../apache_beam/coders/coders_test_common.py | 12 +++++++---- sdks/python/apache_beam/coders/slow_stream.py | 22 ++++++++++++++++++-- .../apache_beam/coders/standard_coders_test.py | 2 +- sdks/python/apache_beam/coders/stream_test.py | 24 +++++++++++----------- sdks/python/tox.ini | 4 +++- 6 files changed, 47 insertions(+), 22 deletions(-) diff --git a/sdks/python/apache_beam/coders/coders.py b/sdks/python/apache_beam/coders/coders.py index cf4b9b5..ad4edbb 100644 --- a/sdks/python/apache_beam/coders/coders.py +++ b/sdks/python/apache_beam/coders/coders.py @@ -62,12 +62,13 @@ __all__ = ['Coder', def serialize_coder(coder): from apache_beam.internal import pickler - return '%s$%s' % (coder.__class__.__name__, pickler.dumps(coder)) + return b'%s$%s' % (coder.__class__.__name__.encode('utf-8'), + pickler.dumps(coder)) def deserialize_coder(serialized): from apache_beam.internal import pickler - return pickler.loads(serialized.split('$', 1)[1]) + return pickler.loads(serialized.split(b'$', 1)[1]) # pylint: enable=wrong-import-order, wrong-import-position diff --git a/sdks/python/apache_beam/coders/coders_test_common.py b/sdks/python/apache_beam/coders/coders_test_common.py index 0b8b4c2..969c1de 100644 --- a/sdks/python/apache_beam/coders/coders_test_common.py +++ b/sdks/python/apache_beam/coders/coders_test_common.py @@ -20,6 +20,7 @@ from __future__ import absolute_import import logging import math +import sys import unittest from builtins import range @@ -41,7 +42,7 @@ from . 
import observable class CustomCoder(coders.Coder): def encode(self, x): - return str(x+1) + return str(x+1).encode('utf-8') def decode(self, encoded): return int(encoded) - 1 @@ -56,6 +57,9 @@ class CodersTest(unittest.TestCase): def setUpClass(cls): cls.seen = set() cls.seen_nested = set() + # Method has been renamed in Python 3 + if sys.version_info[0] < 3: + cls.assertCountEqual = cls.assertItemsEqual @classmethod def tearDownClass(cls): @@ -272,7 +276,7 @@ class CodersTest(unittest.TestCase): yield i iterable_coder = coders.IterableCoder(coders.VarIntCoder()) - self.assertItemsEqual(list(iter_generator(count)), + self.assertCountEqual(list(iter_generator(count)), iterable_coder.decode( iterable_coder.encode(iter_generator(count)))) @@ -374,8 +378,8 @@ class CodersTest(unittest.TestCase): self.assertEqual({'@type': 'kind:global_window'}, coder.as_cloud_object()) # Test binary representation - self.assertEqual('', coder.encode(value)) - self.assertEqual(value, coder.decode('')) + self.assertEqual(b'', coder.encode(value)) + self.assertEqual(value, coder.decode(b'')) # Test unnested self.check_coder(coder, value) # Test nested diff --git a/sdks/python/apache_beam/coders/slow_stream.py b/sdks/python/apache_beam/coders/slow_stream.py index da27a49..4bdece6 100644 --- a/sdks/python/apache_beam/coders/slow_stream.py +++ b/sdks/python/apache_beam/coders/slow_stream.py @@ -22,6 +22,7 @@ For internal use only; no backwards-compatibility guarantees. from __future__ import absolute_import import struct +import sys from builtins import chr from builtins import object @@ -70,7 +71,7 @@ class OutputStream(object): self.write(struct.pack('>d', v)) def get(self): - return ''.join(self.data) + return b''.join(self.data) def size(self): return len(self.data) @@ -114,6 +115,19 @@ class InputStream(object): self.data = data self.pos = 0 + # The behavior of looping over a byte-string and obtaining byte characters + # has been changed between python 2 and 3. 
+ # b = b'\xff\x01' + # Python 2: + # b[0] = '\xff' + # ord(b[0]) = 255 + # Python 3: + # b[0] = 255 + if sys.version_info[0] >= 3: + self.read_byte = self.read_byte_py3 + else: + self.read_byte = self.read_byte_py2 + def size(self): return len(self.data) - self.pos @@ -124,10 +138,14 @@ class InputStream(object): def read_all(self, nested): return self.read(self.read_var_int64() if nested else self.size()) - def read_byte(self): + def read_byte_py2(self): self.pos += 1 return ord(self.data[self.pos - 1]) + def read_byte_py3(self): + self.pos += 1 + return self.data[self.pos - 1] + def read_var_int64(self): shift = 0 result = 0 diff --git a/sdks/python/apache_beam/coders/standard_coders_test.py b/sdks/python/apache_beam/coders/standard_coders_test.py index f704c49..031406f 100644 --- a/sdks/python/apache_beam/coders/standard_coders_test.py +++ b/sdks/python/apache_beam/coders/standard_coders_test.py @@ -67,7 +67,7 @@ class StandardCodersTest(unittest.TestCase): } _urn_to_json_value_parser = { - 'beam:coder:bytes:v1': lambda x: x, + 'beam:coder:bytes:v1': lambda x: x.encode('utf-8'), 'beam:coder:varint:v1': lambda x: x, 'beam:coder:kv:v1': lambda x, key_parser, value_parser: (key_parser(x['key']), diff --git a/sdks/python/apache_beam/coders/stream_test.py b/sdks/python/apache_beam/coders/stream_test.py index 641fefa..ad046fb 100644 --- a/sdks/python/apache_beam/coders/stream_test.py +++ b/sdks/python/apache_beam/coders/stream_test.py @@ -36,20 +36,20 @@ class StreamTest(unittest.TestCase): def test_read_write(self): out_s = self.OutputStream() - out_s.write('abc') - out_s.write('\0\t\n') - out_s.write('xyz', True) - out_s.write('', True) + out_s.write(b'abc') + out_s.write(b'\0\t\n') + out_s.write(b'xyz', True) + out_s.write(b'', True) in_s = self.InputStream(out_s.get()) - self.assertEquals('abc\0\t\n', in_s.read(6)) - self.assertEquals('xyz', in_s.read_all(True)) - self.assertEquals('', in_s.read_all(True)) + self.assertEquals(b'abc\0\t\n', in_s.read(6)) + 
self.assertEquals(b'xyz', in_s.read_all(True)) + self.assertEquals(b'', in_s.read_all(True)) def test_read_all(self): out_s = self.OutputStream() - out_s.write('abc') + out_s.write(b'abc') in_s = self.InputStream(out_s.get()) - self.assertEquals('abc', in_s.read_all(False)) + self.assertEquals(b'abc', in_s.read_all(False)) def test_read_write_byte(self): out_s = self.OutputStream() @@ -129,15 +129,15 @@ class StreamTest(unittest.TestCase): def test_byte_counting(self): bc_s = self.ByteCountingOutputStream() self.assertEquals(0, bc_s.get_count()) - bc_s.write('def') + bc_s.write(b'def') self.assertEquals(3, bc_s.get_count()) - bc_s.write('') + bc_s.write(b'') self.assertEquals(3, bc_s.get_count()) bc_s.write_byte(10) self.assertEquals(4, bc_s.get_count()) # "nested" also writes the length of the string, which should # cause 1 extra byte to be counted. - bc_s.write('2345', nested=True) + bc_s.write(b'2345', nested=True) self.assertEquals(9, bc_s.get_count()) bc_s.write_var_int64(63) self.assertEquals(10, bc_s.get_count()) diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index f425ba7..b12e35d 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -56,11 +56,13 @@ commands = [testenv:py3] setenv = BEAM_EXPERIMENTAL_PY3=1 +modules = + apache_beam.coders,apache_beam.tools commands = python --version pip --version {toxinidir}/scripts/run_tox_cleanup.sh - python setup.py nosetests --tests apache_beam.runners.direct.direct_metrics_test:DirectMetricsTest.test_combiner_functions + python setup.py nosetests --tests {[testenv:py3]modules} {toxinidir}/scripts/run_tox_cleanup.sh [testenv:py27-cython]