[ https://issues.apache.org/jira/browse/BEAM-5270?focusedWorklogId=141999&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-141999 ]
ASF GitHub Bot logged work on BEAM-5270: ---------------------------------------- Author: ASF GitHub Bot Created on: 07/Sep/18 01:44 Start Date: 07/Sep/18 01:44 Worklog Time Spent: 10m Work Description: aaltay closed pull request #6310: [BEAM-5270] Finish Python 3 porting for coders subpackage URL: https://github.com/apache/beam/pull/6310 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/sdks/python/apache_beam/coders/coders.py b/sdks/python/apache_beam/coders/coders.py index cf4b9b5d520..ad4edbbb374 100644 --- a/sdks/python/apache_beam/coders/coders.py +++ b/sdks/python/apache_beam/coders/coders.py @@ -62,12 +62,13 @@ def serialize_coder(coder): from apache_beam.internal import pickler - return '%s$%s' % (coder.__class__.__name__, pickler.dumps(coder)) + return b'%s$%s' % (coder.__class__.__name__.encode('utf-8'), + pickler.dumps(coder)) def deserialize_coder(serialized): from apache_beam.internal import pickler - return pickler.loads(serialized.split('$', 1)[1]) + return pickler.loads(serialized.split(b'$', 1)[1]) # pylint: enable=wrong-import-order, wrong-import-position diff --git a/sdks/python/apache_beam/coders/coders_test_common.py b/sdks/python/apache_beam/coders/coders_test_common.py index 0b8b4c20fde..969c1de10c4 100644 --- a/sdks/python/apache_beam/coders/coders_test_common.py +++ b/sdks/python/apache_beam/coders/coders_test_common.py @@ -20,6 +20,7 @@ import logging import math +import sys import unittest from builtins import range @@ -41,7 +42,7 @@ class CustomCoder(coders.Coder): def encode(self, x): - return str(x+1) + return str(x+1).encode('utf-8') def decode(self, encoded): return int(encoded) - 1 @@ -56,6 +57,9 @@ class CodersTest(unittest.TestCase): def setUpClass(cls): cls.seen = set() cls.seen_nested = set() + # Method 
has been renamed in Python 3 + if sys.version_info[0] < 3: + cls.assertCountEqual = cls.assertItemsEqual @classmethod def tearDownClass(cls): @@ -272,7 +276,7 @@ def iter_generator(count): yield i iterable_coder = coders.IterableCoder(coders.VarIntCoder()) - self.assertItemsEqual(list(iter_generator(count)), + self.assertCountEqual(list(iter_generator(count)), iterable_coder.decode( iterable_coder.encode(iter_generator(count)))) @@ -374,8 +378,8 @@ def test_global_window_coder(self): self.assertEqual({'@type': 'kind:global_window'}, coder.as_cloud_object()) # Test binary representation - self.assertEqual('', coder.encode(value)) - self.assertEqual(value, coder.decode('')) + self.assertEqual(b'', coder.encode(value)) + self.assertEqual(value, coder.decode(b'')) # Test unnested self.check_coder(coder, value) # Test nested diff --git a/sdks/python/apache_beam/coders/slow_stream.py b/sdks/python/apache_beam/coders/slow_stream.py index da27a49883a..4bdece6072b 100644 --- a/sdks/python/apache_beam/coders/slow_stream.py +++ b/sdks/python/apache_beam/coders/slow_stream.py @@ -22,6 +22,7 @@ from __future__ import absolute_import import struct +import sys from builtins import chr from builtins import object @@ -70,7 +71,7 @@ def write_bigendian_double(self, v): self.write(struct.pack('>d', v)) def get(self): - return ''.join(self.data) + return b''.join(self.data) def size(self): return len(self.data) @@ -114,6 +115,19 @@ def __init__(self, data): self.data = data self.pos = 0 + # The behavior of looping over a byte-string and obtaining byte characters + # has been changed between python 2 and 3. 
+ # b = b'\xff\x01' + # Python 2: + # b[0] = '\xff' + # ord(b[0]) = 255 + # Python 3: + # b[0] = 255 + if sys.version_info[0] >= 3: + self.read_byte = self.read_byte_py3 + else: + self.read_byte = self.read_byte_py2 + def size(self): return len(self.data) - self.pos @@ -124,10 +138,14 @@ def read(self, size): def read_all(self, nested): return self.read(self.read_var_int64() if nested else self.size()) - def read_byte(self): + def read_byte_py2(self): self.pos += 1 return ord(self.data[self.pos - 1]) + def read_byte_py3(self): + self.pos += 1 + return self.data[self.pos - 1] + def read_var_int64(self): shift = 0 result = 0 diff --git a/sdks/python/apache_beam/coders/standard_coders_test.py b/sdks/python/apache_beam/coders/standard_coders_test.py index f704c490c97..031406f40ac 100644 --- a/sdks/python/apache_beam/coders/standard_coders_test.py +++ b/sdks/python/apache_beam/coders/standard_coders_test.py @@ -67,7 +67,7 @@ class StandardCodersTest(unittest.TestCase): } _urn_to_json_value_parser = { - 'beam:coder:bytes:v1': lambda x: x, + 'beam:coder:bytes:v1': lambda x: x.encode('utf-8'), 'beam:coder:varint:v1': lambda x: x, 'beam:coder:kv:v1': lambda x, key_parser, value_parser: (key_parser(x['key']), diff --git a/sdks/python/apache_beam/coders/stream_test.py b/sdks/python/apache_beam/coders/stream_test.py index 641fefad45d..ad046fb5598 100644 --- a/sdks/python/apache_beam/coders/stream_test.py +++ b/sdks/python/apache_beam/coders/stream_test.py @@ -36,20 +36,20 @@ class StreamTest(unittest.TestCase): def test_read_write(self): out_s = self.OutputStream() - out_s.write('abc') - out_s.write('\0\t\n') - out_s.write('xyz', True) - out_s.write('', True) + out_s.write(b'abc') + out_s.write(b'\0\t\n') + out_s.write(b'xyz', True) + out_s.write(b'', True) in_s = self.InputStream(out_s.get()) - self.assertEquals('abc\0\t\n', in_s.read(6)) - self.assertEquals('xyz', in_s.read_all(True)) - self.assertEquals('', in_s.read_all(True)) + self.assertEquals(b'abc\0\t\n', 
in_s.read(6)) + self.assertEquals(b'xyz', in_s.read_all(True)) + self.assertEquals(b'', in_s.read_all(True)) def test_read_all(self): out_s = self.OutputStream() - out_s.write('abc') + out_s.write(b'abc') in_s = self.InputStream(out_s.get()) - self.assertEquals('abc', in_s.read_all(False)) + self.assertEquals(b'abc', in_s.read_all(False)) def test_read_write_byte(self): out_s = self.OutputStream() @@ -129,15 +129,15 @@ def test_read_write_bigendian_int32(self): def test_byte_counting(self): bc_s = self.ByteCountingOutputStream() self.assertEquals(0, bc_s.get_count()) - bc_s.write('def') + bc_s.write(b'def') self.assertEquals(3, bc_s.get_count()) - bc_s.write('') + bc_s.write(b'') self.assertEquals(3, bc_s.get_count()) bc_s.write_byte(10) self.assertEquals(4, bc_s.get_count()) # "nested" also writes the length of the string, which should # cause 1 extra byte to be counted. - bc_s.write('2345', nested=True) + bc_s.write(b'2345', nested=True) self.assertEquals(9, bc_s.get_count()) bc_s.write_var_int64(63) self.assertEquals(10, bc_s.get_count()) diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index f425ba7673b..b12e35d2e67 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -56,11 +56,13 @@ commands = [testenv:py3] setenv = BEAM_EXPERIMENTAL_PY3=1 +modules = + apache_beam.coders,apache_beam.tools commands = python --version pip --version {toxinidir}/scripts/run_tox_cleanup.sh - python setup.py nosetests --tests apache_beam.runners.direct.direct_metrics_test:DirectMetricsTest.test_combiner_functions + python setup.py nosetests --tests {[testenv:py3]modules} {toxinidir}/scripts/run_tox_cleanup.sh [testenv:py27-cython] ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org

Issue Time Tracking
-------------------

Worklog Id: (was: 141999)
Time Spent: 2.5h (was: 2h 20m)

> Finish Python 3 porting for coders module
> -----------------------------------------
>
> Key: BEAM-5270
> URL: https://issues.apache.org/jira/browse/BEAM-5270
> Project: Beam
> Issue Type: Sub-task
> Components: sdk-py-core
> Reporter: Robbe
> Assignee: Robbe
> Priority: Major
> Time Spent: 2.5h
> Remaining Estimate: 0h
>

--
This message was sent by Atlassian JIRA (v7.6.3#76005)