[ https://issues.apache.org/jira/browse/BEAM-3981?focusedWorklogId=100665&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-100665 ]
ASF GitHub Bot logged work on BEAM-3981: ---------------------------------------- Author: ASF GitHub Bot Created on: 10/May/18 16:09 Start Date: 10/May/18 16:09 Worklog Time Spent: 10m Work Description: aaltay closed pull request #5053: [BEAM-3981] Futurize coders subpackage URL: https://github.com/apache/beam/pull/5053 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/sdks/python/apache_beam/coders/__init__.py b/sdks/python/apache_beam/coders/__init__.py index acca89f70f4..3192494ebbf 100644 --- a/sdks/python/apache_beam/coders/__init__.py +++ b/sdks/python/apache_beam/coders/__init__.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from __future__ import absolute_import from apache_beam.coders.coders import * from apache_beam.coders.typecoders import registry diff --git a/sdks/python/apache_beam/coders/coder_impl.pxd b/sdks/python/apache_beam/coders/coder_impl.pxd index 98dd508556a..8a85d085448 100644 --- a/sdks/python/apache_beam/coders/coder_impl.pxd +++ b/sdks/python/apache_beam/coders/coder_impl.pxd @@ -70,11 +70,11 @@ cdef class DeterministicFastPrimitivesCoderImpl(CoderImpl): cdef object NoneType cdef char UNKNOWN_TYPE, NONE_TYPE, INT_TYPE, FLOAT_TYPE, BOOL_TYPE -cdef char STR_TYPE, UNICODE_TYPE, LIST_TYPE, TUPLE_TYPE, DICT_TYPE, SET_TYPE +cdef char BYTES_TYPE, UNICODE_TYPE, LIST_TYPE, TUPLE_TYPE, DICT_TYPE, SET_TYPE cdef class FastPrimitivesCoderImpl(StreamCoderImpl): cdef CoderImpl fallback_coder_impl - @cython.locals(unicode_value=unicode, dict_value=dict) + @cython.locals(dict_value=dict) cpdef encode_to_stream(self, value, OutputStream stream, bint nested) diff --git a/sdks/python/apache_beam/coders/coder_impl.py 
b/sdks/python/apache_beam/coders/coder_impl.py index cc7ed87c3ad..2fcd0f22a31 100644 --- a/sdks/python/apache_beam/coders/coder_impl.py +++ b/sdks/python/apache_beam/coders/coder_impl.py @@ -24,13 +24,17 @@ This module may be optionally compiled with Cython, using the corresponding coder_impl.pxd file for type hints. +Py2/3 porting: Native range is used on both python versions instead of +future.builtins.range to avoid performance regression in Cython compiled code. + For internal use only; no backwards-compatibility guarantees. """ from __future__ import absolute_import +from __future__ import division -from types import NoneType - -import six +import sys +from builtins import chr +from builtins import object from apache_beam.coders import observable from apache_beam.utils import windowed_value @@ -54,10 +58,12 @@ from .slow_stream import get_varint_size # pylint: enable=wrong-import-order, wrong-import-position, ungrouped-imports -try: - long # Python 2 -except NameError: - long = int # Python 3 +try: # Python 2 + long # pylint: disable=long-builtin + unicode # pylint: disable=unicode-builtin +except NameError: # Python 3 + long = int + unicode = str class CoderImpl(object): @@ -199,7 +205,7 @@ def __init__(self, coder, step_label): self._step_label = step_label def _check_safe(self, value): - if isinstance(value, (str, six.text_type, long, int, float)): + if isinstance(value, (bytes, unicode, long, int, float)): pass elif value is None: pass @@ -253,7 +259,7 @@ def decode(self, encoded): NONE_TYPE = 0 INT_TYPE = 1 FLOAT_TYPE = 2 -STR_TYPE = 3 +BYTES_TYPE = 3 UNICODE_TYPE = 4 BOOL_TYPE = 9 LIST_TYPE = 5 @@ -279,7 +285,7 @@ def get_estimated_size_and_observables(self, value, nested=False): def encode_to_stream(self, value, stream, nested): t = type(value) - if t is NoneType: + if value is None: stream.write_byte(NONE_TYPE) elif t is int: stream.write_byte(INT_TYPE) @@ -287,13 +293,13 @@ def encode_to_stream(self, value, stream, nested): elif t is float: 
stream.write_byte(FLOAT_TYPE) stream.write_bigendian_double(value) - elif t is str: - stream.write_byte(STR_TYPE) + elif t is bytes: + stream.write_byte(BYTES_TYPE) stream.write(value, nested) - elif t is six.text_type: - unicode_value = value # for typing + elif t is unicode: + text_value = value # for typing stream.write_byte(UNICODE_TYPE) - stream.write(unicode_value.encode('utf-8'), nested) + stream.write(text_value.encode('utf-8'), nested) elif t is list or t is tuple or t is set: stream.write_byte( LIST_TYPE if t is list else TUPLE_TYPE if t is tuple else SET_TYPE) @@ -304,7 +310,13 @@ def encode_to_stream(self, value, stream, nested): dict_value = value # for typing stream.write_byte(DICT_TYPE) stream.write_var_int64(len(dict_value)) - for k, v in dict_value.iteritems(): + # Use iteritems() on Python 2 instead of future.builtins.iteritems to + # avoid performance regression in Cython compiled code. + if sys.version_info[0] == 2: + items = dict_value.iteritems() # pylint: disable=dict-iter-method + else: + items = dict_value.items() + for k, v in items: self.encode_to_stream(k, stream, True) self.encode_to_stream(v, stream, True) elif t is bool: @@ -322,7 +334,7 @@ def decode_from_stream(self, stream, nested): return stream.read_var_int64() elif t == FLOAT_TYPE: return stream.read_bigendian_double() - elif t == STR_TYPE: + elif t == BYTES_TYPE: return stream.read_all(nested) elif t == UNICODE_TYPE: return stream.read_all(nested).decode('utf-8') @@ -394,8 +406,9 @@ def _from_normal_time(self, value): def encode_to_stream(self, value, out, nested): span_micros = value.end.micros - value.start.micros - out.write_bigendian_uint64(self._from_normal_time(value.end.micros / 1000)) - out.write_var_int64(span_micros / 1000) + out.write_bigendian_uint64( + self._from_normal_time(value.end.micros // 1000)) + out.write_var_int64(span_micros // 1000) def decode_from_stream(self, in_, nested): end_millis = self._to_normal_time(in_.read_bigendian_uint64()) @@ -409,7 +422,7 
@@ def estimate_size(self, value, nested=False): # An IntervalWindow is context-insensitive, with a timestamp (8 bytes) # and a varint timespam. span = value.end.micros - value.start.micros - return 8 + get_varint_size(span / 1000) + return 8 + get_varint_size(span // 1000) class TimestampCoderImpl(StreamCoderImpl): @@ -427,7 +440,7 @@ def estimate_size(self, unused_value, nested=False): return 8 -small_ints = [chr(_) for _ in range(128)] +small_ints = [chr(_).encode('latin-1') for _ in range(128)] class VarIntCoderImpl(StreamCoderImpl): @@ -474,7 +487,7 @@ def decode_from_stream(self, stream, nested): return self._value def encode(self, value): - b = '' # avoid byte vs str vs unicode error + b = b'' # avoid byte vs str vs unicode error return b def decode(self, encoded): @@ -783,7 +796,7 @@ def encode_to_stream(self, value, out, nested): # TODO(BEAM-1524): Clean this up once we have a BEAM wide consensus on # precision of timestamps. self._from_normal_time( - restore_sign * (abs(wv.timestamp_micros) / 1000))) + restore_sign * (abs(wv.timestamp_micros) // 1000))) self._windows_coder.encode_to_stream(wv.windows, out, True) # Default PaneInfo encoded byte representing NO_FIRING. self._pane_info_coder.encode_to_stream(wv.pane_info, out, True) @@ -797,9 +810,9 @@ def decode_from_stream(self, in_stream, nested): # were indeed MIN/MAX timestamps. # TODO(BEAM-1524): Clean this up once we have a BEAM wide consensus on # precision of timestamps. 
- if timestamp == -(abs(MIN_TIMESTAMP.micros) / 1000): + if timestamp == -(abs(MIN_TIMESTAMP.micros) // 1000): timestamp = MIN_TIMESTAMP.micros - elif timestamp == (MAX_TIMESTAMP.micros / 1000): + elif timestamp == (MAX_TIMESTAMP.micros // 1000): timestamp = MAX_TIMESTAMP.micros else: timestamp *= 1000 diff --git a/sdks/python/apache_beam/coders/coders.py b/sdks/python/apache_beam/coders/coders.py index d1ebe6b15da..b6aa40d6c82 100644 --- a/sdks/python/apache_beam/coders/coders.py +++ b/sdks/python/apache_beam/coders/coders.py @@ -22,7 +22,7 @@ from __future__ import absolute_import import base64 -import cPickle as pickle +from builtins import object import google.protobuf from google.protobuf import wrappers_pb2 @@ -33,6 +33,12 @@ from apache_beam.portability.api import beam_runner_api_pb2 from apache_beam.utils import proto_utils +# This is for py2/3 compatibility. cPickle was renamed pickle in python 3. +try: + import cPickle as pickle # Python 2 +except ImportError: + import pickle # Python 3 + # pylint: disable=wrong-import-order, wrong-import-position, ungrouped-imports try: from .stream import get_varint_size @@ -210,11 +216,15 @@ def as_cloud_object(self): def __repr__(self): return self.__class__.__name__ + # pylint: disable=protected-access def __eq__(self, other): - # pylint: disable=protected-access return (self.__class__ == other.__class__ and self._dict_without_impl() == other._dict_without_impl()) - # pylint: enable=protected-access + + def __hash__(self): + return hash((self.__class__,) + + tuple(sorted(self._dict_without_impl().items()))) + # pylint: enable=protected-access _known_urns = {} @@ -312,7 +322,7 @@ class ToStringCoder(Coder): def encode(self, value): try: # Python 2 - if isinstance(value, unicode): + if isinstance(value, unicode): # pylint: disable=unicode-builtin return value.encode('utf-8') except NameError: # Python 3 pass diff --git a/sdks/python/apache_beam/coders/coders_test.py b/sdks/python/apache_beam/coders/coders_test.py index 
705de8920d5..a6c456f619f 100644 --- a/sdks/python/apache_beam/coders/coders_test.py +++ b/sdks/python/apache_beam/coders/coders_test.py @@ -14,11 +14,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # - +from __future__ import absolute_import import base64 import logging import unittest +from builtins import object from apache_beam.coders import proto2_coder_test_messages_pb2 as test_message from apache_beam.coders import coders @@ -47,17 +48,11 @@ def test_equality(self): class CodersTest(unittest.TestCase): def test_str_utf8_coder(self): - real_coder = coders_registry.get_coder(str) - expected_coder = coders.BytesCoder() - self.assertEqual( - real_coder.encode('abc'), expected_coder.encode('abc')) - self.assertEqual('abc', real_coder.decode(real_coder.encode('abc'))) - real_coder = coders_registry.get_coder(bytes) expected_coder = coders.BytesCoder() self.assertEqual( - real_coder.encode('abc'), expected_coder.encode('abc')) - self.assertEqual('abc', real_coder.decode(real_coder.encode('abc'))) + real_coder.encode(b'abc'), expected_coder.encode(b'abc')) + self.assertEqual(b'abc', real_coder.decode(real_coder.encode(b'abc'))) # The test proto message file was generated by running the following: @@ -99,6 +94,9 @@ def __eq__(self, other): return True return False + def __hash__(self): + return hash(type(self)) + class FallbackCoderTest(unittest.TestCase): diff --git a/sdks/python/apache_beam/coders/coders_test_common.py b/sdks/python/apache_beam/coders/coders_test_common.py index 0ea7da2b6ad..0b8b4c20fde 100644 --- a/sdks/python/apache_beam/coders/coders_test_common.py +++ b/sdks/python/apache_beam/coders/coders_test_common.py @@ -21,6 +21,7 @@ import logging import math import unittest +from builtins import range import dill @@ -103,7 +104,7 @@ def test_custom_coder(self): self.check_coder(CustomCoder(), 1, -10, 5) self.check_coder(coders.TupleCoder((CustomCoder(), coders.BytesCoder())), - (1, 'a'), (-10, 
'b'), (5, 'c')) + (1, b'a'), (-10, b'b'), (5, b'c')) def test_pickle_coder(self): self.check_coder(coders.PickleCoder(), 'a', 1, 1.5, (1, 2, 3)) @@ -129,7 +130,7 @@ def test_dill_coder(self): def test_fast_primitives_coder(self): coder = coders.FastPrimitivesCoder(coders.SingletonCoder(len)) - self.check_coder(coder, None, 1, -1, 1.5, 'str\0str', u'unicode\0\u0101') + self.check_coder(coder, None, 1, -1, 1.5, b'str\0str', u'unicode\0\u0101') self.check_coder(coder, (), (1, 2, 3)) self.check_coder(coder, [], [1, 2, 3]) self.check_coder(coder, dict(), {'a': 'b'}, {0: dict(), 1: len}) @@ -139,7 +140,7 @@ def test_fast_primitives_coder(self): self.check_coder(coders.TupleCoder((coder,)), ('a',), (1,)) def test_bytes_coder(self): - self.check_coder(coders.BytesCoder(), 'a', '\0', 'z' * 1000) + self.check_coder(coders.BytesCoder(), b'a', b'\0', b'z' * 1000) def test_varint_coder(self): # Small ints. @@ -190,7 +191,7 @@ def test_timestamp_coder(self): timestamp.Timestamp(micros=1234567890123456789)) self.check_coder( coders.TupleCoder((coders.TimestampCoder(), coders.BytesCoder())), - (timestamp.Timestamp.of(27), 'abc')) + (timestamp.Timestamp.of(27), b'abc')) def test_tuple_coder(self): kv_coder = coders.TupleCoder((coders.VarIntCoder(), coders.BytesCoder())) @@ -206,14 +207,14 @@ def test_tuple_coder(self): kv_coder.as_cloud_object()) # Test binary representation self.assertEqual( - '\x04abc', - kv_coder.encode((4, 'abc'))) + b'\x04abc', + kv_coder.encode((4, b'abc'))) # Test unnested self.check_coder( kv_coder, - (1, 'a'), - (-2, 'a' * 100), - (300, 'abc\0' * 5)) + (1, b'a'), + (-2, b'a' * 100), + (300, b'abc\0' * 5)) # Test nested self.check_coder( coders.TupleCoder( @@ -322,12 +323,12 @@ def test_windowed_value_coder(self): }, coder.as_cloud_object()) # Test binary representation - self.assertEqual('\x7f\xdf;dZ\x1c\xac\t\x00\x00\x00\x01\x0f\x01', + self.assertEqual(b'\x7f\xdf;dZ\x1c\xac\t\x00\x00\x00\x01\x0f\x01', coder.encode(window.GlobalWindows.windowed_value(1))) 
# Test decoding large timestamp self.assertEqual( - coder.decode('\x7f\xdf;dZ\x1c\xac\x08\x00\x00\x00\x01\x0f\x00'), + coder.decode(b'\x7f\xdf;dZ\x1c\xac\x08\x00\x00\x00\x01\x0f\x00'), windowed_value.create(0, MIN_TIMESTAMP.micros, (GlobalWindow(),))) # Test unnested @@ -364,7 +365,7 @@ def test_proto_coder(self): proto_coder = coders.ProtoCoder(ma.__class__) self.check_coder(proto_coder, ma) self.check_coder(coders.TupleCoder((proto_coder, coders.BytesCoder())), - (ma, 'a'), (mb, 'b')) + (ma, b'a'), (mb, b'b')) def test_global_window_coder(self): coder = coders.GlobalWindowCoder() @@ -391,16 +392,16 @@ def test_length_prefix_coder(self): }, coder.as_cloud_object()) # Test binary representation - self.assertEqual('\x00', coder.encode('')) - self.assertEqual('\x01a', coder.encode('a')) - self.assertEqual('\x02bc', coder.encode('bc')) - self.assertEqual('\xff\x7f' + 'z' * 16383, coder.encode('z' * 16383)) + self.assertEqual(b'\x00', coder.encode(b'')) + self.assertEqual(b'\x01a', coder.encode(b'a')) + self.assertEqual(b'\x02bc', coder.encode(b'bc')) + self.assertEqual(b'\xff\x7f' + b'z' * 16383, coder.encode(b'z' * 16383)) # Test unnested - self.check_coder(coder, '', 'a', 'bc', 'def') + self.check_coder(coder, b'', b'a', b'bc', b'def') # Test nested self.check_coder(coders.TupleCoder((coder, coder)), - ('', 'a'), - ('bc', 'def')) + (b'', b'a'), + (b'bc', b'def')) def test_nested_observables(self): class FakeObservableIterator(observable.ObservableMixin): diff --git a/sdks/python/apache_beam/coders/fast_coders_test.py b/sdks/python/apache_beam/coders/fast_coders_test.py index a13334a2c26..eb4077344f6 100644 --- a/sdks/python/apache_beam/coders/fast_coders_test.py +++ b/sdks/python/apache_beam/coders/fast_coders_test.py @@ -16,6 +16,7 @@ # """Unit tests for compiled implementation of coder impls.""" +from __future__ import absolute_import import logging import unittest diff --git a/sdks/python/apache_beam/coders/observable.py 
b/sdks/python/apache_beam/coders/observable.py index fc952cf4e55..3d0a7fc10bf 100644 --- a/sdks/python/apache_beam/coders/observable.py +++ b/sdks/python/apache_beam/coders/observable.py @@ -20,6 +20,9 @@ For internal use only; no backwards-compatibility guarantees. """ +from __future__ import absolute_import + +from builtins import object class ObservableMixin(object): diff --git a/sdks/python/apache_beam/coders/observable_test.py b/sdks/python/apache_beam/coders/observable_test.py index 09ca3041c29..ce32bf05ef2 100644 --- a/sdks/python/apache_beam/coders/observable_test.py +++ b/sdks/python/apache_beam/coders/observable_test.py @@ -16,6 +16,7 @@ # """Tests for the Observable mixin class.""" +from __future__ import absolute_import import logging import unittest diff --git a/sdks/python/apache_beam/coders/slow_coders_test.py b/sdks/python/apache_beam/coders/slow_coders_test.py index 97aa39ca094..b543b56dd26 100644 --- a/sdks/python/apache_beam/coders/slow_coders_test.py +++ b/sdks/python/apache_beam/coders/slow_coders_test.py @@ -16,6 +16,7 @@ # """Unit tests for uncompiled implementation of coder impls.""" +from __future__ import absolute_import import logging import unittest diff --git a/sdks/python/apache_beam/coders/slow_stream.py b/sdks/python/apache_beam/coders/slow_stream.py index 1ab55d90f98..da27a49883a 100644 --- a/sdks/python/apache_beam/coders/slow_stream.py +++ b/sdks/python/apache_beam/coders/slow_stream.py @@ -19,8 +19,11 @@ For internal use only; no backwards-compatibility guarantees. 
""" +from __future__ import absolute_import import struct +from builtins import chr +from builtins import object class OutputStream(object): @@ -32,13 +35,13 @@ def __init__(self): self.data = [] def write(self, b, nested=False): - assert isinstance(b, str) + assert isinstance(b, bytes) if nested: self.write_var_int64(len(b)) self.data.append(b) def write_byte(self, val): - self.data.append(chr(val)) + self.data.append(chr(val).encode('latin-1')) def write_var_int64(self, v): if v < 0: diff --git a/sdks/python/apache_beam/coders/standard_coders_test.py b/sdks/python/apache_beam/coders/standard_coders_test.py index b595cfd5922..f704c490c97 100644 --- a/sdks/python/apache_beam/coders/standard_coders_test.py +++ b/sdks/python/apache_beam/coders/standard_coders_test.py @@ -17,6 +17,7 @@ """Unit tests for coders that must be consistent across all Beam SDKs. """ +from __future__ import absolute_import from __future__ import print_function import json @@ -24,6 +25,7 @@ import os.path import sys import unittest +from builtins import map import yaml @@ -74,7 +76,7 @@ class StandardCodersTest(unittest.TestCase): lambda x: IntervalWindow( start=Timestamp(micros=(x['end'] - x['span']) * 1000), end=Timestamp(micros=x['end'] * 1000)), - 'beam:coder:iterable:v1': lambda x, parser: map(parser, x), + 'beam:coder:iterable:v1': lambda x, parser: list(map(parser, x)), 'beam:coder:global_window:v1': lambda x: window.GlobalWindow(), 'beam:coder:windowed_value:v1': lambda x, value_parser, window_parser: windowed_value.create( diff --git a/sdks/python/apache_beam/coders/stream_test.py b/sdks/python/apache_beam/coders/stream_test.py index 15bc5eb9ba9..641fefad45d 100644 --- a/sdks/python/apache_beam/coders/stream_test.py +++ b/sdks/python/apache_beam/coders/stream_test.py @@ -16,10 +16,13 @@ # """Tests for the stream implementations.""" +from __future__ import absolute_import +from __future__ import division import logging import math import unittest +from builtins import range from 
apache_beam.coders import slow_stream diff --git a/sdks/python/apache_beam/coders/typecoders.py b/sdks/python/apache_beam/coders/typecoders.py index 355c6230f92..e4efa2c7ffd 100644 --- a/sdks/python/apache_beam/coders/typecoders.py +++ b/sdks/python/apache_beam/coders/typecoders.py @@ -63,12 +63,18 @@ def MakeXyzs(v): See apache_beam.typehints.decorators module for more details. """ +from __future__ import absolute_import -import six +from builtins import object from apache_beam.coders import coders from apache_beam.typehints import typehints +try: + unicode # pylint: disable=unicode-builtin +except NameError: + unicode = str + __all__ = ['registry'] @@ -84,9 +90,8 @@ def register_standard_coders(self, fallback_coder): """Register coders for all basic and composite types.""" self._register_coder_internal(int, coders.VarIntCoder) self._register_coder_internal(float, coders.FloatCoder) - self._register_coder_internal(str, coders.BytesCoder) self._register_coder_internal(bytes, coders.BytesCoder) - self._register_coder_internal(six.text_type, coders.StrUtf8Coder) + self._register_coder_internal(unicode, coders.StrUtf8Coder) self._register_coder_internal(typehints.TupleConstraint, coders.TupleCoder) # Default fallback coders applied in that order until the first matching # coder found. 
diff --git a/sdks/python/apache_beam/coders/typecoders_test.py b/sdks/python/apache_beam/coders/typecoders_test.py index 2b6aa7a5129..b64cb653e56 100644 --- a/sdks/python/apache_beam/coders/typecoders_test.py +++ b/sdks/python/apache_beam/coders/typecoders_test.py @@ -16,8 +16,10 @@ # """Unit tests for the typecoders module.""" +from __future__ import absolute_import import unittest +from builtins import object from apache_beam.coders import coders from apache_beam.coders import typecoders @@ -33,6 +35,9 @@ def __init__(self, n): def __eq__(self, other): return self.number == other.number + def __hash__(self): + return self.number + class CustomCoder(coders.Coder): @@ -75,7 +80,7 @@ def test_get_coder_with_composite_custom_coder(self): def test_get_coder_with_standard_coder(self): self.assertEqual(coders.BytesCoder, - typecoders.registry.get_coder(str).__class__) + typecoders.registry.get_coder(bytes).__class__) def test_fallbackcoder(self): coder = typecoders.registry.get_coder(typehints.Any) @@ -100,22 +105,16 @@ def test_standard_int_coder(self): real_coder.decode(real_coder.encode(0x040404040404))) def test_standard_str_coder(self): - real_coder = typecoders.registry.get_coder(str) - expected_coder = coders.BytesCoder() - self.assertEqual( - real_coder.encode('abc'), expected_coder.encode('abc')) - self.assertEqual('abc', real_coder.decode(real_coder.encode('abc'))) - real_coder = typecoders.registry.get_coder(bytes) expected_coder = coders.BytesCoder() self.assertEqual( - real_coder.encode('abc'), expected_coder.encode('abc')) - self.assertEqual('abc', real_coder.decode(real_coder.encode('abc'))) + real_coder.encode(b'abc'), expected_coder.encode(b'abc')) + self.assertEqual(b'abc', real_coder.decode(real_coder.encode(b'abc'))) def test_iterable_coder(self): - real_coder = typecoders.registry.get_coder(typehints.Iterable[str]) + real_coder = typecoders.registry.get_coder(typehints.Iterable[bytes]) expected_coder = coders.IterableCoder(coders.BytesCoder()) - 
values = ['abc', 'xyz'] + values = [b'abc', b'xyz'] self.assertEqual(expected_coder, real_coder) self.assertEqual(real_coder.encode(values), expected_coder.encode(values)) diff --git a/sdks/python/generate_pydoc.sh b/sdks/python/generate_pydoc.sh index ae8d043d376..68b7006a1b6 100755 --- a/sdks/python/generate_pydoc.sh +++ b/sdks/python/generate_pydoc.sh @@ -121,6 +121,9 @@ ignore_identifiers = [ # Ignore broken built-in type references 'tuple', + # Ignore future.builtin type references + 'future.types.newobject.newobject', + # Ignore private classes 'apache_beam.coders.coders._PickleCoderBase', 'apache_beam.coders.coders.FastCoder', diff --git a/sdks/python/run_pylint.sh b/sdks/python/run_pylint.sh index 7150fa6f50e..4e7dc9d8218 100755 --- a/sdks/python/run_pylint.sh +++ b/sdks/python/run_pylint.sh @@ -89,25 +89,6 @@ done isort ${MODULE} -p apache_beam --line-width 120 --check-only --order-by-type \ --combine-star --force-single-line-imports --diff --recursive ${SKIP_PARAM} -FUTURIZE_EXCLUDED=( - "typehints.py" - "pb2" - "trivial_infernce.py" -) -FUTURIZE_GREP_PARAM=$( IFS='|'; echo "${ids[*]}" ) -echo "Checking for files requiring stage 1 refactoring from futurize" -futurize_results=$(futurize -j 8 --stage1 apache_beam 2>&1 |grep Refactored) -futurize_filtered=$(echo "$futurize_results" |grep -v "$FUTURIZE_GREP_PARAM" || echo "") -count=${#futurize_filtered} -if [ "$count" != "0" ]; then - echo "Some of the changes require futurize stage 1 changes." - echo "The files with required changes:" - echo "$futurize_filtered" - echo "You can run futurize apache_beam to see the proposed changes." 
- exit 1 -fi -echo "No future changes needed" - echo "Checking unittest.main for module ${MODULE}:" TESTS_MISSING_MAIN=$(find ${MODULE} | grep '\.py$' | xargs grep -l '^import unittest$' | xargs grep -L unittest.main) if [ -n "${TESTS_MISSING_MAIN}" ]; then diff --git a/sdks/python/run_pylint_2to3.sh b/sdks/python/run_pylint_2to3.sh new file mode 100755 index 00000000000..9a074368dba --- /dev/null +++ b/sdks/python/run_pylint_2to3.sh @@ -0,0 +1,90 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This script will run pylint with the --py3k parameter to check for python +# 3 compatibility. This script can run on a list of modules provided as +# command line arguments. +# +# The exit-code of the script indicates success or a failure. 
+ +set -o errexit +set -o pipefail + +DEFAULT_MODULE=apache_beam + +usage(){ echo "Usage: $0 [MODULE|--help] +# The default MODULE is $DEFAULT_MODULE"; } + +MODULES=${DEFAULT_MODULE} +while [[ $# -gt 0 ]] ; do + key="$1" + case ${key} in + --help) usage; exit 1;; + *) + if [ ${MODULES} = ${DEFAULT_MODULE} ] ; then + MODULES=() + fi + MODULES+=("$1") + shift;; + esac +done + +FUTURIZE_EXCLUDED=( + "typehints.py" + "pb2" + "trivial_infernce.py" +) +FUTURIZE_GREP_PARAM=$( IFS='|'; echo "${ids[*]}" ) +echo "Checking for files requiring stage 1 refactoring from futurize" +futurize_results=$(futurize -j 8 --stage1 apache_beam 2>&1 |grep Refactored) +futurize_filtered=$(echo "$futurize_results" |grep -v "$FUTURIZE_GREP_PARAM" \ + || echo "") +count=${#futurize_filtered} +if [ "$count" != "0" ]; then + echo "Some of the changes require futurize stage 1 changes." + echo "The files with required changes:" + echo "$futurize_filtered" + echo "You can run futurize apache_beam to see the proposed changes." + exit 1 +fi +echo "No future changes needed" + +# Following generated files are excluded from lint checks. 
+EXCLUDED_GENERATED_FILES=( +"apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_client.py" +"apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_messages.py" +"apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_client.py" +"apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_messages.py" +"apache_beam/io/gcp/internal/clients/storage/storage_v1_client.py" +"apache_beam/io/gcp/internal/clients/storage/storage_v1_messages.py" +"apache_beam/coders/proto2_coder_test_messages_pb2.py" +apache_beam/portability/api/*pb2*.py +) + +FILES_TO_IGNORE="" +for file in "${EXCLUDED_GENERATED_FILES[@]}"; do + if test -z "$FILES_TO_IGNORE" + then FILES_TO_IGNORE="$(basename $file)" + else FILES_TO_IGNORE="$FILES_TO_IGNORE, $(basename $file)" + fi +done +echo "Skipping lint for generated files: $FILES_TO_IGNORE" + +echo "Running pylint --py3k for modules $( printf "%s " "${MODULES[@]}" ):" +pylint -j8 $( printf "%s " "${MODULES[@]}" ) \ + --ignore-patterns="$FILES_TO_IGNORE" --py3k diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 364148355c2..35e705583f6 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -66,7 +66,7 @@ def get_version(): ) -REQUIRED_CYTHON_VERSION = '0.26.1' +REQUIRED_CYTHON_VERSION = '0.28.1' try: _CYTHON_VERSION = get_distribution('cython').version if StrictVersion(_CYTHON_VERSION) < StrictVersion(REQUIRED_CYTHON_VERSION): diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 0d49cbc958a..caa899de2db 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -17,7 +17,7 @@ [tox] # new environments will be excluded by default unless explicitly added to envlist. 
-envlist = py27,py27-{gcp,cython,lint},py3-lint,docs +envlist = py27,py27-{gcp,cython,lint,lint3},py3-lint,docs toxworkdir = {toxinidir}/target/.tox [pycodestyle] @@ -34,7 +34,8 @@ whitelist_externals = find time deps = - cython: cython==0.26.1 + cython: cython==0.28.1 + future==0.16.0 # These 2 magic command overrides are required for Jenkins builds. # Otherwise we get "OSError: [Errno 2] No such file or directory" errors. @@ -88,6 +89,22 @@ commands = pip --version time {toxinidir}/run_pylint.sh +[testenv:py27-lint3] +# Checks for py2/3 compatibility issues +deps = + pycodestyle==2.3.1 + pylint==1.7.2 + future==0.16.0 + isort==4.2.15 + flake8==3.5.0 +modules = + apache_beam/coders +commands = + python --version + pip --version + time {toxinidir}/run_pylint_2to3.sh {[testenv:py27-lint3]modules} + + [testenv:py3-lint] deps = pycodestyle==2.3.1 ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 100665) Time Spent: 18h 20m (was: 18h 10m) > Futurize and fix python 2 compatibility for coders package > ---------------------------------------------------------- > > Key: BEAM-3981 > URL: https://issues.apache.org/jira/browse/BEAM-3981 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core > Reporter: Robbe > Assignee: Robbe > Priority: Major > Time Spent: 18h 20m > Remaining Estimate: 0h > > Run automatic conversion with futurize tool on coders subpackage and fix > python 2 compatibility. This prepares the subpackage for python 3 support. -- This message was sent by Atlassian JIRA (v7.6.3#76005)