Author: Matti Picus <matti.pi...@gmail.com>
Branch: unicode-utf8
Changeset: r95554:962719fced4a
Date: 2019-01-01 15:06 +0200
http://bitbucket.org/pypy/pypy/changeset/962719fced4a/
Log: allow a = array.array('u', u'\xff'); a.byteswap(); ord(a[0]) > sys.maxunicode

diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -1040,13 +1040,24 @@
                 return space.newbytes(item)
             elif mytype.typecode == 'u':
                 code = r_uint(ord(item))
-                try:
-                    return space.newutf8(rutf8.unichr_as_utf8(code), 1)
-                except ValueError:
-                    raise oefmt(space.w_ValueError,
-                        "array contains a 32-bit integer that is outside "
-                        "the range [U+0000; U+10ffff] of valid unicode "
-                        "characters")
+                # cpython will allow values > sys.maxunicode
+                # while silently truncating the top bits
+                if code <= r_uint(0x7F):
+                    # Encode ASCII
+                    item = chr(code)
+                elif code <= r_uint(0x07FF):
+                    item = (chr((0xc0 | (code >> 6))) +
+                            chr((0x80 | (code & 0x3f))))
+                elif code <= r_uint(0xFFFF):
+                    item = (chr((0xe0 | (code >> 12))) +
+                            chr((0x80 | ((code >> 6) & 0x3f))) +
+                            chr((0x80 | (code & 0x3f))))
+                else:
+                    item = (chr((0xf0 | (code >> 18)) & 0xff) +
+                            chr((0x80 | ((code >> 12) & 0x3f))) +
+                            chr((0x80 | ((code >> 6) & 0x3f))) +
+                            chr((0x80 | (code & 0x3f))))
+                return space.newutf8(item, 1)
             assert 0, "unreachable"

         # interface
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -44,8 +44,12 @@
         # XXX checking, remove before any performance measurments
         #     ifdef not_running_in_benchmark
         if not we_are_translated():
-            lgt = rutf8.check_utf8(utf8str, True)
-            assert lgt == length
+            try:
+                lgt = rutf8.check_utf8(utf8str, True)
+                assert lgt == length
+            except:
+                # array.array can return invalid unicode
+                pass

     @staticmethod
     def from_utf8builder(builder):
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit