Author: Ronan Lamy <ronan.l...@gmail.com>
Branch: py3.5
Changeset: r93402:da4b6cf751a5
Date: 2017-12-12 23:37 +0000
http://bitbucket.org/pypy/pypy/changeset/da4b6cf751a5/
Log: hg merge default diff --git a/pypy/module/test_lib_pypy/test_json_extra.py b/extra_tests/test_json.py rename from pypy/module/test_lib_pypy/test_json_extra.py rename to extra_tests/test_json.py --- a/pypy/module/test_lib_pypy/test_json_extra.py +++ b/extra_tests/test_json.py @@ -1,4 +1,6 @@ -import py, json +import pytest +import json +from hypothesis import given, strategies def is_(x, y): return type(x) is type(y) and x == y @@ -6,12 +8,26 @@ def test_no_ensure_ascii(): assert is_(json.dumps(u"\u1234", ensure_ascii=False), u'"\u1234"') assert is_(json.dumps("\xc0", ensure_ascii=False), '"\xc0"') - e = py.test.raises(UnicodeDecodeError, json.dumps, - (u"\u1234", "\xc0"), ensure_ascii=False) - assert str(e.value).startswith("'ascii' codec can't decode byte 0xc0 ") - e = py.test.raises(UnicodeDecodeError, json.dumps, - ("\xc0", u"\u1234"), ensure_ascii=False) - assert str(e.value).startswith("'ascii' codec can't decode byte 0xc0 ") + with pytest.raises(UnicodeDecodeError) as excinfo: + json.dumps((u"\u1234", "\xc0"), ensure_ascii=False) + assert str(excinfo.value).startswith( + "'ascii' codec can't decode byte 0xc0 ") + with pytest.raises(UnicodeDecodeError) as excinfo: + json.dumps(("\xc0", u"\u1234"), ensure_ascii=False) + assert str(excinfo.value).startswith( + "'ascii' codec can't decode byte 0xc0 ") def test_issue2191(): assert is_(json.dumps(u"xxx", ensure_ascii=False), u'"xxx"') + +jsondata = strategies.recursive( + strategies.none() | + strategies.booleans() | + strategies.floats(allow_nan=False) | + strategies.text(), + lambda children: strategies.lists(children) | + strategies.dictionaries(strategies.text(), children)) + +@given(jsondata) +def test_roundtrip(d): + assert json.loads(json.dumps(d)) == d diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -149,7 +149,7 @@ xz-devel # For lzma on PyPy3. (XXX plus the SLES11 version of libgdbm-dev and tk-dev) -On Mac OS X:: +On Mac OS X: Most of these build-time dependencies are installed alongside the Developer Tools. However, note that in order for the installation to diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -362,7 +362,11 @@ containers (as list items or in sets for example), the exact rule of equality used is "``if x is y or x == y``" (on both CPython and PyPy); as a consequence, because all ``nans`` are identical in PyPy, you -cannot have several of them in a set, unlike in CPython. (Issue `#1974`__) +cannot have several of them in a set, unlike in CPython. (Issue `#1974`__). +Another consequence is that ``cmp(float('nan'), float('nan')) == 0``, because +``cmp`` checks with ``is`` first whether the arguments are identical (there is +no good value to return from this call to ``cmp``, because ``cmp`` pretends +that there is a total order on floats, but that is wrong for NaNs). .. __: https://bitbucket.org/pypy/pypy/issue/1974/different-behaviour-for-collections-of diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -1,31 +1,41 @@ -=========================== -What's new in PyPy2.7 5.10+ -=========================== - -.. this is a revision shortly after release-pypy2.7-v5.9.0 -.. startrev:d56dadcef996 - -.. branch: cppyy-packaging -Cleanup and improve cppyy packaging - -.. branch: docs-osx-brew-openssl - -.. 
branch: keep-debug-symbols -Add a smartstrip tool, which can optionally keep the debug symbols in a -separate file, instead of just stripping them away. Use it in packaging - -.. branch: bsd-patches -Fix failures on FreeBSD, contributed by David Naylor as patches on the issue -tracker (issues 2694, 2695, 2696, 2697) - -.. branch: run-extra-tests -Run extra_tests/ in buildbot - -.. branch: vmprof-0.4.10 -Upgrade the _vmprof backend to vmprof 0.4.10 - -.. branch: fix-vmprof-stacklet-switch -Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) - -.. branch: win32-vcvars - +=========================== +What's new in PyPy2.7 5.10+ +=========================== + +.. this is a revision shortly after release-pypy2.7-v5.9.0 +.. startrev:d56dadcef996 + + +.. branch: cppyy-packaging + +Cleanup and improve cppyy packaging + +.. branch: docs-osx-brew-openssl + +.. branch: keep-debug-symbols + +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. branch: bsd-patches + +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) + +.. branch: run-extra-tests + +Run extra_tests/ in buildbot + +.. branch: vmprof-0.4.10 + +Upgrade the _vmprof backend to vmprof 0.4.10 + +.. branch: fix-vmprof-stacklet-switch + +Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. branch: win32-vcvars + +.. branch rdict-fast-hash + +Make it possible to declare that the hash function of an r_dict is fast in RPython. diff --git a/pypy/doc/whatsnew-pypy2-5.6.0.rst b/pypy/doc/whatsnew-pypy2-5.6.0.rst --- a/pypy/doc/whatsnew-pypy2-5.6.0.rst +++ b/pypy/doc/whatsnew-pypy2-5.6.0.rst @@ -107,7 +107,7 @@ .. branch: newinitwarn -Match CPython's stricter handling of __new/init__ arguments +Match CPython's stricter handling of ``__new__``/``__init__`` arguments .. branch: openssl-1.1 diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -11,7 +11,7 @@ To build pypy-c you need a working python environment, and a C compiler. It is possible to translate with a CPython 2.6 or later, but this is not -the preferred way, because it will take a lot longer to run � depending +the preferred way, because it will take a lot longer to run – depending on your architecture, between two and three times as long. So head to `our downloads`_ and get the latest stable version. 
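A minimal sketch of the membership rule quoted in the cpython_differences.rst
change above ("if x is y or x == y"); the helper below is illustrative only,
not PyPy internals:

    def _found(needle, haystack):
        # containers check identity first, then equality
        return any(needle is item or needle == item for item in haystack)

    nan = float('nan')
    assert _found(nan, [nan])   # identity matches even though nan != nan
    # On CPython, separately-created NaN objects are distinct, so a set can
    # hold several of them; per the documentation change above, PyPy treats
    # all NaNs as identical, so such a set collapses to a single element.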
@@ -103,6 +103,7 @@ must also copy the ``vcvarsall.bat`` file fron the ``...\9.0`` directory to the ``...\9.0\VC`` directory, and edit it, changing the lines that set ``VCINSTALLDIR`` and ``WindowsSdkDir``:: + set VCINSTALLDIR=%~dp0\ set WindowsSdkDir=%~dp0\..\WinSDK\ diff --git a/pypy/interpreter/astcompiler/test/test_astbuilder.py b/pypy/interpreter/astcompiler/test/test_astbuilder.py --- a/pypy/interpreter/astcompiler/test/test_astbuilder.py +++ b/pypy/interpreter/astcompiler/test/test_astbuilder.py @@ -1404,3 +1404,7 @@ exc = py.test.raises(SyntaxError, self.get_ast, input).value assert exc.msg == ("(unicode error) 'unicodeescape' codec can't decode" " bytes in position 0-1: truncated \\xXX escape") + input = "u'\\x1'" + exc = py.test.raises(SyntaxError, self.get_ast, input).value + assert exc.msg == ("(unicode error) 'unicodeescape' codec can't decode" + " bytes in position 0-2: truncated \\xXX escape") diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -1,5 +1,8 @@ import py -from pypy.interpreter.unicodehelper import encode_utf8, decode_utf8 +import pytest +import struct +from pypy.interpreter.unicodehelper import ( + encode_utf8, decode_utf8, unicode_encode_utf_32_be) from pypy.interpreter.unicodehelper import encode_utf8sp, decode_utf8sp @@ -67,3 +70,23 @@ assert map(ord, got) == [0xd800, 0xdc00] got = decode_utf8sp(space, "\xf0\x90\x80\x80") assert map(ord, got) == [0x10000] + +@pytest.mark.parametrize('unich', [u"\ud800", u"\udc80"]) +def test_utf32_surrogates(unich): + assert (unicode_encode_utf_32_be(unich, 1, None) == + struct.pack('>i', ord(unich))) + with pytest.raises(UnicodeEncodeError): + unicode_encode_utf_32_be(unich, 1, None, allow_surrogates=False) + + def replace_with(ru, rs): + def errorhandler(errors, enc, msg, u, startingpos, endingpos): + if errors == 'strict': + raise UnicodeEncodeError(enc, u, startingpos, endingpos, msg) + return ru, rs, endingpos + return unicode_encode_utf_32_be( + u"<%s>" % unich, 3, None, + errorhandler, allow_surrogates=False) + + assert replace_with(u'rep', None) == u'<rep>'.encode('utf-32-be') + assert (replace_with(None, '\xca\xfe\xca\xfe') == + '\x00\x00\x00<\xca\xfe\xca\xfe\x00\x00\x00>') diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1,8 +1,13 @@ import sys from pypy.interpreter.error import OperationError, oefmt from rpython.rlib.objectmodel import specialize +from rpython.rlib.rarithmetic import intmask +from rpython.rlib.rstring import StringBuilder, UnicodeBuilder from rpython.rlib import runicode -from pypy.module._codecs import interp_codecs +from rpython.rlib.runicode import ( + default_unicode_error_encode, default_unicode_error_decode, + MAXUNICODE, BYTEORDER, BYTEORDER2, UNICHR) + _WIN32 = sys.platform == 'win32' _MACOSX = sys.platform == 'darwin' if _WIN32: @@ -40,6 +45,7 @@ # ____________________________________________________________ def fsdecode(space, w_string): + from pypy.module._codecs import interp_codecs state = space.fromcache(interp_codecs.CodecState) if _WIN32: bytes = space.bytes_w(w_string) @@ -70,6 +76,7 @@ return space.newunicode(uni) def fsencode(space, w_uni): + from pypy.module._codecs import interp_codecs state = space.fromcache(interp_codecs.CodecState) if _WIN32: uni = space.unicode_w(w_uni) @@ -107,6 +114,7 @@ # These functions 
take and return unwrapped rpython strings and unicodes def decode_unicode_escape(space, string): + from pypy.module._codecs import interp_codecs state = space.fromcache(interp_codecs.CodecState) unicodedata_handler = state.get_unicodedata_handler(space) result, consumed = runicode.str_decode_unicode_escape( @@ -157,3 +165,196 @@ # encoding error, it should always be reversible, and the reverse is # encode_utf8sp(). return decode_utf8(space, string, allow_surrogates=True) + +# ____________________________________________________________ +# utf-32 + +def str_decode_utf_32(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "native", 'utf-32-' + BYTEORDER2) + return result, length + +def str_decode_utf_32_be(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "big", 'utf-32-be') + return result, length + +def str_decode_utf_32_le(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "little", 'utf-32-le') + return result, length + +BOM32_DIRECT = intmask(0x0000FEFF) +BOM32_REVERSE = intmask(0xFFFE0000) + +def str_decode_utf_32_helper(s, size, errors, final=True, + errorhandler=None, + byteorder="native", + public_encoding_name='utf32'): + if errorhandler is None: + errorhandler = default_unicode_error_decode + bo = 0 + + if BYTEORDER == 'little': + iorder = [0, 1, 2, 3] + else: + iorder = [3, 2, 1, 0] + + # Check for BOM marks (U+FEFF) in the input and adjust current + # byte order setting accordingly. In native mode, the leading BOM + # mark is skipped, in all other modes, it is copied to the output + # stream as-is (giving a ZWNBSP character). + pos = 0 + if byteorder == 'native': + if size >= 4: + bom = intmask( + (ord(s[iorder[3]]) << 24) | (ord(s[iorder[2]]) << 16) | + (ord(s[iorder[1]]) << 8) | ord(s[iorder[0]])) + if BYTEORDER == 'little': + if bom == BOM32_DIRECT: + pos += 4 + bo = -1 + elif bom == BOM32_REVERSE: + pos += 4 + bo = 1 + else: + if bom == BOM32_DIRECT: + pos += 4 + bo = 1 + elif bom == BOM32_REVERSE: + pos += 4 + bo = -1 + elif byteorder == 'little': + bo = -1 + else: + bo = 1 + if size == 0: + return u'', 0, bo + if bo == -1: + # force little endian + iorder = [0, 1, 2, 3] + elif bo == 1: + # force big endian + iorder = [3, 2, 1, 0] + + result = UnicodeBuilder(size // 4) + + while pos < size: + # remaining bytes at the end? 
(size should be divisible by 4) + if len(s) - pos < 4: + if not final: + break + r, pos = errorhandler(errors, public_encoding_name, + "truncated data", + s, pos, len(s)) + result.append(r) + if len(s) - pos < 4: + break + continue + ch = ((ord(s[pos + iorder[3]]) << 24) | (ord(s[pos + iorder[2]]) << 16) | + (ord(s[pos + iorder[1]]) << 8) | ord(s[pos + iorder[0]])) + if ch >= 0x110000: + r, pos = errorhandler(errors, public_encoding_name, + "codepoint not in range(0x110000)", + s, pos, len(s)) + result.append(r) + continue + + if MAXUNICODE < 65536 and ch >= 0x10000: + ch -= 0x10000L + result.append(unichr(0xD800 + (ch >> 10))) + result.append(unichr(0xDC00 + (ch & 0x03FF))) + else: + result.append(UNICHR(ch)) + pos += 4 + return result.build(), pos, bo + +def _STORECHAR32(result, CH, byteorder): + c0 = chr(((CH) >> 24) & 0xff) + c1 = chr(((CH) >> 16) & 0xff) + c2 = chr(((CH) >> 8) & 0xff) + c3 = chr((CH) & 0xff) + if byteorder == 'little': + result.append(c3) + result.append(c2) + result.append(c1) + result.append(c0) + else: + result.append(c0) + result.append(c1) + result.append(c2) + result.append(c3) + +def unicode_encode_utf_32_helper(s, size, errors, + errorhandler=None, + allow_surrogates=True, + byteorder='little', + public_encoding_name='utf32'): + if errorhandler is None: + errorhandler = default_unicode_error_encode + if size == 0: + if byteorder == 'native': + result = StringBuilder(4) + _STORECHAR32(result, 0xFEFF, BYTEORDER) + return result.build() + return "" + + result = StringBuilder(size * 4 + 4) + if byteorder == 'native': + _STORECHAR32(result, 0xFEFF, BYTEORDER) + byteorder = BYTEORDER + + pos = 0 + while pos < size: + ch = ord(s[pos]) + pos += 1 + ch2 = 0 + if not allow_surrogates and 0xD800 <= ch < 0xE000: + ru, rs, pos = errorhandler( + errors, public_encoding_name, 'surrogates not allowed', + s, pos - 1, pos) + if rs is not None: + # py3k only + if len(rs) % 4 != 0: + errorhandler( + 'strict', public_encoding_name, 'surrogates not allowed', + s, pos - 1, pos) + result.append(rs) + continue + for ch in ru: + if ord(ch) < 0xD800: + _STORECHAR32(result, ord(ch), byteorder) + else: + errorhandler( + 'strict', public_encoding_name, + 'surrogates not allowed', s, pos - 1, pos) + continue + if 0xD800 <= ch < 0xDC00 and MAXUNICODE < 65536 and pos < size: + ch2 = ord(s[pos]) + if 0xDC00 <= ch2 < 0xE000: + ch = (((ch & 0x3FF) << 10) | (ch2 & 0x3FF)) + 0x10000 + pos += 1 + _STORECHAR32(result, ch, byteorder) + + return result.build() + +def unicode_encode_utf_32(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "native", + 'utf-32-' + BYTEORDER2) + +def unicode_encode_utf_32_be(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "big", + 'utf-32-be') + +def unicode_encode_utf_32_le(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "little", + 'utf-32-le') diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -2,12 +2,14 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rstring import UnicodeBuilder, StringBuilder +from rpython.rlib import runicode from rpython.rlib.runicode import ( 
code_to_unichr, MAXUNICODE, raw_unicode_escape_helper_unicode) from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault +from pypy.interpreter import unicodehelper from pypy.module.unicodedata import unicodedb @@ -244,7 +246,8 @@ def xmlcharrefreplace_errors(space, w_exc): check_exception(space, w_exc) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): - obj = space.realunicode_w(space.getattr(w_exc, space.newtext('object'))) + w_obj = space.getattr(w_exc, space.newtext('object')) + obj = space.realunicode_w(w_obj) start = space.int_w(space.getattr(w_exc, space.newtext('start'))) w_end = space.getattr(w_exc, space.newtext('end')) end = space.int_w(w_end) @@ -301,7 +304,8 @@ def namereplace_errors(space, w_exc): check_exception(space, w_exc) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): - obj = space.realunicode_w(space.getattr(w_exc, space.newtext('object'))) + w_obj = space.getattr(w_exc, space.newtext('object')) + obj = space.realunicode_w(w_obj) start = space.int_w(space.getattr(w_exc, space.newtext('start'))) w_end = space.getattr(w_exc, space.newtext('end')) end = space.int_w(w_end) @@ -611,48 +615,47 @@ return _call_codec(space, w_decoder, w_obj, "decoding", encoding, errors) # ____________________________________________________________ -# delegation to runicode +# delegation to runicode/unicodehelper -from rpython.rlib import runicode +def _find_implementation(impl_name): + try: + func = getattr(unicodehelper, impl_name) + except AttributeError: + if hasattr(runicode, 'py3k_' + impl_name): + impl_name = 'py3k_' + impl_name + func = getattr(runicode, impl_name) + return func def make_encoder_wrapper(name): rname = "unicode_encode_%s" % (name.replace("_encode", ""), ) - assert hasattr(runicode, rname) - if hasattr(runicode, 'py3k_' + rname): - rname = 'py3k_' + rname + func = _find_implementation(rname) @unwrap_spec(uni=unicode, errors='text_or_none') def wrap_encoder(space, uni, errors="strict"): if errors is None: errors = 'strict' state = space.fromcache(CodecState) - func = getattr(runicode, rname) result = func(uni, len(uni), errors, state.encode_error_handler) return space.newtuple([space.newbytes(result), space.newint(len(uni))]) - wrap_encoder.func_name = rname + wrap_encoder.__name__ = func.__name__ globals()[name] = wrap_encoder def make_utf_encoder_wrapper(name): rname = "unicode_encode_%s" % (name.replace("_encode", ""), ) - assert hasattr(runicode, rname) - if hasattr(runicode, 'py3k_' + rname): - rname = 'py3k_' + rname + func = _find_implementation(rname) @unwrap_spec(uni=unicode, errors='text_or_none') def wrap_encoder(space, uni, errors="strict"): if errors is None: errors = 'strict' state = space.fromcache(CodecState) - func = getattr(runicode, rname) result = func(uni, len(uni), errors, state.encode_error_handler, allow_surrogates=False) return space.newtuple([space.newbytes(result), space.newint(len(uni))]) - wrap_encoder.func_name = rname + wrap_encoder.__name__ = func.__name__ globals()[name] = wrap_encoder def make_decoder_wrapper(name): rname = "str_decode_%s" % (name.replace("_decode", ""), ) - assert hasattr(runicode, rname) - if hasattr(runicode, 'py3k_' + rname): - rname = 'py3k_' + rname + func = _find_implementation(rname) @unwrap_spec(string='bufferstr', errors='text_or_none', w_final=WrappedDefault(False)) def wrap_decoder(space, string, errors="strict", w_final=None): @@ -660,11 +663,10 @@ errors = 'strict' final = space.is_true(w_final) state = 
space.fromcache(CodecState) - func = getattr(runicode, rname) result, consumed = func(string, len(string), errors, final, state.decode_error_handler) return space.newtuple([space.newunicode(result), space.newint(consumed)]) - wrap_decoder.func_name = rname + wrap_decoder.__name__ = func.__name__ globals()[name] = wrap_decoder for encoder in [ diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -116,10 +116,10 @@ raises(TypeError, charmap_decode, b'\xff', "strict", {0xff: 0x110000}) assert (charmap_decode(b"\x00\x01\x02", "strict", {0: 0x10FFFF, 1: ord('b'), 2: ord('c')}) == - u"\U0010FFFFbc", 3) + (u"\U0010FFFFbc", 3)) assert (charmap_decode(b"\x00\x01\x02", "strict", {0: u'\U0010FFFF', 1: u'b', 2: u'c'}) == - u"\U0010FFFFbc", 3) + (u"\U0010FFFFbc", 3)) def test_escape_decode_errors(self): from _codecs import escape_decode as decode @@ -590,6 +590,12 @@ def test_backslashreplace(self): import codecs + sin = u"a\xac\u1234\u20ac\u8000\U0010ffff" + expected = b"a\\xac\\u1234\\u20ac\\u8000\\U0010ffff" + assert sin.encode('ascii', 'backslashreplace') == expected + expected = b"a\xac\\u1234\xa4\\u8000\\U0010ffff" + assert sin.encode("iso-8859-15", "backslashreplace") == expected + assert 'a\xac\u1234\u20ac\u8000'.encode('ascii', 'backslashreplace') == b'a\\xac\u1234\u20ac\u8000' assert b'\x00\x60\x80'.decode( 'ascii', 'backslashreplace') == u'\x00\x60\\x80' @@ -732,7 +738,7 @@ def handler_unicodeinternal(exc): if not isinstance(exc, UnicodeDecodeError): raise TypeError("don't know how to handle %r" % exc) - return ("\x01", 1) + return (u"\x01", 1) codecs.register_error("test.hui", handler_unicodeinternal) res = b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui") if sys.maxunicode > 65535: @@ -939,3 +945,31 @@ assert len(w) == 1 assert str(w[0].message) == warning_msg assert w[0].category == DeprecationWarning + + def test_xmlcharrefreplace(self): + r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('latin1', 'xmlcharrefreplace') + assert r == b'ሴ\x80⍅y\xab' + r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('ascii', 'xmlcharrefreplace') + assert r == b'ሴ€⍅y«' + + def test_errorhandler_collection(self): + import _codecs + errors = [] + def record_error(exc): + if not isinstance(exc, UnicodeEncodeError): + raise TypeError("don't know how to handle %r" % exc) + errors.append(exc.object[exc.start:exc.end]) + return (u'', exc.end) + _codecs.register_error("test.record", record_error) + + sin = u"\xac\u1234\u1234\u20ac\u8000" + assert sin.encode("ascii", "test.record") == b"" + assert errors == [sin] + + errors = [] + assert sin.encode("latin-1", "test.record") == b"\xac" + assert errors == [u'\u1234\u1234\u20ac\u8000'] + + errors = [] + assert sin.encode("iso-8859-15", "test.record") == b"\xac\xa4" + assert errors == [u'\u1234\u1234', u'\u8000'] diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -429,6 +429,7 @@ if not space.isinstance_w(w_decoded, space.w_unicode): msg = "decoder should return a string result, not '%T'" raise oefmt(space.w_TypeError, msg, w_decoded) + return w_decoded class W_TextIOWrapper(W_TextIOBase): @@ -997,12 +998,13 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - self.decoded.set(space, w_decoded) + w_decoded = check_decoded(space, w_decoded) # Skip chars_to_skip of the 
decoded characters - if len(self.decoded.text) < cookie.chars_to_skip: + if space.len_w(w_decoded) < cookie.chars_to_skip: raise oefmt(space.w_IOError, "can't restore logical file position") + self.decoded.set(space, w_decoded) self.decoded.pos = cookie.chars_to_skip else: self.snapshot = PositionSnapshot(cookie.dec_flags, "") @@ -1015,11 +1017,9 @@ def tell_w(self, space): self._check_closed(space) - if not self.seekable: self._unsupportedoperation(space, "underlying stream is not seekable") - if not self.telling: raise oefmt(space.w_IOError, "telling position disabled by next() call") @@ -1089,14 +1089,14 @@ # We didn't get enough decoded data; signal EOF to get more. w_decoded = space.call_method(self.w_decoder, "decode", space.newbytes(""), - space.newint(1)) # final=1 + space.newint(1)) # final=1 check_decoded(space, w_decoded) - chars_decoded += len(space.unicode_w(w_decoded)) + chars_decoded += space.len_w(w_decoded) cookie.need_eof = 1 if chars_decoded < chars_to_skip: raise oefmt(space.w_IOError, - "can't reconstruct logical file position") + "can't reconstruct logical file position") finally: space.call_method(self.w_decoder, "setstate", w_saved_state) diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -40,7 +40,8 @@ w_newline=space.newtext(mode)) lines = [] for limit in limits: - line = space.unicode_w(w_textio.readline_w(space, space.newint(limit))) + w_line = w_textio.readline_w(space, space.newint(limit)) + line = space.unicode_w(w_line) if limit >= 0: assert len(line) <= limit if line: diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -76,7 +76,7 @@ self.ll_chars = rffi.str2charp(s) self.end_ptr = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw') self.pos = 0 - self.cache = r_dict(slice_eq, slice_hash) + self.cache = r_dict(slice_eq, slice_hash, simple_hash_eq=True) def close(self): rffi.free_charp(self.ll_chars) diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py --- a/pypy/module/_pypyjson/interp_encoder.py +++ b/pypy/module/_pypyjson/interp_encoder.py @@ -49,24 +49,24 @@ first = 0 for i in range(first, len(u)): - c = u[i] - if c <= u'~': - if c == u'"' or c == u'\\': + c = ord(u[i]) + if c <= ord('~'): + if c == ord('"') or c == ord('\\'): sb.append('\\') - elif c < u' ': - sb.append(ESCAPE_BEFORE_SPACE[ord(c)]) + elif c < ord(' '): + sb.append(ESCAPE_BEFORE_SPACE[c]) continue - sb.append(chr(ord(c))) + sb.append(chr(c)) else: - if c <= u'\uffff': + if c <= ord(u'\uffff'): sb.append('\\u') - sb.append(HEX[ord(c) >> 12]) - sb.append(HEX[(ord(c) >> 8) & 0x0f]) - sb.append(HEX[(ord(c) >> 4) & 0x0f]) - sb.append(HEX[ord(c) & 0x0f]) + sb.append(HEX[c >> 12]) + sb.append(HEX[(c >> 8) & 0x0f]) + sb.append(HEX[(c >> 4) & 0x0f]) + sb.append(HEX[c & 0x0f]) else: # surrogate pair - n = ord(c) - 0x10000 + n = c - 0x10000 s1 = 0xd800 | ((n >> 10) & 0x3ff) sb.append('\\ud') sb.append(HEX[(s1 >> 8) & 0x0f]) diff --git a/pypy/module/_rawffi/alt/type_converter.py b/pypy/module/_rawffi/alt/type_converter.py --- a/pypy/module/_rawffi/alt/type_converter.py +++ b/pypy/module/_rawffi/alt/type_converter.py @@ -128,7 +128,7 @@ intval: lltype.Signed """ self.error(w_ffitype, w_obj) - + def handle_unichar(self, w_ffitype, w_obj, intval): """ intval: lltype.Signed @@ 
-174,7 +174,7 @@ def handle_struct_rawffi(self, w_ffitype, w_structinstance): """ This method should be killed as soon as we remove support for _rawffi structures - + w_structinstance: W_StructureInstance """ self.error(w_ffitype, w_structinstance) @@ -349,7 +349,7 @@ def get_struct_rawffi(self, w_ffitype, w_structdescr): """ This should be killed as soon as we kill support for _rawffi structures - + Return type: lltype.Unsigned (the address of the structure) """ diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -580,11 +580,13 @@ @unwrap_spec(w_groupnum=WrappedDefault(0)) def start_w(self, w_groupnum): - return self.space.newint(self.do_span(w_groupnum)[0]) + start, end = self.do_span(w_groupnum) + return self.space.newint(start) @unwrap_spec(w_groupnum=WrappedDefault(0)) def end_w(self, w_groupnum): - return self.space.newint(self.do_span(w_groupnum)[1]) + start, end = self.do_span(w_groupnum) + return self.space.newint(end) @unwrap_spec(w_groupnum=WrappedDefault(0)) def span_w(self, w_groupnum): diff --git a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py --- a/pypy/module/_sre/test/test_app_sre.py +++ b/pypy/module/_sre/test/test_app_sre.py @@ -94,6 +94,14 @@ assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus") assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs") + def test_findall_unicode(self): + import re + assert [u"\u1234"] == re.findall(u"\u1234", u"\u1000\u1234\u2000") + assert ["a", "u"] == re.findall("b(.)", "abalbus") + assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus") + assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs") + assert [u"xyz"] == re.findall(u".*yz", u"xyz") + def test_finditer(self): import re it = re.finditer("b(.)", "brabbel") @@ -1046,3 +1054,14 @@ import re raises(ValueError, re.split, '', '') re.split("a*", '') # -> warning + +class AppTestUnicodeExtra: + def test_string_attribute(self): + import re + match = re.search(u"\u1234", u"\u1233\u1234\u1235") + assert match.string == u"\u1233\u1234\u1235" + + def test_match_start(self): + import re + match = re.search(u"\u1234", u"\u1233\u1234\u1235") + assert match.start() == 1 diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -15,6 +15,7 @@ from pypy.module.cpyext.bytesobject import PyBytes_Check, PyBytes_FromObject from pypy.module._codecs.interp_codecs import ( CodecState, latin_1_decode, utf_16_decode, utf_32_decode) +from pypy.interpreter import unicodehelper from pypy.objspace.std import unicodeobject from rpython.rlib import rstring, runicode from rpython.tool.sourcetools import func_renamer @@ -869,7 +870,7 @@ else: errors = None - result, length, byteorder = runicode.str_decode_utf_32_helper( + result, length, byteorder = unicodehelper.str_decode_utf_32_helper( string, size, errors, True, # final ? false for multiple passes? 
None, # errorhandler diff --git a/pypy/module/time/interp_time.py b/pypy/module/time/interp_time.py --- a/pypy/module/time/interp_time.py +++ b/pypy/module/time/interp_time.py @@ -245,7 +245,7 @@ LPDWORD = rwin32.LPDWORD _GetSystemTimeAdjustment = rwin32.winexternal( 'GetSystemTimeAdjustment', - [LPDWORD, LPDWORD, rwin32.LPBOOL], + [LPDWORD, LPDWORD, rwin32.LPBOOL], rffi.INT) def gettimeofday(space, w_info=None): with lltype.scoped_alloc(rwin32.FILETIME) as system_time: @@ -270,7 +270,7 @@ lltype.scoped_alloc(rwin32.LPBOOL.TO, 1) as is_time_adjustment_disabled: _GetSystemTimeAdjustment(time_adjustment, time_increment, is_time_adjustment_disabled) - + _setinfo(space, w_info, "GetSystemTimeAsFileTime()", time_increment[0] * 1e-7, False, True) return space.newfloat(tv_sec + tv_usec * 1e-6) @@ -303,7 +303,7 @@ widen(t.c_millitm) * 0.001) if w_info is not None: _setinfo(space, w_info, "ftime()", 1e-3, - False, True) + False, True) return space.newfloat(result) else: if w_info: @@ -955,7 +955,7 @@ [rffi.CArrayPtr(lltype.SignedLongLong)], rwin32.DWORD) QueryPerformanceFrequency = rwin32.winexternal( - 'QueryPerformanceFrequency', [rffi.CArrayPtr(lltype.SignedLongLong)], + 'QueryPerformanceFrequency', [rffi.CArrayPtr(lltype.SignedLongLong)], rffi.INT) def win_perf_counter(space, w_info=None): with lltype.scoped_alloc(rffi.CArray(rffi.lltype.SignedLongLong), 1) as a: diff --git a/pypy/module/time/test/test_time.py b/pypy/module/time/test/test_time.py --- a/pypy/module/time/test/test_time.py +++ b/pypy/module/time/test/test_time.py @@ -19,6 +19,8 @@ raises(TypeError, time.sleep, "foo") time.sleep(0.12345) raises(ValueError, time.sleep, -1.0) + raises(ValueError, time.sleep, float('nan')) + raises(OverflowError, time.sleep, float('inf')) def test_clock(self): import time diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py --- a/pypy/module/unicodedata/interp_ucd.py +++ b/pypy/module/unicodedata/interp_ucd.py @@ -268,10 +268,10 @@ result[0] = ch if not composed: # If decomposed normalization we are done - return space.newunicode(u''.join([unichr(i) for i in result[:j]])) + return self.build(space, result, stop=j) if j <= 1: - return space.newunicode(u''.join([unichr(i) for i in result[:j]])) + return self.build(space, result, stop=j) current = result[0] starter_pos = 0 @@ -319,7 +319,10 @@ result[starter_pos] = current - return space.newunicode(u''.join([unichr(i) for i in result[:next_insert]])) + return self.build(space, result, stop=next_insert) + + def build(self, space, r, stop): + return space.newunicode(u''.join([unichr(i) for i in r[:stop]])) methods = {} diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -57,6 +57,11 @@ assert 'a' + 'b' == 'ab' raises(TypeError, operator.add, b'a', 'b') + def test_getitem(self): + assert u'abc'[2] == 'c' + raises(IndexError, u'abc'.__getitem__, 15) + assert u'g\u0105\u015b\u0107'[2] == u'\u015b' + def test_join(self): def check(a, b): assert a == b @@ -82,6 +87,8 @@ assert '\n\n'.splitlines() == ['', ''] assert 'a\nb\nc'.splitlines(1) == ['a\n', 'b\n', 'c'] assert '\na\nb\n'.splitlines(1) == ['\n', 'a\n', 'b\n'] + assert ((u'a' + b'\xc2\x85'.decode('utf8') + u'b\n').splitlines() == + ['a', 'b']) def test_zfill(self): assert '123'.zfill(2) == '123' @@ -128,55 +135,57 @@ raises(ValueError, 'abc'.split, '') raises(ValueError, 'abc'.split, '') assert ' a b c 
d'.split(None, 0) == ['a b c d'] + assert u'a\nb\u1680c'.split() == [u'a', u'b', u'c'] def test_rsplit(self): - assert "".rsplit() == [] - assert " ".rsplit() == [] - assert "a".rsplit() == ['a'] - assert "a".rsplit("a", 1) == ['', ''] - assert " ".rsplit(" ", 1) == ['', ''] - assert "aa".rsplit("a", 2) == ['', '', ''] - assert " a ".rsplit() == ['a'] - assert "a b c".rsplit() == ['a','b','c'] - assert 'this is the rsplit function'.rsplit() == ['this', 'is', 'the', 'rsplit', 'function'] - assert 'a|b|c|d'.rsplit('|') == ['a', 'b', 'c', 'd'] - assert 'a|b|c|d'.rsplit('|') == ['a', 'b', 'c', 'd'] - assert 'a|b|c|d'.rsplit('|') == ['a', 'b', 'c', 'd'] - assert 'a|b|c|d'.rsplit('|', 2) == ['a|b', 'c', 'd'] - assert 'a b c d'.rsplit(None, 1) == ['a b c', 'd'] - assert 'a b c d'.rsplit(None, 2) == ['a b', 'c', 'd'] - assert 'a b c d'.rsplit(None, 3) == ['a', 'b', 'c', 'd'] - assert 'a b c d'.rsplit(None, 4) == ['a', 'b', 'c', 'd'] - assert 'a b c d'.rsplit(None, 0) == ['a b c d'] - assert 'a b c d'.rsplit(None, 2) == ['a b', 'c', 'd'] - assert 'a b c d '.rsplit() == ['a', 'b', 'c', 'd'] - assert 'a//b//c//d'.rsplit('//') == ['a', 'b', 'c', 'd'] - assert 'endcase test'.rsplit('test') == ['endcase ', ''] - raises(ValueError, 'abc'.rsplit, '') - raises(ValueError, 'abc'.rsplit, '') - raises(ValueError, 'abc'.rsplit, '') - assert ' a b c '.rsplit(None, 0) == [' a b c'] - assert ''.rsplit('aaa') == [''] + assert u"".rsplit() == [] + assert u" ".rsplit() == [] + assert u"a".rsplit() == [u'a'] + assert u"a".rsplit(u"a", 1) == [u'', u''] + assert u" ".rsplit(u" ", 1) == [u'', u''] + assert u"aa".rsplit(u"a", 2) == [u'', u'', u''] + assert u" a ".rsplit() == [u'a'] + assert u"a b c".rsplit() == [u'a',u'b',u'c'] + assert u'this is the rsplit function'.rsplit() == [u'this', u'is', u'the', u'rsplit', u'function'] + assert u'a|b|c|d'.rsplit(u'|') == [u'a', u'b', u'c', u'd'] + assert u'a|b|c|d'.rsplit('|') == [u'a', u'b', u'c', u'd'] + assert 'a|b|c|d'.rsplit(u'|') == [u'a', u'b', u'c', u'd'] + assert u'a|b|c|d'.rsplit(u'|', 2) == [u'a|b', u'c', u'd'] + assert u'a b c d'.rsplit(None, 1) == [u'a b c', u'd'] + assert u'a b c d'.rsplit(None, 2) == [u'a b', u'c', u'd'] + assert u'a b c d'.rsplit(None, 3) == [u'a', u'b', u'c', u'd'] + assert u'a b c d'.rsplit(None, 4) == [u'a', u'b', u'c', u'd'] + assert u'a b c d'.rsplit(None, 0) == [u'a b c d'] + assert u'a b c d'.rsplit(None, 2) == [u'a b', u'c', u'd'] + assert u'a b c d '.rsplit() == [u'a', u'b', u'c', u'd'] + assert u'a//b//c//d'.rsplit(u'//') == [u'a', u'b', u'c', u'd'] + assert u'endcase test'.rsplit(u'test') == [u'endcase ', u''] + raises(ValueError, u'abc'.rsplit, u'') + raises(ValueError, u'abc'.rsplit, '') + raises(ValueError, 'abc'.rsplit, u'') + assert u' a b c '.rsplit(None, 0) == [u' a b c'] + assert u''.rsplit('aaa') == [u''] + assert u'a\nb\u1680c'.rsplit() == [u'a', u'b', u'c'] def test_center(self): - s="a b" - assert s.center(0) == "a b" - assert s.center(1) == "a b" - assert s.center(2) == "a b" - assert s.center(3) == "a b" - assert s.center(4) == "a b " - assert s.center(5) == " a b " - assert s.center(6) == " a b " - assert s.center(7) == " a b " - assert s.center(8) == " a b " - assert s.center(9) == " a b " - assert 'abc'.center(10) == ' abc ' - assert 'abc'.center(6) == ' abc ' - assert 'abc'.center(3) == 'abc' - assert 'abc'.center(2) == 'abc' - assert 'abc'.center(5, '*') == '*abc*' # Python 2.4 - assert 'abc'.center(5, '*') == '*abc*' # Python 2.4 - raises(TypeError, 'abc'.center, 4, 'cba') + s=u"a b" + assert s.center(0) == u"a b" + 
assert s.center(1) == u"a b" + assert s.center(2) == u"a b" + assert s.center(3) == u"a b" + assert s.center(4) == u"a b " + assert s.center(5) == u" a b " + assert s.center(6) == u" a b " + assert s.center(7) == u" a b " + assert s.center(8) == u" a b " + assert s.center(9) == u" a b " + assert u'abc'.center(10) == u' abc ' + assert u'abc'.center(6) == u' abc ' + assert u'abc'.center(3) == u'abc' + assert u'abc'.center(2) == u'abc' + assert u'abc'.center(5, u'*') == u'*abc*' # Python 2.4 + assert u'abc'.center(5, '*') == u'*abc*' # Python 2.4 + raises(TypeError, u'abc'.center, 4, u'cba') def test_title(self): assert "brown fox".title() == "Brown Fox" @@ -186,23 +195,25 @@ assert "bro!wn fox".title() == "Bro!Wn Fox" assert u'A\u03a3 \u1fa1xy'.title() == u'A\u03c2 \u1fa9xy' assert u'A\u03a3A'.title() == u'A\u03c3a' + assert u"brow\u4321n fox".title() == u"Brow\u4321N Fox" + assert u'\ud800'.title() == u'\ud800' def test_istitle(self): - assert "".istitle() == False - assert "!".istitle() == False - assert "!!".istitle() == False - assert "brown fox".istitle() == False - assert "!brown fox".istitle() == False - assert "bROWN fOX".istitle() == False - assert "Brown Fox".istitle() == True - assert "bro!wn fox".istitle() == False - assert "Bro!wn fox".istitle() == False - assert "!brown Fox".istitle() == False - assert "!Brown Fox".istitle() == True - assert "Brow&&&&N Fox".istitle() == True - assert "!Brow&&&&n Fox".istitle() == False - assert '\u1FFc'.istitle() - assert 'Greek \u1FFcitlecases ...'.istitle() + assert u"".istitle() == False + assert u"!".istitle() == False + assert u"!!".istitle() == False + assert u"brown fox".istitle() == False + assert u"!brown fox".istitle() == False + assert u"bROWN fOX".istitle() == False + assert u"Brown Fox".istitle() == True + assert u"bro!wn fox".istitle() == False + assert u"Bro!wn fox".istitle() == False + assert u"!brown Fox".istitle() == False + assert u"!Brown Fox".istitle() == True + assert u"Brow&&&&N Fox".istitle() == True + assert u"!Brow&&&&n Fox".istitle() == False + assert u'\u1FFc'.istitle() + assert u'Greek \u1FFcitlecases ...'.istitle() def test_islower_isupper_with_titlecase(self): # \u01c5 is a char which is neither lowercase nor uppercase, but @@ -220,24 +231,36 @@ assert "_!var".isidentifier() is False assert "3abc".isidentifier() is False + def test_lower_upper(self): + assert u'a'.lower() == u'a' + assert u'A'.lower() == u'a' + assert u'\u0105'.lower() == u'\u0105' + assert u'\u0104'.lower() == u'\u0105' + assert u'\ud800'.lower() == u'\ud800' + assert u'a'.upper() == u'A' + assert u'A'.upper() == u'A' + assert u'\u0105'.upper() == u'\u0104' + assert u'\u0104'.upper() == u'\u0104' + assert u'\ud800'.upper() == u'\ud800' + def test_capitalize(self): - assert "brown fox".capitalize() == "Brown fox" - assert ' hello '.capitalize() == ' hello ' - assert 'Hello '.capitalize() == 'Hello ' - assert 'hello '.capitalize() == 'Hello ' - assert 'aaaa'.capitalize() == 'Aaaa' - assert 'AaAa'.capitalize() == 'Aaaa' + assert u"brown fox".capitalize() == u"Brown fox" + assert u' hello '.capitalize() == u' hello ' + assert u'Hello '.capitalize() == u'Hello ' + assert u'hello '.capitalize() == u'Hello ' + assert u'aaaa'.capitalize() == u'Aaaa' + assert u'AaAa'.capitalize() == u'Aaaa' # check that titlecased chars are lowered correctly # \u1ffc is the titlecased char - assert ('\u1ff3\u1ff3\u1ffc\u1ffc'.capitalize() == - '\u03a9\u0399\u1ff3\u1ff3\u1ff3') + assert (u'\u1ff3\u1ff3\u1ffc\u1ffc'.capitalize() == + u'\u03a9\u0399\u1ff3\u1ff3\u1ff3') # 
check with cased non-letter chars - assert ('\u24c5\u24ce\u24c9\u24bd\u24c4\u24c3'.capitalize() == - '\u24c5\u24e8\u24e3\u24d7\u24de\u24dd') - assert ('\u24df\u24e8\u24e3\u24d7\u24de\u24dd'.capitalize() == - '\u24c5\u24e8\u24e3\u24d7\u24de\u24dd') - assert '\u2160\u2161\u2162'.capitalize() == '\u2160\u2171\u2172' - assert '\u2170\u2171\u2172'.capitalize() == '\u2160\u2171\u2172' + assert (u'\u24c5\u24ce\u24c9\u24bd\u24c4\u24c3'.capitalize() == + u'\u24c5\u24e8\u24e3\u24d7\u24de\u24dd') + assert (u'\u24df\u24e8\u24e3\u24d7\u24de\u24dd'.capitalize() == + u'\u24c5\u24e8\u24e3\u24d7\u24de\u24dd') + assert u'\u2160\u2161\u2162'.capitalize() == u'\u2160\u2171\u2172' + assert u'\u2170\u2171\u2172'.capitalize() == u'\u2160\u2171\u2172' # check with Ll chars with no upper - nothing changes here assert ('\u019b\u1d00\u1d86\u0221\u1fb7'.capitalize() == '\u019b\u1d00\u1d86\u0221\u1fb7') @@ -261,34 +284,36 @@ def test_isprintable_wide(self): assert '\U0001F46F'.isprintable() # Since unicode 6.0 assert not '\U000E0020'.isprintable() + assert u'\ud800'.capitalize() == u'\ud800' + assert u'xx\ud800'.capitalize() == u'Xx\ud800' def test_rjust(self): - s = "abc" + s = u"abc" assert s.rjust(2) == s assert s.rjust(3) == s - assert s.rjust(4) == " " + s - assert s.rjust(5) == " " + s - assert 'abc'.rjust(10) == ' abc' - assert 'abc'.rjust(6) == ' abc' - assert 'abc'.rjust(3) == 'abc' - assert 'abc'.rjust(2) == 'abc' - assert 'abc'.rjust(5, '*') == '**abc' # Python 2.4 - assert 'abc'.rjust(5, '*') == '**abc' # Python 2.4 - raises(TypeError, 'abc'.rjust, 5, 'xx') + assert s.rjust(4) == u" " + s + assert s.rjust(5) == u" " + s + assert u'abc'.rjust(10) == u' abc' + assert u'abc'.rjust(6) == u' abc' + assert u'abc'.rjust(3) == u'abc' + assert u'abc'.rjust(2) == u'abc' + assert u'abc'.rjust(5, u'*') == u'**abc' # Python 2.4 + assert u'abc'.rjust(5, '*') == u'**abc' # Python 2.4 + raises(TypeError, u'abc'.rjust, 5, u'xx') def test_ljust(self): - s = "abc" + s = u"abc" assert s.ljust(2) == s assert s.ljust(3) == s - assert s.ljust(4) == s + " " - assert s.ljust(5) == s + " " - assert 'abc'.ljust(10) == 'abc ' - assert 'abc'.ljust(6) == 'abc ' - assert 'abc'.ljust(3) == 'abc' - assert 'abc'.ljust(2) == 'abc' - assert 'abc'.ljust(5, '*') == 'abc**' # Python 2.4 - assert 'abc'.ljust(5, '*') == 'abc**' # Python 2.4 - raises(TypeError, 'abc'.ljust, 6, '') + assert s.ljust(4) == s + u" " + assert s.ljust(5) == s + u" " + assert u'abc'.ljust(10) == u'abc ' + assert u'abc'.ljust(6) == u'abc ' + assert u'abc'.ljust(3) == u'abc' + assert u'abc'.ljust(2) == u'abc' + assert u'abc'.ljust(5, u'*') == u'abc**' # Python 2.4 + assert u'abc'.ljust(5, '*') == u'abc**' # Python 2.4 + raises(TypeError, u'abc'.ljust, 6, u'') def test_replace(self): assert 'one!two!three!'.replace('!', '@', 1) == 'one@two!three!' @@ -300,6 +325,16 @@ assert 'one!two!three!'.replace('!', '@') == 'one@two@three@' assert 'one!two!three!'.replace('x', '@') == 'one!two!three!' assert 'one!two!three!'.replace('x', '@', 2) == 'one!two!three!' 
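For reference, the error-handler protocol exercised by the new
test_errorhandler_collection test earlier in this diff: a handler registered
with codecs.register_error receives one UnicodeEncodeError per contiguous run
of unencodable characters and returns a (replacement, resume position) pair.
The handler name "example.record" below is made up for illustration:

    import codecs

    collected = []

    def record_error(exc):
        if not isinstance(exc, UnicodeEncodeError):
            raise TypeError("don't know how to handle %r" % exc)
        # remember the whole unencodable run, replace it with nothing
        collected.append(exc.object[exc.start:exc.end])
        return (u'', exc.end)

    codecs.register_error("example.record", record_error)

    assert u"\xac\u1234\u20ac".encode("ascii", "example.record") == b""
    assert collected == [u"\xac\u1234\u20ac"]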
+ assert u'\u1234'.replace(u'', '-') == u'-\u1234-' + assert u'\u0234\u5678'.replace('', u'-') == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', 0) == u'\u0234\u5678' + assert u'\u0234\u5678'.replace('', u'-', 1) == u'-\u0234\u5678' + assert u'\u0234\u5678'.replace('', u'-', 2) == u'-\u0234-\u5678' + assert u'\u0234\u5678'.replace('', u'-', 3) == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', 4) == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', 700) == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', -1) == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', -42) == u'-\u0234-\u5678-' assert 'abc'.replace('', '-') == '-a-b-c-' assert 'abc'.replace('', '-', 3) == '-a-b-c' assert 'abc'.replace('', '-', 0) == 'abc' @@ -387,6 +422,9 @@ assert ''.startswith('a') is False assert 'x'.startswith('xx') is False assert 'y'.startswith('xx') is False + assert u'\u1234\u5678\u4321'.startswith(u'\u1234') is True + assert u'\u1234\u5678\u4321'.startswith(u'\u1234\u4321') is False + assert u'\u1234'.startswith(u'') is True def test_startswith_more(self): assert 'ab'.startswith('a', 0) is True @@ -533,7 +571,7 @@ raises(TypeError, 'hello'.translate) raises(TypeError, 'abababc'.translate, 'abc', 'xyz') - def test_unicode_form_encoded_object(self): + def test_unicode_from_encoded_object(self): assert str(b'x', 'utf-8') == 'x' assert str(b'x', 'utf-8', 'strict') == 'x' @@ -659,31 +697,31 @@ def test_partition(self): - assert ('this is the par', 'ti', 'tion method') == \ - 'this is the partition method'.partition('ti') + assert (u'this is the par', u'ti', u'tion method') == \ + u'this is the partition method'.partition(u'ti') # from raymond's original specification - S = 'http://www.python.org' - assert ('http', '://', 'www.python.org') == S.partition('://') - assert ('http://www.python.org', '', '') == S.partition('?') - assert ('', 'http://', 'www.python.org') == S.partition('http://') - assert ('http://www.python.', 'org', '') == S.partition('org') + S = u'http://www.python.org' + assert (u'http', u'://', u'www.python.org') == S.partition(u'://') + assert (u'http://www.python.org', u'', u'') == S.partition(u'?') + assert (u'', u'http://', u'www.python.org') == S.partition(u'http://') + assert (u'http://www.python.', u'org', u'') == S.partition(u'org') - raises(ValueError, S.partition, '') + raises(ValueError, S.partition, u'') raises(TypeError, S.partition, None) def test_rpartition(self): - assert ('this is the rparti', 'ti', 'on method') == \ - 'this is the rpartition method'.rpartition('ti') + assert (u'this is the rparti', u'ti', u'on method') == \ + u'this is the rpartition method'.rpartition(u'ti') # from raymond's original specification - S = 'http://www.python.org' - assert ('http', '://', 'www.python.org') == S.rpartition('://') - assert ('', '', 'http://www.python.org') == S.rpartition('?') - assert ('', 'http://', 'www.python.org') == S.rpartition('http://') - assert ('http://www.python.', 'org', '') == S.rpartition('org') + S = u'http://www.python.org' + assert (u'http', u'://', u'www.python.org') == S.rpartition(u'://') + assert (u'', u'', u'http://www.python.org') == S.rpartition(u'?') + assert (u'', u'http://', u'www.python.org') == S.rpartition(u'http://') + assert (u'http://www.python.', u'org', u'') == S.rpartition(u'org') - raises(ValueError, S.rpartition, '') + raises(ValueError, S.rpartition, u'') raises(TypeError, S.rpartition, None) def test_mul(self): @@ -706,6 +744,7 @@ def test_index(self): assert 
"rrarrrrrrrrra".index('a', 4, None) == 12 assert "rrarrrrrrrrra".index('a', None, 6) == 2 + assert u"\u1234\u4321\u5678".index(u'\u5678', 1) == 2 def test_rindex(self): from sys import maxsize @@ -715,6 +754,7 @@ assert 'abcdefghiabc'.rindex('abc', 0, -1) == 0 assert 'abcdefghiabc'.rindex('abc', -4*maxsize, 4*maxsize) == 9 assert 'rrarrrrrrrrra'.rindex('a', 4, None) == 12 + assert u"\u1234\u5678".rindex(u'\u5678') == 1 raises(ValueError, 'abcdefghiabc'.rindex, 'hib') raises(ValueError, 'defghiabc'.rindex, 'def', 1) @@ -729,6 +769,7 @@ assert 'abcdefghiabc'.rfind('') == 12 assert 'abcdefghiabc'.rfind('abcd') == 0 assert 'abcdefghiabc'.rfind('abcz') == -1 + assert u"\u1234\u5678".rfind(u'\u5678') == 1 def test_rfind_corner_case(self): assert 'abc'.rfind('', 4) == -1 @@ -802,17 +843,31 @@ assert str(Y()).__class__ is X def test_getslice(self): - assert '123456'[1:5] == '2345' - s = "abc" - assert s[:] == "abc" - assert s[1:] == "bc" - assert s[:2] == "ab" - assert s[1:2] == "b" - assert s[-2:] == "bc" - assert s[:-1] == "ab" - assert s[-2:2] == "b" - assert s[1:-1] == "b" - assert s[-2:-1] == "b" + s = u"\u0105b\u0107" + assert s[:] == u"\u0105b\u0107" + assert s[1:] == u"b\u0107" + assert s[:2] == u"\u0105b" + assert s[1:2] == u"b" + assert s[-2:] == u"b\u0107" + assert s[:-1] == u"\u0105b" + assert s[-2:2] == u"b" + assert s[1:-1] == u"b" + assert s[-2:-1] == u"b" + + def test_getitem_slice(self): + assert u'123456'.__getitem__(slice(1, 5)) == u'2345' + s = u"\u0105b\u0107" + assert s[slice(3)] == u"\u0105b\u0107" + assert s[slice(1, 3)] == u"b\u0107" + assert s[slice(2)] == u"\u0105b" + assert s[slice(1, 2)] == u"b" + assert s[slice(-2, 3)] == u"b\u0107" + assert s[slice(-1)] == u"\u0105b" + assert s[slice(-2, 2)] == u"b" + assert s[slice(1, -1)] == u"b" + assert s[slice(-2, -1)] == u"b" + assert u"abcde"[::2] == u"ace" + assert u"\u0105\u0106\u0107abcd"[::2] == u"\u0105\u0107bd" def test_iter(self): foo = "\u1111\u2222\u3333" @@ -898,7 +953,7 @@ def test_formatting_unicode__str__2(self): class A: def __str__(self): - return 'baz' + return u'baz' class B: def __str__(self): @@ -913,12 +968,12 @@ # "bah" is all I can say class X(object): def __repr__(self): - return '\u1234' + return u'\u1234' '%s' % X() # class X(object): def __str__(self): - return '\u1234' + return u'\u1234' '%s' % X() def test_formatting_unicode__repr__(self): diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py --- a/rpython/annotator/bookkeeper.py +++ b/rpython/annotator/bookkeeper.py @@ -194,13 +194,14 @@ listdef.generalize_range_step(flags['range_step']) return SomeList(listdef) - def getdictdef(self, is_r_dict=False, force_non_null=False): + def getdictdef(self, is_r_dict=False, force_non_null=False, simple_hash_eq=False): """Get the DictDef associated with the current position.""" try: dictdef = self.dictdefs[self.position_key] except KeyError: dictdef = DictDef(self, is_r_dict=is_r_dict, - force_non_null=force_non_null) + force_non_null=force_non_null, + simple_hash_eq=simple_hash_eq) self.dictdefs[self.position_key] = dictdef return dictdef diff --git a/rpython/annotator/builtin.py b/rpython/annotator/builtin.py --- a/rpython/annotator/builtin.py +++ b/rpython/annotator/builtin.py @@ -237,22 +237,30 @@ return SomeInstance(clsdef) @analyzer_for(rpython.rlib.objectmodel.r_dict) -def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None): +def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None, s_simple_hash_eq=None): + return _r_dict_helper(SomeDict, s_eqfn, s_hashfn, 
s_force_non_null, s_simple_hash_eq) + +@analyzer_for(rpython.rlib.objectmodel.r_ordereddict) +def robjmodel_r_ordereddict(s_eqfn, s_hashfn, s_force_non_null=None, s_simple_hash_eq=None): + return _r_dict_helper(SomeOrderedDict, s_eqfn, s_hashfn, + s_force_non_null, s_simple_hash_eq) + +def _r_dict_helper(cls, s_eqfn, s_hashfn, s_force_non_null, s_simple_hash_eq): if s_force_non_null is None: force_non_null = False else: assert s_force_non_null.is_constant() force_non_null = s_force_non_null.const + if s_simple_hash_eq is None: + simple_hash_eq = False + else: + assert s_simple_hash_eq.is_constant() + simple_hash_eq = s_simple_hash_eq.const dictdef = getbookkeeper().getdictdef(is_r_dict=True, - force_non_null=force_non_null) + force_non_null=force_non_null, + simple_hash_eq=simple_hash_eq) dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn) - return SomeDict(dictdef) - -@analyzer_for(rpython.rlib.objectmodel.r_ordereddict) -def robjmodel_r_ordereddict(s_eqfn, s_hashfn): - dictdef = getbookkeeper().getdictdef(is_r_dict=True) - dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn) - return SomeOrderedDict(dictdef) + return cls(dictdef) @analyzer_for(rpython.rlib.objectmodel.hlinvoke) def robjmodel_hlinvoke(s_repr, s_llcallable, *args_s): diff --git a/rpython/annotator/dictdef.py b/rpython/annotator/dictdef.py --- a/rpython/annotator/dictdef.py +++ b/rpython/annotator/dictdef.py @@ -81,12 +81,14 @@ def __init__(self, bookkeeper, s_key = s_ImpossibleValue, s_value = s_ImpossibleValue, is_r_dict = False, - force_non_null = False): + force_non_null = False, + simple_hash_eq = False): self.dictkey = DictKey(bookkeeper, s_key, is_r_dict) self.dictkey.itemof[self] = True self.dictvalue = DictValue(bookkeeper, s_value) self.dictvalue.itemof[self] = True self.force_non_null = force_non_null + self.simple_hash_eq = simple_hash_eq def read_key(self, position_key): self.dictkey.read_locations.add(position_key) diff --git a/rpython/jit/metainterp/typesystem.py b/rpython/jit/metainterp/typesystem.py --- a/rpython/jit/metainterp/typesystem.py +++ b/rpython/jit/metainterp/typesystem.py @@ -106,11 +106,11 @@ # It is an r_dict on lltype. Two copies, to avoid conflicts with # the value type. Note that NULL is not allowed as a key. def new_ref_dict(self): - return r_dict(rd_eq, rd_hash) + return r_dict(rd_eq, rd_hash, simple_hash_eq=True) def new_ref_dict_2(self): - return r_dict(rd_eq, rd_hash) + return r_dict(rd_eq, rd_hash, simple_hash_eq=True) def new_ref_dict_3(self): - return r_dict(rd_eq, rd_hash) + return r_dict(rd_eq, rd_hash, simple_hash_eq=True) def cast_vtable_to_hashable(self, cpu, ptr): adr = llmemory.cast_ptr_to_adr(ptr) diff --git a/rpython/rlib/debug.py b/rpython/rlib/debug.py --- a/rpython/rlib/debug.py +++ b/rpython/rlib/debug.py @@ -288,6 +288,9 @@ def mark_dict_non_null(d): """ Mark dictionary as having non-null keys and values. A warning would be emitted (not an error!) in case annotation disagrees. + + This doesn't work for r_dicts. For them, pass + r_dict(..., force_non_null=True) to the constructor. 
""" assert isinstance(d, dict) return d diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py --- a/rpython/rlib/objectmodel.py +++ b/rpython/rlib/objectmodel.py @@ -748,11 +748,19 @@ def _newdict(self): return {} - def __init__(self, key_eq, key_hash, force_non_null=False): + def __init__(self, key_eq, key_hash, force_non_null=False, simple_hash_eq=False): + """ force_non_null=True means that the key can never be None (even if + the annotator things it could be) + + simple_hash_eq=True means that the hash function is very fast, meaning it's + efficient enough that the dict does not have to store the hash per key. + It also implies that neither the hash nor the eq function will mutate + the dictionary. """ self._dict = self._newdict() self.key_eq = key_eq self.key_hash = key_hash self.force_non_null = force_non_null + self.simple_hash_eq = simple_hash_eq def __getitem__(self, key): return self._dict[_r_dictkey(self, key)] diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py --- a/rpython/rlib/runicode.py +++ b/rpython/rlib/runicode.py @@ -710,7 +710,7 @@ # ____________________________________________________________ -# utf-32 +# utf-32 (not used in PyPy any more) def str_decode_utf_32(s, size, errors, final=True, errorhandler=None): diff --git a/rpython/rlib/test/test_objectmodel.py b/rpython/rlib/test/test_objectmodel.py --- a/rpython/rlib/test/test_objectmodel.py +++ b/rpython/rlib/test/test_objectmodel.py @@ -330,6 +330,13 @@ res = self.interpret(g, [3]) assert res == 77 + def test_r_dict_fast_functions(self): + def fn(): + d1 = r_dict(strange_key_eq, strange_key_hash, simple_hash_eq=True) + return play_with_r_dict(d1) + res = self.interpret(fn, []) + assert res + def test_prepare_dict_update(self): def g(n): d = {} diff --git a/rpython/rtyper/lltypesystem/rdict.py b/rpython/rtyper/lltypesystem/rdict.py --- a/rpython/rtyper/lltypesystem/rdict.py +++ b/rpython/rtyper/lltypesystem/rdict.py @@ -42,7 +42,8 @@ class DictRepr(AbstractDictRepr): def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue, - custom_eq_hash=None, force_non_null=False): + custom_eq_hash=None, force_non_null=False, fast_hash=False): + # fast_hash is ignored (only implemented in rordereddict.py) self.rtyper = rtyper self.DICT = lltype.GcForwardReference() self.lowleveltype = lltype.Ptr(self.DICT) diff --git a/rpython/rtyper/lltypesystem/rordereddict.py b/rpython/rtyper/lltypesystem/rordereddict.py --- a/rpython/rtyper/lltypesystem/rordereddict.py +++ b/rpython/rtyper/lltypesystem/rordereddict.py @@ -66,7 +66,7 @@ def get_ll_dict(DICTKEY, DICTVALUE, get_custom_eq_hash=None, DICT=None, ll_fasthash_function=None, ll_hash_function=None, - ll_eq_function=None, method_cache={}, + ll_eq_function=None, method_cache={}, simple_hash_eq=False, dummykeyobj=None, dummyvalueobj=None, rtyper=None): # get the actual DICT type. 
if DICT is None, it's created, otherwise # forward reference is becoming DICT @@ -114,11 +114,14 @@ # * the value entryfields.append(("value", DICTVALUE)) - if ll_fasthash_function is None: + if simple_hash_eq: + assert get_custom_eq_hash is not None + entrymeths['entry_hash'] = ll_hash_custom_fast + elif ll_fasthash_function is None: entryfields.append(("f_hash", lltype.Signed)) - entrymeths['hash'] = ll_hash_from_cache + entrymeths['entry_hash'] = ll_hash_from_cache else: - entrymeths['hash'] = ll_hash_recomputed + entrymeths['entry_hash'] = ll_hash_recomputed entrymeths['fasthashfn'] = ll_fasthash_function # Build the lltype data structures @@ -140,7 +143,7 @@ 'keyeq': ll_keyeq_custom, 'r_rdict_eqfn': r_rdict_eqfn, 'r_rdict_hashfn': r_rdict_hashfn, - 'paranoia': True, + 'paranoia': not simple_hash_eq, } else: # figure out which functions must be used to hash and compare @@ -167,13 +170,14 @@ class OrderedDictRepr(AbstractDictRepr): def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue, - custom_eq_hash=None, force_non_null=False): + custom_eq_hash=None, force_non_null=False, simple_hash_eq=False): #assert not force_non_null self.rtyper = rtyper self.finalized = False self.DICT = lltype.GcForwardReference() self.lowleveltype = lltype.Ptr(self.DICT) self.custom_eq_hash = custom_eq_hash is not None + self.simple_hash_eq = simple_hash_eq if not isinstance(key_repr, rmodel.Repr): # not computed yet, done by setup() assert callable(key_repr) self._key_repr_computer = key_repr @@ -211,6 +215,7 @@ self.r_rdict_eqfn, self.r_rdict_hashfn = ( self._custom_eq_hash_repr()) kwd['get_custom_eq_hash'] = self._custom_eq_hash_repr + kwd['simple_hash_eq'] = self.simple_hash_eq else: kwd['ll_hash_function'] = self.key_repr.get_ll_hash_function() kwd['ll_eq_function'] = self.key_repr.get_ll_eq_function() @@ -600,15 +605,21 @@ dummy = ENTRIES.dummy_obj.ll_dummy_value entries[i].value = dummy -@signature(types.any(), types.int(), returns=types.any()) -def ll_hash_from_cache(entries, i): +@signature(types.any(), types.any(), types.int(), returns=types.any()) +def ll_hash_from_cache(entries, d, i): return entries[i].f_hash -@signature(types.any(), types.int(), returns=types.any()) -def ll_hash_recomputed(entries, i): +@signature(types.any(), types.any(), types.int(), returns=types.any()) +def ll_hash_recomputed(entries, d, i): ENTRIES = lltype.typeOf(entries).TO return ENTRIES.fasthashfn(entries[i].key) +@signature(types.any(), types.any(), types.int(), returns=types.any()) +def ll_hash_custom_fast(entries, d, i): + DICT = lltype.typeOf(d).TO + key = entries[i].key + return objectmodel.hlinvoke(DICT.r_rdict_hashfn, d.fnkeyhash, key) + def ll_keyhash_custom(d, key): DICT = lltype.typeOf(d).TO return objectmodel.hlinvoke(DICT.r_rdict_hashfn, d.fnkeyhash, key) @@ -962,22 +973,22 @@ if fun == FUNC_BYTE: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_BYTE) + ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_BYTE) i += 1 elif fun == FUNC_SHORT: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_SHORT) + ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_SHORT) i += 1 elif IS_64BIT and fun == FUNC_INT: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_INT) + ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_INT) i += 1 elif fun == FUNC_LONG: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_LONG) + ll_dict_store_clean(d, 
@@ -1015,7 +1026,7 @@
         checkingkey = entries[index - VALID_OFFSET].key
         if direct_compare and checkingkey == key:
             return index - VALID_OFFSET   # found the entry
-        if d.keyeq is not None and entries.hash(index - VALID_OFFSET) == hash:
+        if d.keyeq is not None and entries.entry_hash(d, index - VALID_OFFSET) == hash:
             # correct hash, maybe the key is e.g. a different pointer to
             # an equal object
             found = d.keyeq(checkingkey, key)
@@ -1056,7 +1067,7 @@
         checkingkey = entries[index - VALID_OFFSET].key
         if direct_compare and checkingkey == key:
             return index - VALID_OFFSET   # found the entry
-        if d.keyeq is not None and entries.hash(index - VALID_OFFSET) == hash:
+        if d.keyeq is not None and entries.entry_hash(d, index - VALID_OFFSET) == hash:
             # correct hash, maybe the key is e.g. a different pointer to
             # an equal object
             found = d.keyeq(checkingkey, key)
@@ -1305,14 +1316,14 @@
 def ll_dict_update(dic1, dic2):
     if dic1 == dic2:
         return
-    ll_ensure_indexes(dic2)    # needed for entries.hash() below
+    ll_ensure_indexes(dic2)    # needed for entries.entry_hash() below
     ll_prepare_dict_update(dic1, dic2.num_live_items)
     i = 0
     while i < dic2.num_ever_used_items:
         entries = dic2.entries
         if entries.valid(i):
             entry = entries[i]
-            hash = entries.hash(i)
+            hash = entries.entry_hash(dic2, i)
             key = entry.key
             value = entry.value
             index = dic1.lookup_function(dic1, key, hash, FLAG_STORE)
@@ -1413,7 +1424,7 @@
         r = lltype.malloc(ELEM.TO)
         r.item0 = recast(ELEM.TO.item0, entry.key)
         r.item1 = recast(ELEM.TO.item1, entry.value)
-        _ll_dict_del(dic, dic.entries.hash(i), i)
+        _ll_dict_del(dic, dic.entries.entry_hash(dic, i), i)
         return r

 def ll_dict_pop(dic, key):
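In the rordereddict hunks above, the per-entry accessor entries.hash(i) becomes entries.entry_hash(d, i): the dict itself is passed in because, with simple_hash_eq=True, there is no stored f_hash field and the user's hash function has to be invoked again through the dict. A rough plain-Python sketch of the three strategies that get_ll_dict chooses between (toy classes invented here, not the actual lltype machinery):

    class ToyEntry(object):
        def __init__(self, key, f_hash=None):
            self.key = key
            self.f_hash = f_hash      # only filled in when hashes are cached

    class ToyDict(object):
        def __init__(self, custom_hash=None):
            self.custom_hash = custom_hash

    def entry_hash_from_cache(entries, d, i):
        # custom eq/hash without simple_hash_eq: read the hash stored at insert time
        return entries[i].f_hash

    def entry_hash_recomputed(entries, d, i):
        # keys with a known fast hash function: recompute it from the key
        return hash(entries[i].key)

    def entry_hash_custom_fast(entries, d, i):
        # simple_hash_eq=True: call the user's hash function through the dict,
        # so the entries never need an f_hash field
        return d.custom_hash(entries[i].key)

Giving all three variants the same (entries, d, i) signature is what forces the extra argument onto ll_hash_from_cache and ll_hash_recomputed as well, even though they do not use it.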
diff --git a/rpython/rtyper/rbuiltin.py b/rpython/rtyper/rbuiltin.py
--- a/rpython/rtyper/rbuiltin.py
+++ b/rpython/rtyper/rbuiltin.py
@@ -717,9 +717,9 @@
 @typer_for(OrderedDict)
 @typer_for(objectmodel.r_dict)
 @typer_for(objectmodel.r_ordereddict)
-def rtype_dict_constructor(hop, i_force_non_null=None):
-    # 'i_force_non_null' is ignored here; if it has any effect, it
-    # has already been applied to 'hop.r_result'
+def rtype_dict_constructor(hop, i_force_non_null=None, i_simple_hash_eq=None):
+    # 'i_force_non_null' and 'i_simple_hash_eq' are ignored here; if they have any
+    # effect, it has already been applied to 'hop.r_result'
     hop.exception_cannot_occur()
     r_dict = hop.r_result
     cDICT = hop.inputconst(lltype.Void, r_dict.DICT)
diff --git a/rpython/rtyper/rdict.py b/rpython/rtyper/rdict.py
--- a/rpython/rtyper/rdict.py
+++ b/rpython/rtyper/rdict.py
@@ -15,6 +15,7 @@
         s_key = dictkey.s_value
         s_value = dictvalue.s_value
         force_non_null = self.dictdef.force_non_null
+        simple_hash_eq = self.dictdef.simple_hash_eq
         if dictkey.custom_eq_hash:
             custom_eq_hash = lambda: (rtyper.getrepr(dictkey.s_rdict_eqfn),
                                       rtyper.getrepr(dictkey.s_rdict_hashfn))
@@ -22,7 +23,7 @@
             custom_eq_hash = None
         return self.get_dict_repr()(rtyper, lambda: rtyper.getrepr(s_key),
                 lambda: rtyper.getrepr(s_value), dictkey, dictvalue,
-                custom_eq_hash, force_non_null)
+                custom_eq_hash, force_non_null, simple_hash_eq)

     def rtyper_makekey(self):
         self.dictdef.dictkey  .dont_change_any_more = True
@@ -89,7 +90,7 @@
                                  resulttype=ENTRIES)
         # call the correct variant_*() method
         method = getattr(self, 'variant_' + self.variant)
-        return method(hop, ENTRIES, v_entries, v_index)
+        return method(hop, ENTRIES, v_entries, v_dict, v_index)

     def get_tuple_result(self, hop, items_v):
         # this allocates the tuple for the result, directly in the function
@@ -109,7 +110,7 @@
             hop.genop('setfield', [v_result, c_item, v_item])
         return v_result

-    def variant_keys(self, hop, ENTRIES, v_entries, v_index):
+    def variant_keys(self, hop, ENTRIES, v_entries, v_dict, v_index):
         KEY = ENTRIES.TO.OF.key
         c_key = hop.inputconst(lltype.Void, 'key')
         v_key = hop.genop('getinteriorfield', [v_entries, v_index, c_key],
@@ -118,30 +119,30 @@

     variant_reversed = variant_keys

-    def variant_values(self, hop, ENTRIES, v_entries, v_index):
+    def variant_values(self, hop, ENTRIES, v_entries, v_dict, v_index):
         VALUE = ENTRIES.TO.OF.value
         c_value = hop.inputconst(lltype.Void, 'value')
         v_value = hop.genop('getinteriorfield', [v_entries,v_index,c_value],
                             resulttype=VALUE)
         return self.r_dict.recast_value(hop.llops, v_value)

-    def variant_items(self, hop, ENTRIES, v_entries, v_index):
-        v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index)
-        v_value = self.variant_values(hop, ENTRIES, v_entries, v_index)
+    def variant_items(self, hop, ENTRIES, v_entries, v_dict, v_index):
+        v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index)
+        v_value = self.variant_values(hop, ENTRIES, v_entries, v_dict, v_index)
         return self.get_tuple_result(hop, (v_key, v_value))

-    def variant_hashes(self, hop, ENTRIES, v_entries, v_index):
+    def variant_hashes(self, hop, ENTRIES, v_entries, v_dict, v_index):
         # there is not really a variant 'hashes', but this method is
         # convenient for the following variants
-        return hop.gendirectcall(ENTRIES.TO.hash, v_entries, v_index)
+        return hop.gendirectcall(ENTRIES.TO.entry_hash, v_entries, v_dict, v_index)

-    def variant_keys_with_hash(self, hop, ENTRIES, v_entries, v_index):
-        v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index)
-        v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_index)
+    def variant_keys_with_hash(self, hop, ENTRIES, v_entries, v_dict, v_index):
+        v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index)
+        v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_dict, v_index)
         return self.get_tuple_result(hop, (v_key, v_hash))

-    def variant_items_with_hash(self, hop, ENTRIES, v_entries, v_index):
-        v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index)
-        v_value = self.variant_values(hop, ENTRIES, v_entries, v_index)
-        v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_index)
+    def variant_items_with_hash(self, hop, ENTRIES, v_entries, v_dict, v_index):
+        v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index)
+        v_value = self.variant_values(hop, ENTRIES, v_entries, v_dict, v_index)
+        v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_dict, v_index)
         return self.get_tuple_result(hop, (v_key, v_value, v_hash))
diff --git a/rpython/rtyper/test/test_rdict.py b/rpython/rtyper/test/test_rdict.py
--- a/rpython/rtyper/test/test_rdict.py
+++ b/rpython/rtyper/test/test_rdict.py
@@ -538,6 +538,25 @@
         r_dict = rtyper.getrepr(s)
         assert not hasattr(r_dict.lowleveltype.TO.entries.TO.OF, "f_hash")

+    def test_r_dict_can_be_fast(self):
+        def myeq(n, m):
+            return n == m
+        def myhash(n):
+            return ~n
+        def f():
+            d = self.new_r_dict(myeq, myhash, simple_hash_eq=True)
+            d[5] = 7
+            d[12] = 19
+            return d
+
+        t = TranslationContext()
+        s = t.buildannotator().build_types(f, [])
+        rtyper = t.buildrtyper()
+        rtyper.specialize()
+
+        r_dict = rtyper.getrepr(s)
+        assert not hasattr(r_dict.lowleveltype.TO.entries.TO.OF, "f_hash")
+
     def test_tuple_dict(self):
         def f(i):
             d = self.newdict()
@@ -1000,8 +1019,8 @@
         return {}

     @staticmethod
-    def new_r_dict(myeq, myhash):
-        return r_dict(myeq, myhash)
+    def new_r_dict(myeq, myhash, force_non_null=False, simple_hash_eq=False):
+        return r_dict(myeq, myhash, force_non_null=force_non_null, simple_hash_eq=simple_hash_eq)

     def test_two_dicts_with_different_value_types(self):
         def func(i):
diff --git a/rpython/rtyper/test/test_rordereddict.py b/rpython/rtyper/test/test_rordereddict.py
--- a/rpython/rtyper/test/test_rordereddict.py
+++ b/rpython/rtyper/test/test_rordereddict.py
@@ -386,8 +386,10 @@
         return OrderedDict()

     @staticmethod
-    def new_r_dict(myeq, myhash):
-        return objectmodel.r_ordereddict(myeq, myhash)
+    def new_r_dict(myeq, myhash, force_non_null=False, simple_hash_eq=False):
+        return objectmodel.r_ordereddict(
+            myeq, myhash, force_non_null=force_non_null,
+            simple_hash_eq=simple_hash_eq)

     def test_two_dicts_with_different_value_types(self):
         def func(i):
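A consequence checked by test_r_dict_can_be_fast above is that with simple_hash_eq=True the generated entry type has no "f_hash" field, i.e. one lltype.Signed (one machine word) less per entry, traded against recomputing the hash whenever the dict needs it. A back-of-the-envelope estimate, ignoring alignment, padding and GC headers, which depend on the backend:

    import sys

    WORD = 8 if sys.maxsize > 2**32 else 4   # rough size of one lltype.Signed

    def bytes_saved(n_entries):
        # one Signed hash slot saved per entry when simple_hash_eq=True is used
        return n_entries * WORD

    print(bytes_saved(10**6))    # roughly 8 MB for a million entries on 64-bit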