Author: Ronan Lamy <[email protected]>
Branch: py3.5
Changeset: r93402:da4b6cf751a5
Date: 2017-12-12 23:37 +0000
http://bitbucket.org/pypy/pypy/changeset/da4b6cf751a5/
Log: hg merge default
diff --git a/pypy/module/test_lib_pypy/test_json_extra.py
b/extra_tests/test_json.py
rename from pypy/module/test_lib_pypy/test_json_extra.py
rename to extra_tests/test_json.py
--- a/pypy/module/test_lib_pypy/test_json_extra.py
+++ b/extra_tests/test_json.py
@@ -1,4 +1,6 @@
-import py, json
+import pytest
+import json
+from hypothesis import given, strategies
def is_(x, y):
return type(x) is type(y) and x == y
@@ -6,12 +8,26 @@
def test_no_ensure_ascii():
assert is_(json.dumps(u"\u1234", ensure_ascii=False), u'"\u1234"')
assert is_(json.dumps("\xc0", ensure_ascii=False), '"\xc0"')
- e = py.test.raises(UnicodeDecodeError, json.dumps,
- (u"\u1234", "\xc0"), ensure_ascii=False)
- assert str(e.value).startswith("'ascii' codec can't decode byte 0xc0 ")
- e = py.test.raises(UnicodeDecodeError, json.dumps,
- ("\xc0", u"\u1234"), ensure_ascii=False)
- assert str(e.value).startswith("'ascii' codec can't decode byte 0xc0 ")
+ with pytest.raises(UnicodeDecodeError) as excinfo:
+ json.dumps((u"\u1234", "\xc0"), ensure_ascii=False)
+ assert str(excinfo.value).startswith(
+ "'ascii' codec can't decode byte 0xc0 ")
+ with pytest.raises(UnicodeDecodeError) as excinfo:
+ json.dumps(("\xc0", u"\u1234"), ensure_ascii=False)
+ assert str(excinfo.value).startswith(
+ "'ascii' codec can't decode byte 0xc0 ")
def test_issue2191():
assert is_(json.dumps(u"xxx", ensure_ascii=False), u'"xxx"')
+
+jsondata = strategies.recursive(
+ strategies.none() |
+ strategies.booleans() |
+ strategies.floats(allow_nan=False) |
+ strategies.text(),
+ lambda children: strategies.lists(children) |
+ strategies.dictionaries(strategies.text(), children))
+
+@given(jsondata)
+def test_roundtrip(d):
+ assert json.loads(json.dumps(d)) == d
diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst
--- a/pypy/doc/build.rst
+++ b/pypy/doc/build.rst
@@ -149,7 +149,7 @@
xz-devel # For lzma on PyPy3.
(XXX plus the SLES11 version of libgdbm-dev and tk-dev)
-On Mac OS X::
+On Mac OS X:
Most of these build-time dependencies are installed alongside
the Developer Tools. However, note that in order for the installation to
diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst
--- a/pypy/doc/cpython_differences.rst
+++ b/pypy/doc/cpython_differences.rst
@@ -362,7 +362,11 @@
containers (as list items or in sets for example), the exact rule of
equality used is "``if x is y or x == y``" (on both CPython and PyPy);
as a consequence, because all ``nans`` are identical in PyPy, you
-cannot have several of them in a set, unlike in CPython. (Issue `#1974`__)
+cannot have several of them in a set, unlike in CPython (Issue `#1974`__).
+Another consequence is that ``cmp(float('nan'), float('nan')) == 0``, because
+``cmp`` checks with ``is`` first whether the arguments are identical (there is
+no good value to return from this call to ``cmp``, because ``cmp`` pretends
+that there is a total order on floats, but that is wrong for NaNs).
.. __:
https://bitbucket.org/pypy/pypy/issue/1974/different-behaviour-for-collections-of
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -1,31 +1,41 @@
-===========================
-What's new in PyPy2.7 5.10+
-===========================
-
-.. this is a revision shortly after release-pypy2.7-v5.9.0
-.. startrev:d56dadcef996
-
-.. branch: cppyy-packaging
-Cleanup and improve cppyy packaging
-
-.. branch: docs-osx-brew-openssl
-
-.. branch: keep-debug-symbols
-Add a smartstrip tool, which can optionally keep the debug symbols in a
-separate file, instead of just stripping them away. Use it in packaging
-
-.. branch: bsd-patches
-Fix failures on FreeBSD, contributed by David Naylor as patches on the issue
-tracker (issues 2694, 2695, 2696, 2697)
-
-.. branch: run-extra-tests
-Run extra_tests/ in buildbot
-
-.. branch: vmprof-0.4.10
-Upgrade the _vmprof backend to vmprof 0.4.10
-
-.. branch: fix-vmprof-stacklet-switch
-Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...)
-
-.. branch: win32-vcvars
-
+===========================
+What's new in PyPy2.7 5.10+
+===========================
+
+.. this is a revision shortly after release-pypy2.7-v5.9.0
+.. startrev:d56dadcef996
+
+
+.. branch: cppyy-packaging
+
+Cleanup and improve cppyy packaging
+
+.. branch: docs-osx-brew-openssl
+
+.. branch: keep-debug-symbols
+
+Add a smartstrip tool, which can optionally keep the debug symbols in a
+separate file, instead of just stripping them away. Use it in packaging
+
+.. branch: bsd-patches
+
+Fix failures on FreeBSD, contributed by David Naylor as patches on the issue
+tracker (issues 2694, 2695, 2696, 2697)
+
+.. branch: run-extra-tests
+
+Run extra_tests/ in buildbot
+
+.. branch: vmprof-0.4.10
+
+Upgrade the _vmprof backend to vmprof 0.4.10
+
+.. branch: fix-vmprof-stacklet-switch
+
+Fix a vmprof+continulets (i.e. greenlets, eventlet, gevent, ...)
+
+.. branch: win32-vcvars
+
+.. branch: rdict-fast-hash
+
+Make it possible to declare that the hash function of an r_dict is fast in RPython.
diff --git a/pypy/doc/whatsnew-pypy2-5.6.0.rst
b/pypy/doc/whatsnew-pypy2-5.6.0.rst
--- a/pypy/doc/whatsnew-pypy2-5.6.0.rst
+++ b/pypy/doc/whatsnew-pypy2-5.6.0.rst
@@ -107,7 +107,7 @@
.. branch: newinitwarn
-Match CPython's stricter handling of __new/init__ arguments
+Match CPython's stricter handling of ``__new__``/``__init__`` arguments
.. branch: openssl-1.1
diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst
--- a/pypy/doc/windows.rst
+++ b/pypy/doc/windows.rst
@@ -11,7 +11,7 @@
To build pypy-c you need a working python environment, and a C compiler.
It is possible to translate with a CPython 2.6 or later, but this is not
-the preferred way, because it will take a lot longer to run � depending
+the preferred way, because it will take a lot longer to run – depending
on your architecture, between two and three times as long. So head to
`our downloads`_ and get the latest stable version.
@@ -103,6 +103,7 @@
must also copy the ``vcvarsall.bat`` file from the ``...\9.0`` directory to the
``...\9.0\VC`` directory, and edit it, changing the lines that set
``VCINSTALLDIR`` and ``WindowsSdkDir``::
+
set VCINSTALLDIR=%~dp0\
set WindowsSdkDir=%~dp0\..\WinSDK\
diff --git a/pypy/interpreter/astcompiler/test/test_astbuilder.py
b/pypy/interpreter/astcompiler/test/test_astbuilder.py
--- a/pypy/interpreter/astcompiler/test/test_astbuilder.py
+++ b/pypy/interpreter/astcompiler/test/test_astbuilder.py
@@ -1404,3 +1404,7 @@
exc = py.test.raises(SyntaxError, self.get_ast, input).value
assert exc.msg == ("(unicode error) 'unicodeescape' codec can't decode"
" bytes in position 0-1: truncated \\xXX escape")
+ input = "u'\\x1'"
+ exc = py.test.raises(SyntaxError, self.get_ast, input).value
+ assert exc.msg == ("(unicode error) 'unicodeescape' codec can't decode"
+ " bytes in position 0-2: truncated \\xXX escape")
diff --git a/pypy/interpreter/test/test_unicodehelper.py
b/pypy/interpreter/test/test_unicodehelper.py
--- a/pypy/interpreter/test/test_unicodehelper.py
+++ b/pypy/interpreter/test/test_unicodehelper.py
@@ -1,5 +1,8 @@
import py
-from pypy.interpreter.unicodehelper import encode_utf8, decode_utf8
+import pytest
+import struct
+from pypy.interpreter.unicodehelper import (
+ encode_utf8, decode_utf8, unicode_encode_utf_32_be)
from pypy.interpreter.unicodehelper import encode_utf8sp, decode_utf8sp
@@ -67,3 +70,23 @@
assert map(ord, got) == [0xd800, 0xdc00]
got = decode_utf8sp(space, "\xf0\x90\x80\x80")
assert map(ord, got) == [0x10000]
+
[email protected]('unich', [u"\ud800", u"\udc80"])
+def test_utf32_surrogates(unich):
+ assert (unicode_encode_utf_32_be(unich, 1, None) ==
+ struct.pack('>i', ord(unich)))
+ with pytest.raises(UnicodeEncodeError):
+ unicode_encode_utf_32_be(unich, 1, None, allow_surrogates=False)
+
+ def replace_with(ru, rs):
+ def errorhandler(errors, enc, msg, u, startingpos, endingpos):
+ if errors == 'strict':
+ raise UnicodeEncodeError(enc, u, startingpos, endingpos, msg)
+ return ru, rs, endingpos
+ return unicode_encode_utf_32_be(
+ u"<%s>" % unich, 3, None,
+ errorhandler, allow_surrogates=False)
+
+ assert replace_with(u'rep', None) == u'<rep>'.encode('utf-32-be')
+ assert (replace_with(None, '\xca\xfe\xca\xfe') ==
+ '\x00\x00\x00<\xca\xfe\xca\xfe\x00\x00\x00>')
diff --git a/pypy/interpreter/unicodehelper.py
b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -1,8 +1,13 @@
import sys
from pypy.interpreter.error import OperationError, oefmt
from rpython.rlib.objectmodel import specialize
+from rpython.rlib.rarithmetic import intmask
+from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
from rpython.rlib import runicode
-from pypy.module._codecs import interp_codecs
+from rpython.rlib.runicode import (
+ default_unicode_error_encode, default_unicode_error_decode,
+ MAXUNICODE, BYTEORDER, BYTEORDER2, UNICHR)
+
_WIN32 = sys.platform == 'win32'
_MACOSX = sys.platform == 'darwin'
if _WIN32:
@@ -40,6 +45,7 @@
# ____________________________________________________________
def fsdecode(space, w_string):
+ from pypy.module._codecs import interp_codecs
state = space.fromcache(interp_codecs.CodecState)
if _WIN32:
bytes = space.bytes_w(w_string)
@@ -70,6 +76,7 @@
return space.newunicode(uni)
def fsencode(space, w_uni):
+ from pypy.module._codecs import interp_codecs
state = space.fromcache(interp_codecs.CodecState)
if _WIN32:
uni = space.unicode_w(w_uni)
@@ -107,6 +114,7 @@
# These functions take and return unwrapped rpython strings and unicodes
def decode_unicode_escape(space, string):
+ from pypy.module._codecs import interp_codecs
state = space.fromcache(interp_codecs.CodecState)
unicodedata_handler = state.get_unicodedata_handler(space)
result, consumed = runicode.str_decode_unicode_escape(
@@ -157,3 +165,196 @@
# encoding error, it should always be reversible, and the reverse is
# encode_utf8sp().
return decode_utf8(space, string, allow_surrogates=True)
+
+# ____________________________________________________________
+# utf-32
+
+def str_decode_utf_32(s, size, errors, final=True,
+ errorhandler=None):
+ result, length, byteorder = str_decode_utf_32_helper(
+ s, size, errors, final, errorhandler, "native", 'utf-32-' + BYTEORDER2)
+ return result, length
+
+def str_decode_utf_32_be(s, size, errors, final=True,
+ errorhandler=None):
+ result, length, byteorder = str_decode_utf_32_helper(
+ s, size, errors, final, errorhandler, "big", 'utf-32-be')
+ return result, length
+
+def str_decode_utf_32_le(s, size, errors, final=True,
+ errorhandler=None):
+ result, length, byteorder = str_decode_utf_32_helper(
+ s, size, errors, final, errorhandler, "little", 'utf-32-le')
+ return result, length
+
+BOM32_DIRECT = intmask(0x0000FEFF)
+BOM32_REVERSE = intmask(0xFFFE0000)
+
+def str_decode_utf_32_helper(s, size, errors, final=True,
+ errorhandler=None,
+ byteorder="native",
+ public_encoding_name='utf32'):
+ if errorhandler is None:
+ errorhandler = default_unicode_error_decode
+ bo = 0
+
+ if BYTEORDER == 'little':
+ iorder = [0, 1, 2, 3]
+ else:
+ iorder = [3, 2, 1, 0]
+
+ # Check for BOM marks (U+FEFF) in the input and adjust current
+ # byte order setting accordingly. In native mode, the leading BOM
+ # mark is skipped, in all other modes, it is copied to the output
+ # stream as-is (giving a ZWNBSP character).
+ pos = 0
+ if byteorder == 'native':
+ if size >= 4:
+ bom = intmask(
+ (ord(s[iorder[3]]) << 24) | (ord(s[iorder[2]]) << 16) |
+ (ord(s[iorder[1]]) << 8) | ord(s[iorder[0]]))
+ if BYTEORDER == 'little':
+ if bom == BOM32_DIRECT:
+ pos += 4
+ bo = -1
+ elif bom == BOM32_REVERSE:
+ pos += 4
+ bo = 1
+ else:
+ if bom == BOM32_DIRECT:
+ pos += 4
+ bo = 1
+ elif bom == BOM32_REVERSE:
+ pos += 4
+ bo = -1
+ elif byteorder == 'little':
+ bo = -1
+ else:
+ bo = 1
+ if size == 0:
+ return u'', 0, bo
+ if bo == -1:
+ # force little endian
+ iorder = [0, 1, 2, 3]
+ elif bo == 1:
+ # force big endian
+ iorder = [3, 2, 1, 0]
+
+ result = UnicodeBuilder(size // 4)
+
+ while pos < size:
+ # remaining bytes at the end? (size should be divisible by 4)
+ if len(s) - pos < 4:
+ if not final:
+ break
+ r, pos = errorhandler(errors, public_encoding_name,
+ "truncated data",
+ s, pos, len(s))
+ result.append(r)
+ if len(s) - pos < 4:
+ break
+ continue
+ ch = ((ord(s[pos + iorder[3]]) << 24) | (ord(s[pos + iorder[2]]) <<
16) |
+ (ord(s[pos + iorder[1]]) << 8) | ord(s[pos + iorder[0]]))
+ if ch >= 0x110000:
+ r, pos = errorhandler(errors, public_encoding_name,
+ "codepoint not in range(0x110000)",
+ s, pos, len(s))
+ result.append(r)
+ continue
+
+ if MAXUNICODE < 65536 and ch >= 0x10000:
+ ch -= 0x10000L
+ result.append(unichr(0xD800 + (ch >> 10)))
+ result.append(unichr(0xDC00 + (ch & 0x03FF)))
+ else:
+ result.append(UNICHR(ch))
+ pos += 4
+ return result.build(), pos, bo
+
+def _STORECHAR32(result, CH, byteorder):
+ c0 = chr(((CH) >> 24) & 0xff)
+ c1 = chr(((CH) >> 16) & 0xff)
+ c2 = chr(((CH) >> 8) & 0xff)
+ c3 = chr((CH) & 0xff)
+ if byteorder == 'little':
+ result.append(c3)
+ result.append(c2)
+ result.append(c1)
+ result.append(c0)
+ else:
+ result.append(c0)
+ result.append(c1)
+ result.append(c2)
+ result.append(c3)
+
+def unicode_encode_utf_32_helper(s, size, errors,
+ errorhandler=None,
+ allow_surrogates=True,
+ byteorder='little',
+ public_encoding_name='utf32'):
+ if errorhandler is None:
+ errorhandler = default_unicode_error_encode
+ if size == 0:
+ if byteorder == 'native':
+ result = StringBuilder(4)
+ _STORECHAR32(result, 0xFEFF, BYTEORDER)
+ return result.build()
+ return ""
+
+ result = StringBuilder(size * 4 + 4)
+ if byteorder == 'native':
+ _STORECHAR32(result, 0xFEFF, BYTEORDER)
+ byteorder = BYTEORDER
+
+ pos = 0
+ while pos < size:
+ ch = ord(s[pos])
+ pos += 1
+ ch2 = 0
+ if not allow_surrogates and 0xD800 <= ch < 0xE000:
+ ru, rs, pos = errorhandler(
+ errors, public_encoding_name, 'surrogates not allowed',
+ s, pos - 1, pos)
+ if rs is not None:
+ # py3k only
+ if len(rs) % 4 != 0:
+ errorhandler(
+ 'strict', public_encoding_name, 'surrogates not
allowed',
+ s, pos - 1, pos)
+ result.append(rs)
+ continue
+ for ch in ru:
+ if ord(ch) < 0xD800:
+ _STORECHAR32(result, ord(ch), byteorder)
+ else:
+ errorhandler(
+ 'strict', public_encoding_name,
+ 'surrogates not allowed', s, pos - 1, pos)
+ continue
+ if 0xD800 <= ch < 0xDC00 and MAXUNICODE < 65536 and pos < size:
+ ch2 = ord(s[pos])
+ if 0xDC00 <= ch2 < 0xE000:
+ ch = (((ch & 0x3FF) << 10) | (ch2 & 0x3FF)) + 0x10000
+ pos += 1
+ _STORECHAR32(result, ch, byteorder)
+
+ return result.build()
+
+def unicode_encode_utf_32(s, size, errors,
+ errorhandler=None, allow_surrogates=True):
+ return unicode_encode_utf_32_helper(s, size, errors, errorhandler,
+ allow_surrogates, "native",
+ 'utf-32-' + BYTEORDER2)
+
+def unicode_encode_utf_32_be(s, size, errors,
+ errorhandler=None, allow_surrogates=True):
+ return unicode_encode_utf_32_helper(s, size, errors, errorhandler,
+ allow_surrogates, "big",
+ 'utf-32-be')
+
+def unicode_encode_utf_32_le(s, size, errors,
+ errorhandler=None, allow_surrogates=True):
+ return unicode_encode_utf_32_helper(s, size, errors, errorhandler,
+ allow_surrogates, "little",
+ 'utf-32-le')
diff --git a/pypy/module/_codecs/interp_codecs.py
b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -2,12 +2,14 @@
from rpython.rlib import jit
from rpython.rlib.objectmodel import we_are_translated, not_rpython
from rpython.rlib.rstring import UnicodeBuilder, StringBuilder
+from rpython.rlib import runicode
from rpython.rlib.runicode import (
code_to_unichr, MAXUNICODE,
raw_unicode_escape_helper_unicode)
from pypy.interpreter.error import OperationError, oefmt
from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
+from pypy.interpreter import unicodehelper
from pypy.module.unicodedata import unicodedb
@@ -244,7 +246,8 @@
def xmlcharrefreplace_errors(space, w_exc):
check_exception(space, w_exc)
if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
- obj = space.realunicode_w(space.getattr(w_exc,
space.newtext('object')))
+ w_obj = space.getattr(w_exc, space.newtext('object'))
+ obj = space.realunicode_w(w_obj)
start = space.int_w(space.getattr(w_exc, space.newtext('start')))
w_end = space.getattr(w_exc, space.newtext('end'))
end = space.int_w(w_end)
@@ -301,7 +304,8 @@
def namereplace_errors(space, w_exc):
check_exception(space, w_exc)
if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
- obj = space.realunicode_w(space.getattr(w_exc,
space.newtext('object')))
+ w_obj = space.getattr(w_exc, space.newtext('object'))
+ obj = space.realunicode_w(w_obj)
start = space.int_w(space.getattr(w_exc, space.newtext('start')))
w_end = space.getattr(w_exc, space.newtext('end'))
end = space.int_w(w_end)
@@ -611,48 +615,47 @@
return _call_codec(space, w_decoder, w_obj, "decoding", encoding, errors)
# ____________________________________________________________
-# delegation to runicode
+# delegation to runicode/unicodehelper
-from rpython.rlib import runicode
+def _find_implementation(impl_name):
+ try:
+ func = getattr(unicodehelper, impl_name)
+ except AttributeError:
+ if hasattr(runicode, 'py3k_' + impl_name):
+ impl_name = 'py3k_' + impl_name
+ func = getattr(runicode, impl_name)
+ return func
def make_encoder_wrapper(name):
rname = "unicode_encode_%s" % (name.replace("_encode", ""), )
- assert hasattr(runicode, rname)
- if hasattr(runicode, 'py3k_' + rname):
- rname = 'py3k_' + rname
+ func = _find_implementation(rname)
@unwrap_spec(uni=unicode, errors='text_or_none')
def wrap_encoder(space, uni, errors="strict"):
if errors is None:
errors = 'strict'
state = space.fromcache(CodecState)
- func = getattr(runicode, rname)
result = func(uni, len(uni), errors, state.encode_error_handler)
return space.newtuple([space.newbytes(result), space.newint(len(uni))])
- wrap_encoder.func_name = rname
+ wrap_encoder.__name__ = func.__name__
globals()[name] = wrap_encoder
def make_utf_encoder_wrapper(name):
rname = "unicode_encode_%s" % (name.replace("_encode", ""), )
- assert hasattr(runicode, rname)
- if hasattr(runicode, 'py3k_' + rname):
- rname = 'py3k_' + rname
+ func = _find_implementation(rname)
@unwrap_spec(uni=unicode, errors='text_or_none')
def wrap_encoder(space, uni, errors="strict"):
if errors is None:
errors = 'strict'
state = space.fromcache(CodecState)
- func = getattr(runicode, rname)
result = func(uni, len(uni), errors, state.encode_error_handler,
allow_surrogates=False)
return space.newtuple([space.newbytes(result), space.newint(len(uni))])
- wrap_encoder.func_name = rname
+ wrap_encoder.__name__ = func.__name__
globals()[name] = wrap_encoder
def make_decoder_wrapper(name):
rname = "str_decode_%s" % (name.replace("_decode", ""), )
- assert hasattr(runicode, rname)
- if hasattr(runicode, 'py3k_' + rname):
- rname = 'py3k_' + rname
+ func = _find_implementation(rname)
@unwrap_spec(string='bufferstr', errors='text_or_none',
w_final=WrappedDefault(False))
def wrap_decoder(space, string, errors="strict", w_final=None):
@@ -660,11 +663,10 @@
errors = 'strict'
final = space.is_true(w_final)
state = space.fromcache(CodecState)
- func = getattr(runicode, rname)
result, consumed = func(string, len(string), errors,
final, state.decode_error_handler)
return space.newtuple([space.newunicode(result),
space.newint(consumed)])
- wrap_decoder.func_name = rname
+ wrap_decoder.__name__ = func.__name__
globals()[name] = wrap_decoder
for encoder in [
diff --git a/pypy/module/_codecs/test/test_codecs.py
b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -116,10 +116,10 @@
raises(TypeError, charmap_decode, b'\xff', "strict", {0xff: 0x110000})
assert (charmap_decode(b"\x00\x01\x02", "strict",
{0: 0x10FFFF, 1: ord('b'), 2: ord('c')}) ==
- u"\U0010FFFFbc", 3)
+ (u"\U0010FFFFbc", 3))
assert (charmap_decode(b"\x00\x01\x02", "strict",
{0: u'\U0010FFFF', 1: u'b', 2: u'c'}) ==
- u"\U0010FFFFbc", 3)
+ (u"\U0010FFFFbc", 3))
def test_escape_decode_errors(self):
from _codecs import escape_decode as decode
@@ -590,6 +590,12 @@
def test_backslashreplace(self):
import codecs
+ sin = u"a\xac\u1234\u20ac\u8000\U0010ffff"
+ expected = b"a\\xac\\u1234\\u20ac\\u8000\\U0010ffff"
+ assert sin.encode('ascii', 'backslashreplace') == expected
+ expected = b"a\xac\\u1234\xa4\\u8000\\U0010ffff"
+ assert sin.encode("iso-8859-15", "backslashreplace") == expected
+
assert 'a\xac\u1234\u20ac\u8000'.encode('ascii', 'backslashreplace')
== b'a\\xac\u1234\u20ac\u8000'
assert b'\x00\x60\x80'.decode(
'ascii', 'backslashreplace') == u'\x00\x60\\x80'
@@ -732,7 +738,7 @@
def handler_unicodeinternal(exc):
if not isinstance(exc, UnicodeDecodeError):
raise TypeError("don't know how to handle %r" % exc)
- return ("\x01", 1)
+ return (u"\x01", 1)
codecs.register_error("test.hui", handler_unicodeinternal)
res = b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui")
if sys.maxunicode > 65535:
@@ -939,3 +945,31 @@
assert len(w) == 1
assert str(w[0].message) == warning_msg
assert w[0].category == DeprecationWarning
+
+ def test_xmlcharrefreplace(self):
+ r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('latin1',
'xmlcharrefreplace')
+ assert r == b'ሴ\x80⍅y\xab'
+ r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('ascii',
'xmlcharrefreplace')
+ assert r == b'ሴ€⍅y«'
+
+ def test_errorhandler_collection(self):
+ import _codecs
+ errors = []
+ def record_error(exc):
+ if not isinstance(exc, UnicodeEncodeError):
+ raise TypeError("don't know how to handle %r" % exc)
+ errors.append(exc.object[exc.start:exc.end])
+ return (u'', exc.end)
+ _codecs.register_error("test.record", record_error)
+
+ sin = u"\xac\u1234\u1234\u20ac\u8000"
+ assert sin.encode("ascii", "test.record") == b""
+ assert errors == [sin]
+
+ errors = []
+ assert sin.encode("latin-1", "test.record") == b"\xac"
+ assert errors == [u'\u1234\u1234\u20ac\u8000']
+
+ errors = []
+ assert sin.encode("iso-8859-15", "test.record") == b"\xac\xa4"
+ assert errors == [u'\u1234\u1234', u'\u8000']
diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py
--- a/pypy/module/_io/interp_textio.py
+++ b/pypy/module/_io/interp_textio.py
@@ -429,6 +429,7 @@
if not space.isinstance_w(w_decoded, space.w_unicode):
msg = "decoder should return a string result, not '%T'"
raise oefmt(space.w_TypeError, msg, w_decoded)
+ return w_decoded
class W_TextIOWrapper(W_TextIOBase):
@@ -997,12 +998,13 @@
w_decoded = space.call_method(self.w_decoder, "decode",
w_chunk,
space.newbool(bool(cookie.need_eof)))
- self.decoded.set(space, w_decoded)
+ w_decoded = check_decoded(space, w_decoded)
# Skip chars_to_skip of the decoded characters
- if len(self.decoded.text) < cookie.chars_to_skip:
+ if space.len_w(w_decoded) < cookie.chars_to_skip:
raise oefmt(space.w_IOError,
"can't restore logical file position")
+ self.decoded.set(space, w_decoded)
self.decoded.pos = cookie.chars_to_skip
else:
self.snapshot = PositionSnapshot(cookie.dec_flags, "")
@@ -1015,11 +1017,9 @@
def tell_w(self, space):
self._check_closed(space)
-
if not self.seekable:
self._unsupportedoperation(space,
"underlying stream is not seekable")
-
if not self.telling:
raise oefmt(space.w_IOError,
"telling position disabled by next() call")
@@ -1089,14 +1089,14 @@
# We didn't get enough decoded data; signal EOF to get more.
w_decoded = space.call_method(self.w_decoder, "decode",
space.newbytes(""),
- space.newint(1)) # final=1
+ space.newint(1)) # final=1
check_decoded(space, w_decoded)
- chars_decoded += len(space.unicode_w(w_decoded))
+ chars_decoded += space.len_w(w_decoded)
cookie.need_eof = 1
if chars_decoded < chars_to_skip:
raise oefmt(space.w_IOError,
- "can't reconstruct logical file position")
+ "can't reconstruct logical file position")
finally:
space.call_method(self.w_decoder, "setstate", w_saved_state)
diff --git a/pypy/module/_io/test/test_interp_textio.py
b/pypy/module/_io/test/test_interp_textio.py
--- a/pypy/module/_io/test/test_interp_textio.py
+++ b/pypy/module/_io/test/test_interp_textio.py
@@ -40,7 +40,8 @@
w_newline=space.newtext(mode))
lines = []
for limit in limits:
- line = space.unicode_w(w_textio.readline_w(space, space.newint(limit)))
+ w_line = w_textio.readline_w(space, space.newint(limit))
+ line = space.unicode_w(w_line)
if limit >= 0:
assert len(line) <= limit
if line:
diff --git a/pypy/module/_pypyjson/interp_decoder.py
b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -76,7 +76,7 @@
self.ll_chars = rffi.str2charp(s)
self.end_ptr = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw')
self.pos = 0
- self.cache = r_dict(slice_eq, slice_hash)
+ self.cache = r_dict(slice_eq, slice_hash, simple_hash_eq=True)
def close(self):
rffi.free_charp(self.ll_chars)
diff --git a/pypy/module/_pypyjson/interp_encoder.py
b/pypy/module/_pypyjson/interp_encoder.py
--- a/pypy/module/_pypyjson/interp_encoder.py
+++ b/pypy/module/_pypyjson/interp_encoder.py
@@ -49,24 +49,24 @@
first = 0
for i in range(first, len(u)):
- c = u[i]
- if c <= u'~':
- if c == u'"' or c == u'\\':
+ c = ord(u[i])
+ if c <= ord('~'):
+ if c == ord('"') or c == ord('\\'):
sb.append('\\')
- elif c < u' ':
- sb.append(ESCAPE_BEFORE_SPACE[ord(c)])
+ elif c < ord(' '):
+ sb.append(ESCAPE_BEFORE_SPACE[c])
continue
- sb.append(chr(ord(c)))
+ sb.append(chr(c))
else:
- if c <= u'\uffff':
+ if c <= ord(u'\uffff'):
sb.append('\\u')
- sb.append(HEX[ord(c) >> 12])
- sb.append(HEX[(ord(c) >> 8) & 0x0f])
- sb.append(HEX[(ord(c) >> 4) & 0x0f])
- sb.append(HEX[ord(c) & 0x0f])
+ sb.append(HEX[c >> 12])
+ sb.append(HEX[(c >> 8) & 0x0f])
+ sb.append(HEX[(c >> 4) & 0x0f])
+ sb.append(HEX[c & 0x0f])
else:
# surrogate pair
- n = ord(c) - 0x10000
+ n = c - 0x10000
s1 = 0xd800 | ((n >> 10) & 0x3ff)
sb.append('\\ud')
sb.append(HEX[(s1 >> 8) & 0x0f])
diff --git a/pypy/module/_rawffi/alt/type_converter.py
b/pypy/module/_rawffi/alt/type_converter.py
--- a/pypy/module/_rawffi/alt/type_converter.py
+++ b/pypy/module/_rawffi/alt/type_converter.py
@@ -128,7 +128,7 @@
intval: lltype.Signed
"""
self.error(w_ffitype, w_obj)
-
+
def handle_unichar(self, w_ffitype, w_obj, intval):
"""
intval: lltype.Signed
@@ -174,7 +174,7 @@
def handle_struct_rawffi(self, w_ffitype, w_structinstance):
"""
This method should be killed as soon as we remove support for _rawffi
structures
-
+
w_structinstance: W_StructureInstance
"""
self.error(w_ffitype, w_structinstance)
@@ -349,7 +349,7 @@
def get_struct_rawffi(self, w_ffitype, w_structdescr):
"""
This should be killed as soon as we kill support for _rawffi structures
-
+
Return type: lltype.Unsigned
(the address of the structure)
"""
diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py
--- a/pypy/module/_sre/interp_sre.py
+++ b/pypy/module/_sre/interp_sre.py
@@ -580,11 +580,13 @@
@unwrap_spec(w_groupnum=WrappedDefault(0))
def start_w(self, w_groupnum):
- return self.space.newint(self.do_span(w_groupnum)[0])
+ start, end = self.do_span(w_groupnum)
+ return self.space.newint(start)
@unwrap_spec(w_groupnum=WrappedDefault(0))
def end_w(self, w_groupnum):
- return self.space.newint(self.do_span(w_groupnum)[1])
+ start, end = self.do_span(w_groupnum)
+ return self.space.newint(end)
@unwrap_spec(w_groupnum=WrappedDefault(0))
def span_w(self, w_groupnum):
diff --git a/pypy/module/_sre/test/test_app_sre.py
b/pypy/module/_sre/test/test_app_sre.py
--- a/pypy/module/_sre/test/test_app_sre.py
+++ b/pypy/module/_sre/test/test_app_sre.py
@@ -94,6 +94,14 @@
assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus")
assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs")
+ def test_findall_unicode(self):
+ import re
+ assert [u"\u1234"] == re.findall(u"\u1234", u"\u1000\u1234\u2000")
+ assert ["a", "u"] == re.findall("b(.)", "abalbus")
+ assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus")
+ assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs")
+ assert [u"xyz"] == re.findall(u".*yz", u"xyz")
+
def test_finditer(self):
import re
it = re.finditer("b(.)", "brabbel")
@@ -1046,3 +1054,14 @@
import re
raises(ValueError, re.split, '', '')
re.split("a*", '') # -> warning
+
+class AppTestUnicodeExtra:
+ def test_string_attribute(self):
+ import re
+ match = re.search(u"\u1234", u"\u1233\u1234\u1235")
+ assert match.string == u"\u1233\u1234\u1235"
+
+ def test_match_start(self):
+ import re
+ match = re.search(u"\u1234", u"\u1233\u1234\u1235")
+ assert match.start() == 1
diff --git a/pypy/module/cpyext/unicodeobject.py
b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -15,6 +15,7 @@
from pypy.module.cpyext.bytesobject import PyBytes_Check, PyBytes_FromObject
from pypy.module._codecs.interp_codecs import (
CodecState, latin_1_decode, utf_16_decode, utf_32_decode)
+from pypy.interpreter import unicodehelper
from pypy.objspace.std import unicodeobject
from rpython.rlib import rstring, runicode
from rpython.tool.sourcetools import func_renamer
@@ -869,7 +870,7 @@
else:
errors = None
- result, length, byteorder = runicode.str_decode_utf_32_helper(
+ result, length, byteorder = unicodehelper.str_decode_utf_32_helper(
string, size, errors,
True, # final ? false for multiple passes?
None, # errorhandler
diff --git a/pypy/module/time/interp_time.py b/pypy/module/time/interp_time.py
--- a/pypy/module/time/interp_time.py
+++ b/pypy/module/time/interp_time.py
@@ -245,7 +245,7 @@
LPDWORD = rwin32.LPDWORD
_GetSystemTimeAdjustment = rwin32.winexternal(
'GetSystemTimeAdjustment',
- [LPDWORD, LPDWORD, rwin32.LPBOOL],
+ [LPDWORD, LPDWORD, rwin32.LPBOOL],
rffi.INT)
def gettimeofday(space, w_info=None):
with lltype.scoped_alloc(rwin32.FILETIME) as system_time:
@@ -270,7 +270,7 @@
lltype.scoped_alloc(rwin32.LPBOOL.TO, 1) as
is_time_adjustment_disabled:
_GetSystemTimeAdjustment(time_adjustment, time_increment,
is_time_adjustment_disabled)
-
+
_setinfo(space, w_info, "GetSystemTimeAsFileTime()",
time_increment[0] * 1e-7, False, True)
return space.newfloat(tv_sec + tv_usec * 1e-6)
@@ -303,7 +303,7 @@
widen(t.c_millitm) * 0.001)
if w_info is not None:
_setinfo(space, w_info, "ftime()", 1e-3,
- False, True)
+ False, True)
return space.newfloat(result)
else:
if w_info:
@@ -955,7 +955,7 @@
[rffi.CArrayPtr(lltype.SignedLongLong)],
rwin32.DWORD)
QueryPerformanceFrequency = rwin32.winexternal(
- 'QueryPerformanceFrequency', [rffi.CArrayPtr(lltype.SignedLongLong)],
+ 'QueryPerformanceFrequency', [rffi.CArrayPtr(lltype.SignedLongLong)],
rffi.INT)
def win_perf_counter(space, w_info=None):
with lltype.scoped_alloc(rffi.CArray(rffi.lltype.SignedLongLong), 1)
as a:
diff --git a/pypy/module/time/test/test_time.py
b/pypy/module/time/test/test_time.py
--- a/pypy/module/time/test/test_time.py
+++ b/pypy/module/time/test/test_time.py
@@ -19,6 +19,8 @@
raises(TypeError, time.sleep, "foo")
time.sleep(0.12345)
raises(ValueError, time.sleep, -1.0)
+ raises(ValueError, time.sleep, float('nan'))
+ raises(OverflowError, time.sleep, float('inf'))
def test_clock(self):
import time
diff --git a/pypy/module/unicodedata/interp_ucd.py
b/pypy/module/unicodedata/interp_ucd.py
--- a/pypy/module/unicodedata/interp_ucd.py
+++ b/pypy/module/unicodedata/interp_ucd.py
@@ -268,10 +268,10 @@
result[0] = ch
if not composed: # If decomposed normalization we are done
- return space.newunicode(u''.join([unichr(i) for i in result[:j]]))
+ return self.build(space, result, stop=j)
if j <= 1:
- return space.newunicode(u''.join([unichr(i) for i in result[:j]]))
+ return self.build(space, result, stop=j)
current = result[0]
starter_pos = 0
@@ -319,7 +319,10 @@
result[starter_pos] = current
- return space.newunicode(u''.join([unichr(i) for i in
result[:next_insert]]))
+ return self.build(space, result, stop=next_insert)
+
+ def build(self, space, r, stop):
+ return space.newunicode(u''.join([unichr(i) for i in r[:stop]]))
methods = {}
diff --git a/pypy/objspace/std/test/test_unicodeobject.py
b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -57,6 +57,11 @@
assert 'a' + 'b' == 'ab'
raises(TypeError, operator.add, b'a', 'b')
+ def test_getitem(self):
+ assert u'abc'[2] == 'c'
+ raises(IndexError, u'abc'.__getitem__, 15)
+ assert u'g\u0105\u015b\u0107'[2] == u'\u015b'
+
def test_join(self):
def check(a, b):
assert a == b
@@ -82,6 +87,8 @@
assert '\n\n'.splitlines() == ['', '']
assert 'a\nb\nc'.splitlines(1) == ['a\n', 'b\n', 'c']
assert '\na\nb\n'.splitlines(1) == ['\n', 'a\n', 'b\n']
+ assert ((u'a' + b'\xc2\x85'.decode('utf8') + u'b\n').splitlines() ==
+ ['a', 'b'])
def test_zfill(self):
assert '123'.zfill(2) == '123'
@@ -128,55 +135,57 @@
raises(ValueError, 'abc'.split, '')
raises(ValueError, 'abc'.split, '')
assert ' a b c d'.split(None, 0) == ['a b c d']
+ assert u'a\nb\u1680c'.split() == [u'a', u'b', u'c']
def test_rsplit(self):
- assert "".rsplit() == []
- assert " ".rsplit() == []
- assert "a".rsplit() == ['a']
- assert "a".rsplit("a", 1) == ['', '']
- assert " ".rsplit(" ", 1) == ['', '']
- assert "aa".rsplit("a", 2) == ['', '', '']
- assert " a ".rsplit() == ['a']
- assert "a b c".rsplit() == ['a','b','c']
- assert 'this is the rsplit function'.rsplit() == ['this', 'is', 'the',
'rsplit', 'function']
- assert 'a|b|c|d'.rsplit('|') == ['a', 'b', 'c', 'd']
- assert 'a|b|c|d'.rsplit('|') == ['a', 'b', 'c', 'd']
- assert 'a|b|c|d'.rsplit('|') == ['a', 'b', 'c', 'd']
- assert 'a|b|c|d'.rsplit('|', 2) == ['a|b', 'c', 'd']
- assert 'a b c d'.rsplit(None, 1) == ['a b c', 'd']
- assert 'a b c d'.rsplit(None, 2) == ['a b', 'c', 'd']
- assert 'a b c d'.rsplit(None, 3) == ['a', 'b', 'c', 'd']
- assert 'a b c d'.rsplit(None, 4) == ['a', 'b', 'c', 'd']
- assert 'a b c d'.rsplit(None, 0) == ['a b c d']
- assert 'a b c d'.rsplit(None, 2) == ['a b', 'c', 'd']
- assert 'a b c d '.rsplit() == ['a', 'b', 'c', 'd']
- assert 'a//b//c//d'.rsplit('//') == ['a', 'b', 'c', 'd']
- assert 'endcase test'.rsplit('test') == ['endcase ', '']
- raises(ValueError, 'abc'.rsplit, '')
- raises(ValueError, 'abc'.rsplit, '')
- raises(ValueError, 'abc'.rsplit, '')
- assert ' a b c '.rsplit(None, 0) == [' a b c']
- assert ''.rsplit('aaa') == ['']
+ assert u"".rsplit() == []
+ assert u" ".rsplit() == []
+ assert u"a".rsplit() == [u'a']
+ assert u"a".rsplit(u"a", 1) == [u'', u'']
+ assert u" ".rsplit(u" ", 1) == [u'', u'']
+ assert u"aa".rsplit(u"a", 2) == [u'', u'', u'']
+ assert u" a ".rsplit() == [u'a']
+ assert u"a b c".rsplit() == [u'a',u'b',u'c']
+ assert u'this is the rsplit function'.rsplit() == [u'this', u'is',
u'the', u'rsplit', u'function']
+ assert u'a|b|c|d'.rsplit(u'|') == [u'a', u'b', u'c', u'd']
+ assert u'a|b|c|d'.rsplit('|') == [u'a', u'b', u'c', u'd']
+ assert 'a|b|c|d'.rsplit(u'|') == [u'a', u'b', u'c', u'd']
+ assert u'a|b|c|d'.rsplit(u'|', 2) == [u'a|b', u'c', u'd']
+ assert u'a b c d'.rsplit(None, 1) == [u'a b c', u'd']
+ assert u'a b c d'.rsplit(None, 2) == [u'a b', u'c', u'd']
+ assert u'a b c d'.rsplit(None, 3) == [u'a', u'b', u'c', u'd']
+ assert u'a b c d'.rsplit(None, 4) == [u'a', u'b', u'c', u'd']
+ assert u'a b c d'.rsplit(None, 0) == [u'a b c d']
+ assert u'a b c d'.rsplit(None, 2) == [u'a b', u'c', u'd']
+ assert u'a b c d '.rsplit() == [u'a', u'b', u'c', u'd']
+ assert u'a//b//c//d'.rsplit(u'//') == [u'a', u'b', u'c', u'd']
+ assert u'endcase test'.rsplit(u'test') == [u'endcase ', u'']
+ raises(ValueError, u'abc'.rsplit, u'')
+ raises(ValueError, u'abc'.rsplit, '')
+ raises(ValueError, 'abc'.rsplit, u'')
+ assert u' a b c '.rsplit(None, 0) == [u' a b c']
+ assert u''.rsplit('aaa') == [u'']
+ assert u'a\nb\u1680c'.rsplit() == [u'a', u'b', u'c']
def test_center(self):
- s="a b"
- assert s.center(0) == "a b"
- assert s.center(1) == "a b"
- assert s.center(2) == "a b"
- assert s.center(3) == "a b"
- assert s.center(4) == "a b "
- assert s.center(5) == " a b "
- assert s.center(6) == " a b "
- assert s.center(7) == " a b "
- assert s.center(8) == " a b "
- assert s.center(9) == " a b "
- assert 'abc'.center(10) == ' abc '
- assert 'abc'.center(6) == ' abc '
- assert 'abc'.center(3) == 'abc'
- assert 'abc'.center(2) == 'abc'
- assert 'abc'.center(5, '*') == '*abc*' # Python 2.4
- assert 'abc'.center(5, '*') == '*abc*' # Python 2.4
- raises(TypeError, 'abc'.center, 4, 'cba')
+ s=u"a b"
+ assert s.center(0) == u"a b"
+ assert s.center(1) == u"a b"
+ assert s.center(2) == u"a b"
+ assert s.center(3) == u"a b"
+ assert s.center(4) == u"a b "
+ assert s.center(5) == u" a b "
+ assert s.center(6) == u" a b "
+ assert s.center(7) == u" a b "
+ assert s.center(8) == u" a b "
+ assert s.center(9) == u" a b "
+ assert u'abc'.center(10) == u' abc '
+ assert u'abc'.center(6) == u' abc '
+ assert u'abc'.center(3) == u'abc'
+ assert u'abc'.center(2) == u'abc'
+ assert u'abc'.center(5, u'*') == u'*abc*' # Python 2.4
+ assert u'abc'.center(5, '*') == u'*abc*' # Python 2.4
+ raises(TypeError, u'abc'.center, 4, u'cba')
def test_title(self):
assert "brown fox".title() == "Brown Fox"
@@ -186,23 +195,25 @@
assert "bro!wn fox".title() == "Bro!Wn Fox"
assert u'A\u03a3 \u1fa1xy'.title() == u'A\u03c2 \u1fa9xy'
assert u'A\u03a3A'.title() == u'A\u03c3a'
+ assert u"brow\u4321n fox".title() == u"Brow\u4321N Fox"
+ assert u'\ud800'.title() == u'\ud800'
def test_istitle(self):
- assert "".istitle() == False
- assert "!".istitle() == False
- assert "!!".istitle() == False
- assert "brown fox".istitle() == False
- assert "!brown fox".istitle() == False
- assert "bROWN fOX".istitle() == False
- assert "Brown Fox".istitle() == True
- assert "bro!wn fox".istitle() == False
- assert "Bro!wn fox".istitle() == False
- assert "!brown Fox".istitle() == False
- assert "!Brown Fox".istitle() == True
- assert "Brow&&&&N Fox".istitle() == True
- assert "!Brow&&&&n Fox".istitle() == False
- assert '\u1FFc'.istitle()
- assert 'Greek \u1FFcitlecases ...'.istitle()
+ assert u"".istitle() == False
+ assert u"!".istitle() == False
+ assert u"!!".istitle() == False
+ assert u"brown fox".istitle() == False
+ assert u"!brown fox".istitle() == False
+ assert u"bROWN fOX".istitle() == False
+ assert u"Brown Fox".istitle() == True
+ assert u"bro!wn fox".istitle() == False
+ assert u"Bro!wn fox".istitle() == False
+ assert u"!brown Fox".istitle() == False
+ assert u"!Brown Fox".istitle() == True
+ assert u"Brow&&&&N Fox".istitle() == True
+ assert u"!Brow&&&&n Fox".istitle() == False
+ assert u'\u1FFc'.istitle()
+ assert u'Greek \u1FFcitlecases ...'.istitle()
def test_islower_isupper_with_titlecase(self):
# \u01c5 is a char which is neither lowercase nor uppercase, but
@@ -220,24 +231,36 @@
assert "_!var".isidentifier() is False
assert "3abc".isidentifier() is False
+ def test_lower_upper(self):
+ assert u'a'.lower() == u'a'
+ assert u'A'.lower() == u'a'
+ assert u'\u0105'.lower() == u'\u0105'
+ assert u'\u0104'.lower() == u'\u0105'
+ assert u'\ud800'.lower() == u'\ud800'
+ assert u'a'.upper() == u'A'
+ assert u'A'.upper() == u'A'
+ assert u'\u0105'.upper() == u'\u0104'
+ assert u'\u0104'.upper() == u'\u0104'
+ assert u'\ud800'.upper() == u'\ud800'
+
def test_capitalize(self):
- assert "brown fox".capitalize() == "Brown fox"
- assert ' hello '.capitalize() == ' hello '
- assert 'Hello '.capitalize() == 'Hello '
- assert 'hello '.capitalize() == 'Hello '
- assert 'aaaa'.capitalize() == 'Aaaa'
- assert 'AaAa'.capitalize() == 'Aaaa'
+ assert u"brown fox".capitalize() == u"Brown fox"
+ assert u' hello '.capitalize() == u' hello '
+ assert u'Hello '.capitalize() == u'Hello '
+ assert u'hello '.capitalize() == u'Hello '
+ assert u'aaaa'.capitalize() == u'Aaaa'
+ assert u'AaAa'.capitalize() == u'Aaaa'
# check that titlecased chars are lowered correctly
# \u1ffc is the titlecased char
- assert ('\u1ff3\u1ff3\u1ffc\u1ffc'.capitalize() ==
- '\u03a9\u0399\u1ff3\u1ff3\u1ff3')
+ assert (u'\u1ff3\u1ff3\u1ffc\u1ffc'.capitalize() ==
+ u'\u03a9\u0399\u1ff3\u1ff3\u1ff3')
# check with cased non-letter chars
- assert ('\u24c5\u24ce\u24c9\u24bd\u24c4\u24c3'.capitalize() ==
- '\u24c5\u24e8\u24e3\u24d7\u24de\u24dd')
- assert ('\u24df\u24e8\u24e3\u24d7\u24de\u24dd'.capitalize() ==
- '\u24c5\u24e8\u24e3\u24d7\u24de\u24dd')
- assert '\u2160\u2161\u2162'.capitalize() == '\u2160\u2171\u2172'
- assert '\u2170\u2171\u2172'.capitalize() == '\u2160\u2171\u2172'
+ assert (u'\u24c5\u24ce\u24c9\u24bd\u24c4\u24c3'.capitalize() ==
+ u'\u24c5\u24e8\u24e3\u24d7\u24de\u24dd')
+ assert (u'\u24df\u24e8\u24e3\u24d7\u24de\u24dd'.capitalize() ==
+ u'\u24c5\u24e8\u24e3\u24d7\u24de\u24dd')
+ assert u'\u2160\u2161\u2162'.capitalize() == u'\u2160\u2171\u2172'
+ assert u'\u2170\u2171\u2172'.capitalize() == u'\u2160\u2171\u2172'
# check with Ll chars with no upper - nothing changes here
assert ('\u019b\u1d00\u1d86\u0221\u1fb7'.capitalize() ==
'\u019b\u1d00\u1d86\u0221\u1fb7')
@@ -261,34 +284,36 @@
def test_isprintable_wide(self):
assert '\U0001F46F'.isprintable() # Since unicode 6.0
assert not '\U000E0020'.isprintable()
+ assert u'\ud800'.capitalize() == u'\ud800'
+ assert u'xx\ud800'.capitalize() == u'Xx\ud800'
def test_rjust(self):
- s = "abc"
+ s = u"abc"
assert s.rjust(2) == s
assert s.rjust(3) == s
- assert s.rjust(4) == " " + s
- assert s.rjust(5) == " " + s
- assert 'abc'.rjust(10) == ' abc'
- assert 'abc'.rjust(6) == ' abc'
- assert 'abc'.rjust(3) == 'abc'
- assert 'abc'.rjust(2) == 'abc'
- assert 'abc'.rjust(5, '*') == '**abc' # Python 2.4
- assert 'abc'.rjust(5, '*') == '**abc' # Python 2.4
- raises(TypeError, 'abc'.rjust, 5, 'xx')
+ assert s.rjust(4) == u" " + s
+ assert s.rjust(5) == u" " + s
+ assert u'abc'.rjust(10) == u' abc'
+ assert u'abc'.rjust(6) == u' abc'
+ assert u'abc'.rjust(3) == u'abc'
+ assert u'abc'.rjust(2) == u'abc'
+ assert u'abc'.rjust(5, u'*') == u'**abc' # Python 2.4
+ assert u'abc'.rjust(5, '*') == u'**abc' # Python 2.4
+ raises(TypeError, u'abc'.rjust, 5, u'xx')
def test_ljust(self):
- s = "abc"
+ s = u"abc"
assert s.ljust(2) == s
assert s.ljust(3) == s
- assert s.ljust(4) == s + " "
- assert s.ljust(5) == s + " "
- assert 'abc'.ljust(10) == 'abc '
- assert 'abc'.ljust(6) == 'abc '
- assert 'abc'.ljust(3) == 'abc'
- assert 'abc'.ljust(2) == 'abc'
- assert 'abc'.ljust(5, '*') == 'abc**' # Python 2.4
- assert 'abc'.ljust(5, '*') == 'abc**' # Python 2.4
- raises(TypeError, 'abc'.ljust, 6, '')
+ assert s.ljust(4) == s + u" "
+ assert s.ljust(5) == s + u" "
+ assert u'abc'.ljust(10) == u'abc '
+ assert u'abc'.ljust(6) == u'abc '
+ assert u'abc'.ljust(3) == u'abc'
+ assert u'abc'.ljust(2) == u'abc'
+ assert u'abc'.ljust(5, u'*') == u'abc**' # Python 2.4
+ assert u'abc'.ljust(5, '*') == u'abc**' # Python 2.4
+ raises(TypeError, u'abc'.ljust, 6, u'')
def test_replace(self):
assert 'one!two!three!'.replace('!', '@', 1) == 'one@two!three!'
@@ -300,6 +325,16 @@
assert 'one!two!three!'.replace('!', '@') == 'one@two@three@'
assert 'one!two!three!'.replace('x', '@') == 'one!two!three!'
assert 'one!two!three!'.replace('x', '@', 2) == 'one!two!three!'
+ assert u'\u1234'.replace(u'', '-') == u'-\u1234-'
+ assert u'\u0234\u5678'.replace('', u'-') == u'-\u0234-\u5678-'
+ assert u'\u0234\u5678'.replace('', u'-', 0) == u'\u0234\u5678'
+ assert u'\u0234\u5678'.replace('', u'-', 1) == u'-\u0234\u5678'
+ assert u'\u0234\u5678'.replace('', u'-', 2) == u'-\u0234-\u5678'
+ assert u'\u0234\u5678'.replace('', u'-', 3) == u'-\u0234-\u5678-'
+ assert u'\u0234\u5678'.replace('', u'-', 4) == u'-\u0234-\u5678-'
+ assert u'\u0234\u5678'.replace('', u'-', 700) == u'-\u0234-\u5678-'
+ assert u'\u0234\u5678'.replace('', u'-', -1) == u'-\u0234-\u5678-'
+ assert u'\u0234\u5678'.replace('', u'-', -42) == u'-\u0234-\u5678-'
assert 'abc'.replace('', '-') == '-a-b-c-'
assert 'abc'.replace('', '-', 3) == '-a-b-c'
assert 'abc'.replace('', '-', 0) == 'abc'
@@ -387,6 +422,9 @@
assert ''.startswith('a') is False
assert 'x'.startswith('xx') is False
assert 'y'.startswith('xx') is False
+ assert u'\u1234\u5678\u4321'.startswith(u'\u1234') is True
+ assert u'\u1234\u5678\u4321'.startswith(u'\u1234\u4321') is False
+ assert u'\u1234'.startswith(u'') is True
def test_startswith_more(self):
assert 'ab'.startswith('a', 0) is True
@@ -533,7 +571,7 @@
raises(TypeError, 'hello'.translate)
raises(TypeError, 'abababc'.translate, 'abc', 'xyz')
- def test_unicode_form_encoded_object(self):
+ def test_unicode_from_encoded_object(self):
assert str(b'x', 'utf-8') == 'x'
assert str(b'x', 'utf-8', 'strict') == 'x'
@@ -659,31 +697,31 @@
def test_partition(self):
- assert ('this is the par', 'ti', 'tion method') == \
- 'this is the partition method'.partition('ti')
+ assert (u'this is the par', u'ti', u'tion method') == \
+ u'this is the partition method'.partition(u'ti')
# from raymond's original specification
- S = 'http://www.python.org'
- assert ('http', '://', 'www.python.org') == S.partition('://')
- assert ('http://www.python.org', '', '') == S.partition('?')
- assert ('', 'http://', 'www.python.org') == S.partition('http://')
- assert ('http://www.python.', 'org', '') == S.partition('org')
+ S = u'http://www.python.org'
+ assert (u'http', u'://', u'www.python.org') == S.partition(u'://')
+ assert (u'http://www.python.org', u'', u'') == S.partition(u'?')
+ assert (u'', u'http://', u'www.python.org') == S.partition(u'http://')
+ assert (u'http://www.python.', u'org', u'') == S.partition(u'org')
- raises(ValueError, S.partition, '')
+ raises(ValueError, S.partition, u'')
raises(TypeError, S.partition, None)
def test_rpartition(self):
- assert ('this is the rparti', 'ti', 'on method') == \
- 'this is the rpartition method'.rpartition('ti')
+ assert (u'this is the rparti', u'ti', u'on method') == \
+ u'this is the rpartition method'.rpartition(u'ti')
# from raymond's original specification
- S = 'http://www.python.org'
- assert ('http', '://', 'www.python.org') == S.rpartition('://')
- assert ('', '', 'http://www.python.org') == S.rpartition('?')
- assert ('', 'http://', 'www.python.org') == S.rpartition('http://')
- assert ('http://www.python.', 'org', '') == S.rpartition('org')
+ S = u'http://www.python.org'
+ assert (u'http', u'://', u'www.python.org') == S.rpartition(u'://')
+ assert (u'', u'', u'http://www.python.org') == S.rpartition(u'?')
+ assert (u'', u'http://', u'www.python.org') == S.rpartition(u'http://')
+ assert (u'http://www.python.', u'org', u'') == S.rpartition(u'org')
- raises(ValueError, S.rpartition, '')
+ raises(ValueError, S.rpartition, u'')
raises(TypeError, S.rpartition, None)
def test_mul(self):
@@ -706,6 +744,7 @@
def test_index(self):
assert "rrarrrrrrrrra".index('a', 4, None) == 12
assert "rrarrrrrrrrra".index('a', None, 6) == 2
+ assert u"\u1234\u4321\u5678".index(u'\u5678', 1) == 2
def test_rindex(self):
from sys import maxsize
@@ -715,6 +754,7 @@
assert 'abcdefghiabc'.rindex('abc', 0, -1) == 0
assert 'abcdefghiabc'.rindex('abc', -4*maxsize, 4*maxsize) == 9
assert 'rrarrrrrrrrra'.rindex('a', 4, None) == 12
+ assert u"\u1234\u5678".rindex(u'\u5678') == 1
raises(ValueError, 'abcdefghiabc'.rindex, 'hib')
raises(ValueError, 'defghiabc'.rindex, 'def', 1)
@@ -729,6 +769,7 @@
assert 'abcdefghiabc'.rfind('') == 12
assert 'abcdefghiabc'.rfind('abcd') == 0
assert 'abcdefghiabc'.rfind('abcz') == -1
+ assert u"\u1234\u5678".rfind(u'\u5678') == 1
def test_rfind_corner_case(self):
assert 'abc'.rfind('', 4) == -1
@@ -802,17 +843,31 @@
assert str(Y()).__class__ is X
def test_getslice(self):
- assert '123456'[1:5] == '2345'
- s = "abc"
- assert s[:] == "abc"
- assert s[1:] == "bc"
- assert s[:2] == "ab"
- assert s[1:2] == "b"
- assert s[-2:] == "bc"
- assert s[:-1] == "ab"
- assert s[-2:2] == "b"
- assert s[1:-1] == "b"
- assert s[-2:-1] == "b"
+ s = u"\u0105b\u0107"
+ assert s[:] == u"\u0105b\u0107"
+ assert s[1:] == u"b\u0107"
+ assert s[:2] == u"\u0105b"
+ assert s[1:2] == u"b"
+ assert s[-2:] == u"b\u0107"
+ assert s[:-1] == u"\u0105b"
+ assert s[-2:2] == u"b"
+ assert s[1:-1] == u"b"
+ assert s[-2:-1] == u"b"
+
+ def test_getitem_slice(self):
+ assert u'123456'.__getitem__(slice(1, 5)) == u'2345'
+ s = u"\u0105b\u0107"
+ assert s[slice(3)] == u"\u0105b\u0107"
+ assert s[slice(1, 3)] == u"b\u0107"
+ assert s[slice(2)] == u"\u0105b"
+ assert s[slice(1, 2)] == u"b"
+ assert s[slice(-2, 3)] == u"b\u0107"
+ assert s[slice(-1)] == u"\u0105b"
+ assert s[slice(-2, 2)] == u"b"
+ assert s[slice(1, -1)] == u"b"
+ assert s[slice(-2, -1)] == u"b"
+ assert u"abcde"[::2] == u"ace"
+ assert u"\u0105\u0106\u0107abcd"[::2] == u"\u0105\u0107bd"
def test_iter(self):
foo = "\u1111\u2222\u3333"
@@ -898,7 +953,7 @@
def test_formatting_unicode__str__2(self):
class A:
def __str__(self):
- return 'baz'
+ return u'baz'
class B:
def __str__(self):
@@ -913,12 +968,12 @@
# "bah" is all I can say
class X(object):
def __repr__(self):
- return '\u1234'
+ return u'\u1234'
'%s' % X()
#
class X(object):
def __str__(self):
- return '\u1234'
+ return u'\u1234'
'%s' % X()
def test_formatting_unicode__repr__(self):
diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py
--- a/rpython/annotator/bookkeeper.py
+++ b/rpython/annotator/bookkeeper.py
@@ -194,13 +194,14 @@
listdef.generalize_range_step(flags['range_step'])
return SomeList(listdef)
- def getdictdef(self, is_r_dict=False, force_non_null=False):
+ def getdictdef(self, is_r_dict=False, force_non_null=False,
simple_hash_eq=False):
"""Get the DictDef associated with the current position."""
try:
dictdef = self.dictdefs[self.position_key]
except KeyError:
dictdef = DictDef(self, is_r_dict=is_r_dict,
- force_non_null=force_non_null)
+ force_non_null=force_non_null,
+ simple_hash_eq=simple_hash_eq)
self.dictdefs[self.position_key] = dictdef
return dictdef
diff --git a/rpython/annotator/builtin.py b/rpython/annotator/builtin.py
--- a/rpython/annotator/builtin.py
+++ b/rpython/annotator/builtin.py
@@ -237,22 +237,30 @@
return SomeInstance(clsdef)
@analyzer_for(rpython.rlib.objectmodel.r_dict)
-def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None):
+def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None,
s_simple_hash_eq=None):
+ return _r_dict_helper(SomeDict, s_eqfn, s_hashfn, s_force_non_null,
s_simple_hash_eq)
+
+@analyzer_for(rpython.rlib.objectmodel.r_ordereddict)
+def robjmodel_r_ordereddict(s_eqfn, s_hashfn, s_force_non_null=None,
s_simple_hash_eq=None):
+ return _r_dict_helper(SomeOrderedDict, s_eqfn, s_hashfn,
+ s_force_non_null, s_simple_hash_eq)
+
+def _r_dict_helper(cls, s_eqfn, s_hashfn, s_force_non_null, s_simple_hash_eq):
if s_force_non_null is None:
force_non_null = False
else:
assert s_force_non_null.is_constant()
force_non_null = s_force_non_null.const
+ if s_simple_hash_eq is None:
+ simple_hash_eq = False
+ else:
+ assert s_simple_hash_eq.is_constant()
+ simple_hash_eq = s_simple_hash_eq.const
dictdef = getbookkeeper().getdictdef(is_r_dict=True,
- force_non_null=force_non_null)
+ force_non_null=force_non_null,
+ simple_hash_eq=simple_hash_eq)
dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn)
- return SomeDict(dictdef)
-
-@analyzer_for(rpython.rlib.objectmodel.r_ordereddict)
-def robjmodel_r_ordereddict(s_eqfn, s_hashfn):
- dictdef = getbookkeeper().getdictdef(is_r_dict=True)
- dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn)
- return SomeOrderedDict(dictdef)
+ return cls(dictdef)
@analyzer_for(rpython.rlib.objectmodel.hlinvoke)
def robjmodel_hlinvoke(s_repr, s_llcallable, *args_s):
diff --git a/rpython/annotator/dictdef.py b/rpython/annotator/dictdef.py
--- a/rpython/annotator/dictdef.py
+++ b/rpython/annotator/dictdef.py
@@ -81,12 +81,14 @@
def __init__(self, bookkeeper, s_key = s_ImpossibleValue,
s_value = s_ImpossibleValue,
is_r_dict = False,
- force_non_null = False):
+ force_non_null = False,
+ simple_hash_eq = False):
self.dictkey = DictKey(bookkeeper, s_key, is_r_dict)
self.dictkey.itemof[self] = True
self.dictvalue = DictValue(bookkeeper, s_value)
self.dictvalue.itemof[self] = True
self.force_non_null = force_non_null
+ self.simple_hash_eq = simple_hash_eq
def read_key(self, position_key):
self.dictkey.read_locations.add(position_key)
diff --git a/rpython/jit/metainterp/typesystem.py
b/rpython/jit/metainterp/typesystem.py
--- a/rpython/jit/metainterp/typesystem.py
+++ b/rpython/jit/metainterp/typesystem.py
@@ -106,11 +106,11 @@
# It is an r_dict on lltype. Two copies, to avoid conflicts with
# the value type. Note that NULL is not allowed as a key.
def new_ref_dict(self):
- return r_dict(rd_eq, rd_hash)
+ return r_dict(rd_eq, rd_hash, simple_hash_eq=True)
def new_ref_dict_2(self):
- return r_dict(rd_eq, rd_hash)
+ return r_dict(rd_eq, rd_hash, simple_hash_eq=True)
def new_ref_dict_3(self):
- return r_dict(rd_eq, rd_hash)
+ return r_dict(rd_eq, rd_hash, simple_hash_eq=True)
def cast_vtable_to_hashable(self, cpu, ptr):
adr = llmemory.cast_ptr_to_adr(ptr)
diff --git a/rpython/rlib/debug.py b/rpython/rlib/debug.py
--- a/rpython/rlib/debug.py
+++ b/rpython/rlib/debug.py
@@ -288,6 +288,9 @@
def mark_dict_non_null(d):
""" Mark dictionary as having non-null keys and values. A warning would
be emitted (not an error!) in case annotation disagrees.
+
+ This doesn't work for r_dicts. For them, pass
+ r_dict(..., force_non_null=True) to the constructor.
"""
assert isinstance(d, dict)
return d
diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py
--- a/rpython/rlib/objectmodel.py
+++ b/rpython/rlib/objectmodel.py
@@ -748,11 +748,19 @@
def _newdict(self):
return {}
- def __init__(self, key_eq, key_hash, force_non_null=False):
+ def __init__(self, key_eq, key_hash, force_non_null=False,
simple_hash_eq=False):
+ """ force_non_null=True means that the key can never be None (even if
+ the annotator thinks it could be)
+
+ simple_hash_eq=True means that the hash function is very fast, meaning
it's
+ efficient enough that the dict does not have to store the hash per key.
+ It also implies that neither the hash nor the eq function will mutate
+ the dictionary. """
self._dict = self._newdict()
self.key_eq = key_eq
self.key_hash = key_hash
self.force_non_null = force_non_null
+ self.simple_hash_eq = simple_hash_eq
def __getitem__(self, key):
return self._dict[_r_dictkey(self, key)]
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -710,7 +710,7 @@
# ____________________________________________________________
-# utf-32
+# utf-32 (not used in PyPy any more)
def str_decode_utf_32(s, size, errors, final=True,
errorhandler=None):
diff --git a/rpython/rlib/test/test_objectmodel.py
b/rpython/rlib/test/test_objectmodel.py
--- a/rpython/rlib/test/test_objectmodel.py
+++ b/rpython/rlib/test/test_objectmodel.py
@@ -330,6 +330,13 @@
res = self.interpret(g, [3])
assert res == 77
+ def test_r_dict_fast_functions(self):
+ def fn():
+ d1 = r_dict(strange_key_eq, strange_key_hash, simple_hash_eq=True)
+ return play_with_r_dict(d1)
+ res = self.interpret(fn, [])
+ assert res
+
def test_prepare_dict_update(self):
def g(n):
d = {}
diff --git a/rpython/rtyper/lltypesystem/rdict.py
b/rpython/rtyper/lltypesystem/rdict.py
--- a/rpython/rtyper/lltypesystem/rdict.py
+++ b/rpython/rtyper/lltypesystem/rdict.py
@@ -42,7 +42,8 @@
class DictRepr(AbstractDictRepr):
def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue,
- custom_eq_hash=None, force_non_null=False):
+ custom_eq_hash=None, force_non_null=False, fast_hash=False):
+ # fast_hash is ignored (only implemented in rordereddict.py)
self.rtyper = rtyper
self.DICT = lltype.GcForwardReference()
self.lowleveltype = lltype.Ptr(self.DICT)
diff --git a/rpython/rtyper/lltypesystem/rordereddict.py
b/rpython/rtyper/lltypesystem/rordereddict.py
--- a/rpython/rtyper/lltypesystem/rordereddict.py
+++ b/rpython/rtyper/lltypesystem/rordereddict.py
@@ -66,7 +66,7 @@
def get_ll_dict(DICTKEY, DICTVALUE, get_custom_eq_hash=None, DICT=None,
ll_fasthash_function=None, ll_hash_function=None,
- ll_eq_function=None, method_cache={},
+ ll_eq_function=None, method_cache={}, simple_hash_eq=False,
dummykeyobj=None, dummyvalueobj=None, rtyper=None):
# get the actual DICT type. if DICT is None, it's created, otherwise
# forward reference is becoming DICT
@@ -114,11 +114,14 @@
# * the value
entryfields.append(("value", DICTVALUE))
- if ll_fasthash_function is None:
+ if simple_hash_eq:
+ assert get_custom_eq_hash is not None
+ entrymeths['entry_hash'] = ll_hash_custom_fast
+ elif ll_fasthash_function is None:
entryfields.append(("f_hash", lltype.Signed))
- entrymeths['hash'] = ll_hash_from_cache
+ entrymeths['entry_hash'] = ll_hash_from_cache
else:
- entrymeths['hash'] = ll_hash_recomputed
+ entrymeths['entry_hash'] = ll_hash_recomputed
entrymeths['fasthashfn'] = ll_fasthash_function
# Build the lltype data structures
@@ -140,7 +143,7 @@
'keyeq': ll_keyeq_custom,
'r_rdict_eqfn': r_rdict_eqfn,
'r_rdict_hashfn': r_rdict_hashfn,
- 'paranoia': True,
+ 'paranoia': not simple_hash_eq,
}
else:
# figure out which functions must be used to hash and compare
@@ -167,13 +170,14 @@
class OrderedDictRepr(AbstractDictRepr):
def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue,
- custom_eq_hash=None, force_non_null=False):
+ custom_eq_hash=None, force_non_null=False,
simple_hash_eq=False):
#assert not force_non_null
self.rtyper = rtyper
self.finalized = False
self.DICT = lltype.GcForwardReference()
self.lowleveltype = lltype.Ptr(self.DICT)
self.custom_eq_hash = custom_eq_hash is not None
+ self.simple_hash_eq = simple_hash_eq
if not isinstance(key_repr, rmodel.Repr): # not computed yet, done by
setup()
assert callable(key_repr)
self._key_repr_computer = key_repr
@@ -211,6 +215,7 @@
self.r_rdict_eqfn, self.r_rdict_hashfn = (
self._custom_eq_hash_repr())
kwd['get_custom_eq_hash'] = self._custom_eq_hash_repr
+ kwd['simple_hash_eq'] = self.simple_hash_eq
else:
kwd['ll_hash_function'] = self.key_repr.get_ll_hash_function()
kwd['ll_eq_function'] = self.key_repr.get_ll_eq_function()
@@ -600,15 +605,21 @@
dummy = ENTRIES.dummy_obj.ll_dummy_value
entries[i].value = dummy
-@signature(types.any(), types.int(), returns=types.any())
-def ll_hash_from_cache(entries, i):
+@signature(types.any(), types.any(), types.int(), returns=types.any())
+def ll_hash_from_cache(entries, d, i):
return entries[i].f_hash
-@signature(types.any(), types.int(), returns=types.any())
-def ll_hash_recomputed(entries, i):
+@signature(types.any(), types.any(), types.int(), returns=types.any())
+def ll_hash_recomputed(entries, d, i):
ENTRIES = lltype.typeOf(entries).TO
return ENTRIES.fasthashfn(entries[i].key)
+@signature(types.any(), types.any(), types.int(), returns=types.any())
+def ll_hash_custom_fast(entries, d, i):
+ DICT = lltype.typeOf(d).TO
+ key = entries[i].key
+ return objectmodel.hlinvoke(DICT.r_rdict_hashfn, d.fnkeyhash, key)
+
def ll_keyhash_custom(d, key):
DICT = lltype.typeOf(d).TO
return objectmodel.hlinvoke(DICT.r_rdict_hashfn, d.fnkeyhash, key)
@@ -962,22 +973,22 @@
if fun == FUNC_BYTE:
while i < ibound:
if entries.valid(i):
- ll_dict_store_clean(d, entries.hash(i), i, TYPE_BYTE)
+ ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_BYTE)
i += 1
elif fun == FUNC_SHORT:
while i < ibound:
if entries.valid(i):
- ll_dict_store_clean(d, entries.hash(i), i, TYPE_SHORT)
+ ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_SHORT)
i += 1
elif IS_64BIT and fun == FUNC_INT:
while i < ibound:
if entries.valid(i):
- ll_dict_store_clean(d, entries.hash(i), i, TYPE_INT)
+ ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_INT)
i += 1
elif fun == FUNC_LONG:
while i < ibound:
if entries.valid(i):
- ll_dict_store_clean(d, entries.hash(i), i, TYPE_LONG)
+ ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_LONG)
i += 1
else:
assert False
@@ -1015,7 +1026,7 @@
checkingkey = entries[index - VALID_OFFSET].key
if direct_compare and checkingkey == key:
return index - VALID_OFFSET # found the entry
- if d.keyeq is not None and entries.hash(index - VALID_OFFSET) == hash:
+ if d.keyeq is not None and entries.entry_hash(d, index - VALID_OFFSET)
== hash:
# correct hash, maybe the key is e.g. a different pointer to
# an equal object
found = d.keyeq(checkingkey, key)
@@ -1056,7 +1067,7 @@
checkingkey = entries[index - VALID_OFFSET].key
if direct_compare and checkingkey == key:
return index - VALID_OFFSET # found the entry
- if d.keyeq is not None and entries.hash(index - VALID_OFFSET) ==
hash:
+ if d.keyeq is not None and entries.entry_hash(d, index -
VALID_OFFSET) == hash:
# correct hash, maybe the key is e.g. a different pointer to
# an equal object
found = d.keyeq(checkingkey, key)
@@ -1305,14 +1316,14 @@
def ll_dict_update(dic1, dic2):
if dic1 == dic2:
return
- ll_ensure_indexes(dic2) # needed for entries.hash() below
+ ll_ensure_indexes(dic2) # needed for entries.entry_hash() below
ll_prepare_dict_update(dic1, dic2.num_live_items)
i = 0
while i < dic2.num_ever_used_items:
entries = dic2.entries
if entries.valid(i):
entry = entries[i]
- hash = entries.hash(i)
+ hash = entries.entry_hash(dic2, i)
key = entry.key
value = entry.value
index = dic1.lookup_function(dic1, key, hash, FLAG_STORE)
@@ -1413,7 +1424,7 @@
r = lltype.malloc(ELEM.TO)
r.item0 = recast(ELEM.TO.item0, entry.key)
r.item1 = recast(ELEM.TO.item1, entry.value)
- _ll_dict_del(dic, dic.entries.hash(i), i)
+ _ll_dict_del(dic, dic.entries.entry_hash(dic, i), i)
return r
def ll_dict_pop(dic, key):
diff --git a/rpython/rtyper/rbuiltin.py b/rpython/rtyper/rbuiltin.py
--- a/rpython/rtyper/rbuiltin.py
+++ b/rpython/rtyper/rbuiltin.py
@@ -717,9 +717,9 @@
@typer_for(OrderedDict)
@typer_for(objectmodel.r_dict)
@typer_for(objectmodel.r_ordereddict)
-def rtype_dict_constructor(hop, i_force_non_null=None):
- # 'i_force_non_null' is ignored here; if it has any effect, it
- # has already been applied to 'hop.r_result'
+def rtype_dict_constructor(hop, i_force_non_null=None, i_simple_hash_eq=None):
+ # 'i_force_non_null' and 'i_simple_hash_eq' are ignored here; if they have
any
+ # effect, it has already been applied to 'hop.r_result'
hop.exception_cannot_occur()
r_dict = hop.r_result
cDICT = hop.inputconst(lltype.Void, r_dict.DICT)
diff --git a/rpython/rtyper/rdict.py b/rpython/rtyper/rdict.py
--- a/rpython/rtyper/rdict.py
+++ b/rpython/rtyper/rdict.py
@@ -15,6 +15,7 @@
s_key = dictkey.s_value
s_value = dictvalue.s_value
force_non_null = self.dictdef.force_non_null
+ simple_hash_eq = self.dictdef.simple_hash_eq
if dictkey.custom_eq_hash:
custom_eq_hash = lambda: (rtyper.getrepr(dictkey.s_rdict_eqfn),
rtyper.getrepr(dictkey.s_rdict_hashfn))
@@ -22,7 +23,7 @@
custom_eq_hash = None
return self.get_dict_repr()(rtyper, lambda: rtyper.getrepr(s_key),
lambda: rtyper.getrepr(s_value), dictkey, dictvalue,
- custom_eq_hash, force_non_null)
+ custom_eq_hash, force_non_null, simple_hash_eq)
def rtyper_makekey(self):
self.dictdef.dictkey .dont_change_any_more = True
@@ -89,7 +90,7 @@
resulttype=ENTRIES)
# call the correct variant_*() method
method = getattr(self, 'variant_' + self.variant)
- return method(hop, ENTRIES, v_entries, v_index)
+ return method(hop, ENTRIES, v_entries, v_dict, v_index)
def get_tuple_result(self, hop, items_v):
# this allocates the tuple for the result, directly in the function
@@ -109,7 +110,7 @@
hop.genop('setfield', [v_result, c_item, v_item])
return v_result
- def variant_keys(self, hop, ENTRIES, v_entries, v_index):
+ def variant_keys(self, hop, ENTRIES, v_entries, v_dict, v_index):
KEY = ENTRIES.TO.OF.key
c_key = hop.inputconst(lltype.Void, 'key')
v_key = hop.genop('getinteriorfield', [v_entries, v_index, c_key],
@@ -118,30 +119,30 @@
variant_reversed = variant_keys
- def variant_values(self, hop, ENTRIES, v_entries, v_index):
+ def variant_values(self, hop, ENTRIES, v_entries, v_dict, v_index):
VALUE = ENTRIES.TO.OF.value
c_value = hop.inputconst(lltype.Void, 'value')
v_value = hop.genop('getinteriorfield', [v_entries,v_index,c_value],
resulttype=VALUE)
return self.r_dict.recast_value(hop.llops, v_value)
- def variant_items(self, hop, ENTRIES, v_entries, v_index):
- v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index)
- v_value = self.variant_values(hop, ENTRIES, v_entries, v_index)
+ def variant_items(self, hop, ENTRIES, v_entries, v_dict, v_index):
+ v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index)
+ v_value = self.variant_values(hop, ENTRIES, v_entries, v_dict, v_index)
return self.get_tuple_result(hop, (v_key, v_value))
- def variant_hashes(self, hop, ENTRIES, v_entries, v_index):
+ def variant_hashes(self, hop, ENTRIES, v_entries, v_dict, v_index):
# there is not really a variant 'hashes', but this method is
# convenient for the following variants
- return hop.gendirectcall(ENTRIES.TO.hash, v_entries, v_index)
+ return hop.gendirectcall(ENTRIES.TO.entry_hash, v_entries, v_dict, v_index)
- def variant_keys_with_hash(self, hop, ENTRIES, v_entries, v_index):
- v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index)
- v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_index)
+ def variant_keys_with_hash(self, hop, ENTRIES, v_entries, v_dict, v_index):
+ v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index)
+ v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_dict, v_index)
return self.get_tuple_result(hop, (v_key, v_hash))
- def variant_items_with_hash(self, hop, ENTRIES, v_entries, v_index):
- v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index)
- v_value = self.variant_values(hop, ENTRIES, v_entries, v_index)
- v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_index)
+ def variant_items_with_hash(self, hop, ENTRIES, v_entries, v_dict, v_index):
+ v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index)
+ v_value = self.variant_values(hop, ENTRIES, v_entries, v_dict, v_index)
+ v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_dict, v_index)
return self.get_tuple_result(hop, (v_key, v_value, v_hash))
diff --git a/rpython/rtyper/test/test_rdict.py b/rpython/rtyper/test/test_rdict.py
--- a/rpython/rtyper/test/test_rdict.py
+++ b/rpython/rtyper/test/test_rdict.py
@@ -538,6 +538,25 @@
r_dict = rtyper.getrepr(s)
assert not hasattr(r_dict.lowleveltype.TO.entries.TO.OF, "f_hash")
+ def test_r_dict_can_be_fast(self):
+ def myeq(n, m):
+ return n == m
+ def myhash(n):
+ return ~n
+ def f():
+ d = self.new_r_dict(myeq, myhash, simple_hash_eq=True)
+ d[5] = 7
+ d[12] = 19
+ return d
+
+ t = TranslationContext()
+ s = t.buildannotator().build_types(f, [])
+ rtyper = t.buildrtyper()
+ rtyper.specialize()
+
+ r_dict = rtyper.getrepr(s)
+ assert not hasattr(r_dict.lowleveltype.TO.entries.TO.OF, "f_hash")
+
def test_tuple_dict(self):
def f(i):
d = self.newdict()
@@ -1000,8 +1019,8 @@
return {}
@staticmethod
- def new_r_dict(myeq, myhash):
- return r_dict(myeq, myhash)
+ def new_r_dict(myeq, myhash, force_non_null=False, simple_hash_eq=False):
+ return r_dict(myeq, myhash, force_non_null=force_non_null, simple_hash_eq=simple_hash_eq)
def test_two_dicts_with_different_value_types(self):
def func(i):
diff --git a/rpython/rtyper/test/test_rordereddict.py b/rpython/rtyper/test/test_rordereddict.py
--- a/rpython/rtyper/test/test_rordereddict.py
+++ b/rpython/rtyper/test/test_rordereddict.py
@@ -386,8 +386,10 @@
return OrderedDict()
@staticmethod
- def new_r_dict(myeq, myhash):
- return objectmodel.r_ordereddict(myeq, myhash)
+ def new_r_dict(myeq, myhash, force_non_null=False, simple_hash_eq=False):
+ return objectmodel.r_ordereddict(
+ myeq, myhash, force_non_null=force_non_null,
+ simple_hash_eq=simple_hash_eq)
def test_two_dicts_with_different_value_types(self):
def func(i):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit