Author: Armin Rigo <ar...@tunes.org> Branch: py3.5 Changeset: r89777:ad1beddd3f43 Date: 2017-01-26 09:34 +0100 http://bitbucket.org/pypy/pypy/changeset/ad1beddd3f43/
Log: hg merge default diff --git a/pypy/goal/targetpypystandalone.py b/pypy/goal/targetpypystandalone.py --- a/pypy/goal/targetpypystandalone.py +++ b/pypy/goal/targetpypystandalone.py @@ -311,8 +311,8 @@ if config.objspace.usemodules.cpyext: if config.translation.gc not in ('incminimark', 'boehm'): raise Exception("The 'cpyext' module requires the 'incminimark'" - " 'boehm' GC. You need either 'targetpypystandalone.py" - " --withoutmod-cpyext' or '--gc=incminimark'") + " or 'boehm' GC. You need either 'targetpypystandalone.py" + " --withoutmod-cpyext', or use one of these two GCs.") config.translating = True diff --git a/pypy/module/_cffi_backend/cdataobj.py b/pypy/module/_cffi_backend/cdataobj.py --- a/pypy/module/_cffi_backend/cdataobj.py +++ b/pypy/module/_cffi_backend/cdataobj.py @@ -323,17 +323,28 @@ # return self._add_or_sub(w_other, -1) - def getcfield(self, w_attr): - return self.ctype.getcfield(self.space.str_w(w_attr)) + def getcfield(self, w_attr, mode): + space = self.space + attr = space.str_w(w_attr) + try: + cfield = self.ctype.getcfield(attr) + except KeyError: + raise oefmt(space.w_AttributeError, "cdata '%s' has no field '%s'", + self.ctype.name, attr) + if cfield is None: + raise oefmt(space.w_AttributeError, + "cdata '%s' points to an opaque type: cannot %s fields", + self.ctype.name, mode) + return cfield def getattr(self, w_attr): - cfield = self.getcfield(w_attr) + cfield = self.getcfield(w_attr, mode="read") with self as ptr: w_res = cfield.read(ptr, self) return w_res def setattr(self, w_attr, w_value): - cfield = self.getcfield(w_attr) + cfield = self.getcfield(w_attr, mode="write") with self as ptr: cfield.write(ptr, w_value) diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py --- a/pypy/module/_cffi_backend/ctypeptr.py +++ b/pypy/module/_cffi_backend/ctypeptr.py @@ -348,7 +348,10 @@ return result def getcfield(self, attr): - return self.ctitem.getcfield(attr) + from pypy.module._cffi_backend.ctypestruct import W_CTypeStructOrUnion + if isinstance(self.ctitem, W_CTypeStructOrUnion): + return self.ctitem.getcfield(attr) + return W_CType.getcfield(self, attr) def typeoffsetof_field(self, fieldname, following): if following == 0: diff --git a/pypy/module/_cffi_backend/ctypestruct.py b/pypy/module/_cffi_backend/ctypestruct.py --- a/pypy/module/_cffi_backend/ctypestruct.py +++ b/pypy/module/_cffi_backend/ctypestruct.py @@ -161,18 +161,18 @@ return self._fields_dict[attr] def getcfield(self, attr): - ready = self._fields_dict is not None - if not ready and self.size >= 0: + # Returns a W_CField. Error cases: returns None if we are an + # opaque struct; or raises KeyError if the particular field + # 'attr' does not exist. The point of not directly building the + # error here is to get the exact ctype in the error message: it + # might be of the kind 'struct foo' or 'struct foo *'. + if self._fields_dict is None: + if self.size < 0: + return None self.force_lazy_struct() - ready = True - if ready: - self = jit.promote(self) - attr = jit.promote_string(attr) - try: - return self._getcfield_const(attr) - except KeyError: - pass - return W_CType.getcfield(self, attr) + self = jit.promote(self) + attr = jit.promote_string(attr) + return self._getcfield_const(attr) # <= KeyError here def cdata_dir(self): if self.size < 0: diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -737,8 +737,14 @@ BInt = new_primitive_type("int") BStruct = new_struct_type("struct foo") BStructPtr = new_pointer_type(BStruct) - p = cast(BStructPtr, 0) - py.test.raises(AttributeError, "p.a1") # opaque + p = cast(BStructPtr, 42) + e = py.test.raises(AttributeError, "p.a1") # opaque + assert str(e.value) == ("cdata 'struct foo *' points to an opaque type: " + "cannot read fields") + e = py.test.raises(AttributeError, "p.a1 = 10") # opaque + assert str(e.value) == ("cdata 'struct foo *' points to an opaque type: " + "cannot write fields") + complete_struct_or_union(BStruct, [('a1', BInt, -1), ('a2', BInt, -1)]) p = newp(BStructPtr, None) @@ -749,8 +755,29 @@ assert s.a2 == 123 py.test.raises(OverflowError, "s.a1 = sys.maxsize+1") assert s.a1 == 0 - py.test.raises(AttributeError, "p.foobar") - py.test.raises(AttributeError, "s.foobar") + e = py.test.raises(AttributeError, "p.foobar") + assert str(e.value) == "cdata 'struct foo *' has no field 'foobar'" + e = py.test.raises(AttributeError, "p.foobar = 42") + assert str(e.value) == "cdata 'struct foo *' has no field 'foobar'" + e = py.test.raises(AttributeError, "s.foobar") + assert str(e.value) == "cdata 'struct foo' has no field 'foobar'" + e = py.test.raises(AttributeError, "s.foobar = 42") + assert str(e.value) == "cdata 'struct foo' has no field 'foobar'" + j = cast(BInt, 42) + e = py.test.raises(AttributeError, "j.foobar") + assert str(e.value) == "cdata 'int' has no attribute 'foobar'" + e = py.test.raises(AttributeError, "j.foobar = 42") + assert str(e.value) == "cdata 'int' has no attribute 'foobar'" + j = cast(new_pointer_type(BInt), 42) + e = py.test.raises(AttributeError, "j.foobar") + assert str(e.value) == "cdata 'int *' has no attribute 'foobar'" + e = py.test.raises(AttributeError, "j.foobar = 42") + assert str(e.value) == "cdata 'int *' has no attribute 'foobar'" + pp = newp(new_pointer_type(BStructPtr), p) + e = py.test.raises(AttributeError, "pp.a1") + assert str(e.value) == "cdata 'struct foo * *' has no attribute 'a1'" + e = py.test.raises(AttributeError, "pp.a1 = 42") + assert str(e.value) == "cdata 'struct foo * *' has no attribute 'a1'" def test_union_instance(): BInt = new_primitive_type("int") diff --git a/pypy/objspace/std/mapdict.py b/pypy/objspace/std/mapdict.py --- a/pypy/objspace/std/mapdict.py +++ b/pypy/objspace/std/mapdict.py @@ -437,6 +437,9 @@ for i in range(len(self.cached_attrs)): self.cached_attrs[i] = None + def _cleanup_(self): + self.clear() + # ____________________________________________________________ # object implementation diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py --- a/pypy/objspace/std/typeobject.py +++ b/pypy/objspace/std/typeobject.py @@ -72,6 +72,10 @@ class MethodCache(object): def __init__(self, space): + # Note: these attributes never change which object they contain, + # so reading 'cache.versions' for example is constant-folded. + # The actual list in 'cache.versions' is not a constant, of + # course. SIZE = 1 << space.config.objspace.std.methodcachesizeexp self.versions = [None] * SIZE self.names = [None] * SIZE @@ -89,6 +93,9 @@ for i in range(len(self.lookup_where)): self.lookup_where[i] = None_None + def _cleanup_(self): + self.clear() + class _Global(object): weakref_warning_printed = False _global = _Global() diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py --- a/rpython/config/translationoption.py +++ b/rpython/config/translationoption.py @@ -201,6 +201,10 @@ StrOption("icon", "Path to the (Windows) icon to use for the executable"), StrOption("libname", "Windows: name and possibly location of the lib file to create"), + ChoiceOption("hash", + "The hash to use for strings", + ["rpython", "siphash24"], + default="rpython", cmdline="--hash"), OptionDescription("backendopt", "Backend Optimization Options", [ # control inlining @@ -390,6 +394,12 @@ if sys.platform == "darwin" or sys.platform =="win32": raise ConfigError("'asmgcc' not supported on this platform") +def apply_extra_settings(config): + # make the setting of config.hash definitive + from rpython.rlib.objectmodel import set_hash_algorithm + config.translation.hash = config.translation.hash + set_hash_algorithm(config.translation.hash) + # ---------------------------------------------------------------- def set_platform(config): diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py --- a/rpython/rlib/objectmodel.py +++ b/rpython/rlib/objectmodel.py @@ -520,10 +520,22 @@ # ---------- HASH_ALGORITHM = "rpython" # XXX Is there a better name? +HASH_ALGORITHM_FIXED = False -def _hash_string(s): - """The algorithm behind compute_hash() for a string or a unicode.""" +@not_rpython +def set_hash_algorithm(algo): + """Must be called very early, before any string is hashed with + compute_hash()!""" + global HASH_ALGORITHM + if HASH_ALGORITHM != algo: + assert not HASH_ALGORITHM_FIXED, "compute_hash() already called!" + assert algo in ("rpython", "siphash24") + HASH_ALGORITHM = algo + + +def _hash_string_rpython(s): from rpython.rlib.rarithmetic import intmask + length = len(s) if length == 0: return -1 @@ -535,6 +547,101 @@ x ^= length return intmask(x) + +@not_rpython +def _hash_string_siphash24(s): + """This version is called when untranslated only.""" + import array + from rpython.rlib.rsiphash import siphash24 + from rpython.rtyper.lltypesystem import lltype, rffi + from rpython.rlib.rarithmetic import intmask + + if not isinstance(s, str): + if isinstance(s, unicode): + lst = map(ord, s) + else: + lst = map(ord, s.chars) # for rstr.STR or UNICODE + # NOTE: a latin-1 unicode string must have the same hash as the + # corresponding byte string. + if all(n <= 0xFF for n in lst): + kind = "B" + elif rffi.sizeof(lltype.UniChar) == 4: + kind = "I" + else: + kind = "H" + s = array.array(kind, lst).tostring() + ptr = rffi.str2charp(s) + x = siphash24(ptr, len(s)) + rffi.free_charp(ptr) + return intmask(x) + +def ll_hash_string_siphash24(ll_s): + """Called from lltypesystem/rstr.py. 'll_s' is a rstr.STR or UNICODE.""" + from rpython.rlib.rsiphash import siphash24 + from rpython.rtyper.lltypesystem import lltype, llmemory, rffi, rstr + from rpython.rlib.rarithmetic import intmask + + length = len(ll_s.chars) + if lltype.typeOf(ll_s).TO.chars.OF == lltype.Char: + # no GC operation from here! + addr = rstr._get_raw_buf_string(rstr.STR, ll_s, 0) + else: + # NOTE: a latin-1 unicode string must have the same hash as the + # corresponding byte string. If the unicode is all within + # 0-255, then we need to allocate a byte buffer and copy the + # latin-1 encoding in it manually. + for i in range(length): + if ord(ll_s.chars[i]) > 0xFF: + # no GC operation from here! + addr = rstr._get_raw_buf_unicode(rstr.UNICODE, ll_s, 0) + length *= rffi.sizeof(rstr.UNICODE.chars.OF) + break + else: + p = lltype.malloc(rffi.CCHARP.TO, length, flavor='raw') + i = 0 + while i < length: + p[i] = chr(ord(ll_s.chars[i])) + i += 1 + x = siphash24(llmemory.cast_ptr_to_adr(p), length) + lltype.free(p, flavor='raw') + return intmask(x) + x = siphash24(addr, length) + keepalive_until_here(ll_s) + return intmask(x) +ll_hash_string_siphash24._jit_look_inside_ = False + + +@not_rpython +def _hash_string(s): + """The algorithm behind compute_hash() for a string or a unicode. + This version is only for untranslated usage, and 's' is a str or unicode. + """ + global HASH_ALGORITHM_FIXED + HASH_ALGORITHM_FIXED = True + if HASH_ALGORITHM == "rpython": + return _hash_string_rpython(s) + if HASH_ALGORITHM == "siphash24": + return _hash_string_siphash24(s) + raise NotImplementedError + +def ll_hash_string(ll_s): + """The algorithm behind compute_hash() for a string or a unicode. + This version is called from lltypesystem/rstr.py, and 'll_s' is a + rstr.STR or rstr.UNICODE. + """ + if not we_are_translated(): + global HASH_ALGORITHM_FIXED + HASH_ALGORITHM_FIXED = True + if HASH_ALGORITHM == "rpython": + return _hash_string_rpython(ll_s.chars) + if HASH_ALGORITHM == "siphash24": + if we_are_translated(): + return ll_hash_string_siphash24(ll_s) + else: + return _hash_string_siphash24(ll_s) + raise NotImplementedError + + def _hash_float(f): """The algorithm behind compute_hash() for a float. This implementation is identical to the CPython implementation, diff --git a/rpython/rlib/rsiphash.py b/rpython/rlib/rsiphash.py new file mode 100644 --- /dev/null +++ b/rpython/rlib/rsiphash.py @@ -0,0 +1,157 @@ +import sys, os, struct +from contextlib import contextmanager +from rpython.rlib import rarithmetic +from rpython.rlib.objectmodel import not_rpython, always_inline +from rpython.rlib.rgc import no_collect +from rpython.rlib.rarithmetic import r_uint64 +from rpython.rlib.rawstorage import misaligned_is_fine +from rpython.rtyper.lltypesystem import lltype, llmemory, rffi +from rpython.rtyper.lltypesystem.lloperation import llop + + +if sys.byteorder == 'little': + def _le64toh(x): + return x +else: + _le64toh = rarithmetic.byteswap + + +# Initialize the values of the secret seed: two 64-bit constants. +# CPython picks a new seed every time 'python' starts. PyPy cannot do +# that as easily because many details may rely on getting the same hash +# value before and after translation. We can, however, pick a random +# seed once per translation, which should already be quite good. + +@not_rpython +def select_random_seed(): + global k0, k1 # note: the globals k0, k1 are already byte-swapped + v0, v1 = struct.unpack("QQ", os.urandom(16)) + k0 = r_uint64(v0) + k1 = r_uint64(v1) + +select_random_seed() + +@contextmanager +def choosen_seed(new_k0, new_k1, test_misaligned_path=False): + global k0, k1, misaligned_is_fine + old = k0, k1, misaligned_is_fine + k0 = _le64toh(r_uint64(new_k0)) + k1 = _le64toh(r_uint64(new_k1)) + if test_misaligned_path: + misaligned_is_fine = False + yield + k0, k1, misaligned_is_fine = old + +def get_current_seed(): + return _le64toh(k0), _le64toh(k1) + + +magic0 = r_uint64(0x736f6d6570736575) +magic1 = r_uint64(0x646f72616e646f6d) +magic2 = r_uint64(0x6c7967656e657261) +magic3 = r_uint64(0x7465646279746573) + + +@always_inline +def _rotate(x, b): + return (x << b) | (x >> (64 - b)) + +@always_inline +def _half_round(a, b, c, d, s, t): + a += b + c += d + b = _rotate(b, s) ^ a + d = _rotate(d, t) ^ c + a = _rotate(a, 32) + return a, b, c, d + +@always_inline +def _double_round(v0, v1, v2, v3): + v0,v1,v2,v3 = _half_round(v0,v1,v2,v3,13,16) + v2,v1,v0,v3 = _half_round(v2,v1,v0,v3,17,21) + v0,v1,v2,v3 = _half_round(v0,v1,v2,v3,13,16) + v2,v1,v0,v3 = _half_round(v2,v1,v0,v3,17,21) + return v0, v1, v2, v3 + + +@no_collect +def siphash24(addr_in, size): + """Takes an address pointer and a size. Returns the hash as a r_uint64, + which can then be casted to the expected type.""" + + direct = (misaligned_is_fine or + (rffi.cast(lltype.Signed, addr_in) & 7) == 0) + + b = r_uint64(size) << 56 + v0 = k0 ^ magic0 + v1 = k1 ^ magic1 + v2 = k0 ^ magic2 + v3 = k1 ^ magic3 + + index = 0 + if direct: + while size >= 8: + mi = llop.raw_load(rffi.ULONGLONG, addr_in, index) + mi = _le64toh(mi) + size -= 8 + index += 8 + v3 ^= mi + v0, v1, v2, v3 = _double_round(v0, v1, v2, v3) + v0 ^= mi + else: + while size >= 8: + mi = ( + r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index)) | + r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 1)) << 8 | + r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 2)) << 16 | + r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 3)) << 24 | + r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 4)) << 32 | + r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 5)) << 40 | + r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 6)) << 48 | + r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 7)) << 56 + ) + mi = _le64toh(mi) + size -= 8 + index += 8 + v3 ^= mi + v0, v1, v2, v3 = _double_round(v0, v1, v2, v3) + v0 ^= mi + + t = r_uint64(0) + if size == 7: + t = r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 6)) << 48 + size = 6 + if size == 6: + t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 5)) << 40 + size = 5 + if size == 5: + t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 4)) << 32 + size = 4 + if size == 4: + if direct: + t |= r_uint64(llop.raw_load(rffi.UINT, addr_in, index)) + size = 0 + else: + t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 3)) << 24 + size = 3 + if size == 3: + t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 2)) << 16 + size = 2 + if size == 2: + t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 1)) << 8 + size = 1 + if size == 1: + t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index)) + size = 0 + assert size == 0 + + b |= _le64toh(t) + + v3 ^= b + v0, v1, v2, v3 = _double_round(v0, v1, v2, v3) + v0 ^= b + v2 ^= 0xff + v0, v1, v2, v3 = _double_round(v0, v1, v2, v3) + v0, v1, v2, v3 = _double_round(v0, v1, v2, v3) + + return (v0 ^ v1) ^ (v2 ^ v3) diff --git a/rpython/rlib/test/test_rsiphash.py b/rpython/rlib/test/test_rsiphash.py new file mode 100644 --- /dev/null +++ b/rpython/rlib/test/test_rsiphash.py @@ -0,0 +1,44 @@ +from rpython.rlib.rsiphash import siphash24, choosen_seed +from rpython.rtyper.lltypesystem import llmemory, rffi + + +CASES = [ + (2323638336262702335 , ""), + (5150479602681463644 , "h"), + (1013213613370725794 , "he"), + (7028032310911240238 , "hel"), + (9535960132410784494 , "hell"), + (3256502711089771242 , "hello"), + (2389188832234450176 , "hello "), + (13253855839845990393, "hello w"), + (7850036019043917323 , "hello wo"), + (14283308628425005953, "hello wor"), + (9605549962279590084 , "hello worl"), + (16371281469632894235, "hello world"), + (7298637955795769949 , "hello world\x9a"), + (13530878135053370821, "hello world\xf3\x80"), + (1643533543579802994 , "\xffhel\x82lo world\xbc"), + (14632093238728197380, "hexlylxox rewqw"), + (3434253029196696424 , "hexlylxox rewqws"), + (9855754545877066788 , "hexlylxox rewqwsv"), + (5233065012564472454 , "hexlylxox rewqwkashdw89"), + (16768585622569081808, "hexlylxox rewqwkeashdw89"), + (17430482483431293463, "HEEExlylxox rewqwkashdw89"), + (695783005783737705 , "hello woadwealidewd 3829ez 32ig dxwaebderld"), +] + +def check(s): + p = rffi.str2charp(s) + q = rffi.str2charp('?' + s) + with choosen_seed(0x8a9f065a358479f4, 0x11cb1e9ee7f40e1f, + test_misaligned_path=True): + x = siphash24(llmemory.cast_ptr_to_adr(p), len(s)) + y = siphash24(llmemory.cast_ptr_to_adr(rffi.ptradd(q, 1)), len(s)) + rffi.free_charp(p) + rffi.free_charp(q) + assert x == y + return x + +def test_siphash24(): + for expected, string in CASES: + assert check(string) == expected diff --git a/rpython/rtyper/lltypesystem/rbytearray.py b/rpython/rtyper/lltypesystem/rbytearray.py --- a/rpython/rtyper/lltypesystem/rbytearray.py +++ b/rpython/rtyper/lltypesystem/rbytearray.py @@ -8,10 +8,10 @@ def mallocbytearray(size): return lltype.malloc(BYTEARRAY, size) -_, _, copy_bytearray_contents = rstr._new_copy_contents_fun(BYTEARRAY, BYTEARRAY, +_, _, copy_bytearray_contents, _ = rstr._new_copy_contents_fun(BYTEARRAY, BYTEARRAY, lltype.Char, 'bytearray') -_, _, copy_bytearray_contents_from_str = rstr._new_copy_contents_fun(rstr.STR, +_, _, copy_bytearray_contents_from_str, _ = rstr._new_copy_contents_fun(rstr.STR, BYTEARRAY, lltype.Char, 'bytearray_from_str') diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -1073,8 +1073,9 @@ if size is None: size = llmemory.sizeof(tp) # a symbolic result in this case return size - if isinstance(tp, lltype.Ptr) or tp is llmemory.Address: - return globals()['r_void*'].BITS/8 + if (tp is lltype.Signed or isinstance(tp, lltype.Ptr) + or tp is llmemory.Address): + return LONG_BIT/8 if tp is lltype.Char or tp is lltype.Bool: return 1 if tp is lltype.UniChar: @@ -1087,8 +1088,6 @@ # :-/ return sizeof_c_type("long double") assert isinstance(tp, lltype.Number) - if tp is lltype.Signed: - return LONG_BIT/8 return tp._type.BITS/8 sizeof._annspecialcase_ = 'specialize:memo' diff --git a/rpython/rtyper/lltypesystem/rstr.py b/rpython/rtyper/lltypesystem/rstr.py --- a/rpython/rtyper/lltypesystem/rstr.py +++ b/rpython/rtyper/lltypesystem/rstr.py @@ -3,7 +3,7 @@ from rpython.annotator import model as annmodel from rpython.rlib import jit, types from rpython.rlib.objectmodel import (malloc_zero_filled, we_are_translated, - _hash_string, keepalive_until_here, specialize, enforceargs) + ll_hash_string, keepalive_until_here, specialize, enforceargs) from rpython.rlib.signature import signature from rpython.rlib.rarithmetic import ovfcheck from rpython.rtyper.error import TyperError @@ -44,11 +44,13 @@ mallocstr = new_malloc(STR, 'mallocstr') mallocunicode = new_malloc(UNICODE, 'mallocunicode') +@specialize.memo() def emptystrfun(): - return emptystr + return string_repr.convert_const("") +@specialize.memo() def emptyunicodefun(): - return emptyunicode + return unicode_repr.convert_const(u'') def _new_copy_contents_fun(SRC_TP, DST_TP, CHAR_TP, name): @specialize.arg(0) @@ -136,15 +138,19 @@ copy_raw_to_string = func_with_new_name(copy_raw_to_string, 'copy_raw_to_%s' % name) - return copy_string_to_raw, copy_raw_to_string, copy_string_contents + return (copy_string_to_raw, copy_raw_to_string, copy_string_contents, + _get_raw_buf) (copy_string_to_raw, copy_raw_to_string, - copy_string_contents) = _new_copy_contents_fun(STR, STR, Char, 'string') + copy_string_contents, + _get_raw_buf_string) = _new_copy_contents_fun(STR, STR, Char, 'string') (copy_unicode_to_raw, copy_raw_to_unicode, - copy_unicode_contents) = _new_copy_contents_fun(UNICODE, UNICODE, UniChar, 'unicode') + copy_unicode_contents, + _get_raw_buf_unicode) = _new_copy_contents_fun(UNICODE, UNICODE, UniChar, + 'unicode') CONST_STR_CACHE = WeakValueDictionary() CONST_UNICODE_CACHE = WeakValueDictionary() @@ -382,7 +388,7 @@ # but our malloc initializes the memory to zero, so we use zero as the # special non-computed-yet value. Also, jit.conditional_call_elidable # always checks for zero, for now. - x = _hash_string(s.chars) + x = ll_hash_string(s) if x == 0: x = 29872897 s.hash = x @@ -1276,8 +1282,6 @@ char_repr.ll = LLHelpers unichar_repr.ll = LLHelpers unicode_repr = UnicodeRepr() -emptystr = string_repr.convert_const("") -emptyunicode = unicode_repr.convert_const(u'') StringRepr.repr = string_repr UnicodeRepr.repr = unicode_repr @@ -1336,14 +1340,6 @@ string_repr.iterator_repr = StringIteratorRepr() unicode_repr.iterator_repr = UnicodeIteratorRepr() -# these should be in rclass, but circular imports prevent (also it's -# not that insane that a string constant is built in this file). - -instance_str_prefix = string_repr.convert_const("<") -instance_str_infix = string_repr.convert_const(" object at 0x") -instance_str_suffix = string_repr.convert_const(">") - -null_str = string_repr.convert_const("NULL") - -unboxed_instance_str_prefix = string_repr.convert_const("<unboxed ") -unboxed_instance_str_suffix = string_repr.convert_const(">") +@specialize.memo() +def conststr(s): + return string_repr.convert_const(s) diff --git a/rpython/rtyper/lltypesystem/rtagged.py b/rpython/rtyper/lltypesystem/rtagged.py --- a/rpython/rtyper/lltypesystem/rtagged.py +++ b/rpython/rtyper/lltypesystem/rtagged.py @@ -117,9 +117,9 @@ from rpython.rtyper.lltypesystem import rstr from rpython.rtyper.rint import signed_repr llstr1 = signed_repr.ll_str(ll_unboxed_to_int(i)) - return rstr.ll_strconcat(rstr.unboxed_instance_str_prefix, + return rstr.ll_strconcat(rstr.conststr("<unboxed "), rstr.ll_strconcat(llstr1, - rstr.unboxed_instance_str_suffix)) + rstr.conststr(">"))) else: return InstanceRepr.ll_str(self, i) diff --git a/rpython/rtyper/rclass.py b/rpython/rtyper/rclass.py --- a/rpython/rtyper/rclass.py +++ b/rpython/rtyper/rclass.py @@ -840,18 +840,18 @@ from rpython.rtyper.lltypesystem.ll_str import ll_int2hex from rpython.rlib.rarithmetic import r_uint if not i: - return rstr.null_str + return rstr.conststr("NULL") instance = cast_pointer(OBJECTPTR, i) # Two choices: the first gives a fast answer but it can change # (typically only once) during the life of the object. #uid = r_uint(cast_ptr_to_int(i)) uid = r_uint(llop.gc_id(lltype.Signed, i)) # - res = rstr.instance_str_prefix + res = rstr.conststr("<") res = rstr.ll_strconcat(res, instance.typeptr.name) - res = rstr.ll_strconcat(res, rstr.instance_str_infix) + res = rstr.ll_strconcat(res, rstr.conststr(" object at 0x")) res = rstr.ll_strconcat(res, ll_int2hex(uid, False)) - res = rstr.ll_strconcat(res, rstr.instance_str_suffix) + res = rstr.ll_strconcat(res, rstr.conststr(">")) return res def get_ll_eq_function(self): @@ -1092,7 +1092,6 @@ except StandardError: return None - # ____________________________________________________________ # # Low-level implementation of operations on classes and instances diff --git a/rpython/translator/c/test/test_typed.py b/rpython/translator/c/test/test_typed.py --- a/rpython/translator/c/test/test_typed.py +++ b/rpython/translator/c/test/test_typed.py @@ -1,8 +1,12 @@ from __future__ import with_statement import math -import sys +import sys, os +if __name__ == '__main__': + # hack for test_hash_string_siphash24() + sys.path.insert(0, os.path.join(os.path.dirname(__file__), + '..', '..', '..', '..')) import py from rpython.rlib.rstackovf import StackOverflow @@ -597,6 +601,49 @@ assert res[3] == compute_hash(d) assert res[4] == compute_hash(("Hi", None, (7.5, 2, d))) + def _test_hash_string(self, algo): + from rpython.rlib import objectmodel + objectmodel.set_hash_algorithm(algo) + s = "hello" + u = u"world" + v = u"\u1234\u2318+\u2bcd\u2102" + hash_s = compute_hash(s) + hash_u = compute_hash(u) + hash_v = compute_hash(v) + assert hash_s == compute_hash(u"hello") # same hash because it's + assert hash_u == compute_hash("world") # a latin-1 unicode + # + def fn(length): + assert length >= 1 + return str((compute_hash(s), + compute_hash(u), + compute_hash(v), + compute_hash(s[0] + s[1:length]), + compute_hash(u[0] + u[1:length]), + compute_hash(v[0] + v[1:length]), + )) + + assert fn(5) == str((hash_s, hash_u, hash_v, hash_s, hash_u, hash_v)) + + f = self.getcompiled(fn, [int]) + res = f(5) + res = [int(a) for a in res[1:-1].split(",")] + assert res[0] == hash_s + assert res[1] == hash_u + assert res[2] == hash_v + assert res[3] == hash_s + assert res[4] == hash_u + assert res[5] == hash_v + + def test_hash_string_rpython(self): + self._test_hash_string("rpython") + + def test_hash_string_siphash24(self): + import subprocess + subprocess.check_call([sys.executable, __file__, "siphash24", + self.__class__.__module__, + self.__class__.__name__]) + def test_list_basic_ops(self): def list_basic_ops(i, j): l = [1, 2, 3] @@ -896,3 +943,11 @@ f = self.getcompiled(func, [int]) res = f(2) assert res == 1 # and not 2 + + +if __name__ == '__main__': + # for test_hash_string_siphash24() + algo, clsmodule, clsname = sys.argv[1:] + mod = __import__(clsmodule, None, None, [clsname]) + cls = getattr(mod, clsname) + cls()._test_hash_string(algo) diff --git a/rpython/translator/goal/translate.py b/rpython/translator/goal/translate.py --- a/rpython/translator/goal/translate.py +++ b/rpython/translator/goal/translate.py @@ -11,7 +11,8 @@ from rpython.config.config import (to_optparse, OptionDescription, BoolOption, ArbitraryOption, StrOption, IntOption, Config, ChoiceOption, OptHelpFormatter) from rpython.config.translationoption import (get_combined_translation_config, - set_opt_level, OPT_LEVELS, DEFAULT_OPT_LEVEL, set_platform, CACHE_DIR) + set_opt_level, OPT_LEVELS, DEFAULT_OPT_LEVEL, set_platform, CACHE_DIR, + apply_extra_settings) # clean up early rpython/_cache try: @@ -177,6 +178,9 @@ if 'handle_config' in targetspec_dic: targetspec_dic['handle_config'](config, translateconfig) + # apply extra settings + apply_extra_settings(config) + return targetspec_dic, translateconfig, config, args def show_help(translateconfig, opt_parser, targetspec_dic, config): _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit