Author: Armin Rigo <ar...@tunes.org> Branch: Changeset: r85991:a84c4b359dcc Date: 2016-08-02 18:13 +0200 http://bitbucket.org/pypy/pypy/changeset/a84c4b359dcc/
Log: hg merge null_byte_after_str Allocate all RPython strings with one extra byte, normally unused. It is used to hold a final zero in case we need some 'char *' representation of the string, together with checks like 'not can_move()' or object pinning. Main new thing that this allows: 'ffi.from_buffer(string)'. diff --git a/pypy/module/_cffi_backend/ctypefunc.py b/pypy/module/_cffi_backend/ctypefunc.py --- a/pypy/module/_cffi_backend/ctypefunc.py +++ b/pypy/module/_cffi_backend/ctypefunc.py @@ -157,11 +157,13 @@ mustfree_max_plus_1 = 0 buffer = lltype.malloc(rffi.CCHARP.TO, size, flavor='raw') try: + keepalives = [None] * len(args_w) # None or strings for i in range(len(args_w)): data = rffi.ptradd(buffer, cif_descr.exchange_args[i]) w_obj = args_w[i] argtype = self.fargs[i] - if argtype.convert_argument_from_object(data, w_obj): + if argtype.convert_argument_from_object(data, w_obj, + keepalives, i): # argtype is a pointer type, and w_obj a list/tuple/str mustfree_max_plus_1 = i + 1 @@ -177,9 +179,13 @@ if isinstance(argtype, W_CTypePointer): data = rffi.ptradd(buffer, cif_descr.exchange_args[i]) flag = get_mustfree_flag(data) + raw_cdata = rffi.cast(rffi.CCHARPP, data)[0] if flag == 1: - raw_cdata = rffi.cast(rffi.CCHARPP, data)[0] lltype.free(raw_cdata, flavor='raw') + elif flag >= 4: + value = keepalives[i] + assert value is not None + rffi.free_nonmovingbuffer(value, raw_cdata, chr(flag)) lltype.free(buffer, flavor='raw') keepalive_until_here(args_w) return w_res diff --git a/pypy/module/_cffi_backend/ctypeobj.py b/pypy/module/_cffi_backend/ctypeobj.py --- a/pypy/module/_cffi_backend/ctypeobj.py +++ b/pypy/module/_cffi_backend/ctypeobj.py @@ -83,7 +83,7 @@ raise oefmt(space.w_TypeError, "cannot initialize cdata '%s'", self.name) - def convert_argument_from_object(self, cdata, w_ob): + def convert_argument_from_object(self, cdata, w_ob, keepalives, i): self.convert_from_object(cdata, w_ob) return False diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py --- a/pypy/module/_cffi_backend/ctypeptr.py +++ b/pypy/module/_cffi_backend/ctypeptr.py @@ -14,8 +14,8 @@ class W_CTypePtrOrArray(W_CType): - _attrs_ = ['ctitem', 'can_cast_anything', 'length'] - _immutable_fields_ = ['ctitem', 'can_cast_anything', 'length'] + _attrs_ = ['ctitem', 'can_cast_anything', 'accept_str', 'length'] + _immutable_fields_ = ['ctitem', 'can_cast_anything', 'accept_str', 'length'] length = -1 def __init__(self, space, size, extra, extra_position, ctitem, @@ -28,6 +28,9 @@ # - for functions, it is the return type self.ctitem = ctitem self.can_cast_anything = could_cast_anything and ctitem.cast_anything + self.accept_str = (self.can_cast_anything or + (ctitem.is_primitive_integer and + ctitem.size == rffi.sizeof(lltype.Char))) def is_unichar_ptr_or_array(self): return isinstance(self.ctitem, ctypeprim.W_CTypePrimitiveUniChar) @@ -70,9 +73,7 @@ pass else: self._convert_array_from_listview(cdata, space.listview(w_ob)) - elif (self.can_cast_anything or - (self.ctitem.is_primitive_integer and - self.ctitem.size == rffi.sizeof(lltype.Char))): + elif self.accept_str: if not space.isinstance_w(w_ob, space.w_str): raise self._convert_error("str or list or tuple", w_ob) s = space.str_w(w_ob) @@ -260,8 +261,16 @@ else: return lltype.nullptr(rffi.CCHARP.TO) - def _prepare_pointer_call_argument(self, w_init, cdata): + def _prepare_pointer_call_argument(self, w_init, cdata, keepalives, i): space = self.space + if self.accept_str and space.isinstance_w(w_init, space.w_str): + # special case to optimize strings passed to a "char *" argument + value = w_init.str_w(space) + keepalives[i] = value + buf, buf_flag = rffi.get_nonmovingbuffer_final_null(value) + rffi.cast(rffi.CCHARPP, cdata)[0] = buf + return ord(buf_flag) # 4, 5 or 6 + # if (space.isinstance_w(w_init, space.w_list) or space.isinstance_w(w_init, space.w_tuple)): length = space.int_w(space.len(w_init)) @@ -297,10 +306,11 @@ rffi.cast(rffi.CCHARPP, cdata)[0] = result return 1 - def convert_argument_from_object(self, cdata, w_ob): + def convert_argument_from_object(self, cdata, w_ob, keepalives, i): from pypy.module._cffi_backend.ctypefunc import set_mustfree_flag result = (not isinstance(w_ob, cdataobj.W_CData) and - self._prepare_pointer_call_argument(w_ob, cdata)) + self._prepare_pointer_call_argument(w_ob, cdata, + keepalives, i)) if result == 0: self.convert_from_object(cdata, w_ob) set_mustfree_flag(cdata, result) diff --git a/pypy/module/_cffi_backend/ffi_obj.py b/pypy/module/_cffi_backend/ffi_obj.py --- a/pypy/module/_cffi_backend/ffi_obj.py +++ b/pypy/module/_cffi_backend/ffi_obj.py @@ -353,7 +353,7 @@ 'array.array' or numpy arrays.""" # w_ctchara = newtype._new_chara_type(self.space) - return func.from_buffer(self.space, w_ctchara, w_python_buffer) + return func._from_buffer(self.space, w_ctchara, w_python_buffer) @unwrap_spec(w_arg=W_CData) diff --git a/pypy/module/_cffi_backend/func.py b/pypy/module/_cffi_backend/func.py --- a/pypy/module/_cffi_backend/func.py +++ b/pypy/module/_cffi_backend/func.py @@ -1,7 +1,8 @@ from rpython.rtyper.annlowlevel import llstr from rpython.rtyper.lltypesystem import lltype, rffi from rpython.rtyper.lltypesystem.rstr import copy_string_to_raw -from rpython.rlib.objectmodel import keepalive_until_here +from rpython.rlib.objectmodel import keepalive_until_here, we_are_translated +from rpython.rlib import jit from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import unwrap_spec, WrappedDefault @@ -132,17 +133,66 @@ raise oefmt(space.w_TypeError, "needs 'char[]', got '%s'", w_ctype.name) # + return _from_buffer(space, w_ctype, w_x) + +def _from_buffer(space, w_ctype, w_x): buf = _fetch_as_read_buffer(space, w_x) - try: - _cdata = buf.get_raw_address() - except ValueError: - raise oefmt(space.w_TypeError, - "from_buffer() got a '%T' object, which supports the " - "buffer interface but cannot be rendered as a plain " - "raw address on PyPy", w_x) + if space.isinstance_w(w_x, space.w_str): + _cdata = get_raw_address_of_string(space, w_x) + else: + try: + _cdata = buf.get_raw_address() + except ValueError: + raise oefmt(space.w_TypeError, + "from_buffer() got a '%T' object, which supports the " + "buffer interface but cannot be rendered as a plain " + "raw address on PyPy", w_x) # return cdataobj.W_CDataFromBuffer(space, _cdata, w_ctype, buf, w_x) +# ____________________________________________________________ + +class RawBytes(object): + def __init__(self, string): + self.ptr = rffi.str2charp(string, track_allocation=False) + def __del__(self): + rffi.free_charp(self.ptr, track_allocation=False) + +class RawBytesCache(object): + def __init__(self, space): + from pypy.interpreter.baseobjspace import W_Root + from rpython.rlib import rweakref + self.wdict = rweakref.RWeakKeyDictionary(W_Root, RawBytes) + +@jit.dont_look_inside +def get_raw_address_of_string(space, w_x): + """Special case for ffi.from_buffer(string). Returns a 'char *' that + is valid as long as the string object is alive. Two calls to + ffi.from_buffer(same_string) are guaranteed to return the same pointer. + """ + from rpython.rtyper.annlowlevel import llstr + from rpython.rtyper.lltypesystem.rstr import STR + from rpython.rtyper.lltypesystem import llmemory + from rpython.rlib import rgc + + cache = space.fromcache(RawBytesCache) + rawbytes = cache.wdict.get(w_x) + if rawbytes is None: + data = space.str_w(w_x) + if we_are_translated() and not rgc.can_move(data): + lldata = llstr(data) + data_start = (llmemory.cast_ptr_to_adr(lldata) + + rffi.offsetof(STR, 'chars') + + llmemory.itemoffsetof(STR.chars, 0)) + data_start = rffi.cast(rffi.CCHARP, data_start) + data_start[len(data)] = '\x00' # write the final extra null + return data_start + rawbytes = RawBytes(data) + cache.wdict.set(w_x, rawbytes) + return rawbytes.ptr + +# ____________________________________________________________ + def unsafe_escaping_ptr_for_ptr_or_array(w_cdata): if not w_cdata.ctype.is_nonfunc_pointer_or_array: diff --git a/pypy/module/_cffi_backend/parse_c_type.py b/pypy/module/_cffi_backend/parse_c_type.py --- a/pypy/module/_cffi_backend/parse_c_type.py +++ b/pypy/module/_cffi_backend/parse_c_type.py @@ -97,11 +97,8 @@ [rffi.INT], rffi.CCHARP) def parse_c_type(info, input): - p_input = rffi.str2charp(input) - try: + with rffi.scoped_view_charp(input) as p_input: res = ll_parse_c_type(info, p_input) - finally: - rffi.free_charp(p_input) return rffi.cast(lltype.Signed, res) NULL_CTX = lltype.nullptr(PCTX.TO) @@ -130,15 +127,13 @@ return rffi.getintfield(src_ctx, 'c_num_types') def search_in_globals(ctx, name): - c_name = rffi.str2charp(name) - result = ll_search_in_globals(ctx, c_name, - rffi.cast(rffi.SIZE_T, len(name))) - rffi.free_charp(c_name) + with rffi.scoped_view_charp(name) as c_name: + result = ll_search_in_globals(ctx, c_name, + rffi.cast(rffi.SIZE_T, len(name))) return rffi.cast(lltype.Signed, result) def search_in_struct_unions(ctx, name): - c_name = rffi.str2charp(name) - result = ll_search_in_struct_unions(ctx, c_name, - rffi.cast(rffi.SIZE_T, len(name))) - rffi.free_charp(c_name) + with rffi.scoped_view_charp(name) as c_name: + result = ll_search_in_struct_unions(ctx, c_name, + rffi.cast(rffi.SIZE_T, len(name))) return rffi.cast(lltype.Signed, result) diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -3330,13 +3330,18 @@ BChar = new_primitive_type("char") BCharP = new_pointer_type(BChar) BCharA = new_array_type(BCharP, None) - py.test.raises(TypeError, from_buffer, BCharA, b"foo") + p1 = from_buffer(BCharA, b"foo") + assert p1 == from_buffer(BCharA, b"foo") + import gc; gc.collect() + assert p1 == from_buffer(BCharA, b"foo") py.test.raises(TypeError, from_buffer, BCharA, u+"foo") try: from __builtin__ import buffer except ImportError: pass else: + # from_buffer(buffer(b"foo")) does not work, because it's not + # implemented on pypy; only from_buffer(b"foo") works. py.test.raises(TypeError, from_buffer, BCharA, buffer(b"foo")) py.test.raises(TypeError, from_buffer, BCharA, buffer(u+"foo")) try: diff --git a/pypy/module/_multiprocessing/interp_connection.py b/pypy/module/_multiprocessing/interp_connection.py --- a/pypy/module/_multiprocessing/interp_connection.py +++ b/pypy/module/_multiprocessing/interp_connection.py @@ -401,21 +401,20 @@ _WriteFile, ERROR_NO_SYSTEM_RESOURCES) from rpython.rlib import rwin32 - charp = rffi.str2charp(buf) - written_ptr = lltype.malloc(rffi.CArrayPtr(rwin32.DWORD).TO, 1, - flavor='raw') - try: - result = _WriteFile( - self.handle, rffi.ptradd(charp, offset), - size, written_ptr, rffi.NULL) + with rffi.scoped_view_charp(buf) as charp: + written_ptr = lltype.malloc(rffi.CArrayPtr(rwin32.DWORD).TO, 1, + flavor='raw') + try: + result = _WriteFile( + self.handle, rffi.ptradd(charp, offset), + size, written_ptr, rffi.NULL) - if (result == 0 and - rwin32.GetLastError_saved() == ERROR_NO_SYSTEM_RESOURCES): - raise oefmt(space.w_ValueError, - "Cannot send %d bytes over connection", size) - finally: - rffi.free_charp(charp) - lltype.free(written_ptr, flavor='raw') + if (result == 0 and + rwin32.GetLastError_saved() == ERROR_NO_SYSTEM_RESOURCES): + raise oefmt(space.w_ValueError, + "Cannot send %d bytes over connection", size) + finally: + lltype.free(written_ptr, flavor='raw') def do_recv_string(self, space, buflength, maxlength): from pypy.module._multiprocessing.interp_win32 import ( diff --git a/pypy/module/_ssl/interp_ssl.py b/pypy/module/_ssl/interp_ssl.py --- a/pypy/module/_ssl/interp_ssl.py +++ b/pypy/module/_ssl/interp_ssl.py @@ -135,7 +135,7 @@ def __init__(self, ctx, protos): self.protos = protos - self.buf, self.pinned, self.is_raw = rffi.get_nonmovingbuffer(protos) + self.buf, self.bufflag = rffi.get_nonmovingbuffer(protos) NPN_STORAGE.set(rffi.cast(lltype.Unsigned, self.buf), self) # set both server and client callbacks, because the context @@ -147,7 +147,7 @@ def __del__(self): rffi.free_nonmovingbuffer( - self.protos, self.buf, self.pinned, self.is_raw) + self.protos, self.buf, self.bufflag) @staticmethod def advertiseNPN_cb(s, data_ptr, len_ptr, args): @@ -181,7 +181,7 @@ def __init__(self, ctx, protos): self.protos = protos - self.buf, self.pinned, self.is_raw = rffi.get_nonmovingbuffer(protos) + self.buf, self.bufflag = rffi.get_nonmovingbuffer(protos) ALPN_STORAGE.set(rffi.cast(lltype.Unsigned, self.buf), self) with rffi.scoped_str2charp(protos) as protos_buf: @@ -193,7 +193,7 @@ def __del__(self): rffi.free_nonmovingbuffer( - self.protos, self.buf, self.pinned, self.is_raw) + self.protos, self.buf, self.bufflag) @staticmethod def selectALPN_cb(s, out_ptr, outlen_ptr, client, client_len, args): @@ -228,7 +228,7 @@ Mix string into the OpenSSL PRNG state. entropy (a float) is a lower bound on the entropy contained in string.""" - with rffi.scoped_str2charp(string) as buf: + with rffi.scoped_nonmovingbuffer(string) as buf: libssl_RAND_add(buf, len(string), entropy) def RAND_status(space): diff --git a/pypy/module/cppyy/capi/builtin_capi.py b/pypy/module/cppyy/capi/builtin_capi.py --- a/pypy/module/cppyy/capi/builtin_capi.py +++ b/pypy/module/cppyy/capi/builtin_capi.py @@ -537,9 +537,8 @@ releasegil=ts_helper, compilation_info=backend.eci) def c_charp2stdstring(space, svalue): - charp = rffi.str2charp(svalue) - result = _c_charp2stdstring(charp) - rffi.free_charp(charp) + with rffi.scoped_view_charp(svalue) as charp: + result = _c_charp2stdstring(charp) return result _c_stdstring2stdstring = rffi.llexternal( "cppyy_stdstring2stdstring", diff --git a/pypy/module/cppyy/capi/cint_capi.py b/pypy/module/cppyy/capi/cint_capi.py --- a/pypy/module/cppyy/capi/cint_capi.py +++ b/pypy/module/cppyy/capi/cint_capi.py @@ -82,9 +82,8 @@ releasegil=ts_helper, compilation_info=eci) def c_charp2TString(space, svalue): - charp = rffi.str2charp(svalue) - result = _c_charp2TString(charp) - rffi.free_charp(charp) + with rffi.scoped_view_charp(svalue) as charp: + result = _c_charp2TString(charp) return result _c_TString2TString = rffi.llexternal( "cppyy_TString2TString", diff --git a/pypy/module/cppyy/capi/loadable_capi.py b/pypy/module/cppyy/capi/loadable_capi.py --- a/pypy/module/cppyy/capi/loadable_capi.py +++ b/pypy/module/cppyy/capi/loadable_capi.py @@ -65,6 +65,7 @@ else: # only other use is sring n = len(obj._string) assert raw_string == rffi.cast(rffi.CCHARP, 0) + # XXX could use rffi.get_nonmovingbuffer_final_null() raw_string = rffi.str2charp(obj._string) data = rffi.cast(rffi.CCHARPP, data) data[0] = raw_string diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py --- a/rpython/jit/backend/arm/opassembler.py +++ b/rpython/jit/backend/arm/opassembler.py @@ -883,6 +883,7 @@ ofs_items, itemsize, _ = symbolic.get_array_token(rstr.STR, self.cpu.translate_support_code) assert itemsize == 1 + ofs_items -= 1 # for the extra null character scale = 0 self._gen_address(resloc, baseloc, ofsloc, scale, ofs_items) diff --git a/rpython/jit/backend/llsupport/descr.py b/rpython/jit/backend/llsupport/descr.py --- a/rpython/jit/backend/llsupport/descr.py +++ b/rpython/jit/backend/llsupport/descr.py @@ -280,7 +280,7 @@ concrete_type = '\x00' def __init__(self, basesize, itemsize, lendescr, flag, is_pure=False, concrete_type='\x00'): - self.basesize = basesize + self.basesize = basesize # this includes +1 for STR self.itemsize = itemsize self.lendescr = lendescr # or None, if no length self.flag = flag @@ -676,7 +676,7 @@ def unpack_arraydescr(arraydescr): assert isinstance(arraydescr, ArrayDescr) - ofs = arraydescr.basesize + ofs = arraydescr.basesize # this includes +1 for STR size = arraydescr.itemsize sign = arraydescr.is_item_signed() return size, ofs, sign diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py --- a/rpython/jit/backend/llsupport/rewrite.py +++ b/rpython/jit/backend/llsupport/rewrite.py @@ -293,6 +293,7 @@ basesize, itemsize, ofs_length = get_array_token(rstr.STR, self.cpu.translate_support_code) assert itemsize == 1 + basesize -= 1 # for the extra null character self.emit_gc_load_or_indexed(op, op.getarg(0), op.getarg(1), itemsize, itemsize, basesize, NOT_SIGNED) elif opnum == rop.UNICODEGETITEM: @@ -304,6 +305,7 @@ basesize, itemsize, ofs_length = get_array_token(rstr.STR, self.cpu.translate_support_code) assert itemsize == 1 + basesize -= 1 # for the extra null character self.emit_gc_store_or_indexed(op, op.getarg(0), op.getarg(1), op.getarg(2), itemsize, itemsize, basesize) elif opnum == rop.UNICODESETITEM: diff --git a/rpython/jit/backend/llsupport/symbolic.py b/rpython/jit/backend/llsupport/symbolic.py --- a/rpython/jit/backend/llsupport/symbolic.py +++ b/rpython/jit/backend/llsupport/symbolic.py @@ -29,7 +29,7 @@ def get_array_token(T, translate_support_code): # T can be an array or a var-sized structure if translate_support_code: - basesize = llmemory.sizeof(T, 0) + basesize = llmemory.sizeof(T, 0) # this includes +1 for STR if isinstance(T, lltype.Struct): SUBARRAY = getattr(T, T._arrayfld) itemsize = llmemory.sizeof(SUBARRAY.OF) @@ -57,6 +57,7 @@ assert carray.length.size == WORD ofs_length = before_array_part + carray.length.offset basesize = before_array_part + carray.items.offset + basesize += T._hints.get('extra_item_after_alloc', 0) # +1 for STR carrayitem = ll2ctypes.get_ctypes_type(T.OF) itemsize = ctypes.sizeof(carrayitem) return basesize, itemsize, ofs_length diff --git a/rpython/jit/backend/llsupport/test/test_descr.py b/rpython/jit/backend/llsupport/test/test_descr.py --- a/rpython/jit/backend/llsupport/test/test_descr.py +++ b/rpython/jit/backend/llsupport/test/test_descr.py @@ -435,8 +435,10 @@ def test_bytearray_descr(): c0 = GcCache(False) descr = get_array_descr(c0, rstr.STR) # for bytearray + # note that we get a basesize that has 1 extra byte for the final null char + # (only for STR) assert descr.flag == FLAG_UNSIGNED - assert descr.basesize == struct.calcsize("PP") # hash, length + assert descr.basesize == struct.calcsize("PP") + 1 # hash, length, extra assert descr.lendescr.offset == struct.calcsize("P") # hash assert not descr.is_array_of_pointers() diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py --- a/rpython/jit/backend/llsupport/test/test_rewrite.py +++ b/rpython/jit/backend/llsupport/test/test_rewrite.py @@ -647,6 +647,9 @@ """) def test_rewrite_assembler_newstr_newunicode(self): + # note: strdescr.basesize already contains the extra final character, + # so that's why newstr(14) is rounded up to 'basesize+15' and not + # 'basesize+16'. self.check_rewrite(""" [i2] p0 = newstr(14) @@ -657,12 +660,12 @@ """, """ [i2] p0 = call_malloc_nursery( \ - %(strdescr.basesize + 16 * strdescr.itemsize + \ + %(strdescr.basesize + 15 * strdescr.itemsize + \ unicodedescr.basesize + 10 * unicodedescr.itemsize)d) gc_store(p0, 0, %(strdescr.tid)d, %(tiddescr.field_size)s) gc_store(p0, %(strlendescr.offset)s, 14, %(strlendescr.field_size)s) gc_store(p0, 0, 0, %(strhashdescr.field_size)s) - p1 = nursery_ptr_increment(p0, %(strdescr.basesize + 16 * strdescr.itemsize)d) + p1 = nursery_ptr_increment(p0, %(strdescr.basesize + 15 * strdescr.itemsize)d) gc_store(p1, 0, %(unicodedescr.tid)d, %(tiddescr.field_size)s) gc_store(p1, %(unicodelendescr.offset)s, 10, %(unicodelendescr.field_size)s) gc_store(p1, 0, 0, %(unicodehashdescr.field_size)s) @@ -1240,14 +1243,14 @@ # 'i3 = gc_load_i(p0,i5,%(unicodedescr.itemsize)d)'], [True, (4,), 'i3 = strgetitem(p0,i1)' '->' 'i3 = gc_load_indexed_i(p0,i1,1,' - '%(strdescr.basesize)d,1)'], + '%(strdescr.basesize-1)d,1)'], #[False, (4,), 'i3 = strgetitem(p0,i1)' '->' - # 'i5 = int_add(i1, %(strdescr.basesize)d);' + # 'i5 = int_add(i1, %(strdescr.basesize-1)d);' # 'i3 = gc_load_i(p0,i5,1)'], ## setitem str/unicode [True, (4,), 'i3 = strsetitem(p0,i1,0)' '->' 'i3 = gc_store_indexed(p0,i1,0,1,' - '%(strdescr.basesize)d,1)'], + '%(strdescr.basesize-1)d,1)'], [True, (2,4), 'i3 = unicodesetitem(p0,i1,0)' '->' 'i3 = gc_store_indexed(p0,i1,0,' '%(unicodedescr.itemsize)d,' diff --git a/rpython/jit/backend/llsupport/test/ztranslation_test.py b/rpython/jit/backend/llsupport/test/ztranslation_test.py --- a/rpython/jit/backend/llsupport/test/ztranslation_test.py +++ b/rpython/jit/backend/llsupport/test/ztranslation_test.py @@ -3,7 +3,7 @@ from rpython.rlib.jit import JitDriver, unroll_parameters, set_param from rpython.rlib.jit import PARAMETERS, dont_look_inside from rpython.rlib.jit import promote, _get_virtualizable_token -from rpython.rlib import jit_hooks, rposix +from rpython.rlib import jit_hooks, rposix, rgc from rpython.rlib.objectmodel import keepalive_until_here from rpython.rlib.rthread import ThreadLocalReference, ThreadLocalField from rpython.jit.backend.detect_cpu import getcpuclass @@ -11,7 +11,7 @@ from rpython.jit.codewriter.policy import StopAtXPolicy from rpython.config.config import ConfigError from rpython.translator.tool.cbuild import ExternalCompilationInfo -from rpython.rtyper.lltypesystem import lltype, rffi +from rpython.rtyper.lltypesystem import lltype, rffi, rstr from rpython.rlib.rjitlog import rjitlog as jl @@ -29,6 +29,7 @@ # - floats neg and abs # - cast_int_to_float # - llexternal with macro=True + # - extra place for the zero after STR instances class BasicFrame(object): _virtualizable_ = ['i'] @@ -56,7 +57,7 @@ return ("/home.py",0,0) jitdriver = JitDriver(greens = [], - reds = ['total', 'frame', 'j'], + reds = ['total', 'frame', 'prev_s', 'j'], virtualizables = ['frame'], get_location = get_location) def f(i, j): @@ -68,9 +69,12 @@ total = 0 frame = Frame(i) j = float(j) + prev_s = rstr.mallocstr(16) while frame.i > 3: - jitdriver.can_enter_jit(frame=frame, total=total, j=j) - jitdriver.jit_merge_point(frame=frame, total=total, j=j) + jitdriver.can_enter_jit(frame=frame, total=total, j=j, + prev_s=prev_s) + jitdriver.jit_merge_point(frame=frame, total=total, j=j, + prev_s=prev_s) _get_virtualizable_token(frame) total += frame.i if frame.i >= 20: @@ -82,6 +86,11 @@ k = myabs1(myabs2(j)) if k - abs(j): raise ValueError if k - abs(-j): raise ValueError + s = rstr.mallocstr(16) + rgc.ll_write_final_null_char(s) + rgc.ll_write_final_null_char(prev_s) + if (frame.i & 3) == 0: + prev_s = s return chr(total % 253) # class Virt2(object): diff --git a/rpython/jit/backend/ppc/opassembler.py b/rpython/jit/backend/ppc/opassembler.py --- a/rpython/jit/backend/ppc/opassembler.py +++ b/rpython/jit/backend/ppc/opassembler.py @@ -994,6 +994,7 @@ basesize, itemsize, _ = symbolic.get_array_token(rstr.STR, self.cpu.translate_support_code) assert itemsize == 1 + basesize -= 1 # for the extra null character scale = 0 self._emit_load_for_copycontent(r.r0, src_ptr_loc, src_ofs_loc, scale) diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -1673,25 +1673,6 @@ dest_addr = AddressLoc(base_loc, ofs_loc, scale, offset_loc.value) self.save_into_mem(dest_addr, value_loc, size_loc) - def genop_discard_strsetitem(self, op, arglocs): - base_loc, ofs_loc, val_loc = arglocs - basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR, - self.cpu.translate_support_code) - assert itemsize == 1 - dest_addr = AddressLoc(base_loc, ofs_loc, 0, basesize) - self.mc.MOV8(dest_addr, val_loc.lowest8bits()) - - def genop_discard_unicodesetitem(self, op, arglocs): - base_loc, ofs_loc, val_loc = arglocs - basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE, - self.cpu.translate_support_code) - if itemsize == 4: - self.mc.MOV32(AddressLoc(base_loc, ofs_loc, 2, basesize), val_loc) - elif itemsize == 2: - self.mc.MOV16(AddressLoc(base_loc, ofs_loc, 1, basesize), val_loc) - else: - assert 0, itemsize - # genop_discard_setfield_raw = genop_discard_setfield_gc def genop_math_read_timestamp(self, op, arglocs, resloc): diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -1219,6 +1219,7 @@ ofs_items, itemsize, _ = symbolic.get_array_token(rstr.STR, self.translate_support_code) assert itemsize == 1 + ofs_items -= 1 # for the extra null character scale = 0 self.assembler.load_effective_addr(ofsloc, ofs_items, scale, resloc, baseloc) diff --git a/rpython/jit/backend/zarch/opassembler.py b/rpython/jit/backend/zarch/opassembler.py --- a/rpython/jit/backend/zarch/opassembler.py +++ b/rpython/jit/backend/zarch/opassembler.py @@ -991,6 +991,7 @@ basesize, itemsize, _ = symbolic.get_array_token(rstr.STR, self.cpu.translate_support_code) assert itemsize == 1 + basesize -= 1 # for the extra null character scale = 0 # src and src_len are tmp registers diff --git a/rpython/jit/metainterp/test/test_virtualizable.py b/rpython/jit/metainterp/test/test_virtualizable.py --- a/rpython/jit/metainterp/test/test_virtualizable.py +++ b/rpython/jit/metainterp/test/test_virtualizable.py @@ -1381,7 +1381,7 @@ return result def indirection(arg): - return interp(arg) + return interp(arg) + 1 def run_interp(n): f = hint(Frame(n), access_directly=True) diff --git a/rpython/memory/gcheader.py b/rpython/memory/gcheader.py --- a/rpython/memory/gcheader.py +++ b/rpython/memory/gcheader.py @@ -11,7 +11,21 @@ def __init__(self, HDR): """NOT_RPYTHON""" self.HDR = HDR - self.obj2header = weakref.WeakKeyDictionary() + # + # The following used to be a weakref.WeakKeyDictionary(), but + # the problem is that if you have a gcobj which has already a + # weakref cached on it and the hash already cached in that + # weakref, and later the hash of the gcobj changes (because it + # is ll2ctypes-ified), then that gcobj cannot be used as a key + # in a WeakKeyDictionary any more: from this point on, + # 'ref(gcobj)' and 'ref(gcobj, callback)' return two objects + # with different hashes... and so e.g. the sequence of + # operations 'obj2header[x]=y; assert x in obj2header' fails. + # + # Instead, just use a regular dictionary and hope that not too + # many objects would be reclaimed in a given GCHeaderBuilder + # instance. + self.obj2header = {} self.size_gc_header = llmemory.GCHeaderOffset(self) def header_of_object(self, gcptr): diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py --- a/rpython/rlib/objectmodel.py +++ b/rpython/rlib/objectmodel.py @@ -281,6 +281,10 @@ return lltype.Signed malloc_zero_filled = CDefinedIntSymbolic('MALLOC_ZERO_FILLED', default=0) +_translated_to_c = CDefinedIntSymbolic('1 /*_translated_to_c*/', default=0) + +def we_are_translated_to_c(): + return we_are_translated() and _translated_to_c # ____________________________________________________________ diff --git a/rpython/rlib/rdtoa.py b/rpython/rlib/rdtoa.py --- a/rpython/rlib/rdtoa.py +++ b/rpython/rlib/rdtoa.py @@ -56,22 +56,24 @@ raise MemoryError end_ptr = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw') try: - ll_input = rffi.str2charp(input) + # note: don't use the class scoped_view_charp here, it + # break some tests because this function is used by the GC + ll_input, flag = rffi.get_nonmovingbuffer_final_null(input) try: result = dg_strtod(ll_input, end_ptr) endpos = (rffi.cast(lltype.Signed, end_ptr[0]) - rffi.cast(lltype.Signed, ll_input)) - - if endpos == 0 or endpos < len(input): - raise ValueError("invalid input at position %d" % (endpos,)) - - return result finally: - rffi.free_charp(ll_input) + rffi.free_nonmovingbuffer(input, ll_input, flag) finally: lltype.free(end_ptr, flavor='raw') + if endpos == 0 or endpos < len(input): + raise ValueError("invalid input at position %d" % (endpos,)) + + return result + lower_special_strings = ['inf', '+inf', '-inf', 'nan'] upper_special_strings = ['INF', '+INF', '-INF', 'NAN'] diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py --- a/rpython/rlib/rgc.py +++ b/rpython/rlib/rgc.py @@ -1268,3 +1268,26 @@ ptr = lltype.direct_arrayitems(array) # ptr is a Ptr(FixedSizeArray(Char, 1)). Cast it to a rffi.CCHARP return rffi.cast(rffi.CCHARP, ptr) + +@jit.dont_look_inside +@no_collect +@specialize.ll() +def ll_write_final_null_char(s): + """'s' is a low-level STR; writes a terminating NULL character after + the other characters in 's'. Warning, this only works because of + the 'extra_item_after_alloc' hack inside the definition of STR. + """ + from rpython.rtyper.lltypesystem import rffi + PSTR = lltype.typeOf(s) + assert has_final_null_char(PSTR) == 1 + n = llmemory.offsetof(PSTR.TO, 'chars') + n += llmemory.itemoffsetof(PSTR.TO.chars, 0) + n = llmemory.raw_malloc_usage(n) + n += len(s.chars) + # no GC operation from here! + ptr = rffi.cast(rffi.CCHARP, s) + ptr[n] = '\x00' + +@specialize.memo() +def has_final_null_char(PSTR): + return PSTR.TO.chars._hints.get('extra_item_after_alloc', 0) diff --git a/rpython/rtyper/lltypesystem/ll2ctypes.py b/rpython/rtyper/lltypesystem/ll2ctypes.py --- a/rpython/rtyper/lltypesystem/ll2ctypes.py +++ b/rpython/rtyper/lltypesystem/ll2ctypes.py @@ -250,7 +250,9 @@ if not A._hints.get('nolength'): _fields_ = [('length', lentype), - ('items', max_n * ctypes_item)] + ('items', + (max_n + A._hints.get('extra_item_after_alloc', 0)) + * ctypes_item)] else: _fields_ = [('items', max_n * ctypes_item)] @@ -695,6 +697,9 @@ # we have no clue, so we allow whatever index return 0, maxint + def shrinklength(self, newlength): + raise NotImplementedError + def getitem(self, index, uninitialized_ok=False): res = self._storage.contents._getitem(index, boundscheck=False) if isinstance(self._TYPE.OF, lltype.ContainerType): diff --git a/rpython/rtyper/lltypesystem/llmemory.py b/rpython/rtyper/lltypesystem/llmemory.py --- a/rpython/rtyper/lltypesystem/llmemory.py +++ b/rpython/rtyper/lltypesystem/llmemory.py @@ -304,8 +304,15 @@ return cast_ptr_to_adr(p) def raw_memcopy(self, srcadr, dstadr): - # should really copy the length field, but we can't - pass + # copy the length field, if we can + srclen = srcadr.ptr._obj.getlength() + dstlen = dstadr.ptr._obj.getlength() + if dstlen != srclen: + assert dstlen > srclen, "can't increase the length" + # a decrease in length occurs in the GC tests when copying a STR: + # the copy is initially allocated with really one extra char, + # the 'extra_item_after_alloc', and must be fixed. + dstadr.ptr._obj.shrinklength(srclen) class ArrayLengthOffset(AddressOffset): @@ -390,11 +397,23 @@ else: raise Exception("don't know how to take the size of a %r"%TYPE) +@specialize.memo() +def extra_item_after_alloc(ARRAY): + assert isinstance(ARRAY, lltype.Array) + return ARRAY._hints.get('extra_item_after_alloc', 0) + @specialize.arg(0) def sizeof(TYPE, n=None): + """Return the symbolic size of TYPE. + For a Struct with no varsized part, it must be called with n=None. + For an Array or a Struct with a varsized part, it is the number of items. + There is a special case to return 1 more than requested if the array + has the hint 'extra_item_after_alloc' set to 1. + """ if n is None: return _sizeof_none(TYPE) elif isinstance(TYPE, lltype.Array): + n += extra_item_after_alloc(TYPE) return itemoffsetof(TYPE) + _sizeof_none(TYPE.OF) * n else: return _sizeof_int(TYPE, n) @@ -1036,7 +1055,7 @@ _reccopy(subsrc, subdst) else: # this is a hack XXX de-hack this - llvalue = source._obj.getitem(i, uninitialized_ok=True) + llvalue = source._obj.getitem(i, uninitialized_ok=2) if not isinstance(llvalue, lltype._uninitialized): dest._obj.setitem(i, llvalue) elif isinstance(T, lltype.Struct): diff --git a/rpython/rtyper/lltypesystem/lltype.py b/rpython/rtyper/lltypesystem/lltype.py --- a/rpython/rtyper/lltypesystem/lltype.py +++ b/rpython/rtyper/lltypesystem/lltype.py @@ -1926,14 +1926,29 @@ return 0, stop def getitem(self, index, uninitialized_ok=False): - v = self.items[index] + try: + v = self.items[index] + except IndexError: + if (index == len(self.items) and uninitialized_ok == 2 and + self._TYPE._hints.get('extra_item_after_alloc')): + # special case: reading the extra final char returns + # an uninitialized, if 'uninitialized_ok==2' + return _uninitialized(self._TYPE.OF) + raise if isinstance(v, _uninitialized) and not uninitialized_ok: raise UninitializedMemoryAccess("%r[%s]"%(self, index)) return v def setitem(self, index, value): assert typeOf(value) == self._TYPE.OF - self.items[index] = value + try: + self.items[index] = value + except IndexError: + if (index == len(self.items) and value == '\x00' and + self._TYPE._hints.get('extra_item_after_alloc')): + # special case: writing NULL to the extra final char + return + raise assert not '__dict__' in dir(_array) assert not '__dict__' in dir(_struct) diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -15,7 +15,7 @@ from rpython.rtyper.tool.rfficache import platform, sizeof_c_type from rpython.translator.tool.cbuild import ExternalCompilationInfo from rpython.rtyper.annlowlevel import llhelper -from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.objectmodel import we_are_translated, we_are_translated_to_c from rpython.rlib.rstring import StringBuilder, UnicodeBuilder, assert_str0 from rpython.rlib import jit from rpython.rtyper.lltypesystem import llmemory @@ -232,40 +232,36 @@ call_external_function = jit.dont_look_inside( call_external_function) + def _oops(): + raise AssertionError("can't pass (any more) a unicode string" + " directly to a VOIDP argument") + _oops._annspecialcase_ = 'specialize:memo' + unrolling_arg_tps = unrolling_iterable(enumerate(args)) def wrapper(*args): real_args = () + # XXX 'to_free' leaks if an allocation fails with MemoryError + # and was not the first in this function to_free = () for i, TARGET in unrolling_arg_tps: arg = args[i] - freeme = None - if TARGET == CCHARP: + if TARGET == CCHARP or TARGET is VOIDP: if arg is None: arg = lltype.nullptr(CCHARP.TO) # None => (char*)NULL - freeme = arg + to_free = to_free + (arg, '\x04') elif isinstance(arg, str): - arg = str2charp(arg) - # XXX leaks if a str2charp() fails with MemoryError - # and was not the first in this function - freeme = arg + tup = get_nonmovingbuffer_final_null(arg) + to_free = to_free + tup + arg = tup[0] + elif isinstance(arg, unicode): + _oops() elif TARGET == CWCHARP: if arg is None: arg = lltype.nullptr(CWCHARP.TO) # None => (wchar_t*)NULL - freeme = arg + to_free = to_free + (arg,) elif isinstance(arg, unicode): arg = unicode2wcharp(arg) - # XXX leaks if a unicode2wcharp() fails with MemoryError - # and was not the first in this function - freeme = arg - elif TARGET is VOIDP: - if arg is None: - arg = lltype.nullptr(VOIDP.TO) - elif isinstance(arg, str): - arg = str2charp(arg) - freeme = arg - elif isinstance(arg, unicode): - arg = unicode2wcharp(arg) - freeme = arg + to_free = to_free + (arg,) elif _isfunctype(TARGET) and not _isllptr(arg): # XXX pass additional arguments use_gil = invoke_around_handlers @@ -283,11 +279,22 @@ or TARGET is lltype.Bool)): arg = cast(TARGET, arg) real_args = real_args + (arg,) - to_free = to_free + (freeme,) res = call_external_function(*real_args) for i, TARGET in unrolling_arg_tps: - if to_free[i]: - lltype.free(to_free[i], flavor='raw') + arg = args[i] + if TARGET == CCHARP or TARGET is VOIDP: + if arg is None: + to_free = to_free[2:] + elif isinstance(arg, str): + free_nonmovingbuffer(arg, to_free[0], to_free[1]) + to_free = to_free[2:] + elif TARGET == CWCHARP: + if arg is None: + to_free = to_free[1:] + elif isinstance(arg, unicode): + free_wcharp(to_free[0]) + to_free = to_free[1:] + assert len(to_free) == 0 if rarithmetic.r_int is not r_int: if result is INT: return cast(lltype.Signed, res) @@ -816,52 +823,69 @@ string is already nonmovable or could be pinned. Must be followed by a free_nonmovingbuffer call. - First bool returned indicates if 'data' was pinned. Second bool returned - indicates if we did a raw alloc because pinning failed. Both bools - should never be true at the same time. + Also returns a char: + * \4: no pinning, returned pointer is inside 'data' which is nonmovable + * \5: 'data' was pinned, returned pointer is inside + * \6: pinning failed, returned pointer is raw malloced + + For strings (not unicodes), the len()th character of the resulting + raw buffer is available, but not initialized. Use + get_nonmovingbuffer_final_null() instead of get_nonmovingbuffer() + to get a regular null-terminated "char *". """ lldata = llstrtype(data) count = len(data) - pinned = False - if rgc.can_move(data): - if rgc.pin(data): - pinned = True + if we_are_translated_to_c() and not rgc.can_move(data): + flag = '\x04' + else: + if we_are_translated_to_c() and rgc.pin(data): + flag = '\x05' else: - buf = lltype.malloc(TYPEP.TO, count, flavor='raw') + buf = lltype.malloc(TYPEP.TO, count + (TYPEP is CCHARP), + flavor='raw') copy_string_to_raw(lldata, buf, 0, count) - return buf, pinned, True + return buf, '\x06' # ^^^ raw malloc used to get a nonmovable copy # - # following code is executed if: + # following code is executed after we're translated to C, if: # - rgc.can_move(data) and rgc.pin(data) both returned true # - rgc.can_move(data) returned false data_start = cast_ptr_to_adr(lldata) + \ offsetof(STRTYPE, 'chars') + itemoffsetof(STRTYPE.chars, 0) - return cast(TYPEP, data_start), pinned, False + return cast(TYPEP, data_start), flag # ^^^ already nonmovable. Therefore it's not raw allocated nor # pinned. get_nonmovingbuffer._always_inline_ = 'try' # get rid of the returned tuple get_nonmovingbuffer._annenforceargs_ = [strtype] - # (str, char*, bool, bool) -> None + @jit.dont_look_inside + def get_nonmovingbuffer_final_null(data): + tup = get_nonmovingbuffer(data) + buf, flag = tup + buf[len(data)] = lastchar + return tup + get_nonmovingbuffer_final_null._always_inline_ = 'try' + get_nonmovingbuffer_final_null._annenforceargs_ = [strtype] + + # (str, char*, char) -> None # Can't inline this because of the raw address manipulation. @jit.dont_look_inside - def free_nonmovingbuffer(data, buf, is_pinned, is_raw): + def free_nonmovingbuffer(data, buf, flag): """ - Keep 'data' alive and unpin it if it was pinned ('is_pinned' is true). - Otherwise free the non-moving copy ('is_raw' is true). + Keep 'data' alive and unpin it if it was pinned (flag==\5). + Otherwise free the non-moving copy (flag==\6). """ - if is_pinned: + if flag == '\x05': rgc.unpin(data) - if is_raw: + if flag == '\x06': lltype.free(buf, flavor='raw') - # if is_pinned and is_raw are false: data was already nonmovable, + # if flag == '\x04': data was already nonmovable, # we have nothing to clean up keepalive_until_here(data) - free_nonmovingbuffer._annenforceargs_ = [strtype, None, bool, bool] + free_nonmovingbuffer._annenforceargs_ = [strtype, None, None] # int -> (char*, str, int) # Can't inline this because of the raw address manipulation. @@ -947,18 +971,19 @@ return (str2charp, free_charp, charp2str, get_nonmovingbuffer, free_nonmovingbuffer, + get_nonmovingbuffer_final_null, alloc_buffer, str_from_buffer, keep_buffer_alive_until_here, charp2strn, charpsize2str, str2chararray, str2rawmem, ) (str2charp, free_charp, charp2str, - get_nonmovingbuffer, free_nonmovingbuffer, + get_nonmovingbuffer, free_nonmovingbuffer, get_nonmovingbuffer_final_null, alloc_buffer, str_from_buffer, keep_buffer_alive_until_here, charp2strn, charpsize2str, str2chararray, str2rawmem, ) = make_string_mappings(str) (unicode2wcharp, free_wcharp, wcharp2unicode, - get_nonmoving_unicodebuffer, free_nonmoving_unicodebuffer, + get_nonmoving_unicodebuffer, free_nonmoving_unicodebuffer, __not_usable, alloc_unicodebuffer, unicode_from_buffer, keep_unicodebuffer_alive_until_here, wcharp2unicoden, wcharpsize2unicode, unicode2wchararray, unicode2rawmem, ) = make_string_mappings(unicode) @@ -1194,10 +1219,28 @@ def __init__(self, data): self.data = data def __enter__(self): - self.buf, self.pinned, self.is_raw = get_nonmovingbuffer(self.data) + self.buf, self.flag = get_nonmovingbuffer(self.data) return self.buf def __exit__(self, *args): - free_nonmovingbuffer(self.data, self.buf, self.pinned, self.is_raw) + free_nonmovingbuffer(self.data, self.buf, self.flag) + __init__._always_inline_ = 'try' + __enter__._always_inline_ = 'try' + __exit__._always_inline_ = 'try' + +class scoped_view_charp: + """Returns a 'char *' that (tries to) point inside the given RPython + string (which must not be None). You can replace scoped_str2charp() + with scoped_view_charp() in all places that guarantee that the + content of the 'char[]' array will not be modified. + """ + def __init__(self, data): + self.data = data + __init__._annenforceargs_ = [None, annmodel.SomeString(can_be_None=False)] + def __enter__(self): + self.buf, self.flag = get_nonmovingbuffer_final_null(self.data) + return self.buf + def __exit__(self, *args): + free_nonmovingbuffer(self.data, self.buf, self.flag) __init__._always_inline_ = 'try' __enter__._always_inline_ = 'try' __exit__._always_inline_ = 'try' @@ -1206,10 +1249,10 @@ def __init__(self, data): self.data = data def __enter__(self): - self.buf, self.pinned, self.is_raw = get_nonmoving_unicodebuffer(self.data) + self.buf, self.flag = get_nonmoving_unicodebuffer(self.data) return self.buf def __exit__(self, *args): - free_nonmoving_unicodebuffer(self.data, self.buf, self.pinned, self.is_raw) + free_nonmoving_unicodebuffer(self.data, self.buf, self.flag) __init__._always_inline_ = 'try' __enter__._always_inline_ = 'try' __exit__._always_inline_ = 'try' diff --git a/rpython/rtyper/lltypesystem/rstr.py b/rpython/rtyper/lltypesystem/rstr.py --- a/rpython/rtyper/lltypesystem/rstr.py +++ b/rpython/rtyper/lltypesystem/rstr.py @@ -1238,7 +1238,8 @@ # ____________________________________________________________ STR.become(GcStruct('rpy_string', ('hash', Signed), - ('chars', Array(Char, hints={'immutable': True})), + ('chars', Array(Char, hints={'immutable': True, + 'extra_item_after_alloc': 1})), adtmeths={'malloc' : staticAdtMethod(mallocstr), 'empty' : staticAdtMethod(emptystrfun), 'copy_contents' : staticAdtMethod(copy_string_contents), diff --git a/rpython/rtyper/lltypesystem/test/test_rffi.py b/rpython/rtyper/lltypesystem/test/test_rffi.py --- a/rpython/rtyper/lltypesystem/test/test_rffi.py +++ b/rpython/rtyper/lltypesystem/test/test_rffi.py @@ -516,7 +516,7 @@ def test_nonmovingbuffer(self): d = 'some cool data that should not move' def f(): - buf, is_pinned, is_raw = get_nonmovingbuffer(d) + buf, flag = get_nonmovingbuffer(d) try: counter = 0 for i in range(len(d)): @@ -524,7 +524,7 @@ counter += 1 return counter finally: - free_nonmovingbuffer(d, buf, is_pinned, is_raw) + free_nonmovingbuffer(d, buf, flag) assert f() == len(d) fn = self.compile(f, [], gcpolicy='ref') assert fn() == len(d) @@ -534,13 +534,13 @@ def f(): counter = 0 for n in range(32): - buf, is_pinned, is_raw = get_nonmovingbuffer(d) + buf, flag = get_nonmovingbuffer(d) try: for i in range(len(d)): if buf[i] == d[i]: counter += 1 finally: - free_nonmovingbuffer(d, buf, is_pinned, is_raw) + free_nonmovingbuffer(d, buf, flag) return counter fn = self.compile(f, [], gcpolicy='semispace') # The semispace gc uses raw_malloc for its internal data structs @@ -555,13 +555,13 @@ def f(): counter = 0 for n in range(32): - buf, is_pinned, is_raw = get_nonmovingbuffer(d) + buf, flag = get_nonmovingbuffer(d) try: for i in range(len(d)): if buf[i] == d[i]: counter += 1 finally: - free_nonmovingbuffer(d, buf, is_pinned, is_raw) + free_nonmovingbuffer(d, buf, flag) return counter fn = self.compile(f, [], gcpolicy='incminimark') # The incminimark gc uses raw_malloc for its internal data structs @@ -835,3 +835,11 @@ if hasattr(rffi, '__INT128_T'): value = 0xAAAABBBBCCCCDDDD assert cast(rffi.__INT128_T, r_uint64(value)) == value + +def test_scoped_view_charp(): + s = 'bar' + with scoped_view_charp(s) as buf: + assert buf[0] == 'b' + assert buf[1] == 'a' + assert buf[2] == 'r' + assert buf[3] == '\x00' diff --git a/rpython/translator/c/node.py b/rpython/translator/c/node.py --- a/rpython/translator/c/node.py +++ b/rpython/translator/c/node.py @@ -253,8 +253,11 @@ yield '\t' + cdecl(typename, fname) + ';' if not self.ARRAY._hints.get('nolength', False): yield '\tlong length;' + varlength = self.varlength + if varlength is not None: + varlength += self.ARRAY._hints.get('extra_item_after_alloc', 0) line = '%s;' % cdecl(self.itemtypename, - 'items[%s]' % deflength(self.varlength)) + 'items[%s]' % deflength(varlength)) if self.ARRAY.OF is Void: # strange line = '/* array of void */' if self.ARRAY._hints.get('nolength', False): diff --git a/rpython/translator/c/test/test_lltyped.py b/rpython/translator/c/test/test_lltyped.py --- a/rpython/translator/c/test/test_lltyped.py +++ b/rpython/translator/c/test/test_lltyped.py @@ -1,4 +1,4 @@ -import py +import py, random from rpython.rtyper.lltypesystem.lltype import * from rpython.rtyper.lltypesystem import rffi from rpython.translator.c.test.test_genc import compile @@ -255,28 +255,6 @@ res2 = fn(0) assert res1 == res2 - def test_null_padding(self): - py.test.skip("we no longer pad our RPython strings with a final NUL") - from rpython.rtyper.lltypesystem import llmemory - from rpython.rtyper.lltypesystem import rstr - chars_offset = llmemory.FieldOffset(rstr.STR, 'chars') + \ - llmemory.ArrayItemsOffset(rstr.STR.chars) - # sadly, there's no way of forcing this to fail if the strings - # are allocated in a region of memory such that they just - # happen to get a NUL byte anyway :/ (a debug build will - # always fail though) - def trailing_byte(s): - adr_s = llmemory.cast_ptr_to_adr(s) - return (adr_s + chars_offset).char[len(s)] - def f(x): - r = 0 - for i in range(x): - r += ord(trailing_byte(' '*(100-x*x))) - return r - fn = self.getcompiled(f, [int]) - res = fn(10) - assert res == 0 - def test_cast_primitive(self): def f(x): x = cast_primitive(UnsignedLongLong, x) @@ -1023,3 +1001,49 @@ assert fn(r_longlong(1)) == True assert fn(r_longlong(256)) == True assert fn(r_longlong(2**32)) == True + + def test_extra_item_after_alloc(self): + from rpython.rlib import rgc + from rpython.rtyper.lltypesystem import lltype + from rpython.rtyper.lltypesystem import rstr + # all STR objects should be allocated with enough space for one + # extra char. Check this for prebuilt strings, and for dynamically + # allocated ones with the default GC for tests. Use strings of 8, + # 16 and 24 chars because if the extra char is missing, writing to it + # is likely to cause corruption in nearby structures. + sizes = [random.choice([8, 16, 24]) for i in range(100)] + A = lltype.Struct('A', ('x', lltype.Signed)) + prebuilt = [(rstr.mallocstr(sz), + lltype.malloc(A, flavor='raw', immortal=True)) + for sz in sizes] + k = 0 + for i, (s, a) in enumerate(prebuilt): + a.x = i + for i in range(len(s.chars)): + k += 1 + if k == 256: + k = 1 + s.chars[i] = chr(k) + + def check(lst): + hashes = [] + for i, (s, a) in enumerate(lst): + assert a.x == i + rgc.ll_write_final_null_char(s) + for i, (s, a) in enumerate(lst): + assert a.x == i # check it was not overwritten + def f(): + check(prebuilt) + lst1 = [] + for i, sz in enumerate(sizes): + s = rstr.mallocstr(sz) + a = lltype.malloc(A, flavor='raw') + a.x = i + lst1.append((s, a)) + check(lst1) + for _, a in lst1: + lltype.free(a, flavor='raw') + return 42 + + fn = self.getcompiled(f, []) + assert fn() == 42 diff --git a/rpython/translator/c/test/test_newgc.py b/rpython/translator/c/test/test_newgc.py --- a/rpython/translator/c/test/test_newgc.py +++ b/rpython/translator/c/test/test_newgc.py @@ -3,6 +3,7 @@ import os import sys import subprocess +import random import py @@ -1468,6 +1469,52 @@ res = self.run('nursery_hash_base') assert res >= 195 + def define_extra_item_after_alloc(cls): + from rpython.rtyper.lltypesystem import rstr + # all STR objects should be allocated with enough space for + # one extra char. Check this with our GCs. Use strings of 8, + # 16 and 24 chars because if the extra char is missing, + # writing to it is likely to cause corruption in nearby + # structures. + sizes = [random.choice([8, 16, 24]) for i in range(100)] + A = lltype.Struct('A', ('x', lltype.Signed)) + prebuilt = [(rstr.mallocstr(sz), + lltype.malloc(A, flavor='raw', immortal=True)) + for sz in sizes] + k = 0 + for i, (s, a) in enumerate(prebuilt): + a.x = i + for i in range(len(s.chars)): + k += 1 + if k == 256: + k = 1 + s.chars[i] = chr(k) + + def check(lst): + hashes = [] + for i, (s, a) in enumerate(lst): + assert a.x == i + rgc.ll_write_final_null_char(s) + for i, (s, a) in enumerate(lst): + assert a.x == i # check it was not overwritten + def fn(): + check(prebuilt) + lst1 = [] + for i, sz in enumerate(sizes): + s = rstr.mallocstr(sz) + a = lltype.malloc(A, flavor='raw') + a.x = i + lst1.append((s, a)) + check(lst1) + for _, a in lst1: + lltype.free(a, flavor='raw') + return 42 + return fn + + def test_extra_item_after_alloc(self): + res = self.run('extra_item_after_alloc') + assert res == 42 + class TestGenerationalGC(TestSemiSpaceGC): gcpolicy = "generation" diff --git a/rpython/translator/tool/test/test_staticsizereport.py b/rpython/translator/tool/test/test_staticsizereport.py --- a/rpython/translator/tool/test/test_staticsizereport.py +++ b/rpython/translator/tool/test/test_staticsizereport.py @@ -67,7 +67,7 @@ (4 * S + 2 * P) + # struct dicttable (S + 2 * 8192) + # indexes, length 8192, rffi.USHORT (S + (S + S) * 3840) + # entries, length 3840 - (S + S + 5) * 3840) # 3840 strings with 5 chars each + (S + S + 6) * 3840) # 3840 strings with 5 chars each (+1 final) assert guess_size(func.builder.db, fixarrayvalnode, set()) == 100 * rffi.sizeof(lltype.Signed) + 1 * rffi.sizeof(lltype.Signed) assert guess_size(func.builder.db, dynarrayvalnode, set()) == 100 * rffi.sizeof(lltype.Signed) + 2 * rffi.sizeof(lltype.Signed) + 1 * rffi.sizeof(rffi.VOIDP) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit