Author: Armin Rigo <[email protected]>
Branch:
Changeset: r85991:a84c4b359dcc
Date: 2016-08-02 18:13 +0200
http://bitbucket.org/pypy/pypy/changeset/a84c4b359dcc/
Log: hg merge null_byte_after_str
Allocate all RPython strings with one extra byte, normally unused.
It is used to hold a final zero in case we need some 'char *'
representation of the string, together with checks like 'not
can_move()' or object pinning. Main new thing that this allows:
'ffi.from_buffer(string)'.
diff --git a/pypy/module/_cffi_backend/ctypefunc.py
b/pypy/module/_cffi_backend/ctypefunc.py
--- a/pypy/module/_cffi_backend/ctypefunc.py
+++ b/pypy/module/_cffi_backend/ctypefunc.py
@@ -157,11 +157,13 @@
mustfree_max_plus_1 = 0
buffer = lltype.malloc(rffi.CCHARP.TO, size, flavor='raw')
try:
+ keepalives = [None] * len(args_w) # None or strings
for i in range(len(args_w)):
data = rffi.ptradd(buffer, cif_descr.exchange_args[i])
w_obj = args_w[i]
argtype = self.fargs[i]
- if argtype.convert_argument_from_object(data, w_obj):
+ if argtype.convert_argument_from_object(data, w_obj,
+ keepalives, i):
# argtype is a pointer type, and w_obj a list/tuple/str
mustfree_max_plus_1 = i + 1
@@ -177,9 +179,13 @@
if isinstance(argtype, W_CTypePointer):
data = rffi.ptradd(buffer, cif_descr.exchange_args[i])
flag = get_mustfree_flag(data)
+ raw_cdata = rffi.cast(rffi.CCHARPP, data)[0]
if flag == 1:
- raw_cdata = rffi.cast(rffi.CCHARPP, data)[0]
lltype.free(raw_cdata, flavor='raw')
+ elif flag >= 4:
+ value = keepalives[i]
+ assert value is not None
+ rffi.free_nonmovingbuffer(value, raw_cdata, chr(flag))
lltype.free(buffer, flavor='raw')
keepalive_until_here(args_w)
return w_res
diff --git a/pypy/module/_cffi_backend/ctypeobj.py
b/pypy/module/_cffi_backend/ctypeobj.py
--- a/pypy/module/_cffi_backend/ctypeobj.py
+++ b/pypy/module/_cffi_backend/ctypeobj.py
@@ -83,7 +83,7 @@
raise oefmt(space.w_TypeError, "cannot initialize cdata '%s'",
self.name)
- def convert_argument_from_object(self, cdata, w_ob):
+ def convert_argument_from_object(self, cdata, w_ob, keepalives, i):
self.convert_from_object(cdata, w_ob)
return False
diff --git a/pypy/module/_cffi_backend/ctypeptr.py
b/pypy/module/_cffi_backend/ctypeptr.py
--- a/pypy/module/_cffi_backend/ctypeptr.py
+++ b/pypy/module/_cffi_backend/ctypeptr.py
@@ -14,8 +14,8 @@
class W_CTypePtrOrArray(W_CType):
- _attrs_ = ['ctitem', 'can_cast_anything', 'length']
- _immutable_fields_ = ['ctitem', 'can_cast_anything', 'length']
+ _attrs_ = ['ctitem', 'can_cast_anything', 'accept_str',
'length']
+ _immutable_fields_ = ['ctitem', 'can_cast_anything', 'accept_str',
'length']
length = -1
def __init__(self, space, size, extra, extra_position, ctitem,
@@ -28,6 +28,9 @@
# - for functions, it is the return type
self.ctitem = ctitem
self.can_cast_anything = could_cast_anything and ctitem.cast_anything
+ self.accept_str = (self.can_cast_anything or
+ (ctitem.is_primitive_integer and
+ ctitem.size == rffi.sizeof(lltype.Char)))
def is_unichar_ptr_or_array(self):
return isinstance(self.ctitem, ctypeprim.W_CTypePrimitiveUniChar)
@@ -70,9 +73,7 @@
pass
else:
self._convert_array_from_listview(cdata, space.listview(w_ob))
- elif (self.can_cast_anything or
- (self.ctitem.is_primitive_integer and
- self.ctitem.size == rffi.sizeof(lltype.Char))):
+ elif self.accept_str:
if not space.isinstance_w(w_ob, space.w_str):
raise self._convert_error("str or list or tuple", w_ob)
s = space.str_w(w_ob)
@@ -260,8 +261,16 @@
else:
return lltype.nullptr(rffi.CCHARP.TO)
- def _prepare_pointer_call_argument(self, w_init, cdata):
+ def _prepare_pointer_call_argument(self, w_init, cdata, keepalives, i):
space = self.space
+ if self.accept_str and space.isinstance_w(w_init, space.w_str):
+ # special case to optimize strings passed to a "char *" argument
+ value = w_init.str_w(space)
+ keepalives[i] = value
+ buf, buf_flag = rffi.get_nonmovingbuffer_final_null(value)
+ rffi.cast(rffi.CCHARPP, cdata)[0] = buf
+ return ord(buf_flag) # 4, 5 or 6
+ #
if (space.isinstance_w(w_init, space.w_list) or
space.isinstance_w(w_init, space.w_tuple)):
length = space.int_w(space.len(w_init))
@@ -297,10 +306,11 @@
rffi.cast(rffi.CCHARPP, cdata)[0] = result
return 1
- def convert_argument_from_object(self, cdata, w_ob):
+ def convert_argument_from_object(self, cdata, w_ob, keepalives, i):
from pypy.module._cffi_backend.ctypefunc import set_mustfree_flag
result = (not isinstance(w_ob, cdataobj.W_CData) and
- self._prepare_pointer_call_argument(w_ob, cdata))
+ self._prepare_pointer_call_argument(w_ob, cdata,
+ keepalives, i))
if result == 0:
self.convert_from_object(cdata, w_ob)
set_mustfree_flag(cdata, result)
diff --git a/pypy/module/_cffi_backend/ffi_obj.py
b/pypy/module/_cffi_backend/ffi_obj.py
--- a/pypy/module/_cffi_backend/ffi_obj.py
+++ b/pypy/module/_cffi_backend/ffi_obj.py
@@ -353,7 +353,7 @@
'array.array' or numpy arrays."""
#
w_ctchara = newtype._new_chara_type(self.space)
- return func.from_buffer(self.space, w_ctchara, w_python_buffer)
+ return func._from_buffer(self.space, w_ctchara, w_python_buffer)
@unwrap_spec(w_arg=W_CData)
diff --git a/pypy/module/_cffi_backend/func.py
b/pypy/module/_cffi_backend/func.py
--- a/pypy/module/_cffi_backend/func.py
+++ b/pypy/module/_cffi_backend/func.py
@@ -1,7 +1,8 @@
from rpython.rtyper.annlowlevel import llstr
from rpython.rtyper.lltypesystem import lltype, rffi
from rpython.rtyper.lltypesystem.rstr import copy_string_to_raw
-from rpython.rlib.objectmodel import keepalive_until_here
+from rpython.rlib.objectmodel import keepalive_until_here, we_are_translated
+from rpython.rlib import jit
from pypy.interpreter.error import OperationError, oefmt
from pypy.interpreter.gateway import unwrap_spec, WrappedDefault
@@ -132,17 +133,66 @@
raise oefmt(space.w_TypeError,
"needs 'char[]', got '%s'", w_ctype.name)
#
+ return _from_buffer(space, w_ctype, w_x)
+
+def _from_buffer(space, w_ctype, w_x):
buf = _fetch_as_read_buffer(space, w_x)
- try:
- _cdata = buf.get_raw_address()
- except ValueError:
- raise oefmt(space.w_TypeError,
- "from_buffer() got a '%T' object, which supports the "
- "buffer interface but cannot be rendered as a plain "
- "raw address on PyPy", w_x)
+ if space.isinstance_w(w_x, space.w_str):
+ _cdata = get_raw_address_of_string(space, w_x)
+ else:
+ try:
+ _cdata = buf.get_raw_address()
+ except ValueError:
+ raise oefmt(space.w_TypeError,
+ "from_buffer() got a '%T' object, which supports the "
+ "buffer interface but cannot be rendered as a plain "
+ "raw address on PyPy", w_x)
#
return cdataobj.W_CDataFromBuffer(space, _cdata, w_ctype, buf, w_x)
+# ____________________________________________________________
+
+class RawBytes(object):
+ def __init__(self, string):
+ self.ptr = rffi.str2charp(string, track_allocation=False)
+ def __del__(self):
+ rffi.free_charp(self.ptr, track_allocation=False)
+
+class RawBytesCache(object):
+ def __init__(self, space):
+ from pypy.interpreter.baseobjspace import W_Root
+ from rpython.rlib import rweakref
+ self.wdict = rweakref.RWeakKeyDictionary(W_Root, RawBytes)
+
[email protected]_look_inside
+def get_raw_address_of_string(space, w_x):
+ """Special case for ffi.from_buffer(string). Returns a 'char *' that
+ is valid as long as the string object is alive. Two calls to
+ ffi.from_buffer(same_string) are guaranteed to return the same pointer.
+ """
+ from rpython.rtyper.annlowlevel import llstr
+ from rpython.rtyper.lltypesystem.rstr import STR
+ from rpython.rtyper.lltypesystem import llmemory
+ from rpython.rlib import rgc
+
+ cache = space.fromcache(RawBytesCache)
+ rawbytes = cache.wdict.get(w_x)
+ if rawbytes is None:
+ data = space.str_w(w_x)
+ if we_are_translated() and not rgc.can_move(data):
+ lldata = llstr(data)
+ data_start = (llmemory.cast_ptr_to_adr(lldata) +
+ rffi.offsetof(STR, 'chars') +
+ llmemory.itemoffsetof(STR.chars, 0))
+ data_start = rffi.cast(rffi.CCHARP, data_start)
+ data_start[len(data)] = '\x00' # write the final extra null
+ return data_start
+ rawbytes = RawBytes(data)
+ cache.wdict.set(w_x, rawbytes)
+ return rawbytes.ptr
+
+# ____________________________________________________________
+
def unsafe_escaping_ptr_for_ptr_or_array(w_cdata):
if not w_cdata.ctype.is_nonfunc_pointer_or_array:
diff --git a/pypy/module/_cffi_backend/parse_c_type.py
b/pypy/module/_cffi_backend/parse_c_type.py
--- a/pypy/module/_cffi_backend/parse_c_type.py
+++ b/pypy/module/_cffi_backend/parse_c_type.py
@@ -97,11 +97,8 @@
[rffi.INT], rffi.CCHARP)
def parse_c_type(info, input):
- p_input = rffi.str2charp(input)
- try:
+ with rffi.scoped_view_charp(input) as p_input:
res = ll_parse_c_type(info, p_input)
- finally:
- rffi.free_charp(p_input)
return rffi.cast(lltype.Signed, res)
NULL_CTX = lltype.nullptr(PCTX.TO)
@@ -130,15 +127,13 @@
return rffi.getintfield(src_ctx, 'c_num_types')
def search_in_globals(ctx, name):
- c_name = rffi.str2charp(name)
- result = ll_search_in_globals(ctx, c_name,
- rffi.cast(rffi.SIZE_T, len(name)))
- rffi.free_charp(c_name)
+ with rffi.scoped_view_charp(name) as c_name:
+ result = ll_search_in_globals(ctx, c_name,
+ rffi.cast(rffi.SIZE_T, len(name)))
return rffi.cast(lltype.Signed, result)
def search_in_struct_unions(ctx, name):
- c_name = rffi.str2charp(name)
- result = ll_search_in_struct_unions(ctx, c_name,
- rffi.cast(rffi.SIZE_T, len(name)))
- rffi.free_charp(c_name)
+ with rffi.scoped_view_charp(name) as c_name:
+ result = ll_search_in_struct_unions(ctx, c_name,
+ rffi.cast(rffi.SIZE_T, len(name)))
return rffi.cast(lltype.Signed, result)
diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py
b/pypy/module/_cffi_backend/test/_backend_test_c.py
--- a/pypy/module/_cffi_backend/test/_backend_test_c.py
+++ b/pypy/module/_cffi_backend/test/_backend_test_c.py
@@ -3330,13 +3330,18 @@
BChar = new_primitive_type("char")
BCharP = new_pointer_type(BChar)
BCharA = new_array_type(BCharP, None)
- py.test.raises(TypeError, from_buffer, BCharA, b"foo")
+ p1 = from_buffer(BCharA, b"foo")
+ assert p1 == from_buffer(BCharA, b"foo")
+ import gc; gc.collect()
+ assert p1 == from_buffer(BCharA, b"foo")
py.test.raises(TypeError, from_buffer, BCharA, u+"foo")
try:
from __builtin__ import buffer
except ImportError:
pass
else:
+ # from_buffer(buffer(b"foo")) does not work, because it's not
+ # implemented on pypy; only from_buffer(b"foo") works.
py.test.raises(TypeError, from_buffer, BCharA, buffer(b"foo"))
py.test.raises(TypeError, from_buffer, BCharA, buffer(u+"foo"))
try:
diff --git a/pypy/module/_multiprocessing/interp_connection.py
b/pypy/module/_multiprocessing/interp_connection.py
--- a/pypy/module/_multiprocessing/interp_connection.py
+++ b/pypy/module/_multiprocessing/interp_connection.py
@@ -401,21 +401,20 @@
_WriteFile, ERROR_NO_SYSTEM_RESOURCES)
from rpython.rlib import rwin32
- charp = rffi.str2charp(buf)
- written_ptr = lltype.malloc(rffi.CArrayPtr(rwin32.DWORD).TO, 1,
- flavor='raw')
- try:
- result = _WriteFile(
- self.handle, rffi.ptradd(charp, offset),
- size, written_ptr, rffi.NULL)
+ with rffi.scoped_view_charp(buf) as charp:
+ written_ptr = lltype.malloc(rffi.CArrayPtr(rwin32.DWORD).TO, 1,
+ flavor='raw')
+ try:
+ result = _WriteFile(
+ self.handle, rffi.ptradd(charp, offset),
+ size, written_ptr, rffi.NULL)
- if (result == 0 and
- rwin32.GetLastError_saved() == ERROR_NO_SYSTEM_RESOURCES):
- raise oefmt(space.w_ValueError,
- "Cannot send %d bytes over connection", size)
- finally:
- rffi.free_charp(charp)
- lltype.free(written_ptr, flavor='raw')
+ if (result == 0 and
+ rwin32.GetLastError_saved() == ERROR_NO_SYSTEM_RESOURCES):
+ raise oefmt(space.w_ValueError,
+ "Cannot send %d bytes over connection", size)
+ finally:
+ lltype.free(written_ptr, flavor='raw')
def do_recv_string(self, space, buflength, maxlength):
from pypy.module._multiprocessing.interp_win32 import (
diff --git a/pypy/module/_ssl/interp_ssl.py b/pypy/module/_ssl/interp_ssl.py
--- a/pypy/module/_ssl/interp_ssl.py
+++ b/pypy/module/_ssl/interp_ssl.py
@@ -135,7 +135,7 @@
def __init__(self, ctx, protos):
self.protos = protos
- self.buf, self.pinned, self.is_raw = rffi.get_nonmovingbuffer(protos)
+ self.buf, self.bufflag = rffi.get_nonmovingbuffer(protos)
NPN_STORAGE.set(rffi.cast(lltype.Unsigned, self.buf), self)
# set both server and client callbacks, because the context
@@ -147,7 +147,7 @@
def __del__(self):
rffi.free_nonmovingbuffer(
- self.protos, self.buf, self.pinned, self.is_raw)
+ self.protos, self.buf, self.bufflag)
@staticmethod
def advertiseNPN_cb(s, data_ptr, len_ptr, args):
@@ -181,7 +181,7 @@
def __init__(self, ctx, protos):
self.protos = protos
- self.buf, self.pinned, self.is_raw = rffi.get_nonmovingbuffer(protos)
+ self.buf, self.bufflag = rffi.get_nonmovingbuffer(protos)
ALPN_STORAGE.set(rffi.cast(lltype.Unsigned, self.buf), self)
with rffi.scoped_str2charp(protos) as protos_buf:
@@ -193,7 +193,7 @@
def __del__(self):
rffi.free_nonmovingbuffer(
- self.protos, self.buf, self.pinned, self.is_raw)
+ self.protos, self.buf, self.bufflag)
@staticmethod
def selectALPN_cb(s, out_ptr, outlen_ptr, client, client_len, args):
@@ -228,7 +228,7 @@
Mix string into the OpenSSL PRNG state. entropy (a float) is a lower
bound on the entropy contained in string."""
- with rffi.scoped_str2charp(string) as buf:
+ with rffi.scoped_nonmovingbuffer(string) as buf:
libssl_RAND_add(buf, len(string), entropy)
def RAND_status(space):
diff --git a/pypy/module/cppyy/capi/builtin_capi.py
b/pypy/module/cppyy/capi/builtin_capi.py
--- a/pypy/module/cppyy/capi/builtin_capi.py
+++ b/pypy/module/cppyy/capi/builtin_capi.py
@@ -537,9 +537,8 @@
releasegil=ts_helper,
compilation_info=backend.eci)
def c_charp2stdstring(space, svalue):
- charp = rffi.str2charp(svalue)
- result = _c_charp2stdstring(charp)
- rffi.free_charp(charp)
+ with rffi.scoped_view_charp(svalue) as charp:
+ result = _c_charp2stdstring(charp)
return result
_c_stdstring2stdstring = rffi.llexternal(
"cppyy_stdstring2stdstring",
diff --git a/pypy/module/cppyy/capi/cint_capi.py
b/pypy/module/cppyy/capi/cint_capi.py
--- a/pypy/module/cppyy/capi/cint_capi.py
+++ b/pypy/module/cppyy/capi/cint_capi.py
@@ -82,9 +82,8 @@
releasegil=ts_helper,
compilation_info=eci)
def c_charp2TString(space, svalue):
- charp = rffi.str2charp(svalue)
- result = _c_charp2TString(charp)
- rffi.free_charp(charp)
+ with rffi.scoped_view_charp(svalue) as charp:
+ result = _c_charp2TString(charp)
return result
_c_TString2TString = rffi.llexternal(
"cppyy_TString2TString",
diff --git a/pypy/module/cppyy/capi/loadable_capi.py
b/pypy/module/cppyy/capi/loadable_capi.py
--- a/pypy/module/cppyy/capi/loadable_capi.py
+++ b/pypy/module/cppyy/capi/loadable_capi.py
@@ -65,6 +65,7 @@
else: # only other use is string
n = len(obj._string)
assert raw_string == rffi.cast(rffi.CCHARP, 0)
+ # XXX could use rffi.get_nonmovingbuffer_final_null()
raw_string = rffi.str2charp(obj._string)
data = rffi.cast(rffi.CCHARPP, data)
data[0] = raw_string
diff --git a/rpython/jit/backend/arm/opassembler.py
b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -883,6 +883,7 @@
ofs_items, itemsize, _ = symbolic.get_array_token(rstr.STR,
self.cpu.translate_support_code)
assert itemsize == 1
+ ofs_items -= 1 # for the extra null character
scale = 0
self._gen_address(resloc, baseloc, ofsloc, scale, ofs_items)
diff --git a/rpython/jit/backend/llsupport/descr.py
b/rpython/jit/backend/llsupport/descr.py
--- a/rpython/jit/backend/llsupport/descr.py
+++ b/rpython/jit/backend/llsupport/descr.py
@@ -280,7 +280,7 @@
concrete_type = '\x00'
def __init__(self, basesize, itemsize, lendescr, flag, is_pure=False,
concrete_type='\x00'):
- self.basesize = basesize
+ self.basesize = basesize # this includes +1 for STR
self.itemsize = itemsize
self.lendescr = lendescr # or None, if no length
self.flag = flag
@@ -676,7 +676,7 @@
def unpack_arraydescr(arraydescr):
assert isinstance(arraydescr, ArrayDescr)
- ofs = arraydescr.basesize
+ ofs = arraydescr.basesize # this includes +1 for STR
size = arraydescr.itemsize
sign = arraydescr.is_item_signed()
return size, ofs, sign
diff --git a/rpython/jit/backend/llsupport/rewrite.py
b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -293,6 +293,7 @@
basesize, itemsize, ofs_length = get_array_token(rstr.STR,
self.cpu.translate_support_code)
assert itemsize == 1
+ basesize -= 1 # for the extra null character
self.emit_gc_load_or_indexed(op, op.getarg(0), op.getarg(1),
itemsize, itemsize, basesize,
NOT_SIGNED)
elif opnum == rop.UNICODEGETITEM:
@@ -304,6 +305,7 @@
basesize, itemsize, ofs_length = get_array_token(rstr.STR,
self.cpu.translate_support_code)
assert itemsize == 1
+ basesize -= 1 # for the extra null character
self.emit_gc_store_or_indexed(op, op.getarg(0), op.getarg(1),
op.getarg(2),
itemsize, itemsize, basesize)
elif opnum == rop.UNICODESETITEM:
diff --git a/rpython/jit/backend/llsupport/symbolic.py
b/rpython/jit/backend/llsupport/symbolic.py
--- a/rpython/jit/backend/llsupport/symbolic.py
+++ b/rpython/jit/backend/llsupport/symbolic.py
@@ -29,7 +29,7 @@
def get_array_token(T, translate_support_code):
# T can be an array or a var-sized structure
if translate_support_code:
- basesize = llmemory.sizeof(T, 0)
+ basesize = llmemory.sizeof(T, 0) # this includes +1 for STR
if isinstance(T, lltype.Struct):
SUBARRAY = getattr(T, T._arrayfld)
itemsize = llmemory.sizeof(SUBARRAY.OF)
@@ -57,6 +57,7 @@
assert carray.length.size == WORD
ofs_length = before_array_part + carray.length.offset
basesize = before_array_part + carray.items.offset
+ basesize += T._hints.get('extra_item_after_alloc', 0) # +1 for STR
carrayitem = ll2ctypes.get_ctypes_type(T.OF)
itemsize = ctypes.sizeof(carrayitem)
return basesize, itemsize, ofs_length
diff --git a/rpython/jit/backend/llsupport/test/test_descr.py
b/rpython/jit/backend/llsupport/test/test_descr.py
--- a/rpython/jit/backend/llsupport/test/test_descr.py
+++ b/rpython/jit/backend/llsupport/test/test_descr.py
@@ -435,8 +435,10 @@
def test_bytearray_descr():
c0 = GcCache(False)
descr = get_array_descr(c0, rstr.STR) # for bytearray
+ # note that we get a basesize that has 1 extra byte for the final null char
+ # (only for STR)
assert descr.flag == FLAG_UNSIGNED
- assert descr.basesize == struct.calcsize("PP") # hash, length
+ assert descr.basesize == struct.calcsize("PP") + 1 # hash, length, extra
assert descr.lendescr.offset == struct.calcsize("P") # hash
assert not descr.is_array_of_pointers()
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py
b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -647,6 +647,9 @@
""")
def test_rewrite_assembler_newstr_newunicode(self):
+ # note: strdescr.basesize already contains the extra final character,
+ # so that's why newstr(14) is rounded up to 'basesize+15' and not
+ # 'basesize+16'.
self.check_rewrite("""
[i2]
p0 = newstr(14)
@@ -657,12 +660,12 @@
""", """
[i2]
p0 = call_malloc_nursery( \
- %(strdescr.basesize + 16 * strdescr.itemsize + \
+ %(strdescr.basesize + 15 * strdescr.itemsize + \
unicodedescr.basesize + 10 * unicodedescr.itemsize)d)
gc_store(p0, 0, %(strdescr.tid)d, %(tiddescr.field_size)s)
gc_store(p0, %(strlendescr.offset)s, 14,
%(strlendescr.field_size)s)
gc_store(p0, 0, 0, %(strhashdescr.field_size)s)
- p1 = nursery_ptr_increment(p0, %(strdescr.basesize + 16 *
strdescr.itemsize)d)
+ p1 = nursery_ptr_increment(p0, %(strdescr.basesize + 15 *
strdescr.itemsize)d)
gc_store(p1, 0, %(unicodedescr.tid)d, %(tiddescr.field_size)s)
gc_store(p1, %(unicodelendescr.offset)s, 10,
%(unicodelendescr.field_size)s)
gc_store(p1, 0, 0, %(unicodehashdescr.field_size)s)
@@ -1240,14 +1243,14 @@
# 'i3 = gc_load_i(p0,i5,%(unicodedescr.itemsize)d)'],
[True, (4,), 'i3 = strgetitem(p0,i1)' '->'
'i3 = gc_load_indexed_i(p0,i1,1,'
- '%(strdescr.basesize)d,1)'],
+ '%(strdescr.basesize-1)d,1)'],
#[False, (4,), 'i3 = strgetitem(p0,i1)' '->'
- # 'i5 = int_add(i1, %(strdescr.basesize)d);'
+ # 'i5 = int_add(i1, %(strdescr.basesize-1)d);'
# 'i3 = gc_load_i(p0,i5,1)'],
## setitem str/unicode
[True, (4,), 'i3 = strsetitem(p0,i1,0)' '->'
'i3 = gc_store_indexed(p0,i1,0,1,'
- '%(strdescr.basesize)d,1)'],
+ '%(strdescr.basesize-1)d,1)'],
[True, (2,4), 'i3 = unicodesetitem(p0,i1,0)' '->'
'i3 = gc_store_indexed(p0,i1,0,'
'%(unicodedescr.itemsize)d,'
diff --git a/rpython/jit/backend/llsupport/test/ztranslation_test.py
b/rpython/jit/backend/llsupport/test/ztranslation_test.py
--- a/rpython/jit/backend/llsupport/test/ztranslation_test.py
+++ b/rpython/jit/backend/llsupport/test/ztranslation_test.py
@@ -3,7 +3,7 @@
from rpython.rlib.jit import JitDriver, unroll_parameters, set_param
from rpython.rlib.jit import PARAMETERS, dont_look_inside
from rpython.rlib.jit import promote, _get_virtualizable_token
-from rpython.rlib import jit_hooks, rposix
+from rpython.rlib import jit_hooks, rposix, rgc
from rpython.rlib.objectmodel import keepalive_until_here
from rpython.rlib.rthread import ThreadLocalReference, ThreadLocalField
from rpython.jit.backend.detect_cpu import getcpuclass
@@ -11,7 +11,7 @@
from rpython.jit.codewriter.policy import StopAtXPolicy
from rpython.config.config import ConfigError
from rpython.translator.tool.cbuild import ExternalCompilationInfo
-from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.rtyper.lltypesystem import lltype, rffi, rstr
from rpython.rlib.rjitlog import rjitlog as jl
@@ -29,6 +29,7 @@
# - floats neg and abs
# - cast_int_to_float
# - llexternal with macro=True
+ # - extra place for the zero after STR instances
class BasicFrame(object):
_virtualizable_ = ['i']
@@ -56,7 +57,7 @@
return ("/home.py",0,0)
jitdriver = JitDriver(greens = [],
- reds = ['total', 'frame', 'j'],
+ reds = ['total', 'frame', 'prev_s', 'j'],
virtualizables = ['frame'],
get_location = get_location)
def f(i, j):
@@ -68,9 +69,12 @@
total = 0
frame = Frame(i)
j = float(j)
+ prev_s = rstr.mallocstr(16)
while frame.i > 3:
- jitdriver.can_enter_jit(frame=frame, total=total, j=j)
- jitdriver.jit_merge_point(frame=frame, total=total, j=j)
+ jitdriver.can_enter_jit(frame=frame, total=total, j=j,
+ prev_s=prev_s)
+ jitdriver.jit_merge_point(frame=frame, total=total, j=j,
+ prev_s=prev_s)
_get_virtualizable_token(frame)
total += frame.i
if frame.i >= 20:
@@ -82,6 +86,11 @@
k = myabs1(myabs2(j))
if k - abs(j): raise ValueError
if k - abs(-j): raise ValueError
+ s = rstr.mallocstr(16)
+ rgc.ll_write_final_null_char(s)
+ rgc.ll_write_final_null_char(prev_s)
+ if (frame.i & 3) == 0:
+ prev_s = s
return chr(total % 253)
#
class Virt2(object):
diff --git a/rpython/jit/backend/ppc/opassembler.py
b/rpython/jit/backend/ppc/opassembler.py
--- a/rpython/jit/backend/ppc/opassembler.py
+++ b/rpython/jit/backend/ppc/opassembler.py
@@ -994,6 +994,7 @@
basesize, itemsize, _ = symbolic.get_array_token(rstr.STR,
self.cpu.translate_support_code)
assert itemsize == 1
+ basesize -= 1 # for the extra null character
scale = 0
self._emit_load_for_copycontent(r.r0, src_ptr_loc, src_ofs_loc, scale)
diff --git a/rpython/jit/backend/x86/assembler.py
b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -1673,25 +1673,6 @@
dest_addr = AddressLoc(base_loc, ofs_loc, scale, offset_loc.value)
self.save_into_mem(dest_addr, value_loc, size_loc)
- def genop_discard_strsetitem(self, op, arglocs):
- base_loc, ofs_loc, val_loc = arglocs
- basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR,
- self.cpu.translate_support_code)
- assert itemsize == 1
- dest_addr = AddressLoc(base_loc, ofs_loc, 0, basesize)
- self.mc.MOV8(dest_addr, val_loc.lowest8bits())
-
- def genop_discard_unicodesetitem(self, op, arglocs):
- base_loc, ofs_loc, val_loc = arglocs
- basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE,
- self.cpu.translate_support_code)
- if itemsize == 4:
- self.mc.MOV32(AddressLoc(base_loc, ofs_loc, 2, basesize), val_loc)
- elif itemsize == 2:
- self.mc.MOV16(AddressLoc(base_loc, ofs_loc, 1, basesize), val_loc)
- else:
- assert 0, itemsize
-
# genop_discard_setfield_raw = genop_discard_setfield_gc
def genop_math_read_timestamp(self, op, arglocs, resloc):
diff --git a/rpython/jit/backend/x86/regalloc.py
b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -1219,6 +1219,7 @@
ofs_items, itemsize, _ = symbolic.get_array_token(rstr.STR,
self.translate_support_code)
assert itemsize == 1
+ ofs_items -= 1 # for the extra null character
scale = 0
self.assembler.load_effective_addr(ofsloc, ofs_items, scale,
resloc, baseloc)
diff --git a/rpython/jit/backend/zarch/opassembler.py
b/rpython/jit/backend/zarch/opassembler.py
--- a/rpython/jit/backend/zarch/opassembler.py
+++ b/rpython/jit/backend/zarch/opassembler.py
@@ -991,6 +991,7 @@
basesize, itemsize, _ = symbolic.get_array_token(rstr.STR,
self.cpu.translate_support_code)
assert itemsize == 1
+ basesize -= 1 # for the extra null character
scale = 0
# src and src_len are tmp registers
diff --git a/rpython/jit/metainterp/test/test_virtualizable.py
b/rpython/jit/metainterp/test/test_virtualizable.py
--- a/rpython/jit/metainterp/test/test_virtualizable.py
+++ b/rpython/jit/metainterp/test/test_virtualizable.py
@@ -1381,7 +1381,7 @@
return result
def indirection(arg):
- return interp(arg)
+ return interp(arg) + 1
def run_interp(n):
f = hint(Frame(n), access_directly=True)
diff --git a/rpython/memory/gcheader.py b/rpython/memory/gcheader.py
--- a/rpython/memory/gcheader.py
+++ b/rpython/memory/gcheader.py
@@ -11,7 +11,21 @@
def __init__(self, HDR):
"""NOT_RPYTHON"""
self.HDR = HDR
- self.obj2header = weakref.WeakKeyDictionary()
+ #
+ # The following used to be a weakref.WeakKeyDictionary(), but
+ # the problem is that if you have a gcobj which has already a
+ # weakref cached on it and the hash already cached in that
+ # weakref, and later the hash of the gcobj changes (because it
+ # is ll2ctypes-ified), then that gcobj cannot be used as a key
+ # in a WeakKeyDictionary any more: from this point on,
+ # 'ref(gcobj)' and 'ref(gcobj, callback)' return two objects
+ # with different hashes... and so e.g. the sequence of
+ # operations 'obj2header[x]=y; assert x in obj2header' fails.
+ #
+ # Instead, just use a regular dictionary and hope that not too
+ # many objects would be reclaimed in a given GCHeaderBuilder
+ # instance.
+ self.obj2header = {}
self.size_gc_header = llmemory.GCHeaderOffset(self)
def header_of_object(self, gcptr):
diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py
--- a/rpython/rlib/objectmodel.py
+++ b/rpython/rlib/objectmodel.py
@@ -281,6 +281,10 @@
return lltype.Signed
malloc_zero_filled = CDefinedIntSymbolic('MALLOC_ZERO_FILLED', default=0)
+_translated_to_c = CDefinedIntSymbolic('1 /*_translated_to_c*/', default=0)
+
+def we_are_translated_to_c():
+ return we_are_translated() and _translated_to_c
# ____________________________________________________________
diff --git a/rpython/rlib/rdtoa.py b/rpython/rlib/rdtoa.py
--- a/rpython/rlib/rdtoa.py
+++ b/rpython/rlib/rdtoa.py
@@ -56,22 +56,24 @@
raise MemoryError
end_ptr = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw')
try:
- ll_input = rffi.str2charp(input)
+ # note: don't use the class scoped_view_charp here, it
+ # breaks some tests because this function is used by the GC
+ ll_input, flag = rffi.get_nonmovingbuffer_final_null(input)
try:
result = dg_strtod(ll_input, end_ptr)
endpos = (rffi.cast(lltype.Signed, end_ptr[0]) -
rffi.cast(lltype.Signed, ll_input))
-
- if endpos == 0 or endpos < len(input):
- raise ValueError("invalid input at position %d" % (endpos,))
-
- return result
finally:
- rffi.free_charp(ll_input)
+ rffi.free_nonmovingbuffer(input, ll_input, flag)
finally:
lltype.free(end_ptr, flavor='raw')
+ if endpos == 0 or endpos < len(input):
+ raise ValueError("invalid input at position %d" % (endpos,))
+
+ return result
+
lower_special_strings = ['inf', '+inf', '-inf', 'nan']
upper_special_strings = ['INF', '+INF', '-INF', 'NAN']
diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py
--- a/rpython/rlib/rgc.py
+++ b/rpython/rlib/rgc.py
@@ -1268,3 +1268,26 @@
ptr = lltype.direct_arrayitems(array)
# ptr is a Ptr(FixedSizeArray(Char, 1)). Cast it to a rffi.CCHARP
return rffi.cast(rffi.CCHARP, ptr)
+
[email protected]_look_inside
+@no_collect
[email protected]()
+def ll_write_final_null_char(s):
+ """'s' is a low-level STR; writes a terminating NULL character after
+ the other characters in 's'. Warning, this only works because of
+ the 'extra_item_after_alloc' hack inside the definition of STR.
+ """
+ from rpython.rtyper.lltypesystem import rffi
+ PSTR = lltype.typeOf(s)
+ assert has_final_null_char(PSTR) == 1
+ n = llmemory.offsetof(PSTR.TO, 'chars')
+ n += llmemory.itemoffsetof(PSTR.TO.chars, 0)
+ n = llmemory.raw_malloc_usage(n)
+ n += len(s.chars)
+ # no GC operation from here!
+ ptr = rffi.cast(rffi.CCHARP, s)
+ ptr[n] = '\x00'
+
[email protected]()
+def has_final_null_char(PSTR):
+ return PSTR.TO.chars._hints.get('extra_item_after_alloc', 0)
diff --git a/rpython/rtyper/lltypesystem/ll2ctypes.py
b/rpython/rtyper/lltypesystem/ll2ctypes.py
--- a/rpython/rtyper/lltypesystem/ll2ctypes.py
+++ b/rpython/rtyper/lltypesystem/ll2ctypes.py
@@ -250,7 +250,9 @@
if not A._hints.get('nolength'):
_fields_ = [('length', lentype),
- ('items', max_n * ctypes_item)]
+ ('items',
+ (max_n + A._hints.get('extra_item_after_alloc', 0))
+ * ctypes_item)]
else:
_fields_ = [('items', max_n * ctypes_item)]
@@ -695,6 +697,9 @@
# we have no clue, so we allow whatever index
return 0, maxint
+ def shrinklength(self, newlength):
+ raise NotImplementedError
+
def getitem(self, index, uninitialized_ok=False):
res = self._storage.contents._getitem(index, boundscheck=False)
if isinstance(self._TYPE.OF, lltype.ContainerType):
diff --git a/rpython/rtyper/lltypesystem/llmemory.py
b/rpython/rtyper/lltypesystem/llmemory.py
--- a/rpython/rtyper/lltypesystem/llmemory.py
+++ b/rpython/rtyper/lltypesystem/llmemory.py
@@ -304,8 +304,15 @@
return cast_ptr_to_adr(p)
def raw_memcopy(self, srcadr, dstadr):
- # should really copy the length field, but we can't
- pass
+ # copy the length field, if we can
+ srclen = srcadr.ptr._obj.getlength()
+ dstlen = dstadr.ptr._obj.getlength()
+ if dstlen != srclen:
+ assert dstlen > srclen, "can't increase the length"
+ # a decrease in length occurs in the GC tests when copying a STR:
+ # the copy is initially allocated with really one extra char,
+ # the 'extra_item_after_alloc', and must be fixed.
+ dstadr.ptr._obj.shrinklength(srclen)
class ArrayLengthOffset(AddressOffset):
@@ -390,11 +397,23 @@
else:
raise Exception("don't know how to take the size of a %r"%TYPE)
[email protected]()
+def extra_item_after_alloc(ARRAY):
+ assert isinstance(ARRAY, lltype.Array)
+ return ARRAY._hints.get('extra_item_after_alloc', 0)
+
@specialize.arg(0)
def sizeof(TYPE, n=None):
+ """Return the symbolic size of TYPE.
+ For a Struct with no varsized part, it must be called with n=None.
+ For an Array or a Struct with a varsized part, it is the number of items.
+ There is a special case to return 1 more than requested if the array
+ has the hint 'extra_item_after_alloc' set to 1.
+ """
if n is None:
return _sizeof_none(TYPE)
elif isinstance(TYPE, lltype.Array):
+ n += extra_item_after_alloc(TYPE)
return itemoffsetof(TYPE) + _sizeof_none(TYPE.OF) * n
else:
return _sizeof_int(TYPE, n)
@@ -1036,7 +1055,7 @@
_reccopy(subsrc, subdst)
else:
# this is a hack XXX de-hack this
- llvalue = source._obj.getitem(i, uninitialized_ok=True)
+ llvalue = source._obj.getitem(i, uninitialized_ok=2)
if not isinstance(llvalue, lltype._uninitialized):
dest._obj.setitem(i, llvalue)
elif isinstance(T, lltype.Struct):
diff --git a/rpython/rtyper/lltypesystem/lltype.py
b/rpython/rtyper/lltypesystem/lltype.py
--- a/rpython/rtyper/lltypesystem/lltype.py
+++ b/rpython/rtyper/lltypesystem/lltype.py
@@ -1926,14 +1926,29 @@
return 0, stop
def getitem(self, index, uninitialized_ok=False):
- v = self.items[index]
+ try:
+ v = self.items[index]
+ except IndexError:
+ if (index == len(self.items) and uninitialized_ok == 2 and
+ self._TYPE._hints.get('extra_item_after_alloc')):
+ # special case: reading the extra final char returns
+ # an uninitialized, if 'uninitialized_ok==2'
+ return _uninitialized(self._TYPE.OF)
+ raise
if isinstance(v, _uninitialized) and not uninitialized_ok:
raise UninitializedMemoryAccess("%r[%s]"%(self, index))
return v
def setitem(self, index, value):
assert typeOf(value) == self._TYPE.OF
- self.items[index] = value
+ try:
+ self.items[index] = value
+ except IndexError:
+ if (index == len(self.items) and value == '\x00' and
+ self._TYPE._hints.get('extra_item_after_alloc')):
+ # special case: writing NULL to the extra final char
+ return
+ raise
assert not '__dict__' in dir(_array)
assert not '__dict__' in dir(_struct)
diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py
--- a/rpython/rtyper/lltypesystem/rffi.py
+++ b/rpython/rtyper/lltypesystem/rffi.py
@@ -15,7 +15,7 @@
from rpython.rtyper.tool.rfficache import platform, sizeof_c_type
from rpython.translator.tool.cbuild import ExternalCompilationInfo
from rpython.rtyper.annlowlevel import llhelper
-from rpython.rlib.objectmodel import we_are_translated
+from rpython.rlib.objectmodel import we_are_translated, we_are_translated_to_c
from rpython.rlib.rstring import StringBuilder, UnicodeBuilder, assert_str0
from rpython.rlib import jit
from rpython.rtyper.lltypesystem import llmemory
@@ -232,40 +232,36 @@
call_external_function = jit.dont_look_inside(
call_external_function)
+ def _oops():
+ raise AssertionError("can't pass (any more) a unicode string"
+ " directly to a VOIDP argument")
+ _oops._annspecialcase_ = 'specialize:memo'
+
unrolling_arg_tps = unrolling_iterable(enumerate(args))
def wrapper(*args):
real_args = ()
+ # XXX 'to_free' leaks if an allocation fails with MemoryError
+ # and was not the first in this function
to_free = ()
for i, TARGET in unrolling_arg_tps:
arg = args[i]
- freeme = None
- if TARGET == CCHARP:
+ if TARGET == CCHARP or TARGET is VOIDP:
if arg is None:
arg = lltype.nullptr(CCHARP.TO) # None => (char*)NULL
- freeme = arg
+ to_free = to_free + (arg, '\x04')
elif isinstance(arg, str):
- arg = str2charp(arg)
- # XXX leaks if a str2charp() fails with MemoryError
- # and was not the first in this function
- freeme = arg
+ tup = get_nonmovingbuffer_final_null(arg)
+ to_free = to_free + tup
+ arg = tup[0]
+ elif isinstance(arg, unicode):
+ _oops()
elif TARGET == CWCHARP:
if arg is None:
arg = lltype.nullptr(CWCHARP.TO) # None => (wchar_t*)NULL
- freeme = arg
+ to_free = to_free + (arg,)
elif isinstance(arg, unicode):
arg = unicode2wcharp(arg)
- # XXX leaks if a unicode2wcharp() fails with MemoryError
- # and was not the first in this function
- freeme = arg
- elif TARGET is VOIDP:
- if arg is None:
- arg = lltype.nullptr(VOIDP.TO)
- elif isinstance(arg, str):
- arg = str2charp(arg)
- freeme = arg
- elif isinstance(arg, unicode):
- arg = unicode2wcharp(arg)
- freeme = arg
+ to_free = to_free + (arg,)
elif _isfunctype(TARGET) and not _isllptr(arg):
# XXX pass additional arguments
use_gil = invoke_around_handlers
@@ -283,11 +279,22 @@
or TARGET is lltype.Bool)):
arg = cast(TARGET, arg)
real_args = real_args + (arg,)
- to_free = to_free + (freeme,)
res = call_external_function(*real_args)
for i, TARGET in unrolling_arg_tps:
- if to_free[i]:
- lltype.free(to_free[i], flavor='raw')
+ arg = args[i]
+ if TARGET == CCHARP or TARGET is VOIDP:
+ if arg is None:
+ to_free = to_free[2:]
+ elif isinstance(arg, str):
+ free_nonmovingbuffer(arg, to_free[0], to_free[1])
+ to_free = to_free[2:]
+ elif TARGET == CWCHARP:
+ if arg is None:
+ to_free = to_free[1:]
+ elif isinstance(arg, unicode):
+ free_wcharp(to_free[0])
+ to_free = to_free[1:]
+ assert len(to_free) == 0
if rarithmetic.r_int is not r_int:
if result is INT:
return cast(lltype.Signed, res)
@@ -816,52 +823,69 @@
string is already nonmovable or could be pinned. Must be followed by a
free_nonmovingbuffer call.
- First bool returned indicates if 'data' was pinned. Second bool returned
- indicates if we did a raw alloc because pinning failed. Both bools
- should never be true at the same time.
+ Also returns a char:
+ * \4: no pinning, returned pointer is inside 'data' which is nonmovable
+ * \5: 'data' was pinned, returned pointer is inside
+ * \6: pinning failed, returned pointer is raw malloced
+
+ For strings (not unicodes), the len()th character of the resulting
+ raw buffer is available, but not initialized. Use
+ get_nonmovingbuffer_final_null() instead of get_nonmovingbuffer()
+ to get a regular null-terminated "char *".
"""
lldata = llstrtype(data)
count = len(data)
- pinned = False
- if rgc.can_move(data):
- if rgc.pin(data):
- pinned = True
+ if we_are_translated_to_c() and not rgc.can_move(data):
+ flag = '\x04'
+ else:
+ if we_are_translated_to_c() and rgc.pin(data):
+ flag = '\x05'
else:
- buf = lltype.malloc(TYPEP.TO, count, flavor='raw')
+ buf = lltype.malloc(TYPEP.TO, count + (TYPEP is CCHARP),
+ flavor='raw')
copy_string_to_raw(lldata, buf, 0, count)
- return buf, pinned, True
+ return buf, '\x06'
# ^^^ raw malloc used to get a nonmovable copy
#
- # following code is executed if:
+ # following code is executed after we're translated to C, if:
# - rgc.can_move(data) and rgc.pin(data) both returned true
# - rgc.can_move(data) returned false
data_start = cast_ptr_to_adr(lldata) + \
offsetof(STRTYPE, 'chars') + itemoffsetof(STRTYPE.chars, 0)
- return cast(TYPEP, data_start), pinned, False
+ return cast(TYPEP, data_start), flag
# ^^^ already nonmovable. Therefore it's not raw allocated nor
# pinned.
get_nonmovingbuffer._always_inline_ = 'try' # get rid of the returned tuple
get_nonmovingbuffer._annenforceargs_ = [strtype]
- # (str, char*, bool, bool) -> None
+ @jit.dont_look_inside
+ def get_nonmovingbuffer_final_null(data):
+ tup = get_nonmovingbuffer(data)
+ buf, flag = tup
+ buf[len(data)] = lastchar
+ return tup
+ get_nonmovingbuffer_final_null._always_inline_ = 'try'
+ get_nonmovingbuffer_final_null._annenforceargs_ = [strtype]
+
+ # (str, char*, char) -> None
# Can't inline this because of the raw address manipulation.
@jit.dont_look_inside
- def free_nonmovingbuffer(data, buf, is_pinned, is_raw):
+ def free_nonmovingbuffer(data, buf, flag):
"""
- Keep 'data' alive and unpin it if it was pinned ('is_pinned' is true).
- Otherwise free the non-moving copy ('is_raw' is true).
+ Keep 'data' alive and unpin it if it was pinned (flag==\5).
+ Otherwise free the non-moving copy (flag==\6).
"""
- if is_pinned:
+ if flag == '\x05':
rgc.unpin(data)
- if is_raw:
+ if flag == '\x06':
lltype.free(buf, flavor='raw')
- # if is_pinned and is_raw are false: data was already nonmovable,
+ # if flag == '\x04': data was already nonmovable,
# we have nothing to clean up
keepalive_until_here(data)
- free_nonmovingbuffer._annenforceargs_ = [strtype, None, bool, bool]
+ free_nonmovingbuffer._annenforceargs_ = [strtype, None, None]
# int -> (char*, str, int)
# Can't inline this because of the raw address manipulation.
@@ -947,18 +971,19 @@
return (str2charp, free_charp, charp2str,
get_nonmovingbuffer, free_nonmovingbuffer,
+ get_nonmovingbuffer_final_null,
alloc_buffer, str_from_buffer, keep_buffer_alive_until_here,
charp2strn, charpsize2str, str2chararray, str2rawmem,
)
(str2charp, free_charp, charp2str,
- get_nonmovingbuffer, free_nonmovingbuffer,
+ get_nonmovingbuffer, free_nonmovingbuffer, get_nonmovingbuffer_final_null,
alloc_buffer, str_from_buffer, keep_buffer_alive_until_here,
charp2strn, charpsize2str, str2chararray, str2rawmem,
) = make_string_mappings(str)
(unicode2wcharp, free_wcharp, wcharp2unicode,
- get_nonmoving_unicodebuffer, free_nonmoving_unicodebuffer,
+ get_nonmoving_unicodebuffer, free_nonmoving_unicodebuffer, __not_usable,
alloc_unicodebuffer, unicode_from_buffer, keep_unicodebuffer_alive_until_here,
wcharp2unicoden, wcharpsize2unicode, unicode2wchararray, unicode2rawmem,
) = make_string_mappings(unicode)
@@ -1194,10 +1219,28 @@
def __init__(self, data):
self.data = data
def __enter__(self):
- self.buf, self.pinned, self.is_raw = get_nonmovingbuffer(self.data)
+ self.buf, self.flag = get_nonmovingbuffer(self.data)
return self.buf
def __exit__(self, *args):
- free_nonmovingbuffer(self.data, self.buf, self.pinned, self.is_raw)
+ free_nonmovingbuffer(self.data, self.buf, self.flag)
+ __init__._always_inline_ = 'try'
+ __enter__._always_inline_ = 'try'
+ __exit__._always_inline_ = 'try'
+
+class scoped_view_charp:
+ """Returns a 'char *' that (tries to) point inside the given RPython
+ string (which must not be None). You can replace scoped_str2charp()
+ with scoped_view_charp() in all places that guarantee that the
+ content of the 'char[]' array will not be modified.
+ """
+ def __init__(self, data):
+ self.data = data
+ __init__._annenforceargs_ = [None, annmodel.SomeString(can_be_None=False)]
+ def __enter__(self):
+ self.buf, self.flag = get_nonmovingbuffer_final_null(self.data)
+ return self.buf
+ def __exit__(self, *args):
+ free_nonmovingbuffer(self.data, self.buf, self.flag)
__init__._always_inline_ = 'try'
__enter__._always_inline_ = 'try'
__exit__._always_inline_ = 'try'
@@ -1206,10 +1249,10 @@
def __init__(self, data):
self.data = data
def __enter__(self):
- self.buf, self.pinned, self.is_raw = get_nonmoving_unicodebuffer(self.data)
+ self.buf, self.flag = get_nonmoving_unicodebuffer(self.data)
return self.buf
def __exit__(self, *args):
- free_nonmoving_unicodebuffer(self.data, self.buf, self.pinned, self.is_raw)
+ free_nonmoving_unicodebuffer(self.data, self.buf, self.flag)
__init__._always_inline_ = 'try'
__enter__._always_inline_ = 'try'
__exit__._always_inline_ = 'try'
diff --git a/rpython/rtyper/lltypesystem/rstr.py b/rpython/rtyper/lltypesystem/rstr.py
--- a/rpython/rtyper/lltypesystem/rstr.py
+++ b/rpython/rtyper/lltypesystem/rstr.py
@@ -1238,7 +1238,8 @@
# ____________________________________________________________
STR.become(GcStruct('rpy_string', ('hash', Signed),
- ('chars', Array(Char, hints={'immutable': True})),
+ ('chars', Array(Char, hints={'immutable': True,
+ 'extra_item_after_alloc': 1})),
adtmeths={'malloc' : staticAdtMethod(mallocstr),
'empty' : staticAdtMethod(emptystrfun),
'copy_contents' : staticAdtMethod(copy_string_contents),
diff --git a/rpython/rtyper/lltypesystem/test/test_rffi.py b/rpython/rtyper/lltypesystem/test/test_rffi.py
--- a/rpython/rtyper/lltypesystem/test/test_rffi.py
+++ b/rpython/rtyper/lltypesystem/test/test_rffi.py
@@ -516,7 +516,7 @@
def test_nonmovingbuffer(self):
d = 'some cool data that should not move'
def f():
- buf, is_pinned, is_raw = get_nonmovingbuffer(d)
+ buf, flag = get_nonmovingbuffer(d)
try:
counter = 0
for i in range(len(d)):
@@ -524,7 +524,7 @@
counter += 1
return counter
finally:
- free_nonmovingbuffer(d, buf, is_pinned, is_raw)
+ free_nonmovingbuffer(d, buf, flag)
assert f() == len(d)
fn = self.compile(f, [], gcpolicy='ref')
assert fn() == len(d)
@@ -534,13 +534,13 @@
def f():
counter = 0
for n in range(32):
- buf, is_pinned, is_raw = get_nonmovingbuffer(d)
+ buf, flag = get_nonmovingbuffer(d)
try:
for i in range(len(d)):
if buf[i] == d[i]:
counter += 1
finally:
- free_nonmovingbuffer(d, buf, is_pinned, is_raw)
+ free_nonmovingbuffer(d, buf, flag)
return counter
fn = self.compile(f, [], gcpolicy='semispace')
# The semispace gc uses raw_malloc for its internal data structs
@@ -555,13 +555,13 @@
def f():
counter = 0
for n in range(32):
- buf, is_pinned, is_raw = get_nonmovingbuffer(d)
+ buf, flag = get_nonmovingbuffer(d)
try:
for i in range(len(d)):
if buf[i] == d[i]:
counter += 1
finally:
- free_nonmovingbuffer(d, buf, is_pinned, is_raw)
+ free_nonmovingbuffer(d, buf, flag)
return counter
fn = self.compile(f, [], gcpolicy='incminimark')
# The incminimark gc uses raw_malloc for its internal data structs
@@ -835,3 +835,11 @@
if hasattr(rffi, '__INT128_T'):
value = 0xAAAABBBBCCCCDDDD
assert cast(rffi.__INT128_T, r_uint64(value)) == value
+
+def test_scoped_view_charp():
+ s = 'bar'
+ with scoped_view_charp(s) as buf:
+ assert buf[0] == 'b'
+ assert buf[1] == 'a'
+ assert buf[2] == 'r'
+ assert buf[3] == '\x00'
diff --git a/rpython/translator/c/node.py b/rpython/translator/c/node.py
--- a/rpython/translator/c/node.py
+++ b/rpython/translator/c/node.py
@@ -253,8 +253,11 @@
yield '\t' + cdecl(typename, fname) + ';'
if not self.ARRAY._hints.get('nolength', False):
yield '\tlong length;'
+ varlength = self.varlength
+ if varlength is not None:
+ varlength += self.ARRAY._hints.get('extra_item_after_alloc', 0)
line = '%s;' % cdecl(self.itemtypename,
- 'items[%s]' % deflength(self.varlength))
+ 'items[%s]' % deflength(varlength))
if self.ARRAY.OF is Void: # strange
line = '/* array of void */'
if self.ARRAY._hints.get('nolength', False):
diff --git a/rpython/translator/c/test/test_lltyped.py b/rpython/translator/c/test/test_lltyped.py
--- a/rpython/translator/c/test/test_lltyped.py
+++ b/rpython/translator/c/test/test_lltyped.py
@@ -1,4 +1,4 @@
-import py
+import py, random
from rpython.rtyper.lltypesystem.lltype import *
from rpython.rtyper.lltypesystem import rffi
from rpython.translator.c.test.test_genc import compile
@@ -255,28 +255,6 @@
res2 = fn(0)
assert res1 == res2
- def test_null_padding(self):
- py.test.skip("we no longer pad our RPython strings with a final NUL")
- from rpython.rtyper.lltypesystem import llmemory
- from rpython.rtyper.lltypesystem import rstr
- chars_offset = llmemory.FieldOffset(rstr.STR, 'chars') + \
- llmemory.ArrayItemsOffset(rstr.STR.chars)
- # sadly, there's no way of forcing this to fail if the strings
- # are allocated in a region of memory such that they just
- # happen to get a NUL byte anyway :/ (a debug build will
- # always fail though)
- def trailing_byte(s):
- adr_s = llmemory.cast_ptr_to_adr(s)
- return (adr_s + chars_offset).char[len(s)]
- def f(x):
- r = 0
- for i in range(x):
- r += ord(trailing_byte(' '*(100-x*x)))
- return r
- fn = self.getcompiled(f, [int])
- res = fn(10)
- assert res == 0
-
def test_cast_primitive(self):
def f(x):
x = cast_primitive(UnsignedLongLong, x)
@@ -1023,3 +1001,49 @@
assert fn(r_longlong(1)) == True
assert fn(r_longlong(256)) == True
assert fn(r_longlong(2**32)) == True
+
+ def test_extra_item_after_alloc(self):
+ from rpython.rlib import rgc
+ from rpython.rtyper.lltypesystem import lltype
+ from rpython.rtyper.lltypesystem import rstr
+ # all STR objects should be allocated with enough space for one
+ # extra char. Check this for prebuilt strings, and for dynamically
+ # allocated ones with the default GC for tests. Use strings of 8,
+ # 16 and 24 chars because if the extra char is missing, writing to it
+ # is likely to cause corruption in nearby structures.
+ sizes = [random.choice([8, 16, 24]) for i in range(100)]
+ A = lltype.Struct('A', ('x', lltype.Signed))
+ prebuilt = [(rstr.mallocstr(sz),
+ lltype.malloc(A, flavor='raw', immortal=True))
+ for sz in sizes]
+ k = 0
+ for i, (s, a) in enumerate(prebuilt):
+ a.x = i
+ for i in range(len(s.chars)):
+ k += 1
+ if k == 256:
+ k = 1
+ s.chars[i] = chr(k)
+
+ def check(lst):
+ hashes = []
+ for i, (s, a) in enumerate(lst):
+ assert a.x == i
+ rgc.ll_write_final_null_char(s)
+ for i, (s, a) in enumerate(lst):
+ assert a.x == i # check it was not overwritten
+ def f():
+ check(prebuilt)
+ lst1 = []
+ for i, sz in enumerate(sizes):
+ s = rstr.mallocstr(sz)
+ a = lltype.malloc(A, flavor='raw')
+ a.x = i
+ lst1.append((s, a))
+ check(lst1)
+ for _, a in lst1:
+ lltype.free(a, flavor='raw')
+ return 42
+
+ fn = self.getcompiled(f, [])
+ assert fn() == 42
diff --git a/rpython/translator/c/test/test_newgc.py b/rpython/translator/c/test/test_newgc.py
--- a/rpython/translator/c/test/test_newgc.py
+++ b/rpython/translator/c/test/test_newgc.py
@@ -3,6 +3,7 @@
import os
import sys
import subprocess
+import random
import py
@@ -1468,6 +1469,52 @@
res = self.run('nursery_hash_base')
assert res >= 195
+ def define_extra_item_after_alloc(cls):
+ from rpython.rtyper.lltypesystem import rstr
+ # all STR objects should be allocated with enough space for
+ # one extra char. Check this with our GCs. Use strings of 8,
+ # 16 and 24 chars because if the extra char is missing,
+ # writing to it is likely to cause corruption in nearby
+ # structures.
+ sizes = [random.choice([8, 16, 24]) for i in range(100)]
+ A = lltype.Struct('A', ('x', lltype.Signed))
+ prebuilt = [(rstr.mallocstr(sz),
+ lltype.malloc(A, flavor='raw', immortal=True))
+ for sz in sizes]
+ k = 0
+ for i, (s, a) in enumerate(prebuilt):
+ a.x = i
+ for i in range(len(s.chars)):
+ k += 1
+ if k == 256:
+ k = 1
+ s.chars[i] = chr(k)
+
+ def check(lst):
+ hashes = []
+ for i, (s, a) in enumerate(lst):
+ assert a.x == i
+ rgc.ll_write_final_null_char(s)
+ for i, (s, a) in enumerate(lst):
+ assert a.x == i # check it was not overwritten
+ def fn():
+ check(prebuilt)
+ lst1 = []
+ for i, sz in enumerate(sizes):
+ s = rstr.mallocstr(sz)
+ a = lltype.malloc(A, flavor='raw')
+ a.x = i
+ lst1.append((s, a))
+ check(lst1)
+ for _, a in lst1:
+ lltype.free(a, flavor='raw')
+ return 42
+ return fn
+
+ def test_extra_item_after_alloc(self):
+ res = self.run('extra_item_after_alloc')
+ assert res == 42
+
class TestGenerationalGC(TestSemiSpaceGC):
gcpolicy = "generation"
diff --git a/rpython/translator/tool/test/test_staticsizereport.py b/rpython/translator/tool/test/test_staticsizereport.py
--- a/rpython/translator/tool/test/test_staticsizereport.py
+++ b/rpython/translator/tool/test/test_staticsizereport.py
@@ -67,7 +67,7 @@
(4 * S + 2 * P) + # struct dicttable
(S + 2 * 8192) + # indexes, length 8192, rffi.USHORT
(S + (S + S) * 3840) + # entries, length 3840
- (S + S + 5) * 3840) # 3840 strings with 5 chars each
+ (S + S + 6) * 3840) # 3840 strings with 5 chars each (+1 final)
assert guess_size(func.builder.db, fixarrayvalnode, set()) == 100 * rffi.sizeof(lltype.Signed) + 1 * rffi.sizeof(lltype.Signed)
assert guess_size(func.builder.db, dynarrayvalnode, set()) == 100 * rffi.sizeof(lltype.Signed) + 2 * rffi.sizeof(lltype.Signed) + 1 * rffi.sizeof(rffi.VOIDP)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit