Author: Maciej Fijalkowski <[email protected]>
Branch:
Changeset: r63455:6bf943756133
Date: 2013-04-17 18:18 +0200
http://bitbucket.org/pypy/pypy/changeset/6bf943756133/
Log: Merge fast-newarray. This inlines the fast path of newarray in the
assembler. Disabled on ARM until we fix issues.
diff --git a/rpython/jit/backend/llsupport/assembler.py
b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -70,6 +70,14 @@
# the address of the function called by 'new'
gc_ll_descr = self.cpu.gc_ll_descr
gc_ll_descr.initialize()
+ if hasattr(gc_ll_descr, 'minimal_size_in_nursery'):
+ self.gc_minimal_size_in_nursery =
gc_ll_descr.minimal_size_in_nursery
+ else:
+ self.gc_minimal_size_in_nursery = 0
+ if hasattr(gc_ll_descr, 'gcheaderbuilder'):
+ self.gc_size_of_header = gc_ll_descr.gcheaderbuilder.size_gc_header
+ else:
+ self.gc_size_of_header = WORD # for tests
self.memcpy_addr = self.cpu.cast_ptr_to_int(memcpy_fn)
self._build_failure_recovery(False, withfloats=False)
self._build_failure_recovery(True, withfloats=False)
@@ -85,7 +93,20 @@
self._build_wb_slowpath(True, withfloats=True)
self._build_propagate_exception_path()
if gc_ll_descr.get_malloc_slowpath_addr is not None:
- self._build_malloc_slowpath()
+ # generate a few slowpaths for various cases
+ self.malloc_slowpath = self._build_malloc_slowpath(kind='fixed')
+ self.malloc_slowpath_varsize = self._build_malloc_slowpath(
+ kind='var')
+ if hasattr(gc_ll_descr, 'malloc_str'):
+ self.malloc_slowpath_str = self._build_malloc_slowpath(kind='str')
+ else:
+ self.malloc_slowpath_str = None
+ if hasattr(gc_ll_descr, 'malloc_unicode'):
+ self.malloc_slowpath_unicode = self._build_malloc_slowpath(
+ kind='unicode')
+ else:
+ self.malloc_slowpath_unicode = None
+
self._build_stack_check_slowpath()
if gc_ll_descr.gcrootmap:
self._build_release_gil(gc_ll_descr.gcrootmap)
diff --git a/rpython/jit/backend/llsupport/gc.py
b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -415,6 +415,7 @@
return llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
type_id, size,
False, False, False)
+
self.generate_function('malloc_nursery', malloc_nursery_slowpath,
[lltype.Signed])
@@ -567,6 +568,9 @@
def get_malloc_slowpath_addr(self):
return self.get_malloc_fn_addr('malloc_nursery')
+ def get_malloc_slowpath_array_addr(self):
+ return self.get_malloc_fn_addr('malloc_array')
+
# ____________________________________________________________
def get_ll_description(gcdescr, translator=None, rtyper=None):
diff --git a/rpython/jit/backend/llsupport/llmodel.py
b/rpython/jit/backend/llsupport/llmodel.py
--- a/rpython/jit/backend/llsupport/llmodel.py
+++ b/rpython/jit/backend/llsupport/llmodel.py
@@ -21,6 +21,8 @@
class AbstractLLCPU(AbstractCPU):
from rpython.jit.metainterp.typesystem import llhelper as ts
+ can_inline_varsize_malloc = False
+
def __init__(self, rtyper, stats, opts, translate_support_code=False,
gcdescr=None):
assert type(opts) is not bool
@@ -171,9 +173,9 @@
def pos_exc_value():
addr = llop.get_exc_value_addr(llmemory.Address)
return heaptracker.adr2int(addr)
-
+
from rpython.rlib import rstack
-
+
STACK_CHECK_SLOWPATH = lltype.Ptr(lltype.FuncType([lltype.Signed],
lltype.Void))
def insert_stack_check():
diff --git a/rpython/jit/backend/llsupport/rewrite.py
b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -8,19 +8,29 @@
from rpython.jit.backend.llsupport.descr import SizeDescr, ArrayDescr
from rpython.jit.metainterp.history import JitCellToken
+FLAG_ARRAY = 0
+FLAG_STR = 1
+FLAG_UNICODE = 2
+
class GcRewriterAssembler(object):
- # This class performs the following rewrites on the list of operations:
- #
- # - Remove the DEBUG_MERGE_POINTs.
- #
- # - Turn all NEW_xxx to either a CALL_MALLOC_GC, or a CALL_MALLOC_NURSERY
- # followed by SETFIELDs in order to initialize their GC fields. The
- # two advantages of CALL_MALLOC_NURSERY is that it inlines the common
- # path, and we need only one such operation to allocate several blocks
- # of memory at once.
- #
- # - Add COND_CALLs to the write barrier before SETFIELD_GC and
- # SETARRAYITEM_GC operations.
+ """ This class performs the following rewrites on the list of operations:
+
+ - Remove the DEBUG_MERGE_POINTs.
+
+ - Turn all NEW_xxx to either a CALL_MALLOC_GC, or a CALL_MALLOC_NURSERY
+ followed by SETFIELDs in order to initialize their GC fields. The
+ two advantages of CALL_MALLOC_NURSERY is that it inlines the common
+ path, and we need only one such operation to allocate several blocks
+ of memory at once.
+
+ - Add COND_CALLs to the write barrier before SETFIELD_GC and
+ SETARRAYITEM_GC operations.
+
+ recent_mallocs contains a dictionary of variable -> None. If a variable
+ is in the dictionary, subsequent setfields can be called without a write
+ barrier, because the variable was allocated after the last potentially
+ collecting resop
+ """
_previous_size = -1
_op_malloc_nursery = None
@@ -32,7 +42,7 @@
self.cpu = cpu
self.newops = []
self.known_lengths = {}
- self.recent_mallocs = {} # set of variables
+ self.recent_mallocs = {}
def rewrite(self, operations):
# we can only remember one malloc since the next malloc can possibly
@@ -92,9 +102,11 @@
assert isinstance(descr, ArrayDescr)
self.handle_new_array(descr, op)
elif opnum == rop.NEWSTR:
- self.handle_new_array(self.gc_ll_descr.str_descr, op)
+ self.handle_new_array(self.gc_ll_descr.str_descr, op,
+ kind=FLAG_STR)
elif opnum == rop.NEWUNICODE:
- self.handle_new_array(self.gc_ll_descr.unicode_descr, op)
+ self.handle_new_array(self.gc_ll_descr.unicode_descr, op,
+ kind=FLAG_UNICODE)
else:
raise NotImplementedError(op.getopname())
@@ -106,7 +118,7 @@
else:
self.gen_malloc_fixedsize(size, descr.tid, op.result)
- def handle_new_array(self, arraydescr, op):
+ def handle_new_array(self, arraydescr, op, kind=FLAG_ARRAY):
v_length = op.getarg(0)
total_size = -1
if isinstance(v_length, ConstInt):
@@ -119,6 +131,14 @@
pass # total_size is still -1
elif arraydescr.itemsize == 0:
total_size = arraydescr.basesize
+ elif (self.gc_ll_descr.can_use_nursery_malloc(1) and
+ self.gen_malloc_nursery_varsize(arraydescr.itemsize,
+ v_length, op.result, arraydescr, kind=kind)):
+ # note that we cannot initialize tid here, because the array
+ # might end up being allocated by malloc_external or some
+ # stuff that initializes GC header fields differently
+ self.gen_initialize_len(op.result, v_length, arraydescr.lendescr)
+ return
if (total_size >= 0 and
self.gen_malloc_nursery(total_size, op.result)):
self.gen_initialize_tid(op.result, arraydescr.tid)
@@ -152,7 +172,7 @@
size_box,
descr=descrs.jfi_frame_size)
self.newops.append(op0)
- self.gen_malloc_nursery_varsize(size_box, frame, is_small=True)
+ self.gen_malloc_nursery_varsize_small(size_box, frame)
self.gen_initialize_tid(frame, descrs.arraydescr.tid)
length_box = history.BoxInt()
op1 = ResOperation(rop.GETFIELD_GC, [history.ConstInt(frame_info)],
@@ -281,10 +301,28 @@
self._gen_call_malloc_gc([ConstInt(addr), v_num_elem], v_result,
self.gc_ll_descr.malloc_unicode_descr)
- def gen_malloc_nursery_varsize(self, sizebox, v_result, is_small=False):
+ def gen_malloc_nursery_varsize(self, itemsize, v_length, v_result,
+ arraydescr, kind=FLAG_ARRAY):
+ """ itemsize is an int, v_length and v_result are boxes
+ """
+ if not self.cpu.can_inline_varsize_malloc:
+ return False # temporary, kill when ARM supports it
+ gc_descr = self.gc_ll_descr
+ if (kind == FLAG_ARRAY and
+ (arraydescr.basesize != gc_descr.standard_array_basesize or
+ arraydescr.lendescr.offset !=
gc_descr.standard_array_length_ofs)):
+ return False
+ self.emitting_an_operation_that_can_collect()
+ op = ResOperation(rop.CALL_MALLOC_NURSERY_VARSIZE,
+ [ConstInt(kind), ConstInt(itemsize), v_length],
+ v_result, descr=arraydescr)
+ self.newops.append(op)
+ self.recent_mallocs[v_result] = None
+ return True
+
+ def gen_malloc_nursery_varsize_small(self, sizebox, v_result):
""" Generate CALL_MALLOC_NURSERY_VARSIZE_SMALL
"""
- assert is_small
self.emitting_an_operation_that_can_collect()
op = ResOperation(rop.CALL_MALLOC_NURSERY_VARSIZE_SMALL,
[sizebox],
diff --git a/rpython/jit/backend/llsupport/test/test_gc_integration.py
b/rpython/jit/backend/llsupport/test/test_gc_integration.py
--- a/rpython/jit/backend/llsupport/test/test_gc_integration.py
+++ b/rpython/jit/backend/llsupport/test/test_gc_integration.py
@@ -29,11 +29,11 @@
return r[r.find('1'):]
class TestRegallocGcIntegration(BaseTestRegalloc):
-
+
cpu = CPU(None, None)
cpu.gc_ll_descr = GcLLDescr_boehm(None, None, None)
cpu.setup_once()
-
+
S = lltype.GcForwardReference()
S.become(lltype.GcStruct('S', ('field', lltype.Ptr(S)),
('int', lltype.Signed)))
@@ -145,6 +145,7 @@
gcrootmap = None
passes_frame = True
write_barrier_descr = None
+ max_size_of_young_obj = 50
def __init__(self, callback):
GcLLDescription.__init__(self, None)
@@ -172,6 +173,20 @@
[lltype.Signed, jitframe.JITFRAMEPTR],
lltype.Signed)
+ def malloc_array(itemsize, tid, num_elem):
+ self.calls.append((itemsize, tid, num_elem))
+ return 13
+
+ self.malloc_slowpath_array_fnptr = llhelper_args(malloc_array,
+ [lltype.Signed] * 3,
+ lltype.Signed)
+
+ def malloc_str(size):
+ self.calls.append(('str', size))
+ return 13
+ self.generate_function('malloc_str', malloc_str, [lltype.Signed],
+ lltype.Signed)
+
def get_nursery_free_addr(self):
return rffi.cast(lltype.Signed, self.addrs)
@@ -181,6 +196,9 @@
def get_malloc_slowpath_addr(self):
return self.get_malloc_fn_addr('malloc_nursery')
+ def get_malloc_slowpath_array_addr(self):
+ return self.malloc_slowpath_array_fnptr
+
def check_nothing_in_nursery(self):
# CALL_MALLOC_NURSERY should not write anything in the nursery
for i in range(64):
@@ -229,7 +247,7 @@
p1 = call_malloc_nursery_varsize_small(i1)
p2 = call_malloc_nursery_varsize_small(i2)
guard_true(i0) [p0, p1, p2]
- '''
+ '''
self.interpret(ops, [16, 32, 16])
# check the returned pointers
gc_ll_descr = self.cpu.gc_ll_descr
@@ -244,6 +262,56 @@
# slowpath never called
assert gc_ll_descr.calls == []
+ def test_malloc_nursery_varsize(self):
+ self.cpu = self.getcpu(None)
+ A = lltype.GcArray(lltype.Signed)
+ arraydescr = self.cpu.arraydescrof(A)
+ arraydescr.tid = 15
+ ops = '''
+ [i0, i1, i2]
+ p0 = call_malloc_nursery_varsize(0, 8, i0, descr=arraydescr)
+ p1 = call_malloc_nursery_varsize(0, 5, i1, descr=arraydescr)
+ guard_false(i0) [p0, p1]
+ '''
+ self.interpret(ops, [1, 2, 3],
+ namespace={'arraydescr': arraydescr})
+ # check the returned pointers
+ gc_ll_descr = self.cpu.gc_ll_descr
+ nurs_adr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
+ ref = lambda n: self.cpu.get_ref_value(self.deadframe, n)
+ assert rffi.cast(lltype.Signed, ref(0)) == nurs_adr + 0
+ assert rffi.cast(lltype.Signed, ref(1)) == nurs_adr + 2*WORD + 8*1
+ # check the nursery content and state
+ assert gc_ll_descr.nursery[0] == chr(15)
+ assert gc_ll_descr.nursery[2 * WORD + 8] == chr(15)
+ assert gc_ll_descr.addrs[0] == nurs_adr + (((4 * WORD + 8*1 + 5*2) +
(WORD - 1)) & ~(WORD - 1))
+ # slowpath never called
+ assert gc_ll_descr.calls == []
+
+ def test_malloc_nursery_varsize_slowpath(self):
+ self.cpu = self.getcpu(None)
+ ops = """
+ [i0, i1, i2]
+ p0 = call_malloc_nursery_varsize(0, 8, i0, descr=arraydescr)
+ p1 = call_malloc_nursery_varsize(0, 5, i1, descr=arraydescr)
+ p3 = call_malloc_nursery_varsize(0, 5, i2, descr=arraydescr)
+ # overflow
+ p4 = call_malloc_nursery_varsize(0, 5, i2, descr=arraydescr)
+ # we didn't collect, so still overflow
+ p5 = call_malloc_nursery_varsize(1, 5, i2, descr=strdescr)
+ guard_false(i0) [p0, p1, p3, p4]
+ """
+ A = lltype.GcArray(lltype.Signed)
+ arraydescr = self.cpu.arraydescrof(A)
+ arraydescr.tid = 15
+ self.interpret(ops, [10, 3, 3],
+ namespace={'arraydescr': arraydescr,
+ 'strdescr': arraydescr})
+ # check the returned pointers
+ gc_ll_descr = self.cpu.gc_ll_descr
+ assert gc_ll_descr.calls == [(8, 15, 10), (5, 15, 3), ('str', 3)]
+ # one fit, one was too large, one was not fitting
+
def test_malloc_slowpath(self):
def check(frame):
expected_size = 1
@@ -294,7 +362,7 @@
s = bin(x[0]).count('1') + bin(x[1]).count('1')
assert s == 16
# all but two registers + some stuff on stack
-
+
self.cpu = self.getcpu(check)
S1 = lltype.GcStruct('S1')
S2 = lltype.GcStruct('S2', ('s0', lltype.Ptr(S1)),
@@ -360,7 +428,7 @@
class MockShadowStackRootMap(object):
is_shadow_stack = True
-
+
def __init__(self):
TP = rffi.CArray(lltype.Signed)
self.stack = lltype.malloc(TP, 10, flavor='raw')
@@ -369,7 +437,7 @@
self.stack_addr[0] = rffi.cast(lltype.Signed, self.stack)
def __del__(self):
- lltype.free(self.stack_addr, flavor='raw')
+ lltype.free(self.stack_addr, flavor='raw')
lltype.free(self.stack, flavor='raw')
def register_asm_addr(self, start, mark):
@@ -381,7 +449,7 @@
class WriteBarrierDescr(AbstractDescr):
jit_wb_cards_set = 0
jit_wb_if_flag_singlebyte = 1
-
+
def __init__(self, gc_ll_descr):
def write_barrier(frame):
gc_ll_descr.write_barrier_on_frame_called = frame
@@ -442,6 +510,15 @@
self.malloc_slowpath_fnptr = llhelper_args(malloc_slowpath,
[lltype.Signed],
lltype.Signed)
+
+ def malloc_array(itemsize, tid, num_elem):
+ import pdb
+ pdb.set_trace()
+
+ self.malloc_slowpath_array_fnptr = llhelper_args(malloc_array,
+ [lltype.Signed] * 3,
+ lltype.Signed)
+
self.all_nurseries = []
def init_nursery(self, nursery_size=None):
@@ -497,6 +574,9 @@
def get_malloc_slowpath_addr(self):
return self.malloc_slowpath_fnptr
+ def get_malloc_slowpath_array_addr(self):
+ return self.malloc_slowpath_array_fnptr
+
def get_nursery_free_addr(self):
return self.nursery_addr
@@ -507,7 +587,7 @@
for nursery in self.all_nurseries:
lltype.free(nursery, flavor='raw', track_allocation=False)
lltype.free(self.nursery_ptrs, flavor='raw')
-
+
def unpack_gcmap(frame):
res = []
val = 0
@@ -543,10 +623,10 @@
def test_shadowstack_call(self):
cpu = self.cpu
cpu.gc_ll_descr.init_nursery(100)
- cpu.setup_once()
+ cpu.setup_once()
S = self.S
frames = []
-
+
def check(i):
assert cpu.gc_ll_descr.gcrootmap.stack[0] == i
frame = rffi.cast(JITFRAMEPTR, i)
@@ -720,7 +800,7 @@
def test_call_may_force_gcmap(self):
cpu = self.cpu
-
+
def f(frame, arg, x):
assert not arg
assert frame.jf_gcmap[0] & 31 == 0
@@ -748,7 +828,7 @@
pdying = getarrayitem_gc(p0, 0, descr=arraydescr)
px = call_may_force(ConstClass(fptr), pf, pdying, i0, descr=calldescr)
guard_not_forced(descr=faildescr) [p1, p2, p3, px]
- finish(px, descr=finishdescr)
+ finish(px, descr=finaldescr)
""", namespace={'fptr': fptr, 'calldescr': calldescr,
'arraydescr': cpu.arraydescrof(A),
'faildescr': BasicFailDescr(1),
@@ -760,12 +840,12 @@
cpu.compile_loop(loop.inputargs, loop.operations, token)
frame = lltype.cast_opaque_ptr(JITFRAMEPTR,
cpu.execute_token(token, 1, a))
-
+
assert getmap(frame).count('1') == 4
def test_call_gcmap_no_guard(self):
cpu = self.cpu
-
+
def f(frame, arg, x):
assert not arg
assert frame.jf_gcmap[0] & 31 == 0
@@ -792,7 +872,7 @@
pdying = getarrayitem_gc(p0, 0, descr=arraydescr)
px = call(ConstClass(fptr), pf, pdying, i0, descr=calldescr)
guard_false(i0, descr=faildescr) [p1, p2, p3, px]
- finish(px, descr=finishdescr)
+ finish(px, descr=finaldescr)
""", namespace={'fptr': fptr, 'calldescr': calldescr,
'arraydescr': cpu.arraydescrof(A),
'faildescr': BasicFailDescr(1),
diff --git a/rpython/jit/backend/llsupport/test/test_regalloc_integration.py
b/rpython/jit/backend/llsupport/test/test_regalloc_integration.py
--- a/rpython/jit/backend/llsupport/test/test_regalloc_integration.py
+++ b/rpython/jit/backend/llsupport/test/test_regalloc_integration.py
@@ -93,8 +93,8 @@
type_system=self.type_system,
boxkinds=boxkinds)
- def interpret(self, ops, args, run=True):
- loop = self.parse(ops)
+ def interpret(self, ops, args, run=True, namespace=None):
+ loop = self.parse(ops, namespace=namespace)
self.loop = loop
looptoken = JitCellToken()
self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py
b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -85,6 +85,7 @@
signedframedescr = self.cpu.signedframedescr
floatframedescr = self.cpu.floatframedescr
casmdescr.compiled_loop_token = clt
+ tzdescr = None # no one cares
#
namespace.update(locals())
#
@@ -107,7 +108,9 @@
class BaseFakeCPU(object):
JITFRAME_FIXED_SIZE = 0
-
+
+ can_inline_varsize_malloc = True
+
def __init__(self):
self.tracker = FakeTracker()
self._cache = {}
@@ -121,7 +124,7 @@
def unpack_arraydescr_size(self, d):
return 0, d.itemsize, 0
-
+
def arraydescrof(self, ARRAY):
try:
return self._cache[ARRAY]
@@ -129,7 +132,7 @@
r = ArrayDescr(1, 2, FieldDescr('len', 0, 0, 0), 0)
self._cache[ARRAY] = r
return r
-
+
def fielddescrof(self, STRUCT, fname):
key = (STRUCT, fname)
try:
@@ -407,12 +410,23 @@
jump(i0)
""", """
[i0]
- p0 = call_malloc_gc(ConstClass(malloc_array), 1, \
- %(bdescr.tid)d, i0, \
- descr=malloc_array_descr)
+ p0 = call_malloc_nursery_varsize(0, 1, i0, descr=bdescr)
+ setfield_gc(p0, i0, descr=blendescr)
jump(i0)
""")
+ def test_rewrite_new_string(self):
+ self.check_rewrite("""
+ [i0]
+ p0 = newstr(i0)
+ jump(i0)
+ """, """
+ [i0]
+ p0 = call_malloc_nursery_varsize(1, 1, i0, descr=strdescr)
+ setfield_gc(p0, i0, descr=strlendescr)
+ jump(i0)
+ """)
+
def test_rewrite_assembler_nonstandard_array(self):
# a non-standard array is a bit hard to get; e.g. GcArray(Float)
# is like that on Win32, but not on Linux. Build one manually...
@@ -533,10 +547,12 @@
p1 = int_add(p0, %(strdescr.basesize + 16 * strdescr.itemsize)d)
setfield_gc(p1, %(unicodedescr.tid)d, descr=tiddescr)
setfield_gc(p1, 10, descr=unicodelendescr)
- p2 = call_malloc_gc(ConstClass(malloc_unicode), i2, \
- descr=malloc_unicode_descr)
- p3 = call_malloc_gc(ConstClass(malloc_str), i2, \
- descr=malloc_str_descr)
+ p2 = call_malloc_nursery_varsize(2, 4, i2, \
+ descr=unicodedescr)
+ setfield_gc(p2, i2, descr=unicodelendescr)
+ p3 = call_malloc_nursery_varsize(1, 1, i2, \
+ descr=strdescr)
+ setfield_gc(p3, i2, descr=strlendescr)
jump()
""")
@@ -716,8 +732,9 @@
[i0]
p0 = call_malloc_nursery(%(tdescr.size)d)
setfield_gc(p0, 5678, descr=tiddescr)
- p1 = call_malloc_gc(ConstClass(malloc_str), i0, \
- descr=malloc_str_descr)
+ p1 = call_malloc_nursery_varsize(1, 1, i0, \
+ descr=strdescr)
+ setfield_gc(p1, i0, descr=strlendescr)
cond_call_gc_wb(p0, p1, descr=wbdescr)
setfield_raw(p0, p1, descr=tzdescr)
jump()
diff --git a/rpython/jit/backend/test/runner_test.py
b/rpython/jit/backend/test/runner_test.py
--- a/rpython/jit/backend/test/runner_test.py
+++ b/rpython/jit/backend/test/runner_test.py
@@ -2833,7 +2833,7 @@
def test_assembler_call_propagate_exc(self):
from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU
-
+
if not isinstance(self.cpu, AbstractLLCPU):
py.test.skip("llgraph can't fake exceptions well enough, give up")
@@ -3477,10 +3477,10 @@
ops = """
[i0]
i1 = int_force_ge_zero(i0) # but forced to be in a register
- finish(i1, descr=1)
+ finish(i1, descr=descr)
"""
+ descr = BasicFinalDescr()
loop = parse(ops, self.cpu, namespace=locals())
- descr = loop.operations[-1].getdescr()
looptoken = JitCellToken()
self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
for inp, outp in [(2,2), (-3, 0)]:
@@ -3493,21 +3493,20 @@
py.test.skip("pointless test on non-asm")
from rpython.jit.backend.tool.viewcode import machine_code_dump
import ctypes
+ targettoken = TargetToken()
ops = """
[i2]
i0 = same_as(i2) # but forced to be in a register
- label(i0, descr=1)
+ label(i0, descr=targettoken)
i1 = int_add(i0, i0)
- guard_true(i1, descr=faildesr) [i1]
- jump(i1, descr=1)
+ guard_true(i1, descr=faildescr) [i1]
+ jump(i1, descr=targettoken)
"""
faildescr = BasicFailDescr(2)
loop = parse(ops, self.cpu, namespace=locals())
- faildescr = loop.operations[-2].getdescr()
- jumpdescr = loop.operations[-1].getdescr()
bridge_ops = """
[i0]
- jump(i0, descr=jumpdescr)
+ jump(i0, descr=targettoken)
"""
bridge = parse(bridge_ops, self.cpu, namespace=locals())
looptoken = JitCellToken()
diff --git a/rpython/jit/backend/x86/assembler.py
b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -1,7 +1,7 @@
import sys
import os
-from rpython.jit.backend.llsupport import symbolic, jitframe
+from rpython.jit.backend.llsupport import symbolic, jitframe, rewrite
from rpython.jit.backend.llsupport.assembler import (GuardToken, BaseAssembler,
DEBUG_COUNTER, debug_bridge)
from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
@@ -60,6 +60,7 @@
self.float_const_neg_addr = 0
self.float_const_abs_addr = 0
self.malloc_slowpath = 0
+ self.malloc_slowpath_varsize = 0
self.wb_slowpath = [0, 0, 0, 0, 0]
self.setup_failure_recovery()
self.datablockwrapper = None
@@ -158,27 +159,56 @@
mc.RET()
self._frame_realloc_slowpath = mc.materialize(self.cpu.asmmemmgr, [])
- def _build_malloc_slowpath(self):
+ def _build_malloc_slowpath(self, kind):
""" While arriving on slowpath, we have a gcpattern on stack,
nursery_head in eax and the size in edi - eax
"""
+ assert kind in ['fixed', 'str', 'unicode', 'var']
mc = codebuf.MachineCodeBlockWrapper()
self._push_all_regs_to_frame(mc, [eax, edi], self.cpu.supports_floats)
ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
# store the gc pattern
mc.MOV_rs(ecx.value, WORD)
mc.MOV_br(ofs, ecx.value)
- addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
- mc.SUB_rr(edi.value, eax.value) # compute the size we want
- # the arg is already in edi
+ if kind == 'fixed':
+ addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
+ elif kind == 'str':
+ addr = self.cpu.gc_ll_descr.get_malloc_fn_addr('malloc_str')
+ elif kind == 'unicode':
+ addr = self.cpu.gc_ll_descr.get_malloc_fn_addr('malloc_unicode')
+ else:
+ addr = self.cpu.gc_ll_descr.get_malloc_slowpath_array_addr()
mc.SUB_ri(esp.value, 16 - WORD)
- if IS_X86_32:
- mc.MOV_sr(0, edi.value)
- if hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
- mc.MOV_sr(WORD, ebp.value)
- elif hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
- # for tests only
- mc.MOV_rr(esi.value, ebp.value)
+ if kind == 'fixed':
+ mc.SUB_rr(edi.value, eax.value) # compute the size we want
+ # the arg is already in edi
+ if IS_X86_32:
+ mc.MOV_sr(0, edi.value)
+ if hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
+ mc.MOV_sr(WORD, ebp.value)
+ elif hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
+ # for tests only
+ mc.MOV_rr(esi.value, ebp.value)
+ elif kind == 'str' or kind == 'unicode':
+ if IS_X86_32:
+ # 1 for return value, 3 for alignment
+ mc.MOV_rs(edi.value, WORD * (3 + 1 + 1))
+ mc.MOV_sr(0, edi.value)
+ else:
+ mc.MOV_rs(edi.value, WORD * 3)
+ else:
+ if IS_X86_32:
+ mc.MOV_rs(edi.value, WORD * (3 + 1 + 1)) # itemsize
+ mc.MOV_sr(0, edi.value)
+ mc.MOV_rs(edi.value, WORD * (3 + 3 + 1))
+ mc.MOV_sr(WORD, edi.value) # tid
+ mc.MOV_rs(edi.value, WORD * (3 + 2 + 1))
+ mc.MOV_sr(2 * WORD, edi.value) # length
+ else:
+ # offset is 1 extra for call + 1 for SUB above
+ mc.MOV_rs(edi.value, WORD * 3) # itemsize
+ mc.MOV_rs(esi.value, WORD * 5) # tid
+ mc.MOV_rs(edx.value, WORD * 4) # length
self.set_extra_stack_depth(mc, 16)
mc.CALL(imm(addr))
mc.ADD_ri(esp.value, 16 - WORD)
@@ -205,7 +235,7 @@
mc.JMP(imm(self.propagate_exception_path))
#
rawstart = mc.materialize(self.cpu.asmmemmgr, [])
- self.malloc_slowpath = rawstart
+ return rawstart
def _build_propagate_exception_path(self):
if not self.cpu.propagate_exception_descr:
@@ -2352,6 +2382,51 @@
self.mc.overwrite(jmp_adr-1, chr(offset))
self.mc.MOV(heap(nursery_free_adr), edi)
+ def malloc_cond_varsize(self, kind, nursery_free_adr, nursery_top_adr,
+ lengthloc, itemsize, maxlength, gcmap,
+ arraydescr):
+ from rpython.jit.backend.llsupport.descr import ArrayDescr
+ assert isinstance(arraydescr, ArrayDescr)
+
+ self.mc.CMP(lengthloc, imm(maxlength))
+ self.mc.J_il8(rx86.Conditions['G'], 0) # patched later
+ jmp_adr0 = self.mc.get_relative_pos()
+ self.mc.MOV(eax, heap(nursery_free_adr))
+ self.mc.MOV(edi, lengthloc)
+ assert arraydescr.basesize >= self.gc_minimal_size_in_nursery
+ self.mc.IMUL_ri(edi.value, itemsize)
+ header_size = self.gc_size_of_header
+ self.mc.ADD_ri(edi.value, arraydescr.basesize + header_size + WORD - 1)
+ self.mc.AND_ri(edi.value, ~(WORD - 1))
+ self.mc.ADD(edi, heap(nursery_free_adr))
+ self.mc.CMP(edi, heap(nursery_top_adr))
+ # write down the tid
+ self.mc.MOV(mem(eax, 0), imm(arraydescr.tid))
+ self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later
+ jmp_adr1 = self.mc.get_relative_pos()
+ offset = self.mc.get_relative_pos() - jmp_adr0
+ assert 0 < offset <= 127
+ self.mc.overwrite(jmp_adr0-1, chr(offset))
+ if kind == rewrite.FLAG_ARRAY:
+ self.mc.MOV_si(WORD, itemsize)
+ self.mc.MOV(RawEspLoc(WORD * 2, INT), lengthloc)
+ self.mc.MOV_si(WORD * 3, arraydescr.tid)
+ addr = self.malloc_slowpath_varsize
+ else:
+ if kind == rewrite.FLAG_STR:
+ addr = self.malloc_slowpath_str
+ else:
+ assert kind == rewrite.FLAG_UNICODE
+ addr = self.malloc_slowpath_unicode
+ self.mc.MOV(RawEspLoc(WORD, INT), lengthloc)
+ # save the gcmap
+ self.push_gcmap(self.mc, gcmap, mov=True)
+ self.mc.CALL(imm(addr))
+ offset = self.mc.get_relative_pos() - jmp_adr1
+ assert 0 < offset <= 127
+ self.mc.overwrite(jmp_adr1-1, chr(offset))
+ self.mc.MOV(heap(nursery_free_adr), edi)
+
def force_token(self, reg):
# XXX kill me
assert isinstance(reg, RegLoc)
diff --git a/rpython/jit/backend/x86/regalloc.py
b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -54,7 +54,7 @@
class X86_64_RegisterManager(X86RegisterManager):
# r11 omitted because it's used as scratch
all_regs = [ecx, eax, edx, ebx, esi, edi, r8, r9, r10, r12, r13, r14, r15]
-
+
no_lower_byte_regs = []
save_around_call_regs = [eax, ecx, edx, esi, edi, r8, r9, r10]
@@ -103,7 +103,7 @@
def __init__(self, base_ofs):
FrameManager.__init__(self)
self.base_ofs = base_ofs
-
+
def frame_pos(self, i, box_type):
return FrameLoc(i, get_ebp_ofs(self.base_ofs, i), box_type)
@@ -870,6 +870,33 @@
gc_ll_descr.get_nursery_top_addr(),
sizeloc, gcmap)
+ def consider_call_malloc_nursery_varsize(self, op):
+ gc_ll_descr = self.assembler.cpu.gc_ll_descr
+ if not hasattr(gc_ll_descr, 'max_size_of_young_obj'):
+ raise Exception("unreachable code")
+ # for boehm, this function should never be called
+ length_box = op.getarg(2)
+ arraydescr = op.getdescr()
+ assert isinstance(length_box, BoxInt) # we cannot have a const here!
+ # looking at the result
+ self.rm.force_allocate_reg(op.result, selected_reg=eax)
+ #
+ # We need edx as a temporary, but otherwise don't save any more
+ # register. See comments in _build_malloc_slowpath().
+ tmp_box = TempBox()
+ self.rm.force_allocate_reg(tmp_box, selected_reg=edi)
+ lengthloc = self.rm.make_sure_var_in_reg(length_box, [op.result,
tmp_box])
+ gcmap = self.get_gcmap([eax, edi]) # allocate the gcmap *before*
+ self.rm.possibly_free_var(tmp_box)
+ #
+ itemsize = op.getarg(1).getint()
+ maxlength = (gc_ll_descr.max_size_of_young_obj - WORD * 2) / itemsize
+ self.assembler.malloc_cond_varsize(
+ op.getarg(0).getint(),
+ gc_ll_descr.get_nursery_free_addr(),
+ gc_ll_descr.get_nursery_top_addr(),
+ lengthloc, itemsize, maxlength, gcmap, arraydescr)
+
def get_gcmap(self, forbidden_regs=[], noregs=False):
frame_depth = self.fm.get_frame_depth()
gcmap = allocate_gcmap(self.assembler, frame_depth,
JITFRAME_FIXED_SIZE)
@@ -1313,7 +1340,7 @@
#jump_op = self.final_jump_op
#if jump_op is not None and jump_op.getdescr() is descr:
# self._compute_hint_frame_locations_from_descr(descr)
-
+
def consider_keepalive(self, op):
pass
diff --git a/rpython/jit/backend/x86/runner.py
b/rpython/jit/backend/x86/runner.py
--- a/rpython/jit/backend/x86/runner.py
+++ b/rpython/jit/backend/x86/runner.py
@@ -23,6 +23,8 @@
with_threads = False
frame_reg = regloc.ebp
+ can_inline_varsize_malloc = True
+
from rpython.jit.backend.x86.arch import JITFRAME_FIXED_SIZE
all_reg_indexes = gpr_reg_mgr_cls.all_reg_indexes
gen_regs = gpr_reg_mgr_cls.all_regs
diff --git a/rpython/jit/metainterp/executor.py
b/rpython/jit/metainterp/executor.py
--- a/rpython/jit/metainterp/executor.py
+++ b/rpython/jit/metainterp/executor.py
@@ -354,6 +354,7 @@
rop.QUASIIMMUT_FIELD,
rop.CALL_MALLOC_GC,
rop.CALL_MALLOC_NURSERY,
+ rop.CALL_MALLOC_NURSERY_VARSIZE,
rop.CALL_MALLOC_NURSERY_VARSIZE_SMALL,
rop.LABEL,
): # list of opcodes never executed by pyjitpl
diff --git a/rpython/jit/metainterp/resoperation.py
b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -525,6 +525,7 @@
'CALL_PURE/*d', # removed before it's passed to the backend
'CALL_MALLOC_GC/*d', # like CALL, but NULL => propagate MemoryError
'CALL_MALLOC_NURSERY/1', # nursery malloc, const number of bytes, zeroed
+ 'CALL_MALLOC_NURSERY_VARSIZE/3d',
'CALL_MALLOC_NURSERY_VARSIZE_SMALL/1',
# nursery malloc, non-const number of bytes, zeroed
# note that the number of bytes must be well known to be small enough
diff --git a/rpython/jit/metainterp/test/test_logger.py
b/rpython/jit/metainterp/test/test_logger.py
--- a/rpython/jit/metainterp/test/test_logger.py
+++ b/rpython/jit/metainterp/test/test_logger.py
@@ -93,7 +93,7 @@
[p0]
setfield_gc(p0, 3, descr=somedescr)
'''
- Descr()
+ somedescr = Descr()
self.reparse(inp, namespace=locals())
def test_guard(self):
diff --git a/rpython/jit/tool/oparser.py b/rpython/jit/tool/oparser.py
--- a/rpython/jit/tool/oparser.py
+++ b/rpython/jit/tool/oparser.py
@@ -106,6 +106,8 @@
tt = self.model.TargetToken(token)
self._consts[poss_descr] = tt
return tt
+ else:
+ raise
def box_for_var(self, elem):
try:
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit