Author: Armin Rigo <[email protected]>
Branch:
Changeset: r83593:a07ab092b64a
Date: 2016-04-09 17:14 +0300
http://bitbucket.org/pypy/pypy/changeset/a07ab092b64a/
Log: hg merge jit-constptr-2
Remove the forced minor collection that occurs when rewriting the
assembler at the start of the JIT backend. This is done by emitting
the ConstPtrs in a separate table, and loading from the table.
Gives improved warm-up time and memory usage. Also removes annoying
special-purpose code for pinned pointers.
diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py
--- a/rpython/jit/backend/arm/assembler.py
+++ b/rpython/jit/backend/arm/assembler.py
@@ -14,7 +14,7 @@
CoreRegisterManager, check_imm_arg, VFPRegisterManager,
operations as regalloc_operations)
from rpython.jit.backend.llsupport import jitframe, rewrite
-from rpython.jit.backend.llsupport.assembler import DEBUG_COUNTER,
debug_bridge, BaseAssembler
+from rpython.jit.backend.llsupport.assembler import DEBUG_COUNTER,
BaseAssembler
from rpython.jit.backend.llsupport.regalloc import get_scale,
valid_addressing_size
from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
from rpython.jit.backend.model import CompiledLoopToken
@@ -481,8 +481,9 @@
def generate_quick_failure(self, guardtok):
startpos = self.mc.currpos()
- fail_descr, target = self.store_info_on_descr(startpos, guardtok)
- self.regalloc_push(imm(fail_descr))
+ faildescrindex, target = self.store_info_on_descr(startpos, guardtok)
+ self.load_from_gc_table(r.ip.value, faildescrindex)
+ self.regalloc_push(r.ip)
self.push_gcmap(self.mc, gcmap=guardtok.gcmap, push=True)
self.mc.BL(target)
return startpos
@@ -556,7 +557,7 @@
debug_stop('jit-backend-ops')
def _call_header(self):
- assert self.mc.currpos() == 0
+ # there is the gc table before this point
self.gen_func_prolog()
def _call_header_with_stack_check(self):
@@ -596,20 +597,22 @@
frame_info = self.datablockwrapper.malloc_aligned(
jitframe.JITFRAMEINFO_SIZE, alignment=WORD)
clt.frame_info = rffi.cast(jitframe.JITFRAMEINFOPTR, frame_info)
- clt.allgcrefs = []
clt.frame_info.clear() # for now
if log:
operations = self._inject_debugging_code(looptoken, operations,
'e', looptoken.number)
+ regalloc = Regalloc(assembler=self)
+ allgcrefs = []
+ operations = regalloc.prepare_loop(inputargs, operations, looptoken,
+ allgcrefs)
+ self.reserve_gcref_table(allgcrefs)
+ functionpos = self.mc.get_relative_pos()
+
self._call_header_with_stack_check()
self._check_frame_depth_debug(self.mc)
- regalloc = Regalloc(assembler=self)
- operations = regalloc.prepare_loop(inputargs, operations, looptoken,
- clt.allgcrefs)
-
loop_head = self.mc.get_relative_pos()
looptoken._ll_loop_code = loop_head
#
@@ -620,9 +623,11 @@
self.write_pending_failure_recoveries()
+ full_size = self.mc.get_relative_pos()
rawstart = self.materialize_loop(looptoken)
- looptoken._function_addr = looptoken._ll_function_addr = rawstart
+ looptoken._ll_function_addr = rawstart + functionpos
+ self.patch_gcref_table(looptoken, rawstart)
self.process_pending_guards(rawstart)
self.fixup_target_tokens(rawstart)
@@ -641,7 +646,13 @@
looptoken.number, loopname,
r_uint(rawstart + loop_head),
r_uint(rawstart + size_excluding_failure_stuff),
- r_uint(rawstart)))
+ r_uint(rawstart + functionpos)))
+ debug_print(" gc table: 0x%x" % r_uint(rawstart))
+ debug_print(" function: 0x%x" % r_uint(rawstart + functionpos))
+ debug_print(" resops: 0x%x" % r_uint(rawstart + loop_head))
+ debug_print(" failures: 0x%x" % r_uint(rawstart +
+ size_excluding_failure_stuff))
+ debug_print(" end: 0x%x" % r_uint(rawstart + full_size))
debug_stop("jit-backend-addr")
return AsmInfo(ops_offset, rawstart + loop_head,
@@ -678,27 +689,43 @@
arglocs = self.rebuild_faillocs_from_descr(faildescr, inputargs)
regalloc = Regalloc(assembler=self)
- startpos = self.mc.get_relative_pos()
+ allgcrefs = []
operations = regalloc.prepare_bridge(inputargs, arglocs,
operations,
- self.current_clt.allgcrefs,
+ allgcrefs,
self.current_clt.frame_info)
+ self.reserve_gcref_table(allgcrefs)
+ startpos = self.mc.get_relative_pos()
self._check_frame_depth(self.mc, regalloc.get_gcmap())
+ bridgestartpos = self.mc.get_relative_pos()
frame_depth_no_fixed_size = self._assemble(regalloc, inputargs,
operations)
codeendpos = self.mc.get_relative_pos()
self.write_pending_failure_recoveries()
+ fullsize = self.mc.get_relative_pos()
rawstart = self.materialize_loop(original_loop_token)
+ self.patch_gcref_table(original_loop_token, rawstart)
self.process_pending_guards(rawstart)
+ debug_start("jit-backend-addr")
+ debug_print("bridge out of Guard 0x%x has address 0x%x to 0x%x" %
+ (r_uint(descr_number), r_uint(rawstart + startpos),
+ r_uint(rawstart + codeendpos)))
+ debug_print(" gc table: 0x%x" % r_uint(rawstart))
+ debug_print(" jump target: 0x%x" % r_uint(rawstart + startpos))
+ debug_print(" resops: 0x%x" % r_uint(rawstart +
bridgestartpos))
+ debug_print(" failures: 0x%x" % r_uint(rawstart + codeendpos))
+ debug_print(" end: 0x%x" % r_uint(rawstart + fullsize))
+ debug_stop("jit-backend-addr")
+
# patch the jump from original guard
self.patch_trace(faildescr, original_loop_token,
- rawstart, regalloc)
+ rawstart + startpos, regalloc)
self.patch_stack_checks(frame_depth_no_fixed_size +
JITFRAME_FIXED_SIZE,
rawstart)
@@ -716,9 +743,53 @@
ops_offset=ops_offset)
self.teardown()
- debug_bridge(descr_number, rawstart, codeendpos)
+ return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)
- return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)
+ def reserve_gcref_table(self, allgcrefs):
+ gcref_table_size = len(allgcrefs) * WORD
+ # align to a multiple of 16 and reserve space at the beginning
+ # of the machine code for the gc table. This lets us write
+ # machine code with relative addressing (see load_from_gc_table())
+ gcref_table_size = (gcref_table_size + 15) & ~15
+ mc = self.mc
+ assert mc.get_relative_pos() == 0
+ for i in range(gcref_table_size):
+ mc.writechar('\x00')
+ self.setup_gcrefs_list(allgcrefs)
+
+ def patch_gcref_table(self, looptoken, rawstart):
+ # the gc table is at the start of the machine code. Fill it now
+ tracer = self.cpu.gc_ll_descr.make_gcref_tracer(rawstart,
+ self._allgcrefs)
+ gcreftracers = self.get_asmmemmgr_gcreftracers(looptoken)
+ gcreftracers.append(tracer) # keepalive
+ self.teardown_gcrefs_list()
+
+ def load_from_gc_table(self, regnum, index):
+ """emits either:
+ LDR Rt, [PC, #offset] if -4095 <= offset
+ or:
+ gen_load_int(Rt, offset)
+ LDR Rt, [PC, Rt] for larger offsets
+ """
+ mc = self.mc
+ address_in_buffer = index * WORD # at the start of the buffer
+ offset = address_in_buffer - (mc.get_relative_pos() + 8) # negative
+ if offset >= -4095:
+ mc.LDR_ri(regnum, r.pc.value, offset)
+ else:
+ # The offset we're loading is negative: right now,
+ # gen_load_int() will always use exactly
+ # get_max_size_of_gen_load_int() instructions. No point
+ # in optimizing in case we get less. Just in case though,
+ # we check and pad with nops.
+ extra_bytes = mc.get_max_size_of_gen_load_int() * 2
+ offset -= extra_bytes
+ start = mc.get_relative_pos()
+ mc.gen_load_int(regnum, offset)
+ while mc.get_relative_pos() != start + extra_bytes:
+ mc.NOP()
+ mc.LDR_rr(regnum, r.pc.value, regnum)
def new_stack_loc(self, i, tp):
base_ofs = self.cpu.get_baseofs_of_frame_field()
@@ -929,6 +1000,12 @@
clt.asmmemmgr_blocks = []
return clt.asmmemmgr_blocks
+ def get_asmmemmgr_gcreftracers(self, looptoken):
+ clt = looptoken.compiled_loop_token
+ if clt.asmmemmgr_gcreftracers is None:
+ clt.asmmemmgr_gcreftracers = []
+ return clt.asmmemmgr_gcreftracers
+
def _walk_operations(self, inputargs, operations, regalloc):
fcond = c.AL
self._regalloc = regalloc
diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -35,9 +35,9 @@
class ArmGuardToken(GuardToken):
def __init__(self, cpu, gcmap, faildescr, failargs, fail_locs,
- offset, guard_opnum, frame_depth, fcond=c.AL):
+ offset, guard_opnum, frame_depth, faildescrindex, fcond=c.AL):
GuardToken.__init__(self, cpu, gcmap, faildescr, failargs, fail_locs,
- guard_opnum, frame_depth)
+ guard_opnum, frame_depth, faildescrindex)
self.fcond = fcond
self.offset = offset
@@ -178,6 +178,7 @@
assert isinstance(descr, AbstractFailDescr)
gcmap = allocate_gcmap(self, frame_depth, JITFRAME_FIXED_SIZE)
+ faildescrindex = self.get_gcref_from_faildescr(descr)
token = ArmGuardToken(self.cpu, gcmap,
descr,
failargs=op.getfailargs(),
@@ -185,6 +186,7 @@
offset=offset,
guard_opnum=op.getopnum(),
frame_depth=frame_depth,
+ faildescrindex=faildescrindex,
fcond=fcond)
return token
@@ -398,14 +400,13 @@
def emit_op_finish(self, op, arglocs, regalloc, fcond):
base_ofs = self.cpu.get_baseofs_of_frame_field()
- if len(arglocs) == 2:
- [return_val, fail_descr_loc] = arglocs
+ if len(arglocs) > 0:
+ [return_val] = arglocs
self.store_reg(self.mc, return_val, r.fp, base_ofs)
- else:
- [fail_descr_loc] = arglocs
ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
- self.mc.gen_load_int(r.ip.value, fail_descr_loc.value)
+ faildescrindex = self.get_gcref_from_faildescr(op.getdescr())
+ self.load_from_gc_table(r.ip.value, faildescrindex)
# XXX self.mov(fail_descr_loc, RawStackLoc(ofs))
self.store_reg(self.mc, r.ip, r.fp, ofs, helper=r.lr)
if op.numargs() > 0 and op.getarg(0).type == REF:
@@ -1035,9 +1036,9 @@
assert (guard_op.getopnum() == rop.GUARD_NOT_FORCED or
guard_op.getopnum() == rop.GUARD_NOT_FORCED_2)
faildescr = guard_op.getdescr()
+ faildescrindex = self.get_gcref_from_faildescr(faildescr)
ofs = self.cpu.get_ofs_of_frame_field('jf_force_descr')
- value = rffi.cast(lltype.Signed, cast_instance_to_gcref(faildescr))
- self.mc.gen_load_int(r.ip.value, value)
+ self.load_from_gc_table(r.ip.value, faildescrindex)
self.store_reg(self.mc, r.ip, r.fp, ofs)
def _find_nearby_operation(self, delta):
@@ -1250,3 +1251,9 @@
self._load_from_mem(res_loc, res_loc, ofs_loc, imm(scale), signed,
fcond)
return fcond
+
+ def emit_op_load_from_gc_table(self, op, arglocs, regalloc, fcond):
+ res_loc, = arglocs
+ index = op.getarg(0).getint()
+ self.load_from_gc_table(res_loc.value, index)
+ return fcond
diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -1,5 +1,4 @@
from rpython.rtyper.annlowlevel import cast_instance_to_gcref
-from rpython.rlib import rgc
from rpython.rlib.debug import debug_print, debug_start, debug_stop
from rpython.jit.backend.llsupport.regalloc import FrameManager, \
RegisterManager, TempVar, compute_vars_longevity, BaseRegalloc, \
@@ -627,16 +626,11 @@
def prepare_op_finish(self, op, fcond):
# the frame is in fp, but we have to point where in the frame is
# the potential argument to FINISH
- descr = op.getdescr()
- fail_descr = cast_instance_to_gcref(descr)
- # we know it does not move, but well
- rgc._make_sure_does_not_move(fail_descr)
- fail_descr = rffi.cast(lltype.Signed, fail_descr)
if op.numargs() == 1:
loc = self.make_sure_var_in_reg(op.getarg(0))
- locs = [loc, imm(fail_descr)]
+ locs = [loc]
else:
- locs = [imm(fail_descr)]
+ locs = []
return locs
def load_condition_into_cc(self, box):
@@ -892,6 +886,10 @@
prepare_op_same_as_r = _prepare_op_same_as
prepare_op_same_as_f = _prepare_op_same_as
+ def prepare_op_load_from_gc_table(self, op, fcond):
+ resloc = self.force_allocate_reg(op)
+ return [resloc]
+
def prepare_op_call_malloc_nursery(self, op, fcond):
size_box = op.getarg(0)
assert isinstance(size_box, ConstInt)
diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -23,10 +23,11 @@
class GuardToken(object):
def __init__(self, cpu, gcmap, faildescr, failargs, fail_locs,
- guard_opnum, frame_depth):
+ guard_opnum, frame_depth, faildescrindex):
assert isinstance(faildescr, AbstractFailDescr)
self.cpu = cpu
self.faildescr = faildescr
+ self.faildescrindex = faildescrindex
self.failargs = failargs
self.fail_locs = fail_locs
self.gcmap = self.compute_gcmap(gcmap, failargs,
@@ -144,6 +145,22 @@
self.codemap_builder = CodemapBuilder()
self._finish_gcmap = lltype.nullptr(jitframe.GCMAP)
+ def setup_gcrefs_list(self, allgcrefs):
+ self._allgcrefs = allgcrefs
+ self._allgcrefs_faildescr_next = 0
+
+ def teardown_gcrefs_list(self):
+ self._allgcrefs = None
+
+ def get_gcref_from_faildescr(self, descr):
+ """This assumes that it is called in order for all faildescrs."""
+ search = cast_instance_to_gcref(descr)
+ while not _safe_eq(
+ self._allgcrefs[self._allgcrefs_faildescr_next], search):
+ self._allgcrefs_faildescr_next += 1
+ assert self._allgcrefs_faildescr_next < len(self._allgcrefs)
+ return self._allgcrefs_faildescr_next
+
def set_debug(self, v):
r = self._debug
self._debug = v
@@ -186,8 +203,7 @@
break
exc = guardtok.must_save_exception()
target = self.failure_recovery_code[exc + 2 * withfloats]
- fail_descr = cast_instance_to_gcref(guardtok.faildescr)
- fail_descr = rffi.cast(lltype.Signed, fail_descr)
+ faildescrindex = guardtok.faildescrindex
base_ofs = self.cpu.get_baseofs_of_frame_field()
#
# in practice, about 2/3rd of 'positions' lists that we build are
@@ -229,7 +245,7 @@
self._previous_rd_locs = positions
# write down the positions of locs
guardtok.faildescr.rd_locs = positions
- return fail_descr, target
+ return faildescrindex, target
def enter_portal_frame(self, op):
if self.cpu.HAS_CODEMAP:
@@ -288,7 +304,7 @@
gcref = cast_instance_to_gcref(value)
if gcref:
- rgc._make_sure_does_not_move(gcref)
+ rgc._make_sure_does_not_move(gcref) # but should be prebuilt
value = rffi.cast(lltype.Signed, gcref)
je_location = self._call_assembler_check_descr(value, tmploc)
#
@@ -451,3 +467,8 @@
r_uint(rawstart + codeendpos)))
debug_stop("jit-backend-addr")
+def _safe_eq(x, y):
+ try:
+ return x == y
+ except AttributeError: # minor mess
+ return False
diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -22,38 +22,6 @@
from rpython.memory.gctransform import asmgcroot
from rpython.jit.codewriter.effectinfo import EffectInfo
-class MovableObjectTracker(object):
-
- ptr_array_type = lltype.GcArray(llmemory.GCREF)
- ptr_array_gcref = lltype.nullptr(llmemory.GCREF.TO)
-
- def __init__(self, cpu, const_pointers):
- size = len(const_pointers)
- # check that there are any moving object (i.e. chaning pointers).
- # Otherwise there is no reason for an instance of this class.
- assert size > 0
- #
- # prepare GC array to hold the pointers that may change
- self.ptr_array = lltype.malloc(MovableObjectTracker.ptr_array_type,
size)
- self.ptr_array_descr =
cpu.arraydescrof(MovableObjectTracker.ptr_array_type)
- self.ptr_array_gcref = lltype.cast_opaque_ptr(llmemory.GCREF,
self.ptr_array)
- # use always the same ConstPtr to access the array
- # (easer to read JIT trace)
- self.const_ptr_gcref_array = ConstPtr(self.ptr_array_gcref)
- #
- # assign each pointer an index and put the pointer into the GC array.
- # as pointers and addresses are not a good key to use before
translation
- # ConstPtrs are used as the key for the dict.
- self._indexes = {}
- for index in range(size):
- ptr = const_pointers[index]
- self._indexes[ptr] = index
- self.ptr_array[index] = ptr.value
-
- def get_array_index(self, const_ptr):
- index = self._indexes[const_ptr]
- assert const_ptr.value == self.ptr_array[index]
- return index
# ____________________________________________________________
class GcLLDescription(GcCache):
@@ -129,96 +97,9 @@
def gc_malloc_unicode(self, num_elem):
return self._bh_malloc_array(num_elem, self.unicode_descr)
- def _record_constptrs(self, op, gcrefs_output_list,
- ops_with_movable_const_ptr,
- changeable_const_pointers):
- l = None
- for i in range(op.numargs()):
- v = op.getarg(i)
- if isinstance(v, ConstPtr) and bool(v.value):
- p = v.value
- if rgc._make_sure_does_not_move(p):
- gcrefs_output_list.append(p)
- else:
- if l is None:
- l = [i]
- else:
- l.append(i)
- if v not in changeable_const_pointers:
- changeable_const_pointers.append(v)
- #
- if op.is_guard() or op.getopnum() == rop.FINISH:
- llref = cast_instance_to_gcref(op.getdescr())
- assert rgc._make_sure_does_not_move(llref)
- gcrefs_output_list.append(llref)
- #
- if l:
- ops_with_movable_const_ptr[op] = l
-
- def _rewrite_changeable_constptrs(self, op, ops_with_movable_const_ptr,
moving_obj_tracker):
- newops = []
- for arg_i in ops_with_movable_const_ptr[op]:
- v = op.getarg(arg_i)
- # assert to make sure we got what we expected
- assert isinstance(v, ConstPtr)
- array_index = moving_obj_tracker.get_array_index(v)
-
- size, offset, _ =
unpack_arraydescr(moving_obj_tracker.ptr_array_descr)
- array_index = array_index * size + offset
- args = [moving_obj_tracker.const_ptr_gcref_array,
- ConstInt(array_index),
- ConstInt(size)]
- load_op = ResOperation(rop.GC_LOAD_R, args)
- newops.append(load_op)
- op.setarg(arg_i, load_op)
- #
- newops.append(op)
- return newops
-
def rewrite_assembler(self, cpu, operations, gcrefs_output_list):
rewriter = GcRewriterAssembler(self, cpu)
- newops = rewriter.rewrite(operations)
-
- # the key is an operation that contains a ConstPtr as an argument and
- # this ConstPtrs pointer might change as it points to an object that
- # can't be made non-moving (e.g. the object is pinned).
- ops_with_movable_const_ptr = {}
- #
- # a list of such not really constant ConstPtrs.
- changeable_const_pointers = []
- for op in newops:
- # record all GCREFs, because the GC (or Boehm) cannot see them and
- # keep them alive if they end up as constants in the assembler.
- # If such a GCREF can change and we can't make the object it points
- # to non-movable, we have to handle it seperatly. Such GCREF's are
- # returned as ConstPtrs in 'changeable_const_pointers' and the
- # affected operation is returned in 'op_with_movable_const_ptr'.
- # For this special case see 'rewrite_changeable_constptrs'.
- self._record_constptrs(op, gcrefs_output_list,
- ops_with_movable_const_ptr, changeable_const_pointers)
- #
- # handle pointers that are not guaranteed to stay the same
- if len(ops_with_movable_const_ptr) > 0:
- moving_obj_tracker = MovableObjectTracker(cpu,
changeable_const_pointers)
- #
- if not we_are_translated():
- # used for testing
- self.last_moving_obj_tracker = moving_obj_tracker
- # make sure the array containing the pointers is not collected by
- # the GC (or Boehm)
- gcrefs_output_list.append(moving_obj_tracker.ptr_array_gcref)
- rgc._make_sure_does_not_move(moving_obj_tracker.ptr_array_gcref)
-
- ops = newops
- newops = []
- for op in ops:
- if op in ops_with_movable_const_ptr:
- rewritten_ops = self._rewrite_changeable_constptrs(op,
- ops_with_movable_const_ptr, moving_obj_tracker)
- newops.extend(rewritten_ops)
- else:
- newops.append(op)
- #
+ newops = rewriter.rewrite(operations, gcrefs_output_list)
return newops
@specialize.memo()
@@ -244,6 +125,14 @@
"""
return jitframe.JITFRAME.allocate(frame_info)
+ def make_gcref_tracer(self, array_base_addr, gcrefs):
+ # for tests, or for Boehm. Overridden for framework GCs
+ from rpython.jit.backend.llsupport import gcreftracer
+ return gcreftracer.make_boehm_tracer(array_base_addr, gcrefs)
+
+ def clear_gcref_tracer(self, tracer):
+ pass # nothing needed unless overridden
+
class JitFrameDescrs:
def _freeze_(self):
return True
@@ -752,6 +641,13 @@
p = rffi.cast(rffi.CCHARP, p)
return (ord(p[0]) & IS_OBJECT_FLAG) != 0
+ def make_gcref_tracer(self, array_base_addr, gcrefs):
+ from rpython.jit.backend.llsupport import gcreftracer
+ return gcreftracer.make_framework_tracer(array_base_addr, gcrefs)
+
+ def clear_gcref_tracer(self, tracer):
+ tracer.array_length = 0
+
# ____________________________________________________________
def get_ll_description(gcdescr, translator=None, rtyper=None):
diff --git a/rpython/jit/backend/llsupport/gcreftracer.py b/rpython/jit/backend/llsupport/gcreftracer.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/llsupport/gcreftracer.py
@@ -0,0 +1,49 @@
+from rpython.rlib import rgc
+from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
+from rpython.rtyper.lltypesystem.lloperation import llop
+from rpython.jit.backend.llsupport.symbolic import WORD
+
+
+GCREFTRACER = lltype.GcStruct(
+ 'GCREFTRACER',
+ ('array_base_addr', lltype.Signed),
+ ('array_length', lltype.Signed),
+ rtti=True)
+
+def gcrefs_trace(gc, obj_addr, callback, arg):
+ obj = llmemory.cast_adr_to_ptr(obj_addr, lltype.Ptr(GCREFTRACER))
+ i = 0
+ length = obj.array_length
+ addr = obj.array_base_addr
+ while i < length:
+ p = rffi.cast(llmemory.Address, addr + i * WORD)
+ gc._trace_callback(callback, arg, p)
+ i += 1
+lambda_gcrefs_trace = lambda: gcrefs_trace
+
+def make_framework_tracer(array_base_addr, gcrefs):
+ # careful about the order here: the allocation of the GCREFTRACER
+ # can trigger a GC. So we must write the gcrefs into the raw
+ # array only afterwards...
+ rgc.register_custom_trace_hook(GCREFTRACER, lambda_gcrefs_trace)
+ length = len(gcrefs)
+ tr = lltype.malloc(GCREFTRACER)
+ # --no GC from here--
+ tr.array_base_addr = array_base_addr
+ tr.array_length = length
+ i = 0
+ while i < length:
+ p = rffi.cast(rffi.SIGNEDP, array_base_addr + i * WORD)
+ p[0] = rffi.cast(lltype.Signed, gcrefs[i])
+ i += 1
+ llop.gc_writebarrier(lltype.Void, tr)
+ # --no GC until here--
+ return tr
+
+def make_boehm_tracer(array_base_addr, gcrefs):
+ # copy the addresses, but return 'gcrefs' as the object that must be
+ # kept alive
+ for i in range(len(gcrefs)):
+ p = rffi.cast(rffi.SIGNEDP, array_base_addr + i * WORD)
+ p[0] = rffi.cast(lltype.Signed, gcrefs[i])
+ return gcrefs
diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py
--- a/rpython/jit/backend/llsupport/llmodel.py
+++ b/rpython/jit/backend/llsupport/llmodel.py
@@ -246,6 +246,13 @@
def free_loop_and_bridges(self, compiled_loop_token):
AbstractCPU.free_loop_and_bridges(self, compiled_loop_token)
+ # turn off all gcreftracers
+ tracers = compiled_loop_token.asmmemmgr_gcreftracers
+ if tracers is not None:
+ compiled_loop_token.asmmemmgr_gcreftracers = None
+ for tracer in tracers:
+ self.gc_ll_descr.clear_gcref_tracer(tracer)
+ # then free all blocks of code and raw data
blocks = compiled_loop_token.asmmemmgr_blocks
if blocks is not None:
compiled_loop_token.asmmemmgr_blocks = None
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -1,10 +1,12 @@
from rpython.rlib import rgc
-from rpython.rlib.objectmodel import we_are_translated
+from rpython.rlib.objectmodel import we_are_translated, r_dict
from rpython.rlib.rarithmetic import ovfcheck, highest_bit
from rpython.rtyper.lltypesystem import llmemory, lltype, rstr
+from rpython.rtyper.annlowlevel import cast_instance_to_gcref
from rpython.jit.metainterp import history
from rpython.jit.metainterp.history import ConstInt, ConstPtr
from rpython.jit.metainterp.resoperation import ResOperation, rop, OpHelpers
+from rpython.jit.metainterp.typesystem import rd_eq, rd_hash
from rpython.jit.codewriter import heaptracker
from rpython.jit.backend.llsupport.symbolic import (WORD,
get_array_token)
@@ -94,21 +96,28 @@
op = self.get_box_replacement(op)
orig_op = op
replaced = False
+ opnum = op.getopnum()
+ keep = (opnum == rop.JIT_DEBUG)
for i in range(op.numargs()):
orig_arg = op.getarg(i)
arg = self.get_box_replacement(orig_arg)
+ if isinstance(arg, ConstPtr) and bool(arg.value) and not keep:
+ arg = self.remove_constptr(arg)
if orig_arg is not arg:
if not replaced:
- op = op.copy_and_change(op.getopnum())
+ op = op.copy_and_change(opnum)
orig_op.set_forwarded(op)
replaced = True
op.setarg(i, arg)
- if rop.is_guard(op.opnum):
+ if rop.is_guard(opnum):
if not replaced:
- op = op.copy_and_change(op.getopnum())
+ op = op.copy_and_change(opnum)
orig_op.set_forwarded(op)
op.setfailargs([self.get_box_replacement(a, True)
for a in op.getfailargs()])
+ if rop.is_guard(opnum) or opnum == rop.FINISH:
+ llref = cast_instance_to_gcref(op.getdescr())
+ self.gcrefs_output_list.append(llref)
self._newops.append(op)
def replace_op_with(self, op, newop):
@@ -304,13 +313,16 @@
return False
- def rewrite(self, operations):
+ def rewrite(self, operations, gcrefs_output_list):
# we can only remember one malloc since the next malloc can possibly
# collect; but we can try to collapse several known-size mallocs into
# one, both for performance and to reduce the number of write
# barriers. We do this on each "basic block" of operations, which in
# this case means between CALLs or unknown-size mallocs.
#
+ self.gcrefs_output_list = gcrefs_output_list
+ self.gcrefs_map = None
+ self.gcrefs_recently_loaded = None
operations = self.remove_bridge_exception(operations)
self._changed_op = None
for i in range(len(operations)):
@@ -333,8 +345,7 @@
elif rop.can_malloc(op.opnum):
self.emitting_an_operation_that_can_collect()
elif op.getopnum() == rop.LABEL:
- self.emitting_an_operation_that_can_collect()
- self._known_lengths.clear()
+ self.emit_label()
# ---------- write barriers ----------
if self.gc_ll_descr.write_barrier_descr is not None:
if op.getopnum() == rop.SETFIELD_GC:
@@ -940,3 +951,37 @@
operations[start+2].getopnum() == rop.RESTORE_EXCEPTION):
return operations[:start] + operations[start+3:]
return operations
+
+ def emit_label(self):
+ self.emitting_an_operation_that_can_collect()
+ self._known_lengths.clear()
+ self.gcrefs_recently_loaded = None
+
+ def _gcref_index(self, gcref):
+ if self.gcrefs_map is None:
+ self.gcrefs_map = r_dict(rd_eq, rd_hash)
+ try:
+ return self.gcrefs_map[gcref]
+ except KeyError:
+ pass
+ index = len(self.gcrefs_output_list)
+ self.gcrefs_map[gcref] = index
+ self.gcrefs_output_list.append(gcref)
+ return index
+
+ def remove_constptr(self, c):
+ """Remove all ConstPtrs, and replace them with load_from_gc_table.
+ """
+ # Note: currently, gcrefs_recently_loaded is only cleared in
+ # LABELs. We'd like something better, like "don't spill it",
+ # but that's the wrong level...
+ index = self._gcref_index(c.value)
+ if self.gcrefs_recently_loaded is None:
+ self.gcrefs_recently_loaded = {}
+ try:
+ load_op = self.gcrefs_recently_loaded[index]
+ except KeyError:
+ load_op = ResOperation(rop.LOAD_FROM_GC_TABLE, [ConstInt(index)])
+ self._newops.append(load_op)
+ self.gcrefs_recently_loaded[index] = load_op
+ return load_op
diff --git a/rpython/jit/backend/llsupport/test/test_gc.py b/rpython/jit/backend/llsupport/test/test_gc.py
--- a/rpython/jit/backend/llsupport/test/test_gc.py
+++ b/rpython/jit/backend/llsupport/test/test_gc.py
@@ -196,31 +196,6 @@
assert is_valid_int(wbdescr.jit_wb_if_flag_byteofs)
assert is_valid_int(wbdescr.jit_wb_if_flag_singlebyte)
- def test_record_constptrs(self):
- class MyFakeCPU(object):
- def cast_adr_to_int(self, adr):
- assert adr == "some fake address"
- return 43
- class MyFakeGCRefList(object):
- def get_address_of_gcref(self, s_gcref1):
- assert s_gcref1 == s_gcref
- return "some fake address"
- S = lltype.GcStruct('S')
- s = lltype.malloc(S)
- s_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
- v_random_box = InputArgRef()
- operations = [
- ResOperation(rop.PTR_EQ, [v_random_box, ConstPtr(s_gcref)]),
- ]
- gc_ll_descr = self.gc_ll_descr
- gc_ll_descr.gcrefs = MyFakeGCRefList()
- gcrefs = []
- operations = get_deep_immutable_oplist(operations)
- operations2 = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations,
- gcrefs)
- assert operations2 == operations
- assert gcrefs == [s_gcref]
-
class TestFrameworkMiniMark(TestFramework):
gc = 'minimark'
diff --git a/rpython/jit/backend/llsupport/test/test_gcreftracer.py b/rpython/jit/backend/llsupport/test/test_gcreftracer.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/llsupport/test/test_gcreftracer.py
@@ -0,0 +1,53 @@
+from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
+from rpython.jit.backend.llsupport.gcreftracer import GCREFTRACER, gcrefs_trace
+from rpython.jit.backend.llsupport.gcreftracer import make_framework_tracer
+from rpython.jit.backend.llsupport.gcreftracer import make_boehm_tracer
+
+
+class FakeGC:
+ def __init__(self):
+ self.called = []
+ def _trace_callback(self, callback, arg, addr):
+ assert callback == "callback"
+ assert arg == "arg"
+ assert lltype.typeOf(addr) == llmemory.Address
+ self.called.append(addr)
+
+
+def test_gcreftracer():
+ a = lltype.malloc(rffi.CArray(lltype.Signed), 3, flavor='raw')
+ a[0] = 123
+ a[1] = 456
+ a[2] = 789
+ tr = lltype.malloc(GCREFTRACER)
+ tr.array_base_addr = base = rffi.cast(lltype.Signed, a)
+ tr.array_length = 3
+ gc = FakeGC()
+ gcrefs_trace(gc, llmemory.cast_ptr_to_adr(tr), "callback", "arg")
+ assert len(gc.called) == 3
+ WORD = rffi.sizeof(lltype.Signed)
+ for i in range(3):
+ assert gc.called[i] == rffi.cast(llmemory.Address, base + i * WORD)
+ lltype.free(a, flavor='raw')
+
+def test_make_framework_tracer():
+ a = lltype.malloc(rffi.CArray(lltype.Signed), 3, flavor='raw')
+ base = rffi.cast(lltype.Signed, a)
+ tr = make_framework_tracer(base, [123, 456, 789])
+ assert a[0] == 123
+ assert a[1] == 456
+ assert a[2] == 789
+ assert tr.array_base_addr == base
+ assert tr.array_length == 3
+ lltype.free(a, flavor='raw')
+
+def test_make_boehm_tracer():
+ a = lltype.malloc(rffi.CArray(lltype.Signed), 3, flavor='raw')
+ base = rffi.cast(lltype.Signed, a)
+ lst = [123, 456, 789]
+ tr = make_boehm_tracer(base, lst)
+ assert a[0] == 123
+ assert a[1] == 456
+ assert a[2] == 789
+ assert tr is lst
+ lltype.free(a, flavor='raw')
diff --git a/rpython/jit/backend/llsupport/test/test_pinned_object_rewrite.py b/rpython/jit/backend/llsupport/test/test_pinned_object_rewrite.py
deleted file mode 100644
--- a/rpython/jit/backend/llsupport/test/test_pinned_object_rewrite.py
+++ /dev/null
@@ -1,149 +0,0 @@
-from test_rewrite import get_size_descr, get_array_descr, get_description,
BaseFakeCPU
-from rpython.jit.backend.llsupport.descr import get_size_descr,\
- get_field_descr, get_array_descr, ArrayDescr, FieldDescr,\
- SizeDescr, get_interiorfield_descr
-from rpython.jit.backend.llsupport.gc import GcLLDescr_boehm,\
- GcLLDescr_framework, MovableObjectTracker
-from rpython.jit.backend.llsupport import jitframe, gc
-from rpython.jit.metainterp.gc import get_description
-from rpython.jit.tool.oparser import parse
-from rpython.jit.metainterp.optimizeopt.util import equaloplists
-from rpython.jit.metainterp.history import JitCellToken, FLOAT
-from rpython.rtyper.lltypesystem import lltype, rffi, lltype, llmemory
-from rpython.rtyper import rclass
-from rpython.jit.backend.x86.arch import WORD
-from rpython.rlib import rgc
-
-class Evaluator(object):
- def __init__(self, scope):
- self.scope = scope
- def __getitem__(self, key):
- return eval(key, self.scope)
-
-
-class FakeLoopToken(object):
- pass
-
-# The following class is based on
rpython.jit.backend.llsupport.test.test_rewrite.RewriteTests.
-# It's modified to be able to test the object pinning specific features.
-class RewriteTests(object):
- def check_rewrite(self, frm_operations, to_operations, **namespace):
- # objects to use inside the test
- A = lltype.GcArray(lltype.Signed)
- adescr = get_array_descr(self.gc_ll_descr, A)
- adescr.tid = 4321
- alendescr = adescr.lendescr
- #
- pinned_obj_type = lltype.GcStruct('PINNED_STRUCT', ('my_int',
lltype.Signed))
- pinned_obj_my_int_descr = get_field_descr(self.gc_ll_descr,
pinned_obj_type, 'my_int')
- pinned_obj_ptr = lltype.malloc(pinned_obj_type)
- pinned_obj_gcref = lltype.cast_opaque_ptr(llmemory.GCREF,
pinned_obj_ptr)
- assert rgc.pin(pinned_obj_gcref)
- #
- notpinned_obj_type = lltype.GcStruct('NOT_PINNED_STRUCT', ('my_int',
lltype.Signed))
- notpinned_obj_my_int_descr = get_field_descr(self.gc_ll_descr,
notpinned_obj_type, 'my_int')
- notpinned_obj_ptr = lltype.malloc(notpinned_obj_type)
- notpinned_obj_gcref = lltype.cast_opaque_ptr(llmemory.GCREF,
notpinned_obj_ptr)
- #
- ptr_array_descr =
self.cpu.arraydescrof(MovableObjectTracker.ptr_array_type)
- #
- vtable_descr = self.gc_ll_descr.fielddescr_vtable
- O = lltype.GcStruct('O', ('parent', rclass.OBJECT),
- ('x', lltype.Signed))
- o_vtable = lltype.malloc(rclass.OBJECT_VTABLE, immortal=True)
- #
- tiddescr = self.gc_ll_descr.fielddescr_tid
- wbdescr = self.gc_ll_descr.write_barrier_descr
- WORD = globals()['WORD']
- #
- strdescr = self.gc_ll_descr.str_descr
- unicodedescr = self.gc_ll_descr.unicode_descr
- strlendescr = strdescr.lendescr
- unicodelendescr = unicodedescr.lendescr
-
- casmdescr = JitCellToken()
- clt = FakeLoopToken()
- clt._ll_initial_locs = [0, 8]
- frame_info = lltype.malloc(jitframe.JITFRAMEINFO, flavor='raw')
- clt.frame_info = frame_info
- frame_info.jfi_frame_depth = 13
- frame_info.jfi_frame_size = 255
- framedescrs = self.gc_ll_descr.getframedescrs(self.cpu)
- framelendescr = framedescrs.arraydescr.lendescr
- jfi_frame_depth = framedescrs.jfi_frame_depth
- jfi_frame_size = framedescrs.jfi_frame_size
- jf_frame_info = framedescrs.jf_frame_info
- signedframedescr = self.cpu.signedframedescr
- floatframedescr = self.cpu.floatframedescr
- casmdescr.compiled_loop_token = clt
- tzdescr = None # noone cares
- #
- namespace.update(locals())
- #
- for funcname in self.gc_ll_descr._generated_functions:
- namespace[funcname] = self.gc_ll_descr.get_malloc_fn(funcname)
- namespace[funcname + '_descr'] = getattr(self.gc_ll_descr,
- '%s_descr' % funcname)
- #
- ops = parse(frm_operations, namespace=namespace)
- operations = self.gc_ll_descr.rewrite_assembler(self.cpu,
- ops.operations,
- [])
- # make the array containing the GCREF's accessible inside the tests.
- # This must be done after we call 'rewrite_assembler'. Before that
- # call 'last_moving_obj_tracker' is None or filled with some old
- # value.
- namespace['ptr_array_gcref'] =
self.gc_ll_descr.last_moving_obj_tracker.ptr_array_gcref
- expected = parse(to_operations % Evaluator(namespace),
- namespace=namespace)
- equaloplists(operations, expected.operations)
- lltype.free(frame_info, flavor='raw')
-
-class TestFramework(RewriteTests):
- def setup_method(self, meth):
- class config_(object):
- class translation(object):
- gc = 'minimark'
- gcrootfinder = 'asmgcc'
- gctransformer = 'framework'
- gcremovetypeptr = False
- gcdescr = get_description(config_)
- self.gc_ll_descr = GcLLDescr_framework(gcdescr, None, None, None,
- really_not_translated=True)
- self.gc_ll_descr.write_barrier_descr.has_write_barrier_from_array = (
- lambda cpu: True)
- #
- class FakeCPU(BaseFakeCPU):
- def sizeof(self, STRUCT, is_object):
- descr = SizeDescr(104)
- descr.tid = 9315
- descr.vtable = 12
- return descr
- self.cpu = FakeCPU()
-
- def test_simple_getfield(self):
- self.check_rewrite("""
- []
- i0 = getfield_gc_i(ConstPtr(pinned_obj_gcref),
descr=pinned_obj_my_int_descr)
- """, """
- []
- p1 = gc_load_r(ConstPtr(ptr_array_gcref), %(0 *
ptr_array_descr.itemsize + 1)s, %(ptr_array_descr.itemsize)s)
- i0 = gc_load_i(p1, 0, -%(pinned_obj_my_int_descr.field_size)s)
- """)
- assert len(self.gc_ll_descr.last_moving_obj_tracker._indexes) == 1
-
- def test_simple_getfield_twice(self):
- self.check_rewrite("""
- []
- i0 = getfield_gc_i(ConstPtr(pinned_obj_gcref),
descr=pinned_obj_my_int_descr)
- i1 = getfield_gc_i(ConstPtr(notpinned_obj_gcref),
descr=notpinned_obj_my_int_descr)
- i2 = getfield_gc_i(ConstPtr(pinned_obj_gcref),
descr=pinned_obj_my_int_descr)
- """, """
- []
- p1 = gc_load_r(ConstPtr(ptr_array_gcref), %(0 *
ptr_array_descr.itemsize + 1)s, %(ptr_array_descr.itemsize)s)
- i0 = gc_load_i(p1, 0, -%(pinned_obj_my_int_descr.field_size)s)
- i1 = gc_load_i(ConstPtr(notpinned_obj_gcref), 0,
-%(notpinned_obj_my_int_descr.field_size)s)
- p2 = gc_load_r(ConstPtr(ptr_array_gcref), %(1 *
ptr_array_descr.itemsize + 1)s, %(ptr_array_descr.itemsize)s)
- i2 = gc_load_i(p2, 0, -%(pinned_obj_my_int_descr.field_size)s)
- """)
- assert len(self.gc_ll_descr.last_moving_obj_tracker._indexes) == 2
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -10,7 +10,7 @@
from rpython.jit.metainterp.optimizeopt.util import equaloplists
from rpython.jit.metainterp.history import JitCellToken, FLOAT
from rpython.jit.metainterp.history import AbstractFailDescr
-from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
from rpython.rtyper import rclass
from rpython.jit.backend.x86.arch import WORD
from rpython.jit.backend.llsupport.symbolic import (WORD,
@@ -77,6 +77,9 @@
tdescr = get_size_descr(self.gc_ll_descr, T)
tdescr.tid = 5678
tzdescr = get_field_descr(self.gc_ll_descr, T, 'z')
+ myT = lltype.cast_opaque_ptr(llmemory.GCREF,
+ lltype.malloc(T, zero=True))
+ self.myT = myT
#
A = lltype.GcArray(lltype.Signed)
adescr = get_array_descr(self.gc_ll_descr, A)
@@ -112,6 +115,12 @@
xdescr = get_field_descr(self.gc_ll_descr, R1, 'x')
ydescr = get_field_descr(self.gc_ll_descr, R1, 'y')
zdescr = get_field_descr(self.gc_ll_descr, R1, 'z')
+ myR1 = lltype.cast_opaque_ptr(llmemory.GCREF,
+ lltype.malloc(R1, zero=True))
+ myR1b = lltype.cast_opaque_ptr(llmemory.GCREF,
+ lltype.malloc(R1, zero=True))
+ self.myR1 = myR1
+ self.myR1b = myR1b
#
E = lltype.GcStruct('Empty')
edescr = get_size_descr(self.gc_ll_descr, E)
@@ -174,9 +183,10 @@
ops = parse(frm_operations, namespace=namespace)
expected = parse(to_operations % Evaluator(namespace),
namespace=namespace)
+ self.gcrefs = []
operations = self.gc_ll_descr.rewrite_assembler(self.cpu,
ops.operations,
- [])
+ self.gcrefs)
remap = {}
for a, b in zip(ops.inputargs, expected.inputargs):
remap[b] = a
@@ -1281,3 +1291,124 @@
{t}
jump()
""".format(**locals()))
+
+ def test_load_from_gc_table_1i(self):
+ self.check_rewrite("""
+ [i1]
+ setfield_gc(ConstPtr(myR1), i1, descr=xdescr)
+ jump()
+ """, """
+ [i1]
+ p0 = load_from_gc_table(0)
+ gc_store(p0, %(xdescr.offset)s, i1, %(xdescr.field_size)s)
+ jump()
+ """)
+ assert self.gcrefs == [self.myR1]
+
+ def test_load_from_gc_table_1p(self):
+ self.check_rewrite("""
+ [p1]
+ setfield_gc(ConstPtr(myT), p1, descr=tzdescr)
+ jump()
+ """, """
+ [i1]
+ p0 = load_from_gc_table(0)
+ cond_call_gc_wb(p0, descr=wbdescr)
+ gc_store(p0, %(tzdescr.offset)s, i1, %(tzdescr.field_size)s)
+ jump()
+ """)
+ assert self.gcrefs == [self.myT]
+
+ def test_load_from_gc_table_2(self):
+ self.check_rewrite("""
+ [i1, f2]
+ setfield_gc(ConstPtr(myR1), i1, descr=xdescr)
+ setfield_gc(ConstPtr(myR1), f2, descr=ydescr)
+ jump()
+ """, """
+ [i1, f2]
+ p0 = load_from_gc_table(0)
+ gc_store(p0, %(xdescr.offset)s, i1, %(xdescr.field_size)s)
+ gc_store(p0, %(ydescr.offset)s, f2, %(ydescr.field_size)s)
+ jump()
+ """)
+ assert self.gcrefs == [self.myR1]
+
+ def test_load_from_gc_table_3(self):
+ self.check_rewrite("""
+ [i1, f2]
+ setfield_gc(ConstPtr(myR1), i1, descr=xdescr)
+ label(f2)
+ setfield_gc(ConstPtr(myR1), f2, descr=ydescr)
+ jump()
+ """, """
+ [i1, f2]
+ p0 = load_from_gc_table(0)
+ gc_store(p0, %(xdescr.offset)s, i1, %(xdescr.field_size)s)
+ label(f2)
+ p1 = load_from_gc_table(0)
+ gc_store(p1, %(ydescr.offset)s, f2, %(ydescr.field_size)s)
+ jump()
+ """)
+ assert self.gcrefs == [self.myR1]
+
+ def test_load_from_gc_table_4(self):
+ self.check_rewrite("""
+ [i1, f2]
+ setfield_gc(ConstPtr(myR1), i1, descr=xdescr)
+ setfield_gc(ConstPtr(myR1b), f2, descr=ydescr)
+ jump()
+ """, """
+ [i1, f2]
+ p0 = load_from_gc_table(0)
+ gc_store(p0, %(xdescr.offset)s, i1, %(xdescr.field_size)s)
+ p1 = load_from_gc_table(1)
+ gc_store(p1, %(ydescr.offset)s, f2, %(ydescr.field_size)s)
+ jump()
+ """)
+ assert self.gcrefs == [self.myR1, self.myR1b]
+
+ def test_pinned_simple_getfield(self):
+ # originally in test_pinned_object_rewrite; now should give the
+ # same result for pinned objects and for normal objects
+ self.check_rewrite("""
+ []
+ i0 = getfield_gc_i(ConstPtr(myR1), descr=xdescr)
+ """, """
+ []
+ p1 = load_from_gc_table(0)
+ i0 = gc_load_i(p1, %(xdescr.offset)s, -%(xdescr.field_size)s)
+ """)
+ assert self.gcrefs == [self.myR1]
+
+ def test_pinned_simple_getfield_twice(self):
+ # originally in test_pinned_object_rewrite; now should give the
+ # same result for pinned objects and for normal objects
+ self.check_rewrite("""
+ []
+ i0 = getfield_gc_i(ConstPtr(myR1), descr=xdescr)
+ i1 = getfield_gc_i(ConstPtr(myR1b), descr=xdescr)
+ i2 = getfield_gc_i(ConstPtr(myR1), descr=xdescr)
+ """, """
+ []
+ p1 = load_from_gc_table(0)
+ i0 = gc_load_i(p1, %(xdescr.offset)s, -%(xdescr.field_size)s)
+ p2 = load_from_gc_table(1)
+ i1 = gc_load_i(p2, %(xdescr.offset)s, -%(xdescr.field_size)s)
+ i2 = gc_load_i(p1, %(xdescr.offset)s, -%(xdescr.field_size)s)
+ """)
+ assert self.gcrefs == [self.myR1, self.myR1b]
+
+ def test_guard_in_gcref(self):
+ self.check_rewrite("""
+ [i1, i2]
+ guard_true(i1) []
+ guard_true(i2) []
+ jump()
+ """, """
+ [i1, i2]
+ guard_true(i1) []
+ guard_true(i2) []
+ jump()
+ """)
+ assert len(self.gcrefs) == 2
diff --git a/rpython/jit/backend/model.py b/rpython/jit/backend/model.py
--- a/rpython/jit/backend/model.py
+++ b/rpython/jit/backend/model.py
@@ -285,7 +285,7 @@
class CompiledLoopToken(object):
asmmemmgr_blocks = None
- asmmemmgr_gcroots = 0
+ asmmemmgr_gcreftracers = None
def __init__(self, cpu, number):
cpu.tracker.total_compiled_loops += 1
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -4,7 +4,7 @@
from rpython.jit.backend.llsupport import symbolic, jitframe, rewrite
from rpython.jit.backend.llsupport.assembler import (GuardToken, BaseAssembler,
- DEBUG_COUNTER, debug_bridge)
+ DEBUG_COUNTER)
from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
from rpython.jit.metainterp.history import (Const, VOID, ConstInt)
@@ -489,7 +489,6 @@
frame_info = self.datablockwrapper.malloc_aligned(
jitframe.JITFRAMEINFO_SIZE, alignment=WORD)
clt.frame_info = rffi.cast(jitframe.JITFRAMEINFOPTR, frame_info)
- clt.allgcrefs = []
clt.frame_info.clear() # for now
if log:
@@ -498,10 +497,13 @@
regalloc = RegAlloc(self, self.cpu.translate_support_code)
#
+ allgcrefs = []
+ operations = regalloc.prepare_loop(inputargs, operations,
+ looptoken, allgcrefs)
+ self.reserve_gcref_table(allgcrefs)
+ functionpos = self.mc.get_relative_pos()
self._call_header_with_stack_check()
self._check_frame_depth_debug(self.mc)
- operations = regalloc.prepare_loop(inputargs, operations,
- looptoken, clt.allgcrefs)
looppos = self.mc.get_relative_pos()
frame_depth_no_fixed_size = self._assemble(regalloc, inputargs,
operations)
@@ -512,6 +514,7 @@
full_size = self.mc.get_relative_pos()
#
rawstart = self.materialize_loop(looptoken)
+ self.patch_gcref_table(looptoken, rawstart)
self.patch_stack_checks(frame_depth_no_fixed_size +
JITFRAME_FIXED_SIZE,
rawstart)
looptoken._ll_loop_code = looppos + rawstart
@@ -520,7 +523,13 @@
looptoken.number, loopname,
r_uint(rawstart + looppos),
r_uint(rawstart + size_excluding_failure_stuff),
- r_uint(rawstart)))
+ r_uint(rawstart + functionpos)))
+ debug_print(" gc table: 0x%x" % r_uint(self.gc_table_addr))
+ debug_print(" function: 0x%x" % r_uint(rawstart + functionpos))
+ debug_print(" resops: 0x%x" % r_uint(rawstart + looppos))
+ debug_print(" failures: 0x%x" % r_uint(rawstart +
+ size_excluding_failure_stuff))
+ debug_print(" end: 0x%x" % r_uint(rawstart + full_size))
debug_stop("jit-backend-addr")
self.patch_pending_failure_recoveries(rawstart)
#
@@ -530,7 +539,7 @@
looptoken._x86_rawstart = rawstart
looptoken._x86_fullsize = full_size
looptoken._x86_ops_offset = ops_offset
- looptoken._ll_function_addr = rawstart
+ looptoken._ll_function_addr = rawstart + functionpos
if logger:
logger.log_loop(inputargs, operations, 0, "rewritten",
name=loopname, ops_offset=ops_offset)
@@ -563,11 +572,13 @@
'b', descr_number)
arglocs = self.rebuild_faillocs_from_descr(faildescr, inputargs)
regalloc = RegAlloc(self, self.cpu.translate_support_code)
- startpos = self.mc.get_relative_pos()
+ allgcrefs = []
operations = regalloc.prepare_bridge(inputargs, arglocs,
operations,
- self.current_clt.allgcrefs,
+ allgcrefs,
self.current_clt.frame_info)
+ self.reserve_gcref_table(allgcrefs)
+ startpos = self.mc.get_relative_pos()
self._check_frame_depth(self.mc, regalloc.get_gcmap())
bridgestartpos = self.mc.get_relative_pos()
self._update_at_exit(arglocs, inputargs, faildescr, regalloc)
@@ -577,12 +588,22 @@
fullsize = self.mc.get_relative_pos()
#
rawstart = self.materialize_loop(original_loop_token)
+ self.patch_gcref_table(original_loop_token, rawstart)
self.patch_stack_checks(frame_depth_no_fixed_size +
JITFRAME_FIXED_SIZE,
rawstart)
- debug_bridge(descr_number, rawstart, codeendpos)
+ debug_start("jit-backend-addr")
+ debug_print("bridge out of Guard 0x%x has address 0x%x to 0x%x" %
+ (r_uint(descr_number), r_uint(rawstart + startpos),
+ r_uint(rawstart + codeendpos)))
+ debug_print(" gc table: 0x%x" % r_uint(self.gc_table_addr))
+ debug_print(" jump target: 0x%x" % r_uint(rawstart + startpos))
+ debug_print(" resops: 0x%x" % r_uint(rawstart + bridgestartpos))
+ debug_print(" failures: 0x%x" % r_uint(rawstart + codeendpos))
+ debug_print(" end: 0x%x" % r_uint(rawstart + fullsize))
+ debug_stop("jit-backend-addr")
self.patch_pending_failure_recoveries(rawstart)
# patch the jump from original guard
- self.patch_jump_for_descr(faildescr, rawstart)
+ self.patch_jump_for_descr(faildescr, rawstart + startpos)
ops_offset = self.mc.ops_offset
frame_depth = max(self.current_clt.frame_info.jfi_frame_depth,
frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)
@@ -667,6 +688,39 @@
mc.JMP_r(X86_64_SCRATCH_REG.value)
mc.copy_to_raw_memory(adr_jump_offset)
+ def reserve_gcref_table(self, allgcrefs):
+ gcref_table_size = len(allgcrefs) * WORD
+ if IS_X86_64:
+ # align to a multiple of 16 and reserve space at the beginning
+ # of the machine code for the gc table. This lets us write
+ # machine code with relative addressing (%rip - constant).
+ gcref_table_size = (gcref_table_size + 15) & ~15
+ mc = self.mc
+ assert mc.get_relative_pos() == 0
+ for i in range(gcref_table_size):
+ mc.writechar('\x00')
+ elif IS_X86_32:
+ # allocate the gc table right now. This lets us write
+ # machine code with absolute 32-bit addressing.
+ self.gc_table_addr = self.datablockwrapper.malloc_aligned(
+ gcref_table_size, alignment=WORD)
+ #
+ self.setup_gcrefs_list(allgcrefs)
+
+ def patch_gcref_table(self, looptoken, rawstart):
+ if IS_X86_64:
+ # the gc table is at the start of the machine code
+ self.gc_table_addr = rawstart
+ elif IS_X86_32:
+ # the gc table was already allocated by reserve_gcref_table()
+ rawstart = self.gc_table_addr
+ #
+ tracer = self.cpu.gc_ll_descr.make_gcref_tracer(rawstart,
+ self._allgcrefs)
+ gcreftracers = self.get_asmmemmgr_gcreftracers(looptoken)
+ gcreftracers.append(tracer) # keepalive
+ self.teardown_gcrefs_list()
+
def write_pending_failure_recoveries(self, regalloc):
# for each pending guard, generate the code of the recovery stub
# at the end of self.mc.
@@ -790,6 +844,12 @@
clt.asmmemmgr_blocks = []
return clt.asmmemmgr_blocks
+ def get_asmmemmgr_gcreftracers(self, looptoken):
+ clt = looptoken.compiled_loop_token
+ if clt.asmmemmgr_gcreftracers is None:
+ clt.asmmemmgr_gcreftracers = []
+ return clt.asmmemmgr_gcreftracers
+
def materialize_loop(self, looptoken):
self.datablockwrapper.done() # finish using cpu.asmmemmgr
self.datablockwrapper = None
@@ -1368,6 +1428,29 @@
genop_cast_ptr_to_int = _genop_same_as
genop_cast_int_to_ptr = _genop_same_as
+ def _patch_load_from_gc_table(self, index):
+ # must be called immediately after a "p"-mode instruction
+ # has been emitted. 64-bit mode only.
+ assert IS_X86_64
+ address_in_buffer = index * WORD # at the start of the buffer
+ p_location = self.mc.get_relative_pos()
+ offset = address_in_buffer - p_location
+ self.mc.overwrite32(p_location-4, offset)
+
+ def _addr_from_gc_table(self, index):
+ # get the address of the gc table entry 'index'. 32-bit mode only.
+ assert IS_X86_32
+ return self.gc_table_addr + index * WORD
+
+ def genop_load_from_gc_table(self, op, arglocs, resloc):
+ index = op.getarg(0).getint()
+ assert isinstance(resloc, RegLoc)
+ if IS_X86_64:
+ self.mc.MOV_rp(resloc.value, 0) # %rip-relative
+ self._patch_load_from_gc_table(index)
+ elif IS_X86_32:
+ self.mc.MOV_rj(resloc.value, self._addr_from_gc_table(index))
+
def genop_int_force_ge_zero(self, op, arglocs, resloc):
self.mc.TEST(arglocs[0], arglocs[0])
self.mov(imm0, resloc)
@@ -1843,8 +1926,9 @@
def implement_guard_recovery(self, guard_opnum, faildescr, failargs,
fail_locs, frame_depth):
gcmap = allocate_gcmap(self, frame_depth, JITFRAME_FIXED_SIZE)
+ faildescrindex = self.get_gcref_from_faildescr(faildescr)
return GuardToken(self.cpu, gcmap, faildescr, failargs, fail_locs,
- guard_opnum, frame_depth)
+ guard_opnum, frame_depth, faildescrindex)
def generate_propagate_error_64(self):
assert WORD == 8
@@ -1862,8 +1946,12 @@
self._update_at_exit(guardtok.fail_locs, guardtok.failargs,
guardtok.faildescr, regalloc)
#
- fail_descr, target = self.store_info_on_descr(startpos, guardtok)
- self.mc.PUSH(imm(fail_descr))
+ faildescrindex, target = self.store_info_on_descr(startpos, guardtok)
+ if IS_X86_64:
+ self.mc.PUSH_p(0) # %rip-relative
+ self._patch_load_from_gc_table(faildescrindex)
+ elif IS_X86_32:
+ self.mc.PUSH_j(self._addr_from_gc_table(faildescrindex))
self.push_gcmap(self.mc, guardtok.gcmap, push=True)
self.mc.JMP(imm(target))
return startpos
@@ -1967,17 +2055,24 @@
def genop_finish(self, op, arglocs, result_loc):
base_ofs = self.cpu.get_baseofs_of_frame_field()
- if len(arglocs) == 2:
- [return_val, fail_descr_loc] = arglocs
+ if len(arglocs) > 0:
+ [return_val] = arglocs
if op.getarg(0).type == FLOAT and not IS_X86_64:
size = WORD * 2
else:
size = WORD
self.save_into_mem(raw_stack(base_ofs), return_val, imm(size))
- else:
- [fail_descr_loc] = arglocs
ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
- self.mov(fail_descr_loc, RawEbpLoc(ofs))
+
+ descr = op.getdescr()
+ faildescrindex = self.get_gcref_from_faildescr(descr)
+ if IS_X86_64:
+ self.mc.MOV_rp(eax.value, 0)
+ self._patch_load_from_gc_table(faildescrindex)
+ elif IS_X86_32:
+ self.mc.MOV_rj(eax.value, self._addr_from_gc_table(faildescrindex))
+ self.mov(eax, RawEbpLoc(ofs))
+
arglist = op.getarglist()
if arglist and arglist[0].type == REF:
if self._finish_gcmap:
@@ -2047,8 +2142,16 @@
guard_op.getopnum() == rop.GUARD_NOT_FORCED_2)
faildescr = guard_op.getdescr()
ofs = self.cpu.get_ofs_of_frame_field('jf_force_descr')
- self.mc.MOV(raw_stack(ofs), imm(rffi.cast(lltype.Signed,
- cast_instance_to_gcref(faildescr))))
+
+ faildescrindex = self.get_gcref_from_faildescr(faildescr)
+ if IS_X86_64:
+ self.mc.MOV_rp(X86_64_SCRATCH_REG.value, 0)
+ self._patch_load_from_gc_table(faildescrindex)
+ self.mc.MOV(raw_stack(ofs), X86_64_SCRATCH_REG)
+ elif IS_X86_32:
+ # XXX need a scratch reg here for efficiency; be more clever
+ self.mc.PUSH_j(self._addr_from_gc_table(faildescrindex))
+ self.mc.POP(raw_stack(ofs))
def _find_nearby_operation(self, delta):
regalloc = self._regalloc
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -423,16 +423,11 @@
def consider_finish(self, op):
# the frame is in ebp, but we have to point where in the frame is
# the potential argument to FINISH
- descr = op.getdescr()
- fail_descr = cast_instance_to_gcref(descr)
- # we know it does not move, but well
- rgc._make_sure_does_not_move(fail_descr)
- fail_descr = rffi.cast(lltype.Signed, fail_descr)
if op.numargs() == 1:
loc = self.make_sure_var_in_reg(op.getarg(0))
- locs = [loc, imm(fail_descr)]
+ locs = [loc]
else:
- locs = [imm(fail_descr)]
+ locs = []
self.perform(op, locs, None)
def consider_guard_no_exception(self, op):
@@ -1141,6 +1136,10 @@
consider_same_as_r = _consider_same_as
consider_same_as_f = _consider_same_as
+ def consider_load_from_gc_table(self, op):
+ resloc = self.rm.force_allocate_reg(op)
+ self.perform(op, [], resloc)
+
def consider_int_force_ge_zero(self, op):
argloc = self.make_sure_var_in_reg(op.getarg(0))
resloc = self.force_allocate_reg(op, [op.getarg(0)])
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -297,6 +297,20 @@
return encode_abs, argnum, None, None
# ____________________________________________________________
+# ***X86_64 only***
+# Emit a mod/rm referencing an address "RIP + immediate_offset".
+
[email protected](2)
+def encode_rip_offset(mc, immediate, _, orbyte):
+ assert mc.WORD == 8
+ mc.writechar(chr(0x05 | orbyte))
+ mc.writeimm32(immediate)
+ return 0
+
+def rip_offset(argnum):
+ return encode_rip_offset, argnum, None, None
+
+# ____________________________________________________________
# For 64-bits mode: the REX.W, REX.R, REX.X, REG.B prefixes
REX_W = 8
@@ -586,6 +600,8 @@
PUS1_r = insn(rex_nw, register(1), '\x50')
PUS1_b = insn(rex_nw, '\xFF', orbyte(6<<3), stack_bp(1))
PUS1_m = insn(rex_nw, '\xFF', orbyte(6<<3), mem_reg_plus_const(1))
+ PUS1_j = insn(rex_nw, '\xFF', orbyte(6<<3), abs_(1))
+ PUS1_p = insn(rex_nw, '\xFF', orbyte(6<<3), rip_offset(1))
PUS1_i8 = insn('\x6A', immediate(1, 'b'))
PUS1_i32 = insn('\x68', immediate(1, 'i'))
@@ -608,6 +624,14 @@
self.PUS1_i32(immed)
self.stack_frame_size_delta(+self.WORD)
+ def PUSH_j(self, abs_addr):
+ self.PUS1_j(abs_addr)
+ self.stack_frame_size_delta(+self.WORD)
+
+ def PUSH_p(self, rip_offset):
+ self.PUS1_p(rip_offset)
+ self.stack_frame_size_delta(+self.WORD)
+
PO1_r = insn(rex_nw, register(1), '\x58')
PO1_b = insn(rex_nw, '\x8F', orbyte(0<<3), stack_bp(1))
@@ -914,6 +938,7 @@
add_insn('m', mem_reg_plus_const(modrm_argnum))
add_insn('a', mem_reg_plus_scaled_reg_plus_const(modrm_argnum))
add_insn('j', abs_(modrm_argnum))
+ add_insn('p', rip_offset(modrm_argnum))
# Define a regular MOV, and a variant MOV32 that only uses the low 4 bytes of a
# register
diff --git a/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py b/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py
--- a/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py
+++ b/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py
@@ -279,6 +279,8 @@
if modes:
tests = self.get_all_tests()
m = modes[0]
+ if m == 'p' and self.WORD == 4:
+ return []
lst = tests[m]()
random.shuffle(lst)
if methname == 'PSRAD_xi' and m == 'i':
diff --git a/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py b/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py
--- a/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py
+++ b/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py
@@ -51,3 +51,19 @@
def test_extra_MOV_ri64(self):
self.imm32_tests = self.imm64_tests # patch on 'self'
self.complete_test('MOV_ri')
+
+ def rip_relative_tests(self):
+ return [-0x80000000, 0x7FFFFFFF, 128, 256, -129, -255, 0, 127]
+
+ def get_all_tests(self):
+ d = super(TestRx86_64, self).get_all_tests()
+ d['p'] = self.rip_relative_tests
+ return d
+
+ def assembler_operand_rip_relative(self, value):
+ return '%d(%%rip)' % value
+
+ def get_all_assembler_operands(self):
+ d = super(TestRx86_64, self).get_all_assembler_operands()
+ d['p'] = self.assembler_operand_rip_relative
+ return d
diff --git a/rpython/jit/metainterp/executor.py b/rpython/jit/metainterp/executor.py
--- a/rpython/jit/metainterp/executor.py
+++ b/rpython/jit/metainterp/executor.py
@@ -408,6 +408,7 @@
rop.GC_LOAD_INDEXED_R,
rop.GC_STORE,
rop.GC_STORE_INDEXED,
+ rop.LOAD_FROM_GC_TABLE,
): # list of opcodes never executed by pyjitpl
continue
if rop._VEC_PURE_FIRST <= value <= rop._VEC_PURE_LAST:
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -1056,6 +1056,8 @@
'UNICODELEN/1/i',
'UNICODEGETITEM/2/i',
#
+ 'LOAD_FROM_GC_TABLE/1/r', # only emitted by rewrite.py
+ #
'_ALWAYS_PURE_LAST', # ----- end of always_pure operations -----
# parameters GC_LOAD
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit