Author: Maciej Fijalkowski <[email protected]>
Branch: jitframe-on-heap
Changeset: r60299:8f0f2d2b73bf
Date: 2013-01-21 19:04 +0200
http://bitbucket.org/pypy/pypy/changeset/8f0f2d2b73bf/
Log: make gcmap work per-call and per-malloc-slowpath
diff --git a/rpython/jit/backend/llsupport/gc.py
b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -120,12 +120,10 @@
descrs = JitFrameDescrs()
descrs.arraydescr = cpu.arraydescrof(jitframe.JITFRAME)
for name in ['jf_descr', 'jf_guard_exc', 'jf_force_descr',
- 'jf_frame_info', 'jf_gcpattern', 'jf_gcmap']:
+ 'jf_frame_info', 'jf_gcmap']:
setattr(descrs, name, cpu.fielddescrof(jitframe.JITFRAME, name))
descrs.jfi_frame_depth = cpu.fielddescrof(jitframe.JITFRAMEINFO,
'jfi_frame_depth')
- descrs.jfi_gcmap = cpu.fielddescrof(jitframe.JITFRAMEINFO,
- 'jfi_gcmap')
return descrs
def getarraydescr_for_frame(self, type, index):
diff --git a/rpython/jit/backend/llsupport/jitframe.py
b/rpython/jit/backend/llsupport/jitframe.py
--- a/rpython/jit/backend/llsupport/jitframe.py
+++ b/rpython/jit/backend/llsupport/jitframe.py
@@ -1,25 +1,23 @@
-from rpython.rtyper.lltypesystem import lltype, llmemory
+from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
from rpython.rtyper.annlowlevel import llhelper
from rpython.rlib.objectmodel import specialize
from rpython.rlib.debug import ll_assert
STATICSIZE = 0 # patch from the assembler backend
+SIZEOFSIGNED = rffi.sizeof(lltype.Signed)
+IS_32BIT = (SIZEOFSIGNED == 2 ** 31 - 1)
# this is an info that only depends on the assembler executed, copied from
# compiled loop token (in fact we could use this as a compiled loop token
# XXX do this
-GCMAP = lltype.GcArray(lltype.Signed)
+GCMAP = lltype.GcArray(lltype.Unsigned)
NULLGCMAP = lltype.nullptr(GCMAP)
-# XXX make it SHORT not Signed
JITFRAMEINFO = lltype.GcStruct(
'JITFRAMEINFO',
# the depth of frame
('jfi_frame_depth', lltype.Signed),
- # gcindexlist is a list of indexes of GC ptrs
- # in the actual array jf_frame of JITFRAME
- ('jfi_gcmap', lltype.Ptr(GCMAP)),
)
NULLFRAMEINFO = lltype.nullptr(JITFRAMEINFO)
@@ -29,14 +27,13 @@
def jitframe_allocate(frame_info):
frame = lltype.malloc(JITFRAME, frame_info.jfi_frame_depth, zero=True)
- frame.jf_gcmap = frame_info.jfi_gcmap
frame.jf_frame_info = frame_info
return frame
def jitframe_copy(frame):
frame_info = frame.jf_frame_info
new_frame = lltype.malloc(JITFRAME, frame_info.jfi_frame_depth, zero=True)
- new_frame.jf_gcmap = frame_info.jfi_gcmap
+ ll_assert(frame.jf_gcmap == NULLGCMAP, "non empty gc map when copying")
new_frame.jf_frame_info = frame_info
return new_frame
@@ -50,10 +47,7 @@
('jf_descr', llmemory.GCREF),
# guard_not_forced descr
('jf_force_descr', llmemory.GCREF),
- # a bitmask of where are GCREFS in the top of the frame (saved registers)
- # used for calls and failures
- ('jf_gcpattern', lltype.Signed),
- # a copy of gcmap from frameinfo
+ # a map of GC pointers
('jf_gcmap', lltype.Ptr(GCMAP)),
# For the front-end: a GCREF for the savedata
('jf_savedata', llmemory.GCREF),
@@ -83,14 +77,14 @@
GCMAPBASEOFS = llmemory.itemoffsetof(GCMAP, 0)
BASEITEMOFS = llmemory.itemoffsetof(JITFRAME.jf_frame, 0)
SIGN_SIZE = llmemory.sizeof(lltype.Signed)
+UNSIGN_SIZE = llmemory.sizeof(lltype.Unsigned)
def jitframe_trace(obj_addr, prev):
if prev == llmemory.NULL:
(obj_addr + getofs('jf_gc_trace_state')).signed[0] = 0
return obj_addr + getofs('jf_frame_info')
fld = (obj_addr + getofs('jf_gc_trace_state')).signed[0]
- state = fld & 0xff
- no = fld >> 8
+ state = fld & 0x7 # 3bits of possible states
if state == 0:
(obj_addr + getofs('jf_gc_trace_state')).signed[0] = 1
return obj_addr + getofs('jf_descr')
@@ -106,26 +100,38 @@
elif state == 4:
(obj_addr + getofs('jf_gc_trace_state')).signed[0] = 5
return obj_addr + getofs('jf_guard_exc')
- elif state == 5:
- # bit pattern
- gcpat = (obj_addr + getofs('jf_gcpattern')).signed[0]
- while no < STATICSIZE and gcpat & (1 << no) == 0:
- no += 1
- if no != STATICSIZE:
- newstate = 5 | ((no + 1) << 8)
- (obj_addr + getofs('jf_gc_trace_state')).signed[0] = newstate
- return obj_addr + getofs('jf_frame') + BASEITEMOFS + SIGN_SIZE * no
- state = 6
- no = 0
- ll_assert(state == 6, "invalid tracer state")
+ ll_assert(state == 5, "invalid state")
+ # bit pattern
+ # decode the pattern
+ if IS_32BIT:
+ # 32 possible bits
+ state = (fld >> 3) & 0x1f
+ no = fld >> (3 + 5)
+ MAX = 31
+ else:
+ # 64 possible bits
+ state = (fld >> 3) & 0x3f
+ no = fld >> (3 + 6)
+ MAX = 63
gcmap = (obj_addr + getofs('jf_gcmap')).address[0]
- gcmaplen = (gcmap + GCMAPLENGTHOFS).signed[0]
- if no >= gcmaplen:
- return llmemory.NULL
- index = (gcmap + GCMAPBASEOFS + SIGN_SIZE * no).signed[0] + STATICSIZE
- newstate = 6 | ((no + 1) << 8)
- (obj_addr + getofs('jf_gc_trace_state')).signed[0] = newstate
- return obj_addr + getofs('jf_frame') + BASEITEMOFS + SIGN_SIZE * index
+ gcmap_lgt = (gcmap + GCMAPLENGTHOFS).signed[0]
+ while no < gcmap_lgt:
+ cur = (gcmap + GCMAPBASEOFS + UNSIGN_SIZE * no).unsigned[0]
+ while state < MAX and not (cur & (1 << state)):
+ state += 1
+ if state < MAX:
+ # found it
+ # save new state
+ if IS_32BIT:
+ new_state = 5 | ((state + 1) << 3) | (no << 8)
+ else:
+ new_state = 5 | ((state + 1) << 3) | (no << 9)
+ (obj_addr + getofs('jf_gc_trace_state')).signed[0] = new_state
+ return (obj_addr + getofs('jf_frame') + BASEITEMOFS + SIGN_SIZE *
+ (no * SIZEOFSIGNED * 8 + state))
+ no += 1
+ state = 0
+ return llmemory.NULL
CUSTOMTRACEFUNC = lltype.FuncType([llmemory.Address, llmemory.Address],
llmemory.Address)
diff --git a/rpython/jit/backend/llsupport/regalloc.py
b/rpython/jit/backend/llsupport/regalloc.py
--- a/rpython/jit/backend/llsupport/regalloc.py
+++ b/rpython/jit/backend/llsupport/regalloc.py
@@ -1,5 +1,5 @@
import os
-from rpython.jit.metainterp.history import Const, Box, REF
+from rpython.jit.metainterp.history import Const, Box, REF, INT
from rpython.rlib.objectmodel import we_are_translated, specialize
from rpython.jit.metainterp.resoperation import rop
@@ -112,56 +112,18 @@
node = node.next
return 'LinkedList(%s)' % '->'.join(l)
-def frame_manager_from_gcmap(FmClass, gcmap, depth, frame_bindings):
- if not gcmap:
- return FmClass()
- rev_bindings = [False] * depth
- for arg, loc in frame_bindings.iteritems():
- size = FmClass.frame_size(arg.type)
- if size == 2:
- rev_bindings[FmClass.get_loc_index(loc) + 1] = True
- assert size == 1
- rev_bindings[FmClass.get_loc_index(loc)] = True
- gcrefs = []
- others = []
- c = 0
- for i in range(len(gcmap)):
- item = gcmap[i]
- while c < item:
- if not rev_bindings[c]:
- others.append(c)
- c += 1
- if not rev_bindings[item]:
- gcrefs.append(item)
- c += 1
- for i in range(c, depth):
- if not rev_bindings[i]:
- others.append(i)
- fm = FmClass(depth, gcrefs, others)
- for arg, loc in frame_bindings.iteritems():
- fm.bindings[arg] = loc
- return fm
-
class FrameManager(object):
""" Manage frame positions
start_free_depth is the start where we can allocate in whatever order
we like.
-
- freelist_gcrefs and freelist_others are free lists of locations that
- can be used for gcrefs and others. below stack_free_depth. Note
- that if floats are occupying more than one spot, in order to allocate
- the correct size, we need to use more than one from the freelist in
- the consecutive order.
"""
- def __init__(self, start_free_depth=0, freelist_gcrefs=None,
- freelist_others=None):
+ def __init__(self, start_free_depth=0, freelist=None):
self.bindings = {}
self.current_frame_depth = start_free_depth
# we disable hints for now
#self.hint_frame_locations = {}
- self.freelist_gcrefs = LinkedList(self, freelist_gcrefs)
- self.freelist_others = LinkedList(self, freelist_others)
+ self.freelist = LinkedList(self, freelist)
def get_frame_depth(self):
return self.current_frame_depth
@@ -191,10 +153,7 @@
# that 'size' is a power of two. The reason for doing so is to
# avoid obscure issues in jump.py with stack locations that try
# to move from position (6,7) to position (7,8).
- if box.type == REF:
- newloc = self.freelist_gcrefs.pop(1, box.type)
- else:
- newloc = self.freelist_others.pop(size, box.type)
+ newloc = self.freelist.pop(size, box.type)
if newloc is None:
#
index = self.get_frame_depth()
@@ -209,17 +168,23 @@
self.bindings[box] = newloc
return newloc
+ def bind(self, box, loc):
+ pos = self.get_loc_index(loc)
+ size = self.frame_size(box.type)
+ if self.current_frame_depth < pos:
+ for i in range(self.current_frame_depth, pos):
+ self.freelist.append(1, self.frame_pos(i, INT))
+ self.current_frame_depth = pos + size
+ self.bindings[box] = loc
+
def mark_as_free(self, box):
try:
loc = self.bindings[box]
except KeyError:
return # already gone
del self.bindings[box]
- if box.type == REF:
- self.freelist_gcrefs.append(1, loc)
- else:
- size = self.frame_size(box.type)
- self.freelist_others.append(size, loc)
+ size = self.frame_size(box.type)
+ self.freelist.append(size, loc)
def try_to_reuse_location(self, box, loc):
xxx
@@ -242,16 +207,6 @@
def _gather_gcroots(lst, var):
lst.append(var)
- def get_gc_map(self):
- """ returns a list of locations where GC pointers are
- """
- assert not self.bindings
- # XXX unsure, maybe what we want is to
- # free everything instead
- lst = []
- self.freelist_gcrefs.foreach(self._gather_gcroots, lst)
- return lst
-
# abstract methods that need to be overwritten for specific assemblers
@staticmethod
def frame_pos(loc, type):
diff --git a/rpython/jit/backend/llsupport/rewrite.py
b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -156,10 +156,6 @@
op2 = ResOperation(rop.SETFIELD_GC, [frame, history.ConstPtr(llref)],
None, descr=descrs.jf_frame_info)
self.newops.append(op2)
- llref = lltype.cast_opaque_ptr(llmemory.GCREF, jfi.jfi_gcmap)
- op3 = ResOperation(rop.SETFIELD_GC, [frame, history.ConstPtr(llref)],
- None, descr=descrs.jf_gcmap)
- self.newops.append(op3)
for i, arg in enumerate(op.getarglist()):
index, descr = self.cpu.getarraydescr_for_frame(arg.type, i)
self.newops.append(ResOperation(rop.SETARRAYITEM_GC,
diff --git a/rpython/jit/backend/llsupport/test/test_gc.py
b/rpython/jit/backend/llsupport/test/test_gc.py
--- a/rpython/jit/backend/llsupport/test/test_gc.py
+++ b/rpython/jit/backend/llsupport/test/test_gc.py
@@ -7,7 +7,7 @@
from rpython.jit.metainterp.history import BoxPtr, BoxInt, ConstPtr
from rpython.jit.metainterp.resoperation import get_deep_immutable_oplist,
rop,\
ResOperation
-from rpython.rlib.rarithmetic import is_valid_int
+from rpython.rlib.rarithmetic import is_valid_int, r_uint
def test_boehm():
gc_ll_descr = gc.GcLLDescr_boehm(None, None, None)
@@ -276,12 +276,9 @@
frame_info = lltype.malloc(jitframe.JITFRAMEINFO, zero=True)
frame = lltype.malloc(jitframe.JITFRAME, 15, zero=True)
frame.jf_frame_info = frame_info
- frame.jf_gcmap = lltype.malloc(jitframe.GCMAP, 4)
- frame.jf_gcmap[0] = 5
- frame.jf_gcmap[1] = 7
- frame.jf_gcmap[2] = 8
- frame.jf_gcmap[3] = 10
- frame.jf_gcpattern = 1 | 4
+ frame.jf_gcmap = lltype.malloc(jitframe.GCMAP, 2)
+ frame.jf_gcmap[0] = r_uint(1 | 2 | 8 | 32 | 128)
+ frame.jf_gcmap[1] = r_uint(2 | 16 | 32 | 128)
frame_adr = llmemory.cast_ptr_to_adr(frame)
all_addrs = []
next = jitframe.jitframe_trace(frame_adr, llmemory.NULL)
@@ -296,12 +293,14 @@
counter += 1
# gcpattern
assert all_addrs[6] == indexof(0)
- assert all_addrs[7] == indexof(2)
- assert all_addrs[8] == indexof(3 + 5)
- assert all_addrs[9] == indexof(3 + 7)
- assert all_addrs[10] == indexof(3 + 8)
- assert all_addrs[11] == indexof(3 + 10)
- assert len(all_addrs) == 6 + 4 + 2
+ assert all_addrs[7] == indexof(1)
+ assert all_addrs[8] == indexof(3)
+ assert all_addrs[9] == indexof(5)
+ assert all_addrs[10] == indexof(7)
+ # XXX 32bit
+ assert all_addrs[11] == indexof(65)
+
+ assert len(all_addrs) == 6 + 5 + 4
# 6 static fields, 4 addresses from gcmap, 2 from gcpattern
finally:
jitframe.STATICSIZE = PREV_STATICSIZE
diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py
b/rpython/jit/backend/llsupport/test/test_regalloc.py
--- a/rpython/jit/backend/llsupport/test/test_regalloc.py
+++ b/rpython/jit/backend/llsupport/test/test_regalloc.py
@@ -1,8 +1,7 @@
import py
from rpython.jit.metainterp.history import BoxInt, ConstInt, BoxFloat, INT,
FLOAT,\
BoxPtr
-from rpython.jit.backend.llsupport.regalloc import FrameManager, LinkedList,\
- frame_manager_from_gcmap
+from rpython.jit.backend.llsupport.regalloc import FrameManager, LinkedList
from rpython.jit.backend.llsupport.regalloc import RegisterManager as
BaseRegMan
def newboxes(*values):
@@ -510,7 +509,7 @@
assert fm.get_loc_index(locf1) == 4
assert fm.get_frame_depth() == 5
fm.mark_as_free(b1)
- assert fm.freelist_others
+ assert fm.freelist
b2 = BoxInt()
fm.loc(b2) # should be in the same spot as b1 before
assert fm.get(b1) is None
@@ -518,22 +517,22 @@
fm.mark_as_free(b0)
p0 = BoxPtr()
ploc = fm.loc(p0)
- assert fm.get_loc_index(ploc) == 5
- assert fm.get_frame_depth() == 6
+ assert fm.get_loc_index(ploc) == 0
+ assert fm.get_frame_depth() == 5
assert ploc != loc1
p1 = BoxPtr()
p1loc = fm.loc(p1)
- assert fm.get_loc_index(p1loc) == 6
- assert fm.get_frame_depth() == 7
+ assert fm.get_loc_index(p1loc) == 5
+ assert fm.get_frame_depth() == 6
fm.mark_as_free(p0)
p2 = BoxPtr()
p2loc = fm.loc(p2)
assert p2loc == ploc
- assert not fm.freelist_gcrefs
- assert len(fm.freelist_others) == 1
+ assert len(fm.freelist) == 0
for box in fm.bindings.keys():
fm.mark_as_free(box)
- assert fm.get_gc_map() == [5, 6]
+ fm.bind(BoxPtr(), FakeFramePos(3, 'r'))
+ assert len(fm.freelist) == 6
def test_frame_manager_basic(self):
b0, b1 = newboxes(0, 1)
@@ -562,7 +561,7 @@
assert fm.get_loc_index(locf1) == 5
assert fm.get_frame_depth() == 7
fm.mark_as_free(b1)
- assert fm.freelist_others
+ assert fm.freelist
b2 = BoxInt()
fm.loc(b2) # should be in the same spot as b1 before
assert fm.get(b1) is None
@@ -570,45 +569,22 @@
fm.mark_as_free(b0)
p0 = BoxPtr()
ploc = fm.loc(p0)
- assert fm.get_loc_index(ploc) == 7
- assert fm.get_frame_depth() == 8
+ assert fm.get_loc_index(ploc) == 0
+ assert fm.get_frame_depth() == 7
assert ploc != loc1
p1 = BoxPtr()
p1loc = fm.loc(p1)
- assert fm.get_loc_index(p1loc) == 8
- assert fm.get_frame_depth() == 9
+ assert fm.get_loc_index(p1loc) == 7
+ assert fm.get_frame_depth() == 8
fm.mark_as_free(p0)
p2 = BoxPtr()
p2loc = fm.loc(p2)
assert p2loc == ploc
- assert not fm.freelist_gcrefs
- assert len(fm.freelist_others) == 1
+ assert len(fm.freelist) == 0
fm.mark_as_free(b2)
f3 = BoxFloat()
+ fm.mark_as_free(p2)
floc = fm.loc(f3)
assert fm.get_loc_index(floc) == 0
for box in fm.bindings.keys():
fm.mark_as_free(box)
- assert fm.get_gc_map() == [7, 8]
-
- def test_fm_from_gcmap(self):
- class Loc(object):
- def __init__(self, l):
- self.l = l
-
- class Fm(FrameManager):
- @staticmethod
- def get_loc_index(l):
- return l.l
-
- b0 = BoxInt()
- b1 = BoxInt()
- l0 = Loc(5)
- l1 = Loc(2)
- bindings = {b0: l0, b1: l1}
- fm = frame_manager_from_gcmap(Fm, [1, 5, 6, 8], 13,
- bindings)
- assert repr(fm.freelist_gcrefs) == "LinkedList(1->6->8)"
- assert repr(fm.freelist_others) ==
"LinkedList(0->3->4->7->9->10->11->12)"
- assert fm.current_frame_depth == 13
- assert fm.bindings == bindings
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py
b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -98,8 +98,12 @@
[])
equaloplists(operations, expected.operations)
+class FakeTracker(object):
+ pass
+
class BaseFakeCPU(object):
def __init__(self):
+ self.tracker = FakeTracker()
self._cache = {}
self.signedframedescr = ArrayDescr(3, 8, FieldDescr('len', 0, 0, 0), 0)
self.floatframedescr = ArrayDescr(5, 8, FieldDescr('len', 0, 0, 0), 0)
diff --git a/rpython/jit/backend/x86/assembler.py
b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -30,7 +30,7 @@
from rpython.jit.backend.x86.jump import remap_frame_layout
from rpython.jit.codewriter.effectinfo import EffectInfo
from rpython.jit.codewriter import longlong
-from rpython.rlib.rarithmetic import intmask
+from rpython.rlib.rarithmetic import intmask, r_uint
from rpython.rlib.objectmodel import compute_unique_id
# darwin requires the stack to be 16 bytes aligned on calls. Same for gcc
4.5.0,
@@ -42,15 +42,39 @@
class GuardToken(object):
- def __init__(self, faildescr, failargs, fail_locs, exc,
+ def __init__(self, faildescr, failargs, fail_locs, exc, frame_depth,
is_guard_not_invalidated, is_guard_not_forced):
self.faildescr = faildescr
self.failargs = failargs
self.fail_locs = fail_locs
+ self.gcmap = self.compute_gcmap(failargs, fail_locs, frame_depth)
self.exc = exc
self.is_guard_not_invalidated = is_guard_not_invalidated
self.is_guard_not_forced = is_guard_not_forced
+ def compute_gcmap(self, failargs, fail_locs, frame_depth):
+ # note that regalloc has a very similar compute, but
+ # one that does iteration over all bindings, so slightly different,
+ # eh
+ size = frame_depth + JITFRAME_FIXED_SIZE
+ gcmap = lltype.malloc(jitframe.GCMAP, size // WORD // 8 + 1,
+ zero=True)
+ input_i = 0
+ for i in range(len(failargs)):
+ arg = failargs[i]
+ if arg is None:
+ continue
+ loc = fail_locs[input_i]
+ input_i += 1
+ if arg.type == REF:
+ loc = fail_locs[i]
+ if isinstance(loc, RegLoc):
+ val = gpr_reg_mgr_cls.all_reg_indexes[loc.value]
+ else:
+ val = loc.value // WORD
+ gcmap[val // WORD // 8] |= r_uint(1) << (val % (WORD * 8))
+ return gcmap
+
DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed),
('type', lltype.Char), # 'b'ridge, 'l'abel or
# 'e'ntry point
@@ -188,7 +212,7 @@
"""
mc = codebuf.MachineCodeBlockWrapper()
self._push_all_regs_to_frame(mc, [eax, edi], self.cpu.supports_floats)
- ofs = self.cpu.get_ofs_of_frame_field('jf_gcpattern')
+ ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
# store the gc pattern
mc.MOV_rs(ecx.value, WORD)
mc.MOV_br(ofs, ecx.value)
@@ -505,7 +529,6 @@
looptoken._x86_loop_code = looppos
frame_depth = self._assemble(regalloc, inputargs, operations)
clt.frame_info.jfi_frame_depth = frame_depth + JITFRAME_FIXED_SIZE
- self._update_gcmap(clt.frame_info, regalloc)
#
size_excluding_failure_stuff = self.mc.get_relative_pos()
self.write_pending_failure_recoveries()
@@ -581,7 +604,6 @@
self._patch_stackadjust(stack_check_patch_ofs + rawstart, frame_depth)
self.fixup_target_tokens(rawstart)
self.current_clt.frame_info.jfi_frame_depth = frame_depth
- self._update_gcmap(self.current_clt.frame_info, regalloc)
self.teardown()
# oprofile support
if self.cpu.profile_agent is not None:
@@ -659,42 +681,12 @@
offset = mc.get_relative_pos() - jg_location
assert 0 < offset <= 127
mc.overwrite(jg_location-1, chr(offset))
- return stack_check_cmp_ofs
-
- def _insert_frame_adjustment(self, frame_info):
- """ Our frame might end up being different than what we expect.
- Note that depth is fine (since bridges check that), but we need
- to update gcmap
- """
- # XXX note that this can be easily shifted to JUMP
- # instead of LABEL, would be slightly faster
- gcmap_ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
- frame_info_addr = rffi.cast(lltype.Signed, frame_info)
- frame_info_ofs = self.cpu.get_ofs_of_frame_field('jf_frame_info')
- jfi_gc_map_ofs = self.cpu.get_ofs_of_frame_field('jfi_gcmap')
- if IS_X86_32:
- self.mc.MOV_bi(frame_info_ofs, frame_info_addr)
- XXX
- else:
- self.mc.MOV_ri(X86_64_SCRATCH_REG.value, frame_info_addr)
- self.mc.MOV_br(frame_info_ofs, X86_64_SCRATCH_REG.value)
- self.mc.MOV_rm(X86_64_SCRATCH_REG.value,
- (X86_64_SCRATCH_REG.value,
- jfi_gc_map_ofs))
- self.mc.MOV_br(gcmap_ofs, X86_64_SCRATCH_REG.value)
-
+ return stack_check_cmp_ofs
def _patch_stackadjust(self, adr, allocated_depth):
mc = codebuf.MachineCodeBlockWrapper()
mc.writeimm32(allocated_depth)
mc.copy_to_raw_memory(adr)
-
- def _update_gcmap(self, frame_info, regalloc):
- gcmap = regalloc.get_gc_map()
- frame_info.jfi_gcmap = lltype.malloc(jitframe.GCMAP,
- len(gcmap))
- for i in range(len(gcmap)):
- frame_info.jfi_gcmap[i] = gcmap[i]
def get_asmmemmgr_blocks(self, looptoken):
clt = looptoken.compiled_loop_token
@@ -888,7 +880,6 @@
old_fi = oldlooptoken.compiled_loop_token.frame_info
new_fi = newlooptoken.compiled_loop_token.frame_info
old_fi.jfi_frame_depth = new_fi.jfi_frame_depth
- old_fi.jfi_gcmap = new_fi.jfi_gcmap
mc = codebuf.MachineCodeBlockWrapper()
mc.JMP(imm(target))
if WORD == 4: # keep in sync with prepare_loop()
@@ -974,14 +965,14 @@
genop_math_list[oopspecindex](self, op, arglocs, resloc)
def regalloc_perform_with_guard(self, op, guard_op, faillocs,
- arglocs, resloc):
+ arglocs, resloc, frame_depth):
faildescr = guard_op.getdescr()
assert isinstance(faildescr, AbstractFailDescr)
failargs = guard_op.getfailargs()
guard_opnum = guard_op.getopnum()
guard_token = self.implement_guard_recovery(guard_opnum,
faildescr, failargs,
- faillocs)
+ faillocs, frame_depth)
if op is None:
dispatch_opnum = guard_opnum
else:
@@ -992,9 +983,10 @@
# must be added by the genop_guard_list[]()
assert guard_token is self.pending_guard_tokens[-1]
- def regalloc_perform_guard(self, guard_op, faillocs, arglocs, resloc):
+ def regalloc_perform_guard(self, guard_op, faillocs, arglocs, resloc,
+ frame_depth):
self.regalloc_perform_with_guard(None, guard_op, faillocs, arglocs,
- resloc)
+ resloc, frame_depth)
def load_effective_addr(self, sizereg, baseofs, scale, result, frm=imm0):
self.mc.LEA(result, addr_add(frm, sizereg, baseofs, scale))
@@ -1824,13 +1816,13 @@
self.implement_guard(guard_token, 'NE')
def implement_guard_recovery(self, guard_opnum, faildescr, failargs,
- fail_locs):
+ fail_locs, frame_depth):
exc = (guard_opnum == rop.GUARD_EXCEPTION or
guard_opnum == rop.GUARD_NO_EXCEPTION or
guard_opnum == rop.GUARD_NOT_FORCED)
is_guard_not_invalidated = guard_opnum == rop.GUARD_NOT_INVALIDATED
is_guard_not_forced = guard_opnum == rop.GUARD_NOT_FORCED
- return GuardToken(faildescr, failargs, fail_locs, exc,
+ return GuardToken(faildescr, failargs, fail_locs, exc, frame_depth,
is_guard_not_invalidated, is_guard_not_forced)
def generate_propagate_error_64(self):
@@ -1855,7 +1847,6 @@
fail_descr = cast_instance_to_gcref(guardtok.faildescr)
fail_descr = rffi.cast(lltype.Signed, fail_descr)
positions = [0] * len(guardtok.fail_locs)
- gcpattern = 0
for i, loc in enumerate(guardtok.fail_locs):
if loc is None or loc is ebp: # frame
positions[i] = -1
@@ -1867,8 +1858,6 @@
v = len(gpr_reg_mgr_cls.all_regs) + loc.value
else:
v = gpr_reg_mgr_cls.all_reg_indexes[loc.value]
- if guardtok.failargs[i].type == REF:
- gcpattern |= (1 << v)
positions[i] = v * WORD
# write down the positions of locs
guardtok.faildescr.rd_locs = positions
@@ -1878,7 +1867,11 @@
# mc.JMP(imm(target))
#else:
mc.PUSH(imm(fail_descr))
- mc.PUSH(imm(gcpattern))
+ gcmapref = lltype.cast_opaque_ptr(llmemory.GCREF, guardtok.gcmap)
+ # keep the ref alive
+ self.current_clt.allgcrefs.append(gcmapref)
+ rgc._make_sure_does_not_move(gcmapref)
+ mc.PUSH(imm(rffi.cast(lltype.Signed, gcmapref)))
mc.JMP(imm(target))
return startpos
@@ -1949,7 +1942,7 @@
# throws away most of the frame, including all the PUSHes that we
# did just above.
ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
- ofs2 = self.cpu.get_ofs_of_frame_field('jf_gcpattern')
+ ofs2 = self.cpu.get_ofs_of_frame_field('jf_gcmap')
base_ofs = self.cpu.get_baseofs_of_frame_field()
mc.POP(eax)
mc.MOV_br(ofs2, eax.value)
@@ -1980,9 +1973,6 @@
# exit function
self._call_footer()
- def genop_discard_label(self, op, arglocs):
- self._insert_frame_adjustment(self.current_clt.frame_info)
-
def implement_guard(self, guard_token, condition=None):
# These jumps are patched later.
if condition:
@@ -2397,30 +2387,15 @@
not_implemented("not implemented operation (guard): %s" %
op.getopname())
- def closing_jump(self, target_token, gcmap):
+ def closing_jump(self, target_token):
target = target_token._x86_loop_code
if target_token in self.target_tokens_currently_compiling:
curpos = self.mc.get_relative_pos() + 5
self.mc.JMP_l(target - curpos)
else:
- # bleh, we need to clean up all the references that are not
- # in our gcmap, but are in the target gcmap
- if target_token.original_jitcell_token:
- tgt_clt =
target_token.original_jitcell_token.compiled_loop_token
- tgt_gcmap = tgt_clt.frame_info.jfi_gcmap
- rev = {}
- i = 0
- while i < len(tgt_gcmap):
- rev[tgt_gcmap[i]] = None
- i += 1
- # for now clean them all, we might change the strategy
- for k in gcmap:
- if k not in rev:
- # all ours that are not known to the target
- self.mc.MOV_bi((k + JITFRAME_FIXED_SIZE) * WORD, 0)
self.mc.JMP(imm(target))
- def malloc_cond(self, nursery_free_adr, nursery_top_adr, size, gcpattern):
+ def malloc_cond(self, nursery_free_adr, nursery_top_adr, size, gcmap):
assert size & (WORD-1) == 0 # must be correctly aligned
self.mc.MOV(eax, heap(nursery_free_adr))
self.mc.LEA_rm(edi.value, (eax.value, size))
@@ -2430,7 +2405,9 @@
# this is a 32bit gcpattern that describes where are GC roots
# in registers (which will be saved while doing slowpath)
# store it on top of the stack (we might not have a register free)
- self.mc.MOV_si(0, gcpattern)
+ gcmapref = lltype.cast_opaque_ptr(llmemory.GCREF, gcmap)
+ rgc._make_sure_does_not_move(gcmapref)
+ self.mc.MOV(RawEspLoc(0, REF), imm(rffi.cast(lltype.Signed, gcmapref)))
self.mc.CALL(imm(self.malloc_slowpath))
offset = self.mc.get_relative_pos() - jmp_adr
assert 0 < offset <= 127
diff --git a/rpython/jit/backend/x86/regalloc.py
b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -17,19 +17,18 @@
from rpython.jit.codewriter import longlong
from rpython.jit.codewriter.effectinfo import EffectInfo
from rpython.jit.metainterp.resoperation import rop
-from rpython.jit.backend.llsupport.jitframe import NULLGCMAP
+from rpython.jit.backend.llsupport.jitframe import NULLGCMAP, GCMAP
from rpython.jit.backend.llsupport.descr import ArrayDescr
from rpython.jit.backend.llsupport.descr import CallDescr
from rpython.jit.backend.llsupport.descr import unpack_arraydescr
from rpython.jit.backend.llsupport.descr import unpack_fielddescr
from rpython.jit.backend.llsupport.descr import unpack_interiorfielddescr
-from rpython.jit.backend.llsupport.regalloc import FrameManager,
RegisterManager,\
- TempBox, compute_vars_longevity, is_comparison_or_ovf_op,\
- frame_manager_from_gcmap
+from rpython.jit.backend.llsupport.regalloc import FrameManager,\
+ RegisterManager, TempBox, compute_vars_longevity, is_comparison_or_ovf_op
from rpython.jit.backend.x86.arch import WORD, JITFRAME_FIXED_SIZE
from rpython.jit.backend.x86.arch import IS_X86_32, IS_X86_64
from rpython.jit.backend.x86 import rx86
-from rpython.rlib.rarithmetic import r_longlong
+from rpython.rlib.rarithmetic import r_longlong, r_uint
class X86RegisterManager(RegisterManager):
@@ -173,11 +172,8 @@
self.close_stack_struct = 0
self.final_jump_op = None
- def _prepare(self, inputargs, operations, allgcrefs, gcmap=NULLGCMAP,
- parent_frame_depth=0, frame_bindings=None):
- self.fm = frame_manager_from_gcmap(X86FrameManager, gcmap,
- parent_frame_depth,
- frame_bindings)
+ def _prepare(self, inputargs, operations, allgcrefs):
+ self.fm = X86FrameManager()
cpu = self.assembler.cpu
operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
allgcrefs)
@@ -205,16 +201,9 @@
self.min_bytes_before_label = 13
return operations
- def get_gc_map(self):
- return self.fm.get_gc_map()
-
def prepare_bridge(self, inputargs, arglocs, operations, allgcrefs,
frame_info):
- frame_bindings = self._frame_bindings(arglocs, inputargs)
- operations = self._prepare(inputargs, operations, allgcrefs,
- frame_info.jfi_gcmap,
- frame_info.jfi_frame_depth,
- frame_bindings)
+ operations = self._prepare(inputargs, operations, allgcrefs)
self._update_bindings(arglocs, inputargs)
self.min_bytes_before_label = 0
return operations
@@ -315,6 +304,8 @@
else:
self.rm.reg_bindings[arg] = loc
used[loc] = None
+ else:
+ self.fm.bind(arg, loc)
self.rm.free_regs = []
for reg in self.rm.all_regs:
if reg not in used:
@@ -350,7 +341,8 @@
self.rm.position += 1
self.xrm.position += 1
self.assembler.regalloc_perform_with_guard(op, guard_op, faillocs,
- arglocs, result_loc)
+ arglocs, result_loc,
+ self.fm.get_frame_depth())
self.possibly_free_vars(guard_op.getfailargs())
def perform_guard(self, guard_op, arglocs, result_loc):
@@ -362,7 +354,8 @@
else:
self.assembler.dump('%s(%s)' % (guard_op, arglocs))
self.assembler.regalloc_perform_guard(guard_op, faillocs, arglocs,
- result_loc)
+ result_loc,
+ self.fm.get_frame_depth())
self.possibly_free_vars(guard_op.getfailargs())
def PerformDiscard(self, op, arglocs):
@@ -903,15 +896,30 @@
self.rm.force_allocate_reg(tmp_box, selected_reg=edi)
self.rm.possibly_free_var(tmp_box)
#
+ gcmap = self._compute_gcmap()
gc_ll_descr = self.assembler.cpu.gc_ll_descr
- gcpattern = 0
- for box, reg in self.rm.reg_bindings.iteritems():
- if box.type == REF:
- gcpattern |= (1 << self.rm.all_reg_indexes[reg.value])
self.assembler.malloc_cond(
gc_ll_descr.get_nursery_free_addr(),
gc_ll_descr.get_nursery_top_addr(),
- size, gcpattern)
+ size, gcmap)
+
+ def _compute_gcmap(self):
+ frame_depth = self.fm.get_frame_depth()
+ size = frame_depth + JITFRAME_FIXED_SIZE
+ gcmap = lltype.malloc(GCMAP, size // WORD // 8 + 1,
+ zero=True)
+ for box, loc in self.rm.reg_bindings.iteritems():
+ if box.type == REF:
+ assert isinstance(loc, RegLoc)
+ val = gpr_reg_mgr_cls.all_reg_indexes[loc.value]
+ gcmap[val // WORD // 8] |= r_uint(1) << (val % (WORD * 8))
+ for box, loc in self.fm.bindings.iteritems():
+ if box.type == REF:
+ assert isinstance(loc, StackLoc)
+ val = loc.value // WORD
+ gcmap[val // WORD // 8] |= r_uint(1) << (val % (WORD * 8))
+ return gcmap
+
def consider_setfield_gc(self, op):
ofs, size, _ = unpack_fielddescr(op.getdescr())
@@ -1284,7 +1292,7 @@
src_locations1, dst_locations1, tmpreg,
src_locations2, dst_locations2, xmmtmp)
self.possibly_free_vars_for_op(op)
- assembler.closing_jump(self.jump_target_descr, self.get_gc_map())
+ assembler.closing_jump(self.jump_target_descr)
def consider_debug_merge_point(self, op):
pass
@@ -1347,7 +1355,6 @@
# loop that ends up jumping to this LABEL, then we can now provide
# the hints about the expected position of the spilled variables.
- self.PerformDiscard(op, [])
# XXX we never compile code like that?
#jump_op = self.final_jump_op
#if jump_op is not None and jump_op.getdescr() is descr:
diff --git a/rpython/jit/backend/x86/test/test_gc_integration.py
b/rpython/jit/backend/x86/test/test_gc_integration.py
--- a/rpython/jit/backend/x86/test/test_gc_integration.py
+++ b/rpython/jit/backend/x86/test/test_gc_integration.py
@@ -67,38 +67,14 @@
# p0 and p3 should be in registers, p1 not so much
assert self.getptr(0, lltype.Ptr(self.S)) == s1
# this is a fairly CPU specific check
- all = len(gpr_reg_mgr_cls.all_regs)
- assert frame.jf_gcpattern == (1 << (all - 1)) | (1 << (all - 2))
- # the gcmap should contain three things, p0, p1 and p3, but
- # p3 stays in a register and is only represented in gcpattern,
- # while p0 is in both
- assert len(frame.jf_gcmap) == 2
- for i in range(2):
- assert frame.jf_gcmap[i] == frame.jf_frame_info.jfi_gcmap[i]
- assert frame.jf_gcmap[0] == 1
- assert frame.jf_gcmap[1] == 3
-
- def test_label(self):
- ops = '''
- [i0, p0, i1, p1]
- label(i0, p0, i1, p1, descr=targettoken2)
- p3 = getfield_gc(p0, descr=fielddescr)
- force_spill(p3)
- guard_true(i0) [p0, i1, p1, p3]
- finish()
- '''
- s1 = lltype.malloc(self.S)
- s2 = lltype.malloc(self.S)
- s1.field = s2
- self.interpret(ops, [0, s1, 1, s2])
- ops2 = '''
- [p0]
- jump(1, p0, 1, p0, descr=targettoken2)
- '''
- self.interpret(ops2, [s1])
- frame = lltype.cast_opaque_ptr(jitframe.JITFRAMEPTR, self.deadframe)
- assert len(frame.jf_gcmap) == 3
- assert [frame.jf_gcmap[i] for i in range(3)] == [1, 3, 4]
+ assert len(frame.jf_gcmap) == 1
+ # the gcmap should contain three things, p0, p1 and p3
+ # p3 stays in a register
+ # while p0 and p1 are on the frame
+ assert frame.jf_gcmap[0] == (1 << 11) | (1 << 12) | (1 << 31)
+ assert frame.jf_frame[11]
+ assert frame.jf_frame[12]
+ assert frame.jf_frame[31]
def test_rewrite_constptr(self):
ops = '''
@@ -228,10 +204,8 @@
def test_malloc_slowpath(self):
def check(frame):
- assert len(frame.jf_frame_info.jfi_gcmap) == 2 # 2 pointers
- assert frame.jf_frame_info.jfi_gcmap[0] == 1
- assert frame.jf_frame_info.jfi_gcmap[1] == 2
- assert frame.jf_gcpattern == 0x2
+ assert len(frame.jf_gcmap) == 1
+ assert frame.jf_gcmap[0] == (2 | (1<<29) | (1 << 30))
self.cpu = self.getcpu(check)
ops = '''
@@ -257,8 +231,9 @@
def test_save_regs_around_malloc(self):
def check(frame):
- x = frame.jf_gcpattern
- assert bin(x) == '0b1111111011111'
+ x = frame.jf_gcmap
+ assert len(x) == 1
+ assert bin(x[0]) == '0b1111100000000000000001111111011111'
# all but two
self.cpu = self.getcpu(check)
diff --git a/rpython/jit/backend/x86/test/test_runner.py
b/rpython/jit/backend/x86/test/test_runner.py
--- a/rpython/jit/backend/x86/test/test_runner.py
+++ b/rpython/jit/backend/x86/test/test_runner.py
@@ -30,8 +30,7 @@
# for the individual tests see
# ====> ../../test/runner_test.py
- add_loop_instructions = ['mov', 'mov', 'mov', 'mov',
- 'mov', 'add', 'test', 'je', 'jmp']
+ add_loop_instructions = ['mov', 'add', 'test', 'je', 'jmp']
bridge_loop_instructions = ['cmp', 'jge', 'mov', 'call',
'mov', 'jmp']
diff --git a/rpython/rtyper/lltypesystem/llmemory.py
b/rpython/rtyper/lltypesystem/llmemory.py
--- a/rpython/rtyper/lltypesystem/llmemory.py
+++ b/rpython/rtyper/lltypesystem/llmemory.py
@@ -618,6 +618,9 @@
class _signed_fakeaccessor(_fakeaccessor):
TYPE = lltype.Signed
+class _unsigned_fakeaccessor(_fakeaccessor):
+ TYPE = lltype.Unsigned
+
class _float_fakeaccessor(_fakeaccessor):
TYPE = lltype.Float
@@ -654,6 +657,7 @@
}
fakeaddress.signed = property(_signed_fakeaccessor)
+fakeaddress.unsigned = property(_unsigned_fakeaccessor)
fakeaddress.float = property(_float_fakeaccessor)
fakeaddress.char = property(_char_fakeaccessor)
fakeaddress.address = property(_address_fakeaccessor)
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit