Author: Maciej Fijalkowski <[email protected]>
Branch: jitframe-on-heap
Changeset: r60299:8f0f2d2b73bf
Date: 2013-01-21 19:04 +0200
http://bitbucket.org/pypy/pypy/changeset/8f0f2d2b73bf/

Log:    make gcmap work per-call and per-malloc-slowpath

diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -120,12 +120,10 @@
         descrs = JitFrameDescrs()
         descrs.arraydescr = cpu.arraydescrof(jitframe.JITFRAME)
         for name in ['jf_descr', 'jf_guard_exc', 'jf_force_descr',
-                     'jf_frame_info', 'jf_gcpattern', 'jf_gcmap']:
+                     'jf_frame_info', 'jf_gcmap']:
             setattr(descrs, name, cpu.fielddescrof(jitframe.JITFRAME, name))
         descrs.jfi_frame_depth = cpu.fielddescrof(jitframe.JITFRAMEINFO,
                                                   'jfi_frame_depth')
-        descrs.jfi_gcmap = cpu.fielddescrof(jitframe.JITFRAMEINFO,
-                                            'jfi_gcmap')
         return descrs
 
     def getarraydescr_for_frame(self, type, index):
diff --git a/rpython/jit/backend/llsupport/jitframe.py b/rpython/jit/backend/llsupport/jitframe.py
--- a/rpython/jit/backend/llsupport/jitframe.py
+++ b/rpython/jit/backend/llsupport/jitframe.py
@@ -1,25 +1,23 @@
-from rpython.rtyper.lltypesystem import lltype, llmemory
+from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
 from rpython.rtyper.annlowlevel import llhelper
 from rpython.rlib.objectmodel import specialize
 from rpython.rlib.debug import ll_assert
 
 STATICSIZE = 0 # patch from the assembler backend
+SIZEOFSIGNED = rffi.sizeof(lltype.Signed)  # size of a machine word, in bytes
+IS_32BIT = (SIZEOFSIGNED == 4)  # sizeof is a byte count (4 or 8), not a max value
 
 # this is an info that only depends on the assembler executed, copied from
 # compiled loop token (in fact we could use this as a compiled loop token
 # XXX do this
 
-GCMAP = lltype.GcArray(lltype.Signed)
+GCMAP = lltype.GcArray(lltype.Unsigned)
 NULLGCMAP = lltype.nullptr(GCMAP)
-# XXX make it SHORT not Signed
 
 JITFRAMEINFO = lltype.GcStruct(
     'JITFRAMEINFO',
     # the depth of frame
     ('jfi_frame_depth', lltype.Signed),
-    # gcindexlist is a list of indexes of GC ptrs
-    # in the actual array jf_frame of JITFRAME
-    ('jfi_gcmap', lltype.Ptr(GCMAP)),
 )
 
 NULLFRAMEINFO = lltype.nullptr(JITFRAMEINFO)
@@ -29,14 +27,13 @@
 
 def jitframe_allocate(frame_info):
     frame = lltype.malloc(JITFRAME, frame_info.jfi_frame_depth, zero=True)
-    frame.jf_gcmap = frame_info.jfi_gcmap
     frame.jf_frame_info = frame_info
     return frame
 
 def jitframe_copy(frame):
     frame_info = frame.jf_frame_info
     new_frame = lltype.malloc(JITFRAME, frame_info.jfi_frame_depth, zero=True)
-    new_frame.jf_gcmap = frame_info.jfi_gcmap
+    ll_assert(frame.jf_gcmap == NULLGCMAP, "non empty gc map when copying")
     new_frame.jf_frame_info = frame_info
     return new_frame
 
@@ -50,10 +47,7 @@
     ('jf_descr', llmemory.GCREF),
     # guard_not_forced descr
     ('jf_force_descr', llmemory.GCREF),
-    # a bitmask of where are GCREFS in the top of the frame (saved registers)
-    # used for calls and failures
-    ('jf_gcpattern', lltype.Signed),
-    # a copy of gcmap from frameinfo
+    # a map of GC pointers
     ('jf_gcmap', lltype.Ptr(GCMAP)),
     # For the front-end: a GCREF for the savedata
     ('jf_savedata', llmemory.GCREF),
@@ -83,14 +77,14 @@
 GCMAPBASEOFS = llmemory.itemoffsetof(GCMAP, 0)
 BASEITEMOFS = llmemory.itemoffsetof(JITFRAME.jf_frame, 0)
 SIGN_SIZE = llmemory.sizeof(lltype.Signed)
+UNSIGN_SIZE = llmemory.sizeof(lltype.Unsigned)
 
 def jitframe_trace(obj_addr, prev):
     if prev == llmemory.NULL:
         (obj_addr + getofs('jf_gc_trace_state')).signed[0] = 0
         return obj_addr + getofs('jf_frame_info')
     fld = (obj_addr + getofs('jf_gc_trace_state')).signed[0]
-    state = fld & 0xff
-    no = fld >> 8
+    state = fld & 0x7 # 3bits of possible states
     if state == 0:
         (obj_addr + getofs('jf_gc_trace_state')).signed[0] = 1
         return obj_addr + getofs('jf_descr')
@@ -106,26 +100,38 @@
     elif state == 4:
         (obj_addr + getofs('jf_gc_trace_state')).signed[0] = 5
         return obj_addr + getofs('jf_guard_exc')
-    elif state == 5:
-        # bit pattern
-        gcpat = (obj_addr + getofs('jf_gcpattern')).signed[0]
-        while no < STATICSIZE and gcpat & (1 << no) == 0:
-            no += 1
-        if no != STATICSIZE:
-            newstate = 5 | ((no + 1) << 8)
-            (obj_addr + getofs('jf_gc_trace_state')).signed[0] = newstate
-            return obj_addr + getofs('jf_frame') + BASEITEMOFS + SIGN_SIZE * no
-        state = 6
-        no = 0
-    ll_assert(state == 6, "invalid tracer state")
+    ll_assert(state == 5, "invalid state")
+    # bit pattern
+    # decode the pattern
+    if IS_32BIT:
+        # 32 possible bits
+        state = (fld >> 3) & 0x1f
+        no = fld >> (3 + 5)
+        MAX = 31
+    else:
+        # 64 possible bits
+        state = (fld >> 3) & 0x3f
+        no = fld >> (3 + 6)
+        MAX = 63
     gcmap = (obj_addr + getofs('jf_gcmap')).address[0]
-    gcmaplen = (gcmap + GCMAPLENGTHOFS).signed[0]
-    if no >= gcmaplen:
-        return llmemory.NULL
-    index = (gcmap + GCMAPBASEOFS + SIGN_SIZE * no).signed[0] + STATICSIZE
-    newstate = 6 | ((no + 1) << 8)
-    (obj_addr + getofs('jf_gc_trace_state')).signed[0] = newstate
-    return obj_addr + getofs('jf_frame') + BASEITEMOFS + SIGN_SIZE * index
+    gcmap_lgt = (gcmap + GCMAPLENGTHOFS).signed[0]
+    while no < gcmap_lgt:
+        cur = (gcmap + GCMAPBASEOFS + UNSIGN_SIZE * no).unsigned[0]
+        while state < MAX and not (cur & (1 << state)):
+            state += 1
+        if state < MAX:
+            # found it
+            # save new state
+            if IS_32BIT:
+                new_state = 5 | ((state + 1) << 3) | (no << 8)
+            else:
+                new_state = 5 | ((state + 1) << 3) | (no << 9)
+            (obj_addr + getofs('jf_gc_trace_state')).signed[0] = new_state
+            return (obj_addr + getofs('jf_frame') + BASEITEMOFS + SIGN_SIZE *
+                    (no * SIZEOFSIGNED * 8 + state))
+        no += 1
+        state = 0
+    return llmemory.NULL
 
 CUSTOMTRACEFUNC = lltype.FuncType([llmemory.Address, llmemory.Address],
                                   llmemory.Address)
diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py
--- a/rpython/jit/backend/llsupport/regalloc.py
+++ b/rpython/jit/backend/llsupport/regalloc.py
@@ -1,5 +1,5 @@
 import os
-from rpython.jit.metainterp.history import Const, Box, REF
+from rpython.jit.metainterp.history import Const, Box, REF, INT
 from rpython.rlib.objectmodel import we_are_translated, specialize
 from rpython.jit.metainterp.resoperation import rop
 
@@ -112,56 +112,18 @@
             node = node.next
         return 'LinkedList(%s)' % '->'.join(l)
 
-def frame_manager_from_gcmap(FmClass, gcmap, depth, frame_bindings):
-    if not gcmap:
-        return FmClass()
-    rev_bindings = [False] * depth
-    for arg, loc in frame_bindings.iteritems():
-        size = FmClass.frame_size(arg.type)
-        if size == 2:
-            rev_bindings[FmClass.get_loc_index(loc) + 1] = True
-        assert size == 1
-        rev_bindings[FmClass.get_loc_index(loc)] = True
-    gcrefs = []
-    others = []
-    c = 0
-    for i in range(len(gcmap)):
-        item = gcmap[i]
-        while c < item:
-            if not rev_bindings[c]:
-                others.append(c)
-            c += 1
-        if not rev_bindings[item]:
-            gcrefs.append(item)
-        c += 1
-    for i in range(c, depth):
-        if not rev_bindings[i]:
-            others.append(i)
-    fm = FmClass(depth, gcrefs, others)
-    for arg, loc in frame_bindings.iteritems():
-        fm.bindings[arg] = loc
-    return fm
-
 class FrameManager(object):
     """ Manage frame positions
 
     start_free_depth is the start where we can allocate in whatever order
     we like.
-
-    freelist_gcrefs and freelist_others are free lists of locations that
-    can be used for gcrefs and others. below stack_free_depth. Note
-    that if floats are occupying more than one spot, in order to allocate
-    the correct size, we need to use more than one from the freelist in
-    the consecutive order.
     """
-    def __init__(self, start_free_depth=0, freelist_gcrefs=None,
-                 freelist_others=None):
+    def __init__(self, start_free_depth=0, freelist=None):
         self.bindings = {}
         self.current_frame_depth = start_free_depth
         # we disable hints for now
         #self.hint_frame_locations = {}
-        self.freelist_gcrefs = LinkedList(self, freelist_gcrefs)
-        self.freelist_others = LinkedList(self, freelist_others)
+        self.freelist = LinkedList(self, freelist)
 
     def get_frame_depth(self):
         return self.current_frame_depth
@@ -191,10 +153,7 @@
         # that 'size' is a power of two.  The reason for doing so is to
         # avoid obscure issues in jump.py with stack locations that try
         # to move from position (6,7) to position (7,8).
-        if box.type == REF:
-            newloc = self.freelist_gcrefs.pop(1, box.type)
-        else:
-            newloc = self.freelist_others.pop(size, box.type)
+        newloc = self.freelist.pop(size, box.type)
         if newloc is None:
             #
             index = self.get_frame_depth()
@@ -209,17 +168,23 @@
         self.bindings[box] = newloc
         return newloc
 
+    def bind(self, box, loc):
+        pos = self.get_loc_index(loc)
+        size = self.frame_size(box.type)
+        if self.current_frame_depth < pos:
+            for i in range(self.current_frame_depth, pos):
+                self.freelist.append(1, self.frame_pos(i, INT))
+        self.current_frame_depth = pos + size
+        self.bindings[box] = loc
+
     def mark_as_free(self, box):
         try:
             loc = self.bindings[box]
         except KeyError:
             return    # already gone
         del self.bindings[box]
-        if box.type == REF:
-            self.freelist_gcrefs.append(1, loc)
-        else:
-            size = self.frame_size(box.type)
-            self.freelist_others.append(size, loc)
+        size = self.frame_size(box.type)
+        self.freelist.append(size, loc)
 
     def try_to_reuse_location(self, box, loc):
         xxx
@@ -242,16 +207,6 @@
     def _gather_gcroots(lst, var):
         lst.append(var)
 
-    def get_gc_map(self):
-        """ returns a list of locations where GC pointers are
-        """
-        assert not self.bindings
-        # XXX unsure, maybe what we want is to
-        # free everything instead
-        lst = []
-        self.freelist_gcrefs.foreach(self._gather_gcroots, lst)
-        return lst
-
     # abstract methods that need to be overwritten for specific assemblers
     @staticmethod
     def frame_pos(loc, type):
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -156,10 +156,6 @@
         op2 = ResOperation(rop.SETFIELD_GC, [frame, history.ConstPtr(llref)],
                            None, descr=descrs.jf_frame_info)
         self.newops.append(op2)
-        llref = lltype.cast_opaque_ptr(llmemory.GCREF, jfi.jfi_gcmap)
-        op3 = ResOperation(rop.SETFIELD_GC, [frame, history.ConstPtr(llref)],
-                           None, descr=descrs.jf_gcmap)
-        self.newops.append(op3)
         for i, arg in enumerate(op.getarglist()):
             index, descr = self.cpu.getarraydescr_for_frame(arg.type, i)
             self.newops.append(ResOperation(rop.SETARRAYITEM_GC,
diff --git a/rpython/jit/backend/llsupport/test/test_gc.py b/rpython/jit/backend/llsupport/test/test_gc.py
--- a/rpython/jit/backend/llsupport/test/test_gc.py
+++ b/rpython/jit/backend/llsupport/test/test_gc.py
@@ -7,7 +7,7 @@
 from rpython.jit.metainterp.history import BoxPtr, BoxInt, ConstPtr
 from rpython.jit.metainterp.resoperation import get_deep_immutable_oplist, rop,\
      ResOperation
-from rpython.rlib.rarithmetic import is_valid_int
+from rpython.rlib.rarithmetic import is_valid_int, r_uint
 
 def test_boehm():
     gc_ll_descr = gc.GcLLDescr_boehm(None, None, None)
@@ -276,12 +276,9 @@
         frame_info = lltype.malloc(jitframe.JITFRAMEINFO, zero=True)
         frame = lltype.malloc(jitframe.JITFRAME, 15, zero=True)
         frame.jf_frame_info = frame_info
-        frame.jf_gcmap = lltype.malloc(jitframe.GCMAP, 4)
-        frame.jf_gcmap[0] = 5
-        frame.jf_gcmap[1] = 7
-        frame.jf_gcmap[2] = 8
-        frame.jf_gcmap[3] = 10
-        frame.jf_gcpattern = 1 | 4
+        frame.jf_gcmap = lltype.malloc(jitframe.GCMAP, 2)
+        frame.jf_gcmap[0] = r_uint(1 | 2 | 8 | 32 | 128)
+        frame.jf_gcmap[1] = r_uint(2 | 16 | 32 | 128)
         frame_adr = llmemory.cast_ptr_to_adr(frame)
         all_addrs = []
         next = jitframe.jitframe_trace(frame_adr, llmemory.NULL)
@@ -296,12 +293,14 @@
                 counter += 1
         # gcpattern
         assert all_addrs[6] == indexof(0)
-        assert all_addrs[7] == indexof(2)
-        assert all_addrs[8] == indexof(3 + 5)
-        assert all_addrs[9] == indexof(3 + 7)
-        assert all_addrs[10] == indexof(3 + 8)
-        assert all_addrs[11] == indexof(3 + 10)
-        assert len(all_addrs) == 6 + 4 + 2
+        assert all_addrs[7] == indexof(1)
+        assert all_addrs[8] == indexof(3)
+        assert all_addrs[9] == indexof(5)
+        assert all_addrs[10] == indexof(7)
+        # XXX 32bit
+        assert all_addrs[11] == indexof(65)
+
+        assert len(all_addrs) == 6 + 5 + 4
         # 6 static fields, 4 addresses from gcmap, 2 from gcpattern
     finally:
         jitframe.STATICSIZE = PREV_STATICSIZE
diff --git a/rpython/jit/backend/llsupport/test/test_regalloc.py b/rpython/jit/backend/llsupport/test/test_regalloc.py
--- a/rpython/jit/backend/llsupport/test/test_regalloc.py
+++ b/rpython/jit/backend/llsupport/test/test_regalloc.py
@@ -1,8 +1,7 @@
 import py
 from rpython.jit.metainterp.history import BoxInt, ConstInt, BoxFloat, INT, FLOAT,\
      BoxPtr
-from rpython.jit.backend.llsupport.regalloc import FrameManager, LinkedList,\
-     frame_manager_from_gcmap
+from rpython.jit.backend.llsupport.regalloc import FrameManager, LinkedList
 from rpython.jit.backend.llsupport.regalloc import RegisterManager as BaseRegMan
 
 def newboxes(*values):
@@ -510,7 +509,7 @@
         assert fm.get_loc_index(locf1) == 4
         assert fm.get_frame_depth() == 5
         fm.mark_as_free(b1)
-        assert fm.freelist_others
+        assert fm.freelist
         b2 = BoxInt()
         fm.loc(b2) # should be in the same spot as b1 before
         assert fm.get(b1) is None
@@ -518,22 +517,22 @@
         fm.mark_as_free(b0)
         p0 = BoxPtr()
         ploc = fm.loc(p0)
-        assert fm.get_loc_index(ploc) == 5
-        assert fm.get_frame_depth() == 6
+        assert fm.get_loc_index(ploc) == 0
+        assert fm.get_frame_depth() == 5
         assert ploc != loc1
         p1 = BoxPtr()
         p1loc = fm.loc(p1)
-        assert fm.get_loc_index(p1loc) == 6
-        assert fm.get_frame_depth() == 7
+        assert fm.get_loc_index(p1loc) == 5
+        assert fm.get_frame_depth() == 6
         fm.mark_as_free(p0)
         p2 = BoxPtr()
         p2loc = fm.loc(p2)
         assert p2loc == ploc
-        assert not fm.freelist_gcrefs
-        assert len(fm.freelist_others) == 1
+        assert len(fm.freelist) == 0
         for box in fm.bindings.keys():
             fm.mark_as_free(box)
-        assert fm.get_gc_map() == [5, 6]
+        fm.bind(BoxPtr(), FakeFramePos(3, 'r'))
+        assert len(fm.freelist) == 6
 
     def test_frame_manager_basic(self):
         b0, b1 = newboxes(0, 1)
@@ -562,7 +561,7 @@
         assert fm.get_loc_index(locf1) == 5
         assert fm.get_frame_depth() == 7
         fm.mark_as_free(b1)
-        assert fm.freelist_others
+        assert fm.freelist
         b2 = BoxInt()
         fm.loc(b2) # should be in the same spot as b1 before
         assert fm.get(b1) is None
@@ -570,45 +569,22 @@
         fm.mark_as_free(b0)
         p0 = BoxPtr()
         ploc = fm.loc(p0)
-        assert fm.get_loc_index(ploc) == 7
-        assert fm.get_frame_depth() == 8
+        assert fm.get_loc_index(ploc) == 0
+        assert fm.get_frame_depth() == 7
         assert ploc != loc1
         p1 = BoxPtr()
         p1loc = fm.loc(p1)
-        assert fm.get_loc_index(p1loc) == 8
-        assert fm.get_frame_depth() == 9
+        assert fm.get_loc_index(p1loc) == 7
+        assert fm.get_frame_depth() == 8
         fm.mark_as_free(p0)
         p2 = BoxPtr()
         p2loc = fm.loc(p2)
         assert p2loc == ploc
-        assert not fm.freelist_gcrefs
-        assert len(fm.freelist_others) == 1
+        assert len(fm.freelist) == 0
         fm.mark_as_free(b2)
         f3 = BoxFloat()
+        fm.mark_as_free(p2)
         floc = fm.loc(f3)
         assert fm.get_loc_index(floc) == 0
         for box in fm.bindings.keys():
             fm.mark_as_free(box)
-        assert fm.get_gc_map() == [7, 8]
-
-    def test_fm_from_gcmap(self):
-        class Loc(object):
-            def __init__(self, l):
-                self.l = l
-        
-        class Fm(FrameManager):
-            @staticmethod
-            def get_loc_index(l):
-                return l.l
-
-        b0 = BoxInt()
-        b1 = BoxInt()
-        l0 = Loc(5)
-        l1 = Loc(2)
-        bindings = {b0: l0, b1: l1}
-        fm = frame_manager_from_gcmap(Fm, [1, 5, 6, 8], 13,
-                                      bindings)
-        assert repr(fm.freelist_gcrefs) == "LinkedList(1->6->8)"
-        assert repr(fm.freelist_others) == "LinkedList(0->3->4->7->9->10->11->12)"
-        assert fm.current_frame_depth == 13
-        assert fm.bindings == bindings
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -98,8 +98,12 @@
                                                         [])
         equaloplists(operations, expected.operations)
 
+class FakeTracker(object):
+    pass
+
 class BaseFakeCPU(object):
     def __init__(self):
+        self.tracker = FakeTracker()
         self._cache = {}
         self.signedframedescr = ArrayDescr(3, 8, FieldDescr('len', 0, 0, 0), 0)
         self.floatframedescr = ArrayDescr(5, 8, FieldDescr('len', 0, 0, 0), 0)
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -30,7 +30,7 @@
 from rpython.jit.backend.x86.jump import remap_frame_layout
 from rpython.jit.codewriter.effectinfo import EffectInfo
 from rpython.jit.codewriter import longlong
-from rpython.rlib.rarithmetic import intmask
+from rpython.rlib.rarithmetic import intmask, r_uint
 from rpython.rlib.objectmodel import compute_unique_id
 
 # darwin requires the stack to be 16 bytes aligned on calls. Same for gcc 4.5.0,
@@ -42,15 +42,39 @@
 
 
 class GuardToken(object):
-    def __init__(self, faildescr, failargs, fail_locs, exc,
+    def __init__(self, faildescr, failargs, fail_locs, exc, frame_depth,
                  is_guard_not_invalidated, is_guard_not_forced):
         self.faildescr = faildescr
         self.failargs = failargs
         self.fail_locs = fail_locs
+        self.gcmap = self.compute_gcmap(failargs, fail_locs, frame_depth)
         self.exc = exc
         self.is_guard_not_invalidated = is_guard_not_invalidated
         self.is_guard_not_forced = is_guard_not_forced
 
+    def compute_gcmap(self, failargs, fail_locs, frame_depth):
+        # note that regalloc has a very similar compute, but
+        # one that does iteration over all bindings, so slightly different,
+        # eh
+        size = frame_depth + JITFRAME_FIXED_SIZE
+        gcmap = lltype.malloc(jitframe.GCMAP, size // WORD // 8 + 1,
+                              zero=True)
+        input_i = 0
+        for i in range(len(failargs)):
+            arg = failargs[i]
+            if arg is None:
+                continue
+            loc = fail_locs[input_i]
+            input_i += 1
+            if arg.type == REF:
+                loc = fail_locs[i]
+                if isinstance(loc, RegLoc):
+                    val = gpr_reg_mgr_cls.all_reg_indexes[loc.value]
+                else:
+                    val = loc.value // WORD
+                gcmap[val // WORD // 8] |= r_uint(1) << (val % (WORD * 8))
+        return gcmap
+
 DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed),
                               ('type', lltype.Char), # 'b'ridge, 'l'abel or
                                                      # 'e'ntry point
@@ -188,7 +212,7 @@
         """
         mc = codebuf.MachineCodeBlockWrapper()
         self._push_all_regs_to_frame(mc, [eax, edi], self.cpu.supports_floats)
-        ofs = self.cpu.get_ofs_of_frame_field('jf_gcpattern')
+        ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
         # store the gc pattern
         mc.MOV_rs(ecx.value, WORD)
         mc.MOV_br(ofs, ecx.value)
@@ -505,7 +529,6 @@
         looptoken._x86_loop_code = looppos
         frame_depth = self._assemble(regalloc, inputargs, operations)
         clt.frame_info.jfi_frame_depth = frame_depth + JITFRAME_FIXED_SIZE
-        self._update_gcmap(clt.frame_info, regalloc)
         #
         size_excluding_failure_stuff = self.mc.get_relative_pos()
         self.write_pending_failure_recoveries()
@@ -581,7 +604,6 @@
         self._patch_stackadjust(stack_check_patch_ofs + rawstart, frame_depth)
         self.fixup_target_tokens(rawstart)
         self.current_clt.frame_info.jfi_frame_depth = frame_depth
-        self._update_gcmap(self.current_clt.frame_info, regalloc)
         self.teardown()
         # oprofile support
         if self.cpu.profile_agent is not None:
@@ -659,42 +681,12 @@
         offset = mc.get_relative_pos() - jg_location
         assert 0 < offset <= 127
         mc.overwrite(jg_location-1, chr(offset))
-        return stack_check_cmp_ofs
-
-    def _insert_frame_adjustment(self, frame_info):
-        """ Our frame might end up being different than what we expect.
-        Note that depth is fine (since bridges check that), but we need
-        to update gcmap
-        """
-        # XXX note that this can be easily shifted to JUMP
-        #     instead of LABEL, would be slightly faster
-        gcmap_ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
-        frame_info_addr = rffi.cast(lltype.Signed, frame_info)
-        frame_info_ofs = self.cpu.get_ofs_of_frame_field('jf_frame_info')
-        jfi_gc_map_ofs = self.cpu.get_ofs_of_frame_field('jfi_gcmap')
-        if IS_X86_32:
-            self.mc.MOV_bi(frame_info_ofs, frame_info_addr)
-            XXX
-        else:
-            self.mc.MOV_ri(X86_64_SCRATCH_REG.value, frame_info_addr)
-            self.mc.MOV_br(frame_info_ofs, X86_64_SCRATCH_REG.value)
-            self.mc.MOV_rm(X86_64_SCRATCH_REG.value,
-                           (X86_64_SCRATCH_REG.value,
-                            jfi_gc_map_ofs))
-            self.mc.MOV_br(gcmap_ofs, X86_64_SCRATCH_REG.value)
-            
+        return stack_check_cmp_ofs            
 
     def _patch_stackadjust(self, adr, allocated_depth):
         mc = codebuf.MachineCodeBlockWrapper()
         mc.writeimm32(allocated_depth)
         mc.copy_to_raw_memory(adr)
-
-    def _update_gcmap(self, frame_info, regalloc):
-        gcmap = regalloc.get_gc_map()
-        frame_info.jfi_gcmap = lltype.malloc(jitframe.GCMAP,
-                                             len(gcmap))
-        for i in range(len(gcmap)):
-            frame_info.jfi_gcmap[i] = gcmap[i]
     
     def get_asmmemmgr_blocks(self, looptoken):
         clt = looptoken.compiled_loop_token
@@ -888,7 +880,6 @@
         old_fi = oldlooptoken.compiled_loop_token.frame_info
         new_fi = newlooptoken.compiled_loop_token.frame_info
         old_fi.jfi_frame_depth = new_fi.jfi_frame_depth
-        old_fi.jfi_gcmap = new_fi.jfi_gcmap
         mc = codebuf.MachineCodeBlockWrapper()
         mc.JMP(imm(target))
         if WORD == 4:         # keep in sync with prepare_loop()
@@ -974,14 +965,14 @@
         genop_math_list[oopspecindex](self, op, arglocs, resloc)
 
     def regalloc_perform_with_guard(self, op, guard_op, faillocs,
-                                    arglocs, resloc):
+                                    arglocs, resloc, frame_depth):
         faildescr = guard_op.getdescr()
         assert isinstance(faildescr, AbstractFailDescr)
         failargs = guard_op.getfailargs()
         guard_opnum = guard_op.getopnum()
         guard_token = self.implement_guard_recovery(guard_opnum,
                                                     faildescr, failargs,
-                                                    faillocs)
+                                                    faillocs, frame_depth)
         if op is None:
             dispatch_opnum = guard_opnum
         else:
@@ -992,9 +983,10 @@
             # must be added by the genop_guard_list[]()
             assert guard_token is self.pending_guard_tokens[-1]
 
-    def regalloc_perform_guard(self, guard_op, faillocs, arglocs, resloc):
+    def regalloc_perform_guard(self, guard_op, faillocs, arglocs, resloc,
+                               frame_depth):
         self.regalloc_perform_with_guard(None, guard_op, faillocs, arglocs,
-                                         resloc)
+                                         resloc, frame_depth)
 
     def load_effective_addr(self, sizereg, baseofs, scale, result, frm=imm0):
         self.mc.LEA(result, addr_add(frm, sizereg, baseofs, scale))
@@ -1824,13 +1816,13 @@
         self.implement_guard(guard_token, 'NE')
 
     def implement_guard_recovery(self, guard_opnum, faildescr, failargs,
-                                                               fail_locs):
+                                 fail_locs, frame_depth):
         exc = (guard_opnum == rop.GUARD_EXCEPTION or
                guard_opnum == rop.GUARD_NO_EXCEPTION or
                guard_opnum == rop.GUARD_NOT_FORCED)
         is_guard_not_invalidated = guard_opnum == rop.GUARD_NOT_INVALIDATED
         is_guard_not_forced = guard_opnum == rop.GUARD_NOT_FORCED
-        return GuardToken(faildescr, failargs, fail_locs, exc,
+        return GuardToken(faildescr, failargs, fail_locs, exc, frame_depth,
                           is_guard_not_invalidated, is_guard_not_forced)
 
     def generate_propagate_error_64(self):
@@ -1855,7 +1847,6 @@
         fail_descr = cast_instance_to_gcref(guardtok.faildescr)
         fail_descr = rffi.cast(lltype.Signed, fail_descr)
         positions = [0] * len(guardtok.fail_locs)
-        gcpattern = 0
         for i, loc in enumerate(guardtok.fail_locs):
             if loc is None or loc is ebp: # frame
                 positions[i] = -1
@@ -1867,8 +1858,6 @@
                     v = len(gpr_reg_mgr_cls.all_regs) + loc.value
                 else:
                     v = gpr_reg_mgr_cls.all_reg_indexes[loc.value]
-                    if guardtok.failargs[i].type == REF:
-                        gcpattern |= (1 << v)
                 positions[i] = v * WORD
         # write down the positions of locs
         guardtok.faildescr.rd_locs = positions
@@ -1878,7 +1867,11 @@
         #    mc.JMP(imm(target))
         #else:
         mc.PUSH(imm(fail_descr))
-        mc.PUSH(imm(gcpattern))
+        gcmapref = lltype.cast_opaque_ptr(llmemory.GCREF, guardtok.gcmap)
+        # keep the ref alive
+        self.current_clt.allgcrefs.append(gcmapref)
+        rgc._make_sure_does_not_move(gcmapref)
+        mc.PUSH(imm(rffi.cast(lltype.Signed, gcmapref)))
         mc.JMP(imm(target))
         return startpos
 
@@ -1949,7 +1942,7 @@
         # throws away most of the frame, including all the PUSHes that we
         # did just above.
         ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
-        ofs2 = self.cpu.get_ofs_of_frame_field('jf_gcpattern')
+        ofs2 = self.cpu.get_ofs_of_frame_field('jf_gcmap')
         base_ofs = self.cpu.get_baseofs_of_frame_field()
         mc.POP(eax)
         mc.MOV_br(ofs2, eax.value)
@@ -1980,9 +1973,6 @@
         # exit function
         self._call_footer()
 
-    def genop_discard_label(self, op, arglocs):
-        self._insert_frame_adjustment(self.current_clt.frame_info)
-
     def implement_guard(self, guard_token, condition=None):
         # These jumps are patched later.
         if condition:
@@ -2397,30 +2387,15 @@
         not_implemented("not implemented operation (guard): %s" %
                         op.getopname())
 
-    def closing_jump(self, target_token, gcmap):
+    def closing_jump(self, target_token):
         target = target_token._x86_loop_code
         if target_token in self.target_tokens_currently_compiling:
             curpos = self.mc.get_relative_pos() + 5
             self.mc.JMP_l(target - curpos)
         else:
-            # bleh, we need to clean up all the references that are not
-            # in our gcmap, but are in the target gcmap
-            if target_token.original_jitcell_token:
-                tgt_clt = target_token.original_jitcell_token.compiled_loop_token
-                tgt_gcmap = tgt_clt.frame_info.jfi_gcmap
-                rev = {}
-                i = 0
-                while i < len(tgt_gcmap):
-                    rev[tgt_gcmap[i]] = None
-                    i += 1
-                # for now clean them all, we might change the strategy
-                for k in gcmap:
-                    if k not in rev:
-                        # all ours that are not known to the target
-                        self.mc.MOV_bi((k + JITFRAME_FIXED_SIZE) * WORD, 0)
             self.mc.JMP(imm(target))
 
-    def malloc_cond(self, nursery_free_adr, nursery_top_adr, size, gcpattern):
+    def malloc_cond(self, nursery_free_adr, nursery_top_adr, size, gcmap):
         assert size & (WORD-1) == 0     # must be correctly aligned
         self.mc.MOV(eax, heap(nursery_free_adr))
         self.mc.LEA_rm(edi.value, (eax.value, size))
@@ -2430,7 +2405,9 @@
         # this is a 32bit gcpattern that describes where are GC roots
         # in registers (which will be saved while doing slowpath)
         # store it on top of the stack (we might not have a register free)
-        self.mc.MOV_si(0, gcpattern)
+        gcmapref = lltype.cast_opaque_ptr(llmemory.GCREF, gcmap)
+        rgc._make_sure_does_not_move(gcmapref)
+        self.mc.MOV(RawEspLoc(0, REF), imm(rffi.cast(lltype.Signed, gcmapref)))
         self.mc.CALL(imm(self.malloc_slowpath))
         offset = self.mc.get_relative_pos() - jmp_adr
         assert 0 < offset <= 127
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -17,19 +17,18 @@
 from rpython.jit.codewriter import longlong
 from rpython.jit.codewriter.effectinfo import EffectInfo
 from rpython.jit.metainterp.resoperation import rop
-from rpython.jit.backend.llsupport.jitframe import NULLGCMAP
+from rpython.jit.backend.llsupport.jitframe import NULLGCMAP, GCMAP
 from rpython.jit.backend.llsupport.descr import ArrayDescr
 from rpython.jit.backend.llsupport.descr import CallDescr
 from rpython.jit.backend.llsupport.descr import unpack_arraydescr
 from rpython.jit.backend.llsupport.descr import unpack_fielddescr
 from rpython.jit.backend.llsupport.descr import unpack_interiorfielddescr
-from rpython.jit.backend.llsupport.regalloc import FrameManager, RegisterManager,\
-     TempBox, compute_vars_longevity, is_comparison_or_ovf_op,\
-     frame_manager_from_gcmap
+from rpython.jit.backend.llsupport.regalloc import FrameManager,\
+     RegisterManager, TempBox, compute_vars_longevity, is_comparison_or_ovf_op
 from rpython.jit.backend.x86.arch import WORD, JITFRAME_FIXED_SIZE
 from rpython.jit.backend.x86.arch import IS_X86_32, IS_X86_64
 from rpython.jit.backend.x86 import rx86
-from rpython.rlib.rarithmetic import r_longlong
+from rpython.rlib.rarithmetic import r_longlong, r_uint
 
 class X86RegisterManager(RegisterManager):
 
@@ -173,11 +172,8 @@
         self.close_stack_struct = 0
         self.final_jump_op = None
 
-    def _prepare(self, inputargs, operations, allgcrefs, gcmap=NULLGCMAP,
-                 parent_frame_depth=0, frame_bindings=None):
-        self.fm = frame_manager_from_gcmap(X86FrameManager, gcmap,
-                                           parent_frame_depth,
-                                           frame_bindings)
+    def _prepare(self, inputargs, operations, allgcrefs):
+        self.fm = X86FrameManager()
         cpu = self.assembler.cpu
         operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
                                                        allgcrefs)
@@ -205,16 +201,9 @@
             self.min_bytes_before_label = 13
         return operations
 
-    def get_gc_map(self):
-        return self.fm.get_gc_map()
-
     def prepare_bridge(self, inputargs, arglocs, operations, allgcrefs,
                        frame_info):
-        frame_bindings = self._frame_bindings(arglocs, inputargs)
-        operations = self._prepare(inputargs, operations, allgcrefs,
-                                   frame_info.jfi_gcmap,
-                                   frame_info.jfi_frame_depth,
-                                   frame_bindings)
+        operations = self._prepare(inputargs, operations, allgcrefs)
         self._update_bindings(arglocs, inputargs)
         self.min_bytes_before_label = 0
         return operations
@@ -315,6 +304,8 @@
                     else:
                         self.rm.reg_bindings[arg] = loc
                         used[loc] = None
+            else:
+                self.fm.bind(arg, loc)
         self.rm.free_regs = []
         for reg in self.rm.all_regs:
             if reg not in used:
@@ -350,7 +341,8 @@
         self.rm.position += 1
         self.xrm.position += 1
         self.assembler.regalloc_perform_with_guard(op, guard_op, faillocs,
-                                                   arglocs, result_loc)
+                                                   arglocs, result_loc,
+                                                   self.fm.get_frame_depth())
         self.possibly_free_vars(guard_op.getfailargs())
 
     def perform_guard(self, guard_op, arglocs, result_loc):
@@ -362,7 +354,8 @@
             else:
                 self.assembler.dump('%s(%s)' % (guard_op, arglocs))
         self.assembler.regalloc_perform_guard(guard_op, faillocs, arglocs,
-                                              result_loc)
+                                              result_loc,
+                                              self.fm.get_frame_depth())
         self.possibly_free_vars(guard_op.getfailargs())
 
     def PerformDiscard(self, op, arglocs):
@@ -903,15 +896,30 @@
         self.rm.force_allocate_reg(tmp_box, selected_reg=edi)
         self.rm.possibly_free_var(tmp_box)
         #
+        gcmap = self._compute_gcmap()
         gc_ll_descr = self.assembler.cpu.gc_ll_descr
-        gcpattern = 0
-        for box, reg in self.rm.reg_bindings.iteritems():
-            if box.type == REF:
-                gcpattern |= (1 << self.rm.all_reg_indexes[reg.value])
         self.assembler.malloc_cond(
             gc_ll_descr.get_nursery_free_addr(),
             gc_ll_descr.get_nursery_top_addr(),
-            size, gcpattern)
+            size, gcmap)
+
+    def _compute_gcmap(self):
+        frame_depth = self.fm.get_frame_depth()
+        size = frame_depth + JITFRAME_FIXED_SIZE
+        gcmap = lltype.malloc(GCMAP, size // WORD // 8 + 1,
+                              zero=True)
+        for box, loc in self.rm.reg_bindings.iteritems():
+            if box.type == REF:
+                assert isinstance(loc, RegLoc)
+                val = gpr_reg_mgr_cls.all_reg_indexes[loc.value]
+                gcmap[val // WORD // 8] |= r_uint(1) << (val % (WORD * 8))
+        for box, loc in self.fm.bindings.iteritems():
+            if box.type == REF:
+                assert isinstance(loc, StackLoc)
+                val = loc.value // WORD
+                gcmap[val // WORD // 8] |= r_uint(1) << (val % (WORD * 8))
+        return gcmap
+
 
     def consider_setfield_gc(self, op):
         ofs, size, _ = unpack_fielddescr(op.getdescr())
@@ -1284,7 +1292,7 @@
                                  src_locations1, dst_locations1, tmpreg,
                                  src_locations2, dst_locations2, xmmtmp)
         self.possibly_free_vars_for_op(op)
-        assembler.closing_jump(self.jump_target_descr, self.get_gc_map())
+        assembler.closing_jump(self.jump_target_descr)
 
     def consider_debug_merge_point(self, op):
         pass
@@ -1347,7 +1355,6 @@
         # loop that ends up jumping to this LABEL, then we can now provide
         # the hints about the expected position of the spilled variables.
 
-        self.PerformDiscard(op, [])
         # XXX we never compile code like that?
         #jump_op = self.final_jump_op
         #if jump_op is not None and jump_op.getdescr() is descr:
diff --git a/rpython/jit/backend/x86/test/test_gc_integration.py b/rpython/jit/backend/x86/test/test_gc_integration.py
--- a/rpython/jit/backend/x86/test/test_gc_integration.py
+++ b/rpython/jit/backend/x86/test/test_gc_integration.py
@@ -67,38 +67,14 @@
         # p0 and p3 should be in registers, p1 not so much
         assert self.getptr(0, lltype.Ptr(self.S)) == s1
         # this is a fairly CPU specific check
-        all = len(gpr_reg_mgr_cls.all_regs)
-        assert frame.jf_gcpattern == (1 << (all - 1)) | (1 << (all - 2))
-        # the gcmap should contain three things, p0, p1 and p3, but
-        # p3 stays in a register and is only represented in gcpattern,
-        # while p0 is in both
-        assert len(frame.jf_gcmap) == 2
-        for i in range(2):
-            assert frame.jf_gcmap[i] == frame.jf_frame_info.jfi_gcmap[i]
-        assert frame.jf_gcmap[0] == 1
-        assert frame.jf_gcmap[1] == 3
-
-    def test_label(self):
-        ops = '''
-        [i0, p0, i1, p1]
-        label(i0, p0, i1, p1, descr=targettoken2)
-        p3 = getfield_gc(p0, descr=fielddescr)
-        force_spill(p3)
-        guard_true(i0) [p0, i1, p1, p3]
-        finish()
-        '''
-        s1 = lltype.malloc(self.S)
-        s2 = lltype.malloc(self.S)
-        s1.field = s2
-        self.interpret(ops, [0, s1, 1, s2])
-        ops2 = '''
-        [p0]
-        jump(1, p0, 1, p0, descr=targettoken2)
-        '''
-        self.interpret(ops2, [s1])
-        frame = lltype.cast_opaque_ptr(jitframe.JITFRAMEPTR, self.deadframe)
-        assert len(frame.jf_gcmap) == 3
-        assert [frame.jf_gcmap[i] for i in range(3)] == [1, 3, 4]
+        assert len(frame.jf_gcmap) == 1
+        # the gcmap should contain three things, p0, p1 and p3
+        # p3 stays in a register
+        # while p0 and p1 are on the frame
+        assert frame.jf_gcmap[0] == (1 << 11) | (1 << 12) | (1 << 31)
+        assert frame.jf_frame[11]
+        assert frame.jf_frame[12]
+        assert frame.jf_frame[31]
 
     def test_rewrite_constptr(self):
         ops = '''
@@ -228,10 +204,8 @@
 
     def test_malloc_slowpath(self):
         def check(frame):
-            assert len(frame.jf_frame_info.jfi_gcmap) == 2 # 2 pointers
-            assert frame.jf_frame_info.jfi_gcmap[0] == 1
-            assert frame.jf_frame_info.jfi_gcmap[1] == 2
-            assert frame.jf_gcpattern == 0x2
+            assert len(frame.jf_gcmap) == 1
+            assert frame.jf_gcmap[0] == (2 | (1<<29) | (1 << 30))
         
         self.cpu = self.getcpu(check)
         ops = '''
@@ -257,8 +231,9 @@
 
     def test_save_regs_around_malloc(self):
         def check(frame):
-            x = frame.jf_gcpattern
-            assert bin(x) == '0b1111111011111'
+            x = frame.jf_gcmap
+            assert len(x) == 1
+            assert bin(x[0]) == '0b1111100000000000000001111111011111'
             # all but two
         
         self.cpu = self.getcpu(check)
diff --git a/rpython/jit/backend/x86/test/test_runner.py b/rpython/jit/backend/x86/test/test_runner.py
--- a/rpython/jit/backend/x86/test/test_runner.py
+++ b/rpython/jit/backend/x86/test/test_runner.py
@@ -30,8 +30,7 @@
     # for the individual tests see
     # ====> ../../test/runner_test.py
 
-    add_loop_instructions = ['mov', 'mov', 'mov', 'mov',
-                             'mov', 'add', 'test', 'je', 'jmp']
+    add_loop_instructions = ['mov', 'add', 'test', 'je', 'jmp']
     bridge_loop_instructions = ['cmp', 'jge', 'mov', 'call',
                                 'mov', 'jmp']
 
diff --git a/rpython/rtyper/lltypesystem/llmemory.py b/rpython/rtyper/lltypesystem/llmemory.py
--- a/rpython/rtyper/lltypesystem/llmemory.py
+++ b/rpython/rtyper/lltypesystem/llmemory.py
@@ -618,6 +618,9 @@
 class _signed_fakeaccessor(_fakeaccessor):
     TYPE = lltype.Signed
 
+class _unsigned_fakeaccessor(_fakeaccessor):
+    TYPE = lltype.Unsigned
+
 class _float_fakeaccessor(_fakeaccessor):
     TYPE = lltype.Float
 
@@ -654,6 +657,7 @@
                           }
 
 fakeaddress.signed = property(_signed_fakeaccessor)
+fakeaddress.unsigned = property(_unsigned_fakeaccessor)
 fakeaddress.float = property(_float_fakeaccessor)
 fakeaddress.char = property(_char_fakeaccessor)
 fakeaddress.address = property(_address_fakeaccessor)
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to