Author: Maciej Fijalkowski <[email protected]>
Branch: 
Changeset: r63455:6bf943756133
Date: 2013-04-17 18:18 +0200
http://bitbucket.org/pypy/pypy/changeset/6bf943756133/

Log:    Merge fast-newarray. This inlines the fast path of newarray in the
        assembler. Disabled on ARM until we fix issues.

diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -70,6 +70,14 @@
         # the address of the function called by 'new'
         gc_ll_descr = self.cpu.gc_ll_descr
         gc_ll_descr.initialize()
+        if hasattr(gc_ll_descr, 'minimal_size_in_nursery'):
+            self.gc_minimal_size_in_nursery = gc_ll_descr.minimal_size_in_nursery
+        else:
+            self.gc_minimal_size_in_nursery = 0
+        if hasattr(gc_ll_descr, 'gcheaderbuilder'):
+            self.gc_size_of_header = gc_ll_descr.gcheaderbuilder.size_gc_header
+        else:
+            self.gc_size_of_header = WORD # for tests
         self.memcpy_addr = self.cpu.cast_ptr_to_int(memcpy_fn)
         self._build_failure_recovery(False, withfloats=False)
         self._build_failure_recovery(True, withfloats=False)
@@ -85,7 +93,20 @@
             self._build_wb_slowpath(True, withfloats=True)
         self._build_propagate_exception_path()
         if gc_ll_descr.get_malloc_slowpath_addr is not None:
-            self._build_malloc_slowpath()
+            # generate few slowpaths for various cases
+            self.malloc_slowpath = self._build_malloc_slowpath(kind='fixed')
+            self.malloc_slowpath_varsize = self._build_malloc_slowpath(
+                kind='var')
+        if hasattr(gc_ll_descr, 'malloc_str'):
+            self.malloc_slowpath_str = self._build_malloc_slowpath(kind='str')
+        else:
+            self.malloc_slowpath_str = None
+        if hasattr(gc_ll_descr, 'malloc_unicode'):
+            self.malloc_slowpath_unicode = self._build_malloc_slowpath(
+                kind='unicode')
+        else:
+            self.malloc_slowpath_unicode = None
+
         self._build_stack_check_slowpath()
         if gc_ll_descr.gcrootmap:
             self._build_release_gil(gc_ll_descr.gcrootmap)
diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -415,6 +415,7 @@
             return llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
                                                    type_id, size,
                                                    False, False, False)
+
         self.generate_function('malloc_nursery', malloc_nursery_slowpath,
                                [lltype.Signed])
 
@@ -567,6 +568,9 @@
     def get_malloc_slowpath_addr(self):
         return self.get_malloc_fn_addr('malloc_nursery')
 
+    def get_malloc_slowpath_array_addr(self):
+        return self.get_malloc_fn_addr('malloc_array')
+    
 # ____________________________________________________________
 
 def get_ll_description(gcdescr, translator=None, rtyper=None):
diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py
--- a/rpython/jit/backend/llsupport/llmodel.py
+++ b/rpython/jit/backend/llsupport/llmodel.py
@@ -21,6 +21,8 @@
 class AbstractLLCPU(AbstractCPU):
     from rpython.jit.metainterp.typesystem import llhelper as ts
 
+    can_inline_varsize_malloc = False
+
     def __init__(self, rtyper, stats, opts, translate_support_code=False,
                  gcdescr=None):
         assert type(opts) is not bool
@@ -171,9 +173,9 @@
         def pos_exc_value():
             addr = llop.get_exc_value_addr(llmemory.Address)
             return heaptracker.adr2int(addr)
-        
+
         from rpython.rlib import rstack
-        
+
         STACK_CHECK_SLOWPATH = lltype.Ptr(lltype.FuncType([lltype.Signed],
                                                           lltype.Void))
         def insert_stack_check():
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -8,19 +8,29 @@
 from rpython.jit.backend.llsupport.descr import SizeDescr, ArrayDescr
 from rpython.jit.metainterp.history import JitCellToken
 
+FLAG_ARRAY = 0
+FLAG_STR = 1
+FLAG_UNICODE = 2
+
 class GcRewriterAssembler(object):
-    # This class performs the following rewrites on the list of operations:
-    #
-    # - Remove the DEBUG_MERGE_POINTs.
-    #
-    # - Turn all NEW_xxx to either a CALL_MALLOC_GC, or a CALL_MALLOC_NURSERY
-    #   followed by SETFIELDs in order to initialize their GC fields.  The
-    #   two advantages of CALL_MALLOC_NURSERY is that it inlines the common
-    #   path, and we need only one such operation to allocate several blocks
-    #   of memory at once.
-    #
-    # - Add COND_CALLs to the write barrier before SETFIELD_GC and
-    #   SETARRAYITEM_GC operations.
+    """ This class performs the following rewrites on the list of operations:
+
+     - Remove the DEBUG_MERGE_POINTs.
+
+     - Turn all NEW_xxx to either a CALL_MALLOC_GC, or a CALL_MALLOC_NURSERY
+       followed by SETFIELDs in order to initialize their GC fields.  The
+       two advantages of CALL_MALLOC_NURSERY is that it inlines the common
+       path, and we need only one such operation to allocate several blocks
+       of memory at once.
+
+     - Add COND_CALLs to the write barrier before SETFIELD_GC and
+       SETARRAYITEM_GC operations.
+
+    recent_mallocs contains a dictionary of variable -> None. If a variable
+    is in the dictionary, next setfields can be called without a write barrier,
+    because the variable got allocated after the last potentially collecting
+    resop
+    """
 
     _previous_size = -1
     _op_malloc_nursery = None
@@ -32,7 +42,7 @@
         self.cpu = cpu
         self.newops = []
         self.known_lengths = {}
-        self.recent_mallocs = {}     # set of variables
+        self.recent_mallocs = {}
 
     def rewrite(self, operations):
         # we can only remember one malloc since the next malloc can possibly
@@ -92,9 +102,11 @@
             assert isinstance(descr, ArrayDescr)
             self.handle_new_array(descr, op)
         elif opnum == rop.NEWSTR:
-            self.handle_new_array(self.gc_ll_descr.str_descr, op)
+            self.handle_new_array(self.gc_ll_descr.str_descr, op,
+                                  kind=FLAG_STR)
         elif opnum == rop.NEWUNICODE:
-            self.handle_new_array(self.gc_ll_descr.unicode_descr, op)
+            self.handle_new_array(self.gc_ll_descr.unicode_descr, op,
+                                  kind=FLAG_UNICODE)
         else:
             raise NotImplementedError(op.getopname())
 
@@ -106,7 +118,7 @@
         else:
             self.gen_malloc_fixedsize(size, descr.tid, op.result)
 
-    def handle_new_array(self, arraydescr, op):
+    def handle_new_array(self, arraydescr, op, kind=FLAG_ARRAY):
         v_length = op.getarg(0)
         total_size = -1
         if isinstance(v_length, ConstInt):
@@ -119,6 +131,14 @@
                 pass    # total_size is still -1
         elif arraydescr.itemsize == 0:
             total_size = arraydescr.basesize
+        elif (self.gc_ll_descr.can_use_nursery_malloc(1) and
+              self.gen_malloc_nursery_varsize(arraydescr.itemsize,
+              v_length, op.result, arraydescr, kind=kind)):
+            # note that we cannot initialize tid here, because the array
+            # might end up being allocated by malloc_external or some
+            # stuff that initializes GC header fields differently
+            self.gen_initialize_len(op.result, v_length, arraydescr.lendescr)
+            return
         if (total_size >= 0 and
                 self.gen_malloc_nursery(total_size, op.result)):
             self.gen_initialize_tid(op.result, arraydescr.tid)
@@ -152,7 +172,7 @@
                                size_box,
                                descr=descrs.jfi_frame_size)
             self.newops.append(op0)
-            self.gen_malloc_nursery_varsize(size_box, frame, is_small=True)
+            self.gen_malloc_nursery_varsize_small(size_box, frame)
             self.gen_initialize_tid(frame, descrs.arraydescr.tid)
             length_box = history.BoxInt()
             op1 = ResOperation(rop.GETFIELD_GC, [history.ConstInt(frame_info)],
@@ -281,10 +301,28 @@
         self._gen_call_malloc_gc([ConstInt(addr), v_num_elem], v_result,
                                  self.gc_ll_descr.malloc_unicode_descr)
 
-    def gen_malloc_nursery_varsize(self, sizebox, v_result, is_small=False):
+    def gen_malloc_nursery_varsize(self, itemsize, v_length, v_result,
+                                   arraydescr, kind=FLAG_ARRAY):
+        """ itemsize is an int, v_length and v_result are boxes
+        """
+        if not self.cpu.can_inline_varsize_malloc:
+            return False # temporary, kill when ARM supports it
+        gc_descr = self.gc_ll_descr
+        if (kind == FLAG_ARRAY and
+            (arraydescr.basesize != gc_descr.standard_array_basesize or
+             arraydescr.lendescr.offset != gc_descr.standard_array_length_ofs)):
+            return False
+        self.emitting_an_operation_that_can_collect()
+        op = ResOperation(rop.CALL_MALLOC_NURSERY_VARSIZE,
+                          [ConstInt(kind), ConstInt(itemsize), v_length],
+                          v_result, descr=arraydescr)
+        self.newops.append(op)
+        self.recent_mallocs[v_result] = None
+        return True
+
+    def gen_malloc_nursery_varsize_small(self, sizebox, v_result):
         """ Generate CALL_MALLOC_NURSERY_VARSIZE_SMALL
         """
-        assert is_small
         self.emitting_an_operation_that_can_collect()
         op = ResOperation(rop.CALL_MALLOC_NURSERY_VARSIZE_SMALL,
                           [sizebox],
diff --git a/rpython/jit/backend/llsupport/test/test_gc_integration.py b/rpython/jit/backend/llsupport/test/test_gc_integration.py
--- a/rpython/jit/backend/llsupport/test/test_gc_integration.py
+++ b/rpython/jit/backend/llsupport/test/test_gc_integration.py
@@ -29,11 +29,11 @@
     return r[r.find('1'):]
 
 class TestRegallocGcIntegration(BaseTestRegalloc):
-    
+
     cpu = CPU(None, None)
     cpu.gc_ll_descr = GcLLDescr_boehm(None, None, None)
     cpu.setup_once()
-    
+
     S = lltype.GcForwardReference()
     S.become(lltype.GcStruct('S', ('field', lltype.Ptr(S)),
                              ('int', lltype.Signed)))
@@ -145,6 +145,7 @@
     gcrootmap = None
     passes_frame = True
     write_barrier_descr = None
+    max_size_of_young_obj = 50
 
     def __init__(self, callback):
         GcLLDescription.__init__(self, None)
@@ -172,6 +173,20 @@
                                [lltype.Signed, jitframe.JITFRAMEPTR],
                                lltype.Signed)
 
+        def malloc_array(itemsize, tid, num_elem):
+            self.calls.append((itemsize, tid, num_elem))
+            return 13
+
+        self.malloc_slowpath_array_fnptr = llhelper_args(malloc_array,
+                                                         [lltype.Signed] * 3,
+                                                         lltype.Signed)
+
+        def malloc_str(size):
+            self.calls.append(('str', size))
+            return 13
+        self.generate_function('malloc_str', malloc_str, [lltype.Signed],
+                               lltype.Signed)
+
     def get_nursery_free_addr(self):
         return rffi.cast(lltype.Signed, self.addrs)
 
@@ -181,6 +196,9 @@
     def get_malloc_slowpath_addr(self):
         return self.get_malloc_fn_addr('malloc_nursery')
 
+    def get_malloc_slowpath_array_addr(self):
+        return self.malloc_slowpath_array_fnptr
+
     def check_nothing_in_nursery(self):
         # CALL_MALLOC_NURSERY should not write anything in the nursery
         for i in range(64):
@@ -229,7 +247,7 @@
         p1 = call_malloc_nursery_varsize_small(i1)
         p2 = call_malloc_nursery_varsize_small(i2)
         guard_true(i0) [p0, p1, p2]
-        ''' 
+        '''
         self.interpret(ops, [16, 32, 16])
         # check the returned pointers
         gc_ll_descr = self.cpu.gc_ll_descr
@@ -244,6 +262,56 @@
         # slowpath never called
         assert gc_ll_descr.calls == []
 
+    def test_malloc_nursery_varsize(self):
+        self.cpu = self.getcpu(None)
+        A = lltype.GcArray(lltype.Signed)
+        arraydescr = self.cpu.arraydescrof(A)
+        arraydescr.tid = 15
+        ops = '''
+        [i0, i1, i2]
+        p0 = call_malloc_nursery_varsize(0, 8, i0, descr=arraydescr)
+        p1 = call_malloc_nursery_varsize(0, 5, i1, descr=arraydescr)
+        guard_false(i0) [p0, p1]
+        '''
+        self.interpret(ops, [1, 2, 3],
+                       namespace={'arraydescr': arraydescr})
+        # check the returned pointers
+        gc_ll_descr = self.cpu.gc_ll_descr
+        nurs_adr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
+        ref = lambda n: self.cpu.get_ref_value(self.deadframe, n)
+        assert rffi.cast(lltype.Signed, ref(0)) == nurs_adr + 0
+        assert rffi.cast(lltype.Signed, ref(1)) == nurs_adr + 2*WORD + 8*1
+        # check the nursery content and state
+        assert gc_ll_descr.nursery[0] == chr(15)
+        assert gc_ll_descr.nursery[2 * WORD + 8] == chr(15)
+        assert gc_ll_descr.addrs[0] == nurs_adr + (((4 * WORD + 8*1 + 5*2) + (WORD - 1)) & ~(WORD - 1))
+        # slowpath never called
+        assert gc_ll_descr.calls == []
+
+    def test_malloc_nursery_varsize_slowpath(self):
+        self.cpu = self.getcpu(None)
+        ops = """
+        [i0, i1, i2]
+        p0 = call_malloc_nursery_varsize(0, 8, i0, descr=arraydescr)
+        p1 = call_malloc_nursery_varsize(0, 5, i1, descr=arraydescr)
+        p3 = call_malloc_nursery_varsize(0, 5, i2, descr=arraydescr)
+        # overflow
+        p4 = call_malloc_nursery_varsize(0, 5, i2, descr=arraydescr)
+        # we didn't collect, so still overflow
+        p5 = call_malloc_nursery_varsize(1, 5, i2, descr=strdescr)
+        guard_false(i0) [p0, p1, p3, p4]
+        """
+        A = lltype.GcArray(lltype.Signed)
+        arraydescr = self.cpu.arraydescrof(A)
+        arraydescr.tid = 15
+        self.interpret(ops, [10, 3, 3],
+                       namespace={'arraydescr': arraydescr,
+                                  'strdescr': arraydescr})
+        # check the returned pointers
+        gc_ll_descr = self.cpu.gc_ll_descr
+        assert gc_ll_descr.calls == [(8, 15, 10), (5, 15, 3), ('str', 3)]
+        # one fit, one was too large, one was not fitting
+
     def test_malloc_slowpath(self):
         def check(frame):
             expected_size = 1
@@ -294,7 +362,7 @@
                 s = bin(x[0]).count('1') + bin(x[1]).count('1')
                 assert s == 16
             # all but two registers + some stuff on stack
-        
+
         self.cpu = self.getcpu(check)
         S1 = lltype.GcStruct('S1')
         S2 = lltype.GcStruct('S2', ('s0', lltype.Ptr(S1)),
@@ -360,7 +428,7 @@
 
 class MockShadowStackRootMap(object):
     is_shadow_stack = True
-    
+
     def __init__(self):
         TP = rffi.CArray(lltype.Signed)
         self.stack = lltype.malloc(TP, 10, flavor='raw')
@@ -369,7 +437,7 @@
         self.stack_addr[0] = rffi.cast(lltype.Signed, self.stack)
 
     def __del__(self):
-        lltype.free(self.stack_addr, flavor='raw')        
+        lltype.free(self.stack_addr, flavor='raw')
         lltype.free(self.stack, flavor='raw')
 
     def register_asm_addr(self, start, mark):
@@ -381,7 +449,7 @@
 class WriteBarrierDescr(AbstractDescr):
     jit_wb_cards_set = 0
     jit_wb_if_flag_singlebyte = 1
-    
+
     def __init__(self, gc_ll_descr):
         def write_barrier(frame):
             gc_ll_descr.write_barrier_on_frame_called = frame
@@ -442,6 +510,15 @@
         self.malloc_slowpath_fnptr = llhelper_args(malloc_slowpath,
                                                    [lltype.Signed],
                                                    lltype.Signed)
+
+        def malloc_array(itemsize, tid, num_elem):
+            import pdb
+            pdb.set_trace()
+
+        self.malloc_slowpath_array_fnptr = llhelper_args(malloc_array,
+                                                         [lltype.Signed] * 3,
+                                                         lltype.Signed)
+
         self.all_nurseries = []
 
     def init_nursery(self, nursery_size=None):
@@ -497,6 +574,9 @@
     def get_malloc_slowpath_addr(self):
         return self.malloc_slowpath_fnptr
 
+    def get_malloc_slowpath_array_addr(self):
+        return self.malloc_slowpath_array_fnptr
+
     def get_nursery_free_addr(self):
         return self.nursery_addr
 
@@ -507,7 +587,7 @@
         for nursery in self.all_nurseries:
             lltype.free(nursery, flavor='raw', track_allocation=False)
         lltype.free(self.nursery_ptrs, flavor='raw')
-    
+
 def unpack_gcmap(frame):
     res = []
     val = 0
@@ -543,10 +623,10 @@
     def test_shadowstack_call(self):
         cpu = self.cpu
         cpu.gc_ll_descr.init_nursery(100)
-        cpu.setup_once() 
+        cpu.setup_once()
         S = self.S
         frames = []
-        
+
         def check(i):
             assert cpu.gc_ll_descr.gcrootmap.stack[0] == i
             frame = rffi.cast(JITFRAMEPTR, i)
@@ -720,7 +800,7 @@
 
     def test_call_may_force_gcmap(self):
         cpu = self.cpu
-        
+
         def f(frame, arg, x):
             assert not arg
             assert frame.jf_gcmap[0] & 31 == 0
@@ -748,7 +828,7 @@
         pdying = getarrayitem_gc(p0, 0, descr=arraydescr)
         px = call_may_force(ConstClass(fptr), pf, pdying, i0, descr=calldescr)
         guard_not_forced(descr=faildescr) [p1, p2, p3, px]
-        finish(px, descr=finishdescr)
+        finish(px, descr=finaldescr)
         """, namespace={'fptr': fptr, 'calldescr': calldescr,
                         'arraydescr': cpu.arraydescrof(A),
                         'faildescr': BasicFailDescr(1),
@@ -760,12 +840,12 @@
         cpu.compile_loop(loop.inputargs, loop.operations, token)
         frame = lltype.cast_opaque_ptr(JITFRAMEPTR,
                                        cpu.execute_token(token, 1, a))
-        
+
         assert getmap(frame).count('1') == 4
 
     def test_call_gcmap_no_guard(self):
         cpu = self.cpu
-        
+
         def f(frame, arg, x):
             assert not arg
             assert frame.jf_gcmap[0] & 31 == 0
@@ -792,7 +872,7 @@
         pdying = getarrayitem_gc(p0, 0, descr=arraydescr)
         px = call(ConstClass(fptr), pf, pdying, i0, descr=calldescr)
         guard_false(i0, descr=faildescr) [p1, p2, p3, px]
-        finish(px, descr=finishdescr)
+        finish(px, descr=finaldescr)
         """, namespace={'fptr': fptr, 'calldescr': calldescr,
                         'arraydescr': cpu.arraydescrof(A),
                         'faildescr': BasicFailDescr(1),
diff --git a/rpython/jit/backend/llsupport/test/test_regalloc_integration.py b/rpython/jit/backend/llsupport/test/test_regalloc_integration.py
--- a/rpython/jit/backend/llsupport/test/test_regalloc_integration.py
+++ b/rpython/jit/backend/llsupport/test/test_regalloc_integration.py
@@ -93,8 +93,8 @@
                      type_system=self.type_system,
                      boxkinds=boxkinds)
 
-    def interpret(self, ops, args, run=True):
-        loop = self.parse(ops)
+    def interpret(self, ops, args, run=True, namespace=None):
+        loop = self.parse(ops, namespace=namespace)
         self.loop = loop
         looptoken = JitCellToken()
         self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -85,6 +85,7 @@
         signedframedescr = self.cpu.signedframedescr
         floatframedescr = self.cpu.floatframedescr
         casmdescr.compiled_loop_token = clt
+        tzdescr = None # noone cares
         #
         namespace.update(locals())
         #
@@ -107,7 +108,9 @@
 
 class BaseFakeCPU(object):
     JITFRAME_FIXED_SIZE = 0
-    
+
+    can_inline_varsize_malloc = True
+
     def __init__(self):
         self.tracker = FakeTracker()
         self._cache = {}
@@ -121,7 +124,7 @@
 
     def unpack_arraydescr_size(self, d):
         return 0, d.itemsize, 0
-    
+
     def arraydescrof(self, ARRAY):
         try:
             return self._cache[ARRAY]
@@ -129,7 +132,7 @@
             r = ArrayDescr(1, 2, FieldDescr('len', 0, 0, 0), 0)
             self._cache[ARRAY] = r
             return r
-        
+
     def fielddescrof(self, STRUCT, fname):
         key = (STRUCT, fname)
         try:
@@ -407,12 +410,23 @@
             jump(i0)
         """, """
             [i0]
-            p0 = call_malloc_gc(ConstClass(malloc_array), 1,  \
-                                %(bdescr.tid)d, i0,           \
-                                descr=malloc_array_descr)
+            p0 = call_malloc_nursery_varsize(0, 1, i0, descr=bdescr)
+            setfield_gc(p0, i0, descr=blendescr)
             jump(i0)
         """)
 
+    def test_rewrite_new_string(self):
+        self.check_rewrite("""
+        [i0]
+        p0 = newstr(i0)
+        jump(i0)
+        """, """
+        [i0]
+        p0 = call_malloc_nursery_varsize(1, 1, i0, descr=strdescr)
+        setfield_gc(p0, i0, descr=strlendescr)
+        jump(i0)
+        """)
+
     def test_rewrite_assembler_nonstandard_array(self):
         # a non-standard array is a bit hard to get; e.g. GcArray(Float)
         # is like that on Win32, but not on Linux.  Build one manually...
@@ -533,10 +547,12 @@
             p1 = int_add(p0, %(strdescr.basesize + 16 * strdescr.itemsize)d)
             setfield_gc(p1, %(unicodedescr.tid)d, descr=tiddescr)
             setfield_gc(p1, 10, descr=unicodelendescr)
-            p2 = call_malloc_gc(ConstClass(malloc_unicode), i2, \
-                                descr=malloc_unicode_descr)
-            p3 = call_malloc_gc(ConstClass(malloc_str), i2, \
-                                descr=malloc_str_descr)
+            p2 = call_malloc_nursery_varsize(2, 4, i2, \
+                                descr=unicodedescr)
+            setfield_gc(p2, i2, descr=unicodelendescr)
+            p3 = call_malloc_nursery_varsize(1, 1, i2, \
+                                descr=strdescr)
+            setfield_gc(p3, i2, descr=strlendescr)
             jump()
         """)
 
@@ -716,8 +732,9 @@
             [i0]
             p0 = call_malloc_nursery(%(tdescr.size)d)
             setfield_gc(p0, 5678, descr=tiddescr)
-            p1 = call_malloc_gc(ConstClass(malloc_str), i0, \
-                                descr=malloc_str_descr)
+            p1 = call_malloc_nursery_varsize(1, 1, i0, \
+                                descr=strdescr)
+            setfield_gc(p1, i0, descr=strlendescr)
             cond_call_gc_wb(p0, p1, descr=wbdescr)
             setfield_raw(p0, p1, descr=tzdescr)
             jump()
diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py
--- a/rpython/jit/backend/test/runner_test.py
+++ b/rpython/jit/backend/test/runner_test.py
@@ -2833,7 +2833,7 @@
 
     def test_assembler_call_propagate_exc(self):
         from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU
-        
+
         if not isinstance(self.cpu, AbstractLLCPU):
             py.test.skip("llgraph can't fake exceptions well enough, give up")
 
@@ -3477,10 +3477,10 @@
         ops = """
         [i0]
         i1 = int_force_ge_zero(i0)    # but forced to be in a register
-        finish(i1, descr=1)
+        finish(i1, descr=descr)
         """
+        descr = BasicFinalDescr()
         loop = parse(ops, self.cpu, namespace=locals())
-        descr = loop.operations[-1].getdescr()
         looptoken = JitCellToken()
         self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
         for inp, outp in [(2,2), (-3, 0)]:
@@ -3493,21 +3493,20 @@
             py.test.skip("pointless test on non-asm")
         from rpython.jit.backend.tool.viewcode import machine_code_dump
         import ctypes
+        targettoken = TargetToken()
         ops = """
         [i2]
         i0 = same_as(i2)    # but forced to be in a register
-        label(i0, descr=1)
+        label(i0, descr=targettoken)
         i1 = int_add(i0, i0)
-        guard_true(i1, descr=faildesr) [i1]
-        jump(i1, descr=1)
+        guard_true(i1, descr=faildescr) [i1]
+        jump(i1, descr=targettoken)
         """
         faildescr = BasicFailDescr(2)
         loop = parse(ops, self.cpu, namespace=locals())
-        faildescr = loop.operations[-2].getdescr()
-        jumpdescr = loop.operations[-1].getdescr()
         bridge_ops = """
         [i0]
-        jump(i0, descr=jumpdescr)
+        jump(i0, descr=targettoken)
         """
         bridge = parse(bridge_ops, self.cpu, namespace=locals())
         looptoken = JitCellToken()
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -1,7 +1,7 @@
 import sys
 import os
 
-from rpython.jit.backend.llsupport import symbolic, jitframe
+from rpython.jit.backend.llsupport import symbolic, jitframe, rewrite
 from rpython.jit.backend.llsupport.assembler import (GuardToken, BaseAssembler,
                                                 DEBUG_COUNTER, debug_bridge)
 from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
@@ -60,6 +60,7 @@
         self.float_const_neg_addr = 0
         self.float_const_abs_addr = 0
         self.malloc_slowpath = 0
+        self.malloc_slowpath_varsize = 0
         self.wb_slowpath = [0, 0, 0, 0, 0]
         self.setup_failure_recovery()
         self.datablockwrapper = None
@@ -158,27 +159,56 @@
         mc.RET()
         self._frame_realloc_slowpath = mc.materialize(self.cpu.asmmemmgr, [])
 
-    def _build_malloc_slowpath(self):
+    def _build_malloc_slowpath(self, kind):
         """ While arriving on slowpath, we have a gcpattern on stack,
         nursery_head in eax and the size in edi - eax
         """
+        assert kind in ['fixed', 'str', 'unicode', 'var']
         mc = codebuf.MachineCodeBlockWrapper()
         self._push_all_regs_to_frame(mc, [eax, edi], self.cpu.supports_floats)
         ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
         # store the gc pattern
         mc.MOV_rs(ecx.value, WORD)
         mc.MOV_br(ofs, ecx.value)
-        addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
-        mc.SUB_rr(edi.value, eax.value)       # compute the size we want
-        # the arg is already in edi
+        if kind == 'fixed':
+            addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
+        elif kind == 'str':
+            addr = self.cpu.gc_ll_descr.get_malloc_fn_addr('malloc_str')
+        elif kind == 'unicode':
+            addr = self.cpu.gc_ll_descr.get_malloc_fn_addr('malloc_unicode')
+        else:
+            addr = self.cpu.gc_ll_descr.get_malloc_slowpath_array_addr()
         mc.SUB_ri(esp.value, 16 - WORD)
-        if IS_X86_32:
-            mc.MOV_sr(0, edi.value)
-            if hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
-                mc.MOV_sr(WORD, ebp.value)
-        elif hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
-            # for tests only
-            mc.MOV_rr(esi.value, ebp.value)
+        if kind == 'fixed':
+            mc.SUB_rr(edi.value, eax.value) # compute the size we want
+            # the arg is already in edi
+            if IS_X86_32:
+                mc.MOV_sr(0, edi.value)
+                if hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
+                    mc.MOV_sr(WORD, ebp.value)
+            elif hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
+                # for tests only
+                mc.MOV_rr(esi.value, ebp.value)
+        elif kind == 'str' or kind == 'unicode':
+            if IS_X86_32:
+                # 1 for return value, 3 for alignment
+                mc.MOV_rs(edi.value, WORD * (3 + 1 + 1))
+                mc.MOV_sr(0, edi.value)
+            else:
+                mc.MOV_rs(edi.value, WORD * 3)
+        else:
+            if IS_X86_32:
+                mc.MOV_rs(edi.value, WORD * (3 + 1 + 1)) # itemsize
+                mc.MOV_sr(0, edi.value)
+                mc.MOV_rs(edi.value, WORD * (3 + 3 + 1))
+                mc.MOV_sr(WORD, edi.value) # tid
+                mc.MOV_rs(edi.value, WORD * (3 + 2 + 1))
+                mc.MOV_sr(2 * WORD, edi.value) # length
+            else:
+                # offset is 1 extra for call + 1 for SUB above
+                mc.MOV_rs(edi.value, WORD * 3) # itemsize
+                mc.MOV_rs(esi.value, WORD * 5) # tid
+                mc.MOV_rs(edx.value, WORD * 4) # length
         self.set_extra_stack_depth(mc, 16)
         mc.CALL(imm(addr))
         mc.ADD_ri(esp.value, 16 - WORD)
@@ -205,7 +235,7 @@
         mc.JMP(imm(self.propagate_exception_path))
         #
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
-        self.malloc_slowpath = rawstart
+        return rawstart
 
     def _build_propagate_exception_path(self):
         if not self.cpu.propagate_exception_descr:
@@ -2352,6 +2382,51 @@
         self.mc.overwrite(jmp_adr-1, chr(offset))
         self.mc.MOV(heap(nursery_free_adr), edi)
 
+    def malloc_cond_varsize(self, kind, nursery_free_adr, nursery_top_adr,
+                            lengthloc, itemsize, maxlength, gcmap,
+                            arraydescr):
+        from rpython.jit.backend.llsupport.descr import ArrayDescr
+        assert isinstance(arraydescr, ArrayDescr)
+
+        self.mc.CMP(lengthloc, imm(maxlength))
+        self.mc.J_il8(rx86.Conditions['G'], 0) # patched later
+        jmp_adr0 = self.mc.get_relative_pos()
+        self.mc.MOV(eax, heap(nursery_free_adr))
+        self.mc.MOV(edi, lengthloc)
+        assert arraydescr.basesize >= self.gc_minimal_size_in_nursery
+        self.mc.IMUL_ri(edi.value, itemsize)
+        header_size = self.gc_size_of_header
+        self.mc.ADD_ri(edi.value, arraydescr.basesize + header_size + WORD - 1)
+        self.mc.AND_ri(edi.value, ~(WORD - 1))
+        self.mc.ADD(edi, heap(nursery_free_adr))
+        self.mc.CMP(edi, heap(nursery_top_adr))
+        # write down the tid
+        self.mc.MOV(mem(eax, 0), imm(arraydescr.tid))
+        self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later
+        jmp_adr1 = self.mc.get_relative_pos()
+        offset = self.mc.get_relative_pos() - jmp_adr0
+        assert 0 < offset <= 127
+        self.mc.overwrite(jmp_adr0-1, chr(offset))
+        if kind == rewrite.FLAG_ARRAY:
+            self.mc.MOV_si(WORD, itemsize)
+            self.mc.MOV(RawEspLoc(WORD * 2, INT), lengthloc)
+            self.mc.MOV_si(WORD * 3, arraydescr.tid)
+            addr = self.malloc_slowpath_varsize
+        else:
+            if kind == rewrite.FLAG_STR:
+                addr = self.malloc_slowpath_str
+            else:
+                assert kind == rewrite.FLAG_UNICODE
+                addr = self.malloc_slowpath_unicode
+            self.mc.MOV(RawEspLoc(WORD, INT), lengthloc)
+        # save the gcmap
+        self.push_gcmap(self.mc, gcmap, mov=True)
+        self.mc.CALL(imm(addr))
+        offset = self.mc.get_relative_pos() - jmp_adr1
+        assert 0 < offset <= 127
+        self.mc.overwrite(jmp_adr1-1, chr(offset))
+        self.mc.MOV(heap(nursery_free_adr), edi)
+
     def force_token(self, reg):
         # XXX kill me
         assert isinstance(reg, RegLoc)
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -54,7 +54,7 @@
 class X86_64_RegisterManager(X86RegisterManager):
     # r11 omitted because it's used as scratch
     all_regs = [ecx, eax, edx, ebx, esi, edi, r8, r9, r10, r12, r13, r14, r15]
-    
+
     no_lower_byte_regs = []
     save_around_call_regs = [eax, ecx, edx, esi, edi, r8, r9, r10]
 
@@ -103,7 +103,7 @@
     def __init__(self, base_ofs):
         FrameManager.__init__(self)
         self.base_ofs = base_ofs
-    
+
     def frame_pos(self, i, box_type):
         return FrameLoc(i, get_ebp_ofs(self.base_ofs, i), box_type)
 
@@ -870,6 +870,33 @@
             gc_ll_descr.get_nursery_top_addr(),
             sizeloc, gcmap)
 
+    def consider_call_malloc_nursery_varsize(self, op):
+        gc_ll_descr = self.assembler.cpu.gc_ll_descr
+        if not hasattr(gc_ll_descr, 'max_size_of_young_obj'):
+            raise Exception("unreachable code")
+            # for boehm, this function should never be called
+        length_box = op.getarg(2)
+        arraydescr = op.getdescr()
+        assert isinstance(length_box, BoxInt) # we cannot have a const here!
+        # the result of the operation is forced into eax
+        self.rm.force_allocate_reg(op.result, selected_reg=eax)
+        #
+        # We need edi as a temporary (tmp_box is pinned to it below), but
+        # otherwise don't save any more register.  See comments in _build_malloc_slowpath().
+        tmp_box = TempBox()
+        self.rm.force_allocate_reg(tmp_box, selected_reg=edi)
+        lengthloc = self.rm.make_sure_var_in_reg(length_box, [op.result, 
tmp_box])
+        gcmap = self.get_gcmap([eax, edi]) # allocate the gcmap *before* freeing tmp_box
+        self.rm.possibly_free_var(tmp_box)
+        # arg 1 gives the item size; it is also used to derive maxlength
+        itemsize = op.getarg(1).getint()
+        maxlength = (gc_ll_descr.max_size_of_young_obj - WORD * 2) / itemsize
+        self.assembler.malloc_cond_varsize(
+            op.getarg(0).getint(),
+            gc_ll_descr.get_nursery_free_addr(),
+            gc_ll_descr.get_nursery_top_addr(),
+            lengthloc, itemsize, maxlength, gcmap, arraydescr)
+
+
     def get_gcmap(self, forbidden_regs=[], noregs=False):
         frame_depth = self.fm.get_frame_depth()
         gcmap = allocate_gcmap(self.assembler, frame_depth, 
JITFRAME_FIXED_SIZE)
@@ -1313,7 +1340,7 @@
         #jump_op = self.final_jump_op
         #if jump_op is not None and jump_op.getdescr() is descr:
         #    self._compute_hint_frame_locations_from_descr(descr)
-        
+
 
     def consider_keepalive(self, op):
         pass
diff --git a/rpython/jit/backend/x86/runner.py 
b/rpython/jit/backend/x86/runner.py
--- a/rpython/jit/backend/x86/runner.py
+++ b/rpython/jit/backend/x86/runner.py
@@ -23,6 +23,8 @@
     with_threads = False
     frame_reg = regloc.ebp
 
+    can_inline_varsize_malloc = True
+
     from rpython.jit.backend.x86.arch import JITFRAME_FIXED_SIZE
     all_reg_indexes = gpr_reg_mgr_cls.all_reg_indexes
     gen_regs = gpr_reg_mgr_cls.all_regs
diff --git a/rpython/jit/metainterp/executor.py 
b/rpython/jit/metainterp/executor.py
--- a/rpython/jit/metainterp/executor.py
+++ b/rpython/jit/metainterp/executor.py
@@ -354,6 +354,7 @@
                          rop.QUASIIMMUT_FIELD,
                          rop.CALL_MALLOC_GC,
                          rop.CALL_MALLOC_NURSERY,
+                         rop.CALL_MALLOC_NURSERY_VARSIZE,
                          rop.CALL_MALLOC_NURSERY_VARSIZE_SMALL,
                          rop.LABEL,
                          ):      # list of opcodes never executed by pyjitpl
diff --git a/rpython/jit/metainterp/resoperation.py 
b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -525,6 +525,7 @@
     'CALL_PURE/*d',             # removed before it's passed to the backend
     'CALL_MALLOC_GC/*d',      # like CALL, but NULL => propagate MemoryError
     'CALL_MALLOC_NURSERY/1',  # nursery malloc, const number of bytes, zeroed
+    'CALL_MALLOC_NURSERY_VARSIZE/3d',
     'CALL_MALLOC_NURSERY_VARSIZE_SMALL/1',
     # nursery malloc, non-const number of bytes, zeroed
     # note that the number of bytes must be well known to be small enough
diff --git a/rpython/jit/metainterp/test/test_logger.py 
b/rpython/jit/metainterp/test/test_logger.py
--- a/rpython/jit/metainterp/test/test_logger.py
+++ b/rpython/jit/metainterp/test/test_logger.py
@@ -93,7 +93,7 @@
         [p0]
         setfield_gc(p0, 3, descr=somedescr)
         '''
-        Descr()
+        somedescr = Descr()
         self.reparse(inp, namespace=locals())
 
     def test_guard(self):
diff --git a/rpython/jit/tool/oparser.py b/rpython/jit/tool/oparser.py
--- a/rpython/jit/tool/oparser.py
+++ b/rpython/jit/tool/oparser.py
@@ -106,6 +106,8 @@
                 tt = self.model.TargetToken(token)
                 self._consts[poss_descr] = tt
                 return tt
+            else:
+                raise
 
     def box_for_var(self, elem):
         try:
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to