Author: Armin Rigo <ar...@tunes.org>
Branch: guard-compatible
Changeset: r84571:d0ffd51b5d0b
Date: 2016-05-22 16:41 +0200
http://bitbucket.org/pypy/pypy/changeset/d0ffd51b5d0b/

Log:    Implement search_tree

diff --git a/rpython/jit/backend/x86/guard_compat.py b/rpython/jit/backend/x86/guard_compat.py
--- a/rpython/jit/backend/x86/guard_compat.py
+++ b/rpython/jit/backend/x86/guard_compat.py
@@ -3,11 +3,14 @@
 from rpython.rlib.rarithmetic import r_uint
 from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
 from rpython.rtyper.lltypesystem.lloperation import llop
-from rpython.rtyper.annlowlevel import cast_instance_to_gcref
+from rpython.rtyper.annlowlevel import cast_instance_to_gcref, llhelper
 from rpython.rtyper.annlowlevel import cast_gcref_to_instance
 from rpython.translator.tool.cbuild import ExternalCompilationInfo
+from rpython.jit.metainterp.compile import GuardCompatibleDescr
 from rpython.jit.backend.llsupport import jitframe
-from rpython.jit.metainterp.compile import GuardCompatibleDescr
+from rpython.jit.backend.x86 import rx86, codebuf, regloc
+from rpython.jit.backend.x86.regalloc import gpr_reg_mgr_cls
+from rpython.jit.backend.x86.arch import WORD, DEFAULT_FRAME_BYTES
 
 
 #
@@ -22,10 +25,10 @@
 #     JNE slow_case
 #     JMP *[reg2 + bc_most_recent + 8]
 #   slow_case:
+#     PUSH RDX        # save
 #     PUSH RAX        # save
-#     PUSH RDX        # save
-#     MOV RAX, reg    # the value to search for
-#     MOV RDX, reg2   # _backend_choices object
+#     MOV RDX=reg2, RAX=reg
+#            RDX is the _backend_choices object, RAX is the value to search for
 #     JMP search_tree    # see below
 #   sequel:
 #
@@ -96,7 +99,7 @@
 #     JNE left
 #
 #   found:
-#     MOV R11, [RDX + 8]
+#     MOV R11, [RDX + 8*R11]
 #     MOV RDX, [RSP+16]
 #     MOV [RDX + bc_most_recent], RAX
 #     MOV [RDX + bc_most_recent + 8], R11
@@ -107,10 +110,10 @@
 #   not_found:
 #     <save all registers to the jitframe RBP,
 #         reading and popping the original RAX and RDX off the stack>
-#     MOV RDX, [RSP]
-#     MOV R11, [RDX + bc_gcmap]
+#     MOV RDI, [RSP]
+#     MOV R11, [RDI + bc_gcmap]
 #     MOV [RBP + jf_gcmap], R11
-#     <call invoke_find_compatible(_backend_choices=RDX, value=RAX)>
+#     <call invoke_find_compatible(_backend_choices=RDI, value=RAX)>
 #     <_reload_frame_if_necessary>
 #     MOV R11, RAX
 #     <restore the non-saved registers>
@@ -161,9 +164,13 @@
                         ('bc_most_recent', PAIR),
                         ('bc_list', lltype.Array(PAIR)))
 
-@specialize.memo()
-def getofs(name):
+def _getofs(name):
     return llmemory.offsetof(BACKEND_CHOICES, name)
+BCGCMAP = _getofs('bc_gcmap')
+BCFAILDESCR = _getofs('bc_faildescr')
+BCMOSTRECENT = _getofs('bc_most_recent')
+BCLIST = _getofs('bc_list')
+del _getofs
 BCLISTLENGTHOFS = llmemory.arraylengthoffset(BACKEND_CHOICES.bc_list)
 BCLISTITEMSOFS = llmemory.itemoffsetof(BACKEND_CHOICES.bc_list, 0)
 PAIRSIZE = llmemory.sizeof(PAIR)
@@ -180,10 +187,10 @@
     return old != new
 
 def bchoices_trace(gc, obj_addr, callback, arg):
-    gc._trace_callback(callback, arg, obj_addr + getofs('bc_faildescr'))
-    bchoices_pair(gc, obj_addr + getofs('bc_most_recent'), callback, arg)
-    length = (obj_addr + getofs('bc_list') + BCLISTLENGTHOFS).signed[0]
-    array_addr = obj_addr + getofs('bc_list') + BCLISTITEMSOFS
+    gc._trace_callback(callback, arg, obj_addr + BCFAILDESCR)
+    bchoices_pair(gc, obj_addr + BCMOSTRECENT, callback, arg)
+    length = (obj_addr + BCLIST + BCLISTLENGTHOFS).signed[0]
+    array_addr = obj_addr + BCLIST + BCLISTITEMSOFS
     item_addr = array_addr
     i = 0
     changes = False
@@ -219,10 +226,15 @@
                                   compilation_info=eci)
 
 
+INVOKE_FIND_COMPATIBLE_FUNC = lltype.Ptr(lltype.FuncType(
+                [lltype.Ptr(BACKEND_CHOICES), llmemory.GCREF],
+                lltype.Signed))
+
 def invoke_find_compatible(bchoices, new_gcref):
     descr = bchoices.bc_faildescr
     descr = cast_gcref_to_instance(GuardCompatibleDescr, descr)
     try:
+        xxx # temp
         result = descr.find_compatible(cpu, new_gcref)
         if result == 0:
             result = descr._backend_failure_recovery
@@ -235,6 +247,9 @@
         bchoices.bc_most_recent.asmaddr = result
         return result
     except:             # oops!
+        if not we_are_translated():
+            import sys, pdb
+            pdb.post_mortem(sys.exc_info()[2])
         return descr._backend_failure_recovery
 
 def add_in_tree(bchoices, new_gcref, new_asmaddr):
@@ -242,7 +257,7 @@
     length = len(bchoices.bc_list)
     #
     gcref_base = lltype.cast_opaque_ptr(llmemory.GCREF, bchoices)
-    ofs = getofs('bc_list') + BCLISTITEMSOFS
+    ofs = BCLIST + BCLISTITEMSOFS
     ofs += (length - 1) * llmemory.sizeof(PAIR)
     ofs = _real_number(ofs)
     if llop.raw_load(lltype.Unsigned, gcref_base, ofs) != r_uint(-1):
@@ -273,7 +288,7 @@
     bchoices.bc_list[length - 1].asmaddr = new_asmaddr
     # --- no GC above ---
     addr = llmemory.cast_ptr_to_adr(bchoices)
-    addr += getofs('bc_list') + BCLISTITEMSOFS
+    addr += BCLIST + BCLISTITEMSOFS
     pairs_quicksort(addr, length)
     return bchoices
 
@@ -307,11 +322,98 @@
 
 def invalidate_cache(bchoices):
     """Write -1 inside bchoices.bc_most_recent.gcref."""
-    ofs = llmemory.offsetof(BACKEND_CHOICES, 'bc_most_recent')
-    invalidate_pair(bchoices, ofs)
+    invalidate_pair(bchoices, BCMOSTRECENT)
 
 
+def _fix_forward_label(mc, jmp_location):
+    offset = mc.get_relative_pos() - jmp_location
+    assert 0 < offset <= 127
+    mc.overwrite(jmp_location-1, chr(offset))
 
+def setup_once(assembler):
+    rax = regloc.eax.value
+    rdx = regloc.edx.value
+    rdi = regloc.edi.value
+    r11 = regloc.r11.value
+    frame_size = DEFAULT_FRAME_BYTES + 2 * WORD
+    # contains two extra words on the stack:
+    #    - saved RDX
+    #    - saved RAX
+
+    mc = codebuf.MachineCodeBlockWrapper()
+    mc.force_frame_size(frame_size)
+
+    ofs1 = _real_number(BCLIST + BCLISTLENGTHOFS)
+    ofs2 = _real_number(BCLIST + BCLISTITEMSOFS)
+    mc.MOV_sr(16, rdx)                      # MOV [RSP+16], RDX
+    mc.MOV_rm(r11, (rdx, ofs1))             # MOV R11, [RDX + bc_list.length]
+    mc.ADD_ri(rdx, ofs2)                    # ADD RDX, $bc_list.items
+    mc.JMP_l8(0)                            # JMP loop
+    jmp_location = mc.get_relative_pos()
+    mc.force_frame_size(frame_size)
+
+    right_label = mc.get_relative_pos()
+    mc.LEA_ra(rdx, (rdx, r11, 3, 8))        # LEA RDX, [RDX + 8*R11 + 8]
+    left_label = mc.get_relative_pos()
+    mc.SHR_ri(r11, 1)                       # SHR R11, 1
+    mc.J_il8(rx86.Conditions['Z'], 0)       # JZ not_found
+    jz_location = mc.get_relative_pos()
+
+    _fix_forward_label(mc, jmp_location)    # loop:
+    mc.CMP_ra(rax, (rdx, r11, 3, -8))       # CMP RAX, [RDX + 8*R11 - 8]
+    mc.J_il8(rx86.Conditions['A'], right_label - (mc.get_relative_pos() + 2))
+    mc.J_il8(rx86.Conditions['NE'], left_label - (mc.get_relative_pos() + 2))
+
+    mc.MOV_ra(r11, (rdx, r11, 3, 0))        # MOV R11, [RDX + 8*R11]
+    mc.MOV_rs(rdx, 16)                      # MOV RDX, [RSP+16]
+    ofs = _real_number(BCMOSTRECENT)
+    mc.MOV_mr((rdx, ofs), rax)              # MOV [RDX+bc_most_recent], RAX
+    mc.MOV_mr((rdx, ofs + 8), r11)          # MOV [RDX+bc_most_recent+8], R11
+    mc.POP_r(rax)                           # POP RAX
+    mc.POP_r(rdx)                           # POP RDX
+    mc.JMP_r(r11)                           # JMP *R11
+    mc.force_frame_size(frame_size)
+
+    _fix_forward_label(mc, jz_location)     # not_found:
+
+    # read and pop the original RAX and RDX off the stack
+    base_ofs = assembler.cpu.get_baseofs_of_frame_field()
+    v = gpr_reg_mgr_cls.all_reg_indexes[rdx]
+    mc.POP_b(v * WORD + base_ofs)           # POP [RBP + saved_rdx]
+    v = gpr_reg_mgr_cls.all_reg_indexes[rax]
+    mc.POP_b(v * WORD + base_ofs)           # POP [RBP + saved_rax]
+    # save all other registers to the jitframe RBP
+    assembler._push_all_regs_to_frame(mc, [regloc.eax, regloc.edx],
+                                      withfloats=True)
+
+    bc_gcmap = _real_number(BCGCMAP)
+    jf_gcmap = assembler.cpu.get_ofs_of_frame_field('jf_gcmap')
+    mc.MOV_rs(rdi, 0)                       # MOV RDI, [RSP]
+    mc.MOV_rr(regloc.esi.value, rax)        # MOV RSI, RAX
+    mc.MOV_rm(r11, (rdi, bc_gcmap))         # MOV R11, [RDI + bc_gcmap]
+    mc.MOV_br(jf_gcmap, r11)                # MOV [RBP + jf_gcmap], R11
+    llfunc = llhelper(INVOKE_FIND_COMPATIBLE_FUNC, invoke_find_compatible)
+    llfunc = assembler.cpu.cast_ptr_to_int(llfunc)
+    mc.CALL(regloc.imm(llfunc))             # CALL invoke_find_compatible
+    assembler._reload_frame_if_necessary(mc)
+    mc.MOV_bi(jf_gcmap, 0)                  # MOV [RBP + jf_gcmap], 0
+
+    mc.MOV_rr(r11, rax)                     # MOV R11, RAX
+
+    # restore the registers that the CALL has clobbered.  The other
+    # registers are saved above, for the gcmap, but don't need to be
+    # restored here.  (We restore RAX and RDX too.)
+    assembler._pop_all_regs_from_frame(mc, [], withfloats=True,
+                                       callee_only=True)
+    mc.JMP_r(r11)                           # JMP *R11
+
+    assembler.guard_compat_search_tree = mc.materialize(assembler.cpu, [])
+
+
+
+
+
+# ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
 
 def generate_guard_compatible(assembler, guard_token, loc_reg, initial_value):
     # fast-path check
@@ -424,11 +526,6 @@
     # guard_compatible to update it if needed.
 
 
-def setup_once(assembler):
-    nb_registers = WORD * 2
-    assembler._guard_compat_checkers = [0] * nb_registers
-
-
 def _build_inner_loop(mc, regnum, tmp, immediate_return):
     pos = mc.get_relative_pos()
     mc.CMP_mr((tmp, WORD), regnum)
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -396,6 +396,8 @@
     INSN_rb = insn(rex_w, chr(base+3), register(1,8), stack_bp(2))
     INSN_rs = insn(rex_w, chr(base+3), register(1,8), stack_sp(2))
     INSN_rm = insn(rex_w, chr(base+3), register(1,8), mem_reg_plus_const(2))
+    INSN_ra = insn(rex_w, chr(base+3), register(1,8),
+                                      mem_reg_plus_scaled_reg_plus_const(2))
     INSN_rj = insn(rex_w, chr(base+3), register(1,8), abs_(2))
     INSN_ji8 = insn(rex_w, '\x83', orbyte(base), abs_(1), immediate(2,'b'))
     INSN_mi8 = insn(rex_w, '\x83', orbyte(base), mem_reg_plus_const(1),
@@ -418,7 +420,7 @@
     INSN_bi._always_inline_ = True      # try to constant-fold single_byte()
 
     return (INSN_ri, INSN_rr, INSN_rb, INSN_bi, INSN_br, INSN_rm, INSN_rj,
-            INSN_ji8, INSN_mi8, INSN_rs, INSN_ri32)
+            INSN_ji8, INSN_mi8, INSN_rs, INSN_ri32, INSN_ra)
 
 def select_8_or_32_bit_immed(insn_8, insn_32):
     def INSN(*args):
@@ -514,13 +516,13 @@
     INC_m = insn(rex_w, '\xFF', orbyte(0), mem_reg_plus_const(1))
     INC_j = insn(rex_w, '\xFF', orbyte(0), abs_(1))
 
-    AD1_ri,ADD_rr,ADD_rb,_,_,ADD_rm,ADD_rj,_,_,ADD_rs, _ = common_modes(0)
-    OR_ri, OR_rr, OR_rb, _,_,OR_rm, OR_rj, _,_,_,_ = common_modes(1)
-    AND_ri,AND_rr,AND_rb,_,_,AND_rm,AND_rj,_,_,_,_ = common_modes(4)
-    SU1_ri,SUB_rr,SUB_rb,_,_,SUB_rm,SUB_rj,SUB_ji8,SUB_mi8,_,_ = common_modes(5)
-    SBB_ri,SBB_rr,SBB_rb,_,_,SBB_rm,SBB_rj,_,_,_,_ = common_modes(3)
-    XOR_ri,XOR_rr,XOR_rb,_,_,XOR_rm,XOR_rj,_,_,_,_ = common_modes(6)
-    CMP_ri,CMP_rr,CMP_rb,CMP_bi,CMP_br,CMP_rm,CMP_rj,_,_,_,CMP_ri32 = common_modes(7)
+    AD1_ri,ADD_rr,ADD_rb,_,_,ADD_rm,ADD_rj,_,_,ADD_rs,_,_ = common_modes(0)
+    OR_ri, OR_rr, OR_rb, _,_,OR_rm, OR_rj, _,_,_,_,_ = common_modes(1)
+    AND_ri,AND_rr,AND_rb,_,_,AND_rm,AND_rj,_,_,_,_,_ = common_modes(4)
+    SU1_ri,SUB_rr,SUB_rb,_,_,SUB_rm,SUB_rj,SUB_ji8,SUB_mi8,_,_,_ = common_modes(5)
+    SBB_ri,SBB_rr,SBB_rb,_,_,SBB_rm,SBB_rj,_,_,_,_,_ = common_modes(3)
+    XOR_ri,XOR_rr,XOR_rb,_,_,XOR_rm,XOR_rj,_,_,_,_,_ = common_modes(6)
+    CMP_ri,CMP_rr,CMP_rb,CMP_bi,CMP_br,CMP_rm,CMP_rj,_,_,_,CMP_ri32,CMP_ra = common_modes(7)
 
     ADD32_mi32 = insn(rex_nw, '\x81', mem_reg_plus_const(1), immediate(2))
 
diff --git a/rpython/rtyper/lltypesystem/ll2ctypes.py b/rpython/rtyper/lltypesystem/ll2ctypes.py
--- a/rpython/rtyper/lltypesystem/ll2ctypes.py
+++ b/rpython/rtyper/lltypesystem/ll2ctypes.py
@@ -936,6 +936,9 @@
             elif isinstance(llobj, llmemory.ArrayItemsOffset):
                 CARRAY = get_ctypes_type(llobj.TYPE)
                 llobj = CARRAY.items.offset
+            elif isinstance(llobj, llmemory.ArrayLengthOffset):
+                CARRAY = get_ctypes_type(llobj.TYPE)
+                llobj = CARRAY.length.offset
             else:
                 raise NotImplementedError(llobj)  # don't know about symbolic value
 
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to