Author: Armin Rigo <ar...@tunes.org> Branch: guard-compatible Changeset: r90028:9242b40aa211 Date: 2017-02-09 17:23 +0100 http://bitbucket.org/pypy/pypy/changeset/9242b40aa211/
Log: in-progress diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py --- a/rpython/jit/backend/llsupport/assembler.py +++ b/rpython/jit/backend/llsupport/assembler.py @@ -269,6 +269,11 @@ guardtok.faildescr.rd_locs = positions return faildescrindex, target + def get_target_for_failure_recovery_of_guard_compat(self): + exc = False + withfloats = True + return self.failure_recovery_code[exc + 2 * withfloats] + def enter_portal_frame(self, op): if self.cpu.HAS_CODEMAP: self.codemap_builder.enter_portal_frame(op.getarg(0).getint(), diff --git a/rpython/jit/backend/llsupport/guard_compat.py b/rpython/jit/backend/llsupport/guard_compat.py --- a/rpython/jit/backend/llsupport/guard_compat.py +++ b/rpython/jit/backend/llsupport/guard_compat.py @@ -18,14 +18,18 @@ ('asmaddr', lltype.Signed)) BACKEND_CHOICES = lltype.GcStruct('BACKEND_CHOICES', ('bc_faildescr', llmemory.GCREF), + ('bc_gcmap', lltype.Ptr(jitframe.GCMAP)), ('bc_gc_table_tracer', llmemory.GCREF), + ('bc_search_tree', lltype.Signed), ('bc_most_recent', PAIR), ('bc_list', lltype.Array(PAIR))) def _getofs(name): return llmemory.offsetof(BACKEND_CHOICES, name) BCFAILDESCR = _getofs('bc_faildescr') +BCGCMAP = _getofs('bc_gcmap') BCGCTABLETRACER = _getofs('bc_gc_table_tracer') +BCSEARCHTREE = _getofs('bc_search_tree') BCMOSTRECENT = _getofs('bc_most_recent') BCLIST = _getofs('bc_list') del _getofs @@ -175,12 +179,9 @@ bchoices = lltype.malloc(BACKEND_CHOICES, 1) bchoices.bc_faildescr = cast_instance_to_gcref(guard_compat_descr) bchoices.bc_gc_table_tracer = lltype.nullptr(llmemory.GCREF.TO) # (*) - bchoices.bc_most_recent.gcref = gcref_to_unsigned(initial_gcref) - bchoices.bc_most_recent.asmaddr = -43 # (*) - bchoices.bc_list[0].gcref = gcref_to_unsigned(initial_gcref) - bchoices.bc_list[0].asmaddr = -43 # (*) + bchoices.bc_most_recent.gcref = r_uint(-1) + bchoices.bc_list[0].gcref = r_uint(-1) llop.gc_writebarrier(lltype.Void, bchoices) - # entries with (*) are fixed in patch_guard_compatible() return bchoices def descr_to_bchoices(descr): @@ -191,32 +192,31 @@ # ---no GC operation end--- return bchoices -def patch_guard_compatible(guard_token, rawstart, get_addr_in_gc_table, - gc_table_tracer): - # go to the address in the gctable, number 'bindex' - bindex = guard_token.guard_compat_bindex - choices_addr = get_addr_in_gc_table(bindex) - sequel_label = rawstart + guard_token.pos_jump_offset - failure_recovery = rawstart + guard_token.pos_recovery_stub - gcmap = guard_token.gcmap - # choices_addr: points to bchoices in the GC table - # sequel_label: "sequel:" label above - # failure_recovery: failure recovery address +def patch_guard_compatible(guard_token, get_addr_in_gc_table, + gc_table_tracer, search_tree_addr): guard_compat_descr = guard_token.faildescr assert isinstance(guard_compat_descr, GuardCompatibleDescr) + # + # read the initial value of '_backend_choices_addr', which is used + # to store the index of the '_backend_choices' gc object in the gc + # table + bindex = guard_compat_descr._backend_choices_addr + # + # go to this address in the gctable + choices_addr = get_addr_in_gc_table(bindex) + # + # now fix '_backend_choices_addr' to really point to the raw address + # in the gc table guard_compat_descr._backend_choices_addr = choices_addr - guard_compat_descr._backend_sequel_label = sequel_label - guard_compat_descr._backend_failure_recovery = failure_recovery - guard_compat_descr._backend_gcmap = gcmap # bchoices = descr_to_bchoices(guard_compat_descr) assert len(bchoices.bc_list) == 1 assert (cast_gcref_to_instance(GuardCompatibleDescr, bchoices.bc_faildescr) is guard_compat_descr) + bchoices.bc_gcmap = guard_token.gcmap bchoices.bc_gc_table_tracer = lltype.cast_opaque_ptr(llmemory.GCREF, gc_table_tracer) - bchoices.bc_most_recent.asmaddr = sequel_label - bchoices.bc_list[0].asmaddr = sequel_label + bchoices.bc_search_tree = search_tree_addr def invalidate_pair(bchoices, pair_ofs): gcref_base = lltype.cast_opaque_ptr(llmemory.GCREF, bchoices) diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py --- a/rpython/jit/backend/llsupport/rewrite.py +++ b/rpython/jit/backend/llsupport/rewrite.py @@ -364,8 +364,7 @@ self.handle_call_assembler(op) continue if op.getopnum() == rop.GUARD_COMPATIBLE: - self.handle_guard_compatible(op) - continue + self.prepare_guard_compatible(op) if op.getopnum() == rop.JUMP or op.getopnum() == rop.FINISH: self.emit_pending_zeros() # @@ -978,8 +977,10 @@ self.gcrefs_recently_loaded[index] = load_op return load_op - def handle_guard_compatible(self, op): + def prepare_guard_compatible(self, op): from rpython.jit.backend.llsupport import guard_compat + # don't use _gcref_index here: we need our own index for + # the _backend_choices object c = op.getarg(1) assert isinstance(c, ConstPtr) descr = op.getdescr() @@ -987,9 +988,8 @@ bcindex = len(self.gcrefs_output_list) gcref = lltype.cast_opaque_ptr(llmemory.GCREF, bchoices) self.gcrefs_output_list.append(gcref) - new_op = op.copy_and_change(rop.GUARD_COMPATIBLE, - [op.getarg(0), ConstInt(bcindex)]) - self.emit_op(new_op) + assert isinstance(descr, guard_compat.GuardCompatibleDescr) + descr._backend_choices_addr = bcindex # fixed in patch_guard_compatible @always_inline def cpu_simplify_scale(cpu, index_box, factor, offset): diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -782,9 +782,9 @@ addr = rawstart + tok.pos_jump_offset tok.faildescr.adr_jump_offset = addr if tok.guard_compatible(): - guard_compat.patch_guard_compatible(tok, rawstart, - self._addr_from_gc_table, - self.gc_table_tracer) + guard_compat.patch_guard_compatible( + tok, self._addr_from_gc_table, + self.gc_table_tracer, self.guard_compat_search_tree) continue descr = tok.faildescr if descr.loop_version(): @@ -1832,18 +1832,11 @@ self.mc.UCOMISD(locs[0], locs[1]) else: self.mc.CMP(locs[0], locs[1]) + guard_token._guard_value_on = locs[0].value self.guard_success_cc = rx86.Conditions['E'] self.implement_guard(guard_token) - def genop_guard_guard_compatible(self, guard_op, guard_token, locs, ign): - loc_reg, loc_imm, loc_reg2 = locs - assert isinstance(loc_reg, RegLoc) - assert isinstance(loc_imm, ImmedLoc) # index of 'backend_choices' - assert isinstance(loc_reg2, RegLoc) - self.load_reg_from_gc_table(loc_reg2.value, loc_imm.value) - guard_compat.generate_guard_compatible(self, guard_token, - loc_reg.value, loc_imm.value, - loc_reg2.value) + genop_guard_guard_compatible = genop_guard_guard_value def _cmp_guard_class(self, locs): loc_ptr = locs[0] @@ -1982,6 +1975,11 @@ guardtok.faildescr, regalloc) # faildescrindex, target = self.store_info_on_descr(startpos, guardtok) + if guardtok.guard_compatible(): + assert startpos == self.mc.get_relative_pos() + guard_compat.generate_recovery_stub(self, guardtok) + xXXXx + # self.push_from_gc_table(faildescrindex) self.push_gcmap(self.mc, guardtok.gcmap, push=True) self.mc.JMP(imm(target)) diff --git a/rpython/jit/backend/x86/guard_compat.py b/rpython/jit/backend/x86/guard_compat.py --- a/rpython/jit/backend/x86/guard_compat.py +++ b/rpython/jit/backend/x86/guard_compat.py @@ -1,5 +1,5 @@ from rpython.rtyper.annlowlevel import llhelper -from rpython.jit.backend.x86 import rx86, codebuf, regloc +from rpython.jit.backend.x86 import rx86, codebuf, regloc, callbuilder from rpython.jit.backend.x86.regalloc import gpr_reg_mgr_cls from rpython.jit.backend.x86.arch import WORD, IS_X86_64, IS_X86_32 from rpython.jit.backend.x86.arch import DEFAULT_FRAME_BYTES @@ -17,27 +17,8 @@ # CMP reg, reg2 # JNE recovery_stub # sequel: -# <reg2 not used any more> # -# The difference is that 'recovery_stub' does not jump to one of the -# 'failure_recovery_code' versions, but instead it jumps to -# 'expand_guard_compatible'. The latter calls invoke_find_compatible. -# The result is one of: -# -# * 0: bail out. We jump to the 'failure_recovery_code'. -# -# * -1: continue running on the same path. We patch ofs(const-ptr) -# to contain the new value, and jump to 'sequel'. -# -# * otherwise, it's the address of a bridge. We jump to that bridge. -# -# This is the basic idea, but not the truth. Things are more -# complicated because we cache in the assembler the -# invoke_find_compatible call results. 'expand_guard_compatible' -# actually allocates a '_backend_choices' object, copies on it -# various data it got from the recovery_stub, then patches the -# recovery stub to this (the original recovery stub was padded if -# necessary to have enough room): +# The difference is in the 'recovery_stub': # # recovery_stub: # MOV R11, [RIP + ofs(_backend_choices)] @@ -52,9 +33,8 @@ # The faildescr for the GUARD_COMPATIBLE is a GuardCompatibleDescr. # Fields relevant for this discussion: # -# - _backend_ptr_addr: points inside the GC table, to ofs(const-ptr). -# ofs(_backend_choices) is just afterwards. -# Initially _backend_choices is NULL. +# - _backend_choices_addr: points inside the GC table, to +# ofs(_backend_choices) # - adr_jump_offset: raw address of the 'sequel' label (this field # is the same as on any other GuardDescr) # @@ -94,8 +74,8 @@ # When find_compatible() returns 0, it is not stored in bc_list, # but still stored in bc_most_recent, with 'guard_compat_recovery' # as the 'asmaddr'. Here is 'guard_compat_recovery': it emulates -# generate_quick_failure() from assembler.py, and so it plays the role -# of the original (patched) recovery stub. +# the non-GUARD_COMPATIBLE case of generate_quick_failure() from +# assembler.py. # # guard_compat_recovery: # PUSH R11 @@ -105,8 +85,8 @@ # Here is the x86-64 runtime code to walk the tree: # # search_tree: -# MOV [ESP+8], RCX # save the original value -# MOV [ESP+16], R11 # save the _backend_choices object +# MOV [RSP+8], RCX # save the original value +# MOV [RSP+16], R11 # save the _backend_choices object # MOV RCX, [R11 + bc_list.length] # a power of two minus one # ADD R11, $bc_list.items # JMP loop @@ -126,22 +106,24 @@ # # found: # MOV R11, [R11 + 8*RCX] # address to jump to next -# MOV RCX, [ESP+16] # reload the _backend_choices object +# MOV RCX, [RSP+16] # reload the _backend_choices object # MOV [RCX + bc_most_recent], RAX # MOV [RCX + bc_most_recent + 8], R11 -# MOV RCX, [ESP+8] # restore saved value +# MOV RCX, [RSP+8] # restore saved value # POP RAX # pushed by the caller -# JMP *R11 +# JMP *R11 # can't jump to guard_compat_recovery # # not_found: # <save all registers to the jitframe RBP, # reading and popping the original RAX and RCX off the stack> -# <call invoke_find_compatible(_backend_choices=[RSP], value=RAX), -# jitframe=RBP> +# <build an array of two words on the stack, with _backend_choices +# and value; the 'value' will be overwritten by +# invoke_find_compatible with the address to jump to next> +# <call invoke_find_compatible(p_arg=RSP, jitframe=RBP> # <_reload_frame_if_necessary> -# MOV R11, RAX # <restore all registers> -# JMP *R11 +# MOV R11, [RSP+array_element_1] # reload the _backend_choices object +# JMP *[RSP+array_element_2] # may jump to guard_compat_recovery # # # invoke_find_compatible(bchoices, new_gcref, jitframe): @@ -168,7 +150,7 @@ # Other issues: compile_bridge() called on a GuardCompatibleDescr must # not to do any patching, but instead it needs to clear # bchoices.bc_most_recent. Otherwise, we will likely directly jump to -# <failure_recovery> next time, if the newly added gcref is still in +# <guard_compat_recovery> next time, if the newly added gcref is still in # bc_most_recent.gcref. (We can't add it to bc_most_recent or bc_list # from compile_bridge(), because we don't know what the gcref should # be, but it doesn't matter.) @@ -186,29 +168,36 @@ mc.overwrite(jmp_location-1, chr(offset)) def build_once(assembler): + build_once_search_tree(assembler) + build_once_guard_compat_recovery(assembler) + +def build_once_search_tree(assembler): """Generate the 'search_tree' block of code""" rax = regloc.eax.value - rdx = regloc.edx.value + rcx = regloc.ecx.value rdi = regloc.edi.value r11 = regloc.r11.value - frame_size = DEFAULT_FRAME_BYTES + 2 * WORD - # contains two extra words on the stack: - # - saved RDX + frame_size = DEFAULT_FRAME_BYTES + 1 * WORD + # contains one extra word on the stack: # - saved RAX mc = codebuf.MachineCodeBlockWrapper() mc.force_frame_size(frame_size) + mc.INT3() if IS_X86_32: # save edi as an extra scratch register + XXX mc.MOV_sr(3*WORD, rdi) r11 = rdi # r11 doesn't exist on 32-bit, use "edi" instead + mc.MOV_sr(1*WORD, rcx) # MOV [RSP+8], ECX + mc.MOV_sr(2*WORD, r11) # MOV [RSP+16], R11 + ofs1 = _real_number(BCLIST + BCLISTLENGTHOFS) ofs2 = _real_number(BCLIST + BCLISTITEMSOFS) - mc.MOV_sr(2*WORD, rdx) # MOV [RSP+16], RDX - mc.MOV_rm(r11, (rdx, ofs1)) # MOV R11, [RDX + bc_list.length] - # in the sequel, "RDX + bc_list.items" is a pointer to the leftmost + mc.MOV_rm(rcx, (r11, ofs1)) # MOV RCX, [R11 + bc_list.length] + # in the sequel, "R11 + bc_list.items" is a pointer to the leftmost # array item of the range still under consideration. The length of - # this range is R11, which is always a power-of-two-minus-1. + # this range is RCX, which is always a power-of-two-minus-1. mc.JMP_l8(0) # JMP loop jmp_location = mc.get_relative_pos() mc.force_frame_size(frame_size) @@ -216,28 +205,29 @@ SH = 3 if IS_X86_64 else 2 right_label = mc.get_relative_pos() - mc.LEA_ra(rdx, (rdx, r11, SH, WORD)) # LEA RDX, [RDX + 8*R11 + 8] + mc.LEA_ra(r11, (r11, rcx, SH, WORD)) # LEA R11, [R11 + 8*RCX + 8] left_label = mc.get_relative_pos() - mc.SHR_ri(r11, 1) # SHR R11, 1 + mc.SHR_ri(rcx, 1) # SHR RCX, 1 mc.J_il8(rx86.Conditions['Z'], 0) # JZ not_found jz_location = mc.get_relative_pos() _fix_forward_label(mc, jmp_location) # loop: - mc.CMP_ra(rax, (rdx, r11, SH, ofs2-WORD)) - # CMP RAX, [RDX + items + 8*R11 - 8] + mc.CMP_ra(rax, (r11, rcx, SH, ofs2-WORD)) + # CMP RAX, [R11 + items + 8*RCX - 8] mc.J_il8(rx86.Conditions['A'], right_label - (mc.get_relative_pos() + 2)) mc.J_il8(rx86.Conditions['NE'], left_label - (mc.get_relative_pos() + 2)) - mc.MOV_ra(r11, (rdx, r11, SH, ofs2)) # MOV R11, [RDX + items + 8*R11] - mc.MOV_rs(rdx, 2*WORD) # MOV RDX, [RSP+16] + mc.MOV_ra(r11, (r11, rcx, SH, ofs2)) # MOV R11, [R11 + items + 8*RCX] + mc.MOV_rs(rcx, 2*WORD) # MOV RCX, [RSP+16] ofs = _real_number(BCMOSTRECENT) - mc.MOV_mr((rdx, ofs), rax) # MOV [RDX+bc_most_recent], RAX - mc.MOV_mr((rdx, ofs+WORD), r11) # MOV [RDX+bc_most_recent+8], R11 + mc.MOV_mr((rcx, ofs), rax) # MOV [RCX+bc_most_recent], RAX + mc.MOV_mr((rcx, ofs+WORD), r11) # MOV [RCX+bc_most_recent+8], R11 + mc.MOV_rs(rcx, 1*WORD) # MOV RCX, [RSP+8] mc.POP_r(rax) # POP RAX - mc.POP_r(rdx) # POP RDX if IS_X86_64: mc.JMP_r(r11) # JMP *R11 elif IS_X86_32: + XXX mc.MOV_sr(0, r11) # r11==rdi here mc.MOV_rs(rdi, WORD) mc.JMP_s(0) @@ -246,24 +236,28 @@ _fix_forward_label(mc, jz_location) # not_found: if IS_X86_32: + XXX mc.MOV_rs(rdi, 3*WORD) - # read and pop the original RAX and RDX off the stack - base_ofs = assembler.cpu.get_baseofs_of_frame_field() - v = gpr_reg_mgr_cls.all_reg_indexes[rax] - mc.POP_b(v * WORD + base_ofs) # POP [RBP + saved_rax] - v = gpr_reg_mgr_cls.all_reg_indexes[rdx] - mc.POP_b(v * WORD + base_ofs) # POP [RBP + saved_rdx] - # save all other registers to the jitframe RBP - assembler._push_all_regs_to_frame(mc, [regloc.eax, regloc.edx], - withfloats=True) + # The _backend_choices object is still referenced from [RSP+16] + # (which becomes [RSP+8] after the POP), where it is the first of a + # two-words array passed as argument to invoke_find_compatible(). + # The second word is the value, from RAX, which we store now. + mc.MOV_sr(3*WORD, rax) # MOV [RSP+24], RAX + + # restore RAX and RCX + mc.MOV_rs(rcx, 1*WORD) # MOV RCX, [RSP+8] + mc.POP_r(rax) # POP RAX + + # save all registers to the jitframe RBP + assembler._push_all_regs_to_frame(mc, [], withfloats=True) if IS_X86_64: - mc.MOV_rs(rdi, 0) # MOV RDI, [RSP] - mc.MOV_rr(regloc.esi.value, rax) # MOV RSI, RAX - mc.MOV_rr(regloc.edx.value, # MOV RDX, RBP + mc.LEA_rs(rdi, 2 * WORD) # LEA RDI, [RSP+8] + mc.MOV_rr(regloc.esi.value, # MOV RSI, RBP regloc.ebp.value) elif IS_X86_32: + XXX # argument #1 is already in [ESP] mc.MOV_sr(1 * WORD, rax) mc.MOV_sr(2 * WORD, regloc.ebp.value) @@ -273,58 +267,89 @@ llfunc = assembler.cpu.cast_ptr_to_int(llfunc) mc.CALL(regloc.imm(llfunc)) # CALL invoke_find_compatible assembler._reload_frame_if_necessary(mc) - if IS_X86_64: - mc.MOV_rr(r11, rax) # MOV R11, RAX - elif IS_X86_32: - mc.MOV_sr(0, rax) # restore the registers that the CALL has clobbered, plus the ones # containing GC pointers that may have moved. That means we just - # restore them all. (We restore RAX and RDX and RDI too.) + # restore them all. assembler._pop_all_regs_from_frame(mc, [], withfloats=True) + + # jump to 'array_element_2'. In case this goes to + # guard_compat_recovery, we also reload the _backend_choices + # object from 'array_element_1' (the GC may have moved it, or + # it may be a completely new object). if IS_X86_64: - mc.JMP_r(r11) # JMP *R11 + mc.MOV_rs(r11, 1*WORD) # MOV R11, [RSP+8] + mc.JMP_s(2*WORD) # JMP *[RSP+16] elif IS_X86_32: + XXX mc.JMP_s(0) assembler.guard_compat_search_tree = mc.materialize(assembler.cpu, []) -def generate_guard_compatible(assembler, guard_token, reg, bindex, reg2): - mc = assembler.mc - rax = regloc.eax.value - rdx = regloc.edx.value +def build_once_guard_compat_recovery(assembler): + """Generate the 'guard_compat_recovery' block of code""" + r11 = regloc.r11.value + mc = codebuf.MachineCodeBlockWrapper() + + ofs = _real_number(BCGCMAP) + mc.PUSH_r(r11) + mc.PUSH_m((r11, ofs)) + target = assembler.get_target_for_failure_recovery_of_guard_compat() + mc.JMP(regloc.imm(target)) + + assembler.guard_compat_recovery = mc.materialize(assembler.cpu, []) + + +def generate_recovery_stub(assembler, guard_token): + r11 = regloc.r11.value frame_size = DEFAULT_FRAME_BYTES + descr = guard_token.faildescr + assert isinstance(descr, GuardCompatibleDescr) + assembler.load_reg_from_gc_table(r11, descr._backend_choices_addr) + + mc = assembler.mc + reg = guard_token._guard_value_on ofs = _real_number(BCMOSTRECENT) - mc.CMP_rm(reg, (reg2, ofs)) # CMP reg, [reg2 + bc_most_recent] + mc.CMP_rm(reg, (r11, ofs)) # CMP reg, [R11 + bc_most_recent] mc.J_il8(rx86.Conditions['NE'], 0) # JNE slow_case jne_location = mc.get_relative_pos() - mc.JMP_m((reg2, ofs + WORD)) # JMP *[reg2 + bc_most_recent + 8] + mc.JMP_m((r11, ofs + WORD)) # JMP *[R11 + bc_most_recent + 8] mc.force_frame_size(frame_size) _fix_forward_label(mc, jne_location) # slow_case: - mc.PUSH_r(rdx) # PUSH RDX mc.PUSH_r(rax) # PUSH RAX - # manually move reg to RAX and reg2 to RDX - if reg2 == rax: - if reg == rdx: - mc.XCHG_rr(rax, rdx) - reg = rax - else: - mc.MOV_rr(rdx, rax) - reg2 = rdx if reg != rax: - assert reg2 != rax - mc.MOV_rr(rax, reg) - if reg2 != rdx: - mc.MOV_rr(rdx, reg2) + mc.MOV_rr(rax, reg) # MOV RAX, reg - mc.JMP(regloc.imm(assembler.guard_compat_search_tree)) - mc.force_frame_size(frame_size) + ofs = _real_number(BCSEARCHTREE) + mc.JMP_m((r11, ofs)) # JMP *[R11 + bc_search_tree] - # abuse this field to store the 'sequel' relative offset - guard_token.pos_jump_offset = mc.get_relative_pos() - guard_token.guard_compat_bindex = bindex - assembler.pending_guard_tokens.append(guard_token) + +#def generate_guard_compatible(assembler, guard_token, reg, reg2, gctable_index): +# mc = assembler.mc +# mc.CMP_rr(reg, reg2) # CMP reg, reg2 +# mc.J_il8(rx86.Conditions['E'], 0) # JE sequel +# je_location = mc.get_relative_pos() +# +# self.push_from_gc_table(guard_token.faildescrindex) +# mc.JMP(regloc.imm(assembler.guard_compat_second_case)) +# +# padding_end = start_pos + size_general_case - 2 +# while mc.get_relative_pos() < padding_end: +# mc.INT3() +# +# padding_end = mc.get_relative_pos() # in case it is actually bigger +# block_size = padding_end - start_pos + 2 +# assert 0 < block_size <= 255 +# mc.writechar(chr(block_size)) +# assert 0 <= reg <= 15 and 0 <= reg2 <= 15 +# mc.writechar(chr((reg2 << 4) | reg)) +# +# # abuse this field to store the 'sequel' relative offset +# guard_token.pos_jump_offset = mc.get_relative_pos() +# guard_token.guard_compat_bindex = gctable_index +# guard_token.............. +# assembler.pending_guard_tokens.append(guard_token) diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -478,17 +478,7 @@ y = self.loc(op.getarg(1)) self.perform_guard(op, [x, y], None) - def consider_guard_compatible(self, op): - op.getdescr().make_a_counter_per_value(op, -1) # -1 not used here - args = op.getarglist() - assert args[0].type == REF # only supported case for now - assert isinstance(args[1], ConstInt) # by rewrite.py - tmp_box = TempVar() - x = self.rm.make_sure_var_in_reg(args[0]) - y = self.loc(args[1]) - z = self.rm.force_allocate_reg(tmp_box, args) - self.rm.possibly_free_var(tmp_box) - self.perform_guard(op, [x, y, z], None) + consider_guard_compatible = consider_guard_value def consider_guard_class(self, op): assert not isinstance(op.getarg(0), Const) diff --git a/rpython/jit/backend/x86/test/test_compatible.py b/rpython/jit/backend/x86/test/test_compatible.py --- a/rpython/jit/backend/x86/test/test_compatible.py +++ b/rpython/jit/backend/x86/test/test_compatible.py @@ -19,39 +19,42 @@ mc.writechar('\x00') # 4 gctable entries; 'bchoices' will be #3 # if IS_X86_64: - mc.MOV(regloc.ecx, regloc.edx) - mc.MOV(regloc.edx, regloc.edi) - mc.MOV(regloc.eax, regloc.esi) + mc.MOV(regloc.ecx, regloc.edx) # jitframe + mc.MOV(regloc.r11, regloc.edi) # _backend_choices + mc.MOV(regloc.eax, regloc.esi) # guarded value elif IS_X86_32: + XXX mc.MOV_rs(regloc.edx.value, 4) mc.MOV_rs(regloc.eax.value, 8) mc.MOV_rs(regloc.ecx.value, 12) # mc.PUSH(regloc.ebp) - mc.SUB(regloc.esp, regloc.imm(148 - 2*WORD)) # make a frame, and align stack + mc.SUB(regloc.esp, regloc.imm(144 - 2*WORD)) # make a frame, and align stack mc.MOV(regloc.ebp, regloc.ecx) # - mc.PUSH(regloc.imm(0xdddd)) mc.PUSH(regloc.imm(0xaaaa)) - mc.JMP(regloc.imm(cpu.assembler.guard_compat_search_tree)) + # jump to guard_compat_search_tree, but carefully: don't overwrite R11 + mc.MOV(regloc.esi, regloc.imm(cpu.assembler.guard_compat_search_tree)) + mc.JMP_r(regloc.esi.value) sequel = mc.get_relative_pos() # - mc.force_frame_size(148) + mc.force_frame_size(144) mc.SUB(regloc.eax, regloc.edx) - mc.ADD(regloc.esp, regloc.imm(148 - 2*WORD)) + mc.ADD(regloc.esp, regloc.imm(144 - 2*WORD)) mc.POP(regloc.ebp) mc.RET() # extra_paths = [] for i in range(11): - mc.force_frame_size(148) + mc.force_frame_size(144) extra_paths.append(mc.get_relative_pos()) mc.MOV(regloc.eax, regloc.imm(1000000 + i)) - mc.ADD(regloc.esp, regloc.imm(148 - 2*WORD)) + mc.ADD(regloc.esp, regloc.imm(144 - 2*WORD)) mc.POP(regloc.ebp) mc.RET() failure = extra_paths[10] rawstart = mc.materialize(cpu, []) + print 'rawstart:', hex(rawstart) call_me = rffi.cast(lltype.Ptr(lltype.FuncType( [lltype.Ptr(BACKEND_CHOICES), llmemory.GCREF, lltype.Ptr(jitframe.JITFRAME)], lltype.Signed)), @@ -63,16 +66,17 @@ llop.raw_store(lltype.Void, rawstart, 3 * WORD, bchoices) class FakeGuardToken: - guard_compat_bindex = 3 - pos_jump_offset = sequel - pos_recovery_stub = failure + #pos_jump_offset = sequel + #pos_recovery_stub = failure gcmap = rffi.cast(lltype.Ptr(jitframe.GCMAP), 0x10111213) faildescr = guard_compat_descr guard_token = FakeGuardToken() + guard_compat_descr._backend_choices_addr = 3 - patch_guard_compatible(guard_token, rawstart, + patch_guard_compatible(guard_token, lambda index: rawstart + index * WORD, - lltype.nullptr(llmemory.GCREF.TO)) + lltype.nullptr(llmemory.GCREF.TO), + 9999) # ---- ready ---- _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit