Author: Armin Rigo <ar...@tunes.org> Branch: Changeset: r55281:b513e71c997d Date: 2012-06-03 12:18 +0200 http://bitbucket.org/pypy/pypy/changeset/b513e71c997d/
Log: hg merge diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py --- a/pypy/jit/backend/llsupport/gc.py +++ b/pypy/jit/backend/llsupport/gc.py @@ -577,7 +577,6 @@ def __init__(self, gc_ll_descr): self.llop1 = gc_ll_descr.llop1 self.WB_FUNCPTR = gc_ll_descr.WB_FUNCPTR - self.WB_ARRAY_FUNCPTR = gc_ll_descr.WB_ARRAY_FUNCPTR self.fielddescr_tid = gc_ll_descr.fielddescr_tid # GCClass = gc_ll_descr.GCClass @@ -592,6 +591,11 @@ self.jit_wb_card_page_shift = GCClass.JIT_WB_CARD_PAGE_SHIFT self.jit_wb_cards_set_byteofs, self.jit_wb_cards_set_singlebyte = ( self.extract_flag_byte(self.jit_wb_cards_set)) + # + # the x86 backend uses the following "accidental" facts to + # avoid one instruction: + assert self.jit_wb_cards_set_byteofs == self.jit_wb_if_flag_byteofs + assert self.jit_wb_cards_set_singlebyte == -0x80 else: self.jit_wb_cards_set = 0 @@ -615,7 +619,7 @@ # returns a function with arguments [array, index, newvalue] llop1 = self.llop1 funcptr = llop1.get_write_barrier_from_array_failing_case( - self.WB_ARRAY_FUNCPTR) + self.WB_FUNCPTR) funcaddr = llmemory.cast_ptr_to_adr(funcptr) return cpu.cast_adr_to_int(funcaddr) # this may return 0 @@ -699,9 +703,7 @@ def _setup_write_barrier(self): self.WB_FUNCPTR = lltype.Ptr(lltype.FuncType( - [llmemory.Address, llmemory.Address], lltype.Void)) - self.WB_ARRAY_FUNCPTR = lltype.Ptr(lltype.FuncType( - [llmemory.Address, lltype.Signed, llmemory.Address], lltype.Void)) + [llmemory.Address], lltype.Void)) self.write_barrier_descr = WriteBarrierDescr(self) def _make_functions(self, really_not_translated): @@ -859,8 +861,7 @@ # the GC, and call it immediately llop1 = self.llop1 funcptr = llop1.get_write_barrier_failing_case(self.WB_FUNCPTR) - funcptr(llmemory.cast_ptr_to_adr(gcref_struct), - llmemory.cast_ptr_to_adr(gcref_newptr)) + funcptr(llmemory.cast_ptr_to_adr(gcref_struct)) def can_use_nursery_malloc(self, size): return size < self.max_size_of_young_obj diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py --- a/pypy/jit/backend/test/runner_test.py +++ b/pypy/jit/backend/test/runner_test.py @@ -1835,12 +1835,12 @@ assert not excvalue def test_cond_call_gc_wb(self): - def func_void(a, b): - record.append((a, b)) + def func_void(a): + record.append(a) record = [] # S = lltype.GcStruct('S', ('tid', lltype.Signed)) - FUNC = self.FuncType([lltype.Ptr(S), lltype.Ptr(S)], lltype.Void) + FUNC = self.FuncType([lltype.Ptr(S)], lltype.Void) func_ptr = llhelper(lltype.Ptr(FUNC), func_void) funcbox = self.get_funcbox(self.cpu, func_ptr) class WriteBarrierDescr(AbstractDescr): @@ -1866,26 +1866,25 @@ [BoxPtr(sgcref), ConstPtr(tgcref)], 'void', descr=WriteBarrierDescr()) if cond: - assert record == [(s, t)] + assert record == [s] else: assert record == [] def test_cond_call_gc_wb_array(self): - def func_void(a, b, c): - record.append((a, b, c)) + def func_void(a): + record.append(a) record = [] # S = lltype.GcStruct('S', ('tid', lltype.Signed)) - FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed, lltype.Ptr(S)], - lltype.Void) + FUNC = self.FuncType([lltype.Ptr(S)], lltype.Void) func_ptr = llhelper(lltype.Ptr(FUNC), func_void) funcbox = self.get_funcbox(self.cpu, func_ptr) class WriteBarrierDescr(AbstractDescr): jit_wb_if_flag = 4096 jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10') jit_wb_if_flag_singlebyte = 0x10 - jit_wb_cards_set = 0 - def get_write_barrier_from_array_fn(self, cpu): + jit_wb_cards_set = 0 # <= without card marking + def get_write_barrier_fn(self, cpu): return funcbox.getint() # for cond in [False, True]: @@ -1902,13 +1901,15 @@ [BoxPtr(sgcref), ConstInt(123), BoxPtr(sgcref)], 'void', descr=WriteBarrierDescr()) if cond: - assert record == [(s, 123, s)] + assert record == [s] else: assert record == [] def test_cond_call_gc_wb_array_card_marking_fast_path(self): - def func_void(a, b, c): - record.append((a, b, c)) + def func_void(a): + record.append(a) + if cond == 1: # the write barrier sets the flag + s.data.tid |= 32768 record = [] # S = lltype.Struct('S', ('tid', lltype.Signed)) @@ -1922,34 +1923,40 @@ ('card6', lltype.Char), ('card7', lltype.Char), ('data', S)) - FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed, lltype.Ptr(S)], - lltype.Void) + FUNC = self.FuncType([lltype.Ptr(S)], lltype.Void) func_ptr = llhelper(lltype.Ptr(FUNC), func_void) funcbox = self.get_funcbox(self.cpu, func_ptr) class WriteBarrierDescr(AbstractDescr): jit_wb_if_flag = 4096 jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10') jit_wb_if_flag_singlebyte = 0x10 - jit_wb_cards_set = 8192 - jit_wb_cards_set_byteofs = struct.pack("i", 8192).index('\x20') - jit_wb_cards_set_singlebyte = 0x20 + jit_wb_cards_set = 32768 + jit_wb_cards_set_byteofs = struct.pack("i", 32768).index('\x80') + jit_wb_cards_set_singlebyte = -0x80 jit_wb_card_page_shift = 7 def get_write_barrier_from_array_fn(self, cpu): return funcbox.getint() # - for BoxIndexCls in [BoxInt, ConstInt]: - for cond in [False, True]: + for BoxIndexCls in [BoxInt, ConstInt]*3: + for cond in [-1, 0, 1, 2]: + # cond=-1:GCFLAG_TRACK_YOUNG_PTRS, GCFLAG_CARDS_SET are not set + # cond=0: GCFLAG_CARDS_SET is never set + # cond=1: GCFLAG_CARDS_SET is not set, but the wb sets it + # cond=2: GCFLAG_CARDS_SET is already set print print '_'*79 print 'BoxIndexCls =', BoxIndexCls - print 'JIT_WB_CARDS_SET =', cond + print 'testing cond =', cond print value = random.randrange(-sys.maxint, sys.maxint) - value |= 4096 - if cond: - value |= 8192 + if cond >= 0: + value |= 4096 else: - value &= ~8192 + value &= ~4096 + if cond == 2: + value |= 32768 + else: + value &= ~32768 s = lltype.malloc(S_WITH_CARDS, immortal=True, zero=True) s.data.tid = value sgcref = rffi.cast(llmemory.GCREF, s.data) @@ -1958,11 +1965,13 @@ self.execute_operation(rop.COND_CALL_GC_WB_ARRAY, [BoxPtr(sgcref), box_index, BoxPtr(sgcref)], 'void', descr=WriteBarrierDescr()) - if cond: + if cond in [0, 1]: + assert record == [s.data] + else: assert record == [] + if cond in [1, 2]: assert s.card6 == '\x02' else: - assert record == [(s.data, (9<<7) + 17, s.data)] assert s.card6 == '\x00' assert s.card0 == '\x00' assert s.card1 == '\x00' @@ -1971,6 +1980,9 @@ assert s.card4 == '\x00' assert s.card5 == '\x00' assert s.card7 == '\x00' + if cond == 1: + value |= 32768 + assert s.data.tid == value def test_force_operations_returning_void(self): values = [] diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py --- a/pypy/jit/backend/x86/assembler.py +++ b/pypy/jit/backend/x86/assembler.py @@ -10,7 +10,7 @@ from pypy.rlib.jit import AsmInfo from pypy.jit.backend.model import CompiledLoopToken from pypy.jit.backend.x86.regalloc import (RegAlloc, get_ebp_ofs, _get_scale, - gpr_reg_mgr_cls, _valid_addressing_size) + gpr_reg_mgr_cls, xmm_reg_mgr_cls, _valid_addressing_size) from pypy.jit.backend.x86.arch import (FRAME_FIXED_SIZE, FORCE_INDEX_OFS, WORD, IS_X86_32, IS_X86_64) @@ -83,6 +83,7 @@ self.float_const_abs_addr = 0 self.malloc_slowpath1 = 0 self.malloc_slowpath2 = 0 + self.wb_slowpath = [0, 0, 0, 0] self.memcpy_addr = 0 self.setup_failure_recovery() self._debug = False @@ -109,9 +110,13 @@ self.memcpy_addr = self.cpu.cast_ptr_to_int(support.memcpy_fn) self._build_failure_recovery(False) self._build_failure_recovery(True) + self._build_wb_slowpath(False) + self._build_wb_slowpath(True) if self.cpu.supports_floats: self._build_failure_recovery(False, withfloats=True) self._build_failure_recovery(True, withfloats=True) + self._build_wb_slowpath(False, withfloats=True) + self._build_wb_slowpath(True, withfloats=True) support.ensure_sse2_floats() self._build_float_constants() self._build_propagate_exception_path() @@ -344,6 +349,82 @@ rawstart = mc.materialize(self.cpu.asmmemmgr, []) self.stack_check_slowpath = rawstart + def _build_wb_slowpath(self, withcards, withfloats=False): + descr = self.cpu.gc_ll_descr.write_barrier_descr + if descr is None: + return + if not withcards: + func = descr.get_write_barrier_fn(self.cpu) + else: + if descr.jit_wb_cards_set == 0: + return + func = descr.get_write_barrier_from_array_fn(self.cpu) + if func == 0: + return + # + # This builds a helper function called from the slow path of + # write barriers. It must save all registers, and optionally + # all XMM registers. It takes a single argument just pushed + # on the stack even on X86_64. It must restore stack alignment + # accordingly. + mc = codebuf.MachineCodeBlockWrapper() + # + frame_size = (1 + # my argument, considered part of my frame + 1 + # my return address + len(gpr_reg_mgr_cls.save_around_call_regs)) + if withfloats: + frame_size += 16 # X86_32: 16 words for 8 registers; + # X86_64: just 16 registers + if IS_X86_32: + frame_size += 1 # argument to pass to the call + # + # align to a multiple of 16 bytes + frame_size = (frame_size + (CALL_ALIGN-1)) & ~(CALL_ALIGN-1) + # + correct_esp_by = (frame_size - 2) * WORD + mc.SUB_ri(esp.value, correct_esp_by) + # + ofs = correct_esp_by + if withfloats: + for reg in xmm_reg_mgr_cls.save_around_call_regs: + ofs -= 8 + mc.MOVSD_sx(ofs, reg.value) + for reg in gpr_reg_mgr_cls.save_around_call_regs: + ofs -= WORD + mc.MOV_sr(ofs, reg.value) + # + if IS_X86_32: + mc.MOV_rs(eax.value, (frame_size - 1) * WORD) + mc.MOV_sr(0, eax.value) + elif IS_X86_64: + mc.MOV_rs(edi.value, (frame_size - 1) * WORD) + mc.CALL(imm(func)) + # + if withcards: + # A final TEST8 before the RET, for the caller. Careful to + # not follow this instruction with another one that changes + # the status of the CPU flags! + mc.MOV_rs(eax.value, (frame_size - 1) * WORD) + mc.TEST8(addr_add_const(eax, descr.jit_wb_if_flag_byteofs), + imm(-0x80)) + # + ofs = correct_esp_by + if withfloats: + for reg in xmm_reg_mgr_cls.save_around_call_regs: + ofs -= 8 + mc.MOVSD_xs(reg.value, ofs) + for reg in gpr_reg_mgr_cls.save_around_call_regs: + ofs -= WORD + mc.MOV_rs(reg.value, ofs) + # + # ADD esp, correct_esp_by --- but cannot use ADD, because + # of its effects on the CPU flags + mc.LEA_rs(esp.value, correct_esp_by) + mc.RET16_i(WORD) + # + rawstart = mc.materialize(self.cpu.asmmemmgr, []) + self.wb_slowpath[withcards + 2 * withfloats] = rawstart + @staticmethod @rgc.no_collect def _release_gil_asmgcc(css): @@ -2324,102 +2405,83 @@ def genop_discard_cond_call_gc_wb(self, op, arglocs): # Write code equivalent to write_barrier() in the GC: it checks - # a flag in the object at arglocs[0], and if set, it calls the - # function remember_young_pointer() from the GC. The arguments - # to the call are in arglocs[:N]. The rest, arglocs[N:], contains - # registers that need to be saved and restored across the call. - # N is either 2 (regular write barrier) or 3 (array write barrier). + # a flag in the object at arglocs[0], and if set, it calls a + # helper piece of assembler. The latter saves registers as needed + # and call the function jit_remember_young_pointer() from the GC. descr = op.getdescr() if we_are_translated(): cls = self.cpu.gc_ll_descr.has_write_barrier_class() assert cls is not None and isinstance(descr, cls) # opnum = op.getopnum() - if opnum == rop.COND_CALL_GC_WB: - N = 2 - func = descr.get_write_barrier_fn(self.cpu) - card_marking = False - elif opnum == rop.COND_CALL_GC_WB_ARRAY: - N = 3 - func = descr.get_write_barrier_from_array_fn(self.cpu) - assert func != 0 - card_marking = descr.jit_wb_cards_set != 0 - else: - raise AssertionError(opnum) + card_marking = False + mask = descr.jit_wb_if_flag_singlebyte + if opnum == rop.COND_CALL_GC_WB_ARRAY and descr.jit_wb_cards_set != 0: + # assumptions the rest of the function depends on: + assert (descr.jit_wb_cards_set_byteofs == + descr.jit_wb_if_flag_byteofs) + assert descr.jit_wb_cards_set_singlebyte == -0x80 + card_marking = True + mask = descr.jit_wb_if_flag_singlebyte | -0x80 # loc_base = arglocs[0] self.mc.TEST8(addr_add_const(loc_base, descr.jit_wb_if_flag_byteofs), - imm(descr.jit_wb_if_flag_singlebyte)) + imm(mask)) self.mc.J_il8(rx86.Conditions['Z'], 0) # patched later jz_location = self.mc.get_relative_pos() # for cond_call_gc_wb_array, also add another fast path: # if GCFLAG_CARDS_SET, then we can just set one bit and be done if card_marking: - self.mc.TEST8(addr_add_const(loc_base, - descr.jit_wb_cards_set_byteofs), - imm(descr.jit_wb_cards_set_singlebyte)) - self.mc.J_il8(rx86.Conditions['NZ'], 0) # patched later - jnz_location = self.mc.get_relative_pos() + # GCFLAG_CARDS_SET is in this byte at 0x80, so this fact can + # been checked by the status flags of the previous TEST8 + self.mc.J_il8(rx86.Conditions['S'], 0) # patched later + js_location = self.mc.get_relative_pos() else: - jnz_location = 0 + js_location = 0 - # the following is supposed to be the slow path, so whenever possible - # we choose the most compact encoding over the most efficient one. - if IS_X86_32: - limit = -1 # push all arglocs on the stack - elif IS_X86_64: - limit = N - 1 # push only arglocs[N:] on the stack - for i in range(len(arglocs)-1, limit, -1): - loc = arglocs[i] - if isinstance(loc, RegLoc): - self.mc.PUSH_r(loc.value) - else: - assert not IS_X86_64 # there should only be regs in arglocs[N:] - self.mc.PUSH_i32(loc.getint()) - if IS_X86_64: - # We clobber these registers to pass the arguments, but that's - # okay, because consider_cond_call_gc_wb makes sure that any - # caller-save registers with values in them are present in - # arglocs[N:] too, so they are saved on the stack above and - # restored below. - if N == 2: - callargs = [edi, esi] - else: - callargs = [edi, esi, edx] - remap_frame_layout(self, arglocs[:N], callargs, - X86_64_SCRATCH_REG) + # Write only a CALL to the helper prepared in advance, passing it as + # argument the address of the structure we are writing into + # (the first argument to COND_CALL_GC_WB). + helper_num = card_marking + if self._regalloc.xrm.reg_bindings: + helper_num += 2 + if self.wb_slowpath[helper_num] == 0: # tests only + assert not we_are_translated() + self.cpu.gc_ll_descr.write_barrier_descr = descr + self._build_wb_slowpath(card_marking, + bool(self._regalloc.xrm.reg_bindings)) + assert self.wb_slowpath[helper_num] != 0 # - # misaligned stack in the call, but it's ok because the write barrier - # is not going to call anything more. Also, this assumes that the - # write barrier does not touch the xmm registers. (Slightly delicate - # assumption, given that the write barrier can end up calling the - # platform's malloc() from AddressStack.append(). XXX may need to - # be done properly) - self.mc.CALL(imm(func)) - if IS_X86_32: - self.mc.ADD_ri(esp.value, N*WORD) - for i in range(N, len(arglocs)): - loc = arglocs[i] - assert isinstance(loc, RegLoc) - self.mc.POP_r(loc.value) + self.mc.PUSH(loc_base) + self.mc.CALL(imm(self.wb_slowpath[helper_num])) - # if GCFLAG_CARDS_SET, then we can do the whole thing that would - # be done in the CALL above with just four instructions, so here - # is an inline copy of them if card_marking: - self.mc.JMP_l8(0) # jump to the exit, patched later - jmp_location = self.mc.get_relative_pos() - # patch the JNZ above - offset = self.mc.get_relative_pos() - jnz_location + # The helper ends again with a check of the flag in the object. + # So here, we can simply write again a 'JNS', which will be + # taken if GCFLAG_CARDS_SET is still not set. + self.mc.J_il8(rx86.Conditions['NS'], 0) # patched later + jns_location = self.mc.get_relative_pos() + # + # patch the JS above + offset = self.mc.get_relative_pos() - js_location assert 0 < offset <= 127 - self.mc.overwrite(jnz_location-1, chr(offset)) + self.mc.overwrite(js_location-1, chr(offset)) # + # case GCFLAG_CARDS_SET: emit a few instructions to do + # directly the card flag setting loc_index = arglocs[1] if isinstance(loc_index, RegLoc): - # choose a scratch register - tmp1 = loc_index - self.mc.PUSH_r(tmp1.value) + if IS_X86_64 and isinstance(loc_base, RegLoc): + # copy loc_index into r11 + tmp1 = X86_64_SCRATCH_REG + self.mc.MOV_rr(tmp1.value, loc_index.value) + final_pop = False + else: + # must save the register loc_index before it is mutated + self.mc.PUSH_r(loc_index.value) + tmp1 = loc_index + final_pop = True # SHR tmp, card_page_shift self.mc.SHR_ri(tmp1.value, descr.jit_wb_card_page_shift) # XOR tmp, -8 @@ -2427,7 +2489,9 @@ # BTS [loc_base], tmp self.mc.BTS(addr_add_const(loc_base, 0), tmp1) # done - self.mc.POP_r(tmp1.value) + if final_pop: + self.mc.POP_r(loc_index.value) + # elif isinstance(loc_index, ImmedLoc): byte_index = loc_index.value >> descr.jit_wb_card_page_shift byte_ofs = ~(byte_index >> 3) @@ -2435,11 +2499,12 @@ self.mc.OR8(addr_add_const(loc_base, byte_ofs), imm(byte_val)) else: raise AssertionError("index is neither RegLoc nor ImmedLoc") - # patch the JMP above - offset = self.mc.get_relative_pos() - jmp_location + # + # patch the JNS above + offset = self.mc.get_relative_pos() - jns_location assert 0 < offset <= 127 - self.mc.overwrite(jmp_location-1, chr(offset)) - # + self.mc.overwrite(jns_location-1, chr(offset)) + # patch the JZ above offset = self.mc.get_relative_pos() - jz_location assert 0 < offset <= 127 diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py --- a/pypy/jit/backend/x86/regalloc.py +++ b/pypy/jit/backend/x86/regalloc.py @@ -980,16 +980,6 @@ # or setarrayitem_gc. It avoids loading it twice from the memory. arglocs = [self.rm.make_sure_var_in_reg(op.getarg(i), args) for i in range(N)] - # add eax, ecx and edx as extra "arguments" to ensure they are - # saved and restored. Fish in self.rm to know which of these - # registers really need to be saved (a bit of a hack). Moreover, - # we don't save and restore any SSE register because the called - # function, a GC write barrier, is known not to touch them. - # See remember_young_pointer() in rpython/memory/gc/generation.py. - for v, reg in self.rm.reg_bindings.items(): - if (reg in self.rm.save_around_call_regs - and self.rm.stays_alive(v)): - arglocs.append(reg) self.PerformDiscard(op, arglocs) self.rm.possibly_free_vars_for_op(op) diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py --- a/pypy/jit/backend/x86/rx86.py +++ b/pypy/jit/backend/x86/rx86.py @@ -316,6 +316,13 @@ assert rexbyte == 0 return 0 +# REX prefixes: 'rex_w' generates a REX_W, forcing the instruction +# to operate on 64-bit. 'rex_nw' doesn't, so the instruction operates +# on 32-bit or less; the complete REX prefix is omitted if unnecessary. +# 'rex_fw' is a special case which doesn't generate a REX_W but forces +# the REX prefix in all cases. It is only useful on instructions which +# have an 8-bit register argument, to force access to the "sil" or "dil" +# registers (as opposed to "ah-dh"). rex_w = encode_rex, 0, (0x40 | REX_W), None # a REX.W prefix rex_nw = encode_rex, 0, 0, None # an optional REX prefix rex_fw = encode_rex, 0, 0x40, None # a forced REX prefix @@ -496,9 +503,9 @@ AND8_rr = insn(rex_fw, '\x20', byte_register(1), byte_register(2,8), '\xC0') OR8_rr = insn(rex_fw, '\x08', byte_register(1), byte_register(2,8), '\xC0') - OR8_mi = insn(rex_fw, '\x80', orbyte(1<<3), mem_reg_plus_const(1), + OR8_mi = insn(rex_nw, '\x80', orbyte(1<<3), mem_reg_plus_const(1), immediate(2, 'b')) - OR8_ji = insn(rex_fw, '\x80', orbyte(1<<3), abs_, immediate(1), + OR8_ji = insn(rex_nw, '\x80', orbyte(1<<3), abs_, immediate(1), immediate(2, 'b')) NEG_r = insn(rex_w, '\xF7', register(1), '\xD8') @@ -531,7 +538,13 @@ PUSH_r = insn(rex_nw, register(1), '\x50') PUSH_b = insn(rex_nw, '\xFF', orbyte(6<<3), stack_bp(1)) + PUSH_i8 = insn('\x6A', immediate(1, 'b')) PUSH_i32 = insn('\x68', immediate(1, 'i')) + def PUSH_i(mc, immed): + if single_byte(immed): + mc.PUSH_i8(immed) + else: + mc.PUSH_i32(immed) POP_r = insn(rex_nw, register(1), '\x58') POP_b = insn(rex_nw, '\x8F', orbyte(0<<3), stack_bp(1)) diff --git a/pypy/jit/backend/x86/test/test_rx86.py b/pypy/jit/backend/x86/test/test_rx86.py --- a/pypy/jit/backend/x86/test/test_rx86.py +++ b/pypy/jit/backend/x86/test/test_rx86.py @@ -183,7 +183,8 @@ def test_push32(): cb = CodeBuilder32 - assert_encodes_as(cb, 'PUSH_i32', (9,), '\x68\x09\x00\x00\x00') + assert_encodes_as(cb, 'PUSH_i', (0x10009,), '\x68\x09\x00\x01\x00') + assert_encodes_as(cb, 'PUSH_i', (9,), '\x6A\x09') def test_sub_ji8(): cb = CodeBuilder32 diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py --- a/pypy/rpython/memory/gc/minimark.py +++ b/pypy/rpython/memory/gc/minimark.py @@ -111,10 +111,13 @@ # The following flag is set on externally raw_malloc'ed arrays of pointers. # They are allocated with some extra space in front of them for a bitfield, # one bit per 'card_page_indices' indices. -GCFLAG_HAS_CARDS = first_gcflag << 5 -GCFLAG_CARDS_SET = first_gcflag << 6 # <- at least one card bit is set +GCFLAG_HAS_CARDS = first_gcflag << 6 +GCFLAG_CARDS_SET = first_gcflag << 7 # <- at least one card bit is set +# note that GCFLAG_CARDS_SET is the most significant bit of a byte: +# this is required for the JIT (x86) -TID_MASK = (first_gcflag << 7) - 1 +#GCFLAG_UNUSED = first_gcflag << 5 # this flag is free +TID_MASK = (first_gcflag << 8) - 1 FORWARDSTUB = lltype.GcStruct('forwarding_stub', @@ -994,12 +997,9 @@ def _init_writebarrier_logic(self): DEBUG = self.DEBUG # The purpose of attaching remember_young_pointer to the instance - # instead of keeping it as a regular method is to help the JIT call it. - # Additionally, it makes the code in write_barrier() marginally smaller + # instead of keeping it as a regular method is to + # make the code in write_barrier() marginally smaller # (which is important because it is inlined *everywhere*). - # For x86, there is also an extra requirement: when the JIT calls - # remember_young_pointer(), it assumes that it will not touch the SSE - # registers, so it does not save and restore them (that's a *hack*!). def remember_young_pointer(addr_struct, newvalue): # 'addr_struct' is the address of the object in which we write. # 'newvalue' is the address that we are going to write in there. @@ -1033,6 +1033,17 @@ remember_young_pointer._dont_inline_ = True self.remember_young_pointer = remember_young_pointer # + def jit_remember_young_pointer(addr_struct): + # minimal version of the above, with just one argument, + # called by the JIT when GCFLAG_TRACK_YOUNG_PTRS is set + self.old_objects_pointing_to_young.append(addr_struct) + objhdr = self.header(addr_struct) + objhdr.tid &= ~GCFLAG_TRACK_YOUNG_PTRS + if objhdr.tid & GCFLAG_NO_HEAP_PTRS: + objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS + self.prebuilt_root_objects.append(addr_struct) + self.jit_remember_young_pointer = jit_remember_young_pointer + # if self.card_page_indices > 0: self._init_writebarrier_with_card_marker() @@ -1087,60 +1098,21 @@ self.remember_young_pointer_from_array2 = ( remember_young_pointer_from_array2) - # xxx trying it out for the JIT: a 3-arguments version of the above - def remember_young_pointer_from_array3(addr_array, index, newvalue): + def jit_remember_young_pointer_from_array(addr_array): + # minimal version of the above, with just one argument, + # called by the JIT when GCFLAG_TRACK_YOUNG_PTRS is set + # but GCFLAG_CARDS_SET is cleared. This tries to set + # GCFLAG_CARDS_SET if possible; otherwise, it falls back + # to jit_remember_young_pointer(). objhdr = self.header(addr_array) - # - # a single check for the common case of neither GCFLAG_HAS_CARDS - # nor GCFLAG_NO_HEAP_PTRS - if objhdr.tid & (GCFLAG_HAS_CARDS | GCFLAG_NO_HEAP_PTRS) == 0: - # common case: fast path, jump to the end of the function - pass - elif objhdr.tid & GCFLAG_HAS_CARDS == 0: - # no cards, but GCFLAG_NO_HEAP_PTRS is set. - objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS - self.prebuilt_root_objects.append(addr_array) - # jump to the end of the function + if objhdr.tid & GCFLAG_HAS_CARDS: + self.old_objects_with_cards_set.append(addr_array) + objhdr.tid |= GCFLAG_CARDS_SET else: - # case with cards. - # - # If the newly written address does not actually point to a - # young object, leave now. - if not self.appears_to_be_young(newvalue): - return - # - # 'addr_array' is a raw_malloc'ed array with card markers - # in front. Compute the index of the bit to set: - bitindex = index >> self.card_page_shift - byteindex = bitindex >> 3 - bitmask = 1 << (bitindex & 7) - # - # If the bit is already set, leave now. - addr_byte = self.get_card(addr_array, byteindex) - byte = ord(addr_byte.char[0]) - if byte & bitmask: - return - addr_byte.char[0] = chr(byte | bitmask) - # - if objhdr.tid & GCFLAG_CARDS_SET == 0: - self.old_objects_with_cards_set.append(addr_array) - objhdr.tid |= GCFLAG_CARDS_SET - return - # - # Logic for the no-cards case, put here to minimize the number - # of checks done at the start of the function - if DEBUG: # note: PYPY_GC_DEBUG=1 does not enable this - ll_assert(self.debug_is_old_object(addr_array), - "young array with no card but GCFLAG_TRACK_YOUNG_PTRS") - # - if self.appears_to_be_young(newvalue): - self.old_objects_pointing_to_young.append(addr_array) - objhdr.tid &= ~GCFLAG_TRACK_YOUNG_PTRS + self.jit_remember_young_pointer(addr_array) - remember_young_pointer_from_array3._dont_inline_ = True - assert self.card_page_indices > 0 - self.remember_young_pointer_from_array3 = ( - remember_young_pointer_from_array3) + self.jit_remember_young_pointer_from_array = ( + jit_remember_young_pointer_from_array) def get_card(self, obj, byteindex): size_gc_header = self.gcheaderbuilder.size_gc_header diff --git a/pypy/rpython/memory/gctransform/framework.py b/pypy/rpython/memory/gctransform/framework.py --- a/pypy/rpython/memory/gctransform/framework.py +++ b/pypy/rpython/memory/gctransform/framework.py @@ -455,13 +455,12 @@ annmodel.SomeAddress()], annmodel.s_None, inline=True) - func = getattr(gcdata.gc, 'remember_young_pointer', None) + func = getattr(gcdata.gc, 'jit_remember_young_pointer', None) if func is not None: # func should not be a bound method, but a real function assert isinstance(func, types.FunctionType) self.write_barrier_failing_case_ptr = getfn(func, - [annmodel.SomeAddress(), - annmodel.SomeAddress()], + [annmodel.SomeAddress()], annmodel.s_None) func = getattr(GCClass, 'write_barrier_from_array', None) if func is not None: @@ -472,16 +471,15 @@ annmodel.SomeInteger()], annmodel.s_None, inline=True) - func = getattr(gcdata.gc, 'remember_young_pointer_from_array3', + func = getattr(gcdata.gc, + 'jit_remember_young_pointer_from_array', None) if func is not None: # func should not be a bound method, but a real function assert isinstance(func, types.FunctionType) self.write_barrier_from_array_failing_case_ptr = \ getfn(func, - [annmodel.SomeAddress(), - annmodel.SomeInteger(), - annmodel.SomeAddress()], + [annmodel.SomeAddress()], annmodel.s_None) self.statistics_ptr = getfn(GCClass.statistics.im_func, [s_gc, annmodel.SomeInteger()], _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit