Author: David Schneider <[email protected]>
Branch: ppc-jit-backend
Changeset: r56429:069eb5ce9bf0
Date: 2012-07-24 09:50 -0700
http://bitbucket.org/pypy/pypy/changeset/069eb5ce9bf0/
Log: (edelsohn, bivab) implement new version of cond_call_gc
diff --git a/pypy/jit/backend/ppc/opassembler.py
b/pypy/jit/backend/ppc/opassembler.py
--- a/pypy/jit/backend/ppc/opassembler.py
+++ b/pypy/jit/backend/ppc/opassembler.py
@@ -1000,26 +1000,23 @@
opnum = op.getopnum()
card_marking = False
+ mask = descr.jit_wb_if_flag_singlebyte
if opnum == rop.COND_CALL_GC_WB_ARRAY and descr.jit_wb_cards_set != 0:
- N = 3
- addr = descr.get_write_barrier_from_array_fn(self.cpu)
- assert addr != 0
+ # assumptions the rest of the function depends on:
assert (descr.jit_wb_cards_set_byteofs ==
descr.jit_wb_if_flag_byteofs)
assert descr.jit_wb_cards_set_singlebyte == -0x80
card_marking = True
- else:
- N = 2
- addr = descr.get_write_barrier_fn(self.cpu)
+ mask = descr.jit_wb_if_flag_singlebyte | -0x80
+ #
loc_base = arglocs[0]
assert _check_imm_arg(descr.jit_wb_if_flag_byteofs)
with scratch_reg(self.mc):
self.mc.lbz(r.SCRATCH.value, loc_base.value,
descr.jit_wb_if_flag_byteofs)
-
# test whether this bit is set
- self.mc.andix(r.SCRATCH.value, r.SCRATCH.value,
- descr.jit_wb_if_flag_singlebyte)
+ mask &= 0xFF
+ self.mc.andix(r.SCRATCH.value, r.SCRATCH.value, mask)
jz_location = self.mc.currpos()
self.mc.nop()
@@ -1027,57 +1024,65 @@
# for cond_call_gc_wb_array, also add another fast path:
# if GCFLAG_CARDS_SET, then we can just set one bit and be done
if card_marking:
- assert _check_imm_arg(descr.jit_wb_cards_set_byteofs)
- assert descr.jit_wb_cards_set_singlebyte == -0x80
with scratch_reg(self.mc):
self.mc.lbz(r.SCRATCH.value, loc_base.value,
descr.jit_wb_if_flag_byteofs)
+ self.mc.extsb(r.SCRATCH.value, r.SCRATCH.value)
# test whether this bit is set
- self.mc.andix(r.SCRATCH.value, r.SCRATCH.value,
- descr.jit_wb_cards_set_singlebyte)
+ self.mc.cmpwi(0, r.SCRATCH.value, 0)
- jnz_location = self.mc.currpos()
+ js_location = self.mc.currpos()
self.mc.nop()
+ #self.mc.trap()
else:
- jnz_location = 0
+ js_location = 0
- # the following is supposed to be the slow path, so whenever possible
- # we choose the most compact encoding over the most efficient one.
- with Saved_Volatiles(self.mc):
- if N == 2:
- callargs = [r.r3, r.r4]
- else:
- callargs = [r.r3, r.r4, r.r5]
- remap_frame_layout(self, arglocs, callargs, r.SCRATCH)
- func = rffi.cast(lltype.Signed, addr)
- #
- # misaligned stack in the call, but it's ok because the write
- # barrier is not going to call anything more.
- self.mc.call(func)
+ # Write only a CALL to the helper prepared in advance, passing it as
+ # argument the address of the structure we are writing into
+ # (the first argument to COND_CALL_GC_WB).
+ helper_num = card_marking
+
+ if self._regalloc.fprm.reg_bindings:
+ helper_num += 2
+ if self.wb_slowpath[helper_num] == 0: # tests only
+ assert not we_are_translated()
+ self.cpu.gc_ll_descr.write_barrier_descr = descr
+ self._build_wb_slowpath(card_marking,
+ bool(self._regalloc.fprm.reg_bindings))
+ assert self.wb_slowpath[helper_num] != 0
+ #
+ if loc_base is not r.r3:
+ remap_frame_layout(self, [loc_base], [r.r3], r.SCRATCH)
+ addr = self.wb_slowpath[helper_num]
+ func = rffi.cast(lltype.Signed, addr)
+ self.mc.bl_abs(func)
# if GCFLAG_CARDS_SET, then we can do the whole thing that would
# be done in the CALL above with just four instructions, so here
# is an inline copy of them
if card_marking:
with scratch_reg(self.mc):
- jmp_location = self.mc.currpos()
+ jns_location = self.mc.currpos()
self.mc.nop() # jump to the exit, patched later
- # patch the JNZ above
+ # patch the JS above
offset = self.mc.currpos()
- pmc = OverwritingBuilder(self.mc, jnz_location, 1)
- pmc.bc(12, 2, offset - jnz_location) # jump on equality
+ pmc = OverwritingBuilder(self.mc, js_location, 1)
+ # Jump if the sign-extended flag byte is negative (cards-set bit set)
+ pmc.bc(12, 0, offset - js_location)
pmc.overwrite()
#
+ # case GCFLAG_CARDS_SET: emit a few instructions to do
+ # directly the card flag setting
loc_index = arglocs[1]
assert loc_index.is_reg()
- tmp1 = arglocs[-2]
- tmp2 = arglocs[-1]
+ tmp1 = loc_index
+ tmp2 = arglocs[-2]
#byteofs
s = 3 + descr.jit_wb_card_page_shift
- # use r20 as temporary register, save it in FORCE INDEX slot
- temp_reg = r.r20
+ # use r11 as temporary register, save it in FORCE INDEX slot
+ temp_reg = r.r11
self.mc.store(temp_reg.value, r.SPP.value, FORCE_INDEX_OFS)
self.mc.srli_op(temp_reg.value, loc_index.value, s)
@@ -1097,24 +1102,21 @@
self.mc.stbx(r.SCRATCH.value, loc_base.value, temp_reg.value)
# done
- # restore temporary register r20
+ # restore temporary register r11
self.mc.load(temp_reg.value, r.SPP.value, FORCE_INDEX_OFS)
- # patch the JMP above
+ # patch the JNS above
offset = self.mc.currpos()
- pmc = OverwritingBuilder(self.mc, jmp_location, 1)
- pmc.b(offset - jmp_location)
+ pmc = OverwritingBuilder(self.mc, jns_location, 1)
+ # Jump if the sign-extended flag byte is not negative (bit not set)
+ pmc.bc(4, 0, offset - jns_location)
pmc.overwrite()
# patch the JZ above
- offset = self.mc.currpos() - jz_location
+ offset = self.mc.currpos()
pmc = OverwritingBuilder(self.mc, jz_location, 1)
- # We want to jump if the compared bits are not equal.
- # This corresponds to the x86 backend, which uses
- # the TEST operation. Hence, on first sight, it might
- # seem that we use the wrong condition here. This is
- # because TEST results in a 1 if the operands are different.
- pmc.bc(4, 2, offset)
+ # Jump if the masked flag byte is zero (CR0 'equal' bit set)
+ pmc.bc(12, 2, offset - jz_location)
pmc.overwrite()
emit_cond_call_gc_wb_array = emit_cond_call_gc_wb
diff --git a/pypy/jit/backend/ppc/ppc_assembler.py
b/pypy/jit/backend/ppc/ppc_assembler.py
--- a/pypy/jit/backend/ppc/ppc_assembler.py
+++ b/pypy/jit/backend/ppc/ppc_assembler.py
@@ -89,11 +89,14 @@
failargs_limit)
self.fail_boxes_ptr = values_array(llmemory.GCREF, failargs_limit)
self.mc = None
- self.datablockwrapper = None
self.memcpy_addr = 0
+ self.pending_guards = None
self.fail_boxes_count = 0
self.current_clt = None
+ self.malloc_slowpath = 0
+ self.wb_slowpath = [0, 0, 0, 0]
self._regalloc = None
+ self.datablockwrapper = None
self.max_stack_params = 0
self.propagate_exception_path = 0
self.stack_check_slowpath = 0
@@ -497,6 +500,61 @@
self.write_64_bit_func_descr(rawstart, rawstart+3*WORD)
self.stack_check_slowpath = rawstart
+ def _build_wb_slowpath(self, withcards, withfloats=False):
+ descr = self.cpu.gc_ll_descr.write_barrier_descr
+ if descr is None:
+ return
+ if not withcards:
+ func = descr.get_write_barrier_fn(self.cpu)
+ else:
+ if descr.jit_wb_cards_set == 0:
+ return
+ func = descr.get_write_barrier_from_array_fn(self.cpu)
+ if func == 0:
+ return
+ #
+ # This builds a helper function called from the slow path of
+ # write barriers. It must save all registers, and optionally
+ # all fp registers.
+ mc = PPCBuilder()
+ #
+ frame_size = ((len(r.VOLATILES) + len(r.VOLATILES_FLOAT)
+ + BACKCHAIN_SIZE + MAX_REG_PARAMS) * WORD)
+ mc.make_function_prologue(frame_size)
+ for i in range(len(r.VOLATILES)):
+ mc.store(r.VOLATILES[i].value, r.SP.value,
+ (BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD)
+ if self.cpu.supports_floats:
+ for i in range(len(r.VOLATILES_FLOAT)):
+ mc.stfd(r.VOLATILES_FLOAT[i].value, r.SP.value,
+ (len(r.VOLATILES) + BACKCHAIN_SIZE +
MAX_REG_PARAMS + i) * WORD)
+
+ mc.call(rffi.cast(lltype.Signed, func))
+ if self.cpu.supports_floats:
+ for i in range(len(r.VOLATILES_FLOAT)):
+ mc.lfd(r.VOLATILES_FLOAT[i].value, r.SP.value,
+ (len(r.VOLATILES) + BACKCHAIN_SIZE +
MAX_REG_PARAMS + i) * WORD)
+ for i in range(len(r.VOLATILES)):
+ mc.load(r.VOLATILES[i].value, r.SP.value,
+ (BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD)
+ mc.restore_LR_from_caller_frame(frame_size)
+ #
+ if withcards:
+ # A final compare before the return (blr), for the caller, which
+ # branches on the resulting CR0 bits. Be careful not to follow
+ # this compare with another instruction that changes CR0!
+ mc.lbz(r.SCRATCH.value, r.r3.value,
+ descr.jit_wb_if_flag_byteofs)
+ mc.extsb(r.SCRATCH.value, r.SCRATCH.value)
+ mc.cmpwi(0, r.SCRATCH.value, 0)
+ #
+ mc.addi(r.SP.value, r.SP.value, frame_size)
+ mc.blr()
+ #
+ mc.prepare_insts_blocks()
+ rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+ self.wb_slowpath[withcards + 2 * withfloats] = rawstart
+
def _build_propagate_exception_path(self):
if self.cpu.propagate_exception_v < 0:
return
@@ -662,6 +720,11 @@
def setup_once(self):
gc_ll_descr = self.cpu.gc_ll_descr
gc_ll_descr.initialize()
+ self._build_wb_slowpath(False)
+ self._build_wb_slowpath(True)
+ if self.cpu.supports_floats:
+ self._build_wb_slowpath(False, withfloats=True)
+ self._build_wb_slowpath(True, withfloats=True)
self._build_propagate_exception_path()
if gc_ll_descr.get_malloc_slowpath_addr is not None:
self._build_malloc_slowpath()
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit