Author: David Schneider <[email protected]>
Branch: arm-backend-2
Changeset: r51548:d06bbcb1c9fb
Date: 2012-01-20 17:13 +0100
http://bitbucket.org/pypy/pypy/changeset/d06bbcb1c9fb/
Log: (arigo, bivab) refactor the code used to make calls to handle more
work in the register allocator and perform calls only using
locations.
diff --git a/pypy/jit/backend/arm/opassembler.py b/pypy/jit/backend/arm/opassembler.py
--- a/pypy/jit/backend/arm/opassembler.py
+++ b/pypy/jit/backend/arm/opassembler.py
@@ -361,14 +361,13 @@
self.gen_func_epilog()
return fcond
- def emit_op_call(self, op, args, regalloc, fcond,
- force_index=NO_FORCE_INDEX):
- adr = args[0].value
- arglist = op.getarglist()[1:]
+ def emit_op_call(self, op, arglocs, regalloc, fcond, force_index=NO_FORCE_INDEX):
if force_index == NO_FORCE_INDEX:
force_index = self.write_new_force_index()
- cond = self._emit_call(force_index, adr, arglist,
- regalloc, fcond, op.result)
+ resloc = arglocs[0]
+ adr = arglocs[1]
+ arglist = arglocs[2:]
+ cond = self._emit_call(force_index, adr, arglist, fcond, resloc)
descr = op.getdescr()
#XXX Hack, Hack, Hack
if (op.result and not we_are_translated()):
@@ -379,15 +378,10 @@
self._ensure_result_bit_extension(loc, size, signed)
return cond
- # XXX improve this interface
- # emit_op_call_may_force
- # XXX improve freeing of stuff here
- # XXX add an interface that takes locations instead of boxes
- def _emit_call(self, force_index, adr, args, regalloc, fcond=c.AL,
- result=None):
- n_args = len(args)
- reg_args = count_reg_args(args)
-
+ def _emit_call(self, force_index, adr, arglocs, fcond=c.AL, resloc=None):
+ assert self._regalloc.before_call_called
+ n_args = len(arglocs)
+ reg_args = count_reg_args(arglocs)
# all arguments past the 4th go on the stack
n = 0 # used to count the number of words pushed on the stack, so we
#can later modify the SP back to its original value
@@ -396,7 +390,7 @@
stack_args = []
count = 0
for i in range(reg_args, n_args):
- arg = args[i]
+ arg = arglocs[i]
if arg.type != FLOAT:
count += 1
n += WORD
@@ -417,8 +411,7 @@
if arg is None:
self.mc.PUSH([r.ip.value])
else:
- self.regalloc_push(regalloc.loc(arg))
-
+ self.regalloc_push(arg)
# collect variables that need to go in registers and the registers they
# will be stored in
num = 0
@@ -427,16 +420,16 @@
non_float_regs = []
float_locs = []
for i in range(reg_args):
- arg = args[i]
+ arg = arglocs[i]
if arg.type == FLOAT and count % 2 != 0:
num += 1
count = 0
reg = r.caller_resp[num]
if arg.type == FLOAT:
- float_locs.append((regalloc.loc(arg), reg))
+ float_locs.append((arg, reg))
else:
- non_float_locs.append(regalloc.loc(arg))
+ non_float_locs.append(arg)
non_float_regs.append(reg)
if arg.type == FLOAT:
@@ -457,14 +450,12 @@
#the actual call
self.mc.BL(adr)
self.mark_gc_roots(force_index)
- regalloc.possibly_free_vars(args)
# readjust the sp in case we passed some args on the stack
if n > 0:
self._adjust_sp(-n, fcond=fcond)
# restore the argumets stored on the stack
- if result is not None:
- resloc = regalloc.after_call(result)
+ if resloc is not None:
if resloc.is_vfp_reg():
# move result to the allocated register
self.mov_to_vfp_loc(r.r0, r.r1, resloc)
@@ -889,8 +880,8 @@
length_box = TempInt()
length_loc = regalloc.force_allocate_reg(length_box,
forbidden_vars, selected_reg=r.r2)
- imm = regalloc.convert_to_imm(args[4])
- self.load(length_loc, imm)
+ immloc = regalloc.convert_to_imm(args[4])
+ self.load(length_loc, immloc)
if is_unicode:
bytes_box = TempPtr()
bytes_loc = regalloc.force_allocate_reg(bytes_box,
@@ -902,8 +893,9 @@
length_box = bytes_box
length_loc = bytes_loc
# call memcpy()
- self._emit_call(NO_FORCE_INDEX, self.memcpy_addr,
- [dstaddr_box, srcaddr_box, length_box], regalloc)
+ regalloc.before_call()
+ self._emit_call(NO_FORCE_INDEX, imm(self.memcpy_addr),
+ [dstaddr_loc, srcaddr_loc, length_loc])
regalloc.possibly_free_var(length_box)
regalloc.possibly_free_var(dstaddr_box)
@@ -993,17 +985,19 @@
# XXX Split into some helper methods
def emit_guard_call_assembler(self, op, guard_op, arglocs, regalloc,
fcond):
+ tmploc = arglocs[1]
+ resloc = arglocs[2]
+ callargs = arglocs[3:]
+
faildescr = guard_op.getdescr()
fail_index = self.cpu.get_fail_descr_number(faildescr)
self._write_fail_index(fail_index)
-
descr = op.getdescr()
assert isinstance(descr, JitCellToken)
- # XXX check this
- # assert len(arglocs) - 2 == descr.compiled_loop_token._debug_nbargs
- resbox = TempInt()
- self._emit_call(fail_index, descr._arm_func_addr,
- op.getarglist(), regalloc, fcond, result=resbox)
+ # check value
+ assert tmploc is r.r0
+ self._emit_call(fail_index, imm(descr._arm_func_addr),
+ callargs, fcond, resloc=tmploc)
if op.result is None:
value = self.cpu.done_with_this_frame_void_v
else:
@@ -1016,12 +1010,8 @@
value = self.cpu.done_with_this_frame_float_v
else:
raise AssertionError(kind)
- # check value
- resloc = regalloc.try_allocate_reg(resbox)
- assert resloc is r.r0
self.mc.gen_load_int(r.ip.value, value)
- self.mc.CMP_rr(resloc.value, r.ip.value)
- regalloc.possibly_free_var(resbox)
+ self.mc.CMP_rr(tmploc.value, r.ip.value)
fast_jmp_pos = self.mc.currpos()
self.mc.BKPT()
@@ -1035,14 +1025,12 @@
asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)
with saved_registers(self.mc, r.caller_resp[1:] + [r.ip],
r.caller_vfp_resp):
- # resbox is allready in r0
- self.mov_loc_loc(arglocs[1], r.r1)
+ # result of previous call is in r0
+ self.mov_loc_loc(arglocs[0], r.r1)
self.mc.BL(asm_helper_adr)
- if op.result:
- resloc = regalloc.after_call(op.result)
- if resloc.is_vfp_reg():
- # move result to the allocated register
- self.mov_to_vfp_loc(r.r0, r.r1, resloc)
+ if op.result and resloc.is_vfp_reg():
+ # move result to the allocated register
+ self.mov_to_vfp_loc(r.r0, r.r1, resloc)
# jump to merge point
jmp_pos = self.mc.currpos()
@@ -1063,11 +1051,10 @@
fielddescr = jd.vable_token_descr
assert isinstance(fielddescr, FieldDescr)
ofs = fielddescr.offset
- resloc = regalloc.force_allocate_reg(resbox)
- self.mov_loc_loc(arglocs[1], r.ip)
- self.mc.MOV_ri(resloc.value, 0)
- self.mc.STR_ri(resloc.value, r.ip.value, ofs)
- regalloc.possibly_free_var(resbox)
+ tmploc = regalloc.get_scratch_reg(INT)
+ self.mov_loc_loc(arglocs[0], r.ip)
+ self.mc.MOV_ri(tmploc.value, 0)
+ self.mc.STR_ri(tmploc.value, r.ip.value, ofs)
if op.result is not None:
# load the return value from fail_boxes_xxx[0]
@@ -1080,8 +1067,6 @@
adr = self.fail_boxes_float.get_addr_for_num(0)
else:
raise AssertionError(kind)
- resloc = regalloc.force_allocate_reg(op.result)
- regalloc.possibly_free_var(resbox)
self.mc.gen_load_int(r.ip.value, adr)
if op.result.type == FLOAT:
self.mc.VLDR(resloc.value, r.ip.value)
@@ -1118,14 +1103,48 @@
def emit_guard_call_may_force(self, op, guard_op, arglocs, regalloc,
fcond):
+ faildescr = guard_op.getdescr()
+ fail_index = self.cpu.get_fail_descr_number(faildescr)
+ self._write_fail_index(fail_index)
+ numargs = op.numargs()
+ callargs = arglocs[2:numargs]
+ adr = arglocs[1]
+ resloc = arglocs[0]
+ self._emit_call(fail_index, adr, callargs, fcond, resloc)
+
+ self.mc.LDR_ri(r.ip.value, r.fp.value)
+ self.mc.CMP_ri(r.ip.value, 0)
+ self._emit_guard(guard_op, arglocs[1 + numargs:], c.GE, save_exc=True)
+ return fcond
+
+ def emit_guard_call_release_gil(self, op, guard_op, arglocs, regalloc,
+ fcond):
+
+ # first, close the stack in the sense of the asmgcc GC root tracker
+ gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+ numargs = op.numargs()
+ resloc = arglocs[0]
+ adr = arglocs[1]
+ callargs = arglocs[2:numargs]
+
+ if gcrootmap:
+ self.call_release_gil(gcrootmap, arglocs, fcond)
+ # do the call
+ faildescr = guard_op.getdescr()
+ fail_index = self.cpu.get_fail_descr_number(faildescr)
+ self._write_fail_index(fail_index)
+
+ self._emit_call(fail_index, adr, callargs, fcond, resloc)
+ # then reopen the stack
+ if gcrootmap:
+ self.call_reacquire_gil(gcrootmap, resloc, fcond)
+
self.mc.LDR_ri(r.ip.value, r.fp.value)
self.mc.CMP_ri(r.ip.value, 0)
- self._emit_guard(guard_op, arglocs, c.GE, save_exc=True)
+ self._emit_guard(guard_op, arglocs[1 + numargs:], c.GE, save_exc=True)
return fcond
- emit_guard_call_release_gil = emit_guard_call_may_force
-
def call_release_gil(self, gcrootmap, save_registers, fcond):
# First, we need to save away the registers listed in
# 'save_registers' that are not callee-save. XXX We assume that
@@ -1136,8 +1155,7 @@
regs_to_save.append(reg)
assert gcrootmap.is_shadow_stack
with saved_registers(self.mc, regs_to_save):
- self._emit_call(NO_FORCE_INDEX, self.releasegil_addr, [],
- self._regalloc, fcond)
+ self._emit_call(NO_FORCE_INDEX, imm(self.releasegil_addr), [], fcond)
def call_reacquire_gil(self, gcrootmap, save_loc, fcond):
# save the previous result into the stack temporarily.
@@ -1154,8 +1172,7 @@
regs_to_save.append(r.ip) # for alingment
assert gcrootmap.is_shadow_stack
with saved_registers(self.mc, regs_to_save, vfp_regs_to_save):
- self._emit_call(NO_FORCE_INDEX, self.reacqgil_addr, [],
- self._regalloc, fcond)
+ self._emit_call(NO_FORCE_INDEX, imm(self.reacqgil_addr), [], fcond)
def write_new_force_index(self):
# for shadowstack only: get a new, unused force_index number and
diff --git a/pypy/jit/backend/arm/regalloc.py b/pypy/jit/backend/arm/regalloc.py
--- a/pypy/jit/backend/arm/regalloc.py
+++ b/pypy/jit/backend/arm/regalloc.py
@@ -553,12 +553,28 @@
args = self.prepare_op_math_sqrt(op, fcond)
self.assembler.emit_op_math_sqrt(op, args, self, fcond)
return
- args = [imm(rffi.cast(lltype.Signed, op.getarg(0).getint()))]
+ return self._prepare_call(op)
+
+ def _prepare_call(self, op, force_store=[], save_all_regs=False):
+ args = []
+ args.append(None)
+ for i in range(op.numargs()):
+ args.append(self.loc(op.getarg(i)))
+ # spill variables that need to be saved around calls
+ self.vfprm.before_call(save_all_regs=save_all_regs)
+ if not save_all_regs:
+ gcrootmap = self.assembler.cpu.gc_ll_descr.gcrootmap
+ if gcrootmap and gcrootmap.is_shadow_stack:
+ save_all_regs = 2
+ self.rm.before_call(save_all_regs=save_all_regs)
+ if op.result:
+ resloc = self.after_call(op.result)
+ args[0] = resloc
+ self.before_call_called = True
return args
def prepare_op_call_malloc_gc(self, op, fcond):
- args = [imm(rffi.cast(lltype.Signed, op.getarg(0).getint()))]
- return args
+ return self._prepare_call(op)
def _prepare_guard(self, op, args=None):
if args is None:
@@ -1033,58 +1049,25 @@
self._compute_hint_frame_locations_from_descr(descr)
def prepare_guard_call_may_force(self, op, guard_op, fcond):
- faildescr = guard_op.getdescr()
- fail_index = self.cpu.get_fail_descr_number(faildescr)
- self.assembler._write_fail_index(fail_index)
- args = [imm(rffi.cast(lltype.Signed, op.getarg(0).getint()))]
- for v in guard_op.getfailargs():
- if v in self.rm.reg_bindings or v in self.vfprm.reg_bindings:
- self.force_spill_var(v)
- self.assembler.emit_op_call(op, args, self, fcond, fail_index)
- locs = self._prepare_guard(guard_op)
- self.possibly_free_vars(guard_op.getfailargs())
- return locs
-
- def prepare_guard_call_release_gil(self, op, guard_op, fcond):
- # first, close the stack in the sense of the asmgcc GC root tracker
- gcrootmap = self.cpu.gc_ll_descr.gcrootmap
- if gcrootmap:
- arglocs = []
- args = op.getarglist()
- for i in range(op.numargs()):
- loc = self._ensure_value_is_boxed(op.getarg(i), args)
- arglocs.append(loc)
- self.assembler.call_release_gil(gcrootmap, arglocs, fcond)
- # do the call
- faildescr = guard_op.getdescr()
- fail_index = self.cpu.get_fail_descr_number(faildescr)
- self.assembler._write_fail_index(fail_index)
- args = [imm(rffi.cast(lltype.Signed, op.getarg(0).getint()))]
- self.assembler.emit_op_call(op, args, self, fcond, fail_index)
- # then reopen the stack
- if gcrootmap:
- if op.result:
- result_loc = self.call_result_location(op.result)
- else:
- result_loc = None
- self.assembler.call_reacquire_gil(gcrootmap, result_loc, fcond)
- locs = self._prepare_guard(guard_op)
- return locs
+ args = self._prepare_call(op, save_all_regs=True)
+ return self._prepare_guard(guard_op, args)
+ prepare_guard_call_release_gil = prepare_guard_call_may_force
def prepare_guard_call_assembler(self, op, guard_op, fcond):
descr = op.getdescr()
assert isinstance(descr, JitCellToken)
jd = descr.outermost_jitdriver_sd
assert jd is not None
- size = jd.portal_calldescr.get_result_size()
vable_index = jd.index_of_virtualizable
if vable_index >= 0:
self._sync_var(op.getarg(vable_index))
vable = self.frame_manager.loc(op.getarg(vable_index))
else:
vable = imm(0)
+ # make sure the call result location is free
+ tmploc = self.get_scratch_reg(INT, selected_reg=r.r0)
self.possibly_free_vars(guard_op.getfailargs())
- return [imm(size), vable]
+ return [vable, tmploc] + self._prepare_call(op, save_all_regs=True)
def _prepare_args_for_new_op(self, new_args):
gc_ll_descr = self.cpu.gc_ll_descr
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit