Author: Richard Plangger <[email protected]>
Branch: vecopt
Changeset: r78378:88fff9cde657
Date: 2015-07-01 13:48 +0200
http://bitbucket.org/pypy/pypy/changeset/88fff9cde657/
Log: resolving issues with * accumulation; there were some assembler
routines I did not implement correctly (but they were not invoked
beforehand)
diff --git a/rpython/jit/backend/x86/assembler.py
b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -559,7 +559,6 @@
self.current_clt.allgcrefs,
self.current_clt.frame_info)
self._check_frame_depth(self.mc, regalloc.get_gcmap())
- #import pdb; pdb.set_trace()
self._accum_update_at_exit(arglocs, inputargs, faildescr, regalloc)
frame_depth_no_fixed_size = self._assemble(regalloc, inputargs,
operations)
codeendpos = self.mc.get_relative_pos()
@@ -2551,7 +2550,6 @@
elif accum_info.operation == '*':
self._accum_reduce_mul(arg, loc, tgtloc)
else:
- import pdb; pdb.set_trace()
not_implemented("accum operator %s not implemented" %
(accum_info.operation))
fail_locs[pos] = tgtloc
@@ -2559,13 +2557,13 @@
accum_info = accum_info.prev
def _accum_reduce_mul(self, arg, accumloc, targetloc):
- scratchloc = X86_64_SCRATCH_REG
- self.mc.mov(scratchloc, accumloc)
+ scratchloc = X86_64_XMM_SCRATCH_REG
+ self.mov(accumloc, scratchloc)
# swap the two elements
self.mc.SHUFPS_xxi(scratchloc.value, scratchloc.value, 0x01)
self.mc.MULPD(accumloc, scratchloc)
if accumloc is not targetloc:
- self.mc.mov(targetloc, accumloc)
+ self.mov(accumloc, targetloc)
def _accum_reduce_sum(self, arg, accumloc, targetloc):
# Currently the accumulator can ONLY be the biggest
@@ -2575,7 +2573,7 @@
self.mc.HADDPD(accumloc, accumloc)
# upper bits (> 64) are dirty (but does not matter)
if accumloc is not targetloc:
- self.mov(targetloc, accumloc)
+ self.mov(accumloc, targetloc)
return
elif arg.type == INT:
scratchloc = X86_64_SCRATCH_REG
@@ -2757,7 +2755,9 @@
def genop_vec_float_expand(self, op, arglocs, resloc):
srcloc, sizeloc = arglocs
size = sizeloc.value
- if size == 4:
+ if isinstance(srcloc, ConstFloatLoc):
+ self.mov(srcloc, resloc)
+ elif size == 4:
# the register allocator forces src to be the same as resloc
# r = (s[0], s[0], r[0], r[0])
# since resloc == srcloc: r = (r[0], r[0], r[0], r[0])
@@ -2864,7 +2864,7 @@
# if source is a normal register (unpack)
assert count == 1
assert si == 0
- self.mov(X86_64_XMM_SCRATCH_REG, srcloc)
+ self.mov(srcloc, X86_64_XMM_SCRATCH_REG)
src = X86_64_XMM_SCRATCH_REG.value
select = ((si & 0x3) << 6)|((ri & 0x3) << 4)
self.mc.INSERTPS_xxi(resloc.value, src, select)
diff --git a/rpython/jit/backend/x86/regalloc.py
b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -81,13 +81,11 @@
rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[1] = y
return ConstFloatLoc(adr)
- def expand_float(self, var, const):
- assert isinstance(var, BoxVector)
- if var.getsize() == 4:
+ def expand_float(self, size, const):
+ if size == 4:
loc = self.expand_single_float(const)
else:
loc = self.expand_double_float(const)
- self.reg_bindings[var] = loc
return loc
def expand_double_float(self, f):
@@ -1632,16 +1630,19 @@
consider_vec_float_unpack = consider_vec_int_unpack
def consider_vec_float_expand(self, op):
+ result = op.result
+ assert isinstance(result, BoxVector)
arg = op.getarg(0)
+ args = op.getarglist()
if isinstance(arg, Const):
- resloc = self.xrm.expand_float(op.result, arg)
- # TODO consider this
- return
- args = op.getarglist()
- resloc = self.xrm.force_result_in_reg(op.result, arg, args)
- assert isinstance(op.result, BoxVector)
+ resloc = self.xrm.force_allocate_reg(result)
+ srcloc = self.xrm.expand_float(result.getsize(), arg)
+ else:
+ resloc = self.xrm.force_result_in_reg(op.result, arg, args)
+ srcloc = resloc
+
size = op.result.getsize()
- self.perform(op, [resloc, imm(size)], resloc)
+ self.perform(op, [srcloc, imm(size)], resloc)
def consider_vec_int_expand(self, op):
arg = op.getarg(0)
diff --git a/rpython/jit/metainterp/compile.py
b/rpython/jit/metainterp/compile.py
--- a/rpython/jit/metainterp/compile.py
+++ b/rpython/jit/metainterp/compile.py
@@ -488,8 +488,7 @@
class ResumeGuardDescr(ResumeDescr):
_attrs_ = ('rd_numb', 'rd_count', 'rd_consts', 'rd_virtuals',
- 'rd_frame_info_list', 'rd_pendingfields', 'rd_accum_list',
- 'status')
+ 'rd_frame_info_list', 'rd_pendingfields', 'status')
rd_numb = lltype.nullptr(NUMBERING)
rd_count = 0
diff --git a/rpython/jit/metainterp/history.py
b/rpython/jit/metainterp/history.py
--- a/rpython/jit/metainterp/history.py
+++ b/rpython/jit/metainterp/history.py
@@ -156,7 +156,7 @@
index = -1
final_descr = False
- _attrs_ = ('adr_jump_offset', 'rd_locs', 'rd_loop_token')
+ _attrs_ = ('adr_jump_offset', 'rd_locs', 'rd_loop_token', 'rd_accum_list')
def handle_fail(self, deadframe, metainterp_sd, jitdriver_sd):
raise NotImplementedError
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py
b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -416,7 +416,6 @@
if vbox.gettype() == INT:
return self.extend_int(vbox, newtype)
else:
- import pdb; pdb.set_trace()
raise NotImplementedError("cannot yet extend float")
def extend_int(self, vbox, newtype):
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py
b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -803,7 +803,7 @@
# considered. => tree pattern matching problem.
return None
operator = Accum.PLUS
- if opnum == rop.FLOAT_ADD:
+ if opnum == rop.FLOAT_MUL:
operator = Accum.MULTIPLY
accum = Accum(accum_var, accum_pos, operator)
return AccumPair(lnode, rnode, ptype, ptype, accum)
@@ -837,11 +837,10 @@
box = result
elif accum.operator == Accum.MULTIPLY:
# multiply is only supported by floats
- op = ResOperation(rop.VEC_FLOAT_EXPAND, [ConstInt(1)], box)
+ op = ResOperation(rop.VEC_FLOAT_EXPAND, [ConstFloat(1.0)], box)
sched_data.invariant_oplist.append(op)
else:
- import pdb; pdb.set_trace()
- raise NotImplementedError
+ raise NotImplementedError("can only handle + and *")
result = BoxVectorAccum(box, accum.var, accum.operator)
# pack the scalar value
op = ResOperation(getpackopnum(box.gettype()),
diff --git a/rpython/jit/metainterp/resoperation.py
b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -512,8 +512,8 @@
'VEC_FLOAT_PACK/4', # VEC_FLOAT_PACK(vX, var/const, index,
item_count)
'VEC_INT_UNPACK/3', # iX|fX = VEC_INT_UNPACK(vX, index,
item_count)
'VEC_INT_PACK/4', # VEC_INT_PACK(vX, var/const, index,
item_count)
- 'VEC_FLOAT_EXPAND/1', # vX = VEC_FLOAT_EXPAND(var/const, item_count)
- 'VEC_INT_EXPAND/1', # vX = VEC_INT_EXPAND(var/const, item_count)
+ 'VEC_FLOAT_EXPAND/1', # vX = VEC_FLOAT_EXPAND(var/const)
+ 'VEC_INT_EXPAND/1', # vX = VEC_INT_EXPAND(var/const)
'VEC_BOX/1',
'_VEC_PURE_LAST',
#
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit