Author: Richard Plangger <r...@pasra.at> Branch: vecopt Changeset: r78378:88fff9cde657 Date: 2015-07-01 13:48 +0200 http://bitbucket.org/pypy/pypy/changeset/88fff9cde657/
Log: resolving issues with * accumulation; there were some assembler routines I did not implement correctly (but they were not invoked beforehand) diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -559,7 +559,6 @@ self.current_clt.allgcrefs, self.current_clt.frame_info) self._check_frame_depth(self.mc, regalloc.get_gcmap()) - #import pdb; pdb.set_trace() self._accum_update_at_exit(arglocs, inputargs, faildescr, regalloc) frame_depth_no_fixed_size = self._assemble(regalloc, inputargs, operations) codeendpos = self.mc.get_relative_pos() @@ -2551,7 +2550,6 @@ elif accum_info.operation == '*': self._accum_reduce_mul(arg, loc, tgtloc) else: - import pdb; pdb.set_trace() not_implemented("accum operator %s not implemented" % (accum_info.operation)) fail_locs[pos] = tgtloc @@ -2559,13 +2557,13 @@ accum_info = accum_info.prev def _accum_reduce_mul(self, arg, accumloc, targetloc): - scratchloc = X86_64_SCRATCH_REG - self.mc.mov(scratchloc, accumloc) + scratchloc = X86_64_XMM_SCRATCH_REG + self.mov(accumloc, scratchloc) # swap the two elements self.mc.SHUFPS_xxi(scratchloc.value, scratchloc.value, 0x01) self.mc.MULPD(accumloc, scratchloc) if accumloc is not targetloc: - self.mc.mov(targetloc, accumloc) + self.mov(accumloc, targetloc) def _accum_reduce_sum(self, arg, accumloc, targetloc): # Currently the accumulator can ONLY be the biggest @@ -2575,7 +2573,7 @@ self.mc.HADDPD(accumloc, accumloc) # upper bits (> 64) are dirty (but does not matter) if accumloc is not targetloc: - self.mov(targetloc, accumloc) + self.mov(accumloc, targetloc) return elif arg.type == INT: scratchloc = X86_64_SCRATCH_REG @@ -2757,7 +2755,9 @@ def genop_vec_float_expand(self, op, arglocs, resloc): srcloc, sizeloc = arglocs size = sizeloc.value - if size == 4: + if isinstance(srcloc, ConstFloatLoc): + self.mov(srcloc, resloc) + elif size == 4: # the register allocator forces src 
to be the same as resloc # r = (s[0], s[0], r[0], r[0]) # since resloc == srcloc: r = (r[0], r[0], r[0], r[0]) @@ -2864,7 +2864,7 @@ # if source is a normal register (unpack) assert count == 1 assert si == 0 - self.mov(X86_64_XMM_SCRATCH_REG, srcloc) + self.mov(srcloc, X86_64_XMM_SCRATCH_REG) src = X86_64_XMM_SCRATCH_REG.value select = ((si & 0x3) << 6)|((ri & 0x3) << 4) self.mc.INSERTPS_xxi(resloc.value, src, select) diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -81,13 +81,11 @@ rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[1] = y return ConstFloatLoc(adr) - def expand_float(self, var, const): - assert isinstance(var, BoxVector) - if var.getsize() == 4: + def expand_float(self, size, const): + if size == 4: loc = self.expand_single_float(const) else: loc = self.expand_double_float(const) - self.reg_bindings[var] = loc return loc def expand_double_float(self, f): @@ -1632,16 +1630,19 @@ consider_vec_float_unpack = consider_vec_int_unpack def consider_vec_float_expand(self, op): + result = op.result + assert isinstance(result, BoxVector) arg = op.getarg(0) + args = op.getarglist() if isinstance(arg, Const): - resloc = self.xrm.expand_float(op.result, arg) - # TODO consider this - return - args = op.getarglist() - resloc = self.xrm.force_result_in_reg(op.result, arg, args) - assert isinstance(op.result, BoxVector) + resloc = self.xrm.force_allocate_reg(result) + srcloc = self.xrm.expand_float(result.getsize(), arg) + else: + resloc = self.xrm.force_result_in_reg(op.result, arg, args) + srcloc = resloc + size = op.result.getsize() - self.perform(op, [resloc, imm(size)], resloc) + self.perform(op, [srcloc, imm(size)], resloc) def consider_vec_int_expand(self, op): arg = op.getarg(0) diff --git a/rpython/jit/metainterp/compile.py b/rpython/jit/metainterp/compile.py --- a/rpython/jit/metainterp/compile.py +++ 
b/rpython/jit/metainterp/compile.py @@ -488,8 +488,7 @@ class ResumeGuardDescr(ResumeDescr): _attrs_ = ('rd_numb', 'rd_count', 'rd_consts', 'rd_virtuals', - 'rd_frame_info_list', 'rd_pendingfields', 'rd_accum_list', - 'status') + 'rd_frame_info_list', 'rd_pendingfields', 'status') rd_numb = lltype.nullptr(NUMBERING) rd_count = 0 diff --git a/rpython/jit/metainterp/history.py b/rpython/jit/metainterp/history.py --- a/rpython/jit/metainterp/history.py +++ b/rpython/jit/metainterp/history.py @@ -156,7 +156,7 @@ index = -1 final_descr = False - _attrs_ = ('adr_jump_offset', 'rd_locs', 'rd_loop_token') + _attrs_ = ('adr_jump_offset', 'rd_locs', 'rd_loop_token', 'rd_accum_list') def handle_fail(self, deadframe, metainterp_sd, jitdriver_sd): raise NotImplementedError diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py --- a/rpython/jit/metainterp/optimizeopt/schedule.py +++ b/rpython/jit/metainterp/optimizeopt/schedule.py @@ -416,7 +416,6 @@ if vbox.gettype() == INT: return self.extend_int(vbox, newtype) else: - import pdb; pdb.set_trace() raise NotImplementedError("cannot yet extend float") def extend_int(self, vbox, newtype): diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py --- a/rpython/jit/metainterp/optimizeopt/vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/vectorize.py @@ -803,7 +803,7 @@ # considered. => tree pattern matching problem. 
return None operator = Accum.PLUS - if opnum == rop.FLOAT_ADD: + if opnum == rop.FLOAT_MUL: operator = Accum.MULTIPLY accum = Accum(accum_var, accum_pos, operator) return AccumPair(lnode, rnode, ptype, ptype, accum) @@ -837,11 +837,10 @@ box = result elif accum.operator == Accum.MULTIPLY: # multiply is only supported by floats - op = ResOperation(rop.VEC_FLOAT_EXPAND, [ConstInt(1)], box) + op = ResOperation(rop.VEC_FLOAT_EXPAND, [ConstFloat(1.0)], box) sched_data.invariant_oplist.append(op) else: - import pdb; pdb.set_trace() - raise NotImplementedError + raise NotImplementedError("can only handle + and *") result = BoxVectorAccum(box, accum.var, accum.operator) # pack the scalar value op = ResOperation(getpackopnum(box.gettype()), diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py --- a/rpython/jit/metainterp/resoperation.py +++ b/rpython/jit/metainterp/resoperation.py @@ -512,8 +512,8 @@ 'VEC_FLOAT_PACK/4', # VEC_FLOAT_PACK(vX, var/const, index, item_count) 'VEC_INT_UNPACK/3', # iX|fX = VEC_INT_UNPACK(vX, index, item_count) 'VEC_INT_PACK/4', # VEC_INT_PACK(vX, var/const, index, item_count) - 'VEC_FLOAT_EXPAND/1', # vX = VEC_FLOAT_EXPAND(var/const, item_count) - 'VEC_INT_EXPAND/1', # vX = VEC_INT_EXPAND(var/const, item_count) + 'VEC_FLOAT_EXPAND/1', # vX = VEC_FLOAT_EXPAND(var/const) + 'VEC_INT_EXPAND/1', # vX = VEC_INT_EXPAND(var/const) 'VEC_BOX/1', '_VEC_PURE_LAST', # _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit