Author: Richard Plangger <r...@pasra.at> Branch: vecopt Changeset: r78012:6497803fbba6 Date: 2015-06-10 14:10 +0200 http://bitbucket.org/pypy/pypy/changeset/6497803fbba6/
Log: added a new field to the resume guard descr to handle accumulation variables at guard exit. Implemented the accumulation for float (64/32 bit) for x86 at guard exit (still need to fill the info at the new field of resume guard descrs). diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -1809,6 +1809,12 @@ """ self.mc.force_frame_size(DEFAULT_FRAME_BYTES) startpos = self.mc.get_relative_pos() + # accumulation of a vectorized loop needs to patch + # some vector registers (e.g. sum). + if guardtok.faildescr.update_at_exit is not None: + for pae in guardtok.faildescr.update_at_exit: + self._update_at_exit(guardtok.fail_locs,pae) + guardtok.fail_descr.update_at_exit = None fail_descr, target = self.store_info_on_descr(startpos, guardtok) self.mc.PUSH(imm(fail_descr)) self.push_gcmap(self.mc, guardtok.gcmap, push=True) @@ -2471,6 +2477,41 @@ # vector operations # ________________________________________ + def _accum_update_at_exit(self, fail_locs, accum_descr): + """ If accumulation is done in this loop, at the guard exit + some vector registers must be adjusted to yield the correct value""" + pass + loc = fail_locs[accum_descr.position] + vector_var = accum_descr.vector_var + scalar_var = accum_descr.scalar_var + if accum_descr.operator == '+': + # reduction using plus + self._accum_reduce_float_sum(vector_var, scalar_var, loc) + else: + raise NotImplementedError("accum operator %s not implemented" % + (accum_descr.operator)) + + def _accum_reduce_sum(self, vector_var, scalar_var, regloc): + assert isinstance(vector_var, BoxVector) + assert isinstance(scalar_var, Box) + # + if vector_var.gettype() == FLOAT: + if vector_var.getsize() == 8: + # r = (r[0]+r[1],r[0]+r[1]) + self.mc.HADDPD(regloc, regloc) + # upper bits (> 64) are dirty (but does not matter) + return + if vector_var.getsize() == 4: + # r = (r[0]+r[1],r[2]+r[3],r[0]+r[1],r[2]+r[3]) 
+ self.mc.HADDPS(regloc, regloc) + self.mc.HADDPS(regloc, regloc) + # invoking it a second time will gather the whole sum + # at the first element position + # the upper bits (>32) are dirty (but does not matter) + return + + raise NotImplementedError("reduce sum for %s not impl." % vector_var) + def genop_vec_getarrayitem_raw(self, op, arglocs, resloc): # considers item scale (raw_load does not) base_loc, ofs_loc, size_loc, ofs, integer_loc, aligned_loc = arglocs diff --git a/rpython/jit/metainterp/compile.py b/rpython/jit/metainterp/compile.py --- a/rpython/jit/metainterp/compile.py +++ b/rpython/jit/metainterp/compile.py @@ -488,7 +488,8 @@ class ResumeGuardDescr(ResumeDescr): _attrs_ = ('rd_numb', 'rd_count', 'rd_consts', 'rd_virtuals', - 'rd_frame_info_list', 'rd_pendingfields', 'status') + 'rd_frame_info_list', 'rd_pendingfields', 'status', + 'update_at_exit') rd_numb = lltype.nullptr(NUMBERING) rd_count = 0 @@ -498,6 +499,7 @@ rd_pendingfields = lltype.nullptr(PENDINGFIELDSP.TO) status = r_uint(0) + update_at_exit = None def copy_all_attributes_from(self, other): assert isinstance(other, ResumeGuardDescr) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit