Author: Richard Plangger <r...@pasra.at>
Branch: vecopt
Changeset: r78012:6497803fbba6
Date: 2015-06-10 14:10 +0200
http://bitbucket.org/pypy/pypy/changeset/6497803fbba6/

Log:    added a new field to the resume guard descr to handle accumulation
        variables at guard exit; implemented the accumulation for float
        (64/32 bit) for x86 at guard exit (still need to fill the info at
        the new field of resume guard descrs)

diff --git a/rpython/jit/backend/x86/assembler.py 
b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -1809,6 +1809,12 @@
         """
         self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
         startpos = self.mc.get_relative_pos()
+        # accumulation of a vectorized loop needs to patch
+        # some vector registers (e.g. sum).
+        if guardtok.faildescr.update_at_exit is not None:
+            for pae in guardtok.faildescr.update_at_exit:
+                self._update_at_exit(guardtok.fail_locs,pae)
+            guardtok.fail_descr.update_at_exit = None
         fail_descr, target = self.store_info_on_descr(startpos, guardtok)
         self.mc.PUSH(imm(fail_descr))
         self.push_gcmap(self.mc, guardtok.gcmap, push=True)
@@ -2471,6 +2477,41 @@
     # vector operations
     # ________________________________________
 
+    def _accum_update_at_exit(self, fail_locs, accum_descr):
+        """ If accumulation is done in this loop, at the guard exit
+        some vector registers must be adjusted to yield the correct value.
+        fail_locs[accum_descr.position] is the location that gets reduced."""
+        loc = fail_locs[accum_descr.position]
+        vector_var = accum_descr.vector_var
+        scalar_var = accum_descr.scalar_var
+        if accum_descr.operator == '+':
+            # reduction using plus, delegated to the sum helper of this class
+            self._accum_reduce_sum(vector_var, scalar_var, loc)
+        else:
+            raise NotImplementedError("accum operator %s not implemented" %
+                                      (accum_descr.operator,))
+
+    def _accum_reduce_sum(self, vector_var, scalar_var, regloc):
+        """ Sum up the float items of the vector held in regloc; the
+        total ends up in the first element of that same register. """
+        assert isinstance(vector_var, BoxVector)
+        assert isinstance(scalar_var, Box)
+        #
+        is_float = vector_var.gettype() == FLOAT
+        if is_float and vector_var.getsize() == 8:
+            # one horizontal add: r = (r[0]+r[1],r[0]+r[1])
+            # upper bits (> 64) are dirty (but does not matter)
+            self.mc.HADDPD(regloc, regloc)
+        elif is_float and vector_var.getsize() == 4:
+            # first add: r = (r[0]+r[1],r[2]+r[3],r[0]+r[1],r[2]+r[3])
+            # the second add gathers the whole sum at the first element
+            # position; the upper bits (>32) are dirty (does not matter)
+            for _ in range(2):
+                self.mc.HADDPS(regloc, regloc)
+        else:
+            msg = "reduce sum for %s not impl." % vector_var
+            raise NotImplementedError(msg)
+
     def genop_vec_getarrayitem_raw(self, op, arglocs, resloc):
         # considers item scale (raw_load does not)
         base_loc, ofs_loc, size_loc, ofs, integer_loc, aligned_loc = arglocs
diff --git a/rpython/jit/metainterp/compile.py 
b/rpython/jit/metainterp/compile.py
--- a/rpython/jit/metainterp/compile.py
+++ b/rpython/jit/metainterp/compile.py
@@ -488,7 +488,8 @@
 
 class ResumeGuardDescr(ResumeDescr):
     _attrs_ = ('rd_numb', 'rd_count', 'rd_consts', 'rd_virtuals',
-               'rd_frame_info_list', 'rd_pendingfields', 'status')
+               'rd_frame_info_list', 'rd_pendingfields', 'status',
+               'update_at_exit')
     
     rd_numb = lltype.nullptr(NUMBERING)
     rd_count = 0
@@ -498,6 +499,7 @@
     rd_pendingfields = lltype.nullptr(PENDINGFIELDSP.TO)
 
     status = r_uint(0)
+    update_at_exit = None
 
     def copy_all_attributes_from(self, other):
         assert isinstance(other, ResumeGuardDescr)
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to