Author: Richard Plangger <r...@pasra.at>
Branch: vecopt
Changeset: r78322:7419dfb817a7
Date: 2015-06-26 14:48 +0200
http://bitbucket.org/pypy/pypy/changeset/7419dfb817a7/

Log:    trying to make things easier; complexity gets hard to manage when
        extending the accumulation. trying to prevent the splitting from
        entering the scheduling (work in progress)

diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -188,6 +188,9 @@
     def new_vector_box(self, count = -1):
         if count == -1:
             count = self.count
+        assert count > 1
+        assert self.type in ('i','f')
+        assert self.size > 0
         return BoxVector(self.type, count, self.size, self.signed)
 
     def __repr__(self):
@@ -291,29 +294,29 @@
         pass
 
     def transform_pack(self):
-        self.off = 0
-        while self.off < self.pack.opcount():
-            op = self.pack.operations[self.off].getoperation()
-            args = op.getarglist()
-            #
-            self.before_argument_transform(args)
-            #
-            argument_infos = []
-            self.transform_arguments(args, argument_infos)
-            #
-            result = op.result
-            result = self.transform_result(result)
-            #
-            vop = ResOperation(op.vector, args, result, op.getdescr())
-            if op.is_guard():
-                assert isinstance(op, GuardResOp)
-                vop.setfailargs(op.getfailargs())
-                vop.rd_snapshot = op.rd_snapshot
-            self.preamble_ops.append(vop)
-            stride = self.consumed_operations(argument_infos, result)
-            self.costmodel.record_pack_savings(self.pack, stride)
-            assert stride != 0
-            self.off += stride
+        #self.off = 0
+        #while self.off < self.pack.opcount():
+        op = self.pack.operations[0].getoperation()
+        args = op.getarglist()
+        #
+        self.before_argument_transform(args)
+        #
+        argument_infos = []
+        self.transform_arguments(args, argument_infos)
+        #
+        result = op.result
+        result = self.transform_result(result)
+        #
+        vop = ResOperation(op.vector, args, result, op.getdescr())
+        if op.is_guard():
+            assert isinstance(op, GuardResOp)
+            vop.setfailargs(op.getfailargs())
+            vop.rd_snapshot = op.rd_snapshot
+        self.preamble_ops.append(vop)
+        #stride = self.consumed_operations(argument_infos, result)
+        self.costmodel.record_pack_savings(self.pack, self.pack.opcount())
+        #assert stride != 0
+        #self.off += stride
 
     def consumed_operations(self, argument_infos, result):
         ops = self.getoperations()
@@ -348,7 +351,7 @@
         return BoxVector(type, count, size, signed)
 
     def getoperations(self):
-        return self.pack.operations[self.off:]
+        return self.pack.operations
 
     def transform_arguments(self, args, argument_info):
         for i,arg in enumerate(args):
@@ -406,16 +409,14 @@
             argument_info.append(args[i].item_count)
 
     def gather(self, vboxes, target_count): # packed < packable and packed < 
stride:
-        i = 0
         (_, box) = vboxes[0]
+        i = 1
         while i < len(vboxes):
-            if i+1 >= len(vboxes):
-                break
-            (box2_pos, box2) = vboxes[i+1]
+            (box2_pos, box2) = vboxes[i]
             if box.getcount() + box2.getcount() <= target_count:
                 box = self.package(box, box.getcount(),
                                    box2, box2_pos, box2.getcount())
-            i += 2
+            i += 1
         return box
         pass
                 # OLD
@@ -453,8 +454,10 @@
     def extend_int(self, vbox, newtype):
         vbox_cloned = newtype.new_vector_box(vbox.item_count)
         self.sched_data._prevent_signext(newtype.getsize(), vbox.getsize())
+        newsize = newtype.getsize()
+        assert newsize > 0
         op = ResOperation(rop.VEC_INT_SIGNEXT, 
-                          [vbox, ConstInt(newtype.getsize())],
+                          [vbox, ConstInt(newsize)],
                           vbox_cloned)
         self.costmodel.record_cast_int(vbox.getsize(), newtype.getsize(), 
vbox.getcount())
         self.preamble_ops.append(op)
@@ -618,6 +621,9 @@
         if count * size > vec_reg_size:
             count = vec_reg_size // size
         signed = self.output_type.signed
+        assert type in ('i','f')
+        assert size > 0
+        assert count > 1
         return BoxVector(type, count, size, signed)
 
 class SignExtToVectorOp(OpToVectorOp):
@@ -625,15 +631,10 @@
         OpToVectorOp.__init__(self, intype, outtype)
         self.size = -1
 
-    def split_pack(self, pack, vec_reg_size):
-        op0 = pack.operations[0].getoperation()
-        sizearg = op0.getarg(1)
+    def before_argument_transform(self, args):
+        sizearg = args[1]
         assert isinstance(sizearg, ConstInt)
         self.size = sizearg.value
-        _, vbox = self.sched_data.getvector_of_box(op0.getarg(0))
-        if vbox.getcount() * self.size > vec_reg_size:
-            return vec_reg_size // self.size
-        return vbox.getcount()
 
     def new_result_vector_box(self):
         type = self.output_type.gettype()
@@ -642,6 +643,9 @@
         if count * self.size > vec_reg_size:
             count = vec_reg_size // self.size
         signed = self.input_type.signed
+        assert type in ('i','f')
+        assert self.size > 0
+        assert count > 1
         return BoxVector(type, count, self.size, signed)
 
 class LoadToVectorLoad(OpToVectorOp):
@@ -655,18 +659,12 @@
         return PackType.by_descr(op.getdescr(), self.sched_data.vec_reg_size)
 
     def before_argument_transform(self, args):
-        args.append(ConstInt(len(self.pack.operations)))
+        count = min(self.output_type.getcount(), len(self.getoperations()))
+        args.append(ConstInt(count))
 
     def getscalarsize(self):
         return self.output_type.getsize()
 
-    def new_result_vector_box(self):
-        type = self.output_type.gettype()
-        size = self.output_type.getsize()
-        count = len(self.pack.operations)
-        signed = self.output_type.signed
-        return BoxVector(type, count, size, signed)
-
 class StoreToVectorStore(OpToVectorOp):
     """
     Storing operations are special because they are not allowed
@@ -846,6 +844,28 @@
     def opcount(self):
         return len(self.operations)
 
+    def process_count(self):
+        return len(self.operations)
+
+    def is_full(self, vec_reg_size):
+        """ if one input element times the opcount is equal
+        to the vector register size, we are full!
+        """
+        ptype = self.input_type
+        if self.input_type is None:
+            # load does not have an input type, but only an output type
+            assert self.operations[0].getoperation().is_raw_load()
+            ptype = self.output_type
+        bytes = ptype.getsize() * self.process_count()
+        assert bytes <= vec_reg_size
+        if bytes == vec_reg_size:
+            return True
+        if ptype.getcount() != -1:
+            size = ptype.getcount() * ptype.getsize()
+            assert bytes <= size
+            return bytes == size
+        return False
+
     def opnum(self):
         assert len(self.operations) > 0
         return self.operations[0].getoperation().getopnum()
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -747,8 +747,8 @@
         self.assert_packset_empty(vopt.packset, len(loop.operations),
                                   [(6,12), (5,11), (7,13)])
 
-    @pytest.mark.parametrize("descr", ['char','float','int','singlefloat'])
-    def test_packset_combine_simple(self,descr):
+    @pytest.mark.parametrize("descr,size", 
[('char',16),('float',2),('int',2),('singlefloat',4)])
+    def test_packset_combine_simple(self,descr,size):
         ops = """
         [p0,i0]
         i3 = getarrayitem_raw(p0, i0, descr={descr}arraydescr)
@@ -758,18 +758,7 @@
         loop = self.parse_loop(ops)
         vopt = self.combine_packset(loop,3)
         assert len(vopt.dependency_graph.memory_refs) == 4
-        assert len(vopt.packset.packs) == 1
-        self.assert_pack(vopt.packset.packs[0], (1,3,5,7))
-        ops = """
-        [p0,i0]
-        i3 = getarrayitem_raw(p0, i0, descr={descr}arraydescr)
-        i1 = int_add(i0,1)
-        jump(p0,i1)
-        """.format(descr=descr)
-        loop = self.parse_loop(ops)
-        vopt = self.combine_packset(loop,3)
-        assert len(vopt.dependency_graph.memory_refs) == 4
-        assert len(vopt.packset.packs) == 1
+        assert len(vopt.packset.packs) == 16 // size
         self.assert_pack(vopt.packset.packs[0], (1,3,5,7))
 
     @pytest.mark.parametrize("descr,stride",
@@ -786,7 +775,7 @@
         loop = self.parse_loop(ops)
         vopt = self.combine_packset(loop,3)
         assert len(vopt.dependency_graph.memory_refs) == 8
-        assert len(vopt.packset.packs) == 1
+        assert len(vopt.packset.packs) == (16//stride) * 2
         self.assert_pack(vopt.packset.packs[0], (1,3,5,7,9,11,13,15))
 
     def test_packset_combine_2_loads_one_redundant(self):
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -24,6 +24,7 @@
         getunpackopnum, PackType, determine_output_type, determine_trans)
 from rpython.jit.metainterp.optimizeopt.guard import GuardStrengthenOpt
 from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp)
+from rpython.rlib import listsort
 from rpython.rlib.objectmodel import we_are_translated
 from rpython.rlib.debug import debug_print, debug_start, debug_stop
 from rpython.rlib.jit import Counters
@@ -94,6 +95,9 @@
         else:
             raise
 
+def cmp_pack_lt(a,b):
+    return a.left.getindex() < b.left.getindex()
+packsort = listsort.make_timsort_class(lt=cmp_pack_lt)
 
 class VectorizingOptimizer(Optimizer):
     """ Try to unroll the loop and find instructions to group """
@@ -327,10 +331,13 @@
         pack_count = self.packset.pack_count()
         while True:
             for pack in self.packset.packs:
-                self.follow_use_defs(pack)
                 self.follow_def_uses(pack)
             if pack_count == self.packset.pack_count():
-                break
+                pack_count = self.packset.pack_count()
+                for pack in self.packset.packs:
+                    self.follow_use_defs(pack)
+                if pack_count == self.packset.pack_count():
+                    break
             pack_count = self.packset.pack_count()
 
     def follow_use_defs(self, pack):
@@ -371,6 +378,7 @@
             raise NotAVectorizeableLoop()
         i = 0
         j = 0
+        packsort(self.packset.packs)
         end_ij = len(self.packset.packs)
         while True:
             len_before = len(self.packset.packs)
@@ -381,6 +389,8 @@
                         j += 1
                         continue
                     pack1 = self.packset.packs[i]
+                    if pack1.is_full(self.cpu.vector_register_size):
+                        break
                     pack2 = self.packset.packs[j]
                     if pack1.rightmost_match_leftmost(pack2):
                         end_ij = self.packset.combine(i,j)
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to