Author: Richard Plangger <r...@pasra.at>
Branch: vecopt
Changeset: r78324:313bcd2938c3
Date: 2015-06-26 16:34 +0200
http://bitbucket.org/pypy/pypy/changeset/313bcd2938c3/

Log:    finishing up changes

diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -361,6 +361,7 @@
                 continue
             box_pos, vbox = self.sched_data.getvector_of_box(arg)
             if not vbox:
+                import pdb; pdb.set_trace()
                 # constant/variable expand this box
                 vbox = self.expand(arg, i)
                 self.sched_data.setvector_of_box(arg, 0, vbox)
@@ -875,6 +876,11 @@
             node.pack = None
             node.pack_position = -1
 
+    def update_pack_of_nodes(self):
+        for i,node in enumerate(self.operations):
+            node.pack = self
+            node.pack_position = i
+
     def rightmost_match_leftmost(self, other):
         assert isinstance(other, Pack)
         rightmost = self.operations[-1]
@@ -889,7 +895,8 @@
         return rightmost is leftmost and accum
 
     def __repr__(self):
-        return "Pack(%r)" % self.operations
+        opname = self.operations[0].getoperation().getopname()
+        return "Pack(%s,%r)" % (opname, self.operations)
 
     def is_accumulating(self):
         return self.accum is not None
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -747,8 +747,12 @@
         self.assert_packset_empty(vopt.packset, len(loop.operations),
                                   [(6,12), (5,11), (7,13)])
 
-    @pytest.mark.parametrize("descr,size", [('char',16),('float',2),('int',2),('singlefloat',4)])
-    def test_packset_combine_simple(self,descr,size):
+    @pytest.mark.parametrize("descr,packs,packidx", 
+                             [('char',1,  [(0,(1,3,5,7))]),
+                              ('float',2, [(0,(1,3)),(1,(5,7))]),
+                              ('int',2,   [(0,(1,3)),(1,(5,7))]),
+                              ('singlefloat',1,[(0,(1,3,5,7))])])
+    def test_packset_combine_simple(self,descr,packs,packidx):
         ops = """
         [p0,i0]
         i3 = getarrayitem_raw(p0, i0, descr={descr}arraydescr)
@@ -758,12 +762,13 @@
         loop = self.parse_loop(ops)
         vopt = self.combine_packset(loop,3)
         assert len(vopt.dependency_graph.memory_refs) == 4
-        assert len(vopt.packset.packs) == 16 // size
-        self.assert_pack(vopt.packset.packs[0], (1,3,5,7))
+        assert len(vopt.packset.packs) == packs
+        for i,t in packidx:
+            self.assert_pack(vopt.packset.packs[i], t)
 
-    @pytest.mark.parametrize("descr,stride",
-            [('char',1),('float',8),('int',8),('singlefloat',4)])
-    def test_packset_combine_2_loads_in_trace(self, descr, stride):
+    @pytest.mark.parametrize("descr,stride,packs",
+            [('char',1,1),('float',8,4),('int',8,4),('singlefloat',4,2)])
+    def test_packset_combine_2_loads_in_trace(self, descr, stride,packs):
         ops = """
         [p0,i0]
         i3 = raw_load(p0, i0, descr={type}arraydescr)
@@ -775,24 +780,7 @@
         loop = self.parse_loop(ops)
         vopt = self.combine_packset(loop,3)
         assert len(vopt.dependency_graph.memory_refs) == 8
-        assert len(vopt.packset.packs) == (16//stride) * 2
-        self.assert_pack(vopt.packset.packs[0], (1,3,5,7,9,11,13,15))
-
-    def test_packset_combine_2_loads_one_redundant(self):
-        py.test.skip("apply redundant load elimination?")
-        ops = """
-        [p0,i0]
-        i3 = getarrayitem_raw(p0, i0, descr=floatarraydescr)
-        i1 = int_add(i0,1)
-        i4 = getarrayitem_raw(p0, i1, descr=floatarraydescr)
-        jump(p0,i1)
-        """
-        loop = self.parse_loop(ops)
-        vopt = self.combine_packset(loop,3)
-        assert len(vopt.dependency_graph.memory_refs) == 8
-        assert len(vopt.packset.packs) == 2
-        self.assert_pack(vopt.packset.packs[0], (1,5,9))
-        self.assert_pack(vopt.packset.packs[1], (3,7,11))
+        assert len(vopt.packset.packs) == packs
 
     def test_packset_combine_no_candidates_packset_empty(self):
         ops = """
@@ -847,7 +835,10 @@
         loop = self.parse_loop(ops)
         vopt = self.combine_packset(loop,3)
         assert len(vopt.dependency_graph.memory_refs) == 12
-        assert len(vopt.packset.packs) == 4
+        if stride == 8:
+            assert len(vopt.packset.packs) == 8
+        else:
+            assert len(vopt.packset.packs) == 4
 
         for opindices in [(5,12,19,26),(6,13,20,27),
                           (7,14,21,28),(8,15,22,29)]:
@@ -859,7 +850,6 @@
              ('float_mul','float',8),
              ('int_add','int',8),
              ('int_sub','int',8),
-             ('int_mul','int',8),
             ])
     def test_schedule_vector_operation(self, op, descr, stride):
         ops = """
@@ -981,7 +971,7 @@
         [p0,i0]
         guard_early_exit() [p0,i0]
         i1 = getarrayitem_raw(p0, i0, descr=floatarraydescr)
-        i4 = int_mul(i1, 42)
+        i4 = int_sub(i1, 42)
         i3 = int_add(i0,1)
         i5 = int_lt(i3, 10)
         guard_true(i5) [p0, i0]
@@ -1000,7 +990,7 @@
         i4 = int_add(i0, 2)
         i5 = int_lt(i2, 10)
         v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr)
-        v2 = vec_int_mul(v1, v3)
+        v2 = vec_int_sub(v1, v3)
         jump(p0,i2,v3)
         """
         vopt = self.vectorize(self.parse_loop(ops),1)
@@ -1011,7 +1001,7 @@
         [p0,i0,f3]
         guard_early_exit() [p0,i0]
         f1 = getarrayitem_raw(p0, i0, descr=floatarraydescr)
-        f4 = int_mul(f1, f3)
+        f4 = int_add(f1, f3)
         i3 = int_add(i0,1)
         i5 = int_lt(i3, 10)
         guard_true(i5) [p0, i0]
@@ -1030,7 +1020,7 @@
         i4 = int_add(i0, 2)
         i5 = int_lt(i2, 10)
         v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr)
-        v2 = vec_int_mul(v1, v3)
+        v2 = vec_int_add(v1, v3)
         jump(p0,i2,f3,v3)
         """
         vopt = self.vectorize(self.parse_loop(ops),1)
@@ -1157,8 +1147,8 @@
         i7 = int_add(i1, 4)
         i14 = int_ge(i50, 36)
         v17 = vec_getarrayitem_raw(p0, i1, 2, descr=floatarraydescr)
+        v19 = vec_cast_float_to_singlefloat(v17)
         v18 = vec_getarrayitem_raw(p0, i5, 2, descr=floatarraydescr)
-        v19 = vec_cast_float_to_singlefloat(v17)
         v20 = vec_cast_float_to_singlefloat(v18)
         v21 = vec_float_pack(v19, v20, 2, 2)
         vec_setarrayitem_raw(p1, i1, v21, descr=singlefloatarraydescr)
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -374,12 +374,26 @@
                         self.packset.add_pack(pair)
 
     def combine_packset(self):
+        """ Combination is done iterating the packs that have
+        a sorted op index of the first operation (= left).
+        If a pack is marked as 'full', the next pack that is
+        encountered having the full_pack.right == pack.left,
+        the pack is removed. This is because the packs have
+        intersecting edges.
+        """
         if len(self.packset.packs) == 0:
             raise NotAVectorizeableLoop()
+        packsort(self.packset.packs).sort()
+        if not we_are_translated():
+            # ensure we are really sorted!
+            x = 0
+            for i,pack in enumerate(self.packset.packs):
+                assert x <= pack.left.getindex()
+                x = pack.left.getindex()
         i = 0
         j = 0
-        packsort(self.packset.packs)
         end_ij = len(self.packset.packs)
+        remove_left = {}
         while True:
             len_before = len(self.packset.packs)
             i = 0
@@ -389,14 +403,29 @@
                         j += 1
                         continue
                     pack1 = self.packset.packs[i]
+                    pack2 = self.packset.packs[j]
+                    # remove intermediate
+                    left = pack1.operations[0]
+                    if left in remove_left:
+                        remove_left[left] = pack1
+                        del self.packset.packs[i]
+                        end_ij -= 1
+                        continue
+                    # check if the pack is already full
                     if pack1.is_full(self.cpu.vector_register_size):
+                        pack1.update_pack_of_nodes()
+                        right = pack1.operations[-1]
+                        remove_left[right] = None
                         break
-                    pack2 = self.packset.packs[j]
                     if pack1.rightmost_match_leftmost(pack2):
                         end_ij = self.packset.combine(i,j)
-                    elif pack2.rightmost_match_leftmost(pack1):
-                        end_ij = self.packset.combine(j,i)
-                    j += 1
+                    else:
+                        # do not inc in rightmost_match_leftmost
+                        # this could miss some pack
+                        j += 1
+                # set for each node to which pack it belongs
+                self.packset.packs[i].update_pack_of_nodes()
+
                 j = 0
                 i += 1
             if len_before == len(self.packset.packs):
@@ -406,7 +435,15 @@
             # some test cases check the accumulation variables
             self.packset.accum_vars = {}
             print "packs:"
+            check = {}
+            fail = False
             for pack in self.packset.packs:
+                left = pack.operations[0]
+                right = pack.operations[-1]
+                if left in check or right in check:
+                    fail = True
+                check[left] = None
+                check[right] = None
                 accum = pack.accum
                 if accum:
                     self.packset.accum_vars[accum.var] = accum.pos
@@ -414,6 +451,8 @@
                 print " %dx %s (accum? %d) " % (len(pack.operations),
                                                 pack.operations[0].op.getopname(),
                                                 accum is not None)
+            if fail:
+                assert False
 
     def schedule(self, vector=False):
         self.guard_early_exit = -1
@@ -463,6 +502,8 @@
     def _unpack_from_vector(self, i, arg, sched_data, renamer):
         (j, vbox) = sched_data.box_to_vbox.get(arg, (-1, None))
         if vbox:
+            if vbox in sched_data.invariant_vector_vars:
+                return arg
             arg_cloned = arg.clonebox()
             renamer.start_renaming(arg, arg_cloned)
             cj = ConstInt(j)
@@ -684,8 +725,6 @@
         is not iterated when calling this method. """
         pack_i = self.packs[i]
         pack_j = self.packs[j]
-        pack_i.clear()
-        pack_j.clear()
         operations = pack_i.operations
         for op in pack_j.operations[1:]:
             operations.append(op)
@@ -697,16 +736,19 @@
         pack.accum = pack_i.accum
         pack_i.accum = pack_j.accum = None
 
+        del self.packs[j]
+        return len(self.packs)
+        # OLD
         # instead of deleting an item in the center of pack array,
         # the last element is assigned to position j and
         # the last slot is freed. Order of packs doesn't matter
-        last_pos = len(self.packs) - 1
-        if j == last_pos:
-            del self.packs[j]
-        else:
-            self.packs[j] = self.packs[last_pos]
-            del self.packs[last_pos]
-        return last_pos
+        #last_pos = len(self.packs) - 1
+        #if j == last_pos:
+        #    del self.packs[j]
+        #else:
+        #    self.packs[j] = self.packs[last_pos]
+        #    del self.packs[last_pos]
+        #return last_pos
 
     def accumulates_pair(self, lnode, rnode, origin_pack):
         # lnode and rnode are isomorphic and dependent
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to