Author: Richard Plangger <r...@pasra.at>
Branch: vecopt
Changeset: r78326:d1a942296dd8
Date: 2015-06-26 17:46 +0200
http://bitbucket.org/pypy/pypy/changeset/d1a942296dd8/

Log:    Rewrote the scheduling tests to pass the pack types explicitly
        (they could not be inferred easily); the cost model tests pass again.

diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -229,6 +229,7 @@
         self.output_type = None
         self.costmodel = None
 
+
     def determine_input_type(self, op):
         arg = op.getarg(0)
         _, vbox = self.sched_data.getvector_of_box(arg)
@@ -267,9 +268,13 @@
         #
         self.check_if_pack_supported(pack)
         #
-        self.pack = pack
-        self.transform_pack()
-
+        if self.must_be_full_but_is_not(pack):
+            for op in pack.operations:
+                self.preamble_ops.append(op.getoperation())
+        else:
+            self.pack = pack
+            self.transform_pack()
+        #
         self.pack = None
         self.costmodel = None
         self.preamble_ops = None
@@ -277,6 +282,9 @@
         self.input_type = None
         self.output_type = None
 
+    def must_be_full_but_is_not(self, pack):
+        return False
+
     def split_pack(self, pack, vec_reg_size):
         """ Returns how many items of the pack should be
             emitted as vector operation. """
@@ -294,11 +302,9 @@
         pass
 
     def transform_pack(self):
-        op = self.pack.operations[0].getoperation()
+        op = self.pack.leftmost()
         args = op.getarglist()
-        #
         self.before_argument_transform(args)
-        #
         self.transform_arguments(args)
         #
         result = op.result
@@ -614,6 +620,7 @@
         assert isinstance(sizearg, ConstInt)
         self.size = sizearg.value
 
+
     def new_result_vector_box(self):
         type = self.output_type.gettype()
         count = self.input_type.getcount()
@@ -656,6 +663,11 @@
     def determine_input_type(self, op):
         return PackType.by_descr(op.getdescr(), self.sched_data.vec_reg_size)
 
+    def must_be_full_but_is_not(self, pack):
+        vrs = self.sched_data.vec_reg_size
+        it = pack.input_type
+        return it.getsize() * it.getcount() < vrs
+
     def determine_output_type(self, op):
         return None
 
@@ -833,7 +845,6 @@
         ptype = self.input_type
         if self.input_type is None:
             # load does not have an input type, but only an output type
-            assert self.leftmost().is_raw_load()
             ptype = self.output_type
 
         op = self.leftmost()
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
@@ -136,7 +136,7 @@
         savings = self.savings(loop1)
         assert savings == 2
 
-    @py.test.mark.parametrize("bytes,s", [(1,-1),(2,-1),(4,0),(8,-1)])
+    @py.test.mark.parametrize("bytes,s", [(1,None),(2,None),(4,0),(8,-1)])
     def test_sum_float_to_int(self, bytes, s):
         loop1 = self.parse("""
         f10 = raw_load(p0, i0, descr=double)
@@ -150,13 +150,19 @@
         i15 = int_add(i16, i13)
         i17 = int_signext(i15, {c})
         """.format(c=bytes))
-        savings = self.savings(loop1)
-        # it does not benefit because signext has
-        # a very inefficient implementation (x86
-        # does not provide nice instr to convert
-        # integer sizes)
-        # signext -> no benefit, + 2x unpack
-        assert savings <= s
+        try:
+            savings = self.savings(loop1)
+            if s is None:
+                py.test.fail("must fail")
+            # it does not benefit because signext has
+            # a very inefficient implementation (x86
+            # does not provide nice instr to convert
+            # integer sizes)
+            # signext -> no benefit, + 2x unpack
+            assert savings <= s
+        except NotAProfitableLoop:
+            if s is not None:
+                py.test.fail("must not fail")
 
     def test_cast(self):
         loop1 = self.parse("""
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
@@ -3,7 +3,8 @@
 from rpython.jit.metainterp.history import TargetToken, JitCellToken, TreeLoop
 from rpython.jit.metainterp.optimizeopt.util import equaloplists, Renamer
 from rpython.jit.metainterp.optimizeopt.vectorize import (VecScheduleData,
-        Pack, NotAProfitableLoop, VectorizingOptimizer, X86_CostModel)
+        Pack, Pair, NotAProfitableLoop, VectorizingOptimizer, X86_CostModel,
+        PackSet)
 from rpython.jit.metainterp.optimizeopt.dependency import Node
 from rpython.jit.metainterp.optimizeopt.schedule import PackType
 from rpython.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin
@@ -14,6 +15,14 @@
 from rpython.jit.tool.oparser import parse as opparse
 from rpython.jit.tool.oparser_model import get_model
 
+F64 = PackType('f',8,True,2)
+F32 = PackType('f',4,True,4)
+F32_2 =  PackType('f',4,True,2)
+I64 = PackType('i',8,True,2)
+I32 = PackType('i',4,True,4)
+I32_2 =  PackType('i',4,True,2)
+I16 = PackType('i',2,True,8)
+
 class SchedulerBaseTest(DependencyBaseTest):
 
     def parse(self, source, inc_label_jump=True,
@@ -58,8 +67,8 @@
         del loop.operations[-1]
         return loop
 
-    def pack(self, loop, l, r):
-        return Pack([Node(op,1+l+i) for i,op in enumerate(loop.operations[1+l:1+r])], None, None)
+    def pack(self, loop, l, r, input_type, output_type):
+        return Pack([Node(op,1+l+i) for i,op in enumerate(loop.operations[1+l:1+r])], input_type, output_type)
 
     def schedule(self, loop_orig, packs, vec_reg_size=16, prepend_invariant=False, overwrite_funcs=None):
         loop = get_model(False).ExtendedTreeLoop("loop")
@@ -72,16 +81,32 @@
         for name, overwrite in (overwrite_funcs or {}).items():
             setattr(vsd, name, overwrite)
         renamer = Renamer()
+        metainterp_sd = FakeMetaInterpStaticData(self.cpu)
+        jitdriver_sd = FakeJitDriverStaticData()
+        opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, 0)
+        pairs = []
         for pack in packs:
+            for i in range(len(pack.operations)-1):
+                o1 = pack.operations[i]
+                o2 = pack.operations[i+1]
+                pairs.append(Pair(o1,o2,pack.input_type,pack.output_type))
+
+        class FakePackSet(PackSet):
+            def __init__(self):
+                self.packs = None
+
+        opt.packset = FakePackSet()
+        opt.packset.packs = pairs
+
+        opt.combine_packset()
+
+        for pack in opt.packset.packs:
             if pack.opcount() == 1:
                 ops.append(pack.operations[0].getoperation())
             else:
                 for op in vsd.as_vector_operation(pack, renamer):
                     ops.append(op)
         loop.operations = ops
-        metainterp_sd = FakeMetaInterpStaticData(self.cpu)
-        jitdriver_sd = FakeJitDriverStaticData()
-        opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, 0)
         opt.clear_newoperations()
         for op in ops:
             opt.unpack_from_vector(op, vsd, renamer)
@@ -106,7 +131,7 @@
         i14 = raw_load(p0, i4, descr=float)
         i15 = raw_load(p0, i5, descr=float)
         """)
-        pack1 = self.pack(loop1, 0, 6)
+        pack1 = self.pack(loop1, 0, 6, None, F32)
         loop2 = self.schedule(loop1, [pack1])
         loop3 = self.parse("""
         v10[i32|4] = vec_raw_load(p0, i0, 4, descr=float)
@@ -123,9 +148,9 @@
         f10 = cast_int_to_float(i12)
         f11 = cast_int_to_float(i13)
         """)
-        pack1 = self.pack(loop1, 0, 2)
-        pack2 = self.pack(loop1, 2, 4)
-        pack3 = self.pack(loop1, 4, 6)
+        pack1 = self.pack(loop1, 0, 2, None, I64)
+        pack2 = self.pack(loop1, 2, 4, I64, I32_2)
+        pack3 = self.pack(loop1, 4, 6, I32_2, F32_2)
         loop2 = self.schedule(loop1, [pack1, pack2, pack3])
         loop3 = self.parse("""
         v10[i64|2] = vec_raw_load(p0, i0, 2, descr=long)
@@ -139,7 +164,7 @@
         i10 = int_add(i0, 73)
         i11 = int_add(i1, 73)
         """)
-        pack1 = self.pack(loop1, 0, 2)
+        pack1 = self.pack(loop1, 0, 2, I64, I64)
         loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
         loop3 = self.parse("""
         v10[i64|2] = vec_box(2)
@@ -155,7 +180,7 @@
         f10 = float_add(f0, 73.0)
         f11 = float_add(f1, 73.0)
         """)
-        pack1 = self.pack(loop1, 0, 2)
+        pack1 = self.pack(loop1, 0, 2, I64, I64)
         loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
         loop3 = self.parse("""
         v10[f64|2] = vec_box(2)
@@ -174,8 +199,8 @@
         f12 = float_add(f10, f5)
         f13 = float_add(f11, f5)
         """)
-        pack1 = self.pack(loop1, 0, 2)
-        pack2 = self.pack(loop1, 2, 4)
+        pack1 = self.pack(loop1, 0, 2, F64, F64)
+        pack2 = self.pack(loop1, 2, 4, F64, F64)
         loop2 = self.schedule(loop1, [pack1, pack2], prepend_invariant=True)
         loop3 = self.parse("""
         v10[f64|2] = vec_box(2)
@@ -199,7 +224,7 @@
         i10 = int_signext(i1, 4)
         i11 = int_signext(i1, 4)
         """, additional_args=['v10[i64|2]'])
-        pack1 = self.pack(loop1, 0, 2)
+        pack1 = self.pack(loop1, 0, 2, I64, I32_2)
         var = self.find_input_arg('v10', loop1)
         def i1inv103204(v):
             return 0, var
@@ -250,10 +275,11 @@
         raw_store(p1, i7, i24, descr=short)
         raw_store(p1, i8, i25, descr=short)
         """)
-        pack1 = self.pack(loop1, 0, 8)
-        pack2 = self.pack(loop1, 8, 16)
-        pack3 = self.pack(loop1, 16, 24)
-        pack4 = self.pack(loop1, 24, 32)
+        pack1 = self.pack(loop1, 0, 8, None, I64)
+        pack2 = self.pack(loop1, 8, 16, I64, I32_2)
+        I16_2 = PackType('i',2,True,2)
+        pack3 = self.pack(loop1, 16, 24, I32, I16_2)
+        pack4 = self.pack(loop1, 24, 32, I16, None)
         def void(b,c):
             pass
         loop2 = self.schedule(loop1, [pack1,pack2,pack3,pack4],
@@ -297,9 +323,9 @@
         raw_store(p1, i3, i12, descr=float)
         raw_store(p1, i4, i13, descr=float)
         """)
-        pack1 = self.pack(loop1, 0, 4)
-        pack2 = self.pack(loop1, 4, 8)
-        pack3 = self.pack(loop1, 8, 12)
+        pack1 = self.pack(loop1, 0, 4, None, I64)
+        pack2 = self.pack(loop1, 4, 8, I64, I32_2)
+        pack3 = self.pack(loop1, 8, 12, I32, None)
         loop2 = self.schedule(loop1, [pack1,pack2,pack3])
         loop3 = self.parse("""
         v44[f64|2] = vec_raw_load(p0, i1, 2, descr=double) 
@@ -322,9 +348,9 @@
         guard_true(i12) []
         guard_true(i13) []
         """)
-        pack1 = self.pack(loop1, 0, 2)
-        pack2 = self.pack(loop1, 2, 4)
-        pack3 = self.pack(loop1, 4, 6)
+        pack1 = self.pack(loop1, 0, 2, None, I64)
+        pack2 = self.pack(loop1, 2, 4, I64, I64)
+        pack3 = self.pack(loop1, 4, 6, None, I64)
         loop2 = self.schedule(loop1, [pack1,pack2,pack3], prepend_invariant=True)
         loop3 = self.parse("""
         v9[i64|2] = vec_int_expand(255)
@@ -342,8 +368,8 @@
         raw_store(p0, i3, i10, descr=float)
         raw_store(p0, i4, i11, descr=float)
         """)
-        pack1 = self.pack(loop1, 0, 2)
-        pack2 = self.pack(loop1, 2, 4)
+        pack1 = self.pack(loop1, 0, 2, None, I32_2)
+        pack2 = self.pack(loop1, 2, 4, I32_2, None)
         loop2 = self.schedule(loop1, [pack1,pack2], prepend_invariant=True)
         loop3 = self.parse("""
         v1[ui32|2] = vec_raw_load(p0, i1, 2, descr=float)
@@ -361,7 +387,7 @@
         i10 = int_and(255, i1)
         i11 = int_and(255, i1)
         """)
-        pack1 = self.pack(loop1, 0, 2)
+        pack1 = self.pack(loop1, 0, 2, I64, I64)
         loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
         loop3 = self.parse("""
         v1[i64|2] = vec_int_expand(255)
@@ -375,7 +401,7 @@
         i10 = int_and(255, i1)
         i11 = int_and(255, i1)
         """)
-        pack1 = self.pack(loop1, 0, 2)
+        pack1 = self.pack(loop1, 0, 2, I64, I64)
         loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
         loop3 = self.parse("""
         v1[i64|2] = vec_int_expand(255)
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -408,6 +408,7 @@
                     left = pack1.operations[0]
                     if left in remove_left:
                         remove_left[left] = pack1
+                        pack1.clear()
                         del self.packset.packs[i]
                         end_ij -= 1
                         continue
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to