Author: Richard Plangger <r...@pasra.at>
Branch: vecopt
Changeset: r78696:08d59f3ff88c
Date: 2015-07-28 18:45 +0200
http://bitbucket.org/pypy/pypy/changeset/08d59f3ff88c/

Log:    all but 2 vectorization tests passing again. the scheduling that
        prefers pure operations messes up these test cases

diff --git a/rpython/jit/backend/x86/vector_ext.py 
b/rpython/jit/backend/x86/vector_ext.py
--- a/rpython/jit/backend/x86/vector_ext.py
+++ b/rpython/jit/backend/x86/vector_ext.py
@@ -11,6 +11,7 @@
     xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14,
     X86_64_SCRATCH_REG, X86_64_XMM_SCRATCH_REG, AddressLoc)
 from rpython.jit.backend.llsupport.regalloc import (get_scale, 
valid_addressing_size)
+from rpython.jit.metainterp.resoperation import rop, ResOperation
 from rpython.rlib.objectmodel import we_are_translated
 from rpython.rtyper.lltypesystem.lloperation import llop
 from rpython.rtyper.lltypesystem import lltype
@@ -170,19 +171,23 @@
                 self.mc.MOVUPD(dest_loc, value_loc)
 
     def genop_vec_int_is_true(self, op, arglocs, resloc):
-        loc, size = arglocs
+        loc, sizeloc = arglocs
         temp = X86_64_XMM_SCRATCH_REG
         self.mc.PXOR(temp, temp)
         # every entry that is non zero -> becomes zero
         # zero entries become ones
-        self.mc.PCMPEQ(loc, temp, size)
+        self.mc.PCMPEQ(loc, temp, sizeloc.value)
         # a second time -> every zero entry (corresponding to non zero
         # entries before) become ones
-        self.mc.PCMPEQ(loc, temp, size)
+        self.mc.PCMPEQ(loc, temp, sizeloc.value)
 
     def genop_guard_vec_int_is_true(self, op, guard_op, guard_token, arglocs, 
resloc):
         self._guard_vector_true(op, arglocs[0])
-        self.implement_guard(guard_token, 'NZ')
+        guard_opnum = guard_op.getopnum()
+        if guard_opnum == rop.GUARD_TRUE:
+            self.implement_guard(guard_token, 'NZ')
+        else:
+            self.implement_guard(guard_token, 'Z')
 
     def genop_vec_int_mul(self, op, arglocs, resloc):
         loc0, loc1, itemsize_loc = arglocs
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py 
b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
@@ -4,7 +4,7 @@
 from rpython.jit.metainterp.optimizeopt.util import equaloplists
 from rpython.jit.metainterp.optimizeopt.vectorize import (VecScheduleData,
         Pack, NotAProfitableLoop, VectorizingOptimizer)
-from rpython.jit.metainterp.optimizeopt.dependency import Node
+from rpython.jit.metainterp.optimizeopt.dependency import Node, DependencyGraph
 from rpython.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin
 from rpython.jit.metainterp.optimizeopt.test.test_schedule import 
SchedulerBaseTest
 from rpython.jit.metainterp.optimizeopt.test.test_vectorize import 
(FakeMetaInterpStaticData,
@@ -35,8 +35,7 @@
         metainterp_sd = FakeMetaInterpStaticData(self.cpu)
         jitdriver_sd = FakeJitDriverStaticData()
         opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, [])
-        opt.build_dependency_graph()
-        graph = opt.dependency_graph
+        graph = opt.dependency_graph = DependencyGraph(loop)
         for k,m in graph.memory_refs.items():
             graph.memory_refs[k] = FakeMemoryRef(m.array, m.index_var)
         opt.find_adjacent_memory_refs()
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py 
b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -82,7 +82,7 @@
         opt.loop.operations = opt.get_newoperations()
         self.debug_print_operations(opt.loop)
         opt.clear_newoperations()
-        opt.build_dependency_graph()
+        opt.dependency_graph = DependencyGraph(loop)
         self.last_graph = opt.dependency_graph
         self.show_dot_graph(self.last_graph, self.test_name)
         return opt
@@ -278,20 +278,20 @@
         """
         opt_ops = """
         [p0,p1,p2,i0]
+        i4 = int_add(i0, 1)
+        i5 = int_le(i4, 10)
+        guard_true(i5) []
         i1 = raw_load(p1, i0, descr=floatarraydescr)
         i2 = raw_load(p2, i0, descr=floatarraydescr)
         i3 = int_add(i1,i2)
         raw_store(p0, i0, i3, descr=floatarraydescr)
-        i4 = int_add(i0, 1)
-        i5 = int_le(i4, 10)
-        guard_true(i5) []
+        i9 = int_add(i4, 1)
+        i10 = int_le(i9, 10)
+        guard_true(i10) []
         i6 = raw_load(p1, i4, descr=floatarraydescr)
         i7 = raw_load(p2, i4, descr=floatarraydescr)
         i8 = int_add(i6,i7)
         raw_store(p0, i4, i8, descr=floatarraydescr)
-        i9 = int_add(i4, 1)
-        i10 = int_le(i9, 10)
-        guard_true(i10) []
         jump(p0,p1,p2,i9)
         """
         self.assert_unroll_loop_equals(self.parse_loop(ops), 
self.parse_loop(opt_ops), 1)
@@ -334,8 +334,8 @@
         i4 = raw_load(p0,i1,descr=chararraydescr)
         jump(p0,i3,i4)
         """
-        vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
-        vopt.build_dependency_graph()
+        loop = self.parse_loop(ops)
+        vopt = self.vectoroptimizer_unrolled(loop,0)
         assert len(vopt.dependency_graph.memory_refs) == 2
         self.assert_has_memory_ref_at(1)
         self.assert_has_memory_ref_at(2)
@@ -571,7 +571,7 @@
         """
         vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
         vopt.find_adjacent_memory_refs()
-        mref = self.getmemref(3)
+        mref = self.getmemref(5)
         mref2 = self.getmemref(6)
 
         self.assert_memory_ref_not_adjacent(mref, mref2)
@@ -591,7 +591,7 @@
         """
         vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
         vopt.find_adjacent_memory_refs()
-        mref = self.getmemref(3)
+        mref = self.getmemref(6)
         mref2 = self.getmemref(7)
 
         self.assert_memory_ref_not_adjacent(mref, mref2)
@@ -611,7 +611,7 @@
         """
         vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
         vopt.find_adjacent_memory_refs()
-        mref = self.getmemref(3)
+        mref = self.getmemref(6)
         mref2 = self.getmemref(7)
 
         self.assert_memory_ref_not_adjacent(mref, mref2)
@@ -628,7 +628,7 @@
         """
         loop = self.parse_loop(ops)
         vopt = self.init_packset(loop,1)
-        self.assert_independent(1,5)
+        self.assert_independent(4,8)
         assert vopt.packset is not None
         assert len(vopt.dependency_graph.memory_refs) == 2
         assert len(vopt.packset.packs) == 1
@@ -748,18 +748,18 @@
         loop = self.parse_loop(ops)
         vopt = self.extend_packset(loop,1)
         assert len(vopt.dependency_graph.memory_refs) == 4
+        self.assert_independent(4,10)
         self.assert_independent(5,11)
         self.assert_independent(6,12)
-        self.assert_independent(7,13)
         assert len(vopt.packset.packs) == 3
         self.assert_packset_empty(vopt.packset, len(loop.operations),
-                                  [(6,12), (5,11), (7,13)])
+                                  [(6,12), (5,11), (4,10)])
 
     @pytest.mark.parametrize("descr,packs,packidx", 
-                             [('char',1,  [(0,(1,3,5,7))]),
-                              ('float',2, [(0,(1,3)),(1,(5,7))]),
-                              ('int',2,   [(0,(1,3)),(1,(5,7))]),
-                              ('singlefloat',1,[(0,(1,3,5,7))])])
+                             [('char',1,  [(0,(2,4,6,8))]),
+                              ('float',2, [(0,(2,4)),(1,(6,8))]),
+                              ('int',2,   [(0,(2,4)),(1,(6,8))]),
+                              ('singlefloat',1,[(0,(2,4,6,8))])])
     def test_packset_combine_simple(self,descr,packs,packidx):
         ops = """
         [p0,i0]
@@ -849,7 +849,7 @@
             assert len(vopt.packset.packs) == 4
 
         for opindices in [(5,12,19,26),(6,13,20,27),
-                          (7,14,21,28),(8,15,22,29)]:
+                          (7,14,21,28),(4,11,18,25)]:
             self.assert_has_pack_with(vopt.packset, opindices)
 
     @pytest.mark.parametrize('op,descr,stride',
@@ -874,7 +874,6 @@
         """.format(op=op,descr=descr,stride=1) # stride getarray is always 1
         vops = """
         [p0,p1,p2,i0]
-        guard_early_exit() []
         i10 = int_le(i0, 128)
         guard_true(i10) []
         i1 = int_add(i0, {stride})
@@ -907,7 +906,6 @@
         """
         opt="""
         [i0, i1, i2, i3, i4]
-        guard_early_exit() []
         i11 = int_add(i0, 1) 
         i6 = int_mul(i0, 8) 
         i12 = int_lt(i11, i1) 
@@ -941,7 +939,6 @@
           for i in range(0,14)])
         opt="""
         [p0,i0]
-        guard_early_exit() [p0,i0]
         i200 = int_add(i0, 1)
         i400 = int_lt(i200, 102)
         i2 = int_add(i0, 16)
@@ -989,7 +986,6 @@
         [p0,i0]
         v3 = vec_int_expand(42)
         label(p0,i0,v3)
-        guard_early_exit() [p0,i0]
         i20 = int_add(i0, 1)
         i30 = int_lt(i20, 10)
         i2 = int_add(i0, 2)
@@ -1019,7 +1015,6 @@
         [p0,i0,f3]
         v3 = vec_float_expand(f3)
         label(p0,i0,f3,v3)
-        guard_early_exit() [p0,i0]
         i20 = int_add(i0, 1)
         i30 = int_lt(i20, 10)
         i2 = int_add(i0, 2)
@@ -1047,7 +1042,6 @@
         """
         trace_opt = """
         [p0, i0, v2[f64|2]]
-        guard_early_exit() [p0, i0, v2[f64|2]]
         i1 = int_add(i0, 16)
         i2 = int_lt(i1, 100)
         guard_false(i2) [p0, i0, v[f64|2]]
@@ -1103,7 +1097,6 @@
         opt = """
         [p36, i28, p9, i37, p14, f34, p12, p38, f35, p39, i40, i41, p42, i43, 
i44, i21, i4, i0, i18]
         guard_not_invalidated() [p38, p12, p9, p14, p39, i37, i44, f35, i40, 
p42, i43, f34, i28, p36, i41]
-        guard_early_exit() [p38, p12, p9, p14, p39, i37, i44, f35, i40, p42, 
i43, f34, i28, p36, i41]
         i50 = int_add(i28, 1) 
         i46 = int_add(i44, 8) 
         i48 = int_add(i41, 8) 
@@ -1142,7 +1135,6 @@
         """
         opt = """
         [p0, p1, i1]
-        guard_early_exit() []
         i3 = int_add(i1, 1)
         i4 = int_ge(i3, 36)
         i50 = int_add(i1, 4)
@@ -1184,7 +1176,6 @@
         """
         opt = """
         [p0, p1, p2, i0, i4]
-        guard_early_exit() []
         i5 = int_add(i4, 4)
         i1 = int_add(i0, 4)
         i186 = int_lt(i5, 100)
@@ -1219,39 +1210,6 @@
         vopt = self.vectorize(self.parse_loop(ops))
         self.assert_equal(vopt.loop, self.parse_loop(opt))
 
-    def test_call_prohibits_vectorization(self):
-        # think about this
-        py.test.skip("")
-        ops = """
-        [p31, i32, p3, i33, f10, p24, p34, p35, i19, p5, i36, p37, i28, f13, 
i29, i15]
-        guard_early_exit() [p5,p37,p34,p3,p24,i32,p35,i36,i33,f10,p31,i19]
-        f38 = raw_load(i28, i33, descr=floatarraydescr)
-        
guard_not_invalidated()[p5,p37,p34,p3,p24,f38,i32,p35,i36,i33,None,p31,i19]
-        i39 = int_add(i33, 8) 
-        f40 = float_mul(f38, 0.0)
-        i41 = float_eq(f40, f40)
-        guard_true(i41) 
[p5,p37,p34,p3,p24,f13,f38,i39,i32,p35,i36,None,None,p31,i19]
-        f42 = call(111, f38, f13, descr=writeadescr)
-        i43 = call(222, 333, descr=writeadescr)
-        f44 = float_mul(f42, 0.0)
-        i45 = float_eq(f44, f44)
-        guard_true(i45) 
[p5,p37,p34,p3,p24,f13,f38,i43,f42,i39,i32,p35,i36,None,None,p31,i19]
-        i46 = int_is_true(i43)
-        guard_false(i46) 
[p5,p37,p34,p3,p24,f13,f38,i43,f42,i39,i32,p35,i36,None,None,p31,i19]
-        raw_store(i29, i36, f42, descr=floatarraydescr)
-        i47 = int_add(i19, 1)
-        i48 = int_add(i36, 8)
-        i49 = int_ge(i47, i15)
-        guard_false(i49) 
[p5,p37,p34,p3,p24,i47,f38,i48,i39,i32,p35,None,None,None,p31,None]
-        jump(p31, i32, p3, i39, f38, p24, p34, p35, i47, p5, i48, p37, i28, 
f13, i29, i15)
-        """
-        try:
-            vopt = self.vectorize(self.parse_loop(ops))
-            self.debug_print_operations(vopt.loop)
-            py.test.fail("this loop should not be vectorized")
-        except NotAVectorizeableLoop:
-            pass
-
     def test_truediv_abs_neg_float(self):
         ops = """
         [f9,p10,i11,p4,i12,p2,p5,p13,i14,p7,i15,p8,i16,f17,i18,i19]
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py 
b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -229,18 +229,17 @@
                 # that are needed to resume.
                 if copied_op.is_guard():
                     assert isinstance(copied_op, GuardResOp)
-                    target_guard = copied_op
                     descr = copied_op.getdescr()
-                    assert isinstance(descr, ResumeGuardDescr)
-                    copied_op.setdescr(descr.clone())
-                    descr = target_guard.getdescr()
-                    # copy failargs/snapshot
-                    copied_op.rd_snapshot = \
-                      renamer.rename_rd_snapshot(copied_op.rd_snapshot,
-                                                 clone=True)
-                    renamed_failargs = \
-                        renamer.rename_failargs(copied_op, clone=True)
-                    copied_op.setfailargs(renamed_failargs)
+                    if descr:
+                        assert isinstance(descr, ResumeGuardDescr)
+                        copied_op.setdescr(descr.clone())
+                        # copy failargs/snapshot
+                        copied_op.rd_snapshot = \
+                          renamer.rename_rd_snapshot(copied_op.rd_snapshot,
+                                                     clone=True)
+                        renamed_failargs = \
+                            renamer.rename_failargs(copied_op, clone=True)
+                        copied_op.setfailargs(renamed_failargs)
                 #
                 self.emit_unrolled_operation(copied_op)
 
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to