Author: Richard Plangger <planri...@gmail.com>
Branch: vecopt-merge
Changeset: r79885:bb3eebb00aa2
Date: 2015-09-28 21:54 +0200
http://bitbucket.org/pypy/pypy/changeset/bb3eebb00aa2/

Log:    wunderbar! all but one test_zjit test passing

diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -18,7 +18,7 @@
     from rpython.jit.metainterp import pyjitpl
     return pyjitpl._warmrunnerdesc.metainterp_sd.profiler
 
-class TestNumpyJit(Jit386Mixin):
+class TestNumpyJit(LLJitMixin):
     enable_opts = "intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll"
     graph = None
     interp = None
@@ -99,11 +99,6 @@
                                              backendopt=True,
                                              graph_and_interp_only=True,
                                              ProfilerClass=Profiler,
-                                             translate_support_code=True,
-                                             translationoptions={'gc':'minimark',
-                                                                 'gcrootfinder': 'asmgcc',
-                                                                 'gcremovetypeptr': False
-                                                                },
                                              vec=True)
             self.__class__.interp = interp
             self.__class__.graph = graph
@@ -120,8 +115,6 @@
         self.compile_graph()
         profiler = get_profiler()
         profiler.start()
-        from rpython.jit.metainterp import pyjitpl
-        pyjitpl._warmrunnerdesc.jitcounter = counter.DeterministicJitCounter()
         reset_jit()
         i = self.code_mapping[name]
         retval = self.interp.eval_graph(self.graph, [i])
@@ -165,7 +158,7 @@
     def test_float32_add(self):
         result = self.run("float32_add")
         self.assert_float_equal(result, 15.0 + 15.0)
-        self.check_vectorized(1, 1)
+        self.check_vectorized(2, 2)
 
     def define_float_add():
         return """
@@ -198,7 +191,7 @@
     def test_float32_add_const(self):
         result = self.run("float32_add_const")
         self.assert_float_equal(result, 29.0 + 77.345)
-        self.check_vectorized(1, 1)
+        self.check_vectorized(2, 2)
 
     def define_float_add_const():
         return """
@@ -240,7 +233,7 @@
     def test_int_expand(self):
         result = self.run("int_expand")
         assert int(result) == 7+16+8+16
-        self.check_vectorized(1, 1)
+        self.check_vectorized(2, 2)
 
     def define_int32_expand():
         return """
@@ -255,7 +248,7 @@
     def test_int32_expand(self):
         result = self.run("int32_expand")
         assert int(result) == 7+16+8+16
-        self.check_vectorized(2, 2)
+        self.check_vectorized(2, 1)
 
     def define_int16_expand():
         return """
@@ -271,7 +264,7 @@
         i = 8
         assert int(result) == i*16 + sum(range(7,7+i))
         # currently is is not possible to accum for types with < 8 bytes
-        self.check_vectorized(3, 1)
+        self.check_vectorized(3, 0)
 
     def define_int8_expand():
         return """
@@ -289,7 +282,7 @@
         # neither does sum
         # a + c should work, but it is given as a parameter
         # thus the accum must handle this!
-        self.check_vectorized(3, 1)
+        self.check_vectorized(3, 0)
 
     def define_int32_add_const():
         return """
@@ -306,7 +299,7 @@
     def test_int32_add_const(self):
         result = self.run("int32_add_const")
         assert int(result) == 7+1+8+1+11+2+12+2
-        self.check_vectorized(1, 1)
+        self.check_vectorized(2, 2)
 
     def define_float_mul_array():
         return """
@@ -338,7 +331,7 @@
     def test_int32_mul_array(self):
         result = self.run("int32_mul_array")
         assert int(result) == 7*7+8*8+11*11+12*12
-        self.check_vectorized(1, 1)
+        self.check_vectorized(2, 2)
 
     def define_float32_mul_array():
         return """
@@ -366,7 +359,7 @@
     def test_conversion(self):
         result = self.run("conversion")
         assert result == sum(range(30)) + sum(range(30))
-        self.check_vectorized(2, 2) # only sum and astype(int) succeed
+        self.check_vectorized(4, 2) # only sum and astype(int) succeed
 
     def define_sum():
         return """
@@ -396,7 +389,7 @@
     def test_sum_int(self):
         result = self.run("sum_int")
         assert result == sum(range(65))
-        self.check_vectorized(1, 1)
+        self.check_vectorized(2, 2)
 
     def define_sum_multi():
         return """
@@ -420,7 +413,9 @@
     def test_sum_float_to_int16(self):
         result = self.run("sum_float_to_int16")
         assert result == sum(range(30))
-        self.check_vectorized(1, 0)
+        # one can argue that this is not desired,
+        # but unpacking exactly hits savings = 0
+        self.check_vectorized(1, 1)
     def define_sum_float_to_int32():
         return """
         a = |30|
@@ -504,7 +499,7 @@
         retval = self.interp.eval_graph(self.graph, [i])
         # check that we got only one loop
         assert len(get_stats().loops) == 1
-        self.check_vectorized(2, 1)
+        self.check_vectorized(3, 1)
 
     def define_prod():
         return """
@@ -823,7 +818,7 @@
         result = self.run("dot")
         assert result == 184
         self.check_trace_count(4)
-        self.check_vectorized(3,1)
+        self.check_vectorized(1,1)
 
     def define_argsort():
         return """
@@ -923,7 +918,7 @@
     def test_dot_matrix(self):
         result = self.run("dot_matrix")
         assert int(result) == 86
-        self.check_vectorized(2, 1)
+        self.check_vectorized(1, 1)
 
 
     # NOT WORKING
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -9,6 +9,7 @@
 from rpython.rlib.objectmodel import we_are_translated
 from rpython.jit.metainterp.jitexc import NotAProfitableLoop
 from rpython.rlib.objectmodel import specialize, always_inline
+from rpython.jit.metainterp.jitexc import NotAVectorizeableLoop, NotAProfitableLoop
 
 
 class SchedulerState(object):
@@ -206,6 +207,25 @@
             return self.count
         return count
 
+class OpRestrict(object):
+    def __init__(self, argument_restris):
+        self.argument_restrictions = argument_restris
+
+    def check_operation(self, state, pack, op):
+        pass
+
+class OpMatchSizeTypeFirst(OpRestrict):
+    def check_operation(self, state, pack, op):
+        arg0 = op.getarg(0)
+        bytesize = arg0.bytesize
+        datatype = arg0.datatype
+
+        for arg in op.getarglist():
+            if arg.bytesize != bytesize:
+                raise NotAVectorizeableLoop()
+            if arg.datatype != datatype:
+                raise NotAVectorizeableLoop()
+
 class trans(object):
 
     TR_ANY = TypeRestrict()
@@ -215,43 +235,46 @@
     TR_DOUBLE_2 = TypeRestrict(FLOAT, 8, 2)
     TR_INT32_2 = TypeRestrict(INT, 4, 2)
 
+    OR_MSTF_I = OpMatchSizeTypeFirst([TR_ANY_INTEGER, TR_ANY_INTEGER])
+    OR_MSTF_F = OpMatchSizeTypeFirst([TR_ANY_FLOAT, TR_ANY_FLOAT])
+
     # note that the following definition is x86 arch specific
     MAPPING = {
-        rop.VEC_INT_ADD:            [TR_ANY_INTEGER, TR_ANY_INTEGER],
-        rop.VEC_INT_SUB:            [TR_ANY_INTEGER, TR_ANY_INTEGER],
-        rop.VEC_INT_MUL:            [TR_ANY_INTEGER, TR_ANY_INTEGER],
-        rop.VEC_INT_AND:            [TR_ANY_INTEGER, TR_ANY_INTEGER],
-        rop.VEC_INT_OR:             [TR_ANY_INTEGER, TR_ANY_INTEGER],
-        rop.VEC_INT_XOR:            [TR_ANY_INTEGER, TR_ANY_INTEGER],
-        rop.VEC_INT_EQ:             [TR_ANY_INTEGER, TR_ANY_INTEGER],
-        rop.VEC_INT_NE:             [TR_ANY_INTEGER, TR_ANY_INTEGER],
+        rop.VEC_INT_ADD:            OR_MSTF_I,
+        rop.VEC_INT_SUB:            OR_MSTF_I,
+        rop.VEC_INT_MUL:            OR_MSTF_I,
+        rop.VEC_INT_AND:            OR_MSTF_I,
+        rop.VEC_INT_OR:             OR_MSTF_I,
+        rop.VEC_INT_XOR:            OR_MSTF_I,
+        rop.VEC_INT_EQ:             OR_MSTF_I,
+        rop.VEC_INT_NE:             OR_MSTF_I,
 
-        rop.VEC_FLOAT_ADD:          [TR_ANY_FLOAT, TR_ANY_FLOAT],
-        rop.VEC_FLOAT_SUB:          [TR_ANY_FLOAT, TR_ANY_FLOAT],
-        rop.VEC_FLOAT_MUL:          [TR_ANY_FLOAT, TR_ANY_FLOAT],
-        rop.VEC_FLOAT_TRUEDIV:      [TR_ANY_FLOAT, TR_ANY_FLOAT],
-        rop.VEC_FLOAT_ABS:          [TR_ANY_FLOAT],
-        rop.VEC_FLOAT_NEG:          [TR_ANY_FLOAT],
+        rop.VEC_FLOAT_ADD:          OR_MSTF_F,
+        rop.VEC_FLOAT_SUB:          OR_MSTF_F,
+        rop.VEC_FLOAT_MUL:          OR_MSTF_F,
+        rop.VEC_FLOAT_TRUEDIV:      OR_MSTF_F,
+        rop.VEC_FLOAT_ABS:          OpRestrict([TR_ANY_FLOAT]),
+        rop.VEC_FLOAT_NEG:          OpRestrict([TR_ANY_FLOAT]),
 
-        rop.VEC_RAW_STORE:          [None, None, TR_ANY],
-        rop.VEC_SETARRAYITEM_RAW:   [None, None, TR_ANY],
-        rop.VEC_SETARRAYITEM_GC:    [None, None, TR_ANY],
+        rop.VEC_RAW_STORE:          OpRestrict([None, None, TR_ANY]),
+        rop.VEC_SETARRAYITEM_RAW:   OpRestrict([None, None, TR_ANY]),
+        rop.VEC_SETARRAYITEM_GC:    OpRestrict([None, None, TR_ANY]),
 
-        rop.GUARD_TRUE:             [TR_ANY_INTEGER],
-        rop.GUARD_FALSE:            [TR_ANY_INTEGER],
+        rop.GUARD_TRUE:             OpRestrict([TR_ANY_INTEGER]),
+        rop.GUARD_FALSE:            OpRestrict([TR_ANY_INTEGER]),
 
         ## irregular
-        rop.VEC_INT_SIGNEXT:        [TR_ANY_INTEGER],
+        rop.VEC_INT_SIGNEXT:        OpRestrict([TR_ANY_INTEGER]),
 
-        rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT:  [TR_DOUBLE_2],
+        rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT:  OpRestrict([TR_DOUBLE_2]),
         # weird but the trace will store single floats in int boxes
-        rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT:  [TR_INT32_2],
-        rop.VEC_CAST_FLOAT_TO_INT:          [TR_DOUBLE_2],
-        rop.VEC_CAST_INT_TO_FLOAT:          [TR_INT32_2],
+        rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT:  OpRestrict([TR_INT32_2]),
+        rop.VEC_CAST_FLOAT_TO_INT:          OpRestrict([TR_DOUBLE_2]),
+        rop.VEC_CAST_INT_TO_FLOAT:          OpRestrict([TR_INT32_2]),
 
-        rop.VEC_FLOAT_EQ:           [TR_ANY_FLOAT,TR_ANY_FLOAT],
-        rop.VEC_FLOAT_NE:           [TR_ANY_FLOAT,TR_ANY_FLOAT],
-        rop.VEC_INT_IS_TRUE:        [TR_ANY_INTEGER,TR_ANY_INTEGER],
+        rop.VEC_FLOAT_EQ:           OpRestrict([TR_ANY_FLOAT,TR_ANY_FLOAT]),
+        rop.VEC_FLOAT_NE:           OpRestrict([TR_ANY_FLOAT,TR_ANY_FLOAT]),
+        rop.VEC_INT_IS_TRUE:        OpRestrict([TR_ANY_INTEGER,TR_ANY_INTEGER]),
     }
 
 def turn_into_vector(state, pack):
@@ -259,6 +282,9 @@
     check_if_pack_supported(state, pack)
     state.costmodel.record_pack_savings(pack, pack.numops())
     left = pack.leftmost()
+    oprestrict = trans.MAPPING.get(pack.leftmost().vector, None)
+    if oprestrict is not None:
+        oprestrict.check_operation(state, pack, left)
     args = left.getarglist_copy()
     prepare_arguments(state, pack, args)
     vecop = VecOperation(left.vector, args, left,
@@ -287,9 +313,10 @@
     #    a) expand vars/consts before the label and add as argument
     #    b) expand vars created in the loop body
     #
-    restrictions = trans.MAPPING.get(pack.leftmost().vector, [])
-    if not restrictions:
+    oprestrict = trans.MAPPING.get(pack.leftmost().vector, None)
+    if not oprestrict:
         return
+    restrictions = oprestrict.argument_restrictions
     for i,arg in enumerate(args):
         if i >= len(restrictions) or restrictions[i] is None:
             # ignore this argument
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
@@ -1234,6 +1234,23 @@
         vopt = self.vectorize(trace)
         self.assert_equal(trace, trace_opt)
 
+    def test_sum_int16_prevent(self):
+        trace = self.parse_loop("""
+        [i0, p1, i2, p3, i4, i5, i6]
+        i7 = raw_load_i(i5, i4, descr=int16arraydescr)
+        i8 = int_add(i0, i7)
+        i10 = int_add(i2, 1)
+        i12 = int_add(i4, 2)
+        i13 = int_ge(i10, i6)
+        guard_false(i13, descr=<rpython.jit.metainterp.compile.ResumeGuardFalseDescr object at 0x7fe5a1848150>) [p3, i10, i8, i12, None, p1, None, None]
+        jump(i8, p1, i10, p3, i12, i5, i6)
+        """)
+        try:
+            vopt = self.vectorize(trace)
+            py.test.fail()
+        except NotAVectorizeableLoop:
+            pass
+
     def test_axis_sum(self):
         # TODO
         trace = """
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to