Author: Richard Plangger <r...@pasra.at>
Branch: vecopt
Changeset: r78299:269e30fb6042
Date: 2015-06-24 17:31 +0200
http://bitbucket.org/pypy/pypy/changeset/269e30fb6042/

Log:    Add guards as vector instructions. I'm not yet sure how this will
        work out, but it could help to generate better loops for reductions.

diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -553,7 +553,7 @@
     def test_all(self):
         result = self.run("all")
         assert result == 1
-        self.check_vectorized(1, 0) # success?
+        self.check_vectorized(1, 1)
 
     def define_logical_xor_reduce():
         return """
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -603,6 +603,18 @@
     def determine_output_type(self, op):
         return None
 
+class PassThroughOp(OpToVectorOp):
+    """ This pass through is only applicable if the target
+    operation is capable of handling vector operations.
+    Guard true/false is such an example.
+    """
+    def __init__(self, args):
+        OpToVectorOp.__init__(self, args, None)
+
+    def determine_output_type(self, op):
+        return None
+
+GUARD_TF = PassThroughOp((PT_INT_GENERIC,))
 INT_OP_TO_VOP = OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC), INT_RES)
 FLOAT_OP_TO_VOP = OpToVectorOp((PT_FLOAT_GENERIC, PT_FLOAT_GENERIC), FLOAT_RES)
 FLOAT_SINGLE_ARG_OP_TO_VOP = OpToVectorOp((PT_FLOAT_GENERIC,), FLOAT_RES)
@@ -637,6 +649,9 @@
     rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpToVectorOpConv(PT_FLOAT_2, 
PT_DOUBLE_2),
     rop.VEC_CAST_FLOAT_TO_INT: OpToVectorOpConv(PT_DOUBLE_2, PT_INT32_2),
     rop.VEC_CAST_INT_TO_FLOAT: OpToVectorOpConv(PT_INT32_2, PT_DOUBLE_2),
+
+    rop.GUARD_TRUE: GUARD_TF,
+    rop.GUARD_FALSE: GUARD_TF,
 }
 
 def determine_output_type(node, input_type):
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
@@ -16,7 +16,12 @@
 
 class SchedulerBaseTest(DependencyBaseTest):
 
-    def parse(self, source, inc_label_jump=True):
+    def parse(self, source, inc_label_jump=True,
+              pargs=2,
+              iargs=10,
+              fargs=6,
+              additional_args=None,
+              replace_args=None):
         ns = {
             'double': self.floatarraydescr,
             'float': self.singlefloatarraydescr,
@@ -25,10 +30,24 @@
             'short': self.int16arraydescr,
             'char': self.chararraydescr,
         }
-        loop = opparse("        
[p0,p1,p2,p3,p4,p5,i0,i1,i2,i3,i4,i5,i6,i7,i8,i9,f0,f1,f2,f3,f4,f5,v103204[i32|4]]\n"
 + source + \
-                       "\n        
jump(p0,p1,p2,p3,p4,p5,i0,i1,i2,i3,i4,i5,i6,i7,i8,i9,f0,f1,f2,f3,f4,f5,v103204[i32|4])",
-                       cpu=self.cpu,
-                       namespace=ns)
+        args = []
+        for prefix, rang in [('p',range(pargs)), ('i',range(iargs)), 
('f',range(fargs))]:
+            for i in rang:
+                args.append(prefix + str(i))
+
+        assert additional_args is None or isinstance(additional_args,list)
+        for arg in additional_args or []:
+            args.append(arg)
+        for k,v in (replace_args or {}).items():
+            for i,_ in enumerate(args):
+                if k == args[i]:
+                    args[i] = v
+                    break
+        indent = "        "
+        joinedargs = ','.join(args)
+        fmt = (indent, joinedargs, source, indent, joinedargs)
+        src = "%s[%s]\n%s\n%sjump(%s)" % fmt
+        loop = opparse(src, cpu=self.cpu, namespace=ns)
         if inc_label_jump:
             token = JitCellToken()
             loop.operations = \
@@ -163,21 +182,19 @@
                 return arg
         raise Exception("could not find %s in args %s" % (name, 
loop.inputargs))
 
-    def test_signext_int16(self):
+    def test_signext_int32(self):
         loop1 = self.parse("""
-        i10 = int_signext(i1, 2)
-        i11 = int_signext(i1, 2)
-        i12 = int_signext(i1, 2)
-        i13 = int_signext(i1, 2)
-        """)
-        pack1 = self.pack(loop1, 0, 4)
-        v103204 = self.find_input_arg('v103204', loop1)
-        def i1inv103204(var):
-            return 0, v103204
+        i10 = int_signext(i1, 4)
+        i11 = int_signext(i1, 4)
+        """, additional_args=['v10[i64|2]'])
+        pack1 = self.pack(loop1, 0, 2)
+        var = self.find_input_arg('v10', loop1)
+        def i1inv103204(v):
+            return 0, var
         loop2 = self.schedule(loop1, [pack1], prepend_invariant=True, 
getvboxfunc=i1inv103204)
         loop3 = self.parse("""
-        v11[i16|4] = vec_int_signext(v103204[i32|4], 2)
-        """, False)
+        v11[i32|2] = vec_int_signext(v10[i64|2], 4)
+        """, False, additional_args=['v10[i64|2]'])
         self.assert_equal(loop2, loop3)
 
     def test_cast_float_to_int(self):
@@ -275,13 +292,12 @@
         self.assert_equal(loop2, loop3)
 
     def test_all(self):
-        py.test.skip("this could be an improvement")
         loop1 = self.parse("""
         i10 = raw_load(p0, i1, descr=long)
         i11 = raw_load(p0, i2, descr=long)
         #
-        i12 = int_and(i10, i6)
-        i13 = int_and(i11, i12)
+        i12 = int_and(i10, 255)
+        i13 = int_and(i11, 255)
         #
         guard_true(i12) []
         guard_true(i13) []
@@ -289,9 +305,10 @@
         pack1 = self.pack(loop1, 0, 2)
         pack2 = self.pack(loop1, 2, 4)
         pack3 = self.pack(loop1, 4, 6)
-        loop2 = self.schedule(loop1, [pack1,pack2,pack3])
+        loop2 = self.schedule(loop1, [pack1,pack2,pack3], 
prepend_invariant=True)
         loop3 = self.parse("""
-        v10[i64|2] = vec_raw_load(p0, i1, 2, descr=long) 
+        v9[i64|2] = vec_int_expand(255)
+        v10[i64|2] = vec_raw_load(p0, i1, 2, descr=long)
         v11[i64|2] = vec_int_and(v10[i64|2], v9[i64|2])
         guard_true(v11[i64|2]) []
         """, False)
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -812,6 +812,10 @@
     rop.CAST_SINGLEFLOAT_TO_FLOAT: rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT,
     rop.CAST_INT_TO_FLOAT: rop.VEC_CAST_INT_TO_FLOAT,
     rop.CAST_FLOAT_TO_INT: rop.VEC_CAST_FLOAT_TO_INT,
+
+    # guard
+    rop.GUARD_TRUE: rop.GUARD_TRUE,
+    rop.GUARD_FALSE: rop.GUARD_FALSE,
 }
 
 
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to