[pypy-commit] pypy vecopt: added a new resop class for casting operations, added a test to ensure they are created correctly

plan_rich Wed, 24 Jun 2015 03:03:52 -0700

Author: Richard Plangger <r...@pasra.at>
Branch: vecopt
Changeset: r78286:e812d5febce9
Date: 2015-06-24 10:36 +0200
http://bitbucket.org/pypy/pypy/changeset/e812d5febce9/


Log:    added a new resop class for casting operations, added a test to
        ensure they are created correctly

diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2683,14 +2683,7 @@
         tosize = tosizeloc.value
         if size == tosize:
             return # already the right size
-        if size == 4 and tosize == 2:
-            scratch = X86_64_SCRATCH_REG
-            self.mc.PSHUFLW_xxi(resloc.value, srcloc.value, 0b11111000)
-            self.mc.PEXTRW_rxi(scratch.value, srcloc.value, 4)
-            self.mc.PINSRW_xri(resloc.value, scratch.value, 2)
-            self.mc.PEXTRW_rxi(scratch.value, srcloc.value, 6)
-            self.mc.PINSRW_xri(resloc.value, scratch.value, 3)
-        elif size == 4 and tosize == 8:
+        if size == 4 and tosize == 8:
             scratch = X86_64_SCRATCH_REG.value
             self.mc.PEXTRD_rxi(scratch, srcloc.value, 1)
             self.mc.PINSRQ_xri(resloc.value, scratch, 1)
@@ -2704,7 +2697,13 @@
             self.mc.PEXTRQ_rxi(scratch, srcloc.value, 1)
             self.mc.PINSRD_xri(resloc.value, scratch, 1)
         else:
-            raise NotImplementedError("sign ext missing: " + str(size) + " -> " + str(tosize))
+            # note that all other conversions are not implemented
+            # on purpose. it needs many x86 op codes to implement
+            # the missing combinations. even if they are implemented
+            # the speedup might only be modest...
+            # the optimization does not emit such code!
+            msg = "vec int signext (%d->%d)" % (size, tosize)
+            raise NotImplementedError(msg)
 
     def genop_vec_float_expand(self, op, arglocs, resloc):
         srcloc, sizeloc = arglocs
@@ -2716,6 +2715,8 @@
             self.mc.SHUFPS_xxi(resloc.value, srcloc.value, 0)
         elif size == 8:
             self.mc.MOVDDUP(resloc, srcloc)
+        else:
+            raise AssertionError("float of size %d not supported" % (size,))
 
     def genop_vec_int_expand(self, op, arglocs, resloc):
         srcloc, sizeloc = arglocs
@@ -2737,7 +2738,7 @@
             self.mc.PINSRQ_xri(resloc.value, srcloc.value, 0)
             self.mc.PINSRQ_xri(resloc.value, srcloc.value, 1)
         else:
-            raise NotImplementedError("missing size %d for int expand" % (size,))
+            raise AssertionError("cannot handle size %d (int expand)" % (size,))
 
     def genop_vec_int_pack(self, op, arglocs, resloc):
         resultloc, sourceloc, residxloc, srcidxloc, countloc, sizeloc = arglocs
@@ -2748,7 +2749,9 @@
         residx = residxloc.value
         count = countloc.value
         # for small data type conversion this can be quite costy
-        # j = pack(i,4,4)
+        # NOTE there might be some combinations that can be handled
+        # more efficiently! e.g.
+        # v2 = pack(v0,v1,4,4)
         si = srcidx
         ri = residx
         k = count
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -642,15 +642,15 @@
 
     def profitable_pack(self, lnode, rnode, origin_pack):
         lpacknode = origin_pack.left
-        if self.prohibit_packing(lpacknode.getoperation(), lnode.getoperation()):
+        if self.prohibit_packing(origin_pack, lpacknode.getoperation(), lnode.getoperation()):
             return False
         rpacknode = origin_pack.right
-        if self.prohibit_packing(rpacknode.getoperation(), rnode.getoperation()):
+        if self.prohibit_packing(origin_pack, rpacknode.getoperation(), rnode.getoperation()):
             return False
 
         return True
 
-    def prohibit_packing(self, packed, inquestion):
+    def prohibit_packing(self, pack, packed, inquestion):
         """ Blocks the packing of some operations """
         if inquestion.vector == -1:
             return True
@@ -658,10 +658,15 @@
             if packed.getarg(1) == inquestion.result:
                 return True
         if inquestion.casts_box():
-            #input_type = packed.output_type
-            #if not input_type:
-            #    return True
-            pass
+            # prohibit the packing of signext calls that
+            # cast to int16/int8.
+            input_type = pack.output_type
+            if input_type:
+                py.test.set_trace()
+                insize = input_type.getsize()
+                outtype,outsize = inquestion.cast_to()
+                if outsize < 4 and insize != outsize:
+                    return True
         return False
 
     def combine(self, i, j):
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -1,6 +1,5 @@
 from rpython.rlib.objectmodel import we_are_translated
 
-
 def ResOperation(opnum, args, result, descr=None):
     cls = opclasses[opnum]
     op = cls(result)
@@ -26,6 +25,7 @@
     boolreflex = -1
     boolinverse = -1
     vector = -1
+    casts = ('\x00', -1, '\x00', -1)
 
     _attrs_ = ('result',)
 
@@ -190,12 +190,13 @@
         return self._cls_has_bool_result
 
     def casts_box(self):
-        opnum = self.getopnum()
-        return opnum == rop.INT_SIGNEXT or \
-               rop.CAST_FLOAT_TO_INT <= opnum <= rop.CAST_SINGLEFLOAT_TO_FLOAT or \
-               rop._VEC_CAST_FIRST <= opnum <= rop._VEC_CAST_LAST or \
-               rop.CAST_PTR_TO_INT == opnum or \
-               rop.CAST_INT_TO_PTR == opnum
+        return False
+
+    def cast_to(self):
+        return ('\x00',-1)
+
+    def cast_from(self):
+        return ('\x00',-1)
 
 # ===================
 # Top of the hierachy
@@ -204,6 +205,23 @@
 class PlainResOp(AbstractResOp):
     pass
 
+class CastResOp(AbstractResOp):
+    def casts_box(self):
+        return True
+
+    def cast_to(self):
+        _, _, to_type, size = self.casts
+        if self.casts[3] == 0:
+            if self.getopnum() == rop.INT_SIGNEXT:
+                arg = self.getarg(1)
+                assert isinstance(arg, ConstInt)
+                return (to_type,arg.value)
+            else:
+                raise NotImplementedError
+        return (to_type,size)
+
+    def cast_from(self):
+        return ('\x00',-1)
 
 class ResOpWithDescr(AbstractResOp):
 
@@ -629,6 +647,20 @@
     '_LAST',     # for the backend to add more internal operations
 ]
 
+FLOAT = 'f'
+INT = 'i'
+_cast_ops = {
+    'INT_SIGNEXT': (INT, 0, INT, 0),
+    'CAST_FLOAT_TO_INT': (FLOAT, 8, INT, 4),
+    'CAST_INT_TO_FLOAT': (INT, 4, FLOAT, 8),
+    'CAST_FLOAT_TO_SINGLEFLOAT': (FLOAT, 8, FLOAT, 4),
+    'CAST_SINGLEFLOAT_TO_FLOAT': (FLOAT, 4, FLOAT, 8),
+    'CAST_PTR_TO_INT': (INT, 0, INT, 4),
+    'CAST_INT_TO_PTR': (INT, 4, INT, 0),
+}
+del FLOAT
+del INT
+
 # ____________________________________________________________
 
 class rop(object):
@@ -639,7 +671,6 @@
 oparity = []     # mapping numbers to the arity of the operation or -1
 opwithdescr = [] # mapping numbers to a flag "takes a descr"
 
-
 def setup(debug_print=False):
     for i, name in enumerate(_oplist):
         if debug_print:
@@ -691,6 +722,8 @@
     if is_guard:
         assert withdescr
         baseclass = GuardResOp
+    elif name in _cast_ops:
+        baseclass = CastResOp
     elif withdescr:
         baseclass = ResOpWithDescr
     else:
@@ -780,21 +813,26 @@
     rop.CAST_FLOAT_TO_INT: rop.VEC_CAST_FLOAT_TO_INT,
 }
 
+
 def setup2():
     for cls in opclasses:
         if cls is None:
             continue
         opnum = cls.opnum
+        name = opname[opnum]
         if opnum in _opboolreflex:
             cls.boolreflex = _opboolreflex[opnum]
         if opnum in _opboolinverse:
             cls.boolinverse = _opboolinverse[opnum]
         if opnum in _opvector:
             cls.vector = _opvector[opnum]
+        if name in _cast_ops:
+            cls.casts = _cast_ops[name]
 setup2()
 del _opboolinverse
 del _opboolreflex
 del _opvector
+del _cast_ops
 
 def get_deep_immutable_oplist(operations):
     """
diff --git a/rpython/jit/metainterp/test/test_resoperation.py b/rpython/jit/metainterp/test/test_resoperation.py
--- a/rpython/jit/metainterp/test/test_resoperation.py
+++ b/rpython/jit/metainterp/test/test_resoperation.py
@@ -83,3 +83,12 @@
     py.test.raises(TypeError, "newops[0] = 'foobar'")
     py.test.raises(AssertionError, "newops[0].setarg(0, 'd')")
     py.test.raises(AssertionError, "newops[0].setdescr('foobar')")
+
+def test_cast_ops():
+    op = rop.ResOperation(rop.rop.INT_SIGNEXT, ['a', 1], 'c')
+    assert op.casts_box()
+    assert isinstance(op, rop.CastResOp)
+    assert op.cast_to == ('i',1)
+    op = rop.ResOperation(rop.rop.CAST_FLOAT_TO_INT, ['a'], 'c')
+    assert op.casts_box()
+    assert isinstance(op, rop.CastResOp)
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy vecopt: added a new resop class for casting operations, added a test to ensure they are created correctly

Reply via email to