Author: Richard Plangger <[email protected]>
Branch: vecopt
Changeset: r78286:e812d5febce9
Date: 2015-06-24 10:36 +0200
http://bitbucket.org/pypy/pypy/changeset/e812d5febce9/
Log: added a new resop class for casting operations, added a test to
ensure they are created correctly
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2683,14 +2683,7 @@
tosize = tosizeloc.value
if size == tosize:
return # already the right size
- if size == 4 and tosize == 2:
- scratch = X86_64_SCRATCH_REG
- self.mc.PSHUFLW_xxi(resloc.value, srcloc.value, 0b11111000)
- self.mc.PEXTRW_rxi(scratch.value, srcloc.value, 4)
- self.mc.PINSRW_xri(resloc.value, scratch.value, 2)
- self.mc.PEXTRW_rxi(scratch.value, srcloc.value, 6)
- self.mc.PINSRW_xri(resloc.value, scratch.value, 3)
- elif size == 4 and tosize == 8:
+ if size == 4 and tosize == 8:
scratch = X86_64_SCRATCH_REG.value
self.mc.PEXTRD_rxi(scratch, srcloc.value, 1)
self.mc.PINSRQ_xri(resloc.value, scratch, 1)
@@ -2704,7 +2697,13 @@
self.mc.PEXTRQ_rxi(scratch, srcloc.value, 1)
self.mc.PINSRD_xri(resloc.value, scratch, 1)
else:
- raise NotImplementedError("sign ext missing: " + str(size) + " -> " + str(tosize))
+ # note that all other conversions are not implemented
+ # on purpose. it needs many x86 op codes to implement
+ # the missing combinations. even if they are implemented
+ # the speedup might only be modest...
+ # the optimization does not emit such code!
+ msg = "vec int signext (%d->%d)" % (size, tosize)
+ raise NotImplementedError(msg)
def genop_vec_float_expand(self, op, arglocs, resloc):
srcloc, sizeloc = arglocs
@@ -2716,6 +2715,8 @@
self.mc.SHUFPS_xxi(resloc.value, srcloc.value, 0)
elif size == 8:
self.mc.MOVDDUP(resloc, srcloc)
+ else:
+ raise AssertionError("float of size %d not supported" % (size,))
def genop_vec_int_expand(self, op, arglocs, resloc):
srcloc, sizeloc = arglocs
@@ -2737,7 +2738,7 @@
self.mc.PINSRQ_xri(resloc.value, srcloc.value, 0)
self.mc.PINSRQ_xri(resloc.value, srcloc.value, 1)
else:
- raise NotImplementedError("missing size %d for int expand" % (size,))
+ raise AssertionError("cannot handle size %d (int expand)" % (size,))
def genop_vec_int_pack(self, op, arglocs, resloc):
resultloc, sourceloc, residxloc, srcidxloc, countloc, sizeloc = arglocs
@@ -2748,7 +2749,9 @@
residx = residxloc.value
count = countloc.value
# for small data type conversion this can be quite costy
- # j = pack(i,4,4)
+ # NOTE there might be some combinations that can be handled
+ # more efficiently! e.g.
+ # v2 = pack(v0,v1,4,4)
si = srcidx
ri = residx
k = count
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -642,15 +642,15 @@
def profitable_pack(self, lnode, rnode, origin_pack):
lpacknode = origin_pack.left
- if self.prohibit_packing(lpacknode.getoperation(), lnode.getoperation()):
+ if self.prohibit_packing(origin_pack, lpacknode.getoperation(), lnode.getoperation()):
return False
rpacknode = origin_pack.right
- if self.prohibit_packing(rpacknode.getoperation(), rnode.getoperation()):
+ if self.prohibit_packing(origin_pack, rpacknode.getoperation(), rnode.getoperation()):
return False
return True
- def prohibit_packing(self, packed, inquestion):
+ def prohibit_packing(self, pack, packed, inquestion):
""" Blocks the packing of some operations """
if inquestion.vector == -1:
return True
@@ -658,10 +658,15 @@
if packed.getarg(1) == inquestion.result:
return True
if inquestion.casts_box():
- #input_type = packed.output_type
- #if not input_type:
- # return True
- pass
+ # prohibit the packing of signext calls that
+ # cast to int16/int8.
+ input_type = pack.output_type
+ if input_type:
+ py.test.set_trace()
+ insize = input_type.getsize()
+ outtype,outsize = inquestion.cast_to()
+ if outsize < 4 and insize != outsize:
+ return True
return False
def combine(self, i, j):
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -1,6 +1,5 @@
from rpython.rlib.objectmodel import we_are_translated
-
def ResOperation(opnum, args, result, descr=None):
cls = opclasses[opnum]
op = cls(result)
@@ -26,6 +25,7 @@
boolreflex = -1
boolinverse = -1
vector = -1
+ casts = ('\x00', -1, '\x00', -1)
_attrs_ = ('result',)
@@ -190,12 +190,13 @@
return self._cls_has_bool_result
def casts_box(self):
- opnum = self.getopnum()
- return opnum == rop.INT_SIGNEXT or \
- rop.CAST_FLOAT_TO_INT <= opnum <= rop.CAST_SINGLEFLOAT_TO_FLOAT or \
- rop._VEC_CAST_FIRST <= opnum <= rop._VEC_CAST_LAST or \
- rop.CAST_PTR_TO_INT == opnum or \
- rop.CAST_INT_TO_PTR == opnum
+ return False
+
+ def cast_to(self):
+ return ('\x00',-1)
+
+ def cast_from(self):
+ return ('\x00',-1)
# ===================
# Top of the hierachy
@@ -204,6 +205,23 @@
class PlainResOp(AbstractResOp):
pass
+class CastResOp(AbstractResOp):
+ def casts_box(self):
+ return True
+
+ def cast_to(self):
+ _, _, to_type, size = self.casts
+ if self.casts[3] == 0:
+ if self.getopnum() == rop.INT_SIGNEXT:
+ arg = self.getarg(1)
+ assert isinstance(arg, ConstInt)
+ return (to_type,arg.value)
+ else:
+ raise NotImplementedError
+ return (to_type,size)
+
+ def cast_from(self):
+ return ('\x00',-1)
class ResOpWithDescr(AbstractResOp):
@@ -629,6 +647,20 @@
'_LAST', # for the backend to add more internal operations
]
+FLOAT = 'f'
+INT = 'i'
+_cast_ops = {
+ 'INT_SIGNEXT': (INT, 0, INT, 0),
+ 'CAST_FLOAT_TO_INT': (FLOAT, 8, INT, 4),
+ 'CAST_INT_TO_FLOAT': (INT, 4, FLOAT, 8),
+ 'CAST_FLOAT_TO_SINGLEFLOAT': (FLOAT, 8, FLOAT, 4),
+ 'CAST_SINGLEFLOAT_TO_FLOAT': (FLOAT, 4, FLOAT, 8),
+ 'CAST_PTR_TO_INT': (INT, 0, INT, 4),
+ 'CAST_INT_TO_PTR': (INT, 4, INT, 0),
+}
+del FLOAT
+del INT
+
# ____________________________________________________________
class rop(object):
@@ -639,7 +671,6 @@
oparity = [] # mapping numbers to the arity of the operation or -1
opwithdescr = [] # mapping numbers to a flag "takes a descr"
-
def setup(debug_print=False):
for i, name in enumerate(_oplist):
if debug_print:
@@ -691,6 +722,8 @@
if is_guard:
assert withdescr
baseclass = GuardResOp
+ elif name in _cast_ops:
+ baseclass = CastResOp
elif withdescr:
baseclass = ResOpWithDescr
else:
@@ -780,21 +813,26 @@
rop.CAST_FLOAT_TO_INT: rop.VEC_CAST_FLOAT_TO_INT,
}
+
def setup2():
for cls in opclasses:
if cls is None:
continue
opnum = cls.opnum
+ name = opname[opnum]
if opnum in _opboolreflex:
cls.boolreflex = _opboolreflex[opnum]
if opnum in _opboolinverse:
cls.boolinverse = _opboolinverse[opnum]
if opnum in _opvector:
cls.vector = _opvector[opnum]
+ if name in _cast_ops:
+ cls.casts = _cast_ops[name]
setup2()
del _opboolinverse
del _opboolreflex
del _opvector
+del _cast_ops
def get_deep_immutable_oplist(operations):
"""
diff --git a/rpython/jit/metainterp/test/test_resoperation.py b/rpython/jit/metainterp/test/test_resoperation.py
--- a/rpython/jit/metainterp/test/test_resoperation.py
+++ b/rpython/jit/metainterp/test/test_resoperation.py
@@ -83,3 +83,12 @@
py.test.raises(TypeError, "newops[0] = 'foobar'")
py.test.raises(AssertionError, "newops[0].setarg(0, 'd')")
py.test.raises(AssertionError, "newops[0].setdescr('foobar')")
+
+def test_cast_ops():
+ op = rop.ResOperation(rop.rop.INT_SIGNEXT, ['a', 1], 'c')
+ assert op.casts_box()
+ assert isinstance(op, rop.CastResOp)
+ assert op.cast_to == ('i',1)
+ op = rop.ResOperation(rop.rop.CAST_FLOAT_TO_INT, ['a'], 'c')
+ assert op.casts_box()
+ assert isinstance(op, rop.CastResOp)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit