Author: Richard Plangger <[email protected]>
Branch: ppc-vsx-support
Changeset: r86076:b6f69665e955
Date: 2016-08-08 13:34 +0200
http://bitbucket.org/pypy/pypy/changeset/b6f69665e955/
Log: added unpack/pack test stressing the operation
diff --git a/rpython/jit/backend/ppc/vector_ext.py b/rpython/jit/backend/ppc/vector_ext.py
--- a/rpython/jit/backend/ppc/vector_ext.py
+++ b/rpython/jit/backend/ppc/vector_ext.py
@@ -92,83 +92,35 @@
self.VEC_DOUBLE_WORD_ONES = mem
def emit_vec_load_f(self, op, arglocs, regalloc):
- resloc, baseloc, indexloc, size_loc, ofs, integer_loc, aligned_loc =
arglocs
+ resloc, baseloc, indexloc, size_loc, ofs, integer_loc = arglocs
indexloc = self._apply_offset(indexloc, ofs)
itemsize = size_loc.value
- if itemsize == 4:
+ if integer_loc.value:
+ self.mc.lxvd2x(resloc.value, indexloc.value, baseloc.value)
+ elif itemsize == 4:
self.mc.lxvw4x(resloc.value, indexloc.value, baseloc.value)
elif itemsize == 8:
self.mc.lxvd2x(resloc.value, indexloc.value, baseloc.value)
+ else:
+ not_implemented("vec_load_f itemsize %d" % itemsize)
- def emit_vec_load_i(self, op, arglocs, regalloc):
- resloc, baseloc, indexloc, size_loc, ofs, \
- Vhiloc, Vloloc, Vploc, tloc = arglocs
- indexloc = self._apply_offset(indexloc, ofs)
- Vlo = Vloloc.value
- Vhi = Vhiloc.value
- self.mc.lvx(Vhi, indexloc.value, baseloc.value)
- Vp = Vploc.value
- t = tloc.value
- if IS_BIG_ENDIAN:
- self.mc.lvsl(Vp, indexloc.value, baseloc.value)
- else:
- self.mc.lvsr(Vp, indexloc.value, baseloc.value)
- self.mc.addi(t, baseloc.value, 16)
- self.mc.lvx(Vlo, indexloc.value, t)
- if IS_BIG_ENDIAN:
- self.mc.vperm(resloc.value, Vhi, Vlo, Vp)
- else:
- self.mc.vperm(resloc.value, Vlo, Vhi, Vp)
+ emit_vec_load_i = emit_vec_load_f
def emit_vec_store(self, op, arglocs, regalloc):
baseloc, indexloc, valueloc, sizeloc, baseofs, \
- integer_loc, aligned_loc = arglocs
+ integer_loc = arglocs
indexloc = self._apply_offset(indexloc, baseofs)
assert baseofs.value == 0
if integer_loc.value:
- Vloloc = regalloc.vrm.get_scratch_reg(type=INT)
- Vhiloc = regalloc.vrm.get_scratch_reg(type=INT)
- Vploc = regalloc.vrm.get_scratch_reg(type=INT)
- tloc = regalloc.rm.get_scratch_reg()
- V1sloc = regalloc.vrm.get_scratch_reg(type=INT)
- V1s = V1sloc.value
- V0sloc = regalloc.vrm.get_scratch_reg(type=INT)
- V0s = V0sloc.value
- Vmaskloc = regalloc.vrm.get_scratch_reg(type=INT)
- Vmask = Vmaskloc.value
- Vlo = Vhiloc.value
- Vhi = Vloloc.value
- Vp = Vploc.value
- t = tloc.value
- Vs = valueloc.value
- # UFF, that is a lot of code for storing unaligned!
- # probably a lot of room for improvement (not locally,
- # but in general for the algorithm)
- self.mc.lvx(Vhi, indexloc.value, baseloc.value)
- #self.mc.lvsr(Vp, indexloc.value, baseloc.value)
- if IS_BIG_ENDIAN:
- self.mc.lvsr(Vp, indexloc.value, baseloc.value)
- else:
- self.mc.lvsl(Vp, indexloc.value, baseloc.value)
- self.mc.addi(t, baseloc.value, 16)
- self.mc.lvx(Vlo, indexloc.value, t)
- self.mc.vspltisb(V1s, -1)
- self.mc.vspltisb(V0s, 0)
- if IS_BIG_ENDIAN:
- self.mc.vperm(Vmask, V0s, V1s, Vp)
- else:
- self.mc.vperm(Vmask, V1s, V0s, Vp)
- self.mc.vperm(Vs, Vs, Vs, Vp)
- self.mc.vsel(Vlo, Vs, Vlo, Vmask)
- self.mc.vsel(Vhi, Vhi, Vs, Vmask)
- self.mc.stvx(Vlo, indexloc.value, t)
- self.mc.stvx(Vhi, indexloc.value, baseloc.value)
+ self.mc.stxvd2x(valueloc.value, indexloc.value, baseloc.value)
else:
itemsize = sizeloc.value
if itemsize == 4:
self.mc.stxvw4x(valueloc.value, indexloc.value, baseloc.value)
elif itemsize == 8:
self.mc.stxvd2x(valueloc.value, indexloc.value, baseloc.value)
+ else:
+ not_implemented("vec_store itemsize %d" % itemsize)
def emit_vec_int_add(self, op, arglocs, regalloc):
resloc, loc0, loc1, size_loc = arglocs
@@ -631,7 +583,6 @@
not descr.is_array_of_structs()
itemsize, ofs, _ = unpack_arraydescr(descr)
integer = not (descr.is_array_of_floats() or descr.getconcrete_type()
== FLOAT)
- aligned = False
args = op.getarglist()
a0 = op.getarg(0)
a1 = op.getarg(1)
@@ -639,28 +590,9 @@
ofs_loc = self.ensure_reg(a1)
result_loc = self.force_allocate_vector_reg(op)
return [result_loc, base_loc, ofs_loc, imm(itemsize), imm(ofs),
- imm(integer), imm(aligned)]
+ imm(integer)]
- def _prepare_load_i(self, op):
- descr = op.getdescr()
- assert isinstance(descr, ArrayDescr)
- assert not descr.is_array_of_pointers() and \
- not descr.is_array_of_structs()
- itemsize, ofs, _ = unpack_arraydescr(descr)
- args = op.getarglist()
- a0 = op.getarg(0)
- a1 = op.getarg(1)
- base_loc = self.ensure_reg(a0)
- ofs_loc = self.ensure_reg(a1)
- result_loc = self.force_allocate_vector_reg(op)
- tloc = self.rm.get_scratch_reg()
- Vhiloc = self.vrm.get_scratch_reg(type=INT)
- Vloloc = self.vrm.get_scratch_reg(type=INT)
- Vploc = self.vrm.get_scratch_reg(type=INT)
- return [result_loc, base_loc, ofs_loc, imm(itemsize), imm(ofs),
- Vhiloc, Vloloc, Vploc, tloc]
-
- prepare_vec_load_i = _prepare_load_i
+ prepare_vec_load_i = _prepare_load
prepare_vec_load_f = _prepare_load
def prepare_vec_arith(self, op):
@@ -720,9 +652,8 @@
valueloc = self.ensure_vector_reg(a2)
integer = not (descr.is_array_of_floats() or descr.getconcrete_type()
== FLOAT)
- aligned = False
return [baseloc, ofsloc, valueloc,
- imm(itemsize), imm(ofs), imm(integer), imm(aligned)]
+ imm(itemsize), imm(ofs), imm(integer)]
def prepare_vec_int_signext(self, op):
assert isinstance(op, VectorOp)
diff --git a/rpython/jit/backend/x86/vector_ext.py b/rpython/jit/backend/x86/vector_ext.py
--- a/rpython/jit/backend/x86/vector_ext.py
+++ b/rpython/jit/backend/x86/vector_ext.py
@@ -531,6 +531,8 @@
self.mc.SHUFPD_xxi(resloc.value, resloc.value, 1)
self.mc.UNPCKHPD(resloc, srcloc)
# if they are equal nothing is to be done
+ else:
+ not_implemented("pack/unpack for size %d", size)
genop_vec_unpack_f = genop_vec_pack_f
diff --git a/rpython/jit/metainterp/test/test_vector.py b/rpython/jit/metainterp/test/test_vector.py
--- a/rpython/jit/metainterp/test/test_vector.py
+++ b/rpython/jit/metainterp/test/test_vector.py
@@ -18,6 +18,13 @@
from rpython.rlib.objectmodel import (specialize, is_annotation_constant,
always_inline)
from rpython.jit.backend.detect_cpu import getcpuclass
+from rpython.jit.tool.oparser import parse
+from rpython.jit.metainterp.history import (AbstractFailDescr,
+ AbstractDescr,
+ BasicFailDescr, BasicFinalDescr,
+ JitCellToken, TargetToken,
+ ConstInt, ConstPtr,
+ Const, ConstFloat)
CPU = getcpuclass()
@@ -78,7 +85,6 @@
enable_opts =
'intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll'
def setup_method(self, method):
- import pdb; pdb.set_trace()
if not self.supports_vector_ext():
py.test.skip("this cpu %s has no implemented vector backend" % CPU)
@@ -718,5 +724,80 @@
res = self.meta_interp(f, [22], vec_all=True, vec_guard_ratio=5)
assert res == f(22)
+ def run_unpack(self, unpack, vector_type, assignments, float=True):
+     """Compile and run a small trace that packs scalars, then unpacks one.
+
+     unpack      -- template of the unpack operation; it is formatted with
+                    the name table, so ``{x}`` is the packed vector and
+                    ``{f}`` / ``{i}`` the next free float / int index.
+     vector_type -- type annotation appended to every vector variable
+                    name, e.g. ``"[2xf64]"``.
+     assignments -- mapping whose values are the sequences of scalars
+                    packed lane by lane; the resulting vector is always
+                    exposed to the template as ``{x}``.
+     float       -- True to build a float trace and return a float result,
+                    False for an integer trace and an int result.
+     """
+     # counters for generated variable names, plus the 'x' binding below
+     names = {'v': 0, 'f': 0, 'i': 0}
+
+     def newvar(kind):
+         index = names[kind]
+         names[kind] = index + 1
+         if kind == 'v':
+             # vector variables carry their type annotation in the name
+             return kind + str(index) + vector_type
+         return kind + str(index)
+
+     targettoken = TargetToken()
+     finaldescr = BasicFinalDescr(1)
+     args = []
+     args_values = []
+     pack = []
+     suffix = 'f' if float else 'i'
+     for vals in assignments.values():
+         v = newvar('v')
+         pack.append('%s = vec_%s()' % (v, suffix))
+         for i, val in enumerate(vals):
+             args_values.append(val)
+             # Name the scalar input after the element kind, so the
+             # integer trace gets i<N> inputs instead of f<N>.
+             # NOTE(review): presumably the trace parser derives the
+             # variable type from the name prefix -- confirm.
+             scalar = newvar(suffix)
+             args.append(scalar)
+             # every pack operation produces a fresh vector variable
+             vo = v
+             v = newvar('v')
+             pack.append('%s = vec_pack_%s(%s, %s, %d, %d)' %
+                         (v, suffix, vo, scalar, i, 1))
+         names['x'] = v
+     packs = '\n '.join(pack)
+     resvar = suffix + '{' + suffix + '}'
+     source = '''
+ [{args}]
+ label({args}, descr=targettoken)
+ {packs}
+ {unpack}
+ finish({resvar}, descr=finaldescr)
+ '''.format(args=','.join(args), packs=packs,
+            unpack=unpack.format(**names),
+            resvar=resvar.format(**names))
+     # print before compiling so the trace is visible if a later step fails
+     print(source)
+     loop = parse(source, namespace={'targettoken': targettoken,
+                                     'finaldescr': finaldescr})
+
+     cpu = self.CPUClass(rtyper=None, stats=None)
+     cpu.setup_once()
+     #
+     looptoken = JitCellToken()
+     cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+     deadframe = cpu.execute_token(looptoken, *args_values)
+     if float:
+         return cpu.get_float_value(deadframe, 0)
+     return cpu.get_int_value(deadframe, 0)
+
+ def test_unpack(self):
+     """Pack scalars into a vector and check each unpacked lane.
+
+     Covers f64 and i64 pairs plus every lane of the 8-, 16- and 32-bit
+     integer vectors. The vector type label always matches the number of
+     values that are packed and the range of lane indices read back.
+     """
+     # double unpack
+     assert self.run_unpack("f{f} = vec_unpack_f({x}, 0, 1)",
+                            "[2xf64]", {'x': (1.2, -1.0)}) == 1.2
+     assert self.run_unpack("f{f} = vec_unpack_f({x}, 1, 1)",
+                            "[2xf64]", {'x': (50.33, 4321.0)}) == 4321.0
+     # int64
+     assert self.run_unpack("i{i} = vec_unpack_i({x}, 0, 1)",
+                            "[2xi64]", {'x': (11, 12)}, float=False) == 11
+     assert self.run_unpack("i{i} = vec_unpack_i({x}, 1, 1)",
+                            "[2xi64]", {'x': (14, 15)}, float=False) == 15
+
+     # integer unpack: 16 byte lanes, 8 halfword lanes, 4 word lanes
+     for i in range(16):
+         op = "i{i} = vec_unpack_i({x}, %d, 1)" % i
+         assert self.run_unpack(op, "[16xi8]", {'x': [127, 1] * 8},
+                                float=False) == (127 if i % 2 == 0 else 1)
+         if i < 8:
+             assert self.run_unpack(op, "[8xi16]", {'x': [2**15-1, 0] * 4},
+                                    float=False) == (2**15-1 if i % 2 == 0 else 0)
+         if i < 4:
+             assert self.run_unpack(op, "[4xi32]", {'x': [2**31-1, 0] * 2},
+                                    float=False) == (2**31-1 if i % 2 == 0 else 0)
+
+
class TestLLtype(LLJitMixin, VectorizeTests):
pass
diff --git a/rpython/jit/tool/oparser.py b/rpython/jit/tool/oparser.py
--- a/rpython/jit/tool/oparser.py
+++ b/rpython/jit/tool/oparser.py
@@ -299,6 +299,7 @@
vecinfo.datatype = match.group(3)
vecinfo.bytesize = int(match.group(4)) // 8
resop._vec_debug_info = vecinfo
+ resop.bytesize = vecinfo.bytesize
return var[:var.find('[')]
vecinfo = VectorizationInfo(resop)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit