Author: Richard Plangger <planri...@gmail.com>
Branch: ppc-vsx-support
Changeset: r86076:b6f69665e955
Date: 2016-08-08 13:34 +0200
http://bitbucket.org/pypy/pypy/changeset/b6f69665e955/
Log: added unpack/pack test stressing the operation diff --git a/rpython/jit/backend/ppc/vector_ext.py b/rpython/jit/backend/ppc/vector_ext.py --- a/rpython/jit/backend/ppc/vector_ext.py +++ b/rpython/jit/backend/ppc/vector_ext.py @@ -92,83 +92,35 @@ self.VEC_DOUBLE_WORD_ONES = mem def emit_vec_load_f(self, op, arglocs, regalloc): - resloc, baseloc, indexloc, size_loc, ofs, integer_loc, aligned_loc = arglocs + resloc, baseloc, indexloc, size_loc, ofs, integer_loc = arglocs indexloc = self._apply_offset(indexloc, ofs) itemsize = size_loc.value - if itemsize == 4: + if integer_loc.value: + self.mc.lxvd2x(resloc.value, indexloc.value, baseloc.value) + elif itemsize == 4: self.mc.lxvw4x(resloc.value, indexloc.value, baseloc.value) elif itemsize == 8: self.mc.lxvd2x(resloc.value, indexloc.value, baseloc.value) + else: + not_implemented("vec_load_f itemsize %d" % itemsize) - def emit_vec_load_i(self, op, arglocs, regalloc): - resloc, baseloc, indexloc, size_loc, ofs, \ - Vhiloc, Vloloc, Vploc, tloc = arglocs - indexloc = self._apply_offset(indexloc, ofs) - Vlo = Vloloc.value - Vhi = Vhiloc.value - self.mc.lvx(Vhi, indexloc.value, baseloc.value) - Vp = Vploc.value - t = tloc.value - if IS_BIG_ENDIAN: - self.mc.lvsl(Vp, indexloc.value, baseloc.value) - else: - self.mc.lvsr(Vp, indexloc.value, baseloc.value) - self.mc.addi(t, baseloc.value, 16) - self.mc.lvx(Vlo, indexloc.value, t) - if IS_BIG_ENDIAN: - self.mc.vperm(resloc.value, Vhi, Vlo, Vp) - else: - self.mc.vperm(resloc.value, Vlo, Vhi, Vp) + emit_vec_load_i = emit_vec_load_f def emit_vec_store(self, op, arglocs, regalloc): baseloc, indexloc, valueloc, sizeloc, baseofs, \ - integer_loc, aligned_loc = arglocs + integer_loc = arglocs indexloc = self._apply_offset(indexloc, baseofs) assert baseofs.value == 0 if integer_loc.value: - Vloloc = regalloc.vrm.get_scratch_reg(type=INT) - Vhiloc = regalloc.vrm.get_scratch_reg(type=INT) - Vploc = regalloc.vrm.get_scratch_reg(type=INT) - tloc = regalloc.rm.get_scratch_reg() - 
V1sloc = regalloc.vrm.get_scratch_reg(type=INT) - V1s = V1sloc.value - V0sloc = regalloc.vrm.get_scratch_reg(type=INT) - V0s = V0sloc.value - Vmaskloc = regalloc.vrm.get_scratch_reg(type=INT) - Vmask = Vmaskloc.value - Vlo = Vhiloc.value - Vhi = Vloloc.value - Vp = Vploc.value - t = tloc.value - Vs = valueloc.value - # UFF, that is a lot of code for storing unaligned! - # probably a lot of room for improvement (not locally, - # but in general for the algorithm) - self.mc.lvx(Vhi, indexloc.value, baseloc.value) - #self.mc.lvsr(Vp, indexloc.value, baseloc.value) - if IS_BIG_ENDIAN: - self.mc.lvsr(Vp, indexloc.value, baseloc.value) - else: - self.mc.lvsl(Vp, indexloc.value, baseloc.value) - self.mc.addi(t, baseloc.value, 16) - self.mc.lvx(Vlo, indexloc.value, t) - self.mc.vspltisb(V1s, -1) - self.mc.vspltisb(V0s, 0) - if IS_BIG_ENDIAN: - self.mc.vperm(Vmask, V0s, V1s, Vp) - else: - self.mc.vperm(Vmask, V1s, V0s, Vp) - self.mc.vperm(Vs, Vs, Vs, Vp) - self.mc.vsel(Vlo, Vs, Vlo, Vmask) - self.mc.vsel(Vhi, Vhi, Vs, Vmask) - self.mc.stvx(Vlo, indexloc.value, t) - self.mc.stvx(Vhi, indexloc.value, baseloc.value) + self.mc.stxvd2x(valueloc.value, indexloc.value, baseloc.value) else: itemsize = sizeloc.value if itemsize == 4: self.mc.stxvw4x(valueloc.value, indexloc.value, baseloc.value) elif itemsize == 8: self.mc.stxvd2x(valueloc.value, indexloc.value, baseloc.value) + else: + not_implemented("vec_store itemsize %d" % itemsize) def emit_vec_int_add(self, op, arglocs, regalloc): resloc, loc0, loc1, size_loc = arglocs @@ -631,7 +583,6 @@ not descr.is_array_of_structs() itemsize, ofs, _ = unpack_arraydescr(descr) integer = not (descr.is_array_of_floats() or descr.getconcrete_type() == FLOAT) - aligned = False args = op.getarglist() a0 = op.getarg(0) a1 = op.getarg(1) @@ -639,28 +590,9 @@ ofs_loc = self.ensure_reg(a1) result_loc = self.force_allocate_vector_reg(op) return [result_loc, base_loc, ofs_loc, imm(itemsize), imm(ofs), - imm(integer), imm(aligned)] + imm(integer)] - 
def _prepare_load_i(self, op): - descr = op.getdescr() - assert isinstance(descr, ArrayDescr) - assert not descr.is_array_of_pointers() and \ - not descr.is_array_of_structs() - itemsize, ofs, _ = unpack_arraydescr(descr) - args = op.getarglist() - a0 = op.getarg(0) - a1 = op.getarg(1) - base_loc = self.ensure_reg(a0) - ofs_loc = self.ensure_reg(a1) - result_loc = self.force_allocate_vector_reg(op) - tloc = self.rm.get_scratch_reg() - Vhiloc = self.vrm.get_scratch_reg(type=INT) - Vloloc = self.vrm.get_scratch_reg(type=INT) - Vploc = self.vrm.get_scratch_reg(type=INT) - return [result_loc, base_loc, ofs_loc, imm(itemsize), imm(ofs), - Vhiloc, Vloloc, Vploc, tloc] - - prepare_vec_load_i = _prepare_load_i + prepare_vec_load_i = _prepare_load prepare_vec_load_f = _prepare_load def prepare_vec_arith(self, op): @@ -720,9 +652,8 @@ valueloc = self.ensure_vector_reg(a2) integer = not (descr.is_array_of_floats() or descr.getconcrete_type() == FLOAT) - aligned = False return [baseloc, ofsloc, valueloc, - imm(itemsize), imm(ofs), imm(integer), imm(aligned)] + imm(itemsize), imm(ofs), imm(integer)] def prepare_vec_int_signext(self, op): assert isinstance(op, VectorOp) diff --git a/rpython/jit/backend/x86/vector_ext.py b/rpython/jit/backend/x86/vector_ext.py --- a/rpython/jit/backend/x86/vector_ext.py +++ b/rpython/jit/backend/x86/vector_ext.py @@ -531,6 +531,8 @@ self.mc.SHUFPD_xxi(resloc.value, resloc.value, 1) self.mc.UNPCKHPD(resloc, srcloc) # if they are equal nothing is to be done + else: + not_implemented("pack/unpack for size %d", size) genop_vec_unpack_f = genop_vec_pack_f diff --git a/rpython/jit/metainterp/test/test_vector.py b/rpython/jit/metainterp/test/test_vector.py --- a/rpython/jit/metainterp/test/test_vector.py +++ b/rpython/jit/metainterp/test/test_vector.py @@ -18,6 +18,13 @@ from rpython.rlib.objectmodel import (specialize, is_annotation_constant, always_inline) from rpython.jit.backend.detect_cpu import getcpuclass +from rpython.jit.tool.oparser import 
parse +from rpython.jit.metainterp.history import (AbstractFailDescr, + AbstractDescr, + BasicFailDescr, BasicFinalDescr, + JitCellToken, TargetToken, + ConstInt, ConstPtr, + Const, ConstFloat) CPU = getcpuclass() @@ -78,7 +85,6 @@ enable_opts = 'intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll' def setup_method(self, method): - import pdb; pdb.set_trace() if not self.supports_vector_ext(): py.test.skip("this cpu %s has no implemented vector backend" % CPU) @@ -718,5 +724,80 @@ res = self.meta_interp(f, [22], vec_all=True, vec_guard_ratio=5) assert res == f(22) + def run_unpack(self, unpack, vector_type, assignments, float=True): + vars = {'v':0,'f':0,'i':0} + def newvar(type): + c = vars[type] + vars[type] = c + 1 + if type == 'v': + return type + str(c) + vector_type + return type + str(c) + targettoken = TargetToken() + finaldescr = BasicFinalDescr(1) + args = [] + args_values = [] + pack = [] + suffix = 'f' if float else 'i' + for var, vals in assignments.items(): + v = newvar('v') + pack.append('%s = vec_%s()' % (v, suffix)) + for i,val in enumerate(vals): + args_values.append(val) + f = newvar('f') + args.append(f) + count = 1 + # create a new variable + vo = v + v = newvar('v') + pack.append('%s = vec_pack_%s(%s, %s, %d, %d)' % \ + (v, suffix, vo, f, i, count)) + vars['x'] = v + packs = '\n '.join(pack) + resvar = suffix + '{'+suffix+'}' + source = ''' + [{args}] + label({args}, descr=targettoken) + {packs} + {unpack} + finish({resvar}, descr=finaldescr) + '''.format(args=','.join(args),packs=packs, unpack=unpack.format(**vars), + resvar=resvar.format(**vars)) + loop = parse(source, namespace={'targettoken': targettoken, + 'finaldescr': finaldescr}) + + cpu = self.CPUClass(rtyper=None, stats=None) + cpu.setup_once() + # + looptoken = JitCellToken() + cpu.compile_loop(loop.inputargs, loop.operations, looptoken) + deadframe = cpu.execute_token(looptoken, *args_values) + print(source) + if float: + return cpu.get_float_value(deadframe, 0) + else: 
+ return cpu.get_int_value(deadframe, 0) + + def test_unpack(self): + # double unpack + assert self.run_unpack("f{f} = vec_unpack_f({x}, 0, 1)", + "[2xf64]", {'x': (1.2,-1)}) == 1.2 + assert self.run_unpack("f{f} = vec_unpack_f({x}, 1, 1)", + "[2xf64]", {'x': (50.33,4321.0)}) == 4321.0 + # int64 + assert self.run_unpack("i{i} = vec_unpack_i({x}, 0, 1)", + "[2xi64]", {'x': (11,12)}, float=False) == 11 + assert self.run_unpack("i{i} = vec_unpack_i({x}, 1, 1)", + "[2xi64]", {'x': (14,15)}, float=False) == 15 + + ## integer unpack (byte) + for i in range(16): + op = "i{i} = vec_unpack_i({x}, %d, 1)" % i + assert self.run_unpack(op, "[16xi8]", {'x': [127,1]*8}, float=False) == (127 if i%2==0 else 1) + if i < 8: + assert self.run_unpack(op, "[2xi16]", {'x': [2**15-1,0]*4}, float=False) == (2**15-1 if i%2==0 else 0) + if i < 4: + assert self.run_unpack(op, "[2xi32]", {'x': [2**31-1,0]*4}, float=False) == (2**31-1 if i%2==0 else 0) + + class TestLLtype(LLJitMixin, VectorizeTests): pass diff --git a/rpython/jit/tool/oparser.py b/rpython/jit/tool/oparser.py --- a/rpython/jit/tool/oparser.py +++ b/rpython/jit/tool/oparser.py @@ -299,6 +299,7 @@ vecinfo.datatype = match.group(3) vecinfo.bytesize = int(match.group(4)) // 8 resop._vec_debug_info = vecinfo + resop.bytesize = vecinfo.bytesize return var[:var.find('[')] vecinfo = VectorizationInfo(resop) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit