Author: Richard Plangger <planri...@gmail.com>
Branch: ppc-vsx-support
Changeset: r86076:b6f69665e955
Date: 2016-08-08 13:34 +0200
http://bitbucket.org/pypy/pypy/changeset/b6f69665e955/

Log:    added unpack/pack test stressing the operation

diff --git a/rpython/jit/backend/ppc/vector_ext.py b/rpython/jit/backend/ppc/vector_ext.py
--- a/rpython/jit/backend/ppc/vector_ext.py
+++ b/rpython/jit/backend/ppc/vector_ext.py
@@ -92,83 +92,35 @@
         self.VEC_DOUBLE_WORD_ONES = mem
 
     def emit_vec_load_f(self, op, arglocs, regalloc):
-        resloc, baseloc, indexloc, size_loc, ofs, integer_loc, aligned_loc = arglocs
+        resloc, baseloc, indexloc, size_loc, ofs, integer_loc = arglocs
         indexloc = self._apply_offset(indexloc, ofs)
         itemsize = size_loc.value
-        if itemsize == 4:
+        if integer_loc.value:
+            self.mc.lxvd2x(resloc.value, indexloc.value, baseloc.value)
+        elif itemsize == 4:
             self.mc.lxvw4x(resloc.value, indexloc.value, baseloc.value)
         elif itemsize == 8:
             self.mc.lxvd2x(resloc.value, indexloc.value, baseloc.value)
+        else:
+            not_implemented("vec_load_f itemsize %d" % itemsize)
 
-    def emit_vec_load_i(self, op, arglocs, regalloc):
-        resloc, baseloc, indexloc, size_loc, ofs, \
-            Vhiloc, Vloloc, Vploc, tloc = arglocs
-        indexloc = self._apply_offset(indexloc, ofs)
-        Vlo = Vloloc.value
-        Vhi = Vhiloc.value
-        self.mc.lvx(Vhi, indexloc.value, baseloc.value)
-        Vp = Vploc.value
-        t = tloc.value
-        if IS_BIG_ENDIAN:
-            self.mc.lvsl(Vp, indexloc.value, baseloc.value)
-        else:
-            self.mc.lvsr(Vp, indexloc.value, baseloc.value)
-        self.mc.addi(t, baseloc.value, 16)
-        self.mc.lvx(Vlo, indexloc.value, t)
-        if IS_BIG_ENDIAN:
-            self.mc.vperm(resloc.value, Vhi, Vlo, Vp)
-        else:
-            self.mc.vperm(resloc.value, Vlo, Vhi, Vp)
+    emit_vec_load_i = emit_vec_load_f
 
     def emit_vec_store(self, op, arglocs, regalloc):
         baseloc, indexloc, valueloc, sizeloc, baseofs, \
-            integer_loc, aligned_loc = arglocs
+            integer_loc = arglocs
         indexloc = self._apply_offset(indexloc, baseofs)
         assert baseofs.value == 0
         if integer_loc.value:
-            Vloloc = regalloc.vrm.get_scratch_reg(type=INT)
-            Vhiloc = regalloc.vrm.get_scratch_reg(type=INT)
-            Vploc = regalloc.vrm.get_scratch_reg(type=INT)
-            tloc = regalloc.rm.get_scratch_reg()
-            V1sloc = regalloc.vrm.get_scratch_reg(type=INT)
-            V1s = V1sloc.value
-            V0sloc = regalloc.vrm.get_scratch_reg(type=INT)
-            V0s = V0sloc.value
-            Vmaskloc = regalloc.vrm.get_scratch_reg(type=INT)
-            Vmask = Vmaskloc.value
-            Vlo = Vhiloc.value
-            Vhi = Vloloc.value
-            Vp = Vploc.value
-            t = tloc.value
-            Vs = valueloc.value
-            # UFF, that is a lot of code for storing unaligned!
-            # probably a lot of room for improvement (not locally,
-            # but in general for the algorithm)
-            self.mc.lvx(Vhi, indexloc.value, baseloc.value)
-            #self.mc.lvsr(Vp, indexloc.value, baseloc.value)
-            if IS_BIG_ENDIAN:
-                self.mc.lvsr(Vp, indexloc.value, baseloc.value)
-            else:
-                self.mc.lvsl(Vp, indexloc.value, baseloc.value)
-            self.mc.addi(t, baseloc.value, 16)
-            self.mc.lvx(Vlo, indexloc.value, t)
-            self.mc.vspltisb(V1s, -1)
-            self.mc.vspltisb(V0s, 0)
-            if IS_BIG_ENDIAN:
-                self.mc.vperm(Vmask, V0s, V1s, Vp)
-            else:
-                self.mc.vperm(Vmask, V1s, V0s, Vp)
-            self.mc.vperm(Vs, Vs, Vs, Vp)
-            self.mc.vsel(Vlo, Vs, Vlo, Vmask)
-            self.mc.vsel(Vhi, Vhi, Vs, Vmask)
-            self.mc.stvx(Vlo, indexloc.value, t)
-            self.mc.stvx(Vhi, indexloc.value, baseloc.value)
+            self.mc.stxvd2x(valueloc.value, indexloc.value, baseloc.value)
         else:
             itemsize = sizeloc.value
             if itemsize == 4:
                 self.mc.stxvw4x(valueloc.value, indexloc.value, baseloc.value)
             elif itemsize == 8:
                 self.mc.stxvd2x(valueloc.value, indexloc.value, baseloc.value)
+            else:
+                not_implemented("vec_store itemsize %d" % itemsize)
 
     def emit_vec_int_add(self, op, arglocs, regalloc):
         resloc, loc0, loc1, size_loc = arglocs
@@ -631,7 +583,6 @@
                not descr.is_array_of_structs()
         itemsize, ofs, _ = unpack_arraydescr(descr)
         integer = not (descr.is_array_of_floats() or descr.getconcrete_type() == FLOAT)
-        aligned = False
         args = op.getarglist()
         a0 = op.getarg(0)
         a1 = op.getarg(1)
@@ -639,28 +590,9 @@
         ofs_loc = self.ensure_reg(a1)
         result_loc = self.force_allocate_vector_reg(op)
         return [result_loc, base_loc, ofs_loc, imm(itemsize), imm(ofs),
-                imm(integer), imm(aligned)]
+                imm(integer)]
 
-    def _prepare_load_i(self, op):
-        descr = op.getdescr()
-        assert isinstance(descr, ArrayDescr)
-        assert not descr.is_array_of_pointers() and \
-               not descr.is_array_of_structs()
-        itemsize, ofs, _ = unpack_arraydescr(descr)
-        args = op.getarglist()
-        a0 = op.getarg(0)
-        a1 = op.getarg(1)
-        base_loc = self.ensure_reg(a0)
-        ofs_loc = self.ensure_reg(a1)
-        result_loc = self.force_allocate_vector_reg(op)
-        tloc = self.rm.get_scratch_reg()
-        Vhiloc = self.vrm.get_scratch_reg(type=INT)
-        Vloloc = self.vrm.get_scratch_reg(type=INT)
-        Vploc = self.vrm.get_scratch_reg(type=INT)
-        return [result_loc, base_loc, ofs_loc, imm(itemsize), imm(ofs),
-                Vhiloc, Vloloc, Vploc, tloc]
-
-    prepare_vec_load_i = _prepare_load_i
+    prepare_vec_load_i = _prepare_load
     prepare_vec_load_f = _prepare_load
 
     def prepare_vec_arith(self, op):
@@ -720,9 +652,8 @@
         valueloc = self.ensure_vector_reg(a2)
 
         integer = not (descr.is_array_of_floats() or descr.getconcrete_type() == FLOAT)
-        aligned = False
         return [baseloc, ofsloc, valueloc,
-                imm(itemsize), imm(ofs), imm(integer), imm(aligned)]
+                imm(itemsize), imm(ofs), imm(integer)]
 
     def prepare_vec_int_signext(self, op):
         assert isinstance(op, VectorOp)
diff --git a/rpython/jit/backend/x86/vector_ext.py b/rpython/jit/backend/x86/vector_ext.py
--- a/rpython/jit/backend/x86/vector_ext.py
+++ b/rpython/jit/backend/x86/vector_ext.py
@@ -531,6 +531,8 @@
                             self.mc.SHUFPD_xxi(resloc.value, resloc.value, 1)
                             self.mc.UNPCKHPD(resloc, srcloc)
                         # if they are equal nothing is to be done
+        else:
+            not_implemented("pack/unpack for size %d", size)
 
     genop_vec_unpack_f = genop_vec_pack_f
 
diff --git a/rpython/jit/metainterp/test/test_vector.py b/rpython/jit/metainterp/test/test_vector.py
--- a/rpython/jit/metainterp/test/test_vector.py
+++ b/rpython/jit/metainterp/test/test_vector.py
@@ -18,6 +18,13 @@
 from rpython.rlib.objectmodel import (specialize, is_annotation_constant,
         always_inline)
 from rpython.jit.backend.detect_cpu import getcpuclass
+from rpython.jit.tool.oparser import parse
+from rpython.jit.metainterp.history import (AbstractFailDescr,
+                                            AbstractDescr,
+                                            BasicFailDescr, BasicFinalDescr,
+                                            JitCellToken, TargetToken,
+                                            ConstInt, ConstPtr,
+                                            Const, ConstFloat)
 
 CPU = getcpuclass()
 
@@ -78,7 +85,6 @@
     enable_opts = 'intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll'
 
     def setup_method(self, method):
-        import pdb; pdb.set_trace()
         if not self.supports_vector_ext():
             py.test.skip("this cpu %s has no implemented vector backend" % CPU)
 
@@ -718,5 +724,80 @@
         res = self.meta_interp(f, [22], vec_all=True, vec_guard_ratio=5)
         assert res == f(22)
 
+    def run_unpack(self, unpack, vector_type, assignments, float=True):
+        vars = {'v':0,'f':0,'i':0}
+        def newvar(type):
+            c = vars[type]
+            vars[type] = c + 1
+            if type == 'v':
+                return type + str(c) + vector_type
+            return type + str(c)
+        targettoken = TargetToken()
+        finaldescr = BasicFinalDescr(1)
+        args = []
+        args_values = []
+        pack = []
+        suffix = 'f' if float else 'i'
+        for var, vals in assignments.items():
+            v = newvar('v')
+            pack.append('%s = vec_%s()' % (v, suffix))
+            for i,val in enumerate(vals):
+                args_values.append(val)
+                f = newvar('f')
+                args.append(f)
+                count = 1
+                # create a new variable
+                vo = v
+                v = newvar('v')
+                pack.append('%s = vec_pack_%s(%s, %s, %d, %d)' % \
+                            (v, suffix, vo, f, i, count))
+            vars['x'] = v
+        packs = '\n        '.join(pack)
+        resvar = suffix + '{'+suffix+'}'
+        source = '''
+        [{args}]
+        label({args}, descr=targettoken)
+        {packs}
+        {unpack}
+        finish({resvar}, descr=finaldescr)
+        '''.format(args=','.join(args),packs=packs, unpack=unpack.format(**vars),
+                   resvar=resvar.format(**vars))
+        loop = parse(source, namespace={'targettoken': targettoken,
+                                        'finaldescr': finaldescr})
+
+        cpu = self.CPUClass(rtyper=None, stats=None)
+        cpu.setup_once()
+        #
+        looptoken = JitCellToken()
+        cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+        deadframe = cpu.execute_token(looptoken, *args_values)
+        print(source)
+        if float:
+            return cpu.get_float_value(deadframe, 0)
+        else:
+            return cpu.get_int_value(deadframe, 0)
+
+    def test_unpack(self):
+        # double unpack
+        assert self.run_unpack("f{f} = vec_unpack_f({x}, 0, 1)",
+                               "[2xf64]", {'x': (1.2,-1)}) == 1.2
+        assert self.run_unpack("f{f} = vec_unpack_f({x}, 1, 1)",
+                               "[2xf64]", {'x': (50.33,4321.0)}) == 4321.0
+        # int64
+        assert self.run_unpack("i{i} = vec_unpack_i({x}, 0, 1)",
+                               "[2xi64]", {'x': (11,12)}, float=False) == 11
+        assert self.run_unpack("i{i} = vec_unpack_i({x}, 1, 1)",
+                               "[2xi64]", {'x': (14,15)}, float=False) == 15
+
+        ## integer unpack (byte)
+        for i in range(16):
+            op = "i{i} = vec_unpack_i({x}, %d, 1)" % i
+            assert self.run_unpack(op, "[16xi8]", {'x': [127,1]*8}, float=False) == (127 if i%2==0 else 1)
+            if i < 8:
+                assert self.run_unpack(op, "[2xi16]", {'x': [2**15-1,0]*4}, float=False) == (2**15-1 if i%2==0 else 0)
+            if i < 4:
+                assert self.run_unpack(op, "[2xi32]", {'x': [2**31-1,0]*4}, float=False) == (2**31-1 if i%2==0 else 0)
+
+
 class TestLLtype(LLJitMixin, VectorizeTests):
     pass
diff --git a/rpython/jit/tool/oparser.py b/rpython/jit/tool/oparser.py
--- a/rpython/jit/tool/oparser.py
+++ b/rpython/jit/tool/oparser.py
@@ -299,6 +299,7 @@
             vecinfo.datatype = match.group(3)
             vecinfo.bytesize = int(match.group(4)) // 8
             resop._vec_debug_info = vecinfo
+            resop.bytesize = vecinfo.bytesize
             return var[:var.find('[')]
 
         vecinfo = VectorizationInfo(resop)
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to