Author: Richard Plangger <r...@pasra.at>
Branch: vecopt
Changeset: r77902:eb3cc9cf75f4
Date: 2015-06-05 12:59 +0200
http://bitbucket.org/pypy/pypy/changeset/eb3cc9cf75f4/

Log:    int expansion for int16 and int8 added; int32/16 test added (the first
        already passes)

diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -226,6 +226,32 @@
         assert int(result) == 7+16+8+16
         self.check_vectorized(2, 2)
 
+    def define_int16_expand():  # adds a 1-element int16 array (16) to an int16 range(30)
+        return """
+        a = astype(|30|, int16)
+        c = astype(|1|, int16)
+        c[0] = 16i
+        b = a + c
+        sum(b -> 7:15)
+        """
+    def test_int16_expand(self):
+        result = self.run("int16_expand")
+        assert int(result) == 8*16 + sum(range(7,15))
+        self.check_vectorized(2, 2)
+
+    def define_int8_expand():  # int8 operands; all values stay <= 37, well inside int8 range
+        return """
+        a = astype(|30|, int8)
+        c = astype(|1|, int8)
+        c[0] = 8i
+        b = a + c
+        sum(b -> 0:17)
+        """
+    def test_int8_expand(self):  # distinct name: must not shadow test_int16_expand
+        result = self.run("int8_expand")
+        assert int(result) == 17*8 + sum(range(0,17))  # 0:17 selects 17 items, each +8
+        self.check_vectorized(2, 2)
+
     def define_int32_add_const():
         return """
         a = astype(|30|, int32)
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -54,6 +54,7 @@
         self.float_const_abs_addr = 0
         self.single_float_const_neg_addr = 0
         self.single_float_const_abs_addr = 0
+        self.expand_byte_mask_addr = 0
         self.malloc_slowpath = 0
         self.malloc_slowpath_varsize = 0
         self.wb_slowpath = [0, 0, 0, 0, 0]
@@ -102,9 +103,11 @@
         single_abs_const = '\xFF\xFF\xFF\x7F\xFF\xFF\xFF\x7F\xFF\xFF\xFF\x7F\xFF\xFF\xFF\x7F'
         # 0x80000000800000008000000080000000
         single_neg_const = '\x00\x00\x00\x80\x00\x00\x00\x80\x00\x00\x00\x80\x00\x00\x00\x80'
+        zero_const = '\x00' * 16
         #
         data = neg_const + abs_const + \
-               single_neg_const + single_abs_const
+               single_neg_const + single_abs_const + \
+               zero_const
         datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr, [])
         float_constants = datablockwrapper.malloc_aligned(len(data), alignment=16)
         datablockwrapper.done()
@@ -115,6 +118,7 @@
         self.float_const_abs_addr = float_constants + 16
         self.single_float_const_neg_addr = float_constants + 32
         self.single_float_const_abs_addr = float_constants + 48
+        self.expand_byte_mask_addr = float_constants + 64
 
     def set_extra_stack_depth(self, mc, value):
         if self._is_asmgcc():
@@ -2641,7 +2645,18 @@
         assert isinstance(srcloc, RegLoc)
         assert not srcloc.is_xmm
         size = sizeloc.value
-        if size == 8:
+        if size == 1:
+            self.mc.PINSRB_xri(resloc.value, srcloc.value, 0)
+            self.mc.PSHUFB(resloc, heap(self.expand_byte_mask_addr))
+        elif size == 2:
+            self.mc.PINSRW_xri(resloc.value, srcloc.value, 0)
+            self.mc.PINSRW_xri(resloc.value, srcloc.value, 4)
+            self.mc.PSHUFLW_xxi(resloc.value, resloc.value, 0)
+            self.mc.PSHUFHW_xxi(resloc.value, resloc.value, 0)
+        elif size == 4:
+            self.mc.PINSRD_xri(resloc.value, srcloc.value, 0)
+            self.mc.PSHUFD_xxi(resloc.value, resloc.value, 0)
+        elif size == 8:
             self.mc.PINSRQ_xri(resloc.value, srcloc.value, 0)
             self.mc.PINSRQ_xri(resloc.value, srcloc.value, 1)
         else:
diff --git a/rpython/jit/backend/x86/regloc.py b/rpython/jit/backend/x86/regloc.py
--- a/rpython/jit/backend/x86/regloc.py
+++ b/rpython/jit/backend/x86/regloc.py
@@ -715,6 +715,8 @@
     PUNPCKLDQ = _binaryop('PUNPCKLDQ')
     PUNPCKHDQ = _binaryop('PUNPCKHDQ')
 
+    PSHUFB = _binaryop('PSHUFB')
+
     CALL = _relative_unaryop('CALL')
     JMP = _relative_unaryop('JMP')
 
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -743,6 +743,11 @@
     SHUFPD_xxi = xmminsn('\x66', rex_nw, '\x0F\xC6', register(1,8), register(2), '\xC0', immediate(3, 'b'))
 
     PSHUFD_xxi = xmminsn('\x66', rex_nw, '\x0F\x70', register(1,8), register(2), '\xC0', immediate(3, 'b'))
+    PSHUFHW_xxi = xmminsn('\xF3', rex_nw, '\x0F\x70', register(1,8), register(2), '\xC0', immediate(3, 'b'))
+    PSHUFLW_xxi = xmminsn('\xF2', rex_nw, '\x0F\x70', register(1,8), register(2), '\xC0', immediate(3, 'b'))
+    PSHUFB_xx = xmminsn('\x66', rex_nw, '\x0F\x38\x00', register(1,8), register(2), '\xC0')
+    PSHUFB_xm = xmminsn('\x66', rex_nw, '\x0F\x38\x00', register(1,8), mem_reg_plus_const(2))
+
 
     # following require SSE4_1
     PEXTRQ_rxi = xmminsn('\x66', rex_w, '\x0F\x3A\x16', register(2,8), register(1), '\xC0', immediate(3, 'b'))
diff --git a/rpython/jit/metainterp/history.py b/rpython/jit/metainterp/history.py
--- a/rpython/jit/metainterp/history.py
+++ b/rpython/jit/metainterp/history.py
@@ -395,7 +395,7 @@
                 t = 'b'
             self._str = '%s%d' % (t, Box._counter)
             if self.type == VECTOR:
-                self._str = '%s%d[%s%d#%d]' % (t, Box._counter, self.item_type,
+                self._str = '%s%d[%s%d|%d]' % (t, Box._counter, self.item_type,
                                                self.item_size * 8, self.item_count)
             Box._counter += 1
         return self._str
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
@@ -22,7 +22,7 @@
             'long': self.intarraydescr,
             'int': self.int32arraydescr,
         }
-        loop = opparse("        [p0,p1,p2,p3,p4,p5,i0,i1,i2,i3,i4,i5,i6,i7,i8,i9,f0,f1,f2,f3,f4,f5]\n" + source + \
+        loop = opparse("        [p0,p1,p2,p3,p4,p5,i0,i1,i2,i3,i4,i5,i6,i7,i8,i9,f0,f1,f2,f3,f4,f5,v103204[i32|4]]\n" + source + \
                        "\n        jump(p0,p1,p2,p3,p4,p5,i0,i1,i2,i3,i4,i5,i6,i7,i8,i9,f0,f1,f2,f3,f4,f5)",
                        cpu=self.cpu,
                        namespace=ns)
@@ -39,13 +39,15 @@
     def pack(self, loop, l, r):
         return [Node(op,1+l+i) for i,op in enumerate(loop.operations[1+l:1+r])]
 
-    def schedule(self, loop_orig, packs, vec_reg_size=16, prepend_invariant=False):
+    def schedule(self, loop_orig, packs, vec_reg_size=16, prepend_invariant=False, getvboxfunc=None):
         loop = get_model(False).ExtendedTreeLoop("loop")
         loop.original_jitcell_token = loop_orig.original_jitcell_token
         loop.inputargs = loop_orig.inputargs
 
         ops = []
         vsd = VecScheduleData(vec_reg_size)
+        if getvboxfunc is not None:
+            vsd.getvector_of_box = getvboxfunc
         for pack in packs:
             if len(pack) == 1:
                 ops.append(pack[0].getoperation())
@@ -73,7 +75,7 @@
         pack1 = self.pack(loop1, 0, 6)
         loop2 = self.schedule(loop1, [pack1])
         loop3 = self.parse("""
-        v1[i32#4] = vec_raw_load(p0, i0, 4, descr=float)
+        v10[i32|4] = vec_raw_load(p0, i0, 4, descr=float)
         i14 = raw_load(p0, i4, descr=float)
         i15 = raw_load(p0, i5, descr=float)
         """, False)
@@ -90,9 +92,9 @@
         pack2 = self.pack(loop1, 2, 4)
         loop2 = self.schedule(loop1, [pack1, pack2])
         loop3 = self.parse("""
-        v1[i64#2] = vec_raw_load(p0, i0, 2, descr=long)
-        v2[i32#2] = vec_int_signext(v1[i64#2], 4)
-        v3[f64#2] = vec_cast_int_to_float(v2[i32#2])
+        v10[i64|2] = vec_raw_load(p0, i0, 2, descr=long)
+        v20[i32|2] = vec_int_signext(v10[i64|2], 4)
+        v30[f64|2] = vec_cast_int_to_float(v20[i32|2])
         """, False)
         self.assert_equal(loop2, loop3)
 
@@ -104,12 +106,12 @@
         pack1 = self.pack(loop1, 0, 2)
         loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
         loop3 = self.parse("""
-        v1[i64#2] = vec_box(2)
-        v2[i64#2] = vec_int_pack(v1[i64#2], i0, 0, 1)
-        v3[i64#2] = vec_int_pack(v2[i64#2], i1, 1, 1)
-        v4[i64#2] = vec_int_expand(73)
+        v10[i64|2] = vec_box(2)
+        v20[i64|2] = vec_int_pack(v10[i64|2], i0, 0, 1)
+        v30[i64|2] = vec_int_pack(v20[i64|2], i1, 1, 1)
+        v40[i64|2] = vec_int_expand(73)
         #
-        v5[i64#2] = vec_int_add(v3[i64#2], v4[i64#2])
+        v50[i64|2] = vec_int_add(v30[i64|2], v40[i64|2])
         """, False)
         self.assert_equal(loop2, loop3)
 
@@ -120,12 +122,12 @@
         pack1 = self.pack(loop1, 0, 2)
         loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
         loop3 = self.parse("""
-        v1[f64#2] = vec_box(2)
-        v2[f64#2] = vec_float_pack(v1[f64#2], f0, 0, 1)
-        v3[f64#2] = vec_float_pack(v2[f64#2], f1, 1, 1)
-        v4[f64#2] = vec_float_expand(73.0)
+        v10[f64|2] = vec_box(2)
+        v20[f64|2] = vec_float_pack(v10[f64|2], f0, 0, 1)
+        v30[f64|2] = vec_float_pack(v20[f64|2], f1, 1, 1)
+        v40[f64|2] = vec_float_expand(73.0)
         #
-        v5[f64#2] = vec_float_add(v3[f64#2], v4[f64#2])
+        v50[f64|2] = vec_float_add(v30[f64|2], v40[f64|2])
         """, False)
         self.assert_equal(loop2, loop3)
 
@@ -140,12 +142,35 @@
         pack2 = self.pack(loop1, 2, 4)
         loop2 = self.schedule(loop1, [pack1, pack2], prepend_invariant=True)
         loop3 = self.parse("""
-        v1[f64#2] = vec_box(2)
-        v2[f64#2] = vec_float_pack(v1[f64#2], f0, 0, 1)
-        v3[f64#2] = vec_float_pack(v2[f64#2], f1, 1, 1)
-        v4[f64#2] = vec_float_expand(f5) # only expaned once
+        v10[f64|2] = vec_box(2)
+        v20[f64|2] = vec_float_pack(v10[f64|2], f0, 0, 1)
+        v30[f64|2] = vec_float_pack(v20[f64|2], f1, 1, 1)
+        v40[f64|2] = vec_float_expand(f5) # only expanded once
         #
-        v5[f64#2] = vec_float_add(v3[f64#2], v4[f64#2])
-        v6[f64#2] = vec_float_add(v5[f64#2], v4[f64#2])
+        v50[f64|2] = vec_float_add(v30[f64|2], v40[f64|2])
+        v60[f64|2] = vec_float_add(v50[f64|2], v40[f64|2])
         """, False)
         self.assert_equal(loop2, loop3)
+
+    def find_input_arg(self, name, loop):
+        for arg in loop.inputargs:
+            if str(arg).startswith(name):
+                return arg
+        raise Exception("could not find %s in args %s" % (name, loop.inputargs))
+
+    def test_signext_int16(self):
+        loop1 = self.parse("""
+        i10 = int_signext(i1, 2)
+        i11 = int_signext(i1, 2)
+        i12 = int_signext(i1, 2)
+        i13 = int_signext(i1, 2)
+        """)
+        pack1 = self.pack(loop1, 0, 4)
+        v103204 = self.find_input_arg('v103204', loop1)
+        def i1inv103204(var):
+            return 0, v103204
+        loop2 = self.schedule(loop1, [pack1], prepend_invariant=True, getvboxfunc=i1inv103204)
+        loop3 = self.parse("""
+        v11[i16|4] = vec_int_signext(v103204[i32|4], 2)
+        """, False)
+        self.assert_equal(loop2, loop3)
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -776,7 +776,7 @@
 class PackType(object):
     UNKNOWN_TYPE = '-'
 
-    def __init__(self, type, size, signed, count=-1, scalar_cost=1, vector_cost=1):
+    def __init__(self, type, size, signed, count=-1):
         assert type in (FLOAT, INT, PackType.UNKNOWN_TYPE)
         self.type = type
         self.size = size
@@ -826,7 +826,6 @@
     def clone(self):
         return PackType(self.type, self.size, self.signed, self.count)
 
-
 class OpToVectorOp(object):
     def __init__(self, arg_ptypes, result_ptype):
         self.arg_ptypes = [a for a in arg_ptypes] # do not use a tuple. rpython cannot union
@@ -837,6 +836,9 @@
         self.input_type = None
         self.output_type = None
 
+    def clone_vbox_set_count(self, box, count):
+        return BoxVector(box.item_type, count, box.item_size, box.item_signed)
+
     def is_vector_arg(self, i):
         if i < 0 or i >= len(self.arg_ptypes):
             return False
@@ -985,8 +987,7 @@
         return vbox_cloned
 
     def unpack(self, vbox, index, count, arg_ptype):
-        vbox_cloned = vbox.clonebox()
-        vbox_cloned.item_count = count
+        vbox_cloned = self.clone_vbox_set_count(vbox, count)
         opnum = rop.VEC_FLOAT_UNPACK
         if vbox.item_type == INT:
             opnum = rop.VEC_INT_UNPACK
@@ -1012,8 +1013,8 @@
             if pos == -1:
                 i += 1
                 continue
-            new_box = tgt_box.clonebox()
-            new_box.item_count += src_box.item_count
+            count = tgt_box.item_count + src_box.item_count
+            new_box = self.clone_vbox_set_count(tgt_box, count)
             op = ResOperation(opnum, [tgt_box, src_box, ConstInt(i),
                                       ConstInt(src_box.item_count)], new_box)
             self.preamble_ops.append(op)
diff --git a/rpython/jit/tool/oparser.py b/rpython/jit/tool/oparser.py
--- a/rpython/jit/tool/oparser.py
+++ b/rpython/jit/tool/oparser.py
@@ -123,12 +123,12 @@
             box = ts.BoxRef()
             _box_counter_more_than(self.model, elem[1:])
         elif elem.startswith('v'):
-            pattern = re.compile('.*\[(u?)(i|f)(\d+)#(\d+)\]')
+            pattern = re.compile('.*\[(u?)(i|f)(\d+)(#|\|)(\d+)\]')
             match = pattern.match(elem)
             if match:
                 item_type = match.group(2)[0]
                 item_size = int(match.group(3)) // 8
-                item_count = int(match.group(4))
+                item_count = int(match.group(5))
                 item_signed = not (match.group(1) == 'u')
                 box = self.model.BoxVector(item_type, item_count, item_size, item_signed)
                 lbracket = elem.find('[')
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to