Author: Richard Plangger <r...@pasra.at>
Branch: vecopt
Changeset: r77949:1fc0d9cd2612
Date: 2015-06-08 10:28 +0200
http://bitbucket.org/pypy/pypy/changeset/1fc0d9cd2612/

Log:    removed manual test since it is tested in auto test now; fixed some
        other tests in the x86 backend.

diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -232,14 +232,14 @@
         c = astype(|1|, int16)
         c[0] = 16i
         b = a + c
-        d = b -> 7:9
+        d = b -> 7:15
         sum(d)
         """
     def test_int16_expand(self):
         result = self.run("int16_expand")
-        i = 2
+        i = 8
         assert int(result) == i*16 + sum(range(7,7+i))
-        self.check_vectorized(2, 2)
+        self.check_vectorized(3, 2) # TODO sum at the end
 
     def define_int8_expand():
         return """
@@ -253,7 +253,7 @@
     def test_int8_expand(self):
         result = self.run("int8_expand")
         assert int(result) == 16*8 + sum(range(0,17))
-        self.check_vectorized(2, 2)
+        self.check_vectorized(3, 2)
 
     def define_int32_add_const():
         return """
diff --git a/rpython/jit/backend/tool/viewcode.py b/rpython/jit/backend/tool/viewcode.py
--- a/rpython/jit/backend/tool/viewcode.py
+++ b/rpython/jit/backend/tool/viewcode.py
@@ -57,6 +57,7 @@
         'x86_32': 'i386',
         'x86_64': 'i386:x86-64',
         'x86-64': 'i386:x86-64',
+        'x86-64-sse4': 'i386:x86-64',
         'i386': 'i386',
         'arm': 'arm',
         'arm_32': 'arm',
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2615,24 +2615,24 @@
             return # already the right size
         if size == 4 and tosize == 2:
             scratch = X86_64_SCRATCH_REG
-            self.mc.PSHUFLW_xxi(resloc.value, srcloc.value, 0b11111000)
-            self.mc.PEXTRW_rxi(scratch.value, srcloc.value, 4)
-            self.mc.PINSRW_xri(resloc.value, scratch.value, 2)
-            self.mc.PEXTRW_rxi(scratch.value, srcloc.value, 6)
-            self.mc.PINSRW_xri(resloc.value, scratch.value, 3)
+            self.mc.PSHUFLW_xxi8(resloc.value, srcloc.value, 0b11111000)
+            self.mc.PEXTRW_rxi8(scratch.value, srcloc.value, 4)
+            self.mc.PINSRW_xri8(resloc.value, scratch.value, 2)
+            self.mc.PEXTRW_rxi8(scratch.value, srcloc.value, 6)
+            self.mc.PINSRW_xri8(resloc.value, scratch.value, 3)
         elif size == 4 and tosize == 8:
             scratch = X86_64_SCRATCH_REG.value
-            self.mc.PEXTRD_rxi(scratch, srcloc.value, 1)
-            self.mc.PINSRQ_xri(resloc.value, scratch, 1)
-            self.mc.PEXTRD_rxi(scratch, srcloc.value, 0)
-            self.mc.PINSRQ_xri(resloc.value, scratch, 0)
+            self.mc.PEXTRD_rxi8(scratch, srcloc.value, 1)
+            self.mc.PINSRQ_xri8(resloc.value, scratch, 1)
+            self.mc.PEXTRD_rxi8(scratch, srcloc.value, 0)
+            self.mc.PINSRQ_xri8(resloc.value, scratch, 0)
         elif size == 8 and tosize == 4:
             # is there a better sequence to move them?
             scratch = X86_64_SCRATCH_REG.value
-            self.mc.PEXTRQ_rxi(scratch, srcloc.value, 0)
-            self.mc.PINSRD_xri(resloc.value, scratch, 0)
-            self.mc.PEXTRQ_rxi(scratch, srcloc.value, 1)
-            self.mc.PINSRD_xri(resloc.value, scratch, 1)
+            self.mc.PEXTRQ_rxi8(scratch, srcloc.value, 0)
+            self.mc.PINSRD_xri8(resloc.value, scratch, 0)
+            self.mc.PEXTRQ_rxi8(scratch, srcloc.value, 1)
+            self.mc.PINSRD_xri8(resloc.value, scratch, 1)
         else:
             raise NotImplementedError("sign ext missing: " + str(size) + " -> " + str(tosize))
 
@@ -2653,19 +2653,19 @@
         assert not srcloc.is_xmm
         size = sizeloc.value
         if size == 1:
-            self.mc.PINSRB_xri(resloc.value, srcloc.value, 0)
+            self.mc.PINSRB_xri8(resloc.value, srcloc.value, 0)
             self.mc.PSHUFB(resloc, heap(self.expand_byte_mask_addr))
         elif size == 2:
-            self.mc.PINSRW_xri(resloc.value, srcloc.value, 0)
-            self.mc.PINSRW_xri(resloc.value, srcloc.value, 4)
-            self.mc.PSHUFLW_xxi(resloc.value, resloc.value, 0)
-            self.mc.PSHUFHW_xxi(resloc.value, resloc.value, 0)
+            self.mc.PINSRW_xri8(resloc.value, srcloc.value, 0)
+            self.mc.PINSRW_xri8(resloc.value, srcloc.value, 4)
+            self.mc.PSHUFLW_xxi8(resloc.value, resloc.value, 0)
+            self.mc.PSHUFHW_xxi8(resloc.value, resloc.value, 0)
         elif size == 4:
-            self.mc.PINSRD_xri(resloc.value, srcloc.value, 0)
-            self.mc.PSHUFD_xxi(resloc.value, resloc.value, 0)
+            self.mc.PINSRD_xri8(resloc.value, srcloc.value, 0)
+            self.mc.PSHUFD_xxi8(resloc.value, resloc.value, 0)
         elif size == 8:
-            self.mc.PINSRQ_xri(resloc.value, srcloc.value, 0)
-            self.mc.PINSRQ_xri(resloc.value, srcloc.value, 1)
+            self.mc.PINSRQ_xri8(resloc.value, srcloc.value, 0)
+            self.mc.PINSRQ_xri8(resloc.value, srcloc.value, 1)
         else:
             raise NotImplementedError("missing size %d for int expand" % (size,))
 
@@ -2676,34 +2676,36 @@
         srcidx = srcidxloc.value
         residx = residxloc.value
         count = countloc.value
+        # for small data type conversion this can be quite costly
+        # j = pack(i,4,4)
         si = srcidx
         ri = residx
         k = count
         while k > 0:
             if size == 8:
                 if resultloc.is_xmm:
-                    self.mc.PEXTRQ_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
-                    self.mc.PINSRQ_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri)
+                    self.mc.PEXTRQ_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si)
+                    self.mc.PINSRQ_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri)
                 else:
-                    self.mc.PEXTRQ_rxi(resultloc.value, sourceloc.value, si)
+                    self.mc.PEXTRQ_rxi8(resultloc.value, sourceloc.value, si)
             elif size == 4:
                 if resultloc.is_xmm:
-                    self.mc.PEXTRD_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
-                    self.mc.PINSRD_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri)
+                    self.mc.PEXTRD_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si)
+                    self.mc.PINSRD_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri)
                 else:
-                    self.mc.PEXTRD_rxi(resultloc.value, sourceloc.value, si)
+                    self.mc.PEXTRD_rxi8(resultloc.value, sourceloc.value, si)
             elif size == 2:
                 if resultloc.is_xmm:
-                    self.mc.PEXTRW_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
-                    self.mc.PINSRW_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri)
+                    self.mc.PEXTRW_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si)
+                    self.mc.PINSRW_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri)
                 else:
-                    self.mc.PEXTRW_rxi(resultloc.value, sourceloc.value, si)
+                    self.mc.PEXTRW_rxi8(resultloc.value, sourceloc.value, si)
             elif size == 1:
                 if resultloc.is_xmm:
-                    self.mc.PEXTRB_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
-                    self.mc.PINSRB_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri)
+                    self.mc.PEXTRB_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si)
+                    self.mc.PINSRB_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri)
                 else:
-                    self.mc.PEXTRB_rxi(resultloc.value, sourceloc.value, si)
+                    self.mc.PEXTRB_rxi8(resultloc.value, sourceloc.value, si)
             si += 1
             ri += 1
             k -= 1
@@ -2732,9 +2734,9 @@
                         self.mov(X86_64_XMM_SCRATCH_REG, srcloc)
                         src = X86_64_XMM_SCRATCH_REG.value
                     select = ((si & 0x3) << 6)|((ri & 0x3) << 4)
-                    self.mc.INSERTPS_xxi(resloc.value, src, select)
+                    self.mc.INSERTPS_xxi8(resloc.value, src, select)
                 else:
-                    self.mc.PEXTRD_rxi(resloc.value, srcloc.value, si)
+                    self.mc.PEXTRD_rxi8(resloc.value, srcloc.value, si)
                 si += 1
                 ri += 1
                 k -= 1
@@ -2755,12 +2757,12 @@
                         # r = (s[1], r[1])
                         if resloc != srcloc:
                             self.mc.UNPCKHPD(resloc, srcloc)
-                        self.mc.SHUFPD_xxi(resloc.value, resloc.value, 1)
+                        self.mc.SHUFPD_xxi8(resloc.value, resloc.value, 1)
                     else:
                         assert residx == 1
                         # r = (r[0], s[1])
                         if resloc != srcloc:
-                            self.mc.SHUFPD_xxi(resloc.value, resloc.value, 1)
+                            self.mc.SHUFPD_xxi8(resloc.value, resloc.value, 1)
                             self.mc.UNPCKHPD(resloc, srcloc)
                         # if they are equal nothing is to be done
 
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -728,10 +728,9 @@
     MOVD32_xb = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_bp(2))
     MOVD32_xs = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_sp(2))
 
-    PSRAD_xi = xmminsn('\x66', rex_nw, '\x0F\x72', register(1), '\xE0', immediate(2, 'b'))
-
     MOVSS_xx = xmminsn('\xF3', rex_nw, '\x0F\x10', register(1,8), register(2), '\xC0')
 
+    PSRAD_xi = xmminsn('\x66', rex_nw, '\x0F\x72', register(1), '\xE0', immediate(2, 'b'))
     PSRLDQ_xi = xmminsn('\x66', rex_nw, '\x0F\x73', register(1), 
                         orbyte(0x3 << 3), '\xC0', immediate(2, 'b'))
     UNPCKLPD_xx = xmminsn('\x66', rex_nw, '\x0F\x14', register(1, 8), register(2), '\xC0')
diff --git a/rpython/jit/backend/x86/test/test_rx86.py b/rpython/jit/backend/x86/test/test_rx86.py
--- a/rpython/jit/backend/x86/test/test_rx86.py
+++ b/rpython/jit/backend/x86/test/test_rx86.py
@@ -245,77 +245,3 @@
         assert len(cls.MULTIBYTE_NOPs) == 16
         for i in range(16):
             assert len(cls.MULTIBYTE_NOPs[i]) == i
-
-def test_pextr():
-    s = CodeBuilder64()
-    s.PEXTRW_rxi(R.r11, R.xmm0,0)
-    assert s.getvalue() == '\x66\x44\x0f\xc5\xd8\x00'
-    s.clear()
-    s.PEXTRW_rxi(R.edi, R.xmm15, 15)
-    assert s.getvalue() == '\x66\x41\x0f\xc5\xff\x0f'
-    s.clear()
-    s.PEXTRD_rxi(R.eax, R.xmm11, 2)
-    assert s.getvalue() == '\x66\x44\x0f\x3a\x16\xd8\x02'
-    s.clear()
-    s.PEXTRD_rxi(R.r11, R.xmm5, 2)
-    assert s.getvalue() == '\x66\x41\x0f\x3a\x16\xeb\x02'
-    s.clear()
-    s.PEXTRQ_rxi(R.ebp, R.xmm0, 7)
-    assert s.getvalue() == '\x66\x48\x0f\x3a\x16\xc5\x07'
-    # BYTE
-    s.clear()
-    s.PEXTRB_rxi(R.eax, R.xmm13, 24)
-    assert s.getvalue() == '\x66\x44\x0f\x3a\x14\xe8\x18'
-    s.clear()
-    s.PEXTRB_rxi(R.r15, R.xmm5, 33)
-    assert s.getvalue() == '\x66\x41\x0f\x3a\x14\xef\x21'
-    # EXTR SINGLE FLOAT
-    s.clear()
-    s.EXTRACTPS_rxi(R.eax, R.xmm15, 2)
-    assert s.getvalue() == '\x66\x44\x0f\x3a\x17\xf8\x02'
-    s.clear()
-    s.EXTRACTPS_rxi(R.r11, R.xmm0, 1)
-    assert s.getvalue() == '\x66\x41\x0f\x3a\x17\xc3\x01'
-    s.clear()
-    s.EXTRACTPS_rxi(R.eax, R.xmm0, 1)
-    assert s.getvalue() == '\x66\x0f\x3a\x17\xc0\x01'
-    s.clear()
-    s.EXTRACTPS_rxi(R.r15, R.xmm15, 4)
-    assert s.getvalue() == '\x66\x45\x0f\x3a\x17\xff\x04'
-
-def test_pinsr():
-    s = CodeBuilder64()
-    s.PINSRW_xri(R.xmm0, R.r11,0)
-    assert s.getvalue() == '\x66\x41\x0f\xc4\xc3\x00'
-    s.clear()
-    s.PINSRW_xri(R.xmm15, R.edi, 15)
-    assert s.getvalue() == '\x66\x44\x0f\xc4\xff\x0f'
-    s.clear()
-    s.PINSRD_xri(R.xmm11, R.eax, 2)
-    assert s.getvalue() == '\x66\x44\x0f\x3a\x22\xd8\x02'
-    s.clear()
-    s.PINSRD_xri(R.xmm5, R.r11, 2)
-    assert s.getvalue() == '\x66\x41\x0f\x3a\x22\xeb\x02'
-    s.clear()
-    s.PINSRQ_xri(R.xmm0, R.ebp, 7)
-    assert s.getvalue() == '\x66\x48\x0f\x3a\x22\xc5\x07'
-    # BYTE
-    s.clear()
-    s.PINSRB_xri(R.xmm13, R.eax, 24)
-    assert s.getvalue() == '\x66\x44\x0f\x3a\x20\xe8\x18'
-    s.clear()
-    s.PINSRB_xri(R.xmm5, R.r15, 33)
-    assert s.getvalue() == '\x66\x41\x0f\x3a\x20\xef\x21'
-    # EXTR SINGLE FLOAT
-    s.clear()
-    s.INSERTPS_xxi(R.xmm15, R.xmm0, 2)
-    assert s.getvalue() == '\x66\x44\x0f\x3a\x21\xf8\x02'
-    s.clear()
-    s.INSERTPS_xxi(R.xmm0, R.xmm11, 1)
-    assert s.getvalue() == '\x66\x41\x0f\x3a\x21\xc3\x01'
-    s.clear()
-    s.INSERTPS_xxi(R.xmm0, R.xmm0, 1)
-    assert s.getvalue() == '\x66\x0f\x3a\x21\xc0\x01'
-    s.clear()
-    s.INSERTPS_xxi(R.xmm15, R.xmm15, 4)
-    assert s.getvalue() == '\x66\x45\x0f\x3a\x21\xff\x04'
diff --git a/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py b/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py
--- a/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py
+++ b/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py
@@ -196,6 +196,8 @@
                 instrname = 'MOVD'
             if argmodes == 'xb':
                 py.test.skip('"as" uses an undocumented alternate encoding??')
+            if argmodes == 'xx' and self.WORD != 8:
+                instrname = 'MOVQ'
         #
         for args in args_lists:
             suffix = ""
@@ -328,6 +330,15 @@
                 (instrname == 'MULTIBYTE')
         )
 
+    def should_skip_instruction_bit32(self, instrname, argmodes):
+        if self.WORD != 8:
+            return (
+                # the test suite uses 64 bit registers instead of 32 bit...
+                (instrname == 'PEXTRQ') or
+                (instrname == 'PINSRQ')
+            )
+
+        return False
 
 
     def complete_test(self, methname):
@@ -336,7 +347,8 @@
         else:
             instrname, argmodes = methname, ''
 
-        if self.should_skip_instruction(instrname, argmodes):
+        if self.should_skip_instruction(instrname, argmodes) or \
+           self.should_skip_instruction_bit32(instrname, argmodes):
             print "Skipping %s" % methname
             return
 
@@ -370,6 +382,19 @@
         else:
             instr_suffix = None
 
+        if instrname.find('EXTR') != -1 or \
+           instrname.find('INSR') != -1 or \
+           instrname.find('INSERT') != -1 or \
+           instrname.find('EXTRACT') != -1 or \
+           instrname.find('SRLDQ') != -1 or \
+           instrname.find('SHUF') != -1:
+            realargmodes = []
+            for mode in argmodes:
+                if mode == 'i':
+                    mode = 'i8'
+                realargmodes.append(mode)
+            argmodes = realargmodes
+
         print "Testing %s with argmodes=%r" % (instrname, argmodes)
         self.methname = methname
         self.is_xmm_insn = getattr(getattr(self.X86_CodeBuilder,
diff --git a/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py b/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py
--- a/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py
+++ b/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py
@@ -24,7 +24,10 @@
         return (
                 super(TestRx86_64, self).should_skip_instruction(instrname, argmodes) or
                 # Not testing FSTP on 64-bit for now
-                (instrname == 'FSTP')
+                (instrname == 'FSTP') or
+                # the test suite uses 64 bit registers instead of 32 bit...
+                (instrname == 'PEXTRD') or
+                (instrname == 'PINSRD')
         )
 
     def array_tests(self):
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to