Author: Richard Plangger <[email protected]>
Branch: vecopt
Changeset: r77921:3c733c6463df
Date: 2015-06-05 19:40 +0200
http://bitbucket.org/pypy/pypy/changeset/3c733c6463df/

Log:    the SSE4 extract instructions had some wrong parameters in the
        assembler; added a test case for that

diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -232,11 +232,13 @@
         c = astype(|1|, int16)
         c[0] = 16i
         b = a + c
-        sum(b -> 7:14)
+        d = b -> 7:9
+        sum(d)
         """
     def test_int16_expand(self):
         result = self.run("int16_expand")
-        assert int(result) == 8*16 + sum(range(7,15))
+        i = 2
+        assert int(result) == i*16 + sum(range(7,7+i))
         self.check_vectorized(2, 2)
 
     def define_int8_expand():
@@ -245,10 +247,11 @@
         c = astype(|1|, int16)
         c[0] = 8i
         b = a + c
-        sum(b -> 0:17)
+        d = b -> 0:17
+        sum(d)
         """
-    def test_int16_expand(self):
-        result = self.run("int16_expand")
+    def test_int8_expand(self):
+        result = self.run("int8_expand")
         assert int(result) == 16*8 + sum(range(0,17))
         self.check_vectorized(2, 2)
 
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2613,7 +2613,14 @@
         tosize = tosizeloc.value
         if size == tosize:
             return # already the right size
-        if size == 4 and tosize == 8:
+        if size == 4 and tosize == 2:
+            scratch = X86_64_SCRATCH_REG
+            self.mc.PSHUFLW_xxi(resloc.value, srcloc.value, 0b11111000)
+            self.mc.PEXTRW_rxi(scratch.value, srcloc.value, 4)
+            self.mc.PINSRW_xri(resloc.value, scratch.value, 2)
+            self.mc.PEXTRW_rxi(scratch.value, srcloc.value, 6)
+            self.mc.PINSRW_xri(resloc.value, scratch.value, 3)
+        elif size == 4 and tosize == 8:
             scratch = X86_64_SCRATCH_REG.value
             self.mc.PEXTRD_rxi(scratch, srcloc.value, 1)
             self.mc.PINSRQ_xri(resloc.value, scratch, 1)
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -750,15 +750,15 @@
 
 
     # following require SSE4_1
-    PEXTRQ_rxi = xmminsn('\x66', rex_w, '\x0F\x3A\x16', register(2,8), register(1), '\xC0', immediate(3, 'b'))
-    PEXTRD_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x16', register(2,8), register(1), '\xC0', immediate(3, 'b'))
-    PEXTRW_rxi = xmminsn('\x66', rex_nw, '\x0F\xC4', register(2,8), register(1), '\xC0', immediate(3, 'b'))
-    PEXTRB_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x14', register(2,8), register(1), '\xC0', immediate(3, 'b'))
-    EXTRACTPS_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x17', register(2,8), register(1), '\xC0', immediate(3, 'b'))
+    PEXTRQ_rxi = xmminsn('\x66', rex_w, '\x0F\x3A\x16', register(1), register(2,8), '\xC0', immediate(3, 'b'))
+    PEXTRD_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x16', register(1), register(2,8), '\xC0', immediate(3, 'b'))
+    PEXTRW_rxi = xmminsn('\x66', rex_nw, '\x0F\xC5', register(1,8), register(2), '\xC0', immediate(3, 'b'))
+    PEXTRB_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x14', register(1), register(2,8), '\xC0', immediate(3, 'b'))
+    EXTRACTPS_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x17', register(1), register(2,8), '\xC0', immediate(3, 'b'))
     
-    PINSRQ_xri = xmminsn('\x66', rex_w, '\x0F\x3A\x22', register(1,8), register(2), '\xC0', immediate(3, 'b'))
-    PINSRD_xri = xmminsn('\x66', rex_nw, '\x0F\x3A\x22', register(1,8), register(2), '\xC0', immediate(3, 'b'))
-    PINSRW_xri = xmminsn('\x66', rex_nw, '\x0F\xC5', register(1,8), register(2), '\xC0', immediate(3, 'b'))
+    PINSRQ_xri = xmminsn('\x66', rex_w, '\x0F\x3A\x22', register(1,8), register(2,8), '\xC0', immediate(3, 'b'))
+    PINSRD_xri = xmminsn('\x66', rex_nw, '\x0F\x3A\x22', register(1,8), register(2,8), '\xC0', immediate(3, 'b'))
+    PINSRW_xri = xmminsn('\x66', rex_nw, '\x0F\xC4', register(1,8), register(2,8), '\xC0', immediate(3, 'b'))
     PINSRB_xri = xmminsn('\x66', rex_nw, '\x0F\x3A\x20', register(1,8), register(2), '\xC0', immediate(3, 'b'))
     INSERTPS_xxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x21', register(1,8), register(2), '\xC0', immediate(3, 'b'))
 
diff --git a/rpython/jit/backend/x86/test/test_rx86.py b/rpython/jit/backend/x86/test/test_rx86.py
--- a/rpython/jit/backend/x86/test/test_rx86.py
+++ b/rpython/jit/backend/x86/test/test_rx86.py
@@ -14,6 +14,9 @@
     def getvalue(self):
         return ''.join(self.buffer)
 
+    def clear(self):
+        self.buffer = []
+
     def force_frame_size(self, frame_size):
         pass
 
@@ -242,3 +245,34 @@
         assert len(cls.MULTIBYTE_NOPs) == 16
         for i in range(16):
             assert len(cls.MULTIBYTE_NOPs[i]) == i
+
+def test_pextr():
+    s = CodeBuilder64()
+    s.PEXTRW_rxi(R.r11, R.xmm0,0)
+    assert s.getvalue() == '\x66\x44\x0f\xc5\xd8\x00'
+    s.clear()
+    s.PEXTRW_rxi(R.edi, R.xmm15, 15)
+    assert s.getvalue() == '\x66\x41\x0f\xc5\xff\x0f'
+    s.clear()
+    s.PEXTRD_rxi(R.eax, R.xmm11, 2)
+    assert s.getvalue() == '\x66\x44\x0f\x3a\x16\xd8\x02'
+    s.clear()
+    s.PEXTRD_rxi(R.r11, R.xmm5, 2)
+    assert s.getvalue() == '\x66\x41\x0f\x3a\x16\xeb\x02'
+    s.clear()
+    s.PEXTRQ_rxi(R.ebp, R.xmm0, 7)
+    assert s.getvalue() == '\x66\x48\x0f\x3a\x16\xc5\x07'
+    # BYTE
+    s.clear()
+    s.PEXTRB_rxi(R.eax, R.xmm13, 24)
+    assert s.getvalue() == '\x66\x44\x0f\x3a\x14\xe8\x18'
+    s.clear()
+    s.PEXTRB_rxi(R.r15, R.xmm5, 33)
+    assert s.getvalue() == '\x66\x41\x0f\x3a\x14\xef\x21'
+    # EXTR SINGLE FLOAT
+    s.clear()
+    s.EXTRACTPS_rxi(R.eax, R.xmm15, 2)
+    assert s.getvalue() == '\x66\x44\x0f\x3a\x17\xf8\x02'
+    s.clear()
+    s.EXTRACTPS_rxi(R.r11, R.xmm0, 1)
+    assert s.getvalue() == '\x66\x41\x0f\x3a\x17\xc3\x01'
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
@@ -145,7 +145,7 @@
         v10[f64|2] = vec_box(2)
         v20[f64|2] = vec_float_pack(v10[f64|2], f0, 0, 1)
         v30[f64|2] = vec_float_pack(v20[f64|2], f1, 1, 1)
-        v40[f64|2] = vec_float_expand(f5) | only expaned once
+        v40[f64|2] = vec_float_expand(f5) # only expaned once
         #
         v50[f64|2] = vec_float_add(v30[f64|2], v40[f64|2])
         v60[f64|2] = vec_float_add(v50[f64|2], v40[f64|2])
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to