Author: Richard Plangger <[email protected]>
Branch: vecopt
Changeset: r77921:3c733c6463df
Date: 2015-06-05 19:40 +0200
http://bitbucket.org/pypy/pypy/changeset/3c733c6463df/
Log: The SSE4 extract instructions had some wrong parameters in the
assembler; added a test case for that.
diff --git a/pypy/module/micronumpy/test/test_zjit.py
b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -232,11 +232,13 @@
c = astype(|1|, int16)
c[0] = 16i
b = a + c
- sum(b -> 7:14)
+ d = b -> 7:9
+ sum(d)
"""
def test_int16_expand(self):
result = self.run("int16_expand")
- assert int(result) == 8*16 + sum(range(7,15))
+ i = 2
+ assert int(result) == i*16 + sum(range(7,7+i))
self.check_vectorized(2, 2)
def define_int8_expand():
@@ -245,10 +247,11 @@
c = astype(|1|, int16)
c[0] = 8i
b = a + c
- sum(b -> 0:17)
+ d = b -> 0:17
+ sum(d)
"""
- def test_int16_expand(self):
- result = self.run("int16_expand")
+ def test_int8_expand(self):
+ result = self.run("int8_expand")
assert int(result) == 16*8 + sum(range(0,17))
self.check_vectorized(2, 2)
diff --git a/rpython/jit/backend/x86/assembler.py
b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2613,7 +2613,14 @@
tosize = tosizeloc.value
if size == tosize:
return # already the right size
- if size == 4 and tosize == 8:
+ if size == 4 and tosize == 2:
+ scratch = X86_64_SCRATCH_REG
+ self.mc.PSHUFLW_xxi(resloc.value, srcloc.value, 0b11111000)
+ self.mc.PEXTRW_rxi(scratch.value, srcloc.value, 4)
+ self.mc.PINSRW_xri(resloc.value, scratch.value, 2)
+ self.mc.PEXTRW_rxi(scratch.value, srcloc.value, 6)
+ self.mc.PINSRW_xri(resloc.value, scratch.value, 3)
+ elif size == 4 and tosize == 8:
scratch = X86_64_SCRATCH_REG.value
self.mc.PEXTRD_rxi(scratch, srcloc.value, 1)
self.mc.PINSRQ_xri(resloc.value, scratch, 1)
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -750,15 +750,15 @@
# following require SSE4_1
- PEXTRQ_rxi = xmminsn('\x66', rex_w, '\x0F\x3A\x16', register(2,8),
register(1), '\xC0', immediate(3, 'b'))
- PEXTRD_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x16', register(2,8),
register(1), '\xC0', immediate(3, 'b'))
- PEXTRW_rxi = xmminsn('\x66', rex_nw, '\x0F\xC4', register(2,8),
register(1), '\xC0', immediate(3, 'b'))
- PEXTRB_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x14', register(2,8),
register(1), '\xC0', immediate(3, 'b'))
- EXTRACTPS_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x17', register(2,8),
register(1), '\xC0', immediate(3, 'b'))
+ PEXTRQ_rxi = xmminsn('\x66', rex_w, '\x0F\x3A\x16', register(1),
register(2,8), '\xC0', immediate(3, 'b'))
+ PEXTRD_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x16', register(1),
register(2,8), '\xC0', immediate(3, 'b'))
+ PEXTRW_rxi = xmminsn('\x66', rex_nw, '\x0F\xC5', register(1,8),
register(2), '\xC0', immediate(3, 'b'))
+ PEXTRB_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x14', register(1),
register(2,8), '\xC0', immediate(3, 'b'))
+ EXTRACTPS_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x17', register(1),
register(2,8), '\xC0', immediate(3, 'b'))
- PINSRQ_xri = xmminsn('\x66', rex_w, '\x0F\x3A\x22', register(1,8),
register(2), '\xC0', immediate(3, 'b'))
- PINSRD_xri = xmminsn('\x66', rex_nw, '\x0F\x3A\x22', register(1,8),
register(2), '\xC0', immediate(3, 'b'))
- PINSRW_xri = xmminsn('\x66', rex_nw, '\x0F\xC5', register(1,8),
register(2), '\xC0', immediate(3, 'b'))
+ PINSRQ_xri = xmminsn('\x66', rex_w, '\x0F\x3A\x22', register(1,8),
register(2,8), '\xC0', immediate(3, 'b'))
+ PINSRD_xri = xmminsn('\x66', rex_nw, '\x0F\x3A\x22', register(1,8),
register(2,8), '\xC0', immediate(3, 'b'))
+ PINSRW_xri = xmminsn('\x66', rex_nw, '\x0F\xC4', register(1,8),
register(2,8), '\xC0', immediate(3, 'b'))
PINSRB_xri = xmminsn('\x66', rex_nw, '\x0F\x3A\x20', register(1,8),
register(2), '\xC0', immediate(3, 'b'))
INSERTPS_xxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x21', register(1,8),
register(2), '\xC0', immediate(3, 'b'))
diff --git a/rpython/jit/backend/x86/test/test_rx86.py
b/rpython/jit/backend/x86/test/test_rx86.py
--- a/rpython/jit/backend/x86/test/test_rx86.py
+++ b/rpython/jit/backend/x86/test/test_rx86.py
@@ -14,6 +14,9 @@
def getvalue(self):
return ''.join(self.buffer)
+ def clear(self):
+ self.buffer = []
+
def force_frame_size(self, frame_size):
pass
@@ -242,3 +245,34 @@
assert len(cls.MULTIBYTE_NOPs) == 16
for i in range(16):
assert len(cls.MULTIBYTE_NOPs[i]) == i
+
+def test_pextr():
+ s = CodeBuilder64()
+ s.PEXTRW_rxi(R.r11, R.xmm0,0)
+ assert s.getvalue() == '\x66\x44\x0f\xc5\xd8\x00'
+ s.clear()
+ s.PEXTRW_rxi(R.edi, R.xmm15, 15)
+ assert s.getvalue() == '\x66\x41\x0f\xc5\xff\x0f'
+ s.clear()
+ s.PEXTRD_rxi(R.eax, R.xmm11, 2)
+ assert s.getvalue() == '\x66\x44\x0f\x3a\x16\xd8\x02'
+ s.clear()
+ s.PEXTRD_rxi(R.r11, R.xmm5, 2)
+ assert s.getvalue() == '\x66\x41\x0f\x3a\x16\xeb\x02'
+ s.clear()
+ s.PEXTRQ_rxi(R.ebp, R.xmm0, 7)
+ assert s.getvalue() == '\x66\x48\x0f\x3a\x16\xc5\x07'
+ # BYTE
+ s.clear()
+ s.PEXTRB_rxi(R.eax, R.xmm13, 24)
+ assert s.getvalue() == '\x66\x44\x0f\x3a\x14\xe8\x18'
+ s.clear()
+ s.PEXTRB_rxi(R.r15, R.xmm5, 33)
+ assert s.getvalue() == '\x66\x41\x0f\x3a\x14\xef\x21'
+ # EXTR SINGLE FLOAT
+ s.clear()
+ s.EXTRACTPS_rxi(R.eax, R.xmm15, 2)
+ assert s.getvalue() == '\x66\x44\x0f\x3a\x17\xf8\x02'
+ s.clear()
+ s.EXTRACTPS_rxi(R.r11, R.xmm0, 1)
+ assert s.getvalue() == '\x66\x41\x0f\x3a\x17\xc3\x01'
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
@@ -145,7 +145,7 @@
v10[f64|2] = vec_box(2)
v20[f64|2] = vec_float_pack(v10[f64|2], f0, 0, 1)
v30[f64|2] = vec_float_pack(v20[f64|2], f1, 1, 1)
- v40[f64|2] = vec_float_expand(f5) | only expaned once
+ v40[f64|2] = vec_float_expand(f5) # only expaned once
#
v50[f64|2] = vec_float_add(v30[f64|2], v40[f64|2])
v60[f64|2] = vec_float_add(v50[f64|2], v40[f64|2])
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit