Author: Richard Plangger <r...@pasra.at> Branch: vecopt Changeset: r77949:1fc0d9cd2612 Date: 2015-06-08 10:28 +0200 http://bitbucket.org/pypy/pypy/changeset/1fc0d9cd2612/
Log: removed manual test since it is tested in auto test now; fixed some other tests in the x86 backend diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py --- a/pypy/module/micronumpy/test/test_zjit.py +++ b/pypy/module/micronumpy/test/test_zjit.py @@ -232,14 +232,14 @@ c = astype(|1|, int16) c[0] = 16i b = a + c - d = b -> 7:9 + d = b -> 7:15 sum(d) """ def test_int16_expand(self): result = self.run("int16_expand") - i = 2 + i = 8 assert int(result) == i*16 + sum(range(7,7+i)) - self.check_vectorized(2, 2) + self.check_vectorized(3, 2) # TODO sum at the end def define_int8_expand(): return """ @@ -253,7 +253,7 @@ def test_int8_expand(self): result = self.run("int8_expand") assert int(result) == 16*8 + sum(range(0,17)) - self.check_vectorized(2, 2) + self.check_vectorized(3, 2) def define_int32_add_const(): return """ diff --git a/rpython/jit/backend/tool/viewcode.py b/rpython/jit/backend/tool/viewcode.py --- a/rpython/jit/backend/tool/viewcode.py +++ b/rpython/jit/backend/tool/viewcode.py @@ -57,6 +57,7 @@ 'x86_32': 'i386', 'x86_64': 'i386:x86-64', 'x86-64': 'i386:x86-64', + 'x86-64-sse4': 'i386:x86-64', 'i386': 'i386', 'arm': 'arm', 'arm_32': 'arm', diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -2615,24 +2615,24 @@ return # already the right size if size == 4 and tosize == 2: scratch = X86_64_SCRATCH_REG - self.mc.PSHUFLW_xxi(resloc.value, srcloc.value, 0b11111000) - self.mc.PEXTRW_rxi(scratch.value, srcloc.value, 4) - self.mc.PINSRW_xri(resloc.value, scratch.value, 2) - self.mc.PEXTRW_rxi(scratch.value, srcloc.value, 6) - self.mc.PINSRW_xri(resloc.value, scratch.value, 3) + self.mc.PSHUFLW_xxi8(resloc.value, srcloc.value, 0b11111000) + self.mc.PEXTRW_rxi8(scratch.value, srcloc.value, 4) + self.mc.PINSRW_xri8(resloc.value, scratch.value, 2) + self.mc.PEXTRW_rxi8(scratch.value, 
srcloc.value, 6) + self.mc.PINSRW_xri8(resloc.value, scratch.value, 3) elif size == 4 and tosize == 8: scratch = X86_64_SCRATCH_REG.value - self.mc.PEXTRD_rxi(scratch, srcloc.value, 1) - self.mc.PINSRQ_xri(resloc.value, scratch, 1) - self.mc.PEXTRD_rxi(scratch, srcloc.value, 0) - self.mc.PINSRQ_xri(resloc.value, scratch, 0) + self.mc.PEXTRD_rxi8(scratch, srcloc.value, 1) + self.mc.PINSRQ_xri8(resloc.value, scratch, 1) + self.mc.PEXTRD_rxi8(scratch, srcloc.value, 0) + self.mc.PINSRQ_xri8(resloc.value, scratch, 0) elif size == 8 and tosize == 4: # is there a better sequence to move them? scratch = X86_64_SCRATCH_REG.value - self.mc.PEXTRQ_rxi(scratch, srcloc.value, 0) - self.mc.PINSRD_xri(resloc.value, scratch, 0) - self.mc.PEXTRQ_rxi(scratch, srcloc.value, 1) - self.mc.PINSRD_xri(resloc.value, scratch, 1) + self.mc.PEXTRQ_rxi8(scratch, srcloc.value, 0) + self.mc.PINSRD_xri8(resloc.value, scratch, 0) + self.mc.PEXTRQ_rxi8(scratch, srcloc.value, 1) + self.mc.PINSRD_xri8(resloc.value, scratch, 1) else: raise NotImplementedError("sign ext missing: " + str(size) + " -> " + str(tosize)) @@ -2653,19 +2653,19 @@ assert not srcloc.is_xmm size = sizeloc.value if size == 1: - self.mc.PINSRB_xri(resloc.value, srcloc.value, 0) + self.mc.PINSRB_xri8(resloc.value, srcloc.value, 0) self.mc.PSHUFB(resloc, heap(self.expand_byte_mask_addr)) elif size == 2: - self.mc.PINSRW_xri(resloc.value, srcloc.value, 0) - self.mc.PINSRW_xri(resloc.value, srcloc.value, 4) - self.mc.PSHUFLW_xxi(resloc.value, resloc.value, 0) - self.mc.PSHUFHW_xxi(resloc.value, resloc.value, 0) + self.mc.PINSRW_xri8(resloc.value, srcloc.value, 0) + self.mc.PINSRW_xri8(resloc.value, srcloc.value, 4) + self.mc.PSHUFLW_xxi8(resloc.value, resloc.value, 0) + self.mc.PSHUFHW_xxi8(resloc.value, resloc.value, 0) elif size == 4: - self.mc.PINSRD_xri(resloc.value, srcloc.value, 0) - self.mc.PSHUFD_xxi(resloc.value, resloc.value, 0) + self.mc.PINSRD_xri8(resloc.value, srcloc.value, 0) + self.mc.PSHUFD_xxi8(resloc.value, 
resloc.value, 0) elif size == 8: - self.mc.PINSRQ_xri(resloc.value, srcloc.value, 0) - self.mc.PINSRQ_xri(resloc.value, srcloc.value, 1) + self.mc.PINSRQ_xri8(resloc.value, srcloc.value, 0) + self.mc.PINSRQ_xri8(resloc.value, srcloc.value, 1) else: raise NotImplementedError("missing size %d for int expand" % (size,)) @@ -2676,34 +2676,36 @@ srcidx = srcidxloc.value residx = residxloc.value count = countloc.value + # for small data type conversion this can be quite costy + # j = pack(i,4,4) si = srcidx ri = residx k = count while k > 0: if size == 8: if resultloc.is_xmm: - self.mc.PEXTRQ_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si) - self.mc.PINSRQ_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri) + self.mc.PEXTRQ_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si) + self.mc.PINSRQ_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri) else: - self.mc.PEXTRQ_rxi(resultloc.value, sourceloc.value, si) + self.mc.PEXTRQ_rxi8(resultloc.value, sourceloc.value, si) elif size == 4: if resultloc.is_xmm: - self.mc.PEXTRD_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si) - self.mc.PINSRD_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri) + self.mc.PEXTRD_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si) + self.mc.PINSRD_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri) else: - self.mc.PEXTRD_rxi(resultloc.value, sourceloc.value, si) + self.mc.PEXTRD_rxi8(resultloc.value, sourceloc.value, si) elif size == 2: if resultloc.is_xmm: - self.mc.PEXTRW_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si) - self.mc.PINSRW_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri) + self.mc.PEXTRW_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si) + self.mc.PINSRW_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri) else: - self.mc.PEXTRW_rxi(resultloc.value, sourceloc.value, si) + self.mc.PEXTRW_rxi8(resultloc.value, sourceloc.value, si) elif size == 1: if resultloc.is_xmm: - self.mc.PEXTRB_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si) - self.mc.PINSRB_xri(resultloc.value, 
X86_64_SCRATCH_REG.value, ri) + self.mc.PEXTRB_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si) + self.mc.PINSRB_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri) else: - self.mc.PEXTRB_rxi(resultloc.value, sourceloc.value, si) + self.mc.PEXTRB_rxi8(resultloc.value, sourceloc.value, si) si += 1 ri += 1 k -= 1 @@ -2732,9 +2734,9 @@ self.mov(X86_64_XMM_SCRATCH_REG, srcloc) src = X86_64_XMM_SCRATCH_REG.value select = ((si & 0x3) << 6)|((ri & 0x3) << 4) - self.mc.INSERTPS_xxi(resloc.value, src, select) + self.mc.INSERTPS_xxi8(resloc.value, src, select) else: - self.mc.PEXTRD_rxi(resloc.value, srcloc.value, si) + self.mc.PEXTRD_rxi8(resloc.value, srcloc.value, si) si += 1 ri += 1 k -= 1 @@ -2755,12 +2757,12 @@ # r = (s[1], r[1]) if resloc != srcloc: self.mc.UNPCKHPD(resloc, srcloc) - self.mc.SHUFPD_xxi(resloc.value, resloc.value, 1) + self.mc.SHUFPD_xxi8(resloc.value, resloc.value, 1) else: assert residx == 1 # r = (r[0], s[1]) if resloc != srcloc: - self.mc.SHUFPD_xxi(resloc.value, resloc.value, 1) + self.mc.SHUFPD_xxi8(resloc.value, resloc.value, 1) self.mc.UNPCKHPD(resloc, srcloc) # if they are equal nothing is to be done diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py --- a/rpython/jit/backend/x86/rx86.py +++ b/rpython/jit/backend/x86/rx86.py @@ -728,10 +728,9 @@ MOVD32_xb = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_bp(2)) MOVD32_xs = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_sp(2)) - PSRAD_xi = xmminsn('\x66', rex_nw, '\x0F\x72', register(1), '\xE0', immediate(2, 'b')) - MOVSS_xx = xmminsn('\xF3', rex_nw, '\x0F\x10', register(1,8), register(2), '\xC0') + PSRAD_xi = xmminsn('\x66', rex_nw, '\x0F\x72', register(1), '\xE0', immediate(2, 'b')) PSRLDQ_xi = xmminsn('\x66', rex_nw, '\x0F\x73', register(1), orbyte(0x3 << 3), '\xC0', immediate(2, 'b')) UNPCKLPD_xx = xmminsn('\x66', rex_nw, '\x0F\x14', register(1, 8), register(2), '\xC0') diff --git a/rpython/jit/backend/x86/test/test_rx86.py 
b/rpython/jit/backend/x86/test/test_rx86.py --- a/rpython/jit/backend/x86/test/test_rx86.py +++ b/rpython/jit/backend/x86/test/test_rx86.py @@ -245,77 +245,3 @@ assert len(cls.MULTIBYTE_NOPs) == 16 for i in range(16): assert len(cls.MULTIBYTE_NOPs[i]) == i - -def test_pextr(): - s = CodeBuilder64() - s.PEXTRW_rxi(R.r11, R.xmm0,0) - assert s.getvalue() == '\x66\x44\x0f\xc5\xd8\x00' - s.clear() - s.PEXTRW_rxi(R.edi, R.xmm15, 15) - assert s.getvalue() == '\x66\x41\x0f\xc5\xff\x0f' - s.clear() - s.PEXTRD_rxi(R.eax, R.xmm11, 2) - assert s.getvalue() == '\x66\x44\x0f\x3a\x16\xd8\x02' - s.clear() - s.PEXTRD_rxi(R.r11, R.xmm5, 2) - assert s.getvalue() == '\x66\x41\x0f\x3a\x16\xeb\x02' - s.clear() - s.PEXTRQ_rxi(R.ebp, R.xmm0, 7) - assert s.getvalue() == '\x66\x48\x0f\x3a\x16\xc5\x07' - # BYTE - s.clear() - s.PEXTRB_rxi(R.eax, R.xmm13, 24) - assert s.getvalue() == '\x66\x44\x0f\x3a\x14\xe8\x18' - s.clear() - s.PEXTRB_rxi(R.r15, R.xmm5, 33) - assert s.getvalue() == '\x66\x41\x0f\x3a\x14\xef\x21' - # EXTR SINGLE FLOAT - s.clear() - s.EXTRACTPS_rxi(R.eax, R.xmm15, 2) - assert s.getvalue() == '\x66\x44\x0f\x3a\x17\xf8\x02' - s.clear() - s.EXTRACTPS_rxi(R.r11, R.xmm0, 1) - assert s.getvalue() == '\x66\x41\x0f\x3a\x17\xc3\x01' - s.clear() - s.EXTRACTPS_rxi(R.eax, R.xmm0, 1) - assert s.getvalue() == '\x66\x0f\x3a\x17\xc0\x01' - s.clear() - s.EXTRACTPS_rxi(R.r15, R.xmm15, 4) - assert s.getvalue() == '\x66\x45\x0f\x3a\x17\xff\x04' - -def test_pinsr(): - s = CodeBuilder64() - s.PINSRW_xri(R.xmm0, R.r11,0) - assert s.getvalue() == '\x66\x41\x0f\xc4\xc3\x00' - s.clear() - s.PINSRW_xri(R.xmm15, R.edi, 15) - assert s.getvalue() == '\x66\x44\x0f\xc4\xff\x0f' - s.clear() - s.PINSRD_xri(R.xmm11, R.eax, 2) - assert s.getvalue() == '\x66\x44\x0f\x3a\x22\xd8\x02' - s.clear() - s.PINSRD_xri(R.xmm5, R.r11, 2) - assert s.getvalue() == '\x66\x41\x0f\x3a\x22\xeb\x02' - s.clear() - s.PINSRQ_xri(R.xmm0, R.ebp, 7) - assert s.getvalue() == '\x66\x48\x0f\x3a\x22\xc5\x07' - # BYTE - s.clear() - 
s.PINSRB_xri(R.xmm13, R.eax, 24) - assert s.getvalue() == '\x66\x44\x0f\x3a\x20\xe8\x18' - s.clear() - s.PINSRB_xri(R.xmm5, R.r15, 33) - assert s.getvalue() == '\x66\x41\x0f\x3a\x20\xef\x21' - # EXTR SINGLE FLOAT - s.clear() - s.INSERTPS_xxi(R.xmm15, R.xmm0, 2) - assert s.getvalue() == '\x66\x44\x0f\x3a\x21\xf8\x02' - s.clear() - s.INSERTPS_xxi(R.xmm0, R.xmm11, 1) - assert s.getvalue() == '\x66\x41\x0f\x3a\x21\xc3\x01' - s.clear() - s.INSERTPS_xxi(R.xmm0, R.xmm0, 1) - assert s.getvalue() == '\x66\x0f\x3a\x21\xc0\x01' - s.clear() - s.INSERTPS_xxi(R.xmm15, R.xmm15, 4) - assert s.getvalue() == '\x66\x45\x0f\x3a\x21\xff\x04' diff --git a/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py b/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py --- a/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py +++ b/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py @@ -196,6 +196,8 @@ instrname = 'MOVD' if argmodes == 'xb': py.test.skip('"as" uses an undocumented alternate encoding??') + if argmodes == 'xx' and self.WORD != 8: + instrname = 'MOVQ' # for args in args_lists: suffix = "" @@ -328,6 +330,15 @@ (instrname == 'MULTIBYTE') ) + def should_skip_instruction_bit32(self, instrname, argmodes): + if self.WORD != 8: + return ( + # the test suite uses 64 bit registers instead of 32 bit... 
+ (instrname == 'PEXTRQ') or + (instrname == 'PINSRQ') + ) + + return False def complete_test(self, methname): @@ -336,7 +347,8 @@ else: instrname, argmodes = methname, '' - if self.should_skip_instruction(instrname, argmodes): + if self.should_skip_instruction(instrname, argmodes) or \ + self.should_skip_instruction_bit32(instrname, argmodes): print "Skipping %s" % methname return @@ -370,6 +382,19 @@ else: instr_suffix = None + if instrname.find('EXTR') != -1 or \ + instrname.find('INSR') != -1 or \ + instrname.find('INSERT') != -1 or \ + instrname.find('EXTRACT') != -1 or \ + instrname.find('SRLDQ') != -1 or \ + instrname.find('SHUF') != -1: + realargmodes = [] + for mode in argmodes: + if mode == 'i': + mode = 'i8' + realargmodes.append(mode) + argmodes = realargmodes + print "Testing %s with argmodes=%r" % (instrname, argmodes) self.methname = methname self.is_xmm_insn = getattr(getattr(self.X86_CodeBuilder, diff --git a/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py b/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py --- a/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py +++ b/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py @@ -24,7 +24,10 @@ return ( super(TestRx86_64, self).should_skip_instruction(instrname, argmodes) or # Not testing FSTP on 64-bit for now - (instrname == 'FSTP') + (instrname == 'FSTP') or + # the test suite uses 64 bit registers instead of 32 bit... + (instrname == 'PEXTRD') or + (instrname == 'PINSRD') ) def array_tests(self): _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit