Author: Richard Plangger <r...@pasra.at>
Branch: vecopt
Changeset: r77953:6e0e98c3d70a
Date: 2015-06-08 10:50 +0200
http://bitbucket.org/pypy/pypy/changeset/6e0e98c3d70a/
Log: reverted the 8 immediate suffix (solved differently for tests) diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py --- a/pypy/module/micronumpy/test/test_zjit.py +++ b/pypy/module/micronumpy/test/test_zjit.py @@ -243,17 +243,18 @@ def define_int8_expand(): return """ - a = astype(|30|, int16) - c = astype(|1|, int16) + a = astype(|30|, int8) + c = astype(|1|, int8) c[0] = 8i b = a + c d = b -> 0:17 sum(d) """ def test_int8_expand(self): + py.test.skip("TODO implement assembler") result = self.run("int8_expand") - assert int(result) == 16*8 + sum(range(0,17)) - self.check_vectorized(3, 2) + assert int(result) == 8*8 + sum(range(0,17)) + self.check_vectorized(3, 2) # TODO sum at the end def define_int32_add_const(): return """ diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -2615,24 +2615,24 @@ return # already the right size if size == 4 and tosize == 2: scratch = X86_64_SCRATCH_REG - self.mc.PSHUFLW_xxi8(resloc.value, srcloc.value, 0b11111000) - self.mc.PEXTRW_rxi8(scratch.value, srcloc.value, 4) - self.mc.PINSRW_xri8(resloc.value, scratch.value, 2) - self.mc.PEXTRW_rxi8(scratch.value, srcloc.value, 6) - self.mc.PINSRW_xri8(resloc.value, scratch.value, 3) + self.mc.PSHUFLW_xxi(resloc.value, srcloc.value, 0b11111000) + self.mc.PEXTRW_rxi(scratch.value, srcloc.value, 4) + self.mc.PINSRW_xri(resloc.value, scratch.value, 2) + self.mc.PEXTRW_rxi(scratch.value, srcloc.value, 6) + self.mc.PINSRW_xri(resloc.value, scratch.value, 3) elif size == 4 and tosize == 8: scratch = X86_64_SCRATCH_REG.value - self.mc.PEXTRD_rxi8(scratch, srcloc.value, 1) - self.mc.PINSRQ_xri8(resloc.value, scratch, 1) - self.mc.PEXTRD_rxi8(scratch, srcloc.value, 0) - self.mc.PINSRQ_xri8(resloc.value, scratch, 0) + self.mc.PEXTRD_rxi(scratch, srcloc.value, 1) + self.mc.PINSRQ_xri(resloc.value, scratch, 1) + 
self.mc.PEXTRD_rxi(scratch, srcloc.value, 0) + self.mc.PINSRQ_xri(resloc.value, scratch, 0) elif size == 8 and tosize == 4: # is there a better sequence to move them? scratch = X86_64_SCRATCH_REG.value - self.mc.PEXTRQ_rxi8(scratch, srcloc.value, 0) - self.mc.PINSRD_xri8(resloc.value, scratch, 0) - self.mc.PEXTRQ_rxi8(scratch, srcloc.value, 1) - self.mc.PINSRD_xri8(resloc.value, scratch, 1) + self.mc.PEXTRQ_rxi(scratch, srcloc.value, 0) + self.mc.PINSRD_xri(resloc.value, scratch, 0) + self.mc.PEXTRQ_rxi(scratch, srcloc.value, 1) + self.mc.PINSRD_xri(resloc.value, scratch, 1) else: raise NotImplementedError("sign ext missing: " + str(size) + " -> " + str(tosize)) @@ -2653,19 +2653,19 @@ assert not srcloc.is_xmm size = sizeloc.value if size == 1: - self.mc.PINSRB_xri8(resloc.value, srcloc.value, 0) + self.mc.PINSRB_xri(resloc.value, srcloc.value, 0) self.mc.PSHUFB(resloc, heap(self.expand_byte_mask_addr)) elif size == 2: - self.mc.PINSRW_xri8(resloc.value, srcloc.value, 0) - self.mc.PINSRW_xri8(resloc.value, srcloc.value, 4) - self.mc.PSHUFLW_xxi8(resloc.value, resloc.value, 0) - self.mc.PSHUFHW_xxi8(resloc.value, resloc.value, 0) + self.mc.PINSRW_xri(resloc.value, srcloc.value, 0) + self.mc.PINSRW_xri(resloc.value, srcloc.value, 4) + self.mc.PSHUFLW_xxi(resloc.value, resloc.value, 0) + self.mc.PSHUFHW_xxi(resloc.value, resloc.value, 0) elif size == 4: - self.mc.PINSRD_xri8(resloc.value, srcloc.value, 0) - self.mc.PSHUFD_xxi8(resloc.value, resloc.value, 0) + self.mc.PINSRD_xri(resloc.value, srcloc.value, 0) + self.mc.PSHUFD_xxi(resloc.value, resloc.value, 0) elif size == 8: - self.mc.PINSRQ_xri8(resloc.value, srcloc.value, 0) - self.mc.PINSRQ_xri8(resloc.value, srcloc.value, 1) + self.mc.PINSRQ_xri(resloc.value, srcloc.value, 0) + self.mc.PINSRQ_xri(resloc.value, srcloc.value, 1) else: raise NotImplementedError("missing size %d for int expand" % (size,)) @@ -2684,28 +2684,28 @@ while k > 0: if size == 8: if resultloc.is_xmm: - 
self.mc.PEXTRQ_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si) - self.mc.PINSRQ_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri) + self.mc.PEXTRQ_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si) + self.mc.PINSRQ_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri) else: - self.mc.PEXTRQ_rxi8(resultloc.value, sourceloc.value, si) + self.mc.PEXTRQ_rxi(resultloc.value, sourceloc.value, si) elif size == 4: if resultloc.is_xmm: - self.mc.PEXTRD_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si) - self.mc.PINSRD_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri) + self.mc.PEXTRD_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si) + self.mc.PINSRD_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri) else: - self.mc.PEXTRD_rxi8(resultloc.value, sourceloc.value, si) + self.mc.PEXTRD_rxi(resultloc.value, sourceloc.value, si) elif size == 2: if resultloc.is_xmm: - self.mc.PEXTRW_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si) - self.mc.PINSRW_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri) + self.mc.PEXTRW_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si) + self.mc.PINSRW_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri) else: - self.mc.PEXTRW_rxi8(resultloc.value, sourceloc.value, si) + self.mc.PEXTRW_rxi(resultloc.value, sourceloc.value, si) elif size == 1: if resultloc.is_xmm: - self.mc.PEXTRB_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si) - self.mc.PINSRB_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri) + self.mc.PEXTRB_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si) + self.mc.PINSRB_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri) else: - self.mc.PEXTRB_rxi8(resultloc.value, sourceloc.value, si) + self.mc.PEXTRB_rxi(resultloc.value, sourceloc.value, si) si += 1 ri += 1 k -= 1 @@ -2734,9 +2734,9 @@ self.mov(X86_64_XMM_SCRATCH_REG, srcloc) src = X86_64_XMM_SCRATCH_REG.value select = ((si & 0x3) << 6)|((ri & 0x3) << 4) - self.mc.INSERTPS_xxi8(resloc.value, src, select) + self.mc.INSERTPS_xxi(resloc.value, src, select) else: - 
self.mc.PEXTRD_rxi8(resloc.value, srcloc.value, si) + self.mc.PEXTRD_rxi(resloc.value, srcloc.value, si) si += 1 ri += 1 k -= 1 @@ -2757,12 +2757,12 @@ # r = (s[1], r[1]) if resloc != srcloc: self.mc.UNPCKHPD(resloc, srcloc) - self.mc.SHUFPD_xxi8(resloc.value, resloc.value, 1) + self.mc.SHUFPD_xxi(resloc.value, resloc.value, 1) else: assert residx == 1 # r = (r[0], s[1]) if resloc != srcloc: - self.mc.SHUFPD_xxi8(resloc.value, resloc.value, 1) + self.mc.SHUFPD_xxi(resloc.value, resloc.value, 1) self.mc.UNPCKHPD(resloc, srcloc) # if they are equal nothing is to be done diff --git a/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py b/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py --- a/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py +++ b/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py @@ -332,6 +332,7 @@ def should_skip_instruction_bit32(self, instrname, argmodes): if self.WORD != 8: + # those are tested in the 64 bit test case return ( # the test suite uses 64 bit registers instead of 32 bit... (instrname == 'PEXTRQ') or diff --git a/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py b/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py --- a/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py +++ b/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py @@ -26,6 +26,7 @@ # Not testing FSTP on 64-bit for now (instrname == 'FSTP') or # the test suite uses 64 bit registers instead of 32 bit... + # it is tested in the 32 bit test! (instrname == 'PEXTRD') or (instrname == 'PINSRD') ) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit