The newly added splitter will generate (insn 58 56 59 2 (set (reg:V4HI 20 xmm0 [129]) (vec_duplicate:V4HI (reg:HI 22 xmm2 [123]))) "testcase.c":16:21 -1
But we only have (define_insn "*vec_dupv4hi" [(set (match_operand:V4HI 0 "register_operand" "=y,Yw") (vec_duplicate:V4HI (truncate:HI (match_operand:SI 1 "register_operand" "0,Yw"))))], whose source operand is wrapped in (truncate:HI (...:SI)) and therefore does not match a vec_duplicate of a plain HImode register. The patch adds patterns for V4HI and V2HI. Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. Ready to push to trunk. gcc/ChangeLog: PR target/112532 * config/i386/mmx.md (*vec_dup<mode>): Extend for V4HI and V2HI. gcc/testsuite/ChangeLog: * gcc.target/i386/pr112532.c: New test. --- gcc/config/i386/mmx.md | 8 ++++---- gcc/testsuite/gcc.target/i386/pr112532.c | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr112532.c diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index a3d08bb9d3b..e4b89160fc0 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -5277,8 +5277,8 @@ (define_insn "*vec_dupv4hi" (set_attr "mode" "DI,TI")]) (define_insn "*vec_dup<mode>" - [(set (match_operand:V4F_64 0 "register_operand" "=Yw") - (vec_duplicate:V4F_64 + [(set (match_operand:V4FI_64 0 "register_operand" "=Yw") + (vec_duplicate:V4FI_64 (match_operand:<mmxscalarmode> 1 "register_operand" "Yw")))] "TARGET_MMX_WITH_SSE" "%vpshuflw\t{$0, %1, %0|%0, %1, 0}" @@ -5869,8 +5869,8 @@ (define_insn "*vec_dupv2hi" (set_attr "mode" "TI")]) (define_insn "*vec_dup<mode>" - [(set (match_operand:V2F_32 0 "register_operand" "=Yw") - (vec_duplicate:V2F_32 + [(set (match_operand:V2FI_32 0 "register_operand" "=Yw") + (vec_duplicate:V2FI_32 (match_operand:<mmxscalarmode> 1 "register_operand" "Yw")))] "TARGET_SSE2" "%vpshuflw\t{$0, %1, %0|%0, %1, 0}" diff --git a/gcc/testsuite/gcc.target/i386/pr112532.c b/gcc/testsuite/gcc.target/i386/pr112532.c new file mode 100644 index 00000000000..690f1d9670d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr112532.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-msse4 -O2" } */ + +typedef char __attribute__((__vector_size__(2))) v16u8; +typedef int 
__attribute__((__vector_size__(8))) v64u8; +typedef unsigned short __attribute__((__vector_size__(2))) v16u16; +typedef unsigned short __attribute__((__vector_size__(8))) v64u16; +v64u16 foo0_v64u16_0; +int __attribute__((__vector_size__(4 * sizeof(int)))) foo0_v128u32_0; +__attribute__((__vector_size__(8 * sizeof(short)))) unsigned short foo0_v128u16_0; +v16u16 foo0_v16u16_0; +v16u8 foo0() { + v16u16 v16u16_1 = __builtin_shufflevector(__builtin_shufflevector(__builtin_convertvector(foo0_v128u32_0, v64u16),foo0_v16u16_0, 1, 4, 2, 0, 0, 2, 2, 2),foo0_v16u16_0, 7); + foo0_v64u16_0 -= (short)v16u16_1; + v64u16 v64u16_3 = __builtin_shufflevector(v16u16_1, __builtin_shufflevector((v16u16){}, foo0_v128u16_0, 7, 0), 0, 1, 2, 2); + return (union {v16u8 b;}) + {((union { + v64u8 a; + int b; + })(v64u8)v64u16_3).b}.b + (v16u8)v16u16_1; +} -- 2.31.1