Standard scalar operation patterns which preserve the rest of the vector look like
(vec_merge:V2DF (vec_duplicate:V2DF (op:DF (vec_select:DF (reg/v:V2DF 85 [ x ]) (parallel [ (const_int 0 [0])])) (reg:DF 87)) (reg/v:V2DF 85 [ x ]) (const_int 1 [0x1])])) Add such pattens to i386 backend and convert VEC_CONCAT patterns to standard standard scalar operation patterns. gcc/ PR target/54855 * simplify-rtx.c (simplify_binary_operation_1): Convert VEC_CONCAT patterns to standard standard scalar operation patterns. * config/i386/sse.md (*<sse>_vm<plusminus_insn><mode>3): New. (*<sse>_vm<multdiv_mnemonic><mode>3): Likewise. gcc/testsuite/ PR target/54855 * gcc.target/i386/pr54855-1.c: New test. * gcc.target/i386/pr54855-2.c: Likewise. * gcc.target/i386/pr54855-3.c: Likewise. * gcc.target/i386/pr54855-4.c: Likewise. * gcc.target/i386/pr54855-5.c: Likewise. * gcc.target/i386/pr54855-6.c: Likewise. * gcc.target/i386/pr54855-7.c: Likewise. --- gcc/config/i386/sse.md | 45 +++++++++++++++++++++ gcc/simplify-rtx.c | 49 +++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr54855-1.c | 16 ++++++++ gcc/testsuite/gcc.target/i386/pr54855-2.c | 15 +++++++ gcc/testsuite/gcc.target/i386/pr54855-3.c | 14 +++++++ gcc/testsuite/gcc.target/i386/pr54855-4.c | 14 +++++++ gcc/testsuite/gcc.target/i386/pr54855-5.c | 16 ++++++++ gcc/testsuite/gcc.target/i386/pr54855-6.c | 14 +++++++ gcc/testsuite/gcc.target/i386/pr54855-7.c | 14 +++++++ 9 files changed, 197 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-4.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-5.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-6.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-7.c diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 5dc0930ac1f..03b6f3369fc 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1719,6 +1719,28 @@ (set_attr "type" "sseadd") (set_attr "mode" "<MODE>")]) +;; Standard scalar operation patterns which preserve the rest of the +;; vector for combiner. +(define_insn "*<sse>_vm<plusminus_insn><mode>3" + [(set (match_operand:VF_128 0 "register_operand" "=x,v") + (vec_merge:VF_128 + (vec_duplicate:VF_128 + (plusminus:<ssescalarmode> + (vec_select:<ssescalarmode> + (match_operand:VF_128 1 "register_operand" "0,v") + (parallel [(const_int 0)])) + (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm"))) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE" + "@ + <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2} + v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "<ssescalarmode>")]) + (define_insn "<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name>" [(set (match_operand:VF_128 0 "register_operand" "=x,v") (vec_merge:VF_128 @@ -1773,6 +1795,29 @@ (set_attr "type" "ssemul") (set_attr "mode" "<MODE>")]) +;; Standard scalar operation patterns which preserve the rest of the +;; vector for combiner. +(define_insn "*<sse>_vm<multdiv_mnemonic><mode>3" + [(set (match_operand:VF_128 0 "register_operand" "=x,v") + (vec_merge:VF_128 + (vec_duplicate:VF_128 + (multdiv:<ssescalarmode> + (vec_select:<ssescalarmode> + (match_operand:VF_128 1 "register_operand" "0,v") + (parallel [(const_int 0)])) + (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm"))) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE" + "@ + <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2} + v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sse<multdiv_mnemonic>") + (set_attr "prefix" "orig,vex") + (set_attr "btver2_decode" "direct,double") + (set_attr "mode" "<ssescalarmode>")]) + (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name>" [(set (match_operand:VF_128 0 "register_operand" "=x,v") (vec_merge:VF_128 diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index 83580a259f3..c32544381d0 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -4023,6 +4023,55 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, return simplify_gen_binary (VEC_SELECT, mode, XEXP (trueop0, 0), gen_rtx_PARALLEL (VOIDmode, vec)); } + + /* Turn + + (vec_concat:V2DF + (op:DF (vec_select:DF (reg/v:V2DF 85 [ x ]) + (parallel [ (const_int 0 [0])])) + (reg:DF 87)) + (vec_select:DF (reg/v:V2DF 85 [ x ]) + (parallel [ (const_int 1 [0x1])]))) + + into standard scalar operation patterns which preserve the + rest of the vector: + + (vec_merge:V2DF + (vec_duplicate:V2DF + (op:DF (vec_select:DF (reg/v:V2DF 85 [ x ]) + (parallel [ (const_int 0 [0])])) + (reg:DF 87)) + (reg/v:V2DF 85 [ x ]) + (const_int 1 [0x1])])) + + */ + if (GET_CODE (trueop1) == VEC_SELECT + && XVECLEN (XEXP (trueop1, 1), 0) == 1 + && INTVAL (XVECEXP (XEXP (trueop1, 1), 0, 0)) == 1 + && GET_MODE (XEXP (trueop1, 0)) == mode + && op0_mode == GET_MODE_INNER (mode) + && (GET_CODE (trueop0) == PLUS + || GET_CODE (trueop0) == MINUS + || GET_CODE (trueop0) == MULT + || GET_CODE (trueop0) == DIV) + && GET_CODE (XEXP (trueop0, 0)) == VEC_SELECT + && rtx_equal_p (XEXP (trueop1, 0), XEXP (XEXP (trueop0, 0), 0)) + && XVECLEN (XEXP (XEXP (trueop0, 0), 1), 0) == 1 + && INTVAL (XVECEXP (XEXP (XEXP (trueop0, 0), 1), 0, 0)) == 0) + { + op0 = XEXP (trueop1, 0); + op1 = XEXP (trueop0, 1); + rtvec vec = rtvec_alloc (1); + RTVEC_ELT (vec, 0) = const0_rtx; + rtx op2 = simplify_gen_binary (VEC_SELECT, op0_mode, op0, + gen_rtx_PARALLEL (VOIDmode, + vec)); + op2 = simplify_gen_binary (GET_CODE (trueop0), + op0_mode, op2, op1); + op2 = gen_rtx_VEC_DUPLICATE (mode, op2); + return simplify_gen_ternary (VEC_MERGE, mode, mode, op2, + op0, GEN_INT (1)); + } } return 0; diff --git a/gcc/testsuite/gcc.target/i386/pr54855-1.c b/gcc/testsuite/gcc.target/i386/pr54855-1.c new file mode 100644 index 00000000000..693aafa09ab --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr54855-1.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -mfpmath=sse" } */ +/* { dg-final { scan-assembler-times "addsd" 1 } } */ +/* { dg-final { scan-assembler-not "movapd" } } */ +/* { dg-final { scan-assembler-not "movsd" } } */ + +typedef double __v2df __attribute__ ((__vector_size__ (16))); +typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); + +__m128d +_mm_add_sd (__m128d x, __m128d y) +{ + __m128d z = __extension__ (__m128d)(__v2df) + { (((__v2df) x)[0] + ((__v2df) y)[0]), ((__v2df) x)[1] }; + return z; +} diff --git a/gcc/testsuite/gcc.target/i386/pr54855-2.c b/gcc/testsuite/gcc.target/i386/pr54855-2.c new file mode 100644 index 00000000000..20c6f8eb529 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr54855-2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -mfpmath=sse" } */ +/* { dg-final { scan-assembler-times "mulsd" 1 } } */ +/* { dg-final { scan-assembler-not "movapd" } } */ +/* { dg-final { scan-assembler-not "movsd" } } */ + +typedef double __v2df __attribute__ ((__vector_size__ (16))); + +__v2df +_mm_mul_sd (__v2df x, __v2df y) +{ + __v2df z = x; + z[0] = x[0] * y[0]; + return z; +} diff --git a/gcc/testsuite/gcc.target/i386/pr54855-3.c b/gcc/testsuite/gcc.target/i386/pr54855-3.c new file mode 100644 index 00000000000..3c15dfc93d1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr54855-3.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -mfpmath=sse" } */ +/* { dg-final { scan-assembler-times "subsd" 1 } } */ +/* { dg-final { scan-assembler-not "movapd" } } */ +/* { dg-final { scan-assembler-not "movsd" } } */ + +typedef double vec __attribute__((vector_size(16))); + +vec +foo (vec x) +{ + x[0] -= 1.; + return x; +} diff --git a/gcc/testsuite/gcc.target/i386/pr54855-4.c b/gcc/testsuite/gcc.target/i386/pr54855-4.c new file mode 100644 index 00000000000..32eb28e852a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr54855-4.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -mfpmath=sse" } */ +/* { dg-final { scan-assembler-times "subsd" 1 } } */ +/* { dg-final { scan-assembler-not "movapd" } } */ +/* { dg-final { scan-assembler-not "movsd" } } */ + +typedef double vec __attribute__((vector_size(16))); + +vec +foo (vec x, double a) +{ + x[0] -= a; + return x; +} diff --git a/gcc/testsuite/gcc.target/i386/pr54855-5.c b/gcc/testsuite/gcc.target/i386/pr54855-5.c new file mode 100644 index 00000000000..e06999074e0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr54855-5.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -mfpmath=sse" } */ +/* { dg-final { scan-assembler-times "subsd" 1 } } */ +/* { dg-final { scan-assembler-times "mulpd" 1 } } */ +/* { dg-final { scan-assembler-not "movapd" } } */ +/* { dg-final { scan-assembler-not "movsd" } } */ + +typedef double __v2df __attribute__ ((__vector_size__ (16))); + +__v2df +foo (__v2df x, __v2df y) +{ + x[0] -= y[0]; + x *= y; + return x; +} diff --git a/gcc/testsuite/gcc.target/i386/pr54855-6.c b/gcc/testsuite/gcc.target/i386/pr54855-6.c new file mode 100644 index 00000000000..8f44d17b6d8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr54855-6.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse -mfpmath=sse" } */ +/* { dg-final { scan-assembler-times "divss" 1 } } */ +/* { dg-final { scan-assembler-not "movaps" } } */ +/* { dg-final { scan-assembler-not "movss" } } */ + +typedef float vec __attribute__((vector_size(16))); + +vec +foo (vec x, float f) +{ + x[0] /= f; + return x; +} diff --git a/gcc/testsuite/gcc.target/i386/pr54855-7.c b/gcc/testsuite/gcc.target/i386/pr54855-7.c new file mode 100644 index 00000000000..a551bd5c92f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr54855-7.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse -mfpmath=sse" } */ +/* { dg-final { scan-assembler-times "divss" 1 } } */ +/* { dg-final { scan-assembler-not "movaps" } } */ +/* { dg-final { scan-assembler-not "movss" } } */ + +typedef float vec __attribute__((vector_size(16))); + +vec +foo (vec x) +{ + x[0] /= 2.1f; + return x; +} -- 2.20.1