Hi, this is a follow-up to [1]. Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. Pushed to trunk. [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-August/576514.html
gcc/ChangeLog: * config/i386/sse.md (cond_<insn><mode>): New expander. (cond_mul<mode>): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/cond_op_addsubmul_d-1.c: New test. * gcc.target/i386/cond_op_addsubmul_d-2.c: New test. * gcc.target/i386/cond_op_addsubmul_q-1.c: New test. * gcc.target/i386/cond_op_addsubmul_q-2.c: New test. * gcc.target/i386/cond_op_addsubmul_w-1.c: New test. * gcc.target/i386/cond_op_addsubmul_w-2.c: New test. --- gcc/config/i386/sse.md | 88 +++++++++++++++++-- .../gcc.target/i386/cond_op_addsubmul_d-1.c | 32 +++++++ .../gcc.target/i386/cond_op_addsubmul_d-2.c | 76 ++++++++++++++++ .../gcc.target/i386/cond_op_addsubmul_q-1.c | 7 ++ .../gcc.target/i386/cond_op_addsubmul_q-2.c | 4 + .../gcc.target/i386/cond_op_addsubmul_w-1.c | 6 ++ .../gcc.target/i386/cond_op_addsubmul_w-2.c | 5 ++ 7 files changed, 210 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_addsubmul_d-1.c create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_addsubmul_d-2.c create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_addsubmul_q-1.c create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_addsubmul_q-2.c create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_addsubmul_w-1.c create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_addsubmul_w-2.c diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 8bf1764d3d5..52b2b4214d7 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -333,6 +333,14 @@ (define_mode_iterator VI48_AVX512VL [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) +(define_mode_iterator VI1248_AVX512VLBW + [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") + (V16QI "TARGET_AVX512VL && TARGET_AVX512BW") + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW") + (V8HI "TARGET_AVX512VL && TARGET_AVX512BW") + V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") + V8DI (V4DI 
"TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) + (define_mode_iterator VF_AVX512VL [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) @@ -11803,6 +11811,24 @@ (define_expand "<insn><mode>3" "TARGET_SSE2" "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") +(define_expand "cond_<insn><mode>" + [(set (match_operand:VI1248_AVX512VLBW 0 "register_operand") + (vec_merge:VI1248_AVX512VLBW + (plusminus:VI1248_AVX512VLBW + (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand") + (match_operand:VI1248_AVX512VLBW 3 "nonimmediate_operand")) + (match_operand:VI1248_AVX512VLBW 4 "nonimm_or_0_operand") + (match_operand:<avx512fmaskmode> 1 "register_operand")))] + "TARGET_AVX512F" +{ + emit_insn (gen_<insn><mode>3_mask (operands[0], + operands[2], + operands[3], + operands[4], + operands[1])); + DONE; +}) + (define_expand "<insn><mode>3_mask" [(set (match_operand:VI48_AVX512VL 0 "register_operand") (vec_merge:VI48_AVX512VL @@ -11929,6 +11955,24 @@ (define_expand "mul<mode>3" DONE; }) +(define_expand "cond_mul<mode>" + [(set (match_operand:VI2_AVX512VL 0 "register_operand") + (vec_merge:VI2_AVX512VL + (mult:VI2_AVX512VL + (match_operand:VI2_AVX512VL 2 "vector_operand") + (match_operand:VI2_AVX512VL 3 "vector_operand")) + (match_operand:VI2_AVX512VL 4 "nonimm_or_0_operand") + (match_operand:<avx512fmaskmode> 1 "register_operand")))] + "TARGET_AVX512BW" +{ + emit_insn (gen_mul<mode>3_mask (operands[0], + operands[2], + operands[3], + operands[4], + operands[1])); + DONE; +}) + (define_expand "mul<mode>3<mask_name>" [(set (match_operand:VI2_AVX2 0 "register_operand") (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand") @@ -12363,6 +12407,24 @@ (define_insn "*sse2_pmaddwd" (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) +(define_expand "cond_mul<mode>" + [(set (match_operand:VI8_AVX512VL 0 "register_operand") + (vec_merge:VI8_AVX512VL + (mult:VI8_AVX512VL + (match_operand:VI8_AVX512VL 2 
"vector_operand") + (match_operand:VI8_AVX512VL 3 "vector_operand")) + (match_operand:VI8_AVX512VL 4 "nonimm_or_0_operand") + (match_operand:<avx512fmaskmode> 1 "register_operand")))] + "TARGET_AVX512DQ" +{ + emit_insn (gen_avx512dq_mul<mode>3_mask (operands[0], + operands[2], + operands[3], + operands[4], + operands[1])); + DONE; +}) + (define_insn "avx512dq_mul<mode>3<mask_name>" [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v") (mult:VI8_AVX512VL @@ -12375,6 +12437,24 @@ (define_insn "avx512dq_mul<mode>3<mask_name>" (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) +(define_expand "cond_mul<mode>" + [(set (match_operand:VI4_AVX512VL 0 "register_operand") + (vec_merge:VI4_AVX512VL + (mult:VI4_AVX512VL + (match_operand:VI4_AVX512VL 2 "vector_operand") + (match_operand:VI4_AVX512VL 3 "vector_operand")) + (match_operand:VI4_AVX512VL 4 "nonimm_or_0_operand") + (match_operand:<avx512fmaskmode> 1 "register_operand")))] + "TARGET_AVX512F" +{ + emit_insn (gen_mul<mode>3_mask (operands[0], + operands[2], + operands[3], + operands[4], + operands[1])); + DONE; +}) + (define_expand "mul<mode>3<mask_name>" [(set (match_operand:VI4_AVX512F 0 "register_operand") (mult:VI4_AVX512F @@ -14043,14 +14123,6 @@ (define_insn "*<code><mode>3" ] (const_string "<sseinsnmode>")))]) -(define_mode_iterator VI1248_AVX512VLBW - [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") - (V16QI "TARGET_AVX512VL && TARGET_AVX512BW") - (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW") - (V8HI "TARGET_AVX512VL && TARGET_AVX512BW") - V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") - V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) - (define_mode_iterator AVX512ZEXTMASK [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI]) diff --git a/gcc/testsuite/gcc.target/i386/cond_op_addsubmul_d-1.c b/gcc/testsuite/gcc.target/i386/cond_op_addsubmul_d-1.c new file mode 100644 index 00000000000..c506fa566f4 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/i386/cond_op_addsubmul_d-1.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump ".COND_ADD" "optimized" } } */ +/* { dg-final { scan-tree-dump ".COND_SUB" "optimized" } } */ +/* { dg-final { scan-tree-dump ".COND_MUL" "optimized" } } */ +#ifndef NUM +#define NUM 800 +#endif +#ifndef TYPE +#define TYPE int +#endif + +TYPE a[NUM], b[NUM], c[NUM], d[NUM], e[NUM], j[NUM]; +#define MIN(X,Y) ((X) < (Y) ? (X) : (Y)) +#define MAX(X,Y) ((X) < (Y) ? (Y) : (X)) + +#define BIN(OPNAME, OP) \ + void \ + __attribute__ ((noipa,optimize ("O3"))) \ + foo_##OPNAME () \ + { \ + for (int i = 0; i != NUM; i++) \ + if (b[i] < c[i]) \ + a[i] = d[i] OP e[i]; \ + else \ + a[i] = MAX(d[i], e[i]); \ + } + + +BIN (add, +); +BIN (sub, -); +BIN (mul, *); diff --git a/gcc/testsuite/gcc.target/i386/cond_op_addsubmul_d-2.c b/gcc/testsuite/gcc.target/i386/cond_op_addsubmul_d-2.c new file mode 100644 index 00000000000..490f4afbf18 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_addsubmul_d-2.c @@ -0,0 +1,76 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512vl -mprefer-vector-width=256" } */ +#define AVX512VL +#ifndef CHECK +#define CHECK "avx512f-helper.h" +#endif + +#include CHECK + +#include "cond_op_addsubmul_d-1.c" +#define BINO2(OPNAME, OP) \ + void \ + __attribute__ ((noipa,optimize ("O2"))) \ + foo_o2_##OPNAME () \ + { \ + for (int i = 0; i != NUM; i++) \ + if (b[i] < c[i]) \ + j[i] = d[i] OP e[i]; \ + else \ + j[i] = MAX(d[i], e[i]); \ + } + +BINO2 (add, +); +BINO2 (sub, -); +BINO2 (mul, *); + +static void +test_256 (void) +{ + int sign = -1; + for (int i = 0; i != NUM; i++) + { + a[i] = 0; + d[i] = i * 2; + e[i] = i * i * 3 - i * 9 + 153; + b[i] = i * 83; + c[i] = b[i] + sign; + sign *= -1; + j[i] = 1; + } + foo_add (); + foo_o2_add (); + for (int i = 0; i != NUM; i++) + { + if (a[i] != j[i]) + abort (); + a[i] = 0; + b[i] = 1; + } + + foo_sub (); + 
foo_o2_sub (); + for (int i = 0; i != NUM; i++) + { + if (a[i] != j[i]) + abort (); + a[i] = 0; + j[i] = 1; + } + + foo_mul (); + foo_o2_mul (); + for (int i = 0; i != NUM; i++) + { + if (a[i] != j[i]) + abort (); + a[i] = 0; + j[i] = 1; + } +} + +static void +test_128 () +{ + +} diff --git a/gcc/testsuite/gcc.target/i386/cond_op_addsubmul_q-1.c b/gcc/testsuite/gcc.target/i386/cond_op_addsubmul_q-1.c new file mode 100644 index 00000000000..5e25350b8b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_addsubmul_q-1.c @@ -0,0 +1,7 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512 -DTYPE=long -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump ".COND_ADD" "optimized" } } */ +/* { dg-final { scan-tree-dump ".COND_SUB" "optimized" } } */ +/* { dg-final { scan-tree-dump ".COND_MUL" "optimized" } } */ +#define AVX512DQ +#include "cond_op_addsubmul_d-1.c" diff --git a/gcc/testsuite/gcc.target/i386/cond_op_addsubmul_q-2.c b/gcc/testsuite/gcc.target/i386/cond_op_addsubmul_q-2.c new file mode 100644 index 00000000000..09a87deb529 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_addsubmul_q-2.c @@ -0,0 +1,4 @@ +/* { dg-do run { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mavx512vl -mprefer-vector-width=256 -mavx512dq -DTYPE=long" } */ + +#include "cond_op_addsubmul_d-2.c" diff --git a/gcc/testsuite/gcc.target/i386/cond_op_addsubmul_w-1.c b/gcc/testsuite/gcc.target/i386/cond_op_addsubmul_w-1.c new file mode 100644 index 00000000000..80d78d9b704 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_addsubmul_w-1.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512 -DTYPE=short -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump ".COND_ADD" "optimized" } } */ +/* { dg-final { scan-tree-dump ".COND_SUB" "optimized" } } */ +/* { dg-final { scan-tree-dump ".COND_MUL" "optimized" } } */ +#include "cond_op_addsubmul_d-1.c" diff --git a/gcc/testsuite/gcc.target/i386/cond_op_addsubmul_w-2.c b/gcc/testsuite/gcc.target/i386/cond_op_addsubmul_w-2.c new file mode 100644 index 00000000000..fdcdb34346c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_addsubmul_w-2.c @@ -0,0 +1,5 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512vl -mprefer-vector-width=256 -mavx512bw -DTYPE=short" } */ + +#define AVX512BW +#include "cond_op_addsubmul_d-2.c" -- 2.27.0