LGTM
On Wed, Sep 13, 2023 at 12:25 AM Lehua Ding <lehua.d...@rivai.ai> wrote: > > This patch adds combine patterns to combine vfsgnj.vv + vcond_mask > into a masked vfsgnj.vv. For vfsgnjx.vv, it cannot be produced in the midend > currently. We will send another patch to address this issue. > > gcc/ChangeLog: > > * config/riscv/autovec-opt.md (*copysign<mode>_neg): Move. > (*cond_copysign<mode>): New combine pattern. > * config/riscv/riscv-v.cc (needs_fp_rounding): Extend. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c: New test. > * gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c: New test. > * gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c: New test. > * gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h: New > test. > * gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c: New > test. > > --- > gcc/config/riscv/autovec-opt.md | 68 +++++++++---- > gcc/config/riscv/riscv-v.cc | 4 +- > .../rvv/autovec/cond/cond_copysign-run.c | 99 +++++++++++++++++++ > .../rvv/autovec/cond/cond_copysign-rv32gcv.c | 12 +++ > .../rvv/autovec/cond/cond_copysign-rv64gcv.c | 12 +++ > .../rvv/autovec/cond/cond_copysign-template.h | 81 +++++++++++++++ > .../rvv/autovec/cond/cond_copysign-zvfh-run.c | 93 +++++++++++++++++ > 7 files changed, 349 insertions(+), 20 deletions(-) > create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c > create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c > create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c > create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h > create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c > > diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md > index 58e80044f1e..f759525f96b 100644 > --- a/gcc/config/riscv/autovec-opt.md > +++ 
b/gcc/config/riscv/autovec-opt.md > @@ -609,6 +609,10 @@ > (set_attr "mode" "<V_DOUBLE_TRUNC>") > (set (attr "frm_mode") (symbol_ref "riscv_vector::FRM_DYN"))]) > > +;; > ============================================================================= > +;; Combine op + vmerge to cond_op > +;; > ============================================================================= > + > ;; Combine <op> and vcond_mask generated by midend into cond_len_<op> > ;; Currently supported operations: > ;; abs(FP) > @@ -651,25 +655,6 @@ > DONE; > }) > > -;; Combine vlmax neg and UNSPEC_VCOPYSIGN > -(define_insn_and_split "*copysign<mode>_neg" > - [(set (match_operand:VF 0 "register_operand") > - (neg:VF > - (unspec:VF [ > - (match_operand:VF 1 "register_operand") > - (match_operand:VF 2 "register_operand") > - ] UNSPEC_VCOPYSIGN)))] > - "TARGET_VECTOR && can_create_pseudo_p ()" > - "#" > - "&& 1" > - [(const_int 0)] > -{ > - riscv_vector::emit_vlmax_insn (code_for_pred_ncopysign (<MODE>mode), > - riscv_vector::BINARY_OP, operands); > - DONE; > -} > -[(set_attr "type" "vector")]) > - > ;; Combine sign_extend/zero_extend(vf2) and vcond_mask > (define_insn_and_split "*cond_<optab><v_double_trunc><mode>" > [(set (match_operand:VWEXTI 0 "register_operand") > @@ -918,6 +903,27 @@ > } > [(set_attr "type" "vector")]) > > +;; Combine vfsgnj.vv + vcond_mask > +(define_insn_and_split "*cond_copysign<mode>" > + [(set (match_operand:VF 0 "register_operand") > + (if_then_else:VF > + (match_operand:<VM> 1 "register_operand") > + (unspec:VF > + [(match_operand:VF 2 "register_operand") > + (match_operand:VF 3 "register_operand")] UNSPEC_VCOPYSIGN) > + (match_operand:VF 4 "register_operand")))] > + "TARGET_VECTOR && can_create_pseudo_p ()" > + "#" > + "&& 1" > + [(const_int 0)] > +{ > + insn_code icode = code_for_pred (UNSPEC_VCOPYSIGN, <MODE>mode); > + rtx ops[] = {operands[0], operands[1], operands[2], operands[3], > operands[4], > + gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)}; > + 
riscv_vector::expand_cond_len_binop (icode, ops); > + DONE; > +}) > + > ;; > ============================================================================= > ;; Combine extend + binop to widen_binop > ;; > ============================================================================= > @@ -1119,3 +1125,27 @@ > DONE; > } > [(set_attr "type" "vfwmul")]) > + > + > +;; > ============================================================================= > +;; Misc combine patterns > +;; > ============================================================================= > + > +;; Combine vlmax neg and UNSPEC_VCOPYSIGN > +(define_insn_and_split "*copysign<mode>_neg" > + [(set (match_operand:VF 0 "register_operand") > + (neg:VF > + (unspec:VF [ > + (match_operand:VF 1 "register_operand") > + (match_operand:VF 2 "register_operand") > + ] UNSPEC_VCOPYSIGN)))] > + "TARGET_VECTOR && can_create_pseudo_p ()" > + "#" > + "&& 1" > + [(const_int 0)] > +{ > + riscv_vector::emit_vlmax_insn (code_for_pred_ncopysign (<MODE>mode), > + riscv_vector::BINARY_OP, operands); > + DONE; > +} > +[(set_attr "type" "vector")]) > diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc > index 4d95bd773a2..76e6094f45b 100644 > --- a/gcc/config/riscv/riscv-v.cc > +++ b/gcc/config/riscv/riscv-v.cc > @@ -2970,7 +2970,9 @@ needs_fp_rounding (unsigned icode, machine_mode mode) > && icode != maybe_code_for_pred_extend (mode) > /* narrower-INT -> FP */ > && icode != maybe_code_for_pred_widen (FLOAT, mode) > - && icode != maybe_code_for_pred_widen (UNSIGNED_FLOAT, mode); > + && icode != maybe_code_for_pred_widen (UNSIGNED_FLOAT, mode) > + /* vfsgnj */ > + && icode != maybe_code_for_pred (UNSPEC_VCOPYSIGN, mode); > } > > /* Subroutine to expand COND_LEN_* patterns. 
*/ > diff --git > a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c > new file mode 100644 > index 00000000000..be37854c135 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c > @@ -0,0 +1,99 @@ > +/* { dg-do run { target { riscv_vector } } } */ > +/* { dg-additional-options "-std=c99 -fno-vect-cost-model > --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */ > + > +#include "cond_copysign-template.h" > + > +#include <assert.h> > + > +#define SZ 512 > + > +#define EPS 1e-6 > + > +#define INIT_PRED() > \ > + int pred[SZ]; > \ > + for (int i = 0; i < SZ; i++) > \ > + { > \ > + pred[i] = i % 3; > \ > + } > + > +#define RUN(TYPE, VAL) > \ > + TYPE a##TYPE[SZ]; > \ > + TYPE b##TYPE[SZ]; > \ > + for (int i = 0; i < SZ; i++) > \ > + { > \ > + a##TYPE[i] = i; > \ > + b##TYPE[i] = (i & 1) ? VAL : -VAL; > \ > + } > \ > + copysign_##TYPE (a##TYPE, a##TYPE, b##TYPE, pred, SZ); > \ > + for (int i = 0; i < SZ; i++) > \ > + assert (!pred[i] || __builtin_fabs (a##TYPE[i] - ((i & 1) ? i : -i)) < > EPS); > + > +#define RUN2(TYPE, VAL) > \ > + TYPE a2##TYPE[SZ]; > \ > + for (int i = 0; i < SZ; i++) > \ > + a2##TYPE[i] = i; > \ > + copysigns_##TYPE (a2##TYPE, a2##TYPE, -VAL, pred, SZ); > \ > + for (int i = 0; i < SZ; i++) > \ > + assert (!pred[i] || __builtin_fabs (a2##TYPE[i] + i) < EPS); > + > +#define RUN3(TYPE, VAL) > \ > + TYPE a3##TYPE[SZ]; > \ > + TYPE b3##TYPE[SZ]; > \ > + for (int i = 0; i < SZ; i++) > \ > + { > \ > + a3##TYPE[i] = (i & 1) ? -i : i; > \ > + b3##TYPE[i] = (i & 1) ? 
VAL : -VAL; > \ > + } > \ > + xorsign_##TYPE (a3##TYPE, a3##TYPE, b3##TYPE, pred, SZ); > \ > + for (int i = 0; i < SZ; i++) > \ > + assert (!pred[i] || __builtin_fabs (a3##TYPE[i] + i) < EPS); > + > +#define RUN4(TYPE, VAL) > \ > + TYPE a4##TYPE[SZ]; > \ > + for (int i = 0; i < SZ; i++) > \ > + a4##TYPE[i] = -i; > \ > + xorsigns_##TYPE (a4##TYPE, a4##TYPE, -VAL, pred, SZ); > \ > + for (int i = 0; i < SZ; i++) > \ > + assert (!pred[i] || __builtin_fabs (a4##TYPE[i] - i) < EPS); > + > +#define RUN5(TYPE, VAL) > \ > + TYPE a5##TYPE[SZ]; > \ > + TYPE b5##TYPE[SZ]; > \ > + for (int i = 0; i < SZ; i++) > \ > + { > \ > + a5##TYPE[i] = i; > \ > + b5##TYPE[i] = (i & 1) ? VAL : -VAL; > \ > + } > \ > + ncopysign_##TYPE (a5##TYPE, a5##TYPE, b##TYPE, pred, SZ); > \ > + for (int i = 0; i < SZ; i++) > \ > + assert (!pred[i] > \ > + || __builtin_fabs (-a5##TYPE[i] - ((i & 1) ? i : -i)) < EPS); > + > +#define RUN6(TYPE, VAL) > \ > + TYPE a6##TYPE[SZ]; > \ > + for (int i = 0; i < SZ; i++) > \ > + a6##TYPE[i] = i; > \ > + ncopysigns_##TYPE (a6##TYPE, a6##TYPE, -VAL, pred, SZ); > \ > + for (int i = 0; i < SZ; i++) > \ > + assert (!pred[i] || __builtin_fabs (-a6##TYPE[i] + i) < EPS); > + > +#define RUN_ALL() > \ > + RUN (float, 5) > \ > + RUN (double, 6) > \ > + RUN2 (float, 11) > \ > + RUN2 (double, 12) > \ > + RUN3 (float, 16) > \ > + RUN3 (double, 18) > \ > + RUN4 (float, 17) > \ > + RUN4 (double, 19) > \ > + RUN5 (float, 123) > \ > + RUN5 (double, 523) > \ > + RUN6 (float, 777) > \ > + RUN6 (double, 877) > + > +int > +main () > +{ > + INIT_PRED () > + RUN_ALL () > +} > diff --git > a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c > new file mode 100644 > index 00000000000..cef531b9700 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-std=c99 -O3 
-fno-vect-cost-model > -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax > -ffast-math" } */ > + > +#include "cond_copysign-template.h" > + > +/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 6 } } */ > +/* 1. The vectorizer wraps scalar variants of copysign into vector constants > which > + expand cannot handle currently. > + 2. match.pd convert .COPYSIGN (1, b) + COND_MUL to AND + XOR currently. > */ > +/* { dg-final { scan-assembler-times {\tvfsgnjx\.vv} 6 { xfail riscv*-*-* } > } } */ > +/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 6 } } */ > +/* { dg-final { scan-assembler-not {\tvmerge\.vvm} } } */ > diff --git > a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c > new file mode 100644 > index 00000000000..cc2aa4de757 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-std=c99 -O3 -fno-vect-cost-model > -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax > -ffast-math" } */ > + > +#include "cond_copysign-template.h" > + > +/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 6 } } */ > +/* 1. The vectorizer wraps scalar variants of copysign into vector constants > which > + expand cannot handle currently. > + 2. match.pd convert .COPYSIGN (1, b) + COND_MUL to AND + XOR currently. 
> */ > +/* { dg-final { scan-assembler-times {\tvfsgnjx\.vv} 6 { xfail riscv*-*-* } > } } */ > +/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 6 } } */ > +/* { dg-final { scan-assembler-not {\tvmerge\.vvm} } } */ > diff --git > a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h > new file mode 100644 > index 00000000000..4191500fd83 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h > @@ -0,0 +1,81 @@ > +#include <stdint-gcc.h> > + > +#define TEST_TYPE(TYPE, SUFFIX) > \ > + __attribute__ ((noipa)) void copysign_##TYPE (TYPE *restrict dst, > \ > + TYPE *restrict a, > \ > + TYPE *restrict b, > \ > + int *restrict pred, int n) > \ > + { > \ > + for (int i = 0; i < n; i++) > \ > + dst[i] = pred[i] ? __builtin_copysign##SUFFIX (a[i], b[i]) : dst[i]; > \ > + } > + > +#define TEST_TYPE2(TYPE, SUFFIX) > \ > + __attribute__ ((noipa)) void copysigns_##TYPE (TYPE *restrict dst, > \ > + TYPE *restrict a, TYPE b, > \ > + int *restrict pred, int n) > \ > + { > \ > + for (int i = 0; i < n; i++) > \ > + dst[i] = pred[i] ? __builtin_copysign##SUFFIX (a[i], b) : dst[i]; > \ > + } > + > +#define TEST_TYPE3(TYPE, SUFFIX) > \ > + __attribute__ ((noipa)) void xorsign_##TYPE (TYPE *restrict dst, > \ > + TYPE *restrict a, > \ > + TYPE *restrict b, > \ > + int *restrict pred, int n) > \ > + { > \ > + for (int i = 0; i < n; i++) > \ > + dst[i] > \ > + = pred[i] ? a[i] * __builtin_copysign##SUFFIX (1.0, b[i]) : dst[i]; > \ > + } > + > +#define TEST_TYPE4(TYPE, SUFFIX) > \ > + __attribute__ ((noipa)) void xorsigns_##TYPE (TYPE *restrict dst, > \ > + TYPE *restrict a, TYPE b, > \ > + int *restrict pred, int n) > \ > + { > \ > + for (int i = 0; i < n; i++) > \ > + dst[i] = pred[i] ? 
a[i] * __builtin_copysign##SUFFIX (1.0, b) : > dst[i]; \ > + } > + > +#define TEST_TYPE5(TYPE, SUFFIX) > \ > + __attribute__ ((noipa)) void ncopysign_##TYPE (TYPE *restrict dst, > \ > + TYPE *restrict a, > \ > + TYPE *restrict b, > \ > + int *restrict pred, int n) > \ > + { > \ > + for (int i = 0; i < n; i++) > \ > + dst[i] = pred[i] ? -__builtin_copysign##SUFFIX (a[i], b[i]) : dst[i]; > \ > + } > + > +#define TEST_TYPE6(TYPE, SUFFIX) > \ > + __attribute__ ((noipa)) void ncopysigns_##TYPE (TYPE *restrict dst, > \ > + TYPE *restrict a, TYPE b, > \ > + int *restrict pred, int n) > \ > + { > \ > + for (int i = 0; i < n; i++) > \ > + dst[i] = pred[i] ? -__builtin_copysign##SUFFIX (a[i], b) : dst[i]; > \ > + } > + > +#define TEST_ALL() > \ > + TEST_TYPE (_Float16, f16) > \ > + TEST_TYPE (float, f) > \ > + TEST_TYPE (double, ) > \ > + TEST_TYPE2 (_Float16, f16) > \ > + TEST_TYPE2 (float, f) > \ > + TEST_TYPE2 (double, ) > \ > + TEST_TYPE3 (_Float16, f16) > \ > + TEST_TYPE3 (float, f) > \ > + TEST_TYPE3 (double, ) > \ > + TEST_TYPE4 (_Float16, f16) > \ > + TEST_TYPE4 (float, f) > \ > + TEST_TYPE4 (double, ) > \ > + TEST_TYPE5 (_Float16, f16) > \ > + TEST_TYPE5 (float, f) > \ > + TEST_TYPE5 (double, ) > \ > + TEST_TYPE6 (_Float16, f16) > \ > + TEST_TYPE6 (float, f) > \ > + TEST_TYPE6 (double, ) > + > +TEST_ALL () > diff --git > a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c > new file mode 100644 > index 00000000000..6e337f9e74c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c > @@ -0,0 +1,93 @@ > +/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */ > +/* { dg-additional-options "-std=c99 -fno-vect-cost-model > --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */ > + > +#include "cond_copysign-template.h" > + > +#include <assert.h> > + > +#define SZ 512 > + > +#define EPS 1e-6 > + > +#define 
INIT_PRED() > \ > + int pred[SZ]; > \ > + for (int i = 0; i < SZ; i++) > \ > + { > \ > + pred[i] = i % 3; > \ > + } > + > +#define RUN(TYPE, VAL) > \ > + TYPE a##TYPE[SZ]; > \ > + TYPE b##TYPE[SZ]; > \ > + for (int i = 0; i < SZ; i++) > \ > + { > \ > + a##TYPE[i] = i; > \ > + b##TYPE[i] = (i & 1) ? VAL : -VAL; > \ > + } > \ > + copysign_##TYPE (a##TYPE, a##TYPE, b##TYPE, pred, SZ); > \ > + for (int i = 0; i < SZ; i++) > \ > + assert (!pred[i] || __builtin_fabs (a##TYPE[i] - ((i & 1) ? i : -i)) < > EPS); > + > +#define RUN2(TYPE, VAL) > \ > + TYPE a2##TYPE[SZ]; > \ > + for (int i = 0; i < SZ; i++) > \ > + a2##TYPE[i] = i; > \ > + copysigns_##TYPE (a2##TYPE, a2##TYPE, -VAL, pred, SZ); > \ > + for (int i = 0; i < SZ; i++) > \ > + assert (!pred[i] || __builtin_fabs (a2##TYPE[i] + i) < EPS); > + > +#define RUN3(TYPE, VAL) > \ > + TYPE a3##TYPE[SZ]; > \ > + TYPE b3##TYPE[SZ]; > \ > + for (int i = 0; i < SZ; i++) > \ > + { > \ > + a3##TYPE[i] = (i & 1) ? -i : i; > \ > + b3##TYPE[i] = (i & 1) ? VAL : -VAL; > \ > + } > \ > + xorsign_##TYPE (a3##TYPE, a3##TYPE, b3##TYPE, pred, SZ); > \ > + for (int i = 0; i < SZ; i++) > \ > + assert (!pred[i] || __builtin_fabs (a3##TYPE[i] + i) < EPS); > + > +#define RUN4(TYPE, VAL) > \ > + TYPE a4##TYPE[SZ]; > \ > + for (int i = 0; i < SZ; i++) > \ > + a4##TYPE[i] = -i; > \ > + xorsigns_##TYPE (a4##TYPE, a4##TYPE, -VAL, pred, SZ); > \ > + for (int i = 0; i < SZ; i++) > \ > + assert (!pred[i] || __builtin_fabs (a4##TYPE[i] - i) < EPS); > + > +#define RUN5(TYPE, VAL) > \ > + TYPE a5##TYPE[SZ]; > \ > + TYPE b5##TYPE[SZ]; > \ > + for (int i = 0; i < SZ; i++) > \ > + { > \ > + a5##TYPE[i] = i; > \ > + b5##TYPE[i] = (i & 1) ? VAL : -VAL; > \ > + } > \ > + ncopysign_##TYPE (a5##TYPE, a5##TYPE, b##TYPE, pred, SZ); > \ > + for (int i = 0; i < SZ; i++) > \ > + assert (!pred[i] > \ > + || __builtin_fabs (-a5##TYPE[i] - ((i & 1) ? 
i : -i)) < EPS); > + > +#define RUN6(TYPE, VAL) > \ > + TYPE a6##TYPE[SZ]; > \ > + for (int i = 0; i < SZ; i++) > \ > + a6##TYPE[i] = i; > \ > + ncopysigns_##TYPE (a6##TYPE, a6##TYPE, -VAL, pred, SZ); > \ > + for (int i = 0; i < SZ; i++) > \ > + assert (!pred[i] || __builtin_fabs (-a6##TYPE[i] + i) < EPS); > + > +#define RUN_ALL() > \ > + RUN (_Float16, 5) > \ > + RUN2 (_Float16, 11) > \ > + RUN3 (_Float16, 16) > \ > + RUN4 (_Float16, 17) > \ > + RUN5 (_Float16, 123) > \ > + RUN6 (_Float16, 777) > + > +int > +main () > +{ > + INIT_PRED () > + RUN_ALL () > +} > -- > 2.36.3 >