These patterns enable the combine pass (or late-combine, depending on the case) to merge a float_extend'ed vec_duplicate into a (possibly negated) minus-mult RTL instruction.
Before this patch, we have six instructions, e.g.:

  vsetivli zero,4,e32,m1,ta,ma
  fcvt.s.h fa5,fa5
  vfmv.v.f v4,fa5
  vfwcvt.f.f.v v1,v3
  vsetvli zero,zero,e32,m1,ta,ma
  vfnmadd.vv v1,v4,v2

After, we get only one:

  vfwnmacc.vf v1,fa5,v2

	PR target/119100

gcc/ChangeLog:

	* config/riscv/autovec-opt.md (*vfwnmacc_vf_<mode>): New pattern.
	(*vfwnmsac_vf_<mode>): New pattern.
	* config/riscv/riscv.cc (get_vector_binary_rtx_cost): Add support for
	a vec_duplicate in a neg.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c: Add vfwnmacc and
	vfwnmsac.
	* gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c: Likewise.
	* gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c: Likewise.
	* gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c: Likewise.
	* gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c: Likewise.
	* gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c: Likewise.
	* gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c: Likewise.
	* gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c: Likewise.
	* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f16.c: New test.
	* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f32.c: New test.
	* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f16.c: New test.
	* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f32.c: New test.
--- gcc/config/riscv/autovec-opt.md | 51 +++++++++++++++++++ gcc/config/riscv/riscv.cc | 5 +- .../riscv/rvv/autovec/vx_vf/vf-1-f16.c | 4 ++ .../riscv/rvv/autovec/vx_vf/vf-1-f32.c | 4 ++ .../riscv/rvv/autovec/vx_vf/vf-2-f16.c | 6 ++- .../riscv/rvv/autovec/vx_vf/vf-2-f32.c | 6 ++- .../riscv/rvv/autovec/vx_vf/vf-3-f16.c | 4 ++ .../riscv/rvv/autovec/vx_vf/vf-3-f32.c | 4 ++ .../riscv/rvv/autovec/vx_vf/vf-4-f16.c | 2 + .../riscv/rvv/autovec/vx_vf/vf-4-f32.c | 2 + .../rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f16.c | 17 +++++++ .../rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f32.c | 17 +++++++ .../rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f16.c | 17 +++++++ .../rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f32.c | 17 +++++++ 14 files changed, 151 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f16.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f32.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f16.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f32.c diff --git gcc/config/riscv/autovec-opt.md gcc/config/riscv/autovec-opt.md index f372f0e6a69..12217c03304 100644 --- gcc/config/riscv/autovec-opt.md +++ gcc/config/riscv/autovec-opt.md @@ -1844,3 +1844,54 @@ (define_insn_and_split "*extend_vf_<mode>" } [(set_attr "type" "vfwmuladd")] ) + +;; vfwnmacc.vf +(define_insn_and_split "*vfwnmacc_vf_<mode>" + [(set (match_operand:VWEXTF 0 "register_operand") + (minus:VWEXTF + (mult:VWEXTF + (neg:VWEXTF + (vec_duplicate:VWEXTF + (float_extend:<VEL> + (match_operand:<VSUBEL> 2 "register_operand")))) + (float_extend:VWEXTF + (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand"))) + (match_operand:VWEXTF 1 "register_operand")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + rtx ops[] = {operands[0], operands[1], operands[2], operands[3]}; + riscv_vector::emit_vlmax_insn( + 
code_for_pred_widen_mul_neg_scalar(MINUS, <MODE>mode), + riscv_vector::WIDEN_TERNARY_OP_FRM_DYN, ops); + DONE; + } + [(set_attr "type" "vfwmuladd")] +) + +;; vfwnmsac.vf +(define_insn_and_split "*vfwnmsac_vf_<mode>" + [(set (match_operand:VWEXTF 0 "register_operand") + (minus:VWEXTF + (match_operand:VWEXTF 1 "register_operand") + (mult:VWEXTF + (float_extend:VWEXTF + (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand")) + (vec_duplicate:VWEXTF + (float_extend:<VEL> + (match_operand:<VSUBEL> 2 "register_operand"))))))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + rtx ops[] = {operands[0], operands[1], operands[2], operands[3]}; + riscv_vector::emit_vlmax_insn( + code_for_pred_widen_mul_neg_scalar (PLUS, <MODE>mode), + riscv_vector::WIDEN_TERNARY_OP_FRM_DYN, ops); + DONE; + } + [(set_attr "type" "vfwmuladd")] +) diff --git gcc/config/riscv/riscv.cc gcc/config/riscv/riscv.cc index a4428f0e96d..bde93ae8e15 100644 --- gcc/config/riscv/riscv.cc +++ gcc/config/riscv/riscv.cc @@ -3965,11 +3965,14 @@ get_vector_binary_rtx_cost (rtx x, int scalar2vr_cost) rtx op_0 = XEXP (x, 0); rtx op_1 = XEXP (x, 1); + rtx neg; if (GET_CODE (op_0) == VEC_DUPLICATE || GET_CODE (op_1) == VEC_DUPLICATE) return (scalar2vr_cost + 1) * COSTS_N_INSNS (1); - else if (GET_CODE (op_0) == NEG && GET_CODE (op_1) == VEC_DUPLICATE) + else if (GET_CODE (neg = op_0) == NEG + && (GET_CODE (op_1) == VEC_DUPLICATE + || GET_CODE (XEXP (neg, 0)) == VEC_DUPLICATE)) return (scalar2vr_cost + 1) * COSTS_N_INSNS (1); else return COSTS_N_INSNS (1); diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c index b17fd8ec1f1..811f26c156a 100644 --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c @@ -13,6 +13,8 @@ DEF_VF_MULOP_ACC_CASE_0 (_Float16, +, -, nacc) DEF_VF_MULOP_ACC_CASE_0 (_Float16, -, -, nsac) DEF_VF_MULOP_WIDEN_CASE_0 
(_Float16, float, +, +, acc) DEF_VF_MULOP_WIDEN_CASE_0 (_Float16, float, -, +, sac) +DEF_VF_MULOP_WIDEN_CASE_0 (_Float16, float, +, -, nacc) +DEF_VF_MULOP_WIDEN_CASE_0 (_Float16, float, -, -, nsac) /* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */ /* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */ @@ -24,3 +26,5 @@ DEF_VF_MULOP_WIDEN_CASE_0 (_Float16, float, -, +, sac) /* { dg-final { scan-assembler-times {vfnmsac.vf} 1 } } */ /* { dg-final { scan-assembler-times {vfwmacc.vf} 1 } } */ /* { dg-final { scan-assembler-times {vfwmsac.vf} 1 } } */ +/* { dg-final { scan-assembler-times {vfwnmacc.vf} 1 } } */ +/* { dg-final { scan-assembler-times {vfwnmsac.vf} 1 } } */ diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c index efd887dc8bf..ca82ead9d28 100644 --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c @@ -13,6 +13,8 @@ DEF_VF_MULOP_ACC_CASE_0 (float, +, -, nacc) DEF_VF_MULOP_ACC_CASE_0 (float, -, -, nsac) DEF_VF_MULOP_WIDEN_CASE_0 (float, double, +, +, acc) DEF_VF_MULOP_WIDEN_CASE_0 (float, double, -, +, sac) +DEF_VF_MULOP_WIDEN_CASE_0 (float, double, +, -, nacc) +DEF_VF_MULOP_WIDEN_CASE_0 (float, double, -, -, nsac) /* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */ /* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */ @@ -24,3 +26,5 @@ DEF_VF_MULOP_WIDEN_CASE_0 (float, double, -, +, sac) /* { dg-final { scan-assembler-times {vfnmsac.vf} 1 } } */ /* { dg-final { scan-assembler-times {vfwmacc.vf} 1 } } */ /* { dg-final { scan-assembler-times {vfwmsac.vf} 1 } } */ +/* { dg-final { scan-assembler-times {vfwnmacc.vf} 1 } } */ +/* { dg-final { scan-assembler-times {vfwnmsac.vf} 1 } } */ diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c index 84987a9c0f4..3a39303f942 100644 --- 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c @@ -13,5 +13,7 @@ /* { dg-final { scan-assembler-not {vfnmsac.vf} } } */ /* { dg-final { scan-assembler-not {vfwmacc.vf} } } */ /* { dg-final { scan-assembler-not {vfwmsac.vf} } } */ -/* { dg-final { scan-assembler-times {fcvt.s.h} 2 } } */ -/* { dg-final { scan-assembler-times {vfmv.v.f} 10 } } */ +/* { dg-final { scan-assembler-not {vfwnmacc.vf} } } */ +/* { dg-final { scan-assembler-not {vfwnmsac.vf} } } */ +/* { dg-final { scan-assembler-times {fcvt.s.h} 4 } } */ +/* { dg-final { scan-assembler-times {vfmv.v.f} 12 } } */ diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c index dbd3d022d5e..b4618bae70e 100644 --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c @@ -13,5 +13,7 @@ /* { dg-final { scan-assembler-not {vfnmsac.vf} } } */ /* { dg-final { scan-assembler-not {vfwmacc.vf} } } */ /* { dg-final { scan-assembler-not {vfwmsac.vf} } } */ -/* { dg-final { scan-assembler-times {fcvt.d.s} 2 } } */ -/* { dg-final { scan-assembler-times {vfmv.v.f} 10 } } */ +/* { dg-final { scan-assembler-not {vfwnmacc.vf} } } */ +/* { dg-final { scan-assembler-not {vfwnmsac.vf} } } */ +/* { dg-final { scan-assembler-times {fcvt.d.s} 4 } } */ +/* { dg-final { scan-assembler-times {vfmv.v.f} 12 } } */ diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c index 5f0d7585e65..58afaa4aef9 100644 --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c @@ -13,6 +13,8 @@ DEF_VF_MULOP_ACC_CASE_1 (_Float16, +, -, nacc, VF_MULOP_ACC_BODY_X128) DEF_VF_MULOP_ACC_CASE_1 (_Float16, -, -, nsac, VF_MULOP_ACC_BODY_X128) DEF_VF_MULOP_WIDEN_CASE_1 (_Float16, float, +, +, acc) 
DEF_VF_MULOP_WIDEN_CASE_1 (_Float16, float, -, +, sac) +DEF_VF_MULOP_WIDEN_CASE_1 (_Float16, float, +, -, nacc) +DEF_VF_MULOP_WIDEN_CASE_1 (_Float16, float, -, -, nsac) /* { dg-final { scan-assembler {vfmadd.vf} } } */ /* { dg-final { scan-assembler {vfmsub.vf} } } */ @@ -24,3 +26,5 @@ DEF_VF_MULOP_WIDEN_CASE_1 (_Float16, float, -, +, sac) /* { dg-final { scan-assembler {vfnmsac.vf} } } */ /* { dg-final { scan-assembler {vfwmacc.vf} } } */ /* { dg-final { scan-assembler {vfwmsac.vf} } } */ +/* { dg-final { scan-assembler {vfwnmacc.vf} } } */ +/* { dg-final { scan-assembler {vfwnmsac.vf} } } */ diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c index 951b0ef2a67..0e95774a489 100644 --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c @@ -13,6 +13,8 @@ DEF_VF_MULOP_ACC_CASE_1 (float, +, -, nacc, VF_MULOP_ACC_BODY_X128) DEF_VF_MULOP_ACC_CASE_1 (float, -, -, nsac, VF_MULOP_ACC_BODY_X128) DEF_VF_MULOP_WIDEN_CASE_1 (float, double, +, +, acc) DEF_VF_MULOP_WIDEN_CASE_1 (float, double, -, +, sac) +DEF_VF_MULOP_WIDEN_CASE_1 (float, double, +, -, nacc) +DEF_VF_MULOP_WIDEN_CASE_1 (float, double, -, -, nsac) /* { dg-final { scan-assembler {vfmadd.vf} } } */ /* { dg-final { scan-assembler {vfmsub.vf} } } */ @@ -24,3 +26,5 @@ DEF_VF_MULOP_WIDEN_CASE_1 (float, double, -, +, sac) /* { dg-final { scan-assembler {vfnmsac.vf} } } */ /* { dg-final { scan-assembler {vfwmacc.vf} } } */ /* { dg-final { scan-assembler {vfwmsac.vf} } } */ +/* { dg-final { scan-assembler {vfwnmacc.vf} } } */ +/* { dg-final { scan-assembler {vfwnmsac.vf} } } */ diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c index a4edd92f1ef..559df6c7976 100644 --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c @@ 
-13,4 +13,6 @@ /* { dg-final { scan-assembler-not {vfnmsac.vf} } } */ /* { dg-final { scan-assembler-not {vfwmacc.vf} } } */ /* { dg-final { scan-assembler-not {vfwmsac.vf} } } */ +/* { dg-final { scan-assembler-not {vfwnmacc.vf} } } */ +/* { dg-final { scan-assembler-not {vfwnmsac.vf} } } */ /* { dg-final { scan-assembler {fcvt.s.h} } } */ diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c index 4eb28e585a0..03f9c5a3d86 100644 --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c @@ -13,4 +13,6 @@ /* { dg-final { scan-assembler-not {vfnmsac.vf} } } */ /* { dg-final { scan-assembler-not {vfwmacc.vf} } } */ /* { dg-final { scan-assembler-not {vfwmsac.vf} } } */ +/* { dg-final { scan-assembler-not {vfwnmacc.vf} } } */ +/* { dg-final { scan-assembler-not {vfwnmsac.vf} } } */ /* { dg-final { scan-assembler {fcvt.d.s} } } */ diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f16.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f16.c new file mode 100644 index 00000000000..6be7d720603 --- /dev/null +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f16.c @@ -0,0 +1,17 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-march=rv64gcv_zvfh --param=fpr2vr-cost=0" } */ + +#include "vf_mulop.h" + +#define T1 _Float16 +#define T2 float +#define NAME nacc +#define OP + +#define NEG - + +DEF_VF_MULOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NEG, NAME) + +#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_MULOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n) +#define LIMIT -32768 + +#include "vf_mulop_widen_run.h" diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f32.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f32.c new file mode 100644 index 00000000000..851c335d64d --- /dev/null +++ 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f32.c @@ -0,0 +1,17 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "--param=fpr2vr-cost=0" } */ + +#include "vf_mulop.h" + +#define T1 float +#define T2 double +#define NAME nacc +#define OP + +#define NEG - + +DEF_VF_MULOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NEG, NAME) + +#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_MULOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n) +#define LIMIT -2147483648 + +#include "vf_mulop_widen_run.h" diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f16.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f16.c new file mode 100644 index 00000000000..dd28234b6e0 --- /dev/null +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f16.c @@ -0,0 +1,17 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-march=rv64gcv_zvfh --param=fpr2vr-cost=0" } */ + +#include "vf_mulop.h" + +#define T1 _Float16 +#define T2 float +#define NAME nsac +#define OP - +#define NEG - + +DEF_VF_MULOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NEG, NAME) + +#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_MULOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n) +#define LIMIT -32768 + +#include "vf_mulop_widen_run.h" diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f32.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f32.c new file mode 100644 index 00000000000..9eacacea44b --- /dev/null +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f32.c @@ -0,0 +1,17 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "--param=fpr2vr-cost=0" } */ + +#include "vf_mulop.h" + +#define T1 float +#define T2 double +#define NAME nsac +#define OP - +#define NEG - + +DEF_VF_MULOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NEG, NAME) + +#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_MULOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, 
f, n) +#define LIMIT -2147483648 + +#include "vf_mulop_widen_run.h" -- 2.39.5