RE: [PATCH v1] RISC-V: Refine the code gen for ceil auto vectorization.
Sure thing, will send V2 for this. Pan From: juzhe.zh...@rivai.ai Sent: Friday, September 22, 2023 7:26 PM To: Li, Pan2; gcc-patches Cc: Li, Pan2; Wang, Yanzhang; kito.cheng Subject: Re: [PATCH v1] RISC-V: Refine the code gen for ceil auto vectorization. I prefer changing expand_vec_copysign into emit_vec_copysign. Likewise, emit_fabs, etc. juzhe.zh...@rivai.ai<mailto:juzhe.zh...@rivai.ai> From: pan2.li<mailto:pan2...@intel.com> Date: 2023-09-22 19:19 To: gcc-patches<mailto:gcc-patches@gcc.gnu.org> CC: juzhe.zhong<mailto:juzhe.zh...@rivai.ai>; pan2.li<mailto:pan2...@intel.com>; yanzhang.wang<mailto:yanzhang.w...@intel.com>; kito.cheng<mailto:kito.ch...@gmail.com> Subject: [PATCH v1] RISC-V: Refine the code gen for ceil auto vectorization. From: Pan Li <mailto:pan2...@intel.com> We already vectorize the ceil code below. void test_ceil (float *out, float *in, int count) { for (unsigned i = 0; i < count; i++) out[i] = __builtin_ceilf (in[i]); } Before this patch: vfmv.v.x v4,fa0 // can be removed vfabs.v v0,v1 vmv1r.v v2,v1 // can be removed vmflt.vv v0,v0,v4 // can be refined to vmflt.vf vfcvt.x.f.v v3,v1,v0.t vfcvt.f.x.v v2,v3,v0.t vfsgnj.vv v2,v2,v1 After this patch: vfabs.v v1,v2 vmflt.vf v0,v1,fa5 vfcvt.x.f.v v3,v2,v0.t vfcvt.f.x.v v1,v3,v0.t vfsgnj.vv v1,v1,v2 We can generate better code, including the items below. * Remove vfmv.v.f. * Take vmflt.vf instead of vmflt.vv. * Remove vmv1r.v. gcc/ChangeLog: * config/riscv/riscv-v.cc (expand_vec_float_cmp_mask): Refactor. (expand_vec_abs): New function impl. (expand_vec_cvt_x_f): Ditto. (expand_vec_cvt_f_x): Ditto. (expand_vec_ceil): Refine. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/unop/math-ceil-0.c: Adjust body check. * gcc.target/riscv/rvv/autovec/unop/math-ceil-1.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/math-ceil-2.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/math-ceil-3.c: Ditto. 
Signed-off-by: Pan Li mailto:pan2...@intel.com>> --- gcc/config/riscv/riscv-v.cc | 71 --- .../riscv/rvv/autovec/unop/math-ceil-0.c | 5 +- .../riscv/rvv/autovec/unop/math-ceil-1.c | 5 +- .../riscv/rvv/autovec/unop/math-ceil-2.c | 5 +- .../riscv/rvv/autovec/unop/math-ceil-3.c | 5 +- 5 files changed, 49 insertions(+), 42 deletions(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 4d0e1d8d1a9..ea2b01f6a6e 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -3560,26 +3560,17 @@ static rtx expand_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar, machine_mode vec_fp_mode) { - /* Step-1: Get the abs float value for mask generation. */ - rtx tmp = gen_reg_rtx (vec_fp_mode); - rtx abs_ops[] = {tmp, fp_vector}; - insn_code icode = code_for_pred (ABS, vec_fp_mode); - emit_vlmax_insn (icode, UNARY_OP, abs_ops); - - /* Step-2: Prepare the scalar float compare register. */ + /* Step-1: Prepare the scalar float compare register. */ rtx fp_reg = gen_reg_rtx (GET_MODE_INNER (vec_fp_mode)); emit_insn (gen_move_insn (fp_reg, fp_scalar)); - /* Step-3: Prepare the vector float compare register. */ - rtx vec_dup = gen_reg_rtx (vec_fp_mode); - icode = code_for_pred_broadcast (vec_fp_mode); - rtx vfmv_ops[] = {vec_dup, fp_reg}; - emit_vlmax_insn (icode, UNARY_OP, vfmv_ops); - - /* Step-4: Generate the mask. */ + /* Step-2: Generate the mask. 
*/ machine_mode mask_mode = get_mask_mode (vec_fp_mode); rtx mask = gen_reg_rtx (mask_mode); - expand_vec_cmp (mask, code, tmp, vec_dup); + rtx cmp = gen_rtx_fmt_ee (code, mask_mode, fp_vector, fp_reg); + rtx cmp_ops[] = {mask, cmp, fp_vector, fp_reg}; + insn_code icode = code_for_pred_cmp_scalar (vec_fp_mode); + emit_vlmax_insn (icode, COMPARE_OP, cmp_ops); return mask; } @@ -3594,29 +3585,57 @@ expand_vec_copysign (rtx op_dest, rtx op_src_0, rtx op_src_1, emit_vlmax_insn (icode, BINARY_OP, sgnj_ops); } +static void +expand_vec_abs (rtx op_dest, rtx op_src, machine_mode vec_mode) +{ + rtx abs_ops[] = {op_dest, op_src}; + insn_code icode = code_for_pred (ABS, vec_mode); + + emit_vlmax_insn (icode, UNARY_OP, abs_ops); +} + +static void +expand_vec_cvt_x_f (rtx op_dest, rtx op_src, rtx mask, + insn_type type, machine_mode vec_mode) +{ + rtx cvt_x_ops[] = {op_dest, mask, op_dest, op_src}; + insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, vec_mode); + + emit_vlmax_insn (icode, type, cvt_x_ops); +} + +static void +expand_vec_cvt_f_x (rtx op_dest, rtx op_src, rtx mask, + insn_type type, machine_mode vec_mode) +{ + rtx cvt_fp_ops[] = {op_dest, mask, op_dest, op_src}; + insn_code icode = code_for_pred (FLOAT, vec_mode); + + emit_vlmax_insn (icode, type, cvt_fp_ops); +} + void expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode, machine_mode vec_int_mode) { - /* Step-1: Generate the mask on con
Re: [PATCH v1] RISC-V: Refine the code gen for ceil auto vectorization.
I prefer changing expand_vec_copysign into emit_vec_copysign. Likewise, emit_fabs, etc. juzhe.zh...@rivai.ai From: pan2.li Date: 2023-09-22 19:19 To: gcc-patches CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng Subject: [PATCH v1] RISC-V: Refine the code gen for ceil auto vectorization. From: Pan Li We already vectorize the ceil code below. void test_ceil (float *out, float *in, int count) { for (unsigned i = 0; i < count; i++) out[i] = __builtin_ceilf (in[i]); } Before this patch: vfmv.v.x v4,fa0 // can be removed vfabs.v v0,v1 vmv1r.v v2,v1 // can be removed vmflt.vv v0,v0,v4 // can be refined to vmflt.vf vfcvt.x.f.v v3,v1,v0.t vfcvt.f.x.v v2,v3,v0.t vfsgnj.vv v2,v2,v1 After this patch: vfabs.v v1,v2 vmflt.vf v0,v1,fa5 vfcvt.x.f.v v3,v2,v0.t vfcvt.f.x.v v1,v3,v0.t vfsgnj.vv v1,v1,v2 We can generate better code, including the items below. * Remove vfmv.v.f. * Take vmflt.vf instead of vmflt.vv. * Remove vmv1r.v. gcc/ChangeLog: * config/riscv/riscv-v.cc (expand_vec_float_cmp_mask): Refactor. (expand_vec_abs): New function impl. (expand_vec_cvt_x_f): Ditto. (expand_vec_cvt_f_x): Ditto. (expand_vec_ceil): Refine. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/unop/math-ceil-0.c: Adjust body check. * gcc.target/riscv/rvv/autovec/unop/math-ceil-1.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/math-ceil-2.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/math-ceil-3.c: Ditto. 
Signed-off-by: Pan Li --- gcc/config/riscv/riscv-v.cc | 71 --- .../riscv/rvv/autovec/unop/math-ceil-0.c | 5 +- .../riscv/rvv/autovec/unop/math-ceil-1.c | 5 +- .../riscv/rvv/autovec/unop/math-ceil-2.c | 5 +- .../riscv/rvv/autovec/unop/math-ceil-3.c | 5 +- 5 files changed, 49 insertions(+), 42 deletions(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 4d0e1d8d1a9..ea2b01f6a6e 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -3560,26 +3560,17 @@ static rtx expand_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar, machine_mode vec_fp_mode) { - /* Step-1: Get the abs float value for mask generation. */ - rtx tmp = gen_reg_rtx (vec_fp_mode); - rtx abs_ops[] = {tmp, fp_vector}; - insn_code icode = code_for_pred (ABS, vec_fp_mode); - emit_vlmax_insn (icode, UNARY_OP, abs_ops); - - /* Step-2: Prepare the scalar float compare register. */ + /* Step-1: Prepare the scalar float compare register. */ rtx fp_reg = gen_reg_rtx (GET_MODE_INNER (vec_fp_mode)); emit_insn (gen_move_insn (fp_reg, fp_scalar)); - /* Step-3: Prepare the vector float compare register. */ - rtx vec_dup = gen_reg_rtx (vec_fp_mode); - icode = code_for_pred_broadcast (vec_fp_mode); - rtx vfmv_ops[] = {vec_dup, fp_reg}; - emit_vlmax_insn (icode, UNARY_OP, vfmv_ops); - - /* Step-4: Generate the mask. */ + /* Step-2: Generate the mask. 
*/ machine_mode mask_mode = get_mask_mode (vec_fp_mode); rtx mask = gen_reg_rtx (mask_mode); - expand_vec_cmp (mask, code, tmp, vec_dup); + rtx cmp = gen_rtx_fmt_ee (code, mask_mode, fp_vector, fp_reg); + rtx cmp_ops[] = {mask, cmp, fp_vector, fp_reg}; + insn_code icode = code_for_pred_cmp_scalar (vec_fp_mode); + emit_vlmax_insn (icode, COMPARE_OP, cmp_ops); return mask; } @@ -3594,29 +3585,57 @@ expand_vec_copysign (rtx op_dest, rtx op_src_0, rtx op_src_1, emit_vlmax_insn (icode, BINARY_OP, sgnj_ops); } +static void +expand_vec_abs (rtx op_dest, rtx op_src, machine_mode vec_mode) +{ + rtx abs_ops[] = {op_dest, op_src}; + insn_code icode = code_for_pred (ABS, vec_mode); + + emit_vlmax_insn (icode, UNARY_OP, abs_ops); +} + +static void +expand_vec_cvt_x_f (rtx op_dest, rtx op_src, rtx mask, + insn_type type, machine_mode vec_mode) +{ + rtx cvt_x_ops[] = {op_dest, mask, op_dest, op_src}; + insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, vec_mode); + + emit_vlmax_insn (icode, type, cvt_x_ops); +} + +static void +expand_vec_cvt_f_x (rtx op_dest, rtx op_src, rtx mask, + insn_type type, machine_mode vec_mode) +{ + rtx cvt_fp_ops[] = {op_dest, mask, op_dest, op_src}; + insn_code icode = code_for_pred (FLOAT, vec_mode); + + emit_vlmax_insn (icode, type, cvt_fp_ops); +} + void expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode, machine_mode vec_int_mode) { - /* Step-1: Generate the mask on const fp. */ + /* Step-1: Get the abs float value for mask generation. */ + expand_vec_abs (op_0, op_1, vec_fp_mode); + + /* Step-2: Generate the mask on const fp. */ rtx const_fp = gen_ceil_const_fp (GET_MODE_INNER (vec_fp_mode)); - rtx mask = expand_vec_float_cmp_mask (op_1, LT, const_fp, vec_fp_mode); + rtx mask = expand_vec_float_cmp_mask (op_0, LT, const_fp, vec_fp_mode); - /* Step-2: Convert to integer on mask, with rounding up (aka ceil). */ + /* Step-3: Convert to integer on mask, with rounding up (aka ceil). 
*/ rtx tmp = gen_reg_rtx (vec_int_mode); - rtx cvt_x_ops[] =