On Mon, Jul 1, 2024 at 8:17 AM Kewen.Lin <[email protected]> wrote:
>
> Hi,
>
> As PR115659 shows, assuming c = x CMP y, there are some
> folding chances for patterns r = c ? 0/z : z/-1:
> - For r = c ? 0 : z, it can be folded into r = ~c & z.
> - For r = c ? z : -1, it can be folded into r = ~c | z.
>
> But BIT_AND/BIT_IOR applied on one BIT_NOT operand is a
> compound operation, I'm not sure if each target with
> vector capability have a single vector instruction for it,
> if no, it's arguable to consider it always beats vector
> selection (like vector constant gets hoisted or combined
> and selection has same latency as normal logical operation).
> So IMHO we probably need to query target with new optabs.
> So this patch is to introduce new optabs andc, iorc and its
> corresponding internal functions BIT_{ANDC,IORC} (looking
> for suggestion for naming optabs and ifns), and if targets
> defines such optabs for vector modes, it means targets
> support these hardware insns and should be not worse than
> vector selection. btw, the rs6000 changes are meant to
> give an example for a target supporting andc/iorc.
>
> Does this sound reasonable?
I think it's reasonable to have andc - there are quite some CPUs
that have this op on GPRs as well I think, called andn (but I don't
want to get into bike-shedding). A corresponding iorc is then
a natural extension (likewise xorc). AVX512 has a very powerful
vector ternlog (but no scalar andn).
I was surprised to not see an existing optab for andn.
So OK from my side in case there are no negative comments or
bikeshedding on the name. I can't approve the rs6000 changes
though.
Thanks,
Richard.
> BR,
> Kewen
> -----
>
> PR tree-optimzation/115659
>
> gcc/ChangeLog:
>
> * config/rs6000/rs6000-builtins.def: Update some bif expanders by
> replacing orc<mode>3 with iorc<mode>3.
> * config/rs6000/rs6000-string.cc (expand_cmp_vec_sequence): Update gen
> function by replacing orc<mode>3 with iorc<mode>3.
> * config/rs6000/rs6000.md (orc<mode>3): Rename to ...
> (iorc<mode>3): ... this.
> * doc/md.texi: Document andcm3 and iorcm3.
> * gimple-isel.cc (gimple_expand_vec_cond_expr): Add more foldings for
> patterns x CMP y ? 0 : z and x CMP y ? z : -1.
> * internal-fn.def (BIT_ANDC): New internal function.
> (BIT_IORC): Likewise.
> * optabs.def (andc, iorc): New optab.
> ---
> gcc/config/rs6000/rs6000-builtins.def | 24 ++++++++++++------------
> gcc/config/rs6000/rs6000-string.cc | 2 +-
> gcc/config/rs6000/rs6000.md | 2 +-
> gcc/doc/md.texi | 10 ++++++++++
> gcc/gimple-isel.cc | 24 ++++++++++++++++++++++++
> gcc/internal-fn.def | 4 ++++
> gcc/optabs.def | 2 ++
> 7 files changed, 54 insertions(+), 14 deletions(-)
>
> diff --git a/gcc/config/rs6000/rs6000-builtins.def
> b/gcc/config/rs6000/rs6000-builtins.def
> index 3bc7fed6956..736890fe6cb 100644
> --- a/gcc/config/rs6000/rs6000-builtins.def
> +++ b/gcc/config/rs6000/rs6000-builtins.def
> @@ -2147,40 +2147,40 @@
> NEG_V2DI negv2di2 {}
>
> const vsc __builtin_altivec_orc_v16qi (vsc, vsc);
> - ORC_V16QI orcv16qi3 {}
> + ORC_V16QI iorcv16qi3 {}
>
> const vuc __builtin_altivec_orc_v16qi_uns (vuc, vuc);
> - ORC_V16QI_UNS orcv16qi3 {}
> + ORC_V16QI_UNS iorcv16qi3 {}
>
> const vsq __builtin_altivec_orc_v1ti (vsq, vsq);
> - ORC_V1TI orcv1ti3 {}
> + ORC_V1TI iorcv1ti3 {}
>
> const vuq __builtin_altivec_orc_v1ti_uns (vuq, vuq);
> - ORC_V1TI_UNS orcv1ti3 {}
> + ORC_V1TI_UNS iorcv1ti3 {}
>
> const vd __builtin_altivec_orc_v2df (vd, vd);
> - ORC_V2DF orcv2df3 {}
> + ORC_V2DF iorcv2df3 {}
>
> const vsll __builtin_altivec_orc_v2di (vsll, vsll);
> - ORC_V2DI orcv2di3 {}
> + ORC_V2DI iorcv2di3 {}
>
> const vull __builtin_altivec_orc_v2di_uns (vull, vull);
> - ORC_V2DI_UNS orcv2di3 {}
> + ORC_V2DI_UNS iorcv2di3 {}
>
> const vf __builtin_altivec_orc_v4sf (vf, vf);
> - ORC_V4SF orcv4sf3 {}
> + ORC_V4SF iorcv4sf3 {}
>
> const vsi __builtin_altivec_orc_v4si (vsi, vsi);
> - ORC_V4SI orcv4si3 {}
> + ORC_V4SI iorcv4si3 {}
>
> const vui __builtin_altivec_orc_v4si_uns (vui, vui);
> - ORC_V4SI_UNS orcv4si3 {}
> + ORC_V4SI_UNS iorcv4si3 {}
>
> const vss __builtin_altivec_orc_v8hi (vss, vss);
> - ORC_V8HI orcv8hi3 {}
> + ORC_V8HI iorcv8hi3 {}
>
> const vus __builtin_altivec_orc_v8hi_uns (vus, vus);
> - ORC_V8HI_UNS orcv8hi3 {}
> + ORC_V8HI_UNS iorcv8hi3 {}
>
> const vsc __builtin_altivec_vclzb (vsc);
> VCLZB clzv16qi2 {}
> diff --git a/gcc/config/rs6000/rs6000-string.cc
> b/gcc/config/rs6000/rs6000-string.cc
> index 917f5572a6d..c4c62e8e2f9 100644
> --- a/gcc/config/rs6000/rs6000-string.cc
> +++ b/gcc/config/rs6000/rs6000-string.cc
> @@ -743,7 +743,7 @@ expand_cmp_vec_sequence (unsigned HOST_WIDE_INT
> bytes_to_compare,
> rtx cmp_combined = gen_reg_rtx (load_mode);
> emit_insn (gen_altivec_eqv16qi (cmp_res, s1data, s2data));
> emit_insn (gen_altivec_eqv16qi (cmp_zero, s1data, zero_reg));
> - emit_insn (gen_orcv16qi3 (vec_result, cmp_zero, cmp_res));
> + emit_insn (gen_iorcv16qi3 (vec_result, cmp_zero, cmp_res));
> emit_insn (gen_altivec_vcmpequb_p (cmp_combined, vec_result,
> zero_reg));
> }
> }
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index a5d20594789..276a5c9cf2d 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -7324,7 +7324,7 @@ (define_expand "nand<mode>3"
>
> ;; The canonical form is to have the negated element first, so we need to
> ;; reverse arguments.
> -(define_expand "orc<mode>3"
> +(define_expand "iorc<mode>3"
> [(set (match_operand:BOOL_128 0 "vlogical_operand")
> (ior:BOOL_128
> (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand"))
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 5730bda80dc..fb448c946cd 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -5543,6 +5543,16 @@ means of constraints requiring operands 1 and 0 to be
> the same location.
> @itemx @samp{and@var{m}3}, @samp{ior@var{m}3}, @samp{xor@var{m}3}
> Similar, for other arithmetic operations.
>
> +@cindex @code{andc@var{m}3} instruction pattern
> +@item @samp{andc@var{m}3}
> +Like @code{and@var{m}3}, but it uses bitwise-complement of operand 2
> +rather than operand 2 itself.
> +
> +@cindex @code{iorc@var{m}3} instruction pattern
> +@item @samp{iorc@var{m}3}
> +Like @code{ior@var{m}3}, but it uses bitwise-complement of operand 2
> +rather than operand 2 itself.
> +
> @cindex @code{addv@var{m}4} instruction pattern
> @item @samp{addv@var{m}4}
> Like @code{add@var{m}3} but takes a @code{code_label} as operand 3 and
> diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
> index 71af1a8cd97..fd27bac41e2 100644
> --- a/gcc/gimple-isel.cc
> +++ b/gcc/gimple-isel.cc
> @@ -284,6 +284,30 @@ gimple_expand_vec_cond_expr (struct function *fun,
> gimple_stmt_iterator *gsi,
> /* r = c ? z : c. */
> op2 = new_op0;
> }
> + bool op1_zerop = integer_zerop (op1);
> + bool op2_minus_onep = integer_minus_onep (op2);
> + if (op1_zerop
> + && (direct_internal_fn_supported_p (IFN_BIT_ANDC, vtype,
> + OPTIMIZE_FOR_BOTH)))
> + {
> + tree conv_op = build1 (VIEW_CONVERT_EXPR, vtype, op0);
> + tree new_op0 = make_ssa_name (vtype);
> + gassign *new_stmt = gimple_build_assign (new_op0, conv_op);
> + gsi_insert_seq_before (gsi, new_stmt, GSI_SAME_STMT);
> + return gimple_build_call_internal (IFN_BIT_ANDC, 2, op2,
> + new_op0);
> + }
> + else if (op2_minus_onep
> + && (direct_internal_fn_supported_p (IFN_BIT_IORC,
> vtype,
> +
> OPTIMIZE_FOR_BOTH)))
> + {
> + tree conv_op = build1 (VIEW_CONVERT_EXPR, vtype, op0);
> + tree new_op0 = make_ssa_name (vtype);
> + gassign *new_stmt = gimple_build_assign (new_op0, conv_op);
> + gsi_insert_seq_before (gsi, new_stmt, GSI_SAME_STMT);
> + return gimple_build_call_internal (IFN_BIT_IORC, 2, op1,
> + new_op0);
> + }
> }
>
> /* When the compare has EH we do not want to forward it when
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index a8c83437ada..994bbd9b4dd 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -593,6 +593,10 @@ DEF_INTERNAL_FN (DIVMODBITINT, ECF_LEAF, ". O . O . R .
> R . ")
> DEF_INTERNAL_FN (FLOATTOBITINT, ECF_LEAF | ECF_NOTHROW, ". O . . ")
> DEF_INTERNAL_FN (BITINTTOFLOAT, ECF_PURE | ECF_LEAF, ". R . ")
>
> +/* Bitwise functions. */
> +DEF_INTERNAL_OPTAB_FN (BIT_ANDC, ECF_CONST, andc, binary)
> +DEF_INTERNAL_OPTAB_FN (BIT_IORC, ECF_CONST, iorc, binary)
> +
> #undef DEF_INTERNAL_WIDENING_OPTAB_FN
> #undef DEF_INTERNAL_SIGNED_COND_FN
> #undef DEF_INTERNAL_COND_FN
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index bc2611abdc2..bcf7cc0fa58 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -540,3 +540,5 @@ OPTAB_D (vec_shl_insert_optab, "vec_shl_insert_$a")
> OPTAB_D (len_load_optab, "len_load_$a")
> OPTAB_D (len_store_optab, "len_store_$a")
> OPTAB_D (select_vl_optab, "select_vl$a")
> +OPTAB_D (andc_optab, "andc$a3")
> +OPTAB_D (iorc_optab, "iorc$a3")
> --
> 2.43.0