On Thu, Jul 25, 2024 at 3:34 PM Robin Dapp <rdapp....@gmail.com> wrote:
>
> Hi,
>
> In preparation for the maskload else operand I split off this patch.  The
> patch looks through SSA names for the conditions passed to
> inverse_conditions_p which helps match.pd recognize more redundant vec_cond
> expressions.  It also adds VCOND_MASK to the respective iterators in match.pd.
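To make sure we're talking about the same thing: if I read the
description right, the redundancy targeted is roughly the following
(a sketch with made-up names, both conditions behind SSA names):

  mask = blah > 0;
  inv = blah <= 0;
  tem = VEC_COND_EXPR <inv, c, x>;
  res = VEC_COND_EXPR <mask, b, tem>;

which should fold to

  res = VEC_COND_EXPR <mask, b, c>;

once inverse_conditions_p looks through the definitions of mask and inv.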
IFN_VCOND_MASK should only appear after ISEL so I fail to see why we
need this?  Why are the VEC_CONDs not folded earlier?

> Is this acceptable without a separate test?  There will of course be several
> hits once we emit VEC_COND_EXPRs after masked loads.
>
> The initial version of the patch looked "through" each condition individually.
> That caused the following problem on p10 during phiopt:
>
>   foo = blah <= 0
>   cond2: foo ? c : x
>   cond1: blah > 0 ? b : cond2
>   -> (match.pd:6205)
>   res = blah > 0 ? b : c
>
> which is invalid gimple (as blah > 0 is directly used and not put in
> a variable).

but cond1: blah > 0 ? b : cond2 is already invalid GIMPLE?  Was this
supposed to hit only on transitional pattern stmts?!

I don't see how

(for cond_op (COND_BINARY)
 (simplify
  (vec_cond @0 (view_convert? (cond_op @0 @1 @2 @3)) @4)
  (with { tree op_type = TREE_TYPE (@3); }
   (if (element_precision (type) == element_precision (op_type))
    (view_convert (cond_op @0 @1 @2 (view_convert:op_type @4))))))
 (simplify
  (vec_cond @0 @1 (view_convert? (cond_op @2 @3 @4 @5)))
  (with { tree op_type = TREE_TYPE (@5); }
   (if (inverse_conditions_p (@0, @2)
        && element_precision (type) == element_precision (op_type))
    (view_convert (cond_op @2 @3 @4 (view_convert:op_type @1)))))))

ever can match at the moment (on valid GIMPLE).  I'd open-code
inverse_conditions_p like

(for cmp (tcc_comparison)
     icmp (inverted_tcc_comparison)
 (vec_cond (cmp@0 @00 @01) @1
  (view_convert? (cond_op (icmp@2 @20 @21) @3 @4 @5))
  (if (invert_tree_comparison (cmp, HONOR_NANS (@00)) == icmp)
   ...

that's the way other patterns do it.

Richard.

> Therefore, for now, I restricted the SSA_NAME check to both conditions
> simultaneously so we don't run into this situation.  There must be a better
> way, though?
>
> Bootstrapped and regtested on x86, aarch64 and power10.
> Regtested on armv8.8-a+sve using qemu as well as riscv64.
>
> Regards
>  Robin
>
> gcc/ChangeLog:
>
>         * fold-const.cc (inverse_conditions_p): Look through SSA names.
>         * match.pd: Add VCOND_MASK to "cond" iterators.
> ---
>  gcc/fold-const.cc | 22 ++++++++++++++++++++++
>  gcc/match.pd      | 28 +++++++++++++++-------------
>  2 files changed, 37 insertions(+), 13 deletions(-)
>
> diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
> index 83c32dd10d4..1fc5d97dccc 100644
> --- a/gcc/fold-const.cc
> +++ b/gcc/fold-const.cc
> @@ -86,6 +86,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "vec-perm-indices.h"
>  #include "asan.h"
>  #include "gimple-range.h"
> +#include "cfgexpand.h"
>
>  /* Nonzero if we are folding constants inside an initializer or a C++
>     manifestly-constant-evaluated context; zero otherwise.
> @@ -3010,6 +3011,27 @@ compcode_to_comparison (enum comparison_code code)
>  bool
>  inverse_conditions_p (const_tree cond1, const_tree cond2)
>  {
> +  /* If both conditions are SSA names, look through them.
> +     Right now callees in match use one of the conditions directly and
> +     we might end up having one in a COND_EXPR like
> +       res = a > b ? c : d
> +     instead of
> +       cnd = a > b
> +       res = cnd ? c : d.
> +
> +     Therefore always consider both conditions simultaneously.  */
> +  if (TREE_CODE (cond1) == SSA_NAME
> +      && TREE_CODE (cond2) == SSA_NAME)
> +    {
> +      gimple *gcond1 = SSA_NAME_DEF_STMT (cond1);
> +      if (is_gimple_assign (gcond1))
> +       cond1 = gimple_assign_rhs_to_tree (gcond1);
> +
> +      gimple *gcond2 = SSA_NAME_DEF_STMT (cond2);
> +      if (is_gimple_assign (gcond2))
> +       cond2 = gimple_assign_rhs_to_tree (gcond2);
> +    }
> +
>    return (COMPARISON_CLASS_P (cond1)
>           && COMPARISON_CLASS_P (cond2)
>           && (invert_tree_comparison
> diff --git a/gcc/match.pd b/gcc/match.pd
> index cf359b0ec0f..f244e6deff5 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -5601,7 +5601,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  /* (a ? x : y) == (b ? x : y) --> (a^b) ? FALSE : TRUE  */
>  /* (a ? x : y) != (b ? y : x) --> (a^b) ? FALSE : TRUE  */
>  /* (a ? x : y) == (b ? y : x) --> (a^b) ? TRUE  : FALSE  */
> -(for cnd (cond vec_cond)
> +(for cnd (cond vec_cond IFN_VCOND_MASK)
>   (for eqne (eq ne)
>    (simplify
>     (eqne:c (cnd @0 @1 @2) (cnd @3 @1 @2))
> @@ -5614,14 +5614,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>
>  /* Canonicalize mask ? { 0, ... } : { -1, ...} to ~mask if the mask
>     types are compatible.  */
> -(simplify
> - (vec_cond @0 VECTOR_CST@1 VECTOR_CST@2)
> - (if (VECTOR_BOOLEAN_TYPE_P (type)
> -      && types_match (type, TREE_TYPE (@0)))
> -  (if (integer_zerop (@1) && integer_all_onesp (@2))
> -   (bit_not @0)
> -   (if (integer_all_onesp (@1) && integer_zerop (@2))
> -    @0))))
> +(for cnd (vec_cond IFN_VCOND_MASK)
> + (simplify
> +  (cnd @0 VECTOR_CST@1 VECTOR_CST@2)
> +  (if (VECTOR_BOOLEAN_TYPE_P (type)
> +       && types_match (type, TREE_TYPE (@0)))
> +   (if (integer_zerop (@1) && integer_all_onesp (@2))
> +    (bit_not @0)
> +    (if (integer_all_onesp (@1) && integer_zerop (@2))
> +     @0)))))
>
>  /* A few simplifications of "a ? CST1 : CST2".  */
>  /* NOTE: Only do this on gimple as the if-chain-to-switch
> @@ -6049,7 +6050,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>         { build_int_cst (integer_type_node, prec - 1);}))))))
>  #endif
>
> -(for cnd (cond vec_cond)
> +(for cnd (cond vec_cond IFN_VCOND_MASK)
>   /* (a != b) ? (a - b) : 0 -> (a - b) */
>   (simplify
>    (cnd (ne:c @0 @1) (minus@2 @0 @1) integer_zerop)
> @@ -6185,7 +6186,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (if (TYPE_UNSIGNED (type))
>    (cond (ge @0 @1) (negate @0) @2)))
>
> -(for cnd (cond vec_cond)
> +(for cnd (cond vec_cond IFN_VCOND_MASK)
>   /* A ? B : (A ? X : C) -> A ? B : C.  */
>   (simplify
>    (cnd @0 (cnd @0 @1 @2) @3)
> @@ -6210,8 +6211,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   /* A ? B : B -> B.  */
>   (simplify
>    (cnd @0 @1 @1)
> -  @1)
> +  @1))
>
> +(for cnd (cond vec_cond)
>   /* !A ? B : C -> A ? C : B.  */
>   (simplify
>    (cnd (logical_inverted_value truth_valued_p@0) @1 @2)
> @@ -6232,7 +6234,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>     Note that all these transformations are correct if A is
>     NaN, since the two alternatives (A and -A) are also NaNs.  */
>
> -(for cnd (cond vec_cond)
> +(for cnd (cond vec_cond IFN_VCOND_MASK)
>   /* A == 0 ? A : -A same as -A */
>   (for cmp (eq uneq)
>    (simplify
> --
> 2.45.2
>
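P.S. To spell out what the extended canonicalization would do for the
IFN variant, in dump syntax (a hypothetical example, assuming the mask
and the result share a compatible vector boolean type):

  _1 = .VCOND_MASK (mask, { 0, ... }, { -1, ... });

would fold to

  _1 = ~mask;

i.e. the same bit_not folding we already do for VEC_COND_EXPR.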