On Tue, May 13, 2025 at 5:23 AM liuhongt <[email protected]> wrote:
>
> Updated in V2
> >
> > Can you instead of mangling in float support use separate (match like
> > for the below cases?
> I tried, but reported duplicated defination since they share same pattern
> like
>
> (cond (simple_comparison@6 @0 @1) (convert@4 @2) (convert@5 @3))
>
> No idea how to split that.
Hmm, OK.
> >
> > > @@ -11308,6 +11311,50 @@ and,
> > > && single_use (@4)
> > > && single_use (@5))))
> > >
> > > +(match (cond_expr_convert_p @0 @2 @3 @6)
> > > + (cond (simple_comparison@6 @0 @1) (float@4 @2) (float@5 @3))
> > > + (if (SCALAR_FLOAT_TYPE_P (type) && !flag_trapping_math
> > > + && TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (@0))
> >
> > so this fails to constrain the comparison types (above we check
> > INTEGRAL_TYPE_P),
> > if it happens to be a vector type using TYPE_PRECISION will ICE.
> >
> > I think the main intent of the vectorizer pattern is to match up the
> > _size_ of the
> > vector elements, so maybe re-formulate the constraint this way with
> > operand_equal_p (TYPE_SIZE (type), TYPE_SIZE (TREE_TYPE (@0)))
> >
> Changed.
> > This is also because precision on floats is not equal to the number of bits
> > in
> > the mode.
> >
> > > + && TYPE_PRECISION (TREE_TYPE (@0))
> > > + == TYPE_PRECISION (TREE_TYPE (@2))
> > > + && INTEGRAL_TYPE_P (TREE_TYPE (@2))
> > > + && TREE_TYPE (@2) == TREE_TYPE (@3)
> > > + && single_use (@4)
> > > + && single_use (@5))))
> > > +
> > > +(match (cond_expr_convert_p @0 @2 @3 @6)
> > > + (cond (simple_comparison@6 @0 @1) (fix_trunc@4 @2) (fix_trunc@5 @3))
> > > + (if (INTEGRAL_TYPE_P (type) && !flag_trapping_math
> > > + && TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (@0))
> > > + && TYPE_PRECISION (TREE_TYPE (@0))
> > > + == TYPE_PRECISION (TREE_TYPE (@2))
> > > + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (@2))
> > > + && TREE_TYPE (@2) == TREE_TYPE (@3)
> >
> > Please use types_match () instead of TREE_TYPE pointer compares.
> Changed.
> >
> > > + && single_use (@4)
> > > + && single_use (@5))))
> > > +
> > > +(match (cond_expr_convert_p @0 @2 @3 @6)
> > > + (cond (simple_comparison@6 @0 @1) (REAL_CST@2) (convert@5 @3))
> >
> > I think the same issue exists for INTEGER_CSTs.
> INTEGER_CSTs are already handled by vect_recog_over_widening_pattern.
> >
> > > + (if ((INTEGRAL_TYPE_P (type)
> > > + || (!flag_trapping_math && SCALAR_FLOAT_TYPE_P (type)))
> > > + && TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (@0))
> > > + && TYPE_PRECISION (TREE_TYPE (@0))
> > > + == TYPE_PRECISION (TREE_TYPE (@3))
> > > + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (@3))
> > > + && single_use (@5)
> > > + && const_unop (CONVERT_EXPR, TREE_TYPE (@3), @2))))
> >
> > I'm not sure this is a good check? Say, for type == double and
> > typeof(@3) == float
> > the REAL_CST can have extra precision that you'd drop when rewriting this as
> > (double)(cnd ? (float)@2 : @3). You'd need to check the REAL_CST is exactly
> > representable in the type of @3. Same for a possible integer case. Same
> > for
> If the REAL_CST is exactly reprensentable, const_unop can return
> corresponding tree.
> Otherwise NULL_TREE is returned.
That's not true - this is only the case for -frounding-math, otherwise we
happily truncate.
> > handling fix_trunc/float when one case is a constant.
> >
> > Can you split the patch into two to separate out the handling of constants?
> Changed.
>
>
> Bootstrapped and regtested separately for the first patch and the second
> on x86_64-pc-linux-gnu{-m32,}.
>
> Ok for trunk?
The patch below is OK.
Thanks,
Richard.
> For floating point, !flag_trapping_math is needed for the pattern
> which transforms 2 conversions to 1 conversion, and may lose 1
> potential trap. There shouldn't be any accuracy issue.
>
> gcc/ChangeLog:
>
> PR tree-optimization/103771
> * match.pd (cond_expr_convert_p): Extend the match to handle
> scalar floating point type.
> * tree-vect-patterns.cc
> (vect_recog_cond_expr_convert_pattern): Handle floating point
> type.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr103771-4.c: New test.
> ---
> gcc/match.pd | 52 +++++++++++---
> gcc/testsuite/gcc.target/i386/pr103771-4.c | 82 ++++++++++++++++++++++
> gcc/tree-vect-patterns.cc | 8 ++-
> 3 files changed, 131 insertions(+), 11 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/pr103771-4.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index ab496d923cc..789e3d33326 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -11294,26 +11294,58 @@ and,
> (match (ctz_table_index @1 @2 @3)
> (rshift (mult (bit_and:c (negate @1) @1) INTEGER_CST@2) INTEGER_CST@3))
>
> +/* Floatint point/integer comparison and integer->integer
> + or floating point -> float point conversion. */
> (match (cond_expr_convert_p @0 @2 @3 @6)
> (cond (simple_comparison@6 @0 @1) (convert@4 @2) (convert@5 @3))
> - (if (INTEGRAL_TYPE_P (type)
> - && INTEGRAL_TYPE_P (TREE_TYPE (@2))
> - && INTEGRAL_TYPE_P (TREE_TYPE (@0))
> - && INTEGRAL_TYPE_P (TREE_TYPE (@3))
> - && TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (@0))
> - && TYPE_PRECISION (TREE_TYPE (@0))
> - == TYPE_PRECISION (TREE_TYPE (@2))
> - && TYPE_PRECISION (TREE_TYPE (@0))
> - == TYPE_PRECISION (TREE_TYPE (@3))
> + (if ((INTEGRAL_TYPE_P (type)
> + || (!flag_trapping_math && SCALAR_FLOAT_TYPE_P (type)))
> + && ((INTEGRAL_TYPE_P (TREE_TYPE (@2))
> + && INTEGRAL_TYPE_P (TREE_TYPE (@3)))
> + || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (@2))
> + && types_match (TREE_TYPE (@2), TREE_TYPE (@3))))
> + && !operand_equal_p (TYPE_SIZE (type), TYPE_SIZE (TREE_TYPE (@0)))
> + && operand_equal_p (TYPE_SIZE (TREE_TYPE (@0)),
> + TYPE_SIZE (TREE_TYPE (@2)))
> + && operand_equal_p (TYPE_SIZE (TREE_TYPE (@0)),
> + TYPE_SIZE (TREE_TYPE (@3)))
> /* For vect_recog_cond_expr_convert_pattern, @2 and @3 can differ in
> signess when convert is truncation, but not ok for extension since
> it's sign_extend vs zero_extend. */
> - && (TYPE_PRECISION (TREE_TYPE (@0)) > TYPE_PRECISION (type)
> + && (known_gt (tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (@0))),
> + tree_to_poly_uint64 (TYPE_SIZE (type)))
> || (TYPE_UNSIGNED (TREE_TYPE (@2))
> == TYPE_UNSIGNED (TREE_TYPE (@3))))
> && single_use (@4)
> && single_use (@5))))
>
> +/* Floating point or integer comparison and integer to floating point
> + conversion. */
> +(match (cond_expr_convert_p @0 @2 @3 @6)
> + (cond (simple_comparison@6 @0 @1) (float@4 @2) (float@5 @3))
> + (if (SCALAR_FLOAT_TYPE_P (type) && !flag_trapping_math
> + && INTEGRAL_TYPE_P (TREE_TYPE (@2))
> + && types_match (TREE_TYPE (@2), TREE_TYPE (@3))
> + && !operand_equal_p (TYPE_SIZE (type), TYPE_SIZE (TREE_TYPE (@0)))
> + && operand_equal_p (TYPE_SIZE (TREE_TYPE (@0)),
> + TYPE_SIZE (TREE_TYPE (@2)))
> + && single_use (@4)
> + && single_use (@5))))
> +
> +/* Floating point or integer comparison and floating point to integer
> + conversion. */
> +(match (cond_expr_convert_p @0 @2 @3 @6)
> + (cond (simple_comparison@6 @0 @1) (fix_trunc@4 @2) (fix_trunc@5 @3))
> + (if (INTEGRAL_TYPE_P (type) && !flag_trapping_math
> + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (@2))
> + && types_match (TREE_TYPE (@2), TREE_TYPE (@3))
> + && !operand_equal_p (TYPE_SIZE (type),
> + TYPE_SIZE (TREE_TYPE (@0)))
> + && operand_equal_p (TYPE_SIZE (TREE_TYPE (@0)),
> + TYPE_SIZE (TREE_TYPE (@2)))
> + && single_use (@4)
> + && single_use (@5))))
> +
> (for bit_op (bit_and bit_ior bit_xor)
> (match (bitwise_induction_p @0 @2 @3)
> (bit_op:c
> diff --git a/gcc/testsuite/gcc.target/i386/pr103771-4.c
> b/gcc/testsuite/gcc.target/i386/pr103771-4.c
> new file mode 100644
> index 00000000000..299337dce6d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr103771-4.c
> @@ -0,0 +1,82 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=x86-64-v4 -Ofast -fdump-tree-vect-details" } */
> +/* { dg-final { scan-assembler-not "kshift" { target { ! ia32 } } } } */
> +/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors"
> 6 "vect" { target { ! ia32 } } } } */
> +
> +void
> +foo (float* a, float* b, int* c, int* d, long long* __restrict e, int n)
> +{
> + for (int i = 0 ; i != n; i++)
> + {
> + long long tmp = c[i];
> + long long tmp2 = d[i];
> + if (a[i] < b[i])
> + tmp = tmp2;
> + e[i] = tmp;
> + }
> +}
> +
> +void
> +foo1 (double* a, double* b, long long* c, long long* d, int* __restrict e,
> int n)
> +{
> + for (int i = 0 ; i != n; i++)
> + {
> + int tmp = (int)c[i];
> + int tmp2 = (int)d[i];
> + if (a[i] < b[i])
> + tmp = tmp2;
> + e[i] = tmp;
> + }
> +}
> +
> +void
> +foo2 (float* a, float* b, int* c, int* d, double* __restrict e, int n)
> +{
> + for (int i = 0 ; i != n; i++)
> + {
> + double tmp = c[i];
> + double tmp2 = d[i];
> + if (a[i] < b[i])
> + tmp = tmp2;
> + e[i] = tmp;
> + }
> +}
> +
> +void
> +foo3 (double* a, double* b, long long* c, long long* d, float* __restrict e,
> int n)
> +{
> + for (int i = 0 ; i != n; i++)
> + {
> + float tmp = c[i];
> + float tmp2 = d[i];
> + if (a[i] < b[i])
> + tmp = tmp2;
> + e[i] = tmp;
> + }
> +}
> +
> +void
> +foo4 (int* a, int* b, int* c, int* d, double* __restrict e, int n)
> +{
> + for (int i = 0 ; i != n; i++)
> + {
> + double tmp = c[i];
> + double tmp2 = d[i];
> + if (a[i] < b[i])
> + tmp = tmp2;
> + e[i] = tmp;
> + }
> +}
> +
> +void
> +foo5 (long long* a, long long* b, long long* c, long long* d, float*
> __restrict e, int n)
> +{
> + for (int i = 0 ; i != n; i++)
> + {
> + float tmp = c[i];
> + float tmp2 = d[i];
> + if (a[i] < b[i])
> + tmp = tmp2;
> + e[i] = tmp;
> + }
> +}
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index ca19add83c0..d8484766cf7 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -1098,6 +1098,7 @@ vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
> tree lhs, match[4], temp, type, new_lhs, op2;
> gimple *cond_stmt;
> gimple *pattern_stmt;
> + enum tree_code code = NOP_EXPR;
>
> if (!last_stmt)
> return NULL;
> @@ -1111,6 +1112,11 @@ vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
>
> vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
>
> + if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs)))
> + code = INTEGRAL_TYPE_P (TREE_TYPE (match[1])) ? FLOAT_EXPR :
> CONVERT_EXPR;
> + else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (match[1])))
> + code = FIX_TRUNC_EXPR;
> +
> op2 = match[2];
> type = TREE_TYPE (match[1]);
> if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
> @@ -1127,7 +1133,7 @@ vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
> append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
> get_vectype_for_scalar_type (vinfo, type));
> new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
> - pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
> + pattern_stmt = gimple_build_assign (new_lhs, code, temp);
> *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
>
> if (dump_enabled_p ())
> --
> 2.34.1
>