On Mon, Apr 28, 2025 at 3:35 PM <[email protected]> wrote:
>
> From: Pan Li <[email protected]>
>
> This patch would like to support the form 7 of the unsigned
> integer SAT_ADD, aka below example.
>
> #define DEF_SAT_U_ADD_FMT_7(WT, T) \
> T __attribute__((noinline)) \
> sat_u_add_##WT##_##T##_fmt_7(T x, T y) \
> { \
> T max = -1; \
> WT val = (WT)x + (WT)y; \
> return val > max ? max : (T)val; \
> }
>
> DEF_SAT_U_ADD_FMT_7(uint64_t, uint32_t)
>
> If we take -O3 build with -fdump-tree-optimized, we will have
>
> Before this patch:
> 5 │ __attribute__((noinline))
> 6 │ uint32_t sat_u_add_uint64_t_uint32_t_fmt_7 (uint32_t x, uint32_t y)
> 7 │ {
> 8 │ uint64_t val;
> 9 │ long unsigned int _1;
> 10 │ long unsigned int _2;
> 11 │ uint32_t _3;
> 12 │ uint32_t _7;
> 13 │
> 14 │ <bb 2> [local count: 1073741824]:
> 15 │ _1 = (long unsigned int) x_4(D);
> 16 │ _2 = (long unsigned int) y_5(D);
> 17 │ val_6 = _1 + _2;
> 18 │ if (val_6 <= 4294967295)
> 19 │ goto <bb 3>; [65.00%]
> 20 │ else
> 21 │ goto <bb 4>; [35.00%]
> 22 │
> 23 │ <bb 3> [local count: 697932184]:
> 24 │ _7 = x_4(D) + y_5(D);
> 25 │
> 26 │ <bb 4> [local count: 1073741824]:
> 27 │ # _3 = PHI <4294967295(2), _7(3)>
> 28 │ return _3;
> 29 │
> 30 │ }
>
> After this patch:
> 4 │ __attribute__((noinline))
> 5 │ uint32_t sat_u_add_uint64_t_uint32_t_fmt_7 (uint32_t x, uint32_t y)
> 6 │ {
> 7 │ uint32_t _3;
> 8 │
> 9 │ <bb 2> [local count: 1073741824]:
> 10 │ _3 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
> 11 │ return _3;
> 12 │
> 13 │ }
>
> This change also effects on vector mode too.
>
> The below test suites are passed for this patch.
> * The rv64gcv fully regression test.
> * The x86 bootstrap test.
> * The x86 fully regression test.
>
> gcc/ChangeLog:
>
> * match.pd: Add form 7 matching pattern for unsigned integer
> SAT_ADD.
>
> Signed-off-by: Pan Li <[email protected]>
> ---
> gcc/match.pd | 16 +++++++++++++++-
> 1 file changed, 15 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index ba036e52837..e63e2783d79 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3241,7 +3241,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> SAT_U_ADD = IMAGPART (SUM) != 0 ? -1 : REALPART (SUM) */
> (cond^ (ne (imagpart (IFN_ADD_OVERFLOW@2 @0 INTEGER_CST@1)) integer_zerop)
> integer_minus_onep (realpart @2))
> - (if (types_match (type, @0) && int_fits_type_p (@1, type)))))
> + (if (types_match (type, @0) && int_fits_type_p (@1, type))))
> + (match (unsigned_integer_sat_add @0 @1)
> + /* WIDEN_SUM = (WT)X + (WT)Y
> + SAT_U_ADD = WIDEN_SUM > MAX ? MAX : (NT)WIDEN_SUM */
> + (cond^ (le (plus:c (convert@2 @0) (convert@3 @1)) INTEGER_CST@4)
> + (plus:c @0 @1) integer_minus_onep)
I think it's enough to put :c on one of the (plus
OK with that change.
> + (if (types_match (type, @0, @1) && types_match (@2, @3))
> + (with
> + {
> + unsigned precision = TYPE_PRECISION (type);
> + unsigned widen_precision = TYPE_PRECISION (TREE_TYPE (@2));
> + wide_int max = wi::mask (precision, false, widen_precision);
> + wide_int c4 = wi::to_wide (@4);
> + }
> + (if (wi::eq_p (c4, max) && widen_precision > precision))))))
>
> /* Saturation sub for unsigned integer. */
> (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type))
> --
> 2.43.0
>