On Tue, Sep 9, 2025 at 3:31 AM <[email protected]> wrote:
>
> From: Pan Li <[email protected]>
>
> The widening-mul will insert a cast for the widen-mul, the
> function build_and_insert_cast is design to take care of it.
>
> In some case the optimized gimple has some unnecessary cast,
> for example as below code.
>
> #define SAT_U_MUL_FMT_5(NT, WT) \
> NT __attribute__((noinline)) \
> sat_u_mul_##NT##_from_##WT##_fmt_5 (NT a, NT b) \
> { \
> WT x = (WT)a * (WT)b; \
> NT hi = x >> (sizeof(NT) * 8); \
> NT lo = (NT)x; \
> return lo | -!!hi; \
> }
>
> SAT_U_MUL_FMT_5(uint64_t, uint128_t)
>
> There will be a additional cast to uint128_t after optimized,
> this patch would like to refine this by checking the def of
> the rhs cast, if it comes from a cast with less or equal
> precision, the rhs of the def will be leveraged.
>
> Before this patch:
> 29 │ _1 = (__int128 unsigned) a_8(D);
> 30 │ _2 = (__int128 unsigned) b_9(D);
> 31 │ _35 = (unsigned long) _1;
> 32 │ _34 = (unsigned long) _2;
> 33 │ x_10 = _35 w* _34;
>
> After this patch:
> 27 │ _35 = (unsigned long) a_8(D);
> 28 │ _34 = (unsigned long) b_9(D);
> 29 │ x_10 = _35 w* _34;
>
> gcc/ChangeLog:
>
> * tree-ssa-math-opts.cc (build_and_insert_cast): Refine
> the cast insert by check the rhs of val.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/sat/widen-mul-0.c: New test.
> * gcc.target/riscv/sat/widen-mul-1.c: New test.
> * gcc.target/riscv/sat/widen-mul-2.c: New test.
> * gcc.target/riscv/sat/widen-mul-3.c: New test.
> * gcc.target/riscv/sat/widen-mul.h: New test.
>
> Signed-off-by: Pan Li <[email protected]>
> ---
> .../gcc.target/riscv/sat/widen-mul-0.c | 8 +++++++
> .../gcc.target/riscv/sat/widen-mul-1.c | 8 +++++++
> .../gcc.target/riscv/sat/widen-mul-2.c | 8 +++++++
> .../gcc.target/riscv/sat/widen-mul-3.c | 8 +++++++
> .../gcc.target/riscv/sat/widen-mul.h | 15 +++++++++++++
> gcc/tree-ssa-math-opts.cc | 21 ++++++++++++++++++-
> 6 files changed, 67 insertions(+), 1 deletion(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/sat/widen-mul-0.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/sat/widen-mul-1.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/sat/widen-mul-2.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/sat/widen-mul-3.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/sat/widen-mul.h
>
> diff --git a/gcc/testsuite/gcc.target/riscv/sat/widen-mul-0.c
> b/gcc/testsuite/gcc.target/riscv/sat/widen-mul-0.c
> new file mode 100644
> index 00000000000..1074fa9d5f4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/sat/widen-mul-0.c
> @@ -0,0 +1,8 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gc -mabi=lp64d -fdump-tree-optimized" } */
> +
> +#include "widen-mul.h"
> +
> +SAT_U_MUL_FMT_5(uint8_t, uint128_t)
> +
> +/* { dg-final { scan-tree-dump-not " = (__int128 unsigned) " "optimized" } }
> */
> diff --git a/gcc/testsuite/gcc.target/riscv/sat/widen-mul-1.c
> b/gcc/testsuite/gcc.target/riscv/sat/widen-mul-1.c
> new file mode 100644
> index 00000000000..5f8f0dd733c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/sat/widen-mul-1.c
> @@ -0,0 +1,8 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gc -mabi=lp64d -fdump-tree-optimized" } */
> +
> +#include "widen-mul.h"
> +
> +SAT_U_MUL_FMT_5(uint16_t, uint128_t)
> +
> +/* { dg-final { scan-tree-dump-not " = (__int128 unsigned) " "optimized" } }
> */
> diff --git a/gcc/testsuite/gcc.target/riscv/sat/widen-mul-2.c
> b/gcc/testsuite/gcc.target/riscv/sat/widen-mul-2.c
> new file mode 100644
> index 00000000000..4c54cc07b53
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/sat/widen-mul-2.c
> @@ -0,0 +1,8 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gc -mabi=lp64d -fdump-tree-optimized" } */
> +
> +#include "widen-mul.h"
> +
> +SAT_U_MUL_FMT_5(uint32_t, uint128_t)
> +
> +/* { dg-final { scan-tree-dump-not " = (__int128 unsigned) " "optimized" } }
> */
> diff --git a/gcc/testsuite/gcc.target/riscv/sat/widen-mul-3.c
> b/gcc/testsuite/gcc.target/riscv/sat/widen-mul-3.c
> new file mode 100644
> index 00000000000..d3dd97104b7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/sat/widen-mul-3.c
> @@ -0,0 +1,8 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gc -mabi=lp64d -fdump-tree-optimized" } */
> +
> +#include "widen-mul.h"
> +
> +SAT_U_MUL_FMT_5(uint64_t, uint128_t)
> +
> +/* { dg-final { scan-tree-dump-not " = (__int128 unsigned) " "optimized" } }
> */
> diff --git a/gcc/testsuite/gcc.target/riscv/sat/widen-mul.h
> b/gcc/testsuite/gcc.target/riscv/sat/widen-mul.h
> new file mode 100644
> index 00000000000..a0a44084e58
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/sat/widen-mul.h
> @@ -0,0 +1,15 @@
> +#include <stdint.h>
> +
> +#if __riscv_xlen == 64
> +typedef unsigned __int128 uint128_t;
> +#endif
> +
> +#define SAT_U_MUL_FMT_5(NT, WT) \
> +NT __attribute__((noinline)) \
> +sat_u_mul_##NT##_from_##WT##_fmt_5 (NT a, NT b) \
> +{ \
> + WT x = (WT)a * (WT)b; \
> + NT hi = x >> (sizeof(NT) * 8); \
> + NT lo = (NT)x; \
> + return lo | -!!hi; \
> +}
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index bfad4cf5c99..51b512771a7 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -1632,7 +1632,26 @@ build_and_insert_cast (gimple_stmt_iterator *gsi,
> location_t loc,
> tree type, tree val)
> {
> tree result = make_ssa_name (type);
> - gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
> + tree rhs = val;
> +
> + if (TREE_CODE (val) == SSA_NAME)
> + {
> + gimple *def = SSA_NAME_DEF_STMT (val);
> +
> + if (is_gimple_assign (def)
> + && gimple_assign_rhs_code (def) == NOP_EXPR)
&& CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))
> + {
> + tree cast_rhs = gimple_assign_rhs1 (def);
> + unsigned rhs_prec = TYPE_PRECISION (TREE_TYPE (cast_rhs));
> + unsigned type_prec = TYPE_PRECISION (type);
> +
> + if (type_prec >= rhs_prec)
I think you need to verify the intermediate precision (that of 'val')
is also >= rhs_prec, otherwise we risk to lose the effect of a truncation.
OK with such check added.
Thanks,
Richard.
> + rhs = cast_rhs;
> + }
> + }
> +
> + gassign *stmt = gimple_build_assign (result, NOP_EXPR, rhs);
> +
> gimple_set_location (stmt, loc);
> gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
> return result;
> --
> 2.43.0
>