On Thu, Jan 15, 2026 at 8:38 PM Alice Carlotti <[email protected]> wrote:
>
> The meaning of poly_int values changes depending on whether we are in
> streaming or non-streaming mode, but this dependency is not explicitly
> tracked. Locally-streaming functions can change streaming state in the
> prologue and epilogue, so it is unsafe to apply shrink wrapping to these
> functions, as doing so could change the mode seen by instructions like
> cntd.
>
>
> Is this OK for master and backport to affected branches?
>
> gcc/ChangeLog:
>
> PR target/123624
> * config/aarch64/aarch64.cc
> (aarch64_fndecl_enables_pstate_sm): New.
> (aarch64_cfun_enables_pstate_sm): Use the above function.
> (aarch64_fndecl_optimization): New helper.
> (aarch64_set_current_function): Disable unsafe shrink-wrapping.
I think this is the hard way to do this.
The easy approach is to return false from aarch64_use_return_insn_p if
you want to disable shrink wrapping for a function.
So just in aarch64_use_return_insn_p, add:
if (aarch64_cfun_enables_pstate_sm ())
return false;
If that does not work, then let's add a target hook that is used here,
because there might be other targets that want to disable shrink
wrapping based on attributes, and disabling it via the flag seems
like a hack.
Thanks,
Andrew Pinski
>
> gcc/testsuite/ChangeLog:
>
> PR target/123624
> * gcc.target/aarch64/sme/sme-shrinkwrap.c: New test.
>
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index
> 293afa52b3b38781b765ca939ed51c280313bab4..ccab1797306cfe5bee9d750e5eed4864e433a23f
> 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -2649,6 +2649,16 @@ aarch64_fndecl_pstate_sm (const_tree fndecl)
> return aarch64_fntype_pstate_sm (TREE_TYPE (fndecl));
> }
>
> +/* Return true if PSTATE.SM is 1 in the body of function FNDECL,
> + but is not guaranteed to be 1 on entry. */
> +
> +static bool
> +aarch64_fndecl_enables_pstate_sm (const_tree fndecl)
> +{
> + return (aarch64_fndecl_is_locally_streaming (fndecl)
> + && (aarch64_fntype_pstate_sm (TREE_TYPE (fndecl))
> + != AARCH64_ISA_MODE_SM_ON));
> +}
> /* Return true if function FNDECL has state STATE_NAME, either by creating
> new state itself or by sharing state with callers. */
>
> @@ -2728,8 +2738,7 @@ aarch64_cfun_has_new_state (const char *state_name)
> static bool
> aarch64_cfun_enables_pstate_sm ()
> {
> - return (aarch64_fndecl_is_locally_streaming (cfun->decl)
> - && aarch64_cfun_incoming_pstate_sm () != AARCH64_ISA_MODE_SM_ON);
> + return aarch64_fndecl_enables_pstate_sm (cfun->decl);
> }
>
> /* Return true if the current function has state STATE_NAME, either by
> @@ -20234,6 +20243,21 @@ aarch64_fndecl_options (tree fndecl)
> return target_option_default_node;
> }
>
> +/* Return the optimization_node for FNDECL, or the current optimization
> + if FNDECL is null. */
> +
> +static tree
> +aarch64_fndecl_optimization (tree fndecl)
> +{
> + if (!fndecl)
> + return optimization_current_node;
> +
> + if (tree optimization = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl))
> + return optimization;
> +
> + return optimization_default_node;
> +}
> +
> /* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions
> like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
> of the function, if such exists. This function may be called multiple
> @@ -20245,6 +20269,9 @@ aarch64_set_current_function (tree fndecl)
> {
> tree old_tree = aarch64_fndecl_options (aarch64_previous_fndecl);
> tree new_tree = aarch64_fndecl_options (fndecl);
> + tree old_optimization
> + = aarch64_fndecl_optimization (aarch64_previous_fndecl);
> + tree new_optimization = aarch64_fndecl_optimization (fndecl);
>
> auto new_isa_mode = (fndecl
> ? aarch64_fndecl_isa_mode (fndecl)
> @@ -20269,6 +20296,7 @@ aarch64_set_current_function (tree fndecl)
> the default have been handled by aarch64_save_restore_target_globals
> from
> aarch64_pragma_target_parse. */
> if (old_tree == new_tree
> + && old_optimization == new_optimization
> && (!fndecl || aarch64_previous_fndecl)
> && (isa_flags & AARCH64_FL_ISA_MODES).val[0] == new_isa_mode)
> {
> @@ -20286,6 +20314,8 @@ aarch64_set_current_function (tree fndecl)
> /* First set the target options. */
> cl_target_option_restore (&global_options, &global_options_set,
> TREE_TARGET_OPTION (new_tree));
> + cl_optimization_restore (&global_options, &global_options_set,
> + TREE_OPTIMIZATION (new_optimization));
>
> /* The ISA mode can vary based on function type attributes and
> function declaration attributes. Make sure that the target
> @@ -20301,14 +20331,21 @@ aarch64_set_current_function (tree fndecl)
> &global_options_set);
> DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_tree;
>
> - tree new_optimize = build_optimization_node (&global_options,
> - &global_options_set);
> - if (new_optimize != optimization_default_node)
> - DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
> }
>
> + /* Shrink-wrapping is unsafe when the function prologue and epilogue
> contain
> + streaming state changes, because the meaning of a poly_int depends
> + implicitly upon the current streaming state. */
> + if (fndecl && aarch64_fndecl_enables_pstate_sm (fndecl))
> + flag_shrink_wrap = 0;
> +
> aarch64_save_restore_target_globals (new_tree);
>
> + tree updated_optimization = build_optimization_node (&global_options,
> + &global_options_set);
> + if (updated_optimization != new_optimization)
> + DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = updated_optimization;
> +
> gcc_assert (AARCH64_ISA_MODE == new_isa_mode);
> }
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sme-shrinkwrap.c
> b/gcc/testsuite/gcc.target/aarch64/sme/sme-shrinkwrap.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..540521ef790f44dc86cb0f3a282eac2a75719e9e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sme/sme-shrinkwrap.c
> @@ -0,0 +1,72 @@
> +/* { dg-options "-O3 -fshrink-wrap" } */
> +/* { dg-do run { target { aarch64_sme_hw && aarch64_sve_hw } } } */
> +/* { dg-do compile { target { ! { aarch64_sme_hw && aarch64_sve_hw } } } } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include <arm_sme.h>
> +
> +#pragma GCC target "+sve"
> +
> +[[gnu::noipa]]
> +int callee (int x)
> +{
> + return 0;
> +}
> +
> +/*
> +** foo:
> +** cbnz w0, [^\n]*
> +** cntd x0
> +** ret
> +** ...
> +*/
> +__arm_streaming
> +int foo(int x)
> +{
> + if (x)
> + return callee(3);
> + return svcntd();
> +}
> +
> +/*
> +** bar:
> +** sub [^\n]*
> +** cntd [^\n]*
> +** str [^\n]*
> +** stp [^\n]*
> +** stp [^\n]*
> +** stp [^\n]*
> +** stp [^\n]*
> +** smstart [^\n]*
> +** ...
> +*/
> +__arm_locally_streaming
> +int bar(int x)
> +{
> + if (x)
> + return callee(3);
> + return svcntd();
> +}
> +
> +/*
> +** baz:
> +** cbnz w0, [^\n]*
> +** cntd x0
> +** ret
> +** ...
> +*/
> +__arm_streaming
> +int baz(int x)
> +{
> + if (x)
> + return callee(3);
> + return svcntd();
> +}
> +
> +[[gnu::noipa]]
> +int main()
> +{
> + if (bar(0) != svcntsd())
> + __builtin_abort();
> + return 0;
> +}