On Thu, Jan 15, 2026 at 8:38 PM Alice Carlotti <[email protected]> wrote:
>
> The meaning of poly_int values changes depending on whether we are in
> streaming or non-streaming mode, but this dependency is not explicitly
> tracked.  Locally-streaming functions can change streaming state in the
> prologue and epilogue, so it is unsafe to apply shrink wrapping to these
> functions, as doing so could change the mode seen by instructions like
> cntd.
>
>
> Is this OK for master and backport to affected branches?
>
> gcc/ChangeLog:
>
>         PR target/123624
>         * config/aarch64/aarch64.cc
>         (aarch64_fndecl_enables_pstate_sm): New.
>         (aarch64_cfun_enables_pstate_sm): Use the above function.
>         (aarch64_fndecl_optimization): New helper.
>         (aarch64_set_current_function): Disable unsafe shrink-wrapping.

I think this is the hard way to do this.
The easy approach is to return false from aarch64_use_return_insn_p if
you want to disable shrink wrapping for a function.
So just in aarch64_use_return_insn_p, add:
if (aarch64_cfun_enables_pstate_sm ())
  return false;

If that does not work, then let's add a target hook that is used here,
because there might be other targets that want to disable shrink
wrapping based on attributes, and disabling it via the flag seems
like a hack.

Thanks,
Andrew Pinski


>
> gcc/testsuite/ChangeLog:
>
>         PR target/123624
>         * gcc.target/aarch64/sme/sme-shrinkwrap.c: New test.
>
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 293afa52b3b38781b765ca939ed51c280313bab4..ccab1797306cfe5bee9d750e5eed4864e433a23f 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -2649,6 +2649,16 @@ aarch64_fndecl_pstate_sm (const_tree fndecl)
>    return aarch64_fntype_pstate_sm (TREE_TYPE (fndecl));
>  }
>
> +/* Return true if PSTATE.SM is 1 in the body of function FNDECL,
> +   but is not guaranteed to be 1 on entry.  */
> +
> +static bool
> +aarch64_fndecl_enables_pstate_sm (const_tree fndecl)
> +{
> +  return (aarch64_fndecl_is_locally_streaming (fndecl)
> +         && (aarch64_fntype_pstate_sm (TREE_TYPE (fndecl))
> +             != AARCH64_ISA_MODE_SM_ON));
> +}
>  /* Return true if function FNDECL has state STATE_NAME, either by creating
>     new state itself or by sharing state with callers.  */
>
> @@ -2728,8 +2738,7 @@ aarch64_cfun_has_new_state (const char *state_name)
>  static bool
>  aarch64_cfun_enables_pstate_sm ()
>  {
> -  return (aarch64_fndecl_is_locally_streaming (cfun->decl)
> -         && aarch64_cfun_incoming_pstate_sm () != AARCH64_ISA_MODE_SM_ON);
> +  return aarch64_fndecl_enables_pstate_sm (cfun->decl);
>  }
>
>  /* Return true if the current function has state STATE_NAME, either by
> @@ -20234,6 +20243,21 @@ aarch64_fndecl_options (tree fndecl)
>    return target_option_default_node;
>  }
>
> +/* Return the optimization_node for FNDECL, or the current optimization
> +   if FNDECL is null.  */
> +
> +static tree
> +aarch64_fndecl_optimization (tree fndecl)
> +{
> +  if (!fndecl)
> +    return optimization_current_node;
> +
> +  if (tree optimization = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl))
> +    return optimization;
> +
> +  return optimization_default_node;
> +}
> +
>  /* Implement TARGET_SET_CURRENT_FUNCTION.  Unpack the codegen decisions
>     like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
>     of the function, if such exists.  This function may be called multiple
> @@ -20245,6 +20269,9 @@ aarch64_set_current_function (tree fndecl)
>  {
>    tree old_tree = aarch64_fndecl_options (aarch64_previous_fndecl);
>    tree new_tree = aarch64_fndecl_options (fndecl);
> +  tree old_optimization
> +    = aarch64_fndecl_optimization (aarch64_previous_fndecl);
> +  tree new_optimization = aarch64_fndecl_optimization (fndecl);
>
>    auto new_isa_mode = (fndecl
>                        ? aarch64_fndecl_isa_mode (fndecl)
> @@ -20269,6 +20296,7 @@ aarch64_set_current_function (tree fndecl)
>       the default have been handled by aarch64_save_restore_target_globals from
>       aarch64_pragma_target_parse.  */
>    if (old_tree == new_tree
> +      && old_optimization == new_optimization
>        && (!fndecl || aarch64_previous_fndecl)
>        && (isa_flags & AARCH64_FL_ISA_MODES).val[0] == new_isa_mode)
>      {
> @@ -20286,6 +20314,8 @@ aarch64_set_current_function (tree fndecl)
>    /* First set the target options.  */
>    cl_target_option_restore (&global_options, &global_options_set,
>                             TREE_TARGET_OPTION (new_tree));
> +  cl_optimization_restore (&global_options, &global_options_set,
> +                          TREE_OPTIMIZATION (new_optimization));
>
>    /* The ISA mode can vary based on function type attributes and
>       function declaration attributes.  Make sure that the target
> @@ -20301,14 +20331,21 @@ aarch64_set_current_function (tree fndecl)
>                                            &global_options_set);
>        DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_tree;
>
> -      tree new_optimize = build_optimization_node (&global_options,
> -                                                  &global_options_set);
> -      if (new_optimize != optimization_default_node)
> -       DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
>      }
>
> +  /* Shrink-wrapping is unsafe when the function prologue and epilogue contain
> +     streaming state changes, because the meaning of a poly_int depends
> +     implicitly upon the current streaming state.  */
> +  if (fndecl && aarch64_fndecl_enables_pstate_sm (fndecl))
> +      flag_shrink_wrap = 0;
> +
>    aarch64_save_restore_target_globals (new_tree);
>
> +  tree updated_optimization = build_optimization_node (&global_options,
> +                                                  &global_options_set);
> +  if (updated_optimization != new_optimization)
> +    DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = updated_optimization;
> +
>    gcc_assert (AARCH64_ISA_MODE == new_isa_mode);
>  }
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sme-shrinkwrap.c b/gcc/testsuite/gcc.target/aarch64/sme/sme-shrinkwrap.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..540521ef790f44dc86cb0f3a282eac2a75719e9e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sme/sme-shrinkwrap.c
> @@ -0,0 +1,72 @@
> +/* { dg-options "-O3 -fshrink-wrap" } */
> +/* { dg-do run { target { aarch64_sme_hw && aarch64_sve_hw } } } */
> +/* { dg-do compile { target { ! { aarch64_sme_hw && aarch64_sve_hw } } } } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include <arm_sme.h>
> +
> +#pragma GCC target "+sve"
> +
> +[[gnu::noipa]]
> +int callee (int x)
> +{
> +  return 0;
> +}
> +
> +/*
> +** foo:
> +**     cbnz    w0, [^\n]*
> +**     cntd    x0
> +**     ret
> +**     ...
> +*/
> +__arm_streaming
> +int foo(int x)
> +{
> +    if (x)
> +        return callee(3);
> +    return svcntd();
> +}
> +
> +/*
> +** bar:
> +**     sub     [^\n]*
> +**     cntd    [^\n]*
> +**     str     [^\n]*
> +**     stp     [^\n]*
> +**     stp     [^\n]*
> +**     stp     [^\n]*
> +**     stp     [^\n]*
> +**     smstart [^\n]*
> +**     ...
> +*/
> +__arm_locally_streaming
> +int bar(int x)
> +{
> +    if (x)
> +        return callee(3);
> +    return svcntd();
> +}
> +
> +/*
> +** baz:
> +**     cbnz    w0, [^\n]*
> +**     cntd    x0
> +**     ret
> +**     ...
> +*/
> +__arm_streaming
> +int baz(int x)
> +{
> +    if (x)
> +        return callee(3);
> +    return svcntd();
> +}
> +
> +[[gnu::noipa]]
> +int main()
> +{
> +  if (bar(0) != svcntsd())
> +    __builtin_abort();
> +  return 0;
> +}

Reply via email to