On Tue, 2 Dec 2025, Tamar Christina wrote:

> The example in the PR
> 
> #include <vector>
> 
> std::vector<bool> x, y;
> int main() { return x == y; }
> 
> now vectorizes but the attributes on std::vector indicate that the vector is
> aligned to the natural vector alignment.  In C this is equivalent to the
> testcase
> 
> int f (int a[12], int b[12], int n)
> {
>     a = __builtin_assume_aligned (a, 16);
>     b = __builtin_assume_aligned (b, 16);
>     for (int i = 0; i < n; i++)
>       {
>         if (b[i] == 0)
>           return 0;
>         if (a[0] > b[i])
>           return 1;
>       }
>     return 2;
> }
> 
> Here the load a[0] is loop invariant, and the vectorizer hoists this out of 
> the
> loop into the pre-header.  For early break this isn't safe to do as a[0] is
> conditionally valid based on the conditions in the block preceding it.  As 
> such
> we need some guarantee that the load is valid before we can hoist it or the 
> load
> needs to be unconditional (e.g. in the loop header block).
> 
> Conceptually alignment peeling can provide this guarantee since making it
> through the prologue means the invariant value was loaded at least once and so
> we know the address is valid.  At the moment however there's no real defined
> order between how GCC inserts conditions in the pre-header, so having tried to
> change the order a few times the load always ends up before the prologue.  So
> for now I marked it as a missed optimization.
> 
> Since we still can hoist invariant loads if in the header, I didn't change
> LOOP_VINFO_NO_DATA_DEPENDENCIES since that would be global and instead I
> modified the usage site of LOOP_VINFO_NO_DATA_DEPENDENCIES.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu,
> arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
> -m32, -m64 and no issues.
> 
> Pushed to master.
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>       PR tree-optimization/122868
>       * tree-vect-stmts.cc (vectorizable_load): Don't hoist loop invariant
>       conditional loads unless in header.
> 
> gcc/testsuite/ChangeLog:
> 
>       PR tree-optimization/122868
>       * gcc.dg/vect/vect-early-break_140-pr122868_1.c: New test.
>       * gcc.dg/vect/vect-early-break_140-pr122868_2.c: New test.
>       * gcc.dg/vect/vect-early-break_140-pr122868_3.c: New test.
>       * gcc.dg/vect/vect-early-break_140-pr122868_4.c: New test.
> 
> ---
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_1.c 
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_1.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..80264bd4f31c85d3eaca11430c7edeabcb635296
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_1.c
> @@ -0,0 +1,39 @@
> +/* { dg-add-options vect_early_break } */
> +/* { dg-require-effective-target vect_sizes_16B_8B } */
> +/* { dg-require-effective-target vect_early_break_hw } */
> +/* { dg-require-effective-target vect_int } */
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +
> +#include "tree-vect.h"
> +
> +__attribute__ ((noipa))
> +int f (int a[12], int b[12], int n)
> +{
> +#ifdef __arm__
> +    a = __builtin_assume_aligned (a, 8);
> +    b = __builtin_assume_aligned (b, 8);
> +#else
> +    a = __builtin_assume_aligned (a, 16);
> +    b = __builtin_assume_aligned (b, 16);
> +#endif
> +    for (int i = 0; i < n; i++)
> +      {
> +        if (b[i] == 0)
> +          return 0;
> +        if (a[0] > b[i])
> +          return 1;
> +      }
> +    return 2;
> +}
> +
> +int main ()
> +{
> +   check_vect ();
> +
> +   int *a = 0;
> +   int b[12] = {0};
> +   return f (a, b, 10);
> +}
> +
> +/* { dg-final { scan-tree-dump "not hoisting invariant load due to early 
> break" "vect" } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_2.c 
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_2.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..90222fcffd7c98a4187053326cd6f88bfd2bcb63
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_2.c
> @@ -0,0 +1,31 @@
> +/* { dg-add-options vect_early_break } */
> +/* { dg-require-effective-target vect_early_break_hw } */
> +/* { dg-require-effective-target vect_int } */
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +
> +#include "tree-vect.h"
> +
> +__attribute__ ((noipa))
> +int f (int a[12], int b[12], int n)
> +{
> +    for (int i = 0; i < n; i++)
> +      {
> +        if (b[i] == 0)
> +          return 0;
> +        if (a[0] > b[i])
> +          return 1;
> +      }
> +    return 2;
> +}
> +
> +int main ()
> +{
> +   check_vect ();
> +
> +   int *a = 0;
> +   int b[12] = {0};
> +   return f (a, b, 10);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "not hoisting invariant load due to 
> early break" 0 "vect" { xfail *-*-* } } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_3.c 
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_3.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..670804f8ce537a1381714a44e4b1d42b66ed6b61
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_3.c
> @@ -0,0 +1,39 @@
> +/* { dg-add-options vect_early_break } */
> +/* { dg-require-effective-target vect_sizes_16B_8B } */
> +/* { dg-require-effective-target vect_early_break_hw } */
> +/* { dg-require-effective-target vect_int } */
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +
> +#include "tree-vect.h"
> +
> +__attribute__ ((noipa))
> +int f (int a[12], int b[12], int n)
> +{
> +#ifdef __arm__
> +    a = __builtin_assume_aligned (a, 8);
> +    b = __builtin_assume_aligned (b, 8);
> +#else
> +    a = __builtin_assume_aligned (a, 16);
> +    b = __builtin_assume_aligned (b, 16);
> +#endif
> +    for (int i = 0; i < n; i++)
> +      {
> +        if (a[0] > b[i])
> +          return 0;
> +        if (b[i] == 0)
> +          return 1;
> +      }
> +    return 2;
> +}
> +
> +int main ()
> +{
> +   check_vect ();
> +
> +   int a[12] = {1};
> +   int b[12] = {0};
> +   return f (a, b, 10);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "not hoisting invariant load due to 
> early break" 0 "vect" } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_4.c 
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_4.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..de2aff287f4fa146ef8cb7e476f63a877e51fedf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_4.c
> @@ -0,0 +1,31 @@
> +/* { dg-add-options vect_early_break } */
> +/* { dg-require-effective-target vect_early_break_hw } */
> +/* { dg-require-effective-target vect_int } */
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +
> +#include "tree-vect.h"
> +
> +__attribute__ ((noipa))
> +int f (int a[12], int b[12], int n)
> +{
> +    for (int i = 0; i < n; i++)
> +      {
> +        if (a[0] > b[i])
> +          return 0;
> +        if (b[i] == 0)
> +          return 0;
> +      }
> +    return 2;
> +}
> +
> +int main ()
> +{
> +   check_vect ();
> +
> +   int a[12] = {1};
> +   int b[12] = {0};
> +   return f (a, b, 10);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "not hoisting invariant load due to 
> early break" 0 "vect" } } */
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 
> 1d7e50afcde1096d5598b43ab8d49454eb68385b..a47bbd3345b1e291d0d3ae571cf5666b66b02706
>  100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -9880,6 +9880,34 @@ vectorizable_load (vec_info *vinfo,
>        transform time.  */
>        bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
>                     && !nested_in_vect_loop);
> +
> +      /* It is unsafe to hoist a conditional load over the conditions that 
> make
> +      it valid.  When early break this means that any invariant load can't be
> +      hoisted unless it's in the loop header or if we know something else has
> +      verified the load is valid to do.  Alignment peeling would do this
> +      since getting through the prologue means the load was done at least
> +      once and so the vector main body is free to hoist it.  However today
> +      GCC will hoist the load above the PFA loop.  As such that makes it
> +      still invalid and so we can't allow it today.  */
> +      auto stmt_bb
> +     = gimple_bb (STMT_VINFO_STMT (
> +                     vect_orig_stmt (SLP_TREE_SCALAR_STMTS (slp_node)[0])));
> +      if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
> +       && !DR_SCALAR_KNOWN_BOUNDS (dr_info)
> +       && stmt_bb != loop->header)
> +     {
> +       if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
> +           && dump_enabled_p ())
> +         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                          "not hoisting invariant load due to early break"
> +                          "constraints\n");
> +       else if (dump_enabled_p ())
> +         dump_printf_loc (MSG_NOTE, vect_location,
> +                          "not hoisting invariant load due to early break"
> +                          "constraints\n");
> +       hoist_p = false;
> +     }
> +
>        bool uniform_p = true;
>        for (stmt_vec_info sinfo : SLP_TREE_SCALAR_STMTS (slp_node))
>       {

I'd do the above in this loop as you are assuming that all invariant
loads in the group behave the same which isn't guaranteed, esp.
the DR_SCALAR_KNOWN_BOUNDS check which means dr_info is also not
the same across members in the SLP node.

Richard.

> 
> 
> 

-- 
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Jochen Jaser, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to