On Fri, Jan 23, 2026 at 2:54 PM Robin Dapp <[email protected]> wrote:
>
> Hi,
>
> Since allowing "unsupported" scales by just multiplying there was an
> issue with how the vec_offset was adjusted:
>
> For "real" gathers/scatters we have a separate vec_offset per stmt copy.
> For strided gather/scatter, however, there is just one vec_offset common
> to all copies.
>
> In case of an unsupported scale we need to multiply vec_offset with the
> required scale which is currently done like this:
>  vec_offset = vec_offset * scale_constant;
>
> Thus, for more than one copy of a strided gather/scatter, we will
> erroneously multiply an already scaled vec_offset.
>
> This patch only performs the vec_offset scaling
>  - for each copy in real gathers/scatters or
>  - once for the first copy for strided gathers/scatters.
>
> Bootstrapped and regtested on x86, power10, and aarch64.
> Regtested on riscv64.

OK.

Thanks,
Richard.

> Regards
>  Robin
>
>         PR tree-optimization/123767
>
> gcc/ChangeLog:
>
>         * tree-vect-stmts.cc (vectorizable_store): Only scale offset
>         once.
>         (vectorizable_load): Ditto.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/aarch64/sve/pr123767.c: New test.
> ---
>  .../gcc.target/aarch64/sve/pr123767.c         | 35 ++++++++++++++++
>  gcc/tree-vect-stmts.cc                        | 42 ++++++++++++-------
>  2 files changed, 61 insertions(+), 16 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr123767.c
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr123767.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pr123767.c
> new file mode 100644
> index 00000000000..5b123c05768
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr123767.c
> @@ -0,0 +1,35 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=armv9-a -O3 -mmax-vectorization 
> -msve-vector-bits=128 -mautovec-preference=sve-only -fdump-tree-vect-details" 
> } */
> +
> +struct partition_elem
> +{
> +  int class_element;
> +  struct partition_elem* next;
> +  unsigned class_count;
> +};
> +
> +typedef struct partition_def
> +{
> +  int num_elements;
> +  struct partition_elem elements[1];
> +} *partition;
> +
> +partition part;
> +
> +partition
> +partition_new (int num_elements)
> +{
> +  int e;
> +
> +  /* No need to allocate memory, just a compile test.  */
> +  for (e = 0; e < num_elements; ++e)
> +    {
> +      part->elements[e].class_element = e;
> +      part->elements[e].next = &(part->elements[e]);
> +      part->elements[e].class_count = 1;
> +    }
> +
> +  return part;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "\\{ 0, 576 \\}" "vect" } } */
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 5fc115e1a17..e65ccb05661 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -8972,14 +8972,19 @@ vectorizable_store (vec_info *vinfo,
>                       (&stmts, ls.supported_offset_vectype, vec_offset);
>                   if (ls.supported_scale)
>                     {
> -                     tree mult_cst = build_int_cst
> -                       (TREE_TYPE (TREE_TYPE (vec_offset)),
> -                        SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
> -                     tree mult = build_vector_from_val
> -                       (TREE_TYPE (vec_offset), mult_cst);
> -                     vec_offset = gimple_build
> -                       (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
> -                        vec_offset, mult);
> +                     /* Only scale the vec_offset if we haven't already.  */
> +                     if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
> +                         || j == 0)
> +                       {
> +                         tree mult_cst = build_int_cst
> +                           (TREE_TYPE (TREE_TYPE (vec_offset)),
> +                            SLP_TREE_GS_SCALE (slp_node) / 
> ls.supported_scale);
> +                         tree mult = build_vector_from_val
> +                           (TREE_TYPE (vec_offset), mult_cst);
> +                         vec_offset = gimple_build
> +                           (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
> +                            vec_offset, mult);
> +                       }
>                       scale = size_int (ls.supported_scale);
>                     }
>                   gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> @@ -10923,14 +10928,19 @@ vectorizable_load (vec_info *vinfo,
>                       (&stmts, ls.supported_offset_vectype, vec_offset);
>                   if (ls.supported_scale)
>                     {
> -                     tree mult_cst = build_int_cst
> -                       (TREE_TYPE (TREE_TYPE (vec_offset)),
> -                        SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
> -                     tree mult = build_vector_from_val
> -                       (TREE_TYPE (vec_offset), mult_cst);
> -                     vec_offset = gimple_build
> -                       (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
> -                        vec_offset, mult);
> +                     /* Only scale the vec_offset if we haven't already.  */
> +                     if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
> +                         || i == 0)
> +                       {
> +                         tree mult_cst = build_int_cst
> +                           (TREE_TYPE (TREE_TYPE (vec_offset)),
> +                            SLP_TREE_GS_SCALE (slp_node) / 
> ls.supported_scale);
> +                         tree mult = build_vector_from_val
> +                           (TREE_TYPE (vec_offset), mult_cst);
> +                         vec_offset = gimple_build
> +                           (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
> +                            vec_offset, mult);
> +                       }
>                       scale = size_int (ls.supported_scale);
>                     }
>                   gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> --
> 2.52.0

Reply via email to