On Fri, Jan 23, 2026 at 2:54 PM Robin Dapp <[email protected]> wrote:
>
> Hi,
>
> Since allowing "unsupported" scales by just multiplying there was an
> issue with how the vec_offset was adjusted:
>
> For "real" gathers/scatters we have a separate vec_offset per stmt copy.
> For strided gather/scatter, however, there is just one vec_offset common
> to all copies.
>
> In case of an unsupported scale we need to multiply vec_offset with the
> required scale which is currently done like this:
> vec_offset = vec_offset * scale_constant;
>
> Thus, for more than one copy of a strided gather/scatter, we will
> erroneously multiply an already scaled vec_offset.
>
> This patch only performs the vec_offset scaling
> - for each copy in real gathers/scatters or
> - once for the first copy for strided gathers/scatters.
>
> Bootstrapped and regtested on x86, power10, and aarch64.
> Regtested on riscv64.
OK.
Thanks,
Richard.
> Regards
> Robin
>
> PR tree-optimization/123767
>
> gcc/ChangeLog:
>
> * tree-vect-stmts.cc (vectorizable_store): Only scale offset
> once.
> (vectorizable_load): Ditto.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/sve/pr123767.c: New test.
> ---
> .../gcc.target/aarch64/sve/pr123767.c | 35 ++++++++++++++++
> gcc/tree-vect-stmts.cc | 42 ++++++++++++-------
> 2 files changed, 61 insertions(+), 16 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr123767.c
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr123767.c
> b/gcc/testsuite/gcc.target/aarch64/sve/pr123767.c
> new file mode 100644
> index 00000000000..5b123c05768
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr123767.c
> @@ -0,0 +1,35 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=armv9-a -O3 -mmax-vectorization
> -msve-vector-bits=128 -mautovec-preference=sve-only -fdump-tree-vect-details"
> } */
> +
> +struct partition_elem
> +{
> + int class_element;
> + struct partition_elem* next;
> + unsigned class_count;
> +};
> +
> +typedef struct partition_def
> +{
> + int num_elements;
> + struct partition_elem elements[1];
> +} *partition;
> +
> +partition part;
> +
> +partition
> +partition_new (int num_elements)
> +{
> + int e;
> +
> + /* No need to allocate memory, just a compile test. */
> + for (e = 0; e < num_elements; ++e)
> + {
> + part->elements[e].class_element = e;
> + part->elements[e].next = &(part->elements[e]);
> + part->elements[e].class_count = 1;
> + }
> +
> + return part;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "\\{ 0, 576 \\}" "vect" } } */
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 5fc115e1a17..e65ccb05661 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -8972,14 +8972,19 @@ vectorizable_store (vec_info *vinfo,
> (&stmts, ls.supported_offset_vectype, vec_offset);
> if (ls.supported_scale)
> {
> - tree mult_cst = build_int_cst
> - (TREE_TYPE (TREE_TYPE (vec_offset)),
> - SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
> - tree mult = build_vector_from_val
> - (TREE_TYPE (vec_offset), mult_cst);
> - vec_offset = gimple_build
> - (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
> - vec_offset, mult);
> + /* Only scale the vec_offset if we haven't already. */
> + if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
> + || j == 0)
> + {
> + tree mult_cst = build_int_cst
> + (TREE_TYPE (TREE_TYPE (vec_offset)),
> + SLP_TREE_GS_SCALE (slp_node) /
> ls.supported_scale);
> + tree mult = build_vector_from_val
> + (TREE_TYPE (vec_offset), mult_cst);
> + vec_offset = gimple_build
> + (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
> + vec_offset, mult);
> + }
> scale = size_int (ls.supported_scale);
> }
> gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> @@ -10923,14 +10928,19 @@ vectorizable_load (vec_info *vinfo,
> (&stmts, ls.supported_offset_vectype, vec_offset);
> if (ls.supported_scale)
> {
> - tree mult_cst = build_int_cst
> - (TREE_TYPE (TREE_TYPE (vec_offset)),
> - SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
> - tree mult = build_vector_from_val
> - (TREE_TYPE (vec_offset), mult_cst);
> - vec_offset = gimple_build
> - (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
> - vec_offset, mult);
> + /* Only scale the vec_offset if we haven't already. */
> + if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
> + || i == 0)
> + {
> + tree mult_cst = build_int_cst
> + (TREE_TYPE (TREE_TYPE (vec_offset)),
> + SLP_TREE_GS_SCALE (slp_node) /
> ls.supported_scale);
> + tree mult = build_vector_from_val
> + (TREE_TYPE (vec_offset), mult_cst);
> + vec_offset = gimple_build
> + (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
> + vec_offset, mult);
> + }
> scale = size_int (ls.supported_scale);
> }
> gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> --
> 2.52.0