On 23/01/2026 13:54, Robin Dapp wrote:
Hi,

Since we started allowing "unsupported" scales by just multiplying, there
has been an issue with how the vec_offset is adjusted:

For "real" gathers/scatters we have a separate vec_offset per stmt copy.
For strided gather/scatter, however, there is just one vec_offset common
to all copies.

In case of an unsupported scale we need to multiply vec_offset with the
required scale which is currently done like this:
  vec_offset = vec_offset * scale_constant;

Thus, for more than one copy of a strided gather/scatter, we will
erroneously multiply an already scaled vec_offset.

This patch only performs the vec_offset scaling
  - for each copy in real gathers/scatters or
  - once for the first copy for strided gathers/scatters.

Bootstrapped and regtested on x86, power10, and aarch64.
Regtested on riscv64.

Regards
  Robin

        PR tree-optimization/123767

Just to confirm: this patch fixes the original segfault that prompted the PR and, FWIW, LGTM given my fairly basic understanding of what's going on.

Thank you,
Alfie


gcc/ChangeLog:

        * tree-vect-stmts.cc (vectorizable_store): Only scale offset
        once.
        (vectorizable_load): Ditto.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/sve/pr123767.c: New test.
---
  .../gcc.target/aarch64/sve/pr123767.c         | 35 ++++++++++++++++
  gcc/tree-vect-stmts.cc                        | 42 ++++++++++++-------
  2 files changed, 61 insertions(+), 16 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr123767.c

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr123767.c b/gcc/testsuite/gcc.target/aarch64/sve/pr123767.c
new file mode 100644
index 00000000000..5b123c05768
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr123767.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv9-a -O3 -mmax-vectorization -msve-vector-bits=128 -mautovec-preference=sve-only -fdump-tree-vect-details" } */
+
+struct partition_elem
+{
+  int class_element;
+  struct partition_elem* next;
+  unsigned class_count;
+};
+
+typedef struct partition_def
+{
+  int num_elements;
+  struct partition_elem elements[1];
+} *partition;
+
+partition part;
+
+partition
+partition_new (int num_elements)
+{
+  int e;
+
+  /* No need to allocate memory, just a compile test.  */
+  for (e = 0; e < num_elements; ++e)
+    {
+      part->elements[e].class_element = e;
+      part->elements[e].next = &(part->elements[e]);
+      part->elements[e].class_count = 1;
+    }
+
+  return part;
+}
+
+/* { dg-final { scan-tree-dump-not "\\{ 0, 576 \\}" "vect" } } */
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 5fc115e1a17..e65ccb05661 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8972,14 +8972,19 @@ vectorizable_store (vec_info *vinfo,
                      (&stmts, ls.supported_offset_vectype, vec_offset);
                  if (ls.supported_scale)
                    {
-                     tree mult_cst = build_int_cst
-                       (TREE_TYPE (TREE_TYPE (vec_offset)),
-                        SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
-                     tree mult = build_vector_from_val
-                       (TREE_TYPE (vec_offset), mult_cst);
-                     vec_offset = gimple_build
-                       (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
-                        vec_offset, mult);
+                     /* Only scale the vec_offset if we haven't already.  */
+                     if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
+                         || j == 0)
+                       {
+                         tree mult_cst = build_int_cst
+                           (TREE_TYPE (TREE_TYPE (vec_offset)),
+                            SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
+                         tree mult = build_vector_from_val
+                           (TREE_TYPE (vec_offset), mult_cst);
+                         vec_offset = gimple_build
+                           (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
+                            vec_offset, mult);
+                       }
                      scale = size_int (ls.supported_scale);
                    }
                  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
@@ -10923,14 +10928,19 @@ vectorizable_load (vec_info *vinfo,
                      (&stmts, ls.supported_offset_vectype, vec_offset);
                  if (ls.supported_scale)
                    {
-                     tree mult_cst = build_int_cst
-                       (TREE_TYPE (TREE_TYPE (vec_offset)),
-                        SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
-                     tree mult = build_vector_from_val
-                       (TREE_TYPE (vec_offset), mult_cst);
-                     vec_offset = gimple_build
-                       (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
-                        vec_offset, mult);
+                     /* Only scale the vec_offset if we haven't already.  */
+                     if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
+                         || i == 0)
+                       {
+                         tree mult_cst = build_int_cst
+                           (TREE_TYPE (TREE_TYPE (vec_offset)),
+                            SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
+                         tree mult = build_vector_from_val
+                           (TREE_TYPE (vec_offset), mult_cst);
+                         vec_offset = gimple_build
+                           (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
+                            vec_offset, mult);
+                       }
                      scale = size_int (ls.supported_scale);
                    }
                  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

Reply via email to