On 23/01/2026 13:54, Robin Dapp wrote:
Hi,
Since allowing "unsupported" scales by just multiplying there was an
issue with how the vec_offset was adjusted:
For "real" gathers/scatters we have a separate vec_offset per stmt copy.
For strided gather/scatter, however, there is just one vec_offset common
to all copies.
In case of an unsupported scale we need to multiply vec_offset with the
required scale which is currently done like this:
vec_offset = vec_offset * scale_constant;
Thus, for more than one copy of a strided gather/scatter, we will
erroneously multiply an already scaled vec_offset.
This patch only performs the vec_offset scaling
- for each copy in real gathers/scatters or
- once for the first copy for strided gathers/scatters.
Bootstrapped and regtested on x86, power10, and aarch64.
Regtested on riscv64.
Regards
Robin
PR tree-optimization/123767
Just to confirm this patch fixes the original SEGFAULT that caused the
PR and FWIW LGTM given my fairly basic understanding of whats going on.
Thank you,
Alfie
gcc/ChangeLog:
* tree-vect-stmts.cc (vectorizable_store): Only scale offset
once.
(vectorizable_load): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/sve/pr123767.c: New test.
---
.../gcc.target/aarch64/sve/pr123767.c | 35 ++++++++++++++++
gcc/tree-vect-stmts.cc | 42 ++++++++++++-------
2 files changed, 61 insertions(+), 16 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr123767.c
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr123767.c
b/gcc/testsuite/gcc.target/aarch64/sve/pr123767.c
new file mode 100644
index 00000000000..5b123c05768
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr123767.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv9-a -O3 -mmax-vectorization -msve-vector-bits=128
-mautovec-preference=sve-only -fdump-tree-vect-details" } */
+
+struct partition_elem
+{
+ int class_element;
+ struct partition_elem* next;
+ unsigned class_count;
+};
+
+typedef struct partition_def
+{
+ int num_elements;
+ struct partition_elem elements[1];
+} *partition;
+
+partition part;
+
+partition
+partition_new (int num_elements)
+{
+ int e;
+
+ /* No need to allocate memory, just a compile test. */
+ for (e = 0; e < num_elements; ++e)
+ {
+ part->elements[e].class_element = e;
+ part->elements[e].next = &(part->elements[e]);
+ part->elements[e].class_count = 1;
+ }
+
+ return part;
+}
+
+/* { dg-final { scan-tree-dump-not "\\{ 0, 576 \\}" "vect" } } */
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 5fc115e1a17..e65ccb05661 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8972,14 +8972,19 @@ vectorizable_store (vec_info *vinfo,
(&stmts, ls.supported_offset_vectype, vec_offset);
if (ls.supported_scale)
{
- tree mult_cst = build_int_cst
- (TREE_TYPE (TREE_TYPE (vec_offset)),
- SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
- tree mult = build_vector_from_val
- (TREE_TYPE (vec_offset), mult_cst);
- vec_offset = gimple_build
- (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
- vec_offset, mult);
+ /* Only scale the vec_offset if we haven't already. */
+ if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
+ || j == 0)
+ {
+ tree mult_cst = build_int_cst
+ (TREE_TYPE (TREE_TYPE (vec_offset)),
+ SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
+ tree mult = build_vector_from_val
+ (TREE_TYPE (vec_offset), mult_cst);
+ vec_offset = gimple_build
+ (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
+ vec_offset, mult);
+ }
scale = size_int (ls.supported_scale);
}
gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
@@ -10923,14 +10928,19 @@ vectorizable_load (vec_info *vinfo,
(&stmts, ls.supported_offset_vectype, vec_offset);
if (ls.supported_scale)
{
- tree mult_cst = build_int_cst
- (TREE_TYPE (TREE_TYPE (vec_offset)),
- SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
- tree mult = build_vector_from_val
- (TREE_TYPE (vec_offset), mult_cst);
- vec_offset = gimple_build
- (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
- vec_offset, mult);
+ /* Only scale the vec_offset if we haven't already. */
+ if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
+ || i == 0)
+ {
+ tree mult_cst = build_int_cst
+ (TREE_TYPE (TREE_TYPE (vec_offset)),
+ SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
+ tree mult = build_vector_from_val
+ (TREE_TYPE (vec_offset), mult_cst);
+ vec_offset = gimple_build
+ (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
+ vec_offset, mult);
+ }
scale = size_int (ls.supported_scale);
}
gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);