The LD3/ST3 and LD4/ST4 address cost code had no test coverage (oops).
This patch fixes that and updates it for the new structure modes.
The test only covers Advanced SIMD because SVE doesn't have
post-increment forms.

Tested on aarch64-linux-gnu & pushed.

Richard


gcc/
        * config/aarch64/aarch64.c (aarch64_ldn_stn_vectors): New function.
        (aarch64_address_cost): Use it instead of testing for CImode and
        XImode directly.

gcc/testsuite/
        * gcc.target/aarch64/neoverse_v1_1.c: New test.
---
 gcc/config/aarch64/aarch64.c                  | 22 +++++++++++++++++--
 .../gcc.target/aarch64/neoverse_v1_1.c        | 15 +++++++++++++
 2 files changed, 35 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/neoverse_v1_1.c

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index fdf05505846..19f67415234 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3108,6 +3108,23 @@ aarch64_vl_bytes (machine_mode mode, unsigned int vec_flags)
   return BYTES_PER_SVE_PRED;
 }
 
+/* If MODE holds an array of vectors, return the number of vectors
+   in the array, otherwise return 1.  */
+
+static unsigned int
+aarch64_ldn_stn_vectors (machine_mode mode)
+{
+  unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+  if (vec_flags == (VEC_ADVSIMD | VEC_PARTIAL | VEC_STRUCT))
+    return exact_div (GET_MODE_SIZE (mode), 8).to_constant ();
+  if (vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
+    return exact_div (GET_MODE_SIZE (mode), 16).to_constant ();
+  if (vec_flags == (VEC_SVE_DATA | VEC_STRUCT))
+    return exact_div (GET_MODE_SIZE (mode),
+                     BYTES_PER_SVE_VECTOR).to_constant ();
+  return 1;
+}
+
 /* Given an Advanced SIMD vector mode MODE and a tuple size NELEMS, return the
    corresponding vector structure mode.  */
 static opt_machine_mode
@@ -12511,9 +12528,10 @@ aarch64_address_cost (rtx x,
          cost += addr_cost->pre_modify;
        else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
          {
-           if (mode == CImode)
+           unsigned int nvectors = aarch64_ldn_stn_vectors (mode);
+           if (nvectors == 3)
              cost += addr_cost->post_modify_ld3_st3;
-           else if (mode == XImode)
+           else if (nvectors == 4)
              cost += addr_cost->post_modify_ld4_st4;
            else
              cost += addr_cost->post_modify;
diff --git a/gcc/testsuite/gcc.target/aarch64/neoverse_v1_1.c b/gcc/testsuite/gcc.target/aarch64/neoverse_v1_1.c
new file mode 100644
index 00000000000..c1563f01861
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/neoverse_v1_1.c
@@ -0,0 +1,15 @@
+/* { dg-options "-O2 -mcpu=neoverse-v1" } */
+
+void
+foo (short *restrict x, short y[restrict][128])
+{
+  for (int i = 0; i < 128; ++i)
+    {
+      y[0][i] = x[i * 3 + 0];
+      y[1][i] = x[i * 3 + 1];
+      y[2][i] = x[i * 3 + 2];
+    }
+}
+
+/* This shouldn't be a post-increment.  */
+/* { dg-final { scan-assembler {ld3\t{[^{}]*}, \[x[0-9]+\]\n} } } */
-- 
2.25.1

Reply via email to