https://gcc.gnu.org/g:0970ff46ba6330fc80e8736fc05b2eaeeae0b6a0

commit r15-1244-g0970ff46ba6330fc80e8736fc05b2eaeeae0b6a0
Author: Richard Sandiford <richard.sandif...@arm.com>
Date:   Thu Jun 13 12:48:21 2024 +0100

    aarch64: Fix invalid nested subregs [PR115464]
    
    The testcase extracts one arm_neon.h vector from a pair (one subreg)
    and then reinterprets the result as an SVE vector (another subreg).
    Each subreg makes sense individually, but we can't fold them together
    into a single subreg: it's 32 bytes -> 16 bytes -> 16*N bytes,
    but the interpretation of 32 bytes -> 16*N bytes depends on
    whether N==1 or N>1.
    
    Since the second subreg makes sense individually, simplify_subreg
    should bail out rather than ICE on it.  simplify_gen_subreg will
    then do the same (because it already checks validate_subreg).
    This leaves simplify_gen_subreg returning null, requiring the
    caller to take appropriate action.
    
    I think this is relatively likely to occur elsewhere, so the patch
    adds a helper for forcing a subreg, allowing a temporary pseudo to
    be created where necessary.
    
    I'll follow up by using force_subreg in more places.  This patch
    is intended to be a minimal backportable fix for the PR.
    
    gcc/
            PR target/115464
            * simplify-rtx.cc (simplify_context::simplify_subreg): Don't try
            to fold two subregs together if their relationship isn't known
            at compile time.
            * explow.h (force_subreg): Declare.
            * explow.cc (force_subreg): New function.
            * config/aarch64/aarch64-sve-builtins-base.cc
            (svset_neonq_impl::expand): Use it instead of simplify_gen_subreg.
    
    gcc/testsuite/
            PR target/115464
            * gcc.target/aarch64/sve/acle/general/pr115464.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins-base.cc           |  2 +-
 gcc/explow.cc                                             | 15 +++++++++++++++
 gcc/explow.h                                              |  2 ++
 gcc/simplify-rtx.cc                                       |  5 +++++
 .../gcc.target/aarch64/sve/acle/general/pr115464.c        | 13 +++++++++++++
 5 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc 
b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index dea2f6e6bfc4..823d60040f9a 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -1174,7 +1174,7 @@ public:
        Advanced SIMD argument as an SVE vector.  */
     if (!BYTES_BIG_ENDIAN
        && is_undef (CALL_EXPR_ARG (e.call_expr, 0)))
-      return simplify_gen_subreg (mode, e.args[1], GET_MODE (e.args[1]), 0);
+      return force_subreg (mode, e.args[1], GET_MODE (e.args[1]), 0);
 
     rtx_vector_builder builder (VNx16BImode, 16, 2);
     for (unsigned int i = 0; i < 16; i++)
diff --git a/gcc/explow.cc b/gcc/explow.cc
index 8e5f6b8e6804..f6843398c4b0 100644
--- a/gcc/explow.cc
+++ b/gcc/explow.cc
@@ -745,6 +745,21 @@ force_reg (machine_mode mode, rtx x)
   return temp;
 }
 
+/* Like simplify_gen_subreg, but force OP into a new register if the
+   subreg cannot be formed directly.  */
+
+rtx
+force_subreg (machine_mode outermode, rtx op,
+             machine_mode innermode, poly_uint64 byte)
+{
+  rtx x = simplify_gen_subreg (outermode, op, innermode, byte);
+  if (x)
+    return x;
+
+  op = copy_to_mode_reg (innermode, op);
+  return simplify_gen_subreg (outermode, op, innermode, byte);
+}
+
 /* If X is a memory ref, copy its contents to a new temp reg and return
    that reg.  Otherwise, return X.  */
 
diff --git a/gcc/explow.h b/gcc/explow.h
index 16aa02cfb689..cbd1fcb7eb34 100644
--- a/gcc/explow.h
+++ b/gcc/explow.h
@@ -42,6 +42,8 @@ extern rtx copy_to_suggested_reg (rtx, rtx, machine_mode);
    Args are mode (in case value is a constant) and the value.  */
 extern rtx force_reg (machine_mode, rtx);
 
+extern rtx force_subreg (machine_mode, rtx, machine_mode, poly_uint64);
+
 /* Return given rtx, copied into a new temp reg if it was in memory.  */
 extern rtx force_not_mem (rtx);
 
diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index 3ee95f74d3db..35ba54c62921 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -7737,6 +7737,11 @@ simplify_context::simplify_subreg (machine_mode 
outermode, rtx op,
       poly_uint64 innermostsize = GET_MODE_SIZE (innermostmode);
       rtx newx;
 
+      /* Make sure that the relationship between the two subregs is
+        known at compile time.  */
+      if (!ordered_p (outersize, innermostsize))
+       return NULL_RTX;
+
       if (outermode == innermostmode
          && known_eq (byte, 0U)
          && known_eq (SUBREG_BYTE (op), 0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464.c
new file mode 100644
index 000000000000..d728d1325edb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464.c
@@ -0,0 +1,13 @@
+/* { dg-options "-O2" } */
+
+#include <arm_neon.h>
+#include <arm_sve.h>
+#include <arm_neon_sve_bridge.h>
+
+svuint16_t
+convolve4_4_x (uint16x8x2_t permute_tbl)
+{
+    return svset_neonq_u16 (svundef_u16 (), permute_tbl.val[1]);
+}
+
+/* { dg-final { scan-assembler {\tmov\tz0\.d, z1\.d\n} } } */

Reply via email to