https://gcc.gnu.org/g:2c1c2485a4b1aca746ac693041e51ea6da5c64ca

commit r14-9836-g2c1c2485a4b1aca746ac693041e51ea6da5c64ca
Author: Richard Sandiford <richard.sandif...@arm.com>
Date:   Mon Apr 8 16:53:32 2024 +0100

    aarch64: Fix expansion of svsudot [PR114607]
    
    Not sure how this happened, but: svsudot is supposed to be expanded
    as USDOT with the operands swapped.  However, a thinko in the
    expansion of svsudot meant that the arguments weren't in fact
    swapped; the attempted swap was just a no-op.  And the testcases
    blithely accepted that.
    
    gcc/
            PR target/114607
            * config/aarch64/aarch64-sve-builtins-base.cc
            (svusdot_impl::expand): Fix botched attempt to swap the operands
            for svsudot.
    
    gcc/testsuite/
            PR target/114607
            * gcc.target/aarch64/sve/acle/asm/sudot_s32.c: Update expected output.

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins-base.cc           | 2 +-
 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sudot_s32.c | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 5be2315a3c6..0d2edf3f19e 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -2809,7 +2809,7 @@ public:
        version) is through the USDOT instruction but with the second and third
        inputs swapped.  */
     if (m_su)
-      e.rotate_inputs_left (1, 2);
+      e.rotate_inputs_left (1, 3);
     /* The ACLE function has the same order requirements as for svdot.
        While there's no requirement for the RTL pattern to have the same sort
        of order as that for <sur>dot_prod, it's easier to read.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sudot_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sudot_s32.c
index 4b452619eee..e06b69affab 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sudot_s32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sudot_s32.c
@@ -6,7 +6,7 @@
 
 /*
 ** sudot_s32_tied1:
-**     usdot   z0\.s, z2\.b, z4\.b
+**     usdot   z0\.s, z4\.b, z2\.b
 **     ret
 */
 TEST_TRIPLE_Z (sudot_s32_tied1, svint32_t, svint8_t, svuint8_t,
@@ -17,7 +17,7 @@ TEST_TRIPLE_Z (sudot_s32_tied1, svint32_t, svint8_t, svuint8_t,
 ** sudot_s32_tied2:
 **     mov     (z[0-9]+)\.d, z0\.d
 **     movprfx z0, z4
-**     usdot   z0\.s, z2\.b, \1\.b
+**     usdot   z0\.s, \1\.b, z2\.b
 **     ret
 */
 TEST_TRIPLE_Z_REV (sudot_s32_tied2, svint32_t, svint8_t, svuint8_t,
@@ -27,7 +27,7 @@ TEST_TRIPLE_Z_REV (sudot_s32_tied2, svint32_t, svint8_t, svuint8_t,
 /*
 ** sudot_w0_s32_tied:
 **     mov     (z[0-9]+\.b), w0
-**     usdot   z0\.s, z2\.b, \1
+**     usdot   z0\.s, \1, z2\.b
 **     ret
 */
 TEST_TRIPLE_ZX (sudot_w0_s32_tied, svint32_t, svint8_t, uint8_t,
@@ -37,7 +37,7 @@ TEST_TRIPLE_ZX (sudot_w0_s32_tied, svint32_t, svint8_t, uint8_t,
 /*
 ** sudot_9_s32_tied:
 **     mov     (z[0-9]+\.b), #9
-**     usdot   z0\.s, z2\.b, \1
+**     usdot   z0\.s, \1, z2\.b
 **     ret
 */
 TEST_TRIPLE_Z (sudot_9_s32_tied, svint32_t, svint8_t, uint8_t,

Reply via email to