https://gcc.gnu.org/g:e1d9eaf68079e943e8884b5e7861b5b21853eea1
commit r16-7642-ge1d9eaf68079e943e8884b5e7861b5b21853eea1 Author: Alice Carlotti <[email protected]> Date: Tue Dec 30 08:53:10 2025 +0000 aarch64: Adjust sme tests that require sve Some SME tests currently require SVE to be enabled as well. Either mark this requirement explicitly in the test file (with a pragma), or eliminate it by modifying the test. For some check-function-bodies tests we take both approaches - that is, we add "+sve" to the existing test, and create a duplicate "+nosve" test that checks for the use of __arm_get_current_vg in the prologue. For vect-dotprod-twoway.c, the issue is that the number of udot/sdot/whilelo instructions depends on the selected tuning option. Pass an explicit -mtune option to preserve the expected codegen. gcc/testsuite/ChangeLog: * g++.target/aarch64/sme/exceptions_2.C: Add +sve pragma. * g++.target/aarch64/sme/exceptions_2_nosve.C: New test. * gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_sc.c: Remove svbool_t parameters. * gcc.target/aarch64/sme/acle-asm/ldr_za_sc.c: Ditto. * gcc.target/aarch64/sme/acle-asm/str_vnum_za_sc.c: Ditto. * gcc.target/aarch64/sme/acle-asm/str_za_sc.c: Ditto. * gcc.target/aarch64/sme/acle-asm/test_sme_acle.h (TEST_LOAD_ZA_NOPRED, TEST_STORE_ZA_NOPRED): New macros. * gcc.target/aarch64/sme/call_sm_switch_1.c: Add +sve pragma. * gcc.target/aarch64/sme/call_sm_switch_1_nosve.c: New test. * gcc.target/aarch64/sme/call_sm_switch_3.c: Add +sve pragma. * gcc.target/aarch64/sme/call_sm_switch_3_nosve.c: New test. * gcc.target/aarch64/sme/call_sm_switch_5.c: Add +sve pragma. * gcc.target/aarch64/sme/call_sm_switch_6.c: Add +sve pragma. * gcc.target/aarch64/sme/call_sm_switch_11.c: Add +sve pragma. * gcc.target/aarch64/sme/call_sm_switch_11_nosve.c: New test. * gcc.target/aarch64/sme/inlining_9.c: Add +sve pragma. * gcc.target/aarch64/sme/inlining_10.c: Add +sve pragma. * gcc.target/aarch64/sme/inlining_11.c: Add +sve pragma. 
* gcc.target/aarch64/sme/locally_streaming_1.c: Add +sve pragma. * gcc.target/aarch64/sme/locally_streaming_1_nosve.c: New test. * gcc.target/aarch64/sme/pr121028.c: Add +sve to pragma. * gcc.target/aarch64/sme/vect-dotprod-twoway.c: Add -mtune. * gcc.target/aarch64/sme2/acle-asm/ldr_zt.c: Use svcntsb instead of svcntb. * gcc.target/aarch64/sme2/acle-asm/str_zt.c: Ditto. Diff: --- .../g++.target/aarch64/sme/exceptions_2.C | 2 + .../g++.target/aarch64/sme/exceptions_2_nosve.C | 150 +++++++ .../aarch64/sme/acle-asm/ldr_vnum_za_sc.c | 26 +- .../gcc.target/aarch64/sme/acle-asm/ldr_za_sc.c | 12 +- .../aarch64/sme/acle-asm/str_vnum_za_sc.c | 26 +- .../gcc.target/aarch64/sme/acle-asm/str_za_sc.c | 12 +- .../aarch64/sme/acle-asm/test_sme_acle.h | 14 + .../gcc.target/aarch64/sme/call_sm_switch_1.c | 2 + .../gcc.target/aarch64/sme/call_sm_switch_11.c | 1 + .../aarch64/sme/call_sm_switch_11_nosve.c | 222 ++++++++++ .../aarch64/sme/call_sm_switch_1_nosve.c | 240 +++++++++++ .../gcc.target/aarch64/sme/call_sm_switch_3.c | 2 + .../aarch64/sme/call_sm_switch_3_nosve.c | 169 ++++++++ .../gcc.target/aarch64/sme/call_sm_switch_5.c | 2 + .../gcc.target/aarch64/sme/call_sm_switch_6.c | 2 + gcc/testsuite/gcc.target/aarch64/sme/inlining_10.c | 2 + gcc/testsuite/gcc.target/aarch64/sme/inlining_11.c | 2 + gcc/testsuite/gcc.target/aarch64/sme/inlining_9.c | 2 + .../gcc.target/aarch64/sme/locally_streaming_1.c | 2 + .../aarch64/sme/locally_streaming_1_nosve.c | 468 +++++++++++++++++++++ gcc/testsuite/gcc.target/aarch64/sme/pr121028.c | 4 +- .../gcc.target/aarch64/sme/vect-dotprod-twoway.c | 2 +- .../gcc.target/aarch64/sme2/acle-asm/ldr_zt.c | 4 +- .../gcc.target/aarch64/sme2/acle-asm/str_zt.c | 4 +- 24 files changed, 1328 insertions(+), 44 deletions(-) diff --git a/gcc/testsuite/g++.target/aarch64/sme/exceptions_2.C b/gcc/testsuite/g++.target/aarch64/sme/exceptions_2.C index f791b6ecc548..b0be27661e63 100644 --- a/gcc/testsuite/g++.target/aarch64/sme/exceptions_2.C +++
b/gcc/testsuite/g++.target/aarch64/sme/exceptions_2.C @@ -1,6 +1,8 @@ // { dg-options "-O -fno-optimize-sibling-calls" } // { dg-final { check-function-bodies "**" "" } } +#pragma GCC target "+sve" + void n_callee(); void s_callee() __arm_streaming; void sc_callee() __arm_streaming_compatible; diff --git a/gcc/testsuite/g++.target/aarch64/sme/exceptions_2_nosve.C b/gcc/testsuite/g++.target/aarch64/sme/exceptions_2_nosve.C new file mode 100644 index 000000000000..0e5772e33078 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sme/exceptions_2_nosve.C @@ -0,0 +1,150 @@ +// { dg-options "-O -fno-optimize-sibling-calls" } +// { dg-final { check-function-bodies "**" "" } } + +#pragma GCC target "+nosve" + +void n_callee(); +void s_callee() __arm_streaming; +void sc_callee() __arm_streaming_compatible; + +void n_callee_ne() noexcept; +void s_callee_ne() noexcept __arm_streaming; +void sc_callee_ne() noexcept __arm_streaming_compatible; + +void n_caller1() +{ + try + { + n_callee(); + sc_callee(); + } + catch (...) + { + n_callee_ne(); + sc_callee_ne(); + } +} +// { dg-final { scan-assembler {_Z9n_caller1v:(?:(?!smstart|smstop).)*\tret} } } + +/* +** _Z9n_caller2v: +** ... +** bl __arm_get_current_vg +** str x0, [^\n]+ +** ... +** bl __cxa_begin_catch +** smstart sm +** bl _Z11s_callee_nev +** smstop sm +** bl __cxa_end_catch +** ... +*/ +void n_caller2() +{ + try + { + n_callee(); + sc_callee(); + } + catch (...) + { + s_callee_ne(); + } +} + +/* +** _Z9s_caller1v: +** ... +** bl __cxa_end_catch +** smstart sm +** ... +*/ +int s_caller1() __arm_streaming +{ + try + { + s_callee(); + return 1; + } + catch (...) + { + return 2; + } +} + +/* +** _Z9s_caller2v: +** ... +** bl __cxa_begin_catch +** smstart sm +** bl _Z11s_callee_nev +** smstop sm +** bl __cxa_end_catch +** smstart sm +** ... +*/ +int s_caller2() __arm_streaming +{ + try + { + n_callee(); + return 1; + } + catch (...) + { + s_callee_ne(); + return 2; + } +} + +/* +** _Z10sc_caller1v: +** ... 
+** bl __arm_get_current_vg +** str x0, [^\n]+ +** mrs (x[0-9]+), svcr +** str \1, ([^\n]+) +** ... +** bl __cxa_end_catch +** ldr (x[0-9]+), \2 +** tbz \3, 0, [^\n]+ +** smstart sm +** ... +*/ +int sc_caller1() __arm_streaming_compatible +{ + try + { + sc_callee(); + return 1; + } + catch (...) + { + return 2; + } +} + +/* +** _Z10ls_caller1v: +** ... +** bl __arm_get_current_vg +** str x0, [^\n]+ +** ... +** bl __cxa_begin_catch +** smstart sm +** bl _Z12sc_callee_nev +** smstop sm +** bl __cxa_end_catch +** ... +*/ +__arm_locally_streaming void ls_caller1() +{ + try + { + sc_callee(); + } + catch (...) + { + sc_callee_ne(); + } +} diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_sc.c index dfc2d139f819..09481ccf4dc6 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_sc.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_sc.c @@ -9,7 +9,7 @@ ** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ** ret */ -TEST_LOAD_ZA (ldr_vnum_za_0_0, +TEST_LOAD_ZA_NOPRED (ldr_vnum_za_0_0, svldr_vnum_za (0, x1, 0), svldr_vnum_za (0, x1, 0)) @@ -19,7 +19,7 @@ TEST_LOAD_ZA (ldr_vnum_za_0_0, ** ldr za\[\1, 1\], \[x1(?:, #1, mul vl)?\] ** ret */ -TEST_LOAD_ZA (ldr_vnum_za_0_1, +TEST_LOAD_ZA_NOPRED (ldr_vnum_za_0_1, svldr_vnum_za (0, x1, 1), svldr_vnum_za (0, x1, 1)) @@ -29,7 +29,7 @@ TEST_LOAD_ZA (ldr_vnum_za_0_1, ** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ** ret */ -TEST_LOAD_ZA (ldr_vnum_za_1_0, +TEST_LOAD_ZA_NOPRED (ldr_vnum_za_1_0, svldr_vnum_za (1, x1, 0), svldr_vnum_za (1, x1, 0)) @@ -39,7 +39,7 @@ TEST_LOAD_ZA (ldr_vnum_za_1_0, ** ldr za\[\1, 2\], \[x1(?:, #2, mul vl)?\] ** ret */ -TEST_LOAD_ZA (ldr_vnum_za_1_2, +TEST_LOAD_ZA_NOPRED (ldr_vnum_za_1_2, svldr_vnum_za (1, x1, 2), svldr_vnum_za (1, x1, 2)) @@ -49,7 +49,7 @@ TEST_LOAD_ZA (ldr_vnum_za_1_2, ** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ** ret */ -TEST_LOAD_ZA (ldr_vnum_za_w0_0, +TEST_LOAD_ZA_NOPRED 
(ldr_vnum_za_w0_0, svldr_vnum_za (w0, x1, 0), svldr_vnum_za (w0, x1, 0)) @@ -59,7 +59,7 @@ TEST_LOAD_ZA (ldr_vnum_za_w0_0, ** ldr za\[\1, 1\], \[x1, #1, mul vl\] ** ret */ -TEST_LOAD_ZA (ldr_vnum_za_w0_1, +TEST_LOAD_ZA_NOPRED (ldr_vnum_za_w0_1, svldr_vnum_za (w0, x1, 1), svldr_vnum_za (w0, x1, 1)) @@ -69,7 +69,7 @@ TEST_LOAD_ZA (ldr_vnum_za_w0_1, ** ldr za\[\1, 13\], \[x1, #13, mul vl\] ** ret */ -TEST_LOAD_ZA (ldr_vnum_za_w0_13, +TEST_LOAD_ZA_NOPRED (ldr_vnum_za_w0_13, svldr_vnum_za (w0, x1, 13), svldr_vnum_za (w0, x1, 13)) @@ -79,7 +79,7 @@ TEST_LOAD_ZA (ldr_vnum_za_w0_13, ** ldr za\[\1, 15\], \[x1, #15, mul vl\] ** ret */ -TEST_LOAD_ZA (ldr_vnum_za_w0_15, +TEST_LOAD_ZA_NOPRED (ldr_vnum_za_w0_15, svldr_vnum_za (w0, x1, 15), svldr_vnum_za (w0, x1, 15)) @@ -96,7 +96,7 @@ TEST_LOAD_ZA (ldr_vnum_za_w0_15, ** ) ** ret */ -TEST_LOAD_ZA (ldr_vnum_za_w0_16, +TEST_LOAD_ZA_NOPRED (ldr_vnum_za_w0_16, svldr_vnum_za (w0, x1, 16), svldr_vnum_za (w0, x1, 16)) @@ -113,7 +113,7 @@ TEST_LOAD_ZA (ldr_vnum_za_w0_16, ** ) ** ret */ -TEST_LOAD_ZA (ldr_vnum_za_w0_m1, +TEST_LOAD_ZA_NOPRED (ldr_vnum_za_w0_m1, svldr_vnum_za (w0, x1, -1), svldr_vnum_za (w0, x1, -1)) @@ -123,7 +123,7 @@ TEST_LOAD_ZA (ldr_vnum_za_w0_m1, ** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ** ret */ -TEST_LOAD_ZA (ldr_vnum_za_w0p1_0, +TEST_LOAD_ZA_NOPRED (ldr_vnum_za_w0p1_0, svldr_vnum_za (w0 + 1, x1, 0), svldr_vnum_za (w0 + 1, x1, 0)) @@ -133,7 +133,7 @@ TEST_LOAD_ZA (ldr_vnum_za_w0p1_0, ** ldr za\[\1, 1\], \[x1(?:, #1, mul vl)?\] ** ret */ -TEST_LOAD_ZA (ldr_vnum_za_w0m1_1, +TEST_LOAD_ZA_NOPRED (ldr_vnum_za_w0m1_1, svldr_vnum_za (w0 - 1, x1, 1), svldr_vnum_za (w0 - 1, x1, 1)) @@ -143,6 +143,6 @@ TEST_LOAD_ZA (ldr_vnum_za_w0m1_1, ** ldr za\[\1, 3\], \[x1(?:, #3, mul vl)?\] ** ret */ -TEST_LOAD_ZA (ldr_vnum_za_w0p2_3, +TEST_LOAD_ZA_NOPRED (ldr_vnum_za_w0p2_3, svldr_vnum_za (w0 + 2, x1, 3), svldr_vnum_za (w0 + 2, x1, 3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_sc.c 
b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_sc.c index a27be7671968..a358b016d8ab 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_sc.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_sc.c @@ -9,7 +9,7 @@ ** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ** ret */ -TEST_LOAD_ZA (ldr_za_0, +TEST_LOAD_ZA_NOPRED (ldr_za_0, svldr_za (0, x1), svldr_za (0, x1)) @@ -19,7 +19,7 @@ TEST_LOAD_ZA (ldr_za_0, ** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ** ret */ -TEST_LOAD_ZA (ldr_za_1, +TEST_LOAD_ZA_NOPRED (ldr_za_1, svldr_za (1, x1), svldr_za (1, x1)) @@ -29,7 +29,7 @@ TEST_LOAD_ZA (ldr_za_1, ** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ** ret */ -TEST_LOAD_ZA (ldr_za_w0, +TEST_LOAD_ZA_NOPRED (ldr_za_w0, svldr_za (w0, x1), svldr_za (w0, x1)) @@ -39,7 +39,7 @@ TEST_LOAD_ZA (ldr_za_w0, ** ldr za\[\1, 1\], \[x1, #1, mul vl\] ** ret */ -TEST_LOAD_ZA (ldr_za_w0_1_vnum, +TEST_LOAD_ZA_NOPRED (ldr_za_w0_1_vnum, svldr_za (w0 + 1, x1 + svcntsb ()), svldr_za (w0 + 1, x1 + svcntsb ())) @@ -49,7 +49,7 @@ TEST_LOAD_ZA (ldr_za_w0_1_vnum, ** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ** ret */ -TEST_LOAD_ZA (ldr_za_w0p2, +TEST_LOAD_ZA_NOPRED (ldr_za_w0p2, svldr_za (w0 + 2, x1), svldr_za (w0 + 2, x1)) @@ -66,6 +66,6 @@ TEST_LOAD_ZA (ldr_za_w0p2, ** ) ** ret */ -TEST_LOAD_ZA (ldr_za_offset, +TEST_LOAD_ZA_NOPRED (ldr_za_offset, svldr_za (w0, x1 + 1), svldr_za (w0, x1 + 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_sc.c index 7cd09e67c935..29dc00c3d533 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_sc.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_sc.c @@ -9,7 +9,7 @@ ** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ** ret */ -TEST_STORE_ZA (str_vnum_za_0_0, +TEST_STORE_ZA_NOPRED (str_vnum_za_0_0, svstr_vnum_za (0, x1, 0), svstr_vnum_za (0, x1, 0)) @@ -19,7 +19,7 @@ TEST_STORE_ZA (str_vnum_za_0_0, ** str za\[\1, 1\], \[x1(?:, #1, mul 
vl)?\] ** ret */ -TEST_STORE_ZA (str_vnum_za_0_1, +TEST_STORE_ZA_NOPRED (str_vnum_za_0_1, svstr_vnum_za (0, x1, 1), svstr_vnum_za (0, x1, 1)) @@ -29,7 +29,7 @@ TEST_STORE_ZA (str_vnum_za_0_1, ** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ** ret */ -TEST_STORE_ZA (str_vnum_za_1_0, +TEST_STORE_ZA_NOPRED (str_vnum_za_1_0, svstr_vnum_za (1, x1, 0), svstr_vnum_za (1, x1, 0)) @@ -39,7 +39,7 @@ TEST_STORE_ZA (str_vnum_za_1_0, ** str za\[\1, 2\], \[x1(?:, #2, mul vl)?\] ** ret */ -TEST_STORE_ZA (str_vnum_za_1_2, +TEST_STORE_ZA_NOPRED (str_vnum_za_1_2, svstr_vnum_za (1, x1, 2), svstr_vnum_za (1, x1, 2)) @@ -49,7 +49,7 @@ TEST_STORE_ZA (str_vnum_za_1_2, ** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ** ret */ -TEST_STORE_ZA (str_vnum_za_w0_0, +TEST_STORE_ZA_NOPRED (str_vnum_za_w0_0, svstr_vnum_za (w0, x1, 0), svstr_vnum_za (w0, x1, 0)) @@ -59,7 +59,7 @@ TEST_STORE_ZA (str_vnum_za_w0_0, ** str za\[\1, 1\], \[x1, #1, mul vl\] ** ret */ -TEST_STORE_ZA (str_vnum_za_w0_1, +TEST_STORE_ZA_NOPRED (str_vnum_za_w0_1, svstr_vnum_za (w0, x1, 1), svstr_vnum_za (w0, x1, 1)) @@ -69,7 +69,7 @@ TEST_STORE_ZA (str_vnum_za_w0_1, ** str za\[\1, 13\], \[x1, #13, mul vl\] ** ret */ -TEST_STORE_ZA (str_vnum_za_w0_13, +TEST_STORE_ZA_NOPRED (str_vnum_za_w0_13, svstr_vnum_za (w0, x1, 13), svstr_vnum_za (w0, x1, 13)) @@ -79,7 +79,7 @@ TEST_STORE_ZA (str_vnum_za_w0_13, ** str za\[\1, 15\], \[x1, #15, mul vl\] ** ret */ -TEST_STORE_ZA (str_vnum_za_w0_15, +TEST_STORE_ZA_NOPRED (str_vnum_za_w0_15, svstr_vnum_za (w0, x1, 15), svstr_vnum_za (w0, x1, 15)) @@ -96,7 +96,7 @@ TEST_STORE_ZA (str_vnum_za_w0_15, ** ) ** ret */ -TEST_STORE_ZA (str_vnum_za_w0_16, +TEST_STORE_ZA_NOPRED (str_vnum_za_w0_16, svstr_vnum_za (w0, x1, 16), svstr_vnum_za (w0, x1, 16)) @@ -113,7 +113,7 @@ TEST_STORE_ZA (str_vnum_za_w0_16, ** ) ** ret */ -TEST_STORE_ZA (str_vnum_za_w0_m1, +TEST_STORE_ZA_NOPRED (str_vnum_za_w0_m1, svstr_vnum_za (w0, x1, -1), svstr_vnum_za (w0, x1, -1)) @@ -123,7 +123,7 @@ TEST_STORE_ZA (str_vnum_za_w0_m1, ** str 
za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ** ret */ -TEST_STORE_ZA (str_vnum_za_w0p1_0, +TEST_STORE_ZA_NOPRED (str_vnum_za_w0p1_0, svstr_vnum_za (w0 + 1, x1, 0), svstr_vnum_za (w0 + 1, x1, 0)) @@ -133,7 +133,7 @@ TEST_STORE_ZA (str_vnum_za_w0p1_0, ** str za\[\1, 1\], \[x1(?:, #1, mul vl)?\] ** ret */ -TEST_STORE_ZA (str_vnum_za_w0m1_1, +TEST_STORE_ZA_NOPRED (str_vnum_za_w0m1_1, svstr_vnum_za (w0 - 1, x1, 1), svstr_vnum_za (w0 - 1, x1, 1)) @@ -143,6 +143,6 @@ TEST_STORE_ZA (str_vnum_za_w0m1_1, ** str za\[\1, 3\], \[x1(?:, #3, mul vl)?\] ** ret */ -TEST_STORE_ZA (str_vnum_za_w0p2_3, +TEST_STORE_ZA_NOPRED (str_vnum_za_w0p2_3, svstr_vnum_za (w0 + 2, x1, 3), svstr_vnum_za (w0 + 2, x1, 3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_sc.c index 3406055e70d3..a0aa1c7ca7da 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_sc.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_sc.c @@ -9,7 +9,7 @@ ** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ** ret */ -TEST_STORE_ZA (str_za_0, +TEST_STORE_ZA_NOPRED (str_za_0, svstr_za (0, x1), svstr_za (0, x1)) @@ -19,7 +19,7 @@ TEST_STORE_ZA (str_za_0, ** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ** ret */ -TEST_STORE_ZA (str_za_1, +TEST_STORE_ZA_NOPRED (str_za_1, svstr_za (1, x1), svstr_za (1, x1)) @@ -29,7 +29,7 @@ TEST_STORE_ZA (str_za_1, ** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ** ret */ -TEST_STORE_ZA (str_za_w0, +TEST_STORE_ZA_NOPRED (str_za_w0, svstr_za (w0, x1), svstr_za (w0, x1)) @@ -39,7 +39,7 @@ TEST_STORE_ZA (str_za_w0, ** str za\[\1, 1\], \[x1, #1, mul vl\] ** ret */ -TEST_STORE_ZA (str_za_w0_1_vnum, +TEST_STORE_ZA_NOPRED (str_za_w0_1_vnum, svstr_za (w0 + 1, x1 + svcntsb ()), svstr_za (w0 + 1, x1 + svcntsb ())) @@ -49,7 +49,7 @@ TEST_STORE_ZA (str_za_w0_1_vnum, ** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] ** ret */ -TEST_STORE_ZA (str_za_w0p2, +TEST_STORE_ZA_NOPRED (str_za_w0p2, svstr_za (w0 + 2, x1), svstr_za (w0 + 2, x1)) @@ 
-66,6 +66,6 @@ TEST_STORE_ZA (str_za_w0p2, ** ) ** ret */ -TEST_STORE_ZA (str_za_offset, +TEST_STORE_ZA_NOPRED (str_za_offset, svstr_za (w0, x1 + 1), svstr_za (w0, x1 + 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h index 75e3413768e2..c81bf074c501 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h @@ -29,6 +29,20 @@ INVOKE (CODE1, CODE2); \ } +#define TEST_LOAD_ZA_NOPRED(NAME, CODE1, CODE2) \ + PROTO (NAME, void, (int32_t w0, const char *x1, \ + uint64_t x2)) \ + { \ + INVOKE (CODE1, CODE2); \ + } + +#define TEST_STORE_ZA_NOPRED(NAME, CODE1, CODE2) \ + PROTO (NAME, void, (int32_t w0, char *x1, \ + uint64_t x2)) \ + { \ + INVOKE (CODE1, CODE2); \ + } + #define TEST_READ_ZA(NAME, TYPE, CODE1, CODE2) \ PROTO (NAME, TYPE, (TYPE z0, TYPE z1, svbool_t p0, \ int32_t w0)) \ diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c index 3a63da7439cc..ed4674a6c84a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c @@ -1,6 +1,8 @@ // { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" } // { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {\t\.inst} } } +#pragma GCC target "+sve" + void ns_callee (); void s_callee () [[arm::streaming]]; void sc_callee () [[arm::streaming_compatible]]; diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_11.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_11.c index c72d03f33b97..78ac503f2e2c 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_11.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_11.c @@ -1,6 +1,7 @@ // { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables -mtrack-speculation" } // 
{ dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {\t\.inst} } } +#pragma GCC target "+sve" void ns_callee (); void s_callee () [[arm::streaming]]; diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_11_nosve.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_11_nosve.c new file mode 100644 index 000000000000..8f8b25ad89c0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_11_nosve.c @@ -0,0 +1,222 @@ +// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables -mtrack-speculation" } +// { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {\t\.inst} } } + +#pragma GCC target "+nosve" + +void ns_callee (); + void s_callee () [[arm::streaming]]; + void sc_callee () [[arm::streaming_compatible]]; + +void ns_callee_stack (int, int, int, int, int, int, int, int, int); + +struct callbacks { + void (*ns_ptr) (); + void (*s_ptr) () [[arm::streaming]]; + void (*sc_ptr) () [[arm::streaming_compatible]]; +}; + +/* +** sc_caller_sme: +** cmp sp, #?0 +** csetm x15, ne +** stp x29, x30, \[sp, #?-96\]! 
+** mov x29, sp +** mov x14, sp +** and x14, x14, x15 +** mov sp, x14 +** bl __arm_get_current_vg +** cmp sp, #?0 +** csetm x15, ne +** str x0, \[sp, #?24\] +** stp d8, d9, \[sp, #?32\] +** stp d10, d11, \[sp, #?48\] +** stp d12, d13, \[sp, #?64\] +** stp d14, d15, \[sp, #?80\] +** mrs x16, svcr +** str x16, \[x29, #?16\] +** ldr x16, \[x29, #?16\] +** tst x16, #?1 +** beq [^\n]* +** csel x15, x15, xzr, ne +** smstop sm +** b [^\n]* +** csel x15, x15, xzr, eq +** mov x14, sp +** and x14, x14, x15 +** mov sp, x14 +** bl ns_callee +** cmp sp, #?0 +** csetm x15, ne +** ldr x16, \[x29, #?16\] +** tst x16, #?1 +** beq [^\n]* +** csel x15, x15, xzr, ne +** smstart sm +** b [^\n]* +** csel x15, x15, xzr, eq +** ldr x16, \[x29, #?16\] +** tst x16, #?1 +** bne [^\n]* +** csel x15, x15, xzr, eq +** smstart sm +** b [^\n]* +** csel x15, x15, xzr, ne +** mov x14, sp +** and x14, x14, x15 +** mov sp, x14 +** bl s_callee +** cmp sp, #?0 +** csetm x15, ne +** ldr x16, \[x29, #?16\] +** tst x16, #?1 +** bne [^\n]* +** csel x15, x15, xzr, eq +** smstop sm +** b [^\n]* +** csel x15, x15, xzr, ne +** mov x14, sp +** and x14, x14, x15 +** mov sp, x14 +** bl sc_callee +** cmp sp, #?0 +** csetm x15, ne +** ldp d8, d9, \[sp, #?32\] +** ldp d10, d11, \[sp, #?48\] +** ldp d12, d13, \[sp, #?64\] +** ldp d14, d15, \[sp, #?80\] +** ldp x29, x30, \[sp\], #?96 +** mov x14, sp +** and x14, x14, x15 +** mov sp, x14 +** ret +*/ +void +sc_caller_sme () [[arm::streaming_compatible]] +{ + ns_callee (); + s_callee (); + sc_callee (); +} + +#pragma GCC target "+nosme" + +/* +** sc_caller: +** cmp sp, #?0 +** csetm x15, ne +** stp x29, x30, \[sp, #?-96\]! +** mov x29, sp +** mov x14, sp +** and x14, x14, x15 +** mov sp, x14 +** bl __arm_get_current_vg +** str x0, \[sp, #?24\] +** stp d8, d9, \[sp, #?32\] +** stp d10, d11, \[sp, #?48\] +** stp d12, d13, \[sp, #?64\] +** stp d14, d15, \[sp, #?80\] +** bl __arm_sme_state +** cmp sp, #?0 +** csetm x15, ne +** str x0, \[x29, #?16\] +** ... 
+** bl sc_callee +** cmp sp, #?0 +** csetm x15, ne +** ldp d8, d9, \[sp, #?32\] +** ldp d10, d11, \[sp, #?48\] +** ldp d12, d13, \[sp, #?64\] +** ldp d14, d15, \[sp, #?80\] +** ldp x29, x30, \[sp\], #?96 +** mov x14, sp +** and x14, x14, x15 +** mov sp, x14 +** ret +*/ +void +sc_caller () [[arm::streaming_compatible]] +{ + ns_callee (); + sc_callee (); +} + +/* +** sc_caller_x0: +** ... +** mov x10, x0 +** mov x14, sp +** and x14, x14, x15 +** mov sp, x14 +** bl __arm_get_current_vg +** ... +** bl __arm_sme_state +** ... +** str wzr, \[x10\] +** ... +*/ +void +sc_caller_x0 (int *ptr) [[arm::streaming_compatible]] +{ + *ptr = 0; + ns_callee (); + sc_callee (); +} + +/* +** sc_caller_x1: +** ... +** mov x10, x0 +** mov x14, sp +** and x14, x14, x15 +** mov sp, x14 +** bl __arm_get_current_vg +** ... +** mov x11, x1 +** bl __arm_sme_state +** ... +** str w11, \[x10\] +** ... +*/ +void +sc_caller_x1 (int *ptr, int a) [[arm::streaming_compatible]] +{ + *ptr = a; + ns_callee (); + sc_callee (); +} + +/* +** sc_caller_stack: +** cmp sp, #?0 +** csetm x15, ne +** sub sp, sp, #112 +** stp x29, x30, \[sp, #?16\] +** add x29, sp, #?16 +** ... +** bl __arm_get_current_vg +** ... +** stp d8, d9, \[sp, #?48\] +** ... +** bl __arm_sme_state +** cmp sp, #?0 +** csetm x15, ne +** str x0, \[x29, #?16\] +** ... +** bl ns_callee_stack +** cmp sp, #?0 +** csetm x15, ne +** ldr x16, \[x29, #?16\] +** tst x16, #?1 +** beq [^\n]* +** csel x15, x15, xzr, ne +** .inst 0xd503437f // smstart sm +** ... 
+*/ +void +sc_caller_stack () [[arm::streaming_compatible]] +{ + ns_callee_stack (0, 0, 0, 0, 0, 0, 0, 0, 0); +} + +/* { dg-final { scan-assembler {sc_caller_sme:(?:(?!ret).)*\.cfi_offset 46, -72\n} } } */ +/* { dg-final { scan-assembler {sc_caller:(?:(?!ret).)*\.cfi_offset 46, -72\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1_nosve.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1_nosve.c new file mode 100644 index 000000000000..970fa4a3cc14 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1_nosve.c @@ -0,0 +1,240 @@ +// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" } +// { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {\t\.inst} } } + +#pragma GCC target "+nosve" + +void ns_callee (); + void s_callee () [[arm::streaming]]; + void sc_callee () [[arm::streaming_compatible]]; + +void ns_callee_stack (int, int, int, int, int, int, int, int, int); + +struct callbacks { + void (*ns_ptr) (); + void (*s_ptr) () [[arm::streaming]]; + void (*sc_ptr) () [[arm::streaming_compatible]]; +}; + +/* +** n_caller: { target lp64 } +** stp x30, (x19|x2[0-8]), \[sp, #?-96\]! 
+** mov (x9|x1[0-5]), x0 +** bl __arm_get_current_vg +** str x0, \[sp, #?16\] +** stp d8, d9, \[sp, #?32\] +** stp d10, d11, \[sp, #?48\] +** stp d12, d13, \[sp, #?64\] +** stp d14, d15, \[sp, #?80\] +** mov \1, \2 +** bl ns_callee +** smstart sm +** bl s_callee +** smstop sm +** bl sc_callee +** ldr (x[0-9]+), \[\1\] +** blr \3 +** ldr (x[0-9]+), \[\1, #?8\] +** smstart sm +** blr \4 +** smstop sm +** ldr (x[0-9]+), \[\1, #?16\] +** blr \5 +** ldp d8, d9, \[sp, #?32\] +** ldp d10, d11, \[sp, #?48\] +** ldp d12, d13, \[sp, #?64\] +** ldp d14, d15, \[sp, #?80\] +** ldp x30, \1, \[sp\], #?96 +** ret +*/ +void +n_caller (struct callbacks *c) +{ + ns_callee (); + s_callee (); + sc_callee (); + + c->ns_ptr (); + c->s_ptr (); + c->sc_ptr (); +} + +/* +** s_caller: { target lp64 } +** stp x30, (x19|x2[0-8]), \[sp, #?-96\]! +** cntd x16 +** str x16, \[sp, #?16\] +** stp d8, d9, \[sp, #?32\] +** stp d10, d11, \[sp, #?48\] +** stp d12, d13, \[sp, #?64\] +** stp d14, d15, \[sp, #?80\] +** mov \1, x0 +** smstop sm +** bl ns_callee +** smstart sm +** bl s_callee +** bl sc_callee +** ldr (x[0-9]+), \[\1\] +** smstop sm +** blr \2 +** smstart sm +** ldr (x[0-9]+), \[\1, #?8\] +** blr \3 +** ldr (x[0-9]+), \[\1, #?16\] +** blr \4 +** ldp d8, d9, \[sp, #?32\] +** ldp d10, d11, \[sp, #?48\] +** ldp d12, d13, \[sp, #?64\] +** ldp d14, d15, \[sp, #?80\] +** ldp x30, \1, \[sp\], #?96 +** ret +*/ +void +s_caller (struct callbacks *c) [[arm::streaming]] +{ + ns_callee (); + s_callee (); + sc_callee (); + + c->ns_ptr (); + c->s_ptr (); + c->sc_ptr (); +} + +/* +** sc_caller_sme: +** stp x29, x30, \[sp, #?-96\]! 
+** mov x29, sp +** bl __arm_get_current_vg +** str x0, \[sp, #?24\] +** stp d8, d9, \[sp, #?32\] +** stp d10, d11, \[sp, #?48\] +** stp d12, d13, \[sp, #?64\] +** stp d14, d15, \[sp, #?80\] +** mrs x16, svcr +** str x16, \[x29, #?16\] +** ldr x16, \[x29, #?16\] +** tbz x16, 0, .* +** smstop sm +** bl ns_callee +** ldr x16, \[x29, #?16\] +** tbz x16, 0, .* +** smstart sm +** ldr x16, \[x29, #?16\] +** tbnz x16, 0, .* +** smstart sm +** bl s_callee +** ldr x16, \[x29, #?16\] +** tbnz x16, 0, .* +** smstop sm +** bl sc_callee +** ldp d8, d9, \[sp, #?32\] +** ldp d10, d11, \[sp, #?48\] +** ldp d12, d13, \[sp, #?64\] +** ldp d14, d15, \[sp, #?80\] +** ldp x29, x30, \[sp\], #?96 +** ret +*/ +void +sc_caller_sme () [[arm::streaming_compatible]] +{ + ns_callee (); + s_callee (); + sc_callee (); +} + +#pragma GCC target "+nosme" + +/* +** sc_caller: +** stp x29, x30, \[sp, #?-96\]! +** mov x29, sp +** bl __arm_get_current_vg +** str x0, \[sp, #?24\] +** stp d8, d9, \[sp, #?32\] +** stp d10, d11, \[sp, #?48\] +** stp d12, d13, \[sp, #?64\] +** stp d14, d15, \[sp, #?80\] +** bl __arm_sme_state +** str x0, \[x29, #?16\] +** ... +** bl sc_callee +** ldp d8, d9, \[sp, #?32\] +** ldp d10, d11, \[sp, #?48\] +** ldp d12, d13, \[sp, #?64\] +** ldp d14, d15, \[sp, #?80\] +** ldp x29, x30, \[sp\], #?96 +** ret +*/ +void +sc_caller () [[arm::streaming_compatible]] +{ + ns_callee (); + sc_callee (); +} + +/* +** sc_caller_x0: +** ... +** mov x10, x0 +** bl __arm_get_current_vg +** ... +** bl __arm_sme_state +** ... +** str wzr, \[x10\] +** ... +*/ +void +sc_caller_x0 (int *ptr) [[arm::streaming_compatible]] +{ + *ptr = 0; + ns_callee (); + sc_callee (); +} + +/* +** sc_caller_x1: +** ... +** mov x10, x0 +** bl __arm_get_current_vg +** ... +** mov x11, x1 +** bl __arm_sme_state +** ... +** str w11, \[x10\] +** ... 
+*/ +void +sc_caller_x1 (int *ptr, int a) [[arm::streaming_compatible]] +{ + *ptr = a; + ns_callee (); + sc_callee (); +} + +/* +** sc_caller_stack: +** sub sp, sp, #112 +** stp x29, x30, \[sp, #?16\] +** add x29, sp, #?16 +** ... +** stp d8, d9, \[sp, #?48\] +** ... +** bl __arm_sme_state +** str x0, \[x29, #?16\] +** ... +** bl ns_callee_stack +** ldr x16, \[x29, #?16\] +** tbz x16, 0, .* +** .inst 0xd503437f // smstart sm +** ... +*/ +void +sc_caller_stack () [[arm::streaming_compatible]] +{ + ns_callee_stack (0, 0, 0, 0, 0, 0, 0, 0, 0); +} + +/* { dg-final { scan-assembler {n_caller:(?:(?!ret).)*\.cfi_offset 46, -80\n} } } */ +/* { dg-final { scan-assembler {s_caller:(?:(?!ret).)*\.cfi_offset 46, -80\n} } } */ +/* { dg-final { scan-assembler {sc_caller_sme:(?:(?!ret).)*\.cfi_offset 46, -72\n} } } */ +/* { dg-final { scan-assembler {sc_caller:(?:(?!ret).)*\.cfi_offset 46, -72\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c index 4250fe7984cd..cdfd31340103 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c @@ -1,6 +1,8 @@ // { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" } // { dg-final { check-function-bodies "**" "" } } +#pragma GCC target "+sve" + __attribute__((aarch64_vector_pcs)) void ns_callee (); __attribute__((aarch64_vector_pcs)) void s_callee () [[arm::streaming]]; __attribute__((aarch64_vector_pcs)) void sc_callee () [[arm::streaming_compatible]]; diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3_nosve.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3_nosve.c new file mode 100644 index 000000000000..9a1b4af20c97 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3_nosve.c @@ -0,0 +1,169 @@ +// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" } +// { dg-final { 
check-function-bodies "**" "" } } + +#pragma GCC target "+nosve" + +__attribute__((aarch64_vector_pcs)) void ns_callee (); +__attribute__((aarch64_vector_pcs)) void s_callee () [[arm::streaming]]; +__attribute__((aarch64_vector_pcs)) void sc_callee () [[arm::streaming_compatible]]; + +struct callbacks { + __attribute__((aarch64_vector_pcs)) void (*ns_ptr) (); + __attribute__((aarch64_vector_pcs)) void (*s_ptr) () [[arm::streaming]]; + __attribute__((aarch64_vector_pcs)) void (*sc_ptr) () [[arm::streaming_compatible]]; +}; + +/* +** n_caller: { target lp64 } +** stp x30, (x19|x2[0-8]), \[sp, #?-288\]! +** mov (x9|x1[0-5]), x0 +** bl __arm_get_current_vg +** str x0, \[sp, #?16\] +** stp q8, q9, \[sp, #?32\] +** stp q10, q11, \[sp, #?64\] +** stp q12, q13, \[sp, #?96\] +** stp q14, q15, \[sp, #?128\] +** stp q16, q17, \[sp, #?160\] +** stp q18, q19, \[sp, #?192\] +** stp q20, q21, \[sp, #?224\] +** stp q22, q23, \[sp, #?256\] +** mov \1, \2 +** bl ns_callee +** smstart sm +** bl s_callee +** smstop sm +** bl sc_callee +** ldr (x[0-9]+), \[\1\] +** blr \3 +** ldr (x[0-9]+), \[\1, #?8\] +** smstart sm +** blr \4 +** smstop sm +** ldr (x[0-9]+), \[\1, #?16\] +** blr \5 +** ldp q8, q9, \[sp, #?32\] +** ldp q10, q11, \[sp, #?64\] +** ldp q12, q13, \[sp, #?96\] +** ldp q14, q15, \[sp, #?128\] +** ldp q16, q17, \[sp, #?160\] +** ldp q18, q19, \[sp, #?192\] +** ldp q20, q21, \[sp, #?224\] +** ldp q22, q23, \[sp, #?256\] +** ldp x30, \1, \[sp\], #?288 +** ret +*/ +void __attribute__((aarch64_vector_pcs)) +n_caller (struct callbacks *c) +{ + ns_callee (); + s_callee (); + sc_callee (); + + c->ns_ptr (); + c->s_ptr (); + c->sc_ptr (); +} + +/* +** s_caller: { target lp64 } +** stp x30, (x19|x2[0-8]), \[sp, #?-288\]! 
+** cntd x16 +** str x16, \[sp, #?16\] +** stp q8, q9, \[sp, #?32\] +** stp q10, q11, \[sp, #?64\] +** stp q12, q13, \[sp, #?96\] +** stp q14, q15, \[sp, #?128\] +** stp q16, q17, \[sp, #?160\] +** stp q18, q19, \[sp, #?192\] +** stp q20, q21, \[sp, #?224\] +** stp q22, q23, \[sp, #?256\] +** mov \1, x0 +** smstop sm +** bl ns_callee +** smstart sm +** bl s_callee +** bl sc_callee +** ldr (x[0-9]+), \[\1\] +** smstop sm +** blr \2 +** smstart sm +** ldr (x[0-9]+), \[\1, #?8\] +** blr \3 +** ldr (x[0-9]+), \[\1, #?16\] +** blr \4 +** ldp q8, q9, \[sp, #?32\] +** ldp q10, q11, \[sp, #?64\] +** ldp q12, q13, \[sp, #?96\] +** ldp q14, q15, \[sp, #?128\] +** ldp q16, q17, \[sp, #?160\] +** ldp q18, q19, \[sp, #?192\] +** ldp q20, q21, \[sp, #?224\] +** ldp q22, q23, \[sp, #?256\] +** ldp x30, \1, \[sp\], #?288 +** ret +*/ +void __attribute__((aarch64_vector_pcs)) +s_caller (struct callbacks *c) [[arm::streaming]] +{ + ns_callee (); + s_callee (); + sc_callee (); + + c->ns_ptr (); + c->s_ptr (); + c->sc_ptr (); +} + +/* +** sc_caller: +** stp x29, x30, \[sp, #?-288\]! 
+** mov x29, sp +** bl __arm_get_current_vg +** str x0, \[sp, #?24\] +** stp q8, q9, \[sp, #?32\] +** stp q10, q11, \[sp, #?64\] +** stp q12, q13, \[sp, #?96\] +** stp q14, q15, \[sp, #?128\] +** stp q16, q17, \[sp, #?160\] +** stp q18, q19, \[sp, #?192\] +** stp q20, q21, \[sp, #?224\] +** stp q22, q23, \[sp, #?256\] +** mrs x16, svcr +** str x16, \[x29, #?16\] +** ldr x16, \[x29, #?16\] +** tbz x16, 0, .* +** smstop sm +** bl ns_callee +** ldr x16, \[x29, #?16\] +** tbz x16, 0, .* +** smstart sm +** ldr x16, \[x29, #?16\] +** tbnz x16, 0, .* +** smstart sm +** bl s_callee +** ldr x16, \[x29, #?16\] +** tbnz x16, 0, .* +** smstop sm +** bl sc_callee +** ldp q8, q9, \[sp, #?32\] +** ldp q10, q11, \[sp, #?64\] +** ldp q12, q13, \[sp, #?96\] +** ldp q14, q15, \[sp, #?128\] +** ldp q16, q17, \[sp, #?160\] +** ldp q18, q19, \[sp, #?192\] +** ldp q20, q21, \[sp, #?224\] +** ldp q22, q23, \[sp, #?256\] +** ldp x29, x30, \[sp\], #?288 +** ret +*/ +void __attribute__((aarch64_vector_pcs)) +sc_caller () [[arm::streaming_compatible]] +{ + ns_callee (); + s_callee (); + sc_callee (); +} + +/* { dg-final { scan-assembler {n_caller:(?:(?!ret).)*\.cfi_offset 46, -272\n} } } */ +/* { dg-final { scan-assembler {s_caller:(?:(?!ret).)*\.cfi_offset 46, -272\n} } } */ +/* { dg-final { scan-assembler {sc_caller:(?:(?!ret).)*\.cfi_offset 46, -264\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c index d31b6b91f1f0..86ebfcdcadaf 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c @@ -3,6 +3,8 @@ #include <arm_sve.h> +#pragma GCC target "+sve" + svbool_t ns_callee (); svbool_t s_callee () [[arm::streaming]]; svbool_t sc_callee () [[arm::streaming_compatible]]; diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_6.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_6.c index 0f6bc4f6c9a5..165a4407d6f1 100644 
--- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_6.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_6.c @@ -2,6 +2,8 @@ #include <arm_sve.h> +#pragma GCC target "+sve" + svbool_t ns_callee (); svbool_t s_callee () [[arm::streaming]]; svbool_t sc_callee () [[arm::streaming_compatible]]; diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_10.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_10.c index adfd45a872fd..78e737e2f40b 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/inlining_10.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_10.c @@ -3,6 +3,8 @@ #include <arm_neon.h> #include <arm_sme.h> +#pragma GCC target "+sve" + uint8x16_t *neon; svint64_t *sve; int64_t *ptr; diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_11.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_11.c index d05a92c1c24f..0cd3487973e3 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/inlining_11.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_11.c @@ -3,6 +3,8 @@ #include <arm_neon.h> #include <arm_sme.h> +#pragma GCC target "+sve" + uint8x16_t *neon; svint64_t *sve; int64_t *ptr; diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_9.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_9.c index 91520e3787b1..dfbfbcae8f36 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/inlining_9.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_9.c @@ -3,6 +3,8 @@ #include <arm_neon.h> #include <arm_sme.h> +#pragma GCC target "+sve" + uint8x16_t *neon; svint64_t *sve; int64_t *ptr; diff --git a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c index cb235f5c832d..3e3b56532d8d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c @@ -1,6 +1,8 @@ // { dg-options "-O -fomit-frame-pointer -fno-stack-clash-protection" } // { dg-final { check-function-bodies "**" "" } } +#pragma GCC target "+sve" + void 
consume_za () [[arm::streaming, arm::inout("za")]]; /* diff --git a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1_nosve.c b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1_nosve.c new file mode 100644 index 000000000000..f58a4a9453cb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1_nosve.c @@ -0,0 +1,468 @@ +// { dg-options "-O -fomit-frame-pointer -fno-stack-clash-protection" } +// { dg-final { check-function-bodies "**" "" } } + +#pragma GCC target "+nosve" + +void consume_za () [[arm::streaming, arm::inout("za")]]; + +/* +** n_ls: +** sub sp, sp, #?80 +** bl __arm_get_current_vg +** str x0, \[sp\] +** stp d8, d9, \[sp, #?16\] +** stp d10, d11, \[sp, #?32\] +** stp d12, d13, \[sp, #?48\] +** stp d14, d15, \[sp, #?64\] +** smstart sm +** smstop sm +** ldp d8, d9, \[sp, #?16\] +** ldp d10, d11, \[sp, #?32\] +** ldp d12, d13, \[sp, #?48\] +** ldp d14, d15, \[sp, #?64\] +** add sp, sp, #?80 +** ret +*/ +[[arm::locally_streaming]] void +n_ls () +{ + asm (""); +} + +/* +** s_ls: +** ret +*/ +[[arm::locally_streaming]] void +s_ls () [[arm::streaming]] +{ + asm (""); +} + +/* +** sc_ls: +** stp x29, x30, \[sp, #?-96\]! +** mov x29, sp +** bl __arm_get_current_vg +** str x0, \[sp, #?24\] +** stp d8, d9, \[sp, #?32\] +** stp d10, d11, \[sp, #?48\] +** stp d12, d13, \[sp, #?64\] +** stp d14, d15, \[sp, #?80\] +** mrs x16, svcr +** str x16, \[x29, #?16\] +** tbnz x16, 0, [^\n]+ +** smstart sm +** ldr x16, \[x29, #?16\] +** tbnz x16, 0, [^\n]+ +** smstop sm +** ldp d8, d9, \[sp, #?32\] +** ldp d10, d11, \[sp, #?48\] +** ldp d12, d13, \[sp, #?64\] +** ldp d14, d15, \[sp, #?80\] +** ldp x29, x30, \[sp\], #?96 +** ret +*/ +[[arm::locally_streaming]] void +sc_ls () [[arm::streaming_compatible]] +{ + asm (""); +} + +/* +** n_ls_new_za: +** str x30, \[sp, #?-80\]! 
+** bl __arm_get_current_vg +** str x0, \[sp, #?8\] +** stp d8, d9, \[sp, #?16\] +** stp d10, d11, \[sp, #?32\] +** stp d12, d13, \[sp, #?48\] +** stp d14, d15, \[sp, #?64\] +** smstart sm +** mrs (x[0-9]+), tpidr2_el0 +** cbz \1, [^\n]+ +** bl __arm_tpidr2_save +** msr tpidr2_el0, xzr +** zero { za } +** smstart za +** bl consume_za +** smstop za +** smstop sm +** ldp d8, d9, \[sp, #?16\] +** ldp d10, d11, \[sp, #?32\] +** ldp d12, d13, \[sp, #?48\] +** ldp d14, d15, \[sp, #?64\] +** ldr x30, \[sp\], #?80 +** ret +*/ +[[arm::locally_streaming, arm::new("za")]] void +n_ls_new_za () +{ + consume_za (); + asm (""); +} + +/* +** s_ls_new_za: +** str x30, \[sp, #?-16\]! +** mrs (x[0-9]+), tpidr2_el0 +** cbz \1, [^\n]+ +** bl __arm_tpidr2_save +** msr tpidr2_el0, xzr +** zero { za } +** smstart za +** bl consume_za +** smstop za +** ldr x30, \[sp\], #?16 +** ret +*/ +[[arm::locally_streaming, arm::new("za")]] void +s_ls_new_za () [[arm::streaming]] +{ + consume_za (); + asm (""); +} + +/* +** sc_ls_new_za: +** stp x29, x30, \[sp, #?-96\]! +** mov x29, sp +** bl __arm_get_current_vg +** str x0, \[sp, #?24\] +** stp d8, d9, \[sp, #?32\] +** stp d10, d11, \[sp, #?48\] +** stp d12, d13, \[sp, #?64\] +** stp d14, d15, \[sp, #?80\] +** mrs x16, svcr +** str x16, \[x29, #?16\] +** tbnz x16, 0, [^\n]+ +** smstart sm +** mrs (x[0-9]+), tpidr2_el0 +** cbz \1, [^\n]+ +** bl __arm_tpidr2_save +** msr tpidr2_el0, xzr +** zero { za } +** smstart za +** bl consume_za +** smstop za +** ldr x16, \[x29, #?16\] +** tbnz x16, 0, [^\n]+ +** smstop sm +** ldp d8, d9, \[sp, #?32\] +** ldp d10, d11, \[sp, #?48\] +** ldp d12, d13, \[sp, #?64\] +** ldp d14, d15, \[sp, #?80\] +** ldp x29, x30, \[sp\], #?96 +** ret +*/ +[[arm::locally_streaming, arm::new("za")]] void +sc_ls_new_za () [[arm::streaming_compatible]] +{ + consume_za (); + asm (""); +} + +/* +** n_ls_shared_za: +** str x30, \[sp, #?-80\]! 
+** bl __arm_get_current_vg +** str x0, \[sp, #?8\] +** stp d8, d9, \[sp, #?16\] +** stp d10, d11, \[sp, #?32\] +** stp d12, d13, \[sp, #?48\] +** stp d14, d15, \[sp, #?64\] +** smstart sm +** bl consume_za +** smstop sm +** ldp d8, d9, \[sp, #?16\] +** ldp d10, d11, \[sp, #?32\] +** ldp d12, d13, \[sp, #?48\] +** ldp d14, d15, \[sp, #?64\] +** ldr x30, \[sp\], #?80 +** ret +*/ +[[arm::locally_streaming]] void +n_ls_shared_za () [[arm::inout("za")]] +{ + consume_za (); + asm (""); +} + +/* +** s_ls_shared_za: +** str x30, \[sp, #?-16\]! +** bl consume_za +** ldr x30, \[sp\], #?16 +** ret +*/ +[[arm::locally_streaming]] void +s_ls_shared_za () [[arm::streaming, arm::inout("za")]] +{ + consume_za (); + asm (""); +} + +/* +** sc_ls_shared_za: +** stp x29, x30, \[sp, #?-96\]! +** mov x29, sp +** bl __arm_get_current_vg +** str x0, \[sp, #?24\] +** stp d8, d9, \[sp, #?32\] +** stp d10, d11, \[sp, #?48\] +** stp d12, d13, \[sp, #?64\] +** stp d14, d15, \[sp, #?80\] +** mrs x16, svcr +** str x16, \[x29, #?16\] +** tbnz x16, 0, [^\n]+ +** smstart sm +** bl consume_za +** ldr x16, \[x29, #?16\] +** tbnz x16, 0, [^\n]+ +** smstop sm +** ldp d8, d9, \[sp, #?32\] +** ldp d10, d11, \[sp, #?48\] +** ldp d12, d13, \[sp, #?64\] +** ldp d14, d15, \[sp, #?80\] +** ldp x29, x30, \[sp\], #?96 +** ret +*/ +[[arm::locally_streaming]] void +sc_ls_shared_za () [[arm::streaming_compatible, arm::inout("za")]] +{ + consume_za (); + asm (""); +} + +/* +** n_ls_vector_pcs: +** sub sp, sp, #?272 +** bl __arm_get_current_vg +** str x0, \[sp\] +** stp q8, q9, \[sp, #?16\] +** stp q10, q11, \[sp, #?48\] +** stp q12, q13, \[sp, #?80\] +** stp q14, q15, \[sp, #?112\] +** stp q16, q17, \[sp, #?144\] +** stp q18, q19, \[sp, #?176\] +** stp q20, q21, \[sp, #?208\] +** stp q22, q23, \[sp, #?240\] +** smstart sm +** smstop sm +** ldp q8, q9, \[sp, #?16\] +** ldp q10, q11, \[sp, #?48\] +** ldp q12, q13, \[sp, #?80\] +** ldp q14, q15, \[sp, #?112\] +** ldp q16, q17, \[sp, #?144\] +** ldp q18, q19, \[sp, 
#?176\] +** ldp q20, q21, \[sp, #?208\] +** ldp q22, q23, \[sp, #?240\] +** add sp, sp, #?272 +** ret +*/ +[[arm::locally_streaming]] void __attribute__((aarch64_vector_pcs)) +n_ls_vector_pcs () +{ + asm (""); +} + +/* +** n_ls_sve_pcs: { target aarch64_little_endian } +** sub sp, sp, #?16 +** bl __arm_get_current_vg +** str x0, \[sp\] +** addsvl sp, sp, #-18 +** str p4, \[sp\] +** str p5, \[sp, #1, mul vl\] +** str p6, \[sp, #2, mul vl\] +** str p7, \[sp, #3, mul vl\] +** str p8, \[sp, #4, mul vl\] +** str p9, \[sp, #5, mul vl\] +** str p10, \[sp, #6, mul vl\] +** str p11, \[sp, #7, mul vl\] +** str p12, \[sp, #8, mul vl\] +** str p13, \[sp, #9, mul vl\] +** str p14, \[sp, #10, mul vl\] +** str p15, \[sp, #11, mul vl\] +** str z8, \[sp, #2, mul vl\] +** str z9, \[sp, #3, mul vl\] +** str z10, \[sp, #4, mul vl\] +** str z11, \[sp, #5, mul vl\] +** str z12, \[sp, #6, mul vl\] +** str z13, \[sp, #7, mul vl\] +** str z14, \[sp, #8, mul vl\] +** str z15, \[sp, #9, mul vl\] +** str z16, \[sp, #10, mul vl\] +** str z17, \[sp, #11, mul vl\] +** str z18, \[sp, #12, mul vl\] +** str z19, \[sp, #13, mul vl\] +** str z20, \[sp, #14, mul vl\] +** str z21, \[sp, #15, mul vl\] +** str z22, \[sp, #16, mul vl\] +** str z23, \[sp, #17, mul vl\] +** addvl sp, sp, #-1 +** str p0, \[sp\] +** smstart sm +** ldr p0, \[sp\] +** addvl sp, sp, #1 +** smstop sm +** ldr z8, \[sp, #2, mul vl\] +** ldr z9, \[sp, #3, mul vl\] +** ldr z10, \[sp, #4, mul vl\] +** ldr z11, \[sp, #5, mul vl\] +** ldr z12, \[sp, #6, mul vl\] +** ldr z13, \[sp, #7, mul vl\] +** ldr z14, \[sp, #8, mul vl\] +** ldr z15, \[sp, #9, mul vl\] +** ldr z16, \[sp, #10, mul vl\] +** ldr z17, \[sp, #11, mul vl\] +** ldr z18, \[sp, #12, mul vl\] +** ldr z19, \[sp, #13, mul vl\] +** ldr z20, \[sp, #14, mul vl\] +** ldr z21, \[sp, #15, mul vl\] +** ldr z22, \[sp, #16, mul vl\] +** ldr z23, \[sp, #17, mul vl\] +** ldr p4, \[sp\] +** ldr p5, \[sp, #1, mul vl\] +** ldr p6, \[sp, #2, mul vl\] +** ldr p7, \[sp, #3, mul vl\] +** ldr p8, 
\[sp, #4, mul vl\] +** ldr p9, \[sp, #5, mul vl\] +** ldr p10, \[sp, #6, mul vl\] +** ldr p11, \[sp, #7, mul vl\] +** ldr p12, \[sp, #8, mul vl\] +** ldr p13, \[sp, #9, mul vl\] +** ldr p14, \[sp, #10, mul vl\] +** ldr p15, \[sp, #11, mul vl\] +** addsvl sp, sp, #18 +** add sp, sp, #?16 +** ret +*/ +[[arm::locally_streaming]] void +n_ls_sve_pcs (__SVBool_t x) +{ + asm (""); +} + +/* +** n_ls_v0: +** addsvl sp, sp, #-1 +** ... +** smstart sm +** add x[0-9]+, [^\n]+ +** smstop sm +** ... +** addsvl sp, sp, #1 +** ... +*/ +#define TEST(VN) __SVInt32_t VN; asm ("" :: "r" (&VN)); +[[arm::locally_streaming]] void +n_ls_v0 () +{ + TEST (v0); +} + +/* +** n_ls_v32: +** addsvl sp, sp, #-32 +** ... +** smstart sm +** ... +** smstop sm +** ... +** rdsvl (x[0-9]+), #1 +** lsl (x[0-9]+), \1, #?5 +** add sp, sp, \2 +** ... +*/ +[[arm::locally_streaming]] void +n_ls_v32 () +{ + TEST (v0); + TEST (v1); + TEST (v2); + TEST (v3); + TEST (v4); + TEST (v5); + TEST (v6); + TEST (v7); + TEST (v8); + TEST (v9); + TEST (v10); + TEST (v11); + TEST (v12); + TEST (v13); + TEST (v14); + TEST (v15); + TEST (v16); + TEST (v17); + TEST (v18); + TEST (v19); + TEST (v20); + TEST (v21); + TEST (v22); + TEST (v23); + TEST (v24); + TEST (v25); + TEST (v26); + TEST (v27); + TEST (v28); + TEST (v29); + TEST (v30); + TEST (v31); +} + +/* +** n_ls_v33: +** rdsvl (x[0-9]+), #1 +** mov (x[0-9]+), #?33 +** mul (x[0-9]+), (?:\1, \2|\2, \1) +** sub sp, sp, \3 +** ... +** smstart sm +** ... +** smstop sm +** ... +** rdsvl (x[0-9]+), #1 +** mov (x[0-9]+), #?33 +** mul (x[0-9]+), (?:\4, \5|\5, \4) +** add sp, sp, \6 +** ... 
+*/ +[[arm::locally_streaming]] void +n_ls_v33 () +{ + TEST (v0); + TEST (v1); + TEST (v2); + TEST (v3); + TEST (v4); + TEST (v5); + TEST (v6); + TEST (v7); + TEST (v8); + TEST (v9); + TEST (v10); + TEST (v11); + TEST (v12); + TEST (v13); + TEST (v14); + TEST (v15); + TEST (v16); + TEST (v17); + TEST (v18); + TEST (v19); + TEST (v20); + TEST (v21); + TEST (v22); + TEST (v23); + TEST (v24); + TEST (v25); + TEST (v26); + TEST (v27); + TEST (v28); + TEST (v29); + TEST (v30); + TEST (v31); + TEST (v32); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sme/pr121028.c b/gcc/testsuite/gcc.target/aarch64/sme/pr121028.c index a6aa11900f2e..b06e011c033d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/pr121028.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/pr121028.c @@ -24,7 +24,9 @@ void sc_caller_sme() __arm_streaming_compatible ns_callee (); } -#pragma GCC target "+nosme" +/* Add +sve to prevent passing +fcma to the assembler, since +fcma was + added to assemblers later than SME support. */ +#pragma GCC target "+sve+nosme" /* ** sc_caller_nosme: diff --git a/gcc/testsuite/gcc.target/aarch64/sme/vect-dotprod-twoway.c b/gcc/testsuite/gcc.target/aarch64/sme/vect-dotprod-twoway.c index 77a019a2f388..c8b68c797272 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/vect-dotprod-twoway.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/vect-dotprod-twoway.c @@ -1,4 +1,4 @@ -/* { dg-additional-options "-O2 -ftree-vectorize" } */ +/* { dg-additional-options "-O2 -ftree-vectorize -mtune=generic-armv9-a" } */ #include <stdint.h> #pragma GCC target "+sme2" diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldr_zt.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldr_zt.c index a614fbc9537c..1baf719fb0a4 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldr_zt.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldr_zt.c @@ -29,8 +29,8 @@ PROTO (ldr_zt0_x0p64, void, (char *x0)) { svldr_zt (0, x0 + 64); } /* ** ldr_zt0_x0_vl1: -** incb x0 +** addsvl x0, x0, #?1 ** ldr 
zt0, \[x0\] ** ret */ -PROTO (ldr_zt0_x0_vl1, void, (char *x0)) { svldr_zt (0, x0 + svcntb()); } +PROTO (ldr_zt0_x0_vl1, void, (char *x0)) { svldr_zt (0, x0 + svcntsb()); } diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/str_zt.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/str_zt.c index c8ecacb10a01..9e146ed1fc8d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/str_zt.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/str_zt.c @@ -29,8 +29,8 @@ PROTO (str_zt0_x0p64, void, (char *x0)) { svstr_zt (0, x0 + 64); } /* ** str_zt0_x0_vl1: -** incb x0 +** addsvl x0, x0, #?1 ** str zt0, \[x0\] ** ret */ -PROTO (str_zt0_x0_vl1, void, (char *x0)) { svstr_zt (0, x0 + svcntb()); } +PROTO (str_zt0_x0_vl1, void, (char *x0)) { svstr_zt (0, x0 + svcntsb()); }
