Hi Spencer, > On 28 Jul 2025, at 16:25, Spencer Abson <spencer.ab...@arm.com> wrote: > > Streaming-compatible functions can be compiled without SME enabled, but need > to use "SMSTART SM" and "SMSTOP SM" to temporarily switch into the streaming > mode of a callee. These switches are conditional on the current mode being > opposite to the target mode, so no SME instructions are executed if SME is not > available. > > However, in GAS, "SMSTART SM" and "SMSTOP SM" always require +sme. A call > from a streaming-compatible function, compiled without SME enabled, to a non > -streaming function will be rejected as:
I guess this is an argument for allowing SMTART/SMSTOP in gas unconditionally? Though it would need to be consistent with LLVM and we’d still need a workaround for older binutils, so okay... > > Error: selected processor does not support `smstop sm'.. > > To work around this, we make use of the .inst directive to insert the literal > encodings of "SMSTART SM" and "SMSTOP SM". > > gcc/ChangeLog: > PR target/121028 > * config/aarch64/aarch64-sme.md (aarch64_smstart_sm): Use the .inst > directive if !TARGET_SME. > (aarch64_smstop_sm): Likewise. > > gcc/testsuite/ChangeLog: > PR target/121028 > * gcc.target/aarch64/sme/call_sm_switch_1.c: Tell check-function > -bodies not to ignore .inst directives, and replace the test for > "smstart sm" with one for it's encoding. > * gcc.target/aarch64/sme/call_sm_switch_11.c: Likewise. > * gcc.target/aarch64/sme/pr121028.c: New test. > --- > gcc/config/aarch64/aarch64-sme.md | 12 ++++- > .../gcc.target/aarch64/sme/call_sm_switch_1.c | 4 +- > .../aarch64/sme/call_sm_switch_11.c | 5 +- > .../gcc.target/aarch64/sme/pr121028.c | 46 +++++++++++++++++++ > 4 files changed, 61 insertions(+), 6 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/pr121028.c > > diff --git a/gcc/config/aarch64/aarch64-sme.md > b/gcc/config/aarch64/aarch64-sme.md > index 6b3f4390943..5264bf75c0b 100644 > --- a/gcc/config/aarch64/aarch64-sme.md > +++ b/gcc/config/aarch64/aarch64-sme.md > @@ -62,6 +62,10 @@ > ;; (b) they are sometimes used conditionally, particularly in streaming- > ;; compatible code. > ;; > +;; To prevent the latter from upsetting the assembler, we emit the literal > +;; encodings of "SMSTART SM" and "SMSTOP SM" when compiling without > +;; TARGET_SME. > +;; > ;; ========================================================================= > > ;; ------------------------------------------------------------------------- > @@ -161,7 +165,9 @@ > (clobber (reg:VNx16BI P14_REGNUM)) > (clobber (reg:VNx16BI P15_REGNUM))] > "" > - "smstart\tsm" > + { > + return TARGET_SME ? "smstart\tsm" : ".inst 0xd503437f"; > + } Usually when we emit such .inst directives we also emit the mnemonic as an assembly comment to aid human assembly debugging. Ok by me with these changes, but maybe Richard is best-placed to comment on the SME parts. Thanks, Kyrill > ) > > ;; Turn off streaming mode. This clobbers all SVE state. > @@ -196,7 +202,9 @@ > (clobber (reg:VNx16BI P14_REGNUM)) > (clobber (reg:VNx16BI P15_REGNUM))] > "" > - "smstop\tsm" > + { > + return TARGET_SME ? "smstop\tsm" : ".inst 0xd503427f"; > + } > ) > > ;; ------------------------------------------------------------------------- > diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c > b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c > index 98922aaeae0..7d514bd46c1 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c > +++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c > @@ -1,5 +1,5 @@ > // { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls > -funwind-tables" } > -// { dg-final { check-function-bodies "**" "" } } > +// { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } > {\t\.inst} } } > > void ns_callee (); > void s_callee () [[arm::streaming]]; > @@ -218,7 +218,7 @@ sc_caller_x1 (int *ptr, int a) > [[arm::streaming_compatible]] > ** bl ns_callee_stack > ** ldr x16, \[x29, #?16\] > ** tbz x16, 0, .* > -** smstart sm > +** .inst 0xd503437f > ** ... > */ > void > diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_11.c > b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_11.c > index ee6f98737d9..7f1eab716eb 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_11.c > +++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_11.c > @@ -1,5 +1,6 @@ > // { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls > -funwind-tables -mtrack-speculation" } > -// { dg-final { check-function-bodies "**" "" } } > +// { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } > {\t\.inst} } } > + > > void ns_callee (); > void s_callee () [[arm::streaming]]; > @@ -196,7 +197,7 @@ sc_caller_x1 (int *ptr, int a) > [[arm::streaming_compatible]] > ** tst x16, #?1 > ** beq [^\n]* > ** csel x15, x15, xzr, ne > -** smstart sm > +** .inst 0xd503437f > ** ... > */ > void > diff --git a/gcc/testsuite/gcc.target/aarch64/sme/pr121028.c > b/gcc/testsuite/gcc.target/aarch64/sme/pr121028.c > new file mode 100644 > index 00000000000..fa36d4ec170 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sme/pr121028.c > @@ -0,0 +1,46 @@ > +// PR121028 > +// { dg-do assemble { target aarch64_asm_sme_ok } } > +// { dg-options "-O --save-temps" } > +// { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } > {\t\.inst} } } > + > +void ns_callee (); > + > +/* > +** sc_caller_sme: > +** ... > +** mrs x16, svcr > +** str x16, \[x29, #?16\] > +** ldr x16, \[x29, #?16\] > +** tbz x16, 0, .* > +** smstop sm > +** bl ns_callee > +** ldr x16, \[x29, #?16\] > +** tbz x16, 0, .* > +** smstart sm > +** ... > +*/ > +void sc_caller_sme() __arm_streaming_compatible > +{ > + ns_callee (); > +} > + > +#pragma GCC target "+nosme" > + > +/* > +** sc_caller_nosme: > +** ... > +** bl __arm_sme_state > +** str x0, \[x29, #?16\] > +** ldr x16, \[x29, #?16\] > +** tbz x16, 0, .* > +** .inst 0xd503427f > +** bl ns_callee > +** ldr x16, \[x29, #?16\] > +** tbz x16, 0, .* > +** .inst 0xd503437f > +** ... > +*/ > +void sc_caller_nosme() __arm_streaming_compatible > +{ > + ns_callee (); > +} > -- > 2.34.1 >