https://github.com/c-rhodes updated https://github.com/llvm/llvm-project/pull/177169

>From 092c1fcd76eaacd56093409fef476bb3cb53e4ae Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <[email protected]>
Date: Wed, 21 Jan 2026 13:38:07 +0000
Subject: [PATCH] [AArch64][SME] Disable tail calls in new ZA/ZT0 functions
 (#177152)

Allowing this can result in invalid tail calls to shared ZA functions.

It may be possible to limit this to the case where the caller is private
ZA and the callee shares ZA, but for now it is generally disabled.

(cherry picked from commit 10aca26ffffe6a9ee049f479ed7fee9e07421dad)
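
For context (illustration only, not part of the patch): at the source level, the
case this guards against looks roughly like the sketch below, written with the
ACLE SME keyword attributes. The function names are made up, and it assumes a
compiler with SME support (e.g. clang with -march=armv9-a+sme2).

  /* Hypothetical example, not from the patch: a "new ZA" caller invoking a
     callee that shares ZA. The caller must enable ZA on entry and disable it
     before returning, so the call below cannot be lowered as a tail call
     (a plain branch would skip the required "smstop za"). */
  void shared_za_callee(void) __arm_inout("za");

  void new_za_caller(void) __arm_new("za") {
    shared_za_callee();   /* emitted as a normal call ("bl"), not a tail call ("b") */
  }
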
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  3 +-
 .../AArch64/sme-new-za-zt0-no-tail-call.ll    | 78 +++++++++++++++++++
 2 files changed, 80 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sme-new-za-zt0-no-tail-call.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 74ee8ff8ab5f5..093927049e9d1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9328,7 +9328,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
   if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() ||
       CallAttrs.requiresPreservingAllZAState() ||
       CallAttrs.requiresPreservingZT0() ||
-      CallAttrs.caller().hasStreamingBody())
+      CallAttrs.caller().hasStreamingBody() || CallAttrs.caller().isNewZA() ||
+      CallAttrs.caller().isNewZT0())
     return false;
 
   // Functions using the C or Fast calling convention that have an SVE signature
diff --git a/llvm/test/CodeGen/AArch64/sme-new-za-zt0-no-tail-call.ll b/llvm/test/CodeGen/AArch64/sme-new-za-zt0-no-tail-call.ll
new file mode 100644
index 0000000000000..3c76132556600
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-new-za-zt0-no-tail-call.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -O3 -verify-machineinstrs < %s | FileCheck %s
+
+declare void @inout_za_zt0() "aarch64_inout_za" "aarch64_inout_zt0"
+
+define void @new_za_zt0() "aarch64_new_za" "aarch64_new_zt0" {
+; CHECK-LABEL: new_za_zt0:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mrs x8, TPIDR2_EL0
+; CHECK-NEXT:    cbz x8, .LBB0_2
+; CHECK-NEXT:  // %bb.1: // %entry
+; CHECK-NEXT:    bl __arm_tpidr2_save
+; CHECK-NEXT:    msr TPIDR2_EL0, xzr
+; CHECK-NEXT:    zero {za}
+; CHECK-NEXT:    zero { zt0 }
+; CHECK-NEXT:  .LBB0_2: // %entry
+; CHECK-NEXT:    smstart za
+; CHECK-NEXT:    bl inout_za_zt0
+; CHECK-NEXT:    smstop za
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  tail call void @inout_za_zt0()
+  ret void
+}
+
+declare void @inout_za() "aarch64_inout_za"
+
+define void @new_za() "aarch64_new_za" {
+; CHECK-LABEL: new_za:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mrs x8, TPIDR2_EL0
+; CHECK-NEXT:    cbz x8, .LBB1_2
+; CHECK-NEXT:  // %bb.1: // %entry
+; CHECK-NEXT:    bl __arm_tpidr2_save
+; CHECK-NEXT:    msr TPIDR2_EL0, xzr
+; CHECK-NEXT:    zero {za}
+; CHECK-NEXT:  .LBB1_2: // %entry
+; CHECK-NEXT:    smstart za
+; CHECK-NEXT:    bl inout_za
+; CHECK-NEXT:    smstop za
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  tail call void @inout_za()
+  ret void
+}
+
+declare void @inout_zt0() "aarch64_inout_zt0"
+
+define void @new_zt0() "aarch64_new_zt0" {
+; CHECK-LABEL: new_zt0:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mrs x8, TPIDR2_EL0
+; CHECK-NEXT:    cbz x8, .LBB2_2
+; CHECK-NEXT:  // %bb.1: // %entry
+; CHECK-NEXT:    bl __arm_tpidr2_save
+; CHECK-NEXT:    msr TPIDR2_EL0, xzr
+; CHECK-NEXT:    zero { zt0 }
+; CHECK-NEXT:  .LBB2_2: // %entry
+; CHECK-NEXT:    smstart za
+; CHECK-NEXT:    bl inout_zt0
+; CHECK-NEXT:    smstop za
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  tail call void @inout_zt0()
+  ret void
+}
