https://github.com/c-rhodes updated https://github.com/llvm/llvm-project/pull/177169
>From 092c1fcd76eaacd56093409fef476bb3cb53e4ae Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell <[email protected]> Date: Wed, 21 Jan 2026 13:38:07 +0000 Subject: [PATCH] [AArch64][SME] Disable tail calls in new ZA/ZT0 functions (#177152) Allowing this can result in invalid tail calls to shared ZA functions. It may be possible to limit this to the case where the caller is private ZA and the callee shares ZA, but for now it is generally disabled. (cherry picked from commit 10aca26ffffe6a9ee049f479ed7fee9e07421dad) --- .../Target/AArch64/AArch64ISelLowering.cpp | 3 +- .../AArch64/sme-new-za-zt0-no-tail-call.ll | 78 +++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AArch64/sme-new-za-zt0-no-tail-call.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 74ee8ff8ab5f5..093927049e9d1 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9328,7 +9328,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization( if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() || CallAttrs.requiresPreservingAllZAState() || CallAttrs.requiresPreservingZT0() || - CallAttrs.caller().hasStreamingBody()) + CallAttrs.caller().hasStreamingBody() || CallAttrs.caller().isNewZA() || + CallAttrs.caller().isNewZT0()) return false; // Functions using the C or Fast calling convention that have an SVE signature diff --git a/llvm/test/CodeGen/AArch64/sme-new-za-zt0-no-tail-call.ll b/llvm/test/CodeGen/AArch64/sme-new-za-zt0-no-tail-call.ll new file mode 100644 index 0000000000000..3c76132556600 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-new-za-zt0-no-tail-call.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -O3 -verify-machineinstrs < %s | FileCheck %s + +declare void @inout_za_zt0() "aarch64_inout_za" "aarch64_inout_zt0" + +define void @new_za_zt0() "aarch64_new_za" "aarch64_new_zt0" { +; CHECK-LABEL: new_za_zt0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbz x8, .LBB0_2 +; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: zero {za} +; CHECK-NEXT: zero { zt0 } +; CHECK-NEXT: .LBB0_2: // %entry +; CHECK-NEXT: smstart za +; CHECK-NEXT: bl inout_za_zt0 +; CHECK-NEXT: smstop za +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + tail call void @inout_za_zt0() + ret void +} + +declare void @inout_za() "aarch64_inout_za" + +define void @new_za() "aarch64_new_za" { +; CHECK-LABEL: new_za: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbz x8, .LBB1_2 +; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: zero {za} +; CHECK-NEXT: .LBB1_2: // %entry +; CHECK-NEXT: smstart za +; CHECK-NEXT: bl inout_za +; CHECK-NEXT: smstop za +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + tail call void @inout_za() + ret void +} + +declare void @inout_zt0() "aarch64_inout_zt0" + +define void @new_zt0() "aarch64_new_zt0" { +; CHECK-LABEL: new_zt0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbz x8, .LBB2_2 +; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: zero { zt0 } +; CHECK-NEXT: .LBB2_2: // %entry +; CHECK-NEXT: smstart za +; CHECK-NEXT: bl inout_zt0 +; CHECK-NEXT: smstop za +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + tail call void @inout_zt0() + ret void +} _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
