[llvm] [clang] [AArch64] Stack probing for dynamic allocas in SelectionDAG (PR #66525)

2023-11-30 Thread Momchil Velikov via cfe-commits

momchil-velikov wrote:

> Do the call frame changes here affect Windows? (I guess that would indicate a 
> latent bug in our Windows ABI support...)

TBH, I have not tested on Windows. However, by construction, I've hopefully
made sure the Windows and non-Windows code paths are isolated and separate -
either with checks of whether the target is or isn't Windows, or with a check
for a particular kind of stack probing (e.g. the attribute
"probe-stack"="inline" means not Windows).

https://github.com/llvm/llvm-project/pull/66525
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [AArch64] Stack probing for dynamic allocas in SelectionDAG (PR #66525)

2023-11-23 Thread Momchil Velikov via cfe-commits


@@ -0,0 +1,363 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs | FileCheck %s
+
+; Dynamically-sized allocation, needs a loop which can handle any size at
+; runtime. The final iteration of the loop will temporarily put SP below the
+; target address, but this doesn't break any of the ABI constraints on the
+; stack, and also doesn't probe below the target SP value.
+define void @dynamic(i64 %size, ptr %out) #0 {
+; CHECK-LABEL: dynamic:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:.cfi_def_cfa_offset 16
+; CHECK-NEXT:mov x29, sp
+; CHECK-NEXT:.cfi_def_cfa w29, 16
+; CHECK-NEXT:.cfi_offset w30, -8
+; CHECK-NEXT:.cfi_offset w29, -16
+; CHECK-NEXT:add x9, x0, #15
+; CHECK-NEXT:mov x8, sp
+; CHECK-NEXT:and x9, x9, #0xfff0
+; CHECK-NEXT:sub x8, x8, x9
+; CHECK-NEXT:  .LBB0_1: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:sub sp, sp, #1, lsl #12 // =4096
+; CHECK-NEXT:cmp sp, x8
+; CHECK-NEXT:b.le .LBB0_3
+; CHECK-NEXT:  // %bb.2: // in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:str xzr, [sp]
+; CHECK-NEXT:b .LBB0_1
+; CHECK-NEXT:  .LBB0_3:
+; CHECK-NEXT:mov sp, x8
+; CHECK-NEXT:str xzr, [sp]
+; CHECK-NEXT:str x8, [x1]
+; CHECK-NEXT:mov sp, x29
+; CHECK-NEXT:.cfi_def_cfa wsp, 16
+; CHECK-NEXT:ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:.cfi_def_cfa_offset 0
+; CHECK-NEXT:.cfi_restore w30
+; CHECK-NEXT:.cfi_restore w29
+; CHECK-NEXT:ret
+  %v = alloca i8, i64 %size, align 1
+  store ptr %v, ptr %out, align 8
+  ret void
+}
+
+; This function has a fixed-size stack slot and a dynamic one. The fixed size
+; slot isn't large enough that we would normally probe it, but we need to do so
+; here otherwise the gap between the CSR save and the first probe of the
+; dynamic allocation could be too far apart when the size of the dynamic
+; allocation is close to the guard size.
+define void @dynamic_fixed(i64 %size, ptr %out1, ptr %out2) #0 {
+; CHECK-LABEL: dynamic_fixed:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:.cfi_def_cfa_offset 16
+; CHECK-NEXT:mov x29, sp
+; CHECK-NEXT:.cfi_def_cfa w29, 16
+; CHECK-NEXT:.cfi_offset w30, -8
+; CHECK-NEXT:.cfi_offset w29, -16
+; CHECK-NEXT:str xzr, [sp, #-64]!
+; CHECK-NEXT:add x9, x0, #15
+; CHECK-NEXT:mov x8, sp
+; CHECK-NEXT:sub x10, x29, #64
+; CHECK-NEXT:and x9, x9, #0xfff0
+; CHECK-NEXT:str x10, [x1]
+; CHECK-NEXT:sub x8, x8, x9
+; CHECK-NEXT:  .LBB1_1: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:sub sp, sp, #1, lsl #12 // =4096
+; CHECK-NEXT:cmp sp, x8
+; CHECK-NEXT:b.le .LBB1_3
+; CHECK-NEXT:  // %bb.2: // in Loop: Header=BB1_1 Depth=1
+; CHECK-NEXT:str xzr, [sp]
+; CHECK-NEXT:b .LBB1_1
+; CHECK-NEXT:  .LBB1_3:
+; CHECK-NEXT:mov sp, x8
+; CHECK-NEXT:str xzr, [sp]
+; CHECK-NEXT:str x8, [x2]
+; CHECK-NEXT:mov sp, x29
+; CHECK-NEXT:.cfi_def_cfa wsp, 16
+; CHECK-NEXT:ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:.cfi_def_cfa_offset 0
+; CHECK-NEXT:.cfi_restore w30
+; CHECK-NEXT:.cfi_restore w29
+; CHECK-NEXT:ret
+  %v1 = alloca i8, i64 64, align 1
+  store ptr %v1, ptr %out1, align 8
+  %v2 = alloca i8, i64 %size, align 1
+  store ptr %v2, ptr %out2, align 8
+  ret void
+}
+
+; Dynamic allocation, with an alignment requirement greater than the alignment
+; of SP. Done by ANDing the target SP with a constant to align it down, then
+; doing the loop as normal. Note that we also re-align the stack in the prolog,
+; which isn't actually needed because the only aligned allocations are dynamic,
+; this is done even without stack probing.
+define void @dynamic_align_64(i64 %size, ptr %out) #0 {
+; CHECK-LABEL: dynamic_align_64:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT:.cfi_def_cfa_offset 32
+; CHECK-NEXT:str x19, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:mov x29, sp
+; CHECK-NEXT:.cfi_def_cfa w29, 32
+; CHECK-NEXT:.cfi_offset w19, -16
+; CHECK-NEXT:.cfi_offset w30, -24
+; CHECK-NEXT:.cfi_offset w29, -32
+; CHECK-NEXT:sub x9, sp, #32
+; CHECK-NEXT:and sp, x9, #0xffc0
+; CHECK-NEXT:add x9, x0, #15
+; CHECK-NEXT:mov x8, sp
+; CHECK-NEXT:str xzr, [sp]
+; CHECK-NEXT:and x9, x9, #0xfff0
+; CHECK-NEXT:mov x19, sp
+; CHECK-NEXT:sub x8, x8, x9
+; CHECK-NEXT:and x8, x8, #0xffc0
+; CHECK-NEXT:  .LBB2_1: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:sub sp, sp, #1, lsl #12 // =4096
+; CHECK-NEXT:cmp sp, x8
+; CHECK-NEXT:b.le .LBB2_3
+; CHECK-NEXT:  // %bb.2: // in Loop: Header=BB2_1 Depth=1
+; 

[llvm] [clang] [AArch64] Stack probing for dynamic allocas in SelectionDAG (PR #66525)

2023-11-23 Thread Momchil Velikov via cfe-commits


@@ -861,6 +861,12 @@ def AArch64stilp : SDNode<"AArch64ISD::STILP", 
SDT_AArch64stilp, [SDNPHasChain,
 def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, 
SDNPMayStore, SDNPMemOperand]>;
 
 def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;
+
+def AArch64probedalloca
+: SDNode<"AArch64ISD::PROBED_ALLOCA",
+ SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain]>;

momchil-velikov wrote:

Done

https://github.com/llvm/llvm-project/pull/66525
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [AArch64] Stack probing for dynamic allocas in SelectionDAG (PR #66525)

2023-11-14 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 edited 
https://github.com/llvm/llvm-project/pull/66525
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [AArch64] Stack probing for dynamic allocas in SelectionDAG (PR #66525)

2023-11-04 Thread Momchil Velikov via cfe-commits

momchil-velikov wrote:

Ping?

https://github.com/llvm/llvm-project/pull/66525
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits