@@ -0,0 +1,363 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs | FileCheck %s
+
+; Dynamically-sized allocation, needs a loop which can handle any size at
+; runtime. The expected code rounds the requested size up to a multiple of 16
+; (add #15, then clear the low four bits with a full 64-bit mask) and subtracts
+; it from SP to get the target SP. SP is then moved down 4096 bytes at a time,
+; storing xzr to [sp] after each step that has not yet reached the target. The
+; final iteration of the loop will temporarily put SP below the target address,
+; but this doesn't break any of the ABI constraints on the stack, and also
+; doesn't probe below the target SP value: the loop exits before storing, SP is
+; set to the target, and only then is the last probe written.
+; NOTE(review): attribute group #0 is defined outside this excerpt; it is
+; presumably what enables stack probing - confirm against the end of the file.
+define void @dynamic(i64 %size, ptr %out) #0 {
+; CHECK-LABEL: dynamic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    add x9, x0, #15
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    and x9, x9, #0xfffffffffffffff0
+; CHECK-NEXT:    sub x8, x8, x9
+; CHECK-NEXT:  .LBB0_1: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
+; CHECK-NEXT:    cmp sp, x8
+; CHECK-NEXT:    b.le .LBB0_3
+; CHECK-NEXT:  // %bb.2: // in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    str xzr, [sp]
+; CHECK-NEXT:    b .LBB0_1
+; CHECK-NEXT:  .LBB0_3:
+; CHECK-NEXT:    mov sp, x8
+; CHECK-NEXT:    str xzr, [sp]
+; CHECK-NEXT:    str x8, [x1]
+; CHECK-NEXT:    mov sp, x29
+; CHECK-NEXT:    .cfi_def_cfa wsp, 16
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+; CHECK-NEXT:    ret
+  %v = alloca i8, i64 %size, align 1
+  store ptr %v, ptr %out, align 8
+  ret void
+}
+
+; This function has a fixed-size stack slot and a dynamic one. The fixed-size
+; slot isn't large enough that we would normally probe it, but we need to do so
+; here, otherwise the gap between the CSR save and the first probe of the
+; dynamic allocation could be too large when the size of the dynamic allocation
+; is close to the guard size. In the expected code the fixed slot is allocated
+; and probed by a single pre-indexed store (str xzr, [sp, #-64]!), and its
+; address (x29 - 64) is written to %out1. The dynamic part then uses the same
+; sequence as @dynamic: round the size up to a multiple of 16 with a full
+; 64-bit mask, step SP down 4096 bytes at a time with a probe after each step,
+; and finish with a probe at the target SP, which is written to %out2.
+; NOTE(review): attribute group #0 is defined outside this excerpt; it is
+; presumably what enables stack probing - confirm against the end of the file.
+define void @dynamic_fixed(i64 %size, ptr %out1, ptr %out2) #0 {
+; CHECK-LABEL: dynamic_fixed:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    str xzr, [sp, #-64]!
+; CHECK-NEXT:    add x9, x0, #15
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    sub x10, x29, #64
+; CHECK-NEXT:    and x9, x9, #0xfffffffffffffff0
+; CHECK-NEXT:    str x10, [x1]
+; CHECK-NEXT:    sub x8, x8, x9
+; CHECK-NEXT:  .LBB1_1: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
+; CHECK-NEXT:    cmp sp, x8
+; CHECK-NEXT:    b.le .LBB1_3
+; CHECK-NEXT:  // %bb.2: // in Loop: Header=BB1_1 Depth=1
+; CHECK-NEXT:    str xzr, [sp]
+; CHECK-NEXT:    b .LBB1_1
+; CHECK-NEXT:  .LBB1_3:
+; CHECK-NEXT:    mov sp, x8
+; CHECK-NEXT:    str xzr, [sp]
+; CHECK-NEXT:    str x8, [x2]
+; CHECK-NEXT:    mov sp, x29
+; CHECK-NEXT:    .cfi_def_cfa wsp, 16
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+; CHECK-NEXT:    ret
+  %v1 = alloca i8, i64 64, align 1
+  store ptr %v1, ptr %out1, align 8
+  %v2 = alloca i8, i64 %size, align 1
+  store ptr %v2, ptr %out2, align 8
+  ret void
+}
+
+; Dynamic allocation, with an alignment requirement greater than the alignment
+; of SP. Done by ANDing the target SP with a constant to align it down, then
+; doing the loop as normal. Note that we also re-align the stack in the prolog,
+; which isn't actually needed because the only aligned allocations are dynamic;
+; this is done even without stack probing.
+define void @dynamic_align_64(i64 %size, ptr %out) #0 {
+; CHECK-LABEL: dynamic_align_64:
+; CHECK: // %bb.0:
+; CHECK-NEXT:stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT:.cfi_def_cfa_offset 32
+; CHECK-NEXT:str x19, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:mov x29, sp
+; CHECK-NEXT:.cfi_def_cfa w29, 32
+; CHECK-NEXT:.cfi_offset w19, -16
+; CHECK-NEXT:.cfi_offset w30, -24
+; CHECK-NEXT:.cfi_offset w29, -32
+; CHECK-NEXT:sub x9, sp, #32
+; CHECK-NEXT:and sp, x9, #0xffc0
+; CHECK-NEXT:add x9, x0, #15
+; CHECK-NEXT:mov x8, sp
+; CHECK-NEXT:str xzr, [sp]
+; CHECK-NEXT:and x9, x9, #0xfff0
+; CHECK-NEXT:mov x19, sp
+; CHECK-NEXT:sub x8, x8, x9
+; CHECK-NEXT:and x8, x8, #0xffc0
+; CHECK-NEXT: .LBB2_1: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:sub sp, sp, #1, lsl #12 // =4096
+; CHECK-NEXT:cmp sp, x8
+; CHECK-NEXT:b.le .LBB2_3
+; CHECK-NEXT: // %bb.2: // in Loop: Header=BB2_1 Depth=1
+;