pratlucas created this revision.
Herald added subscribers: hiraditya, kristof.beyls.
Herald added a project: All.
pratlucas requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, MaskRay.
Herald added projects: clang, LLVM.

Currently, an AAPCS-compliant frame record is not always created for
functions when it should be. Although a consistent frame record might not
be required in some cases, there are still scenarios where applications
may want to make use of the call hierarchy made available through it.

In order to enable the use of AAPCS-compliant frame records whilst keeping
backwards compatibility, this patch introduces a new command-line option
(`-mframe-chain=[none|aapcs|aapcs+leaf]`) for the AArch32 and Thumb backends.
The option allows users to explicitly select when such frame records are
created, and is also useful to ensure that the extra overhead of the frame
records is only incurred when necessary, in particular for Thumb targets.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D125094

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/Arch/ARM.cpp
  llvm/lib/Target/ARM/ARM.td
  llvm/lib/Target/ARM/ARMFrameLowering.cpp
  llvm/lib/Target/ARM/ARMSubtarget.h
  llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
  llvm/test/CodeGen/ARM/frame-chain-reserved-fp.ll
  llvm/test/CodeGen/ARM/frame-chain.ll
  llvm/test/CodeGen/Thumb/frame-chain-reserved-fp.ll
  llvm/test/CodeGen/Thumb/frame-chain.ll

Index: llvm/test/CodeGen/Thumb/frame-chain.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/Thumb/frame-chain.ll
@@ -0,0 +1,301 @@
+; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=all | FileCheck %s --check-prefixes=FP,LEAF-FP
+; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP
+; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP-AAPCS
+; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf | FileCheck %s --check-prefixes=FP,LEAF-NOFP
+; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP
+; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP-AAPCS
+; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=none | FileCheck %s --check-prefixes=NOFP,LEAF-NOFP
+; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP
+; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP-AAPCS
+
+define dso_local noundef i32 @leaf(i32 noundef %0) {
+; LEAF-FP-LABEL: leaf:
+; LEAF-FP:       @ %bb.0:
+; LEAF-FP-NEXT:    .pad #4
+; LEAF-FP-NEXT:    sub sp, #4
+; LEAF-FP-NEXT:    str r0, [sp]
+; LEAF-FP-NEXT:    adds r0, r0, #4
+; LEAF-FP-NEXT:    add sp, #4
+; LEAF-FP-NEXT:    bx lr
+;
+; LEAF-FP-AAPCS-LABEL: leaf:
+; LEAF-FP-AAPCS:       @ %bb.0:
+; LEAF-FP-AAPCS-NEXT:    .save {lr}
+; LEAF-FP-AAPCS-NEXT:    push {lr}
+; LEAF-FP-AAPCS-NEXT:    mov lr, r11
+; LEAF-FP-AAPCS-NEXT:    .save {r11}
+; LEAF-FP-AAPCS-NEXT:    push {lr}
+; LEAF-FP-AAPCS-NEXT:    .setfp r11, sp
+; LEAF-FP-AAPCS-NEXT:    add r11, sp, #0
+; LEAF-FP-AAPCS-NEXT:    .pad #4
+; LEAF-FP-AAPCS-NEXT:    sub sp, #4
+; LEAF-FP-AAPCS-NEXT:    str r0, [sp]
+; LEAF-FP-AAPCS-NEXT:    adds r0, r0, #4
+; LEAF-FP-AAPCS-NEXT:    add sp, #4
+; LEAF-FP-AAPCS-NEXT:    pop {lr}
+; LEAF-FP-AAPCS-NEXT:    mov r11, lr
+; LEAF-FP-AAPCS-NEXT:    pop {r1}
+; LEAF-FP-AAPCS-NEXT:    bx r1
+;
+; LEAF-NOFP-LABEL: leaf:
+; LEAF-NOFP:       @ %bb.0:
+; LEAF-NOFP-NEXT:    .pad #4
+; LEAF-NOFP-NEXT:    sub sp, #4
+; LEAF-NOFP-NEXT:    str r0, [sp]
+; LEAF-NOFP-NEXT:    adds r0, r0, #4
+; LEAF-NOFP-NEXT:    add sp, #4
+; LEAF-NOFP-NEXT:    bx lr
+;
+; LEAF-NOFP-AAPCS-LABEL: leaf:
+; LEAF-NOFP-AAPCS:       @ %bb.0:
+; LEAF-NOFP-AAPCS-NEXT:    .save {lr}
+; LEAF-NOFP-AAPCS-NEXT:    push {lr}
+; LEAF-NOFP-AAPCS-NEXT:    mov lr, r11
+; LEAF-NOFP-AAPCS-NEXT:    .save {r11}
+; LEAF-NOFP-AAPCS-NEXT:    push {lr}
+; LEAF-NOFP-AAPCS-NEXT:    .setfp r11, sp
+; LEAF-NOFP-AAPCS-NEXT:    add r11, sp, #0
+; LEAF-NOFP-AAPCS-NEXT:    .pad #4
+; LEAF-NOFP-AAPCS-NEXT:    sub sp, #4
+; LEAF-NOFP-AAPCS-NEXT:    str r0, [sp]
+; LEAF-NOFP-AAPCS-NEXT:    adds r0, r0, #4
+; LEAF-NOFP-AAPCS-NEXT:    add sp, #4
+; LEAF-NOFP-AAPCS-NEXT:    pop {lr}
+; LEAF-NOFP-AAPCS-NEXT:    mov r11, lr
+; LEAF-NOFP-AAPCS-NEXT:    pop {r1}
+; LEAF-NOFP-AAPCS-NEXT:    bx r1
+  %2 = alloca i32, align 4
+  store i32 %0, i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
+  %4 = add nsw i32 %3, 4
+  ret i32 %4
+}
+
+define dso_local noundef i32 @non_leaf(i32 noundef %0) {
+; FP-LABEL: non_leaf:
+; FP:       @ %bb.0:
+; FP-NEXT:    .save {r7, lr}
+; FP-NEXT:    push {r7, lr}
+; FP-NEXT:    .setfp r7, sp
+; FP-NEXT:    add r7, sp, #0
+; FP-NEXT:    .pad #8
+; FP-NEXT:    sub sp, #8
+; FP-NEXT:    str r0, [sp, #4]
+; FP-NEXT:    bl leaf
+; FP-NEXT:    adds r0, r0, #1
+; FP-NEXT:    add sp, #8
+; FP-NEXT:    pop {r7}
+; FP-NEXT:    pop {r1}
+; FP-NEXT:    bx r1
+;
+; FP-AAPCS-LABEL: non_leaf:
+; FP-AAPCS:       @ %bb.0:
+; FP-AAPCS-NEXT:    .save {lr}
+; FP-AAPCS-NEXT:    push {lr}
+; FP-AAPCS-NEXT:    mov lr, r11
+; FP-AAPCS-NEXT:    .save {r11}
+; FP-AAPCS-NEXT:    push {lr}
+; FP-AAPCS-NEXT:    .setfp r11, sp
+; FP-AAPCS-NEXT:    add r11, sp, #0
+; FP-AAPCS-NEXT:    .pad #8
+; FP-AAPCS-NEXT:    sub sp, #8
+; FP-AAPCS-NEXT:    str r0, [sp, #4]
+; FP-AAPCS-NEXT:    bl leaf
+; FP-AAPCS-NEXT:    adds r0, r0, #1
+; FP-AAPCS-NEXT:    add sp, #8
+; FP-AAPCS-NEXT:    pop {lr}
+; FP-AAPCS-NEXT:    mov r11, lr
+; FP-AAPCS-NEXT:    pop {r1}
+; FP-AAPCS-NEXT:    bx r1
+;
+; NOFP-LABEL: non_leaf:
+; NOFP:       @ %bb.0:
+; NOFP-NEXT:    .save {r7, lr}
+; NOFP-NEXT:    push {r7, lr}
+; NOFP-NEXT:    .pad #8
+; NOFP-NEXT:    sub sp, #8
+; NOFP-NEXT:    str r0, [sp, #4]
+; NOFP-NEXT:    bl leaf
+; NOFP-NEXT:    adds r0, r0, #1
+; NOFP-NEXT:    add sp, #8
+; NOFP-NEXT:    pop {r7}
+; NOFP-NEXT:    pop {r1}
+; NOFP-NEXT:    bx r1
+;
+; NOFP-AAPCS-LABEL: non_leaf:
+; NOFP-AAPCS:       @ %bb.0:
+; NOFP-AAPCS-NEXT:    .save {lr}
+; NOFP-AAPCS-NEXT:    push {lr}
+; NOFP-AAPCS-NEXT:    mov lr, r11
+; NOFP-AAPCS-NEXT:    .save {r11}
+; NOFP-AAPCS-NEXT:    push {lr}
+; NOFP-AAPCS-NEXT:    .setfp r11, sp
+; NOFP-AAPCS-NEXT:    add r11, sp, #0
+; NOFP-AAPCS-NEXT:    .pad #8
+; NOFP-AAPCS-NEXT:    sub sp, #8
+; NOFP-AAPCS-NEXT:    str r0, [sp, #4]
+; NOFP-AAPCS-NEXT:    bl leaf
+; NOFP-AAPCS-NEXT:    adds r0, r0, #1
+; NOFP-AAPCS-NEXT:    add sp, #8
+; NOFP-AAPCS-NEXT:    pop {lr}
+; NOFP-AAPCS-NEXT:    mov r11, lr
+; NOFP-AAPCS-NEXT:    pop {r1}
+; NOFP-AAPCS-NEXT:    bx r1
+  %2 = alloca i32, align 4
+  store i32 %0, i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
+  %4 = call noundef i32 @leaf(i32 noundef %3)
+  %5 = add nsw i32 %4, 1
+  ret i32 %5
+}
+
+declare i8* @llvm.stacksave()
+define dso_local void @required_fp(i32 %0, i32 %1) {
+; FP-LABEL: required_fp:
+; FP:       @ %bb.0:
+; FP-NEXT:    .save {r4, r6, r7, lr}
+; FP-NEXT:    push {r4, r6, r7, lr}
+; FP-NEXT:    .setfp r7, sp, #8
+; FP-NEXT:    add r7, sp, #8
+; FP-NEXT:    .pad #24
+; FP-NEXT:    sub sp, #24
+; FP-NEXT:    mov r6, sp
+; FP-NEXT:    mov r2, r6
+; FP-NEXT:    str r1, [r2, #16]
+; FP-NEXT:    str r0, [r2, #20]
+; FP-NEXT:    mov r1, sp
+; FP-NEXT:    str r1, [r2, #8]
+; FP-NEXT:    lsls r1, r0, #2
+; FP-NEXT:    adds r1, r1, #7
+; FP-NEXT:    movs r3, #7
+; FP-NEXT:    bics r1, r3
+; FP-NEXT:    mov r3, sp
+; FP-NEXT:    subs r1, r3, r1
+; FP-NEXT:    mov sp, r1
+; FP-NEXT:    movs r1, #0
+; FP-NEXT:    str r1, [r6, #4]
+; FP-NEXT:    str r0, [r2]
+; FP-NEXT:    subs r4, r7, #7
+; FP-NEXT:    subs r4, #1
+; FP-NEXT:    mov sp, r4
+; FP-NEXT:    pop {r4, r6, r7}
+; FP-NEXT:    pop {r0}
+; FP-NEXT:    bx r0
+;
+; FP-AAPCS-LABEL: required_fp:
+; FP-AAPCS:       @ %bb.0:
+; FP-AAPCS-NEXT:    .save {lr}
+; FP-AAPCS-NEXT:    push {lr}
+; FP-AAPCS-NEXT:    mov lr, r11
+; FP-AAPCS-NEXT:    .save {r11}
+; FP-AAPCS-NEXT:    push {lr}
+; FP-AAPCS-NEXT:    .save {r4, r6}
+; FP-AAPCS-NEXT:    push {r4, r6}
+; FP-AAPCS-NEXT:    .setfp r11, sp
+; FP-AAPCS-NEXT:    add r11, sp, #0
+; FP-AAPCS-NEXT:    .pad #24
+; FP-AAPCS-NEXT:    sub sp, #24
+; FP-AAPCS-NEXT:    mov r6, sp
+; FP-AAPCS-NEXT:    mov r2, r6
+; FP-AAPCS-NEXT:    str r1, [r2, #16]
+; FP-AAPCS-NEXT:    str r0, [r2, #20]
+; FP-AAPCS-NEXT:    mov r1, sp
+; FP-AAPCS-NEXT:    str r1, [r2, #8]
+; FP-AAPCS-NEXT:    lsls r1, r0, #2
+; FP-AAPCS-NEXT:    adds r1, r1, #7
+; FP-AAPCS-NEXT:    movs r3, #7
+; FP-AAPCS-NEXT:    bics r1, r3
+; FP-AAPCS-NEXT:    mov r3, sp
+; FP-AAPCS-NEXT:    subs r1, r3, r1
+; FP-AAPCS-NEXT:    mov sp, r1
+; FP-AAPCS-NEXT:    movs r1, #0
+; FP-AAPCS-NEXT:    str r1, [r6, #4]
+; FP-AAPCS-NEXT:    str r0, [r2]
+; FP-AAPCS-NEXT:    mov sp, r11
+; FP-AAPCS-NEXT:    pop {r4, r6}
+; FP-AAPCS-NEXT:    pop {lr}
+; FP-AAPCS-NEXT:    mov r11, lr
+; FP-AAPCS-NEXT:    pop {r0}
+; FP-AAPCS-NEXT:    bx r0
+;
+; NOFP-LABEL: required_fp:
+; NOFP:       @ %bb.0:
+; NOFP-NEXT:    .save {r4, r6, r7, lr}
+; NOFP-NEXT:    push {r4, r6, r7, lr}
+; NOFP-NEXT:    .setfp r7, sp, #8
+; NOFP-NEXT:    add r7, sp, #8
+; NOFP-NEXT:    .pad #24
+; NOFP-NEXT:    sub sp, #24
+; NOFP-NEXT:    mov r6, sp
+; NOFP-NEXT:    mov r2, r6
+; NOFP-NEXT:    str r1, [r2, #16]
+; NOFP-NEXT:    str r0, [r2, #20]
+; NOFP-NEXT:    mov r1, sp
+; NOFP-NEXT:    str r1, [r2, #8]
+; NOFP-NEXT:    lsls r1, r0, #2
+; NOFP-NEXT:    adds r1, r1, #7
+; NOFP-NEXT:    movs r3, #7
+; NOFP-NEXT:    bics r1, r3
+; NOFP-NEXT:    mov r3, sp
+; NOFP-NEXT:    subs r1, r3, r1
+; NOFP-NEXT:    mov sp, r1
+; NOFP-NEXT:    movs r1, #0
+; NOFP-NEXT:    str r1, [r6, #4]
+; NOFP-NEXT:    str r0, [r2]
+; NOFP-NEXT:    subs r4, r7, #7
+; NOFP-NEXT:    subs r4, #1
+; NOFP-NEXT:    mov sp, r4
+; NOFP-NEXT:    pop {r4, r6, r7}
+; NOFP-NEXT:    pop {r0}
+; NOFP-NEXT:    bx r0
+;
+; NOFP-AAPCS-LABEL: required_fp:
+; NOFP-AAPCS:       @ %bb.0:
+; NOFP-AAPCS-NEXT:    .save {lr}
+; NOFP-AAPCS-NEXT:    push {lr}
+; NOFP-AAPCS-NEXT:    mov lr, r11
+; NOFP-AAPCS-NEXT:    .save {r11}
+; NOFP-AAPCS-NEXT:    push {lr}
+; NOFP-AAPCS-NEXT:    .save {r4, r6}
+; NOFP-AAPCS-NEXT:    push {r4, r6}
+; NOFP-AAPCS-NEXT:    .setfp r11, sp
+; NOFP-AAPCS-NEXT:    add r11, sp, #0
+; NOFP-AAPCS-NEXT:    .pad #24
+; NOFP-AAPCS-NEXT:    sub sp, #24
+; NOFP-AAPCS-NEXT:    mov r6, sp
+; NOFP-AAPCS-NEXT:    mov r2, r6
+; NOFP-AAPCS-NEXT:    str r1, [r2, #16]
+; NOFP-AAPCS-NEXT:    str r0, [r2, #20]
+; NOFP-AAPCS-NEXT:    mov r1, sp
+; NOFP-AAPCS-NEXT:    str r1, [r2, #8]
+; NOFP-AAPCS-NEXT:    lsls r1, r0, #2
+; NOFP-AAPCS-NEXT:    adds r1, r1, #7
+; NOFP-AAPCS-NEXT:    movs r3, #7
+; NOFP-AAPCS-NEXT:    bics r1, r3
+; NOFP-AAPCS-NEXT:    mov r3, sp
+; NOFP-AAPCS-NEXT:    subs r1, r3, r1
+; NOFP-AAPCS-NEXT:    mov sp, r1
+; NOFP-AAPCS-NEXT:    movs r1, #0
+; NOFP-AAPCS-NEXT:    str r1, [r6, #4]
+; NOFP-AAPCS-NEXT:    str r0, [r2]
+; NOFP-AAPCS-NEXT:    mov sp, r11
+; NOFP-AAPCS-NEXT:    pop {r4, r6}
+; NOFP-AAPCS-NEXT:    pop {lr}
+; NOFP-AAPCS-NEXT:    mov r11, lr
+; NOFP-AAPCS-NEXT:    pop {r0}
+; NOFP-AAPCS-NEXT:    bx r0
+  %3 = alloca i32, align 4
+  %4 = alloca i32, align 4
+  %5 = alloca i8*, align 8
+  %6 = alloca i64, align 8
+  store i32 %0, i32* %3, align 4
+  store i32 %1, i32* %4, align 4
+  %7 = load i32, i32* %3, align 4
+  %8 = zext i32 %7 to i64
+  %9 = call i8* @llvm.stacksave()
+  store i8* %9, i8** %5, align 8
+  %10 = alloca i32, i64 %8, align 4
+  store i64 %8, i64* %6, align 8
+  ret void
+}
+
Index: llvm/test/CodeGen/Thumb/frame-chain-reserved-fp.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/Thumb/frame-chain-reserved-fp.ll
@@ -0,0 +1,27 @@
+; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=all 2>&1 | FileCheck %s --check-prefix=RESERVED-R7
+; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE
+; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE
+; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+
+declare void @leaf(i32 %input)
+
+define void @reserved_r7(i32 %input) {
+; RESERVED-NONE-NOT: error: write to reserved register 'R7'
+; RESERVED-R7: error: write to reserved register 'R7'
+; RESERVED-R11-NOT: error: write to reserved register 'R7'
+  %1 = call i32 asm sideeffect "mov $0, $1", "={r7},r"(i32 %input)
+  ret void
+}
+
+define void @reserved_r11(i32 %input) {
+; RESERVED-NONE-NOT: error: write to reserved register 'R11'
+; RESERVED-R7-NOT: error: write to reserved register 'R11'
+; RESERVED-R11: error: write to reserved register 'R11'
+  %1 = call i32 asm sideeffect "mov $0, $1", "={r11},r"(i32 %input)
+  ret void
+}
Index: llvm/test/CodeGen/ARM/frame-chain.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/frame-chain.ll
@@ -0,0 +1,228 @@
+; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all | FileCheck %s --check-prefixes=FP,LEAF-FP
+; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP
+; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP-AAPCS
+; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf | FileCheck %s --check-prefixes=FP,LEAF-NOFP
+; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP
+; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP-AAPCS
+; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none | FileCheck %s --check-prefixes=NOFP,LEAF-NOFP
+; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP
+; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP-AAPCS
+
+define dso_local noundef i32 @leaf(i32 noundef %0) {
+; LEAF-FP-LABEL: leaf:
+; LEAF-FP:       @ %bb.0:
+; LEAF-FP-NEXT:    .pad #4
+; LEAF-FP-NEXT:    sub sp, sp, #4
+; LEAF-FP-NEXT:    str r0, [sp]
+; LEAF-FP-NEXT:    add r0, r0, #4
+; LEAF-FP-NEXT:    add sp, sp, #4
+; LEAF-FP-NEXT:    mov pc, lr
+;
+; LEAF-FP-AAPCS-LABEL: leaf:
+; LEAF-FP-AAPCS:       @ %bb.0:
+; LEAF-FP-AAPCS-NEXT:    .save {r11, lr}
+; LEAF-FP-AAPCS-NEXT:    push {r11, lr}
+; LEAF-FP-AAPCS-NEXT:    .setfp r11, sp
+; LEAF-FP-AAPCS-NEXT:    mov r11, sp
+; LEAF-FP-AAPCS-NEXT:    push {r0}
+; LEAF-FP-AAPCS-NEXT:    add r0, r0, #4
+; LEAF-FP-AAPCS-NEXT:    mov sp, r11
+; LEAF-FP-AAPCS-NEXT:    pop {r11, lr}
+; LEAF-FP-AAPCS-NEXT:    mov pc, lr
+;
+; LEAF-NOFP-LABEL: leaf:
+; LEAF-NOFP:       @ %bb.0:
+; LEAF-NOFP-NEXT:    .pad #4
+; LEAF-NOFP-NEXT:    sub sp, sp, #4
+; LEAF-NOFP-NEXT:    str r0, [sp]
+; LEAF-NOFP-NEXT:    add r0, r0, #4
+; LEAF-NOFP-NEXT:    add sp, sp, #4
+; LEAF-NOFP-NEXT:    mov pc, lr
+;
+; LEAF-NOFP-AAPCS-LABEL: leaf:
+; LEAF-NOFP-AAPCS:       @ %bb.0:
+; LEAF-NOFP-AAPCS-NEXT:    .save {r11, lr}
+; LEAF-NOFP-AAPCS-NEXT:    push {r11, lr}
+; LEAF-NOFP-AAPCS-NEXT:    .setfp r11, sp
+; LEAF-NOFP-AAPCS-NEXT:    mov r11, sp
+; LEAF-NOFP-AAPCS-NEXT:    push {r0}
+; LEAF-NOFP-AAPCS-NEXT:    add r0, r0, #4
+; LEAF-NOFP-AAPCS-NEXT:    mov sp, r11
+; LEAF-NOFP-AAPCS-NEXT:    pop {r11, lr}
+; LEAF-NOFP-AAPCS-NEXT:    mov pc, lr
+  %2 = alloca i32, align 4
+  store i32 %0, i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
+  %4 = add nsw i32 %3, 4
+  ret i32 %4
+}
+
+define dso_local noundef i32 @non_leaf(i32 noundef %0) {
+; FP-LABEL: non_leaf:
+; FP:       @ %bb.0:
+; FP-NEXT:    .save {r11, lr}
+; FP-NEXT:    push {r11, lr}
+; FP-NEXT:    .setfp r11, sp
+; FP-NEXT:    mov r11, sp
+; FP-NEXT:    .pad #8
+; FP-NEXT:    sub sp, sp, #8
+; FP-NEXT:    str r0, [sp, #4]
+; FP-NEXT:    bl leaf
+; FP-NEXT:    add r0, r0, #1
+; FP-NEXT:    mov sp, r11
+; FP-NEXT:    pop {r11, lr}
+; FP-NEXT:    mov pc, lr
+;
+; FP-AAPCS-LABEL: non_leaf:
+; FP-AAPCS:       @ %bb.0:
+; FP-AAPCS-NEXT:    .save {r11, lr}
+; FP-AAPCS-NEXT:    push {r11, lr}
+; FP-AAPCS-NEXT:    .setfp r11, sp
+; FP-AAPCS-NEXT:    mov r11, sp
+; FP-AAPCS-NEXT:    .pad #8
+; FP-AAPCS-NEXT:    sub sp, sp, #8
+; FP-AAPCS-NEXT:    str r0, [sp, #4]
+; FP-AAPCS-NEXT:    bl leaf
+; FP-AAPCS-NEXT:    add r0, r0, #1
+; FP-AAPCS-NEXT:    mov sp, r11
+; FP-AAPCS-NEXT:    pop {r11, lr}
+; FP-AAPCS-NEXT:    mov pc, lr
+;
+; NOFP-LABEL: non_leaf:
+; NOFP:       @ %bb.0:
+; NOFP-NEXT:    .save {r11, lr}
+; NOFP-NEXT:    push {r11, lr}
+; NOFP-NEXT:    .pad #8
+; NOFP-NEXT:    sub sp, sp, #8
+; NOFP-NEXT:    str r0, [sp, #4]
+; NOFP-NEXT:    bl leaf
+; NOFP-NEXT:    add r0, r0, #1
+; NOFP-NEXT:    add sp, sp, #8
+; NOFP-NEXT:    pop {r11, lr}
+; NOFP-NEXT:    mov pc, lr
+;
+; NOFP-AAPCS-LABEL: non_leaf:
+; NOFP-AAPCS:       @ %bb.0:
+; NOFP-AAPCS-NEXT:    .save {r11, lr}
+; NOFP-AAPCS-NEXT:    push {r11, lr}
+; NOFP-AAPCS-NEXT:    .setfp r11, sp
+; NOFP-AAPCS-NEXT:    mov r11, sp
+; NOFP-AAPCS-NEXT:    .pad #8
+; NOFP-AAPCS-NEXT:    sub sp, sp, #8
+; NOFP-AAPCS-NEXT:    str r0, [sp, #4]
+; NOFP-AAPCS-NEXT:    bl leaf
+; NOFP-AAPCS-NEXT:    add r0, r0, #1
+; NOFP-AAPCS-NEXT:    mov sp, r11
+; NOFP-AAPCS-NEXT:    pop {r11, lr}
+; NOFP-AAPCS-NEXT:    mov pc, lr
+  %2 = alloca i32, align 4
+  store i32 %0, i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
+  %4 = call noundef i32 @leaf(i32 noundef %3)
+  %5 = add nsw i32 %4, 1
+  ret i32 %5
+}
+
+declare i8* @llvm.stacksave()
+define dso_local void @required_fp(i32 %0, i32 %1) {
+; LEAF-FP-LABEL: required_fp:
+; LEAF-FP:       @ %bb.0:
+; LEAF-FP-NEXT:    .save {r4, r5, r11, lr}
+; LEAF-FP-NEXT:    push {r4, r5, r11, lr}
+; LEAF-FP-NEXT:    .setfp r11, sp, #8
+; LEAF-FP-NEXT:    add r11, sp, #8
+; LEAF-FP-NEXT:    .pad #24
+; LEAF-FP-NEXT:    sub sp, sp, #24
+; LEAF-FP-NEXT:    str r1, [r11, #-16]
+; LEAF-FP-NEXT:    mov r1, #7
+; LEAF-FP-NEXT:    add r1, r1, r0, lsl #2
+; LEAF-FP-NEXT:    str r0, [r11, #-12]
+; LEAF-FP-NEXT:    bic r1, r1, #7
+; LEAF-FP-NEXT:    str sp, [r11, #-24]
+; LEAF-FP-NEXT:    sub sp, sp, r1
+; LEAF-FP-NEXT:    mov r1, #0
+; LEAF-FP-NEXT:    str r0, [r11, #-32]
+; LEAF-FP-NEXT:    str r1, [r11, #-28]
+; LEAF-FP-NEXT:    sub sp, r11, #8
+; LEAF-FP-NEXT:    pop {r4, r5, r11, lr}
+; LEAF-FP-NEXT:    mov pc, lr
+;
+; LEAF-FP-AAPCS-LABEL: required_fp:
+; LEAF-FP-AAPCS:       @ %bb.0:
+; LEAF-FP-AAPCS-NEXT:    .save {r4, r5, r11, lr}
+; LEAF-FP-AAPCS-NEXT:    push {r4, r5, r11, lr}
+; LEAF-FP-AAPCS-NEXT:    .setfp r11, sp, #8
+; LEAF-FP-AAPCS-NEXT:    add r11, sp, #8
+; LEAF-FP-AAPCS-NEXT:    .pad #24
+; LEAF-FP-AAPCS-NEXT:    sub sp, sp, #24
+; LEAF-FP-AAPCS-NEXT:    str r1, [r11, #-16]
+; LEAF-FP-AAPCS-NEXT:    mov r1, #7
+; LEAF-FP-AAPCS-NEXT:    add r1, r1, r0, lsl #2
+; LEAF-FP-AAPCS-NEXT:    str r0, [r11, #-12]
+; LEAF-FP-AAPCS-NEXT:    bic r1, r1, #7
+; LEAF-FP-AAPCS-NEXT:    str sp, [r11, #-24]
+; LEAF-FP-AAPCS-NEXT:    sub sp, sp, r1
+; LEAF-FP-AAPCS-NEXT:    mov r1, #0
+; LEAF-FP-AAPCS-NEXT:    str r0, [r11, #-32]
+; LEAF-FP-AAPCS-NEXT:    str r1, [r11, #-28]
+; LEAF-FP-AAPCS-NEXT:    sub sp, r11, #8
+; LEAF-FP-AAPCS-NEXT:    pop {r4, r5, r11, lr}
+; LEAF-FP-AAPCS-NEXT:    mov pc, lr
+;
+; LEAF-NOFP-LABEL: required_fp:
+; LEAF-NOFP:       @ %bb.0:
+; LEAF-NOFP-NEXT:    .save {r4, r5, r11}
+; LEAF-NOFP-NEXT:    push {r4, r5, r11}
+; LEAF-NOFP-NEXT:    .setfp r11, sp, #8
+; LEAF-NOFP-NEXT:    add r11, sp, #8
+; LEAF-NOFP-NEXT:    .pad #20
+; LEAF-NOFP-NEXT:    sub sp, sp, #20
+; LEAF-NOFP-NEXT:    str r1, [r11, #-16]
+; LEAF-NOFP-NEXT:    mov r1, #7
+; LEAF-NOFP-NEXT:    add r1, r1, r0, lsl #2
+; LEAF-NOFP-NEXT:    str r0, [r11, #-12]
+; LEAF-NOFP-NEXT:    bic r1, r1, #7
+; LEAF-NOFP-NEXT:    str sp, [r11, #-20]
+; LEAF-NOFP-NEXT:    sub sp, sp, r1
+; LEAF-NOFP-NEXT:    mov r1, #0
+; LEAF-NOFP-NEXT:    str r0, [r11, #-28]
+; LEAF-NOFP-NEXT:    str r1, [r11, #-24]
+; LEAF-NOFP-NEXT:    sub sp, r11, #8
+; LEAF-NOFP-NEXT:    pop {r4, r5, r11}
+; LEAF-NOFP-NEXT:    mov pc, lr
+;
+; LEAF-NOFP-AAPCS-LABEL: required_fp:
+; LEAF-NOFP-AAPCS:       @ %bb.0:
+; LEAF-NOFP-AAPCS-NEXT:    .save {r4, r5, r11, lr}
+; LEAF-NOFP-AAPCS-NEXT:    push {r4, r5, r11, lr}
+; LEAF-NOFP-AAPCS-NEXT:    .setfp r11, sp, #8
+; LEAF-NOFP-AAPCS-NEXT:    add r11, sp, #8
+; LEAF-NOFP-AAPCS-NEXT:    .pad #24
+; LEAF-NOFP-AAPCS-NEXT:    sub sp, sp, #24
+; LEAF-NOFP-AAPCS-NEXT:    str r1, [r11, #-16]
+; LEAF-NOFP-AAPCS-NEXT:    mov r1, #7
+; LEAF-NOFP-AAPCS-NEXT:    add r1, r1, r0, lsl #2
+; LEAF-NOFP-AAPCS-NEXT:    str r0, [r11, #-12]
+; LEAF-NOFP-AAPCS-NEXT:    bic r1, r1, #7
+; LEAF-NOFP-AAPCS-NEXT:    str sp, [r11, #-24]
+; LEAF-NOFP-AAPCS-NEXT:    sub sp, sp, r1
+; LEAF-NOFP-AAPCS-NEXT:    mov r1, #0
+; LEAF-NOFP-AAPCS-NEXT:    str r0, [r11, #-32]
+; LEAF-NOFP-AAPCS-NEXT:    str r1, [r11, #-28]
+; LEAF-NOFP-AAPCS-NEXT:    sub sp, r11, #8
+; LEAF-NOFP-AAPCS-NEXT:    pop {r4, r5, r11, lr}
+; LEAF-NOFP-AAPCS-NEXT:    mov pc, lr
+  %3 = alloca i32, align 4
+  %4 = alloca i32, align 4
+  %5 = alloca i8*, align 8
+  %6 = alloca i64, align 8
+  store i32 %0, i32* %3, align 4
+  store i32 %1, i32* %4, align 4
+  %7 = load i32, i32* %3, align 4
+  %8 = zext i32 %7 to i64
+  %9 = call i8* @llvm.stacksave()
+  store i8* %9, i8** %5, align 8
+  %10 = alloca i32, i64 %8, align 4
+  store i64 %8, i64* %6, align 8
+  ret void
+}
Index: llvm/test/CodeGen/ARM/frame-chain-reserved-fp.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/frame-chain-reserved-fp.ll
@@ -0,0 +1,25 @@
+; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE
+; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE
+; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+
+declare void @leaf(i32 %input)
+
+define void @reserved_r7(i32 %input) {
+; RESERVED-NONE-NOT: error: write to reserved register 'R7'
+; RESERVED-R11-NOT: error: write to reserved register 'R7'
+  %1 = call i32 asm sideeffect "mov $0, $1", "={r7},r"(i32 %input)
+  ret void
+}
+
+define void @reserved_r11(i32 %input) {
+; RESERVED-NONE-NOT: error: write to reserved register 'R11'
+; RESERVED-R11: error: write to reserved register 'R11'
+  %1 = call i32 asm sideeffect "mov $0, $1", "={r11},r"(i32 %input)
+  ret void
+}
Index: llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
===================================================================
--- llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -208,6 +208,8 @@
   for (const CalleeSavedInfo &I : CSI) {
     Register Reg = I.getReg();
     int FI = I.getFrameIdx();
+    if (Reg == FramePtr)
+      FramePtrSpillFI = FI;
     switch (Reg) {
     case ARM::R8:
     case ARM::R9:
@@ -223,8 +225,6 @@
     case ARM::R6:
     case ARM::R7:
     case ARM::LR:
-      if (Reg == FramePtr)
-        FramePtrSpillFI = FI;
       GPRCS1Size += 4;
       break;
     default:
@@ -232,10 +232,31 @@
     }
   }
 
+  MachineBasicBlock::iterator GPRCS1Push, GPRCS2Push;
   if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
+    GPRCS1Push = MBBI;
     ++MBBI;
   }
 
+  // Find last push instruction for GPRCS2 - spilling of high registers
+  // (r8-r11) could consist of multiple tPUSH and tMOVr instructions.
+  while (true) {
+    MachineBasicBlock::iterator OldMBBI = MBBI;
+    // Skip a run of tMOVr instructions
+    while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr)
+      MBBI++;
+    if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
+      GPRCS2Push = MBBI;
+      MBBI++;
+    } else {
+      // We have reached an instruction which is not a push, so the previous
+      // run of tMOVr instructions (which may have been empty) was not part of
+      // the prologue. Reset MBBI back to the last PUSH of the prologue.
+      MBBI = OldMBBI;
+      break;
+    }
+  }
+
   // Determine starting offsets of spill areas.
   unsigned DPRCSOffset  = NumBytes - ArgRegsSaveSize - (GPRCS1Size + GPRCS2Size + DPRCSSize);
   unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
@@ -252,71 +273,38 @@
   int FramePtrOffsetInBlock = 0;
   unsigned adjustedGPRCS1Size = GPRCS1Size;
   if (GPRCS1Size > 0 && GPRCS2Size == 0 &&
-      tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) {
+      tryFoldSPUpdateIntoPushPop(STI, MF, &*(GPRCS1Push), NumBytes)) {
     FramePtrOffsetInBlock = NumBytes;
     adjustedGPRCS1Size += NumBytes;
     NumBytes = 0;
   }
+  CFAOffset += adjustedGPRCS1Size;
 
-  if (adjustedGPRCS1Size) {
-    CFAOffset += adjustedGPRCS1Size;
-    unsigned CFIIndex =
-        MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
-    BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
-        .addCFIIndex(CFIIndex)
-        .setMIFlags(MachineInstr::FrameSetup);
-  }
-  for (const CalleeSavedInfo &I : CSI) {
-    Register Reg = I.getReg();
-    int FI = I.getFrameIdx();
-    switch (Reg) {
-    case ARM::R8:
-    case ARM::R9:
-    case ARM::R10:
-    case ARM::R11:
-    case ARM::R12:
-      if (STI.splitFramePushPop(MF))
-        break;
-      LLVM_FALLTHROUGH;
-    case ARM::R0:
-    case ARM::R1:
-    case ARM::R2:
-    case ARM::R3:
-    case ARM::R4:
-    case ARM::R5:
-    case ARM::R6:
-    case ARM::R7:
-    case ARM::LR:
-      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
-          nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
-      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
-          .addCFIIndex(CFIIndex)
-          .setMIFlags(MachineInstr::FrameSetup);
-      break;
-    }
-  }
-
-  // Adjust FP so it point to the stack slot that contains the previous FP.
+  // Adjust FP so it points to the stack slot that contains the previous FP.
   if (HasFP) {
+    bool FPIsHighReg = ARM::hGPRRegClass.contains(FramePtr);
+    MachineBasicBlock::iterator AfterPush = FPIsHighReg ? std::next(GPRCS2Push) : std::next(GPRCS1Push);
     FramePtrOffsetInBlock +=
         MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
-    BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
+    if (FPIsHighReg)
+      FramePtrOffsetInBlock += GPRCS2Size;
+    BuildMI(MBB, AfterPush, dl, TII.get(ARM::tADDrSPi), FramePtr)
         .addReg(ARM::SP)
         .addImm(FramePtrOffsetInBlock / 4)
         .setMIFlags(MachineInstr::FrameSetup)
         .add(predOps(ARMCC::AL));
+
     if(FramePtrOffsetInBlock) {
-      CFAOffset -= FramePtrOffsetInBlock;
       unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
-          nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset));
-      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+          nullptr, MRI->getDwarfRegNum(FramePtr, true), (CFAOffset - FramePtrOffsetInBlock)));
+      BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
           .addCFIIndex(CFIIndex)
           .setMIFlags(MachineInstr::FrameSetup);
     } else {
       unsigned CFIIndex =
           MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
               nullptr, MRI->getDwarfRegNum(FramePtr, true)));
-      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+      BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
           .addCFIIndex(CFIIndex)
           .setMIFlags(MachineInstr::FrameSetup);
     }
@@ -326,45 +314,69 @@
       AFI->setShouldRestoreSPFromFP(true);
   }
 
-  // Skip past the spilling of r8-r11, which could consist of multiple tPUSH
-  // and tMOVr instructions. We don't need to add any call frame information
-  // in-between these instructions, because they do not modify the high
-  // registers.
-  while (true) {
-    MachineBasicBlock::iterator OldMBBI = MBBI;
-    // Skip a run of tMOVr instructions
-    while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr)
-      MBBI++;
-    if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
-      MBBI++;
-    } else {
-      // We have reached an instruction which is not a push, so the previous
-      // run of tMOVr instructions (which may have been empty) was not part of
-      // the prologue. Reset MBBI back to the last PUSH of the prologue.
-      MBBI = OldMBBI;
-      break;
+  // Emit call frame information for the callee-saved low registers.
+  if (GPRCS1Size > 0) {
+    MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
+    if (adjustedGPRCS1Size) {
+      unsigned CFIIndex =
+          MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
+      BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+          .addCFIIndex(CFIIndex)
+          .setMIFlags(MachineInstr::FrameSetup);
+    }
+    for (const CalleeSavedInfo &I : CSI) {
+      Register Reg = I.getReg();
+      int FI = I.getFrameIdx();
+      switch (Reg) {
+      case ARM::R8:
+      case ARM::R9:
+      case ARM::R10:
+      case ARM::R11:
+      case ARM::R12:
+        if (STI.splitFramePushPop(MF))
+          break;
+        LLVM_FALLTHROUGH;
+      case ARM::R0:
+      case ARM::R1:
+      case ARM::R2:
+      case ARM::R3:
+      case ARM::R4:
+      case ARM::R5:
+      case ARM::R6:
+      case ARM::R7:
+      case ARM::LR:
+        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+            nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
+        BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+            .addCFIIndex(CFIIndex)
+            .setMIFlags(MachineInstr::FrameSetup);
+        break;
+      }
     }
   }
 
   // Emit call frame information for the callee-saved high registers.
-  for (auto &I : CSI) {
-    Register Reg = I.getReg();
-    int FI = I.getFrameIdx();
-    switch (Reg) {
-    case ARM::R8:
-    case ARM::R9:
-    case ARM::R10:
-    case ARM::R11:
-    case ARM::R12: {
-      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
-          nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
-      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
-          .addCFIIndex(CFIIndex)
-          .setMIFlags(MachineInstr::FrameSetup);
-      break;
-    }
-    default:
-      break;
+  if (GPRCS2Size > 0) {
+    MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
+    for (auto &I : CSI) {
+      Register Reg = I.getReg();
+      int FI = I.getFrameIdx();
+      switch (Reg) {
+      case ARM::R8:
+      case ARM::R9:
+      case ARM::R10:
+      case ARM::R11:
+      case ARM::R12: {
+        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+            nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
+        BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+            .addCFIIndex(CFIIndex)
+            .setMIFlags(MachineInstr::FrameSetup);
+        break;
+      }
+      default:
+        break;
+      }
     }
   }
 
@@ -794,65 +806,53 @@
   return true;
 }
 
-using ARMRegSet = std::bitset<ARM::NUM_TARGET_REGS>;
-
-// Return the first iteraror after CurrentReg which is present in EnabledRegs,
-// or OrderEnd if no further registers are in that set. This does not advance
-// the iterator fiorst, so returns CurrentReg if it is in EnabledRegs.
-static const unsigned *findNextOrderedReg(const unsigned *CurrentReg,
-                                          const ARMRegSet &EnabledRegs,
-                                          const unsigned *OrderEnd) {
-  while (CurrentReg != OrderEnd && !EnabledRegs[*CurrentReg])
-    ++CurrentReg;
-  return CurrentReg;
-}
-
-bool Thumb1FrameLowering::spillCalleeSavedRegisters(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
-    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
-  if (CSI.empty())
-    return false;
-
-  DebugLoc DL;
-  const TargetInstrInfo &TII = *STI.getInstrInfo();
-  MachineFunction &MF = *MBB.getParent();
-  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
-      MF.getSubtarget().getRegisterInfo());
-
-  ARMRegSet LoRegsToSave; // r0-r7, lr
-  ARMRegSet HiRegsToSave; // r8-r11
-  ARMRegSet CopyRegs;     // Registers which can be used after pushing
-                          // LoRegs for saving HiRegs.
-
-  for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
-    Register Reg = I.getReg();
-
+static const SmallVector<Register> OrderedLowRegs = {ARM::R4, ARM::R5, ARM::R6,
+                                                     ARM::R7, ARM::LR};
+static const SmallVector<Register> OrderedHighRegs = {ARM::R8, ARM::R9,
+                                                      ARM::R10, ARM::R11};
+static const SmallVector<Register> OrderedCopyRegs = {
+    ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4,
+    ARM::R5, ARM::R6, ARM::R7, ARM::LR};
+
+static void splitLowAndHighRegs(const std::set<Register> &Regs,
+                                std::set<Register> &LowRegs,
+                                std::set<Register> &HighRegs) {
+  for (Register Reg : Regs) {
     if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
-      LoRegsToSave[Reg] = true;
+      LowRegs.insert(Reg);
     } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
-      HiRegsToSave[Reg] = true;
+      HighRegs.insert(Reg);
     } else {
       llvm_unreachable("callee-saved register of unexpected class");
     }
-
-    if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) &&
-        !MF.getRegInfo().isLiveIn(Reg) &&
-        !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
-      CopyRegs[Reg] = true;
   }
+}
 
-  // Unused argument registers can be used for the high register saving.
-  for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
-    if (!MF.getRegInfo().isLiveIn(ArgReg))
-      CopyRegs[ArgReg] = true;
+template <typename It>
+It getNextOrderedReg(It OrderedStartIt, It OrderedEndIt,
+                     const std::set<Register> &RegSet) {
+  return std::find_if(OrderedStartIt, OrderedEndIt,
+                      [&](Register Reg) { return RegSet.count(Reg); });
+}
 
-  // Push the low registers and lr
+static void pushRegsToStack(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MI,
+                            const TargetInstrInfo &TII,
+                            const std::set<Register> &RegsToSave,
+                            const std::set<Register> &CopyRegs) {
+  MachineFunction &MF = *MBB.getParent();
   const MachineRegisterInfo &MRI = MF.getRegInfo();
-  if (!LoRegsToSave.none()) {
+  DebugLoc DL;
+
+  std::set<Register> LowRegs, HighRegs;
+  splitLowAndHighRegs(RegsToSave, LowRegs, HighRegs);
+
+  // Push low regs first
+  if (!LowRegs.empty()) {
     MachineInstrBuilder MIB =
         BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
-    for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) {
-      if (LoRegsToSave[Reg]) {
+    for (unsigned Reg : OrderedLowRegs) {
+      if (LowRegs.count(Reg)) {
         bool isKill = !MRI.isLiveIn(Reg);
         if (isKill && !MRI.isReserved(Reg))
           MBB.addLiveIn(Reg);
@@ -863,31 +863,26 @@
     MIB.setMIFlags(MachineInstr::FrameSetup);
   }
 
-  // Push the high registers. There are no store instructions that can access
-  // these registers directly, so we have to move them to low registers, and
-  // push them. This might take multiple pushes, as it is possible for there to
+  // Now push the high registers
+  // There are no store instructions that can access high registers directly,
+  // so we have to move them to low registers, and push them.
+  // This might take multiple pushes, as it is possible for there to
   // be fewer low registers available than high registers which need saving.
 
-  // These are in reverse order so that in the case where we need to use
+  // Find the first register to save.
+  // Registers must be processed in reverse order so that in case we need to use
   // multiple PUSH instructions, the order of the registers on the stack still
   // matches the unwind info. They need to be swicthed back to ascending order
   // before adding to the PUSH instruction.
-  static const unsigned AllCopyRegs[] = {ARM::LR, ARM::R7, ARM::R6,
-                                         ARM::R5, ARM::R4, ARM::R3,
-                                         ARM::R2, ARM::R1, ARM::R0};
-  static const unsigned AllHighRegs[] = {ARM::R11, ARM::R10, ARM::R9, ARM::R8};
-
-  const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
-  const unsigned *AllHighRegsEnd = std::end(AllHighRegs);
-
-  // Find the first register to save.
-  const unsigned *HiRegToSave = findNextOrderedReg(
-      std::begin(AllHighRegs), HiRegsToSave, AllHighRegsEnd);
+  auto HiRegToSave = getNextOrderedReg(OrderedHighRegs.rbegin(),
+                                       OrderedHighRegs.rend(),
+                                       HighRegs);
 
-  while (HiRegToSave != AllHighRegsEnd) {
+  while (HiRegToSave != OrderedHighRegs.rend()) {
     // Find the first low register to use.
-    const unsigned *CopyReg =
-        findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
+    auto CopyRegIt = getNextOrderedReg(OrderedCopyRegs.rbegin(),
+                                       OrderedCopyRegs.rend(),
+                                       CopyRegs);
 
     // Create the PUSH, but don't insert it yet (the MOVs need to come first).
     MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH))
@@ -895,25 +890,28 @@
                                       .setMIFlags(MachineInstr::FrameSetup);
 
     SmallVector<unsigned, 4> RegsToPush;
-    while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
-      if (HiRegsToSave[*HiRegToSave]) {
+    while (HiRegToSave != OrderedHighRegs.rend() && CopyRegIt != OrderedCopyRegs.rend()) {
+      if (HighRegs.count(*HiRegToSave)) {
         bool isKill = !MRI.isLiveIn(*HiRegToSave);
         if (isKill && !MRI.isReserved(*HiRegToSave))
           MBB.addLiveIn(*HiRegToSave);
 
         // Emit a MOV from the high reg to the low reg.
         BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
-            .addReg(*CopyReg, RegState::Define)
+            .addReg(*CopyRegIt, RegState::Define)
             .addReg(*HiRegToSave, getKillRegState(isKill))
             .add(predOps(ARMCC::AL))
             .setMIFlags(MachineInstr::FrameSetup);
 
         // Record the register that must be added to the PUSH.
-        RegsToPush.push_back(*CopyReg);
-
-        CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
-        HiRegToSave =
-            findNextOrderedReg(++HiRegToSave, HiRegsToSave, AllHighRegsEnd);
+        RegsToPush.push_back(*CopyRegIt);
+
+        CopyRegIt = getNextOrderedReg(std::next(CopyRegIt),
+                                      OrderedCopyRegs.rend(),
+                                      CopyRegs);
+        HiRegToSave = getNextOrderedReg(std::next(HiRegToSave),
+                                        OrderedHighRegs.rend(),
+                                        HighRegs);
       }
     }
 
@@ -924,84 +922,44 @@
     // Insert the PUSH instruction after the MOVs.
     MBB.insert(MI, PushMIB);
   }
-
-  return true;
 }
 
-bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
-    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
-  if (CSI.empty())
-    return false;
-
+static void popRegsFromStack(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator MI,
+                             const TargetInstrInfo &TII,
+                             const std::set<Register> &RegsToRestore,
+                             const std::set<Register> &CopyRegs,
+                             bool IsVarArg, bool HasV5Ops) {
   MachineFunction &MF = *MBB.getParent();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  const TargetInstrInfo &TII = *STI.getInstrInfo();
-  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
-      MF.getSubtarget().getRegisterInfo());
-
-  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
   DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
 
-  ARMRegSet LoRegsToRestore;
-  ARMRegSet HiRegsToRestore;
-  // Low registers (r0-r7) which can be used to restore the high registers.
-  ARMRegSet CopyRegs;
-
-  for (CalleeSavedInfo I : CSI) {
-    Register Reg = I.getReg();
-
-    if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
-      LoRegsToRestore[Reg] = true;
-    } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
-      HiRegsToRestore[Reg] = true;
-    } else {
-      llvm_unreachable("callee-saved register of unexpected class");
-    }
-
-    // If this is a low register not used as the frame pointer, we may want to
-    // use it for restoring the high registers.
-    if ((ARM::tGPRRegClass.contains(Reg)) &&
-        !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
-      CopyRegs[Reg] = true;
-  }
+  std::set<Register> LowRegs, HighRegs;
+  splitLowAndHighRegs(RegsToRestore, LowRegs, HighRegs);
 
-  // If this is a return block, we may be able to use some unused return value
-  // registers for restoring the high regs.
-  auto Terminator = MBB.getFirstTerminator();
-  if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) {
-    CopyRegs[ARM::R0] = true;
-    CopyRegs[ARM::R1] = true;
-    CopyRegs[ARM::R2] = true;
-    CopyRegs[ARM::R3] = true;
-    for (auto Op : Terminator->implicit_operands()) {
-      if (Op.isReg())
-        CopyRegs[Op.getReg()] = false;
-    }
-  }
-
-  static const unsigned AllCopyRegs[] = {ARM::R0, ARM::R1, ARM::R2, ARM::R3,
-                                         ARM::R4, ARM::R5, ARM::R6, ARM::R7};
-  static const unsigned AllHighRegs[] = {ARM::R8, ARM::R9, ARM::R10, ARM::R11};
-
-  const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
-  const unsigned *AllHighRegsEnd = std::end(AllHighRegs);
+  // Pop the high registers first.
+  // There are no load instructions that can access high registers directly,
+  // so we have to pop into low registers and then move to the high registers.
+  // This might take multiple pops, as it is possible for there to
+  // be fewer low registers available than high registers which need restoring.
 
   // Find the first register to restore.
-  auto HiRegToRestore = findNextOrderedReg(std::begin(AllHighRegs),
-                                           HiRegsToRestore, AllHighRegsEnd);
+  auto HiRegToRestore = getNextOrderedReg(OrderedHighRegs.begin(),
+                                          OrderedHighRegs.end(),
+                                          HighRegs);
 
-  while (HiRegToRestore != AllHighRegsEnd) {
-    assert(!CopyRegs.none());
+  while (HiRegToRestore != OrderedHighRegs.end()) {
+    assert(!CopyRegs.empty());
     // Find the first low register to use.
-    auto CopyReg =
-        findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
+    auto CopyReg = getNextOrderedReg(OrderedCopyRegs.begin(),
+                                     OrderedCopyRegs.end(),
+                                     CopyRegs);
 
     // Create the POP instruction.
     MachineInstrBuilder PopMIB =
         BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
 
-    while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
+    while (HiRegToRestore != OrderedHighRegs.end() && CopyReg != OrderedCopyRegs.end()) {
       // Add the low register to the POP.
       PopMIB.addReg(*CopyReg, RegState::Define);
 
@@ -1011,62 +969,163 @@
           .addReg(*CopyReg, RegState::Kill)
           .add(predOps(ARMCC::AL));
 
-      CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
-      HiRegToRestore =
-          findNextOrderedReg(++HiRegToRestore, HiRegsToRestore, AllHighRegsEnd);
+      CopyReg = getNextOrderedReg(std::next(CopyReg),
+                                  OrderedCopyRegs.end(),
+                                  CopyRegs);
+      HiRegToRestore = getNextOrderedReg(std::next(HiRegToRestore),
+                                         OrderedHighRegs.end(),
+                                         HighRegs);
     }
   }
 
-  MachineInstrBuilder MIB =
-      BuildMI(MF, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
-
-  bool NeedsPop = false;
-  for (CalleeSavedInfo &Info : llvm::reverse(CSI)) {
-    Register Reg = Info.getReg();
-
-    // High registers (excluding lr) have already been dealt with
-    if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR))
-      continue;
-
-    if (Reg == ARM::LR) {
-      Info.setRestored(false);
-      if (!MBB.succ_empty() ||
-          MI->getOpcode() == ARM::TCRETURNdi ||
-          MI->getOpcode() == ARM::TCRETURNri)
-        // LR may only be popped into PC, as part of return sequence.
-        // If this isn't the return sequence, we'll need emitPopSpecialFixUp
-        // to restore LR the hard way.
-        // FIXME: if we don't pass any stack arguments it would be actually
-        // advantageous *and* correct to do the conversion to an ordinary call
-        // instruction here.
-        continue;
-      // Special epilogue for vararg functions. See emitEpilogue
-      if (isVarArg)
-        continue;
-      // ARMv4T requires BX, see emitEpilogue
-      if (!STI.hasV5TOps())
-        continue;
+  // Now pop the low registers
+  if (!LowRegs.empty()) {
+    MachineInstrBuilder MIB =
+        BuildMI(MF, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
 
-      // CMSE entry functions must return via BXNS, see emitEpilogue.
-      if (AFI->isCmseNSEntryFunction())
+    bool NeedsPop = false;
+    for (Register Reg : OrderedLowRegs) {
+      if (!LowRegs.count(Reg))
         continue;
 
-      // Pop LR into PC.
-      Reg = ARM::PC;
-      (*MIB).setDesc(TII.get(ARM::tPOP_RET));
-      if (MI != MBB.end())
-        MIB.copyImplicitOps(*MI);
-      MI = MBB.erase(MI);
+      if (Reg == ARM::LR) {
+        if (!MBB.succ_empty() ||
+            MI->getOpcode() == ARM::TCRETURNdi ||
+            MI->getOpcode() == ARM::TCRETURNri)
+          // LR may only be popped into PC, as part of return sequence.
+          // If this isn't the return sequence, we'll need emitPopSpecialFixUp
+          // to restore LR the hard way.
+          // FIXME: if we don't pass any stack arguments it would be actually
+          // advantageous *and* correct to do the conversion to an ordinary call
+          // instruction here.
+          continue;
+        // Special epilogue for vararg functions. See emitEpilogue
+        if (IsVarArg)
+          continue;
+        // ARMv4T requires BX, see emitEpilogue
+        if (!HasV5Ops)
+          continue;
+
+        // CMSE entry functions must return via BXNS, see emitEpilogue.
+        if (AFI->isCmseNSEntryFunction())
+          continue;
+
+        // Pop LR into PC.
+        Reg = ARM::PC;
+        (*MIB).setDesc(TII.get(ARM::tPOP_RET));
+        if (MI != MBB.end())
+          MIB.copyImplicitOps(*MI);
+        MI = MBB.erase(MI);
+      }
+      MIB.addReg(Reg, getDefRegState(true));
+      NeedsPop = true;
+    }
+
+    // It's illegal to emit pop instruction without operands.
+    if (NeedsPop)
+      MBB.insert(MI, &*MIB);
+    else
+      MF.deleteMachineInstr(MIB);
+  }
+}
+
+bool Thumb1FrameLowering::spillCalleeSavedRegisters(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  const TargetInstrInfo &TII = *STI.getInstrInfo();
+  MachineFunction &MF = *MBB.getParent();
+  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+      MF.getSubtarget().getRegisterInfo());
+  Register FPReg = RegInfo->getFrameRegister(MF);
+
+  // In case FP is a high reg, we need a separate push sequence to generate
+  // a correct Frame Record
+  bool NeedsFrameRecordPush = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg);
+
+  std::set<Register> FrameRecord;
+  std::set<Register> SpilledGPRs;
+  for (const CalleeSavedInfo &I : CSI) {
+    Register Reg = I.getReg();
+    if (NeedsFrameRecordPush && (Reg == FPReg || Reg == ARM::LR))
+      FrameRecord.insert(Reg);
+    else
+      SpilledGPRs.insert(Reg);
+  }
+
+  pushRegsToStack(MBB, MI, TII, FrameRecord, {ARM::LR});
+
+  // Determine intermediate registers which can be used for pushing high regs:
+  // - Spilled low regs
+  // - Unused argument registers
+  std::set<Register> CopyRegs;
+  for (Register Reg : SpilledGPRs)
+    if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) &&
+        !MF.getRegInfo().isLiveIn(Reg) && !(hasFP(MF) && Reg == FPReg))
+      CopyRegs.insert(Reg);
+  for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
+    if (!MF.getRegInfo().isLiveIn(ArgReg))
+      CopyRegs.insert(ArgReg);
+
+  pushRegsToStack(MBB, MI, TII, SpilledGPRs, CopyRegs);
+
+  return true;
+}
+
+bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  const TargetInstrInfo &TII = *STI.getInstrInfo();
+  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+      MF.getSubtarget().getRegisterInfo());
+  bool IsVarArg = AFI->getArgRegsSaveSize() > 0;
+  Register FPReg = RegInfo->getFrameRegister(MF);
+
+  // In case FP is a high reg, we need a separate pop sequence to generate
+  // a correct Frame Record
+  bool NeedsFrameRecordPop = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg);
+
+  std::set<Register> FrameRecord;
+  std::set<Register> SpilledGPRs;
+  for (CalleeSavedInfo &I : CSI) {
+    Register Reg = I.getReg();
+    if (NeedsFrameRecordPop && (Reg == FPReg || Reg == ARM::LR))
+      FrameRecord.insert(Reg);
+    else
+      SpilledGPRs.insert(Reg);
+
+    if (Reg == ARM::LR)
+      I.setRestored(false);
+  }
+
+  // Determine intermediate registers which can be used for popping high regs:
+  // - Spilled low regs
+  // - Unused return registers
+  std::set<Register> CopyRegs;
+  for (Register Reg : SpilledGPRs)
+    if ((ARM::tGPRRegClass.contains(Reg)) && !(hasFP(MF) && Reg == FPReg))
+      CopyRegs.insert(Reg);
+  auto Terminator = MBB.getFirstTerminator();
+  if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) {
+    CopyRegs.insert(ARM::R0);
+    CopyRegs.insert(ARM::R1);
+    CopyRegs.insert(ARM::R2);
+    CopyRegs.insert(ARM::R3);
+    for (auto Op : Terminator->implicit_operands()) {
+      if (Op.isReg())
+        CopyRegs.erase(Op.getReg());
     }
-    MIB.addReg(Reg, getDefRegState(true));
-    NeedsPop = true;
   }
 
-  // It's illegal to emit pop instruction without operands.
-  if (NeedsPop)
-    MBB.insert(MI, &*MIB);
-  else
-    MF.deleteMachineInstr(MIB);
+  popRegsFromStack(MBB, MI, TII, SpilledGPRs, CopyRegs, IsVarArg, STI.hasV5TOps());
+  popRegsFromStack(MBB, MI, TII, FrameRecord, {ARM::LR}, IsVarArg, STI.hasV5TOps());
 
   return true;
 }
Index: llvm/lib/Target/ARM/ARMSubtarget.h
===================================================================
--- llvm/lib/Target/ARM/ARMSubtarget.h
+++ llvm/lib/Target/ARM/ARMSubtarget.h
@@ -430,7 +430,7 @@
   }
 
   MCPhysReg getFramePointerReg() const {
-    if (isTargetDarwin() || (!isTargetWindows() && isThumb()))
+    if (isTargetDarwin() || (!isTargetWindows() && isThumb() && !createAAPCSFrameChain()))
       return ARM::R7;
     return ARM::R11;
   }
Index: llvm/lib/Target/ARM/ARMFrameLowering.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -47,7 +47,8 @@
 // |                                   |
 // |-----------------------------------|
 // |                                   |
-// | prev_fp, prev_lr                  |
+// | prev_lr                           |
+// | prev_fp                           |
 // | (a.k.a. "frame record")           |
 // |                                   |
 // |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
@@ -204,6 +205,8 @@
   // ABI-required frame pointer.
   if (MF.getTarget().Options.DisableFramePointerElim(MF))
     return true;
+  if (MF.getSubtarget<ARMSubtarget>().createAAPCSFrameChain())
+    return true;
 
   // Frame pointer required for use within this function.
   return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
@@ -720,6 +723,9 @@
   // into spill area 1, including the FP in R11.  In either case, it
   // is in area one and the adjustment needs to take place just after
   // that push.
+  // FIXME: The above is not necessarily true when PACBTI is enabled.
+  // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
+  // so FP ends up in area two.
   if (HasFP) {
     MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
     unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
@@ -1818,6 +1824,11 @@
   return true;
 }
 
+static bool requiresAAPCSFrameRecord(const MachineFunction& MF) {
+  const auto& Subtarget = MF.getSubtarget<ARMSubtarget>();
+  return Subtarget.createAAPCSFrameChainLeaf() || (Subtarget.createAAPCSFrameChain() && MF.getFrameInfo().hasCalls());
+}
+
 void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                             BitVector &SavedRegs,
                                             RegScavenger *RS) const {
@@ -1826,7 +1837,7 @@
   // to take advantage the eliminateFrameIndex machinery. This also ensures it
   // is spilled in the order specified by getCalleeSavedRegs() to make it easier
   // to combine multiple loads / stores.
-  bool CanEliminateFrame = true;
+  bool CanEliminateFrame = !requiresAAPCSFrameRecord(MF);
   bool CS1Spilled = false;
   bool LRSpilled = false;
   unsigned NumGPRSpills = 0;
@@ -2067,7 +2078,9 @@
       SavedRegs.set(FramePtr);
       // If the frame pointer is required by the ABI, also spill LR so that we
       // emit a complete frame record.
-      if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) {
+      if ((requiresAAPCSFrameRecord(MF) ||
+           MF.getTarget().Options.DisableFramePointerElim(MF)) &&
+          !LRSpilled) {
         SavedRegs.set(ARM::LR);
         LRSpilled = true;
         NumGPRSpills++;
Index: llvm/lib/Target/ARM/ARM.td
===================================================================
--- llvm/lib/Target/ARM/ARM.td
+++ llvm/lib/Target/ARM/ARM.td
@@ -542,6 +542,16 @@
                                                  "Don't place a BTI instruction "
                                                  "after a return-twice">;
 
+def FeatureAAPCSFrameChain : SubtargetFeature<"aapcs-frame-chain",
+                                              "CreateAAPCSFrameChain", "true",
+                                              "Create an AAPCS compliant frame chain">;
+
+def FeatureAAPCSFrameChainLeaf : SubtargetFeature<"aapcs-frame-chain-leaf",
+                                                  "CreateAAPCSFrameChainLeaf", "true",
+                                                  "Create an AAPCS compliant frame chain "
+                                                  "for leaf functions",
+                                                  [FeatureAAPCSFrameChain]>;
+
 //===----------------------------------------------------------------------===//
 // ARM architecture class
 //
Index: clang/lib/Driver/ToolChains/Arch/ARM.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Arch/ARM.cpp
+++ clang/lib/Driver/ToolChains/Arch/ARM.cpp
@@ -717,6 +717,15 @@
     }
   }
 
+  // Propagate frame-chain model selection
+  if (Arg *A = Args.getLastArg(options::OPT_mframe_chain)) {
+    StringRef FrameChainOption = A->getValue();
+    if (FrameChainOption.startswith("aapcs"))
+      Features.push_back("+aapcs-frame-chain");
+    if (FrameChainOption == "aapcs+leaf")
+      Features.push_back("+aapcs-frame-chain-leaf");
+  }
+
   // CMSE: Check for target 8M (for -mcmse to be applicable) is performed later.
   if (Args.getLastArg(options::OPT_mcmse))
     Features.push_back("+8msecext");
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -3432,7 +3432,9 @@
   BothFlags<[NoXarchOption, CC1Option], " the AAPCS standard requirement stating that"
             " volatile bit-field width is dictated by the field container type. (ARM only).">>,
   Group<m_arm_Features_Group>;
-
+def mframe_chain : Joined<["-"], "mframe-chain=">,
+  Group<m_arm_Features_Group>, Values<"none,aapcs,aapcs+leaf">,
+  HelpText<"Select the frame chain model used to emit frame records (Arm only).">;
 def mgeneral_regs_only : Flag<["-"], "mgeneral-regs-only">, Group<m_Group>,
   HelpText<"Generate code which only uses the general purpose registers (AArch64/x86 only)">;
 def mfix_cmse_cve_2021_35465 : Flag<["-"], "mfix-cmse-cve-2021-35465">,
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to