yroux updated this revision to Diff 254834.
yroux added a comment.

Here is an update of the patch


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76066/new/

https://reviews.llvm.org/D76066

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  llvm/include/llvm/CodeGen/TargetPassConfig.h
  llvm/lib/CodeGen/MachineOutliner.cpp
  llvm/lib/CodeGen/TargetPassConfig.cpp
  llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
  llvm/lib/Target/ARM/ARMBaseInstrInfo.h
  llvm/lib/Target/ARM/ARMTargetMachine.cpp
  llvm/test/CodeGen/ARM/machine-outliner-tail.ll
  llvm/test/CodeGen/ARM/machine-outliner-thunk.ll
  llvm/test/CodeGen/ARM/machine-outliner-unoutlinable.mir
  llvm/test/CodeGen/ARM/machine-outliner-unsafe-registers.mir

Index: llvm/test/CodeGen/ARM/machine-outliner-unsafe-registers.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/machine-outliner-unsafe-registers.mir
@@ -0,0 +1,76 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=arm-- -run-pass=machine-outliner -verify-machineinstrs \
+# RUN: %s -o - | FileCheck %s
+
+--- |
+  define void @outline_cpsr_r12_ok() #0 { ret void }
+  define void @dont_outline_cpsr() #0 { ret void }
+  define void @dont_outline_r12() #0 { ret void }
+  declare void @z(i32, i32, i32, i32)
+
+  attributes #0 = { minsize optsize }
+...
+---
+
+name:           outline_cpsr_r12_ok
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: outline_cpsr_r12_ok
+  ; CHECK: bb.0:
+  ; CHECK:   BL @OUTLINED_FUNCTION_0
+  ; CHECK: bb.1:
+  ; CHECK:   $r12 = MOVi 1, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   CMPri $r12, 42, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK:   BL @OUTLINED_FUNCTION_0
+  bb.0:
+    liveins: $cpsr, $r12
+    $r0 = MOVi 1, 14, $noreg, $noreg
+    $r1 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r3 = MOVi 1, 14, $noreg, $noreg
+    BL @z
+  bb.1:
+    $r12 = MOVi 1, 14, $noreg, $noreg
+    CMPri $r12, 42, 14, $noreg, implicit-def $cpsr
+    $r0 = MOVi 1, 14, $noreg, $noreg
+    $r1 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r3 = MOVi 1, 14, $noreg, $noreg
+    BL @z
+  bb.2:
+    BX_RET 14, $noreg
+...
+---
+
+name:           dont_outline_cpsr
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: dont_outline_cpsr
+  ; CHECK-NOT: BL @OUTLINED_FUNCTION_0
+  bb.0:
+    liveins: $cpsr
+    $r0 = MOVi 1, 14, $noreg, $noreg
+    $r1 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r3 = MOVi 1, 14, $noreg, $noreg
+    BL @z
+  bb.1:
+    liveins: $cpsr
+    BX_RET 14, $noreg
+...
+---
+
+name:           dont_outline_r12
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $r12
+    ; CHECK-LABEL: name: dont_outline_r12
+    ; CHECK-NOT: BL @OUTLINED_FUNCTION_0
+    $r0 = MOVi 1, 14, $noreg, $noreg
+    $r1 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r3 = MOVi 1, 14, $noreg, $noreg
+    BL @z
+    $r2 = MOVr $r12, 14, $noreg, $noreg
+    BX_RET 14, $noreg
Index: llvm/test/CodeGen/ARM/machine-outliner-unoutlinable.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/machine-outliner-unoutlinable.mir
@@ -0,0 +1,113 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=thumbv7-- -run-pass=machine-outliner -verify-machineinstrs \
+# RUN: %s -o - | FileCheck %s
+
+--- |
+  define void @dont_outline_asm() #0 { ret void }
+  define void @dont_outline_lr() #0 { ret void }
+  define void @dont_outline_it() #0 { ret void }
+  define void @dont_outline_pic() #0 { ret void }
+  declare void @z(i32, i32, i32, i32)
+
+  attributes #0 = { minsize optsize }
+...
+---
+
+name:           dont_outline_asm
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: dont_outline_asm
+  ; CHECK: bb.0:
+  ; CHECK:   INLINEASM &"movs  r0, #42", 1
+  ; CHECK:   tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0
+  ; CHECK: bb.1:
+  ; CHECK:   INLINEASM &"movs  r0, #42", 1
+  ; CHECK:   tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0
+  bb.0:
+    INLINEASM &"movs  r0, #42", 1
+    $r0, dead $cpsr = tMOVi8 1, 14, $noreg
+    $r1, dead $cpsr = tMOVi8 1, 14, $noreg
+    $r2, dead $cpsr = tMOVi8 1, 14, $noreg
+    $r3, dead $cpsr = tMOVi8 1, 14, $noreg
+    tBL 14, $noreg, @z
+  bb.1:
+    INLINEASM &"movs  r0, #42", 1
+    $r0, dead $cpsr = tMOVi8 1, 14, $noreg
+    $r1, dead $cpsr = tMOVi8 1, 14, $noreg
+    $r2, dead $cpsr = tMOVi8 1, 14, $noreg
+    $r3, dead $cpsr = tMOVi8 1, 14, $noreg
+    tBL 14, $noreg, @z
+  bb.2:
+    tBX_RET 14, $noreg
+...
+---
+
+name:           dont_outline_lr
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: dont_outline_lr
+  ; CHECK-NOT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION
+  bb.0:
+    liveins: $lr
+    $r0 = tMOVr $lr, 14, $noreg
+    $r1 = tMOVr $lr, 14, $noreg
+    $r2 = tMOVr $lr, 14, $noreg
+    $r3 = tMOVr $lr, 14, $noreg
+    tBL 14, $noreg, @z
+  bb.1:
+    liveins: $lr
+    $r0 = tMOVr $lr, 14, $noreg
+    $r1 = tMOVr $lr, 14, $noreg
+    $r2 = tMOVr $lr, 14, $noreg
+    $r3 = tMOVr $lr, 14, $noreg
+    tBL 14, $noreg, @z
+  bb.2:
+    tBX_RET 14, $noreg
+...
+---
+
+name:           dont_outline_it
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: dont_outline_it
+  ; CHECK-NOT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION
+  bb.0:
+    t2IT 0, 1, implicit-def $itstate
+    $r0, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate
+    $r1, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate
+    $r2, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate
+    $r3, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate
+    tBL 14, $noreg, @z
+  bb.1:
+    t2IT 0, 1, implicit-def $itstate
+    $r0, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate
+    $r1, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate
+    $r2, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate
+    $r3, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate
+    tBL 14, $noreg, @z
+  bb.2:
+    tBX_RET 14, $noreg
+...
+---
+
+name:           dont_outline_pic
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: dont_outline_pic
+  ; CHECK-NOT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION
+  bb.0:
+    $r0 = t2MOVi16_ga_pcrel target-flags(arm-lo16, arm-nonlazy) @z, 0
+    $r0 = t2MOVTi16_ga_pcrel $r0, target-flags(arm-lo16, arm-nonlazy) @z, 0
+    $r0 = PICADD $r0, 1, 14, $noreg
+    $r1 = PICLDR $r0, 2, 14, $noreg
+    PICSTR $r0, $r1, 3, 14, $noreg
+    tBL 14, $noreg, @z
+  bb.1:
+    $r0 = t2MOVi16_ga_pcrel target-flags(arm-lo16, arm-nonlazy) @z, 0
+    $r0 = t2MOVTi16_ga_pcrel $r0, target-flags(arm-lo16, arm-nonlazy) @z, 0
+    $r0 = PICADD $r0, 1, 14, $noreg
+    $r1 = PICLDR $r0, 2, 14, $noreg
+    PICSTR $r0, $r1, 3, 14, $noreg
+    tBL 14, $noreg, @z
+  bb.2:
+    tBX_RET 14, $noreg
Index: llvm/test/CodeGen/ARM/machine-outliner-thunk.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/machine-outliner-thunk.ll
@@ -0,0 +1,116 @@
+; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=armv7-- \
+; RUN: -stop-after=machine-outliner < %s | FileCheck %s --check-prefix=ARM
+; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=thumbv7-- \
+; RUN: -stop-after=machine-outliner < %s | FileCheck %s --check-prefix=THUMB
+; RUN: llc -enable-machine-outliner -verify-machineinstrs \
+; RUN: -mtriple=thumbv7-apple-darwin -stop-after=machine-outliner < %s \
+; RUN: | FileCheck %s --check-prefix=MACHO
+; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=thumbv5-- \
+; RUN: --stop-after=machine-outliner < %s | FileCheck %s --check-prefix=THUMB1
+
+declare i32 @thunk_called_fn(i32, i32, i32, i32)
+
+define i32 @a() {
+; ARM-LABEL: name:             a
+; ARM:       bb.0.entry:
+; ARM-NEXT:    liveins: $r11, $lr
+; ARM:         $sp = frame-setup STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r11, killed $lr
+; ARM-NEXT:    frame-setup CFI_INSTRUCTION def_cfa_offset 8
+; ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $lr, -4
+; ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $r11, -8
+; ARM-NEXT:    BL @OUTLINED_FUNCTION_0{{.*}}
+; ARM-NEXT:    renamable $r0 = ADDri killed renamable $r0, 8, 14 /* CC::al */, $noreg, $noreg
+; ARM-NEXT:    $sp = frame-destroy LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r11, def $pc, implicit killed $r0
+
+; THUMB-LABEL: name:             a
+; THUMB:       bb.0.entry:
+; THUMB-NEXT:    liveins: $r7, $lr
+; THUMB:         frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr
+; THUMB-NEXT:    frame-setup CFI_INSTRUCTION def_cfa_offset 8
+; THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $lr, -4
+; THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $r7, -8
+; THUMB-NEXT:    tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0{{.*}}
+; THUMB-NEXT:    renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 8, 14 /* CC::al */, $noreg
+; THUMB-NEXT:    tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
+
+; MACHO-LABEL: name:             a
+; MACHO:       bb.0.entry:
+; MACHO-NEXT:    liveins: $lr
+; MACHO:         early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -4, 14 /* CC::al */, $noreg
+; MACHO-NEXT:    frame-setup CFI_INSTRUCTION def_cfa_offset 4
+; MACHO-NEXT:    frame-setup CFI_INSTRUCTION offset $lr, -4
+; MACHO-NEXT:    tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0{{.*}}
+; MACHO-NEXT:    renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 8, 14 /* CC::al */, $noreg
+; MACHO-NEXT:    $lr, $sp = frame-destroy t2LDR_POST $sp, 4, 14 /* CC::al */, $noreg
+; MACHO-NEXT:    tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0
+
+; THUMB1-NOT: OUTLINED_FUNCTION_0
+
+entry:
+  %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4)
+  %cx = add i32 %call, 8
+  ret i32 %cx
+}
+
+define i32 @b() {
+; ARM-LABEL: name:             b
+; ARM:       bb.0.entry:
+; ARM-NEXT:    liveins: $r11, $lr
+; ARM:         $sp = frame-setup STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r11, killed $lr
+; ARM-NEXT:    frame-setup CFI_INSTRUCTION def_cfa_offset 8
+; ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $lr, -4
+; ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $r11, -8
+; ARM-NEXT:    BL @OUTLINED_FUNCTION_0{{.*}}
+; ARM-NEXT:    renamable $r0 = ADDri killed renamable $r0, 88, 14 /* CC::al */, $noreg, $noreg
+; ARM-NEXT:    $sp = frame-destroy LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r11, def $pc, implicit killed $r0
+
+; THUMB-LABEL: name:             b
+; THUMB:       bb.0.entry:
+; THUMB-NEXT:    liveins: $r7, $lr
+; THUMB:         frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr
+; THUMB-NEXT:    frame-setup CFI_INSTRUCTION def_cfa_offset 8
+; THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $lr, -4
+; THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $r7, -8
+; THUMB-NEXT:    tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0{{.*}}
+; THUMB-NEXT:    renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 88, 14 /* CC::al */, $noreg
+; THUMB-NEXT:    tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
+
+; MACHO-LABEL: name:             b
+; MACHO:       bb.0.entry:
+; MACHO-NEXT:    liveins: $lr
+; MACHO:         early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -4, 14 /* CC::al */, $noreg
+; MACHO-NEXT:    frame-setup CFI_INSTRUCTION def_cfa_offset 4
+; MACHO-NEXT:    frame-setup CFI_INSTRUCTION offset $lr, -4
+; MACHO-NEXT:    tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0{{.*}}
+; MACHO-NEXT:    renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 88, 14 /* CC::al */, $noreg
+; MACHO-NEXT:    $lr, $sp = frame-destroy t2LDR_POST $sp, 4, 14 /* CC::al */, $noreg
+; MACHO-NEXT:    tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0
+entry:
+  %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4)
+  %cx = add i32 %call, 88
+  ret i32 %cx
+}
+
+; ARM-LABEL: name:            OUTLINED_FUNCTION_0
+; ARM:        bb.0:
+; ARM-NEXT:   $r0 = MOVi 1, 14 /* CC::al */, $noreg, $noreg
+; ARM-NEXT:   $r1 = MOVi 2, 14 /* CC::al */, $noreg, $noreg
+; ARM-NEXT:   $r2 = MOVi 3, 14 /* CC::al */, $noreg, $noreg
+; ARM-NEXT:   $r3 = MOVi 4, 14 /* CC::al */, $noreg, $noreg
+; ARM-NEXT:   TAILJMPd @thunk_called_fn, implicit $sp
+
+; THUMB-LABEL: name:            OUTLINED_FUNCTION_0
+; THUMB:        bb.0:
+; THUMB-NEXT:   $r0, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+; THUMB-NEXT:   $r1, dead $cpsr = tMOVi8 2, 14 /* CC::al */, $noreg
+; THUMB-NEXT:   $r2, dead $cpsr = tMOVi8 3, 14 /* CC::al */, $noreg
+; THUMB-NEXT:   $r3, dead $cpsr = tMOVi8 4, 14 /* CC::al */, $noreg
+; THUMB-NEXT:   tTAILJMPdND @thunk_called_fn, 14 /* CC::al */, $noreg, implicit $sp
+
+; MACHO-LABEL: name:            OUTLINED_FUNCTION_0
+; MACHO:        bb.0:
+; MACHO-NEXT:   $r0, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+; MACHO-NEXT:   $r1, dead $cpsr = tMOVi8 2, 14 /* CC::al */, $noreg
+; MACHO-NEXT:   $r2, dead $cpsr = tMOVi8 3, 14 /* CC::al */, $noreg
+; MACHO-NEXT:   $r3, dead $cpsr = tMOVi8 4, 14 /* CC::al */, $noreg
+; MACHO-NEXT:   tTAILJMPd @thunk_called_fn, 14 /* CC::al */, $noreg, implicit $sp
Index: llvm/test/CodeGen/ARM/machine-outliner-tail.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/machine-outliner-tail.ll
@@ -0,0 +1,46 @@
+; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=arm-- \
+; RUN: --stop-after=machine-outliner < %s | FileCheck %s --check-prefix=ARM
+; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=thumbv7-- \
+; RUN: --stop-after=machine-outliner < %s | FileCheck %s --check-prefix=THUMB
+; RUN: llc -enable-machine-outliner -verify-machineinstrs \
+; RUN: -mtriple=thumbv7-apple-darwin --stop-after=machine-outliner < %s \
+; RUN: | FileCheck %s --check-prefix=MACHO
+; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=thumbv5-- \
+; RUN: --stop-after=machine-outliner < %s | FileCheck %s --check-prefix=THUMB1
+
+; ARM-LABEL: name:            OUTLINED_FUNCTION_0
+; ARM: $r0 = MOVi 1, 14 /* CC::al */, $noreg, $noreg
+; ARM-NEXT: $r1 = MOVi 2, 14 /* CC::al */, $noreg, $noreg
+; ARM-NEXT: $r2 = MOVi 3, 14 /* CC::al */, $noreg, $noreg
+; ARM-NEXT: $r3 = MOVi 4, 14 /* CC::al */, $noreg, $noreg
+; ARM-NEXT: TAILJMPd @z
+
+; THUMB-LABEL: name:            OUTLINED_FUNCTION_0
+; THUMB: $r0, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+; THUMB-NEXT: $r1, dead $cpsr = tMOVi8 2, 14 /* CC::al */, $noreg
+; THUMB-NEXT: $r2, dead $cpsr = tMOVi8 3, 14 /* CC::al */, $noreg
+; THUMB-NEXT: $r3, dead $cpsr = tMOVi8 4, 14 /* CC::al */, $noreg
+; THUMB-NEXT: tTAILJMPdND @z, 14 /* CC::al */, $noreg
+
+; MACHO-LABEL: name:            OUTLINED_FUNCTION_0
+; MACHO: $r0, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+; MACHO-NEXT: $r1, dead $cpsr = tMOVi8 2, 14 /* CC::al */, $noreg
+; MACHO-NEXT: $r2, dead $cpsr = tMOVi8 3, 14 /* CC::al */, $noreg
+; MACHO-NEXT: $r3, dead $cpsr = tMOVi8 4, 14 /* CC::al */, $noreg
+; MACHO-NEXT: tTAILJMPd @z, 14 /* CC::al */, $noreg
+
+; THUMB1-NOT: OUTLINED_FUNCTION_0
+
+define void @a() {
+entry:
+  tail call void @z(i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
+
+declare void @z(i32, i32, i32, i32)
+
+define dso_local void @b(i32* nocapture readnone %p) {
+entry:
+  tail call void @z(i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
Index: llvm/lib/Target/ARM/ARMTargetMachine.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -247,6 +247,10 @@
   setSupportsDebugEntryValues(true);
 
   initAsmInfo();
+
+  // ARM supports the MachineOutliner.
+  setMachineOutliner(true);
+  setSupportsDefaultOutlining(false);
 }
 
 ARMBaseTargetMachine::~ARMBaseTargetMachine() = default;
@@ -546,7 +550,8 @@
 
 void ARMPassConfig::addPreEmitPass2() {
   addPass(createARMConstantIslandPass());
-  addPass(createARMLowOverheadLoopsPass());
+  if (!MachineOutlinerEnabled)
+    addPass(createARMLowOverheadLoopsPass());
 
   // Identify valid longjmp targets for Windows Control Flow Guard.
   if (TM->getTargetTriple().isOSWindows())
Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.h
===================================================================
--- llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -353,6 +353,22 @@
   ArrayRef<std::pair<unsigned, const char *>>
   getSerializableBitmaskMachineOperandTargetFlags() const override;
 
+  /// ARM supports the MachineOutliner.
+  bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
+                                   bool OutlineFromLinkOnceODRs) const override;
+  outliner::OutlinedFunction getOutliningCandidateInfo(
+      std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
+  outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT,
+                                       unsigned Flags) const override;
+  bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+                              unsigned &Flags) const override;
+  void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
+                          const outliner::OutlinedFunction &OF) const override;
+  MachineBasicBlock::iterator
+  insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
+                     MachineBasicBlock::iterator &It, MachineFunction &MF,
+                     const outliner::Candidate &C) const override;
+
 private:
   unsigned getInstBundleLength(const MachineInstr &MI) const;
 
Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -32,6 +32,7 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
@@ -5508,3 +5509,367 @@
   return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) <
          ConstantMaterializationCost(Val2, Subtarget, !ForCodesize);
 }
+
+/// Constants defining how certain sequences should be outlined.
+/// This encompasses how an outlined function should be called, and what kind of
+/// frame should be emitted for that outlined function.
+///
+/// \p MachineOutlinerTailCall implies that the function is being created from
+/// a sequence of instructions ending in a return.
+///
+/// That is,
+///
+/// I1                                OUTLINED_FUNCTION:
+/// I2    --> B OUTLINED_FUNCTION     I1
+/// BX LR                             I2
+///                                   BX LR
+///
+/// +-------------------------+--------+-----+
+/// |                         | Thumb2 | ARM |
+/// +-------------------------+--------+-----+
+/// | Call overhead in Bytes  |      4 |   4 |
+/// | Frame overhead in Bytes |      0 |   0 |
+/// | Stack fixup required    |     No |  No |
+/// +-------------------------+--------+-----+
+///
+/// \p MachineOutlinerThunk implies that the function is being created from
+/// a sequence of instructions ending in a call. The outlined function is
+/// called with a BL instruction, and the outlined function tail-calls the
+/// original call destination.
+///
+/// That is,
+///
+/// I1                                OUTLINED_FUNCTION:
+/// I2   --> BL OUTLINED_FUNCTION     I1
+/// BL f                              I2
+///                                   B f
+///
+/// +-------------------------+--------+-----+
+/// |                         | Thumb2 | ARM |
+/// +-------------------------+--------+-----+
+/// | Call overhead in Bytes  |      4 |   4 |
+/// | Frame overhead in Bytes |      0 |   0 |
+/// | Stack fixup required    |     No |  No |
+/// +-------------------------+--------+-----+
+
+enum MachineOutlinerClass {
+  MachineOutlinerTailCall,
+  MachineOutlinerThunk
+};
+
+enum MachineOutlinerMBBFlags {
+  LRUnavailableSomewhere = 0x2,
+  HasCalls = 0x4,
+  UnsafeRegsDead = 0x8
+};
+
+struct OutlinerCosts {
+  const int CallTailCall;
+  const int FrameTailCall;
+  const int CallThunk;
+  const int FrameThunk;
+
+  OutlinerCosts(const ARMSubtarget &target)
+      : CallTailCall(target.isThumb() ? 4 : 4),
+        FrameTailCall(target.isThumb() ? 0 : 0),
+        CallThunk(target.isThumb() ? 4 : 4),
+        FrameThunk(target.isThumb() ? 0 : 0) {}
+};
+
+/// Build the outlining plan for the repeated sequence \p RepeatedSequenceLocs:
+/// discard candidates across which R12 or CPSR is live, then select tail-call
+/// or thunk outlining. Returns an empty OutlinedFunction if nothing qualifies.
+outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
+    std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
+  outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
+  unsigned SequenceSize =
+      std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0u,
+                      [this](unsigned Sum, const MachineInstr &MI) {
+                        return Sum + getInstSizeInBytes(MI);
+                      });
+
+  // Properties about candidate MBBs that hold for all of them.
+  unsigned FlagsSetInAll = 0xF;
+
+  // Intersect the flags of every candidate to set FlagsSetInAll.
+  const TargetRegisterInfo &TRI = getRegisterInfo();
+  for (const outliner::Candidate &C : RepeatedSequenceLocs)
+    FlagsSetInAll &= C.Flags;
+
+  // According to the ARM Procedure Call Standard, the following are
+  // undefined on entry/exit from a function call:
+  //
+  // * Register R12(IP),
+  // * Condition codes (and thus the CPSR register)
+  //
+  // Because of this, we can't outline any sequence of instructions where one
+  // of these registers is live into/across it. Thus, we need to delete those
+  // candidates.
+  auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
+    // If the unsafe registers in this block are all dead, then we don't need
+    // to compute liveness here.
+    if (C.Flags & UnsafeRegsDead)
+      return false;
+    C.initLRU(TRI);
+    return !C.LRU.available(ARM::R12) || !C.LRU.available(ARM::CPSR);
+  };
+
+  // Are there any candidates where those registers are live?
+  if (!(FlagsSetInAll & UnsafeRegsDead)) {
+    // Erase every candidate that violates the restrictions above. (It could be
+    // true that we have viable candidates, so it's not worth bailing out in
+    // the case that, say, 1 out of 20 candidates violate the restrictions.)
+    RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
+                                              RepeatedSequenceLocs.end(),
+                                              CantGuaranteeValueAcrossCall),
+                               RepeatedSequenceLocs.end());
+
+    // If the sequence doesn't have enough candidates left, then we're done.
+    if (RepeatedSequenceLocs.size() < 2)
+      return outliner::OutlinedFunction();
+  }
+
+  // At this point, we have only "safe" candidates to outline. Figure out
+  // frame + call instruction information.
+  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
+
+  // Helper lambda which sets call information for every candidate.
+  auto SetCandidateCallInfo =
+      [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
+        for (outliner::Candidate &C : RepeatedSequenceLocs)
+          C.setCallInfo(CallID, NumBytesForCall);
+      };
+
+  const OutlinerCosts Costs(Subtarget);
+  unsigned FrameID = 0;
+  unsigned NumBytesToCreateFrame = 0;
+
+  // If the last instruction in any candidate is a terminator, then we should
+  // tail call all of the candidates.
+  if (RepeatedSequenceLocs[0].back()->isTerminator()) {
+    FrameID = MachineOutlinerTailCall;
+    NumBytesToCreateFrame = Costs.FrameTailCall;
+    SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall);
+  } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX ||
+             LastInstrOpcode == ARM::tBL || LastInstrOpcode == ARM::tBLXr ||
+             LastInstrOpcode == ARM::tBLXi) {
+    FrameID = MachineOutlinerThunk;
+    NumBytesToCreateFrame = Costs.FrameThunk;
+    SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk);
+  } else
+    return outliner::OutlinedFunction();
+
+  return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
+                                    NumBytesToCreateFrame, FrameID);
+}
+
+bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom(
+    MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
+  const Function &F = MF.getFunction();
+
+  // Can F be deduplicated by the linker? If it can, don't outline from it.
+  if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
+    return false;
+
+  // Don't outline from functions with section markings; the program could
+  // expect that all the code is in the named section.
+  // FIXME: Allow outlining from multiple functions with the same section
+  // marking.
+  if (F.hasSection())
+    return false;
+
+  // FIXME: Thumb1 outlining is not handled
+  if (MF.getInfo<ARMFunctionInfo>()->isThumb1OnlyFunction())
+    return false;
+
+  // It's safe to outline from MF.
+  return true;
+}
+
+bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+                                              unsigned &Flags) const {
+  // Check if LR is available through all of the MBB. If it's not, then set
+  // a flag.
+  assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
+         "Suitable Machine Function for outlining must track liveness");
+
+  LiveRegUnits LRU(getRegisterInfo());
+
+  std::for_each(MBB.rbegin(), MBB.rend(),
+                [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
+
+  // Check if each of the unsafe registers are available...
+  bool R12AvailableInBlock = LRU.available(ARM::R12);
+  bool CPSRAvailableInBlock = LRU.available(ARM::CPSR);
+
+  // If all of these are dead (and not live out), we know we don't have to check
+  // them later.
+  if (R12AvailableInBlock && CPSRAvailableInBlock)
+    Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
+
+  // Now, add the live outs to the set.
+  LRU.addLiveOuts(MBB);
+
+  // If any of these registers is available in the MBB, but also a live out of
+  // the block, then we know outlining is unsafe.
+  if (R12AvailableInBlock && !LRU.available(ARM::R12))
+    return false;
+  if (CPSRAvailableInBlock && !LRU.available(ARM::CPSR))
+    return false;
+
+  // Check if there's a call inside this MachineBasicBlock.  If there is, then
+  // set a flag.
+  if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
+    Flags |= MachineOutlinerMBBFlags::HasCalls;
+
+  if (!LRU.available(ARM::LR))
+    Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
+
+  return true;
+}
+
+outliner::InstrType
+ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
+                                   unsigned Flags) const {
+  MachineInstr &MI = *MIT;
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+
+  // Be conservative with inline ASM
+  if (MI.isInlineAsm())
+    return outliner::InstrType::Invisible;
+
+  // Don't allow debug values to impact outlining type.
+  if (MI.isDebugInstr() || MI.isIndirectDebugValue())
+    return outliner::InstrType::Invisible;
+
+  // At this point, KILL instructions don't really tell us much so we can go
+  // ahead and skip over them.
+  if (MI.isKill())
+    return outliner::InstrType::Invisible;
+
+  // PIC instructions contain labels; outlining them would break offset
+  // computation.
+  unsigned Opc = MI.getOpcode();
+  if (Opc == ARM::tPICADD || Opc == ARM::PICADD || Opc == ARM::PICSTR ||
+      Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || Opc == ARM::PICLDR ||
+      Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || Opc == ARM::PICLDRSB ||
+      Opc == ARM::PICLDRSH || Opc == ARM::t2LDRpci_pic ||
+      Opc == ARM::t2MOVi16_ga_pcrel || Opc == ARM::t2MOVTi16_ga_pcrel ||
+      Opc == ARM::t2MOV_ga_pcrel)
+    return outliner::InstrType::Illegal;
+
+  // Is this a terminator for a basic block?
+  if (MI.isTerminator()) {
+    // Don't outline if the branch is not unconditional.
+    if (Opc == ARM::BX_RET || Opc == ARM::tBX_RET || Opc == ARM::MOVPCLR) {
+      if (MI.getOperand(0).getImm() != ARMCC::AL)
+        return outliner::InstrType::Illegal;
+    }
+    if (Opc == ARM::LDMIA_RET) {
+      if (MI.getOperand(2).getImm() != ARMCC::AL)
+        return outliner::InstrType::Illegal;
+    }
+
+    // Is this the end of a function?
+    if (MI.getParent()->succ_empty())
+      return outliner::InstrType::Legal;
+
+    // It's not, so don't outline it.
+    return outliner::InstrType::Illegal;
+  }
+
+  // Make sure none of the operands are un-outlinable.
+  for (const MachineOperand &MOP : MI.operands()) {
+    if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
+        MOP.isTargetIndex())
+      return outliner::InstrType::Illegal;
+  }
+
+  if (MI.isCall()) {
+    // If we don't know anything about the callee, assume it depends on the
+    // stack layout of the caller. In that case, it's only legal to outline
+    // as a tail-call.  Whitelist the call instructions we know about so we
+    // don't get unexpected results with call pseudo-instructions.
+    auto UnknownCallOutlineType = outliner::InstrType::Illegal;
+    if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX ||
+        Opc == ARM::tBLXr || Opc == ARM::tBLXi)
+      UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
+
+    return UnknownCallOutlineType;
+  }
+
+  // Don't touch the link register
+  if (MI.readsRegister(ARM::LR, TRI) || MI.modifiesRegister(ARM::LR, TRI))
+    return outliner::InstrType::Illegal;
+
+  // Does this use the stack?
+  if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI))
+    return outliner::InstrType::Illegal;
+
+  // Be conservative with IT blocks.
+  if (MI.readsRegister(ARM::ITSTATE, TRI) ||
+      MI.modifiesRegister(ARM::ITSTATE, TRI))
+    return outliner::InstrType::Illegal;
+
+  // Don't outline positions.
+  if (MI.isPosition())
+    return outliner::InstrType::Illegal;
+
+  return outliner::InstrType::Legal;
+}
+
+/// Finalize the body of the outlined function in \p MBB. For thunk outlining,
+/// the trailing call is rewritten into a tail call to the same callee.
+void ARMBaseInstrInfo::buildOutlinedFrame(
+    MachineBasicBlock &MBB, MachineFunction &MF,
+    const outliner::OutlinedFunction &OF) const {
+  if (OF.FrameConstructionID == MachineOutlinerThunk) {
+    MachineInstr *Call = &*--MBB.instr_end();
+    bool isThumb = Subtarget.isThumb();
+    unsigned FuncOp = isThumb ? 2 : 0;
+    unsigned Opc = Call->getOperand(FuncOp).isReg()
+                       ? isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr
+                       : isThumb ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd
+                                                             : ARM::tTAILJMPdND
+                                 : ARM::TAILJMPd;
+    MachineInstrBuilder MIB = BuildMI(MBB, MBB.end(), DebugLoc(), get(Opc))
+                                  .add(Call->getOperand(FuncOp));
+    if (isThumb && !Call->getOperand(FuncOp).isReg())
+      MIB.add(predOps(ARMCC::AL));
+    Call->eraseFromParent();
+  }
+}
+
+/// Insert a call (or tail-call branch) to outlined function \p MF at \p It.
+MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
+    Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
+    MachineFunction &MF, const outliner::Candidate &C) const {
+  MachineInstrBuilder MIB;
+  unsigned Opc;
+  bool isThumb = Subtarget.isThumb();
+
+  // Are we tail calling?
+  if (C.CallConstructionID == MachineOutlinerTailCall) {
+    // If yes, then we can just branch to the label.
+    Opc = isThumb
+              ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND
+              : ARM::TAILJMPd;
+    MIB = BuildMI(MF, DebugLoc(), get(Opc))
+              .addGlobalAddress(M.getNamedValue(MF.getName()));
+    if (isThumb)
+      MIB.add(predOps(ARMCC::AL));
+    It = MBB.insert(It, MIB);
+    return It;
+  }
+
+  // Create the call instruction; on Thumb the predicate precedes the callee.
+  Opc = isThumb ? ARM::tBL : ARM::BL;
+  MIB = BuildMI(MF, DebugLoc(), get(Opc));
+  if (isThumb)
+    MIB.add(predOps(ARMCC::AL));
+  MIB.addGlobalAddress(M.getNamedValue(MF.getName()));
+
+  // Insert the call.
+  It = MBB.insert(It, MIB);
+  return It;
+}
Index: llvm/lib/CodeGen/TargetPassConfig.cpp
===================================================================
--- llvm/lib/CodeGen/TargetPassConfig.cpp
+++ llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -979,8 +979,10 @@
     bool RunOnAllFunctions = (EnableMachineOutliner == AlwaysOutline);
     bool AddOutliner = RunOnAllFunctions ||
                        TM->Options.SupportsDefaultOutlining;
-    if (AddOutliner)
+    if (AddOutliner) {
+      MachineOutlinerEnabled = true;
       addPass(createMachineOutlinerPass(RunOnAllFunctions));
+    }
   }
 
   if (TM->getBBSectionsType() != llvm::BasicBlockSection::None)
Index: llvm/lib/CodeGen/MachineOutliner.cpp
===================================================================
--- llvm/lib/CodeGen/MachineOutliner.cpp
+++ llvm/lib/CodeGen/MachineOutliner.cpp
@@ -1175,6 +1175,9 @@
   // Outlined functions shouldn't preserve liveness.
   MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
   MF.getRegInfo().freezeReservedRegs(MF);
+  MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA);
+  MF.getProperties().set(MachineFunctionProperties::Property::NoPHIs);
+  MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
 
   // If there's a DISubprogram associated with this outlined function, then
   // emit debug info for the outlined function.
Index: llvm/include/llvm/CodeGen/TargetPassConfig.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetPassConfig.h
+++ llvm/include/llvm/CodeGen/TargetPassConfig.h
@@ -132,6 +132,9 @@
   /// callers.
   bool RequireCodeGenSCCOrder = false;
 
+  /// Set to true once the MachineOutliner pass has been added to the pipeline.
+  bool MachineOutlinerEnabled = false;
+
   /// Add the actual instruction selection passes. This does not include
   /// preparation passes on IR.
   bool addCoreISelPasses();
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -6124,8 +6124,9 @@
       // We only support -moutline in AArch64 right now. If we're not compiling
       // for AArch64, emit a warning and ignore the flag. Otherwise, add the
       // proper mllvm flags.
-      if (Triple.getArch() != llvm::Triple::aarch64 &&
-          Triple.getArch() != llvm::Triple::aarch64_32) {
+      if (!(Triple.isARM() || Triple.isThumb() ||
+            Triple.getArch() == llvm::Triple::aarch64 ||
+            Triple.getArch() == llvm::Triple::aarch64_32)) {
         D.Diag(diag::warn_drv_moutline_unsupported_opt) << Triple.getArchName();
       } else {
         CmdArgs.push_back("-mllvm");
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to