Author: David Green Date: 2021-01-16T22:19:35Z New Revision: 145472421535c71a9ea60af7e5d012ab69dc85ff
URL: https://github.com/llvm/llvm-project/commit/145472421535c71a9ea60af7e5d012ab69dc85ff DIFF: https://github.com/llvm/llvm-project/commit/145472421535c71a9ea60af7e5d012ab69dc85ff.diff LOG: [ARM] Align blocks that are not fallthrough targets If the previous block in a function does not fall through, any nops added to align the following block will never be executed. This means we can freely (except for codesize) align more branches. This happens in the constant island pass (as it cannot happen later) and only happens at aggressive optimization levels as it does increase codesize. Differential Revision: https://reviews.llvm.org/D94394 Added: Modified: llvm/lib/Target/ARM/ARMConstantIslandPass.cpp llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll Removed: ################################################################################ diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index e89eb0fb4502..630490f6f914 100644 --- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -338,6 +338,32 @@ LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() { } #endif +// Align blocks where the previous block does not fall through. This may add +// extra NOP's but they will not be executed. It uses the PrefLoopAlignment as a +// measure of how much to align, and only runs at CodeGenOpt::Aggressive. 
+static bool AlignBlocks(MachineFunction *MF) { + if (MF->getTarget().getOptLevel() != CodeGenOpt::Aggressive || + MF->getFunction().hasOptSize()) + return false; + + auto *TLI = MF->getSubtarget().getTargetLowering(); + const Align Alignment = TLI->getPrefLoopAlignment(); + if (Alignment < 4) + return false; + + bool Changed = false; + bool PrevCanFallthough = true; + for (auto &MBB : *MF) { + if (!PrevCanFallthough) { + Changed = true; + MBB.setAlignment(Alignment); + } + PrevCanFallthough = MBB.canFallThrough(); + } + + return Changed; +} + bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { MF = &mf; MCP = mf.getConstantPool(); @@ -380,6 +406,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { MF->RenumberBlocks(); } + // Align any non-fallthrough blocks + MadeChange |= AlignBlocks(MF); + // Perform the initial placement of the constant pool entries. To start with, // we put them all at the end of the function. std::vector<MachineInstr*> CPEMIs; diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll index b949934e51df..ea7a83b96b46 100644 --- a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll @@ -26,6 +26,7 @@ define i32 @add_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture rea ; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-LE-NEXT: add.w r0, r12, r1 ; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: .p2align 2 ; CHECK-LE-NEXT: .LBB0_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: movs r1, #0 @@ -58,6 +59,7 @@ define i32 @add_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture rea ; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-BE-NEXT: add.w r0, r12, r1 ; CHECK-BE-NEXT: pop {r4, r5, r6, pc} +; CHECK-BE-NEXT: .p2align 2 ; CHECK-BE-NEXT: .LBB0_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 @@ -129,6 +131,7 @@ define i32 @mul_bottom_user(i32 %arg, 
i32* nocapture readnone %arg1, i16* nocapt ; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-LE-NEXT: add.w r0, r12, r1 ; CHECK-LE-NEXT: pop {r4, r5, r7, pc} +; CHECK-LE-NEXT: .p2align 2 ; CHECK-LE-NEXT: .LBB1_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: movs r1, #0 @@ -161,6 +164,7 @@ define i32 @mul_bottom_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapt ; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-BE-NEXT: add.w r0, r12, r1 ; CHECK-BE-NEXT: pop {r4, r5, r6, pc} +; CHECK-BE-NEXT: .p2align 2 ; CHECK-BE-NEXT: .LBB1_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 @@ -232,6 +236,7 @@ define i32 @mul_top_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture ; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-LE-NEXT: add.w r0, r12, r1 ; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: .p2align 2 ; CHECK-LE-NEXT: .LBB2_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: movs r1, #0 @@ -264,6 +269,7 @@ define i32 @mul_top_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture ; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-BE-NEXT: add.w r0, r12, r1 ; CHECK-BE-NEXT: pop {r4, r5, r6, pc} +; CHECK-BE-NEXT: .p2align 2 ; CHECK-BE-NEXT: .LBB2_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 @@ -335,6 +341,7 @@ define i32 @and_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture rea ; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-LE-NEXT: add.w r0, r12, r1 ; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: .p2align 2 ; CHECK-LE-NEXT: .LBB3_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: movs r1, #0 @@ -367,6 +374,7 @@ define i32 @and_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture rea ; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-BE-NEXT: add.w r0, r12, r1 ; CHECK-BE-NEXT: pop {r4, r5, r6, pc} +; CHECK-BE-NEXT: .p2align 2 ; CHECK-BE-NEXT: .LBB3_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 @@ -440,6 +448,7 @@ define i32 @multi_uses(i32 %arg, i32* 
nocapture readnone %arg1, i16* nocapture r ; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-LE-NEXT: add.w r0, lr, r12 ; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: .p2align 2 ; CHECK-LE-NEXT: .LBB4_4: ; CHECK-LE-NEXT: mov.w lr, #0 ; CHECK-LE-NEXT: mov.w r12, #0 @@ -474,6 +483,7 @@ define i32 @multi_uses(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture r ; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-BE-NEXT: add.w r0, r12, lr ; CHECK-BE-NEXT: pop {r4, r5, r6, pc} +; CHECK-BE-NEXT: .p2align 2 ; CHECK-BE-NEXT: .LBB4_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: mov.w lr, #0 diff --git a/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll b/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll index 90bf4df53f30..043268d769d1 100644 --- a/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll +++ b/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll @@ -57,6 +57,7 @@ define i64 @loopif(i32* nocapture readonly %x, i32 %y, i32 %n) { ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: mov r1, r3 ; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: movs r3, #0 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits