echristo updated this revision to Diff 262458.
echristo added a comment.
Update and reduce testcase a bit.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D71687/new/
https://reviews.llvm.org/D71687
Files:
clang/test/Misc/loop-opt-setup.c
llvm/lib/Passes/PassBuilder.cpp
llvm/test/Transforms/LoopUnroll/FullUnroll.ll
llvm/tools/opt/NewPMDriver.cpp
Index: llvm/tools/opt/NewPMDriver.cpp
===================================================================
--- llvm/tools/opt/NewPMDriver.cpp
+++ llvm/tools/opt/NewPMDriver.cpp
@@ -100,6 +100,11 @@
"the OptimizerLast extension point into default pipelines"),
cl::Hidden);
+// Individual pipeline tuning options.
+static cl::opt<bool> DisableLoopUnrolling(
+ "new-pm-disable-loop-unrolling",
+ cl::desc("Disable loop unrolling in all relevant passes"), cl::init(false));
+
extern cl::opt<PGOKind> PGOKindFlag;
extern cl::opt<std::string> ProfileFile;
extern cl::opt<CSPGOKind> CSPGOKindFlag;
@@ -260,6 +265,10 @@
SI.registerCallbacks(PIC);
PipelineTuningOptions PTO;
+ // LoopUnrolling defaults to true and DisableLoopUnrolling is initialized to
+ // false above, so we can assign unconditionally without first checking
+ // whether the option was given on the command line.
+ PTO.LoopUnrolling = !DisableLoopUnrolling;
PTO.Coroutines = Coroutines;
PassBuilder PB(TM, PTO, P, &PIC);
registerEPCallbacks(PB, VerifyEachPass, DebugPM);
Index: llvm/test/Transforms/LoopUnroll/FullUnroll.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopUnroll/FullUnroll.ll
@@ -0,0 +1,73 @@
+; RUN: opt -passes='default<O1>' -disable-verify --mtriple x86_64-pc-linux-gnu -new-pm-disable-loop-unrolling=true \
+; RUN: -S -o - %s | FileCheck %s
+
+; This checks that the loop full unroller will fire in the new pass manager,
+; even with unrolling disabled, when forced via #pragma in the source (or annotation in the code).
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind optnone uwtable
+define void @walrus() #0 {
+; We don't end up deleting the loop, merely turning it infinite, but we remove
+; everything inside of it so checking for a conditional branch will work.
+; CHECK-LABEL: entry
+; CHECK-NOT: br i1
+entry:
+ %nodes = alloca [5 x i32*], align 16
+ %num_active = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 5, i32* %num_active, align 4
+ br label %while.cond
+
+while.cond: ; preds = %for.end, %entry
+ %0 = load i32, i32* %num_active, align 4
+ %tobool = icmp ne i32 %0, 0
+ br i1 %tobool, label %while.body, label %while.end
+
+while.body: ; preds = %while.cond
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %while.body
+ %1 = load i32, i32* %i, align 4
+ %cmp = icmp slt i32 %1, 5
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %2 = load i32, i32* %i, align 4
+ %idxprom = sext i32 %2 to i64
+ %arrayidx = getelementptr inbounds [5 x i32*], [5 x i32*]* %nodes, i64 0, i64 %idxprom
+ %3 = load i32*, i32** %arrayidx, align 8
+ %tobool1 = icmp ne i32* %3, null
+ br i1 %tobool1, label %if.then, label %if.end
+
+if.then: ; preds = %for.body
+ %4 = load i32, i32* %num_active, align 4
+ %dec = add nsw i32 %4, -1
+ store i32 %dec, i32* %num_active, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %5 = load i32, i32* %i, align 4
+ %inc = add nsw i32 %5, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond, !llvm.loop !1
+
+for.end: ; preds = %for.cond
+ br label %while.cond
+
+while.end: ; preds = %while.cond
+ ret void
+}
+
+attributes #0 = { noinline nounwind optnone uwtable }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = distinct !{!1, !2}
+!2 = !{!"llvm.loop.unroll.full"}
Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -503,12 +503,13 @@
LPM2.addPass(LoopDeletionPass());
// Do not enable unrolling in PreLinkThinLTO phase during sample PGO
// because it changes IR to makes profile annotation in back compile
- // inaccurate.
- if ((Phase != ThinLTOPhase::PreLink || !PGOOpt ||
- PGOOpt->Action != PGOOptions::SampleUse) &&
- PTO.LoopUnrolling)
+ // inaccurate. The normal unroller doesn't pay attention to forced full unroll
+ // attributes, so when unrolling is disabled we still add the full unroll
+ // pass, restricted to loops that explicitly force it.
+ if (Phase != ThinLTOPhase::PreLink || !PGOOpt ||
+ PGOOpt->Action != PGOOptions::SampleUse)
LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
- /*OnlyWhenForced=*/false,
+ /* OnlyWhenForced= */ !PTO.LoopUnrolling,
PTO.ForgetAllSCEVInLoopUnroll));
for (auto &C : LoopOptimizerEndEPCallbacks)
Index: clang/test/Misc/loop-opt-setup.c
===================================================================
--- clang/test/Misc/loop-opt-setup.c
+++ clang/test/Misc/loop-opt-setup.c
@@ -1,5 +1,5 @@
-// RUN: %clang -O1 -fexperimental-new-pass-manager -fno-unroll-loops -S -o - %s -emit-llvm | FileCheck %s
-// RUN: %clang -O1 -fno-experimental-new-pass-manager -fno-unroll-loops -S -o - %s -emit-llvm | FileCheck %s
+// RUN: %clang -O1 -fexperimental-new-pass-manager -fno-unroll-loops -S -o - %s -emit-llvm | FileCheck %s -check-prefix=CHECK-NEWPM
+// RUN: %clang -O1 -fno-experimental-new-pass-manager -fno-unroll-loops -S -o - %s -emit-llvm | FileCheck %s -check-prefix=CHECK-OLDPM
extern int a[16];
int b = 0;
int foo(void) {
@@ -8,5 +8,34 @@
a[i] = b += 2;
return b;
}
+// Check that no br i1 remains to make sure that the loop is fully unrolled.
// CHECK-NOT: br i1
+inline void Helper() {
+ const int *nodes[5];
+ int num_active = 5;
+
+ while (num_active) {
+#pragma clang loop unroll(full)
+ for (int i = 0; i < 5; ++i) {
+ if (nodes[i]) {
+ --num_active;
+ }
+ }
+ }
+}
+
+void Run() {
+ Helper();
+}
+
+// Check that no br i1 remains to make sure the loop is gone; there will still be an unconditional branch for the infinite loop.
+// CHECK-NEWPM-NOT: br i1
+
+// The old pass manager doesn't remove the loop so check for 5 load i32*.
+// CHECK-OLDPM: Helper
+// CHECK-OLDPM: load i32*
+// CHECK-OLDPM: load i32*
+// CHECK-OLDPM: load i32*
+// CHECK-OLDPM: load i32*
+// CHECK-OLDPM: load i32*
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits