echristo updated this revision to Diff 261928. echristo edited the summary of this revision. echristo added a comment. Herald added a subscriber: zzheng.
Add a test case that runs opt, and add a command-line option to opt (-new-pm-disable-loop-unrolling) so that the test can enable it. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71687/new/ https://reviews.llvm.org/D71687 Files: clang/test/Misc/loop-opt-setup.c llvm/lib/Passes/PassBuilder.cpp llvm/test/Transforms/LoopUnroll/FullUnroll.ll llvm/tools/opt/NewPMDriver.cpp
Index: llvm/tools/opt/NewPMDriver.cpp =================================================================== --- llvm/tools/opt/NewPMDriver.cpp +++ llvm/tools/opt/NewPMDriver.cpp @@ -100,6 +100,11 @@ "the OptimizerLast extension point into default pipelines"), cl::Hidden); +// Individual pipeline tuning options. +static cl::opt<bool> DisableLoopUnrolling( + "new-pm-disable-loop-unrolling", + cl::desc("Disable loop unrolling in all relevant passes"), cl::init(false)); + extern cl::opt<PGOKind> PGOKindFlag; extern cl::opt<std::string> ProfileFile; extern cl::opt<CSPGOKind> CSPGOKindFlag; @@ -260,6 +265,10 @@ SI.registerCallbacks(PIC); PipelineTuningOptions PTO; + // LoopUnrolling defaults on to true and DisableLoopUnrolling is initialized + // to false above so we shouldn't necessarily need to check whether or not the + // option has been enabled. + PTO.LoopUnrolling = !DisableLoopUnrolling; PTO.Coroutines = Coroutines; PassBuilder PB(TM, PTO, P, &PIC); registerEPCallbacks(PB, VerifyEachPass, DebugPM); Index: llvm/test/Transforms/LoopUnroll/FullUnroll.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopUnroll/FullUnroll.ll @@ -0,0 +1,81 @@ +; RUN: opt -passes='default<O1>' -disable-verify --mtriple x86_64-pc-linux-gnu -new-pm-disable-loop-unrolling=true \ +; RUN: -S -o - %s | FileCheck %s + +; We don't end up deleting the loop, but we remove everything inside of it so checking for any +; reasonable instruction from the original loop will work. 
+; CHECK-NOT: br i1 +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +$_Z6Helperv = comdat any + +; Function Attrs: noinline optnone uwtable +define dso_local void @_Z3Runv() #0 { +entry: + call void @_Z6Helperv() + ret void +} + +; Function Attrs: noinline nounwind optnone uwtable +define linkonce_odr dso_local void @_Z6Helperv() #1 comdat { +entry: + %nodes = alloca [5 x i32*], align 16 + %num_active = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 5, i32* %num_active, align 4 + br label %while.cond + +while.cond: ; preds = %for.end, %entry + %0 = load i32, i32* %num_active, align 4 + %tobool = icmp ne i32 %0, 0 + br i1 %tobool, label %while.body, label %while.end + +while.body: ; preds = %while.cond + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %while.body + %1 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %1, 5 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %2 = load i32, i32* %i, align 4 + %idxprom = sext i32 %2 to i64 + %arrayidx = getelementptr inbounds [5 x i32*], [5 x i32*]* %nodes, i64 0, i64 %idxprom + %3 = load i32*, i32** %arrayidx, align 8 + %tobool1 = icmp ne i32* %3, null + br i1 %tobool1, label %if.then, label %if.end + +if.then: ; preds = %for.body + %4 = load i32, i32* %num_active, align 4 + %dec = add nsw i32 %4, -1 + store i32 %dec, i32* %num_active, align 4 + br label %if.end + +if.end: ; preds = %if.then, %for.body + br label %for.inc + +for.inc: ; preds = %if.end + %5 = load i32, i32* %i, align 4 + %inc = add nsw i32 %5, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond, !llvm.loop !2 + +for.end: ; preds = %for.cond + br label %while.cond + +while.end: ; preds = %while.cond + ret void +} + +attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" 
"less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 11.0.0 (g...@github.com:llvm/llvm-project.git 3ccd454c102b069d2230a18cfe16b84a5f005fc8)"} +!2 = distinct !{!2, !3} +!3 = !{!"llvm.loop.unroll.full"} Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -503,12 +503,13 @@ LPM2.addPass(LoopDeletionPass()); // Do not enable unrolling in PreLinkThinLTO phase during sample PGO // because it changes IR to makes profile annotation in back compile - // inaccurate. - if ((Phase != ThinLTOPhase::PreLink || !PGOOpt || - PGOOpt->Action != PGOOptions::SampleUse) && - PTO.LoopUnrolling) + // inaccurate. The normal unroller doesn't pay attention to forced full unroll + // attributes so we need to make sure and allow the full unroll pass to pay + // attention to it. 
+ if (Phase != ThinLTOPhase::PreLink || !PGOOpt || + PGOOpt->Action != PGOOptions::SampleUse) LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), - /*OnlyWhenForced=*/false, + /* OnlyWhenForced= */ !PTO.LoopUnrolling, PTO.ForgetAllSCEVInLoopUnroll)); for (auto &C : LoopOptimizerEndEPCallbacks) Index: clang/test/Misc/loop-opt-setup.c =================================================================== --- clang/test/Misc/loop-opt-setup.c +++ clang/test/Misc/loop-opt-setup.c @@ -1,5 +1,5 @@ -// RUN: %clang -O1 -fexperimental-new-pass-manager -fno-unroll-loops -S -o - %s -emit-llvm | FileCheck %s -// RUN: %clang -O1 -fno-experimental-new-pass-manager -fno-unroll-loops -S -o - %s -emit-llvm | FileCheck %s +// RUN: %clang -O1 -fexperimental-new-pass-manager -fno-unroll-loops -S -o - %s -emit-llvm | FileCheck %s -check-prefix=CHECK-NEWPM +// RUN: %clang -O1 -fno-experimental-new-pass-manager -fno-unroll-loops -S -o - %s -emit-llvm | FileCheck %s -check-prefix=CHECK-OLDPM extern int a[16]; int b = 0; int foo(void) { @@ -8,5 +8,34 @@ a[i] = b += 2; return b; } +// Check br i1 to make sure that the loop is fully unrolled // CHECK-NOT: br i1 +inline void Helper() { + const int *nodes[5]; + int num_active = 5; + + while (num_active) { +#pragma clang loop unroll(full) + for (int i = 0; i < 5; ++i) { + if (nodes[i]) { + --num_active; + } + } + } +} + +void Run() { + Helper(); +} + +// Check br i1 to make sure the loop is gone, there will still be a label branch for the infinite loop. +// CHECK-NEWPM-NOT: br i1 + +// The old pass manager doesn't remove the loop so check for 5 load i32*. +// CHECK-OLDPM: Helper +// CHECK-OLDPM: load i32* +// CHECK-OLDPM: load i32* +// CHECK-OLDPM: load i32* +// CHECK-OLDPM: load i32* +// CHECK-OLDPM: load i32*
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits