mtrofin updated this revision to Diff 238107. mtrofin marked 2 inline comments as done. mtrofin added a comment.
Alternative: expose the speedup/size components to more closely align with the legacy PM. Repository: rG LLVM Github Monorepo. CHANGES SINCE LAST ACTION: https://reviews.llvm.org/D72547/new/ https://reviews.llvm.org/D72547 Files: clang/lib/CodeGen/BackendUtil.cpp llvm/include/llvm/Passes/PassBuilder.h llvm/lib/LTO/LTOBackend.cpp llvm/lib/Passes/PassBuilder.cpp
Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -243,20 +243,18 @@ extern cl::opt<bool> FlattenedProfileUsed; -static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) { - switch (Level) { - case PassBuilder::O0: - case PassBuilder::O1: - case PassBuilder::O2: - case PassBuilder::O3: - return false; - - case PassBuilder::Os: - case PassBuilder::Oz: - return true; - } - llvm_unreachable("Invalid optimization level!"); -} +const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O0 = {0, + 0}; +const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O1 = {1, + 0}; +const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O2 = {2, + 0}; +const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O3 = {3, + 0}; +const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Os = {2, + 1}; +const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Oz = {2, + 2}; namespace { @@ -395,7 +393,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinLTOPhase Phase, bool DebugLogging) { - assert(Level != O0 && "Must request optimizations!"); + assert(Level != OptimizationLevel::O0 && "Must request optimizations!"); FunctionPassManager FPM(DebugLogging); // Form SSA out of local memory accesses after breaking apart aggregates into @@ -406,7 +404,7 @@ FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); // Hoisting of scalars and load expressions. - if (Level > O1) { + if (Level.getSpeedupLevel() >= 2) { if (EnableGVNHoist) FPM.addPass(GVNHoistPass()); @@ -418,7 +416,7 @@ } // Speculative execution if the target has divergent branches; otherwise nop. - if (Level > O1) { + if (Level.getSpeedupLevel() > 1) { FPM.addPass(SpeculativeExecutionPass()); // Optimize based on known information about branches, and cleanup afterward. 
@@ -426,11 +424,11 @@ FPM.addPass(CorrelatedValuePropagationPass()); } FPM.addPass(SimplifyCFGPass()); - if (Level == O3) + if (Level == OptimizationLevel::O3) FPM.addPass(AggressiveInstCombinePass()); FPM.addPass(InstCombinePass()); - if (!isOptimizingForSize(Level)) + if (!Level.isOptimizingForSize()) FPM.addPass(LibCallsShrinkWrapPass()); invokePeepholeEPCallbacks(FPM, Level); @@ -438,11 +436,11 @@ // For PGO use pipeline, try to optimize memory intrinsics such as memcpy // using the size value profile. Don't perform this when optimizing for size. if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse && - !isOptimizingForSize(Level) && Level > O1) + (Level.getSpeedupLevel() > 1 && !Level.isOptimizingForSize())) FPM.addPass(PGOMemOPSizeOpt()); // TODO: Investigate the cost/benefit of tail call elimination on debugging. - if (Level > O1) + if (Level.getSpeedupLevel() > 1) FPM.addPass(TailCallElimPass()); FPM.addPass(SimplifyCFGPass()); @@ -469,7 +467,7 @@ LPM1.addPass(LoopSimplifyCFGPass()); // Rotate Loop - disable header duplication at -Oz - LPM1.addPass(LoopRotatePass(Level != Oz)); + LPM1.addPass(LoopRotatePass(Level != OptimizationLevel::Oz)); // TODO: Investigate promotion cap for O1. LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); LPM1.addPass(SimpleLoopUnswitchPass()); @@ -486,7 +484,8 @@ if ((Phase != ThinLTOPhase::PreLink || !PGOOpt || PGOOpt->Action != PGOOptions::SampleUse) && PTO.LoopUnrolling) - LPM2.addPass(LoopFullUnrollPass(Level, /*OnlyWhenForced=*/false, + LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), + /*OnlyWhenForced=*/false, PTO.ForgetAllSCEVInLoopUnroll)); for (auto &C : LoopOptimizerEndEPCallbacks) @@ -509,7 +508,7 @@ FPM.addPass(SROA()); // Eliminate redundancies. - if (Level != O1) { + if (Level != OptimizationLevel::O1) { // These passes add substantial compile time so skip them at O1. 
FPM.addPass(MergedLoadStoreMotionPass()); if (RunNewGVN) @@ -538,7 +537,7 @@ // Re-consider control flow based optimizations after redundancy elimination, // redo DCE, etc. - if (Level > O1) { + if (Level.getSpeedupLevel() > 1) { FPM.addPass(JumpThreadingPass()); FPM.addPass(CorrelatedValuePropagationPass()); FPM.addPass(DSEPass()); @@ -558,7 +557,7 @@ FPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(FPM, Level); - if (EnableCHR && Level == O3 && PGOOpt && + if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt && (PGOOpt->Action == PGOOptions::IRUse || PGOOpt->Action == PGOOptions::SampleUse)) FPM.addPass(ControlHeightReductionPass()); @@ -571,13 +570,13 @@ bool RunProfileGen, bool IsCS, std::string ProfileFile, std::string ProfileRemappingFile) { - assert(Level != O0 && "Not expecting O0 here!"); + assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!"); // Generally running simplification passes and the inliner with an high // threshold results in smaller executables, but there may be cases where // the size grows, so let's be conservative here and skip this simplification // at -Os/Oz. We will not do this inline for context sensistive PGO (when // IsCS is true). - if (!isOptimizingForSize(Level) && !IsCS) { + if (!Level.isOptimizingForSize() && !IsCS) { InlineParams IP; IP.DefaultThreshold = PreInlineThreshold; @@ -662,10 +661,7 @@ static InlineParams getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) { - auto O3 = PassBuilder::O3; - unsigned OptLevel = Level > O3 ? 2 : Level; - unsigned SizeLevel = Level > O3 ? 
Level - O3 : 0; - return getInlineParams(OptLevel, SizeLevel); + return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel()); } ModulePassManager @@ -711,7 +707,7 @@ EarlyFPM.addPass(SROA()); EarlyFPM.addPass(EarlyCSEPass()); EarlyFPM.addPass(LowerExpectIntrinsicPass()); - if (Level == O3) + if (Level == OptimizationLevel::O3) EarlyFPM.addPass(CallSiteSplittingPass()); // In SamplePGO ThinLTO backend, we need instcombine before profile annotation @@ -830,7 +826,7 @@ // When at O3 add argument promotion to the pass pipeline. // FIXME: It isn't at all clear why this should be limited to O3. - if (Level == O3) + if (Level == OptimizationLevel::O3) MainCGPipeline.addPass(ArgumentPromotionPass()); // Lastly, add the core function simplification pipeline nested inside the @@ -974,11 +970,11 @@ // across the loop nests. // We do UnrollAndJam in a separate LPM to ensure it happens before unroll if (EnableUnrollAndJam && PTO.LoopUnrolling) { - OptimizePM.addPass(LoopUnrollAndJamPass(Level)); + OptimizePM.addPass(LoopUnrollAndJamPass(Level.getSpeedupLevel())); } - OptimizePM.addPass(LoopUnrollPass( - LoopUnrollOptions(Level, /*OnlyWhenForced=*/!PTO.LoopUnrolling, - PTO.ForgetAllSCEVInLoopUnroll))); + OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions( + Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, + PTO.ForgetAllSCEVInLoopUnroll))); OptimizePM.addPass(WarnMissedTransformationsPass()); OptimizePM.addPass(InstCombinePass()); OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); @@ -1040,7 +1036,8 @@ ModulePassManager PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, bool DebugLogging, bool LTOPreLink) { - assert(Level != O0 && "Must request optimizations for the default pipeline!"); + assert(Level != OptimizationLevel::O0 && + "Must request optimizations for the default pipeline!"); ModulePassManager MPM(DebugLogging); @@ -1067,7 +1064,8 @@ ModulePassManager 
PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level, bool DebugLogging) { - assert(Level != O0 && "Must request optimizations for the default pipeline!"); + assert(Level != OptimizationLevel::O0 && + "Must request optimizations for the default pipeline!"); ModulePassManager MPM(DebugLogging); @@ -1128,7 +1126,7 @@ MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary)); } - if (Level == O0) + if (Level == OptimizationLevel::O0) return MPM; // Force any function attributes we want the rest of the pipeline to observe. @@ -1147,10 +1145,11 @@ ModulePassManager PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level, bool DebugLogging) { - assert(Level != O0 && "Must request optimizations for the default pipeline!"); + assert(Level != OptimizationLevel::O0 && + "Must request optimizations for the default pipeline!"); // FIXME: We should use a customized pre-link pipeline! return buildPerModuleDefaultPipeline(Level, DebugLogging, - /* LTOPreLink */true); + /* LTOPreLink */ true); } ModulePassManager @@ -1158,7 +1157,7 @@ ModuleSummaryIndex *ExportSummary) { ModulePassManager MPM(DebugLogging); - if (Level == O0) { + if (Level == OptimizationLevel::O0) { // The WPD and LowerTypeTest passes need to run at -O0 to lower type // metadata and intrinsics. MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr)); @@ -1187,7 +1186,7 @@ // libraries and other oracles. MPM.addPass(InferFunctionAttrsPass()); - if (Level > 1) { + if (Level.getSpeedupLevel() > 1) { FunctionPassManager EarlyFPM(DebugLogging); EarlyFPM.addPass(CallSiteSplittingPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM))); @@ -1225,7 +1224,7 @@ MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr)); // Stop here at -O1. - if (Level == 1) { + if (Level == OptimizationLevel::O1) { // The LowerTypeTestsPass needs to run to lower type metadata and the // type.test intrinsics. The pass does nothing if CFI is disabled. 
MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); @@ -1250,7 +1249,7 @@ // function pointers. When this happens, we often have to resolve varargs // calls, etc, so let instcombine do this. FunctionPassManager PeepholeFPM(DebugLogging); - if (Level == O3) + if (Level == OptimizationLevel::O3) PeepholeFPM.addPass(AggressiveInstCombinePass()); PeepholeFPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(PeepholeFPM, Level); @@ -1886,13 +1885,13 @@ assert(Matches.size() == 3 && "Must capture two matched strings!"); OptimizationLevel L = StringSwitch<OptimizationLevel>(Matches[2]) - .Case("O0", O0) - .Case("O1", O1) - .Case("O2", O2) - .Case("O3", O3) - .Case("Os", Os) - .Case("Oz", Oz); - if (L == O0) { + .Case("O0", OptimizationLevel::O0) + .Case("O1", OptimizationLevel::O1) + .Case("O2", OptimizationLevel::O2) + .Case("O3", OptimizationLevel::O3) + .Case("Os", OptimizationLevel::Os) + .Case("Oz", OptimizationLevel::Oz); + if (L == OptimizationLevel::O0) { // Add instrumentation PGO passes -- at O0 we can still do PGO. if (PGOOpt && Matches[1] != "thinlto" && (PGOOpt->Action == PGOOptions::IRInstr || @@ -1909,8 +1908,10 @@ // This is consistent with old pass manager invoked via opt, but // inconsistent with clang. Clang doesn't enable loop vectorization // but does enable slp vectorization at Oz. 
- PTO.LoopVectorization = L > O1 && L < Oz; - PTO.SLPVectorization = L > O1 && L < Oz; + PTO.LoopVectorization = + L.getSpeedupLevel() > 1 && L != OptimizationLevel::Oz; + PTO.SLPVectorization = + L.getSpeedupLevel() > 1 && L != OptimizationLevel::Oz; if (Matches[1] == "default") { MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging)); Index: llvm/lib/LTO/LTOBackend.cpp =================================================================== --- llvm/lib/LTO/LTOBackend.cpp +++ llvm/lib/LTO/LTOBackend.cpp @@ -203,16 +203,16 @@ default: llvm_unreachable("Invalid optimization level"); case 0: - OL = PassBuilder::O0; + OL = PassBuilder::OptimizationLevel::O0; break; case 1: - OL = PassBuilder::O1; + OL = PassBuilder::OptimizationLevel::O1; break; case 2: - OL = PassBuilder::O2; + OL = PassBuilder::OptimizationLevel::O2; break; case 3: - OL = PassBuilder::O3; + OL = PassBuilder::OptimizationLevel::O3; break; } Index: llvm/include/llvm/Passes/PassBuilder.h =================================================================== --- llvm/include/llvm/Passes/PassBuilder.h +++ llvm/include/llvm/Passes/PassBuilder.h @@ -143,11 +143,26 @@ /// /// This enumerates the LLVM-provided high-level optimization levels. Each /// level has a specific goal and rationale. - enum OptimizationLevel { + class OptimizationLevel final { + unsigned SpeedLevel = 2; + unsigned SizeLevel = 0; + OptimizationLevel(unsigned SpeedLevel, unsigned SizeLevel) + : SpeedLevel(SpeedLevel), SizeLevel(SizeLevel) { + // Check that only valid combinations are passed. + assert((0 <= SpeedLevel && SpeedLevel <= 3) && + "Optimization level for speed should be 0, 1, 2, or 3"); + assert((0 <= SizeLevel && SizeLevel <= 2) && + "Optimization level for size should be 0, 1, or 2"); + assert((SizeLevel == 0 || SpeedLevel == 2) && + "Optimize for size should be encoded with speedup level == 2"); + } + + public: + OptimizationLevel() = default; /// Disable as many optimizations as possible. 
This doesn't completely /// disable the optimizer in all cases, for example always_inline functions /// can be required to be inlined for correctness. - O0, + static const OptimizationLevel O0; /// Optimize quickly without destroying debuggability. /// @@ -161,10 +176,9 @@ /// /// As an example, complex loop transformations such as versioning, /// vectorization, or fusion don't make sense here due to the degree to - /// which the executed code differs from the source code, and the compile time - /// cost. - O1, - + /// which the executed code differs from the source code, and the compile + /// time cost. + static const OptimizationLevel O1; /// Optimize for fast execution as much as possible without triggering /// significant incremental compile time or code size growth. /// @@ -181,8 +195,7 @@ /// /// This is expected to be a good default optimization level for the vast /// majority of users. - O2, - + static const OptimizationLevel O2; /// Optimize for fast execution as much as possible. /// /// This mode is significantly more aggressive in trading off compile time @@ -197,8 +210,7 @@ /// order to make even significantly slower compile times at least scale /// reasonably. This does not preclude very substantial constant factor /// costs though. - O3, - + static const OptimizationLevel O3; /// Similar to \c O2 but tries to optimize for small code size instead of /// fast execution without triggering significant incremental execution /// time slowdowns. @@ -209,8 +221,7 @@ /// A consequence of the different core goal is that this should in general /// produce substantially smaller executables that still run in /// a reasonable amount of time. - Os, - + static const OptimizationLevel Os; /// A very specialized mode that will optimize for code size at any and all /// costs. /// @@ -218,7 +229,24 @@ /// any effort taken to reduce the size is worth it regardless of the /// execution time impact. 
You should expect this level to produce rather /// slow, but very small, code. - Oz + static const OptimizationLevel Oz; + + bool isOptimizingForSpeed() const { + return SizeLevel == 0 && SpeedLevel > 0; + } + + bool isOptimizingForSize() const { return SizeLevel > 0; } + + bool operator==(const OptimizationLevel &Other) const { + return SizeLevel == Other.SizeLevel && SpeedLevel == Other.SpeedLevel; + } + bool operator!=(const OptimizationLevel &Other) const { + return SizeLevel != Other.SizeLevel || SpeedLevel != Other.SpeedLevel; + } + + unsigned getSpeedupLevel() const { return SpeedLevel; } + + unsigned getSizeLevel() const { return SizeLevel; } }; explicit PassBuilder(TargetMachine *TM = nullptr, Index: clang/lib/CodeGen/BackendUtil.cpp =================================================================== --- clang/lib/CodeGen/BackendUtil.cpp +++ clang/lib/CodeGen/BackendUtil.cpp @@ -924,7 +924,7 @@ llvm_unreachable("Invalid optimization level!"); case 1: - return PassBuilder::O1; + return PassBuilder::OptimizationLevel::O1; case 2: switch (Opts.OptimizeSize) { @@ -932,17 +932,17 @@ llvm_unreachable("Invalid optimization level for size!"); case 0: - return PassBuilder::O2; + return PassBuilder::OptimizationLevel::O2; case 1: - return PassBuilder::Os; + return PassBuilder::OptimizationLevel::Os; case 2: - return PassBuilder::Oz; + return PassBuilder::OptimizationLevel::Oz; } case 3: - return PassBuilder::O3; + return PassBuilder::OptimizationLevel::O3; } }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits