rcorcs created this revision.
rcorcs added a reviewer: hiraditya.
rcorcs added projects: LLVM, lld.
Herald added subscribers: cfe-commits, msifontes, jurahul, Kayjukh, frgossen,
grosul1, Joonsoo, stephenneuendorffer, liufengdb, lucyrfox, mgester,
arpith-jacob, nicolasvasilache, antiagainst, shauheen, jpienaar, rriddle,
mehdi_amini, dexonsmith, steven_wu, MaskRay, aheejin, arichardson, inglorion,
sbc100, mgorny, emaste.
Herald added a reviewer: espindola.
Herald added a reviewer: MaskRay.
Herald added projects: clang, MLIR.
This patch is the first in the sequence of three patches for supporting size
optimization with LTO. The planned patches are:
1: Standardizing the use of OptimizationLevel across pass builders, which
includes both SpeedupLevel and SizeLevel.
2: Enable the support for -Os and -Oz for LTO in lld.
3: Tune the LTO pipeline for size optimization.
Since we already have a class that describes both the speed and size levels of
optimization, I believe it is a good idea to use it across the code base when
defining optimization levels.
In the next patch, instead of adding a SizeLevel variable for the LTO
configuration, I'll be able to simply use this OptimizationLevel variable.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D81223
Files:
clang/lib/CodeGen/BackendUtil.cpp
lld/COFF/LTO.cpp
lld/ELF/LTO.cpp
lld/wasm/LTO.cpp
llvm/examples/Bye/Bye.cpp
llvm/include/llvm/IR/PassManager.h
llvm/include/llvm/LTO/Config.h
llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
llvm/include/llvm/Passes/PassBuilder.h
llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
llvm/lib/IR/PassManager.cpp
llvm/lib/LTO/LTO.cpp
llvm/lib/LTO/LTOBackend.cpp
llvm/lib/LTO/LTOCodeGenerator.cpp
llvm/lib/LTO/ThinLTOCodeGenerator.cpp
llvm/lib/Passes/PassBuilder.cpp
llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
llvm/tools/bugpoint/bugpoint.cpp
llvm/tools/llvm-lto/llvm-lto.cpp
llvm/tools/llvm-lto2/llvm-lto2.cpp
llvm/tools/lto/lto.cpp
llvm/tools/opt/CMakeLists.txt
llvm/tools/opt/NewPMDriver.cpp
llvm/tools/opt/opt.cpp
mlir/lib/ExecutionEngine/OptUtils.cpp
Index: mlir/lib/ExecutionEngine/OptUtils.cpp
===================================================================
--- mlir/lib/ExecutionEngine/OptUtils.cpp
+++ mlir/lib/ExecutionEngine/OptUtils.cpp
@@ -65,8 +65,7 @@
unsigned optLevel, unsigned sizeLevel,
llvm::TargetMachine *targetMachine) {
llvm::PassManagerBuilder builder;
- builder.OptLevel = optLevel;
- builder.SizeLevel = sizeLevel;
+ builder.OptLevel = {optLevel, sizeLevel};
builder.Inliner = llvm::createFunctionInliningPass(
optLevel, sizeLevel, /*DisableInlineHotCallSite=*/false);
builder.LoopVectorize = optLevel > 1 && sizeLevel < 2;
Index: llvm/tools/opt/opt.cpp
===================================================================
--- llvm/tools/opt/opt.cpp
+++ llvm/tools/opt/opt.cpp
@@ -393,8 +393,7 @@
FPM.add(createVerifierPass()); // Verify that input is correct
PassManagerBuilder Builder;
- Builder.OptLevel = OptLevel;
- Builder.SizeLevel = SizeLevel;
+ Builder.OptLevel = {OptLevel, SizeLevel};
if (DisableInline) {
// No inlining pass
@@ -450,7 +449,7 @@
PassManagerBuilder Builder;
Builder.VerifyInput = true;
if (DisableOptimizations)
- Builder.OptLevel = 0;
+ Builder.OptLevel = OptimizationLevel::O0;
if (!DisableInline)
Builder.Inliner = createFunctionInliningPass();
Index: llvm/tools/opt/NewPMDriver.cpp
===================================================================
--- llvm/tools/opt/NewPMDriver.cpp
+++ llvm/tools/opt/NewPMDriver.cpp
@@ -143,7 +143,7 @@
if (tryParsePipelineText<FunctionPassManager>(PB, PeepholeEPPipeline))
PB.registerPeepholeEPCallback(
[&PB, VerifyEachPass, DebugLogging](
- FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ FunctionPassManager &PM, OptimizationLevel Level) {
ExitOnError Err("Unable to parse PeepholeEP pipeline: ");
Err(PB.parsePassPipeline(PM, PeepholeEPPipeline, VerifyEachPass,
DebugLogging));
@@ -152,7 +152,7 @@
LateLoopOptimizationsEPPipeline))
PB.registerLateLoopOptimizationsEPCallback(
[&PB, VerifyEachPass, DebugLogging](
- LoopPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ LoopPassManager &PM, OptimizationLevel Level) {
ExitOnError Err("Unable to parse LateLoopOptimizationsEP pipeline: ");
Err(PB.parsePassPipeline(PM, LateLoopOptimizationsEPPipeline,
VerifyEachPass, DebugLogging));
@@ -160,7 +160,7 @@
if (tryParsePipelineText<LoopPassManager>(PB, LoopOptimizerEndEPPipeline))
PB.registerLoopOptimizerEndEPCallback(
[&PB, VerifyEachPass, DebugLogging](
- LoopPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ LoopPassManager &PM, OptimizationLevel Level) {
ExitOnError Err("Unable to parse LoopOptimizerEndEP pipeline: ");
Err(PB.parsePassPipeline(PM, LoopOptimizerEndEPPipeline,
VerifyEachPass, DebugLogging));
@@ -169,7 +169,7 @@
ScalarOptimizerLateEPPipeline))
PB.registerScalarOptimizerLateEPCallback(
[&PB, VerifyEachPass, DebugLogging](
- FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ FunctionPassManager &PM, OptimizationLevel Level) {
ExitOnError Err("Unable to parse ScalarOptimizerLateEP pipeline: ");
Err(PB.parsePassPipeline(PM, ScalarOptimizerLateEPPipeline,
VerifyEachPass, DebugLogging));
@@ -177,7 +177,7 @@
if (tryParsePipelineText<CGSCCPassManager>(PB, CGSCCOptimizerLateEPPipeline))
PB.registerCGSCCOptimizerLateEPCallback(
[&PB, VerifyEachPass, DebugLogging](
- CGSCCPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ CGSCCPassManager &PM, OptimizationLevel Level) {
ExitOnError Err("Unable to parse CGSCCOptimizerLateEP pipeline: ");
Err(PB.parsePassPipeline(PM, CGSCCOptimizerLateEPPipeline,
VerifyEachPass, DebugLogging));
@@ -185,7 +185,7 @@
if (tryParsePipelineText<FunctionPassManager>(PB, VectorizerStartEPPipeline))
PB.registerVectorizerStartEPCallback(
[&PB, VerifyEachPass, DebugLogging](
- FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ FunctionPassManager &PM, OptimizationLevel Level) {
ExitOnError Err("Unable to parse VectorizerStartEP pipeline: ");
Err(PB.parsePassPipeline(PM, VectorizerStartEPPipeline,
VerifyEachPass, DebugLogging));
@@ -200,7 +200,7 @@
if (tryParsePipelineText<FunctionPassManager>(PB, OptimizerLastEPPipeline))
PB.registerOptimizerLastEPCallback(
[&PB, VerifyEachPass, DebugLogging](ModulePassManager &PM,
- PassBuilder::OptimizationLevel) {
+ OptimizationLevel) {
ExitOnError Err("Unable to parse OptimizerLastEP pipeline: ");
Err(PB.parsePassPipeline(PM, OptimizerLastEPPipeline, VerifyEachPass,
DebugLogging));
Index: llvm/tools/opt/CMakeLists.txt
===================================================================
--- llvm/tools/opt/CMakeLists.txt
+++ llvm/tools/opt/CMakeLists.txt
@@ -17,13 +17,13 @@
Instrumentation
MC
ObjCARCOpts
+ Passes
Remarks
ScalarOpts
Support
Target
TransformUtils
Vectorize
- Passes
)
add_llvm_tool(opt
Index: llvm/tools/lto/lto.cpp
===================================================================
--- llvm/tools/lto/lto.cpp
+++ llvm/tools/lto/lto.cpp
@@ -166,7 +166,7 @@
if (OptLevel < '0' || OptLevel > '3')
report_fatal_error("Optimization level must be between 0 and 3");
- CG->setOptLevel(OptLevel - '0');
+ CG->setOptLevel({OptLevel - '0', 0});
CG->setFreestanding(EnableFreestanding);
}
@@ -510,7 +510,7 @@
if (OptLevel.getNumOccurrences()) {
if (OptLevel < '0' || OptLevel > '3')
report_fatal_error("Optimization level must be between 0 and 3");
- CodeGen->setOptLevel(OptLevel - '0');
+ CodeGen->setOptLevel({OptLevel - '0', 0});
switch (OptLevel) {
case '0':
CodeGen->setCodeGenOptLevel(CodeGenOpt::None);
Index: llvm/tools/llvm-lto2/llvm-lto2.cpp
===================================================================
--- llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -256,7 +256,7 @@
Conf.OptPipeline = OptPipeline;
Conf.AAPipeline = AAPipeline;
- Conf.OptLevel = OptLevel - '0';
+ Conf.OptLevel = {OptLevel - '0', 0};
Conf.UseNewPM = UseNewPM;
for (auto &PluginFN : PassPlugins)
Conf.PassPlugins.push_back(PluginFN);
@@ -284,8 +284,8 @@
Conf.OverrideTriple = OverrideTriple;
Conf.DefaultTriple = DefaultTriple;
Conf.StatsFile = StatsFile;
- Conf.PTO.LoopVectorization = Conf.OptLevel > 1;
- Conf.PTO.SLPVectorization = Conf.OptLevel > 1;
+ Conf.PTO.LoopVectorization = Conf.OptLevel.getSpeedupLevel() > 1;
+ Conf.PTO.SLPVectorization = Conf.OptLevel.getSpeedupLevel() > 1;
ThinBackend Backend;
if (ThinLTODistributedIndexes)
Index: llvm/tools/llvm-lto/llvm-lto.cpp
===================================================================
--- llvm/tools/llvm-lto/llvm-lto.cpp
+++ llvm/tools/llvm-lto/llvm-lto.cpp
@@ -1017,7 +1017,7 @@
// Set cpu and attrs strings for the default target/subtarget.
CodeGen.setCpu(codegen::getMCPU().c_str());
- CodeGen.setOptLevel(OptLevel - '0');
+ CodeGen.setOptLevel({OptLevel - '0', 0});
auto MAttrs = codegen::getMAttrs();
if (!MAttrs.empty()) {
Index: llvm/tools/bugpoint/bugpoint.cpp
===================================================================
--- llvm/tools/bugpoint/bugpoint.cpp
+++ llvm/tools/bugpoint/bugpoint.cpp
@@ -123,8 +123,7 @@
unsigned OptLevel,
unsigned SizeLevel) {
PassManagerBuilder Builder;
- Builder.OptLevel = OptLevel;
- Builder.SizeLevel = SizeLevel;
+ Builder.OptLevel = {OptLevel, SizeLevel};
if (OptLevel > 1)
Builder.Inliner = createFunctionInliningPass(OptLevel, SizeLevel, false);
Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -168,8 +168,7 @@
extern cl::opt<bool> EnableKnowledgeRetention;
PassManagerBuilder::PassManagerBuilder() {
- OptLevel = 2;
- SizeLevel = 0;
+ OptLevel = OptimizationLevel::O2;
LibraryInfo = nullptr;
Inliner = nullptr;
DisableUnrollLoops = false;
@@ -293,7 +292,7 @@
if (LibraryInfo)
FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
- if (OptLevel == 0) return;
+ if (OptLevel == OptimizationLevel::O0) return;
addInitialAliasAnalysisPasses(FPM);
@@ -315,8 +314,8 @@
// Perform the preinline and cleanup passes for O1 and above.
// And avoid doing them if optimizing for size.
// We will not do this inline for context sensitive PGO (when IsCS is true).
- if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner &&
- PGOSampleUse.empty() && !IsCS) {
+ if (OptLevel.getSpeedupLevel() > 0 && OptLevel.getSizeLevel() == 0 &&
+ !DisablePreInliner && PGOSampleUse.empty() && !IsCS) {
// Create preinline pass. We construct an InlineParams object and specify
// the threshold here to avoid the command line options of the regular
// inliner to influence pre-inlining. The only fields of InlineParams we
@@ -350,7 +349,7 @@
// Indirect call promotion that promotes intra-module targets only.
// For ThinLTO this is done earlier due to interactions with globalopt
// for imported functions. We don't run this at -O0.
- if (OptLevel > 0 && !IsCS)
+ if (OptLevel.getSpeedupLevel() > 0 && !IsCS)
MPM.add(
createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty()));
}
@@ -358,13 +357,14 @@
legacy::PassManagerBase &MPM) {
// Start of function pass.
// Break up aggregate allocas, using SSAUpdater.
- assert(OptLevel >= 1 && "Calling function optimizer with no optimization level!");
+ assert(OptLevel.getSpeedupLevel() >= 1 &&
+ "Calling function optimizer with no optimization level!");
MPM.add(createSROAPass());
MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies
if (EnableKnowledgeRetention)
MPM.add(createAssumeSimplifyPass());
- if (OptLevel > 1) {
+ if (OptLevel.getSpeedupLevel() > 1) {
if (EnableGVNHoist)
MPM.add(createGVNHoistPass());
if (EnableGVNSink) {
@@ -373,7 +373,7 @@
}
}
- if (OptLevel > 1) {
+ if (OptLevel.getSpeedupLevel() > 1) {
// Speculative execution if the target has divergent branches; otherwise nop.
MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass());
@@ -382,19 +382,19 @@
}
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
// Combine silly seq's
- if (OptLevel > 2)
+ if (OptLevel.getSpeedupLevel() > 2)
MPM.add(createAggressiveInstCombinerPass());
MPM.add(createInstructionCombiningPass());
- if (SizeLevel == 0 && !DisableLibCallsShrinkWrap)
+ if (OptLevel.getSizeLevel() == 0 && !DisableLibCallsShrinkWrap)
MPM.add(createLibCallsShrinkWrapPass());
addExtensionsToPM(EP_Peephole, MPM);
// Optimize memory intrinsic calls based on the profiled size information.
- if (SizeLevel == 0)
+ if (OptLevel.getSizeLevel() == 0)
MPM.add(createPGOMemOPSizeOptLegacyPass());
// TODO: Investigate the cost/benefit of tail call elimination on debugging.
- if (OptLevel > 1)
+ if (OptLevel.getSpeedupLevel() > 1)
MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createReassociatePass()); // Reassociate expressions
@@ -408,13 +408,14 @@
MPM.add(createLoopSimplifyCFGPass());
}
// Rotate Loop - disable header duplication at -Oz
- MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
+ MPM.add(createLoopRotatePass(OptLevel.getSizeLevel() == 2 ? 0 : -1));
// TODO: Investigate promotion cap for O1.
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
if (EnableSimpleLoopUnswitch)
MPM.add(createSimpleLoopUnswitchLegacyPass());
else
- MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
+ MPM.add(createLoopUnswitchPass(OptLevel.getSizeLevel() ||
+ OptLevel.getSpeedupLevel() < 3, DivergentTarget));
// FIXME: We break the loop pass pipeline here in order to do full
// simplify-cfg. Eventually loop-simplifycfg should be enhanced to replace the
// need for this.
@@ -430,12 +431,12 @@
MPM.add(createLoopInterchangePass()); // Interchange loops
// Unroll small loops
- MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
+ MPM.add(createSimpleLoopUnrollPass(OptLevel.getSpeedupLevel(), DisableUnrollLoops,
ForgetAllSCEVInLoopUnroll));
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
// This ends the loop pass pipelines.
- if (OptLevel > 1) {
+ if (OptLevel.getSpeedupLevel() > 1) {
MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
MPM.add(NewGVN ? createNewGVNPass()
: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
@@ -452,7 +453,7 @@
// opened up by them.
MPM.add(createInstructionCombiningPass());
addExtensionsToPM(EP_Peephole, MPM);
- if (OptLevel > 1) {
+ if (OptLevel.getSpeedupLevel() > 1) {
MPM.add(createJumpThreadingPass()); // Thread jumps
MPM.add(createCorrelatedValuePropagationPass());
MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
@@ -471,7 +472,7 @@
MPM.add(createInstructionCombiningPass());
addExtensionsToPM(EP_Peephole, MPM);
- if (EnableCHR && OptLevel >= 3 &&
+ if (EnableCHR && OptLevel.getSpeedupLevel() >= 3 &&
(!PGOInstrUse.empty() || !PGOSampleUse.empty() || EnablePGOCSInstrGen))
MPM.add(createControlHeightReductionLegacyPass());
}
@@ -496,7 +497,7 @@
// If all optimizations are disabled, just run the always-inline pass and,
// if enabled, the function merging pass.
- if (OptLevel == 0) {
+ if (OptLevel.getSpeedupLevel() == 0) {
addPGOInstrPasses(MPM);
if (Inliner) {
MPM.add(Inliner);
@@ -568,7 +569,7 @@
addExtensionsToPM(EP_ModuleOptimizerEarly, MPM);
- if (OptLevel > 2)
+ if (OptLevel.getSpeedupLevel() > 2)
MPM.add(createCallSiteSplittingPass());
MPM.add(createIPSCCPPass()); // IP SCCP
@@ -617,11 +618,11 @@
// Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
// there are no OpenMP runtime calls present in the module.
- if (OptLevel > 1)
+ if (OptLevel.getSpeedupLevel() > 1)
MPM.add(createOpenMPOptLegacyPass());
MPM.add(createPostOrderFunctionAttrsLegacyPass());
- if (OptLevel > 2)
+ if (OptLevel.getSpeedupLevel() > 2)
MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
addExtensionsToPM(EP_CGSCCOptimizerLate, MPM);
@@ -635,7 +636,7 @@
if (RunPartialInlining)
MPM.add(createPartialInliningPass());
- if (OptLevel > 1 && !PrepareForLTO && !PrepareForThinLTO)
+ if (OptLevel.getSpeedupLevel() > 1 && !PrepareForLTO && !PrepareForThinLTO)
// Remove avail extern fns and globals definitions if we aren't
// compiling an object file for later LTO. For LTO we want to preserve
// these so they are eligible for inlining at link-time. Note if they
@@ -732,7 +733,7 @@
// Re-rotate loops in all our loop nests. These may have fallout out of
// rotated form due to GVN or other transformations, and the vectorizer relies
// on the rotated form. Disable header duplication at -Oz.
- MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
+ MPM.add(createLoopRotatePass(OptLevel.getSizeLevel() == 2 ? 0 : -1));
// Distribute loops to allow partial vectorization. I.e. isolate dependences
// into separate loop that would otherwise inhibit vectorization. This is
@@ -752,7 +753,7 @@
// as function calls, so that we can only pass them when the vectorizer
// changed the code.
MPM.add(createInstructionCombiningPass());
- if (OptLevel > 1 && ExtraVectorizerPasses) {
+ if (OptLevel.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
// At higher optimization levels, try to clean up any runtime overlap and
// alignment checks inserted by the vectorizer. We want to track correllated
// runtime checks for two inner loops in the same outer loop, fold any
@@ -762,7 +763,8 @@
MPM.add(createCorrelatedValuePropagationPass());
MPM.add(createInstructionCombiningPass());
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
- MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
+ MPM.add(createLoopUnswitchPass(OptLevel.getSizeLevel() ||
+ OptLevel.getSpeedupLevel() < 3, DivergentTarget));
MPM.add(createCFGSimplificationPass());
MPM.add(createInstructionCombiningPass());
}
@@ -776,7 +778,7 @@
if (SLPVectorize) {
MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
- if (OptLevel > 1 && ExtraVectorizerPasses) {
+ if (OptLevel.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
MPM.add(createEarlyCSEPass());
}
}
@@ -791,11 +793,11 @@
// Unroll and Jam. We do this before unroll but need to be in a separate
// loop pass manager in order for the outer loop to be processed by
// unroll and jam before the inner loop is unrolled.
- MPM.add(createLoopUnrollAndJamPass(OptLevel));
+ MPM.add(createLoopUnrollAndJamPass(OptLevel.getSpeedupLevel()));
}
// Unroll small loops
- MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
+ MPM.add(createLoopUnrollPass(OptLevel.getSpeedupLevel(), DisableUnrollLoops,
ForgetAllSCEVInLoopUnroll));
if (!DisableUnrollLoops) {
@@ -820,7 +822,7 @@
// GlobalOpt already deletes dead functions and globals, at -O2 try a
// late pass of GlobalDCE. It is capable of deleting dead cycles.
- if (OptLevel > 1) {
+ if (OptLevel.getSpeedupLevel() > 1) {
MPM.add(createGlobalDCEPass()); // Remove dead fns and globals.
MPM.add(createConstantMergePass()); // Merge dup global constants
}
@@ -879,7 +881,7 @@
// Infer attributes about declarations if possible.
PM.add(createInferFunctionAttrsLegacyPass());
- if (OptLevel > 1) {
+ if (OptLevel.getSpeedupLevel() > 1) {
// Split call-site with more constrained arguments.
PM.add(createCallSiteSplittingPass());
@@ -918,7 +920,7 @@
PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr));
// That's all we need at opt level 1.
- if (OptLevel == 1)
+ if (OptLevel.getSpeedupLevel() == 1)
return;
// Now that we internalized some globals, see if we can hack on them!
@@ -937,7 +939,7 @@
// simplification opportunities, and both can propagate functions through
// function pointers. When this happens, we often have to resolve varargs
// calls, etc, so let instcombine do this.
- if (OptLevel > 2)
+ if (OptLevel.getSpeedupLevel() > 2)
PM.add(createAggressiveInstCombinerPass());
PM.add(createInstructionCombiningPass());
addExtensionsToPM(EP_Peephole, PM);
@@ -960,7 +962,7 @@
// Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
// there are no OpenMP runtime calls present in the module.
- if (OptLevel > 1)
+ if (OptLevel.getSpeedupLevel() > 1)
PM.add(createOpenMPOptLegacyPass());
// Optimize globals again if we ran the inliner.
@@ -982,7 +984,7 @@
// LTO provides additional opportunities for tailcall elimination due to
// link-time inlining, and visibility of nocapture attribute.
- if (OptLevel > 1)
+ if (OptLevel.getSpeedupLevel() > 1)
PM.add(createTailCallEliminationPass());
// Infer attributes on declarations, call sites, arguments, etc.
@@ -1006,11 +1008,11 @@
PM.add(createLoopInterchangePass());
// Unroll small loops
- PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
+ PM.add(createSimpleLoopUnrollPass(OptLevel.getSpeedupLevel(), DisableUnrollLoops,
ForgetAllSCEVInLoopUnroll));
PM.add(createLoopVectorizePass(true, !LoopVectorize));
// The vectorizer may have significantly shortened a loop body; unroll again.
- PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
+ PM.add(createLoopUnrollPass(OptLevel.getSpeedupLevel(), DisableUnrollLoops,
ForgetAllSCEVInLoopUnroll));
PM.add(createWarnMissedTransformationsPass());
@@ -1105,7 +1107,7 @@
addExtensionsToPM(EP_FullLinkTimeOptimizationEarly, PM);
- if (OptLevel != 0)
+ if (OptLevel != OptimizationLevel::O0)
addLTOOptimizationPasses(PM);
else {
// The whole-program-devirt pass needs to run at -O0 because only it knows
@@ -1123,7 +1125,7 @@
// link time if CFI is enabled. The pass does nothing if CFI is disabled.
PM.add(createLowerTypeTestsPass(ExportSummary, nullptr));
- if (OptLevel != 0)
+ if (OptLevel != OptimizationLevel::O0)
addLateLTOOptimizationPasses(PM);
addExtensionsToPM(EP_FullLinkTimeOptimizationLast, PM);
@@ -1146,14 +1148,14 @@
LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB,
unsigned OptLevel) {
PassManagerBuilder *Builder = unwrap(PMB);
- Builder->OptLevel = OptLevel;
+ Builder->OptLevel = {OptLevel, 0};
}
void
LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB,
unsigned SizeLevel) {
PassManagerBuilder *Builder = unwrap(PMB);
- Builder->SizeLevel = SizeLevel;
+ Builder->OptLevel = {2, SizeLevel};
}
void
Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -280,25 +280,6 @@
extern cl::opt<AttributorRunOption> AttributorRun;
extern cl::opt<bool> EnableKnowledgeRetention;
-const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O0 = {
- /*SpeedLevel*/ 0,
- /*SizeLevel*/ 0};
-const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O1 = {
- /*SpeedLevel*/ 1,
- /*SizeLevel*/ 0};
-const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O2 = {
- /*SpeedLevel*/ 2,
- /*SizeLevel*/ 0};
-const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O3 = {
- /*SpeedLevel*/ 3,
- /*SizeLevel*/ 0};
-const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Os = {
- /*SpeedLevel*/ 2,
- /*SizeLevel*/ 1};
-const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Oz = {
- /*SpeedLevel*/ 2,
- /*SizeLevel*/ 2};
-
namespace {
/// No-op module pass which does nothing.
@@ -391,7 +372,7 @@
} // End anonymous namespace.
void PassBuilder::invokePeepholeEPCallbacks(
- FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
+ FunctionPassManager &FPM, OptimizationLevel Level) {
for (auto &C : PeepholeEPCallbacks)
C(FPM, Level);
}
@@ -734,7 +715,7 @@
}
void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
- PassBuilder::OptimizationLevel Level,
+ OptimizationLevel Level,
bool RunProfileGen, bool IsCS,
std::string ProfileFile,
std::string ProfileRemappingFile) {
@@ -826,7 +807,7 @@
}
static InlineParams
-getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) {
+getInlineParamsFromOptLevel(OptimizationLevel Level) {
return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
}
Index: llvm/lib/LTO/ThinLTOCodeGenerator.cpp
===================================================================
--- llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -230,7 +230,7 @@
}
static void optimizeModule(Module &TheModule, TargetMachine &TM,
- unsigned OptLevel, bool Freestanding,
+ OptimizationLevel OptLevel, bool Freestanding,
ModuleSummaryIndex *Index) {
// Populate the PassManager
PassManagerBuilder PMB;
@@ -319,7 +319,7 @@
const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
- const GVSummaryMapTy &DefinedGVSummaries, unsigned OptLevel,
+ const GVSummaryMapTy &DefinedGVSummaries, OptimizationLevel OptLevel,
bool Freestanding, const TargetMachineBuilder &TMBuilder) {
if (CachePath.empty())
return;
@@ -408,7 +408,7 @@
const GVSummaryMapTy &DefinedGlobals,
const ThinLTOCodeGenerator::CachingOptions &CacheOptions,
bool DisableCodeGen, StringRef SaveTempsDir,
- bool Freestanding, unsigned OptLevel, unsigned count) {
+ bool Freestanding, OptimizationLevel OptLevel, unsigned count) {
// "Benchmark"-like optimization: single-source case
bool SingleModule = (ModuleMap.size() == 1);
Index: llvm/lib/LTO/LTOCodeGenerator.cpp
===================================================================
--- llvm/lib/LTO/LTOCodeGenerator.cpp
+++ llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -201,9 +201,9 @@
llvm_unreachable("Unknown debug format!");
}
-void LTOCodeGenerator::setOptLevel(unsigned Level) {
+void LTOCodeGenerator::setOptLevel(OptimizationLevel Level) {
OptLevel = Level;
- switch (OptLevel) {
+ switch (OptLevel.getSpeedupLevel()) {
case 0:
CGOptLevel = CodeGenOpt::None;
return;
Index: llvm/lib/LTO/LTOBackend.cpp
===================================================================
--- llvm/lib/LTO/LTOBackend.cpp
+++ llvm/lib/LTO/LTOBackend.cpp
@@ -180,7 +180,7 @@
}
static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
- unsigned OptLevel, bool IsThinLTO,
+ bool IsThinLTO,
ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary) {
Optional<PGOOptions> PGOOpt;
@@ -225,24 +225,7 @@
ModulePassManager MPM(Conf.DebugPassManager);
// FIXME (davide): verify the input.
- PassBuilder::OptimizationLevel OL;
-
- switch (OptLevel) {
- default:
- llvm_unreachable("Invalid optimization level");
- case 0:
- OL = PassBuilder::OptimizationLevel::O0;
- break;
- case 1:
- OL = PassBuilder::OptimizationLevel::O1;
- break;
- case 2:
- OL = PassBuilder::OptimizationLevel::O2;
- break;
- case 3:
- OL = PassBuilder::OptimizationLevel::O3;
- break;
- }
+ OptimizationLevel OL = Conf.OptLevel;
if (IsThinLTO)
MPM = PB.buildThinLTODefaultPipeline(OL, Conf.DebugPassManager,
@@ -338,7 +321,7 @@
runNewPMCustomPasses(Conf, Mod, TM, Conf.OptPipeline, Conf.AAPipeline,
Conf.DisableVerify);
else if (Conf.UseNewPM)
- runNewPMPasses(Conf, Mod, TM, Conf.OptLevel, IsThinLTO, ExportSummary,
+ runNewPMPasses(Conf, Mod, TM, IsThinLTO, ExportSummary,
ImportSummary);
else
runOldPMPasses(Conf, Mod, TM, IsThinLTO, ExportSummary, ImportSummary);
Index: llvm/lib/LTO/LTO.cpp
===================================================================
--- llvm/lib/LTO/LTO.cpp
+++ llvm/lib/LTO/LTO.cpp
@@ -127,7 +127,8 @@
AddUnsigned(-1);
AddUnsigned(Conf.CGOptLevel);
AddUnsigned(Conf.CGFileType);
- AddUnsigned(Conf.OptLevel);
+ AddUnsigned(Conf.OptLevel.getSpeedupLevel());
+ AddUnsigned(Conf.OptLevel.getSizeLevel());
AddUnsigned(Conf.UseNewPM);
AddUnsigned(Conf.Freestanding);
AddString(Conf.OptPipeline);
@@ -924,7 +925,7 @@
return It->second;
};
computeDeadSymbolsWithConstProp(ThinLTO.CombinedIndex, GUIDPreservedSymbols,
- isPrevailing, Conf.OptLevel > 0);
+ isPrevailing, Conf.OptLevel != OptimizationLevel::O0);
// Setup output file to emit statistics.
auto StatsFileOrErr = setupStatsFile(Conf.StatsFile);
@@ -1339,7 +1340,7 @@
runWholeProgramDevirtOnIndex(ThinLTO.CombinedIndex, ExportedGUIDs,
LocalWPDTargetsMap);
- if (Conf.OptLevel > 0)
+ if (Conf.OptLevel != OptimizationLevel::O0)
ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
ImportLists, ExportLists);
Index: llvm/lib/IR/PassManager.cpp
===================================================================
--- llvm/lib/IR/PassManager.cpp
+++ llvm/lib/IR/PassManager.cpp
@@ -94,3 +94,22 @@
AnalysisSetKey CFGAnalyses::SetKey;
AnalysisSetKey PreservedAnalyses::AllAnalysesKey;
+
+const OptimizationLevel OptimizationLevel::O0 = {
+ /*SpeedLevel*/ 0,
+ /*SizeLevel*/ 0};
+const OptimizationLevel OptimizationLevel::O1 = {
+ /*SpeedLevel*/ 1,
+ /*SizeLevel*/ 0};
+const OptimizationLevel OptimizationLevel::O2 = {
+ /*SpeedLevel*/ 2,
+ /*SizeLevel*/ 0};
+const OptimizationLevel OptimizationLevel::O3 = {
+ /*SpeedLevel*/ 3,
+ /*SizeLevel*/ 0};
+const OptimizationLevel OptimizationLevel::Os = {
+ /*SpeedLevel*/ 2,
+ /*SizeLevel*/ 1};
+const OptimizationLevel OptimizationLevel::Oz = {
+ /*SpeedLevel*/ 2,
+ /*SizeLevel*/ 2};
Index: llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
===================================================================
--- llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -15,6 +15,7 @@
#define LLVM_TRANSFORMS_IPO_PASSMANAGERBUILDER_H
#include "llvm-c/Transforms/PassManagerBuilder.h"
+#include "llvm/IR/PassManager.h"
#include <functional>
#include <memory>
#include <string>
@@ -37,7 +38,7 @@
/// pass sequence in various ways. A simple example of using it would be:
///
/// PassManagerBuilder Builder;
-/// Builder.OptLevel = 2;
+/// Builder.OptLevel = OptimizationLevel::O2;
/// Builder.populateFunctionPassManager(FPM);
/// Builder.populateModulePassManager(MPM);
///
@@ -128,12 +129,7 @@
};
/// The Optimization Level - Specify the basic optimization level.
- /// 0 = -O0, 1 = -O1, 2 = -O2, 3 = -O3
- unsigned OptLevel;
-
- /// SizeLevel - How much we're optimizing for size.
- /// 0 = none, 1 = -Os, 2 = -Oz
- unsigned SizeLevel;
+ OptimizationLevel OptLevel;
/// LibraryInfo - Specifies information about the runtime library for the
/// optimizer. If this is non-null, it is added to both the function and
Index: llvm/include/llvm/Passes/PassBuilder.h
===================================================================
--- llvm/include/llvm/Passes/PassBuilder.h
+++ llvm/include/llvm/Passes/PassBuilder.h
@@ -149,116 +149,6 @@
PostLink
};
- /// LLVM-provided high-level optimization levels.
- ///
- /// This enumerates the LLVM-provided high-level optimization levels. Each
- /// level has a specific goal and rationale.
- class OptimizationLevel final {
- unsigned SpeedLevel = 2;
- unsigned SizeLevel = 0;
- OptimizationLevel(unsigned SpeedLevel, unsigned SizeLevel)
- : SpeedLevel(SpeedLevel), SizeLevel(SizeLevel) {
- // Check that only valid combinations are passed.
- assert(SpeedLevel <= 3 &&
- "Optimization level for speed should be 0, 1, 2, or 3");
- assert(SizeLevel <= 2 &&
- "Optimization level for size should be 0, 1, or 2");
- assert((SizeLevel == 0 || SpeedLevel == 2) &&
- "Optimize for size should be encoded with speedup level == 2");
- }
-
- public:
- OptimizationLevel() = default;
- /// Disable as many optimizations as possible. This doesn't completely
- /// disable the optimizer in all cases, for example always_inline functions
- /// can be required to be inlined for correctness.
- static const OptimizationLevel O0;
-
- /// Optimize quickly without destroying debuggability.
- ///
- /// This level is tuned to produce a result from the optimizer as quickly
- /// as possible and to avoid destroying debuggability. This tends to result
- /// in a very good development mode where the compiled code will be
- /// immediately executed as part of testing. As a consequence, where
- /// possible, we would like to produce efficient-to-execute code, but not
- /// if it significantly slows down compilation or would prevent even basic
- /// debugging of the resulting binary.
- ///
- /// As an example, complex loop transformations such as versioning,
- /// vectorization, or fusion don't make sense here due to the degree to
- /// which the executed code differs from the source code, and the compile
- /// time cost.
- static const OptimizationLevel O1;
- /// Optimize for fast execution as much as possible without triggering
- /// significant incremental compile time or code size growth.
- ///
- /// The key idea is that optimizations at this level should "pay for
- /// themselves". So if an optimization increases compile time by 5% or
- /// increases code size by 5% for a particular benchmark, that benchmark
- /// should also be one which sees a 5% runtime improvement. If the compile
- /// time or code size penalties happen on average across a diverse range of
- /// LLVM users' benchmarks, then the improvements should as well.
- ///
- /// And no matter what, the compile time needs to not grow superlinearly
- /// with the size of input to LLVM so that users can control the runtime of
- /// the optimizer in this mode.
- ///
- /// This is expected to be a good default optimization level for the vast
- /// majority of users.
- static const OptimizationLevel O2;
- /// Optimize for fast execution as much as possible.
- ///
- /// This mode is significantly more aggressive in trading off compile time
- /// and code size to get execution time improvements. The core idea is that
- /// this mode should include any optimization that helps execution time on
- /// balance across a diverse collection of benchmarks, even if it increases
- /// code size or compile time for some benchmarks without corresponding
- /// improvements to execution time.
- ///
- /// Despite being willing to trade more compile time off to get improved
- /// execution time, this mode still tries to avoid superlinear growth in
- /// order to make even significantly slower compile times at least scale
- /// reasonably. This does not preclude very substantial constant factor
- /// costs though.
- static const OptimizationLevel O3;
- /// Similar to \c O2 but tries to optimize for small code size instead of
- /// fast execution without triggering significant incremental execution
- /// time slowdowns.
- ///
- /// The logic here is exactly the same as \c O2, but with code size and
- /// execution time metrics swapped.
- ///
- /// A consequence of the different core goal is that this should in general
- /// produce substantially smaller executables that still run in
- /// a reasonable amount of time.
- static const OptimizationLevel Os;
- /// A very specialized mode that will optimize for code size at any and all
- /// costs.
- ///
- /// This is useful primarily when there are absolute size limitations and
- /// any effort taken to reduce the size is worth it regardless of the
- /// execution time impact. You should expect this level to produce rather
- /// slow, but very small, code.
- static const OptimizationLevel Oz;
-
- bool isOptimizingForSpeed() const {
- return SizeLevel == 0 && SpeedLevel > 0;
- }
-
- bool isOptimizingForSize() const { return SizeLevel > 0; }
-
- bool operator==(const OptimizationLevel &Other) const {
- return SizeLevel == Other.SizeLevel && SpeedLevel == Other.SpeedLevel;
- }
- bool operator!=(const OptimizationLevel &Other) const {
- return SizeLevel != Other.SizeLevel || SpeedLevel != Other.SpeedLevel;
- }
-
- unsigned getSpeedupLevel() const { return SpeedLevel; }
-
- unsigned getSizeLevel() const { return SizeLevel; }
- };
-
explicit PassBuilder(TargetMachine *TM = nullptr,
PipelineTuningOptions PTO = PipelineTuningOptions(),
Optional<PGOOptions> PGOOpt = None,
Index: llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
===================================================================
--- llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
+++ llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Support/CachePruning.h"
#include "llvm/Support/CodeGen.h"
@@ -221,9 +222,9 @@
TMBuilder.CGOptLevel = CGOptLevel;
}
- /// IR optimization level: from 0 to 3.
- void setOptLevel(unsigned NewOptLevel) {
- OptLevel = (NewOptLevel > 3) ? 3 : NewOptLevel;
+ /// IR optimization level
+ void setOptLevel(OptimizationLevel NewOptLevel) {
+ OptLevel = NewOptLevel;
}
/// Disable CodeGen, only run the stages till codegen and stop. The output
@@ -339,8 +340,8 @@
/// on the target.
bool Freestanding = false;
- /// IR Optimization Level [0-3].
- unsigned OptLevel = 3;
+ /// IR Optimization Level.
+ OptimizationLevel OptLevel = OptimizationLevel::O3;
};
}
#endif
Index: llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
===================================================================
--- llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
+++ llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
@@ -41,6 +41,7 @@
#include "llvm/ADT/StringSet.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ToolOutputFile.h"
@@ -94,7 +95,7 @@
void setCpu(StringRef MCpu) { this->MCpu = std::string(MCpu); }
void setAttr(StringRef MAttr) { this->MAttr = std::string(MAttr); }
- void setOptLevel(unsigned OptLevel);
+ void setOptLevel(OptimizationLevel OptLevel);
void setShouldInternalize(bool Value) { ShouldInternalize = Value; }
void setShouldEmbedUselists(bool Value) { ShouldEmbedUselists = Value; }
@@ -234,7 +235,7 @@
CodeGenOpt::Level CGOptLevel = CodeGenOpt::Default;
const Target *MArch = nullptr;
std::string TripleStr;
- unsigned OptLevel = 2;
+ OptimizationLevel OptLevel = OptimizationLevel::O2;
lto_diagnostic_handler_t DiagHandler = nullptr;
void *DiagContext = nullptr;
bool ShouldInternalize = EnableLTOInternalization;
Index: llvm/include/llvm/LTO/Config.h
===================================================================
--- llvm/include/llvm/LTO/Config.h
+++ llvm/include/llvm/LTO/Config.h
@@ -46,7 +46,7 @@
Optional<CodeModel::Model> CodeModel = None;
CodeGenOpt::Level CGOptLevel = CodeGenOpt::Default;
CodeGenFileType CGFileType = CGFT_ObjectFile;
- unsigned OptLevel = 2;
+ OptimizationLevel OptLevel = OptimizationLevel::O2;
bool DisableVerify = false;
/// Use the new pass manager
Index: llvm/include/llvm/IR/PassManager.h
===================================================================
--- llvm/include/llvm/IR/PassManager.h
+++ llvm/include/llvm/IR/PassManager.h
@@ -410,6 +410,117 @@
}
};
+/// LLVM-provided high-level optimization levels.
+///
+/// This enumerates the LLVM-provided high-level optimization levels. Each
+/// level has a specific goal and rationale.
+class OptimizationLevel final {
+ unsigned SpeedLevel = 2;
+ unsigned SizeLevel = 0;
+public:
+ OptimizationLevel() = default;
+
+ OptimizationLevel(unsigned SpeedLevel, unsigned SizeLevel)
+ : SpeedLevel(SpeedLevel), SizeLevel(SizeLevel) {
+ // Check that only valid combinations are passed.
+ assert(SpeedLevel <= 3 &&
+ "Optimization level for speed should be 0, 1, 2, or 3");
+ assert(SizeLevel <= 2 &&
+ "Optimization level for size should be 0, 1, or 2");
+ assert((SizeLevel == 0 || SpeedLevel == 2) &&
+ "Optimize for size should be encoded with speedup level == 2");
+ }
+
+ /// Disable as many optimizations as possible. This doesn't completely
+ /// disable the optimizer in all cases, for example always_inline functions
+ /// can be required to be inlined for correctness.
+ static const OptimizationLevel O0;
+
+ /// Optimize quickly without destroying debuggability.
+ ///
+ /// This level is tuned to produce a result from the optimizer as quickly
+ /// as possible and to avoid destroying debuggability. This tends to result
+ /// in a very good development mode where the compiled code will be
+ /// immediately executed as part of testing. As a consequence, where
+ /// possible, we would like to produce efficient-to-execute code, but not
+ /// if it significantly slows down compilation or would prevent even basic
+ /// debugging of the resulting binary.
+ ///
+ /// As an example, complex loop transformations such as versioning,
+ /// vectorization, or fusion don't make sense here due to the degree to
+ /// which the executed code differs from the source code, and the compile
+ /// time cost.
+ static const OptimizationLevel O1;
+ /// Optimize for fast execution as much as possible without triggering
+ /// significant incremental compile time or code size growth.
+ ///
+ /// The key idea is that optimizations at this level should "pay for
+ /// themselves". So if an optimization increases compile time by 5% or
+ /// increases code size by 5% for a particular benchmark, that benchmark
+ /// should also be one which sees a 5% runtime improvement. If the compile
+ /// time or code size penalties happen on average across a diverse range of
+ /// LLVM users' benchmarks, then the improvements should as well.
+ ///
+ /// And no matter what, the compile time needs to not grow superlinearly
+ /// with the size of input to LLVM so that users can control the runtime of
+ /// the optimizer in this mode.
+ ///
+ /// This is expected to be a good default optimization level for the vast
+ /// majority of users.
+ static const OptimizationLevel O2;
+ /// Optimize for fast execution as much as possible.
+ ///
+ /// This mode is significantly more aggressive in trading off compile time
+ /// and code size to get execution time improvements. The core idea is that
+ /// this mode should include any optimization that helps execution time on
+ /// balance across a diverse collection of benchmarks, even if it increases
+ /// code size or compile time for some benchmarks without corresponding
+ /// improvements to execution time.
+ ///
+ /// Despite being willing to trade more compile time off to get improved
+ /// execution time, this mode still tries to avoid superlinear growth in
+ /// order to make even significantly slower compile times at least scale
+ /// reasonably. This does not preclude very substantial constant factor
+ /// costs though.
+ static const OptimizationLevel O3;
+ /// Similar to \c O2 but tries to optimize for small code size instead of
+ /// fast execution without triggering significant incremental execution
+ /// time slowdowns.
+ ///
+ /// The logic here is exactly the same as \c O2, but with code size and
+ /// execution time metrics swapped.
+ ///
+ /// A consequence of the different core goal is that this should in general
+ /// produce substantially smaller executables that still run in
+ /// a reasonable amount of time.
+ static const OptimizationLevel Os;
+ /// A very specialized mode that will optimize for code size at any and all
+ /// costs.
+ ///
+ /// This is useful primarily when there are absolute size limitations and
+ /// any effort taken to reduce the size is worth it regardless of the
+ /// execution time impact. You should expect this level to produce rather
+ /// slow, but very small, code.
+ static const OptimizationLevel Oz;
+
+ bool isOptimizingForSpeed() const {
+ return SizeLevel == 0 && SpeedLevel > 0;
+ }
+
+ bool isOptimizingForSize() const { return SizeLevel > 0; }
+
+ bool operator==(const OptimizationLevel &Other) const {
+ return SizeLevel == Other.SizeLevel && SpeedLevel == Other.SpeedLevel;
+ }
+ bool operator!=(const OptimizationLevel &Other) const {
+ return SizeLevel != Other.SizeLevel || SpeedLevel != Other.SpeedLevel;
+ }
+
+ unsigned getSpeedupLevel() const { return SpeedLevel; }
+
+ unsigned getSizeLevel() const { return SizeLevel; }
+};
+
namespace detail {
/// Actual unpacker of extra arguments in getAnalysisResult,
Index: llvm/examples/Bye/Bye.cpp
===================================================================
--- llvm/examples/Bye/Bye.cpp
+++ llvm/examples/Bye/Bye.cpp
@@ -55,7 +55,7 @@
[](PassBuilder &PB) {
PB.registerVectorizerStartEPCallback(
[](llvm::FunctionPassManager &PM,
- llvm::PassBuilder::OptimizationLevel Level) {
+ llvm::OptimizationLevel Level) {
PM.addPass(Bye());
});
PB.registerPipelineParsingCallback(
Index: lld/wasm/LTO.cpp
===================================================================
--- lld/wasm/LTO.cpp
+++ lld/wasm/LTO.cpp
@@ -49,7 +49,7 @@
c.DisableVerify = config->disableVerify;
c.DiagHandler = diagnosticHandler;
- c.OptLevel = config->ltoo;
+ c.OptLevel = {config->ltoo, 0};
c.MAttrs = getMAttrs();
c.CGOptLevel = args::getCGOptLevel(config->ltoo);
Index: lld/ELF/LTO.cpp
===================================================================
--- lld/ELF/LTO.cpp
+++ lld/ELF/LTO.cpp
@@ -114,13 +114,13 @@
c.CodeModel = getCodeModelFromCMModel();
c.DisableVerify = config->disableVerify;
c.DiagHandler = diagnosticHandler;
- c.OptLevel = config->ltoo;
+ c.OptLevel = {config->ltoo, 0};
c.CPU = getCPUStr();
c.MAttrs = getMAttrs();
c.CGOptLevel = args::getCGOptLevel(config->ltoo);
- c.PTO.LoopVectorization = c.OptLevel > 1;
- c.PTO.SLPVectorization = c.OptLevel > 1;
+ c.PTO.LoopVectorization = c.OptLevel.getSpeedupLevel() > 1;
+ c.PTO.SLPVectorization = c.OptLevel.getSpeedupLevel() > 1;
// Set up a custom pipeline if we've been asked to.
c.OptPipeline = std::string(config->ltoNewPmPasses);
Index: lld/COFF/LTO.cpp
===================================================================
--- lld/COFF/LTO.cpp
+++ lld/COFF/LTO.cpp
@@ -77,7 +77,7 @@
c.RelocModel = Reloc::PIC_;
c.DisableVerify = true;
c.DiagHandler = diagnosticHandler;
- c.OptLevel = config->ltoo;
+ c.OptLevel = {config->ltoo, 0};
c.CPU = getCPUStr();
c.MAttrs = getMAttrs();
c.CGOptLevel = args::getCGOptLevel(config->ltoo);
Index: clang/lib/CodeGen/BackendUtil.cpp
===================================================================
--- clang/lib/CodeGen/BackendUtil.cpp
+++ clang/lib/CodeGen/BackendUtil.cpp
@@ -186,17 +186,17 @@
}
static void addObjCARCAPElimPass(const PassManagerBuilder &Builder, PassManagerBase &PM) {
- if (Builder.OptLevel > 0)
+ if (Builder.OptLevel.getSpeedupLevel() > 0)
PM.add(createObjCARCAPElimPass());
}
static void addObjCARCExpandPass(const PassManagerBuilder &Builder, PassManagerBase &PM) {
- if (Builder.OptLevel > 0)
+ if (Builder.OptLevel.getSpeedupLevel() > 0)
PM.add(createObjCARCExpandPass());
}
static void addObjCARCOptPass(const PassManagerBuilder &Builder, PassManagerBase &PM) {
- if (Builder.OptLevel > 0)
+ if (Builder.OptLevel.getSpeedupLevel() > 0)
PM.add(createObjCARCOptPass());
}
@@ -319,7 +319,7 @@
// MemorySanitizer inserts complex instrumentation that mostly follows
// the logic of the original code, but operates on "shadow" values.
// It can benefit from re-running some general purpose optimization passes.
- if (Builder.OptLevel > 0) {
+ if (Builder.OptLevel.getSpeedupLevel() > 0) {
PM.add(createEarlyCSEPass());
PM.add(createReassociatePass());
PM.add(createLICMPass());
@@ -613,8 +613,7 @@
CodeGenOpts.PrepareForThinLTO));
}
- PMBuilder.OptLevel = CodeGenOpts.OptimizationLevel;
- PMBuilder.SizeLevel = CodeGenOpts.OptimizeSize;
+ PMBuilder.OptLevel = {CodeGenOpts.OptimizationLevel, CodeGenOpts.OptimizeSize};
PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP;
PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop;
@@ -966,13 +965,13 @@
DwoOS->keep();
}
-static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) {
+static OptimizationLevel mapToLevel(const CodeGenOptions &Opts) {
switch (Opts.OptimizationLevel) {
default:
llvm_unreachable("Invalid optimization level!");
case 1:
- return PassBuilder::OptimizationLevel::O1;
+ return OptimizationLevel::O1;
case 2:
switch (Opts.OptimizeSize) {
@@ -980,17 +979,17 @@
llvm_unreachable("Invalid optimization level for size!");
case 0:
- return PassBuilder::OptimizationLevel::O2;
+ return OptimizationLevel::O2;
case 1:
- return PassBuilder::OptimizationLevel::Os;
+ return OptimizationLevel::Os;
case 2:
- return PassBuilder::OptimizationLevel::Oz;
+ return OptimizationLevel::Oz;
}
case 3:
- return PassBuilder::OptimizationLevel::O3;
+ return OptimizationLevel::O3;
}
}
@@ -1244,7 +1243,7 @@
} else {
// Map our optimization levels into one of the distinct levels used to
// configure the pipeline.
- PassBuilder::OptimizationLevel Level = mapToLevel(CodeGenOpts);
+ OptimizationLevel Level = mapToLevel(CodeGenOpts);
// If we reached here with a non-empty index file name, then the index
// file was empty and we are not performing ThinLTO backend compilation
@@ -1268,7 +1267,7 @@
// FIXME: either handle asan/the remaining sanitizers or error out
if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds))
PB.registerScalarOptimizerLateEPCallback(
- [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
+ [](FunctionPassManager &FPM, OptimizationLevel Level) {
FPM.addPass(BoundsCheckingPass());
});
@@ -1277,7 +1276,7 @@
CodeGenOpts.SanitizeCoverageTraceCmp) {
PB.registerOptimizerLastEPCallback(
[this](ModulePassManager &MPM,
- PassBuilder::OptimizationLevel Level) {
+ OptimizationLevel Level) {
auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts);
MPM.addPass(ModuleSanitizerCoveragePass(
SancovOpts, CodeGenOpts.SanitizeCoverageWhitelistFiles,
@@ -1294,7 +1293,7 @@
});
PB.registerOptimizerLastEPCallback(
[TrackOrigins, Recover](ModulePassManager &MPM,
- PassBuilder::OptimizationLevel Level) {
+ OptimizationLevel Level) {
MPM.addPass(createModuleToFunctionPassAdaptor(
MemorySanitizerPass({TrackOrigins, Recover, false})));
});
@@ -1303,7 +1302,7 @@
PB.registerPipelineStartEPCallback(
[](ModulePassManager &MPM) { MPM.addPass(ThreadSanitizerPass()); });
PB.registerOptimizerLastEPCallback(
- [](ModulePassManager &MPM, PassBuilder::OptimizationLevel Level) {
+ [](ModulePassManager &MPM, OptimizationLevel Level) {
MPM.addPass(
createModuleToFunctionPassAdaptor(ThreadSanitizerPass()));
});
@@ -1317,7 +1316,7 @@
bool UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope;
PB.registerOptimizerLastEPCallback(
[Recover, UseAfterScope](ModulePassManager &MPM,
- PassBuilder::OptimizationLevel Level) {
+ OptimizationLevel Level) {
MPM.addPass(
createModuleToFunctionPassAdaptor(AddressSanitizerPass(
/*CompileKernel=*/false, Recover, UseAfterScope)));
@@ -1561,7 +1560,7 @@
Conf.MAttrs = TOpts.Features;
Conf.RelocModel = CGOpts.RelocationModel;
Conf.CGOptLevel = getCGOptLevel(CGOpts);
- Conf.OptLevel = CGOpts.OptimizationLevel;
+ Conf.OptLevel = {CGOpts.OptimizationLevel, 0};
initTargetOptions(Diags, Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts);
Conf.SampleProfile = std::move(SampleProfile);
Conf.PTO.LoopUnrolling = CGOpts.UnrollLoops;
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits