mtrofin created this revision.
mtrofin added reviewers: aeubanks, jdoerfert, davidxl, eraman.
Herald added subscribers: llvm-commits, cfe-commits, hiraditya.
Herald added projects: clang, LLVM.
mtrofin requested review of this revision.

Enable performing mandatory inlinings upfront, by reusing the same logic
as the full inliner, instead of the AlwaysInliner. This has the
following benefits:

- reduce code duplication - one inliner codebase
- open the opportunity to help the full inliner by performing additional

function passes after the mandatory inlinings, but before th full
inliner. Performing the mandatory inlinings first simplifies the problem
the full inliner needs to solve: less call sites, more contextualization, and,
depending on the additional function optimization passes run between the
2 inliners, higher accuracy of cost models / decision policies.

Note that this patch does not yet enable much in terms of post-always
inline function optimization.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D91567

Files:
  clang/test/Frontend/optimization-remark-line-directive.c
  llvm/include/llvm/Analysis/InlineAdvisor.h
  llvm/include/llvm/Passes/PassBuilder.h
  llvm/lib/Analysis/InlineAdvisor.cpp
  llvm/lib/Analysis/MLInlineAdvisor.cpp
  llvm/lib/Passes/PassBuilder.cpp
  llvm/lib/Passes/PassRegistry.def
  llvm/lib/Transforms/IPO/Inliner.cpp
  llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll
  llvm/test/Transforms/Inline/ML/bounds-checks.ll
  llvm/test/Transforms/Inline/inline_stats.ll

Index: llvm/test/Transforms/Inline/inline_stats.ll
===================================================================
--- llvm/test/Transforms/Inline/inline_stats.ll
+++ llvm/test/Transforms/Inline/inline_stats.ll
@@ -6,8 +6,11 @@
 ; RUN: opt -S -passes=inline -inliner-function-import-stats=basic < %s 2>&1 | FileCheck %s -check-prefix=CHECK-BASIC -check-prefix=CHECK
 ; RUN: opt -S -passes=inline -inliner-function-import-stats=verbose < %s 2>&1 | FileCheck %s -check-prefix="CHECK-VERBOSE" -check-prefix=CHECK
 
-; RUN: opt -S -passes=inliner-wrapper -inliner-function-import-stats=basic < %s 2>&1 | FileCheck %s -check-prefix=WRAPPER-BASIC -check-prefix=WRAPPER
-; RUN: opt -S -passes=inliner-wrapper -inliner-function-import-stats=verbose < %s 2>&1 | FileCheck %s -check-prefix=WRAPPER-VERBOSE -check-prefix=WRAPPER
+; RUN: opt -S -passes=inliner-wrapper -inliner-function-import-stats=basic < %s 2>&1 | FileCheck %s -check-prefix=CHECK-BASIC -check-prefix=CHECK
+; RUN: opt -S -passes=inliner-wrapper -inliner-function-import-stats=verbose < %s 2>&1 | FileCheck %s -check-prefix="CHECK-VERBOSE" -check-prefix=CHECK
+
+; RUN: opt -S -passes=always-inliner-wrapper,inliner-wrapper -inliner-function-import-stats=basic < %s 2>&1 | FileCheck %s -check-prefix=WRAPPER-BASIC -check-prefix=WRAPPER
+; RUN: opt -S -passes=always-inliner-wrapper,inliner-wrapper -inliner-function-import-stats=verbose < %s 2>&1 | FileCheck %s -check-prefix=WRAPPER-VERBOSE -check-prefix=WRAPPER
 
 ; CHECK: ------- Dumping inliner stats for [<stdin>] -------
 ; CHECK-BASIC-NOT: -- List of inlined functions:
Index: llvm/test/Transforms/Inline/ML/bounds-checks.ll
===================================================================
--- llvm/test/Transforms/Inline/ML/bounds-checks.ll
+++ llvm/test/Transforms/Inline/ML/bounds-checks.ll
@@ -4,7 +4,7 @@
 ; factor, we don't inline anymore.
 ; REQUIRES: have_tf_aot
 ; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=release -ml-advisor-size-increase-threshold=10.0 -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=NOBOUNDS
-; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=release -ml-advisor-size-increase-threshold=1.0 -disable-always-inliner-in-module-wrapper -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=BOUNDS
+; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=release -ml-advisor-size-increase-threshold=1.0 -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=BOUNDS
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-grtev4-linux-gnu"
Index: llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll
===================================================================
--- llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll
+++ llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll
@@ -7,18 +7,18 @@
 ; REQUIRES: have_tf_api
 ;
 ; When the bounds are very wide ("no bounds"), all inlinings happen.
-; RUN: opt -passes=scc-oz-module-inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -training-log=- -enable-ml-inliner=development -ml-advisor-size-increase-threshold=10.0 -disable-always-inliner-in-module-wrapper -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=NOBOUNDS
+; RUN: opt -passes=scc-oz-module-inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -training-log=- -enable-ml-inliner=development -ml-advisor-size-increase-threshold=10.0 -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=NOBOUNDS
 ;
 ; When the bounds are very restrictive, the first inlining happens but it's
 ; considered as "bad" (since it trips over the bounds) and its reward is a
 ; penalty. However, the mandatory inlining, which is considered next, happens.
 ; No other inlinings happend.
-; RUN: opt -passes=scc-oz-module-inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -training-log=- -enable-ml-inliner=development -ml-advisor-size-increase-threshold=1.0 -disable-always-inliner-in-module-wrapper -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=BOUNDS
+; RUN: opt -passes=scc-oz-module-inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -training-log=- -enable-ml-inliner=development -ml-advisor-size-increase-threshold=1.0 -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=BOUNDS
 ;
 ; With more restrictive bounds, the first inlining happens and is OK. The
 ; mandatory inlining happens next, and it trips over the bounds, which then
 ; forces no further inlinings.
-; RUN: opt -passes=scc-oz-module-inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -training-log=- -enable-ml-inliner=development -ml-advisor-size-increase-threshold=1.1 -disable-always-inliner-in-module-wrapper -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=RELAXED-BOUNDS
+; RUN: opt -passes=scc-oz-module-inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -training-log=- -enable-ml-inliner=development -ml-advisor-size-increase-threshold=1.1 -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=RELAXED-BOUNDS
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-grtev4-linux-gnu"
Index: llvm/lib/Transforms/IPO/Inliner.cpp
===================================================================
--- llvm/lib/Transforms/IPO/Inliner.cpp
+++ llvm/lib/Transforms/IPO/Inliner.cpp
@@ -92,11 +92,6 @@
     DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
                                 cl::init(false), cl::Hidden);
 
-/// Flag to disable adding AlwaysInlinerPass to ModuleInlinerWrapperPass.
-/// TODO: remove this once this has is baked in for long enough.
-static cl::opt<bool> DisableAlwaysInlinerInModuleWrapper(
-    "disable-always-inliner-in-module-wrapper", cl::init(false), cl::Hidden);
-
 namespace {
 
 enum class InlinerFunctionImportStatsOpts {
@@ -1046,8 +1041,6 @@
     return PreservedAnalyses::all();
   }
 
-  if (!DisableAlwaysInlinerInModuleWrapper)
-    MPM.addPass(AlwaysInlinerPass());
   // We wrap the CGSCC pipeline in a devirtualization repeater. This will try
   // to detect when we devirtualize indirect calls and iterate the SCC passes
   // in that case to try and catch knock-on inlining or function attrs
Index: llvm/lib/Passes/PassRegistry.def
===================================================================
--- llvm/lib/Passes/PassRegistry.def
+++ llvm/lib/Passes/PassRegistry.def
@@ -62,6 +62,10 @@
 MODULE_PASS("khwasan", HWAddressSanitizerPass(true, true))
 MODULE_PASS("inferattrs", InferFunctionAttrsPass())
 MODULE_PASS("inliner-wrapper", ModuleInlinerWrapperPass())
+MODULE_PASS("always-inliner-wrapper", ModuleInlinerWrapperPass(
+  getInlineParams(), 
+  DebugLogging, 
+  InliningAdvisorMode::MandatoryOnly))
 MODULE_PASS("insert-gcov-profiling", GCOVProfilerPass())
 MODULE_PASS("instrorderfile", InstrOrderFilePass())
 MODULE_PASS("instrprof", InstrProfiling())
@@ -95,7 +99,8 @@
 MODULE_PASS("rpo-function-attrs", ReversePostOrderFunctionAttrsPass())
 MODULE_PASS("sample-profile", SampleProfileLoaderPass())
 MODULE_PASS("scc-oz-module-inliner",
-  buildInlinerPipeline(OptimizationLevel::Oz, ThinLTOPhase::None))
+  buildInlinerPipeline(OptimizationLevel::Oz, ThinLTOPhase::None, 
+  /*MandatoryOnly=*/false))
 MODULE_PASS("loop-extract-single", LoopExtractorPass(1))
 MODULE_PASS("oz-module-optimizer",
   buildModuleOptimizationPipeline(OptimizationLevel::Oz, /*LTOPreLink*/false))
Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -299,6 +299,11 @@
                                        cl::Hidden, cl::ZeroOrMore,
                                        cl::desc("Enable memory profiler"));
 
+static cl::opt<bool> PerformMandatoryInliningsFirst(
+    "mandatory-inlining-first", cl::init(true), cl::Hidden, cl::ZeroOrMore,
+    cl::desc("Perform mandatory inlinings module-wide, before performing "
+             "inlining."));
+
 PipelineTuningOptions::PipelineTuningOptions() {
   LoopInterleaving = true;
   LoopVectorization = true;
@@ -906,7 +911,8 @@
 }
 
 ModuleInlinerWrapperPass
-PassBuilder::buildInlinerPipeline(OptimizationLevel Level, ThinLTOPhase Phase) {
+PassBuilder::buildInlinerPipeline(OptimizationLevel Level, ThinLTOPhase Phase,
+                                  bool MandatoryOnly) {
   InlineParams IP = getInlineParamsFromOptLevel(Level);
   if (Phase == PassBuilder::ThinLTOPhase::PreLink && PGOOpt &&
       PGOOpt->Action == PGOOptions::SampleUse)
@@ -915,8 +921,10 @@
   if (PGOOpt)
     IP.EnableDeferral = EnablePGOInlineDeferral;
 
-  ModuleInlinerWrapperPass MIWP(IP, DebugLogging, UseInlineAdvisor,
-                                MaxDevirtIterations);
+  ModuleInlinerWrapperPass MIWP(
+      IP, DebugLogging,
+      (MandatoryOnly ? InliningAdvisorMode::MandatoryOnly : UseInlineAdvisor),
+      MaxDevirtIterations);
 
   // Require the GlobalsAA analysis for the module so we can query it within
   // the CGSCC pipeline.
@@ -946,6 +954,9 @@
   // Now deduce any function attributes based in the current code.
   MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
 
+  if (MandatoryOnly)
+    return MIWP;
+
   // When at O3 add argument promotion to the pass pipeline.
   // FIXME: It isn't at all clear why this should be limited to O3.
   if (Level == OptimizationLevel::O3)
@@ -1104,7 +1115,9 @@
   if (EnableSyntheticCounts && !PGOOpt)
     MPM.addPass(SyntheticCountsPropagation());
 
-  MPM.addPass(buildInlinerPipeline(Level, Phase));
+  if (PerformMandatoryInliningsFirst)
+    MPM.addPass(buildInlinerPipeline(Level, Phase, /*MandatoryOnly=*/true));
+  MPM.addPass(buildInlinerPipeline(Level, Phase, /*MandatoryOnly=*/false));
 
   if (EnableMemProfiler && Phase != ThinLTOPhase::PreLink) {
     MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
Index: llvm/lib/Analysis/MLInlineAdvisor.cpp
===================================================================
--- llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -175,25 +175,20 @@
   auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
     return FAM.getResult<AssumptionAnalysis>(F);
   };
-  auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
-    return FAM.getResult<TargetLibraryAnalysis>(F);
-  };
-
   auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee);
   auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);
 
-  auto TrivialDecision =
-      llvm::getAttributeBasedInliningDecision(CB, &Callee, TIR, GetTLI);
-
+  auto MandatoryKind = MandatoryInlineAdvisor::getMandatoryKind(CB, FAM, ORE);
   // If this is a "never inline" case, there won't be any changes to internal
   // state we need to track, so we can just return the base InlineAdvice, which
   // will do nothing interesting.
   // Same thing if this is a recursive case.
-  if ((TrivialDecision.hasValue() && !TrivialDecision->isSuccess()) ||
+  if (MandatoryKind == MandatoryInlineAdvisor::MandatoryInliningKind::Never ||
       &Caller == &Callee)
     return std::make_unique<InlineAdvice>(this, CB, ORE, false);
 
-  bool Mandatory = TrivialDecision.hasValue() && TrivialDecision->isSuccess();
+  bool Mandatory =
+      MandatoryKind == MandatoryInlineAdvisor::MandatoryInliningKind::Always;
 
   // If we need to stop, we won't want to track anymore any state changes, so
   // we just return the base InlineAdvice, which acts as a noop.
Index: llvm/lib/Analysis/InlineAdvisor.cpp
===================================================================
--- llvm/lib/Analysis/InlineAdvisor.cpp
+++ llvm/lib/Analysis/InlineAdvisor.cpp
@@ -158,6 +158,9 @@
   case InliningAdvisorMode::Default:
     Advisor.reset(new DefaultInlineAdvisor(FAM, Params));
     break;
+  case InliningAdvisorMode::MandatoryOnly:
+    Advisor.reset(new MandatoryInlineAdvisor(FAM));
+    break;
   case InliningAdvisorMode::Development:
 #ifdef LLVM_HAVE_TF_API
     Advisor =
@@ -437,3 +440,38 @@
     return Remark;
   });
 }
+
+std::unique_ptr<InlineAdvice> MandatoryInlineAdvisor::getAdvice(CallBase &CB) {
+  auto &Caller = *CB.getCaller();
+  auto &Callee = *CB.getCalledFunction();
+  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);
+
+  bool Advice = MandatoryInliningKind::Always ==
+                    MandatoryInlineAdvisor::getMandatoryKind(CB, FAM, ORE) &&
+                &Caller != &Callee;
+  return std::make_unique<InlineAdvice>(this, CB, ORE, Advice);
+}
+
+MandatoryInlineAdvisor::MandatoryInliningKind
+MandatoryInlineAdvisor::getMandatoryKind(CallBase &CB,
+                                         FunctionAnalysisManager &FAM,
+                                         OptimizationRemarkEmitter &ORE) {
+  auto &Callee = *CB.getCalledFunction();
+
+  auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
+    return FAM.getResult<TargetLibraryAnalysis>(F);
+  };
+
+  auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee);
+
+  auto TrivialDecision =
+      llvm::getAttributeBasedInliningDecision(CB, &Callee, TIR, GetTLI);
+
+  if (TrivialDecision.hasValue()) {
+    if (TrivialDecision->isSuccess())
+      return MandatoryInliningKind::Always;
+    else
+      return MandatoryInliningKind::Never;
+  }
+  return MandatoryInliningKind::NotMandatory;
+}
Index: llvm/include/llvm/Passes/PassBuilder.h
===================================================================
--- llvm/include/llvm/Passes/PassBuilder.h
+++ llvm/include/llvm/Passes/PassBuilder.h
@@ -344,7 +344,8 @@
   /// Construct the module pipeline that performs inlining as well as
   /// the inlining-driven cleanups.
   ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level,
-                                                ThinLTOPhase Phase);
+                                                ThinLTOPhase Phase,
+                                                bool MandatoryOnly);
 
   /// Construct the core LLVM module optimization pipeline.
   ///
Index: llvm/include/llvm/Analysis/InlineAdvisor.h
===================================================================
--- llvm/include/llvm/Analysis/InlineAdvisor.h
+++ llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -27,6 +27,8 @@
 /// There are 3 scenarios we can use the InlineAdvisor:
 /// - Default - use manual heuristics.
 ///
+/// - MandatoryOnly - only mandatory inlinings (i.e. AlwaysInline).
+///
 /// - Release mode, the expected mode for production, day to day deployments.
 /// In this mode, when building the compiler, we also compile a pre-trained ML
 /// model to native code, and link it as a static library. This mode has low
@@ -37,7 +39,12 @@
 /// requires the full C Tensorflow API library, and evaluates models
 /// dynamically. This mode also permits generating training logs, for offline
 /// training.
-enum class InliningAdvisorMode : int { Default, Release, Development };
+enum class InliningAdvisorMode : int {
+  Default,
+  MandatoryOnly,
+  Release,
+  Development
+};
 
 class InlineAdvisor;
 /// Capture state between an inlining decision having had been made, and
@@ -178,6 +185,20 @@
   InlineParams Params;
 };
 
+/// Advisor recommending only mandatory (AlwaysInline) cases.
+class MandatoryInlineAdvisor final : public InlineAdvisor {
+  std::unique_ptr<InlineAdvice> getAdvice(CallBase &CB) override;
+
+public:
+  MandatoryInlineAdvisor(FunctionAnalysisManager &FAM) : InlineAdvisor(FAM) {}
+
+  enum class MandatoryInliningKind { NotMandatory, Always, Never };
+
+  static MandatoryInliningKind getMandatoryKind(CallBase &CB,
+                                                FunctionAnalysisManager &FAM,
+                                                OptimizationRemarkEmitter &ORE);
+};
+
 /// The InlineAdvisorAnalysis is a module pass because the InlineAdvisor
 /// needs to capture state right before inlining commences over a module.
 class InlineAdvisorAnalysis : public AnalysisInfoMixin<InlineAdvisorAnalysis> {
Index: clang/test/Frontend/optimization-remark-line-directive.c
===================================================================
--- clang/test/Frontend/optimization-remark-line-directive.c
+++ clang/test/Frontend/optimization-remark-line-directive.c
@@ -2,11 +2,11 @@
 // directives. We cannot map #line directives back to
 // a SourceLocation.
 
-// RUN: %clang_cc1 %s -Rpass=inline -debug-info-kind=line-tables-only -emit-llvm-only -verify -fno-experimental-new-pass-manager
+// RUN: %clang_cc1 %s -Rpass=inline -debug-info-kind=line-tables-only -emit-llvm-only -verify -fno-experimental-new-pass-manager -mllvm -mandatory-inlining-first=0
 
 // The new PM inliner is not added to the default pipeline at O0, so we add
 // some optimizations to trigger it.
-// RUN: %clang_cc1 %s -Rpass=inline -fexperimental-new-pass-manager -O1 -debug-info-kind=line-tables-only -emit-llvm-only -verify
+// RUN: %clang_cc1 %s -Rpass=inline -fexperimental-new-pass-manager -O1 -debug-info-kind=line-tables-only -emit-llvm-only -verify -mllvm -mandatory-inlining-first=0
 
 int foo(int x, int y) __attribute__((always_inline));
 int foo(int x, int y) { return x + y; }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to