https://github.com/JonChesterfield created https://github.com/llvm/llvm-project/pull/143983
Implement an attribute in the spirit of always_inline, for giving programmers a hook to have LLVM specialise subtrees of their program with respect to constant arguments. For example, specialise a sort function on the comparator, or a data structure on a struct of function pointers serving as a vtable. The attribute solves the cost model puzzle by leaving the decision to the programmer, exactly as always_inline does. An IR transform means bitcode libraries with the attribute in place are specialised with respect to applications, without needing to hoist the code in question into C++ templates. The implementation is straightforward. It might grow some additional on-the-fly simplifications (maybe icmp), and at most one command-line argument to specify the extent to which specialisations can request further specialisations, but generally I don't expect it to grow much over time. I'm hopeful that this can be used to nudge some code away from writing always_inline everywhere. It's a feature some other languages have that I miss when working in IR. >From 52c848985afbd08a272334eac10150d5157268cf Mon Sep 17 00:00:00 2001 From: Jon Chesterfield <jonathanchesterfi...@gmail.com> Date: Sat, 17 May 2025 19:50:24 +0100 Subject: [PATCH] [Transforms] Implement always_specialise attribute lowering --- clang/include/clang/Basic/Attr.td | 8 + clang/include/clang/Basic/AttrDocs.td | 11 + clang/lib/CodeGen/CodeGenModule.cpp | 9 + clang/lib/Sema/SemaDeclAttr.cpp | 3 + clang/test/CodeGen/lto-newpm-pipeline.c | 2 + ...a-attribute-supported-attributes-list.test | 1 + llvm/include/llvm/Bitcode/LLVMBitCodes.h | 1 + llvm/include/llvm/IR/Attributes.td | 3 + llvm/include/llvm/InitializePasses.h | 1 + .../llvm/Transforms/IPO/AlwaysSpecializer.h | 29 ++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 2 + llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 2 + llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassBuilderPipelines.cpp | 3 + llvm/lib/Passes/PassRegistry.def | 1 + llvm/lib/Transforms/IPO/AlwaysSpecializer.cpp | 324 ++++++++++++++++++ 
llvm/lib/Transforms/IPO/CMakeLists.txt | 1 + llvm/lib/Transforms/IPO/SCCP.cpp | 1 - llvm/lib/Transforms/Utils/CodeExtractor.cpp | 1 + llvm/test/Other/new-pm-O0-defaults.ll | 2 + llvm/test/Other/new-pm-defaults.ll | 1 + .../Other/new-pm-thinlto-postlink-defaults.ll | 1 + .../new-pm-thinlto-postlink-pgo-defaults.ll | 1 + ...-pm-thinlto-postlink-samplepgo-defaults.ll | 1 + .../Other/new-pm-thinlto-prelink-defaults.ll | 1 + .../new-pm-thinlto-prelink-pgo-defaults.ll | 1 + ...w-pm-thinlto-prelink-samplepgo-defaults.ll | 1 + .../always-specialize-diamond.ll | 89 +++++ .../always-specialize-recursive-factorial.ll | 155 +++++++++ .../always-specialize-recursive-indirect.ll | 294 ++++++++++++++++ .../always-specialize-recursive.ll | 44 +++ .../always-specialize-simple.ll | 229 +++++++++++++ .../always-specialize-variadic.ll | 55 +++ llvm/tools/opt/optdriver.cpp | 1 + 34 files changed, 1279 insertions(+), 1 deletion(-) create mode 100644 llvm/include/llvm/Transforms/IPO/AlwaysSpecializer.h create mode 100644 llvm/lib/Transforms/IPO/AlwaysSpecializer.cpp create mode 100644 llvm/test/Transforms/FunctionSpecialization/always-specialize-diamond.ll create mode 100644 llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive-factorial.ll create mode 100644 llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive-indirect.ll create mode 100644 llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive.ll create mode 100644 llvm/test/Transforms/FunctionSpecialization/always-specialize-simple.ll create mode 100644 llvm/test/Transforms/FunctionSpecialization/always-specialize-variadic.ll diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index f113cd2ba2fbf..daef074e9dc72 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -905,6 +905,14 @@ def AlwaysInline : DeclOrStmtAttr { let Documentation = [AlwaysInlineDocs]; } +def AlwaysSpecialize : InheritableParamAttr { + let 
Spellings = [GNU<"always_specialize">, CXX11<"clang", "always_specialize">, + C23<"clang", "always_specialize">]; + let Subjects = SubjectList<[ParmVar]>; + let Documentation = [AlwaysSpecializeDocs]; + let SimpleHandler = 1; +} + def Artificial : InheritableAttr { let Spellings = [GCC<"artificial">]; let Subjects = SubjectList<[InlineFunction]>; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 047f51ffa59ed..64129a3107218 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -8126,6 +8126,17 @@ Attribute docs`_, and `the GCC Inline docs`_. let Heading = "always_inline, __force_inline"; } +def AlwaysSpecializeDocs : Documentation { + let Category = DocCatConsumed; + let Content = [{ + The ``always_specialize`` attribute on a function parameter indicates that + the function shall be duplicated and specialized with respect to constant + arguments. This will usually increase code size. It controls an IR transform + similar in spirit to ``always_inline``. + }]; + let Heading = "always_specialize"; +} + def EnforceTCBDocs : Documentation { let Category = DocCatFunction; let Content = [{ diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 16e49aab4fe61..4ba32986146ef 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2559,6 +2559,9 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, B.addAttribute("aarch64_new_zt0"); } + if (D->hasAttr<AlwaysSpecializeAttr>()) + B.addAttribute(llvm::Attribute::AlwaysSpecialize); + // Track whether we need to add the optnone LLVM attribute, // starting with the default for this optimization level. 
bool ShouldAddOptNone = @@ -2978,6 +2981,12 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, F->addParamAttr(0, llvm::Attribute::Returned); } + for (auto [Index, Param] : enumerate(FD->parameters())) + if (Param->hasAttrs()) + for (auto *A : Param->getAttrs()) + if (A->getKind() == attr::AlwaysSpecialize) + F->addParamAttr(Index, llvm::Attribute::AlwaysSpecialize); + // Only a few attributes are set on declarations; these may later be // overridden by a definition. diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 1aeae41042a1c..c32f147737883 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -7137,6 +7137,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_AlwaysInline: handleAlwaysInlineAttr(S, D, AL); break; + case ParsedAttr::AT_AlwaysSpecialize: + handleSimpleAttribute<AlwaysSpecializeAttr>(S, D, AL); + break; case ParsedAttr::AT_AnalyzerNoReturn: handleAnalyzerNoReturnAttr(S, D, AL); break; diff --git a/clang/test/CodeGen/lto-newpm-pipeline.c b/clang/test/CodeGen/lto-newpm-pipeline.c index ea9784a76f923..7f83bd38e7803 100644 --- a/clang/test/CodeGen/lto-newpm-pipeline.c +++ b/clang/test/CodeGen/lto-newpm-pipeline.c @@ -31,6 +31,7 @@ // CHECK-FULL-O0-NEXT: Running pass: EntryExitInstrumenterPass // CHECK-FULL-O0-NEXT: Running pass: AlwaysInlinerPass // CHECK-FULL-O0-NEXT: Running analysis: ProfileSummaryAnalysis +// CHECK-FULL-O0-NEXT: Running pass: AlwaysSpecializerPass // CHECK-FULL-O0-NEXT: Running pass: CoroConditionalWrapper // CHECK-FULL-O0-NEXT: Running pass: CanonicalizeAliasesPass // CHECK-FULL-O0-NEXT: Running pass: NameAnonGlobalPass @@ -45,6 +46,7 @@ // CHECK-THIN-O0-NEXT: Running pass: EntryExitInstrumenterPass // CHECK-THIN-O0-NEXT: Running pass: AlwaysInlinerPass // CHECK-THIN-O0-NEXT: Running analysis: ProfileSummaryAnalysis +// CHECK-THIN-O0-NEXT: Running pass: AlwaysSpecializerPass // 
CHECK-THIN-O0-NEXT: Running pass: CoroConditionalWrapper // CHECK-THIN-O0-NEXT: Running pass: CanonicalizeAliasesPass // CHECK-THIN-O0-NEXT: Running pass: NameAnonGlobalPass diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index 41d00dae3f69a..cd90e06609c55 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -15,6 +15,7 @@ // CHECK-NEXT: AlignValue (SubjectMatchRule_variable, SubjectMatchRule_type_alias) // CHECK-NEXT: AlwaysDestroy (SubjectMatchRule_variable) // CHECK-NEXT: AlwaysInline (SubjectMatchRule_function) +// CHECK-NEXT: AlwaysSpecialize (SubjectMatchRule_variable_is_parameter) // CHECK-NEXT: Annotate () // CHECK-NEXT: AnyX86NoCfCheck (SubjectMatchRule_hasType_functionType) // CHECK-NEXT: ArcWeakrefUnavailable (SubjectMatchRule_objc_interface) diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index b362a88963f6c..476bb4167dea8 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -798,6 +798,7 @@ enum AttributeKindCodes { ATTR_KIND_NO_DIVERGENCE_SOURCE = 100, ATTR_KIND_SANITIZE_TYPE = 101, ATTR_KIND_CAPTURES = 102, + ATTR_KIND_ALWAYS_SPECIALIZE = 103, }; enum ComdatSelectionKindCodes { diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td index d488c5f419b82..fb4c7366d9491 100644 --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -98,6 +98,9 @@ def AllocSize : IntAttr<"allocsize", IntersectPreserve, [FnAttr]>; /// inline=always. 
def AlwaysInline : EnumAttr<"alwaysinline", IntersectPreserve, [FnAttr]>; +/// Specialize function when argument at call site is known constant +def AlwaysSpecialize : EnumAttr<"alwaysspecialize", IntersectPreserve, [ParamAttr]>; + /// Callee is recognized as a builtin, despite nobuiltin attribute on its /// declaration. def Builtin : EnumAttr<"builtin", IntersectPreserve, [FnAttr]>; diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 42610d505c2bd..4c3f0ea08ed43 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -50,6 +50,7 @@ void initializeTarget(PassRegistry &); void initializeAAResultsWrapperPassPass(PassRegistry &); void initializeAlwaysInlinerLegacyPassPass(PassRegistry &); +void initializeAlwaysSpecializerPass(PassRegistry &); void initializeAssignmentTrackingAnalysisPass(PassRegistry &); void initializeAssumptionCacheTrackerPass(PassRegistry &); void initializeAtomicExpandLegacyPass(PassRegistry &); diff --git a/llvm/include/llvm/Transforms/IPO/AlwaysSpecializer.h b/llvm/include/llvm/Transforms/IPO/AlwaysSpecializer.h new file mode 100644 index 0000000000000..020d8eec3e760 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/AlwaysSpecializer.h @@ -0,0 +1,29 @@ +//=== AlwaysSpecializer.h - implementation of always_specialize -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_TRANSFORMS_IPO_ALWAYSSPECIALIZER_H +#define LLVM_TRANSFORMS_IPO_ALWAYSSPECIALIZER_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Module; +class ModulePass; + +class AlwaysSpecializerPass : public PassInfoMixin<AlwaysSpecializerPass> { +public: + AlwaysSpecializerPass(); + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } +}; + +ModulePass *createAlwaysSpecializerPass(); + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_IPO_ALWAYSSPECIALIZER_H diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index fde934fbb3cf1..5cb348e1a330e 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2054,6 +2054,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::Alignment; case bitc::ATTR_KIND_ALWAYS_INLINE: return Attribute::AlwaysInline; + case bitc::ATTR_KIND_ALWAYS_SPECIALIZE: + return Attribute::AlwaysSpecialize; case bitc::ATTR_KIND_BUILTIN: return Attribute::Builtin; case bitc::ATTR_KIND_BY_VAL: diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 628b939af19ce..f3afc91176723 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -750,6 +750,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_ALLOC_SIZE; case Attribute::AlwaysInline: return bitc::ATTR_KIND_ALWAYS_INLINE; + case Attribute::AlwaysSpecialize: + return bitc::ATTR_KIND_ALWAYS_SPECIALIZE; case Attribute::Builtin: return bitc::ATTR_KIND_BUILTIN; case Attribute::ByVal: diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 4603eaff8ade9..63ad02bcc522c 100644 --- 
a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -195,6 +195,7 @@ #include "llvm/Transforms/Coroutines/CoroSplit.h" #include "llvm/Transforms/HipStdPar/HipStdPar.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" +#include "llvm/Transforms/IPO/AlwaysSpecializer.h" #include "llvm/Transforms/IPO/Annotation2Metadata.h" #include "llvm/Transforms/IPO/ArgumentPromotion.h" #include "llvm/Transforms/IPO/Attributor.h" diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index a99146d5eaa34..a14ffddeb164b 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -42,6 +42,7 @@ #include "llvm/Transforms/Coroutines/CoroSplit.h" #include "llvm/Transforms/HipStdPar/HipStdPar.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" +#include "llvm/Transforms/IPO/AlwaysSpecializer.h" #include "llvm/Transforms/IPO/Annotation2Metadata.h" #include "llvm/Transforms/IPO/ArgumentPromotion.h" #include "llvm/Transforms/IPO/Attributor.h" @@ -1277,6 +1278,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType)); MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true)); + MPM.addPass(AlwaysSpecializerPass()); if (EnableModuleInliner) MPM.addPass(buildModuleInlinerPipeline(Level, Phase)); @@ -2252,6 +2254,7 @@ PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, // code generation. 
MPM.addPass(AlwaysInlinerPass( /*InsertLifetimeIntrinsics=*/false)); + MPM.addPass(AlwaysSpecializerPass()); if (PTO.MergeFunctions) MPM.addPass(MergeFunctionsPass()); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index f761d0dab09a8..b65981652e258 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -50,6 +50,7 @@ MODULE_ALIAS_ANALYSIS("globals-aa", GlobalsAA()) #define MODULE_PASS(NAME, CREATE_PASS) #endif MODULE_PASS("always-inline", AlwaysInlinerPass()) +MODULE_PASS("always-specialize", AlwaysSpecializerPass()) MODULE_PASS("annotation2metadata", Annotation2MetadataPass()) MODULE_PASS("assign-guid", AssignGUIDPass()) MODULE_PASS("attributor", AttributorPass()) diff --git a/llvm/lib/Transforms/IPO/AlwaysSpecializer.cpp b/llvm/lib/Transforms/IPO/AlwaysSpecializer.cpp new file mode 100644 index 0000000000000..9e0bbe883bd10 --- /dev/null +++ b/llvm/lib/Transforms/IPO/AlwaysSpecializer.cpp @@ -0,0 +1,324 @@ +//===- AlwaysSpecializer.cpp - implementation of always_specialize --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Function specialisation under programmer control. +// +// Specifically, function parameters are marked [[always_specialize]], then call +// sites which pass a constant argument are rewritten to call specialisations. +// +// The difficult parts of function specialisation are the cost model, ensuring +// termination and specialisation to the anticipated extent. +// +// Cost model is under programmer control, exactly like always_inline. +// +// Termination follows from the implementation following a phased structure: +// 1. Functions are identified in the input IR +// 2. 
Calls that exist in the input IR are identified +// Those constitute the complete set of specialisations that will be created. +// +// This pass does the _minimum_ specialisation, in the sense that only call +// sites in the input will lead to cloning. A specialised function will call +// another specialised function iff there was a call site with the same +// argument vector in the input. +// +// Running the identifyCalls + createClones sequence N times will behave +// as expected, specialising recursively to that depth. This patch has N=1 +// in the first instance, with no commandline argument to override. +// Similarly variadic functions are not yet handled. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/AlwaysSpecializer.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/IPO/FunctionSpecialization.h" +#include "llvm/Transforms/Utils/Cloning.h" + +using namespace llvm; + +#define DEBUG_TYPE "always-specialize" + +namespace { + +class AlwaysSpecializer : public ModulePass { +public: + static char ID; + + AlwaysSpecializer() : ModulePass(ID) {} + StringRef getPassName() const override { return "Always specializer"; } + + // One constant for each argument, nullptr if that one is non-constant + using ArgVector = SmallVector<Constant *, 4>; + + // A map from the ArgVector to the matching specialisation + using FunctionSpecializations = MapVector<ArgVector, Function *>; + + // The four mini-passes populate and then use a map: + // 1. identifyFunctions writes all keys, with default initialised values. + // 2. identifyCalls writes all the ArgVector keys in the values of SpecList. + // 3. createClones writes the Function* values at the leaves. + // 4. replaceCalls walks the map doing the trivial rewrite. 
+ + // Conceptually a Map<Function*, Specialization> but a vector suffices. + using SpecListTy = + SmallVector<std::pair<Function *, FunctionSpecializations>, 4>; + + SpecListTy identifyFunctions(Module &M); + bool identifyCalls(Module &M, Function *F, FunctionSpecializations &); + bool createClones(Module &M, Function *F, FunctionSpecializations &); + bool replaceCalls(Module &M, Function *F, FunctionSpecializations &); + + bool runOnModule(Module &M) override { + bool Changed = false; + + // Sets all the keys in the structure used in this invocation. + SpecListTy SpecList = identifyFunctions(M); + size_t Count = SpecList.size(); + if (Count == 0) { + return false; + } + + // Record distinct call sites as vector<Constant*> -> nullptr + for (auto &[F, spec] : SpecList) + Changed |= identifyCalls(M, F, spec); + + // Create and record the clones. Note that call sites within the clones + // cannot trigger creating more clones so no termination risk. + for (auto &[F, spec] : SpecList) + Changed |= createClones(M, F, spec); + + // Replacing calls as the final phase means no need to track + // partially-specialised calls and no creating further clones. + for (auto &[F, spec] : SpecList) + Changed |= replaceCalls(M, F, spec); + + return Changed; + } + + static bool isCandidateFunction(const Function &F); + static bool callEligible(const Function &F, const CallBase *CB, + ArgVector &Out); + static Function *cloneCandidateFunction(Module &M, Function *F, + const ArgVector &C); + + // Only a member variable to reuse the allocation. Short lived. 
+ ArgVector ArgVec; +}; + +AlwaysSpecializer::SpecListTy AlwaysSpecializer::identifyFunctions(Module &M) { + SpecListTy SpecList; + for (Function &F : M) { + if (isCandidateFunction(F)) { + SpecList.push_back(std::make_pair(&F, FunctionSpecializations())); + } + } + return SpecList; +} + +bool AlwaysSpecializer::identifyCalls(Module &M, Function *F, + FunctionSpecializations &Specs) { + bool Found = false; + + for (User *U : F->users()) { + CallBase *CB = dyn_cast<CallBase>(U); + if (!CB || !callEligible(*F, CB, ArgVec)) { + continue; + } + + if (!Specs.contains(ArgVec)) { + Found = true; + Specs.insert(std::make_pair(ArgVec, nullptr)); + } + } + + return Found; +} + +bool AlwaysSpecializer::createClones(Module &M, Function *F, + FunctionSpecializations &Specs) { + bool Changed = false; + + for (auto It = Specs.begin(); It != Specs.end(); ++It) { + if (It->second) + continue; + Function *Clone = cloneCandidateFunction(M, F, It->first); + if (Clone) { + Changed = true; + It->second = Clone; + } + } + + return Changed; +} + +bool AlwaysSpecializer::replaceCalls(Module &M, Function *F, + FunctionSpecializations &Specs) { + bool Changed = false; + + for (User *u : make_early_inc_range(F->users())) { + CallBase *CB = dyn_cast<CallBase>(u); + if (!CB || !callEligible(*F, CB, ArgVec)) { + continue; + } + + Function *Clone = Specs[ArgVec]; + if (Clone) { + Changed = true; + CB->setCalledFunction(Clone); + } + } + + return Changed; +} + +bool AlwaysSpecializer::isCandidateFunction(const Function &F) { + + // Test if the function itself can't be specialised + if (!F.hasExactDefinition() || F.isIntrinsic() || + F.hasFnAttribute(Attribute::Naked)) + return false; + + // Variadics are left for a follow up patch + if (F.isVarArg()) + return false; + + // Need calls to the function for it to be worth considering + if (F.use_empty()) + return false; + + // Look for the attribute on a non-dead, non-indirect parameter + for (const Argument &Arg : F.args()) { + if 
(Arg.hasPointeeInMemoryValueAttr()) + continue; + + if (F.hasParamAttribute(Arg.getArgNo(), Attribute::AlwaysSpecialize)) + if (!Arg.use_empty()) + return true; + } + + return false; +} + +bool AlwaysSpecializer::callEligible(const Function &F, const CallBase *CB, + ArgVector &Out) { + const size_t Arity = F.arg_size(); + bool Eligible = false; + + if (CB->getCalledOperand() != &F) { + return false; + } + + if (CB->getFunctionType() != F.getFunctionType()) { + return false; + } + + if (CB->arg_size() != Arity) { + return false; + } + + Out.clear(); + for (size_t I = 0; I < Arity; I++) { + Constant *Arg = dyn_cast<Constant>(CB->getArgOperand(I)); + if (Arg && F.hasParamAttribute(I, Attribute::AlwaysSpecialize)) { + Eligible = true; + Out.push_back(Arg); + } else { + Out.push_back(nullptr); + } + } + + return Eligible; +} + +Function *AlwaysSpecializer::cloneCandidateFunction(Module &M, Function *F, + const ArgVector &C) { + + Function *Clone = + Function::Create(F->getFunctionType(), F->getLinkage(), + F->getAddressSpace(), F->getName() + ".spec"); + + // Roughly CloneFunction but inserting specialisations next to the original. + ValueToValueMapTy VMap; + Function::arg_iterator DestI = Clone->arg_begin(); + for (const Argument &I : F->args()) { + DestI->setName(I.getName()); + VMap[&I] = &*DestI++; + } + SmallVector<ReturnInst *, 8> Returns; + CloneFunctionInto(Clone, F, VMap, CloneFunctionChangeType::LocalChangesOnly, + Returns); + + M.getFunctionList().insert(F->getIterator(), Clone); + + // Clones are local things. + Clone->setDSOLocal(true); + Clone->setVisibility(GlobalValue::DefaultVisibility); + Clone->setLinkage(GlobalValue::PrivateLinkage); + + // Replace uses of the argument with the constant. + for (size_t I = 0; I < C.size(); I++) { + if (!C[I]) + continue; + + // The argument is going to be dead, drop the specialise attr. 
+ Clone->removeParamAttr(I, Attribute::AlwaysSpecialize); + + Argument *V = Clone->getArg(I); + for (User *U : make_early_inc_range(V->users())) { + + if (auto *Inst = dyn_cast<Instruction>(U)) { + SimplifyQuery SQ = SimplifyQuery(Clone->getDataLayout(), Inst); + + // Do some simplification on the fly so that call sites in the cloned + // functions can potentially themselves resolve to specialisations + if (Value *NewInst = simplifyWithOpReplaced( + Inst, V, C[I], SQ, false /*AllowRefinement*/)) { + Inst->replaceAllUsesWith(NewInst); + continue; + } + + // If we're about to create a load from a constant, try to resolve it + // immediately so that the uses of the load are now also constant. + // This covers constant vtable containing pointer to constant vtable. + if (auto *Load = dyn_cast<LoadInst>(Inst)) { + if (Load->getOperand(0) == V) { + if (Value *NewInst = simplifyLoadInst(Load, C[I], SQ)) { + Load->replaceAllUsesWith(NewInst); + continue; + } + } + } + } + } + + // Replace any remaining uses that the above failed to simplify. + V->replaceAllUsesWith(C[I]); + } + + return Clone; +} + +} // namespace + +char AlwaysSpecializer::ID = 0; + +INITIALIZE_PASS(AlwaysSpecializer, DEBUG_TYPE, "TODO", false, false) + +ModulePass *createAlwaysSpecializerPass() { return new AlwaysSpecializer(); } + +PreservedAnalyses AlwaysSpecializerPass::run(Module &M, + ModuleAnalysisManager &) { + return AlwaysSpecializer().runOnModule(M) ? 
PreservedAnalyses::none() + : PreservedAnalyses::all(); +} + +AlwaysSpecializerPass::AlwaysSpecializerPass() {} diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt index 1c4ee0336d4db..f510d2c944092 100644 --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_component_library(LLVMipo AlwaysInliner.cpp + AlwaysSpecializer.cpp Annotation2Metadata.cpp ArgumentPromotion.cpp Attributor.cpp diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp index 43c5df3575003..b5cbdeac8033d 100644 --- a/llvm/lib/Transforms/IPO/SCCP.cpp +++ b/llvm/lib/Transforms/IPO/SCCP.cpp @@ -384,7 +384,6 @@ PreservedAnalyses IPSCCPPass::run(Module &M, ModuleAnalysisManager &AM) { return FAM.getResult<BlockFrequencyAnalysis>(F); }; - if (!runIPSCCP(M, DL, &FAM, GetTLI, GetTTI, GetAC, GetDT, GetBFI, isFuncSpecEnabled())) return PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 1210bdf4a1c98..5d1733f0dcfc6 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -990,6 +990,7 @@ Function *CodeExtractor::constructFunctionDeclaration( case Attribute::Alignment: case Attribute::AllocatedPointer: case Attribute::AllocAlign: + case Attribute::AlwaysSpecialize: case Attribute::ByVal: case Attribute::Captures: case Attribute::Dereferenceable: diff --git a/llvm/test/Other/new-pm-O0-defaults.ll b/llvm/test/Other/new-pm-O0-defaults.ll index 81d1ee0df2c5b..2db9aa2eb793c 100644 --- a/llvm/test/Other/new-pm-O0-defaults.ll +++ b/llvm/test/Other/new-pm-O0-defaults.ll @@ -34,10 +34,12 @@ ; CHECK-DIS-NEXT: Running pass: AddDiscriminatorsPass ; CHECK-DIS-NEXT: Running pass: AlwaysInlinerPass ; CHECK-DIS-NEXT: Running analysis: ProfileSummaryAnalysis +; CHECK-DIS-NEXT: Running pass: AlwaysSpecializerPass ; CHECK-DEFAULT: Running analysis: 
InnerAnalysisManagerProxy ; CHECK-DEFAULT-NEXT: Running pass: EntryExitInstrumenterPass ; CHECK-DEFAULT-NEXT: Running pass: AlwaysInlinerPass ; CHECK-DEFAULT-NEXT: Running analysis: ProfileSummaryAnalysis +; CHECK-DEFAULT-NEXT: Running pass: AlwaysSpecializerPass ; CHECK-MATRIX: Running pass: LowerMatrixIntrinsicsPass ; CHECK-MATRIX-NEXT: Running analysis: TargetIRAnalysis ; CHECK-CORO-NEXT: Running pass: CoroConditionalWrapper diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index c554fdbf4c799..21b1fb291784c 100644 --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -133,6 +133,7 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: AlwaysInlinerPass ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis +; CHECK-O-NEXT: Running pass: AlwaysSpecializerPass ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll index 62bb02d9b3c40..9baf119000d3e 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll @@ -64,6 +64,7 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: AlwaysInlinerPass ; CHECK-PRELINK-O-NEXT: Running analysis: ProfileSummaryAnalysis +; CHECK-O-NEXT: Running pass: AlwaysSpecializerPass ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll index 0da7a9f73bdce..1b5aaa11108ce 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ 
b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -52,6 +52,7 @@ ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: AlwaysInlinerPass +; CHECK-O-NEXT: Running pass: AlwaysSpecializerPass ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll index 38b7890682783..ccbe82f27987f 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -62,6 +62,7 @@ ; CHECK-O-NEXT: Running pass: PGOForceFunctionAttrsPass ; CHECK-O-NEXT: Running pass: AlwaysInlinerPass +; CHECK-O-NEXT: Running pass: AlwaysSpecializerPass ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA diff --git a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll index 5aacd26def2be..2de56b91f0b21 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll @@ -96,6 +96,7 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: AlwaysInlinerPass ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis +; CHECK-O-NEXT: Running pass: AlwaysSpecializerPass ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll index f6a9406596803..462c273788740 100644 --- 
a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -87,6 +87,7 @@ ; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis on foo ; CHECK-O-NEXT: Running pass: PGOForceFunctionAttrsPass ; CHECK-O-NEXT: Running pass: AlwaysInlinerPass +; CHECK-O-NEXT: Running pass: AlwaysSpecializerPass ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll index 48a9433d24999..01a7364ea6073 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -67,6 +67,7 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass on foo ; CHECK-O-NEXT: Running pass: PGOForceFunctionAttrsPass ; CHECK-O-NEXT: Running pass: AlwaysInlinerPass +; CHECK-O-NEXT: Running pass: AlwaysSpecializerPass ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA diff --git a/llvm/test/Transforms/FunctionSpecialization/always-specialize-diamond.ll b/llvm/test/Transforms/FunctionSpecialization/always-specialize-diamond.ll new file mode 100644 index 0000000000000..248593e22f811 --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/always-specialize-diamond.ll @@ -0,0 +1,89 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature --include-generated-funcs +; RUN: opt -S --passes=always-specialize < %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @leaf(i32 alwaysspecialize %x, i32 alwaysspecialize 
%y) { +entry: + %add = add nsw i32 %x, %y + ret i32 %add +} + +define i32 @in_order(i32 alwaysspecialize %x) { +entry: + %call = call i32 @leaf(i32 42, i32 %x) + ret i32 %call +} + +define i32 @swapped(i32 alwaysspecialize %x) { +entry: + %call = call i32 @leaf(i32 %x, i32 81) + ret i32 %call +} + +define i32 @root() { +entry: + %call = call i32 @in_order(i32 81) + %call1 = call i32 @swapped(i32 42) + %mul = mul nsw i32 %call, %call1 + ret i32 %mul +} + + + +; CHECK-LABEL: define {{[^@]+}}@leaf.spec +; CHECK-SAME: (i32 alwaysspecialize [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X]], 81 +; CHECK-NEXT: ret i32 [[ADD]] +; +; +; CHECK-LABEL: define {{[^@]+}}@leaf.spec.1 +; CHECK-SAME: (i32 [[X:%.*]], i32 alwaysspecialize [[Y:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 42, [[Y]] +; CHECK-NEXT: ret i32 [[ADD]] +; +; +; CHECK-LABEL: define {{[^@]+}}@leaf +; CHECK-SAME: (i32 alwaysspecialize [[X:%.*]], i32 alwaysspecialize [[Y:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X]], [[Y]] +; CHECK-NEXT: ret i32 [[ADD]] +; +; +; CHECK-LABEL: define {{[^@]+}}@in_order.spec +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @leaf(i32 42, i32 81) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@in_order +; CHECK-SAME: (i32 alwaysspecialize [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @leaf.spec.1(i32 42, i32 [[X]]) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@swapped.spec +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @leaf(i32 42, i32 81) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@swapped +; CHECK-SAME: (i32 alwaysspecialize [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @leaf.spec(i32 [[X]], i32 81) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define 
{{[^@]+}}@root() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @in_order.spec(i32 81) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 @swapped.spec(i32 42) +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CALL]], [[CALL1]] +; CHECK-NEXT: ret i32 [[MUL]] +; diff --git a/llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive-factorial.ll b/llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive-factorial.ll new file mode 100644 index 0000000000000..b42427a9edfce --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive-factorial.ll @@ -0,0 +1,155 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature --include-generated-funcs +; RUN: opt -S --passes=always-specialize < %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @factorial(i32 alwaysspecialize %x) { +entry: + %cmp = icmp ult i32 %x, 2 + br i1 %cmp, label %if.then, label %if.end + +if.then: + br label %return + +if.end: + %sub = sub i32 %x, 1 + %call = call i32 @factorial(i32 %sub) + %mul = mul i32 %x, %call + br label %return + +return: + %retval.0 = phi i32 [ 1, %if.then ], [ %mul, %if.end ] + ret i32 %retval.0 +} + +define i32 @factorial_driver() { +entry: + %call = call i32 @factorial(i32 0) + %call1 = call i32 @factorial(i32 1) + %add = add i32 %call, %call1 + %call2 = call i32 @factorial(i32 2) + %add3 = add i32 %add, %call2 + %call4 = call i32 @factorial(i32 3) + %add5 = add i32 %add3, %call4 + %call6 = call i32 @factorial(i32 4) + %add7 = add i32 %add5, %call6 + ret i32 %add7 +} + + + +; CHECK-LABEL: define {{[^@]+}}@factorial.spec +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 4, 2 +; CHECK-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label 
[[RETURN:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 4, 1 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @factorial.spec.1(i32 3) +; CHECK-NEXT: [[MUL:%.*]] = mul i32 4, [[CALL]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 1, [[IF_THEN]] ], [ [[MUL]], [[IF_END]] ] +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +; +; CHECK-LABEL: define {{[^@]+}}@factorial.spec.1 +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 3, 2 +; CHECK-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[RETURN:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 3, 1 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @factorial.spec.2(i32 2) +; CHECK-NEXT: [[MUL:%.*]] = mul i32 3, [[CALL]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 1, [[IF_THEN]] ], [ [[MUL]], [[IF_END]] ] +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +; +; CHECK-LABEL: define {{[^@]+}}@factorial.spec.2 +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 2, 2 +; CHECK-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[RETURN:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 2, 1 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @factorial.spec.3(i32 1) +; CHECK-NEXT: [[MUL:%.*]] = mul i32 2, [[CALL]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 1, [[IF_THEN]] ], [ [[MUL]], [[IF_END]] ] +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +; +; CHECK-LABEL: define {{[^@]+}}@factorial.spec.3 +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 1, 2 +; CHECK-NEXT: br i1 true, label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[RETURN:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 1, 1 +; CHECK-NEXT: 
[[CALL:%.*]] = call i32 @factorial.spec.4(i32 0) +; CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[CALL]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 1, [[IF_THEN]] ], [ [[CALL]], [[IF_END]] ] +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +; +; CHECK-LABEL: define {{[^@]+}}@factorial.spec.4 +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 0, 2 +; CHECK-NEXT: br i1 true, label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[RETURN:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, 1 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @factorial(i32 -1) +; CHECK-NEXT: [[MUL:%.*]] = mul i32 0, [[CALL]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 1, [[IF_THEN]] ], [ [[MUL]], [[IF_END]] ] +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +; +; CHECK-LABEL: define {{[^@]+}}@factorial +; CHECK-SAME: (i32 alwaysspecialize [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X]], 2 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[RETURN:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[X]], 1 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @factorial(i32 [[SUB]]) +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[X]], [[CALL]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 1, [[IF_THEN]] ], [ [[MUL]], [[IF_END]] ] +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +; +; CHECK-LABEL: define {{[^@]+}}@factorial_driver() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @factorial.spec.4(i32 0) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 @factorial.spec.3(i32 1) +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[CALL]], [[CALL1]] +; CHECK-NEXT: [[CALL2:%.*]] = call i32 @factorial.spec.2(i32 2) +; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD]], [[CALL2]] +; CHECK-NEXT: [[CALL4:%.*]] = call i32 
@factorial.spec.1(i32 3) +; CHECK-NEXT: [[ADD5:%.*]] = add i32 [[ADD3]], [[CALL4]] +; CHECK-NEXT: [[CALL6:%.*]] = call i32 @factorial.spec(i32 4) +; CHECK-NEXT: [[ADD7:%.*]] = add i32 [[ADD5]], [[CALL6]] +; CHECK-NEXT: ret i32 [[ADD7]] +; diff --git a/llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive-indirect.ll b/llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive-indirect.ll new file mode 100644 index 0000000000000..ac82a3cbc4dc7 --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive-indirect.ll @@ -0,0 +1,294 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature --include-generated-funcs +; RUN: opt -S --passes=always-specialize < %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define zeroext i1 @odd(i32 alwaysspecialize %x) { +entry: + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: + br label %cond.end + +cond.false: + %sub = sub i32 %x, 1 + %call = call zeroext i1 @even(i32 %sub) + %conv = zext i1 %call to i32 + br label %cond.end + +cond.end: + %cond = phi i32 [ 0, %cond.true ], [ %conv, %cond.false ] + %tobool = icmp ne i32 %cond, 0 + ret i1 %tobool +} + +define zeroext i1 @even(i32 alwaysspecialize %x) { +entry: + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: + br label %cond.end + +cond.false: + %sub = sub i32 %x, 1 + %call = call zeroext i1 @odd(i32 %sub) + %conv = zext i1 %call to i32 + br label %cond.end + +cond.end: + %cond = phi i32 [ 1, %cond.true ], [ %conv, %cond.false ] + %tobool = icmp ne i32 %cond, 0 + ret i1 %tobool +} + +define zeroext i1 @evenodd_driver() { +entry: + %call = call zeroext i1 @even(i32 0) + br i1 %call, label %land.lhs.true, label %land.end + +land.lhs.true: + %call1 = call zeroext i1 
@even(i32 1) + br i1 %call1, label %land.end, label %land.lhs.true2 + +land.lhs.true2: + %call3 = call zeroext i1 @even(i32 2) + br i1 %call3, label %land.lhs.true4, label %land.end + +land.lhs.true4: + %call5 = call zeroext i1 @even(i32 3) + br i1 %call5, label %land.end, label %land.lhs.true6 + +land.lhs.true6: + %call7 = call zeroext i1 @odd(i32 0) + br i1 %call7, label %land.end, label %land.lhs.true8 + +land.lhs.true8: + %call9 = call zeroext i1 @odd(i32 1) + br i1 %call9, label %land.lhs.true10, label %land.end + +land.lhs.true10: + %call11 = call zeroext i1 @odd(i32 2) + br i1 %call11, label %land.end, label %land.rhs + +land.rhs: + %call12 = call zeroext i1 @odd(i32 3) + br label %land.end + +land.end: + %0 = phi i1 [ false, %land.lhs.true10 ], [ false, %land.lhs.true8 ], [ false, %land.lhs.true6 ], [ false, %land.lhs.true4 ], [ false, %land.lhs.true2 ], [ false, %land.lhs.true ], [ false, %entry ], [ %call12, %land.rhs ] + ret i1 %0 +} + + + +; CHECK-LABEL: define {{[^@]+}}@odd.spec +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 3, 0 +; CHECK-NEXT: br i1 false, label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: br label [[COND_END:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 3, 1 +; CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @even.spec.4(i32 2) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32 +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[CONV]], [[COND_FALSE]] ] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[COND]], 0 +; CHECK-NEXT: ret i1 [[TOBOOL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@odd.spec.1 +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 2, 0 +; CHECK-NEXT: br i1 false, label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: br label [[COND_END:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: 
[[SUB:%.*]] = sub i32 2, 1 +; CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @even.spec.5(i32 1) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32 +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[CONV]], [[COND_FALSE]] ] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[COND]], 0 +; CHECK-NEXT: ret i1 [[TOBOOL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@odd.spec.2 +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 1, 0 +; CHECK-NEXT: br i1 false, label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: br label [[COND_END:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 1, 1 +; CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @even.spec.6(i32 0) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32 +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[CONV]], [[COND_FALSE]] ] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[COND]], 0 +; CHECK-NEXT: ret i1 [[TOBOOL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@odd.spec.3 +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 0, 0 +; CHECK-NEXT: br i1 true, label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: br label [[COND_END:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, 1 +; CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @even(i32 -1) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32 +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[CONV]], [[COND_FALSE]] ] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[COND]], 0 +; CHECK-NEXT: ret i1 [[TOBOOL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@odd +; CHECK-SAME: (i32 alwaysspecialize [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: br label [[COND_END:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[X]], 1 +; CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @even(i32 [[SUB]]) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32 +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[CONV]], [[COND_FALSE]] ] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[COND]], 0 +; CHECK-NEXT: ret i1 [[TOBOOL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@even.spec +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 3, 0 +; CHECK-NEXT: br i1 false, label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: br label [[COND_END:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 3, 1 +; CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @odd.spec.1(i32 2) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32 +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[CONV]], [[COND_FALSE]] ] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[COND]], 0 +; CHECK-NEXT: ret i1 [[TOBOOL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@even.spec.4 +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 2, 0 +; CHECK-NEXT: br i1 false, label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: br label [[COND_END:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 2, 1 +; CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @odd.spec.2(i32 1) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32 +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[CONV]], [[COND_FALSE]] ] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[COND]], 0 +; CHECK-NEXT: ret i1 [[TOBOOL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@even.spec.5 +; 
CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 1, 0 +; CHECK-NEXT: br i1 false, label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: br label [[COND_END:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 1, 1 +; CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @odd.spec.3(i32 0) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32 +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[CONV]], [[COND_FALSE]] ] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[COND]], 0 +; CHECK-NEXT: ret i1 [[TOBOOL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@even.spec.6 +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 0, 0 +; CHECK-NEXT: br i1 true, label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: br label [[COND_END:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, 1 +; CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @odd(i32 -1) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32 +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[CONV]], [[COND_FALSE]] ] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[COND]], 0 +; CHECK-NEXT: ret i1 [[TOBOOL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@even +; CHECK-SAME: (i32 alwaysspecialize [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: br label [[COND_END:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[X]], 1 +; CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @odd(i32 [[SUB]]) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32 +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[CONV]], [[COND_FALSE]] ] 
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[COND]], 0 +; CHECK-NEXT: ret i1 [[TOBOOL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@evenodd_driver() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @even.spec.6(i32 0) +; CHECK-NEXT: br i1 [[CALL]], label [[LAND_LHS_TRUE:%.*]], label [[LAND_END:%.*]] +; CHECK: land.lhs.true: +; CHECK-NEXT: [[CALL1:%.*]] = call zeroext i1 @even.spec.5(i32 1) +; CHECK-NEXT: br i1 [[CALL1]], label [[LAND_END]], label [[LAND_LHS_TRUE2:%.*]] +; CHECK: land.lhs.true2: +; CHECK-NEXT: [[CALL3:%.*]] = call zeroext i1 @even.spec.4(i32 2) +; CHECK-NEXT: br i1 [[CALL3]], label [[LAND_LHS_TRUE4:%.*]], label [[LAND_END]] +; CHECK: land.lhs.true4: +; CHECK-NEXT: [[CALL5:%.*]] = call zeroext i1 @even.spec(i32 3) +; CHECK-NEXT: br i1 [[CALL5]], label [[LAND_END]], label [[LAND_LHS_TRUE6:%.*]] +; CHECK: land.lhs.true6: +; CHECK-NEXT: [[CALL7:%.*]] = call zeroext i1 @odd.spec.3(i32 0) +; CHECK-NEXT: br i1 [[CALL7]], label [[LAND_END]], label [[LAND_LHS_TRUE8:%.*]] +; CHECK: land.lhs.true8: +; CHECK-NEXT: [[CALL9:%.*]] = call zeroext i1 @odd.spec.2(i32 1) +; CHECK-NEXT: br i1 [[CALL9]], label [[LAND_LHS_TRUE10:%.*]], label [[LAND_END]] +; CHECK: land.lhs.true10: +; CHECK-NEXT: [[CALL11:%.*]] = call zeroext i1 @odd.spec.1(i32 2) +; CHECK-NEXT: br i1 [[CALL11]], label [[LAND_END]], label [[LAND_RHS:%.*]] +; CHECK: land.rhs: +; CHECK-NEXT: [[CALL12:%.*]] = call zeroext i1 @odd.spec(i32 3) +; CHECK-NEXT: br label [[LAND_END]] +; CHECK: land.end: +; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[LAND_LHS_TRUE10]] ], [ false, [[LAND_LHS_TRUE8]] ], [ false, [[LAND_LHS_TRUE6]] ], [ false, [[LAND_LHS_TRUE4]] ], [ false, [[LAND_LHS_TRUE2]] ], [ false, [[LAND_LHS_TRUE]] ], [ false, [[ENTRY:%.*]] ], [ [[CALL12]], [[LAND_RHS]] ] +; CHECK-NEXT: ret i1 [[TMP0]] +; diff --git a/llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive.ll b/llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive.ll new file mode 100644 index 
0000000000000..b4216ebfb212a --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature --include-generated-funcs +; RUN: opt -S --passes=always-specialize < %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @loop(i32 alwaysspecialize %x) { +entry: + %call = call i32 @loop(i32 5) + %call1 = call i32 @loop(i32 %x) + %add = add i32 %call, %call1 + ret i32 %add +} + +define i32 @loop_driver() { +entry: + %call = call i32 @loop(i32 5) + ret i32 %call +} + + + +; CHECK-LABEL: define {{[^@]+}}@loop.spec +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @loop.spec(i32 5) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 @loop.spec(i32 5) +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[CALL]], [[CALL1]] +; CHECK-NEXT: ret i32 [[ADD]] +; +; +; CHECK-LABEL: define {{[^@]+}}@loop +; CHECK-SAME: (i32 alwaysspecialize [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @loop.spec(i32 5) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 @loop(i32 [[X]]) +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[CALL]], [[CALL1]] +; CHECK-NEXT: ret i32 [[ADD]] +; +; +; CHECK-LABEL: define {{[^@]+}}@loop_driver() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @loop.spec(i32 5) +; CHECK-NEXT: ret i32 [[CALL]] +; diff --git a/llvm/test/Transforms/FunctionSpecialization/always-specialize-simple.ll b/llvm/test/Transforms/FunctionSpecialization/always-specialize-simple.ll new file mode 100644 index 0000000000000..a173e93c7cf37 --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/always-specialize-simple.ll @@ -0,0 +1,229 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature --include-generated-funcs 
+; RUN: opt -S --passes=always-specialize < %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@ptrfirst.x = internal global i32 42, align 4 +@ptrboth.x = internal global i32 42, align 4 +@ptrboth.y = internal constant i32 81, align 4 +@ptrallsame.x = internal global i32 42, align 4 + +define i32 @callee(i32 alwaysspecialize %x, i32 %y, i32 alwaysspecialize %z) { +entry: + %add = add nsw i32 %x, %z + %mul = mul nsw i32 %add, %z + ret i32 %mul +} + +define i32 @first(i32 %a, i32 %b) { +entry: + %call = call i32 @callee(i32 42, i32 %a, i32 %b) + ret i32 %call +} + +define i32 @second(i32 %a, i32 %b) { +entry: + %call = call i32 @callee(i32 %a, i32 42, i32 %b) + ret i32 %call +} + +define i32 @third(i32 %a, i32 %b) { +entry: + %call = call i32 @callee(i32 %a, i32 %b, i32 42) + ret i32 %call +} + +define i32 @both(i32 %a) { +entry: + %call = call i32 @callee(i32 21, i32 %a, i32 42) + ret i32 %call +} + +define i32 @ptrcallee(ptr alwaysspecialize %x, ptr %y, ptr alwaysspecialize %z) { +entry: + %0 = load i32, ptr %x, align 4 + %1 = load i32, ptr %z, align 4 + %add = add nsw i32 %0, %1 + %2 = load i32, ptr %z, align 4 + %mul = mul nsw i32 %add, %2 + ret i32 %mul +} + +define i32 @ptrfirst(ptr %a, ptr %b) { +entry: + %call = call i32 @ptrcallee(ptr @ptrfirst.x, ptr %a, ptr %b) + ret i32 %call +} + +define i32 @ptrboth(ptr %a) { +entry: + %call = call i32 @ptrcallee(ptr @ptrboth.x, ptr %a, ptr @ptrboth.y) + ret i32 %call +} + +define i32 @ptrallsame() { +entry: + %call = call i32 @ptrcallee(ptr @ptrallsame.x, ptr @ptrallsame.x, ptr @ptrallsame.x) + ret i32 %call +} + +define i32 @virtualcall(ptr alwaysspecialize %func, i32 %x) { +entry: + %call = call i32 %func(i32 %x) + ret i32 %call +} + +define i32 @devirtualisecaller(i32 %x) { +entry: + %call = call i32 @virtualcall(ptr @virtualcallee, i32 %x) + ret i32 %call +} + +declare i32 @virtualcallee(i32 noundef) 
+ + + +; CHECK-LABEL: define {{[^@]+}}@callee.spec +; CHECK-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 21, 42 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], 42 +; CHECK-NEXT: ret i32 [[MUL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@callee.spec.1 +; CHECK-SAME: (i32 alwaysspecialize [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X]], 42 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], 42 +; CHECK-NEXT: ret i32 [[MUL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@callee.spec.2 +; CHECK-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]], i32 alwaysspecialize [[Z:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 42, [[Z]] +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[Z]] +; CHECK-NEXT: ret i32 [[MUL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@callee +; CHECK-SAME: (i32 alwaysspecialize [[X:%.*]], i32 [[Y:%.*]], i32 alwaysspecialize [[Z:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X]], [[Z]] +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[Z]] +; CHECK-NEXT: ret i32 [[MUL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@first +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @callee.spec.2(i32 42, i32 [[A]], i32 [[B]]) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@second +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @callee(i32 [[A]], i32 42, i32 [[B]]) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@third +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @callee.spec.1(i32 [[A]], i32 [[B]], i32 42) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@both +; CHECK-SAME: (i32 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @callee.spec(i32 
21, i32 [[A]], i32 42) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@ptrcallee.spec +; CHECK-SAME: (ptr [[X:%.*]], ptr [[Y:%.*]], ptr [[Z:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @ptrallsame.x, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @ptrallsame.x, align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @ptrallsame.x, align 4 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[TMP2]] +; CHECK-NEXT: ret i32 [[MUL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@ptrcallee.spec.3 +; CHECK-SAME: (ptr [[X:%.*]], ptr [[Y:%.*]], ptr [[Z:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @ptrboth.x, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @ptrboth.y, align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 81 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @ptrboth.y, align 4 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], 81 +; CHECK-NEXT: ret i32 [[MUL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@ptrcallee.spec.4 +; CHECK-SAME: (ptr [[X:%.*]], ptr [[Y:%.*]], ptr alwaysspecialize [[Z:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @ptrfirst.x, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[Z]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Z]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[TMP2]] +; CHECK-NEXT: ret i32 [[MUL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@ptrcallee +; CHECK-SAME: (ptr alwaysspecialize [[X:%.*]], ptr [[Y:%.*]], ptr alwaysspecialize [[Z:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[Z]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Z]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[TMP2]] +; CHECK-NEXT: ret i32 
[[MUL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@ptrfirst +; CHECK-SAME: (ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @ptrcallee.spec.4(ptr @ptrfirst.x, ptr [[A]], ptr [[B]]) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@ptrboth +; CHECK-SAME: (ptr [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @ptrcallee.spec.3(ptr @ptrboth.x, ptr [[A]], ptr @ptrboth.y) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@ptrallsame() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @ptrcallee.spec(ptr @ptrallsame.x, ptr @ptrallsame.x, ptr @ptrallsame.x) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@virtualcall.spec +; CHECK-SAME: (ptr [[FUNC:%.*]], i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @virtualcallee(i32 [[X]]) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@virtualcall +; CHECK-SAME: (ptr alwaysspecialize [[FUNC:%.*]], i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 [[FUNC]](i32 [[X]]) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@devirtualisecaller +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @virtualcall.spec(ptr @virtualcallee, i32 [[X]]) +; CHECK-NEXT: ret i32 [[CALL]] +; diff --git a/llvm/test/Transforms/FunctionSpecialization/always-specialize-variadic.ll b/llvm/test/Transforms/FunctionSpecialization/always-specialize-variadic.ll new file mode 100644 index 0000000000000..b0ae24ed97b7a --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/always-specialize-variadic.ll @@ -0,0 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature --include-generated-funcs +; RUN: opt -S --passes=always-specialize < %s | FileCheck %s +target datalayout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.__va_list_tag = type { i32, i32, ptr, ptr } + +@.str = private unnamed_addr constant [10 x i8] c"%s -> %d\0A\00", align 1 +@.str.1 = private unnamed_addr constant [6 x i8] c"thing\00", align 1 + +define i32 @print(ptr %F, ptr alwaysspecialize %fmt, ...) { +entry: + %va = alloca [1 x %struct.__va_list_tag], align 16 + %arraydecay = getelementptr inbounds [1 x %struct.__va_list_tag], ptr %va, i64 0, i64 0 + call void @llvm.va_start.p0(ptr %arraydecay) + %arraydecay1 = getelementptr inbounds [1 x %struct.__va_list_tag], ptr %va, i64 0, i64 0 + %call = call i32 @vprint(ptr %F, ptr %fmt, ptr %arraydecay1) + %arraydecay2 = getelementptr inbounds [1 x %struct.__va_list_tag], ptr %va, i64 0, i64 0 + call void @llvm.va_end.p0(ptr %arraydecay2) + ret i32 %call +} + +declare void @llvm.va_start.p0(ptr) + +declare i32 @vprint(ptr noundef, ptr noundef, ptr noundef) + +declare void @llvm.va_end.p0(ptr) + +define i32 @caller(ptr %F, i32 %x) { +entry: + %call = call i32 (ptr, ptr, ...) @print(ptr %F, ptr @.str, ptr @.str.1, i32 %x) + ret i32 %call +} + + + +; CHECK-LABEL: define {{[^@]+}}@print +; CHECK-SAME: (ptr [[F:%.*]], ptr alwaysspecialize [[FMT:%.*]], ...) 
{ +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VA:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +; CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[VA]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]]) +; CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[VA]], i64 0, i64 0 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @vprint(ptr [[F]], ptr [[FMT]], ptr [[ARRAYDECAY1]]) +; CHECK-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[VA]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.va_end.p0(ptr [[ARRAYDECAY2]]) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@caller +; CHECK-SAME: (ptr [[F:%.*]], i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr, ptr, ...) @print(ptr [[F]], ptr @.str, ptr @.str.1, i32 [[X]]) +; CHECK-NEXT: ret i32 [[CALL]] +; diff --git a/llvm/tools/opt/optdriver.cpp b/llvm/tools/opt/optdriver.cpp index de46efa13025d..65c3ee5467604 100644 --- a/llvm/tools/opt/optdriver.cpp +++ b/llvm/tools/opt/optdriver.cpp @@ -421,6 +421,7 @@ extern "C" int optMain( initializeTarget(Registry); // For codegen passes, only passes that do IR to IR transformation are // supported. + initializeAlwaysSpecializerPass(Registry); initializeExpandLargeDivRemLegacyPassPass(Registry); initializeExpandFpLegacyPassPass(Registry); initializeExpandMemCmpLegacyPassPass(Registry); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits