[llvm-branch-commits] [llvm] [BOLT] Match blocks with calls as anchors (PR #96596)
@@ -155,5 +155,52 @@ std::string hashBlockLoose(BinaryContext , const BinaryBasicBlock ) { return HashString; } +/// An even looser hash level relative to $ hashBlockLoose to use with stale +/// profile matching, composed of the names of a block's called functions in +/// lexicographic order. +std::string hashBlockCalls(BinaryContext , const BinaryBasicBlock ) { + // The hash is computed by creating a string of all lexicographically ordered + // called function names. + std::multiset FunctionNames; maksfb wrote: Consider an alternative container. https://llvm.org/docs/ProgrammersManual.html#other-set-like-container-options https://github.com/llvm/llvm-project/pull/96596 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with calls as anchors (PR #96596)
@@ -155,5 +155,52 @@ std::string hashBlockLoose(BinaryContext , const BinaryBasicBlock ) { return HashString; } +/// An even looser hash level relative to $ hashBlockLoose to use with stale +/// profile matching, composed of the names of a block's called functions in +/// lexicographic order. +std::string hashBlockCalls(BinaryContext , const BinaryBasicBlock ) { + // The hash is computed by creating a string of all lexicographically ordered + // called function names. + std::multiset FunctionNames; + for (const MCInst : BB) { +// Skip non-call instructions. +if (!BC.MIB->isCall(Instr)) + continue; +const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr); +if (!CallSymbol) + continue; +FunctionNames.insert(std::string(CallSymbol->getName())); + } + + std::string HashString; + for (const std::string : FunctionNames) +HashString.append(FunctionName); + + return HashString; +} + +/// The same as the $hashBlockCalls function, but for profiled functions. +std::string +hashBlockCalls(const DenseMap , + const yaml::bolt::BinaryBasicBlockProfile ) { + std::multiset FunctionNames; + for (const yaml::bolt::CallSiteInfo : YamlBB.CallSites) { +auto It = IdToFunctionName.find(CallSiteInfo.DestId); +if (It == IdToFunctionName.end()) + continue; +StringRef Name = It->second; +const size_t Pos = Name.find("(*"); maksfb wrote: Let's expand `NameResolver` interface to restore the name of the symbol and use it here. https://github.com/llvm/llvm-project/pull/96596 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with calls as anchors (PR #96596)
@@ -56,6 +56,10 @@ class YAMLProfileReader : public ProfileReaderBase { /// is attributed. FunctionSet ProfiledFunctions; + /// Maps profiled function id to name, for function matching with calls as + /// anchors. + DenseMap IdToFunctionName; maksfb wrote: nit: use a type alias. Will make it easier to read specially when the type is used in an argument list. https://github.com/llvm/llvm-project/pull/96596 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][NFC] Refactor function matching (PR #97502)
https://github.com/maksfb approved this pull request. https://github.com/llvm/llvm-project/pull/97502 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with calls as anchors (PR #96596)
maksfb wrote: Could you please reword the summary and add an example where the new matching technique helps. https://github.com/llvm/llvm-project/pull/96596 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][NFC] Refactor function matching (PR #97502)
@@ -456,6 +435,39 @@ Error YAMLProfileReader::readProfile(BinaryContext ) { ++MatchedWithLTOCommonName; } } + return MatchedWithLTOCommonName; +} + +Error YAMLProfileReader::readProfile(BinaryContext ) { + if (opts::Verbosity >= 1) { +outs() << "BOLT-INFO: YAML profile with hash: "; +switch (YamlBP.Header.HashFunction) { +case HashFunction::StdHash: + outs() << "std::hash\n"; + break; +case HashFunction::XXH3: + outs() << "xxh3\n"; + break; +} + } + YamlProfileToFunction.resize(YamlBP.Functions.size() + 1); + + // Computes hash for binary functions. + if (opts::MatchProfileWithFunctionHash) { +for (auto &[_, BF] : BC.getBinaryFunctions()) { + BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction); +} + } else if (!opts::IgnoreHash) { +for (BinaryFunction *BF : ProfileBFs) { + if (!BF) +continue; + BF->computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction); +} + } + + size_t MatchedWithExactName = matchWithExactName(); + size_t MatchedWithHash = matchWithHash(BC); + size_t MatchedWithLTOCommonName = matchWithLTOCommonName(); maksfb wrote: nit: make them `const`. https://github.com/llvm/llvm-project/pull/97502 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
@@ -342,6 +350,108 @@ bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction ) { return false; } +uint64_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext ) { + uint64_t MatchedWithNameSimilarity = 0; + ItaniumPartialDemangler ItaniumPartialDemangler; + + // Demangle and derive namespace from function name. + auto DemangleName = [&](std::string ) { +StringRef RestoredName = NameResolver::restore(FunctionName); +return demangle(RestoredName); + }; + auto DeriveNameSpace = [&](std::string ) { +if (ItaniumPartialDemangler.partialDemangle(DemangledName.c_str())) + return std::string(""); +std::vector Buffer(DemangledName.begin(), DemangledName.end()); +size_t BufferSize = Buffer.size(); +char *NameSpace = ItaniumPartialDemangler.getFunctionDeclContextName( +[0], ); +return NameSpace ? std::string(NameSpace) : std::string(""); + }; + + // Maps namespaces to associated function block counts and gets profile + // function names and namespaces to minimize the number of BFs to process and + // avoid repeated name demangling/namespace derivision. + StringMap> +NamespaceToProfiledBFSizes; + std::vector ProfileBFDemangledNames; + ProfileBFDemangledNames.reserve(YamlBP.Functions.size()); + std::vector ProfiledBFNamespaces; + ProfiledBFNamespaces.reserve(YamlBP.Functions.size()); + + for (auto : YamlBP.Functions) { +std::string YamlBFDemangledName = DemangleName(YamlBF.Name); +ProfileBFDemangledNames.push_back(YamlBFDemangledName); +std::string YamlBFNamespace = DeriveNameSpace(YamlBFDemangledName); +ProfiledBFNamespaces.push_back(YamlBFNamespace); +NamespaceToProfiledBFSizes[YamlBFNamespace].insert(YamlBF.NumBasicBlocks); + } + + StringMap> + NamespaceToBFs; maksfb wrote: nit: formatting looks off. https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
@@ -415,11 +423,116 @@ Error YAMLProfileReader::readProfile(BinaryContext ) { if (!YamlBF.Used && BF && !ProfiledFunctions.count(BF)) matchProfileToFunction(YamlBF, *BF); + // Uses name similarity to match functions that were not matched by name. + uint64_t MatchedWithNameSimilarity = 0; + + if (opts::NameSimilarityFunctionMatchingThreshold > 0) { +ItaniumPartialDemangler ItaniumPartialDemangler; + +auto DemangleName = [&](std::string ) { + StringRef RestoredName = NameResolver::restore(FunctionName); + return demangle(RestoredName); +}; + +auto DeriveNameSpace = [&](std::string ) { + if (ItaniumPartialDemangler.partialDemangle(DemangledName.c_str())) +return std::string(""); + std::vector Buffer(DemangledName.begin(), DemangledName.end()); + size_t BufferSize = Buffer.size(); + char *NameSpace = ItaniumPartialDemangler.getFunctionDeclContextName( + [0], ); + return NameSpace ? std::string(NameSpace) : std::string(""); +}; + +// Preprocessing YamlBFs to minimize the number of BFs to process. +std::unordered_map> + NamespaceToProfiledBFSizes; +NamespaceToProfiledBFSizes.reserve(YamlBP.Functions.size()); +std::vector ProfileBFDemangledNames; +ProfileBFDemangledNames.reserve(YamlBP.Functions.size()); +std::vector ProfiledBFNamespaces; +ProfiledBFNamespaces.reserve(YamlBP.Functions.size()); + +for (auto : YamlBP.Functions) { + std::string YamlBFDemangledName = DemangleName(YamlBF.Name); + ProfileBFDemangledNames.push_back(YamlBFDemangledName); + std::string YamlBFNamespace = DeriveNameSpace(YamlBFDemangledName); + ProfiledBFNamespaces.push_back(YamlBFNamespace); + NamespaceToProfiledBFSizes[YamlBFNamespace].insert(YamlBF.NumBasicBlocks); +} + +std::unordered_map> maksfb wrote: Same for `StringMap`. https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
https://github.com/maksfb edited https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
@@ -23,6 +26,11 @@ extern cl::opt Verbosity; extern cl::OptionCategory BoltOptCategory; extern cl::opt InferStaleProfile; +cl::opt NameSimilarityFunctionMatchingThreshold( +"name-similarity-function-matching-threshold", +cl::desc("Matches functions using namespace and edit distance."), maksfb wrote: nit: use imperative statement. https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
@@ -0,0 +1,64 @@ +## Tests function matching in YAMLProfileReader by name similarity. + +# REQUIRES: system-linux +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib +# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \ +# RUN: --print-cfg --name-similarity-function-matching-threshold=1 2>&1 --funcs=main --profile-ignore-hash=0 | FileCheck %s maksfb wrote: ```suggestion # RUN: --print-cfg --name-similarity-function-matching-threshold=1 --funcs=main --profile-ignore-hash=0 2>&1 | FileCheck %s ``` https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
@@ -0,0 +1,64 @@ +## Tests function matching in YAMLProfileReader by name similarity. + +# REQUIRES: system-linux +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib +# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \ +# RUN: --print-cfg --name-similarity-function-matching-threshold=1 2>&1 --funcs=main --profile-ignore-hash=0 | FileCheck %s + +# CHECK: BOLT-INFO: matched 1 functions with similar names + +#--- main.s +.globl main +.type main, @function +main: + .cfi_startproc +.LBB00: + pushq %rbp + movq%rsp, %rbp + subq$16, %rsp + testq %rax, %rax + js .LBB03 +.LBB01: + jne .LBB04 +.LBB02: + nop +.LBB03: + xorl%eax, %eax + addq$16, %rsp + popq%rbp + retq +.LBB04: + xorl%eax, %eax + addq$16, %rsp + popq%rbp + retq +## For relocations against .text +.LBB05: + call exit maksfb wrote: See comments on the other PR. https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
@@ -415,11 +423,116 @@ Error YAMLProfileReader::readProfile(BinaryContext ) { if (!YamlBF.Used && BF && !ProfiledFunctions.count(BF)) matchProfileToFunction(YamlBF, *BF); + // Uses name similarity to match functions that were not matched by name. + uint64_t MatchedWithNameSimilarity = 0; + + if (opts::NameSimilarityFunctionMatchingThreshold > 0) { +ItaniumPartialDemangler ItaniumPartialDemangler; + +auto DemangleName = [&](std::string ) { + StringRef RestoredName = NameResolver::restore(FunctionName); + return demangle(RestoredName); +}; + +auto DeriveNameSpace = [&](std::string ) { + if (ItaniumPartialDemangler.partialDemangle(DemangledName.c_str())) +return std::string(""); + std::vector Buffer(DemangledName.begin(), DemangledName.end()); + size_t BufferSize = Buffer.size(); + char *NameSpace = ItaniumPartialDemangler.getFunctionDeclContextName( + [0], ); + return NameSpace ? std::string(NameSpace) : std::string(""); +}; + +// Preprocessing YamlBFs to minimize the number of BFs to process. +std::unordered_map> maksfb wrote: Can you use `StringMap` here? https://llvm.org/docs/ProgrammersManual.html#llvm-adt-stringmap-h https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
https://github.com/maksfb commented: Please refactor new code into a separate function. Add a comment on how the matching is done such that the interface can be understood without reading the code. https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Detect .warm split functions as cold fragments (PR #93759)
https://github.com/maksfb approved this pull request. LGTM with the nit addressed. https://github.com/llvm/llvm-project/pull/93759 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Detect .warm split functions as cold fragments (PR #93759)
@@ -596,6 +597,9 @@ class RewriteInstance { NameResolver NR; + // Regex object matching split function names. + const Regex ColdFragment{"(.*)\\.(cold|warm)(\\.[0-9]+)?"}; maksfb wrote: nit: s/ColdFragment/FunctionFragmentTemplate/ https://github.com/llvm/llvm-project/pull/93759 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Detect .warm split functions as cold fragments (PR #93759)
https://github.com/maksfb edited https://github.com/llvm/llvm-project/pull/93759 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Ignore special symbols as function aliases in updateELFSymbolTable (PR #92713)
https://github.com/maksfb approved this pull request. LGTM. Thanks. https://github.com/llvm/llvm-project/pull/92713 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Ignore hot markers as function references in updateELFSymbolTable (PR #92713)
@@ -4788,13 +4788,25 @@ void RewriteInstance::updateELFSymbolTable( if (!IsDynSym && shouldStrip(Symbol)) continue; +Expected SymbolName = Symbol.getName(StringSection); maksfb wrote: Can we move the code that checks for special symbols here and skip function matching for them all together? Otherwise we are checking for the markers in several locations unnecessarily. https://github.com/llvm/llvm-project/pull/92713 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Use aggregated FuncBranchData in writeBATYAML (PR #91289)
@@ -2386,25 +2362,26 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext , return std::pair(BlockIt->first, BlockIt->second.getBBIndex()); }; - for (const auto &[FromOffset, SuccKV] : Branches.IntraIndex) { -const auto &[_, Index] = getBlock(FromOffset); -yaml::bolt::BinaryBasicBlockProfile = YamlBF.Blocks[Index]; -for (const auto &[SuccOffset, SuccDataIdx] : SuccKV) - if (BlockMap.isInputBlock(SuccOffset)) -YamlBB.Successors.emplace_back( -getSuccessorInfo(SuccOffset, SuccDataIdx)); - } - for (const auto &[FromOffset, CallTo] : Branches.InterIndex) { -const auto &[BlockOffset, BlockIndex] = getBlock(FromOffset); -yaml::bolt::BinaryBasicBlockProfile = YamlBF.Blocks[BlockIndex]; -const uint32_t Offset = FromOffset - BlockOffset; -for (const auto &[CallToLoc, CallToIdx] : CallTo) - YamlBB.CallSites.emplace_back( - getCallSiteInfo(CallToLoc, CallToIdx, Offset)); -llvm::sort(YamlBB.CallSites, [](yaml::bolt::CallSiteInfo , -yaml::bolt::CallSiteInfo ) { - return A.Offset < B.Offset; -}); + for (const llvm::bolt::BranchInfo : Branches.Data) { maksfb wrote: nit: no need for `llvm::bolt::`. https://github.com/llvm/llvm-project/pull/91289 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Use aggregated FuncBranchData in writeBATYAML (PR #91289)
https://github.com/maksfb edited https://github.com/llvm/llvm-project/pull/91289 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Use aggregated FuncBranchData in writeBATYAML (PR #91289)
https://github.com/maksfb approved this pull request. Looks good on my end. https://github.com/llvm/llvm-project/pull/91289 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Map branch source address to the containing basic block in BAT YAML (PR #91273)
https://github.com/maksfb approved this pull request. Please see the nit for the error message (caps and new line). Otherwise LGTM. https://github.com/llvm/llvm-project/pull/91273 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Map branch source address to the containing basic block in BAT YAML (PR #91273)
@@ -2378,21 +2379,27 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext , return CSI; }; + // Lookup containing basic block offset and index + auto getBlock = [](uint32_t Offset) { +auto BlockIt = BlockMap.upper_bound(Offset); +if (LLVM_UNLIKELY(BlockIt == BlockMap.begin())) { + errs() << "BOLT-ERROR: Invalid BAT section"; maksfb wrote: ```suggestion errs() << "BOLT-ERROR: invalid BAT section\n"; ``` https://github.com/llvm/llvm-project/pull/91273 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Map branch source address to the containing basic block in BAT YAML (PR #91273)
https://github.com/maksfb edited https://github.com/llvm/llvm-project/pull/91273 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Map branch source address to the containing basic block in BAT YAML (PR #91273)
@@ -2378,21 +2378,24 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext , return CSI; }; + // Lookup containing basic block offset and index + auto getBlock = [](uint32_t Offset) { +auto BlockIt = BlockMap.upper_bound(Offset); +assert(BlockIt != BlockMap.begin()); +--BlockIt; +return std::pair(BlockIt->first, BlockIt->second.getBBIndex()); + }; + for (const auto &[FromOffset, SuccKV] : Branches.IntraIndex) { -if (!BlockMap.isInputBlock(FromOffset)) - continue; -const unsigned Index = BlockMap.getBBIndex(FromOffset); +const auto &[_, Index] = getBlock(FromOffset); maksfb wrote: Even though we generate BAT in BOLT, when we view the invocation of BOLT on a binary with embedded BAT, such input should be considered an external and potentially malformed data. In this case, assertions will not provide adequate enough protection since we can build BOLT without them. https://github.com/llvm/llvm-project/pull/91273 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Map branch source address to the containing basic block in BAT YAML (PR #91273)
@@ -2378,21 +2378,24 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext , return CSI; }; + // Lookup containing basic block offset and index + auto getBlock = [](uint32_t Offset) { +auto BlockIt = BlockMap.upper_bound(Offset); +assert(BlockIt != BlockMap.begin()); +--BlockIt; +return std::pair(BlockIt->first, BlockIt->second.getBBIndex()); + }; + for (const auto &[FromOffset, SuccKV] : Branches.IntraIndex) { -if (!BlockMap.isInputBlock(FromOffset)) - continue; -const unsigned Index = BlockMap.getBBIndex(FromOffset); +const auto &[_, Index] = getBlock(FromOffset); maksfb wrote: Do we expect `getBlock()` to always return a good value? There's no chance for malformed input to trigger the assertion above? https://github.com/llvm/llvm-project/pull/91273 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Ignore returns in DataAggregator (PR #90807)
https://github.com/maksfb edited https://github.com/llvm/llvm-project/pull/90807 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Ignore returns in DataAggregator (PR #90807)
@@ -1167,6 +1167,21 @@ void BinaryFunction::handleAArch64IndirectCall(MCInst , } } +std::optional +BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const { + assert(CurrentState == State::Empty); + assert(Offset < MaxSize && "invalid offset"); maksfb wrote: nit: capitalize the message. https://github.com/llvm/llvm-project/pull/90807 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Ignore returns in DataAggregator (PR #90807)
@@ -1167,6 +1167,21 @@ void BinaryFunction::handleAArch64IndirectCall(MCInst , } } +std::optional +BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const { + assert(CurrentState == State::Empty); + assert(Offset < MaxSize && "invalid offset"); + ErrorOr> FunctionData = getData(); + assert(FunctionData && "cannot get function as data"); maksfb wrote: nit: ditto. https://github.com/llvm/llvm-project/pull/90807 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Ignore returns in DataAggregator (PR #90807)
@@ -1167,6 +1167,21 @@ void BinaryFunction::handleAArch64IndirectCall(MCInst , } } +std::optional +BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const { + assert(CurrentState == State::Empty); maksfb wrote: nit: add message. https://github.com/llvm/llvm-project/pull/90807 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Ignore returns in DataAggregator (PR #90807)
https://github.com/maksfb approved this pull request. Please address the nits. Otherwise - good to go. https://github.com/llvm/llvm-project/pull/90807 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT][BAT] Fix translate for branches added by BOLT (PR #90811)
@@ -492,6 +486,10 @@ uint64_t BoltAddressTranslation::translate(uint64_t FuncAddress, const uint32_t Val = KeyVal->second >> 1; // dropping BRANCHENTRY bit if (IsBranchSrc) { +// Branch entry is found in BAT +if (KeyVal->first == Offset && KeyVal->second & BRANCHENTRY) maksfb wrote: What would be the case where we need to check for the second condition (`KeyVal->second & BRANCHENTRY`)? Can we drop it? https://github.com/llvm/llvm-project/pull/90811 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT][BAT] Fix translate for branches added by BOLT (PR #90811)
@@ -48,10 +48,9 @@ static cl::opt InputFilename(cl::Positional, cl::Required, cl::cat(BatDumpCategory)); -static cl::list Translate("translate", -cl::desc("translate addresses using BAT"), -cl::value_desc("addr"), -cl::cat(BatDumpCategory)); +static cl::list +Translate("translate", cl::desc("translate addresses using BAT"), + cl::value_desc("addr[:is_from]"), cl::cat(BatDumpCategory)); maksfb wrote: ```suggestion cl::value_desc("addr[:is_src]"), cl::cat(BatDumpCategory)); ``` https://github.com/llvm/llvm-project/pull/90811 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT][BAT] Fix translate for branches added by BOLT (PR #90811)
@@ -24,6 +24,32 @@ READ-BAT-CHECK-NOT: BOLT-ERROR: unable to save profile in YAML format for input READ-BAT-CHECK: BOLT-INFO: Parsed 5 BAT entries READ-BAT-CHECK: PERF2BOLT: read 79 aggregated LBR entries +# Check handling of a branch not in BAT (added by BOLT) +RUN: FileCheck --input-file %p/Inputs/blarge_new_bat.preagg.txt --check-prefix PREAGG-CHECK %s +# The branch +PREAGG-CHECK: B 80007b 80004c 483 1 +RUN: llvm-objdump %t.out -d --start-address=0x80007b --stop-address=0x80007d \ +RUN: | FileCheck %s --check-prefix OBJDUMP-CHECK +OBJDUMP-CHECK: jmp +# Confirming it's not in BAT +RUN: llvm-bat-dump %t.out --dump-all | FileCheck %s --check-prefix BAT-DUMP-CHECK +BAT-DUMP-CHECK: Function Address: 0x800040, hash: 0x99e67ed32a203023 +# Containing basic block +BAT-DUMP-CHECK: 0x34 -> 0x32 hash: 0x6c36179f229b0032 +# Branch entry just above +BAT-DUMP-CHECK-NEXT: 0x37 -> 0x35 (branch) +# No entry for that offset +BAT-DUMP-NOT: 0x3b -> maksfb wrote: Is this line intended to be used? Is it needed? https://github.com/llvm/llvm-project/pull/90811 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT][BAT] Fix translate for branches added by BOLT (PR #90811)
@@ -478,18 +478,34 @@ uint64_t BoltAddressTranslation::translate(uint64_t FuncAddress, return Offset; const MapTy = Iter->second; + if (IsBranchSrc) { +// Try exact lookup first +auto KeyVal = Map.find(Offset); +if (KeyVal != Map.end() && KeyVal->second & BRANCHENTRY) + return KeyVal->second >> 1; + } auto KeyVal = Map.upper_bound(Offset); if (KeyVal == Map.begin()) return Offset; --KeyVal; const uint32_t Val = KeyVal->second >> 1; // dropping BRANCHENTRY bit - // Branch source addresses are translated to the first instruction of the - // source BB to avoid accounting for modifications BOLT may have made in the - // BB regarding deletion/addition of instructions. - if (IsBranchSrc) -return Val; + if (IsBranchSrc) { maksfb wrote: Can we handle the exact match case (from above) here as well to simplify the code? https://github.com/llvm/llvm-project/pull/90811 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Use heuristic for matching split local functions (PR #90424)
https://github.com/maksfb approved this pull request. https://github.com/llvm/llvm-project/pull/90424 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Use offset deduplication for cold fragments (PR #87853)
https://github.com/maksfb approved this pull request. https://github.com/llvm/llvm-project/pull/87853 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][BAT] Fix handling of split functions (PR #87569)
https://github.com/maksfb approved this pull request. https://github.com/llvm/llvm-project/pull/87569 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Cover all call sites in writeBATYAML (PR #87743)
https://github.com/maksfb approved this pull request. https://github.com/llvm/llvm-project/pull/87743 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Cover all call sites in writeBATYAML (PR #87743)
@@ -2341,86 +2341,62 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext , YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(FuncAddress); const BoltAddressTranslation::BBHashMapTy = BAT->getBBHashMap(FuncAddress); + YamlBF.Blocks.resize(YamlBF.NumBasicBlocks); - auto addSuccProfile = [&](yaml::bolt::BinaryBasicBlockProfile , -uint64_t SuccOffset, unsigned SuccDataIdx) { + for (auto &&[Idx, YamlBB] : llvm::enumerate(YamlBF.Blocks)) +YamlBB.Index = Idx; + + for (auto BI = BlockMap.begin(), BE = BlockMap.end(); BI != BE; ++BI) +YamlBF.Blocks[BI->second.getBBIndex()].Hash = BI->second.getBBHash(); + + auto getSuccessorInfo = [&](uint32_t SuccOffset, unsigned SuccDataIdx) { const llvm::bolt::BranchInfo = Branches.Data.at(SuccDataIdx); yaml::bolt::SuccessorInfo SI; SI.Index = BlockMap.getBBIndex(SuccOffset); SI.Count = BI.Branches; SI.Mispreds = BI.Mispreds; -YamlBB.Successors.emplace_back(SI); +return SI; }; - std::unordered_map> BFBranches = - BAT->getBFBranches(FuncAddress); - - auto addCallsProfile = [&](yaml::bolt::BinaryBasicBlockProfile , - uint64_t Offset) { -// Iterate over BRANCHENTRY records in the current block -for (uint32_t BranchOffset : BFBranches[Offset]) { - if (!Branches.InterIndex.contains(BranchOffset)) -continue; - for (const auto &[CallToLoc, CallToIdx] : - Branches.InterIndex.at(BranchOffset)) { -const llvm::bolt::BranchInfo = Branches.Data.at(CallToIdx); -yaml::bolt::CallSiteInfo YamlCSI; -YamlCSI.DestId = 0; // designated for unknown functions -YamlCSI.EntryDiscriminator = 0; -YamlCSI.Count = BI.Branches; -YamlCSI.Mispreds = BI.Mispreds; -YamlCSI.Offset = BranchOffset - Offset; -BinaryData *CallTargetBD = BC.getBinaryDataByName(CallToLoc.Name); -if (!CallTargetBD) { - YamlBB.CallSites.emplace_back(YamlCSI); - continue; -} -uint64_t CallTargetAddress = CallTargetBD->getAddress(); -BinaryFunction *CallTargetBF = -BC.getBinaryFunctionAtAddress(CallTargetAddress); -if (!CallTargetBF) { - YamlBB.CallSites.emplace_back(YamlCSI); - continue; -} -// Calls between hot and cold fragments must be handled in -// fixupBATProfile. -assert(CallTargetBF != BF && "invalid CallTargetBF"); -YamlCSI.DestId = CallTargetBF->getFunctionNumber(); -if (CallToLoc.Offset) { - if (BAT->isBATFunction(CallTargetAddress)) { -LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Unsupported secondary " - "entry point in BAT function " - << CallToLoc.Name << '\n'); - } else if (const BinaryBasicBlock *CallTargetBB = - CallTargetBF->getBasicBlockAtOffset( - CallToLoc.Offset)) { -// Only record true call information, ignoring returns (normally -// won't have a target basic block) and jumps to the landing -// pads (not an entry point). -if (CallTargetBB->isEntryPoint()) { - YamlCSI.EntryDiscriminator = - CallTargetBF->getEntryIDForSymbol( - CallTargetBB->getLabel()); -} - } -} -YamlBB.CallSites.emplace_back(YamlCSI); - } -} + auto getCallSiteInfo = [&](Location CallToLoc, unsigned CallToIdx, + uint32_t Offset) { +const llvm::bolt::BranchInfo = Branches.Data.at(CallToIdx); +yaml::bolt::CallSiteInfo CSI; +CSI.DestId = 0; // designated for unknown functions +CSI.EntryDiscriminator = 0; +CSI.Count = BI.Branches; +CSI.Mispreds = BI.Mispreds; +CSI.Offset = Offset; +if (BinaryData *BD = BC.getBinaryDataByName(CallToLoc.Name)) + YAMLProfileWriter::setCSIDestination(BC, CSI, BD->getSymbol(), BAT, + CallToLoc.Offset); +return CSI; }; for (const auto &[FromOffset, SuccKV] : Branches.IntraIndex) { -yaml::bolt::BinaryBasicBlockProfile YamlBB; if (!BlockMap.isInputBlock(FromOffset)) continue; -YamlBB.Index = BlockMap.getBBIndex(FromOffset); -YamlBB.Hash = BlockMap.getBBHash(FromOffset); +unsigned Index = BlockMap.getBBIndex(FromOffset); +yaml::bolt::BinaryBasicBlockProfile = YamlBF.Blocks[Index]; for (const auto &[SuccOffset, SuccDataIdx] : SuccKV) -
[llvm-branch-commits] [llvm] [BOLT] Cover all call sites in writeBATYAML (PR #87743)
@@ -2341,86 +2341,62 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext , YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(FuncAddress); const BoltAddressTranslation::BBHashMapTy = BAT->getBBHashMap(FuncAddress); + YamlBF.Blocks.resize(YamlBF.NumBasicBlocks); - auto addSuccProfile = [&](yaml::bolt::BinaryBasicBlockProfile , -uint64_t SuccOffset, unsigned SuccDataIdx) { + for (auto &&[Idx, YamlBB] : llvm::enumerate(YamlBF.Blocks)) +YamlBB.Index = Idx; + + for (auto BI = BlockMap.begin(), BE = BlockMap.end(); BI != BE; ++BI) +YamlBF.Blocks[BI->second.getBBIndex()].Hash = BI->second.getBBHash(); + + auto getSuccessorInfo = [&](uint32_t SuccOffset, unsigned SuccDataIdx) { const llvm::bolt::BranchInfo = Branches.Data.at(SuccDataIdx); yaml::bolt::SuccessorInfo SI; SI.Index = BlockMap.getBBIndex(SuccOffset); SI.Count = BI.Branches; SI.Mispreds = BI.Mispreds; -YamlBB.Successors.emplace_back(SI); +return SI; }; - std::unordered_map> BFBranches = - BAT->getBFBranches(FuncAddress); - - auto addCallsProfile = [&](yaml::bolt::BinaryBasicBlockProfile , - uint64_t Offset) { -// Iterate over BRANCHENTRY records in the current block -for (uint32_t BranchOffset : BFBranches[Offset]) { - if (!Branches.InterIndex.contains(BranchOffset)) -continue; - for (const auto &[CallToLoc, CallToIdx] : - Branches.InterIndex.at(BranchOffset)) { -const llvm::bolt::BranchInfo = Branches.Data.at(CallToIdx); -yaml::bolt::CallSiteInfo YamlCSI; -YamlCSI.DestId = 0; // designated for unknown functions -YamlCSI.EntryDiscriminator = 0; -YamlCSI.Count = BI.Branches; -YamlCSI.Mispreds = BI.Mispreds; -YamlCSI.Offset = BranchOffset - Offset; -BinaryData *CallTargetBD = BC.getBinaryDataByName(CallToLoc.Name); -if (!CallTargetBD) { - YamlBB.CallSites.emplace_back(YamlCSI); - continue; -} -uint64_t CallTargetAddress = CallTargetBD->getAddress(); -BinaryFunction *CallTargetBF = -BC.getBinaryFunctionAtAddress(CallTargetAddress); -if (!CallTargetBF) { - YamlBB.CallSites.emplace_back(YamlCSI); - continue; -} -// Calls between hot and cold fragments must be handled in -// fixupBATProfile. -assert(CallTargetBF != BF && "invalid CallTargetBF"); -YamlCSI.DestId = CallTargetBF->getFunctionNumber(); -if (CallToLoc.Offset) { - if (BAT->isBATFunction(CallTargetAddress)) { -LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Unsupported secondary " - "entry point in BAT function " - << CallToLoc.Name << '\n'); - } else if (const BinaryBasicBlock *CallTargetBB = - CallTargetBF->getBasicBlockAtOffset( - CallToLoc.Offset)) { -// Only record true call information, ignoring returns (normally -// won't have a target basic block) and jumps to the landing -// pads (not an entry point). -if (CallTargetBB->isEntryPoint()) { - YamlCSI.EntryDiscriminator = - CallTargetBF->getEntryIDForSymbol( - CallTargetBB->getLabel()); -} - } -} -YamlBB.CallSites.emplace_back(YamlCSI); - } -} + auto getCallSiteInfo = [&](Location CallToLoc, unsigned CallToIdx, + uint32_t Offset) { +const llvm::bolt::BranchInfo = Branches.Data.at(CallToIdx); +yaml::bolt::CallSiteInfo CSI; +CSI.DestId = 0; // designated for unknown functions +CSI.EntryDiscriminator = 0; +CSI.Count = BI.Branches; +CSI.Mispreds = BI.Mispreds; +CSI.Offset = Offset; +if (BinaryData *BD = BC.getBinaryDataByName(CallToLoc.Name)) + YAMLProfileWriter::setCSIDestination(BC, CSI, BD->getSymbol(), BAT, + CallToLoc.Offset); +return CSI; }; for (const auto &[FromOffset, SuccKV] : Branches.IntraIndex) { -yaml::bolt::BinaryBasicBlockProfile YamlBB; if (!BlockMap.isInputBlock(FromOffset)) continue; -YamlBB.Index = BlockMap.getBBIndex(FromOffset); -YamlBB.Hash = BlockMap.getBBHash(FromOffset); +unsigned Index = BlockMap.getBBIndex(FromOffset); +yaml::bolt::BinaryBasicBlockProfile = YamlBF.Blocks[Index]; for (const auto &[SuccOffset, SuccDataIdx] : SuccKV) -
[llvm-branch-commits] [llvm] [BOLT] Cover all call sites in writeBATYAML (PR #87743)
@@ -38,10 +38,63 @@ # RUN: llvm-bolt %t.exe -o %t.bat --data %t.fdata --funcs=func \ # RUN: --split-functions --split-strategy=all --split-all-cold --enable-bat +# Prepare pre-aggregated profile using %t.bat maksfb wrote: Use double `#`s. https://github.com/llvm/llvm-project/pull/87743 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][BAT] Fix handling of split functions (PR #87569)
@@ -59,10 +59,29 @@ # RUN: llvm-bolt %t.exe -o %t.bat2 --data %t.fdata --funcs=main,func \ # RUN: --split-functions --split-strategy=all --split-all-cold --enable-bat +# Prepare pre-aggregated profile using %t.bat maksfb wrote: Nit: use double `#`s. https://github.com/llvm/llvm-project/pull/87569 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][BAT] Fix handling of split functions (PR #87569)
@@ -245,14 +245,12 @@ class DataAggregator : public DataReader { /// disassembled BinaryFunctions BinaryFunction *getBinaryFunctionContainingAddress(uint64_t Address) const; + /// Perform BAT translation for a given \p Func and return the parent + /// BinaryFunction or nullptr. + BinaryFunction *getParentFunction(const BinaryFunction ) const; maksfb wrote: If the function is BAT-specific, it should be clear from the name. https://github.com/llvm/llvm-project/pull/87569 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Emit empty FDE for injected functions (PR #87967)
https://github.com/maksfb approved this pull request. https://github.com/llvm/llvm-project/pull/87967 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Emit empty FDE for injected functions (PR #87967)
maksfb wrote: Okay. That explains the increase. https://github.com/llvm/llvm-project/pull/87967 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Emit empty FDE for injected functions (PR #87967)
maksfb wrote: > > Creating a proper FDE entry is the right thing to do regardless of BAT. How > > large is the regression? > > The largest I've seen is 34M->39M (HHVM instrumentation). Did you check if we patch every single function in he original `.text`? How many functions are there? https://github.com/llvm/llvm-project/pull/87967 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Emit empty FDE for injected functions (PR #87967)
maksfb wrote: > > Let's make `hasCFI()` return true for injected functions. > > Given that this change increases the size of eh_frame section, should we make > it dependent on `enable-bat`, i.e. when we expect to feed the binary back to > BOLT? > > Because otherwise this "fix" is a pure size regression. Creating a proper FDE entry is the right thing to do regardless of BAT. How large is the regression? https://github.com/llvm/llvm-project/pull/87967 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Use BAT to register fragments (PR #87968)
https://github.com/maksfb commented: Is information we emit in the symbol table insufficient to establish the fragment relationship? https://github.com/llvm/llvm-project/pull/87968 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Emit empty FDE for injected functions (PR #87967)
https://github.com/maksfb commented: Let's make `hasCFI()` return true for injected functions. https://github.com/llvm/llvm-project/pull/87967 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Use BAT for YAML profile call target information (PR #86219)
https://github.com/maksfb edited https://github.com/llvm/llvm-project/pull/86219 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Use BAT for YAML profile call target information (PR #86219)
@@ -161,6 +164,10 @@ class BoltAddressTranslation { /// Map a function to its secondary entry points vector std::unordered_map> SecondaryEntryPointsMap; + /// Translates a given \p Symbol into a BinaryFunction and + /// Returns a secondary entry point id for a given \p Address and \p Offset. maksfb wrote: ```suggestion /// Return a secondary entry point ID for a function located at \p Address and \p Offset within that function. ``` https://github.com/llvm/llvm-project/pull/86219 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Use BAT for YAML profile call target information (PR #86219)
@@ -123,6 +124,12 @@ class BoltAddressTranslation { std::unordered_map> getBFBranches(uint64_t FuncOutputAddress) const; + /// For a given \p Symbol in the output binary, returns a corresponding pair + /// of parent BinaryFunction and secondary entry point in it. maksfb wrote: Update comment with `Offset` info. https://github.com/llvm/llvm-project/pull/86219 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Use BAT for YAML profile call target information (PR #86219)
https://github.com/maksfb approved this pull request. Looks good with nits addressed. https://github.com/llvm/llvm-project/pull/86219 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Use BAT interfaces in YAMLProfileWriter::convert (PR #86219)
@@ -27,25 +28,55 @@ namespace bolt { /// Set CallSiteInfo destination fields from \p Symbol and return a target /// BinaryFunction for that symbol. -static const BinaryFunction *setCSIDestination(const BinaryContext , - yaml::bolt::CallSiteInfo , - const MCSymbol *Symbol) { +static const BinaryFunction * +setCSIDestination(const BinaryContext , yaml::bolt::CallSiteInfo , + const MCSymbol *Symbol, const BoltAddressTranslation *BAT) { CSI.DestId = 0; // designated for unknown functions CSI.EntryDiscriminator = 0; + auto setBATSecondaryEntry = [&](const BinaryFunction *const Callee) { +// The symbol could be a secondary entry in a cold fragment. +ErrorOr SymbolValue = BC.getSymbolValue(*Symbol); +if (SymbolValue.getError()) + return; + +// Containing function, not necessarily the same as symbol value. +const uint64_t CalleeAddress = Callee->getAddress(); +const uint32_t OutputOffset = SymbolValue.get() - CalleeAddress; + +const uint64_t ParentAddress = BAT->fetchParentAddress(CalleeAddress); +const uint64_t HotAddress = ParentAddress ? ParentAddress : CalleeAddress; + +if (const BinaryFunction *ParentBF = +BC.getBinaryFunctionAtAddress(HotAddress)) + CSI.DestId = ParentBF->getFunctionNumber(); + +const uint32_t InputOffset = +BAT->translate(CalleeAddress, OutputOffset, /*IsBranchSrc*/ false); + +if (!InputOffset) + return; + +CSI.EntryDiscriminator = +BAT->getSecondaryEntryPointId(HotAddress, InputOffset) + 1; + }; + if (Symbol) { uint64_t EntryID = 0; if (const BinaryFunction *const Callee = BC.getFunctionForSymbol(Symbol, )) { CSI.DestId = Callee->getFunctionNumber(); CSI.EntryDiscriminator = EntryID; + if (BAT && BAT->isBATFunction(Callee->getAddress())) +setBATSecondaryEntry(Callee); maksfb wrote: How about moving the adjustment logic into BAT then? https://github.com/llvm/llvm-project/pull/86219 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][BAT] Support multi-way split functions (PR #87123)
https://github.com/maksfb approved this pull request. https://github.com/llvm/llvm-project/pull/87123 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][BAT] Support multi-way split functions (PR #87123)
@@ -197,8 +197,10 @@ void BoltAddressTranslation::writeMaps(std::map , ? SecondaryEntryPointsMap[Address].size() : 0; if (Cold) { - size_t HotIndex = - std::distance(ColdPartSource.begin(), ColdPartSource.find(Address)); + // `Maps` is keyed by output addresses. + auto HotEntryIt = Maps.find(ColdPartSource[Address]); + assert(HotEntryIt != Maps.end()); maksfb wrote: Does this happen when BAT maps are incomplete? Since BAT is part of the input, should this be an error instead? https://github.com/llvm/llvm-project/pull/87123 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Use BAT interfaces in YAMLProfileWriter::convert (PR #86219)
@@ -27,25 +28,55 @@ namespace bolt { /// Set CallSiteInfo destination fields from \p Symbol and return a target /// BinaryFunction for that symbol. -static const BinaryFunction *setCSIDestination(const BinaryContext , - yaml::bolt::CallSiteInfo , - const MCSymbol *Symbol) { +static const BinaryFunction * +setCSIDestination(const BinaryContext , yaml::bolt::CallSiteInfo , + const MCSymbol *Symbol, const BoltAddressTranslation *BAT) { CSI.DestId = 0; // designated for unknown functions CSI.EntryDiscriminator = 0; + auto setBATSecondaryEntry = [&](const BinaryFunction *const Callee) { +// The symbol could be a secondary entry in a cold fragment. +ErrorOr SymbolValue = BC.getSymbolValue(*Symbol); +if (SymbolValue.getError()) + return; + +// Containing function, not necessarily the same as symbol value. +const uint64_t CalleeAddress = Callee->getAddress(); +const uint32_t OutputOffset = SymbolValue.get() - CalleeAddress; + +const uint64_t ParentAddress = BAT->fetchParentAddress(CalleeAddress); +const uint64_t HotAddress = ParentAddress ? ParentAddress : CalleeAddress; + +if (const BinaryFunction *ParentBF = +BC.getBinaryFunctionAtAddress(HotAddress)) + CSI.DestId = ParentBF->getFunctionNumber(); + +const uint32_t InputOffset = +BAT->translate(CalleeAddress, OutputOffset, /*IsBranchSrc*/ false); + +if (!InputOffset) + return; + +CSI.EntryDiscriminator = +BAT->getSecondaryEntryPointId(HotAddress, InputOffset) + 1; maksfb wrote: Can we modify `getSecondaryEntryPointId()` interface so that there's no need to "+1" here? https://github.com/llvm/llvm-project/pull/86219 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Use BAT interfaces in YAMLProfileWriter::convert (PR #86219)
@@ -27,25 +28,55 @@ namespace bolt { /// Set CallSiteInfo destination fields from \p Symbol and return a target /// BinaryFunction for that symbol. -static const BinaryFunction *setCSIDestination(const BinaryContext , - yaml::bolt::CallSiteInfo , - const MCSymbol *Symbol) { +static const BinaryFunction * +setCSIDestination(const BinaryContext , yaml::bolt::CallSiteInfo , + const MCSymbol *Symbol, const BoltAddressTranslation *BAT) { CSI.DestId = 0; // designated for unknown functions CSI.EntryDiscriminator = 0; + auto setBATSecondaryEntry = [&](const BinaryFunction *const Callee) { +// The symbol could be a secondary entry in a cold fragment. +ErrorOr SymbolValue = BC.getSymbolValue(*Symbol); +if (SymbolValue.getError()) + return; + +// Containing function, not necessarily the same as symbol value. +const uint64_t CalleeAddress = Callee->getAddress(); +const uint32_t OutputOffset = SymbolValue.get() - CalleeAddress; + +const uint64_t ParentAddress = BAT->fetchParentAddress(CalleeAddress); +const uint64_t HotAddress = ParentAddress ? ParentAddress : CalleeAddress; + +if (const BinaryFunction *ParentBF = +BC.getBinaryFunctionAtAddress(HotAddress)) + CSI.DestId = ParentBF->getFunctionNumber(); + +const uint32_t InputOffset = +BAT->translate(CalleeAddress, OutputOffset, /*IsBranchSrc*/ false); + +if (!InputOffset) + return; + +CSI.EntryDiscriminator = +BAT->getSecondaryEntryPointId(HotAddress, InputOffset) + 1; + }; + if (Symbol) { uint64_t EntryID = 0; if (const BinaryFunction *const Callee = BC.getFunctionForSymbol(Symbol, )) { CSI.DestId = Callee->getFunctionNumber(); CSI.EntryDiscriminator = EntryID; + if (BAT && BAT->isBATFunction(Callee->getAddress())) +setBATSecondaryEntry(Callee); maksfb wrote: What do you think about modifying `BinaryContext::getFunctionForSymbol()` in the presence of BAT? We might move BAT into `BinaryContext` (from `RewriteInstance`) for that. https://github.com/llvm/llvm-project/pull/86219 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Use BAT interfaces in YAMLProfileWriter::convert (PR #86219)
@@ -27,25 +28,55 @@ namespace bolt { /// Set CallSiteInfo destination fields from \p Symbol and return a target /// BinaryFunction for that symbol. -static const BinaryFunction *setCSIDestination(const BinaryContext , - yaml::bolt::CallSiteInfo , - const MCSymbol *Symbol) { +static const BinaryFunction * +setCSIDestination(const BinaryContext , yaml::bolt::CallSiteInfo , + const MCSymbol *Symbol, const BoltAddressTranslation *BAT) { CSI.DestId = 0; // designated for unknown functions CSI.EntryDiscriminator = 0; + auto setBATSecondaryEntry = [&](const BinaryFunction *const Callee) { +// The symbol could be a secondary entry in a cold fragment. +ErrorOr SymbolValue = BC.getSymbolValue(*Symbol); +if (SymbolValue.getError()) maksfb wrote: Can we get a condition when `getFunctionForSymbol(Symbol)` return a function, but the symbol has no set value? https://github.com/llvm/llvm-project/pull/86219 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Use BAT interfaces in YAMLProfileWriter::convert (PR #86219)
@@ -27,25 +28,55 @@ namespace bolt { /// Set CallSiteInfo destination fields from \p Symbol and return a target /// BinaryFunction for that symbol. -static const BinaryFunction *setCSIDestination(const BinaryContext , - yaml::bolt::CallSiteInfo , - const MCSymbol *Symbol) { +static const BinaryFunction * +setCSIDestination(const BinaryContext , yaml::bolt::CallSiteInfo , + const MCSymbol *Symbol, const BoltAddressTranslation *BAT) { CSI.DestId = 0; // designated for unknown functions CSI.EntryDiscriminator = 0; + auto setBATSecondaryEntry = [&](const BinaryFunction *const Callee) { maksfb wrote: ```suggestion auto setBATSecondaryEntry = [&](const BinaryFunction ) { ``` https://github.com/llvm/llvm-project/pull/86219 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Use BAT interfaces in YAMLProfileWriter::convert (PR #86219)
@@ -123,6 +123,9 @@ class BoltAddressTranslation { std::unordered_map> getBFBranches(uint64_t FuncOutputAddress) const; + /// Returns a secondary entry point id for a given function and offset. maksfb wrote: "... function at a given \p Address..." https://github.com/llvm/llvm-project/pull/86219 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Set EntryDiscriminator in YAML profile for indirect calls (PR #82128)
https://github.com/maksfb approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/82128 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Set EntryDiscriminator in YAML profile for indirect calls (PR #82128)
@@ -78,32 +97,15 @@ YAMLProfileWriter::convert(const BinaryFunction , bool UseDFS) { if (!ICSP) continue; for (const IndirectCallProfile : ICSP.get()) { - StringRef TargetName = ""; - CSI.DestId = 0; // designated for unknown functions - CSI.EntryDiscriminator = 0; - if (CSP.Symbol) { -const BinaryFunction *Callee = BC.getFunctionForSymbol(CSP.Symbol); -if (Callee) { - CSI.DestId = Callee->getFunctionNumber(); - TargetName = Callee->getOneName(); -} - } + const BinaryFunction *Callee = setCSIDestination(BC, CSI, CSP.Symbol); CSI.Count = CSP.Count; CSI.Mispreds = CSP.Mispreds; - CSTargets.emplace_back(TargetName, CSI); + if (CSI.Count && Callee) maksfb wrote: Previously, we would record data for unknown destinations, but now we are ignoring those. Not sure if it matters in practice. But if we are going to ignore them, the code that could be simplified/refactored. https://github.com/llvm/llvm-project/pull/82128 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits