hoy created this revision.
Herald added subscribers: llvm-commits, cfe-commits, dexonsmith, wenlei, 
pengfei, hiraditya, mgorny.
Herald added projects: clang, LLVM.
hoy requested review of this revision.

An indirect call site needs to be probed for its potential call targets. With 
CSSPGO a direct call also needs a probe so that a calling context can be 
represented by a stack of callsite probes. Unlike pseudo probes for basic 
blocks that are in form of standalone intrinsic call instructions, pseudo 
probes for callsites have to be attached to the call instruction, thus a 
separate instruction would not work.

One possible way of attaching a probe to a call instruction is to use a special 
metadata that carries information about the probe. The special metadata will 
have to make its way through the optimization pipeline down to object emission. 
This requires additional efforts to maintain the metadata in various places. 
Given that the !dbg metadata is a first-class metadata and has all essential 
support in place, leveraging the !dbg metadata as a channel to encode pseudo 
probe information is handy.

With the requirement of not inflating !dbg metadata that is allocated for 
almost every instruction, we found that the 32-bit DWARF discriminator field 
which mainly serves AutoFDO can be reused for pseudo probes. DWARF 
discriminators distinguish identical source locations between instructions and 
with pseudo probes such support is not required. In this change we are using 
the discriminator field to encode the ID and type of a callsite probe and the 
encoded value will be unpacked and consumed right before object emission.

To avoid collision with the baseline AutoFDO in the various places that handle 
dwarf discriminators, where a check against the `-pseudo-probe-for-profiling` 
switch is not available, a special encoding scheme is used to tell apart a 
pseudo probe discriminator from a regular discriminator. For a regular 
discriminator, if all of the lowest 3 bits are set (0x7), the discriminator is 
basically empty and all higher 29 bits can be reserved for pseudo probe use.

Callsite pseudo probes are inserted in `SampleProfileProbePass` and a 
target-independent MIR pass `PseudoProbeInserter` is added to unpack the probe 
ID/type from `!dbg`.

Note that with this work the switch -debug-info-for-profiling will not work 
with -pseudo-probe-for-profiling anymore. They cannot be used at the same time.

Test Plan:


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D91756

Files:
  clang/lib/CodeGen/BackendUtil.cpp
  llvm/include/llvm/CodeGen/CommandFlags.h
  llvm/include/llvm/CodeGen/Passes.h
  llvm/include/llvm/IR/DebugInfoMetadata.h
  llvm/include/llvm/IR/PseudoProbe.h
  llvm/include/llvm/InitializePasses.h
  llvm/include/llvm/Passes/PassBuilder.h
  llvm/include/llvm/Target/TargetOptions.h
  llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
  llvm/lib/CodeGen/CMakeLists.txt
  llvm/lib/CodeGen/CommandFlags.cpp
  llvm/lib/CodeGen/PseudoProbeInserter.cpp
  llvm/lib/CodeGen/TargetPassConfig.cpp
  llvm/lib/Target/X86/X86TargetMachine.cpp
  llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
  llvm/test/Transforms/SampleProfile/emit-pseudo-probe.ll

Index: llvm/test/Transforms/SampleProfile/emit-pseudo-probe.ll
===================================================================
--- llvm/test/Transforms/SampleProfile/emit-pseudo-probe.ll
+++ llvm/test/Transforms/SampleProfile/emit-pseudo-probe.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -passes=pseudo-probe -function-sections -S -o %t
 ; RUN: FileCheck %s < %t --check-prefix=CHECK-IL
-; RUN: llc %t -stop-after=instruction-select -o - | FileCheck %s --check-prefix=CHECK-MIR
+; RUN: llc %t -pseudo-probe-for-profiling -stop-after=pseudo-probe-inserter -o - | FileCheck %s --check-prefix=CHECK-MIR
 ;
 ;; Check the generation of pseudoprobe intrinsic call.
 
@@ -28,9 +28,35 @@
   ret void, !dbg !12
 }
 
+declare void @bar(i32 %x) 
+
+define internal void @foo2(void (i32)* %f) !dbg !4 {
+entry:
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1)
+; CHECK-MIR: PSEUDO_PROBE [[#GUID2:]], 1, 0
+; Check pseudo_probe metadata attached to the indirect call instruction.
+; CHECK-IL: call void %f(i32 1), !dbg ![[#PROBE0:]]
+; CHECK-MIR: PSEUDO_PROBE [[#GUID2]], 2, 1
+  call void %f(i32 1), !dbg !13
+; Check pseudo_probe metadata attached to the direct call instruction.
+; CHECK-IL: call void @bar(i32 1), !dbg ![[#PROBE1:]]
+; CHECK-MIR: PSEUDO_PROBE	[[#GUID2]], 3, 2
+  call void @bar(i32 1)
+  ret void
+}
+
 ; CHECK-IL: ![[#FOO:]] = distinct !DISubprogram(name: "foo"
 ; CHECK-IL: ![[#FAKELINE]] = !DILocation(line: 0, scope: ![[#FOO]])
 ; CHECK-IL: ![[#REALLINE]] = !DILocation(line: 2, scope: ![[#FOO]])
+; CHECK-IL: ![[#PROBE0]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE0:]])
+;; A discriminator of 67108887 which is 0x4000017 in hexadecimal, stands for an indirect call probe
+;; with an index of 2.
+; CHECK-IL: ![[#SCOPE0]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108887)
+; CHECK-IL: ![[#PROBE1]] = !DILocation(line: 0, scope: ![[#SCOPE1:]])
+;; A discriminator of 134217759 which is 0x800001f in hexadecimal, stands for a direct call probe
+;; with an index of 3.
+; CHECK-IL: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 134217759)
+
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !10}
@@ -39,10 +65,12 @@
 !1 = !DIFile(filename: "test.c", directory: "")
 !2 = !{}
 !3 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, unit: !0, retainedNodes: !2)
+!4 = distinct !DISubprogram(name: "foo2", scope: !1, file: !1, line: 2, type: !5, unit: !0, retainedNodes: !2)
 !5 = !DISubroutineType(types: !6)
 !6 = !{!7}
 !7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
 !9 = !{i32 2, !"Dwarf Version", i32 4}
 !10 = !{i32 2, !"Debug Info Version", i32 3}
 !11 = !{!"clang version 3.9.0"}
-!12 = !DILocation(line: 2, scope: !3)
\ No newline at end of file
+!12 = !DILocation(line: 2, scope: !3)
+!13 = !DILocation(line: 2, column: 20, scope: !4)
Index: llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
===================================================================
--- llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -35,10 +35,43 @@
 STATISTIC(ArtificialDbgLine,
           "Number of probes that have an artificial debug line");
 
+// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
+// value of each BB in the CFG. The higher 32 bits record the number of edges
+// preceded by the number of indirect calls.
+// This is derived from FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash().
+void SampleProfileProber::computeCFGHash() {
+  std::vector<uint8_t> Indexes;
+  JamCRC JC;
+  for (auto &BB : *F) {
+    auto TI = BB.getTerminator();
+    for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
+      auto Succ = TI->getSuccessor(I);
+      auto Index = getBlockId(Succ);
+      for (int J = 0; J < 4; J++)
+        Indexes.push_back((uint8_t)(Index >> (J * 8)));
+    }
+  }
+  JC.update(Indexes);
+
+  FunctionHash = (uint64_t)CallProbeIds.size() << 48 |
+                 (uint64_t)Indexes.size() << 32 | JC.getCRC();
+  // Reserve bit 60-63 for other information purpose.
+  FunctionHash &= 0x0FFFFFFFFFFFFFFF;
+  assert(FunctionHash != 0);
+  LLVM_DEBUG(dbgs() << "\nFunction Hash Computation for " << F->getName()
+                    << ":\n"
+                    << " CRC = " << JC.getCRC() << ", Edges = "
+                    << Indexes.size() << ", ICSites = " << CallProbeIds.size()
+                    << ", Hash = " << FunctionHash << "\n");
+}
+
 SampleProfileProber::SampleProfileProber(Function &Func) : F(&Func) {
   BlockProbeIds.clear();
+  CallProbeIds.clear();
   LastProbeId = (uint32_t)PseudoProbeReservedId::Last;
   computeProbeIdForBlocks();
+  computeProbeIdForCallsites();
+  computeCFGHash();
 }
 
 void SampleProfileProber::computeProbeIdForBlocks() {
@@ -47,11 +80,28 @@
   }
 }
 
+void SampleProfileProber::computeProbeIdForCallsites() {
+  for (auto &BB : *F) {
+    for (auto &I : BB) {
+      if (!isa<CallBase>(I))
+        continue;
+      if (isa<IntrinsicInst>(&I))
+        continue;
+      CallProbeIds[&I] = ++LastProbeId;
+    }
+  }
+}
+
 uint32_t SampleProfileProber::getBlockId(const BasicBlock *BB) const {
   auto I = BlockProbeIds.find(const_cast<BasicBlock *>(BB));
   return I == BlockProbeIds.end() ? 0 : I->second;
 }
 
+uint32_t SampleProfileProber::getCallsiteId(const Instruction *call) const {
+  auto iter = CallProbeIds.find(const_cast<Instruction *>(call));
+  return iter == CallProbeIds.end() ? 0 : iter->second;
+}
+
 void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
   Module *M = F.getParent();
   MDBuilder MDB(F.getContext());
@@ -59,6 +109,27 @@
   // fine since function name is the only key in the profile database.
   uint64_t Guid = Function::getGUID(F.getName());
 
+  // Assign an artificial debug line to a probe that doesn't come with a real
+  // line. A probe not having a debug line will get an incomplete inline
+  // context. This will cause samples collected on the probe to be counted
+  // into the base profile instead of a context profile. The line number
+  // itself is not important though.
+  auto AssignDebugLoc = [&](Instruction *I) {
+    assert(isa<PseudoProbeInst>(I) || isa<CallBase>(I));
+    if (!I->getDebugLoc()) {
+      if (auto SP = F.getSubprogram()) {
+        auto DIL = DebugLoc::get(0, 0, SP);
+        I->setDebugLoc(DIL);
+        ArtificialDbgLine++;
+        LLVM_DEBUG({
+          dbgs() << "\nIn Function " << F.getName()
+                 << " Probe gets an artificial debug line\n";
+          I->dump();
+        });
+      }
+    }
+  };
+
   // Probe basic blocks.
   for (auto &I : BlockProbeIds) {
     BasicBlock *BB = I.first;
@@ -86,19 +157,26 @@
         llvm::Intrinsic::getDeclaration(M, Intrinsic::pseudoprobe);
     Value *Args[] = {Builder.getInt64(Guid), Builder.getInt64(Index)};
     auto Probe = Builder.CreateCall(ProbeFn, Args);
-    // Assign an artificial debug line to a probe that doesn't come with a real
-    // line. A probe not having a debug line will get an incomplete inline
-    // context. This will cause samples collected on the probe to be counted
-    // into the base profile instead of a context profile. The line number
-    // itself is not important though.
-    if (!Probe->getDebugLoc()) {
-      if (auto SP = F.getSubprogram()) {
-        auto DIL = DebugLoc::get(0, 0, SP);
-        Probe->setDebugLoc(DIL);
-        ArtificialDbgLine++;
-        LLVM_DEBUG(dbgs() << "\nIn Function " << F.getName() << " Probe "
-                          << Index << " gets an artificial debug line\n";);
-      }
+    AssignDebugLoc(Probe);
+  }
+
+  // Probe both direct calls and indirect calls. Direct calls are probed so that
+  // their probe ID can be used as a call site identifier to represent a
+  // calling context.
+  for (auto &I : CallProbeIds) {
+    auto Call = I.first;
+    uint32_t Index = I.second;
+    uint32_t Type = cast<CallBase>(Call)->getCalledFunction()
+                        ? (uint32_t)PseudoProbeType::DirectCall
+                        : (uint32_t)PseudoProbeType::IndirectCall;
+    AssignDebugLoc(Call);
+    // Leverage the 32-bit discriminator field of debug data to store the ID and
+    // type of a callsite probe. This gets rid of the dependency on plumbing a
+    // customized metadata through the codegen pipeline.
+    uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(Index, Type);
+    if (auto DIL = Call->getDebugLoc()) {
+      DIL = DIL->cloneWithDiscriminator(V);
+      Call->setDebugLoc(DIL);
     }
   }
 }
Index: llvm/lib/Target/X86/X86TargetMachine.cpp
===================================================================
--- llvm/lib/Target/X86/X86TargetMachine.cpp
+++ llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -83,6 +83,7 @@
   initializeX86LoadValueInjectionRetHardeningPassPass(PR);
   initializeX86OptimizeLEAPassPass(PR);
   initializeX86PartialReductionPass(PR);
+  initializePseudoProbeInserterPass(PR);
 }
 
 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
Index: llvm/lib/CodeGen/TargetPassConfig.cpp
===================================================================
--- llvm/lib/CodeGen/TargetPassConfig.cpp
+++ llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1039,6 +1039,10 @@
   // Add passes that directly emit MI after all other MI passes.
   addPreEmitPass2();
 
+  // Insert pseudo probe annotation for callsite profiling
+  if (TM->Options.PseudoProbeForProfiling)
+    addPass(createPseudoProbeInserter());
+
   AddingMachinePasses = false;
 }
 
Index: llvm/lib/CodeGen/PseudoProbeInserter.cpp
===================================================================
--- /dev/null
+++ llvm/lib/CodeGen/PseudoProbeInserter.cpp
@@ -0,0 +1,96 @@
+//===- PseudoProbeInserter.cpp - Insert annotation for callsite profiling -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements PseudoProbeInserter pass, which inserts pseudo probe
+// annotations for call instructions with a pseudo-probe-specific dwarf
+// discriminator. Such a discriminator indicates that the call instruction comes
+// with a pseudo probe, and the discriminator value holds information to
+// identify the corresponding counter.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/PseudoProbe.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Target/TargetMachine.h"
+#include <unordered_map>
+
+#define DEBUG_TYPE "pseudo-probe-inserter"
+
+using namespace llvm;
+
+namespace {
+class PseudoProbeInserter : public MachineFunctionPass {
+public:
+  static char ID;
+
+  PseudoProbeInserter() : MachineFunctionPass(ID) {
+    initializePseudoProbeInserterPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return "Pseudo Probe Inserter"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+    bool Changed = false;
+    for (MachineBasicBlock &MBB : MF) {
+      MachineInstr *FirstInstr = nullptr;
+      for (MachineInstr &MI : MBB) {
+        if (!MI.isPseudo())
+          FirstInstr = &MI;
+        if (MI.isCall()) {
+          if (DILocation *DL = MI.getDebugLoc()) {
+            auto Value = DL->getDiscriminator();
+            if (DILocation::isPseudoProbeDiscriminator(Value)) {
+              BuildMI(MBB, MI, DL, TII->get(TargetOpcode::PSEUDO_PROBE))
+                  .addImm(getFuncGUID(MF.getFunction().getParent(), DL))
+                  .addImm(
+                      PseudoProbeDwarfDiscriminator::extractProbeIndex(Value))
+                  .addImm(
+                      PseudoProbeDwarfDiscriminator::extractProbeType(Value));
+              Changed = true;
+            }
+          }
+        }
+      }
+    }
+
+    return Changed;
+  }
+
+private:
+  uint64_t getFuncGUID(Module *M, DILocation *DL) {
+    auto SP = DL->getScope()->getSubprogram();
+    auto Name = SP->getLinkageName();
+    if (Name.empty())
+      Name = SP->getName();
+    return Function::getGUID(Name);
+  }
+};
+} // namespace
+
+char PseudoProbeInserter::ID = 0;
+INITIALIZE_PASS_BEGIN(PseudoProbeInserter, DEBUG_TYPE,
+                      "Insert pseudo probe annotations for value profiling",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(PseudoProbeInserter, DEBUG_TYPE,
+                    "Insert pseudo probe annotations for value profiling",
+                    false, false)
+
+FunctionPass *llvm::createPseudoProbeInserter() {
+  return new PseudoProbeInserter();
+}
Index: llvm/lib/CodeGen/CommandFlags.cpp
===================================================================
--- llvm/lib/CodeGen/CommandFlags.cpp
+++ llvm/lib/CodeGen/CommandFlags.cpp
@@ -90,6 +90,7 @@
 CGOPT(bool, EmitCallSiteInfo)
 CGOPT(bool, EnableMachineFunctionSplitter)
 CGOPT(bool, EnableDebugEntryValues)
+CGOPT(bool, PseudoProbeForProfiling)
 CGOPT(bool, ValueTrackingVariableLocations)
 CGOPT(bool, ForceDwarfFrameSection)
 CGOPT(bool, XRayOmitFunctionIndex)
@@ -428,6 +429,11 @@
       cl::init(false));
   CGBINDOPT(EnableDebugEntryValues);
 
+  static cl::opt<bool> PseudoProbeForProfiling(
+      "pseudo-probe-for-profiling", cl::desc("Emit pseudo probes for AutoFDO"),
+      cl::init(false));
+  CGBINDOPT(PseudoProbeForProfiling);
+
   static cl::opt<bool> ValueTrackingVariableLocations(
       "experimental-debug-variable-locations",
       cl::desc("Use experimental new value-tracking variable locations"),
@@ -541,6 +547,7 @@
   Options.EmitAddrsig = getEnableAddrsig();
   Options.EmitCallSiteInfo = getEmitCallSiteInfo();
   Options.EnableDebugEntryValues = getEnableDebugEntryValues();
+  Options.PseudoProbeForProfiling = getPseudoProbeForProfiling();
   Options.ValueTrackingVariableLocations = getValueTrackingVariableLocations();
   Options.ForceDwarfFrameSection = getForceDwarfFrameSection();
   Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex();
Index: llvm/lib/CodeGen/CMakeLists.txt
===================================================================
--- llvm/lib/CodeGen/CMakeLists.txt
+++ llvm/lib/CodeGen/CMakeLists.txt
@@ -122,6 +122,7 @@
   PreISelIntrinsicLowering.cpp
   ProcessImplicitDefs.cpp
   PrologEpilogInserter.cpp
+  PseudoProbeInserter.cpp
   PseudoSourceValue.cpp
   RDFGraph.cpp
   RDFLiveness.cpp
Index: llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
===================================================================
--- llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
+++ llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
@@ -17,6 +17,7 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/PseudoProbe.h"
 #include "llvm/Target/TargetMachine.h"
 #include <unordered_map>
 
@@ -25,10 +26,10 @@
 class Module;
 
 using BlockIdMap = std::unordered_map<BasicBlock *, uint32_t>;
+using InstructionIdMap = std::unordered_map<Instruction *, uint32_t>;
 
 enum class PseudoProbeReservedId { Invalid = 0, Last = Invalid };
 
-enum class PseudoProbeType { Block = 0 };
 
 /// Sample profile pseudo prober.
 ///
@@ -41,14 +42,28 @@
 
 private:
   Function *getFunction() const { return F; }
+  uint64_t getFunctionHash() const { return FunctionHash; }
   uint32_t getBlockId(const BasicBlock *BB) const;
+  uint32_t getCallsiteId(const Instruction *call) const;
+  void computeCFGHash();
   void computeProbeIdForBlocks();
+  void computeProbeIdForCallsites();
 
   Function *F;
 
+  /// The current module ID that is used to name a static object as a comdat
+  /// group.
+  std::string CurModuleUniqueId;
+
+  /// A CFG hash code used to identify function code changes.
+  uint64_t FunctionHash;
+
   /// Map basic blocks to the their pseudo probe ids.
   BlockIdMap BlockProbeIds;
 
+  /// Map direct and indirect calls to their pseudo probe ids.
+  InstructionIdMap CallProbeIds;
+
   /// The ID of the last probe, Can be used to number a new probe.
   uint32_t LastProbeId;
 };
Index: llvm/include/llvm/Target/TargetOptions.h
===================================================================
--- llvm/include/llvm/Target/TargetOptions.h
+++ llvm/include/llvm/Target/TargetOptions.h
@@ -137,8 +137,8 @@
           EnableMachineFunctionSplitter(false), SupportsDefaultOutlining(false),
           EmitAddrsig(false), EmitCallSiteInfo(false),
           SupportsDebugEntryValues(false), EnableDebugEntryValues(false),
-          ValueTrackingVariableLocations(false), ForceDwarfFrameSection(false),
-          XRayOmitFunctionIndex(false),
+          PseudoProbeForProfiling(false), ValueTrackingVariableLocations(false),
+          ForceDwarfFrameSection(false), XRayOmitFunctionIndex(false),
           FPDenormalMode(DenormalMode::IEEE, DenormalMode::IEEE) {}
 
     /// DisableFramePointerElim - This returns true if frame pointer elimination
@@ -302,6 +302,9 @@
     /// production.
     bool ShouldEmitDebugEntryValues() const;
 
+    /// Emit pseudo probes into the binary for sample profiling
+    unsigned PseudoProbeForProfiling : 1;
+
     // When set to true, use experimental new debug variable location tracking,
     // which seeks to follow the values of variables rather than their location,
     // post isel.
Index: llvm/include/llvm/Passes/PassBuilder.h
===================================================================
--- llvm/include/llvm/Passes/PassBuilder.h
+++ llvm/include/llvm/Passes/PassBuilder.h
@@ -63,6 +63,14 @@
     // PseudoProbeForProfiling needs to be true.
     assert(this->Action != NoAction || this->CSAction != NoCSAction ||
            this->DebugInfoForProfiling || this->PseudoProbeForProfiling);
+
+    // Pseudo probe emission does not work with -fdebug-info-for-profiling since
+    // they both use the discriminator field of debug lines but for different
+    // purposes.
+    if (this->DebugInfoForProfiling && this->PseudoProbeForProfiling) {
+      report_fatal_error(
+          "Pseudo probes cannot be used with -debug-info-for-profiling", false);
+    }
   }
   std::string ProfileFile;
   std::string CSProfileGenFile;
Index: llvm/include/llvm/InitializePasses.h
===================================================================
--- llvm/include/llvm/InitializePasses.h
+++ llvm/include/llvm/InitializePasses.h
@@ -361,6 +361,7 @@
 void initializePromoteLegacyPassPass(PassRegistry&);
 void initializePruneEHPass(PassRegistry&);
 void initializeRABasicPass(PassRegistry&);
+void initializePseudoProbeInserterPass(PassRegistry &);
 void initializeRAGreedyPass(PassRegistry&);
 void initializeReachingDefAnalysisPass(PassRegistry&);
 void initializeReassociateLegacyPassPass(PassRegistry&);
Index: llvm/include/llvm/IR/PseudoProbe.h
===================================================================
--- /dev/null
+++ llvm/include/llvm/IR/PseudoProbe.h
@@ -0,0 +1,45 @@
+//===- PseudoProbe.h - Pseudo Probe IR Helpers ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Pseudo probe IR intrinsic and dwarf discriminator manipulation routines.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_PSEUDOPROBE_H
+#define LLVM_IR_PSEUDOPROBE_H
+
+namespace llvm {
+
+enum class PseudoProbeType { Block = 0, IndirectCall, DirectCall };
+
+struct PseudoProbeDwarfDiscriminator {
+  // The following APIs encode/decode per-probe information to/from a
+  // 32-bit integer which is organized as:
+  //  [2:0] - 0x7, this is reserved for regular discriminator,
+  //          see DWARF discriminator encoding rule
+  //  [18:3] - probe id
+  //  [25:19] - reserved for future probe distribution factor
+  //  [28:26] - probe type, see PseudoProbeType
+  //  [31:29] - reserved for future probe attributes
+  static uint32_t packProbeData(uint32_t Index, uint32_t Type) {
+    assert(Index <= 0xFFFF);
+    assert(Type <= 0x7);
+    return (Index << 3) | (Type << 26) | 0x7;
+  }
+
+  static uint32_t extractProbeIndex(uint32_t Value) {
+    return (Value >> 3) & 0xFFFF;
+  }
+
+  static uint32_t extractProbeType(uint32_t Value) {
+    return (Value >> 26) & 0x7;
+  }
+};
+} // end namespace llvm
+
+#endif // LLVM_IR_PSEUDOPROBE_H
Index: llvm/include/llvm/IR/DebugInfoMetadata.h
===================================================================
--- llvm/include/llvm/IR/DebugInfoMetadata.h
+++ llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -1698,6 +1698,18 @@
 
   inline unsigned getDiscriminator() const;
 
+  // For the regular discriminator, it stands for all empty components if all
+  // the lowest 3 bits are set and all higher 29 bits are unused (zero by
+  // default). Here we fully leverage the higher 29 bits for pseudo probe use.
+  // This is the format:
+  // [2:0] - 0x7
+  // [31:3] - pseudo probe fields guaranteed to be non-zero as a whole
+  // So if the lowest 3 bits are all set and the other bits have at least one
+  // non-zero bit, it is guaranteed to be a pseudo probe discriminator.
+  inline static bool isPseudoProbeDiscriminator(unsigned discriminator) {
+    return ((discriminator & 0x7) == 0x7) && (discriminator & 0xFFFFFFF8);
+  }
+
   /// Returns a new DILocation with updated \p Discriminator.
   inline const DILocation *cloneWithDiscriminator(unsigned Discriminator) const;
 
Index: llvm/include/llvm/CodeGen/Passes.h
===================================================================
--- llvm/include/llvm/CodeGen/Passes.h
+++ llvm/include/llvm/CodeGen/Passes.h
@@ -475,6 +475,9 @@
   /// Create Hardware Loop pass. \see HardwareLoops.cpp
   FunctionPass *createHardwareLoopsPass();
 
+  /// This pass inserts pseudo probe annotation for callsite profiling.
+  FunctionPass *createPseudoProbeInserter();
+
   /// Create IR Type Promotion pass. \see TypePromotion.cpp
   FunctionPass *createTypePromotionPass();
 
Index: llvm/include/llvm/CodeGen/CommandFlags.h
===================================================================
--- llvm/include/llvm/CodeGen/CommandFlags.h
+++ llvm/include/llvm/CodeGen/CommandFlags.h
@@ -125,6 +125,8 @@
 
 bool getEnableDebugEntryValues();
 
+bool getPseudoProbeForProfiling();
+
 bool getValueTrackingVariableLocations();
 
 bool getForceDwarfFrameSection();
Index: clang/lib/CodeGen/BackendUtil.cpp
===================================================================
--- clang/lib/CodeGen/BackendUtil.cpp
+++ clang/lib/CodeGen/BackendUtil.cpp
@@ -549,6 +549,7 @@
   Options.EmitAddrsig = CodeGenOpts.Addrsig;
   Options.ForceDwarfFrameSection = CodeGenOpts.ForceDwarfFrameSection;
   Options.EmitCallSiteInfo = CodeGenOpts.EmitCallSiteInfo;
+  Options.PseudoProbeForProfiling = CodeGenOpts.PseudoProbeForProfiling;
   Options.ValueTrackingVariableLocations =
       CodeGenOpts.ValueTrackingVariableLocations;
   Options.XRayOmitFunctionIndex = CodeGenOpts.XRayOmitFunctionIndex;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to