https://github.com/huangjd created 
https://github.com/llvm/llvm-project/pull/81545

Such an expression does not correspond to a variable in the source code and thus
does not have a debug location.  When the user collects perf data on the
program and the intermediate memory load instruction is sampled, the sample
cannot be attributed to any variable/class member, which causes the sampling
results to be under-counted.
This patch adds an option `-fdebug-info-for-pointer-type` to generate a pseudo
variable and its debug info for each intermediate expression with pointer
dereferencing, so that perf data collected on the instruction of that
expression can be attributed to the correct class member.

This is a prototype, so comments are welcome.



>From f2c82758e1cba7773e41d941d2812c829c339675 Mon Sep 17 00:00:00 2001
From: William Huang <williamjhu...@google.com>
Date: Mon, 12 Feb 2024 02:27:13 -0500
Subject: [PATCH] Add option to generate additional info for expression
 containing pointer of pointers.

Such an expression does not correspond to a variable in the source code
and thus does not have a debug location. However, the user may want to
collect sampling counters for memory accesses to analyze the usage
frequency of class members. By enabling -fdebug-info-for-pointer-type, a
pseudo variable and its debug info are generated in place whenever
there's an intermediate expression with pointer access.
---
 clang/include/clang/Basic/DebugOptions.def |  4 ++
 clang/include/clang/Driver/Options.td      |  4 ++
 clang/lib/CodeGen/CGDebugInfo.cpp          | 16 +++++
 clang/lib/CodeGen/CGDebugInfo.h            |  6 ++
 clang/lib/CodeGen/CGDecl.cpp               |  4 ++
 clang/lib/CodeGen/CGExpr.cpp               | 79 ++++++++++++++++++++++
 clang/lib/CodeGen/CodeGenFunction.h        |  5 ++
 clang/lib/Driver/ToolChains/Clang.cpp      |  3 +
 8 files changed, 121 insertions(+)

diff --git a/clang/include/clang/Basic/DebugOptions.def 
b/clang/include/clang/Basic/DebugOptions.def
index 7cd3edf08a17ea..6dd09f46842077 100644
--- a/clang/include/clang/Basic/DebugOptions.def
+++ b/clang/include/clang/Basic/DebugOptions.def
@@ -129,6 +129,10 @@ DEBUGOPT(CodeViewCommandLine, 1, 0)
 /// Whether emit extra debug info for sample pgo profile collection.
 DEBUGOPT(DebugInfoForProfiling, 1, 0)
 
+/// Whether to generate pseudo variables and their debug info for intermediate
+/// pointer accesses.
+DEBUGOPT(DebugInfoForPointerType, 1, 0)
+
 /// Whether to emit .debug_gnu_pubnames section instead of .debug_pubnames.
 DEBUGOPT(DebugNameTable, 2, 0)
 
diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 7f4fa33748faca..96b22d3f7640dd 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1675,6 +1675,10 @@ defm debug_info_for_profiling : 
BoolFOption<"debug-info-for-profiling",
   PosFlag<SetTrue, [], [ClangOption, CC1Option],
           "Emit extra debug info to make sample profile more accurate">,
   NegFlag<SetFalse>>;
+def fdebug_info_for_pointer_type : Flag<["-"], "fdebug-info-for-pointer-type">,
+  Group<f_Group>, Visibility<[ClangOption, CC1Option]>,
+  HelpText<"Generate pseudo variables and their debug info for intermediate 
pointer accesses">,
+  MarshallingInfoFlag<CodeGenOpts<"DebugInfoForPointerType">>;
 def fprofile_instr_generate : Flag<["-"], "fprofile-instr-generate">,
     Group<f_Group>, Visibility<[ClangOption, CLOption]>,
     HelpText<"Generate instrumented code to collect execution counts into 
default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env 
var)">;
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp 
b/clang/lib/CodeGen/CGDebugInfo.cpp
index 0f3f684d61dc94..6ce40da22dc97d 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -5636,6 +5636,22 @@ void 
CGDebugInfo::EmitExternalVariable(llvm::GlobalVariable *Var,
   Var->addDebugInfo(GVE);
 }
 
+/// Emit a dbg.declare that describes \p Alloca as an automatic variable of
+/// type \p Ty, named after the alloca and located at \p Loc.
+void CGDebugInfo::EmitPseudoVariable(llvm::AllocaInst *Alloca, QualType Ty,
+                                     SourceLocation Loc) {
+  llvm::DIFile *File = getOrCreateFile(Loc);
+  auto *Scope = cast<llvm::DIScope>(LexicalBlockStack.back());
+  const unsigned Line = getLineNumber(Loc);
+  llvm::DILocalVariable *Var = DBuilder.createAutoVariable(
+      Scope, Alloca->getName(), File, Line, getOrCreateType(Ty, File));
+  llvm::DILocation *DIL = llvm::DILocation::get(
+      CGM.getLLVMContext(), Line, getColumnNumber(Loc), Scope, CurInlinedAt);
+  // Empty expression; the declare goes into the alloca's own block.
+  DBuilder.insertDeclare(Alloca, Var, DBuilder.createExpression(), DIL,
+                         Alloca->getParent());
+}
+
 void CGDebugInfo::EmitGlobalAlias(const llvm::GlobalValue *GV,
                                   const GlobalDecl GD) {
 
diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h
index 7b60e94555d060..a2c484f50b2bc5 100644
--- a/clang/lib/CodeGen/CGDebugInfo.h
+++ b/clang/lib/CodeGen/CGDebugInfo.h
@@ -529,6 +529,12 @@ class CGDebugInfo {
   /// Emit information about an external variable.
   void EmitExternalVariable(llvm::GlobalVariable *GV, const VarDecl *Decl);
 
+  /// Emit debug information for a pseudo variable that holds the value of an
+  /// intermediate expression, so that a performance counter can track the
+  /// usage of a specific expression of interest.
+  void EmitPseudoVariable(llvm::AllocaInst *Alloca, QualType Ty,
+                          SourceLocation Loc);
+
   /// Emit information about global variable alias.
   void EmitGlobalAlias(const llvm::GlobalValue *GV, const GlobalDecl Decl);
 
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index bbe14ef4c17244..5f7b2529179003 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -793,6 +793,10 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, 
const ValueDecl *D,
   Qualifiers::ObjCLifetime lifetime = lvalue.getObjCLifetime();
   if (!lifetime) {
     llvm::Value *value = EmitScalarExpr(init);
+    if (CGM.getCodeGenOpts().getDebugInfo() >
+            llvm::codegenoptions::DebugLineTablesOnly &&
+        CGM.getCodeGenOpts().DebugInfoForPointerType)
+      value = UnemitPseudoVariable(value);
     if (capturedByInit)
       drillIntoBlockVariable(*this, lvalue, cast<VarDecl>(D));
     EmitNullabilityCheck(lvalue, value, init->getExprLoc());
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index c5f6b6d3a99f0b..b979c0830c5b34 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -951,6 +951,58 @@ static llvm::Value *getArrayIndexingBound(CodeGenFunction 
&CGF,
   return nullptr;
 }
 
+/// Undo a pseudo variable previously emitted for \p V, if there is one.
+/// When a pseudo variable is created for %1, these instructions are generated
+/// in sequence and %2 is returned:
+///   %pseudo = alloca Ty
+///   call void @llvm.dbg.declare(metadata ptr %pseudo, metadata, metadata)
+///   store Ty %1, ptr %pseudo
+///   %2 = load Ty, ptr %pseudo
+/// If \p V is such a %2, the sequence is erased, uses of %2 are replaced with
+/// %1 and %1 is returned; otherwise \p V is returned unchanged.
+llvm::Value *CodeGenFunction::UnemitPseudoVariable(llvm::Value *V) {
+  if (!getDebugInfo())
+    return V;
+  auto *Load = dyn_cast<llvm::LoadInst>(V);
+  if (!Load)
+    return V;
+  llvm::Value *PseudoVar = Load->getPointerOperand();
+
+  // getPrevNode() is null for the first instruction of a basic block, so every
+  // backwards step must tolerate a missing predecessor.
+  auto *Store = dyn_cast_or_null<llvm::StoreInst>(Load->getPrevNode());
+  if (!Store || Store->getPointerOperand() != PseudoVar)
+    return V;
+  llvm::Value *OriginalValue = Store->getValueOperand();
+
+  auto *DbgCall = dyn_cast_or_null<llvm::CallInst>(Store->getPrevNode());
+  if (!DbgCall ||
+      DbgCall->getCalledFunction() !=
+          llvm::Intrinsic::getDeclaration(&CGM.getModule(),
+                                          llvm::Intrinsic::dbg_declare) ||
+      DbgCall->getNumOperands() != 4)
+    return V;
+  for (int i = 0; i < 3; i++)
+    if (!isa<llvm::MetadataAsValue>(DbgCall->getArgOperand(i)))
+      return V;
+  // The first dbg.declare argument must wrap the pseudo variable itself.
+  auto *Arg = cast<llvm::MetadataAsValue>(DbgCall->getArgOperand(0));
+  auto *Wrapped = dyn_cast<llvm::ValueAsMetadata>(Arg->getMetadata());
+  if (!Wrapped || Wrapped->getValue() != PseudoVar)
+    return V;
+
+  // Erase the alloca only when it is the pseudo variable itself.
+  auto *Alloca = dyn_cast_or_null<llvm::AllocaInst>(DbgCall->getPrevNode());
+  if (Alloca != PseudoVar)
+    return V;
+  V->replaceAllUsesWith(OriginalValue);
+  Load->eraseFromParent();
+  Store->eraseFromParent();
+  DbgCall->eraseFromParent();
+  Alloca->eraseFromParent();
+  return OriginalValue;
+}
+
 namespace {
 
 /// \p StructAccessBase returns the base \p Expr of a field access. It returns
@@ -2015,6 +2067,29 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address 
Addr, bool Volatile,
                         llvm::MDNode::get(getLLVMContext(), std::nullopt));
     }
 
+  // If -g2 or above and -fdebug-info-for-pointer-type are enabled, emit
+  // additional debug info for loads in an intermediate expression, which lets
+  // a performance counter deduce the type of the value being loaded even if
+  // it does not correspond to a variable in the source code.
+  // Since no variable corresponds to an intermediate expression, we create a
+  // pseudo variable for it and emit its debug info, as if the expression were
+  // written in SSA form.
+  if (CGM.getCodeGenOpts().getDebugInfo() > 
llvm::codegenoptions::DebugLineTablesOnly &&
+      CGM.getCodeGenOpts().DebugInfoForPointerType) {
+    if (CGDebugInfo *DI = getDebugInfo())
+      // We only generate this debug info if loading from GEP, not from other
+      // cases such as loading a function argument.
+      if (isa<llvm::GetElementPtrInst>(Load->getOperand(0))) {
+        const llvm::DebugLoc &DebugLoc = Load->getDebugLoc();
+        llvm::AllocaInst *PseudoVar = Builder.CreateAlloca(
+            Load->getType(), nullptr, 
Twine("pseudo_").concat(Twine(DebugLoc.getLine())).concat("_").concat(Twine(DebugLoc.getCol())));
+        DI->EmitPseudoVariable(PseudoVar, Ty, Loc);
+        Address PseudoVarAddr(PseudoVar, Load->getType(), Addr.getAlignment());
+        Builder.CreateStore(Load, PseudoVarAddr);
+        Load = Builder.CreateLoad(PseudoVarAddr);
+      }
+  }
+
   return EmitFromMemory(Load, Ty);
 }
 
@@ -5569,6 +5644,10 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const 
BinaryOperator *E) {
     }
 
     RValue RV = EmitAnyExpr(E->getRHS());
+    if (CGM.getCodeGenOpts().getDebugInfo() > 
llvm::codegenoptions::DebugLineTablesOnly &&
+        CGM.getCodeGenOpts().DebugInfoForPointerType)
+      if (isa<DeclRefExpr>(E->getLHS()) && RV.isScalar())
+        RV = RValue::get(UnemitPseudoVariable(RV.getScalarVal()));
     LValue LV = EmitCheckedLValue(E->getLHS(), TCK_Store);
     if (RV.isScalar())
       EmitNullabilityCheck(LV, RV.getScalarVal(), E->getExprLoc());
diff --git a/clang/lib/CodeGen/CodeGenFunction.h 
b/clang/lib/CodeGen/CodeGenFunction.h
index 143ad64e8816b1..36a572ace2ef64 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3104,6 +3104,11 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// Get the record field index as represented in debug info.
   unsigned getDebugInfoFIndex(const RecordDecl *Rec, unsigned FieldIndex);
 
+  /// When the result of EmitLoadOfScalar is immediately assigned to a declared
+  /// variable, the pseudo variable emitted for it (when the flag
+  /// -fdebug-info-for-pointer-type is specified) should be undone since there
+  /// is already a debug value emitted for the declared variable.
+  llvm::Value *UnemitPseudoVariable(llvm::Value *V);
 
   
//===--------------------------------------------------------------------===//
   //                            Declaration Emission
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp
index bcba7cbbdb58c2..7882c4f1225f1f 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4256,6 +4256,9 @@ renderDebugOptions(const ToolChain &TC, const Driver &D, 
const llvm::Triple &T,
   // decision should be made in the driver as well though.
   llvm::DebuggerKind DebuggerTuning = TC.getDefaultDebuggerTuning();
 
+  if (Args.hasArg(options::OPT_fdebug_info_for_pointer_type))
+    CmdArgs.push_back("-fdebug-info-for-pointer-type");
+
   bool SplitDWARFInlining =
       Args.hasFlag(options::OPT_fsplit_dwarf_inlining,
                    options::OPT_fno_split_dwarf_inlining, false);

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to