Re: [clang] 7aa1fa0 - Reland "[dwarf] Emit a DIGlobalVariable for constant strings."

2022-05-23 Thread David Blaikie via cfe-commits
(when recommitting a patch it can be helpful to mention the revisions
of the previous commit/revert, the reason for the revert and what's
different in this version of the patch that addresses that issue (or
how was the issue otherwise addressed))

On Wed, May 18, 2022 at 1:59 PM Mitch Phillips via cfe-commits
 wrote:
>
>
> Author: Mitch Phillips
> Date: 2022-05-18T13:56:45-07:00
> New Revision: 7aa1fa0a0a07f7949d2d77c099aab43cf9b75a91
>
> URL: 
> https://github.com/llvm/llvm-project/commit/7aa1fa0a0a07f7949d2d77c099aab43cf9b75a91
> DIFF: 
> https://github.com/llvm/llvm-project/commit/7aa1fa0a0a07f7949d2d77c099aab43cf9b75a91.diff
>
> LOG: Reland "[dwarf] Emit a DIGlobalVariable for constant strings."
>
> An upcoming patch will extend llvm-symbolizer to provide the source line
> information for global variables. The goal is to move AddressSanitizer
> off of internal debug info for symbolization onto the DWARF standard
> (and doing a clean-up in the process). Currently, ASan reports the line
> information for constant strings if a memory safety bug happens around
> them. We want to keep this behaviour, so we need to emit debuginfo for
> these variables as well.
>
> Reviewed By: dblaikie, rnk, aprantl
>
> Differential Revision: https://reviews.llvm.org/D123534
>
> Added:
> clang/test/CodeGen/debug-info-variables.c
> llvm/test/DebugInfo/COFF/global-no-strings.ll
>
> Modified:
> clang/lib/CodeGen/CGDebugInfo.cpp
> clang/lib/CodeGen/CGDebugInfo.h
> clang/lib/CodeGen/CodeGenModule.cpp
> clang/test/VFS/external-names.c
> llvm/lib/AsmParser/LLParser.cpp
> llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
> llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
>
> Removed:
> llvm/test/Assembler/invalid-diglobalvariable-missing-name.ll
>
>
> 
> diff  --git a/clang/lib/CodeGen/CGDebugInfo.cpp 
> b/clang/lib/CodeGen/CGDebugInfo.cpp
> index 3d73bfb8ce79..753427029441 100644
> --- a/clang/lib/CodeGen/CGDebugInfo.cpp
> +++ b/clang/lib/CodeGen/CGDebugInfo.cpp
> @@ -5132,7 +5132,7 @@ std::string CGDebugInfo::GetName(const Decl *D, bool 
> Qualified) const {
>  return Name;
>codegenoptions::DebugTemplateNamesKind TemplateNamesKind =
>CGM.getCodeGenOpts().getDebugSimpleTemplateNames();
> -
> +
>if (!CGM.getCodeGenOpts().hasReducedDebugInfo())
>  TemplateNamesKind = codegenoptions::DebugTemplateNamesKind::Full;
>
> @@ -5459,6 +5459,21 @@ void CGDebugInfo::EmitGlobalAlias(const 
> llvm::GlobalValue *GV,
>ImportedDeclCache[GD.getCanonicalDecl().getDecl()].reset(ImportDI);
>  }
>
> +void CGDebugInfo::AddStringLiteralDebugInfo(llvm::GlobalVariable *GV,
> +const StringLiteral *S) {
> +  SourceLocation Loc = S->getStrTokenLoc(0);
> +  PresumedLoc PLoc = CGM.getContext().getSourceManager().getPresumedLoc(Loc);
> +  if (!PLoc.isValid())
> +return;
> +
> +  llvm::DIFile *File = getOrCreateFile(Loc);
> +  llvm::DIGlobalVariableExpression *Debug =
> +  DBuilder.createGlobalVariableExpression(
> +  nullptr, StringRef(), StringRef(), getOrCreateFile(Loc),
> +  getLineNumber(Loc), getOrCreateType(S->getType(), File), true);
> +  GV->addDebugInfo(Debug);
> +}
> +
>  llvm::DIScope *CGDebugInfo::getCurrentContextDescriptor(const Decl *D) {
>if (!LexicalBlockStack.empty())
>  return LexicalBlockStack.back();
>
> diff  --git a/clang/lib/CodeGen/CGDebugInfo.h 
> b/clang/lib/CodeGen/CGDebugInfo.h
> index 8984f3eb1d7a..38e3fa5b2fa9 100644
> --- a/clang/lib/CodeGen/CGDebugInfo.h
> +++ b/clang/lib/CodeGen/CGDebugInfo.h
> @@ -533,6 +533,14 @@ class CGDebugInfo {
>/// Emit an @import declaration.
>void EmitImportDecl(const ImportDecl &ID);
>
> +  /// DebugInfo isn't attached to string literals by default. While certain
> +  /// aspects of debuginfo aren't useful for string literals (like a name), 
> it's
> +  /// nice to be able to symbolize the line and column information. This is
> +  /// especially useful for sanitizers, as it allows symbolization of
> +  /// heap-buffer-overflows on constant strings.
> +  void AddStringLiteralDebugInfo(llvm::GlobalVariable *GV,
> + const StringLiteral *S);
> +
>/// Emit C++ namespace alias.
>llvm::DIImportedEntity *EmitNamespaceAlias(const NamespaceAliasDecl &NA);
>
>
> diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp 
> b/clang/lib/CodeGen/CodeGenModule.cpp
> index f8bf210dc0e2..703cf4edf5f5 100644
> --- a/clang/lib/CodeGen/CodeGenModule.cpp
> +++ b/clang/lib/CodeGen/CodeGenModule.cpp
> @@ -5670,6 +5670,11 @@ 
> CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S,
>}
>
>auto GV = GenerateStringLiteral(C, LT, *this, GlobalVariableName, 
> Alignment);
> +
> +  CGDebugInfo *DI = getModuleDebugInfo();
> +  if (DI && getCodeGenOpts().hasReducedDebugInfo())
> +DI->AddStringLiteralDebugInfo(GV, S);
> +
>if (Entry)
>  *Entr

[clang] 7aa1fa0 - Reland "[dwarf] Emit a DIGlobalVariable for constant strings."

2022-05-18 Thread Mitch Phillips via cfe-commits

Author: Mitch Phillips
Date: 2022-05-18T13:56:45-07:00
New Revision: 7aa1fa0a0a07f7949d2d77c099aab43cf9b75a91

URL: 
https://github.com/llvm/llvm-project/commit/7aa1fa0a0a07f7949d2d77c099aab43cf9b75a91
DIFF: 
https://github.com/llvm/llvm-project/commit/7aa1fa0a0a07f7949d2d77c099aab43cf9b75a91.diff

LOG: Reland "[dwarf] Emit a DIGlobalVariable for constant strings."

An upcoming patch will extend llvm-symbolizer to provide the source line
information for global variables. The goal is to move AddressSanitizer
off of internal debug info for symbolization onto the DWARF standard
(and doing a clean-up in the process). Currently, ASan reports the line
information for constant strings if a memory safety bug happens around
them. We want to keep this behaviour, so we need to emit debuginfo for
these variables as well.

Reviewed By: dblaikie, rnk, aprantl

Differential Revision: https://reviews.llvm.org/D123534

Added: 
clang/test/CodeGen/debug-info-variables.c
llvm/test/DebugInfo/COFF/global-no-strings.ll

Modified: 
clang/lib/CodeGen/CGDebugInfo.cpp
clang/lib/CodeGen/CGDebugInfo.h
clang/lib/CodeGen/CodeGenModule.cpp
clang/test/VFS/external-names.c
llvm/lib/AsmParser/LLParser.cpp
llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp

Removed: 
llvm/test/Assembler/invalid-diglobalvariable-missing-name.ll



diff  --git a/clang/lib/CodeGen/CGDebugInfo.cpp 
b/clang/lib/CodeGen/CGDebugInfo.cpp
index 3d73bfb8ce79..753427029441 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -5132,7 +5132,7 @@ std::string CGDebugInfo::GetName(const Decl *D, bool 
Qualified) const {
 return Name;
   codegenoptions::DebugTemplateNamesKind TemplateNamesKind =
   CGM.getCodeGenOpts().getDebugSimpleTemplateNames();
-  
+
   if (!CGM.getCodeGenOpts().hasReducedDebugInfo())
 TemplateNamesKind = codegenoptions::DebugTemplateNamesKind::Full;
 
@@ -5459,6 +5459,21 @@ void CGDebugInfo::EmitGlobalAlias(const 
llvm::GlobalValue *GV,
   ImportedDeclCache[GD.getCanonicalDecl().getDecl()].reset(ImportDI);
 }
 
+void CGDebugInfo::AddStringLiteralDebugInfo(llvm::GlobalVariable *GV,
+const StringLiteral *S) {
+  SourceLocation Loc = S->getStrTokenLoc(0);
+  PresumedLoc PLoc = CGM.getContext().getSourceManager().getPresumedLoc(Loc);
+  if (!PLoc.isValid())
+return;
+
+  llvm::DIFile *File = getOrCreateFile(Loc);
+  llvm::DIGlobalVariableExpression *Debug =
+  DBuilder.createGlobalVariableExpression(
+  nullptr, StringRef(), StringRef(), getOrCreateFile(Loc),
+  getLineNumber(Loc), getOrCreateType(S->getType(), File), true);
+  GV->addDebugInfo(Debug);
+}
+
 llvm::DIScope *CGDebugInfo::getCurrentContextDescriptor(const Decl *D) {
   if (!LexicalBlockStack.empty())
 return LexicalBlockStack.back();

diff  --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h
index 8984f3eb1d7a..38e3fa5b2fa9 100644
--- a/clang/lib/CodeGen/CGDebugInfo.h
+++ b/clang/lib/CodeGen/CGDebugInfo.h
@@ -533,6 +533,14 @@ class CGDebugInfo {
   /// Emit an @import declaration.
   void EmitImportDecl(const ImportDecl &ID);
 
+  /// DebugInfo isn't attached to string literals by default. While certain
+  /// aspects of debuginfo aren't useful for string literals (like a name), 
it's
+  /// nice to be able to symbolize the line and column information. This is
+  /// especially useful for sanitizers, as it allows symbolization of
+  /// heap-buffer-overflows on constant strings.
+  void AddStringLiteralDebugInfo(llvm::GlobalVariable *GV,
+ const StringLiteral *S);
+
   /// Emit C++ namespace alias.
   llvm::DIImportedEntity *EmitNamespaceAlias(const NamespaceAliasDecl &NA);
 

diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index f8bf210dc0e2..703cf4edf5f5 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -5670,6 +5670,11 @@ CodeGenModule::GetAddrOfConstantStringFromLiteral(const 
StringLiteral *S,
   }
 
   auto GV = GenerateStringLiteral(C, LT, *this, GlobalVariableName, Alignment);
+
+  CGDebugInfo *DI = getModuleDebugInfo();
+  if (DI && getCodeGenOpts().hasReducedDebugInfo())
+DI->AddStringLiteralDebugInfo(GV, S);
+
   if (Entry)
 *Entry = GV;
 

diff  --git a/clang/test/CodeGen/debug-info-variables.c 
b/clang/test/CodeGen/debug-info-variables.c
new file mode 100644
index ..8ec60ff7c1d9
--- /dev/null
+++ b/clang/test/CodeGen/debug-info-variables.c
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 %s -debug-info-kind=standalone -S -emit-llvm -o - | 
FileCheck %s
+
+// CHECK: DIGlobalVariable(name: "global",{{.*}} line: [[@LINE+1]]
+int global = 42;
+
+// CHECK: DIGlobalVariable({{.*}}line: [[@LINE+4]],{{.*}} type: 
[[TYPEID:![0-9]+]]
+// CHECK: [[TYPEID]] = !DIComp