[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2022-07-01 Thread Vitaly Buka via Phabricator via cfe-commits
vitalybuka added a comment.

In D115844#3625781 , @ychen wrote:

> In D115844#3625776 , @vitalybuka 
> wrote:
>
>> @ychen  This patch causes 20% .o size increase (x86_64)  Is this expected?
>
> No, it is not expected.  Do you have a test case? I'll take a look.

Sure, I'll create reproducer.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115844/new/

https://reviews.llvm.org/D115844

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2022-07-01 Thread Yuanfang Chen via Phabricator via cfe-commits
ychen added a comment.

In D115844#3625776 , @vitalybuka 
wrote:

> @ychen  This patch causes 20% .o size increase (x86_64)  Is this expected?

No, it is not expected.  Do you have a test case? I'll take a look.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115844/new/

https://reviews.llvm.org/D115844

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2022-07-01 Thread Vitaly Buka via Phabricator via cfe-commits
vitalybuka added a comment.

@ychen  This patch causes 20% .o size increase (x86_64)  Is this expected?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115844/new/

https://reviews.llvm.org/D115844

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2022-06-27 Thread Yuanfang Chen via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG6678f8e505b1: [ubsan] Using metadata instead of prologue 
data for function sanitizer (authored by ychen).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115844/new/

https://reviews.llvm.org/D115844

Files:
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/CodeGen/CodeGenModule.h
  clang/lib/Driver/SanitizerArgs.cpp
  clang/test/CodeGen/ubsan-function.cpp
  clang/test/CodeGenCXX/catch-undef-behavior.cpp
  clang/test/CodeGenCXX/ubsan-function-noexcept.cpp
  clang/test/Driver/fsanitize.c
  llvm/docs/LangRef.rst
  llvm/include/llvm/IR/FixedMetadataKinds.def
  llvm/include/llvm/IR/MDBuilder.h
  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
  llvm/lib/IR/MDBuilder.cpp
  llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
  llvm/test/CodeGen/X86/func-sanitizer.ll

Index: llvm/test/CodeGen/X86/func-sanitizer.ll
===
--- /dev/null
+++ llvm/test/CodeGen/X86/func-sanitizer.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+; CHECK: _Z3funv:
+; CHECK: .cfi_startproc
+; CHECK: .long   846595819
+; CHECK: .long   .L__llvm_rtti_proxy-_Z3funv
+; CHECK: .L__llvm_rtti_proxy:
+; CHECK: .quad   i
+; CHECK: .size   .L__llvm_rtti_proxy, 8
+
+@i = linkonce_odr constant i32 1
+@__llvm_rtti_proxy = private unnamed_addr constant i32* @i
+
+define dso_local void @_Z3funv() !func_sanitize !0 {
+  ret void
+}
+
+!0 = !{i32 846595819, i32** @__llvm_rtti_proxy}
Index: llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
===
--- llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1217,7 +1217,11 @@
 /// Check if \p G has been created by a trusted compiler pass.
 static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) {
   // Do not instrument @llvm.global_ctors, @llvm.used, etc.
-  if (G->getName().startswith("llvm."))
+  if (G->getName().startswith("llvm.") ||
+  // Do not instrument gcov counter arrays.
+  G->getName().startswith("__llvm_gcov_ctr") ||
+  // Do not instrument rtti proxy symbols for function sanitizer.
+  G->getName().startswith("__llvm_rtti_proxy"))
 return true;
 
   // Do not instrument asan globals.
@@ -1226,10 +1230,6 @@
   G->getName().startswith(kODRGenPrefix))
 return true;
 
-  // Do not instrument gcov counter arrays.
-  if (G->getName() == "__llvm_gcov_ctr")
-return true;
-
   return false;
 }
 
Index: llvm/lib/IR/MDBuilder.cpp
===
--- llvm/lib/IR/MDBuilder.cpp
+++ llvm/lib/IR/MDBuilder.cpp
@@ -150,6 +150,14 @@
   return MDNode::get(Context, Ops);
 }
 
+MDNode *MDBuilder::createRTTIPointerPrologue(Constant *PrologueSig,
+ Constant *RTTI) {
+  SmallVector Ops;
+  Ops.push_back(createConstant(PrologueSig));
+  Ops.push_back(createConstant(RTTI));
+  return MDNode::get(Context, Ops);
+}
+
 MDNode *MDBuilder::createAnonymousAARoot(StringRef Name, MDNode *Extra) {
   SmallVector Args(1, nullptr);
   if (Extra)
Index: llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===
--- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1004,6 +1004,24 @@
   // Emit the prologue data.
   if (F.hasPrologueData())
 emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData());
+
+  // Emit the function prologue data for the indirect call sanitizer.
+  if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
+assert(TM.getTargetTriple().getArch() == Triple::x86 ||
+   TM.getTargetTriple().getArch() == Triple::x86_64);
+assert(MD->getNumOperands() == 2);
+
+auto *PrologueSig = mdconst::extract(MD->getOperand(0));
+auto *FTRTTIProxy = mdconst::extract(MD->getOperand(1));
+assert(PrologueSig && FTRTTIProxy);
+emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
+
+const MCExpr *Proxy = lowerConstant(FTRTTIProxy);
+const MCExpr *FnExp = MCSymbolRefExpr::create(CurrentFnSym, OutContext);
+const MCExpr *PCRel = MCBinaryExpr::createSub(Proxy, FnExp, OutContext);
+// Use 32 bit since only small code model is supported.
+OutStreamer->emitValue(PCRel, 4u);
+  }
 }
 
 /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
Index: llvm/include/llvm/IR/MDBuilder.h
===
--- llvm/include/llvm/IR/MDBuilder.h
+++ llvm/include/llvm/IR/MDBuilder.h
@@ -108,6 +108,10 @@
   /// Merge the new call

[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2022-06-27 Thread Yuanfang Chen via Phabricator via cfe-commits
ychen updated this revision to Diff 440329.
ychen marked an inline comment as done.
ychen added a comment.

Update LangRef.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115844/new/

https://reviews.llvm.org/D115844

Files:
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/CodeGen/CodeGenModule.h
  clang/lib/Driver/SanitizerArgs.cpp
  clang/test/CodeGen/ubsan-function.cpp
  clang/test/CodeGenCXX/catch-undef-behavior.cpp
  clang/test/CodeGenCXX/ubsan-function-noexcept.cpp
  clang/test/Driver/fsanitize.c
  llvm/docs/LangRef.rst
  llvm/include/llvm/IR/FixedMetadataKinds.def
  llvm/include/llvm/IR/MDBuilder.h
  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
  llvm/lib/IR/MDBuilder.cpp
  llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
  llvm/test/CodeGen/X86/func-sanitizer.ll

Index: llvm/test/CodeGen/X86/func-sanitizer.ll
===
--- /dev/null
+++ llvm/test/CodeGen/X86/func-sanitizer.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+; CHECK: _Z3funv:
+; CHECK: .cfi_startproc
+; CHECK: .long   846595819
+; CHECK: .long   .L__llvm_rtti_proxy-_Z3funv
+; CHECK: .L__llvm_rtti_proxy:
+; CHECK: .quad   i
+; CHECK: .size   .L__llvm_rtti_proxy, 8
+
+@i = linkonce_odr constant i32 1
+@__llvm_rtti_proxy = private unnamed_addr constant i32* @i
+
+define dso_local void @_Z3funv() !func_sanitize !0 {
+  ret void
+}
+
+!0 = !{i32 846595819, i32** @__llvm_rtti_proxy}
Index: llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
===
--- llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1217,7 +1217,11 @@
 /// Check if \p G has been created by a trusted compiler pass.
 static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) {
   // Do not instrument @llvm.global_ctors, @llvm.used, etc.
-  if (G->getName().startswith("llvm."))
+  if (G->getName().startswith("llvm.") ||
+  // Do not instrument gcov counter arrays.
+  G->getName().startswith("__llvm_gcov_ctr") ||
+  // Do not instrument rtti proxy symbols for function sanitizer.
+  G->getName().startswith("__llvm_rtti_proxy"))
 return true;
 
   // Do not instrument asan globals.
@@ -1226,10 +1230,6 @@
   G->getName().startswith(kODRGenPrefix))
 return true;
 
-  // Do not instrument gcov counter arrays.
-  if (G->getName() == "__llvm_gcov_ctr")
-return true;
-
   return false;
 }
 
Index: llvm/lib/IR/MDBuilder.cpp
===
--- llvm/lib/IR/MDBuilder.cpp
+++ llvm/lib/IR/MDBuilder.cpp
@@ -150,6 +150,14 @@
   return MDNode::get(Context, Ops);
 }
 
+MDNode *MDBuilder::createRTTIPointerPrologue(Constant *PrologueSig,
+ Constant *RTTI) {
+  SmallVector Ops;
+  Ops.push_back(createConstant(PrologueSig));
+  Ops.push_back(createConstant(RTTI));
+  return MDNode::get(Context, Ops);
+}
+
 MDNode *MDBuilder::createAnonymousAARoot(StringRef Name, MDNode *Extra) {
   SmallVector Args(1, nullptr);
   if (Extra)
Index: llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===
--- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1004,6 +1004,24 @@
   // Emit the prologue data.
   if (F.hasPrologueData())
 emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData());
+
+  // Emit the function prologue data for the indirect call sanitizer.
+  if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
+assert(TM.getTargetTriple().getArch() == Triple::x86 ||
+   TM.getTargetTriple().getArch() == Triple::x86_64);
+assert(MD->getNumOperands() == 2);
+
+auto *PrologueSig = mdconst::extract(MD->getOperand(0));
+auto *FTRTTIProxy = mdconst::extract(MD->getOperand(1));
+assert(PrologueSig && FTRTTIProxy);
+emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
+
+const MCExpr *Proxy = lowerConstant(FTRTTIProxy);
+const MCExpr *FnExp = MCSymbolRefExpr::create(CurrentFnSym, OutContext);
+const MCExpr *PCRel = MCBinaryExpr::createSub(Proxy, FnExp, OutContext);
+// Use 32 bit since only small code model is supported.
+OutStreamer->emitValue(PCRel, 4u);
+  }
 }
 
 /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
Index: llvm/include/llvm/IR/MDBuilder.h
===
--- llvm/include/llvm/IR/MDBuilder.h
+++ llvm/include/llvm/IR/MDBuilder.h
@@ -108,6 +108,10 @@
   /// Merge the new callback encoding \p NewCB into \p ExistingCallbacks.
   MDNode *mergeCallbackEncodings(MDNode *ExistingCallbacks, MDNode *NewCB);
 
+ 

[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2022-06-23 Thread Peter Collingbourne via Phabricator via cfe-commits
pcc accepted this revision.
pcc added a comment.
This revision is now accepted and ready to land.

LGTM

> Yes, but not indirectly called from C/C++ but rather from compiler-generated 
> code right? That's presumably why this patch + D116130 
>  worked for @zhuhan0.

If the answer to my question is "yes" can you please update the commit message 
to not talk about changing the function signature since that's not the problem 
here.




Comment at: clang/lib/CodeGen/CodeGenModule.cpp:1843
+
+  auto *FTRTTIProxy = new llvm::GlobalVariable(
+  TheModule, Addr->getType(),

ychen wrote:
> pcc wrote:
> > ychen wrote:
> > > pcc wrote:
> > > > Are these proxy variables necessary? I think that now that we have 
> > > > custom code generation for this you should be able to use a GOTPCREL 
> > > > relocation to refer to the global.
> > > I attempted the GOTPCREL approach in a local branch. It didn't work for a 
> > > reason that I couldn't remember off the top of my head. I'll find out.
> > > 
> > > > I think that now that we have custom code generation for this 
> > > 
> > > Sorry, I don't quite follow which `custom code generation` you are 
> > > referring to. Do you mean the changes in `AsmPrinter.cpp`?
> > > Sorry, I don't quite follow which custom code generation you are 
> > > referring to. Do you mean the changes in AsmPrinter.cpp?
> > 
> > Yes, that's what I meant.
> @pcc, I looked into my local branch that uses PCREL approach. It is not 
> simpler/cleaner than the current approach due to the X86/X86-64, macho/ELF 
> difference. These need their specific relocation types.
Okay, let's go with this then.



Comment at: llvm/docs/LangRef.rst:5209
 know or know it can't preserve. Currently there is an exception for metadata
 attachment to globals for ``!type`` and ``!absolute_symbol`` which can't be
 unconditionally dropped unless the global is itself deleted.

We should add `!func_sanitize` here as well.



Comment at: llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp:843
+
+  // Emit the function prologue data for the indirect call sanitizer.
+  if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {

ychen wrote:
> pcc wrote:
> > What if we have both prologue data and this metadata? Should it be an error?
> It may or may not be depending on what is in the prologue data (if it just 
> adds up a counter in the prologue, it should be fine?). How about clarifying 
> this in Langref for this new `MD_func_sanitize`? If being conservative, we 
> could error this out in the Verifier. WDYT?
Sounds good.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115844/new/

https://reviews.llvm.org/D115844

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2022-06-13 Thread Yuanfang Chen via Phabricator via cfe-commits
ychen added a comment.

gentle ping..


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115844/new/

https://reviews.llvm.org/D115844

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2022-06-03 Thread Yuanfang Chen via Phabricator via cfe-commits
ychen added inline comments.
Herald added subscribers: jsji, Enna1.



Comment at: clang/lib/CodeGen/CodeGenModule.cpp:1843
+
+  auto *FTRTTIProxy = new llvm::GlobalVariable(
+  TheModule, Addr->getType(),

pcc wrote:
> ychen wrote:
> > pcc wrote:
> > > Are these proxy variables necessary? I think that now that we have custom 
> > > code generation for this you should be able to use a GOTPCREL relocation 
> > > to refer to the global.
> > I attempted the GOTPCREL approach in a local branch. It didn't work for a 
> > reason that I couldn't remember off the top of my head. I'll find out.
> > 
> > > I think that now that we have custom code generation for this 
> > 
> > Sorry, I don't quite follow which `custom code generation` you are 
> > referring to. Do you mean the changes in `AsmPrinter.cpp`?
> > Sorry, I don't quite follow which custom code generation you are referring 
> > to. Do you mean the changes in AsmPrinter.cpp?
> 
> Yes, that's what I meant.
@pcc, I looked into my local branch that uses PCREL approach. It is not 
simpler/cleaner than the current approach due to the X86/X86-64, macho/ELF 
difference. These need their specific relocation types.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115844/new/

https://reviews.llvm.org/D115844

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2022-05-12 Thread Peter Collingbourne via Phabricator via cfe-commits
pcc added inline comments.



Comment at: clang/lib/CodeGen/CodeGenModule.cpp:1843
+
+  auto *FTRTTIProxy = new llvm::GlobalVariable(
+  TheModule, Addr->getType(),

ychen wrote:
> pcc wrote:
> > Are these proxy variables necessary? I think that now that we have custom 
> > code generation for this you should be able to use a GOTPCREL relocation to 
> > refer to the global.
> I attempted the GOTPCREL approach in a local branch. It didn't work for a 
> reason that I couldn't remember off the top of my head. I'll find out.
> 
> > I think that now that we have custom code generation for this 
> 
> Sorry, I don't quite follow which `custom code generation` you are referring 
> to. Do you mean the changes in `AsmPrinter.cpp`?
> Sorry, I don't quite follow which custom code generation you are referring 
> to. Do you mean the changes in AsmPrinter.cpp?

Yes, that's what I meant.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115844/new/

https://reviews.llvm.org/D115844

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2022-05-12 Thread Yuanfang Chen via Phabricator via cfe-commits
ychen added inline comments.



Comment at: clang/lib/CodeGen/CodeGenModule.cpp:1843
+
+  auto *FTRTTIProxy = new llvm::GlobalVariable(
+  TheModule, Addr->getType(),

pcc wrote:
> Are these proxy variables necessary? I think that now that we have custom 
> code generation for this you should be able to use a GOTPCREL relocation to 
> refer to the global.
I attempted the GOTPCREL approach in a local branch. It didn't work for a 
reason that I couldn't remember off the top of my head. I'll find out.

> I think that now that we have custom code generation for this 

Sorry, I don't quite follow which `custom code generation` you are referring 
to. Do you mean the changes in `AsmPrinter.cpp`?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115844/new/

https://reviews.llvm.org/D115844

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2022-05-12 Thread Peter Collingbourne via Phabricator via cfe-commits
pcc added inline comments.



Comment at: clang/lib/CodeGen/CodeGenModule.cpp:1843
+
+  auto *FTRTTIProxy = new llvm::GlobalVariable(
+  TheModule, Addr->getType(),

Are these proxy variables necessary? I think that now that we have custom code 
generation for this you should be able to use a GOTPCREL relocation to refer to 
the global.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115844/new/

https://reviews.llvm.org/D115844

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2022-04-27 Thread Yuanfang Chen via Phabricator via cfe-commits
ychen added a comment.

gentle ping. @pcc  could we please move this forward if the direction looks 
good to you? It has been sitting for a while...


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115844/new/

https://reviews.llvm.org/D115844

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2022-04-01 Thread Yuanfang Chen via Phabricator via cfe-commits
ychen added a comment.

ping..


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115844/new/

https://reviews.llvm.org/D115844

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2022-03-25 Thread Yuanfang Chen via Phabricator via cfe-commits
ychen added a comment.
Herald added a subscriber: MaskRay.

ping ...


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115844/new/

https://reviews.llvm.org/D115844

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2022-03-17 Thread Yuanfang Chen via Phabricator via cfe-commits
ychen added a comment.
Herald added a project: All.

gentle ping..


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115844/new/

https://reviews.llvm.org/D115844

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2022-02-15 Thread Yuanfang Chen via Phabricator via cfe-commits
ychen updated this revision to Diff 408752.
ychen added a comment.
Herald added subscribers: jdoerfert, pengfei.

- check code model in driver
- document `func_sanitizer` in Langref
- add a codegen test


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115844/new/

https://reviews.llvm.org/D115844

Files:
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/CodeGen/CodeGenModule.h
  clang/lib/Driver/SanitizerArgs.cpp
  clang/test/CodeGen/ubsan-function.cpp
  clang/test/CodeGenCXX/catch-undef-behavior.cpp
  clang/test/CodeGenCXX/ubsan-function-noexcept.cpp
  clang/test/Driver/fsanitize.c
  llvm/docs/LangRef.rst
  llvm/include/llvm/IR/FixedMetadataKinds.def
  llvm/include/llvm/IR/MDBuilder.h
  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
  llvm/lib/IR/MDBuilder.cpp
  llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
  llvm/test/CodeGen/X86/func-sanitizer.ll

Index: llvm/test/CodeGen/X86/func-sanitizer.ll
===
--- /dev/null
+++ llvm/test/CodeGen/X86/func-sanitizer.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+; CHECK: _Z3funv:
+; CHECK: .cfi_startproc
+; CHECK: .long   846595819
+; CHECK: .long   .L__llvm_rtti_proxy-_Z3funv
+; CHECK: .L__llvm_rtti_proxy:
+; CHECK: .quad   i
+; CHECK: .size   .L__llvm_rtti_proxy, 8
+
+@i = linkonce_odr constant i32 1
+@__llvm_rtti_proxy = private unnamed_addr constant i32* @i
+
+define dso_local void @_Z3funv() !func_sanitize !0 {
+  ret void
+}
+
+!0 = !{i32 846595819, i32** @__llvm_rtti_proxy}
Index: llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
===
--- llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1405,7 +1405,11 @@
 /// Check if \p G has been created by a trusted compiler pass.
 static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) {
   // Do not instrument @llvm.global_ctors, @llvm.used, etc.
-  if (G->getName().startswith("llvm."))
+  if (G->getName().startswith("llvm.") ||
+  // Do not instrument gcov counter arrays.
+  G->getName().startswith("__llvm_gcov_ctr") ||
+  // Do not instrument rtti proxy symbols for function sanitizer.
+  G->getName().startswith("__llvm_rtti_proxy"))
 return true;
 
   // Do not instrument asan globals.
@@ -1414,10 +1418,6 @@
   G->getName().startswith(kODRGenPrefix))
 return true;
 
-  // Do not instrument gcov counter arrays.
-  if (G->getName() == "__llvm_gcov_ctr")
-return true;
-
   return false;
 }
 
Index: llvm/lib/IR/MDBuilder.cpp
===
--- llvm/lib/IR/MDBuilder.cpp
+++ llvm/lib/IR/MDBuilder.cpp
@@ -150,6 +150,14 @@
   return MDNode::get(Context, Ops);
 }
 
+MDNode *MDBuilder::createRTTIPointerPrologue(Constant *PrologueSig,
+ Constant *RTTI) {
+  SmallVector Ops;
+  Ops.push_back(createConstant(PrologueSig));
+  Ops.push_back(createConstant(RTTI));
+  return MDNode::get(Context, Ops);
+}
+
 MDNode *MDBuilder::createAnonymousAARoot(StringRef Name, MDNode *Extra) {
   SmallVector Args(1, nullptr);
   if (Extra)
Index: llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===
--- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -844,6 +844,24 @@
   // Emit the prologue data.
   if (F.hasPrologueData())
 emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData());
+
+  // Emit the function prologue data for the indirect call sanitizer.
+  if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
+assert(TM.getTargetTriple().getArch() == Triple::x86 ||
+   TM.getTargetTriple().getArch() == Triple::x86_64);
+assert(MD->getNumOperands() == 2);
+
+auto *PrologueSig = mdconst::extract(MD->getOperand(0));
+auto *FTRTTIProxy = mdconst::extract(MD->getOperand(1));
+assert(PrologueSig && FTRTTIProxy);
+emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
+
+const MCExpr *Proxy = lowerConstant(FTRTTIProxy);
+const MCExpr *FnExp = MCSymbolRefExpr::create(CurrentFnSym, OutContext);
+const MCExpr *PCRel = MCBinaryExpr::createSub(Proxy, FnExp, OutContext);
+// Use 32 bit since only small code model is supported.
+OutStreamer->emitValue(PCRel, 4u);
+  }
 }
 
 /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
Index: llvm/include/llvm/IR/MDBuilder.h
===
--- llvm/include/llvm/IR/MDBuilder.h
+++ llvm/include/llvm/IR/MDBuilder.h
@@ -108,6 +108,10 @@
   /// Merge the new callback encoding \p NewCB into \p ExistingCallbacks.
   M

[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2022-02-15 Thread Yuanfang Chen via Phabricator via cfe-commits
ychen added inline comments.



Comment at: clang/lib/CodeGen/CGExpr.cpp:5171
   (!TargetDecl || !isa(TargetDecl))) {
+assert((CGM.getCodeGenOpts().CodeModel == "default" ||
+CGM.getCodeGenOpts().CodeModel == "small") &&

pcc wrote:
> What happens when building with other code models? Hopefully we get an error 
> of some sort before hitting this assertion failure, right?
> What happens when building with other code models?

The PCRel offset is 32bit currently regardless of the code model. The proxy 
variable might be out of 32-bit offset from the function (for the large model; 
for the medium model, only if the proxy variable is in `.ldata`,`.lrodata` 
which is further away), then loading some random data may give false positives. 

We could fix this by making the PCRel offset 64 bit for the medium or the large 
model. But since no one complains all these years, I guess it is a real edge 
use case.

> Hopefully we get an error of some sort before hitting this assertion failure, 
> right?

We didn't. I'll add it.




Comment at: llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp:843
+
+  // Emit the function prologue data for the indirect call sanitizer.
+  if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {

pcc wrote:
> What if we have both prologue data and this metadata? Should it be an error?
It may or may not be depending on what is in the prologue data (if it just adds 
up a counter in the prologue, it should be fine?). How about clarifying this in 
Langref for this new `MD_func_sanitize`? If being conservative, we could error 
this out in the Verifier. WDYT?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115844/new/

https://reviews.llvm.org/D115844

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2022-02-14 Thread Peter Collingbourne via Phabricator via cfe-commits
pcc added a comment.

In D115844#3321235 , @ychen wrote:

> In D115844#3321190 , @pcc wrote:
>
>> On the bug you have:
>>
>>   define internal fastcc void 
>> @​_Z4callIiE4taskv.resume(%_Z4callIiE4taskv.Frame* noalias nonnull 
>> align 8 dereferenceable(24
>>   ) %FramePtr) #​1 prologue <{ i32, i32 }> <{ i32 846595819, i32 
>> trunc (i64 sub (i64 ptrtoint (i8** @​1 to i64), i64 ptrtoint (void ()* 
>> @​_Z4callIiE4taskv to i64)) to i32) }> {...}
>>
>> Is it possible for the C/C++ code to take the address of the function 
>> `_Z4callIiE4taskv.resume` and call it indirectly?
>
> `*.resume` is a compiler inserted function that is opaque to the programmer. 
> It is called indirectly most of the time if not all the time.

Yes, but not indirectly called from C/C++ but rather from compiler-generated 
code right? That's presumably why this patch + D116130 
 worked for @zhuhan0.

>> If not, it seems like the right fix would be to arrange for the prologue 
>> data to be dropped on the `.resume` function instead of duplicating it 
>> there. I would also imagine that whatever signature you have on the 
>> `.resume` function would be incorrect since it appears that the coro 
>> splitting pass will use a different function signature for that function.
>
> That is addressed by D116130 . @rjmccall 
> suggested the direction of this patch (which I agreed) 
> https://reviews.llvm.org/D114728#3159303.

Okay. It seems unfortunate to have to special-case this just because it uses 
relative relocations. But that's probably the best that we can do without 
changing the global variable initializer representation (as well as 
prefix/prologue data) to be blob plus relocations.




Comment at: clang/lib/CodeGen/CGExpr.cpp:5171
   (!TargetDecl || !isa(TargetDecl))) {
+assert((CGM.getCodeGenOpts().CodeModel == "default" ||
+CGM.getCodeGenOpts().CodeModel == "small") &&

What happens when building with other code models? Hopefully we get an error of 
some sort before hitting this assertion failure, right?



Comment at: llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp:843
+
+  // Emit the function prologue data for the indirect call sanitizer.
+  if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {

What if we have both prologue data and this metadata? Should it be an error?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115844/new/

https://reviews.llvm.org/D115844

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2022-02-14 Thread Yuanfang Chen via Phabricator via cfe-commits
ychen added a comment.

In D115844#3321190 , @pcc wrote:

> On the bug you have:
>
>   define internal fastcc void 
> @​_Z4callIiE4taskv.resume(%_Z4callIiE4taskv.Frame* noalias nonnull 
> align 8 dereferenceable(24
>   ) %FramePtr) #​1 prologue <{ i32, i32 }> <{ i32 846595819, i32 
> trunc (i64 sub (i64 ptrtoint (i8** @​1 to i64), i64 ptrtoint (void ()* 
> @​_Z4callIiE4taskv to i64)) to i32) }> {...}
>
> Is it possible for the C/C++ code to take the address of the function 
> `_Z4callIiE4taskv.resume` and call it indirectly?

`*.resume` is a compiler inserted function that is opaque to the programmer. It 
is called indirectly most of the time if not all the time.

> If not, it seems like the right fix would be to arrange for the prologue data 
> to be dropped on the `.resume` function instead of duplicating it there. I 
> would also imagine that whatever signature you have on the `.resume` function 
> would be incorrect since it appears that the coro splitting pass will use a 
> different function signature for that function.

That is addressed by D116130 . @rjmccall 
suggested the direction of this patch (which I agreed) 
https://reviews.llvm.org/D114728#3159303.

> Note that D119296  will have the same 
> problem.

Thanks for the info!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115844/new/

https://reviews.llvm.org/D115844

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2022-02-14 Thread Peter Collingbourne via Phabricator via cfe-commits
pcc added a comment.

On the bug you have:

  define internal fastcc void 
@​_Z4callIiE4taskv.resume(%_Z4callIiE4taskv.Frame* noalias nonnull align 
8 dereferenceable(24
  ) %FramePtr) #​1 prologue <{ i32, i32 }> <{ i32 846595819, i32 
trunc (i64 sub (i64 ptrtoint (i8** @​1 to i64), i64 ptrtoint (void ()* 
@​_Z4callIiE4taskv to i64)) to i32) }> {...}

Is it possible for the C/C++ code to take the address of the function 
`_Z4callIiE4taskv.resume` and call it indirectly? If not, it seems like the 
right fix would be to arrange for the prologue data to be dropped on the 
`.resume` function instead of duplicating it there. I would also imagine that 
whatever signature you have on the `.resume` function would be incorrect since 
it appears that the coro splitting pass will use a different function signature 
for that function.

Note that D119296  will have the same problem.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115844/new/

https://reviews.llvm.org/D115844

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2022-02-02 Thread Yuanfang Chen via Phabricator via cfe-commits
ychen added a comment.

gentle ping..


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115844/new/

https://reviews.llvm.org/D115844

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2022-02-02 Thread Han Zhu via Phabricator via cfe-commits
zhuhan0 added a comment.

I verified this patch together with https://reviews.llvm.org/D116130 fix the 
issue https://github.com/llvm/llvm-project/issues/49689. I'm not an expert in 
this area. Could someone review this patch so we can move forward with the fix?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115844/new/

https://reviews.llvm.org/D115844

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2022-01-18 Thread Yuanfang Chen via Phabricator via cfe-commits
ychen added a comment.

Hi @pcc, does this make sense to you? Thanks.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115844/new/

https://reviews.llvm.org/D115844

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D115844: [ubsan] Using metadata instead of prologue data for function sanitizer

2021-12-15 Thread Yuanfang Chen via Phabricator via cfe-commits
ychen created this revision.
ychen added reviewers: pcc, vsk, rjmccall.
Herald added subscribers: dexonsmith, hiraditya.
ychen requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Information in the function `Prologue Data` is intentionally opaque. This
is fine from the function sanitizer's perspective if function manipulations
(duplication etc.) do not change the function signature. However,
coroutine lowering needs to split one function into several functions
that have different signatures. The ideal way to solve this is to make
the sanitizer information not opaque so that LLVM passes know how to deal with
it. This patch detaches the information from function `Prologue Data`
and attaches it to a function metadata node.

Alternatively,
(1) function attributes do not work because it could not carry
GlobalValue.
(2) I've considered adding one additional function optional
data(D13829 ) for the function sanitizer. 
However, it requires LL/parser
changes that I want to avoid.

If this is agreed upon, I'll send an follow-up patch fix PR50345.

Do you think this is worth doing? If so, which approach is better: this
patch or (2)?


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D115844

Files:
  clang/lib/CodeGen/CGExpr.cpp
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/CodeGen/CodeGenModule.h
  clang/test/CodeGen/ubsan-function.cpp
  clang/test/CodeGenCXX/catch-undef-behavior.cpp
  clang/test/CodeGenCXX/ubsan-function-noexcept.cpp
  llvm/include/llvm/IR/FixedMetadataKinds.def
  llvm/include/llvm/IR/MDBuilder.h
  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
  llvm/lib/IR/MDBuilder.cpp
  llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp

Index: llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
===
--- llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1405,7 +1405,11 @@
 /// Check if \p G has been created by a trusted compiler pass.
 static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) {
   // Do not instrument @llvm.global_ctors, @llvm.used, etc.
-  if (G->getName().startswith("llvm."))
+  if (G->getName().startswith("llvm.") ||
+  // Do not instrument gcov counter arrays.
+  G->getName().startswith("__llvm_gcov_ctr") ||
+  // Do not instrument rtti proxy symbols for function sanitizer.
+  G->getName().startswith("__llvm_rtti_proxy"))
 return true;
 
   // Do not instrument asan globals.
@@ -1414,10 +1418,6 @@
   G->getName().startswith(kODRGenPrefix))
 return true;
 
-  // Do not instrument gcov counter arrays.
-  if (G->getName() == "__llvm_gcov_ctr")
-return true;
-
   return false;
 }
 
Index: llvm/lib/IR/MDBuilder.cpp
===
--- llvm/lib/IR/MDBuilder.cpp
+++ llvm/lib/IR/MDBuilder.cpp
@@ -150,6 +150,14 @@
   return MDNode::get(Context, Ops);
 }
 
+MDNode *MDBuilder::createRTTIPointerPrologue(Constant *PrologueSig,
+ Constant *RTTI) {
+  SmallVector Ops;
+  Ops.push_back(createConstant(PrologueSig));
+  Ops.push_back(createConstant(RTTI));
+  return MDNode::get(Context, Ops);
+}
+
 MDNode *MDBuilder::createAnonymousAARoot(StringRef Name, MDNode *Extra) {
   SmallVector Args(1, nullptr);
   if (Extra)
Index: llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===
--- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -839,6 +839,24 @@
   // Emit the prologue data.
   if (F.hasPrologueData())
 emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData());
+
+  // Emit the function prologue data for the indirect call sanitizer.
+  if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
+assert(TM.getTargetTriple().getArch() == Triple::x86 ||
+   TM.getTargetTriple().getArch() == Triple::x86_64);
+assert(MD->getNumOperands() == 2);
+
+auto *PrologueSig = mdconst::extract(MD->getOperand(0));
+auto *FTRTTIProxy = mdconst::extract(MD->getOperand(1));
+assert(PrologueSig && FTRTTIProxy);
+emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
+
+const MCExpr *Proxy = lowerConstant(FTRTTIProxy);
+const MCExpr *FnExp = MCSymbolRefExpr::create(CurrentFnSym, OutContext);
+const MCExpr *PCRel = MCBinaryExpr::createSub(Proxy, FnExp, OutContext);
+// Use 32 bit since only small code model is supported.
+OutStreamer->emitValue(PCRel, 4u);
+  }
 }
 
 /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
Index: llvm/include/llvm/IR/MDBuilder.h
===
--- llvm/include/llvm/IR/MDB