ychen created this revision.
ychen added reviewers: pcc, vsk, rjmccall.
Herald added subscribers: dexonsmith, hiraditya.
ychen requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Information in the function `Prologue Data` is intentionally opaque. This
is fine from the function sanitizer's perspective if function manipulations
(duplication etc.) do not change the function signature. However,
coroutine lowering needs to split one function into several functions
that have different signatures. The ideal way to solve this is to make
the sanitizer information not opaque so that LLVM passes know how to deal with
it. This patch detaches the information from function `Prologue Data`
and attaches it to a function metadata node.

Alternatively,
(1) function attributes do not work because it could not carry
GlobalValue.
(2) I've considered adding one additional function optional
data(D13829 <https://reviews.llvm.org/D13829>) for the function sanitizer. 
However, it requires LL/parser
changes that I want to avoid.

If this is agreed upon, I'll send an follow-up patch fix PR50345.

Do you think this is worth doing? If so, which approach is better: this
patch or (2)?


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D115844

Files:
  clang/lib/CodeGen/CGExpr.cpp
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/CodeGen/CodeGenModule.h
  clang/test/CodeGen/ubsan-function.cpp
  clang/test/CodeGenCXX/catch-undef-behavior.cpp
  clang/test/CodeGenCXX/ubsan-function-noexcept.cpp
  llvm/include/llvm/IR/FixedMetadataKinds.def
  llvm/include/llvm/IR/MDBuilder.h
  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
  llvm/lib/IR/MDBuilder.cpp
  llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp

Index: llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
===================================================================
--- llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1405,7 +1405,11 @@
 /// Check if \p G has been created by a trusted compiler pass.
 static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) {
   // Do not instrument @llvm.global_ctors, @llvm.used, etc.
-  if (G->getName().startswith("llvm."))
+  if (G->getName().startswith("llvm.") ||
+      // Do not instrument gcov counter arrays.
+      G->getName().startswith("__llvm_gcov_ctr") ||
+      // Do not instrument rtti proxy symbols for function sanitizer.
+      G->getName().startswith("__llvm_rtti_proxy"))
     return true;
 
   // Do not instrument asan globals.
@@ -1414,10 +1418,6 @@
       G->getName().startswith(kODRGenPrefix))
     return true;
 
-  // Do not instrument gcov counter arrays.
-  if (G->getName() == "__llvm_gcov_ctr")
-    return true;
-
   return false;
 }
 
Index: llvm/lib/IR/MDBuilder.cpp
===================================================================
--- llvm/lib/IR/MDBuilder.cpp
+++ llvm/lib/IR/MDBuilder.cpp
@@ -150,6 +150,14 @@
   return MDNode::get(Context, Ops);
 }
 
+MDNode *MDBuilder::createRTTIPointerPrologue(Constant *PrologueSig,
+                                             Constant *RTTI) {
+  SmallVector<Metadata *, 4> Ops;
+  Ops.push_back(createConstant(PrologueSig));
+  Ops.push_back(createConstant(RTTI));
+  return MDNode::get(Context, Ops);
+}
+
 MDNode *MDBuilder::createAnonymousAARoot(StringRef Name, MDNode *Extra) {
   SmallVector<Metadata *, 3> Args(1, nullptr);
   if (Extra)
Index: llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===================================================================
--- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -839,6 +839,24 @@
   // Emit the prologue data.
   if (F.hasPrologueData())
     emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData());
+
+  // Emit the function prologue data for the indirect call sanitizer.
+  if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
+    assert(TM.getTargetTriple().getArch() == Triple::x86 ||
+           TM.getTargetTriple().getArch() == Triple::x86_64);
+    assert(MD->getNumOperands() == 2);
+
+    auto *PrologueSig = mdconst::extract<Constant>(MD->getOperand(0));
+    auto *FTRTTIProxy = mdconst::extract<Constant>(MD->getOperand(1));
+    assert(PrologueSig && FTRTTIProxy);
+    emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
+
+    const MCExpr *Proxy = lowerConstant(FTRTTIProxy);
+    const MCExpr *FnExp = MCSymbolRefExpr::create(CurrentFnSym, OutContext);
+    const MCExpr *PCRel = MCBinaryExpr::createSub(Proxy, FnExp, OutContext);
+    // Use 32 bit since only small code model is supported.
+    OutStreamer->emitValue(PCRel, 4u);
+  }
 }
 
 /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
Index: llvm/include/llvm/IR/MDBuilder.h
===================================================================
--- llvm/include/llvm/IR/MDBuilder.h
+++ llvm/include/llvm/IR/MDBuilder.h
@@ -108,6 +108,10 @@
   /// Merge the new callback encoding \p NewCB into \p ExistingCallbacks.
   MDNode *mergeCallbackEncodings(MDNode *ExistingCallbacks, MDNode *NewCB);
 
+  /// Return metadata feeding to the CodeGen about how to generate a function
+  /// prologue for the "function" santizier.
+  MDNode *createRTTIPointerPrologue(Constant *PrologueSig, Constant *RTTI);
+
   //===------------------------------------------------------------------===//
   // AA metadata.
   //===------------------------------------------------------------------===//
Index: llvm/include/llvm/IR/FixedMetadataKinds.def
===================================================================
--- llvm/include/llvm/IR/FixedMetadataKinds.def
+++ llvm/include/llvm/IR/FixedMetadataKinds.def
@@ -42,3 +42,4 @@
 LLVM_FIXED_MD_KIND(MD_vcall_visibility, "vcall_visibility", 28)
 LLVM_FIXED_MD_KIND(MD_noundef, "noundef", 29)
 LLVM_FIXED_MD_KIND(MD_annotation, "annotation", 30)
+LLVM_FIXED_MD_KIND(MD_func_sanitize, "func_sanitize", 31)
Index: clang/test/CodeGenCXX/ubsan-function-noexcept.cpp
===================================================================
--- clang/test/CodeGenCXX/ubsan-function-noexcept.cpp
+++ clang/test/CodeGenCXX/ubsan-function-noexcept.cpp
@@ -2,8 +2,8 @@
 
 // Check that typeinfo recorded in function prolog doesn't have "Do" noexcept
 // qualifier in its mangled name.
-// CHECK: @[[RTTI:[0-9]+]] = private constant i8* bitcast ({ i8*, i8* }* @_ZTIFvvE to i8*)
-// CHECK: define{{.*}} void @_Z1fv() #{{.*}} prologue <{ i32, i32 }> <{ i32 {{.*}}, i32 trunc (i64 sub (i64 ptrtoint (i8** @[[RTTI]] to i64), i64 ptrtoint (void ()* @_Z1fv to i64)) to i32) }>
+// CHECK: [[PROXY:@.*]] = private unnamed_addr constant i8* bitcast ({ i8*, i8* }* @_ZTIFvvE to i8*)
+// CHECK: define{{.*}} void @_Z1fv() #{{.*}} !func_sanitize ![[FUNCSAN:.*]] {
 void f() noexcept {}
 
 // CHECK: define{{.*}} void @_Z1gPDoFvvE
@@ -13,3 +13,5 @@
   // CHECK: icmp eq i8* %{{.*}}, bitcast ({ i8*, i8* }* @_ZTIFvvE to i8*), !nosanitize
   p();
 }
+
+// CHECK: ![[FUNCSAN]] = !{i32 846595819, i8** [[PROXY]]}
Index: clang/test/CodeGenCXX/catch-undef-behavior.cpp
===================================================================
--- clang/test/CodeGenCXX/catch-undef-behavior.cpp
+++ clang/test/CodeGenCXX/catch-undef-behavior.cpp
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 -std=c++11 -fsanitize=signed-integer-overflow,integer-divide-by-zero,float-divide-by-zero,shift-base,shift-exponent,unreachable,return,vla-bound,alignment,null,vptr,object-size,float-cast-overflow,bool,enum,array-bounds,function -fsanitize-recover=signed-integer-overflow,integer-divide-by-zero,float-divide-by-zero,shift-base,shift-exponent,vla-bound,alignment,null,vptr,object-size,float-cast-overflow,bool,enum,array-bounds,function -emit-llvm %s -o - -triple x86_64-linux-gnu | opt -instnamer -S | FileCheck %s
+// RUN: %clang_cc1 -std=c++11 -fsanitize=signed-integer-overflow,integer-divide-by-zero,float-divide-by-zero,shift-base,shift-exponent,unreachable,return,vla-bound,alignment,null,vptr,object-size,float-cast-overflow,bool,enum,array-bounds,function -fsanitize-recover=signed-integer-overflow,integer-divide-by-zero,float-divide-by-zero,shift-base,shift-exponent,vla-bound,alignment,null,vptr,object-size,float-cast-overflow,bool,enum,array-bounds,function -emit-llvm %s -o - -triple x86_64-linux-gnu | opt -instnamer -S | FileCheck %s --check-prefixes=CHECK,CHECK-FUNCSAN
 // RUN: %clang_cc1 -std=c++11 -fsanitize=vptr,address -fsanitize-recover=vptr,address -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefix=CHECK-ASAN
 // RUN: %clang_cc1 -std=c++11 -fsanitize=vptr -fsanitize-recover=vptr -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefix=DOWNCAST-NULL
-// RUN: %clang_cc1 -std=c++11 -fsanitize=function -emit-llvm %s -o - -triple x86_64-linux-gnux32 | FileCheck %s --check-prefix=CHECK-X32
-// RUN: %clang_cc1 -std=c++11 -fsanitize=function -emit-llvm %s -o - -triple i386-linux-gnu | FileCheck %s --check-prefix=CHECK-X86
+// RUN: %clang_cc1 -std=c++11 -fsanitize=function -emit-llvm %s -o - -triple x86_64-linux-gnux32 | FileCheck %s --check-prefix=CHECK-FUNCSAN
+// RUN: %clang_cc1 -std=c++11 -fsanitize=function -emit-llvm %s -o - -triple i386-linux-gnu | FileCheck %s --check-prefix=CHECK-FUNCSAN
 
 struct S {
   double d;
@@ -16,9 +16,7 @@
 // Check that type mismatch handler is not modified by ASan.
 // CHECK-ASAN: private unnamed_addr global { { [{{.*}} x i8]*, i32, i32 }, { i16, i16, [4 x i8] }*, i8*, i8 } { {{.*}}, { i16, i16, [4 x i8] }* [[TYPE_DESCR]], {{.*}} }
 
-// CHECK: [[IndirectRTTI_ZTIFvPFviEE:@.+]] = private constant i8* bitcast ({ i8*, i8* }* @_ZTIFvPFviEE to i8*)
-// CHECK-X86: [[IndirectRTTI_ZTIFvPFviEE:@.+]] = private constant i8* bitcast ({ i8*, i8* }* @_ZTIFvPFviEE to i8*)
-// CHECK-X32: [[IndirectRTTI_ZTIFvPFviEE:@.+]] = private constant i8* bitcast ({ i8*, i8* }* @_ZTIFvPFviEE to i8*)
+// CHECK-FUNCSAN: [[PROXY:@.+]] = private unnamed_addr constant i8* bitcast ({ i8*, i8* }* @_ZTIFvPFviEE to i8*)
 
 struct T : S {};
 
@@ -399,10 +397,7 @@
   // CHECK-NEXT: br i1 [[AND]]
 }
 
-//
-// CHECK-LABEL: @_Z22indirect_function_callPFviE({{.*}} prologue <{ i32, i32 }> <{ i32 846595819, i32 trunc (i64 sub (i64 ptrtoint (i8** {{.*}} to i64), i64 ptrtoint (void (void (i32)*)* @_Z22indirect_function_callPFviE to i64)) to i32) }>
-// CHECK-X32: @_Z22indirect_function_callPFviE({{.*}} prologue <{ i32, i32 }> <{ i32 846595819, i32 sub (i32 ptrtoint (i8** [[IndirectRTTI_ZTIFvPFviEE]] to i32), i32 ptrtoint (void (void (i32)*)* @_Z22indirect_function_callPFviE to i32)) }>
-// CHECK-X86: @_Z22indirect_function_callPFviE({{.*}} prologue <{ i32, i32 }> <{ i32 846595819, i32 sub (i32 ptrtoint (i8** [[IndirectRTTI_ZTIFvPFviEE]] to i32), i32 ptrtoint (void (void (i32)*)* @_Z22indirect_function_callPFviE to i32)) }>
+// CHECK-FUNCSAN: @_Z22indirect_function_callPFviE({{.*}} !func_sanitize ![[FUNCSAN:.*]] {
 void indirect_function_call(void (*p)(int)) {
   // CHECK: [[PTR:%.+]] = bitcast void (i32)* {{.*}} to <{ i32, i32 }>*
 
@@ -483,34 +478,34 @@
 }
 
 // CHECK-LABEL: define{{.*}} void @_ZN29FunctionSanitizerVirtualCalls1B1fEv
-// CHECK-NOT: prologue
+// CHECK-NOT: !func_sanitize
 //
 // CHECK-LABEL: define{{.*}} void @_ZTv0_n24_N29FunctionSanitizerVirtualCalls1B1fEv
-// CHECK-NOT: prologue
+// CHECK-NOT: !func_sanitize
 //
 // CHECK-LABEL: define{{.*}} void @_ZN29FunctionSanitizerVirtualCalls11force_irgenEv()
-// CHECK: prologue
+// CHECK: !func_sanitize
 //
 // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1AC1Ev
-// CHECK-NOT: prologue
+// CHECK-NOT: !func_sanitize
 //
 // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1A1gEv
-// CHECK-NOT: prologue
+// CHECK-NOT: !func_sanitize
 //
 // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1A1hEv
-// CHECK-NOT: prologue
+// CHECK-NOT: !func_sanitize
 //
 // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1BC1Ev
-// CHECK-NOT: prologue
+// CHECK-NOT: !func_sanitize
 //
 // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1B1bEv
-// CHECK-NOT: prologue
+// CHECK-NOT: !func_sanitize
 //
 // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1B1gEv
-// CHECK-NOT: prologue
+// CHECK-NOT: !func_sanitize
 //
 // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1B1qEv
-// CHECK: prologue
+// CHECK: !func_sanitize
 
 }
 
@@ -754,3 +749,5 @@
 }
 
 // CHECK: attributes [[NR_NUW]] = { noreturn nounwind }
+
+// CHECK-FUNCSAN: ![[FUNCSAN]] = !{i32 846595819, i8** [[PROXY]]}
Index: clang/test/CodeGen/ubsan-function.cpp
===================================================================
--- clang/test/CodeGen/ubsan-function.cpp
+++ clang/test/CodeGen/ubsan-function.cpp
@@ -1,6 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s -fsanitize=function -fno-sanitize-recover=all | FileCheck %s
 
-// CHECK-LABEL: define{{.*}} void @_Z3funv() #0 prologue <{ i32, i32 }> <{ i32 846595819, i32 trunc (i64 sub (i64 ptrtoint (i8** @0 to i64), i64 ptrtoint (void ()* @_Z3funv to i64)) to i32) }> {
+// CHECK: @[[PROXY:.*]] = private unnamed_addr constant i8* bitcast ({ i8*, i8* }* @_ZTIFvvE to i8*)
+// CHECK: define{{.*}} void @_Z3funv() #0 !func_sanitize ![[FUNCSAN:.*]] {
 void fun() {}
 
 // CHECK-LABEL: define{{.*}} void @_Z6callerPFvvE(void ()* %f)
@@ -20,3 +21,5 @@
 // CHECK: [[LABEL3]]:
 // CHECK: br label %[[LABEL4]], !nosanitize
 void caller(void (*f)()) { f(); }
+
+// CHECK: ![[FUNCSAN]] = !{i32 846595819, i8** @[[PROXY]]}
Index: clang/lib/CodeGen/CodeGenModule.h
===================================================================
--- clang/lib/CodeGen/CodeGenModule.h
+++ clang/lib/CodeGen/CodeGenModule.h
@@ -578,6 +578,8 @@
   MetadataTypeMap VirtualMetadataIdMap;
   MetadataTypeMap GeneralizedMetadataIdMap;
 
+  llvm::DenseMap<const llvm::Constant *, llvm::GlobalVariable *> RTTIProxyMap;
+
 public:
   CodeGenModule(ASTContext &C, const HeaderSearchOptions &headersearchopts,
                 const PreprocessorOptions &ppopts,
@@ -1425,6 +1427,9 @@
   std::vector<const CXXRecordDecl *>
   getMostBaseClasses(const CXXRecordDecl *RD);
 
+  llvm::GlobalVariable *
+  GetOrCreateRTTIProxyGlobalVariable(llvm::Constant *Addr);
+
   /// Get the declaration of std::terminate for the platform.
   llvm::FunctionCallee getTerminateFn();
 
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -1817,6 +1817,22 @@
   return MostBases.takeVector();
 }
 
+llvm::GlobalVariable *
+CodeGenModule::GetOrCreateRTTIProxyGlobalVariable(llvm::Constant *Addr) {
+  auto It = RTTIProxyMap.find(Addr);
+  if (It != RTTIProxyMap.end())
+    return It->second;
+
+  auto *FTRTTIProxy = new llvm::GlobalVariable(
+      TheModule, Addr->getType(),
+      /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, Addr,
+      "__llvm_rtti_proxy");
+  FTRTTIProxy->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+
+  RTTIProxyMap[Addr] = FTRTTIProxy;
+  return FTRTTIProxy;
+}
+
 void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
                                                            llvm::Function *F) {
   llvm::AttrBuilder B;
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -2352,10 +2352,6 @@
   /// XRay typed event handling calls.
   bool AlwaysEmitXRayTypedEvents() const;
 
-  /// Encode an address into a form suitable for use in a function prologue.
-  llvm::Constant *EncodeAddrForUseInPrologue(llvm::Function *F,
-                                             llvm::Constant *Addr);
-
   /// Decode an address used in a function prologue, encoded by \c
   /// EncodeAddrForUseInPrologue.
   llvm::Value *DecodeAddrUsedInPrologue(llvm::Value *F,
Index: clang/lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.cpp
+++ clang/lib/CodeGen/CodeGenFunction.cpp
@@ -559,29 +559,6 @@
               XRayInstrKind::Typed);
 }
 
-llvm::Constant *
-CodeGenFunction::EncodeAddrForUseInPrologue(llvm::Function *F,
-                                            llvm::Constant *Addr) {
-  // Addresses stored in prologue data can't require run-time fixups and must
-  // be PC-relative. Run-time fixups are undesirable because they necessitate
-  // writable text segments, which are unsafe. And absolute addresses are
-  // undesirable because they break PIE mode.
-
-  // Add a layer of indirection through a private global. Taking its address
-  // won't result in a run-time fixup, even if Addr has linkonce_odr linkage.
-  auto *GV = new llvm::GlobalVariable(CGM.getModule(), Addr->getType(),
-                                      /*isConstant=*/true,
-                                      llvm::GlobalValue::PrivateLinkage, Addr);
-
-  // Create a PC-relative address.
-  auto *GOTAsInt = llvm::ConstantExpr::getPtrToInt(GV, IntPtrTy);
-  auto *FuncAsInt = llvm::ConstantExpr::getPtrToInt(F, IntPtrTy);
-  auto *PCRelAsInt = llvm::ConstantExpr::getSub(GOTAsInt, FuncAsInt);
-  return (IntPtrTy == Int32Ty)
-             ? PCRelAsInt
-             : llvm::ConstantExpr::getTrunc(PCRelAsInt, Int32Ty);
-}
-
 llvm::Value *
 CodeGenFunction::DecodeAddrUsedInPrologue(llvm::Value *F,
                                           llvm::Value *EncodedAddr) {
@@ -918,12 +895,13 @@
           FD->getType(), EST_None);
       llvm::Constant *FTRTTIConst =
           CGM.GetAddrOfRTTIDescriptor(ProtoTy, /*ForEH=*/true);
-      llvm::Constant *FTRTTIConstEncoded =
-          EncodeAddrForUseInPrologue(Fn, FTRTTIConst);
-      llvm::Constant *PrologueStructElems[] = {PrologueSig, FTRTTIConstEncoded};
-      llvm::Constant *PrologueStructConst =
-          llvm::ConstantStruct::getAnon(PrologueStructElems, /*Packed=*/true);
-      Fn->setPrologueData(PrologueStructConst);
+      llvm::GlobalVariable *FTRTTIProxy =
+          CGM.GetOrCreateRTTIProxyGlobalVariable(FTRTTIConst);
+      llvm::LLVMContext &Ctx = Fn->getContext();
+      llvm::MDBuilder MDB(Ctx);
+      Fn->setMetadata(llvm::LLVMContext::MD_func_sanitize,
+                      MDB.createRTTIPointerPrologue(PrologueSig, FTRTTIProxy));
+      CGM.addCompilerUsedGlobal(FTRTTIProxy);
     }
   }
 
Index: clang/lib/CodeGen/CGExpr.cpp
===================================================================
--- clang/lib/CodeGen/CGExpr.cpp
+++ clang/lib/CodeGen/CGExpr.cpp
@@ -5168,6 +5168,9 @@
 
   if (getLangOpts().CPlusPlus && SanOpts.has(SanitizerKind::Function) &&
       (!TargetDecl || !isa<FunctionDecl>(TargetDecl))) {
+    assert((CGM.getCodeGenOpts().CodeModel == "default" ||
+            CGM.getCodeGenOpts().CodeModel == "small") &&
+           "function sanitizer only works with the small code model");
     if (llvm::Constant *PrefixSig =
             CGM.getTargetCodeGenInfo().getUBSanFunctionSignature(CGM)) {
       SanitizerScope SanScope(this);
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to