[PATCH] D38210: [ubsan] Port the function sanitizer to C

Vedant Kumar via Phabricator via cfe-commits Mon, 25 Sep 2017 05:15:03 -0700

vsk created this revision.

The function sanitizer relies on RTTI to check callee types, but this
scheme doesn't work well in languages without the ODR.


This patch introduces a simple, best-effort function type encoding
which can be used when RTTI isn't available. In this scheme, function
types are encoded within 32 bits. The return type and the first nine
parameter types are each recorded using a 3-bit encoding. Zero is a
special value in the 3-bit encoding which means "there is either no type
here OR any type would be permissible here".

This scheme allows false negatives, but not false positives. It's simple
and does not require any changes to the instrumentation.

Testing: I've found some minor issues using the new check (see below), and no false positives.

https://trac.ffmpeg.org/ticket/6685
https://github.com/openssl/openssl/issues/4413


https://reviews.llvm.org/D38210

Files:
  docs/UndefinedBehaviorSanitizer.rst
  lib/CodeGen/CGExpr.cpp
  lib/CodeGen/CodeGenFunction.cpp
  lib/CodeGen/CodeGenModule.cpp
  lib/CodeGen/CodeGenModule.h
  test/CodeGen/sanitize-function-calls.c

Index: test/CodeGen/sanitize-function-calls.c
===================================================================
--- /dev/null
+++ test/CodeGen/sanitize-function-calls.c
@@ -0,0 +1,86 @@
+// RUN: %clang_cc1 -w -triple i386-linux-gnu -fsanitize=function -emit-llvm -o - %s | FileCheck %s --check-prefixes=X32
+// RUN: %clang_cc1 -w -triple x86_64-linux-gnu -fsanitize=function -emit-llvm -o - %s | FileCheck %s --check-prefixes=X64
+
+struct S {};
+
+// X32: [[no_proto_ti:@.*]] = private constant i8* inttoptr (i32 4 to i8*)
+// X64: [[no_proto_ti:@.*]] = private constant i8* inttoptr (i64 4 to i8*)
+
+// X32: prologue <{ i32, i32 }> <{ i32 846595819, i32 sub (i32 ptrtoint (i8** [[no_proto_ti]] to i32), i32 ptrtoint (void ()* @no_proto to i32)) }>
+// X64: prologue <{ i32, i32 }> <{ i32 846595819, i32 trunc (i64 sub (i64 ptrtoint (i8** @0 to i64), i64 ptrtoint (void ()* @no_proto to i64)) to i32) }>
+void no_proto() {}
+
+void proto(void) {}
+
+typedef struct S (*vfunc0)(void);
+typedef void (*vfunc1)(void);
+typedef char (*vfunc2)(void);
+typedef short (*vfunc3)(void);
+typedef int (*vfunc4)(void);
+typedef long long (*vfunc5)(void);
+typedef float (*vfunc6)(void);
+typedef double (*vfunc7)(void);
+typedef void (*vfunc8)(int, int, int, int, int, int, int, int, int, int, int);
+
+// X64-LABEL: @call_proto
+void call_proto(void) {
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, null, !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc0 f0 = &proto;
+  f0();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 4 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc1 f1 = &proto;
+  f1();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 16 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc2 f2 = &proto;
+  f2();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 20 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc3 f3 = &proto;
+  f3();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 24 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc4 f4 = &proto;
+  f4();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 28 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc5 f5 = &proto;
+  f5();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 8 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc6 f6 = &proto;
+  f6();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 12 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc7 f7 = &proto;
+  f7();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 3681400516 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc8 f8 = &proto;
+  f8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+}
+
+// X64-LABEL: @call_no_proto
+void call_no_proto(void) {
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, null, !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc0 f0 = &no_proto;
+  f0();
+}
+
+// X64-LABEL: @main
+int main() {
+  call_proto();
+  call_no_proto();
+  return 0;
+}
Index: lib/CodeGen/CodeGenModule.h
===================================================================
--- lib/CodeGen/CodeGenModule.h
+++ lib/CodeGen/CodeGenModule.h
@@ -745,6 +745,9 @@
   /// Get the address of the RTTI descriptor for the given type.
   llvm::Constant *GetAddrOfRTTIDescriptor(QualType Ty, bool ForEH = false);
 
+  /// Get the type descriptor for a function for use with UBSan.
+  llvm::Constant *GetUBSanFunctionTypeDescriptor(QualType Ty);
+
   /// Get the address of a uuid descriptor .
   ConstantAddress GetAddrOfUuidDescriptor(const CXXUuidofExpr* E);
 
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -4495,6 +4495,62 @@
   return getCXXABI().getAddrOfRTTIDescriptor(Ty);
 }
 
+llvm::Constant *CodeGenModule::GetUBSanFunctionTypeDescriptor(QualType Ty) {
+  if (getLangOpts().CPlusPlus)
+    return GetAddrOfRTTIDescriptor(Ty, /*ForEH=*/true);
+
+  // Bits:
+  //  0    : unused
+  //  1    : unused
+  //  2-4  : encode(returnType)
+  //  5-7  : encode(param1)
+  //  ...  : encode(paramK)
+  //  29-31: encode(param9)
+
+  auto encode = [this](QualType EncodeTy) {
+    // Encode one of: ?, void, f32, f64, i8, i16, i32, i64 (in 3 bits)
+    const auto *T = EncodeTy.getTypePtr();
+    if (T->isVoidType())
+      return 1;
+
+    uint64_t Size = getContext().getTypeSize(EncodeTy);
+    if (T->hasFloatingRepresentation()) {
+      switch (Size) {
+      case 32:
+        return 2;
+      case 64:
+        return 3;
+      default:
+        return 0;
+      }
+    }
+
+    switch (Size) {
+    case 8:
+      return 4;
+    case 16:
+      return 5;
+    case 32:
+      return 6;
+    case 64:
+      return 7;
+    default:
+      return 0;
+    }
+  };
+
+  const auto *FuncTy = Ty->getAs<FunctionType>();
+  unsigned Encoding = encode(FuncTy->getReturnType()) << 2;
+  if (auto *ProtoTy = dyn_cast<FunctionProtoType>(FuncTy)) {
+    auto Types = ProtoTy->getParamTypes();
+    for (unsigned I = 0, E = Types.size(); I < E && I < 9; ++I)
+      Encoding |= encode(Types[I]) << (5 + (3 * I));
+  }
+
+  return llvm::ConstantExpr::getIntToPtr(
+      llvm::ConstantInt::get(IntPtrTy, Encoding), Int8PtrTy);
+}
+
 void CodeGenModule::EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D) {
   for (auto RefExpr : D->varlists()) {
     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(RefExpr)->getDecl());
Index: lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- lib/CodeGen/CodeGenFunction.cpp
+++ lib/CodeGen/CodeGenFunction.cpp
@@ -887,16 +887,14 @@
 
   // If we are checking function types, emit a function type signature as
   // prologue data.
-  if (getLangOpts().CPlusPlus && SanOpts.has(SanitizerKind::Function)) {
+  if (SanOpts.has(SanitizerKind::Function)) {
     if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
       if (llvm::Constant *PrologueSig =
               CGM.getTargetCodeGenInfo().getUBSanFunctionSignature(CGM)) {
-        llvm::Constant *FTRTTIConst =
-            CGM.GetAddrOfRTTIDescriptor(FD->getType(), /*ForEH=*/true);
-        llvm::Constant *FTRTTIConstEncoded =
-            EncodeAddrForUseInPrologue(Fn, FTRTTIConst);
-        llvm::Constant *PrologueStructElems[] = {PrologueSig,
-                                                 FTRTTIConstEncoded};
+        llvm::Constant *FuncTy =
+            CGM.GetUBSanFunctionTypeDescriptor(FD->getType());
+        llvm::Constant *FuncTyEncoded = EncodeAddrForUseInPrologue(Fn, FuncTy);
+        llvm::Constant *PrologueStructElems[] = {PrologueSig, FuncTyEncoded};
         llvm::Constant *PrologueStructConst =
             llvm::ConstantStruct::getAnon(PrologueStructElems, /*Packed=*/true);
         Fn->setPrologueData(PrologueStructConst);
Index: lib/CodeGen/CGExpr.cpp
===================================================================
--- lib/CodeGen/CGExpr.cpp
+++ lib/CodeGen/CGExpr.cpp
@@ -4402,13 +4402,13 @@
 
   CGCallee Callee = OrigCallee;
 
-  if (getLangOpts().CPlusPlus && SanOpts.has(SanitizerKind::Function) &&
+  if (SanOpts.has(SanitizerKind::Function) &&
       (!TargetDecl || !isa<FunctionDecl>(TargetDecl))) {
     if (llvm::Constant *PrefixSig =
             CGM.getTargetCodeGenInfo().getUBSanFunctionSignature(CGM)) {
       SanitizerScope SanScope(this);
-      llvm::Constant *FTRTTIConst =
-          CGM.GetAddrOfRTTIDescriptor(QualType(FnType, 0), /*ForEH=*/true);
+      llvm::Constant *FuncTy =
+          CGM.GetUBSanFunctionTypeDescriptor(QualType(FnType, 0));
       llvm::Type *PrefixStructTyElems[] = {PrefixSig->getType(), Int32Ty};
       llvm::StructType *PrefixStructTy = llvm::StructType::get(
           CGM.getLLVMContext(), PrefixStructTyElems, /*isPacked=*/true);
@@ -4434,8 +4434,7 @@
           Builder.CreateAlignedLoad(CalleeRTTIPtr, getPointerAlign());
       llvm::Value *CalleeRTTI =
           DecodeAddrUsedInPrologue(CalleePtr, CalleeRTTIEncoded);
-      llvm::Value *CalleeRTTIMatch =
-          Builder.CreateICmpEQ(CalleeRTTI, FTRTTIConst);
+      llvm::Value *CalleeRTTIMatch = Builder.CreateICmpEQ(CalleeRTTI, FuncTy);
       llvm::Constant *StaticData[] = {
         EmitCheckSourceLocation(E->getLocStart()),
         EmitCheckTypeDescriptor(CalleeType)
Index: docs/UndefinedBehaviorSanitizer.rst
===================================================================
--- docs/UndefinedBehaviorSanitizer.rst
+++ docs/UndefinedBehaviorSanitizer.rst
@@ -87,7 +87,7 @@
   -  ``-fsanitize=float-divide-by-zero``: Floating point division by
      zero.
   -  ``-fsanitize=function``: Indirect call of a function through a
-     function pointer of the wrong type (Darwin/Linux, C++ and x86/x86_64
+     function pointer of the wrong type (Darwin/Linux and x86/x86_64
      only).
   -  ``-fsanitize=integer-divide-by-zero``: Integer division by zero.
   -  ``-fsanitize=nonnull-attribute``: Passing null pointer as a function

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D38210: [ubsan] Port the function sanitizer to C

Reply via email to