vsk created this revision. The function sanitizer relies on RTTI to check callee types, but this scheme doesn't work well in languages without the ODR (One Definition Rule).
This patch introduces a simple, best-effort function type encoding which can be used when RTTI isn't available. In this scheme, a function type is encoded within 32 bits: the two low bits are unused, and the return type and each of the first nine parameter types are recorded using a 3-bit encoding. Zero is a special value in the 3-bit encoding which means "there is either no type here OR any type would be permissible here". This scheme allows false negatives, but not false positives. It's simple and does not require any changes to the instrumentation. Testing: Using the new check, I've found some minor issues in existing projects (see the ffmpeg and openssl reports linked below), and no false positives. https://trac.ffmpeg.org/ticket/6685 https://github.com/openssl/openssl/issues/4413 https://reviews.llvm.org/D38210 Files: docs/UndefinedBehaviorSanitizer.rst lib/CodeGen/CGExpr.cpp lib/CodeGen/CodeGenFunction.cpp lib/CodeGen/CodeGenModule.cpp lib/CodeGen/CodeGenModule.h test/CodeGen/sanitize-function-calls.c
Index: test/CodeGen/sanitize-function-calls.c =================================================================== --- /dev/null +++ test/CodeGen/sanitize-function-calls.c @@ -0,0 +1,86 @@ +// RUN: %clang_cc1 -w -triple i386-linux-gnu -fsanitize=function -emit-llvm -o - %s | FileCheck %s --check-prefixes=X32 +// RUN: %clang_cc1 -w -triple x86_64-linux-gnu -fsanitize=function -emit-llvm -o - %s | FileCheck %s --check-prefixes=X64 + +struct S {}; + +// X32: [[no_proto_ti:@.*]] = private constant i8* inttoptr (i32 4 to i8*) +// X64: [[no_proto_ti:@.*]] = private constant i8* inttoptr (i64 4 to i8*) + +// X32: prologue <{ i32, i32 }> <{ i32 846595819, i32 sub (i32 ptrtoint (i8** [[no_proto_ti]] to i32), i32 ptrtoint (void ()* @no_proto to i32)) }> +// X64: prologue <{ i32, i32 }> <{ i32 846595819, i32 trunc (i64 sub (i64 ptrtoint (i8** @0 to i64), i64 ptrtoint (void ()* @no_proto to i64)) to i32) }> +void no_proto() {} + +void proto(void) {} + +typedef struct S (*vfunc0)(void); +typedef void (*vfunc1)(void); +typedef char (*vfunc2)(void); +typedef short (*vfunc3)(void); +typedef int (*vfunc4)(void); +typedef long long (*vfunc5)(void); +typedef float (*vfunc6)(void); +typedef double (*vfunc7)(void); +typedef void (*vfunc8)(int, int, int, int, int, int, int, int, int, int, int); + +// X64-LABEL: @call_proto +void call_proto(void) { + // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, null, !nosanitize + // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize + vfunc0 f0 = &proto; + f0(); + + // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 4 to i8*), !nosanitize + // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize + vfunc1 f1 = &proto; + f1(); + + // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 16 to i8*), !nosanitize + // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize + vfunc2 f2 = &proto; + f2(); + + // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 20 to i8*), !nosanitize + // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize + vfunc3 f3 = &proto; + f3(); + + // X64: 
[[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 24 to i8*), !nosanitize + // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize + vfunc4 f4 = &proto; + f4(); + + // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 28 to i8*), !nosanitize + // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize + vfunc5 f5 = &proto; + f5(); + + // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 8 to i8*), !nosanitize + // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize + vfunc6 f6 = &proto; + f6(); + + // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 12 to i8*), !nosanitize + // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize + vfunc7 f7 = &proto; + f7(); + + // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 3681400516 to i8*), !nosanitize + // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize + vfunc8 f8 = &proto; + f8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); +} + +// X64-LABEL: @call_no_proto +void call_no_proto(void) { + // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, null, !nosanitize + // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize + vfunc0 f0 = &no_proto; + f0(); +} + +// X64-LABEL: @main +int main() { + call_proto(); + call_no_proto(); + return 0; +} Index: lib/CodeGen/CodeGenModule.h =================================================================== --- lib/CodeGen/CodeGenModule.h +++ lib/CodeGen/CodeGenModule.h @@ -745,6 +745,9 @@ /// Get the address of the RTTI descriptor for the given type. llvm::Constant *GetAddrOfRTTIDescriptor(QualType Ty, bool ForEH = false); + /// Get the type descriptor for a function for use with UBSan. + llvm::Constant *GetUBSanFunctionTypeDescriptor(QualType Ty); + /// Get the address of a uuid descriptor . 
ConstantAddress GetAddrOfUuidDescriptor(const CXXUuidofExpr* E); Index: lib/CodeGen/CodeGenModule.cpp =================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -4495,6 +4495,62 @@ return getCXXABI().getAddrOfRTTIDescriptor(Ty); } +llvm::Constant *CodeGenModule::GetUBSanFunctionTypeDescriptor(QualType Ty) { + if (getLangOpts().CPlusPlus) + return GetAddrOfRTTIDescriptor(Ty, /*ForEH=*/true); + + // Bits: + // 0 : unused + // 1 : unused + // 2-4 : encode(returnType) + // 5-7 : encode(param1) + // ... : encode(paramK) + // 29-31: encode(param9) + + auto encode = [this](QualType EncodeTy) { + // Encode one of: ?, void, f32, f64, i8, i16, i32, i64 (in 3 bits) + const auto *T = EncodeTy.getTypePtr(); + if (T->isVoidType()) + return 1; + + uint64_t Size = getContext().getTypeSize(EncodeTy); + if (T->hasFloatingRepresentation()) { + switch (Size) { + case 32: + return 2; + case 64: + return 3; + default: + return 0; + } + } + + switch (Size) { + case 8: + return 4; + case 16: + return 5; + case 32: + return 6; + case 64: + return 7; + default: + return 0; + } + }; + + const auto *FuncTy = Ty->getAs<FunctionType>(); + unsigned Encoding = encode(FuncTy->getReturnType()) << 2; + if (auto *ProtoTy = dyn_cast<FunctionProtoType>(FuncTy)) { + auto Types = ProtoTy->getParamTypes(); + for (unsigned I = 0, E = Types.size(); I < E && I < 9; ++I) + Encoding |= encode(Types[I]) << (5 + (3 * I)); + } + + return llvm::ConstantExpr::getIntToPtr( + llvm::ConstantInt::get(IntPtrTy, Encoding), Int8PtrTy); +} + void CodeGenModule::EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D) { for (auto RefExpr : D->varlists()) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(RefExpr)->getDecl()); Index: lib/CodeGen/CodeGenFunction.cpp =================================================================== --- lib/CodeGen/CodeGenFunction.cpp +++ lib/CodeGen/CodeGenFunction.cpp @@ -887,16 +887,14 @@ // If we are checking 
function types, emit a function type signature as // prologue data. - if (getLangOpts().CPlusPlus && SanOpts.has(SanitizerKind::Function)) { + if (SanOpts.has(SanitizerKind::Function)) { if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { if (llvm::Constant *PrologueSig = CGM.getTargetCodeGenInfo().getUBSanFunctionSignature(CGM)) { - llvm::Constant *FTRTTIConst = - CGM.GetAddrOfRTTIDescriptor(FD->getType(), /*ForEH=*/true); - llvm::Constant *FTRTTIConstEncoded = - EncodeAddrForUseInPrologue(Fn, FTRTTIConst); - llvm::Constant *PrologueStructElems[] = {PrologueSig, - FTRTTIConstEncoded}; + llvm::Constant *FuncTy = + CGM.GetUBSanFunctionTypeDescriptor(FD->getType()); + llvm::Constant *FuncTyEncoded = EncodeAddrForUseInPrologue(Fn, FuncTy); + llvm::Constant *PrologueStructElems[] = {PrologueSig, FuncTyEncoded}; llvm::Constant *PrologueStructConst = llvm::ConstantStruct::getAnon(PrologueStructElems, /*Packed=*/true); Fn->setPrologueData(PrologueStructConst); Index: lib/CodeGen/CGExpr.cpp =================================================================== --- lib/CodeGen/CGExpr.cpp +++ lib/CodeGen/CGExpr.cpp @@ -4402,13 +4402,13 @@ CGCallee Callee = OrigCallee; - if (getLangOpts().CPlusPlus && SanOpts.has(SanitizerKind::Function) && + if (SanOpts.has(SanitizerKind::Function) && (!TargetDecl || !isa<FunctionDecl>(TargetDecl))) { if (llvm::Constant *PrefixSig = CGM.getTargetCodeGenInfo().getUBSanFunctionSignature(CGM)) { SanitizerScope SanScope(this); - llvm::Constant *FTRTTIConst = - CGM.GetAddrOfRTTIDescriptor(QualType(FnType, 0), /*ForEH=*/true); + llvm::Constant *FuncTy = + CGM.GetUBSanFunctionTypeDescriptor(QualType(FnType, 0)); llvm::Type *PrefixStructTyElems[] = {PrefixSig->getType(), Int32Ty}; llvm::StructType *PrefixStructTy = llvm::StructType::get( CGM.getLLVMContext(), PrefixStructTyElems, /*isPacked=*/true); @@ -4434,8 +4434,7 @@ Builder.CreateAlignedLoad(CalleeRTTIPtr, getPointerAlign()); llvm::Value *CalleeRTTI = 
DecodeAddrUsedInPrologue(CalleePtr, CalleeRTTIEncoded); - llvm::Value *CalleeRTTIMatch = - Builder.CreateICmpEQ(CalleeRTTI, FTRTTIConst); + llvm::Value *CalleeRTTIMatch = Builder.CreateICmpEQ(CalleeRTTI, FuncTy); llvm::Constant *StaticData[] = { EmitCheckSourceLocation(E->getLocStart()), EmitCheckTypeDescriptor(CalleeType) Index: docs/UndefinedBehaviorSanitizer.rst =================================================================== --- docs/UndefinedBehaviorSanitizer.rst +++ docs/UndefinedBehaviorSanitizer.rst @@ -87,7 +87,7 @@ - ``-fsanitize=float-divide-by-zero``: Floating point division by zero. - ``-fsanitize=function``: Indirect call of a function through a - function pointer of the wrong type (Darwin/Linux, C++ and x86/x86_64 + function pointer of the wrong type (Darwin/Linux and x86/x86_64 only). - ``-fsanitize=integer-divide-by-zero``: Integer division by zero. - ``-fsanitize=nonnull-attribute``: Passing null pointer as a function
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits