vsk created this revision.
vsk added reviewers: rjmccall, jfb, arphaman, delcypher.
Herald added subscribers: llvm-commits, dexonsmith.
Herald added a project: LLVM.

Check that the implicit cast from `id` used to construct the element
variable in an ObjC for-in statement is valid.

This check is included as part of a new `objc-cast` sanitizer, outside
of the main 'undefined' group, as (IIUC) the behavior it's checking for
is not technically UB.

The check can be extended to cover other kinds of invalid casts in ObjC.

Partially addresses: rdar://12903059, rdar://9542496


https://reviews.llvm.org/D71491

Files:
  clang/docs/UndefinedBehaviorSanitizer.rst
  clang/include/clang/Basic/Sanitizers.def
  clang/lib/CodeGen/CGObjC.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/Driver/SanitizerArgs.cpp
  clang/lib/Driver/ToolChain.cpp
  clang/test/CodeGenObjC/for-in.m
  compiler-rt/lib/ubsan/ubsan_checks.inc
  compiler-rt/lib/ubsan/ubsan_handlers.cpp
  compiler-rt/lib/ubsan/ubsan_handlers.h
  compiler-rt/lib/ubsan/ubsan_value.cpp
  compiler-rt/lib/ubsan/ubsan_value.h
  compiler-rt/test/ubsan/TestCases/Misc/objc-cast.m

Index: compiler-rt/test/ubsan/TestCases/Misc/objc-cast.m
===================================================================
--- /dev/null
+++ compiler-rt/test/ubsan/TestCases/Misc/objc-cast.m
@@ -0,0 +1,18 @@
+// REQUIRES: darwin
+//
+// RUN: %clang -framework Foundation -fsanitize=objc-cast %s -O1 -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+//
+// RUN: %clang -framework Foundation -fsanitize=objc-cast -fno-sanitize-recover=objc-cast %s -O1 -o %t.trap
+// RUN: not %run %t.trap 2>&1 | FileCheck %s
+
+#include <Foundation/Foundation.h>
+
+int main() {
+  NSArray *array = [NSArray arrayWithObjects: @1, @2, @3, (void *)0];
+  // CHECK: objc-cast.m:[[@LINE+1]]:{{.*}}: runtime error: invalid ObjC cast, object is a '__NSCFNumber', but expected a 'NSString'
+  for (NSString *str in array) {
+    NSLog(@"%@", str);
+  }
+  return 0;
+}
Index: compiler-rt/lib/ubsan/ubsan_value.h
===================================================================
--- compiler-rt/lib/ubsan/ubsan_value.h
+++ compiler-rt/lib/ubsan/ubsan_value.h
@@ -135,6 +135,9 @@
 /// \brief An opaque handle to a value.
 typedef uptr ValueHandle;
 
+/// Returns the class name of the given ObjC object, or null if the name
+/// cannot be found.
+const char *getObjCClassName(ValueHandle Pointer);
 
 /// \brief Representation of an operand value provided by the instrumented code.
 ///
Index: compiler-rt/lib/ubsan/ubsan_value.cpp
===================================================================
--- compiler-rt/lib/ubsan/ubsan_value.cpp
+++ compiler-rt/lib/ubsan/ubsan_value.cpp
@@ -17,8 +17,46 @@
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_libc.h"
 
+#if defined(__APPLE__)
+#include <dlfcn.h>
+#endif
+
 using namespace __ubsan;
 
+typedef const char *(*ObjCGetClassNameTy)(void *);
+
+const char *__ubsan::getObjCClassName(ValueHandle Pointer) {
+#if defined(__APPLE__)
+  // We need to query the ObjC runtime for some information, but do not want
+  // to introduce a static dependency from the ubsan runtime onto ObjC. Try to
+  // grab a handle to the ObjC runtime used by the process.
+  static bool AttemptedDlopen = false;
+  static void *ObjCHandle = nullptr;
+  static void *ObjCObjectGetClassName = nullptr;
+
+  if (!AttemptedDlopen) {
+    ObjCHandle = dlopen(
+        "/usr/lib/libobjc.A.dylib",
+        RTLD_LAZY         /* Only bind symbols when used. */
+            | RTLD_LOCAL  /* Only make symbols available via the handle. */
+            | RTLD_NOLOAD /* Do not load the dylib, just grab a handle if the
+                             image is already loaded. */
+            | RTLD_FIRST /* Only search the image pointed-to by the handle. */);
+    AttemptedDlopen = true;
+    if (!ObjCHandle)
+      return nullptr;
+    ObjCObjectGetClassName = dlsym(ObjCHandle, "object_getClassName");
+  }
+
+  if (!ObjCObjectGetClassName)
+    return nullptr;
+
+  return ObjCGetClassNameTy(ObjCObjectGetClassName)((void *)Pointer);
+#else
+  return nullptr;
+#endif
+}
+
 SIntMax Value::getSIntValue() const {
   CHECK(getType().isSignedIntegerTy());
   if (isInlineInt()) {
Index: compiler-rt/lib/ubsan/ubsan_handlers.h
===================================================================
--- compiler-rt/lib/ubsan/ubsan_handlers.h
+++ compiler-rt/lib/ubsan/ubsan_handlers.h
@@ -168,6 +168,14 @@
 /// Handle a builtin called in an invalid way.
 RECOVERABLE(invalid_builtin, InvalidBuiltinData *Data)
 
+struct InvalidObjCCast {
+  SourceLocation Loc;
+  const TypeDescriptor &ExpectedType;
+};
+
+/// Handle an invalid ObjC cast.
+RECOVERABLE(invalid_objc_cast, InvalidObjCCast *Data, ValueHandle Pointer)
+
 struct NonNullReturnData {
   SourceLocation AttrLoc;
 };
Index: compiler-rt/lib/ubsan/ubsan_handlers.cpp
===================================================================
--- compiler-rt/lib/ubsan/ubsan_handlers.cpp
+++ compiler-rt/lib/ubsan/ubsan_handlers.cpp
@@ -16,6 +16,7 @@
 #include "ubsan_diag.h"
 #include "ubsan_flags.h"
 #include "ubsan_monitor.h"
+#include "ubsan_value.h"
 
 #include "sanitizer_common/sanitizer_common.h"
 
@@ -598,6 +599,36 @@
   Die();
 }
 
+static void handleInvalidObjCCast(InvalidObjCCast *Data, ValueHandle Pointer,
+                                  ReportOptions Opts) {
+  SourceLocation Loc = Data->Loc.acquire();
+  ErrorType ET = ErrorType::InvalidObjCCast;
+
+  if (ignoreReport(Loc, Opts, ET))
+    return;
+
+  ScopedReport R(Opts, Loc, ET);
+
+  const char *GivenClass = getObjCClassName(Pointer);
+  const char *GivenClassStr = GivenClass ? GivenClass : "<unknown type>";
+
+  Diag(Loc, DL_Error, ET,
+       "invalid ObjC cast, object is a '%0', but expected a %1")
+      << GivenClassStr << Data->ExpectedType;
+}
+
+void __ubsan::__ubsan_handle_invalid_objc_cast(InvalidObjCCast *Data,
+                                               ValueHandle Pointer) {
+  GET_REPORT_OPTIONS(true);
+  handleInvalidObjCCast(Data, Pointer, Opts);
+}
+void __ubsan::__ubsan_handle_invalid_objc_cast_abort(InvalidObjCCast *Data,
+                                                     ValueHandle Pointer) {
+  GET_REPORT_OPTIONS(true);
+  handleInvalidObjCCast(Data, Pointer, Opts);
+  Die();
+}
+
 static void handleNonNullReturn(NonNullReturnData *Data, SourceLocation *LocPtr,
                                 ReportOptions Opts, bool IsAttr) {
   if (!LocPtr)
Index: compiler-rt/lib/ubsan/ubsan_checks.inc
===================================================================
--- compiler-rt/lib/ubsan/ubsan_checks.inc
+++ compiler-rt/lib/ubsan/ubsan_checks.inc
@@ -35,6 +35,7 @@
             "integer-divide-by-zero")
 UBSAN_CHECK(FloatDivideByZero, "float-divide-by-zero", "float-divide-by-zero")
 UBSAN_CHECK(InvalidBuiltin, "invalid-builtin-use", "invalid-builtin-use")
+UBSAN_CHECK(InvalidObjCCast, "invalid-objc-cast", "invalid-objc-cast")
 UBSAN_CHECK(ImplicitUnsignedIntegerTruncation,
             "implicit-unsigned-integer-truncation",
             "implicit-unsigned-integer-truncation")
Index: clang/test/CodeGenObjC/for-in.m
===================================================================
--- clang/test/CodeGenObjC/for-in.m
+++ clang/test/CodeGenObjC/for-in.m
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -emit-llvm %s -o %t
+// RUN: %clang_cc1 %s -verify -o /dev/null
+// RUN: %clang_cc1 %s -triple x86_64-apple-darwin -emit-llvm -fsanitize=objc-cast -o - | FileCheck %s
 
 void p(const char*, ...);
 
@@ -18,12 +19,26 @@
 #define L5(n) L4(n+0),L4(n+16)
 #define L6(n) L5(n+0),L5(n+32)
 
+// CHECK-LABEL: define void @t0
 void t0() {
   NSArray *array = [NSArray arrayWithObjects: L1(0), (void*)0];
 
   p("array.length: %d\n", [array count]);
   unsigned index = 0;
   for (NSString *i in array) {	// expected-warning {{collection expression type 'NSArray *' may not respond}}
+
+    // CHECK:      [[expectedCls:%.*]] = load %struct._class_t*, {{.*}}, !nosanitize
+    // CHECK-NEXT: [[kindOfClassSel:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES{{.*}}, !nosanitize
+    // CHECK-NEXT: [[expectedClsI8:%.*]] = bitcast %struct._class_t* [[expectedCls]] to i8*, !nosanitize
+    // CHECK-NEXT: [[isCls:%.*]] = call zeroext i1 bitcast {{.*}}@objc_msgSend to i1 (i8*, i8*, %struct.__objcFastEnumerationState*, {{.*}})(i8* [[theItem:%.*]], i8* [[kindOfClassSel]], %struct.__objcFastEnumerationState* {{.*}}, i8* [[expectedClsI8]]), !nosanitize
+    // CHECK: br i1 [[isCls]]
+
+    // CHECK: ptrtoint i8* [[theItem]] to i64, !nosanitize
+    // CHECK-NEXT: call void @__ubsan_handle_invalid_objc_cast
+    // CHECK-NEXT: unreachable, !nosanitize
+
+    // CHECK: bitcast i8* [[theItem]]
+
     p("element %d: %s\n", index++, [i cString]);
   }
 }
Index: clang/lib/Driver/ToolChain.cpp
===================================================================
--- clang/lib/Driver/ToolChain.cpp
+++ clang/lib/Driver/ToolChain.cpp
@@ -943,14 +943,14 @@
   // Return sanitizers which don't require runtime support and are not
   // platform dependent.
 
-  SanitizerMask Res = (SanitizerKind::Undefined & ~SanitizerKind::Vptr &
-                       ~SanitizerKind::Function) |
-                      (SanitizerKind::CFI & ~SanitizerKind::CFIICall) |
-                      SanitizerKind::CFICastStrict |
-                      SanitizerKind::FloatDivideByZero |
-                      SanitizerKind::UnsignedIntegerOverflow |
-                      SanitizerKind::ImplicitConversion |
-                      SanitizerKind::Nullability | SanitizerKind::LocalBounds;
+  SanitizerMask Res =
+      (SanitizerKind::Undefined & ~SanitizerKind::Vptr &
+       ~SanitizerKind::Function) |
+      (SanitizerKind::CFI & ~SanitizerKind::CFIICall) |
+      SanitizerKind::CFICastStrict | SanitizerKind::FloatDivideByZero |
+      SanitizerKind::UnsignedIntegerOverflow |
+      SanitizerKind::ImplicitConversion | SanitizerKind::Nullability |
+      SanitizerKind::LocalBounds | SanitizerKind::ObjCCast;
   if (getTriple().getArch() == llvm::Triple::x86 ||
       getTriple().getArch() == llvm::Triple::x86_64 ||
       getTriple().getArch() == llvm::Triple::arm ||
Index: clang/lib/Driver/SanitizerArgs.cpp
===================================================================
--- clang/lib/Driver/SanitizerArgs.cpp
+++ clang/lib/Driver/SanitizerArgs.cpp
@@ -27,7 +27,8 @@
 static const SanitizerMask NeedsUbsanRt =
     SanitizerKind::Undefined | SanitizerKind::Integer |
     SanitizerKind::ImplicitConversion | SanitizerKind::Nullability |
-    SanitizerKind::CFI | SanitizerKind::FloatDivideByZero;
+    SanitizerKind::CFI | SanitizerKind::FloatDivideByZero |
+    SanitizerKind::ObjCCast;
 static const SanitizerMask NeedsUbsanCxxRt =
     SanitizerKind::Vptr | SanitizerKind::CFI;
 static const SanitizerMask NotAllowedWithTrap = SanitizerKind::Vptr;
@@ -47,11 +48,12 @@
     SanitizerKind::ImplicitConversion | SanitizerKind::Nullability |
     SanitizerKind::DataFlow | SanitizerKind::Fuzzer |
     SanitizerKind::FuzzerNoLink | SanitizerKind::FloatDivideByZero |
-    SanitizerKind::SafeStack | SanitizerKind::ShadowCallStack;
+    SanitizerKind::SafeStack | SanitizerKind::ShadowCallStack |
+    SanitizerKind::ObjCCast;
 static const SanitizerMask RecoverableByDefault =
     SanitizerKind::Undefined | SanitizerKind::Integer |
     SanitizerKind::ImplicitConversion | SanitizerKind::Nullability |
-    SanitizerKind::FloatDivideByZero;
+    SanitizerKind::FloatDivideByZero | SanitizerKind::ObjCCast;
 static const SanitizerMask Unrecoverable =
     SanitizerKind::Unreachable | SanitizerKind::Return;
 static const SanitizerMask AlwaysRecoverable =
@@ -63,7 +65,8 @@
     (SanitizerKind::Undefined & ~SanitizerKind::Vptr) |
     SanitizerKind::UnsignedIntegerOverflow | SanitizerKind::ImplicitConversion |
     SanitizerKind::Nullability | SanitizerKind::LocalBounds |
-    SanitizerKind::CFI | SanitizerKind::FloatDivideByZero;
+    SanitizerKind::CFI | SanitizerKind::FloatDivideByZero |
+    SanitizerKind::ObjCCast;
 static const SanitizerMask TrappingDefault = SanitizerKind::CFI;
 static const SanitizerMask CFIClasses =
     SanitizerKind::CFIVCall | SanitizerKind::CFINVCall |
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -118,6 +118,7 @@
   SANITIZER_CHECK(FunctionTypeMismatch, function_type_mismatch, 1)             \
   SANITIZER_CHECK(ImplicitConversion, implicit_conversion, 0)                  \
   SANITIZER_CHECK(InvalidBuiltin, invalid_builtin, 0)                          \
+  SANITIZER_CHECK(InvalidObjCCast, invalid_objc_cast, 0)                       \
   SANITIZER_CHECK(LoadInvalidValue, load_invalid_value, 0)                     \
   SANITIZER_CHECK(MissingReturn, missing_return, 0)                            \
   SANITIZER_CHECK(MulOverflow, mul_overflow, 0)                                \
Index: clang/lib/CodeGen/CGObjC.cpp
===================================================================
--- clang/lib/CodeGen/CGObjC.cpp
+++ clang/lib/CodeGen/CGObjC.cpp
@@ -1841,6 +1841,41 @@
   llvm::Value *CurrentItem =
     Builder.CreateAlignedLoad(CurrentItemPtr, getPointerAlign());
 
+  if (SanOpts.has(SanitizerKind::ObjCCast)) {
+    // Before using an item from the collection, check that the implicit cast
+    // from id to the element type is valid. This is done with instrumentation
+    // roughly corresponding to:
+    //
+    //   if (![item isKindOfClass:expectedCls]) { /* emit diagnostic */ }
+    SanitizerScope SanScope(this);
+    const ObjCObjectPointerType *ObjPtrTy =
+        elementType->getAsObjCInterfacePointerType();
+    const ObjCInterfaceType *InterfaceTy =
+        ObjPtrTy ? ObjPtrTy->getInterfaceType() : nullptr;
+    if (InterfaceTy) {
+      auto &C = CGM.getContext();
+      assert(InterfaceTy->getDecl() && "No decl for ObjC interface type");
+      IdentifierInfo *IsKindOfClassII[] = {&C.Idents.get("isKindOfClass")};
+      Selector IsKindOfClassSel = C.Selectors.getSelector(
+          llvm::array_lengthof(IsKindOfClassII), &IsKindOfClassII[0]);
+      CallArgList IsKindOfClassArgs;
+      llvm::Value *Cls =
+          CGM.getObjCRuntime().GetClass(*this, InterfaceTy->getDecl());
+      Args.add(RValue::get(Cls), C.getObjCClassType());
+      llvm::Value *IsClass =
+          CGM.getObjCRuntime()
+              .GenerateMessageSend(*this, ReturnValueSlot(), C.BoolTy,
+                                   IsKindOfClassSel, CurrentItem, Args)
+              .getScalarVal();
+      llvm::Constant *StaticData[] = {
+          EmitCheckSourceLocation(S.getBeginLoc()),
+          EmitCheckTypeDescriptor(QualType(InterfaceTy, 0))};
+      EmitCheck({{IsClass, SanitizerKind::ObjCCast}},
+                SanitizerHandler::InvalidObjCCast,
+                ArrayRef<llvm::Constant *>(StaticData), CurrentItem);
+    }
+  }
+
   // Cast that value to the right type.
   CurrentItem = Builder.CreateBitCast(CurrentItem, convertedElementType,
                                       "currentitem");
Index: clang/include/clang/Basic/Sanitizers.def
===================================================================
--- clang/include/clang/Basic/Sanitizers.def
+++ clang/include/clang/Basic/Sanitizers.def
@@ -156,6 +156,8 @@
                 ImplicitIntegerArithmeticValueChange,
                 ImplicitIntegerSignChange | ImplicitSignedIntegerTruncation)
 
+SANITIZER("objc-cast", ObjCCast)
+
 // FIXME:
 //SANITIZER_GROUP("implicit-integer-conversion", ImplicitIntegerConversion,
 //                ImplicitIntegerArithmeticValueChange |
Index: clang/docs/UndefinedBehaviorSanitizer.rst
===================================================================
--- clang/docs/UndefinedBehaviorSanitizer.rst
+++ clang/docs/UndefinedBehaviorSanitizer.rst
@@ -121,6 +121,9 @@
      is annotated with ``_Nonnull``.
   -  ``-fsanitize=nullability-return``: Returning null from a function with
      a return type annotated with ``_Nonnull``.
+  -  ``-fsanitize=objc-cast``: Invalid implicit cast of an ObjC object pointer
+     to an incompatible type. This is often unintentional, but is not undefined
+     behavior, therefore the check is not a part of the ``undefined`` group.
   -  ``-fsanitize=object-size``: An attempt to potentially use bytes which
      the optimizer can determine are not part of the object being accessed.
      This will also detect some types of undefined behavior that may not
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to