[PATCH] D86796: [Sema] Address-space sensitive index check for unbounded arrays

Chris Hamilton via Phabricator via cfe-commits Wed, 02 Sep 2020 10:07:19 -0700

chrish_ericsson_atx updated this revision to Diff 289503.
chrish_ericsson_atx added a comment.


Updating D86796 <https://reviews.llvm.org/D86796>: [Sema] Address-space 
sensitive index check for unbounded arrays

Refactored math as suggested by Bevin Hansson.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86796/new/

https://reviews.llvm.org/D86796

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Sema/SemaChecking.cpp
  clang/test/Sema/const-eval.c
  clang/test/Sema/unbounded-array-bounds.c
  clang/test/SemaCXX/constant-expression-cxx1y.cpp

Index: clang/test/SemaCXX/constant-expression-cxx1y.cpp
===================================================================
--- clang/test/SemaCXX/constant-expression-cxx1y.cpp
+++ clang/test/SemaCXX/constant-expression-cxx1y.cpp
@@ -1018,8 +1018,9 @@
 }
 
 constexpr void PR28739(int n) { // expected-error {{never produces a constant}}
-  int *p = &n;
+  int *p = &n;                  // expected-note {{declared here}}
   p += (__int128)(unsigned long)-1; // expected-note {{cannot refer to element 18446744073709551615 of non-array object in a constant expression}}
+  // expected-warning@-1 {{refers past the last possible element}}
 }
 
 constexpr void Void(int n) {
Index: clang/test/Sema/unbounded-array-bounds.c
===================================================================
--- /dev/null
+++ clang/test/Sema/unbounded-array-bounds.c
@@ -0,0 +1,70 @@
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-X86-ADDR64 %s  \
+// RUN:              --implicit-check-not 'past the last possible element'
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu   -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-I386-ADDR32 %s \
+// RUN:              --implicit-check-not 'past the last possible element'
+// RUN: %clang_cc1 -triple avr-pc-linux-gnu    -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-AVR-ADDR16 %s  \
+// RUN:              --implicit-check-not 'past the last possible element'
+
+struct S {
+  long long a;
+  char b;
+  long long c;
+  short d;
+};
+
+struct S s[];
+
+void f1() {
+  ++s[3].a;
+  ++s[7073650413200313099].b;
+  // CHECK-X86-ADDR64:  :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 64-bit {{.*}} (max possible 576460752303423488 elements)
+  // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements)
+  // CHECK-AVR-ADDR16:  :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+  ++s[7073650].c;
+  // CHECK-AVR-ADDR16:  :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+}
+
+long long ll[];
+
+void f2() {
+  ++ll[3];
+  ++ll[2705843009213693952];
+  // CHECK-X86-ADDR64:  :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 64-bit {{.*}} (max possible 2305843009213693952 elements)
+  // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 536870912 elements)
+  // CHECK-AVR-ADDR16:  :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 8192 elements)
+  ++ll[847073650];
+  // CHECK-I386-ADDR32: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 536870912 elements)
+  // CHECK-AVR-ADDR16:  :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 8192 elements)
+}
+
+void f3(struct S p[]) {
+  ++p[3].a;
+  ++p[7073650413200313099].b;
+  // CHECK-X86-ADDR64:  :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 64-bit {{.*}} (max possible 576460752303423488 elements)
+  // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements)
+  // CHECK-AVR-ADDR16:  :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+  ++p[7073650].c;
+  // CHECK-AVR-ADDR16:  :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+}
+
+void f4(struct S *p) {
+  p += 3;
+  p += 7073650413200313099;
+  // CHECK-X86-ADDR64:  :[[@LINE-1]]:3: warning: {{.*}} past the last possible element {{.*}} in 64-bit {{.*}} (max possible 576460752303423488 elements)
+  // CHECK-I386-ADDR32: :[[@LINE-2]]:3: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements)
+  // CHECK-AVR-ADDR16:  :[[@LINE-3]]:3: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+  p += 7073650;
+  // CHECK-AVR-ADDR16:  :[[@LINE-1]]:3: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+}
+
+struct BQ {
+  struct S bigblock[3276];
+};
+
+struct BQ bq[];
+
+void f5() {
+  ++bq[0].bigblock[0].a;
+  ++bq[1].bigblock[0].a;
+  // CHECK-AVR-ADDR16:  :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 1 element)
+}
Index: clang/test/Sema/const-eval.c
===================================================================
--- clang/test/Sema/const-eval.c
+++ clang/test/Sema/const-eval.c
@@ -140,10 +140,10 @@
 
 // We evaluate these by providing 2s' complement semantics in constant
 // expressions, like we do for integers.
-void *PR28739a = (__int128)(unsigned long)-1 + &PR28739a;
-void *PR28739b = &PR28739b + (__int128)(unsigned long)-1;
-__int128 PR28739c = (&PR28739c + (__int128)(unsigned long)-1) - &PR28739c;
-void *PR28739d = &(&PR28739d)[(__int128)(unsigned long)-1];
+void *PR28739a = (__int128)(unsigned long)-1 + &PR28739a;                  // expected-warning {{refers past the last possible element}}
+void *PR28739b = &PR28739b + (__int128)(unsigned long)-1;                  // expected-warning {{refers past the last possible element}}
+__int128 PR28739c = (&PR28739c + (__int128)(unsigned long)-1) - &PR28739c; // expected-warning {{refers past the last possible element}}
+void *PR28739d = &(&PR28739d)[(__int128)(unsigned long)-1];                // expected-warning {{refers past the last possible element}}
 
 struct PR35214_X {
   int k;
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -13941,11 +13941,11 @@
   const ConstantArrayType *ArrayTy =
       Context.getAsConstantArrayType(BaseExpr->getType());
 
-  if (!ArrayTy)
-    return;
-
-  const Type *BaseType = ArrayTy->getElementType().getTypePtr();
-  if (EffectiveType->isDependentType() || BaseType->isDependentType())
+  const Type *BaseType =
+      ArrayTy == nullptr ? nullptr : ArrayTy->getElementType().getTypePtr();
+  bool isUnboundedArray = (BaseType == nullptr);
+  if (EffectiveType->isDependentType() ||
+      (!isUnboundedArray && BaseType->isDependentType()))
     return;
 
   Expr::EvalResult Result;
@@ -13963,77 +13963,132 @@
     ND = ME->getMemberDecl();
 
   if (index.isUnsigned() || !index.isNegative()) {
-    // It is possible that the type of the base expression after
-    // IgnoreParenCasts is incomplete, even though the type of the base
-    // expression before IgnoreParenCasts is complete (see PR39746 for an
-    // example). In this case we have no information about whether the array
-    // access exceeds the array bounds. However we can still diagnose an array
-    // access which precedes the array bounds.
-    if (BaseType->isIncompleteType())
-      return;
+    if (isUnboundedArray) {
+      const auto &ASTC = getASTContext();
+      unsigned AddrBits =
+          ASTC.getTargetInfo().getPointerWidth(ASTC.getTargetAddressSpace(
+              EffectiveType->getCanonicalTypeInternal()));
+      if (index.getBitWidth() < AddrBits)
+        index = index.zext(AddrBits);
+      CharUnits ElemBytes = ASTC.getTypeSizeInChars(EffectiveType);
+      llvm::APInt apElemBytes(index.getBitWidth(), ElemBytes.getQuantity());
+      // If index has more active bits than address space, we already know
+      // we have a bounds violation to warn about.  Otherwise, compute
+      // address of (index + 1)th element, and warn about bounds violation
+      // only if that address exceeds address space.
+      if (index.getActiveBits() <= AddrBits) {
+        bool overflow;
+        llvm::APInt product(index);
+        product += 1;
+        product = product.umul_ov(apElemBytes, overflow);
+        if (!overflow && product.getActiveBits() <= AddrBits)
+          return;
+      }
 
-    llvm::APInt size = ArrayTy->getSize();
-    if (!size.isStrictlyPositive())
-      return;
+      // Need to compute max possible elements in address space, since  that
+      // is included in diag message.
+      llvm::APInt MaxElems = llvm::APInt::getMaxValue(AddrBits);
+      MaxElems =
+          MaxElems.zext(std::max(AddrBits << 1, apElemBytes.getBitWidth()));
+      MaxElems += 1;
+      if (MaxElems.getBitWidth() < apElemBytes.getBitWidth())
+        MaxElems = MaxElems.zext(apElemBytes.getBitWidth());
+      else if (apElemBytes.getBitWidth() < MaxElems.getBitWidth())
+        apElemBytes = apElemBytes.zext(MaxElems.getBitWidth());
+      MaxElems = MaxElems.udiv(apElemBytes);
+
+      unsigned DiagID = diag::warn_ptr_arith_exceeds_max_addressable_bounds;
+      if (ASE)
+        DiagID = diag::warn_array_index_exceeds_max_addressable_bounds;
+
+      // Diag message shows element size in bits and in "bytes" (platform-
+      // dependent CharUnits)
+      DiagRuntimeBehavior(BaseExpr->getBeginLoc(), BaseExpr,
+                          PDiag(DiagID)
+                              << index.toString(10, true) << AddrBits
+                              << (unsigned)ASTC.toBits(ElemBytes)
+                              << apElemBytes.toString(10, false)
+                              << MaxElems.toString(10, false)
+                              << (unsigned)MaxElems.getLimitedValue(~0U)
+                              << IndexExpr->getSourceRange());
+    } else {
+      // It is possible that the type of the base expression after
+      // IgnoreParenCasts is incomplete, even though the type of the base
+      // expression before IgnoreParenCasts is complete (see PR39746 for an
+      // example). In this case we have no information about whether the array
+      // access exceeds the array bounds. However we can still diagnose an array
+      // access which precedes the array bounds.
+      if (BaseType->isIncompleteType())
+        return;
+
+      llvm::APInt size = ArrayTy->getSize();
+      if (!size.isStrictlyPositive())
+        return;
 
-    if (BaseType != EffectiveType) {
-      // Make sure we're comparing apples to apples when comparing index to size
-      uint64_t ptrarith_typesize = Context.getTypeSize(EffectiveType);
-      uint64_t array_typesize = Context.getTypeSize(BaseType);
-      // Handle ptrarith_typesize being zero, such as when casting to void*
-      if (!ptrarith_typesize) ptrarith_typesize = 1;
-      if (ptrarith_typesize != array_typesize) {
-        // There's a cast to a different size type involved
-        uint64_t ratio = array_typesize / ptrarith_typesize;
-        // TODO: Be smarter about handling cases where array_typesize is not a
-        // multiple of ptrarith_typesize
-        if (ptrarith_typesize * ratio == array_typesize)
-          size *= llvm::APInt(size.getBitWidth(), ratio);
+      if (BaseType != EffectiveType) {
+        // Make sure we're comparing apples to apples when comparing index to
+        // size
+        uint64_t ptrarith_typesize = Context.getTypeSize(EffectiveType);
+        uint64_t array_typesize = Context.getTypeSize(BaseType);
+        // Handle ptrarith_typesize being zero, such as when casting to void*
+        if (!ptrarith_typesize)
+          ptrarith_typesize = 1;
+        if (ptrarith_typesize != array_typesize) {
+          // There's a cast to a different size type involved
+          uint64_t ratio = array_typesize / ptrarith_typesize;
+          // TODO: Be smarter about handling cases where array_typesize is not a
+          // multiple of ptrarith_typesize
+          if (ptrarith_typesize * ratio == array_typesize)
+            size *= llvm::APInt(size.getBitWidth(), ratio);
+        }
       }
-    }
 
-    if (size.getBitWidth() > index.getBitWidth())
-      index = index.zext(size.getBitWidth());
-    else if (size.getBitWidth() < index.getBitWidth())
-      size = size.zext(index.getBitWidth());
+      if (size.getBitWidth() > index.getBitWidth())
+        index = index.zext(size.getBitWidth());
+      else if (size.getBitWidth() < index.getBitWidth())
+        size = size.zext(index.getBitWidth());
 
-    // For array subscripting the index must be less than size, but for pointer
-    // arithmetic also allow the index (offset) to be equal to size since
-    // computing the next address after the end of the array is legal and
-    // commonly done e.g. in C++ iterators and range-based for loops.
-    if (AllowOnePastEnd ? index.ule(size) : index.ult(size))
-      return;
+      // For array subscripting the index must be less than size, but for
+      // pointer arithmetic also allow the index (offset) to be equal to size
+      // since computing the next address after the end of the array is legal
+      // and commonly done e.g. in C++ iterators and range-based for loops.
+      if (AllowOnePastEnd ? index.ule(size) : index.ult(size))
+        return;
 
-    // Also don't warn for arrays of size 1 which are members of some
-    // structure. These are often used to approximate flexible arrays in C89
-    // code.
-    if (IsTailPaddedMemberArray(*this, size, ND))
-      return;
+      // Also don't warn for arrays of size 1 which are members of some
+      // structure. These are often used to approximate flexible arrays in C89
+      // code.
+      if (IsTailPaddedMemberArray(*this, size, ND))
+        return;
 
-    // Suppress the warning if the subscript expression (as identified by the
-    // ']' location) and the index expression are both from macro expansions
-    // within a system header.
-    if (ASE) {
-      SourceLocation RBracketLoc = SourceMgr.getSpellingLoc(
-          ASE->getRBracketLoc());
-      if (SourceMgr.isInSystemHeader(RBracketLoc)) {
-        SourceLocation IndexLoc =
-            SourceMgr.getSpellingLoc(IndexExpr->getBeginLoc());
-        if (SourceMgr.isWrittenInSameFile(RBracketLoc, IndexLoc))
-          return;
+      // Suppress the warning if the subscript expression (as identified by the
+      // ']' location) and the index expression are both from macro expansions
+      // within a system header.
+      if (ASE) {
+        SourceLocation RBracketLoc =
+            SourceMgr.getSpellingLoc(ASE->getRBracketLoc());
+        if (SourceMgr.isInSystemHeader(RBracketLoc)) {
+          SourceLocation IndexLoc =
+              SourceMgr.getSpellingLoc(IndexExpr->getBeginLoc());
+          if (SourceMgr.isWrittenInSameFile(RBracketLoc, IndexLoc))
+            return;
+        }
       }
-    }
 
-    unsigned DiagID = diag::warn_ptr_arith_exceeds_bounds;
-    if (ASE)
-      DiagID = diag::warn_array_index_exceeds_bounds;
+      unsigned DiagID = diag::warn_ptr_arith_exceeds_bounds;
+      if (ASE)
+        DiagID = diag::warn_array_index_exceeds_bounds;
 
-    DiagRuntimeBehavior(BaseExpr->getBeginLoc(), BaseExpr,
-                        PDiag(DiagID) << index.toString(10, true)
-                                      << size.toString(10, true)
-                                      << (unsigned)size.getLimitedValue(~0U)
-                                      << IndexExpr->getSourceRange());
+      DiagRuntimeBehavior(BaseExpr->getBeginLoc(), BaseExpr,
+                          PDiag(DiagID) << index.toString(10, true)
+                                        << size.toString(10, true)
+                                        << (unsigned)size.getLimitedValue(~0U)
+                                        << IndexExpr->getSourceRange());
+    }
   } else {
+    if (isUnboundedArray)
+      return;
+
     unsigned DiagID = diag::warn_array_index_precedes_bounds;
     if (!ASE) {
       DiagID = diag::warn_ptr_arith_precedes_bounds;
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -8831,6 +8831,14 @@
 def warn_array_index_exceeds_bounds : Warning<
   "array index %0 is past the end of the array (which contains %1 "
   "element%s2)">, InGroup<ArrayBounds>;
+def warn_ptr_arith_exceeds_max_addressable_bounds : Warning<
+  "the pointer incremented by %0 refers past the last possible element for an array in %1-bit "
+  "address space containing %2-bit (%3-byte) elements (max possible %4 element%s5)">,
+  InGroup<ArrayBounds>;
+def warn_array_index_exceeds_max_addressable_bounds : Warning<
+  "array index %0 refers past the last possible element for an array in %1-bit "
+  "address space containing %2-bit (%3-byte) elements (max possible %4 element%s5)">,
+  InGroup<ArrayBounds>;
 def note_array_declared_here : Note<
   "array %0 declared here">;

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D86796: [Sema] Address-space sensitive index check for unbounded arrays

Reply via email to