ASDenysPetrov updated this revision to Diff 368905.
ASDenysPetrov edited the summary of this revision.
ASDenysPetrov added a comment.

Reworked the patch according to the discussion and taking UB into account. 
Moved `Expr::getExprForConstArrayByRawIndex` to `RegionStoreManager`.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104285/new/

https://reviews.llvm.org/D104285

Files:
  clang/include/clang/AST/Type.h
  clang/lib/AST/Type.cpp
  clang/lib/StaticAnalyzer/Core/RegionStore.cpp
  clang/test/Analysis/initialization.cpp

Index: clang/test/Analysis/initialization.cpp
===================================================================
--- clang/test/Analysis/initialization.cpp
+++ clang/test/Analysis/initialization.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -std=c++14 -triple i386-apple-darwin10 -analyze -analyzer-checker=core.builtin,debug.ExprInspection -verify %s
+// RUN: %clang_cc1 -std=c++14 -triple i386-apple-darwin10 -analyze -analyzer-checker=core.uninitialized.Assign,core.builtin,debug.ExprInspection,core.uninitialized.UndefReturn -verify %s
 
 void clang_analyzer_eval(int);
 
@@ -18,3 +18,106 @@
   // FIXME: Should recognize that it is 0.
   clang_analyzer_eval(arr[i][0]); // expected-warning{{UNKNOWN}}
 }
+
+int const glob_arr1[2][2][3] = {};
+void direct_index1() {
+  int const *ptr = (int const *)glob_arr1;
+  clang_analyzer_eval(ptr[0] == 0);  // expected-warning{{TRUE}}
+  clang_analyzer_eval(ptr[1] == 0);  // expected-warning{{TRUE}}
+  clang_analyzer_eval(ptr[2] == 0);  // expected-warning{{TRUE}}
+  clang_analyzer_eval(ptr[3] == 0);  // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[4] == 0);  // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[5] == 0);  // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[6] == 0);  // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[7] == 0);  // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[8] == 0);  // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[9] == 0);  // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[10] == 0); // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[11] == 0); // expected-warning{{UNDEFINED}}
+}
+
+int const glob_arr2[2][2][3] = {{{1, 2, 3}, {4, 5, 6}}, {{7, 8, 9}, {10, 11, 12}}};
+void direct_index2() {
+  int const *ptr = glob_arr2[0][0];
+  clang_analyzer_eval(ptr[0] == 1);   // expected-warning{{TRUE}}
+  clang_analyzer_eval(ptr[1] == 2);   // expected-warning{{TRUE}}
+  clang_analyzer_eval(ptr[2] == 3);   // expected-warning{{TRUE}}
+  clang_analyzer_eval(ptr[3] == 4);   // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[4] == 5);   // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[5] == 6);   // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[6] == 7);   // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[7] == 8);   // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[8] == 9);   // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[9] == 10);  // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[10] == 11); // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[11] == 12); // expected-warning{{UNDEFINED}}
+}
+
+int const glob_arr3[2][2][3] = {{{}, {}}, {{}, {}}};
+void direct_index3() {
+  int const *ptr = &(glob_arr3[0][0][0]);
+  clang_analyzer_eval(ptr[0] == 0);  // expected-warning{{TRUE}}
+  clang_analyzer_eval(ptr[1] == 0);  // expected-warning{{TRUE}}
+  clang_analyzer_eval(ptr[2] == 0);  // expected-warning{{TRUE}}
+  clang_analyzer_eval(ptr[3] == 0);  // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[4] == 0);  // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[5] == 0);  // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[6] == 0);  // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[7] == 0);  // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[8] == 0);  // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[9] == 0);  // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[10] == 0); // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[11] == 0); // expected-warning{{UNDEFINED}}
+}
+
+int const glob_arr4[2][2][3] = {{{1, 2}, {}}, {{7, 8}, {10, 11, 12}}};
+void direct_index4() {
+  int const *ptr = (int const *)glob_arr4[0];
+  clang_analyzer_eval(ptr[0] == 1);   // expected-warning{{TRUE}}
+  clang_analyzer_eval(ptr[1] == 2);   // expected-warning{{TRUE}}
+  clang_analyzer_eval(ptr[2] == 0);   // expected-warning{{TRUE}}
+  clang_analyzer_eval(ptr[3] == 0);   // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[4] == 0);   // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[5] == 0);   // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[6] == 7);   // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[7] == 8);   // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[8] == 0);   // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[9] == 10);  // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[10] == 11); // expected-warning{{UNDEFINED}}
+  clang_analyzer_eval(ptr[11] == 12); // expected-warning{{UNDEFINED}}
+}
+
+int glob_arr5[2][2][3] = {{{1, 2}}, {{7}}};
+void direct_index5() {
+  int *ptr = (int *)glob_arr5;
+  clang_analyzer_eval(ptr[0] == 1);  // expected-warning{{TRUE}} expected-warning{{FALSE}}
+  clang_analyzer_eval(ptr[1] == 2);  // expected-warning{{TRUE}} expected-warning{{FALSE}}
+  clang_analyzer_eval(ptr[2] == 0);  // expected-warning{{TRUE}} expected-warning{{FALSE}}
+  clang_analyzer_eval(ptr[3] == 0);  // expected-warning{{TRUE}} expected-warning{{FALSE}}
+  clang_analyzer_eval(ptr[4] == 0);  // expected-warning{{TRUE}} expected-warning{{FALSE}}
+  clang_analyzer_eval(ptr[5] == 0);  // expected-warning{{TRUE}} expected-warning{{FALSE}}
+  clang_analyzer_eval(ptr[6] == 7);  // expected-warning{{TRUE}} expected-warning{{FALSE}}
+  clang_analyzer_eval(ptr[7] == 0);  // expected-warning{{TRUE}} expected-warning{{FALSE}}
+  clang_analyzer_eval(ptr[8] == 0);  // expected-warning{{TRUE}} expected-warning{{FALSE}}
+  clang_analyzer_eval(ptr[9] == 0);  // expected-warning{{TRUE}} expected-warning{{FALSE}}
+  clang_analyzer_eval(ptr[10] == 0); // expected-warning{{TRUE}} expected-warning{{FALSE}}
+  clang_analyzer_eval(ptr[11] == 0); // expected-warning{{TRUE}} expected-warning{{FALSE}}
+}
+
+void direct_invalid_index1() {
+  const int *ptr = (const int *)glob_arr1;
+  int idx = -1;
+  auto x = ptr[idx]; // expected-warning{{garbage or undefined}}
+}
+
+void direct_invalid_index2() {
+  const int *ptr = (const int *)glob_arr1;
+  int idx = 42;
+  auto x = ptr[idx]; // expected-warning{{garbage or undefined}}
+}
+
+const float glob_arr6[] = {
+    0.0000, 0.0235, 0.0470, 0.0706, 0.0941, 0.1176};
+float no_warn_garbage_value() {
+  return glob_arr6[0]; // no-warning (garbage or undefined)
+}
Index: clang/lib/StaticAnalyzer/Core/RegionStore.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ clang/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -30,6 +30,7 @@
 #include "llvm/ADT/ImmutableMap.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/Support/raw_ostream.h"
+#include <numeric>
 #include <utility>
 
 using namespace clang;
@@ -577,6 +578,10 @@
   SVal getLazyBinding(const SubRegion *LazyBindingRegion,
                       RegionBindingsRef LazyBinding);
 
+  const Expr *getExprFromInitListByConstArrayRawIndex(
+      const InitListExpr *ILE, const SmallVectorImpl<uint64_t> &Extents,
+      uint64_t Idx);
+
   /// Get bindings for the values in a struct and return a CompoundVal, used
   /// when doing struct copy:
   /// struct s x, y;
@@ -1668,23 +1673,43 @@
         if (const auto *InitList = dyn_cast<InitListExpr>(Init)) {
           // The array index has to be known.
           if (auto CI = R->getIndex().getAs<nonloc::ConcreteInt>()) {
-            int64_t i = CI->getValue().getSExtValue();
-            // If it is known that the index is out of bounds, we can return
-            // an undefined value.
-            if (i < 0)
+            // If it is not an array, return Undef.
+            QualType T = VD->getType();
+            const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(T);
+            if (!CAT)
+              return UndefinedVal();
+
+            // C++20 [expr.add] 7.6.6.4 (excerpt):
+            //   If P points to an array element i of an array object x with n
+            //   elements, where i < 0 or i > n, the behavior is undefined.
+            // C++20 6.5.6.8
+            // NOTE: The Standard declares that an array element is legal when
+            // i <= n. But dereferencing is not allowed on the "one past the
+            // last element" (when i == n). So, we return Undefined as well.
+            // For multidimensional arrays this means that pointer arithmetics
+            // shall not go outside the lowest array extent. Example:
+            //   const int arr[8][2][5] = {}; // [5] is the lowest extent.
+            //   const int *ptr = (const int*)arr; // &arr[0][0][0]
+            //   int x = ptr[42]; // `ptr` index shall be in a range [0, 5].
+            SmallVector<uint64_t, 2> Extents = CAT->getAllExtents();
+            const uint64_t LowestExtent = Extents.back();
+            const llvm::APSInt Idx = CI->getValue();
+            uint64_t I = static_cast<uint64_t>(Idx.getExtValue());
+            if (Idx < 0 || I >= LowestExtent)
               return UndefinedVal();
 
-            if (auto CAT = Ctx.getAsConstantArrayType(VD->getType()))
-              if (CAT->getSize().sle(i))
-                return UndefinedVal();
+            // Get an actual Expr by index.
+            const Expr *E =
+                getExprFromInitListByConstArrayRawIndex(InitList, Extents, I);
 
-            // If there is a list, but no init, it must be zero.
-            if (i >= InitList->getNumInits())
+            // If E is the same as InitList, then there is no value specified
+            // in the list and we shall return a zero value.
+            if (E == InitList)
               return svalBuilder.makeZeroVal(R->getElementType());
 
-            if (const Expr *ElemInit = InitList->getInit(i))
-              if (Optional<SVal> V = svalBuilder.getConstantVal(ElemInit))
-                return *V;
+            // Return a constant value.
+            if (Optional<SVal> V = svalBuilder.getConstantVal(E))
+              return *V;
           }
         }
       }
@@ -1731,6 +1756,53 @@
   return getBindingForFieldOrElementCommon(B, R, R->getElementType());
 }
 
+/// Return a value-expression under the given index.
+///
+/// \param Idx Direct index beginning from the first value of the list.
+/// It's a direct index as it would be in a one-dimentional array.
+/// For instance for `Idx = 3`:
+/// - `const T x[2][2] = {{1,2},{3,4}}` returns '4';
+/// - `const T x[2][2] = {{1,},{3,4}}` returns '4';
+/// - `const T x[3][2] = {{1,},{},{5,6}}` returns '0'.
+/// \returns
+/// - value-expression for the valid index;
+/// - `ILE` if there's no explicit expression for the valid index;
+/// - `nullptr` if `Idx < 0` or `Idx > lowest extent`.
+const Expr *RegionStoreManager::getExprFromInitListByConstArrayRawIndex(
+    const InitListExpr *ILE, const SmallVectorImpl<uint64_t> &Extents,
+    uint64_t Idx) {
+  // Calculate array total size.
+  uint64_t Size = std::accumulate(Extents.begin(), Extents.end(), uint64_t(1),
+                                  std::multiplies<uint64_t>());
+
+  // We can iterate through the multi-dimensional array the same as through
+  // one-dimensional array, because of the continuous layout in memory.
+  const InitListExpr *InitList = ILE;
+  uint64_t I = 0;
+  for (uint64_t Ext : Extents) {
+    Size /= Ext;
+    I = Idx / Size;
+    Idx -= I * Size;
+
+    // If there is a list but no value, in theory, it must be zero.
+    // Return `ILE` to notify the user of this particular case.
+    // This should be handled by a caller.
+    if (I >= InitList->getNumInits())
+      return ILE;
+
+    const Expr *E = InitList->getInit(I);
+
+    // If it is not an InitListExpr, then we've reached the actual values.
+    // Return this Expr.
+    if (!(InitList = dyn_cast<InitListExpr>(E)))
+      return cast<Expr>(E);
+  }
+
+  // We shouldn't reach here unless we missed some legal syntax while parsing.
+  // Otherwise, fix parsing logic above.
+  llvm_unreachable("List initialization is formed in an unusual way.");
+}
+
 SVal RegionStoreManager::getBindingForField(RegionBindingsConstRef B,
                                             const FieldRegion* R) {
 
Index: clang/lib/AST/Type.cpp
===================================================================
--- clang/lib/AST/Type.cpp
+++ clang/lib/AST/Type.cpp
@@ -138,6 +138,18 @@
   ArrayTypeBits.SizeModifier = sm;
 }
 
+/// Return an array with extents of the declared array type.
+///
+/// E.g. for `const int x[1][2][3];` returns {1,2,3}.
+SmallVector<uint64_t, 2> ConstantArrayType::getAllExtents() const {
+  SmallVector<uint64_t, 2> Extents;
+  const ConstantArrayType *CAT = this;
+  do {
+    Extents.push_back(*CAT->getSize().getRawData());
+  } while ((CAT = dyn_cast<ConstantArrayType>(CAT->getElementType())));
+  return Extents;
+}
+
 unsigned ConstantArrayType::getNumAddressingBits(const ASTContext &Context,
                                                  QualType ElementType,
                                                const llvm::APInt &NumElements) {
Index: clang/include/clang/AST/Type.h
===================================================================
--- clang/include/clang/AST/Type.h
+++ clang/include/clang/AST/Type.h
@@ -2949,6 +2949,7 @@
 
 public:
   const llvm::APInt &getSize() const { return Size; }
+  SmallVector<uint64_t, 2> getAllExtents() const;
   const Expr *getSizeExpr() const {
     return ConstantArrayTypeBits.HasStoredSizeExpr
                ? *getTrailingObjects<const Expr *>()
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to