ASDenysPetrov updated this revision to Diff 368905. ASDenysPetrov edited the summary of this revision. ASDenysPetrov added a comment.
Reworked the patch according to the discussion and taking UB into account. Moved `Expr::getExprForConstArrayByRawIndex` to `RegionStoreManager`. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D104285/new/ https://reviews.llvm.org/D104285 Files: clang/include/clang/AST/Type.h clang/lib/AST/Type.cpp clang/lib/StaticAnalyzer/Core/RegionStore.cpp clang/test/Analysis/initialization.cpp
Index: clang/test/Analysis/initialization.cpp =================================================================== --- clang/test/Analysis/initialization.cpp +++ clang/test/Analysis/initialization.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -std=c++14 -triple i386-apple-darwin10 -analyze -analyzer-checker=core.builtin,debug.ExprInspection -verify %s +// RUN: %clang_cc1 -std=c++14 -triple i386-apple-darwin10 -analyze -analyzer-checker=core.uninitialized.Assign,core.builtin,debug.ExprInspection,core.uninitialized.UndefReturn -verify %s void clang_analyzer_eval(int); @@ -18,3 +18,106 @@ // FIXME: Should recognize that it is 0. clang_analyzer_eval(arr[i][0]); // expected-warning{{UNKNOWN}} } + +int const glob_arr1[2][2][3] = {}; +void direct_index1() { + int const *ptr = (int const *)glob_arr1; + clang_analyzer_eval(ptr[0] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(ptr[1] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(ptr[2] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(ptr[3] == 0); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[4] == 0); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[5] == 0); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[6] == 0); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[7] == 0); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[8] == 0); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[9] == 0); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[10] == 0); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[11] == 0); // expected-warning{{UNDEFINED}} +} + +int const glob_arr2[2][2][3] = {{{1, 2, 3}, {4, 5, 6}}, {{7, 8, 9}, {10, 11, 12}}}; +void direct_index2() { + int const *ptr = glob_arr2[0][0]; + clang_analyzer_eval(ptr[0] == 1); // expected-warning{{TRUE}} + clang_analyzer_eval(ptr[1] == 2); // expected-warning{{TRUE}} + clang_analyzer_eval(ptr[2] == 3); // expected-warning{{TRUE}} + clang_analyzer_eval(ptr[3] == 4); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[4] == 5); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[5] == 6); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[6] == 7); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[7] == 8); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[8] == 9); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[9] == 10); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[10] == 11); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[11] == 12); // expected-warning{{UNDEFINED}} +} + +int const glob_arr3[2][2][3] = {{{}, {}}, {{}, {}}}; +void direct_index3() { + int const *ptr = &(glob_arr3[0][0][0]); + clang_analyzer_eval(ptr[0] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(ptr[1] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(ptr[2] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(ptr[3] == 0); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[4] == 0); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[5] == 0); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[6] == 0); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[7] == 0); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[8] == 0); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[9] == 0); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[10] == 0); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[11] == 0); // expected-warning{{UNDEFINED}} +} + +int const glob_arr4[2][2][3] = {{{1, 2}, {}}, {{7, 8}, {10, 11, 12}}}; +void direct_index4() { + int const *ptr = (int const *)glob_arr4[0]; + clang_analyzer_eval(ptr[0] == 1); // expected-warning{{TRUE}} + clang_analyzer_eval(ptr[1] == 2); // expected-warning{{TRUE}} + clang_analyzer_eval(ptr[2] == 0); // expected-warning{{TRUE}} + clang_analyzer_eval(ptr[3] == 0); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[4] == 0); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[5] == 0); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[6] == 7); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[7] == 8); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[8] == 0); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[9] == 10); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[10] == 11); // expected-warning{{UNDEFINED}} + clang_analyzer_eval(ptr[11] == 12); // expected-warning{{UNDEFINED}} +} + +int glob_arr5[2][2][3] = {{{1, 2}}, {{7}}}; +void direct_index5() { + int *ptr = (int *)glob_arr5; + clang_analyzer_eval(ptr[0] == 1); // expected-warning{{TRUE}} expected-warning{{FALSE}} + clang_analyzer_eval(ptr[1] == 2); // expected-warning{{TRUE}} expected-warning{{FALSE}} + clang_analyzer_eval(ptr[2] == 0); // expected-warning{{TRUE}} expected-warning{{FALSE}} + clang_analyzer_eval(ptr[3] == 0); // expected-warning{{TRUE}} expected-warning{{FALSE}} + clang_analyzer_eval(ptr[4] == 0); // expected-warning{{TRUE}} expected-warning{{FALSE}} + clang_analyzer_eval(ptr[5] == 0); // expected-warning{{TRUE}} expected-warning{{FALSE}} + clang_analyzer_eval(ptr[6] == 7); // expected-warning{{TRUE}} expected-warning{{FALSE}} + clang_analyzer_eval(ptr[7] == 0); // expected-warning{{TRUE}} expected-warning{{FALSE}} + clang_analyzer_eval(ptr[8] == 0); // expected-warning{{TRUE}} expected-warning{{FALSE}} + clang_analyzer_eval(ptr[9] == 0); // expected-warning{{TRUE}} expected-warning{{FALSE}} + clang_analyzer_eval(ptr[10] == 0); // expected-warning{{TRUE}} expected-warning{{FALSE}} + clang_analyzer_eval(ptr[11] == 0); // expected-warning{{TRUE}} expected-warning{{FALSE}} +} + +void direct_invalid_index1() { + const int *ptr = (const int *)glob_arr1; + int idx = -1; + auto x = ptr[idx]; // expected-warning{{garbage or undefined}} +} + +void direct_invalid_index2() { + const int *ptr = (const int *)glob_arr1; + int idx = 42; + auto x = ptr[idx]; // expected-warning{{garbage or undefined}} +} + +const float glob_arr6[] = { + 0.0000, 0.0235, 0.0470, 0.0706, 0.0941, 0.1176}; +float no_warn_garbage_value() { + return glob_arr6[0]; // no-warning (garbage or undefined) +} Index: clang/lib/StaticAnalyzer/Core/RegionStore.cpp =================================================================== --- clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -30,6 +30,7 @@ #include "llvm/ADT/ImmutableMap.h" #include "llvm/ADT/Optional.h" #include "llvm/Support/raw_ostream.h" +#include <numeric> #include <utility> using namespace clang; @@ -577,6 +578,10 @@ SVal getLazyBinding(const SubRegion *LazyBindingRegion, RegionBindingsRef LazyBinding); + const Expr *getExprFromInitListByConstArrayRawIndex( + const InitListExpr *ILE, const SmallVectorImpl<uint64_t> &Extents, + uint64_t Idx); + /// Get bindings for the values in a struct and return a CompoundVal, used /// when doing struct copy: /// struct s x, y; @@ -1668,23 +1673,43 @@ if (const auto *InitList = dyn_cast<InitListExpr>(Init)) { // The array index has to be known. if (auto CI = R->getIndex().getAs<nonloc::ConcreteInt>()) { - int64_t i = CI->getValue().getSExtValue(); - // If it is known that the index is out of bounds, we can return - // an undefined value. - if (i < 0) + // If it is not an array, return Undef. + QualType T = VD->getType(); + const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(T); + if (!CAT) + return UndefinedVal(); + + // C++20 [expr.add] 7.6.6.4 (excerpt): + // If P points to an array element i of an array object x with n + // elements, where i < 0 or i > n, the behavior is undefined. + // C++20 6.5.6.8 + // NOTE: The Standard declares that an array element is legal when + // i <= n. But dereferencing is not allowed on the "one past the + // last element" (when i == n). So, we return Undefined as well. + // For multidimensional arrays this means that pointer arithmetics + // shall not go outside the lowest array extent. Example: + // const int arr[8][2][5] = {}; // [5] is the lowest extent. + // const int *ptr = (const int*)arr; // &arr[0][0][0] + // int x = ptr[42]; // `ptr` index shall be in a range [0, 5]. + SmallVector<uint64_t, 2> Extents = CAT->getAllExtents(); + const uint64_t LowestExtent = Extents.back(); + const llvm::APSInt Idx = CI->getValue(); + uint64_t I = static_cast<uint64_t>(Idx.getExtValue()); + if (Idx < 0 || I >= LowestExtent) return UndefinedVal(); - if (auto CAT = Ctx.getAsConstantArrayType(VD->getType())) - if (CAT->getSize().sle(i)) - return UndefinedVal(); + // Get an actual Expr by index. + const Expr *E = + getExprFromInitListByConstArrayRawIndex(InitList, Extents, I); - // If there is a list, but no init, it must be zero. - if (i >= InitList->getNumInits()) + // If E is the same as InitList, then there is no value specified + // in the list and we shall return a zero value. + if (E == InitList) return svalBuilder.makeZeroVal(R->getElementType()); - if (const Expr *ElemInit = InitList->getInit(i)) - if (Optional<SVal> V = svalBuilder.getConstantVal(ElemInit)) - return *V; + // Return a constant value. + if (Optional<SVal> V = svalBuilder.getConstantVal(E)) + return *V; } } } @@ -1731,6 +1756,53 @@ return getBindingForFieldOrElementCommon(B, R, R->getElementType()); } +/// Return a value-expression under the given index. +/// +/// \param Idx Direct index beginning from the first value of the list. +/// It's a direct index as it would be in a one-dimentional array. +/// For instance for `Idx = 3`: +/// - `const T x[2][2] = {{1,2},{3,4}}` returns '4'; +/// - `const T x[2][2] = {{1,},{3,4}}` returns '4'; +/// - `const T x[3][2] = {{1,},{},{5,6}}` returns '0'. +/// \returns +/// - value-expression for the valid index; +/// - `ILE` if there's no explicit expression for the valid index; +/// - `nullptr` if `Idx < 0` or `Idx > lowest extent`. +const Expr *RegionStoreManager::getExprFromInitListByConstArrayRawIndex( + const InitListExpr *ILE, const SmallVectorImpl<uint64_t> &Extents, + uint64_t Idx) { + // Calculate array total size. + uint64_t Size = std::accumulate(Extents.begin(), Extents.end(), uint64_t(1), + std::multiplies<uint64_t>()); + + // We can iterate through the multi-dimensional array the same as through + // one-dimensional array, because of the continuous layout in memory. + const InitListExpr *InitList = ILE; + uint64_t I = 0; + for (uint64_t Ext : Extents) { + Size /= Ext; + I = Idx / Size; + Idx -= I * Size; + + // If there is a list but no value, in theory, it must be zero. + // Return `ILE` to notify the user of this particular case. + // This should be handled by a caller. + if (I >= InitList->getNumInits()) + return ILE; + + const Expr *E = InitList->getInit(I); + + // If it is not an InitListExpr, then we've reached the actual values. + // Return this Expr. + if (!(InitList = dyn_cast<InitListExpr>(E))) + return cast<Expr>(E); + } + + // We shouldn't reach here unless we missed some legal syntax while parsing. + // Otherwise, fix parsing logic above. + llvm_unreachable("List initialization is formed in an unusual way."); +} + SVal RegionStoreManager::getBindingForField(RegionBindingsConstRef B, const FieldRegion* R) { Index: clang/lib/AST/Type.cpp =================================================================== --- clang/lib/AST/Type.cpp +++ clang/lib/AST/Type.cpp @@ -138,6 +138,18 @@ ArrayTypeBits.SizeModifier = sm; } +/// Return an array with extents of the declared array type. +/// +/// E.g. for `const int x[1][2][3];` returns {1,2,3}. +SmallVector<uint64_t, 2> ConstantArrayType::getAllExtents() const { + SmallVector<uint64_t, 2> Extents; + const ConstantArrayType *CAT = this; + do { + Extents.push_back(*CAT->getSize().getRawData()); + } while ((CAT = dyn_cast<ConstantArrayType>(CAT->getElementType()))); + return Extents; +} + unsigned ConstantArrayType::getNumAddressingBits(const ASTContext &Context, QualType ElementType, const llvm::APInt &NumElements) { Index: clang/include/clang/AST/Type.h =================================================================== --- clang/include/clang/AST/Type.h +++ clang/include/clang/AST/Type.h @@ -2949,6 +2949,7 @@ public: const llvm::APInt &getSize() const { return Size; } + SmallVector<uint64_t, 2> getAllExtents() const; const Expr *getSizeExpr() const { return ConstantArrayTypeBits.HasStoredSizeExpr ? *getTrailingObjects<const Expr *>()
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits