[llvm] [clang-tools-extra] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)

via cfe-commits Fri, 26 Jan 2024 10:38:41 -0800

=?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>,
=?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>,
=?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>,
=?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>,
=?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>,
=?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>,
=?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>,
=?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>,
=?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>,
=?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>,
=?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>,
=?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/78...@github.com>



https://github.com/NagyDonat updated 
https://github.com/llvm/llvm-project/pull/78315

>From c75c05c6e894a46797913c5bdccb240cbcc01ae9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com>
Date: Tue, 12 Dec 2023 13:07:54 +0100
Subject: [PATCH 01/14] [analyzer] Support interestingness in ArrayBoundV2

This commit improves alpha.security.ArrayBoundV2 in two connected areas:
(1) It calls `markInteresting()` on the symbolic values that are
responsible for the out of bounds access.
(2) Its index-is-in-bounds assumptions are reported in note tags if they
provide information about the value of an interesting symbol.

This commit is limited to "display" changes: it introduces new
diagnostic pieces (potentially to bugs found by other checkers), but the
ArrayBoundV2 will make the same assumptions and detect the same bugs
before and after this change.

As a minor unrelated change, this commit also updates/removes some very
old comments which became obsolate due to my previous changes.
---
 .../Checkers/ArrayBoundCheckerV2.cpp          | 345 ++++++++++++++----
 .../test/Analysis/out-of-bounds-diagnostics.c |  10 +
 clang/test/Analysis/out-of-bounds-notes.c     | 128 +++++++
 3 files changed, 413 insertions(+), 70 deletions(-)
 create mode 100644 clang/test/Analysis/out-of-bounds-notes.c

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp 
b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index 6c7a1601402efac..4241fa3a2ce8af6 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -33,7 +33,66 @@ using namespace taint;
 using llvm::formatv;
 
 namespace {
-enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
+class StateUpdateReporter {
+  const SubRegion *Reg;
+  NonLoc ByteOffsetVal;
+  std::optional<QualType> ElementType = std::nullopt;
+  std::optional<int64_t> ElementSize = std::nullopt;
+  bool AssumedNonNegative = false;
+  std::optional<NonLoc> AssumedUpperBound = std::nullopt;
+
+public:
+  StateUpdateReporter(const SubRegion *R, NonLoc ByteOffsVal, const Expr *E,
+                      CheckerContext &C)
+      : Reg(R), ByteOffsetVal(ByteOffsVal) {
+    initializeElementInfo(E, C);
+  }
+
+  void initializeElementInfo(const Expr *E, CheckerContext &C) {
+    if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E)) {
+      const MemRegion *SubscriptBaseReg =
+          C.getSVal(ASE->getBase()).getAsRegion();
+      if (!SubscriptBaseReg)
+        return;
+      SubscriptBaseReg = SubscriptBaseReg->StripCasts();
+      if (!isa_and_nonnull<ElementRegion>(SubscriptBaseReg)) {
+        ElementType = ASE->getType();
+        ElementSize =
+            C.getASTContext().getTypeSizeInChars(*ElementType).getQuantity();
+      }
+    }
+  }
+  void recordNonNegativeAssumption() { AssumedNonNegative = true; }
+  void recordUpperBoundAssumption(NonLoc UpperBoundVal) {
+    AssumedUpperBound = UpperBoundVal;
+  }
+
+  const NoteTag *createNoteTag(CheckerContext &C) const;
+
+private:
+  std::string getMessage(PathSensitiveBugReport &BR) const;
+
+  /// Return true if information about the value of `Sym` can put constraints
+  /// on some symbol which is interesting within the bug report `BR`.
+  /// In particular, this returns true when `Sym` is interesting within `BR`;
+  /// but it also returns true if `Sym` is an expression that contains integer
+  /// constants and a single symbolic operand which is interesting (in `BR`).
+  /// We need to use this instead of plain `BR.isInteresting()` because if we
+  /// are analyzing code like
+  ///   int array[10];
+  ///   int f(int arg) {
+  ///     return array[arg] && array[arg + 10];
+  ///   }
+  /// then the byte offsets are `arg * 4` and `(arg + 10) * 4`, which are not
+  /// sub-expressions of each other (but `getSimplifiedOffsets` is smart enough
+  /// to detect this out of bounds access).
+  static bool providesInformationAboutInteresting(SymbolRef Sym,
+                                                  PathSensitiveBugReport &BR);
+  static bool providesInformationAboutInteresting(SVal SV,
+                                                  PathSensitiveBugReport &BR) {
+    return providesInformationAboutInteresting(SV.getAsSymbol(), BR);
+  }
+};
 
 struct Messages {
   std::string Short, Full;
@@ -54,11 +113,14 @@ class ArrayBoundCheckerV2 : public 
Checker<check::PostStmt<ArraySubscriptExpr>,
 
   void performCheck(const Expr *E, CheckerContext &C) const;
 
-  void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind,
-                 NonLoc Offset, Messages Msgs) const;
+  void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, Messages Msgs,
+                 NonLoc Offset, bool IsTaintBug = false) const;
 
   static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);
 
+  static bool isIdiomaticPastTheEndPtr(const Expr *E, ProgramStateRef State,
+                                       NonLoc Offset, NonLoc Limit,
+                                       CheckerContext &C);
   static bool isInAddressOf(const Stmt *S, ASTContext &AC);
 
 public:
@@ -133,12 +195,19 @@ computeOffset(ProgramStateRef State, SValBuilder &SVB, 
SVal Location) {
   return std::nullopt;
 }
 
-// TODO: once the constraint manager is smart enough to handle non simplified
-// symbolic expressions remove this function. Note that this can not be used in
-// the constraint manager as is, since this does not handle overflows. It is
-// safe to assume, however, that memory offsets will not overflow.
-// NOTE: callers of this function need to be aware of the effects of overflows
-// and signed<->unsigned conversions!
+// NOTE: This function is the "heart" of this checker. It simplifies
+// inequalities with transformations that are valid (and very elementary) in
+// pure mathematics, but become invalid if we use them in C++ number model
+// where the calculations may overflow.
+// Due to the overflow issues I think it's impossible (or at least not
+// practical) to integrate this kind of simplification into the resolution of
+// arbitrary inequalities (i.e. the code of `evalBinOp`); but this function
+// produces valid results if the arguments are memory offsets which are known
+// to be << SIZE_MAX.
+// TODO: This algorithm should be moved to a central location where it's
+// available for other checkers that need to compare memory offsets.
+// NOTE: When using the results of this function, don't forget that `evalBinOp`
+// uses the evaluation rules of C++, so e.g. `(size_t)123 < -1`!
 static std::pair<NonLoc, nonloc::ConcreteInt>
 getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
                      SValBuilder &svalBuilder) {
@@ -239,13 +308,8 @@ static std::optional<int64_t> getConcreteValue(NonLoc SV) {
   return std::nullopt;
 }
 
-static std::string getShortMsg(OOB_Kind Kind, std::string RegName) {
-  static const char *ShortMsgTemplates[] = {
-      "Out of bound access to memory preceding {0}",
-      "Out of bound access to memory after the end of {0}",
-      "Potential out of bound access to {0} with tainted offset"};
-
-  return formatv(ShortMsgTemplates[Kind], RegName);
+static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) {
+  return SV ? getConcreteValue(*SV) : std::nullopt;
 }
 
 static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) {
@@ -255,7 +319,28 @@ static Messages getPrecedesMsgs(const SubRegion *Region, 
NonLoc Offset) {
   Out << "Access of " << RegName << " at negative byte offset";
   if (auto ConcreteIdx = Offset.getAs<nonloc::ConcreteInt>())
     Out << ' ' << ConcreteIdx->getValue();
-  return {getShortMsg(OOB_Precedes, RegName), std::string(Buf)};
+  return {formatv("Out of bound access to memory preceding {0}", RegName),
+          std::string(Buf)};
+}
+
+/// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if
+/// it can be performed (`Divisor` is nonzero and there is no remainder). The
+/// values `Val1` and `Val2` may be nullopt and in that case the corresponding
+/// division is considered to be successful.
+bool tryDividePair(std::optional<int64_t> &Val1, std::optional<int64_t> &Val2,
+                   int64_t Divisor) {
+  if (!Divisor)
+    return false;
+  const bool Val1HasRemainder = Val1 && *Val1 % Divisor;
+  const bool Val2HasRemainder = Val2 && *Val2 % Divisor;
+  if (!Val1HasRemainder && !Val2HasRemainder) {
+    if (Val1)
+      *Val1 /= Divisor;
+    if (Val2)
+      *Val2 /= Divisor;
+    return true;
+  }
+  return false;
 }
 
 static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region,
@@ -268,18 +353,9 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const 
SubRegion *Region,
   std::optional<int64_t> OffsetN = getConcreteValue(Offset);
   std::optional<int64_t> ExtentN = getConcreteValue(Extent);
 
-  bool UseByteOffsets = true;
-  if (int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity()) {
-    const bool OffsetHasRemainder = OffsetN && *OffsetN % ElemSize;
-    const bool ExtentHasRemainder = ExtentN && *ExtentN % ElemSize;
-    if (!OffsetHasRemainder && !ExtentHasRemainder) {
-      UseByteOffsets = false;
-      if (OffsetN)
-        *OffsetN /= ElemSize;
-      if (ExtentN)
-        *ExtentN /= ElemSize;
-    }
-  }
+  int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity();
+
+  bool UseByteOffsets = !tryDividePair(OffsetN, ExtentN, ElemSize);
 
   SmallString<256> Buf;
   llvm::raw_svector_ostream Out(Buf);
@@ -307,7 +383,9 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const 
SubRegion *Region,
       Out << "s";
   }
 
-  return {getShortMsg(OOB_Exceeds, RegName), std::string(Buf)};
+  return {
+      formatv("Out of bound access to memory after the end of {0}", RegName),
+      std::string(Buf)};
 }
 
 static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName) {
@@ -318,17 +396,87 @@ static Messages getTaintMsgs(const SubRegion *Region, 
const char *OffsetName) {
                   RegName, OffsetName)};
 }
 
-void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const 
{
-  // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
-  // some new logic here that reasons directly about memory region extents.
-  // Once that logic is more mature, we can bring it back to assumeInBound()
-  // for all clients to use.
-  //
-  // The algorithm we are using here for bounds checking is to see if the
-  // memory access is within the extent of the base region.  Since we
-  // have some flexibility in defining the base region, we can achieve
-  // various levels of conservatism in our buffer overflow checking.
+const NoteTag *StateUpdateReporter::createNoteTag(CheckerContext &C) const {
+  // Don't create a note tag if we didn't assume anything:
+  if (!AssumedNonNegative && !AssumedUpperBound)
+    return nullptr;
+
+  return C.getNoteTag(
+      [*this](PathSensitiveBugReport &BR) -> std::string {
+        return getMessage(BR);
+      },
+      /*isPrunable=*/false);
+}
+
+std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR) const {
+  bool ShouldReportNonNegative = AssumedNonNegative;
+  if (!providesInformationAboutInteresting(ByteOffsetVal, BR)) {
+    if (AssumedUpperBound &&
+        providesInformationAboutInteresting(*AssumedUpperBound, BR))
+      ShouldReportNonNegative = false;
+    else
+      return "";
+  }
+
+  std::optional<int64_t> OffsetN = getConcreteValue(ByteOffsetVal);
+  std::optional<int64_t> ExtentN = getConcreteValue(AssumedUpperBound);
 
+  const bool UseIndex =
+      ElementSize && tryDividePair(OffsetN, ExtentN, *ElementSize);
+
+  SmallString<256> Buf;
+  llvm::raw_svector_ostream Out(Buf);
+  Out << "Assuming ";
+  if (UseIndex) {
+    Out << "index ";
+    if (OffsetN)
+      Out << "'" << OffsetN << "' ";
+  } else if (AssumedUpperBound) {
+    Out << "byte offset ";
+    if (OffsetN)
+      Out << "'" << OffsetN << "' ";
+  } else {
+    Out << "offset ";
+  }
+
+  Out << "is";
+  if (ShouldReportNonNegative) {
+    Out << " non-negative";
+  }
+  if (AssumedUpperBound) {
+    if (ShouldReportNonNegative)
+      Out << " and";
+    Out << " less than ";
+    if (ExtentN)
+      Out << *ExtentN << ", ";
+    if (UseIndex && ElementType)
+      Out << "the number of '" << ElementType->getAsString()
+          << "' elements in ";
+    else
+      Out << "the extent of ";
+    Out << getRegionName(Reg);
+  }
+  return std::string(Out.str());
+}
+
+bool StateUpdateReporter::providesInformationAboutInteresting(
+    SymbolRef Sym, PathSensitiveBugReport &BR) {
+  if (!Sym)
+    return false;
+  for (SymbolRef PartSym : Sym->symbols()) {
+    // The interestingess mark may appear on any layer as we're stripping off
+    // the SymIntExpr, UnarySymExpr etc. layers...
+    if (BR.isInteresting(PartSym))
+      return true;
+    // ...but if both sides of the expression are symbolic (i.e. unknown), then
+    // the analyzer can't use the combined result to constrain the operands.
+    if (isa<SymSymExpr>(PartSym))
+      return false;
+  }
+  return false;
+}
+
+void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const 
{
   const SVal Location = C.getSVal(E);
 
   // The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as
@@ -350,6 +498,10 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, 
CheckerContext &C) const {
 
   auto [Reg, ByteOffset] = *RawOffset;
 
+  // The state updates will be reported as a single note tag, which will be
+  // composed by this helper class.
+  StateUpdateReporter SUR(Reg, ByteOffset, E, C);
+
   // CHECK LOWER BOUND
   const MemSpaceRegion *Space = Reg->getMemorySpace();
   if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) {
@@ -363,13 +515,22 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, 
CheckerContext &C) const {
     auto [PrecedesLowerBound, WithinLowerBound] = compareValueToThreshold(
         State, ByteOffset, SVB.makeZeroArrayIndex(), SVB);
 
-    if (PrecedesLowerBound && !WithinLowerBound) {
-      // We know that the index definitely precedes the lower bound.
-      Messages Msgs = getPrecedesMsgs(Reg, ByteOffset);
-      reportOOB(C, PrecedesLowerBound, OOB_Precedes, ByteOffset, Msgs);
-      return;
+    if (PrecedesLowerBound) {
+      // The offset may be invalid (negative)...
+      if (!WithinLowerBound) {
+        // ...and it cannot be valid (>= 0), so report an error.
+        Messages Msgs = getPrecedesMsgs(Reg, ByteOffset);
+        reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset);
+        return;
+      }
+      // ...but it can be valid as well, so the checker will (optimistically)
+      // assume that it's valid and mention this in the note tag.
+      SUR.recordNonNegativeAssumption();
     }
 
+    // Actually update the state. The "if" only fails in the extremely unlikely
+    // case when compareValueToThreshold returns {nullptr, nullptr} becasue
+    // evalBinOpNN fails to evaluate the less-than operator.
     if (WithinLowerBound)
       State = WithinLowerBound;
   }
@@ -381,32 +542,30 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, 
CheckerContext &C) const {
         compareValueToThreshold(State, ByteOffset, *KnownSize, SVB);
 
     if (ExceedsUpperBound) {
+      // The offset may be invalid (>= Size)...
       if (!WithinUpperBound) {
-        // We know that the index definitely exceeds the upper bound.
-        if (isa<ArraySubscriptExpr>(E) && isInAddressOf(E, C.getASTContext())) 
{
-          // ...but this is within an addressof expression, so we need to check
-          // for the exceptional case that `&array[size]` is valid.
-          auto [EqualsToThreshold, NotEqualToThreshold] =
-              compareValueToThreshold(ExceedsUpperBound, ByteOffset, 
*KnownSize,
-                                      SVB, /*CheckEquality=*/true);
-          if (EqualsToThreshold && !NotEqualToThreshold) {
-            // We are definitely in the exceptional case, so return early
-            // instead of reporting a bug.
-            C.addTransition(EqualsToThreshold);
-            return;
-          }
+        // ...and it cannot be within bounds, so report an error, unless we can
+        // definitely determine that this is an idiomatic `&array[size]`
+        // expression that calculates the past-the-end pointer.
+        if (isIdiomaticPastTheEndPtr(E, ExceedsUpperBound, ByteOffset,
+                                     *KnownSize, C)) {
+          // FIXME: this duplicates the `addTransition` at the end of the
+          // function, but `goto` is taboo nowdays.
+          C.addTransition(ExceedsUpperBound, SUR.createNoteTag(C));
+          return;
         }
+
         Messages Msgs = getExceedsMsgs(C.getASTContext(), Reg, ByteOffset,
                                        *KnownSize, Location);
-        reportOOB(C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, Msgs);
+        reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset);
         return;
       }
+      // ...and it can be valid as well...
       if (isTainted(State, ByteOffset)) {
-        // Both cases are possible, but the offset is tainted, so report.
-        std::string RegName = getRegionName(Reg);
+        // ...but it's tainted, so report an error.
 
-        // Diagnostic detail: "tainted offset" is always correct, but the
-        // common case is that 'idx' is tainted in 'arr[idx]' and then it's
+        // Diagnostic detail: saying "tainted offset" is always correct, but
+        // the common case is that 'idx' is tainted in 'arr[idx]' and then it's
         // nicer to say "tainted index".
         const char *OffsetName = "offset";
         if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E))
@@ -414,33 +573,67 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, 
CheckerContext &C) const {
             OffsetName = "index";
 
         Messages Msgs = getTaintMsgs(Reg, OffsetName);
-        reportOOB(C, ExceedsUpperBound, OOB_Taint, ByteOffset, Msgs);
+        reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, /*IsTaintBug=*/true);
         return;
       }
+      // ...and it isn't tainted, so the checker will (optimistically) assume
+      // that the offset is in bounds and mention this in the note tag.
+      SUR.recordUpperBoundAssumption(*KnownSize);
     }
 
+    // Actually update the state. The "if" only fails in the extremely unlikely
+    // case when compareValueToThreshold returns {nullptr, nullptr} becasue
+    // evalBinOpNN fails to evaluate the less-than operator.
     if (WithinUpperBound)
       State = WithinUpperBound;
   }
 
-  C.addTransition(State);
+  // Add a transition, reporting the state updates that we accumulated.
+  C.addTransition(State, SUR.createNoteTag(C));
 }
 
 void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,
-                                    ProgramStateRef ErrorState, OOB_Kind Kind,
-                                    NonLoc Offset, Messages Msgs) const {
+                                    ProgramStateRef ErrorState, Messages Msgs,
+                                    NonLoc Offset, bool IsTaintBug) const {
 
   ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState);
   if (!ErrorNode)
     return;
 
   auto BR = std::make_unique<PathSensitiveBugReport>(
-      Kind == OOB_Taint ? TaintBT : BT, Msgs.Short, Msgs.Full, ErrorNode);
+      IsTaintBug ? TaintBT : BT, Msgs.Short, Msgs.Full, ErrorNode);
+
+  // FIXME: ideally we would just call trackExpressionValue() and that would
+  // "do the right thing": mark the relevant symbols as interesting, track the
+  // control dependencies and statements storing the relevant values and add
+  // helpful diagnostic pieces. However, right now trackExpressionValue() is
+  // a heap of unreliable heuristics, so it would cause several issues:
+  // - Interestingness is not applied consistently, e.g. if `array[x+10]`
+  //   causes an overflow, then `x` is not marked as interesting.
+  // - We get irrelevant diagnostic pieces, e.g. in the code
+  //   `int *p = (int*)malloc(2*sizeof(int)); p[3] = 0;`
+  //   it places a "Storing uninitialized value" note on the `malloc` call
+  //   (which is technically true, but irrelevant).
+  // If trackExpressionValue() becomes reliable, it should be applied instead
+  // of the manual markInteresting() calls.
+
+  if (SymbolRef OffsetSym = Offset.getAsSymbol()) {
+    // If the offset is a symbolic value, iterate over its "parts" with
+    // `SymExpr::symbols()` and mark each of them as interesting.
+    // For example, if the offset is `x*4 + y` then we put interestingness onto
+    // the SymSymExpr `x*4 + y`, the SymIntExpr `x*4` and the two data symbols
+    // `x` and `y`.
+    for (SymbolRef PartSym : OffsetSym->symbols())
+      BR->markInteresting(PartSym);
+  }
 
-  // Track back the propagation of taintedness.
-  if (Kind == OOB_Taint)
+  if (IsTaintBug) {
+    // If the issue that we're reporting depends on the taintedness of the
+    // offset, then put interestingness onto symbols that could be the origin
+    // of the taint.
     for (SymbolRef Sym : getTaintedSymbols(ErrorState, Offset))
       BR->markInteresting(Sym);
+  }
 
   C.emitReport(std::move(BR));
 }
@@ -476,6 +669,18 @@ bool ArrayBoundCheckerV2::isInAddressOf(const Stmt *S, 
ASTContext &ACtx) {
   return UnaryOp && UnaryOp->getOpcode() == UO_AddrOf;
 }
 
+bool ArrayBoundCheckerV2::isIdiomaticPastTheEndPtr(const Expr *E,
+                                                   ProgramStateRef State,
+                                                   NonLoc Offset, NonLoc Limit,
+                                                   CheckerContext &C) {
+  if (isa<ArraySubscriptExpr>(E) && isInAddressOf(E, C.getASTContext())) {
+    auto [EqualsToThreshold, NotEqualToThreshold] = compareValueToThreshold(
+        State, Offset, Limit, C.getSValBuilder(), /*CheckEquality=*/true);
+    return EqualsToThreshold && !NotEqualToThreshold;
+  }
+  return false;
+}
+
 void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
   mgr.registerChecker<ArrayBoundCheckerV2>();
 }
diff --git a/clang/test/Analysis/out-of-bounds-diagnostics.c 
b/clang/test/Analysis/out-of-bounds-diagnostics.c
index 769a8954f796691..7a8f59650fef161 100644
--- a/clang/test/Analysis/out-of-bounds-diagnostics.c
+++ b/clang/test/Analysis/out-of-bounds-diagnostics.c
@@ -195,12 +195,22 @@ void *malloc(size_t size);
 
 int *mallocRegion(void) {
   int *mem = (int*)malloc(2*sizeof(int));
+
   mem[3] = -2;
   // expected-warning@-1 {{Out of bound access to memory after the end of the 
heap area}}
   // expected-note@-2 {{Access of the heap area at index 3, while it holds 
only 2 'int' elements}}
   return mem;
 }
 
+int *mallocRegionDeref(void) {
+  int *mem = (int*)malloc(2*sizeof(int));
+
+  *(mem + 3) = -2;
+  // expected-warning@-1 {{Out of bound access to memory after the end of the 
heap area}}
+  // expected-note@-2 {{Access of the heap area at index 3, while it holds 
only 2 'int' elements}}
+  return mem;
+}
+
 void *alloca(size_t size);
 
 int allocaRegion(void) {
diff --git a/clang/test/Analysis/out-of-bounds-notes.c 
b/clang/test/Analysis/out-of-bounds-notes.c
new file mode 100644
index 000000000000000..290df79f3e18a41
--- /dev/null
+++ b/clang/test/Analysis/out-of-bounds-notes.c
@@ -0,0 +1,128 @@
+// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-output=text        \
+// RUN:     
-analyzer-checker=core,alpha.security.ArrayBoundV2,unix.Malloc,alpha.security.taint,debug.ExprInspection
 -verify %s
+
+int array[10];
+
+void clang_analyzer_value(unsigned arg);
+
+int irrelevantAssumptions(int arg) {
+  int a = array[arg];
+  // Here the analyzer assumes that `arg` is in bounds, but doesn't report this
+  // because `arg` is not interesting for the bug.
+  int b = array[13];
+  // expected-warning@-1 {{Out of bound access to memory after the end of 
'array'}}
+  // expected-note@-2 {{Access of 'array' at index 13, while it holds only 10 
'int' elements}}
+  return a + b;
+}
+
+
+int assumingBoth(int arg) {
+  int a = array[arg];
+  // expected-note@-1 {{Assuming index is non-negative and less than 10, the 
number of 'int' elements in 'array'}}
+  int b = array[arg]; // no additional note, we already assumed that 'arg' is 
in bounds
+  int c = array[arg + 10];
+  // expected-warning@-1 {{Out of bound access to memory after the end of 
'array'}}
+  // expected-note@-2 {{Access of 'array' at an overflowing index, while it 
holds only 10 'int' elements}}
+  return a + b + c;
+}
+
+int assumingLower(int arg) {
+  // expected-note@+2 {{Assuming 'arg' is < 10}}
+  // expected-note@+1 {{Taking false branch}}
+  if (arg >= 10)
+    return 0;
+  int a = array[arg];
+  // expected-note@-1 {{Assuming index is non-negative}}
+  int b = array[arg + 10];
+  // expected-warning@-1 {{Out of bound access to memory after the end of 
'array'}}
+  // expected-note@-2 {{Access of 'array' at an overflowing index, while it 
holds only 10 'int' elements}}
+  return a + b;
+}
+
+int assumingUpper(int arg) {
+  // expected-note@+2 {{Assuming 'arg' is >= 0}}
+  // expected-note@+1 {{Taking false branch}}
+  if (arg < 0)
+    return 0;
+  int a = array[arg];
+  // expected-note@-1 {{Assuming index is less than 10, the number of 'int' 
elements in 'array'}}
+  int b = array[arg - 10];
+  // expected-warning@-1 {{Out of bound access to memory preceding 'array'}}
+  // expected-note@-2 {{Access of 'array' at negative byte offset}}
+  return a + b;
+}
+
+int assumingUpperIrrelevant(int arg) {
+  // FIXME: The assumption "assuming index is less than 10" is printed because
+  // it's assuming something about the interesting variable `arg`; however,
+  // it's irrelevant because in this testcase the out of bound access is
+  // deduced from the _lower_ bound on `arg`. Currently the analyzer cannot
+  // filter out assumptions that are logically irrelevant but "touch"
+  // interesting symbols; eventually it would be good to add support for this.
+
+  // expected-note@+2 {{Assuming 'arg' is >= 0}}
+  // expected-note@+1 {{Taking false branch}}
+  if (arg < 0)
+    return 0;
+  int a = array[arg];
+  // expected-note@-1 {{Assuming index is less than 10, the number of 'int' 
elements in 'array'}}
+  int b = array[arg + 10];
+  // expected-warning@-1 {{Out of bound access to memory after the end of 
'array'}}
+  // expected-note@-2 {{Access of 'array' at an overflowing index, while it 
holds only 10 'int' elements}}
+  return a + b;
+}
+
+int assumingUpperUnsigned(unsigned arg) {
+  int a = array[arg];
+  // expected-note@-1 {{Assuming index is less than 10, the number of 'int' 
elements in 'array'}}
+  int b = array[(int)arg - 10];
+  // expected-warning@-1 {{Out of bound access to memory preceding 'array'}}
+  // expected-note@-2 {{Access of 'array' at negative byte offset}}
+  return a + b;
+}
+
+int assumingNothing(unsigned arg) {
+  // expected-note@+2 {{Assuming 'arg' is < 10}}
+  // expected-note@+1 {{Taking false branch}}
+  if (arg >= 10)
+    return 0;
+  int a = array[arg]; // no note here, we already know that 'arg' is in bounds
+  int b = array[(int)arg - 10];
+  // expected-warning@-1 {{Out of bound access to memory preceding 'array'}}
+  // expected-note@-2 {{Access of 'array' at negative byte offset}}
+  return a + b;
+}
+
+short assumingConverted(int arg) {
+  // When indices are reported, the note will use the element type that's the
+  // result type of the subscript operator.
+  char *cp = (char*)array;
+  char a = cp[arg];
+  // expected-note@-1 {{Assuming index is non-negative and less than 40, the 
number of 'char' elements in 'array'}}
+  char b = cp[arg]; // no additional note, we already assumed that 'arg' is in 
bounds
+  char c = cp[arg + 40];
+  // expected-warning@-1 {{Out of bound access to memory after the end of 
'array'}}
+  // expected-note@-2 {{Access of 'array' at an overflowing index, while it 
holds only 40 'char' elements}}
+  return a + b + c;
+}
+
+struct foo {
+  int num;
+  char a[8];
+  char b[5];
+};
+
+int assumingConverted2(struct foo f, int arg) {
+  // When indices are reported, the note will use the element type that's the
+  // result type of the subscript operator.
+  int a = ((int*)(f.a))[arg];
+  // expected-note@-1 {{Assuming index is non-negative and less than 2, the 
number of 'int' elements in 'f.a'}}
+  // However, if the extent of the memory region is not divisible by the
+  // element size, the checker measures the offset and extent in bytes.
+  int b = ((int*)(f.b))[arg];
+  // expected-note@-1 {{Assuming byte offset is less than 5, the extent of 
'f.b'}}
+  int c = array[arg-2];
+  // expected-warning@-1 {{Out of bound access to memory preceding 'array'}}
+  // expected-note@-2 {{Access of 'array' at negative byte offset}}
+  return a + b + c;
+}

>From c0a09db0fbb48626bbb069f85a587d2c94a8998a Mon Sep 17 00:00:00 2001
From: NagyDonat <donat.n...@ericsson.com>
Date: Fri, 19 Jan 2024 19:10:54 +0100
Subject: [PATCH 02/14] Remove unused reference to debug.ExprInspection

---
 clang/test/Analysis/out-of-bounds-notes.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/clang/test/Analysis/out-of-bounds-notes.c 
b/clang/test/Analysis/out-of-bounds-notes.c
index 290df79f3e18a41..3070ab643d832ea 100644
--- a/clang/test/Analysis/out-of-bounds-notes.c
+++ b/clang/test/Analysis/out-of-bounds-notes.c
@@ -1,10 +1,8 @@
 // RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-output=text        \
-// RUN:     
-analyzer-checker=core,alpha.security.ArrayBoundV2,unix.Malloc,alpha.security.taint,debug.ExprInspection
 -verify %s
+// RUN:     
-analyzer-checker=core,alpha.security.ArrayBoundV2,unix.Malloc,alpha.security.taint
 -verify %s
 
 int array[10];
 
-void clang_analyzer_value(unsigned arg);
-
 int irrelevantAssumptions(int arg) {
   int a = array[arg];
   // Here the analyzer assumes that `arg` is in bounds, but doesn't report this

>From 18c00f79123ed8b22feefdb3b4bcc2f08b59c79b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com>
Date: Mon, 22 Jan 2024 15:26:57 +0100
Subject: [PATCH 03/14] Improve comments (as suggested on code review)

---
 .../Checkers/ArrayBoundCheckerV2.cpp          | 30 +++++++++++--------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp 
b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index 4241fa3a2ce8af6..593095a2bebafef 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -202,12 +202,19 @@ computeOffset(ProgramStateRef State, SValBuilder &SVB, 
SVal Location) {
 // Due to the overflow issues I think it's impossible (or at least not
 // practical) to integrate this kind of simplification into the resolution of
 // arbitrary inequalities (i.e. the code of `evalBinOp`); but this function
-// produces valid results if the arguments are memory offsets which are known
-// to be << SIZE_MAX.
+// produces valid results when the calculations are handling memory offsets
+// and every value is well below SIZE_MAX.
 // TODO: This algorithm should be moved to a central location where it's
 // available for other checkers that need to compare memory offsets.
-// NOTE: When using the results of this function, don't forget that `evalBinOp`
-// uses the evaluation rules of C++, so e.g. `(size_t)123 < -1`!
+// NOTE: the simplification preserves the order of the two operands in a
+// mathematical sense, but it may change the result produced by a C++
+// comparison operator (and the automatic type conversions).
+// For example, consider a comparison "X+1 < 0", where the LHS is stored as a
+// size_t and the RHS is stored in an int. (As size_t is unsigned, this
+// comparison is false for all values of "X".) However, the simplification may
+// turn it into "X < -1", which is still always false in a mathematical sense,
+// but can produce a true result when evaluated by `evalBinOp` (which follows
+// the rules of C++ and casts -1 to SIZE_MAX).
 static std::pair<NonLoc, nonloc::ConcreteInt>
 getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
                      SValBuilder &svalBuilder) {
@@ -401,11 +408,9 @@ const NoteTag 
*StateUpdateReporter::createNoteTag(CheckerContext &C) const {
   if (!AssumedNonNegative && !AssumedUpperBound)
     return nullptr;
 
-  return C.getNoteTag(
-      [*this](PathSensitiveBugReport &BR) -> std::string {
-        return getMessage(BR);
-      },
-      /*isPrunable=*/false);
+  return C.getNoteTag([*this](PathSensitiveBugReport &BR) -> std::string {
+    return getMessage(BR);
+  });
 }
 
 std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR) const {
@@ -468,8 +473,9 @@ bool 
StateUpdateReporter::providesInformationAboutInteresting(
     // the SymIntExpr, UnarySymExpr etc. layers...
     if (BR.isInteresting(PartSym))
       return true;
-    // ...but if both sides of the expression are symbolic (i.e. unknown), then
-    // the analyzer can't use the combined result to constrain the operands.
+    // ...but if both sides of the expression are symbolic, then there is no
+    // practical algorithm to produce separate constraints for the two
+    // operands (from the single combined result).
     if (isa<SymSymExpr>(PartSym))
       return false;
   }
@@ -549,8 +555,6 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, 
CheckerContext &C) const {
         // expression that calculates the past-the-end pointer.
         if (isIdiomaticPastTheEndPtr(E, ExceedsUpperBound, ByteOffset,
                                      *KnownSize, C)) {
-          // FIXME: this duplicates the `addTransition` at the end of the
-          // function, but `goto` is taboo nowdays.
           C.addTransition(ExceedsUpperBound, SUR.createNoteTag(C));
           return;
         }

>From 6ed41d51078796983983b777af6067056de78299 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com>
Date: Mon, 22 Jan 2024 15:56:30 +0100
Subject: [PATCH 04/14] Make a function 'static'

---
 clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp 
b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index 593095a2bebafef..e6f0cc0cbf0dd31 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -334,8 +334,8 @@ static Messages getPrecedesMsgs(const SubRegion *Region, 
NonLoc Offset) {
 /// it can be performed (`Divisor` is nonzero and there is no remainder). The
 /// values `Val1` and `Val2` may be nullopt and in that case the corresponding
 /// division is considered to be successful.
-bool tryDividePair(std::optional<int64_t> &Val1, std::optional<int64_t> &Val2,
-                   int64_t Divisor) {
+static bool tryDividePair(std::optional<int64_t> &Val1,
+                          std::optional<int64_t> &Val2, int64_t Divisor) {
   if (!Divisor)
     return false;
   const bool Val1HasRemainder = Val1 && *Val1 % Divisor;

>From 1413b2cce38859e867f23e65d418b97a112093c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com>
Date: Mon, 22 Jan 2024 17:11:08 +0100
Subject: [PATCH 05/14] Add testcase 'AssumingPlainOffset'

...and give more meaningful names to two testcases before it.

Currently this TC doesn't demonstrate the intended behavior because the
simplification algorithm used by the checker is somewhat limited and it
doesn't perform divisions when the RHS is not divisible by the
multiplier that's present in LHS. This will be fixed in a separate PR
that improves the simplification algorithm.
---
 clang/test/Analysis/out-of-bounds-notes.c | 26 +++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/clang/test/Analysis/out-of-bounds-notes.c 
b/clang/test/Analysis/out-of-bounds-notes.c
index 3070ab643d832ea..be69526393cd483 100644
--- a/clang/test/Analysis/out-of-bounds-notes.c
+++ b/clang/test/Analysis/out-of-bounds-notes.c
@@ -91,7 +91,7 @@ int assumingNothing(unsigned arg) {
   return a + b;
 }
 
-short assumingConverted(int arg) {
+short assumingConvertedToCharP(int arg) {
   // When indices are reported, the note will use the element type that's the
   // result type of the subscript operator.
   char *cp = (char*)array;
@@ -110,7 +110,7 @@ struct foo {
   char b[5];
 };
 
-int assumingConverted2(struct foo f, int arg) {
+int assumingConvertedToIntP(struct foo f, int arg) {
   // When indices are reported, the note will use the element type that's the
   // result type of the subscript operator.
   int a = ((int*)(f.a))[arg];
@@ -124,3 +124,25 @@ int assumingConverted2(struct foo f, int arg) {
   // expected-note@-2 {{Access of 'array' at negative byte offset}}
   return a + b + c;
 }
+
+int assumingPlainOffset(struct foo f, int arg) {
+  // This TC is intended to check the corner case that the checker prints the
+  // shorter "offset" instead of "byte offset" when it's irrelevant that the
+  // offset is measured in bytes.
+
+  // expected-note@+2 {{Assuming 'arg' is < 2}}
+  // expected-note@+1 {{Taking false branch}}
+  if (arg >= 2)
+    return 0;
+
+  int b = ((int*)(f.b))[arg];
+  // expected-note@-1 {{Assuming byte offset is non-negative and less than 5, 
the extent of 'f.b'}}
+  // FIXME: this should be {{Assuming offset is non-negative}}
+  // but the current simplification algorithm doesn't realize that arg <= 1
+  // implies that the byte offset arg*4 will be less than 5.
+
+  int c = array[arg+10];
+  // expected-warning@-1 {{Out of bound access to memory after the end of 
'array'}}
+  // expected-note@-2 {{Access of 'array' at an overflowing index, while it 
holds only 10 'int' elements}}
+  return b + c;
+}

>From 6f6dc734d9b0b163f93189ebf094d03d16dd48c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com>
Date: Mon, 22 Jan 2024 17:35:57 +0100
Subject: [PATCH 06/14] Explain a corner case that's currently non-functional

---
 .../StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp 
b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index e6f0cc0cbf0dd31..002d0315bc65972 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -417,10 +417,19 @@ std::string 
StateUpdateReporter::getMessage(PathSensitiveBugReport &BR) const {
   bool ShouldReportNonNegative = AssumedNonNegative;
   if (!providesInformationAboutInteresting(ByteOffsetVal, BR)) {
     if (AssumedUpperBound &&
-        providesInformationAboutInteresting(*AssumedUpperBound, BR))
+        providesInformationAboutInteresting(*AssumedUpperBound, BR)) {
+      // Even if the byte offset isn't interesting (e.g. it's a constant 
value),
+      // the assumption can still be interesting if it provides information 
about
+      // an interesting symbolic upper bound.
+      // FIXME: This code path is currently non-functional and untested because
+      // `getSimplifiedOffsets()` only works when the RHS (extent) is constant.
+
+      // In this case don't display the "offset is non-negative" assumption
+      // (because the offset isn't interesting).
       ShouldReportNonNegative = false;
-    else
+    } else {
       return "";
+    }
   }
 
   std::optional<int64_t> OffsetN = getConcreteValue(ByteOffsetVal);

>From a57cbf2834b9ce3041de6435c9e7ec1b244a2832 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com>
Date: Mon, 22 Jan 2024 17:43:40 +0100
Subject: [PATCH 07/14] Satisfy git-clang-format

---
 clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp 
b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index 002d0315bc65972..24247f340317be4 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -419,8 +419,8 @@ std::string 
StateUpdateReporter::getMessage(PathSensitiveBugReport &BR) const {
     if (AssumedUpperBound &&
         providesInformationAboutInteresting(*AssumedUpperBound, BR)) {
       // Even if the byte offset isn't interesting (e.g. it's a constant 
value),
-      // the assumption can still be interesting if it provides information 
about
-      // an interesting symbolic upper bound.
+      // the assumption can still be interesting if it provides information
+      // about an interesting symbolic upper bound.
       // FIXME: This code path is currently non-functional and untested because
       // `getSimplifiedOffsets()` only works when the RHS (extent) is constant.
 

>From 399deca38538d5b682ff1307a42460fa706cc78e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com>
Date: Thu, 25 Jan 2024 17:31:52 +0100
Subject: [PATCH 08/14] Workaround to partially resolve issues with symbolic
 extent

It seems that constraints on symbols used as the dynamic extent of an
allocated area can be garbage collected when there are no other
references. This commit adds a temporary `(void)variable` hack in
`out-of-bounds-diagnostics.c` to keep the relevant symbols alive and
ensure that the testcase can validate the behavior of
ArrayBoundCheckerV2.

In addition, the misleading names of some testcases are fixed.
---
 .../test/Analysis/out-of-bounds-diagnostics.c | 37 ++++++++++++++++---
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/clang/test/Analysis/out-of-bounds-diagnostics.c 
b/clang/test/Analysis/out-of-bounds-diagnostics.c
index 7a8f59650fef161..786962fc1b708c3 100644
--- a/clang/test/Analysis/out-of-bounds-diagnostics.c
+++ b/clang/test/Analysis/out-of-bounds-diagnostics.c
@@ -221,18 +221,38 @@ int allocaRegion(void) {
   return *mem;
 }
 
-int *unknownExtent(int arg) {
-  if (arg >= 2)
+int *symbolicExtent(int arg) {
+  // expected-note@+2 {{Assuming 'arg' is < 5}}
+  // expected-note@+1 {{Taking false branch}}
+  if (arg >= 5)
     return 0;
   int *mem = (int*)malloc(arg);
+
+  // TODO: without the following reference to 'arg', the analyzer would discard
+  // the range information about (the symbolic value of) 'arg'. This is
+  // incorrect because while the variable itself is inaccessible, it becomes
+  // the symbolic extent of 'mem', so we still want to reason about its
+  // potential values.
+  (void)arg;
+
   mem[8] = -2;
-  // FIXME: this should produce
-  //   {{Out of bound access to memory after the end of the heap area}}
-  //   {{Access of 'int' element in the heap area at index 8}}
+  // expected-warning@-1 {{Out of bound access to memory after the end of the 
heap area}}
+  // expected-note@-2 {{Access of 'int' element in the heap area at index 8}}
   return mem;
 }
 
-void unknownIndex(int arg) {
+int *symbolicExtentDiscardedRangeInfo(int arg) {
+  // This is a copy of the case 'symbolicExtent' without the '(void)arg' hack.
+  // TODO: if the analyzer can detect the out-of-bounds access within this TC,
+  // then remove this TC and the `(void)arg` hack from `symbolicExtent`.
+  if (arg >= 5)
+    return 0;
+  int *mem = (int*)malloc(arg);
+  mem[8] = -2;
+  return mem;
+}
+
+void symbolicIndex(int arg) {
   // expected-note@+2 {{Assuming 'arg' is >= 12}}
   // expected-note@+1 {{Taking true branch}}
   if (arg >= 12)
@@ -245,10 +265,15 @@ int *nothingIsCertain(int x, int y) {
   if (x >= 2)
     return 0;
   int *mem = (int*)malloc(x);
+
+  // TODO: The next line is a temporary hack; see 'symbolicExtent()' for 
details.
+  (void)x;
+
   if (y >= 8)
     mem[y] = -2;
   // FIXME: this should produce
   //   {{Out of bound access to memory after the end of the heap area}}
   //   {{Access of 'int' element in the heap area at an overflowing index}}
+  // but apparently the analyzer isn't smart enough to deduce this.
   return mem;
 }

>From 1fb1ce411d1788951b0a97befafc0550880bc374 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com>
Date: Fri, 26 Jan 2024 12:41:37 +0100
Subject: [PATCH 09/14] Move initialization of StateUpdateReporter to static
 function

...`const`ify some data members and add comments to clarify some obscure
goals of this code.
---
 .../Checkers/ArrayBoundCheckerV2.cpp          | 53 ++++++++++++-------
 1 file changed, 33 insertions(+), 20 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp 
b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index 24247f340317be4..3299863e1a5ae6a 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -33,35 +33,48 @@ using namespace taint;
 using llvm::formatv;
 
 namespace {
+/// If `E` is a "clean" array subscript expression, return the type of the
+/// accessed element. If the base of the subscript expression is modified by
+/// pointer arithmetic (and not the beginning of a "full" memory region), this
+/// always returns nullopt because that's the right (or the least bad) thing to
+/// do for the diagnostic output that's relying on this.
+static std::optional<QualType> determineElementType(const Expr *E, const 
CheckerContext &C) {
+  const auto *ASE = dyn_cast<ArraySubscriptExpr>(E);
+  if (!ASE)
+    return std::nullopt;
+
+  const MemRegion *SubscriptBaseReg = C.getSVal(ASE->getBase()).getAsRegion();
+  if (!SubscriptBaseReg)
+    return std::nullopt;
+
+  // The base of the subscript expression is affected by pointer arithmetics,
+  // so we want to report byte offsets instead of indices.
+  if (isa<ElementRegion>(SubscriptBaseReg->StripCasts()))
+    return std::nullopt;
+
+  return ASE->getType();
+}
+
+
+static std::optional<int64_t> determineElementSize(const 
std::optional<QualType> T, const CheckerContext &C) {
+  if (!T)
+    return std::nullopt;
+  return C.getASTContext().getTypeSizeInChars(*T).getQuantity();
+}
+
 class StateUpdateReporter {
   const SubRegion *Reg;
-  NonLoc ByteOffsetVal;
-  std::optional<QualType> ElementType = std::nullopt;
-  std::optional<int64_t> ElementSize = std::nullopt;
+  const NonLoc ByteOffsetVal;
+  const std::optional<QualType> ElementType;
+  const std::optional<int64_t> ElementSize;
   bool AssumedNonNegative = false;
   std::optional<NonLoc> AssumedUpperBound = std::nullopt;
 
 public:
   StateUpdateReporter(const SubRegion *R, NonLoc ByteOffsVal, const Expr *E,
                       CheckerContext &C)
-      : Reg(R), ByteOffsetVal(ByteOffsVal) {
-    initializeElementInfo(E, C);
-  }
+      : Reg(R), ByteOffsetVal(ByteOffsVal), 
ElementType(determineElementType(E, C)), 
ElementSize(determineElementSize(ElementType, C)) {}
 
-  void initializeElementInfo(const Expr *E, CheckerContext &C) {
-    if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E)) {
-      const MemRegion *SubscriptBaseReg =
-          C.getSVal(ASE->getBase()).getAsRegion();
-      if (!SubscriptBaseReg)
-        return;
-      SubscriptBaseReg = SubscriptBaseReg->StripCasts();
-      if (!isa_and_nonnull<ElementRegion>(SubscriptBaseReg)) {
-        ElementType = ASE->getType();
-        ElementSize =
-            C.getASTContext().getTypeSizeInChars(*ElementType).getQuantity();
-      }
-    }
-  }
   void recordNonNegativeAssumption() { AssumedNonNegative = true; }
   void recordUpperBoundAssumption(NonLoc UpperBoundVal) {
     AssumedUpperBound = UpperBoundVal;

>From b949ef90fb474a26f4e0f954b7149f5f94498114 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com>
Date: Fri, 26 Jan 2024 12:52:37 +0100
Subject: [PATCH 10/14] Add testcase assumingBothPointerToMiddle

---
 clang/test/Analysis/out-of-bounds-notes.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/clang/test/Analysis/out-of-bounds-notes.c 
b/clang/test/Analysis/out-of-bounds-notes.c
index be69526393cd483..6dcba98c3be518f 100644
--- a/clang/test/Analysis/out-of-bounds-notes.c
+++ b/clang/test/Analysis/out-of-bounds-notes.c
@@ -24,6 +24,20 @@ int assumingBoth(int arg) {
   return a + b + c;
 }
 
+int assumingBothPointerToMiddle(int arg) {
+  // If we're accessing an array through a pointer pointing to its middle, the 
checker
+  // will speak about the "byte offset" measured from the beginning of the 
array.
+  int *p = array + 2;
+  int a = p[arg];
+  // expected-note@-1 {{Assuming byte offset is non-negative and less than 40, 
the extent of 'array'}}
+  int b = array[arg]; // This is normal access, and only the lower bound is 
new.
+  // expected-note@-1 {{Assuming index is non-negative}}
+  int c = array[arg + 10];
+  // expected-warning@-1 {{Out of bound access to memory after the end of 
'array'}}
+  // expected-note@-2 {{Access of 'array' at an overflowing index, while it 
holds only 10 'int' elements}}
+  return a + b + c;
+}
+
 int assumingLower(int arg) {
   // expected-note@+2 {{Assuming 'arg' is < 10}}
   // expected-note@+1 {{Taking false branch}}

>From 24a355d36015e9904bbce6613b30b7387ab194c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com>
Date: Fri, 26 Jan 2024 19:05:44 +0100
Subject: [PATCH 11/14] Add a FIXME note in assumingBothPointerToMiddle

Unfortunately this testcase does not work because there is YET ANOTHER
arbitrary limitation in the clang SA engine.
---
 clang/test/Analysis/out-of-bounds-notes.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/clang/test/Analysis/out-of-bounds-notes.c 
b/clang/test/Analysis/out-of-bounds-notes.c
index 6dcba98c3be518f..b409af94d576f9d 100644
--- a/clang/test/Analysis/out-of-bounds-notes.c
+++ b/clang/test/Analysis/out-of-bounds-notes.c
@@ -29,7 +29,12 @@ int assumingBothPointerToMiddle(int arg) {
   // will speak about the "byte offset" measured from the beginning of the 
array.
   int *p = array + 2;
   int a = p[arg];
-  // expected-note@-1 {{Assuming byte offset is non-negative and less than 40, 
the extent of 'array'}}
+  // FIXME: The following note does not appear:
+  //  {{Assuming byte offset is non-negative and less than 40, the extent of 
'array'}}
+  // It seems that the analyzer "gives up" modeling this pointer arithmetics 
and says that
+  // `p[arg]` is just an UnknownVal (instead of calculating that it's
+  // equivalent to `array[2+arg]`).
+
   int b = array[arg]; // This is normal access, and only the lower bound is 
new.
   // expected-note@-1 {{Assuming index is non-negative}}
   int c = array[arg + 10];

>From e2b420cba8841d5149cf96378a968c33e29bf3f3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com>
Date: Fri, 26 Jan 2024 19:19:19 +0100
Subject: [PATCH 12/14] Rename example array `array` to `TenElements`

...for the convenience of a reader who looks at a random testcase in the
middle of the test file.
---
 .../test/Analysis/out-of-bounds-diagnostics.c |  93 +++++++--------
 clang/test/Analysis/out-of-bounds-notes.c     | 110 +++++++++---------
 2 files changed, 102 insertions(+), 101 deletions(-)

diff --git a/clang/test/Analysis/out-of-bounds-diagnostics.c 
b/clang/test/Analysis/out-of-bounds-diagnostics.c
index 786962fc1b708c3..845aa7afa2377ef 100644
--- a/clang/test/Analysis/out-of-bounds-diagnostics.c
+++ b/clang/test/Analysis/out-of-bounds-diagnostics.c
@@ -1,20 +1,20 @@
 // RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-output=text        \
 // RUN:     
-analyzer-checker=core,alpha.security.ArrayBoundV2,unix.Malloc,alpha.security.taint
 -verify %s
 
-int array[10];
+int TenElements[10];
 
 void arrayUnderflow(void) {
-  array[-3] = 5;
-  // expected-warning@-1 {{Out of bound access to memory preceding 'array'}}
-  // expected-note@-2 {{Access of 'array' at negative byte offset -12}}
+  TenElements[-3] = 5;
+  // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -12}}
 }
 
 int underflowWithDeref(void) {
-  int *p = array;
+  int *p = TenElements;
   --p;
   return *p;
-  // expected-warning@-1 {{Out of bound access to memory preceding 'array'}}
-  // expected-note@-2 {{Access of 'array' at negative byte offset -4}}
+  // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -4}}
 }
 
 int scanf(const char *restrict fmt, ...);
@@ -24,30 +24,31 @@ void taintedIndex(void) {
   scanf("%d", &index);
   // expected-note@-1 {{Taint originated here}}
   // expected-note@-2 {{Taint propagated to the 2nd argument}}
-  array[index] = 5;
-  // expected-warning@-1 {{Potential out of bound access to 'array' with 
tainted index}}
-  // expected-note@-2 {{Access of 'array' with a tainted index that may be too 
large}}
+  TenElements[index] = 5;
+  // expected-warning@-1 {{Potential out of bound access to 'TenElements' with 
tainted index}}
+  // expected-note@-2 {{Access of 'TenElements' with a tainted index that may 
be too large}}
 }
 
 int *taintedIndexAfterTheEndPtr(void) {
   // NOTE: Technically speaking, this testcase does not trigger any UB because
-  // &array[10] is the after-the-end pointer which is well-defined; but this is
-  // a bug-prone situation and far from the idiomatic use of `&array[size]`, so
-  // it's better to report an error. This report can be easily silenced by
-  // writing array+index instead of &array[index].
+  // &TenElements[10] is the after-the-end pointer which is well-defined; but
+  // this is a bug-prone situation and far from the idiomatic use of
+  // `&TenElements[size]`, so it's better to report an error. This report can
+  // be easily silenced by writing TenElements+index instead of
+  // &TenElements[index].
   int index;
   scanf("%d", &index);
   // expected-note@-1 {{Taint originated here}}
   // expected-note@-2 {{Taint propagated to the 2nd argument}}
   if (index < 0 || index > 10)
-    return array;
+    return TenElements;
   // expected-note@-2 {{Assuming 'index' is >= 0}}
   // expected-note@-3 {{Left side of '||' is false}}
   // expected-note@-4 {{Assuming 'index' is <= 10}}
   // expected-note@-5 {{Taking false branch}}
-  return &array[index];
-  // expected-warning@-1 {{Potential out of bound access to 'array' with 
tainted index}}
-  // expected-note@-2 {{Access of 'array' with a tainted index that may be too 
large}}
+  return &TenElements[index];
+  // expected-warning@-1 {{Potential out of bound access to 'TenElements' with 
tainted index}}
+  // expected-note@-2 {{Access of 'TenElements' with a tainted index that may 
be too large}}
 }
 
 void taintedOffset(void) {
@@ -55,57 +56,57 @@ void taintedOffset(void) {
   scanf("%d", &index);
   // expected-note@-1 {{Taint originated here}}
   // expected-note@-2 {{Taint propagated to the 2nd argument}}
-  int *p = array + index;
+  int *p = TenElements + index;
   p[0] = 5;
-  // expected-warning@-1 {{Potential out of bound access to 'array' with 
tainted offset}}
-  // expected-note@-2 {{Access of 'array' with a tainted offset that may be 
too large}}
+  // expected-warning@-1 {{Potential out of bound access to 'TenElements' with 
tainted offset}}
+  // expected-note@-2 {{Access of 'TenElements' with a tainted offset that may 
be too large}}
 }
 
 void arrayOverflow(void) {
-  array[12] = 5;
-  // expected-warning@-1 {{Out of bound access to memory after the end of 
'array'}}
-  // expected-note@-2 {{Access of 'array' at index 12, while it holds only 10 
'int' elements}}
+  TenElements[12] = 5;
+  // expected-warning@-1 {{Out of bound access to memory after the end of 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at index 12, while it holds 
only 10 'int' elements}}
 }
 
 void flippedOverflow(void) {
-  12[array] = 5;
-  // expected-warning@-1 {{Out of bound access to memory after the end of 
'array'}}
-  // expected-note@-2 {{Access of 'array' at index 12, while it holds only 10 
'int' elements}}
+  12[TenElements] = 5;
+  // expected-warning@-1 {{Out of bound access to memory after the end of 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at index 12, while it holds 
only 10 'int' elements}}
 }
 
 int *afterTheEndPtr(void) {
-  // This is an unusual but standard-compliant way of writing (array + 10).
-  return &array[10]; // no-warning
+  // This is an unusual but standard-compliant way of writing (TenElements + 
10).
+  return &TenElements[10]; // no-warning
 }
 
 int useAfterTheEndPtr(void) {
   // ... but dereferencing the after-the-end pointer is still invalid.
   return *afterTheEndPtr();
-  // expected-warning@-1 {{Out of bound access to memory after the end of 
'array'}}
-  // expected-note@-2 {{Access of 'array' at index 10, while it holds only 10 
'int' elements}}
+  // expected-warning@-1 {{Out of bound access to memory after the end of 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at index 10, while it holds 
only 10 'int' elements}}
 }
 
 int *afterAfterTheEndPtr(void) {
   // This is UB, it's invalid to form an after-after-the-end pointer.
-  return &array[11];
-  // expected-warning@-1 {{Out of bound access to memory after the end of 
'array'}}
-  // expected-note@-2 {{Access of 'array' at index 11, while it holds only 10 
'int' elements}}
+  return &TenElements[11];
+  // expected-warning@-1 {{Out of bound access to memory after the end of 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at index 11, while it holds 
only 10 'int' elements}}
 }
 
 int *potentialAfterTheEndPtr(int idx) {
   if (idx < 10) { /* ...do something... */ }
   // expected-note@-1 {{Assuming 'idx' is >= 10}}
   // expected-note@-2 {{Taking false branch}}
-  return &array[idx];
-  // expected-warning@-1 {{Out of bound access to memory after the end of 
'array'}}
-  // expected-note@-2 {{Access of 'array' at an overflowing index, while it 
holds only 10 'int' elements}}
+  return &TenElements[idx];
+  // expected-warning@-1 {{Out of bound access to memory after the end of 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at an overflowing index, while 
it holds only 10 'int' elements}}
   // NOTE: On the idx >= 10 branch the normal "optimistic" behavior would've
   // been continuing with the assumption that idx == 10 and the return value is
   // a legitimate after-the-end pointer. The checker deviates from this by
   // reporting an error because this situation is very suspicious and far from
-  // the idiomatic `&array[size]` expressions. If the report is FP, the
-  // developer can easily silence it by writing array+idx instead of
-  // &array[idx].
+  // the idiomatic `&TenElements[size]` expressions. If the report is FP, the
+  // developer can easily silence it by writing TenElements+idx instead of
+  // &TenElements[idx].
 }
 
 int scalar;
@@ -156,9 +157,9 @@ int arrayOfStructsArrow(void) {
 }
 
 short convertedArray(void) {
-  return ((short*)array)[47];
-  // expected-warning@-1 {{Out of bound access to memory after the end of 
'array'}}
-  // expected-note@-2 {{Access of 'array' at index 47, while it holds only 20 
'short' elements}}
+  return ((short*)TenElements)[47];
+  // expected-warning@-1 {{Out of bound access to memory after the end of 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at index 47, while it holds 
only 20 'short' elements}}
 }
 
 struct two_bytes {
@@ -256,9 +257,9 @@ void symbolicIndex(int arg) {
   // expected-note@+2 {{Assuming 'arg' is >= 12}}
   // expected-note@+1 {{Taking true branch}}
   if (arg >= 12)
-    array[arg] = -2;
-  // expected-warning@-1 {{Out of bound access to memory after the end of 
'array'}}
-  // expected-note@-2 {{Access of 'array' at an overflowing index, while it 
holds only 10 'int' elements}}
+    TenElements[arg] = -2;
+  // expected-warning@-1 {{Out of bound access to memory after the end of 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at an overflowing index, while 
it holds only 10 'int' elements}}
 }
 
 int *nothingIsCertain(int x, int y) {
diff --git a/clang/test/Analysis/out-of-bounds-notes.c 
b/clang/test/Analysis/out-of-bounds-notes.c
index b409af94d576f9d..1fc6f89b6276e1d 100644
--- a/clang/test/Analysis/out-of-bounds-notes.c
+++ b/clang/test/Analysis/out-of-bounds-notes.c
@@ -1,45 +1,45 @@
 // RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-output=text        \
 // RUN:     
-analyzer-checker=core,alpha.security.ArrayBoundV2,unix.Malloc,alpha.security.taint
 -verify %s
 
-int array[10];
+int TenElements[10];
 
 int irrelevantAssumptions(int arg) {
-  int a = array[arg];
+  int a = TenElements[arg];
   // Here the analyzer assumes that `arg` is in bounds, but doesn't report this
   // because `arg` is not interesting for the bug.
-  int b = array[13];
-  // expected-warning@-1 {{Out of bound access to memory after the end of 
'array'}}
-  // expected-note@-2 {{Access of 'array' at index 13, while it holds only 10 
'int' elements}}
+  int b = TenElements[13];
+  // expected-warning@-1 {{Out of bound access to memory after the end of 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at index 13, while it holds 
only 10 'int' elements}}
   return a + b;
 }
 
 
 int assumingBoth(int arg) {
-  int a = array[arg];
-  // expected-note@-1 {{Assuming index is non-negative and less than 10, the 
number of 'int' elements in 'array'}}
-  int b = array[arg]; // no additional note, we already assumed that 'arg' is 
in bounds
-  int c = array[arg + 10];
-  // expected-warning@-1 {{Out of bound access to memory after the end of 
'array'}}
-  // expected-note@-2 {{Access of 'array' at an overflowing index, while it 
holds only 10 'int' elements}}
+  int a = TenElements[arg];
+  // expected-note@-1 {{Assuming index is non-negative and less than 10, the 
number of 'int' elements in 'TenElements'}}
+  int b = TenElements[arg]; // no additional note, we already assumed that 
'arg' is in bounds
+  int c = TenElements[arg + 10];
+  // expected-warning@-1 {{Out of bound access to memory after the end of 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at an overflowing index, while 
it holds only 10 'int' elements}}
   return a + b + c;
 }
 
 int assumingBothPointerToMiddle(int arg) {
-  // If we're accessing an array through a pointer pointing to its middle, the 
checker
-  // will speak about the "byte offset" measured from the beginning of the 
array.
-  int *p = array + 2;
+  // If we're accessing an TenElements through a pointer pointing to its 
middle, the checker
+  // will speak about the "byte offset" measured from the beginning of the 
TenElements.
+  int *p = TenElements + 2;
   int a = p[arg];
   // FIXME: The following note does not appear:
-  //  {{Assuming byte offset is non-negative and less than 40, the extent of 
'array'}}
-  // It seems that the analyzer "gives up" modeling this pointer arithmetics 
and says that
-  // `p[arg]` is just an UnknownVal (instead of calculating that it's
-  // equivalent to `array[2+arg]`).
+  //  {{Assuming byte offset is non-negative and less than 40, the extent of 
'TenElements'}}
+  // It seems that the analyzer "gives up" modeling this pointer arithmetics
+  // and says that `p[arg]` is just an UnknownVal (instead of calculating that
+  // it's equivalent to `TenElements[2+arg]`).
 
-  int b = array[arg]; // This is normal access, and only the lower bound is 
new.
+  int b = TenElements[arg]; // This is normal access, and only the lower bound 
is new.
   // expected-note@-1 {{Assuming index is non-negative}}
-  int c = array[arg + 10];
-  // expected-warning@-1 {{Out of bound access to memory after the end of 
'array'}}
-  // expected-note@-2 {{Access of 'array' at an overflowing index, while it 
holds only 10 'int' elements}}
+  int c = TenElements[arg + 10];
+  // expected-warning@-1 {{Out of bound access to memory after the end of 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at an overflowing index, while 
it holds only 10 'int' elements}}
   return a + b + c;
 }
 
@@ -48,11 +48,11 @@ int assumingLower(int arg) {
   // expected-note@+1 {{Taking false branch}}
   if (arg >= 10)
     return 0;
-  int a = array[arg];
+  int a = TenElements[arg];
   // expected-note@-1 {{Assuming index is non-negative}}
-  int b = array[arg + 10];
-  // expected-warning@-1 {{Out of bound access to memory after the end of 
'array'}}
-  // expected-note@-2 {{Access of 'array' at an overflowing index, while it 
holds only 10 'int' elements}}
+  int b = TenElements[arg + 10];
+  // expected-warning@-1 {{Out of bound access to memory after the end of 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at an overflowing index, while 
it holds only 10 'int' elements}}
   return a + b;
 }
 
@@ -61,11 +61,11 @@ int assumingUpper(int arg) {
   // expected-note@+1 {{Taking false branch}}
   if (arg < 0)
     return 0;
-  int a = array[arg];
-  // expected-note@-1 {{Assuming index is less than 10, the number of 'int' 
elements in 'array'}}
-  int b = array[arg - 10];
-  // expected-warning@-1 {{Out of bound access to memory preceding 'array'}}
-  // expected-note@-2 {{Access of 'array' at negative byte offset}}
+  int a = TenElements[arg];
+  // expected-note@-1 {{Assuming index is less than 10, the number of 'int' 
elements in 'TenElements'}}
+  int b = TenElements[arg - 10];
+  // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
   return a + b;
 }
 
@@ -81,20 +81,20 @@ int assumingUpperIrrelevant(int arg) {
   // expected-note@+1 {{Taking false branch}}
   if (arg < 0)
     return 0;
-  int a = array[arg];
-  // expected-note@-1 {{Assuming index is less than 10, the number of 'int' 
elements in 'array'}}
-  int b = array[arg + 10];
-  // expected-warning@-1 {{Out of bound access to memory after the end of 
'array'}}
-  // expected-note@-2 {{Access of 'array' at an overflowing index, while it 
holds only 10 'int' elements}}
+  int a = TenElements[arg];
+  // expected-note@-1 {{Assuming index is less than 10, the number of 'int' 
elements in 'TenElements'}}
+  int b = TenElements[arg + 10];
+  // expected-warning@-1 {{Out of bound access to memory after the end of 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at an overflowing index, while 
it holds only 10 'int' elements}}
   return a + b;
 }
 
 int assumingUpperUnsigned(unsigned arg) {
-  int a = array[arg];
-  // expected-note@-1 {{Assuming index is less than 10, the number of 'int' 
elements in 'array'}}
-  int b = array[(int)arg - 10];
-  // expected-warning@-1 {{Out of bound access to memory preceding 'array'}}
-  // expected-note@-2 {{Access of 'array' at negative byte offset}}
+  int a = TenElements[arg];
+  // expected-note@-1 {{Assuming index is less than 10, the number of 'int' 
elements in 'TenElements'}}
+  int b = TenElements[(int)arg - 10];
+  // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
   return a + b;
 }
 
@@ -103,23 +103,23 @@ int assumingNothing(unsigned arg) {
   // expected-note@+1 {{Taking false branch}}
   if (arg >= 10)
     return 0;
-  int a = array[arg]; // no note here, we already know that 'arg' is in bounds
-  int b = array[(int)arg - 10];
-  // expected-warning@-1 {{Out of bound access to memory preceding 'array'}}
-  // expected-note@-2 {{Access of 'array' at negative byte offset}}
+  int a = TenElements[arg]; // no note here, we already know that 'arg' is in 
bounds
+  int b = TenElements[(int)arg - 10];
+  // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
   return a + b;
 }
 
 short assumingConvertedToCharP(int arg) {
   // When indices are reported, the note will use the element type that's the
   // result type of the subscript operator.
-  char *cp = (char*)array;
+  char *cp = (char*)TenElements;
   char a = cp[arg];
-  // expected-note@-1 {{Assuming index is non-negative and less than 40, the 
number of 'char' elements in 'array'}}
+  // expected-note@-1 {{Assuming index is non-negative and less than 40, the 
number of 'char' elements in 'TenElements'}}
   char b = cp[arg]; // no additional note, we already assumed that 'arg' is in 
bounds
   char c = cp[arg + 40];
-  // expected-warning@-1 {{Out of bound access to memory after the end of 
'array'}}
-  // expected-note@-2 {{Access of 'array' at an overflowing index, while it 
holds only 40 'char' elements}}
+  // expected-warning@-1 {{Out of bound access to memory after the end of 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at an overflowing index, while 
it holds only 40 'char' elements}}
   return a + b + c;
 }
 
@@ -138,9 +138,9 @@ int assumingConvertedToIntP(struct foo f, int arg) {
   // element size, the checker measures the offset and extent in bytes.
   int b = ((int*)(f.b))[arg];
   // expected-note@-1 {{Assuming byte offset is less than 5, the extent of 
'f.b'}}
-  int c = array[arg-2];
-  // expected-warning@-1 {{Out of bound access to memory preceding 'array'}}
-  // expected-note@-2 {{Access of 'array' at negative byte offset}}
+  int c = TenElements[arg-2];
+  // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
   return a + b + c;
 }
 
@@ -160,8 +160,8 @@ int assumingPlainOffset(struct foo f, int arg) {
   // but the current simplification algorithm doesn't realize that arg <= 1
   // implies that the byte offset arg*4 will be less than 5.
 
-  int c = array[arg+10];
-  // expected-warning@-1 {{Out of bound access to memory after the end of 
'array'}}
-  // expected-note@-2 {{Access of 'array' at an overflowing index, while it 
holds only 10 'int' elements}}
+  int c = TenElements[arg+10];
+  // expected-warning@-1 {{Out of bound access to memory after the end of 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at an overflowing index, while 
it holds only 10 'int' elements}}
   return b + c;
 }

>From 1e04ac56dd3072a15c77d47a5c58328b19dfe413 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com>
Date: Fri, 26 Jan 2024 19:27:45 +0100
Subject: [PATCH 13/14] Tweak comments related to the constraint hack

---
 clang/test/Analysis/out-of-bounds-diagnostics.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/clang/test/Analysis/out-of-bounds-diagnostics.c 
b/clang/test/Analysis/out-of-bounds-diagnostics.c
index 845aa7afa2377ef..0c3c67c6a546ad2 100644
--- a/clang/test/Analysis/out-of-bounds-diagnostics.c
+++ b/clang/test/Analysis/out-of-bounds-diagnostics.c
@@ -244,8 +244,8 @@ int *symbolicExtent(int arg) {
 
 int *symbolicExtentDiscardedRangeInfo(int arg) {
   // This is a copy of the case 'symbolicExtent' without the '(void)arg' hack.
-  // TODO: if the analyzer can detect the out-of-bounds access within this TC,
-  // then remove this TC and the `(void)arg` hack from `symbolicExtent`.
+  // TODO: if the analyzer can detect the out-of-bounds access within this
+  // testcase, then remove this and the `(void)arg` hack from `symbolicExtent`.
   if (arg >= 5)
     return 0;
   int *mem = (int*)malloc(arg);
@@ -267,14 +267,16 @@ int *nothingIsCertain(int x, int y) {
     return 0;
   int *mem = (int*)malloc(x);
 
-  // TODO: The next line is a temporary hack; see 'symbolicExtent()' for 
details.
-  (void)x;
-
   if (y >= 8)
     mem[y] = -2;
   // FIXME: this should produce
   //   {{Out of bound access to memory after the end of the heap area}}
   //   {{Access of 'int' element in the heap area at an overflowing index}}
   // but apparently the analyzer isn't smart enough to deduce this.
+
+  // Keep constraints alive. (Without this, the overeager garbage collection of
+  // constraints would _also_ prevent the intended behavior in this testcase.)
+  (void)x;
+
   return mem;
 }

>From be9b2e749e4264357d6d8db1dfa3d05bf8974e43 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com>
Date: Fri, 26 Jan 2024 19:37:40 +0100
Subject: [PATCH 14/14] Oops, I forgot git-clang-format again

It's Friday...
---
 .../StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp   | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp 
b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index 3299863e1a5ae6a..614276525ff4c46 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -38,7 +38,8 @@ namespace {
 /// pointer arithmetic (and not the beginning of a "full" memory region), this
 /// always returns nullopt because that's the right (or the least bad) thing to
 /// do for the diagnostic output that's relying on this.
-static std::optional<QualType> determineElementType(const Expr *E, const 
CheckerContext &C) {
+static std::optional<QualType> determineElementType(const Expr *E,
+                                                    const CheckerContext &C) {
   const auto *ASE = dyn_cast<ArraySubscriptExpr>(E);
   if (!ASE)
     return std::nullopt;
@@ -55,8 +56,8 @@ static std::optional<QualType> determineElementType(const 
Expr *E, const Checker
   return ASE->getType();
 }
 
-
-static std::optional<int64_t> determineElementSize(const 
std::optional<QualType> T, const CheckerContext &C) {
+static std::optional<int64_t>
+determineElementSize(const std::optional<QualType> T, const CheckerContext &C) 
{
   if (!T)
     return std::nullopt;
   return C.getASTContext().getTypeSizeInChars(*T).getQuantity();
@@ -73,7 +74,9 @@ class StateUpdateReporter {
 public:
   StateUpdateReporter(const SubRegion *R, NonLoc ByteOffsVal, const Expr *E,
                       CheckerContext &C)
-      : Reg(R), ByteOffsetVal(ByteOffsVal), 
ElementType(determineElementType(E, C)), 
ElementSize(determineElementSize(ElementType, C)) {}
+      : Reg(R), ByteOffsetVal(ByteOffsVal),
+        ElementType(determineElementType(E, C)),
+        ElementSize(determineElementSize(ElementType, C)) {}
 
   void recordNonNegativeAssumption() { AssumedNonNegative = true; }
   void recordUpperBoundAssumption(NonLoc UpperBoundVal) {

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[llvm] [clang-tools-extra] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)

Reply via email to