[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)

2024-02-05 Thread via cfe-commits
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 



@@ -381,66 +574,105 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, 
CheckerContext ) const {
 compareValueToThreshold(State, ByteOffset, *KnownSize, SVB);
 
 if (ExceedsUpperBound) {
+  // The offset may be invalid (>= Size)...
   if (!WithinUpperBound) {
-// We know that the index definitely exceeds the upper bound.
-if (isa(E) && isInAddressOf(E, C.getASTContext())) 
{
-  // ...but this is within an addressof expression, so we need to check
-  // for the exceptional case that `[size]` is valid.
-  auto [EqualsToThreshold, NotEqualToThreshold] =
-  compareValueToThreshold(ExceedsUpperBound, ByteOffset, 
*KnownSize,
-  SVB, /*CheckEquality=*/true);
-  if (EqualsToThreshold && !NotEqualToThreshold) {
-// We are definitely in the exceptional case, so return early
-// instead of reporting a bug.
-C.addTransition(EqualsToThreshold);
-return;
-  }
+// ...and it cannot be within bounds, so report an error, unless we can
+// definitely determine that this is an idiomatic `[size]`
+// expression that calculates the past-the-end pointer.
+if (isIdiomaticPastTheEndPtr(E, ExceedsUpperBound, ByteOffset,
+ *KnownSize, C)) {
+  C.addTransition(ExceedsUpperBound, SUR.createNoteTag(C));
+  return;
 }
+
 Messages Msgs = getExceedsMsgs(C.getASTContext(), Reg, ByteOffset,
*KnownSize, Location);
-reportOOB(C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, Msgs);
+reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize);
 return;
   }
+  // ...and it can be valid as well...
   if (isTainted(State, ByteOffset)) {
-// Both cases are possible, but the offset is tainted, so report.
-std::string RegName = getRegionName(Reg);
+// ...but it's tainted, so report an error.
 
-// Diagnostic detail: "tainted offset" is always correct, but the
-// common case is that 'idx' is tainted in 'arr[idx]' and then it's
+// Diagnostic detail: saying "tainted offset" is always correct, but
+// the common case is that 'idx' is tainted in 'arr[idx]' and then it's
 // nicer to say "tainted index".
 const char *OffsetName = "offset";
 if (const auto *ASE = dyn_cast(E))
   if (isTainted(State, ASE->getIdx(), C.getLocationContext()))
 OffsetName = "index";
 
 Messages Msgs = getTaintMsgs(Reg, OffsetName);
-reportOOB(C, ExceedsUpperBound, OOB_Taint, ByteOffset, Msgs);
+reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize,
+  /*IsTaintBug=*/true);
 return;
   }
+  // ...and it isn't tainted, so the checker will (optimistically) assume
+  // that the offset is in bounds and mention this in the note tag.
+  SUR.recordUpperBoundAssumption(*KnownSize);
 }
 
+// Actually update the state. The "if" only fails in the extremely unlikely
+// case when compareValueToThreshold returns {nullptr, nullptr} becasue
+// evalBinOpNN fails to evaluate the less-than operator.
 if (WithinUpperBound)
   State = WithinUpperBound;
   }
 
-  C.addTransition(State);
+  // Add a transition, reporting the state updates that we accumulated.
+  C.addTransition(State, SUR.createNoteTag(C));
+}
+
+void ArrayBoundCheckerV2::markPartsInteresting(PathSensitiveBugReport ,
+   ProgramStateRef ErrorState,
+   NonLoc Val, bool MarkTaint) {
+  if (SymbolRef Sym = Val.getAsSymbol()) {
+// If the offset is a symbolic value, iterate over its "parts" with
+// `SymExpr::symbols()` and mark each of them as interesting.
+// For example, if the offset is `x*4 + y` then we put interestingness onto
+// the SymSymExpr `x*4 + y`, the SymIntExpr `x*4` and the two data symbols
+// `x` and `y`.
+for (SymbolRef PartSym : Sym->symbols())
+  BR.markInteresting(PartSym);
+  }
+
+  if (MarkTaint) {
+// If the issue that we're reporting depends on the taintedness of the
+// offset, then put interestingness onto symbols that could be the origin
+// of the taint.
+for (SymbolRef Sym : getTaintedSymbols(ErrorState, Val))
+  BR.markInteresting(Sym);
+  }

NagyDonat wrote:

No, it is not redundant :upside_down_face: because 

[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)

2024-02-05 Thread Balazs Benics via cfe-commits
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 



@@ -381,66 +574,105 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, 
CheckerContext ) const {
 compareValueToThreshold(State, ByteOffset, *KnownSize, SVB);
 
 if (ExceedsUpperBound) {
+  // The offset may be invalid (>= Size)...
   if (!WithinUpperBound) {
-// We know that the index definitely exceeds the upper bound.
-if (isa(E) && isInAddressOf(E, C.getASTContext())) 
{
-  // ...but this is within an addressof expression, so we need to check
-  // for the exceptional case that `[size]` is valid.
-  auto [EqualsToThreshold, NotEqualToThreshold] =
-  compareValueToThreshold(ExceedsUpperBound, ByteOffset, 
*KnownSize,
-  SVB, /*CheckEquality=*/true);
-  if (EqualsToThreshold && !NotEqualToThreshold) {
-// We are definitely in the exceptional case, so return early
-// instead of reporting a bug.
-C.addTransition(EqualsToThreshold);
-return;
-  }
+// ...and it cannot be within bounds, so report an error, unless we can
+// definitely determine that this is an idiomatic `[size]`
+// expression that calculates the past-the-end pointer.
+if (isIdiomaticPastTheEndPtr(E, ExceedsUpperBound, ByteOffset,
+ *KnownSize, C)) {
+  C.addTransition(ExceedsUpperBound, SUR.createNoteTag(C));
+  return;
 }
+
 Messages Msgs = getExceedsMsgs(C.getASTContext(), Reg, ByteOffset,
*KnownSize, Location);
-reportOOB(C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, Msgs);
+reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize);
 return;
   }
+  // ...and it can be valid as well...
   if (isTainted(State, ByteOffset)) {
-// Both cases are possible, but the offset is tainted, so report.
-std::string RegName = getRegionName(Reg);
+// ...but it's tainted, so report an error.
 
-// Diagnostic detail: "tainted offset" is always correct, but the
-// common case is that 'idx' is tainted in 'arr[idx]' and then it's
+// Diagnostic detail: saying "tainted offset" is always correct, but
+// the common case is that 'idx' is tainted in 'arr[idx]' and then it's
 // nicer to say "tainted index".
 const char *OffsetName = "offset";
 if (const auto *ASE = dyn_cast(E))
   if (isTainted(State, ASE->getIdx(), C.getLocationContext()))
 OffsetName = "index";
 
 Messages Msgs = getTaintMsgs(Reg, OffsetName);
-reportOOB(C, ExceedsUpperBound, OOB_Taint, ByteOffset, Msgs);
+reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize,
+  /*IsTaintBug=*/true);
 return;
   }
+  // ...and it isn't tainted, so the checker will (optimistically) assume
+  // that the offset is in bounds and mention this in the note tag.
+  SUR.recordUpperBoundAssumption(*KnownSize);
 }
 
+// Actually update the state. The "if" only fails in the extremely unlikely
+// case when compareValueToThreshold returns {nullptr, nullptr} becasue
+// evalBinOpNN fails to evaluate the less-than operator.
 if (WithinUpperBound)
   State = WithinUpperBound;
   }
 
-  C.addTransition(State);
+  // Add a transition, reporting the state updates that we accumulated.
+  C.addTransition(State, SUR.createNoteTag(C));
+}
+
+void ArrayBoundCheckerV2::markPartsInteresting(PathSensitiveBugReport ,
+   ProgramStateRef ErrorState,
+   NonLoc Val, bool MarkTaint) {
+  if (SymbolRef Sym = Val.getAsSymbol()) {
+// If the offset is a symbolic value, iterate over its "parts" with
+// `SymExpr::symbols()` and mark each of them as interesting.
+// For example, if the offset is `x*4 + y` then we put interestingness onto
+// the SymSymExpr `x*4 + y`, the SymIntExpr `x*4` and the two data symbols
+// `x` and `y`.
+for (SymbolRef PartSym : Sym->symbols())
+  BR.markInteresting(PartSym);
+  }
+
+  if (MarkTaint) {
+// If the issue that we're reporting depends on the taintedness of the
+// offset, then put interestingness onto symbols that could be the origin
+// of the taint.
+for (SymbolRef Sym : getTaintedSymbols(ErrorState, Val))
+  BR.markInteresting(Sym);
+  }
 }
 
 void ArrayBoundCheckerV2::reportOOB(CheckerContext ,
-

[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)

2024-02-05 Thread Balazs Benics via cfe-commits
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 



@@ -318,17 +424,91 @@ static Messages getTaintMsgs(const SubRegion *Region, 
const char *OffsetName) {
   RegName, OffsetName)};
 }
 
-void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext ) const 
{
-  // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
-  // some new logic here that reasons directly about memory region extents.
-  // Once that logic is more mature, we can bring it back to assumeInBound()
-  // for all clients to use.
-  //
-  // The algorithm we are using here for bounds checking is to see if the
-  // memory access is within the extent of the base region.  Since we
-  // have some flexibility in defining the base region, we can achieve
-  // various levels of conservatism in our buffer overflow checking.
+const NoteTag *StateUpdateReporter::createNoteTag(CheckerContext ) const {
+  // Don't create a note tag if we didn't assume anything:
+  if (!AssumedNonNegative && !AssumedUpperBound)
+return nullptr;
+
+  return C.getNoteTag([*this](PathSensitiveBugReport ) -> std::string {
+return getMessage(BR);
+  });
+}
+
+std::string StateUpdateReporter::getMessage(PathSensitiveBugReport ) const {
+  bool ShouldReportNonNegative = AssumedNonNegative;
+  if (!providesInformationAboutInteresting(ByteOffsetVal, BR)) {
+if (AssumedUpperBound &&
+providesInformationAboutInteresting(*AssumedUpperBound, BR)) {
+  // Even if the byte offset isn't interesting (e.g. it's a constant 
value),
+  // the assumption can still be interesting if it provides information
+  // about an interesting symbolic upper bound.
+  ShouldReportNonNegative = false;
+} else {
+  // We don't have anything interesting, don't report the assumption.
+  return "";
+}

steakhal wrote:

```suggestion
}
// We don't have anything interesting, don't report the assumption.
return "";
```
Return after else.

https://github.com/llvm/llvm-project/pull/78315
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)

2024-01-29 Thread via cfe-commits
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 


NagyDonat wrote:

@steakhal To my best knowledge I implemented or answered all of your 
suggestions, so I'm ready for the next round (or I'm also ready to merge this 
commit and start the next one :grin:). 

https://github.com/llvm/llvm-project/pull/78315
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)

2024-01-29 Thread via cfe-commits
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 


https://github.com/NagyDonat updated 
https://github.com/llvm/llvm-project/pull/78315

>From c75c05c6e894a46797913c5bdccb240cbcc01ae9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= 
Date: Tue, 12 Dec 2023 13:07:54 +0100
Subject: [PATCH 01/16] [analyzer] Support interestingness in ArrayBoundV2

This commit improves alpha.security.ArrayBoundV2 in two connected areas:
(1) It calls `markInteresting()` on the symbolic values that are
responsible for the out of bounds access.
(2) Its index-is-in-bounds assumptions are reported in note tags if they
provide information about the value of an interesting symbol.

This commit is limited to "display" changes: it introduces new
diagnostic pieces (potentially to bugs found by other checkers), but the
ArrayBoundV2 will make the same assumptions and detect the same bugs
before and after this change.

As a minor unrelated change, this commit also updates/removes some very
old comments which became obsolate due to my previous changes.
---
 .../Checkers/ArrayBoundCheckerV2.cpp  | 345 ++
 .../test/Analysis/out-of-bounds-diagnostics.c |  10 +
 clang/test/Analysis/out-of-bounds-notes.c | 128 +++
 3 files changed, 413 insertions(+), 70 deletions(-)
 create mode 100644 clang/test/Analysis/out-of-bounds-notes.c

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp 
b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index 6c7a1601402efac..4241fa3a2ce8af6 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -33,7 +33,66 @@ using namespace taint;
 using llvm::formatv;
 
 namespace {
-enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
+class StateUpdateReporter {
+  const SubRegion *Reg;
+  NonLoc ByteOffsetVal;
+  std::optional ElementType = std::nullopt;
+  std::optional ElementSize = std::nullopt;
+  bool AssumedNonNegative = false;
+  std::optional AssumedUpperBound = std::nullopt;
+
+public:
+  StateUpdateReporter(const SubRegion *R, NonLoc ByteOffsVal, const Expr *E,
+  CheckerContext )
+  : Reg(R), ByteOffsetVal(ByteOffsVal) {
+initializeElementInfo(E, C);
+  }
+
+  void initializeElementInfo(const Expr *E, CheckerContext ) {
+if (const auto *ASE = dyn_cast(E)) {
+  const MemRegion *SubscriptBaseReg =
+  C.getSVal(ASE->getBase()).getAsRegion();
+  if (!SubscriptBaseReg)
+return;
+  SubscriptBaseReg = SubscriptBaseReg->StripCasts();
+  if (!isa_and_nonnull(SubscriptBaseReg)) {
+ElementType = ASE->getType();
+ElementSize =
+C.getASTContext().getTypeSizeInChars(*ElementType).getQuantity();
+  }
+}
+  }
+  void recordNonNegativeAssumption() { AssumedNonNegative = true; }
+  void recordUpperBoundAssumption(NonLoc UpperBoundVal) {
+AssumedUpperBound = UpperBoundVal;
+  }
+
+  const NoteTag *createNoteTag(CheckerContext ) const;
+
+private:
+  std::string getMessage(PathSensitiveBugReport ) const;
+
+  /// Return true if information about the value of `Sym` can put constraints
+  /// on some symbol which is interesting within the bug report `BR`.
+  /// In particular, this returns true when `Sym` is interesting within `BR`;
+  /// but it also returns true if `Sym` is an expression that contains integer
+  /// constants and a single symbolic operand which is interesting (in `BR`).
+  /// We need to use this instead of plain `BR.isInteresting()` because if we
+  /// are analyzing code like
+  ///   int array[10];
+  ///   int f(int arg) {
+  /// return array[arg] && array[arg + 10];
+  ///   }
+  /// then the byte offsets are `arg * 4` and `(arg + 10) * 4`, which are not
+  /// sub-expressions of each other (but `getSimplifiedOffsets` is smart enough
+  /// to detect this out of bounds access).
+  static bool providesInformationAboutInteresting(SymbolRef Sym,
+  PathSensitiveBugReport );
+  static bool providesInformationAboutInteresting(SVal SV,
+  PathSensitiveBugReport ) {
+return providesInformationAboutInteresting(SV.getAsSymbol(), BR);
+  }
+};
 
 struct Messages {
   std::string Short, Full;
@@ -54,11 +113,14 @@ class ArrayBoundCheckerV2 : public 
Checker,
 
   void performCheck(const Expr *E, CheckerContext ) const;
 
-  void reportOOB(CheckerContext , ProgramStateRef ErrorState, OOB_Kind Kind,
- NonLoc Offset, Messages Msgs) const;
+  void reportOOB(CheckerContext , ProgramStateRef ErrorState, Messages Msgs,
+ 

[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)

2024-01-25 Thread Balazs Benics via cfe-commits
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 



@@ -221,18 +221,38 @@ int allocaRegion(void) {
   return *mem;
 }
 
-int *unknownExtent(int arg) {
-  if (arg >= 2)
+int *symbolicExtent(int arg) {
+  // expected-note@+2 {{Assuming 'arg' is < 5}}
+  // expected-note@+1 {{Taking false branch}}
+  if (arg >= 5)
 return 0;
   int *mem = (int*)malloc(arg);
+
+  // TODO: without the following reference to 'arg', the analyzer would discard
+  // the range information about (the symbolic value of) 'arg'. This is
+  // incorrect because while the variable itself is inaccessible, it becomes
+  // the symbolic extent of 'mem', so we still want to reason about its
+  // potential values.
+  (void)arg;
+
   mem[8] = -2;
-  // FIXME: this should produce
-  //   {{Out of bound access to memory after the end of the heap area}}
-  //   {{Access of 'int' element in the heap area at index 8}}
+  // expected-warning@-1 {{Out of bound access to memory after the end of the 
heap area}}
+  // expected-note@-2 {{Access of 'int' element in the heap area at index 8}}
   return mem;
 }
 
-void unknownIndex(int arg) {
+int *symbolicExtentDiscardedRangeInfo(int arg) {
+  // This is a copy of the case 'symbolicExtent' without the '(void)arg' hack.
+  // TODO: if the analyzer can detect the out-of-bounds access within this TC,
+  // then remove this TC and the `(void)arg` hack from `symbolicExtent`.

steakhal wrote:

I'd prefer to expand `TC`, it's not that longer.

https://github.com/llvm/llvm-project/pull/78315
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)

2024-01-22 Thread via cfe-commits
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 


NagyDonat wrote:

Ouch, that seems to be a nasty issue. Thanks for doing the review and I hope 
that you'll be able to share it eventually :)

(If there is no clean resolution, feel free to dump a semi-formatted copypaste 
/ "Save As..." html snapshot of the review that you prepared. As long as your 
suggestions are readable, they'll be helpful.)

https://github.com/llvm/llvm-project/pull/78315
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)

2024-01-22 Thread Balazs Benics via cfe-commits
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 


steakhal wrote:

> @steakhal I handled all the suggestions from the first review round (either 
> by updating the PR, or by replying / asking follow-up questions when the 
> situation wasn't clear for me).

I wanted to submit my review earlier today, but failed. See the details 
[here](https://discourse.llvm.org/t/enabling-fine-grained-access-tokens-for-the-llvm-organization/76383/3).

https://github.com/llvm/llvm-project/pull/78315
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)

2024-01-22 Thread via cfe-commits
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 



@@ -318,17 +396,87 @@ static Messages getTaintMsgs(const SubRegion *Region, 
const char *OffsetName) {
   RegName, OffsetName)};
 }
 
-void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext ) const 
{
-  // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
-  // some new logic here that reasons directly about memory region extents.
-  // Once that logic is more mature, we can bring it back to assumeInBound()
-  // for all clients to use.
-  //
-  // The algorithm we are using here for bounds checking is to see if the
-  // memory access is within the extent of the base region.  Since we
-  // have some flexibility in defining the base region, we can achieve
-  // various levels of conservatism in our buffer overflow checking.
+const NoteTag *StateUpdateReporter::createNoteTag(CheckerContext ) const {
+  // Don't create a note tag if we didn't assume anything:
+  if (!AssumedNonNegative && !AssumedUpperBound)
+return nullptr;
+
+  return C.getNoteTag(
+  [*this](PathSensitiveBugReport ) -> std::string {
+return getMessage(BR);
+  },
+  /*isPrunable=*/false);
+}
+
+std::string StateUpdateReporter::getMessage(PathSensitiveBugReport ) const {
+  bool ShouldReportNonNegative = AssumedNonNegative;
+  if (!providesInformationAboutInteresting(ByteOffsetVal, BR)) {
+if (AssumedUpperBound &&
+providesInformationAboutInteresting(*AssumedUpperBound, BR))
+  ShouldReportNonNegative = false;
+else
+  return "";
+  }
+
+  std::optional OffsetN = getConcreteValue(ByteOffsetVal);
+  std::optional ExtentN = getConcreteValue(AssumedUpperBound);
 
+  const bool UseIndex =
+  ElementSize && tryDividePair(OffsetN, ExtentN, *ElementSize);
+
+  SmallString<256> Buf;
+  llvm::raw_svector_ostream Out(Buf);
+  Out << "Assuming ";
+  if (UseIndex) {
+Out << "index ";
+if (OffsetN)
+  Out << "'" << OffsetN << "' ";
+  } else if (AssumedUpperBound) {
+Out << "byte offset ";
+if (OffsetN)
+  Out << "'" << OffsetN << "' ";
+  } else {
+Out << "offset ";
+  }
+
+  Out << "is";
+  if (ShouldReportNonNegative) {
+Out << " non-negative";
+  }
+  if (AssumedUpperBound) {
+if (ShouldReportNonNegative)
+  Out << " and";
+Out << " less than ";
+if (ExtentN)
+  Out << *ExtentN << ", ";
+if (UseIndex && ElementType)
+  Out << "the number of '" << ElementType->getAsString()
+  << "' elements in ";
+else
+  Out << "the extent of ";
+Out << getRegionName(Reg);
+  }

NagyDonat wrote:

If I see it correctly now I have testcases covering each message fragment 
that's accessible; but it turns out that there are several code paths that are 
inaccessible now because the inequality simplification algorithm is too dumb. I 
marked these limitations as FIXMEs, I'll try to fix them in a separate commit.

https://github.com/llvm/llvm-project/pull/78315
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)

2024-01-22 Thread via cfe-commits
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 


github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


``bash
git-clang-format --diff 3440466536c97ced20e366301a60f12f4fd01e30 
6f6dc734d9b0b163f93189ebf094d03d16dd48c0 -- 
clang/test/Analysis/out-of-bounds-notes.c 
clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp 
clang/test/Analysis/out-of-bounds-diagnostics.c
``





View the diff from clang-format here.


``diff
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp 
b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index 002d0315bc..24247f3403 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -419,8 +419,8 @@ std::string 
StateUpdateReporter::getMessage(PathSensitiveBugReport ) const {
 if (AssumedUpperBound &&
 providesInformationAboutInteresting(*AssumedUpperBound, BR)) {
   // Even if the byte offset isn't interesting (e.g. it's a constant 
value),
-  // the assumption can still be interesting if it provides information 
about
-  // an interesting symbolic upper bound.
+  // the assumption can still be interesting if it provides information
+  // about an interesting symbolic upper bound.
   // FIXME: This code path is currently non-functional and untested because
   // `getSimplifiedOffsets()` only works when the RHS (extent) is constant.
 

``




https://github.com/llvm/llvm-project/pull/78315
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)

2024-01-22 Thread via cfe-commits
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 


https://github.com/NagyDonat updated 
https://github.com/llvm/llvm-project/pull/78315

>From c75c05c6e894a46797913c5bdccb240cbcc01ae9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= 
Date: Tue, 12 Dec 2023 13:07:54 +0100
Subject: [PATCH 1/6] [analyzer] Support interestingness in ArrayBoundV2

This commit improves alpha.security.ArrayBoundV2 in two connected areas:
(1) It calls `markInteresting()` on the symbolic values that are
responsible for the out of bounds access.
(2) Its index-is-in-bounds assumptions are reported in note tags if they
provide information about the value of an interesting symbol.

This commit is limited to "display" changes: it introduces new
diagnostic pieces (potentially to bugs found by other checkers), but the
ArrayBoundV2 will make the same assumptions and detect the same bugs
before and after this change.

As a minor unrelated change, this commit also updates/removes some very
old comments which became obsolate due to my previous changes.
---
 .../Checkers/ArrayBoundCheckerV2.cpp  | 345 ++
 .../test/Analysis/out-of-bounds-diagnostics.c |  10 +
 clang/test/Analysis/out-of-bounds-notes.c | 128 +++
 3 files changed, 413 insertions(+), 70 deletions(-)
 create mode 100644 clang/test/Analysis/out-of-bounds-notes.c

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp 
b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index 6c7a1601402efac..4241fa3a2ce8af6 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -33,7 +33,66 @@ using namespace taint;
 using llvm::formatv;
 
 namespace {
-enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
+class StateUpdateReporter {
+  const SubRegion *Reg;
+  NonLoc ByteOffsetVal;
+  std::optional ElementType = std::nullopt;
+  std::optional ElementSize = std::nullopt;
+  bool AssumedNonNegative = false;
+  std::optional AssumedUpperBound = std::nullopt;
+
+public:
+  StateUpdateReporter(const SubRegion *R, NonLoc ByteOffsVal, const Expr *E,
+  CheckerContext )
+  : Reg(R), ByteOffsetVal(ByteOffsVal) {
+initializeElementInfo(E, C);
+  }
+
+  void initializeElementInfo(const Expr *E, CheckerContext ) {
+if (const auto *ASE = dyn_cast(E)) {
+  const MemRegion *SubscriptBaseReg =
+  C.getSVal(ASE->getBase()).getAsRegion();
+  if (!SubscriptBaseReg)
+return;
+  SubscriptBaseReg = SubscriptBaseReg->StripCasts();
+  if (!isa_and_nonnull(SubscriptBaseReg)) {
+ElementType = ASE->getType();
+ElementSize =
+C.getASTContext().getTypeSizeInChars(*ElementType).getQuantity();
+  }
+}
+  }
+  void recordNonNegativeAssumption() { AssumedNonNegative = true; }
+  void recordUpperBoundAssumption(NonLoc UpperBoundVal) {
+AssumedUpperBound = UpperBoundVal;
+  }
+
+  const NoteTag *createNoteTag(CheckerContext ) const;
+
+private:
+  std::string getMessage(PathSensitiveBugReport ) const;
+
+  /// Return true if information about the value of `Sym` can put constraints
+  /// on some symbol which is interesting within the bug report `BR`.
+  /// In particular, this returns true when `Sym` is interesting within `BR`;
+  /// but it also returns true if `Sym` is an expression that contains integer
+  /// constants and a single symbolic operand which is interesting (in `BR`).
+  /// We need to use this instead of plain `BR.isInteresting()` because if we
+  /// are analyzing code like
+  ///   int array[10];
+  ///   int f(int arg) {
+  /// return array[arg] && array[arg + 10];
+  ///   }
+  /// then the byte offsets are `arg * 4` and `(arg + 10) * 4`, which are not
+  /// sub-expressions of each other (but `getSimplifiedOffsets` is smart enough
+  /// to detect this out of bounds access).
+  static bool providesInformationAboutInteresting(SymbolRef Sym,
+  PathSensitiveBugReport );
+  static bool providesInformationAboutInteresting(SVal SV,
+  PathSensitiveBugReport ) {
+return providesInformationAboutInteresting(SV.getAsSymbol(), BR);
+  }
+};
 
 struct Messages {
   std::string Short, Full;
@@ -54,11 +113,14 @@ class ArrayBoundCheckerV2 : public 
Checker,
 
   void performCheck(const Expr *E, CheckerContext ) const;
 
-  void reportOOB(CheckerContext , ProgramStateRef ErrorState, OOB_Kind Kind,
- NonLoc Offset, Messages Msgs) const;
+  void reportOOB(CheckerContext , ProgramStateRef ErrorState, Messages Msgs,
+ NonLoc Offset, bool IsTaintBug = false) const;
 
   static bool isFromCtypeMacro(const Stmt *S, ASTContext );
 
+  static bool isIdiomaticPastTheEndPtr(const Expr *E, ProgramStateRef State,
+   NonLoc Offset, NonLoc 

[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)

2024-01-22 Thread via cfe-commits
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 



@@ -124,3 +124,25 @@ int assumingConverted2(struct foo f, int arg) {
   // expected-note@-2 {{Access of 'array' at negative byte offset}}
   return a + b + c;
 }
+
+int assumingPlainOffset(struct foo f, int arg) {
+  // This TC is intended to check the corner case that the checker prints the
+  // shorter "offset" instead of "byte offset" when it's irrelevant that the
+  // offset is measured in bytes.
+
+  // expected-note@+2 {{Assuming 'arg' is < 2}}
+  // expected-note@+1 {{Taking false branch}}
+  if (arg >= 2)
+return 0;
+
+  int b = ((int*)(f.b))[arg];
+  // expected-note@-1 {{Assuming byte offset is non-negative and less than 5, 
the extent of 'f.b'}}
+  // FIXME: this should be {{Assuming offset is non-negative}}
+  // but the current simplification algorithm doesn't realize that arg <= 1
+  // implies that the byte offset arg*4 will be less than 5.

NagyDonat wrote:

Also note that currently the checker only verifies that _(byte offset to first 
byte of accessed element) < (extent of memory region in bytes)_; so e.g. it 
would accept "`char b[5]; ((int*)b)[1] = 123;`" despite the fact that (assuming 
4-bit `int`) this writes 3 bytes after the end of the array `b`.

This issue seems to be mostly theoretical (this sort of low-level memory abuse 
should be very rare), but I'll try to ensure that the checker handles it 
correctly.

https://github.com/llvm/llvm-project/pull/78315
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits