[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: @@ -381,66 +574,105 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext ) const { compareValueToThreshold(State, ByteOffset, *KnownSize, SVB); if (ExceedsUpperBound) { + // The offset may be invalid (>= Size)... if (!WithinUpperBound) { -// We know that the index definitely exceeds the upper bound. -if (isa(E) && isInAddressOf(E, C.getASTContext())) { - // ...but this is within an addressof expression, so we need to check - // for the exceptional case that `[size]` is valid. - auto [EqualsToThreshold, NotEqualToThreshold] = - compareValueToThreshold(ExceedsUpperBound, ByteOffset, *KnownSize, - SVB, /*CheckEquality=*/true); - if (EqualsToThreshold && !NotEqualToThreshold) { -// We are definitely in the exceptional case, so return early -// instead of reporting a bug. -C.addTransition(EqualsToThreshold); -return; - } +// ...and it cannot be within bounds, so report an error, unless we can +// definitely determine that this is an idiomatic `[size]` +// expression that calculates the past-the-end pointer. +if (isIdiomaticPastTheEndPtr(E, ExceedsUpperBound, ByteOffset, + *KnownSize, C)) { + C.addTransition(ExceedsUpperBound, SUR.createNoteTag(C)); + return; } + Messages Msgs = getExceedsMsgs(C.getASTContext(), Reg, ByteOffset, *KnownSize, Location); -reportOOB(C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, Msgs); +reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize); return; } + // ...and it can be valid as well... if (isTainted(State, ByteOffset)) { -// Both cases are possible, but the offset is tainted, so report. -std::string RegName = getRegionName(Reg); +// ...but it's tainted, so report an error. -// Diagnostic detail: "tainted offset" is always correct, but the -// common case is that 'idx' is tainted in 'arr[idx]' and then it's +// Diagnostic detail: saying "tainted offset" is always correct, but +// the common case is that 'idx' is tainted in 'arr[idx]' and then it's // nicer to say "tainted index". const char *OffsetName = "offset"; if (const auto *ASE = dyn_cast(E)) if (isTainted(State, ASE->getIdx(), C.getLocationContext())) OffsetName = "index"; Messages Msgs = getTaintMsgs(Reg, OffsetName); -reportOOB(C, ExceedsUpperBound, OOB_Taint, ByteOffset, Msgs); +reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize, + /*IsTaintBug=*/true); return; } + // ...and it isn't tainted, so the checker will (optimistically) assume + // that the offset is in bounds and mention this in the note tag. + SUR.recordUpperBoundAssumption(*KnownSize); } +// Actually update the state. The "if" only fails in the extremely unlikely +// case when compareValueToThreshold returns {nullptr, nullptr} becasue +// evalBinOpNN fails to evaluate the less-than operator. if (WithinUpperBound) State = WithinUpperBound; } - C.addTransition(State); + // Add a transition, reporting the state updates that we accumulated. + C.addTransition(State, SUR.createNoteTag(C)); +} + +void ArrayBoundCheckerV2::markPartsInteresting(PathSensitiveBugReport , + ProgramStateRef ErrorState, + NonLoc Val, bool MarkTaint) { + if (SymbolRef Sym = Val.getAsSymbol()) { +// If the offset is a symbolic value, iterate over its "parts" with +// `SymExpr::symbols()` and mark each of them as interesting. +// For example, if the offset is `x*4 + y` then we put interestingness onto +// the SymSymExpr `x*4 + y`, the SymIntExpr `x*4` and the two data symbols +// `x` and `y`. +for (SymbolRef PartSym : Sym->symbols()) + BR.markInteresting(PartSym); + } + + if (MarkTaint) { +// If the issue that we're reporting depends on the taintedness of the +// offset, then put interestingness onto symbols that could be the origin +// of the taint. +for (SymbolRef Sym : getTaintedSymbols(ErrorState, Val)) + BR.markInteresting(Sym); + } NagyDonat wrote: No, it is not redundant :upside_down_face: because
[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: @@ -381,66 +574,105 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext ) const { compareValueToThreshold(State, ByteOffset, *KnownSize, SVB); if (ExceedsUpperBound) { + // The offset may be invalid (>= Size)... if (!WithinUpperBound) { -// We know that the index definitely exceeds the upper bound. -if (isa(E) && isInAddressOf(E, C.getASTContext())) { - // ...but this is within an addressof expression, so we need to check - // for the exceptional case that `[size]` is valid. - auto [EqualsToThreshold, NotEqualToThreshold] = - compareValueToThreshold(ExceedsUpperBound, ByteOffset, *KnownSize, - SVB, /*CheckEquality=*/true); - if (EqualsToThreshold && !NotEqualToThreshold) { -// We are definitely in the exceptional case, so return early -// instead of reporting a bug. -C.addTransition(EqualsToThreshold); -return; - } +// ...and it cannot be within bounds, so report an error, unless we can +// definitely determine that this is an idiomatic `[size]` +// expression that calculates the past-the-end pointer. +if (isIdiomaticPastTheEndPtr(E, ExceedsUpperBound, ByteOffset, + *KnownSize, C)) { + C.addTransition(ExceedsUpperBound, SUR.createNoteTag(C)); + return; } + Messages Msgs = getExceedsMsgs(C.getASTContext(), Reg, ByteOffset, *KnownSize, Location); -reportOOB(C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, Msgs); +reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize); return; } + // ...and it can be valid as well... if (isTainted(State, ByteOffset)) { -// Both cases are possible, but the offset is tainted, so report. -std::string RegName = getRegionName(Reg); +// ...but it's tainted, so report an error. -// Diagnostic detail: "tainted offset" is always correct, but the -// common case is that 'idx' is tainted in 'arr[idx]' and then it's +// Diagnostic detail: saying "tainted offset" is always correct, but +// the common case is that 'idx' is tainted in 'arr[idx]' and then it's // nicer to say "tainted index". const char *OffsetName = "offset"; if (const auto *ASE = dyn_cast(E)) if (isTainted(State, ASE->getIdx(), C.getLocationContext())) OffsetName = "index"; Messages Msgs = getTaintMsgs(Reg, OffsetName); -reportOOB(C, ExceedsUpperBound, OOB_Taint, ByteOffset, Msgs); +reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize, + /*IsTaintBug=*/true); return; } + // ...and it isn't tainted, so the checker will (optimistically) assume + // that the offset is in bounds and mention this in the note tag. + SUR.recordUpperBoundAssumption(*KnownSize); } +// Actually update the state. The "if" only fails in the extremely unlikely +// case when compareValueToThreshold returns {nullptr, nullptr} becasue +// evalBinOpNN fails to evaluate the less-than operator. if (WithinUpperBound) State = WithinUpperBound; } - C.addTransition(State); + // Add a transition, reporting the state updates that we accumulated. + C.addTransition(State, SUR.createNoteTag(C)); +} + +void ArrayBoundCheckerV2::markPartsInteresting(PathSensitiveBugReport , + ProgramStateRef ErrorState, + NonLoc Val, bool MarkTaint) { + if (SymbolRef Sym = Val.getAsSymbol()) { +// If the offset is a symbolic value, iterate over its "parts" with +// `SymExpr::symbols()` and mark each of them as interesting. +// For example, if the offset is `x*4 + y` then we put interestingness onto +// the SymSymExpr `x*4 + y`, the SymIntExpr `x*4` and the two data symbols +// `x` and `y`. +for (SymbolRef PartSym : Sym->symbols()) + BR.markInteresting(PartSym); + } + + if (MarkTaint) { +// If the issue that we're reporting depends on the taintedness of the +// offset, then put interestingness onto symbols that could be the origin +// of the taint. +for (SymbolRef Sym : getTaintedSymbols(ErrorState, Val)) + BR.markInteresting(Sym); + } } void ArrayBoundCheckerV2::reportOOB(CheckerContext , -
[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: @@ -318,17 +424,91 @@ static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName) { RegName, OffsetName)}; } -void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext ) const { - // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping - // some new logic here that reasons directly about memory region extents. - // Once that logic is more mature, we can bring it back to assumeInBound() - // for all clients to use. - // - // The algorithm we are using here for bounds checking is to see if the - // memory access is within the extent of the base region. Since we - // have some flexibility in defining the base region, we can achieve - // various levels of conservatism in our buffer overflow checking. +const NoteTag *StateUpdateReporter::createNoteTag(CheckerContext ) const { + // Don't create a note tag if we didn't assume anything: + if (!AssumedNonNegative && !AssumedUpperBound) +return nullptr; + + return C.getNoteTag([*this](PathSensitiveBugReport ) -> std::string { +return getMessage(BR); + }); +} + +std::string StateUpdateReporter::getMessage(PathSensitiveBugReport ) const { + bool ShouldReportNonNegative = AssumedNonNegative; + if (!providesInformationAboutInteresting(ByteOffsetVal, BR)) { +if (AssumedUpperBound && +providesInformationAboutInteresting(*AssumedUpperBound, BR)) { + // Even if the byte offset isn't interesting (e.g. it's a constant value), + // the assumption can still be interesting if it provides information + // about an interesting symbolic upper bound. + ShouldReportNonNegative = false; +} else { + // We don't have anything interesting, don't report the assumption. + return ""; +} steakhal wrote: ```suggestion } // We don't have anything interesting, don't report the assumption. return ""; ``` Return after else. https://github.com/llvm/llvm-project/pull/78315 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: NagyDonat wrote: @steakhal To my best knowledge I implemented or answered all of your suggestions, so I'm ready for the next round (or I'm also ready to merge this commit and start the next one :grin:). https://github.com/llvm/llvm-project/pull/78315 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: https://github.com/NagyDonat updated https://github.com/llvm/llvm-project/pull/78315 >From c75c05c6e894a46797913c5bdccb240cbcc01ae9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= Date: Tue, 12 Dec 2023 13:07:54 +0100 Subject: [PATCH 01/16] [analyzer] Support interestingness in ArrayBoundV2 This commit improves alpha.security.ArrayBoundV2 in two connected areas: (1) It calls `markInteresting()` on the symbolic values that are responsible for the out of bounds access. (2) Its index-is-in-bounds assumptions are reported in note tags if they provide information about the value of an interesting symbol. This commit is limited to "display" changes: it introduces new diagnostic pieces (potentially to bugs found by other checkers), but the ArrayBoundV2 will make the same assumptions and detect the same bugs before and after this change. As a minor unrelated change, this commit also updates/removes some very old comments which became obsolate due to my previous changes. --- .../Checkers/ArrayBoundCheckerV2.cpp | 345 ++ .../test/Analysis/out-of-bounds-diagnostics.c | 10 + clang/test/Analysis/out-of-bounds-notes.c | 128 +++ 3 files changed, 413 insertions(+), 70 deletions(-) create mode 100644 clang/test/Analysis/out-of-bounds-notes.c diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index 6c7a1601402efac..4241fa3a2ce8af6 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -33,7 +33,66 @@ using namespace taint; using llvm::formatv; namespace { -enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint }; +class StateUpdateReporter { + const SubRegion *Reg; + NonLoc ByteOffsetVal; + std::optional ElementType = std::nullopt; + std::optional ElementSize = std::nullopt; + bool AssumedNonNegative = false; + std::optional AssumedUpperBound = std::nullopt; + +public: + StateUpdateReporter(const SubRegion *R, NonLoc ByteOffsVal, const Expr *E, + CheckerContext ) + : Reg(R), ByteOffsetVal(ByteOffsVal) { +initializeElementInfo(E, C); + } + + void initializeElementInfo(const Expr *E, CheckerContext ) { +if (const auto *ASE = dyn_cast(E)) { + const MemRegion *SubscriptBaseReg = + C.getSVal(ASE->getBase()).getAsRegion(); + if (!SubscriptBaseReg) +return; + SubscriptBaseReg = SubscriptBaseReg->StripCasts(); + if (!isa_and_nonnull(SubscriptBaseReg)) { +ElementType = ASE->getType(); +ElementSize = +C.getASTContext().getTypeSizeInChars(*ElementType).getQuantity(); + } +} + } + void recordNonNegativeAssumption() { AssumedNonNegative = true; } + void recordUpperBoundAssumption(NonLoc UpperBoundVal) { +AssumedUpperBound = UpperBoundVal; + } + + const NoteTag *createNoteTag(CheckerContext ) const; + +private: + std::string getMessage(PathSensitiveBugReport ) const; + + /// Return true if information about the value of `Sym` can put constraints + /// on some symbol which is interesting within the bug report `BR`. + /// In particular, this returns true when `Sym` is interesting within `BR`; + /// but it also returns true if `Sym` is an expression that contains integer + /// constants and a single symbolic operand which is interesting (in `BR`). + /// We need to use this instead of plain `BR.isInteresting()` because if we + /// are analyzing code like + /// int array[10]; + /// int f(int arg) { + /// return array[arg] && array[arg + 10]; + /// } + /// then the byte offsets are `arg * 4` and `(arg + 10) * 4`, which are not + /// sub-expressions of each other (but `getSimplifiedOffsets` is smart enough + /// to detect this out of bounds access). + static bool providesInformationAboutInteresting(SymbolRef Sym, + PathSensitiveBugReport ); + static bool providesInformationAboutInteresting(SVal SV, + PathSensitiveBugReport ) { +return providesInformationAboutInteresting(SV.getAsSymbol(), BR); + } +}; struct Messages { std::string Short, Full; @@ -54,11 +113,14 @@ class ArrayBoundCheckerV2 : public Checker, void performCheck(const Expr *E, CheckerContext ) const; - void reportOOB(CheckerContext , ProgramStateRef ErrorState, OOB_Kind Kind, - NonLoc Offset, Messages Msgs) const; + void reportOOB(CheckerContext , ProgramStateRef ErrorState, Messages Msgs, +
[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: @@ -221,18 +221,38 @@ int allocaRegion(void) { return *mem; } -int *unknownExtent(int arg) { - if (arg >= 2) +int *symbolicExtent(int arg) { + // expected-note@+2 {{Assuming 'arg' is < 5}} + // expected-note@+1 {{Taking false branch}} + if (arg >= 5) return 0; int *mem = (int*)malloc(arg); + + // TODO: without the following reference to 'arg', the analyzer would discard + // the range information about (the symbolic value of) 'arg'. This is + // incorrect because while the variable itself is inaccessible, it becomes + // the symbolic extent of 'mem', so we still want to reason about its + // potential values. + (void)arg; + mem[8] = -2; - // FIXME: this should produce - // {{Out of bound access to memory after the end of the heap area}} - // {{Access of 'int' element in the heap area at index 8}} + // expected-warning@-1 {{Out of bound access to memory after the end of the heap area}} + // expected-note@-2 {{Access of 'int' element in the heap area at index 8}} return mem; } -void unknownIndex(int arg) { +int *symbolicExtentDiscardedRangeInfo(int arg) { + // This is a copy of the case 'symbolicExtent' without the '(void)arg' hack. + // TODO: if the analyzer can detect the out-of-bounds access within this TC, + // then remove this TC and the `(void)arg` hack from `symbolicExtent`. steakhal wrote: I'd prefer to expand `TC`, it's not that longer. https://github.com/llvm/llvm-project/pull/78315 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: NagyDonat wrote: Ouch, that seems to be a nasty issue. Thanks for doing the review and I hope that you'll be able to share it eventually :) (If there is no clean resolution, feel free to dump a semi-formatted copypaste / "Save As..." html snapshot of the review that you prepared. As long as your suggestions are readable, they'll be helpful.) https://github.com/llvm/llvm-project/pull/78315 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: steakhal wrote: > @steakhal I handled all the suggestions from the first review round (either > by updating the PR, or by replying / asking follow-up questions when the > situation wasn't clear for me). I wanted to submit my review earlier today, but failed. See the details [here](https://discourse.llvm.org/t/enabling-fine-grained-access-tokens-for-the-llvm-organization/76383/3). https://github.com/llvm/llvm-project/pull/78315 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: @@ -318,17 +396,87 @@ static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName) { RegName, OffsetName)}; } -void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext ) const { - // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping - // some new logic here that reasons directly about memory region extents. - // Once that logic is more mature, we can bring it back to assumeInBound() - // for all clients to use. - // - // The algorithm we are using here for bounds checking is to see if the - // memory access is within the extent of the base region. Since we - // have some flexibility in defining the base region, we can achieve - // various levels of conservatism in our buffer overflow checking. +const NoteTag *StateUpdateReporter::createNoteTag(CheckerContext ) const { + // Don't create a note tag if we didn't assume anything: + if (!AssumedNonNegative && !AssumedUpperBound) +return nullptr; + + return C.getNoteTag( + [*this](PathSensitiveBugReport ) -> std::string { +return getMessage(BR); + }, + /*isPrunable=*/false); +} + +std::string StateUpdateReporter::getMessage(PathSensitiveBugReport ) const { + bool ShouldReportNonNegative = AssumedNonNegative; + if (!providesInformationAboutInteresting(ByteOffsetVal, BR)) { +if (AssumedUpperBound && +providesInformationAboutInteresting(*AssumedUpperBound, BR)) + ShouldReportNonNegative = false; +else + return ""; + } + + std::optional OffsetN = getConcreteValue(ByteOffsetVal); + std::optional ExtentN = getConcreteValue(AssumedUpperBound); + const bool UseIndex = + ElementSize && tryDividePair(OffsetN, ExtentN, *ElementSize); + + SmallString<256> Buf; + llvm::raw_svector_ostream Out(Buf); + Out << "Assuming "; + if (UseIndex) { +Out << "index "; +if (OffsetN) + Out << "'" << OffsetN << "' "; + } else if (AssumedUpperBound) { +Out << "byte offset "; +if (OffsetN) + Out << "'" << OffsetN << "' "; + } else { +Out << "offset "; + } + + Out << "is"; + if (ShouldReportNonNegative) { +Out << " non-negative"; + } + if (AssumedUpperBound) { +if (ShouldReportNonNegative) + Out << " and"; +Out << " less than "; +if (ExtentN) + Out << *ExtentN << ", "; +if (UseIndex && ElementType) + Out << "the number of '" << ElementType->getAsString() + << "' elements in "; +else + Out << "the extent of "; +Out << getRegionName(Reg); + } NagyDonat wrote: If I see it correctly now I have testcases covering each message fragment that's accessible; but it turns out that there are several code paths that are inaccessible now because the inequality simplification algorithm is too dumb. I marked these limitations as FIXMEs, I'll try to fix them in a separate commit. https://github.com/llvm/llvm-project/pull/78315 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff 3440466536c97ced20e366301a60f12f4fd01e30 6f6dc734d9b0b163f93189ebf094d03d16dd48c0 -- clang/test/Analysis/out-of-bounds-notes.c clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp clang/test/Analysis/out-of-bounds-diagnostics.c `` View the diff from clang-format here. ``diff diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index 002d0315bc..24247f3403 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -419,8 +419,8 @@ std::string StateUpdateReporter::getMessage(PathSensitiveBugReport ) const { if (AssumedUpperBound && providesInformationAboutInteresting(*AssumedUpperBound, BR)) { // Even if the byte offset isn't interesting (e.g. it's a constant value), - // the assumption can still be interesting if it provides information about - // an interesting symbolic upper bound. + // the assumption can still be interesting if it provides information + // about an interesting symbolic upper bound. // FIXME: This code path is currently non-functional and untested because // `getSimplifiedOffsets()` only works when the RHS (extent) is constant. `` https://github.com/llvm/llvm-project/pull/78315 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: https://github.com/NagyDonat updated https://github.com/llvm/llvm-project/pull/78315 >From c75c05c6e894a46797913c5bdccb240cbcc01ae9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= Date: Tue, 12 Dec 2023 13:07:54 +0100 Subject: [PATCH 1/6] [analyzer] Support interestingness in ArrayBoundV2 This commit improves alpha.security.ArrayBoundV2 in two connected areas: (1) It calls `markInteresting()` on the symbolic values that are responsible for the out of bounds access. (2) Its index-is-in-bounds assumptions are reported in note tags if they provide information about the value of an interesting symbol. This commit is limited to "display" changes: it introduces new diagnostic pieces (potentially to bugs found by other checkers), but the ArrayBoundV2 will make the same assumptions and detect the same bugs before and after this change. As a minor unrelated change, this commit also updates/removes some very old comments which became obsolate due to my previous changes. --- .../Checkers/ArrayBoundCheckerV2.cpp | 345 ++ .../test/Analysis/out-of-bounds-diagnostics.c | 10 + clang/test/Analysis/out-of-bounds-notes.c | 128 +++ 3 files changed, 413 insertions(+), 70 deletions(-) create mode 100644 clang/test/Analysis/out-of-bounds-notes.c diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index 6c7a1601402efac..4241fa3a2ce8af6 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -33,7 +33,66 @@ using namespace taint; using llvm::formatv; namespace { -enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint }; +class StateUpdateReporter { + const SubRegion *Reg; + NonLoc ByteOffsetVal; + std::optional ElementType = std::nullopt; + std::optional ElementSize = std::nullopt; + bool AssumedNonNegative = false; + std::optional AssumedUpperBound = std::nullopt; + +public: + StateUpdateReporter(const SubRegion *R, NonLoc ByteOffsVal, const Expr *E, + CheckerContext ) + : Reg(R), ByteOffsetVal(ByteOffsVal) { +initializeElementInfo(E, C); + } + + void initializeElementInfo(const Expr *E, CheckerContext ) { +if (const auto *ASE = dyn_cast(E)) { + const MemRegion *SubscriptBaseReg = + C.getSVal(ASE->getBase()).getAsRegion(); + if (!SubscriptBaseReg) +return; + SubscriptBaseReg = SubscriptBaseReg->StripCasts(); + if (!isa_and_nonnull(SubscriptBaseReg)) { +ElementType = ASE->getType(); +ElementSize = +C.getASTContext().getTypeSizeInChars(*ElementType).getQuantity(); + } +} + } + void recordNonNegativeAssumption() { AssumedNonNegative = true; } + void recordUpperBoundAssumption(NonLoc UpperBoundVal) { +AssumedUpperBound = UpperBoundVal; + } + + const NoteTag *createNoteTag(CheckerContext ) const; + +private: + std::string getMessage(PathSensitiveBugReport ) const; + + /// Return true if information about the value of `Sym` can put constraints + /// on some symbol which is interesting within the bug report `BR`. + /// In particular, this returns true when `Sym` is interesting within `BR`; + /// but it also returns true if `Sym` is an expression that contains integer + /// constants and a single symbolic operand which is interesting (in `BR`). + /// We need to use this instead of plain `BR.isInteresting()` because if we + /// are analyzing code like + /// int array[10]; + /// int f(int arg) { + /// return array[arg] && array[arg + 10]; + /// } + /// then the byte offsets are `arg * 4` and `(arg + 10) * 4`, which are not + /// sub-expressions of each other (but `getSimplifiedOffsets` is smart enough + /// to detect this out of bounds access). + static bool providesInformationAboutInteresting(SymbolRef Sym, + PathSensitiveBugReport ); + static bool providesInformationAboutInteresting(SVal SV, + PathSensitiveBugReport ) { +return providesInformationAboutInteresting(SV.getAsSymbol(), BR); + } +}; struct Messages { std::string Short, Full; @@ -54,11 +113,14 @@ class ArrayBoundCheckerV2 : public Checker, void performCheck(const Expr *E, CheckerContext ) const; - void reportOOB(CheckerContext , ProgramStateRef ErrorState, OOB_Kind Kind, - NonLoc Offset, Messages Msgs) const; + void reportOOB(CheckerContext , ProgramStateRef ErrorState, Messages Msgs, + NonLoc Offset, bool IsTaintBug = false) const; static bool isFromCtypeMacro(const Stmt *S, ASTContext ); + static bool isIdiomaticPastTheEndPtr(const Expr *E, ProgramStateRef State, + NonLoc Offset, NonLoc
[clang-tools-extra] [llvm] [clang] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: @@ -124,3 +124,25 @@ int assumingConverted2(struct foo f, int arg) { // expected-note@-2 {{Access of 'array' at negative byte offset}} return a + b + c; } + +int assumingPlainOffset(struct foo f, int arg) { + // This TC is intended to check the corner case that the checker prints the + // shorter "offset" instead of "byte offset" when it's irrelevant that the + // offset is measured in bytes. + + // expected-note@+2 {{Assuming 'arg' is < 2}} + // expected-note@+1 {{Taking false branch}} + if (arg >= 2) +return 0; + + int b = ((int*)(f.b))[arg]; + // expected-note@-1 {{Assuming byte offset is non-negative and less than 5, the extent of 'f.b'}} + // FIXME: this should be {{Assuming offset is non-negative}} + // but the current simplification algorithm doesn't realize that arg <= 1 + // implies that the byte offset arg*4 will be less than 5. NagyDonat wrote: Also note that currently the checker only verifies that _(byte offset to first byte of accessed element) < (extent of memory region in bytes)_; so e.g. it would accept "`char b[5]; ((int*)b)[1] = 123;`" despite the fact that (assuming 4-bit `int`) this writes 3 bytes after the end of the array `b`. This issue seems to be mostly theoretical (this sort of low-level memory abuse should be very rare), but I'll try to ensure that the checker handles it correctly. https://github.com/llvm/llvm-project/pull/78315 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits