=?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>, =?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>, =?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>, =?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>, =?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>, =?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>, =?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>, =?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>, =?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>, =?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>, =?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>, =?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>, =?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>, =?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>, =?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com> Message-ID: In-Reply-To: <llvm.org/llvm/llvm-project/pull/78...@github.com>
================ @@ -381,66 +574,105 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const { compareValueToThreshold(State, ByteOffset, *KnownSize, SVB); if (ExceedsUpperBound) { + // The offset may be invalid (>= Size)... if (!WithinUpperBound) { - // We know that the index definitely exceeds the upper bound. - if (isa<ArraySubscriptExpr>(E) && isInAddressOf(E, C.getASTContext())) { - // ...but this is within an addressof expression, so we need to check - // for the exceptional case that `&array[size]` is valid. - auto [EqualsToThreshold, NotEqualToThreshold] = - compareValueToThreshold(ExceedsUpperBound, ByteOffset, *KnownSize, - SVB, /*CheckEquality=*/true); - if (EqualsToThreshold && !NotEqualToThreshold) { - // We are definitely in the exceptional case, so return early - // instead of reporting a bug. - C.addTransition(EqualsToThreshold); - return; - } + // ...and it cannot be within bounds, so report an error, unless we can + // definitely determine that this is an idiomatic `&array[size]` + // expression that calculates the past-the-end pointer. + if (isIdiomaticPastTheEndPtr(E, ExceedsUpperBound, ByteOffset, + *KnownSize, C)) { + C.addTransition(ExceedsUpperBound, SUR.createNoteTag(C)); + return; } + Messages Msgs = getExceedsMsgs(C.getASTContext(), Reg, ByteOffset, *KnownSize, Location); - reportOOB(C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, Msgs); + reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize); return; } + // ...and it can be valid as well... if (isTainted(State, ByteOffset)) { - // Both cases are possible, but the offset is tainted, so report. - std::string RegName = getRegionName(Reg); + // ...but it's tainted, so report an error. 
- // Diagnostic detail: "tainted offset" is always correct, but the - // common case is that 'idx' is tainted in 'arr[idx]' and then it's + // Diagnostic detail: saying "tainted offset" is always correct, but + // the common case is that 'idx' is tainted in 'arr[idx]' and then it's // nicer to say "tainted index". const char *OffsetName = "offset"; if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E)) if (isTainted(State, ASE->getIdx(), C.getLocationContext())) OffsetName = "index"; Messages Msgs = getTaintMsgs(Reg, OffsetName); - reportOOB(C, ExceedsUpperBound, OOB_Taint, ByteOffset, Msgs); + reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize, + /*IsTaintBug=*/true); return; } + // ...and it isn't tainted, so the checker will (optimistically) assume + // that the offset is in bounds and mention this in the note tag. + SUR.recordUpperBoundAssumption(*KnownSize); } + // Actually update the state. The "if" only fails in the extremely unlikely + // case when compareValueToThreshold returns {nullptr, nullptr} because + // evalBinOpNN fails to evaluate the less-than operator. if (WithinUpperBound) State = WithinUpperBound; } - C.addTransition(State); + // Add a transition, reporting the state updates that we accumulated. + C.addTransition(State, SUR.createNoteTag(C)); +} + +void ArrayBoundCheckerV2::markPartsInteresting(PathSensitiveBugReport &BR, + ProgramStateRef ErrorState, + NonLoc Val, bool MarkTaint) { + if (SymbolRef Sym = Val.getAsSymbol()) { + // If the offset is a symbolic value, iterate over its "parts" with + // `SymExpr::symbols()` and mark each of them as interesting. + // For example, if the offset is `x*4 + y` then we put interestingness onto + // the SymSymExpr `x*4 + y`, the SymIntExpr `x*4` and the two data symbols + // `x` and `y`. 
+ for (SymbolRef PartSym : Sym->symbols()) + BR.markInteresting(PartSym); + } + + if (MarkTaint) { + // If the issue that we're reporting depends on the taintedness of the + // offset, then put interestingness onto symbols that could be the origin + // of the taint. + for (SymbolRef Sym : getTaintedSymbols(ErrorState, Val)) + BR.markInteresting(Sym); + } } void ArrayBoundCheckerV2::reportOOB(CheckerContext &C, - ProgramStateRef ErrorState, OOB_Kind Kind, - NonLoc Offset, Messages Msgs) const { + ProgramStateRef ErrorState, Messages Msgs, + NonLoc Offset, std::optional<NonLoc> Extent, + bool IsTaintBug) const { ---------------- NagyDonat wrote: By the way `git-clang-format` adds a space before this kind of comment (which is IMHO a bit strange). https://github.com/llvm/llvm-project/pull/78315 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits