Author: DonĂ¡t Nagy Date: 2024-07-22T11:44:20+02:00 New Revision: 2bc38dc30bc9baad610925d7e90e724a9d09ee7d
URL: https://github.com/llvm/llvm-project/commit/2bc38dc30bc9baad610925d7e90e724a9d09ee7d DIFF: https://github.com/llvm/llvm-project/commit/2bc38dc30bc9baad610925d7e90e724a9d09ee7d.diff LOG: [analyzer] Improve bug report hashing, merge similar reports (#98621) Previously there were certain situations where alpha.security.ArrayBoundV2 produced lots of very similar and redundant reports that only differed in their full `Description` that contained the (negative) byte offset value. (See https://github.com/llvm/llvm-project/issues/86969 for details.) This change updates the `Profile()` method of `PathSensitiveBugReport` to ensure that it uses `getShortDescription()` instead of the full `Description` so the standard report deduplication eliminates most of these redundant reports. Note that the effects of this change are very limited because there are very few checkers that specify a separate short description, and so `getShortDescription()` practically always defaults to returning the full `Description`. For the sake of consistency `BasicBugReport::Profile()` is also updated to use the short description. (Right now there are no checkers that use `BasicBugReport` with separate long and short descriptions.) This commit also includes some small code quality improvements in `ArrayBoundV2` that are IMO too trivial to be moved into a separate commit. Added: Modified: clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp clang/lib/StaticAnalyzer/Core/BugReporter.cpp clang/test/Analysis/out-of-bounds-diagnostics.c Removed: ################################################################################ diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index f82288f1099e8..3f837564cf47c 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -373,14 +373,14 @@ static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) { } static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) { - std::string RegName = getRegionName(Region); - SmallString<128> Buf; - llvm::raw_svector_ostream Out(Buf); - Out << "Access of " << RegName << " at negative byte offset"; - if (auto ConcreteIdx = Offset.getAs<nonloc::ConcreteInt>()) - Out << ' ' << ConcreteIdx->getValue(); - return {formatv("Out of bound access to memory preceding {0}", RegName), - std::string(Buf)}; + std::string RegName = getRegionName(Region), OffsetStr = ""; + + if (auto ConcreteOffset = getConcreteValue(Offset)) + OffsetStr = formatv(" {0}", ConcreteOffset); + + return { + formatv("Out of bound access to memory preceding {0}", RegName), + formatv("Access of {0} at negative byte offset{1}", RegName, OffsetStr)}; } /// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if @@ -609,7 +609,7 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const { // CHECK UPPER BOUND DefinedOrUnknownSVal Size = getDynamicExtent(State, Reg, SVB); if (auto KnownSize = Size.getAs<NonLoc>()) { - // In a situation where both overflow and overflow are possible (but the + // In a situation where both underflow and overflow are possible (but the // index is either tainted or known to be invalid), the logic of this // checker will first assume that the offset is non-negative, and then // (with this additional assumption) it will detect an overflow error. diff --git a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp index e2002bfbe594a..d73dc40cf03fb 100644 --- a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp +++ b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp @@ -2198,7 +2198,7 @@ const Decl *PathSensitiveBugReport::getDeclWithIssue() const { void BasicBugReport::Profile(llvm::FoldingSetNodeID& hash) const { hash.AddInteger(static_cast<int>(getKind())); hash.AddPointer(&BT); - hash.AddString(Description); + hash.AddString(getShortDescription()); assert(Location.isValid()); Location.Profile(hash); @@ -2213,7 +2213,7 @@ void BasicBugReport::Profile(llvm::FoldingSetNodeID& hash) const { void PathSensitiveBugReport::Profile(llvm::FoldingSetNodeID &hash) const { hash.AddInteger(static_cast<int>(getKind())); hash.AddPointer(&BT); - hash.AddString(Description); + hash.AddString(getShortDescription()); PathDiagnosticLocation UL = getUniqueingLocation(); if (UL.isValid()) { UL.Profile(hash); diff --git a/clang/test/Analysis/out-of-bounds-diagnostics.c b/clang/test/Analysis/out-of-bounds-diagnostics.c index 92f983d8b1561..de70e483add1c 100644 --- a/clang/test/Analysis/out-of-bounds-diagnostics.c +++ b/clang/test/Analysis/out-of-bounds-diagnostics.c @@ -17,6 +17,27 @@ int underflowWithDeref(void) { // expected-note@-2 {{Access of 'TenElements' at negative byte offset -4}} } +int rng(void); +int getIndex(void) { + switch (rng()) { + case 1: return -152; + case 2: return -160; + case 3: return -168; + default: return -172; + } +} + +void gh86959(void) { + // Previously code like this produced many almost-identical bug reports that + // only diff ered in the offset value. Verify that now we only see one report. + + // expected-note@+1 {{Entering loop body}} + while (rng()) + TenElements[getIndex()] = 10; + // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} + // expected-note@-2 {{Access of 'TenElements' at negative byte offset -688}} +} + int scanf(const char *restrict fmt, ...); void taintedIndex(void) { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits