Author: DonĂ¡t Nagy
Date: 2024-07-22T11:44:20+02:00
New Revision: 2bc38dc30bc9baad610925d7e90e724a9d09ee7d

URL: 
https://github.com/llvm/llvm-project/commit/2bc38dc30bc9baad610925d7e90e724a9d09ee7d
DIFF: 
https://github.com/llvm/llvm-project/commit/2bc38dc30bc9baad610925d7e90e724a9d09ee7d.diff

LOG: [analyzer] Improve bug report hashing, merge similar reports (#98621)

Previously there were certain situations where
alpha.security.ArrayBoundV2 produced lots of very similar and redundant
reports that only differed in their full `Description` that contained
the (negative) byte offset value. (See
https://github.com/llvm/llvm-project/issues/86969 for details.)

This change updates the `Profile()` method of `PathSensitiveBugReport`
to ensure that it uses `getShortDescription()` instead of the full
`Description` so the standard report deduplication eliminates most of
these redundant reports.

Note that the effects of this change are very limited because there are
very few checkers that specify a separate short description, and so
`getShortDescription()` practically always defaults to returning the
full `Description`.

For the sake of consistency `BasicBugReport::Profile()` is also updated
to use the short description. (Right now there are no checkers that use
`BasicBugReport` with separate long and short descriptions.)

This commit also includes some small code quality improvements in
`ArrayBoundV2` that are IMO too trivial to be moved into a separate
commit.

Added: 
    

Modified: 
    clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
    clang/lib/StaticAnalyzer/Core/BugReporter.cpp
    clang/test/Analysis/out-of-bounds-diagnostics.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp 
b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index f82288f1099e8..3f837564cf47c 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -373,14 +373,14 @@ static std::optional<int64_t> 
getConcreteValue(std::optional<NonLoc> SV) {
 }
 
 static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) {
-  std::string RegName = getRegionName(Region);
-  SmallString<128> Buf;
-  llvm::raw_svector_ostream Out(Buf);
-  Out << "Access of " << RegName << " at negative byte offset";
-  if (auto ConcreteIdx = Offset.getAs<nonloc::ConcreteInt>())
-    Out << ' ' << ConcreteIdx->getValue();
-  return {formatv("Out of bound access to memory preceding {0}", RegName),
-          std::string(Buf)};
+  std::string RegName = getRegionName(Region), OffsetStr = "";
+
+  if (auto ConcreteOffset = getConcreteValue(Offset))
+    OffsetStr = formatv(" {0}", ConcreteOffset);
+
+  return {
+      formatv("Out of bound access to memory preceding {0}", RegName),
+      formatv("Access of {0} at negative byte offset{1}", RegName, OffsetStr)};
 }
 
 /// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if
@@ -609,7 +609,7 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, 
CheckerContext &C) const {
   // CHECK UPPER BOUND
   DefinedOrUnknownSVal Size = getDynamicExtent(State, Reg, SVB);
   if (auto KnownSize = Size.getAs<NonLoc>()) {
-    // In a situation where both overflow and overflow are possible (but the
+    // In a situation where both underflow and overflow are possible (but the
     // index is either tainted or known to be invalid), the logic of this
     // checker will first assume that the offset is non-negative, and then
     // (with this additional assumption) it will detect an overflow error.

diff  --git a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp 
b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
index e2002bfbe594a..d73dc40cf03fb 100644
--- a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
+++ b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
@@ -2198,7 +2198,7 @@ const Decl *PathSensitiveBugReport::getDeclWithIssue() 
const {
 void BasicBugReport::Profile(llvm::FoldingSetNodeID& hash) const {
   hash.AddInteger(static_cast<int>(getKind()));
   hash.AddPointer(&BT);
-  hash.AddString(Description);
+  hash.AddString(getShortDescription());
   assert(Location.isValid());
   Location.Profile(hash);
 
@@ -2213,7 +2213,7 @@ void BasicBugReport::Profile(llvm::FoldingSetNodeID& 
hash) const {
 void PathSensitiveBugReport::Profile(llvm::FoldingSetNodeID &hash) const {
   hash.AddInteger(static_cast<int>(getKind()));
   hash.AddPointer(&BT);
-  hash.AddString(Description);
+  hash.AddString(getShortDescription());
   PathDiagnosticLocation UL = getUniqueingLocation();
   if (UL.isValid()) {
     UL.Profile(hash);

diff  --git a/clang/test/Analysis/out-of-bounds-diagnostics.c 
b/clang/test/Analysis/out-of-bounds-diagnostics.c
index 92f983d8b1561..de70e483add1c 100644
--- a/clang/test/Analysis/out-of-bounds-diagnostics.c
+++ b/clang/test/Analysis/out-of-bounds-diagnostics.c
@@ -17,6 +17,27 @@ int underflowWithDeref(void) {
   // expected-note@-2 {{Access of 'TenElements' at negative byte offset -4}}
 }
 
+int rng(void);
+int getIndex(void) {
+  switch (rng()) {
+    case 1: return -152;
+    case 2: return -160;
+    case 3: return -168;
+    default: return -172;
+  }
+}
+
+void gh86959(void) {
+  // Previously code like this produced many almost-identical bug reports that
+  // only 
diff ered in the offset value. Verify that now we only see one report.
+
+  // expected-note@+1 {{Entering loop body}}
+  while (rng())
+    TenElements[getIndex()] = 10;
+  // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -688}}
+}
+
 int scanf(const char *restrict fmt, ...);
 
 void taintedIndex(void) {


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to