[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)

2023-10-31 Thread via cfe-commits
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 



@@ -174,9 +176,119 @@ compareValueToThreshold(ProgramStateRef State, NonLoc 
Value, NonLoc Threshold,
   return {nullptr, nullptr};
 }
 
-void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
-const Stmt* LoadS,
-CheckerContext ) const {
+static std::string getRegionName(const SubRegion *Region) {
+  std::string RegName = Region->getDescriptiveName();
+  if (!RegName.empty())
+return RegName;
+
+  // Field regions only have descriptive names when their parent has a
+  // descriptive name; so we provide a fallback representation for them:
+  if (const auto *FR = Region->getAs()) {
+StringRef Name = FR->getDecl()->getName();
+if (!Name.empty())
+  return formatv("the field '{0}'", Name);
+return "the unnamed field";
+  }
+
+  if (isa(Region))
+return "the memory returned by 'alloca'";
+
+  if (isa(Region) &&
+  isa(Region->getMemorySpace()))
+return "the heap area";
+
+  if (isa(Region))
+return "the string literal";
+
+  return "the region";
+}
+
+static std::optional getConcreteValue(NonLoc SV) {
+  if (auto ConcreteVal = SV.getAs()) {
+const llvm::APSInt  = ConcreteVal->getValue();
+return IntVal.tryExtValue();
+  }
+  return std::nullopt;
+}
+
+static const char *ShortMsgTemplates[] = {
+"Out of bound access to memory preceding {0}",
+"Out of bound access to memory after the end of {0}",
+"Potential out of bound access to {0} with tainted offset"};
+
+static std::string getShortMsg(OOB_Kind Kind, std::string RegName) {
+  return formatv(ShortMsgTemplates[Kind], RegName);

DonatNagyE wrote:

Oh, I see, that makes sense.
Anyway, I think I leave it as a `static` local (unless there are objections), 
because that's also fine in this use case.

https://github.com/llvm/llvm-project/pull/70056
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)

2023-10-31 Thread Balazs Benics via cfe-commits
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 



@@ -174,9 +176,119 @@ compareValueToThreshold(ProgramStateRef State, NonLoc 
Value, NonLoc Threshold,
   return {nullptr, nullptr};
 }
 
-void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
-const Stmt* LoadS,
-CheckerContext ) const {
+static std::string getRegionName(const SubRegion *Region) {
+  std::string RegName = Region->getDescriptiveName();
+  if (!RegName.empty())
+return RegName;
+
+  // Field regions only have descriptive names when their parent has a
+  // descriptive name; so we provide a fallback representation for them:
+  if (const auto *FR = Region->getAs()) {
+StringRef Name = FR->getDecl()->getName();
+if (!Name.empty())
+  return formatv("the field '{0}'", Name);
+return "the unnamed field";
+  }
+
+  if (isa(Region))
+return "the memory returned by 'alloca'";
+
+  if (isa(Region) &&
+  isa(Region->getMemorySpace()))
+return "the heap area";
+
+  if (isa(Region))
+return "the string literal";
+
+  return "the region";
+}
+
+static std::optional getConcreteValue(NonLoc SV) {
+  if (auto ConcreteVal = SV.getAs()) {
+const llvm::APSInt  = ConcreteVal->getValue();
+return IntVal.tryExtValue();
+  }
+  return std::nullopt;
+}
+
+static const char *ShortMsgTemplates[] = {
+"Out of bound access to memory preceding {0}",
+"Out of bound access to memory after the end of {0}",
+"Potential out of bound access to {0} with tainted offset"};
+
+static std::string getShortMsg(OOB_Kind Kind, std::string RegName) {
+  return formatv(ShortMsgTemplates[Kind], RegName);

steakhal wrote:

I didnt mean local statics, simple locals to global cstring constant literals 
should be fine.

https://github.com/llvm/llvm-project/pull/70056
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)

2023-10-31 Thread via cfe-commits
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 



@@ -174,9 +176,119 @@ compareValueToThreshold(ProgramStateRef State, NonLoc 
Value, NonLoc Threshold,
   return {nullptr, nullptr};
 }
 
-void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
-const Stmt* LoadS,
-CheckerContext ) const {
+static std::string getRegionName(const SubRegion *Region) {
+  std::string RegName = Region->getDescriptiveName();
+  if (!RegName.empty())
+return RegName;
+
+  // Field regions only have descriptive names when their parent has a
+  // descriptive name; so we provide a fallback representation for them:
+  if (const auto *FR = Region->getAs()) {
+StringRef Name = FR->getDecl()->getName();
+if (!Name.empty())
+  return formatv("the field '{0}'", Name);
+return "the unnamed field";
+  }
+
+  if (isa(Region))
+return "the memory returned by 'alloca'";
+
+  if (isa(Region) &&
+  isa(Region->getMemorySpace()))
+return "the heap area";
+
+  if (isa(Region))
+return "the string literal";
+
+  return "the region";
+}
+
+static std::optional getConcreteValue(NonLoc SV) {
+  if (auto ConcreteVal = SV.getAs()) {
+const llvm::APSInt  = ConcreteVal->getValue();
+return IntVal.tryExtValue();
+  }
+  return std::nullopt;
+}
+
+static const char *ShortMsgTemplates[] = {
+"Out of bound access to memory preceding {0}",
+"Out of bound access to memory after the end of {0}",
+"Potential out of bound access to {0} with tainted offset"};
+
+static std::string getShortMsg(OOB_Kind Kind, std::string RegName) {
+  return formatv(ShortMsgTemplates[Kind], RegName);

DonatNagyE wrote:

I tend to avoid local static variables, because local visibility + global 
lifetime seems to be a tricky combination; but in this case it's harmless. 

https://github.com/llvm/llvm-project/pull/70056
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)

2023-10-31 Thread via cfe-commits
=?utf-8?q?Donát?= Nagy ,
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 


https://github.com/DonatNagyE updated 
https://github.com/llvm/llvm-project/pull/70056

>From 77143e74edda6177248bebdf0424db915aa68a05 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= 
Date: Mon, 2 Oct 2023 13:32:31 +0200
Subject: [PATCH 1/3] [analyzer] Improve reports from ArrayBoundCheckerV2

Previously alpha.security.ArrayBoundV2 produced very spartan bug
reports; this commit ensures that the relevant (and known) details are
reported to the user.

The logic for detecting bugs is not changed, after this commit the
checker will report the same set of issues, but with better messages.

To test the details of the message generation this commit adds a new
test file 'out-of-bounds-diagnostics.c'. Three of the testcases are
added with FIXME notes because they reveal shortcomings of the existing
modeling and bounds checking code. I will try to fix them in separate
follow-up commits.
---
 .../Checkers/ArrayBoundCheckerV2.cpp  | 181 ++
 .../test/Analysis/out-of-bounds-diagnostics.c | 149 ++
 clang/test/Analysis/out-of-bounds-new.cpp |  26 +--
 clang/test/Analysis/out-of-bounds.c   |  28 +--
 .../test/Analysis/taint-diagnostic-visitor.c  |   4 +-
 clang/test/Analysis/taint-generic.c   |  20 +-
 clang/test/Analysis/taint-generic.cpp |  14 +-
 7 files changed, 339 insertions(+), 83 deletions(-)
 create mode 100644 clang/test/Analysis/out-of-bounds-diagnostics.c

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp 
b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index e80a06e38587520..7d97f36e4cffe44 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -22,23 +22,25 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/raw_ostream.h"
 #include 
 
 using namespace clang;
 using namespace ento;
 using namespace taint;
+using llvm::formatv;
 
 namespace {
+enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
+
 class ArrayBoundCheckerV2 :
 public Checker {
   BugType BT{this, "Out-of-bound access"};
   BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};
 
-  enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
-
   void reportOOB(CheckerContext , ProgramStateRef ErrorState, OOB_Kind Kind,
- SVal TaintedSVal = UnknownVal()) const;
+ NonLoc Offset, std::string RegName, std::string Msg) const;
 
   static bool isFromCtypeMacro(const Stmt *S, ASTContext );
 
@@ -53,7 +55,7 @@ class ArrayBoundCheckerV2 :
 /// Arr and the distance of Location from the beginning of Arr (expressed in a
 /// NonLoc that specifies the number of CharUnits). Returns nullopt when these
 /// cannot be determined.
-std::optional>
+static std::optional>
 computeOffset(ProgramStateRef State, SValBuilder , SVal Location) {
   QualType T = SVB.getArrayIndexType();
   auto EvalBinOp = [, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) 
{
@@ -174,9 +176,119 @@ compareValueToThreshold(ProgramStateRef State, NonLoc 
Value, NonLoc Threshold,
   return {nullptr, nullptr};
 }
 
+static std::string getRegionName(const SubRegion *Region) {
+  std::string RegName = Region->getDescriptiveName();
+  if (!RegName.empty())
+return RegName;
+
+  // Field regions only have descriptive names when their parent has a
+  // descriptive name; so we provide a fallback representation for them:
+  if (const auto *FR = Region->getAs()) {
+StringRef Name = FR->getDecl()->getName();
+if (!Name.empty())
+  return formatv("the field '{0}'", Name);
+return "the unnamed field";
+  }
+
+  if (isa(Region))
+return "the memory returned by 'alloca'";
+
+  if (isa(Region) &&
+  isa(Region->getMemorySpace()))
+return "the heap area";
+
+  if (isa(Region))
+return "the string literal";
+
+  return "the region";
+}
+
+static std::optional getConcreteValue(NonLoc SV) {
+  if (auto ConcreteVal = SV.getAs()) {
+const llvm::APSInt  = ConcreteVal->getValue();
+return IntVal.tryExtValue();
+  }
+  return std::nullopt;
+}
+
+static const char *ShortMsgTemplates[] = {
+"Out of bound access to memory preceding {0}",
+"Out of bound access to memory after the end of {0}",
+"Potential out of bound access to {0} with tainted offset"};
+
+static std::string getShortMsg(OOB_Kind Kind, std::string RegName) {
+  return formatv(ShortMsgTemplates[Kind], RegName);
+}
+
+static std::string getPrecedesMsg(std::string RegName, NonLoc Offset) {
+  SmallString<128> Buf;
+  llvm::raw_svector_ostream Out(Buf);
+  Out << "Access of " << RegName << " at negative byte offset";
+  if (auto ConcreteIdx = Offset.getAs())
+Out << ' ' << 

[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)

2023-10-31 Thread Balazs Benics via cfe-commits
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 



@@ -174,9 +176,119 @@ compareValueToThreshold(ProgramStateRef State, NonLoc 
Value, NonLoc Threshold,
   return {nullptr, nullptr};
 }
 
-void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
-const Stmt* LoadS,
-CheckerContext ) const {
+static std::string getRegionName(const SubRegion *Region) {
+  std::string RegName = Region->getDescriptiveName();
+  if (!RegName.empty())
+return RegName;
+
+  // Field regions only have descriptive names when their parent has a
+  // descriptive name; so we provide a fallback representation for them:
+  if (const auto *FR = Region->getAs()) {
+StringRef Name = FR->getDecl()->getName();
+if (!Name.empty())
+  return formatv("the field '{0}'", Name);
+return "the unnamed field";
+  }
+
+  if (isa(Region))
+return "the memory returned by 'alloca'";
+
+  if (isa(Region) &&
+  isa(Region->getMemorySpace()))
+return "the heap area";
+
+  if (isa(Region))
+return "the string literal";
+
+  return "the region";
+}
+
+static std::optional getConcreteValue(NonLoc SV) {
+  if (auto ConcreteVal = SV.getAs()) {
+const llvm::APSInt  = ConcreteVal->getValue();
+return IntVal.tryExtValue();

steakhal wrote:

```suggestion
return ConcreteVal->getValue().tryExtValue();
```

https://github.com/llvm/llvm-project/pull/70056
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)

2023-10-31 Thread Balazs Benics via cfe-commits
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 



@@ -174,9 +176,119 @@ compareValueToThreshold(ProgramStateRef State, NonLoc 
Value, NonLoc Threshold,
   return {nullptr, nullptr};
 }
 
-void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
-const Stmt* LoadS,
-CheckerContext ) const {
+static std::string getRegionName(const SubRegion *Region) {
+  std::string RegName = Region->getDescriptiveName();
+  if (!RegName.empty())
+return RegName;
+
+  // Field regions only have descriptive names when their parent has a
+  // descriptive name; so we provide a fallback representation for them:
+  if (const auto *FR = Region->getAs()) {
+StringRef Name = FR->getDecl()->getName();
+if (!Name.empty())
+  return formatv("the field '{0}'", Name);
+return "the unnamed field";

steakhal wrote:

```suggestion
if (StringRef Name = FR->getDecl()->getName(); !Name.empty())
  return formatv("the field '{0}'", Name);
return "the unnamed field";
```

https://github.com/llvm/llvm-project/pull/70056
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)

2023-10-31 Thread Balazs Benics via cfe-commits
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 


https://github.com/steakhal requested changes to this pull request.

Looks good. I only have minor remarks.
Consider renaming the PR `Improve reports` -> `Improve messages`, or 
`diagnostics`, to highlight that the "messages" aspect is improved, not e.g. 
improving the FP/TP rate or something like that.

https://github.com/llvm/llvm-project/pull/70056
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)

2023-10-31 Thread Balazs Benics via cfe-commits
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 



@@ -174,9 +176,119 @@ compareValueToThreshold(ProgramStateRef State, NonLoc 
Value, NonLoc Threshold,
   return {nullptr, nullptr};
 }
 
-void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
-const Stmt* LoadS,
-CheckerContext ) const {
+static std::string getRegionName(const SubRegion *Region) {
+  std::string RegName = Region->getDescriptiveName();
+  if (!RegName.empty())
+return RegName;
+
+  // Field regions only have descriptive names when their parent has a
+  // descriptive name; so we provide a fallback representation for them:
+  if (const auto *FR = Region->getAs()) {
+StringRef Name = FR->getDecl()->getName();
+if (!Name.empty())
+  return formatv("the field '{0}'", Name);
+return "the unnamed field";
+  }
+
+  if (isa(Region))
+return "the memory returned by 'alloca'";
+
+  if (isa(Region) &&
+  isa(Region->getMemorySpace()))
+return "the heap area";
+
+  if (isa(Region))
+return "the string literal";
+
+  return "the region";
+}
+
+static std::optional getConcreteValue(NonLoc SV) {
+  if (auto ConcreteVal = SV.getAs()) {
+const llvm::APSInt  = ConcreteVal->getValue();
+return IntVal.tryExtValue();
+  }
+  return std::nullopt;
+}
+
+static const char *ShortMsgTemplates[] = {
+"Out of bound access to memory preceding {0}",
+"Out of bound access to memory after the end of {0}",
+"Potential out of bound access to {0} with tainted offset"};
+
+static std::string getShortMsg(OOB_Kind Kind, std::string RegName) {
+  return formatv(ShortMsgTemplates[Kind], RegName);

steakhal wrote:

You could have the `ShortMsgTemplates` defined within this function, closer to 
the place being used.

https://github.com/llvm/llvm-project/pull/70056
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)

2023-10-31 Thread Balazs Benics via cfe-commits
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 



@@ -174,9 +176,119 @@ compareValueToThreshold(ProgramStateRef State, NonLoc 
Value, NonLoc Threshold,
   return {nullptr, nullptr};
 }
 
-void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
-const Stmt* LoadS,
-CheckerContext ) const {
+static std::string getRegionName(const SubRegion *Region) {
+  std::string RegName = Region->getDescriptiveName();
+  if (!RegName.empty())
+return RegName;

steakhal wrote:

```suggestion
  if (std::string RegName = Region->getDescriptiveName(); !RegName.empty())
return RegName;
```

https://github.com/llvm/llvm-project/pull/70056
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)

2023-10-31 Thread Balazs Benics via cfe-commits
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 


https://github.com/steakhal edited 
https://github.com/llvm/llvm-project/pull/70056
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)

2023-10-27 Thread via cfe-commits
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 


DonatNagyE wrote:

Note that this PR introduces a `PathSensitiveBugReport` where the `Description` 
and `ShortDescription` fields (inherited from `BugReport`) are different 
(because I'm using the four-argument constructor). These fields are intended 
for different purposes, but practically every checker stores the same string in 
both of them.

To clarify this situation, I opened a [discussion on 
discourse](https://discourse.llvm.org/t/bug-report-warning-message-duplicated-as-a-note-by-most-checkers/74480).

https://github.com/llvm/llvm-project/pull/70056
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)

2023-10-25 Thread via cfe-commits
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 


DonatNagyE wrote:

I renamed some variables because (1) they weren't CamelCased because they were 
"inherited" from the old versions of this checker and (2) their names were 
slightly too long and they introduced extra linebreaks in the newly added code.

https://github.com/llvm/llvm-project/pull/70056
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)

2023-10-25 Thread via cfe-commits
=?utf-8?q?Donát?= Nagy 
Message-ID:
In-Reply-To: 


https://github.com/DonatNagyE updated 
https://github.com/llvm/llvm-project/pull/70056

>From 77143e74edda6177248bebdf0424db915aa68a05 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= 
Date: Mon, 2 Oct 2023 13:32:31 +0200
Subject: [PATCH 1/2] [analyzer] Improve reports from ArrayBoundCheckerV2

Previously alpha.security.ArrayBoundV2 produced very spartan bug
reports; this commit ensures that the relevant (and known) details are
reported to the user.

The logic for detecting bugs is not changed, after this commit the
checker will report the same set of issues, but with better messages.

To test the details of the message generation this commit adds a new
test file 'out-of-bounds-diagnostics.c'. Three of the testcases are
added with FIXME notes because they reveal shortcomings of the existing
modeling and bounds checking code. I will try to fix them in separate
follow-up commits.
---
 .../Checkers/ArrayBoundCheckerV2.cpp  | 181 ++
 .../test/Analysis/out-of-bounds-diagnostics.c | 149 ++
 clang/test/Analysis/out-of-bounds-new.cpp |  26 +--
 clang/test/Analysis/out-of-bounds.c   |  28 +--
 .../test/Analysis/taint-diagnostic-visitor.c  |   4 +-
 clang/test/Analysis/taint-generic.c   |  20 +-
 clang/test/Analysis/taint-generic.cpp |  14 +-
 7 files changed, 339 insertions(+), 83 deletions(-)
 create mode 100644 clang/test/Analysis/out-of-bounds-diagnostics.c

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp 
b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index e80a06e38587520..7d97f36e4cffe44 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -22,23 +22,25 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/raw_ostream.h"
 #include 
 
 using namespace clang;
 using namespace ento;
 using namespace taint;
+using llvm::formatv;
 
 namespace {
+enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
+
 class ArrayBoundCheckerV2 :
 public Checker {
   BugType BT{this, "Out-of-bound access"};
   BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};
 
-  enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
-
   void reportOOB(CheckerContext , ProgramStateRef ErrorState, OOB_Kind Kind,
- SVal TaintedSVal = UnknownVal()) const;
+ NonLoc Offset, std::string RegName, std::string Msg) const;
 
   static bool isFromCtypeMacro(const Stmt *S, ASTContext );
 
@@ -53,7 +55,7 @@ class ArrayBoundCheckerV2 :
 /// Arr and the distance of Location from the beginning of Arr (expressed in a
 /// NonLoc that specifies the number of CharUnits). Returns nullopt when these
 /// cannot be determined.
-std::optional>
+static std::optional>
 computeOffset(ProgramStateRef State, SValBuilder , SVal Location) {
   QualType T = SVB.getArrayIndexType();
   auto EvalBinOp = [, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) 
{
@@ -174,9 +176,119 @@ compareValueToThreshold(ProgramStateRef State, NonLoc 
Value, NonLoc Threshold,
   return {nullptr, nullptr};
 }
 
+static std::string getRegionName(const SubRegion *Region) {
+  std::string RegName = Region->getDescriptiveName();
+  if (!RegName.empty())
+return RegName;
+
+  // Field regions only have descriptive names when their parent has a
+  // descriptive name; so we provide a fallback representation for them:
+  if (const auto *FR = Region->getAs()) {
+StringRef Name = FR->getDecl()->getName();
+if (!Name.empty())
+  return formatv("the field '{0}'", Name);
+return "the unnamed field";
+  }
+
+  if (isa(Region))
+return "the memory returned by 'alloca'";
+
+  if (isa(Region) &&
+  isa(Region->getMemorySpace()))
+return "the heap area";
+
+  if (isa(Region))
+return "the string literal";
+
+  return "the region";
+}
+
+static std::optional getConcreteValue(NonLoc SV) {
+  if (auto ConcreteVal = SV.getAs()) {
+const llvm::APSInt  = ConcreteVal->getValue();
+return IntVal.tryExtValue();
+  }
+  return std::nullopt;
+}
+
+static const char *ShortMsgTemplates[] = {
+"Out of bound access to memory preceding {0}",
+"Out of bound access to memory after the end of {0}",
+"Potential out of bound access to {0} with tainted offset"};
+
+static std::string getShortMsg(OOB_Kind Kind, std::string RegName) {
+  return formatv(ShortMsgTemplates[Kind], RegName);
+}
+
+static std::string getPrecedesMsg(std::string RegName, NonLoc Offset) {
+  SmallString<128> Buf;
+  llvm::raw_svector_ostream Out(Buf);
+  Out << "Access of " << RegName << " at negative byte offset";
+  if (auto ConcreteIdx = Offset.getAs())
+Out << ' ' << ConcreteIdx->getValue();
+  return 

[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)

2023-10-24 Thread via cfe-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


``bash
git-clang-format --diff 6e3e21d2032216b5d01b47a0442638225e66dc1b 
77143e74edda6177248bebdf0424db915aa68a05 -- 
clang/test/Analysis/out-of-bounds-diagnostics.c 
clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp 
clang/test/Analysis/out-of-bounds-new.cpp clang/test/Analysis/out-of-bounds.c 
clang/test/Analysis/taint-diagnostic-visitor.c 
clang/test/Analysis/taint-generic.c clang/test/Analysis/taint-generic.cpp
``





View the diff from clang-format here.


``diff
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp 
b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index 7d97f36e4cff..5741243c7c31 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -287,7 +287,7 @@ static std::string getTaintMsg(std::string RegName) {
 }
 
 void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
-const Stmt* LoadS,
+const Stmt *LoadS,
 CheckerContext ) const {
 
   // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping

``




https://github.com/llvm/llvm-project/pull/70056
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)

2023-10-24 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: None (DonatNagyE)


Changes

Previously alpha.security.ArrayBoundV2 produced very spartan bug reports; this 
commit ensures that the relevant (and known) details are reported to the user.

The logic for detecting bugs is not changed, after this commit the checker will 
report the same set of issues, but with better messages.

To test the details of the message generation this commit adds a new test file 
'out-of-bounds-diagnostics.c'. Three of the testcases are added with FIXME 
notes because they reveal shortcomings of the existing modeling and bounds 
checking code. I will try to fix them in separate follow-up commits.

---

Patch is 30.06 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/70056.diff


7 Files Affected:

- (modified) clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp 
(+144-37) 
- (added) clang/test/Analysis/out-of-bounds-diagnostics.c (+149) 
- (modified) clang/test/Analysis/out-of-bounds-new.cpp (+13-13) 
- (modified) clang/test/Analysis/out-of-bounds.c (+14-14) 
- (modified) clang/test/Analysis/taint-diagnostic-visitor.c (+2-2) 
- (modified) clang/test/Analysis/taint-generic.c (+10-10) 
- (modified) clang/test/Analysis/taint-generic.cpp (+7-7) 


``diff
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp 
b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index e80a06e38587520..7d97f36e4cffe44 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -22,23 +22,25 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/raw_ostream.h"
 #include 
 
 using namespace clang;
 using namespace ento;
 using namespace taint;
+using llvm::formatv;
 
 namespace {
+enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
+
 class ArrayBoundCheckerV2 :
 public Checker {
   BugType BT{this, "Out-of-bound access"};
   BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};
 
-  enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
-
   void reportOOB(CheckerContext , ProgramStateRef ErrorState, OOB_Kind Kind,
- SVal TaintedSVal = UnknownVal()) const;
+ NonLoc Offset, std::string RegName, std::string Msg) const;
 
   static bool isFromCtypeMacro(const Stmt *S, ASTContext );
 
@@ -53,7 +55,7 @@ class ArrayBoundCheckerV2 :
 /// Arr and the distance of Location from the beginning of Arr (expressed in a
 /// NonLoc that specifies the number of CharUnits). Returns nullopt when these
 /// cannot be determined.
-std::optional>
+static std::optional>
 computeOffset(ProgramStateRef State, SValBuilder , SVal Location) {
   QualType T = SVB.getArrayIndexType();
   auto EvalBinOp = [, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) 
{
@@ -174,9 +176,119 @@ compareValueToThreshold(ProgramStateRef State, NonLoc 
Value, NonLoc Threshold,
   return {nullptr, nullptr};
 }
 
+static std::string getRegionName(const SubRegion *Region) {
+  std::string RegName = Region->getDescriptiveName();
+  if (!RegName.empty())
+return RegName;
+
+  // Field regions only have descriptive names when their parent has a
+  // descriptive name; so we provide a fallback representation for them:
+  if (const auto *FR = Region->getAs()) {
+StringRef Name = FR->getDecl()->getName();
+if (!Name.empty())
+  return formatv("the field '{0}'", Name);
+return "the unnamed field";
+  }
+
+  if (isa(Region))
+return "the memory returned by 'alloca'";
+
+  if (isa(Region) &&
+  isa(Region->getMemorySpace()))
+return "the heap area";
+
+  if (isa(Region))
+return "the string literal";
+
+  return "the region";
+}
+
+static std::optional getConcreteValue(NonLoc SV) {
+  if (auto ConcreteVal = SV.getAs()) {
+const llvm::APSInt  = ConcreteVal->getValue();
+return IntVal.tryExtValue();
+  }
+  return std::nullopt;
+}
+
+static const char *ShortMsgTemplates[] = {
+"Out of bound access to memory preceding {0}",
+"Out of bound access to memory after the end of {0}",
+"Potential out of bound access to {0} with tainted offset"};
+
+static std::string getShortMsg(OOB_Kind Kind, std::string RegName) {
+  return formatv(ShortMsgTemplates[Kind], RegName);
+}
+
+static std::string getPrecedesMsg(std::string RegName, NonLoc Offset) {
+  SmallString<128> Buf;
+  llvm::raw_svector_ostream Out(Buf);
+  Out << "Access of " << RegName << " at negative byte offset";
+  if (auto ConcreteIdx = Offset.getAs())
+Out << ' ' << ConcreteIdx->getValue();
+  return std::string(Buf);
+}
+static std::string getExceedsMsg(ASTContext , std::string RegName,
+ NonLoc Offset, NonLoc Extent, SVal Location) {
+  const auto *EReg = 

[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)

2023-10-24 Thread via cfe-commits

https://github.com/DonatNagyE created 
https://github.com/llvm/llvm-project/pull/70056

Previously alpha.security.ArrayBoundV2 produced very spartan bug reports; this 
commit ensures that the relevant (and known) details are reported to the user.

The logic for detecting bugs is not changed, after this commit the checker will 
report the same set of issues, but with better messages.

To test the details of the message generation this commit adds a new test file 
'out-of-bounds-diagnostics.c'. Three of the testcases are added with FIXME 
notes because they reveal shortcomings of the existing modeling and bounds 
checking code. I will try to fix them in separate follow-up commits.

>From 77143e74edda6177248bebdf0424db915aa68a05 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= 
Date: Mon, 2 Oct 2023 13:32:31 +0200
Subject: [PATCH] [analyzer] Improve reports from ArrayBoundCheckerV2

Previously alpha.security.ArrayBoundV2 produced very spartan bug
reports; this commit ensures that the relevant (and known) details are
reported to the user.

The logic for detecting bugs is not changed, after this commit the
checker will report the same set of issues, but with better messages.

To test the details of the message generation this commit adds a new
test file 'out-of-bounds-diagnostics.c'. Three of the testcases are
added with FIXME notes because they reveal shortcomings of the existing
modeling and bounds checking code. I will try to fix them in separate
follow-up commits.
---
 .../Checkers/ArrayBoundCheckerV2.cpp  | 181 ++
 .../test/Analysis/out-of-bounds-diagnostics.c | 149 ++
 clang/test/Analysis/out-of-bounds-new.cpp |  26 +--
 clang/test/Analysis/out-of-bounds.c   |  28 +--
 .../test/Analysis/taint-diagnostic-visitor.c  |   4 +-
 clang/test/Analysis/taint-generic.c   |  20 +-
 clang/test/Analysis/taint-generic.cpp |  14 +-
 7 files changed, 339 insertions(+), 83 deletions(-)
 create mode 100644 clang/test/Analysis/out-of-bounds-diagnostics.c

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp 
b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index e80a06e38587520..7d97f36e4cffe44 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -22,23 +22,25 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/raw_ostream.h"
 #include 
 
 using namespace clang;
 using namespace ento;
 using namespace taint;
+using llvm::formatv;
 
 namespace {
+enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
+
 class ArrayBoundCheckerV2 :
 public Checker {
   BugType BT{this, "Out-of-bound access"};
   BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};
 
-  enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
-
   void reportOOB(CheckerContext , ProgramStateRef ErrorState, OOB_Kind Kind,
- SVal TaintedSVal = UnknownVal()) const;
+ NonLoc Offset, std::string RegName, std::string Msg) const;
 
   static bool isFromCtypeMacro(const Stmt *S, ASTContext );
 
@@ -53,7 +55,7 @@ class ArrayBoundCheckerV2 :
 /// Arr and the distance of Location from the beginning of Arr (expressed in a
 /// NonLoc that specifies the number of CharUnits). Returns nullopt when these
 /// cannot be determined.
-std::optional>
+static std::optional>
 computeOffset(ProgramStateRef State, SValBuilder , SVal Location) {
   QualType T = SVB.getArrayIndexType();
   auto EvalBinOp = [, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) 
{
@@ -174,9 +176,119 @@ compareValueToThreshold(ProgramStateRef State, NonLoc 
Value, NonLoc Threshold,
   return {nullptr, nullptr};
 }
 
+static std::string getRegionName(const SubRegion *Region) {
+  std::string RegName = Region->getDescriptiveName();
+  if (!RegName.empty())
+return RegName;
+
+  // Field regions only have descriptive names when their parent has a
+  // descriptive name; so we provide a fallback representation for them:
+  if (const auto *FR = Region->getAs()) {
+StringRef Name = FR->getDecl()->getName();
+if (!Name.empty())
+  return formatv("the field '{0}'", Name);
+return "the unnamed field";
+  }
+
+  if (isa(Region))
+return "the memory returned by 'alloca'";
+
+  if (isa(Region) &&
+  isa(Region->getMemorySpace()))
+return "the heap area";
+
+  if (isa(Region))
+return "the string literal";
+
+  return "the region";
+}
+
+static std::optional getConcreteValue(NonLoc SV) {
+  if (auto ConcreteVal = SV.getAs()) {
+const llvm::APSInt  = ConcreteVal->getValue();
+return IntVal.tryExtValue();
+  }
+  return std::nullopt;
+}
+
+static const char *ShortMsgTemplates[] = {
+"Out of bound access to memory preceding {0}",
+"Out of bound