[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: @@ -174,9 +176,119 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, return {nullptr, nullptr}; } -void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, -const Stmt* LoadS, -CheckerContext ) const { +static std::string getRegionName(const SubRegion *Region) { + std::string RegName = Region->getDescriptiveName(); + if (!RegName.empty()) +return RegName; + + // Field regions only have descriptive names when their parent has a + // descriptive name; so we provide a fallback representation for them: + if (const auto *FR = Region->getAs()) { +StringRef Name = FR->getDecl()->getName(); +if (!Name.empty()) + return formatv("the field '{0}'", Name); +return "the unnamed field"; + } + + if (isa(Region)) +return "the memory returned by 'alloca'"; + + if (isa(Region) && + isa(Region->getMemorySpace())) +return "the heap area"; + + if (isa(Region)) +return "the string literal"; + + return "the region"; +} + +static std::optional getConcreteValue(NonLoc SV) { + if (auto ConcreteVal = SV.getAs()) { +const llvm::APSInt = ConcreteVal->getValue(); +return IntVal.tryExtValue(); + } + return std::nullopt; +} + +static const char *ShortMsgTemplates[] = { +"Out of bound access to memory preceding {0}", +"Out of bound access to memory after the end of {0}", +"Potential out of bound access to {0} with tainted offset"}; + +static std::string getShortMsg(OOB_Kind Kind, std::string RegName) { + return formatv(ShortMsgTemplates[Kind], RegName); DonatNagyE wrote: Oh, I see, that makes sense. Anyway, I think I leave it as a `static` local (unless there are objections), because that's also fine in this use case. https://github.com/llvm/llvm-project/pull/70056 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: @@ -174,9 +176,119 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, return {nullptr, nullptr}; } -void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, -const Stmt* LoadS, -CheckerContext ) const { +static std::string getRegionName(const SubRegion *Region) { + std::string RegName = Region->getDescriptiveName(); + if (!RegName.empty()) +return RegName; + + // Field regions only have descriptive names when their parent has a + // descriptive name; so we provide a fallback representation for them: + if (const auto *FR = Region->getAs()) { +StringRef Name = FR->getDecl()->getName(); +if (!Name.empty()) + return formatv("the field '{0}'", Name); +return "the unnamed field"; + } + + if (isa(Region)) +return "the memory returned by 'alloca'"; + + if (isa(Region) && + isa(Region->getMemorySpace())) +return "the heap area"; + + if (isa(Region)) +return "the string literal"; + + return "the region"; +} + +static std::optional getConcreteValue(NonLoc SV) { + if (auto ConcreteVal = SV.getAs()) { +const llvm::APSInt = ConcreteVal->getValue(); +return IntVal.tryExtValue(); + } + return std::nullopt; +} + +static const char *ShortMsgTemplates[] = { +"Out of bound access to memory preceding {0}", +"Out of bound access to memory after the end of {0}", +"Potential out of bound access to {0} with tainted offset"}; + +static std::string getShortMsg(OOB_Kind Kind, std::string RegName) { + return formatv(ShortMsgTemplates[Kind], RegName); steakhal wrote: I didnt mean local statics, simple locals to global cstring constant literals should be fine. https://github.com/llvm/llvm-project/pull/70056 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: @@ -174,9 +176,119 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, return {nullptr, nullptr}; } -void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, -const Stmt* LoadS, -CheckerContext ) const { +static std::string getRegionName(const SubRegion *Region) { + std::string RegName = Region->getDescriptiveName(); + if (!RegName.empty()) +return RegName; + + // Field regions only have descriptive names when their parent has a + // descriptive name; so we provide a fallback representation for them: + if (const auto *FR = Region->getAs()) { +StringRef Name = FR->getDecl()->getName(); +if (!Name.empty()) + return formatv("the field '{0}'", Name); +return "the unnamed field"; + } + + if (isa(Region)) +return "the memory returned by 'alloca'"; + + if (isa(Region) && + isa(Region->getMemorySpace())) +return "the heap area"; + + if (isa(Region)) +return "the string literal"; + + return "the region"; +} + +static std::optional getConcreteValue(NonLoc SV) { + if (auto ConcreteVal = SV.getAs()) { +const llvm::APSInt = ConcreteVal->getValue(); +return IntVal.tryExtValue(); + } + return std::nullopt; +} + +static const char *ShortMsgTemplates[] = { +"Out of bound access to memory preceding {0}", +"Out of bound access to memory after the end of {0}", +"Potential out of bound access to {0} with tainted offset"}; + +static std::string getShortMsg(OOB_Kind Kind, std::string RegName) { + return formatv(ShortMsgTemplates[Kind], RegName); DonatNagyE wrote: I tend to avoid local static variables, because local visibility + global lifetime seems to be a tricky combination; but in this case it's harmless. https://github.com/llvm/llvm-project/pull/70056 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: https://github.com/DonatNagyE updated https://github.com/llvm/llvm-project/pull/70056 >From 77143e74edda6177248bebdf0424db915aa68a05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= Date: Mon, 2 Oct 2023 13:32:31 +0200 Subject: [PATCH 1/3] [analyzer] Improve reports from ArrayBoundCheckerV2 Previously alpha.security.ArrayBoundV2 produced very spartan bug reports; this commit ensures that the relevant (and known) details are reported to the user. The logic for detecting bugs is not changed, after this commit the checker will report the same set of issues, but with better messages. To test the details of the message generation this commit adds a new test file 'out-of-bounds-diagnostics.c'. Three of the testcases are added with FIXME notes because they reveal shortcomings of the existing modeling and bounds checking code. I will try to fix them in separate follow-up commits. --- .../Checkers/ArrayBoundCheckerV2.cpp | 181 ++ .../test/Analysis/out-of-bounds-diagnostics.c | 149 ++ clang/test/Analysis/out-of-bounds-new.cpp | 26 +-- clang/test/Analysis/out-of-bounds.c | 28 +-- .../test/Analysis/taint-diagnostic-visitor.c | 4 +- clang/test/Analysis/taint-generic.c | 20 +- clang/test/Analysis/taint-generic.cpp | 14 +- 7 files changed, 339 insertions(+), 83 deletions(-) create mode 100644 clang/test/Analysis/out-of-bounds-diagnostics.c diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index e80a06e38587520..7d97f36e4cffe44 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -22,23 +22,25 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" #include using namespace clang; using namespace ento; using namespace taint; +using llvm::formatv; namespace { +enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint }; + class ArrayBoundCheckerV2 : public Checker { BugType BT{this, "Out-of-bound access"}; BugType TaintBT{this, "Out-of-bound access", categories::TaintedData}; - enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint }; - void reportOOB(CheckerContext , ProgramStateRef ErrorState, OOB_Kind Kind, - SVal TaintedSVal = UnknownVal()) const; + NonLoc Offset, std::string RegName, std::string Msg) const; static bool isFromCtypeMacro(const Stmt *S, ASTContext ); @@ -53,7 +55,7 @@ class ArrayBoundCheckerV2 : /// Arr and the distance of Location from the beginning of Arr (expressed in a /// NonLoc that specifies the number of CharUnits). Returns nullopt when these /// cannot be determined. -std::optional> +static std::optional> computeOffset(ProgramStateRef State, SValBuilder , SVal Location) { QualType T = SVB.getArrayIndexType(); auto EvalBinOp = [, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) { @@ -174,9 +176,119 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, return {nullptr, nullptr}; } +static std::string getRegionName(const SubRegion *Region) { + std::string RegName = Region->getDescriptiveName(); + if (!RegName.empty()) +return RegName; + + // Field regions only have descriptive names when their parent has a + // descriptive name; so we provide a fallback representation for them: + if (const auto *FR = Region->getAs()) { +StringRef Name = FR->getDecl()->getName(); +if (!Name.empty()) + return formatv("the field '{0}'", Name); +return "the unnamed field"; + } + + if (isa(Region)) +return "the memory returned by 'alloca'"; + + if (isa(Region) && + isa(Region->getMemorySpace())) +return "the heap area"; + + if (isa(Region)) +return "the string literal"; + + return "the region"; +} + +static std::optional getConcreteValue(NonLoc SV) { + if (auto ConcreteVal = SV.getAs()) { +const llvm::APSInt = ConcreteVal->getValue(); +return IntVal.tryExtValue(); + } + return std::nullopt; +} + +static const char *ShortMsgTemplates[] = { +"Out of bound access to memory preceding {0}", +"Out of bound access to memory after the end of {0}", +"Potential out of bound access to {0} with tainted offset"}; + +static std::string getShortMsg(OOB_Kind Kind, std::string RegName) { + return formatv(ShortMsgTemplates[Kind], RegName); +} + +static std::string getPrecedesMsg(std::string RegName, NonLoc Offset) { + SmallString<128> Buf; + llvm::raw_svector_ostream Out(Buf); + Out << "Access of " << RegName << " at negative byte offset"; + if (auto ConcreteIdx = Offset.getAs()) +Out << ' ' <<
[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)
=?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: @@ -174,9 +176,119 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, return {nullptr, nullptr}; } -void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, -const Stmt* LoadS, -CheckerContext ) const { +static std::string getRegionName(const SubRegion *Region) { + std::string RegName = Region->getDescriptiveName(); + if (!RegName.empty()) +return RegName; + + // Field regions only have descriptive names when their parent has a + // descriptive name; so we provide a fallback representation for them: + if (const auto *FR = Region->getAs()) { +StringRef Name = FR->getDecl()->getName(); +if (!Name.empty()) + return formatv("the field '{0}'", Name); +return "the unnamed field"; + } + + if (isa(Region)) +return "the memory returned by 'alloca'"; + + if (isa(Region) && + isa(Region->getMemorySpace())) +return "the heap area"; + + if (isa(Region)) +return "the string literal"; + + return "the region"; +} + +static std::optional getConcreteValue(NonLoc SV) { + if (auto ConcreteVal = SV.getAs()) { +const llvm::APSInt = ConcreteVal->getValue(); +return IntVal.tryExtValue(); steakhal wrote: ```suggestion return ConcreteVal->getValue().tryExtValue(); ``` https://github.com/llvm/llvm-project/pull/70056 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)
=?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: @@ -174,9 +176,119 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, return {nullptr, nullptr}; } -void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, -const Stmt* LoadS, -CheckerContext ) const { +static std::string getRegionName(const SubRegion *Region) { + std::string RegName = Region->getDescriptiveName(); + if (!RegName.empty()) +return RegName; + + // Field regions only have descriptive names when their parent has a + // descriptive name; so we provide a fallback representation for them: + if (const auto *FR = Region->getAs()) { +StringRef Name = FR->getDecl()->getName(); +if (!Name.empty()) + return formatv("the field '{0}'", Name); +return "the unnamed field"; steakhal wrote: ```suggestion if (StringRef Name = FR->getDecl()->getName(); !Name.empty()) return formatv("the field '{0}'", Name); return "the unnamed field"; ``` https://github.com/llvm/llvm-project/pull/70056 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)
=?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: https://github.com/steakhal requested changes to this pull request. Looks good. I only have minor remarks. Consider renaming the PR `Improve reports` -> `Improve messages`, or `diagnostics`, to highlight that the "messages" aspect is improved, not e.g. improving the FP/TP rate or something like that. https://github.com/llvm/llvm-project/pull/70056 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)
=?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: @@ -174,9 +176,119 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, return {nullptr, nullptr}; } -void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, -const Stmt* LoadS, -CheckerContext ) const { +static std::string getRegionName(const SubRegion *Region) { + std::string RegName = Region->getDescriptiveName(); + if (!RegName.empty()) +return RegName; + + // Field regions only have descriptive names when their parent has a + // descriptive name; so we provide a fallback representation for them: + if (const auto *FR = Region->getAs()) { +StringRef Name = FR->getDecl()->getName(); +if (!Name.empty()) + return formatv("the field '{0}'", Name); +return "the unnamed field"; + } + + if (isa(Region)) +return "the memory returned by 'alloca'"; + + if (isa(Region) && + isa(Region->getMemorySpace())) +return "the heap area"; + + if (isa(Region)) +return "the string literal"; + + return "the region"; +} + +static std::optional getConcreteValue(NonLoc SV) { + if (auto ConcreteVal = SV.getAs()) { +const llvm::APSInt = ConcreteVal->getValue(); +return IntVal.tryExtValue(); + } + return std::nullopt; +} + +static const char *ShortMsgTemplates[] = { +"Out of bound access to memory preceding {0}", +"Out of bound access to memory after the end of {0}", +"Potential out of bound access to {0} with tainted offset"}; + +static std::string getShortMsg(OOB_Kind Kind, std::string RegName) { + return formatv(ShortMsgTemplates[Kind], RegName); steakhal wrote: You could have the `ShortMsgTemplates` defined within this function, closer to the place being used. https://github.com/llvm/llvm-project/pull/70056 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)
=?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: @@ -174,9 +176,119 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, return {nullptr, nullptr}; } -void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, -const Stmt* LoadS, -CheckerContext ) const { +static std::string getRegionName(const SubRegion *Region) { + std::string RegName = Region->getDescriptiveName(); + if (!RegName.empty()) +return RegName; steakhal wrote: ```suggestion if (std::string RegName = Region->getDescriptiveName(); !RegName.empty()) return RegName; ``` https://github.com/llvm/llvm-project/pull/70056 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)
=?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: https://github.com/steakhal edited https://github.com/llvm/llvm-project/pull/70056 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)
=?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: DonatNagyE wrote: Note that this PR introduces a `PathSensitiveBugReport` where the `Description` and `ShortDescription` fields (inherited from `BugReport`) are different (because I'm using the four-argument constructor). These fields are intended for different purposes, but practically every checker stores the same string in both of them. To clarify this situation, I opened a [discussion on discourse](https://discourse.llvm.org/t/bug-report-warning-message-duplicated-as-a-note-by-most-checkers/74480). https://github.com/llvm/llvm-project/pull/70056 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)
=?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: DonatNagyE wrote: I renamed some variables because (1) they weren't CamelCased because they were "inherited" from the old versions of this checker and (2) their names were slightly too long and they introduced extra linebreaks in the newly added code. https://github.com/llvm/llvm-project/pull/70056 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)
=?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: https://github.com/DonatNagyE updated https://github.com/llvm/llvm-project/pull/70056 >From 77143e74edda6177248bebdf0424db915aa68a05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= Date: Mon, 2 Oct 2023 13:32:31 +0200 Subject: [PATCH 1/2] [analyzer] Improve reports from ArrayBoundCheckerV2 Previously alpha.security.ArrayBoundV2 produced very spartan bug reports; this commit ensures that the relevant (and known) details are reported to the user. The logic for detecting bugs is not changed, after this commit the checker will report the same set of issues, but with better messages. To test the details of the message generation this commit adds a new test file 'out-of-bounds-diagnostics.c'. Three of the testcases are added with FIXME notes because they reveal shortcomings of the existing modeling and bounds checking code. I will try to fix them in separate follow-up commits. --- .../Checkers/ArrayBoundCheckerV2.cpp | 181 ++ .../test/Analysis/out-of-bounds-diagnostics.c | 149 ++ clang/test/Analysis/out-of-bounds-new.cpp | 26 +-- clang/test/Analysis/out-of-bounds.c | 28 +-- .../test/Analysis/taint-diagnostic-visitor.c | 4 +- clang/test/Analysis/taint-generic.c | 20 +- clang/test/Analysis/taint-generic.cpp | 14 +- 7 files changed, 339 insertions(+), 83 deletions(-) create mode 100644 clang/test/Analysis/out-of-bounds-diagnostics.c diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index e80a06e38587520..7d97f36e4cffe44 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -22,23 +22,25 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" #include using namespace clang; using namespace ento; using namespace taint; +using llvm::formatv; namespace { +enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint }; + class ArrayBoundCheckerV2 : public Checker { BugType BT{this, "Out-of-bound access"}; BugType TaintBT{this, "Out-of-bound access", categories::TaintedData}; - enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint }; - void reportOOB(CheckerContext , ProgramStateRef ErrorState, OOB_Kind Kind, - SVal TaintedSVal = UnknownVal()) const; + NonLoc Offset, std::string RegName, std::string Msg) const; static bool isFromCtypeMacro(const Stmt *S, ASTContext ); @@ -53,7 +55,7 @@ class ArrayBoundCheckerV2 : /// Arr and the distance of Location from the beginning of Arr (expressed in a /// NonLoc that specifies the number of CharUnits). Returns nullopt when these /// cannot be determined. -std::optional> +static std::optional> computeOffset(ProgramStateRef State, SValBuilder , SVal Location) { QualType T = SVB.getArrayIndexType(); auto EvalBinOp = [, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) { @@ -174,9 +176,119 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, return {nullptr, nullptr}; } +static std::string getRegionName(const SubRegion *Region) { + std::string RegName = Region->getDescriptiveName(); + if (!RegName.empty()) +return RegName; + + // Field regions only have descriptive names when their parent has a + // descriptive name; so we provide a fallback representation for them: + if (const auto *FR = Region->getAs()) { +StringRef Name = FR->getDecl()->getName(); +if (!Name.empty()) + return formatv("the field '{0}'", Name); +return "the unnamed field"; + } + + if (isa(Region)) +return "the memory returned by 'alloca'"; + + if (isa(Region) && + isa(Region->getMemorySpace())) +return "the heap area"; + + if (isa(Region)) +return "the string literal"; + + return "the region"; +} + +static std::optional getConcreteValue(NonLoc SV) { + if (auto ConcreteVal = SV.getAs()) { +const llvm::APSInt = ConcreteVal->getValue(); +return IntVal.tryExtValue(); + } + return std::nullopt; +} + +static const char *ShortMsgTemplates[] = { +"Out of bound access to memory preceding {0}", +"Out of bound access to memory after the end of {0}", +"Potential out of bound access to {0} with tainted offset"}; + +static std::string getShortMsg(OOB_Kind Kind, std::string RegName) { + return formatv(ShortMsgTemplates[Kind], RegName); +} + +static std::string getPrecedesMsg(std::string RegName, NonLoc Offset) { + SmallString<128> Buf; + llvm::raw_svector_ostream Out(Buf); + Out << "Access of " << RegName << " at negative byte offset"; + if (auto ConcreteIdx = Offset.getAs()) +Out << ' ' << ConcreteIdx->getValue(); + return
[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff 6e3e21d2032216b5d01b47a0442638225e66dc1b 77143e74edda6177248bebdf0424db915aa68a05 -- clang/test/Analysis/out-of-bounds-diagnostics.c clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp clang/test/Analysis/out-of-bounds-new.cpp clang/test/Analysis/out-of-bounds.c clang/test/Analysis/taint-diagnostic-visitor.c clang/test/Analysis/taint-generic.c clang/test/Analysis/taint-generic.cpp `` View the diff from clang-format here. ``diff diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index 7d97f36e4cff..5741243c7c31 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -287,7 +287,7 @@ static std::string getTaintMsg(std::string RegName) { } void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, -const Stmt* LoadS, +const Stmt *LoadS, CheckerContext ) const { // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping `` https://github.com/llvm/llvm-project/pull/70056 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)
llvmbot wrote: @llvm/pr-subscribers-clang Author: None (DonatNagyE) Changes Previously alpha.security.ArrayBoundV2 produced very spartan bug reports; this commit ensures that the relevant (and known) details are reported to the user. The logic for detecting bugs is not changed, after this commit the checker will report the same set of issues, but with better messages. To test the details of the message generation this commit adds a new test file 'out-of-bounds-diagnostics.c'. Three of the testcases are added with FIXME notes because they reveal shortcomings of the existing modeling and bounds checking code. I will try to fix them in separate follow-up commits. --- Patch is 30.06 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/70056.diff 7 Files Affected: - (modified) clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp (+144-37) - (added) clang/test/Analysis/out-of-bounds-diagnostics.c (+149) - (modified) clang/test/Analysis/out-of-bounds-new.cpp (+13-13) - (modified) clang/test/Analysis/out-of-bounds.c (+14-14) - (modified) clang/test/Analysis/taint-diagnostic-visitor.c (+2-2) - (modified) clang/test/Analysis/taint-generic.c (+10-10) - (modified) clang/test/Analysis/taint-generic.cpp (+7-7) ``diff diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index e80a06e38587520..7d97f36e4cffe44 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -22,23 +22,25 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" #include using namespace clang; using namespace ento; using namespace taint; +using llvm::formatv; namespace { +enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint }; + class ArrayBoundCheckerV2 : public Checker { BugType BT{this, "Out-of-bound access"}; BugType TaintBT{this, "Out-of-bound access", categories::TaintedData}; - enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint }; - void reportOOB(CheckerContext , ProgramStateRef ErrorState, OOB_Kind Kind, - SVal TaintedSVal = UnknownVal()) const; + NonLoc Offset, std::string RegName, std::string Msg) const; static bool isFromCtypeMacro(const Stmt *S, ASTContext ); @@ -53,7 +55,7 @@ class ArrayBoundCheckerV2 : /// Arr and the distance of Location from the beginning of Arr (expressed in a /// NonLoc that specifies the number of CharUnits). Returns nullopt when these /// cannot be determined. -std::optional> +static std::optional> computeOffset(ProgramStateRef State, SValBuilder , SVal Location) { QualType T = SVB.getArrayIndexType(); auto EvalBinOp = [, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) { @@ -174,9 +176,119 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, return {nullptr, nullptr}; } +static std::string getRegionName(const SubRegion *Region) { + std::string RegName = Region->getDescriptiveName(); + if (!RegName.empty()) +return RegName; + + // Field regions only have descriptive names when their parent has a + // descriptive name; so we provide a fallback representation for them: + if (const auto *FR = Region->getAs()) { +StringRef Name = FR->getDecl()->getName(); +if (!Name.empty()) + return formatv("the field '{0}'", Name); +return "the unnamed field"; + } + + if (isa(Region)) +return "the memory returned by 'alloca'"; + + if (isa(Region) && + isa(Region->getMemorySpace())) +return "the heap area"; + + if (isa(Region)) +return "the string literal"; + + return "the region"; +} + +static std::optional getConcreteValue(NonLoc SV) { + if (auto ConcreteVal = SV.getAs()) { +const llvm::APSInt = ConcreteVal->getValue(); +return IntVal.tryExtValue(); + } + return std::nullopt; +} + +static const char *ShortMsgTemplates[] = { +"Out of bound access to memory preceding {0}", +"Out of bound access to memory after the end of {0}", +"Potential out of bound access to {0} with tainted offset"}; + +static std::string getShortMsg(OOB_Kind Kind, std::string RegName) { + return formatv(ShortMsgTemplates[Kind], RegName); +} + +static std::string getPrecedesMsg(std::string RegName, NonLoc Offset) { + SmallString<128> Buf; + llvm::raw_svector_ostream Out(Buf); + Out << "Access of " << RegName << " at negative byte offset"; + if (auto ConcreteIdx = Offset.getAs()) +Out << ' ' << ConcreteIdx->getValue(); + return std::string(Buf); +} +static std::string getExceedsMsg(ASTContext , std::string RegName, + NonLoc Offset, NonLoc Extent, SVal Location) { + const auto *EReg =
[clang] [analyzer] Improve reports from ArrayBoundCheckerV2 (PR #70056)
https://github.com/DonatNagyE created https://github.com/llvm/llvm-project/pull/70056 Previously alpha.security.ArrayBoundV2 produced very spartan bug reports; this commit ensures that the relevant (and known) details are reported to the user. The logic for detecting bugs is not changed, after this commit the checker will report the same set of issues, but with better messages. To test the details of the message generation this commit adds a new test file 'out-of-bounds-diagnostics.c'. Three of the testcases are added with FIXME notes because they reveal shortcomings of the existing modeling and bounds checking code. I will try to fix them in separate follow-up commits. >From 77143e74edda6177248bebdf0424db915aa68a05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= Date: Mon, 2 Oct 2023 13:32:31 +0200 Subject: [PATCH] [analyzer] Improve reports from ArrayBoundCheckerV2 Previously alpha.security.ArrayBoundV2 produced very spartan bug reports; this commit ensures that the relevant (and known) details are reported to the user. The logic for detecting bugs is not changed, after this commit the checker will report the same set of issues, but with better messages. To test the details of the message generation this commit adds a new test file 'out-of-bounds-diagnostics.c'. Three of the testcases are added with FIXME notes because they reveal shortcomings of the existing modeling and bounds checking code. I will try to fix them in separate follow-up commits. --- .../Checkers/ArrayBoundCheckerV2.cpp | 181 ++ .../test/Analysis/out-of-bounds-diagnostics.c | 149 ++ clang/test/Analysis/out-of-bounds-new.cpp | 26 +-- clang/test/Analysis/out-of-bounds.c | 28 +-- .../test/Analysis/taint-diagnostic-visitor.c | 4 +- clang/test/Analysis/taint-generic.c | 20 +- clang/test/Analysis/taint-generic.cpp | 14 +- 7 files changed, 339 insertions(+), 83 deletions(-) create mode 100644 clang/test/Analysis/out-of-bounds-diagnostics.c diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index e80a06e38587520..7d97f36e4cffe44 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -22,23 +22,25 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" #include using namespace clang; using namespace ento; using namespace taint; +using llvm::formatv; namespace { +enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint }; + class ArrayBoundCheckerV2 : public Checker { BugType BT{this, "Out-of-bound access"}; BugType TaintBT{this, "Out-of-bound access", categories::TaintedData}; - enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint }; - void reportOOB(CheckerContext , ProgramStateRef ErrorState, OOB_Kind Kind, - SVal TaintedSVal = UnknownVal()) const; + NonLoc Offset, std::string RegName, std::string Msg) const; static bool isFromCtypeMacro(const Stmt *S, ASTContext ); @@ -53,7 +55,7 @@ class ArrayBoundCheckerV2 : /// Arr and the distance of Location from the beginning of Arr (expressed in a /// NonLoc that specifies the number of CharUnits). Returns nullopt when these /// cannot be determined. -std::optional> +static std::optional> computeOffset(ProgramStateRef State, SValBuilder , SVal Location) { QualType T = SVB.getArrayIndexType(); auto EvalBinOp = [, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) { @@ -174,9 +176,119 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, return {nullptr, nullptr}; } +static std::string getRegionName(const SubRegion *Region) { + std::string RegName = Region->getDescriptiveName(); + if (!RegName.empty()) +return RegName; + + // Field regions only have descriptive names when their parent has a + // descriptive name; so we provide a fallback representation for them: + if (const auto *FR = Region->getAs()) { +StringRef Name = FR->getDecl()->getName(); +if (!Name.empty()) + return formatv("the field '{0}'", Name); +return "the unnamed field"; + } + + if (isa(Region)) +return "the memory returned by 'alloca'"; + + if (isa(Region) && + isa(Region->getMemorySpace())) +return "the heap area"; + + if (isa(Region)) +return "the string literal"; + + return "the region"; +} + +static std::optional getConcreteValue(NonLoc SV) { + if (auto ConcreteVal = SV.getAs()) { +const llvm::APSInt = ConcreteVal->getValue(); +return IntVal.tryExtValue(); + } + return std::nullopt; +} + +static const char *ShortMsgTemplates[] = { +"Out of bound access to memory preceding {0}", +"Out of bound