[clang] [analyzer][NFC] Make RegionStore dumps deterministic (PR #115615)
@@ -232,27 +233,86 @@ class RegionBindingsRef : public llvm::ImmutableMapRefhttps://github.com/llvm/llvm-project/pull/115615 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer][NFC] Make RegionStore dumps deterministic (PR #115615)
https://github.com/steakhal created https://github.com/llvm/llvm-project/pull/115615 Dump the memory space clusters before the other clusters, in alphabetical order. Then default bindings over direct bindings, and if any has symbolic offset, then those should come before the ones with concrete offsets. In theory, we should either have a symbolic offset OR concrete offsets, but never both at the same time. Needed for #114835 >From 26f0cfabe3328c8eb8a861dd5d1d541921499f0c Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Sat, 9 Nov 2024 15:55:08 +0100 Subject: [PATCH] [analyzer][NFC] Make RegionStore dumps deterministic Dump the memory space clusters before the other clusters, in alphabetical order. Then default bindings over direct bindings, and if any has symbolic offset, then those should come before the ones with concrete offsets. In theory, we should either have a symbolic offset OR concrete offsets, but never both at the same time. --- clang/lib/StaticAnalyzer/Core/RegionStore.cpp | 86 --- 1 file changed, 73 insertions(+), 13 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp index 674099dd7e1f0f..6bad9a93a30169 100644 --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -67,9 +67,10 @@ class BindingKey { isa(r)) && "Not a base"); } -public: +public: bool isDirect() const { return P.getInt() & Direct; } + bool isDefault() const { return !isDirect(); } bool hasSymbolicOffset() const { return P.getInt() & Symbolic; } const MemRegion *getRegion() const { return P.getPointer(); } @@ -232,27 +233,86 @@ class RegionBindingsRef : public llvm::ImmutableMapRef StringifyCache; +auto ToString = [&StringifyCache](const MemRegion *R) { + auto [Place, Inserted] = StringifyCache.try_emplace(R); + if (!Inserted) +return Place->second; + std::string Res; + raw_string_ostream OS(Res); + OS << R; + Place->second = Res; + return Res; +}; + +using Cluster = +std::pair>; +using Binding = std::pair; + +const auto MemSpaceBeforeRegionName = [&ToString](const Cluster *L, + const Cluster *R) { + if (isa(L->first) && !isa(R->first)) +return true; + if (!isa(L->first) && isa(R->first)) +return false; + return ToString(L->first) < ToString(R->first); +}; + +const auto SymbolicBeforeOffset = [&ToString](const BindingKey &L, + const BindingKey &R) { + if (L.hasSymbolicOffset() && !R.hasSymbolicOffset()) +return true; + if (!L.hasSymbolicOffset() && R.hasSymbolicOffset()) +return false; + if (L.hasSymbolicOffset() && R.hasSymbolicOffset()) +return ToString(L.getRegion()) < ToString(R.getRegion()); + return L.getOffset() < R.getOffset(); +}; + +const auto DefaultBindingBeforeDirectBindings = +[&SymbolicBeforeOffset](const Binding *LPtr, const Binding *RPtr) { + const BindingKey &L = LPtr->first; + const BindingKey &R = RPtr->first; + if (L.isDefault() && !R.isDefault()) +return true; + if (!L.isDefault() && R.isDefault()) +return false; + assert(L.isDefault() == R.isDefault()); + return SymbolicBeforeOffset(L, R); +}; + +const auto AddrOf = [](const auto &Item) { return &Item; }; + +std::vector SortedClusters; +SortedClusters.reserve(std::distance(begin(), end())); +append_range(SortedClusters, map_range(*this, AddrOf)); +llvm::sort(SortedClusters, MemSpaceBeforeRegionName); + +for (auto [Idx, C] : llvm::enumerate(SortedClusters)) { + const auto &[BaseRegion, Bindings] = *C; Indent(Out, Space, IsDot) - << "{ \"cluster\": \"" << I.getKey() << "\", \"pointer\": \"" - << (const void *)I.getKey() << "\", \"items\": [" << NL; + << "{ \"cluster\": \"" << BaseRegion << "\", \"pointer\": \"" + << (const void *)BaseRegion << "\", \"items\": [" << NL; + + std::vector SortedBindings; + SortedBindings.reserve(std::distance(Bindings.begin(), Bindings.end())); + append_range(SortedBindings, map_range(Bindings, AddrOf)); + llvm::sort(SortedBindings, DefaultBindingBeforeDirectBindings); ++Space; - const ClusterBindings &CB = I.getData(); - for (ClusterBindings::iterator CI = CB.begin(), CE = CB.end(); CI != CE; - ++CI) { -Indent(Out, Space, IsDot) << "{ " << CI.getKey() << ", \"value\": "; -CI.getData().printJson(Out, /*AddQuotes=*/true); + for (auto [Idx, B] : llvm::enumerate(SortedBindings)) { +const auto &[Key, Value] = *B; +Indent(Out, Space, IsDot) << "{ " << Key << ", \"value\": "; +Value.printJson(Out, /*AddQuotes=*/true); Out
[clang] [analyzer] Refine LCV handling in Store for better taint propagation (PR #114835)
steakhal wrote: > Thanks for the explanation -- code example reduction friendliness is a good > point that I didn't think about. Based on this, I support keeping that > commit, but perhaps add some remarks (in comments or the commit message, > wherever you think it's well-placed) that mentions code reduction as a > motivation. Added the explanation to the [[analyzer] Allow copying empty structs (1/4)](https://github.com/llvm/llvm-project/pull/114835/commits/efa54ca20448e3af84dc9ea5d5bb9dbfbf021ed9#diff-c6b319d67a948674d0a135c65a7f64157d996b4f2b33d89dbfdeb4085d71907aR2345-R2348). https://github.com/llvm/llvm-project/pull/114835 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Refine LCV handling in Store for better taint propagation (PR #114835)
@@ -15,20 +19,103 @@ struct empty { void test_copy_return() { aggr s1 = {1, 2}; aggr const& cr1 = aggr(s1); - clang_analyzer_dump(cr1); // expected-warning-re {{&lifetime_extended_object{aggr, cr1, S{{[0-9]+}}} }} + clang_analyzer_dump_lref(cr1); // expected-warning-re {{&lifetime_extended_object{aggr, cr1, S{{[0-9]+}}} }} empty s2; empty const& cr2 = empty{s2}; - clang_analyzer_dump(cr2); // expected-warning-re {{&lifetime_extended_object{empty, cr2, S{{[0-9]+}}} }} + clang_analyzer_dump_lref(cr2); // expected-warning-re {{&lifetime_extended_object{empty, cr2, S{{[0-9]+}}} }} } void test_assign_return() { aggr s1 = {1, 2}; aggr d1; - clang_analyzer_dump(d1 = s1); // expected-warning {{&d1 }} + clang_analyzer_dump_lref(d1 = s1); // expected-warning {{&d1 }} empty s2; empty d2; - clang_analyzer_dump(d2 = s2); // expected-warning {{&d2 }} was Unknown + clang_analyzer_dump_lref(d2 = s2); // expected-warning {{&d2 }} was Unknown } + +namespace trivial_struct_copy { + +void _01_empty_structs() { + clang_analyzer_dump_val(conjure()); // expected-warning {{lazyCompoundVal}} + empty Empty = conjure(); + empty Empty2 = Empty; + empty Empty3 = Empty2; + // All of these should refer to the exact same LCV, because all of + // these trivial copies refer to the original conjured value. + // There were Unknown before: + clang_analyzer_dump_val(Empty); // expected-warning {{lazyCompoundVal}} + clang_analyzer_dump_val(Empty2); // expected-warning {{lazyCompoundVal}} + clang_analyzer_dump_val(Empty3); // expected-warning {{lazyCompoundVal}} + + // Notice that we don't have entries for the copies, only for the original "Empty" object. + // That binding was added by the opaque "conjure" call, directly constructing to the variable "Empty" by copy-elision. + // The copies are not present because the ExprEngine skips the evalBind of empty structs. + // And even if such binds would reach the Store, the Store copies small structs by copying the individual fields, + // and there are no fields within "Empty", thus we wouldn't have any entries anyways. + clang_analyzer_printState(); + // CHECK: "store": { "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT:{ "cluster": "GlobalInternalSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$ + // CHECK-NEXT:]}, + // CHECK-NEXT:{ "cluster": "GlobalSystemSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$ + // CHECK-NEXT:]}, steakhal wrote: Fix ordering issue should be fixed by https://github.com/llvm/llvm-project/pull/114835/commits/26f0cfabe3328c8eb8a861dd5d1d541921499f0c. https://github.com/llvm/llvm-project/pull/114835 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Refine LCV handling in Store for better taint propagation (PR #114835)
https://github.com/steakhal updated https://github.com/llvm/llvm-project/pull/114835 >From 26f0cfabe3328c8eb8a861dd5d1d541921499f0c Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Sat, 9 Nov 2024 15:55:08 +0100 Subject: [PATCH 1/5] [analyzer][NFC] Make RegionStore dumps deterministic Dump the memory space clusters before the other clusters, in alphabetical order. Then default bindings over direct bindings, and if any has symbolic offset, then those should come before the ones with concrete offsets. In theory, we should either have a symbolic offset OR concrete offsets, but never both at the same time. --- clang/lib/StaticAnalyzer/Core/RegionStore.cpp | 86 --- 1 file changed, 73 insertions(+), 13 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp index 674099dd7e1f0f..6bad9a93a30169 100644 --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -67,9 +67,10 @@ class BindingKey { isa(r)) && "Not a base"); } -public: +public: bool isDirect() const { return P.getInt() & Direct; } + bool isDefault() const { return !isDirect(); } bool hasSymbolicOffset() const { return P.getInt() & Symbolic; } const MemRegion *getRegion() const { return P.getPointer(); } @@ -232,27 +233,86 @@ class RegionBindingsRef : public llvm::ImmutableMapRef StringifyCache; +auto ToString = [&StringifyCache](const MemRegion *R) { + auto [Place, Inserted] = StringifyCache.try_emplace(R); + if (!Inserted) +return Place->second; + std::string Res; + raw_string_ostream OS(Res); + OS << R; + Place->second = Res; + return Res; +}; + +using Cluster = +std::pair>; +using Binding = std::pair; + +const auto MemSpaceBeforeRegionName = [&ToString](const Cluster *L, + const Cluster *R) { + if (isa(L->first) && !isa(R->first)) +return true; + if (!isa(L->first) && isa(R->first)) +return false; + return ToString(L->first) < ToString(R->first); +}; + +const auto SymbolicBeforeOffset = [&ToString](const BindingKey &L, + const BindingKey &R) { + if (L.hasSymbolicOffset() && !R.hasSymbolicOffset()) +return true; + if (!L.hasSymbolicOffset() && R.hasSymbolicOffset()) +return false; + if (L.hasSymbolicOffset() && R.hasSymbolicOffset()) +return ToString(L.getRegion()) < ToString(R.getRegion()); + return L.getOffset() < R.getOffset(); +}; + +const auto DefaultBindingBeforeDirectBindings = +[&SymbolicBeforeOffset](const Binding *LPtr, const Binding *RPtr) { + const BindingKey &L = LPtr->first; + const BindingKey &R = RPtr->first; + if (L.isDefault() && !R.isDefault()) +return true; + if (!L.isDefault() && R.isDefault()) +return false; + assert(L.isDefault() == R.isDefault()); + return SymbolicBeforeOffset(L, R); +}; + +const auto AddrOf = [](const auto &Item) { return &Item; }; + +std::vector SortedClusters; +SortedClusters.reserve(std::distance(begin(), end())); +append_range(SortedClusters, map_range(*this, AddrOf)); +llvm::sort(SortedClusters, MemSpaceBeforeRegionName); + +for (auto [Idx, C] : llvm::enumerate(SortedClusters)) { + const auto &[BaseRegion, Bindings] = *C; Indent(Out, Space, IsDot) - << "{ \"cluster\": \"" << I.getKey() << "\", \"pointer\": \"" - << (const void *)I.getKey() << "\", \"items\": [" << NL; + << "{ \"cluster\": \"" << BaseRegion << "\", \"pointer\": \"" + << (const void *)BaseRegion << "\", \"items\": [" << NL; + + std::vector SortedBindings; + SortedBindings.reserve(std::distance(Bindings.begin(), Bindings.end())); + append_range(SortedBindings, map_range(Bindings, AddrOf)); + llvm::sort(SortedBindings, DefaultBindingBeforeDirectBindings); ++Space; - const ClusterBindings &CB = I.getData(); - for (ClusterBindings::iterator CI = CB.begin(), CE = CB.end(); CI != CE; - ++CI) { -Indent(Out, Space, IsDot) << "{ " << CI.getKey() << ", \"value\": "; -CI.getData().printJson(Out, /*AddQuotes=*/true); + for (auto [Idx, B] : llvm::enumerate(SortedBindings)) { +const auto &[Key, Value] = *B; +Indent(Out, Space, IsDot) << "{ " << Key << ", \"value\": "; +Value.printJson(Out, /*AddQuotes=*/true); Out << " }"; -if (std::next(CI) != CE) +if (Idx != SortedBindings.size() - 1) Out << ','; Out << NL; } - --Space; Indent(Out, Space, IsDot) << "]}"; - if (std::next(I) != E) + if (Idx != SortedClusters.size() - 1) Out << ','; Out << NL; } >From efa54ca20448e3
[clang] [StaticAnalyzer] early return if sym is concrete on assuming (PR #115579)
steakhal wrote: Hi, thanks for the PR! I'm slightly confused that the compiler crash you refer to comes from the stdlibrary fn checker. This suggest to me a checker problem - and likely relates to the stdlibraryfn checker early return. However, this also couples with a solver change. Is this improving the solver that would also make the checker avoid the crash? If this is the case, then we should have separate PRs because we should harden the checker on one side, and also improve the solver in an other area. In any case, I'll have a look at this next week. https://github.com/llvm/llvm-project/pull/115579 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Trust base to derived casts for dynamic types (PR #69057)
steakhal wrote: > > However, what should we do if multiple (N) classes implement Base? Trying > > each N, and basically splitting the state to (N+1) ways is not going to > > scale. Unless N is of course really small, like 2 or 3 at most. > > That's kind of what I imagined - try them all. The Analyzer has a built-in > timeout mechanism that will come into play. > > If you wanted to be fancy, there could be an obscure config option, with a > default of N=3. If config value is not set, and N>3 it asserts, alerting the > user to the situation. If config value is set, it limits to analyzing the > first N. I re-read the thread, and I think the most promising approach would be to to split the path N ways to see the outcome. An alternative workaround would be to somehow annotate the code to force it to split into N ways and then inject a static cast to each way doing the relevant base->derived cast that this PR already instrument and do the desired behavior. Something like this: ```c++ template void clang_analyzer_try_cast(const T *base) { extern bool analyzer_internal_coin(); if (analyzer_internal_coin()) (void)static_cast(base); } template void clang_analyzer_split_n_ways(const T *base) { static_assert(sizeof...(Ts) > 0); (clang_analyzer_try_cast(base), ...); } struct Base {}; struct A : Base {}; struct B : Base {}; struct C : Base {}; void test(const Base *p) { clang_analyzer_split_n_ways(p); // <--- This is how the no-op static casts would split into A, B, C. // We should have like 3 paths here, plus the conservatively eval called path. } https://github.com/llvm/llvm-project/pull/69057 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][NFC] Spell out DynTypedNode instead of auto (PR #114427)
https://github.com/steakhal closed https://github.com/llvm/llvm-project/pull/114427 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Refine LCV handling in Store for better taint propagation (PR #114835)
steakhal wrote: > [...] I'm a bit confused by the first commit because as far as I see, the > empty struct is _unable to_ transfer any attacker-controlled data, and > therefore I don't know what does it mean that it's tainted. Exactly. Because we don't let the bind operations go through, the copies won't have an identity, thus there is nothing where taint could bind to. However, it only looking at the code users could get confused (rightly so). For example, when the *dedicated* users reduce their reproducers, they would find it surprising to suddenly break taint propagation in their example when they remove the last remaining member of the class that was supposed to carry taint. They may overlook that they broke their reproducer and draw false conclusions of the issue. >[...] Do you have a practical use case where this would be useful? I'd say, not really. Another way of looking at that is remaining consistent no matter how many fields we have in a class. And btw, given that all objects must be at least of one byte, actually, even an empty class has 1 byte of semi-usable storage. If that empty class is tainted, then reinterpret-casting it to a single char should be also considered tainted. But again, I don't really expect this to happen in practice, and we can drop that commit from the stack without affecting the outcome of fixing the issue motivating this stack. However, I'd lean towards a more consistent behavior for structs, no matter their guts. https://github.com/llvm/llvm-project/pull/114835 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang] [NFC] Refactor AST visitors in Sema and the static analyser to use DRAV (PR #115144)
steakhal wrote: What's the benefit of using `DRAV` over `RAV`? I couldn't find the motivation in the PR summary in neither of this or the linked PR. https://github.com/llvm/llvm-project/pull/115144 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][NFC] Spell out DynTypedNode instead of auto (PR #114427)
https://github.com/steakhal approved this pull request. I don't really feel that this is relevant enough to be merged, but let's do it anyways. If you find more things to refactor, or have plans for doing so, let me know. https://github.com/llvm/llvm-project/pull/114427 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang] Add steakhal to the Clang Static Analyzer maintainers (PR #114991)
https://github.com/steakhal created https://github.com/llvm/llvm-project/pull/114991 I've been contributing to the Clang Static Analyzer for a while now. I think from 2019, or something like that. I've ensured the quality of the Static Analyzer releases for the last ~4-6 releases now, with testing, fixing and backporting patches; also writing comprehensive release notes for each release. I have a strong sense of ownership of the code I contribute. I follow the issue tracker, and also try to follow and participate in RFCs on Discourse if I'm not overloaded. I also check Discord time-to-time, but I rarely see anything there. You can find the maintainer section of the LLVM DeveloperPolicy [here](https://llvm.org/docs/DeveloperPolicy.html#maintainers) to read more about the responsibilities. >From 8da0b1e7d3ea2d129dc8b753f217a62a81f99e8c Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Tue, 5 Nov 2024 14:44:04 +0100 Subject: [PATCH] [clang] Add steakhal to the Clang Static Analyzer maintainers --- clang/Maintainers.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang/Maintainers.rst b/clang/Maintainers.rst index 27255a9c0319b4..4d865c3a6889e5 100644 --- a/clang/Maintainers.rst +++ b/clang/Maintainers.rst @@ -131,6 +131,8 @@ Clang static analyzer | Gábor Horváth | xazax.hun\@gmail.com (email), xazax.hun (Phabricator), Xazax-hun (GitHub) +| Balázs Benics +| benicsbalazs\@gmail.com (email), steakhal (Phabricator), steakhal (GitHub) Compiler options ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang-tools-extra] [analyzer] Remove alpha.core.IdenticalExpr Checker (PR #114715)
steakhal wrote: I'm pretty sure last time I've compared this checker to the tidy implementation, I saw slight differences. Have you thoroughly compared the two? Have you seen discrepancies? Speaking of the tests, I'd also prefer migrating existing ones to uncover differences. Unfortunately, frequently our test coverage is not great and because of that I don't think we can drop this unless we/you do a through comparison of the two. BTW I'm all in favor of reducing code duplication, and clang-tidy is a much better place for this rule. https://github.com/llvm/llvm-project/pull/114715 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Refine LCV handling in Store for better taint propagation (PR #114835)
https://github.com/steakhal edited https://github.com/llvm/llvm-project/pull/114835 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Refine LCV handling in Store for better taint propagation (PR #114835)
@@ -15,20 +19,103 @@ struct empty { void test_copy_return() { aggr s1 = {1, 2}; aggr const& cr1 = aggr(s1); - clang_analyzer_dump(cr1); // expected-warning-re {{&lifetime_extended_object{aggr, cr1, S{{[0-9]+}}} }} + clang_analyzer_dump_lref(cr1); // expected-warning-re {{&lifetime_extended_object{aggr, cr1, S{{[0-9]+}}} }} empty s2; empty const& cr2 = empty{s2}; - clang_analyzer_dump(cr2); // expected-warning-re {{&lifetime_extended_object{empty, cr2, S{{[0-9]+}}} }} + clang_analyzer_dump_lref(cr2); // expected-warning-re {{&lifetime_extended_object{empty, cr2, S{{[0-9]+}}} }} } void test_assign_return() { aggr s1 = {1, 2}; aggr d1; - clang_analyzer_dump(d1 = s1); // expected-warning {{&d1 }} + clang_analyzer_dump_lref(d1 = s1); // expected-warning {{&d1 }} empty s2; empty d2; - clang_analyzer_dump(d2 = s2); // expected-warning {{&d2 }} was Unknown + clang_analyzer_dump_lref(d2 = s2); // expected-warning {{&d2 }} was Unknown } + +namespace trivial_struct_copy { + +void _01_empty_structs() { + clang_analyzer_dump_val(conjure()); // expected-warning {{lazyCompoundVal}} + empty Empty = conjure(); + empty Empty2 = Empty; + empty Empty3 = Empty2; + // All of these should refer to the exact same LCV, because all of + // these trivial copies refer to the original conjured value. + // There were Unknown before: + clang_analyzer_dump_val(Empty); // expected-warning {{lazyCompoundVal}} + clang_analyzer_dump_val(Empty2); // expected-warning {{lazyCompoundVal}} + clang_analyzer_dump_val(Empty3); // expected-warning {{lazyCompoundVal}} + + // Notice that we don't have entries for the copies, only for the original "Empty" object. + // That binding was added by the opaque "conjure" call, directly constructing to the variable "Empty" by copy-elision. + // The copies are not present because the ExprEngine skips the evalBind of empty structs. + // And even if such binds would reach the Store, the Store copies small structs by copying the individual fields, + // and there are no fields within "Empty", thus we wouldn't have any entries anyways. + clang_analyzer_printState(); + // CHECK: "store": { "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT:{ "cluster": "GlobalInternalSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$ + // CHECK-NEXT:]}, + // CHECK-NEXT:{ "cluster": "GlobalSystemSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$ + // CHECK-NEXT:]}, steakhal wrote: It turns out on Windows, the order of these entries could be different. :( I'll check how could I make these stable. https://github.com/llvm/llvm-project/pull/114835 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Refine LCV handling in Store for better taint propagation (PR #114835)
https://github.com/steakhal created https://github.com/llvm/llvm-project/pull/114835 Review commit by commit, but I'm presenting here a stacked PR to let you know of the motivation, and more importantly how the pieces would fit together. Each commit has it's own description to help you focus on what matters. Once this PR is reviewed, I'll split this into individual PRs and merge one-by-one. The original intent was to fix #114270. >From 63d31ee43cf91042507449c722ecf92949c7b5fe Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Sat, 2 Nov 2024 14:13:00 +0100 Subject: [PATCH 1/4] [analyzer] Allow copying empty structs (1/4) We represent copies of structs by LazyCompoundVals, that is basically a snapshot of the Store and Region that your copy would refer to. This snapshot is actually not taken for empty structs (structs that have no non-static data members), because the users won't be able to access any fields anyways, so why bother. However, when it comes to taint propagation, it would be nice if instances of empty structs would behave similar to non-empty structs. For this, we need an identity for which taint can bind, so Unknown - that was used in the past wouldn't work. Consequently, copying the value of an empty struct should behave the same way as a non-empty struct, thus be represented by a LazyCompoundVal. --- clang/lib/StaticAnalyzer/Core/RegionStore.cpp | 10 +- clang/test/Analysis/ctor-trivial-copy.cpp | 101 -- 2 files changed, 95 insertions(+), 16 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp index 674099dd7e1f0f..805a266e7ab0f3 100644 --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -2276,18 +2276,10 @@ NonLoc RegionStoreManager::createLazyBinding(RegionBindingsConstRef B, return svalBuilder.makeLazyCompoundVal(StoreRef(B.asStore(), *this), R); } -static bool isRecordEmpty(const RecordDecl *RD) { - if (!RD->field_empty()) -return false; - if (const CXXRecordDecl *CRD = dyn_cast(RD)) -return CRD->getNumBases() == 0; - return true; -} - SVal RegionStoreManager::getBindingForStruct(RegionBindingsConstRef B, const TypedValueRegion *R) { const RecordDecl *RD = R->getValueType()->castAs()->getDecl(); - if (!RD->getDefinition() || isRecordEmpty(RD)) + if (!RD->getDefinition()) return UnknownVal(); return createLazyBinding(B, R); diff --git a/clang/test/Analysis/ctor-trivial-copy.cpp b/clang/test/Analysis/ctor-trivial-copy.cpp index 5ed188aa8f1eae..a1209c24136e20 100644 --- a/clang/test/Analysis/ctor-trivial-copy.cpp +++ b/clang/test/Analysis/ctor-trivial-copy.cpp @@ -1,8 +1,12 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config c++-inlining=constructors -verify %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config c++-inlining=constructors -verify %s \ +// RUN: 2>&1 | FileCheck %s -template -void clang_analyzer_dump(T&); +void clang_analyzer_printState(); +template void clang_analyzer_dump_lref(T&); +template void clang_analyzer_dump_val(T); +template T conjure(); +template void nop(const Ts &... args) {} struct aggr { int x; @@ -15,20 +19,103 @@ struct empty { void test_copy_return() { aggr s1 = {1, 2}; aggr const& cr1 = aggr(s1); - clang_analyzer_dump(cr1); // expected-warning-re {{&lifetime_extended_object{aggr, cr1, S{{[0-9]+}}} }} + clang_analyzer_dump_lref(cr1); // expected-warning-re {{&lifetime_extended_object{aggr, cr1, S{{[0-9]+}}} }} empty s2; empty const& cr2 = empty{s2}; - clang_analyzer_dump(cr2); // expected-warning-re {{&lifetime_extended_object{empty, cr2, S{{[0-9]+}}} }} + clang_analyzer_dump_lref(cr2); // expected-warning-re {{&lifetime_extended_object{empty, cr2, S{{[0-9]+}}} }} } void test_assign_return() { aggr s1 = {1, 2}; aggr d1; - clang_analyzer_dump(d1 = s1); // expected-warning {{&d1 }} + clang_analyzer_dump_lref(d1 = s1); // expected-warning {{&d1 }} empty s2; empty d2; - clang_analyzer_dump(d2 = s2); // expected-warning {{&d2 }} was Unknown + clang_analyzer_dump_lref(d2 = s2); // expected-warning {{&d2 }} was Unknown } + +namespace trivial_struct_copy { + +void _01_empty_structs() { + clang_analyzer_dump_val(conjure()); // expected-warning {{lazyCompoundVal}} + empty Empty = conjure(); + empty Empty2 = Empty; + empty Empty3 = Empty2; + // All of these should refer to the exact same LCV, because all of + // these trivial copies refer to the original conjured value. + // There were Unknown before: + clang_analyzer_dump_val(Empty); // expected-warning {{lazyCompoundVal}} + clang_analyzer_dump_val(Empty2); // expected-warning {{lazyCompoundVal}} + clang_analyzer_dump_val(Empty3); // expected-warning {{lazyCompoundVal}} + + // Notice that we don't have entries for the copies, only f
[clang] [clang][NFC] Spell out DynTypedNode instead of auto (PR #114427)
steakhal wrote: Have you other similar cases in the Static Analyzer code base? https://github.com/llvm/llvm-project/pull/114427 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Fix false double free when including 3rd-party headers with overloaded delete operator as system headers (PR #85224)
https://github.com/steakhal closed https://github.com/llvm/llvm-project/pull/85224 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer][NFC] Remove check::BranchCondition from debug.DumpTraversal (PR #113906)
=?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: https://github.com/steakhal approved this pull request. There are fundamental problems with using stdout and FileCheck. Those assertions (`CHECK` line) depend on their relative ordering. However, the ordering of the top-level entry points may change by design. This made it impractical in the past to use FileCheck for tests, and was generally avoided. https://github.com/llvm/llvm-project/pull/113906 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] EvalBinOpLL should return Unknown less often (PR #114222)
https://github.com/steakhal closed https://github.com/llvm/llvm-project/pull/114222 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] EvalBinOpLL should return Unknown less often (PR #114222)
https://github.com/steakhal updated https://github.com/llvm/llvm-project/pull/114222 >From 37ef4f72d42b01e70795bb2fa86138f7fbdd8077 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Wed, 30 Oct 2024 13:56:40 +0100 Subject: [PATCH 1/2] [analyzer] EvalBinOpLL should return Unknown less often SValBuilder::getKnownValue, getMinValue, getMaxValue use SValBuilder::simplifySVal. simplifySVal does repeated simplification until a fixed-point is reached. A single step is done by SimpleSValBuilder::simplifySValOnce, using a Simplifier visitor. That will basically decompose SymSymExprs, and apply constant folding using the constraints we have in the State. Once it decomposes a SymSymExpr, it simplifies both sides and then uses the SValBuilder::evalBinOp to reconstruct the same - but now simpler - SymSymExpr, while applying some caching to remain performant. This decomposition, and then the subsequent re-composition poses new challenges to the SValBuilder::evalBinOp, which is built to handle expressions coming from real C/C++ code, thus applying some implicit assumptions. One previous assumption was that nobody would form an expression like "((int*)0) - q" (where q is an int pointer), because it doesn't really makes sense to write code like that. However, during simplification, we may end up with a call to evalBinOp similar to this. To me, simplifying a SymbolRef should never result in Unknown or Undef, unless it was Unknown or Undef initially or, during simplification we realized that it's a division by zero once we did the constant folding, etc. In the following case the simplified SVal should not become UnknownVal: ```c++ void top(char *p, char *q) { int diff = p - q; // diff: reg - reg if (!p) // p: NULL simplify(diff); // diff after simplification should be: 0(loc) - reg } ``` Returning Unknown from the simplifySVal can weaken analysis precision in other places too, such as in SValBuilder::getKnownValue, getMinValue, or getMaxValue because we call simplifySVal before doing anything else. For nonloc::SymbolVals, this loss of precision is critical, because for those the SymbolRef carries an accurate type of the encoded computation, thus we should at least have a conservative upper or lower bound that we could return from getMinValue or getMaxValue - yet we would just return nullptr. ```c++ const llvm::APSInt *SimpleSValBuilder::getKnownValue(ProgramStateRef state, SVal V) { return getConstValue(state, simplifySVal(state, V)); } const llvm::APSInt *SimpleSValBuilder::getMinValue(ProgramStateRef state, SVal V) { V = simplifySVal(state, V); if (const llvm::APSInt *Res = getConcreteValue(V)) return Res; if (SymbolRef Sym = V.getAsSymbol()) return state->getConstraintManager().getSymMinVal(state, Sym); return nullptr; } ``` For now, I don't plan to make the simplification bullet-proof, I'm just explaining why I made this change and what you need to look out for in the future if you see a similar issue. CPP-5750 --- .../StaticAnalyzer/Core/SimpleSValBuilder.cpp | 7 +- clang/unittests/StaticAnalyzer/CMakeLists.txt | 1 + .../StaticAnalyzer/SValSimplifyerTest.cpp | 103 ++ 3 files changed, 108 insertions(+), 3 deletions(-) create mode 100644 clang/unittests/StaticAnalyzer/SValSimplifyerTest.cpp diff --git a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp index 45e48d435aca6a..229169f848e228 100644 --- a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp @@ -860,11 +860,12 @@ SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state, // If one of the operands is a symbol and the other is a constant, // build an expression for use by the constraint manager. if (SymbolRef rSym = rhs.getAsLocSymbol()) { - // We can only build expressions with symbols on the left, - // so we need a reversible operator. - if (!BinaryOperator::isComparisonOp(op) || op == BO_Cmp) + if (op == BO_Cmp) return UnknownVal(); + if (!BinaryOperator::isComparisonOp(op)) +return makeNonLoc(L.getValue(), op, rSym, resultTy); + op = BinaryOperator::reverseComparisonOp(op); return makeNonLoc(rSym, op, L.getValue(), resultTy); } diff --git a/clang/unittests/StaticAnalyzer/CMakeLists.txt b/clang/unittests/StaticAnalyzer/CMakeLists.txt index 5ef72cfaea4011..f5da86e5456030 100644 --- a/clang/unittests/StaticAnalyzer/CMakeLists.txt +++ b/clang/unittests/StaticAnalyzer/CMakeLists.txt @@ -20,6 +20,7 @@ add_clang_unittest(StaticAnalysisTests RegisterCustomCheckersTest.cpp StoreTest.cpp SymbolReaperTest.cpp + SValSimplifyerTest.cpp SValTest.cpp TestReturnValueUnderConstruction.cpp Z3CrosscheckOracleTest.cpp diff --git a/clang/unittests/StaticAnalyzer/SValSimplifyerTest.cpp b/cla
[clang] [clang][Index][USR][NFC] Allow customizing langopts for USR generation (PR #109574)
https://github.com/steakhal closed https://github.com/llvm/llvm-project/pull/109574 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][Index][USR][NFC] Allow customizing langopts for USR generation (PR #109574)
https://github.com/steakhal updated https://github.com/llvm/llvm-project/pull/109574 >From 643ad2cd6e788b6e62f22c980ed06abd192ded55 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Sun, 22 Sep 2024 12:04:33 +0200 Subject: [PATCH] [clang][Index][USR][NFC] Allow customizing langopts for USR generation This helps to produce USRs for custom LangOpts - that differ from the one associated with the given Decl. This can unlock usecases in tooling opportunities that we have downstream. This is NFC because existing calls will still result in the right overload, thus using the LangOpts associated with the ASTContext of the Decls and Types. --- clang/include/clang/Index/USRGeneration.h | 8 +++- clang/lib/Index/USRGeneration.cpp | 48 ++- 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/clang/include/clang/Index/USRGeneration.h b/clang/include/clang/Index/USRGeneration.h index f89fc5cf49302c..61d267f3545a70 100644 --- a/clang/include/clang/Index/USRGeneration.h +++ b/clang/include/clang/Index/USRGeneration.h @@ -15,6 +15,7 @@ namespace clang { class ASTContext; class Decl; +class LangOptions; class MacroDefinitionRecord; class Module; class SourceLocation; @@ -30,6 +31,8 @@ static inline StringRef getUSRSpacePrefix() { /// Generate a USR for a Decl, including the USR prefix. /// \returns true if the results should be ignored, false otherwise. bool generateUSRForDecl(const Decl *D, SmallVectorImpl &Buf); +bool generateUSRForDecl(const Decl *D, SmallVectorImpl &Buf, +const LangOptions &LangOpts); /// Generate a USR fragment for an Objective-C class. void generateUSRForObjCClass(StringRef Cls, raw_ostream &OS, @@ -75,7 +78,10 @@ bool generateUSRForMacro(StringRef MacroName, SourceLocation Loc, /// Generates a USR for a type. /// /// \return true on error, false on success. -bool generateUSRForType(QualType T, ASTContext &Ctx, SmallVectorImpl &Buf); +bool generateUSRForType(QualType T, ASTContext &Ctx, +SmallVectorImpl &Buf); +bool generateUSRForType(QualType T, ASTContext &Ctx, SmallVectorImpl &Buf, +const LangOptions &LangOpts); /// Generate a USR for a module, including the USR prefix. /// \returns true on error, false on success. diff --git a/clang/lib/Index/USRGeneration.cpp b/clang/lib/Index/USRGeneration.cpp index f00bc56429f1a7..1cfca8b6aaa368 100644 --- a/clang/lib/Index/USRGeneration.cpp +++ b/clang/lib/Index/USRGeneration.cpp @@ -62,20 +62,17 @@ namespace { class USRGenerator : public ConstDeclVisitor { SmallVectorImpl &Buf; llvm::raw_svector_ostream Out; - bool IgnoreResults; ASTContext *Context; - bool generatedLoc; + const LangOptions &LangOpts; + bool IgnoreResults = false; + bool generatedLoc = false; llvm::DenseMap TypeSubstitutions; public: - explicit USRGenerator(ASTContext *Ctx, SmallVectorImpl &Buf) - : Buf(Buf), -Out(Buf), -IgnoreResults(false), -Context(Ctx), -generatedLoc(false) - { + USRGenerator(ASTContext *Ctx, SmallVectorImpl &Buf, + const LangOptions &LangOpts) + : Buf(Buf), Out(Buf), Context(Ctx), LangOpts(LangOpts) { // Add the USR space prefix. Out << getUSRSpacePrefix(); } @@ -246,14 +243,13 @@ void USRGenerator::VisitFunctionDecl(const FunctionDecl *D) { } else Out << "@F@"; - PrintingPolicy Policy(Context->getLangOpts()); + PrintingPolicy Policy(LangOpts); // Forward references can have different template argument names. Suppress the // template argument names in constructors to make their USR more stable. Policy.SuppressTemplateArgsInCXXConstructors = true; D->getDeclName().print(Out, Policy); - ASTContext &Ctx = *Context; - if ((!Ctx.getLangOpts().CPlusPlus || D->isExternC()) && + if ((!LangOpts.CPlusPlus || D->isExternC()) && !D->hasAttr()) return; @@ -657,9 +653,10 @@ bool USRGenerator::GenLoc(const Decl *D, bool IncludeOffset) { return IgnoreResults; } -static void printQualifier(llvm::raw_ostream &Out, ASTContext &Ctx, NestedNameSpecifier *NNS) { +static void printQualifier(llvm::raw_ostream &Out, const LangOptions &LangOpts, + NestedNameSpecifier *NNS) { // FIXME: Encode the qualifier, don't just print it. - PrintingPolicy PO(Ctx.getLangOpts()); + PrintingPolicy PO(LangOpts); PO.SuppressTagKeyword = true; PO.SuppressUnwrittenScope = true; PO.ConstantArraySizeAsWritten = false; @@ -948,7 +945,7 @@ void USRGenerator::VisitType(QualType T) { } if (const DependentNameType *DNT = T->getAs()) { Out << '^'; - printQualifier(Out, Ctx, DNT->getQualifier()); + printQualifier(Out, LangOpts, DNT->getQualifier()); Out << ':' << DNT->getIdentifier()->getName(); return; } @@ -1090,7 +1087,7 @@ void USRGenerator::VisitUnresolvedUsingValueDecl(const UnresolvedUsingValueDecl return; VisitDeclContext(D->getDeclContext()); Out <<
[clang] [analyzer] EvalBinOpLL should return Unknown less often (PR #114222)
steakhal wrote: > Makes sense for me. It would be nice if we had a minimal reproducer for a > regression test instead of maintaining a large-ish test. I wish I could add a LIT test instead. Out of the get min/max val, or getKnownValue APIs, only the latter is used slightly more broadly, where the callsites can't assume it should return a valid result. Still, these APIs are barely used. In my case I have additional context in my downstream checker, where I can make this assumption and I was surprised to see crashes. So I had no other options but to craft a unittest for this. https://github.com/llvm/llvm-project/pull/114222 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] EvalBinOpLL should return Unknown less often (PR #114222)
steakhal wrote: @necto https://github.com/llvm/llvm-project/pull/114222 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] EvalBinOpLL should return Unknown less often (PR #114222)
https://github.com/steakhal created https://github.com/llvm/llvm-project/pull/114222 SValBuilder::getKnownValue, getMinValue, getMaxValue use SValBuilder::simplifySVal. simplifySVal does repeated simplification until a fixed-point is reached. A single step is done by SimpleSValBuilder::simplifySValOnce, using a Simplifier visitor. That will basically decompose SymSymExprs, and apply constant folding using the constraints we have in the State. Once it decomposes a SymSymExpr, it simplifies both sides and then uses the SValBuilder::evalBinOp to reconstruct the same - but now simpler - SymSymExpr, while applying some caching to remain performant. This decomposition, and then the subsequent re-composition poses new challenges to the SValBuilder::evalBinOp, which is built to handle expressions coming from real C/C++ code, thus applying some implicit assumptions. One previous assumption was that nobody would form an expression like "((int*)0) - q" (where q is an int pointer), because it doesn't really makes sense to write code like that. However, during simplification, we may end up with a call to evalBinOp similar to this. To me, simplifying a SymbolRef should never result in Unknown or Undef, unless it was Unknown or Undef initially or, during simplification we realized that it's a division by zero once we did the constant folding, etc. In the following case the simplified SVal should not become UnknownVal: ```c++ void top(char *p, char *q) { int diff = p - q; // diff: reg - reg if (!p) // p: NULL simplify(diff); // diff after simplification should be: 0(loc) - reg } ``` Returning Unknown from the simplifySVal can weaken analysis precision in other places too, such as in SValBuilder::getKnownValue, getMinValue, or getMaxValue because we call simplifySVal before doing anything else. For nonloc::SymbolVals, this loss of precision is critical, because for those the SymbolRef carries an accurate type of the encoded computation, thus we should at least have a conservative upper or lower bound that we could return from getMinValue or getMaxValue - yet we would just return nullptr. ```c++ const llvm::APSInt *SimpleSValBuilder::getKnownValue(ProgramStateRef state, SVal V) { return getConstValue(state, simplifySVal(state, V)); } const llvm::APSInt *SimpleSValBuilder::getMinValue(ProgramStateRef state, SVal V) { V = simplifySVal(state, V); if (const llvm::APSInt *Res = getConcreteValue(V)) return Res; if (SymbolRef Sym = V.getAsSymbol()) return state->getConstraintManager().getSymMinVal(state, Sym); return nullptr; } ``` For now, I don't plan to make the simplification bullet-proof, I'm just explaining why I made this change and what you need to look out for in the future if you see a similar issue. CPP-5750 >From 37ef4f72d42b01e70795bb2fa86138f7fbdd8077 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Wed, 30 Oct 2024 13:56:40 +0100 Subject: [PATCH] [analyzer] EvalBinOpLL should return Unknown less often SValBuilder::getKnownValue, getMinValue, getMaxValue use SValBuilder::simplifySVal. simplifySVal does repeated simplification until a fixed-point is reached. A single step is done by SimpleSValBuilder::simplifySValOnce, using a Simplifier visitor. That will basically decompose SymSymExprs, and apply constant folding using the constraints we have in the State. Once it decomposes a SymSymExpr, it simplifies both sides and then uses the SValBuilder::evalBinOp to reconstruct the same - but now simpler - SymSymExpr, while applying some caching to remain performant. This decomposition, and then the subsequent re-composition poses new challenges to the SValBuilder::evalBinOp, which is built to handle expressions coming from real C/C++ code, thus applying some implicit assumptions. One previous assumption was that nobody would form an expression like "((int*)0) - q" (where q is an int pointer), because it doesn't really makes sense to write code like that. However, during simplification, we may end up with a call to evalBinOp similar to this. To me, simplifying a SymbolRef should never result in Unknown or Undef, unless it was Unknown or Undef initially or, during simplification we realized that it's a division by zero once we did the constant folding, etc. In the following case the simplified SVal should not become UnknownVal: ```c++ void top(char *p, char *q) { int diff = p - q; // diff: reg - reg if (!p) // p: NULL simplify(diff); // diff after simplification should be: 0(loc) - reg } ``` Returning Unknown from the simplifySVal can weaken analysis precision in other places too, such as in SValBuilder::getKnownValue, getMinValue, or getMaxValue because we call simplifySVal before doing anything else. For nonloc::SymbolVals, this loss of precision is critical, because for those the SymbolRef carries an accurate type of the encoded comput
[clang] [clang][analyzer] Model more wide-character versions of string functions (PR #113908)
@@ -2524,8 +2625,33 @@ void CStringChecker::evalStdCopyCommon(CheckerContext &C, C.addTransition(State); } -void CStringChecker::evalMemset(CheckerContext &C, -const CallEvent &Call) const { +namespace { +CharUnits getSizeOfUnit(CharKind CK, CheckerContext &C) { + assert(CK == CK_Regular || CK == CK_Wide); + auto UnitType = + CK == CK_Regular ? C.getASTContext().CharTy : C.getASTContext().WCharTy; + + return C.getASTContext().getTypeSizeInChars(UnitType); +} + +SVal getCharValCast(CharKind CK, CheckerContext &C, ProgramStateRef State, +const Expr *CharE) { + const LocationContext *LCtx = C.getLocationContext(); + auto CharVal = State->getSVal(CharE, LCtx); + if (CK == CK_Regular) { +auto &svalBuilder = C.getSValBuilder(); +const auto &Ctx = C.getASTContext(); +// With the semantic of 'memset()', we should convert the CharVal to +// unsigned char. +return svalBuilder.evalCast(CharVal, Ctx.UnsignedCharTy, Ctx.IntTy); + } + return CharVal; +} + +} // namespace steakhal wrote: ```suggestion ``` https://github.com/llvm/llvm-project/pull/113908 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][Index][USR][NFC] Allow customizing langopts for USR generation (PR #109574)
steakhal wrote: @Endilll Could you please help me reaching out to the right set of reviewers? https://github.com/llvm/llvm-project/pull/109574 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][analyzer] Model more wide-character versions of string functions (PR #113908)
steakhal wrote: One more note: Anyone reviewing this, don't be afraid of the size of the PR. Basically 3/4 is just tests. https://github.com/llvm/llvm-project/pull/113908 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][analyzer] Model more wide-character versions of string functions (PR #113908)
@@ -2524,8 +2625,33 @@ void CStringChecker::evalStdCopyCommon(CheckerContext &C, C.addTransition(State); } -void CStringChecker::evalMemset(CheckerContext &C, -const CallEvent &Call) const { +namespace { +CharUnits getSizeOfUnit(CharKind CK, CheckerContext &C) { + assert(CK == CK_Regular || CK == CK_Wide); + auto UnitType = + CK == CK_Regular ? C.getASTContext().CharTy : C.getASTContext().WCharTy; + + return C.getASTContext().getTypeSizeInChars(UnitType); +} + +SVal getCharValCast(CharKind CK, CheckerContext &C, ProgramStateRef State, steakhal wrote: ```suggestion static SVal getCharValCast(CharKind CK, CheckerContext &C, ProgramStateRef State, ``` https://github.com/llvm/llvm-project/pull/113908 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][analyzer] Model more wide-character versions of string functions (PR #113908)
@@ -2524,8 +2625,33 @@ void CStringChecker::evalStdCopyCommon(CheckerContext &C, C.addTransition(State); } -void CStringChecker::evalMemset(CheckerContext &C, -const CallEvent &Call) const { +namespace { +CharUnits getSizeOfUnit(CharKind CK, CheckerContext &C) { steakhal wrote: ```suggestion static CharUnits getSizeOfUnit(CharKind CK, CheckerContext &C) { ``` https://github.com/llvm/llvm-project/pull/113908 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][analyzer] Model more wide-character versions of string functions (PR #113908)
@@ -2524,8 +2625,33 @@ void CStringChecker::evalStdCopyCommon(CheckerContext &C, C.addTransition(State); } -void CStringChecker::evalMemset(CheckerContext &C, -const CallEvent &Call) const { +namespace { +CharUnits getSizeOfUnit(CharKind CK, CheckerContext &C) { + assert(CK == CK_Regular || CK == CK_Wide); + auto UnitType = + CK == CK_Regular ? C.getASTContext().CharTy : C.getASTContext().WCharTy; + + return C.getASTContext().getTypeSizeInChars(UnitType); +} + +SVal getCharValCast(CharKind CK, CheckerContext &C, ProgramStateRef State, +const Expr *CharE) { + const LocationContext *LCtx = C.getLocationContext(); + auto CharVal = State->getSVal(CharE, LCtx); + if (CK == CK_Regular) { +auto &svalBuilder = C.getSValBuilder(); +const auto &Ctx = C.getASTContext(); +// With the semantic of 'memset()', we should convert the CharVal to +// unsigned char. +return svalBuilder.evalCast(CharVal, Ctx.UnsignedCharTy, Ctx.IntTy); steakhal wrote: ```suggestion const auto &Ctx = C.getASTContext(); // With the semantic of 'memset()', we should convert the CharVal to // unsigned char. return C.getSValBuilder().evalCast(CharVal, Ctx.UnsignedCharTy, Ctx.IntTy); ``` https://github.com/llvm/llvm-project/pull/113908 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][analyzer] Model more wide-character versions of string functions (PR #113908)
https://github.com/steakhal edited https://github.com/llvm/llvm-project/pull/113908 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][analyzer] Model more wide-character versions of string functions (PR #113908)
https://github.com/steakhal commented: Oh yes, I already reviewed this one. I had only a few comments, otherwise it's good to me. I'll get upstream folks involved on this one. https://github.com/llvm/llvm-project/pull/113908 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][analyzer] Bring checker 'alpha.unix.cstring.NotNullTerminated' out of alpha (PR #113899)
=?utf-8?q?Balázs_Kéri?= Message-ID: In-Reply-To: @@ -1899,6 +1899,29 @@ Check the size argument passed into C string functions for common erroneous patt // warn: potential buffer overflow } +.. _unix-cstring-NotNullTerminated: + +unix.cstring.NotNullTerminated (C) +"" +Check for arguments which are not null-terminated strings; +applies to the ``strlen``, ``strcpy``, ``strcat``, ``strcmp`` family of functions. + +Only very fundamental cases are detected where the passed memory block is +absolutely different from a null-terminated string. This checker does not +find if a memory buffer is passed where the terminating zero character +is missing. + +.. code-block:: c + + void test1() { + int l = strlen((char *)&test); // warn steakhal wrote: I think you wanted to take the address of test1. https://github.com/llvm/llvm-project/pull/113899 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Fix a crash from element region construction during `ArrayInitLoopExpr` analysis (PR #113570)
https://github.com/steakhal closed https://github.com/llvm/llvm-project/pull/113570 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Fix a crash from element region construction during `ArrayInitLoopExpr` analysis (PR #113570)
steakhal wrote: > @steakhal I wanted to wait for the pipeline to pass before merging. Shouldn't > we only merge patches with a green pipeline? The full pipeline will never be green. Downstream I wouldn't do this, but here I think we did what is reasonable. Waited enough. We can always revert with a single push of a button. https://github.com/llvm/llvm-project/pull/113570 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Fix a crash from element region construction during `ArrayInitLoopExpr` analysis (PR #113570)
https://github.com/steakhal approved this pull request. https://github.com/llvm/llvm-project/pull/113570 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Fix a crash from element region construction during `ArrayInitLoopExpr` analysis (PR #113570)
@@ -330,3 +330,41 @@ void no_crash() { } } // namespace crash + +namespace array_subscript_initializer { +struct S { + int x; +}; + +void no_crash() { + S arr[][2] = {{1, 2}}; + + const auto [a, b] = arr[0]; // no-crash steakhal wrote: My bad. Im too tired now. https://github.com/llvm/llvm-project/pull/113570 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Fix a crash from element region construction during `ArrayInitLoopExpr` analysis (PR #113570)
https://github.com/steakhal approved this pull request. Still looks good. I couldnt find the "no-crash" comments in the tests at the statements where they crashed but im fine without them too. https://github.com/llvm/llvm-project/pull/113570 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Fieldregion descript name (PR #112313)
https://github.com/steakhal closed https://github.com/llvm/llvm-project/pull/112313 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Untangle subcheckers of CStringChecker (PR #113312)
https://github.com/steakhal edited https://github.com/llvm/llvm-project/pull/113312 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Untangle subcheckers of CStringChecker (PR #113312)
@@ -702,9 +702,17 @@ ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C, state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc)); if (stateTrue && !stateFalse) { -// If the values are known to be equal, that's automatically an overlap. -emitOverlapBug(C, stateTrue, First.Expression, Second.Expression); -return nullptr; +if (Filter.CheckCStringBufferOverlap) { + // If the values are known to be equal, that's automatically an overlap. + emitOverlapBug(C, stateTrue, First.Expression, Second.Expression); + return nullptr; +} +// FIXME: We detected a fatal error here, we should stop analysis even if we +// chose not to emit a report here. However, as long as our overlap checker +// is in alpha, lets just pretend nothing happened. +//C.addSink(); steakhal wrote: Same about commented code. https://github.com/llvm/llvm-project/pull/113312 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Untangle subcheckers of CStringChecker (PR #113312)
@@ -579,8 +579,14 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C, // These checks are either enabled by the CString out-of-bounds checker // explicitly or implicitly by the Malloc checker. // In the latter case we only do modeling but do not emit warning. -if (!Filter.CheckCStringOutOfBounds) - return nullptr; +// FIXME: We detected a fatal error here, we should stop analysis even if we +// chose not to emit a report here. However, as long as our out-of-bounds +// checker is in alpha, lets just pretend nothing happened. +if (!Filter.CheckCStringOutOfBounds) { + //C.addSink(); steakhal wrote: Commented out code is a code smell. Use the comment "We should not sink execution here." https://github.com/llvm/llvm-project/pull/113312 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Fix a crash from element region construction during `ArrayInitLoopExpr` analysis (PR #113570)
https://github.com/steakhal approved this pull request. Could you please add `no-crash` comment in the test code where we would have crashed? Other than this, this looks good to me. Thabks for the prompt fix! https://github.com/llvm/llvm-project/pull/113570 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Untangle subcheckers of CStringChecker (PR #113312)
https://github.com/steakhal commented: I only reviewed this on my phone, but looks promising. I'll leave this PR for the others to finish. I agree with the direction. https://github.com/llvm/llvm-project/pull/113312 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Fix a crash from element region construction during `ArrayInitLoopExpr` analysis (PR #113570)
@@ -330,3 +330,47 @@ void no_crash() { } } // namespace crash + +namespace array_subscript_initializer { steakhal wrote: Could you please put these braces on the same line of the namespace/struct declarations? That way its slightly more compact. https://github.com/llvm/llvm-project/pull/113570 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Fix a crash from element region construction during `ArrayInitLoopExpr` analysis (PR #113570)
@@ -513,70 +513,25 @@ ProgramStateRef ExprEngine::updateObjectsUnderConstruction( static ProgramStateRef bindRequiredArrayElementToEnvironment(ProgramStateRef State, const ArrayInitLoopExpr *AILE, - const LocationContext *LCtx, SVal Idx) { - // The ctor in this case is guaranteed to be a copy ctor, otherwise we hit a - // compile time error. - // - // -ArrayInitLoopExpr<-- we're here - // |-OpaqueValueExpr - // | `-DeclRefExpr <-- match this - // `-CXXConstructExpr - // `-ImplicitCastExpr - // `-ArraySubscriptExpr - // |-ImplicitCastExpr - // | `-OpaqueValueExpr - // | `-DeclRefExpr - // `-ArrayInitIndexExpr - // - // The resulting expression might look like the one below in an implicit - // copy/move ctor. - // - // ArrayInitLoopExpr<-- we're here - // |-OpaqueValueExpr - // | `-MemberExpr <-- match this - // | (`-CXXStaticCastExpr) <-- move ctor only - // | `-DeclRefExpr - // `-CXXConstructExpr - // `-ArraySubscriptExpr - // |-ImplicitCastExpr - // | `-OpaqueValueExpr - // | `-MemberExpr - // | `-DeclRefExpr - // `-ArrayInitIndexExpr - // - // The resulting expression for a multidimensional array. - // ArrayInitLoopExpr <-- we're here - // |-OpaqueValueExpr - // | `-DeclRefExpr<-- match this - // `-ArrayInitLoopExpr - // |-OpaqueValueExpr - // | `-ArraySubscriptExpr - // | |-ImplicitCastExpr - // | | `-OpaqueValueExpr - // | | `-DeclRefExpr - // | `-ArrayInitIndexExpr - // `-CXXConstructExpr <-- extract this - // ` ... - - const auto *OVESrc = AILE->getCommonExpr()->getSourceExpr(); + const LocationContext *LCtx, NonLoc Idx) { + SValBuilder &SVB = State->getStateManager().getSValBuilder(); + MemRegionManager &MRMgr = SVB.getRegionManager(); + ASTContext &Ctx = SVB.getContext(); // HACK: There is no way we can put the index of the array element into the // CFG unless we unroll the loop, so we manually select and bind the required // parameter to the environment. - const auto *CE = + const Expr *SourceArray = AILE->getCommonExpr()->getSourceExpr(); + const auto *Ctor = cast(extractElementInitializerFromNestedAILE(AILE)); - SVal Base = UnknownVal(); - if (const auto *ME = dyn_cast(OVESrc)) -Base = State->getSVal(ME, LCtx); - else if (const auto *DRE = dyn_cast(OVESrc)) -Base = State->getLValue(cast(DRE->getDecl()), LCtx); - else -llvm_unreachable("ArrayInitLoopExpr contains unexpected source expression"); - - SVal NthElem = State->getLValue(CE->getType(), Idx, Base); + const SubRegion *SourceArrayRegion = steakhal wrote: Use const auto here as the type is already spelled in the statement. https://github.com/llvm/llvm-project/pull/113570 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Fix a crash from element region construction during `ArrayInitLoopExpr` analysis (PR #113570)
https://github.com/steakhal edited https://github.com/llvm/llvm-project/pull/113570 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Use dynamic type when invalidating by a member function call (PR #111138)
@@ -748,6 +747,22 @@ SVal CXXInstanceCall::getCXXThisVal() const { return ThisVal; } +const CXXRecordDecl *CXXInstanceCall::getDeclForDynamicType() const { + const MemRegion *R = getCXXThisVal().getAsRegion(); + if (!R) +return nullptr; + + DynamicTypeInfo DynType = getDynamicTypeInfo(getState(), R); + if (!DynType.isValid()) +return nullptr; + + QualType Ty = DynType.getType()->getPointeeType(); + if (Ty.isNull()) steakhal wrote: Fixed. https://github.com/llvm/llvm-project/pull/38 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Use dynamic type when invalidating by a member function call (PR #111138)
https://github.com/steakhal closed https://github.com/llvm/llvm-project/pull/38 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Use dynamic type when invalidating by a member function call (PR #111138)
@@ -797,6 +798,10 @@ RuntimeDefinition CXXInstanceCall::getRuntimeDefinition() const { return {}; } + const MemRegion *R = getCXXThisVal().getAsRegion(); + DynamicTypeInfo DynType = getDynamicTypeInfo(getState(), R); steakhal wrote: That's true. I figured it's an acceptable tradeoff for removing some duplication. I'll have a look what I can do here. https://github.com/llvm/llvm-project/pull/38 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Use dynamic type when invalidating by a member function call (PR #111138)
https://github.com/steakhal updated https://github.com/llvm/llvm-project/pull/38 >From d3e8f1fefc06e4bf52adc128b286d3c259aa3151 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Fri, 4 Oct 2024 13:46:09 +0200 Subject: [PATCH 1/3] [analyzer] Use dynamic type when invalidating by a member function call When instantiating "callable", the "class CallableType" nested type will only have a declaration in the copy for the instantiation - because it's not refered to directly by any other code that would need a complete definition. However, in the past, when conservative eval calling member function, we took the static type of the "this" expr, and looked up the CXXRecordDecl it refered to to see if it has any mutable members (to decide if it needs to refine invalidation or not). Unfortunately, that query needs a definition, and it asserts otherwise, thus we crashed. To fix this, we should consult the dynamic type of the object, because that will have the definition. I anyways added a check for "hasDefinition" just to be on the safe side. Fixes #77378 --- .../Core/PathSensitive/CallEvent.h| 4 ++ clang/lib/StaticAnalyzer/Core/CallEvent.cpp | 55 ++- clang/test/Analysis/const-method-call.cpp | 46 3 files changed, 80 insertions(+), 25 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h index 549c864dc91ef2..209f40b5b9f4c5 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h @@ -690,6 +690,10 @@ class CXXInstanceCall : public AnyFunctionCall { ValueList &Values, RegionAndSymbolInvalidationTraits *ETraits) const override; + /// Returns the decl refered to by the "dynamic type" of the current object. + /// This may be null. + const CXXRecordDecl *getDeclForDynamicType() const; + public: /// Returns the expression representing the implicit 'this' object. virtual const Expr *getCXXThisExpr() const { return nullptr; } diff --git a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp index 1efac6ac1c57f4..2c04b42629b756 100644 --- a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp +++ b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp @@ -711,18 +711,17 @@ void CXXInstanceCall::getExtraInvalidatedValues( if (const auto *D = cast_or_null(getDecl())) { if (!D->isConst()) return; -// Get the record decl for the class of 'This'. D->getParent() may return a -// base class decl, rather than the class of the instance which needs to be -// checked for mutable fields. -// TODO: We might as well look at the dynamic type of the object. -const Expr *Ex = getCXXThisExpr()->IgnoreParenBaseCasts(); -QualType T = Ex->getType(); -if (T->isPointerType()) // Arrow or implicit-this syntax? - T = T->getPointeeType(); -const CXXRecordDecl *ParentRecord = T->getAsCXXRecordDecl(); -assert(ParentRecord); + +// Get the record decl for the class of 'This'. D->getParent() may return +// a base class decl, rather than the class of the instance which needs to +// be checked for mutable fields. +const CXXRecordDecl *ParentRecord = getDeclForDynamicType(); +if (!ParentRecord || !ParentRecord->hasDefinition()) + return; + if (ParentRecord->hasMutableFields()) return; + // Preserve CXXThis. const MemRegion *ThisRegion = ThisVal.getAsRegion(); if (!ThisRegion) @@ -748,6 +747,22 @@ SVal CXXInstanceCall::getCXXThisVal() const { return ThisVal; } +const CXXRecordDecl *CXXInstanceCall::getDeclForDynamicType() const { + const MemRegion *R = getCXXThisVal().getAsRegion(); + if (!R) +return nullptr; + + DynamicTypeInfo DynType = getDynamicTypeInfo(getState(), R); + if (!DynType.isValid()) +return nullptr; + + QualType Ty = DynType.getType()->getPointeeType(); + if (Ty.isNull()) +return nullptr; + + return Ty->getAsCXXRecordDecl(); +} + RuntimeDefinition CXXInstanceCall::getRuntimeDefinition() const { // Do we have a decl at all? const Decl *D = getDecl(); @@ -759,21 +774,7 @@ RuntimeDefinition CXXInstanceCall::getRuntimeDefinition() const { if (!MD->isVirtual()) return AnyFunctionCall::getRuntimeDefinition(); - // Do we know the implicit 'this' object being called? - const MemRegion *R = getCXXThisVal().getAsRegion(); - if (!R) -return {}; - - // Do we know anything about the type of 'this'? - DynamicTypeInfo DynType = getDynamicTypeInfo(getState(), R); - if (!DynType.isValid()) -return {}; - - // Is the type a C++ class? (This is mostly a defensive check.) - QualType RegionType = DynType.getType()->getPointeeType(); - assert(!RegionType.isNull() && "DynamicTypeInfo should always be a pointer."); - - const CXXRecordDecl *RD = RegionType->getAsCXXRecordDecl(); + const CXXReco
[clang] [analyzer] Use dynamic type when invalidating by a member function call (PR #111138)
@@ -748,6 +747,22 @@ SVal CXXInstanceCall::getCXXThisVal() const { return ThisVal; } +const CXXRecordDecl *CXXInstanceCall::getDeclForDynamicType() const { + const MemRegion *R = getCXXThisVal().getAsRegion(); + if (!R) +return nullptr; + + DynamicTypeInfo DynType = getDynamicTypeInfo(getState(), R); + if (!DynType.isValid()) +return nullptr; + + QualType Ty = DynType.getType()->getPointeeType(); + if (Ty.isNull()) steakhal wrote: My reasoning was that while it was asserted here, it may not apply in general. Given that I abstract this code in a separate function, I may shouldn't enforce this invariant. Although, the `assert˙wouldn't fire in any of the tests we have. What I could do, is to have an assert and also a graceful return in that case. That way we would get the benefit of both world. https://github.com/llvm/llvm-project/pull/38 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer][StackAddressEscape] Remove redundant "returned to caller" suffix for compound literal (PR #112135)
https://github.com/steakhal approved this pull request. LGTM, thanks! https://github.com/llvm/llvm-project/pull/112135 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer][StackAddressEscape] Remove redundant "returned to caller" suffix for compound literal (PR #112135)
https://github.com/steakhal closed https://github.com/llvm/llvm-project/pull/112135 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Fieldregion descript name (PR #112313)
@@ -751,12 +758,20 @@ std::string MemRegion::getDescriptiveName(bool UseQuotes) const { } // Get variable name. - if (R && R->canPrintPrettyAsExpr()) { -R->printPrettyAsExpr(os); -if (UseQuotes) - return (llvm::Twine("'") + os.str() + ArrayIndices + "'").str(); -else - return (llvm::Twine(os.str()) + ArrayIndices).str(); + if (R) { +// MemRegion can be pretty printed. +if (R->canPrintPrettyAsExpr()) { + R->printPrettyAsExpr(os); + return QuoteIfNeeded(llvm::Twine(os.str()) + ArrayIndices); +} + +// FieldRegion may have ElementRegion as SuperRegion. +if (const auto *FR = R->getAs()) { steakhal wrote: ```suggestion if (const auto *FR = R->getAs()) { ``` https://github.com/llvm/llvm-project/pull/112313 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Fieldregion descript name (PR #112313)
https://github.com/steakhal approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/112313 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Fieldregion descript name (PR #112313)
https://github.com/steakhal edited https://github.com/llvm/llvm-project/pull/112313 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Use dynamic type when invalidating by a member function call (PR #111138)
steakhal wrote: Ping https://github.com/llvm/llvm-project/pull/38 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][Index][USR][NFC] Allow customizing langopts for USR generation (PR #109574)
steakhal wrote: Ping @AaronBallman https://github.com/llvm/llvm-project/pull/109574 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer][Solver] Teach SymbolicRangeInferrer about commutativity (2/2) (PR #112887)
https://github.com/steakhal closed https://github.com/llvm/llvm-project/pull/112887 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer][Solver][NFC] Cleanup const-correctness inside range-based solver (PR #112891)
https://github.com/steakhal closed https://github.com/llvm/llvm-project/pull/112891 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer][Solver][NFC] Cleanup const-correctness inside range-based solver (PR #112891)
https://github.com/steakhal updated https://github.com/llvm/llvm-project/pull/112891 >From 6b1702b1ea58e5fa4566069345d277e081cd1165 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Fri, 18 Oct 2024 14:12:46 +0200 Subject: [PATCH] [analyzer][Solver][NFC] Cleanup const-correctness inside range-based solver --- .../Core/RangeConstraintManager.cpp | 59 +-- 1 file changed, 28 insertions(+), 31 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index f0311b7028f56d..c39fa81109c853 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -1953,27 +1953,27 @@ class RangeConstraintManager : public RangedConstraintManager { const llvm::APSInt &To, const llvm::APSInt &Adjustment) override; private: - RangeSet::Factory F; + mutable RangeSet::Factory F; - RangeSet getRange(ProgramStateRef State, SymbolRef Sym); + RangeSet getRange(ProgramStateRef State, SymbolRef Sym) const; ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym, RangeSet Range); RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym, const llvm::APSInt &Int, - const llvm::APSInt &Adjustment); + const llvm::APSInt &Adjustment) const; RangeSet getSymGTRange(ProgramStateRef St, SymbolRef Sym, const llvm::APSInt &Int, - const llvm::APSInt &Adjustment); + const llvm::APSInt &Adjustment) const; RangeSet getSymLERange(ProgramStateRef St, SymbolRef Sym, const llvm::APSInt &Int, - const llvm::APSInt &Adjustment); + const llvm::APSInt &Adjustment) const; RangeSet getSymLERange(llvm::function_ref RS, const llvm::APSInt &Int, - const llvm::APSInt &Adjustment); + const llvm::APSInt &Adjustment) const; RangeSet getSymGERange(ProgramStateRef St, SymbolRef Sym, const llvm::APSInt &Int, - const llvm::APSInt &Adjustment); + const llvm::APSInt &Adjustment) const; }; //===--===// @@ -2880,21 +2880,18 @@ ConditionTruthVal RangeConstraintManager::checkNull(ProgramStateRef State, const llvm::APSInt *RangeConstraintManager::getSymVal(ProgramStateRef St, SymbolRef Sym) const { - auto &MutableSelf = const_cast(*this); - return MutableSelf.getRange(St, Sym).getConcreteValue(); + return getRange(St, Sym).getConcreteValue(); } const llvm::APSInt *RangeConstraintManager::getSymMinVal(ProgramStateRef St, SymbolRef Sym) const { - auto &MutableSelf = const_cast(*this); - RangeSet Range = MutableSelf.getRange(St, Sym); + RangeSet Range = getRange(St, Sym); return Range.isEmpty() ? nullptr : &Range.getMinValue(); } const llvm::APSInt *RangeConstraintManager::getSymMaxVal(ProgramStateRef St, SymbolRef Sym) const { - auto &MutableSelf = const_cast(*this); - RangeSet Range = MutableSelf.getRange(St, Sym); + RangeSet Range = getRange(St, Sym); return Range.isEmpty() ? nullptr : &Range.getMaxValue(); } @@ -3039,7 +3036,7 @@ RangeConstraintManager::removeDeadBindings(ProgramStateRef State, } RangeSet RangeConstraintManager::getRange(ProgramStateRef State, - SymbolRef Sym) { + SymbolRef Sym) const { return SymbolicRangeInferrer::inferRange(F, State, Sym); } @@ -3094,10 +3091,10 @@ RangeConstraintManager::assumeSymEQ(ProgramStateRef St, SymbolRef Sym, return setRange(St, Sym, New); } -RangeSet RangeConstraintManager::getSymLTRange(ProgramStateRef St, - SymbolRef Sym, - const llvm::APSInt &Int, - const llvm::APSInt &Adjustment) { +RangeSet +RangeConstraintManager::getSymLTRange(ProgramStateRef St, SymbolRef Sym, + const llvm::APSInt &Int, + const llvm::APSInt &Adjustment) const { // Before we do any real work, see if the value can even show up. APSIntType AdjustmentType(Adjustment); switch (AdjustmentType.testInRange(Int, true)) { @@ -3131,10 +3128,10 @@ RangeConstraintManager::assumeSymLT(ProgramStateRef St, SymbolRef Sym, return setRange(St, Sym, New); } -RangeSet RangeConstraintManager::getSymGTRange(ProgramStateRef St, -
[clang] [analyzer] Suppress out of bounds reports after weak loop assumptions (PR #109804)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: steakhal wrote: Thank you for sharing your roadmap. I didn't remember there was consensus on a direction of dropping execution paths in the related [RFC](https://discourse.llvm.org/t/loop-handling-improvement-plans/80417/13). Nevertheless the direction seems interesting, but without consensus I'm not sure we should pursue that in upstream LLVM. > (As I said earlier, I'm strongly opposed to a solution where the > visitor-based suppression is activated by each checker [or many checkers] > because then we'd be discarding a significant percentage (perhaps even the > majority) of work done by the analyzer.) My problem with this approach is still that it's intrusive. We can only cut those branches once all checkers agree on suppressing all the reports coming from those branches. And it's not only about upstream checkers. There could be downstream checkers, that for example could do much more with the ExplodedGraph than just reporting issues. One particular (slightly broken) checker that crosses my mind in this case is the `UnreachableCodeChecker` which checks the `Eng.hasWorkRemaining()` in the `checkEndAnalysis` callback to see if we explored all possible branches in the top-level function we just analyzed. If so, it checks what BasicBlocks in the CFG were left unvisited to diagnose unreachable code. I don't imply that this particular checker is of high quality or used in production (AFAIK); all I'm saying is that these make me skeptical of not exploring these branches. This makes the argument of future-proofing weaker to me. > Edit: the very recent bug report > https://github.com/llvm/llvm-project/issues/112527 is another example where > applying this heuristic to core.UndefinedBinaryOperatorResult would've > probably prevented the false positive. Another way of looking at that issue is, if we would have inlined the `std::array::size()` would would have known that the given branch is infeasible, like in [this](https://github.com/llvm/llvm-project/pull/109804) older case. In any case, I wouldn't draw far arching conclusions. > I don't think that discarding the execution paths with these "weak" > assumptions would be a really big change: > * It only discards paths, so it cannot introduce any false positives (unlike > the loop widening plans, which performs invalidation). > * I don't think that there is a systemic class of bugs that are only found on > these "weak assumption execution path" branches and can be distinguished > algorithmically from the highly unwanted results. I think different tool vendors may want to target different FP TP rates. This argument only holds if we assume it's fine to drop TPs in favor of droping FPs. To me, it always depends. As a general guideline, I agree with your sentiment though. With mass changes like this, I'm not sure anymore without empirical evidence. > > [...] and with options for disabling it while developing and preparing this > > new version. > > Obviously I can put the freshly developed stuff behind analyzer options if > there's demand for it. In the case of this `ArrayBoundV2` change I did not do > so, because `ArrayBoundV2` is an alpha checker and it's very noisy without > this patch, but if I extend this to stable checkers I'll probably introduce > an option. The `ArrayBoundV2` changes make sense, exactly because it's an `alpha` checker like you said. I'm not challenging that. > > > @steakhal @ anybody else Is my eager implementation acceptable for you > > > (in theory, assuming that it's cleaned up sufficiently)? If yes, I'll > > > start to work on implementing the suggestions of @isuckatcs and any other > > > observations. > > > > > > I don't think we reached consensus yet. > > I see. What can I do to approach consensus? Is there some aspect of either of > the implementations that we should discuss in more detail? Or should we ask > other contributors? I had a slight discomfort of implementing something in the Engine that could be done in higher-level, like in a BR visitor. What you said about dropping execution paths makes sense and would potentially bring a huge improvement in multiple aspects such as TP FP rate and even analysis time. Making research and experimentation tackling these issues is really important. The perfect is the enemy of good; so I'm fine now with doing this in the Engine. Although, I still have major discomfort of extending this beyond OOBv2, and skipping analyzing execution paths we do today - like you shared in your plans. To convince me, I'd need evidence of the net benefit without any major blocking issues (in other words loosing truly valuable TPs for example, or if it would turn out that some internal components couldn't be upgraded to the new loop modeling model). This is a lot more difficult,
[clang] [clang][analyzer][doc] Migrate ClangSA www FAQ section (PR #112831)
steakhal wrote: I more than welcome migrating from raw html to rst. - or basically to any other format. https://github.com/llvm/llvm-project/pull/112831 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer][Solver] Improve getSymVal and friends (1/2) (PR #112583)
@@ -2883,22 +2883,16 @@ const llvm::APSInt *RangeConstraintManager::getSymVal(ProgramStateRef St, const llvm::APSInt *RangeConstraintManager::getSymMinVal(ProgramStateRef St, SymbolRef Sym) const { - // TODO: Use `getRange()` like in `getSymVal()`, but that would make some - // of the reports of `BitwiseShiftChecker` look awkward. - const RangeSet *T = getConstraint(St, Sym); - if (!T || T->isEmpty()) -return nullptr; - return &T->getMinValue(); + auto &MutableSelf = const_cast(*this); + RangeSet Range = MutableSelf.getRange(St, Sym); + return Range.isEmpty() ? nullptr : &Range.getMinValue(); } const llvm::APSInt *RangeConstraintManager::getSymMaxVal(ProgramStateRef St, SymbolRef Sym) const { - // TODO: Use `getRange()` like in `getSymVal()`, but that would make some - // of the reports of `BitwiseShiftChecker` look awkward. - const RangeSet *T = getConstraint(St, Sym); - if (!T || T->isEmpty()) -return nullptr; - return &T->getMaxValue(); + auto &MutableSelf = const_cast(*this); + RangeSet Range = MutableSelf.getRange(St, Sym); steakhal wrote: The cleanup will follow in https://github.com/llvm/llvm-project/pull/112891 https://github.com/llvm/llvm-project/pull/112583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer][Solver][NFC] Cleanup const-correctness inside range-based solver (PR #112891)
https://github.com/steakhal created https://github.com/llvm/llvm-project/pull/112891 None >From e380dd4572c8914fbd87a812e74addba1f24304d Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Wed, 16 Oct 2024 15:53:20 +0200 Subject: [PATCH 1/2] [analyzer][Solver] Teach SymbolicRangeInferrer about commutativity (2/2) This patch should not introduce much overhead as it only does one more constraint map lookup, which is really quick. --- .../Core/RangeConstraintManager.cpp | 17 ++ clang/test/Analysis/unary-sym-expr.c | 33 +-- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index ecf7974c838650..f0311b7028f56d 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -1249,6 +1249,8 @@ class SymbolicRangeInferrer // calculate the effective range set by intersecting the range set // for A - B and the negated range set of B - A. getRangeForNegatedSymSym(SSE), +// If commutative, we may have constaints for the commuted variant. +getRangeCommutativeSymSym(SSE), // If Sym is a comparison expression (except <=>), // find any other comparisons with the same operands. // See function description. @@ -1485,6 +1487,21 @@ class SymbolicRangeInferrer Sym->getType()); } + std::optional getRangeCommutativeSymSym(const SymSymExpr *SSE) { +auto Op = SSE->getOpcode(); +bool IsCommutative = llvm::is_contained( +// ==, !=, |, &, +, *, ^ +{BO_EQ, BO_NE, BO_Or, BO_And, BO_Add, BO_Mul, BO_Xor}, Op); +if (!IsCommutative) + return std::nullopt; + +SymbolRef Commuted = State->getSymbolManager().getSymSymExpr( +SSE->getRHS(), Op, SSE->getLHS(), SSE->getType()); +if (const RangeSet *Range = getConstraint(State, Commuted)) + return *Range; +return std::nullopt; + } + // Returns ranges only for binary comparison operators (except <=>) // when left and right operands are symbolic values. // Finds any other comparisons with the same operands. diff --git a/clang/test/Analysis/unary-sym-expr.c b/clang/test/Analysis/unary-sym-expr.c index 7c4774f3cca82f..92e11b295bee7c 100644 --- a/clang/test/Analysis/unary-sym-expr.c +++ b/clang/test/Analysis/unary-sym-expr.c @@ -29,12 +29,39 @@ int test(int x, int y) { return 42; } -void test_svalbuilder_simplification(int x, int y) { +void test_svalbuilder_simplification_add(int x, int y) { if (x + y != 3) return; clang_analyzer_eval(-(x + y) == -3); // expected-warning{{TRUE}} - // FIXME Commutativity is not supported yet. - clang_analyzer_eval(-(y + x) == -3); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(-(y + x) == -3); // expected-warning{{TRUE}} +} + +void test_svalbuilder_simplification_mul(int x, int y) { + if (x * y != 3) +return; + clang_analyzer_eval(-(x * y) == -3); // expected-warning{{TRUE}} + clang_analyzer_eval(-(y * x) == -3); // expected-warning{{TRUE}} +} + +void test_svalbuilder_simplification_and(int x, int y) { + if ((x & y) != 3) +return; + clang_analyzer_eval(-(x & y) == -3); // expected-warning{{TRUE}} + clang_analyzer_eval(-(y & x) == -3); // expected-warning{{TRUE}} +} + +void test_svalbuilder_simplification_or(int x, int y) { + if ((x | y) != 3) +return; + clang_analyzer_eval(-(x | y) == -3); // expected-warning{{TRUE}} + clang_analyzer_eval(-(y | x) == -3); // expected-warning{{TRUE}} +} + +void test_svalbuilder_simplification_xor(int x, int y) { + if ((x ^ y) != 3) +return; + clang_analyzer_eval(-(x ^ y) == -3); // expected-warning{{TRUE}} + clang_analyzer_eval(-(y ^ x) == -3); // expected-warning{{TRUE}} } int test_fp(int flag) { >From 8c2d83cf20631f022a49ee5ace89a9ad4e11545a Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Fri, 18 Oct 2024 14:12:46 +0200 Subject: [PATCH 2/2] [analyzer][Solver][NFC] Cleanup const-correctness inside range-based solver --- .../Core/RangeConstraintManager.cpp | 59 +-- 1 file changed, 28 insertions(+), 31 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index f0311b7028f56d..c39fa81109c853 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -1953,27 +1953,27 @@ class RangeConstraintManager : public RangedConstraintManager { const llvm::APSInt &To, const llvm::APSInt &Adjustment) override; private: - RangeSet::Factory F; + mutable RangeSet::Factory F; - RangeSet getRange(ProgramStateRef State, SymbolRef Sym); + RangeSet getRange(ProgramStateRef State, SymbolRef Sym) const; ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym,
[clang] [clang][analyzer][doc] Update Clang SA www docs index.html (PR #112833)
https://github.com/steakhal approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/112833 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Improve solver (PR #112583)
@@ -50,31 +50,10 @@ void test2() { b = d; a -= d; - if (a != 0) -return; - - clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}} + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} - /* The BASELINE passes these checks ('wrning' is used to avoid lit to match) - // The parent state is already infeasible, look at this contradiction: - clang_analyzer_eval(b > 0); // expected-wrning{{FALSE}} - clang_analyzer_eval(b <= 0); // expected-wrning{{FALSE}} - // Crashes with expensive checks. - if (b > 0) { -clang_analyzer_warnIfReached(); // no-warning, OK + if (a != 0) return; - } - // Should not be reachable. - clang_analyzer_warnIfReached(); // expected-wrning{{REACHABLE}} - */ - // The parent state is already infeasible, but we realize that only if b is - // constrained. - clang_analyzer_eval(b > 0); // expected-warning{{UNKNOWN}} - clang_analyzer_eval(b <= 0); // expected-warning{{UNKNOWN}} - if (b > 0) { -clang_analyzer_warnIfReached(); // no-warning -return; - } - clang_analyzer_warnIfReached(); // no-warning + clang_analyzer_warnIfReached(); // no-warning: Even the parent state is unreachable. steakhal wrote: Fixed. https://github.com/llvm/llvm-project/pull/112583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer][Solver] Teach SymbolicRangeInferrer about commutativity (2/2) (PR #112887)
steakhal wrote: This was already reviewed in the linked PR. I just need a separate PR to merge multiple commits, as rebase-merges are not allowed on llvm. https://github.com/llvm/llvm-project/pull/112887 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer][Solver] Teach SymbolicRangeInferrer about commutativity (2/2) (PR #112887)
https://github.com/steakhal created https://github.com/llvm/llvm-project/pull/112887 This patch should not introduce much overhead as it only does one more constraint map lookup, which is really quick. Depends on #112583 >From e380dd4572c8914fbd87a812e74addba1f24304d Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Wed, 16 Oct 2024 15:53:20 +0200 Subject: [PATCH] [analyzer][Solver] Teach SymbolicRangeInferrer about commutativity (2/2) This patch should not introduce much overhead as it only does one more constraint map lookup, which is really quick. --- .../Core/RangeConstraintManager.cpp | 17 ++ clang/test/Analysis/unary-sym-expr.c | 33 +-- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index ecf7974c838650..f0311b7028f56d 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -1249,6 +1249,8 @@ class SymbolicRangeInferrer // calculate the effective range set by intersecting the range set // for A - B and the negated range set of B - A. getRangeForNegatedSymSym(SSE), +// If commutative, we may have constaints for the commuted variant. +getRangeCommutativeSymSym(SSE), // If Sym is a comparison expression (except <=>), // find any other comparisons with the same operands. // See function description. @@ -1485,6 +1487,21 @@ class SymbolicRangeInferrer Sym->getType()); } + std::optional getRangeCommutativeSymSym(const SymSymExpr *SSE) { +auto Op = SSE->getOpcode(); +bool IsCommutative = llvm::is_contained( +// ==, !=, |, &, +, *, ^ +{BO_EQ, BO_NE, BO_Or, BO_And, BO_Add, BO_Mul, BO_Xor}, Op); +if (!IsCommutative) + return std::nullopt; + +SymbolRef Commuted = State->getSymbolManager().getSymSymExpr( +SSE->getRHS(), Op, SSE->getLHS(), SSE->getType()); +if (const RangeSet *Range = getConstraint(State, Commuted)) + return *Range; +return std::nullopt; + } + // Returns ranges only for binary comparison operators (except <=>) // when left and right operands are symbolic values. // Finds any other comparisons with the same operands. diff --git a/clang/test/Analysis/unary-sym-expr.c b/clang/test/Analysis/unary-sym-expr.c index 7c4774f3cca82f..92e11b295bee7c 100644 --- a/clang/test/Analysis/unary-sym-expr.c +++ b/clang/test/Analysis/unary-sym-expr.c @@ -29,12 +29,39 @@ int test(int x, int y) { return 42; } -void test_svalbuilder_simplification(int x, int y) { +void test_svalbuilder_simplification_add(int x, int y) { if (x + y != 3) return; clang_analyzer_eval(-(x + y) == -3); // expected-warning{{TRUE}} - // FIXME Commutativity is not supported yet. - clang_analyzer_eval(-(y + x) == -3); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(-(y + x) == -3); // expected-warning{{TRUE}} +} + +void test_svalbuilder_simplification_mul(int x, int y) { + if (x * y != 3) +return; + clang_analyzer_eval(-(x * y) == -3); // expected-warning{{TRUE}} + clang_analyzer_eval(-(y * x) == -3); // expected-warning{{TRUE}} +} + +void test_svalbuilder_simplification_and(int x, int y) { + if ((x & y) != 3) +return; + clang_analyzer_eval(-(x & y) == -3); // expected-warning{{TRUE}} + clang_analyzer_eval(-(y & x) == -3); // expected-warning{{TRUE}} +} + +void test_svalbuilder_simplification_or(int x, int y) { + if ((x | y) != 3) +return; + clang_analyzer_eval(-(x | y) == -3); // expected-warning{{TRUE}} + clang_analyzer_eval(-(y | x) == -3); // expected-warning{{TRUE}} +} + +void test_svalbuilder_simplification_xor(int x, int y) { + if ((x ^ y) != 3) +return; + clang_analyzer_eval(-(x ^ y) == -3); // expected-warning{{TRUE}} + clang_analyzer_eval(-(y ^ x) == -3); // expected-warning{{TRUE}} } int test_fp(int flag) { ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer][Solver] Improve getSymVal and friends (1/2) (PR #112583)
https://github.com/steakhal closed https://github.com/llvm/llvm-project/pull/112583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer][Solver] Improve getSymVal and friends (1/2) (PR #112583)
https://github.com/steakhal edited https://github.com/llvm/llvm-project/pull/112583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Improve solver (PR #112583)
https://github.com/steakhal updated https://github.com/llvm/llvm-project/pull/112583 >From dfb86a3ddad88a9a97fff054af1bf86d746ee49d Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Wed, 16 Oct 2024 15:52:31 +0200 Subject: [PATCH] [analyzer][Solver] Improve getSymVal and friends (1/2) Instead of just doing a lookup in the existing constraints, let's use `getRange()` for dissambling SymSymExprs and inferring from the constraint system what end range set we can simplify. This means that `getSymVal` gets more expensive while getting smarter. I don't expect it to be an issue as this API is only rarely used, and `getRange` is used a lot more often - yet without any observable hit on performance. This patch also removes dead declarations of EQClass overloads. --- .../Checkers/BitwiseShiftChecker.cpp | 3 +- .../Core/RangeConstraintManager.cpp | 21 +- clang/test/Analysis/infeasible-sink.c | 29 +++ 3 files changed, 14 insertions(+), 39 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/BitwiseShiftChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BitwiseShiftChecker.cpp index 339927c165fe00..17f1214195b3ee 100644 --- a/clang/lib/StaticAnalyzer/Checkers/BitwiseShiftChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/BitwiseShiftChecker.cpp @@ -177,7 +177,8 @@ BugReportPtr BitwiseShiftValidator::checkOvershift() { RightOpStr = formatv(" '{0}'", ConcreteRight->getValue()); else { SValBuilder &SVB = Ctx.getSValBuilder(); -if (const llvm::APSInt *MinRight = SVB.getMinValue(FoldedState, Right)) { +if (const llvm::APSInt *MinRight = SVB.getMinValue(FoldedState, Right); +MinRight && *MinRight >= LHSBitWidth) { LowerBoundStr = formatv(" >= {0},", MinRight->getExtValue()); } } diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index 70d5a609681790..ecf7974c838650 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -1939,11 +1939,8 @@ class RangeConstraintManager : public RangedConstraintManager { RangeSet::Factory F; RangeSet getRange(ProgramStateRef State, SymbolRef Sym); - RangeSet getRange(ProgramStateRef State, EquivalenceClass Class); ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym, RangeSet Range); - ProgramStateRef setRange(ProgramStateRef State, EquivalenceClass Class, - RangeSet Range); RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym, const llvm::APSInt &Int, @@ -2866,24 +2863,22 @@ ConditionTruthVal RangeConstraintManager::checkNull(ProgramStateRef State, const llvm::APSInt *RangeConstraintManager::getSymVal(ProgramStateRef St, SymbolRef Sym) const { - const RangeSet *T = getConstraint(St, Sym); - return T ? T->getConcreteValue() : nullptr; + auto &MutableSelf = const_cast(*this); + return MutableSelf.getRange(St, Sym).getConcreteValue(); } const llvm::APSInt *RangeConstraintManager::getSymMinVal(ProgramStateRef St, SymbolRef Sym) const { - const RangeSet *T = getConstraint(St, Sym); - if (!T || T->isEmpty()) -return nullptr; - return &T->getMinValue(); + auto &MutableSelf = const_cast(*this); + RangeSet Range = MutableSelf.getRange(St, Sym); + return Range.isEmpty() ? nullptr : &Range.getMinValue(); } const llvm::APSInt *RangeConstraintManager::getSymMaxVal(ProgramStateRef St, SymbolRef Sym) const { - const RangeSet *T = getConstraint(St, Sym); - if (!T || T->isEmpty()) -return nullptr; - return &T->getMaxValue(); + auto &MutableSelf = const_cast(*this); + RangeSet Range = MutableSelf.getRange(St, Sym); + return Range.isEmpty() ? nullptr : &Range.getMaxValue(); } //===--===// diff --git a/clang/test/Analysis/infeasible-sink.c b/clang/test/Analysis/infeasible-sink.c index 9cb66fcac0b6be..a88ca42f27e441 100644 --- a/clang/test/Analysis/infeasible-sink.c +++ b/clang/test/Analysis/infeasible-sink.c @@ -38,7 +38,7 @@ void test1(int x) { } int a, b, c, d, e; -void test2() { +void test2(void) { if (a == 0) return; @@ -50,31 +50,10 @@ void test2() { b = d; a -= d; - if (a != 0) -return; - - clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}} + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} - /* The BASELINE passes these checks ('wrning' is used to avoid lit to match) - // The parent state is already infeasible, look at this contradiction: - clang_analyzer_eval(b > 0); // expected-wrning{{FALSE}} - clang_analyzer_eval(b <= 0); // expected-wrning{{FALSE}} - // Crashes with expensive
[clang] [analyzer] Improve solver (PR #112583)
@@ -2883,22 +2883,16 @@ const llvm::APSInt *RangeConstraintManager::getSymVal(ProgramStateRef St, const llvm::APSInt *RangeConstraintManager::getSymMinVal(ProgramStateRef St, SymbolRef Sym) const { - // TODO: Use `getRange()` like in `getSymVal()`, but that would make some - // of the reports of `BitwiseShiftChecker` look awkward. - const RangeSet *T = getConstraint(St, Sym); - if (!T || T->isEmpty()) -return nullptr; - return &T->getMinValue(); + auto &MutableSelf = const_cast(*this); + RangeSet Range = MutableSelf.getRange(St, Sym); + return Range.isEmpty() ? nullptr : &Range.getMinValue(); } const llvm::APSInt *RangeConstraintManager::getSymMaxVal(ProgramStateRef St, SymbolRef Sym) const { - // TODO: Use `getRange()` like in `getSymVal()`, but that would make some - // of the reports of `BitwiseShiftChecker` look awkward. - const RangeSet *T = getConstraint(St, Sym); - if (!T || T->isEmpty()) -return nullptr; - return &T->getMaxValue(); + auto &MutableSelf = const_cast(*this); + RangeSet Range = MutableSelf.getRange(St, Sym); steakhal wrote: Given this patch touches a sensitive component, I'd land it without spreading the const-correctness. It turns out other member functions could be also marked with `const` once this Factory member is mutable. I'll come back to this later separately. https://github.com/llvm/llvm-project/pull/112583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Improve solver (PR #112583)
@@ -2883,22 +2883,16 @@ const llvm::APSInt *RangeConstraintManager::getSymVal(ProgramStateRef St, const llvm::APSInt *RangeConstraintManager::getSymMinVal(ProgramStateRef St, SymbolRef Sym) const { - // TODO: Use `getRange()` like in `getSymVal()`, but that would make some - // of the reports of `BitwiseShiftChecker` look awkward. - const RangeSet *T = getConstraint(St, Sym); - if (!T || T->isEmpty()) -return nullptr; - return &T->getMinValue(); + auto &MutableSelf = const_cast(*this); + RangeSet Range = MutableSelf.getRange(St, Sym); + return Range.isEmpty() ? nullptr : &Range.getMinValue(); } const llvm::APSInt *RangeConstraintManager::getSymMaxVal(ProgramStateRef St, SymbolRef Sym) const { - // TODO: Use `getRange()` like in `getSymVal()`, but that would make some - // of the reports of `BitwiseShiftChecker` look awkward. - const RangeSet *T = getConstraint(St, Sym); - if (!T || T->isEmpty()) -return nullptr; - return &T->getMaxValue(); + auto &MutableSelf = const_cast(*this); + RangeSet Range = MutableSelf.getRange(St, Sym); steakhal wrote: IDK why I haven't thought about that, but now you say that it makes sense :D Let me see what can I do here. https://github.com/llvm/llvm-project/pull/112583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Improve solver (PR #112583)
https://github.com/steakhal updated https://github.com/llvm/llvm-project/pull/112583 >From 4bf74c7f9caf89b67e6001b601f70741c1a672cc Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Wed, 16 Oct 2024 15:52:31 +0200 Subject: [PATCH 1/7] [analyzer][Solver] Improve getSymVal and friends Instead of just doing a lookup in the existing constraints, let's use `getRange()` for dissambling SymSymExprs and inferring from the constraint system what end range set we can simplify. This means that `getSymVal` gets more expensive while getting smarter. I don't expect it to be an issue as this API is only rarely used, and `getRange` is used a lot more often - yet without any observable hit on performance. This patch also removes dead declarations of EQClass overloads. --- .../Core/RangeConstraintManager.cpp | 11 + clang/test/Analysis/infeasible-sink.c | 23 +-- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index 70d5a609681790..d1999c86f8ecd8 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -1939,11 +1939,8 @@ class RangeConstraintManager : public RangedConstraintManager { RangeSet::Factory F; RangeSet getRange(ProgramStateRef State, SymbolRef Sym); - RangeSet getRange(ProgramStateRef State, EquivalenceClass Class); ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym, RangeSet Range); - ProgramStateRef setRange(ProgramStateRef State, EquivalenceClass Class, - RangeSet Range); RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym, const llvm::APSInt &Int, @@ -2866,12 +2863,14 @@ ConditionTruthVal RangeConstraintManager::checkNull(ProgramStateRef State, const llvm::APSInt *RangeConstraintManager::getSymVal(ProgramStateRef St, SymbolRef Sym) const { - const RangeSet *T = getConstraint(St, Sym); - return T ? T->getConcreteValue() : nullptr; + auto &MutableSelf = const_cast(*this); + return MutableSelf.getRange(St, Sym).getConcreteValue(); } const llvm::APSInt *RangeConstraintManager::getSymMinVal(ProgramStateRef St, SymbolRef Sym) const { + // TODO: Use `getRange()` like in `getSymVal()`, but that would make some + // of the reports of `BitwiseShiftChecker` look awkward. const RangeSet *T = getConstraint(St, Sym); if (!T || T->isEmpty()) return nullptr; @@ -2880,6 +2879,8 @@ const llvm::APSInt *RangeConstraintManager::getSymMinVal(ProgramStateRef St, const llvm::APSInt *RangeConstraintManager::getSymMaxVal(ProgramStateRef St, SymbolRef Sym) const { + // TODO: Use `getRange()` like in `getSymVal()`, but that would make some + // of the reports of `BitwiseShiftChecker` look awkward. const RangeSet *T = getConstraint(St, Sym); if (!T || T->isEmpty()) return nullptr; diff --git a/clang/test/Analysis/infeasible-sink.c b/clang/test/Analysis/infeasible-sink.c index 9cb66fcac0b6be..d5b4e82268f5b6 100644 --- a/clang/test/Analysis/infeasible-sink.c +++ b/clang/test/Analysis/infeasible-sink.c @@ -38,7 +38,7 @@ void test1(int x) { } int a, b, c, d, e; -void test2() { +void test2(void) { if (a == 0) return; @@ -50,28 +50,17 @@ void test2() { b = d; a -= d; + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + if (a != 0) return; - clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}} - - /* The BASELINE passes these checks ('wrning' is used to avoid lit to match) - // The parent state is already infeasible, look at this contradiction: - clang_analyzer_eval(b > 0); // expected-wrning{{FALSE}} - clang_analyzer_eval(b <= 0); // expected-wrning{{FALSE}} - // Crashes with expensive checks. - if (b > 0) { -clang_analyzer_warnIfReached(); // no-warning, OK -return; - } - // Should not be reachable. - clang_analyzer_warnIfReached(); // expected-wrning{{REACHABLE}} - */ + clang_analyzer_warnIfReached(); // no-warning: Even the parent state is unreachable. // The parent state is already infeasible, but we realize that only if b is // constrained. - clang_analyzer_eval(b > 0); // expected-warning{{UNKNOWN}} - clang_analyzer_eval(b <= 0); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(b > 0); // no-warning + clang_analyzer_eval(b <= 0); // no-warning if (b > 0) { clang_analyzer_warnIfReached(); // no-warning return; >From be65bb0694be3353fda260ab73777aea28de9113 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Wed, 16 Oct 2024 15:53:20 +0200 Subject: [PATCH 2/7] [analyzer][Solver] Teach SymbolicRangeInferrer about commutativity This patch shou
[clang] [analyzer] Improve solver (PR #112583)
https://github.com/steakhal updated https://github.com/llvm/llvm-project/pull/112583 >From 4bf74c7f9caf89b67e6001b601f70741c1a672cc Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Wed, 16 Oct 2024 15:52:31 +0200 Subject: [PATCH 1/7] [analyzer][Solver] Improve getSymVal and friends Instead of just doing a lookup in the existing constraints, let's use `getRange()` for dissambling SymSymExprs and inferring from the constraint system what end range set we can simplify. This means that `getSymVal` gets more expensive while getting smarter. I don't expect it to be an issue as this API is only rarely used, and `getRange` is used a lot more often - yet without any observable hit on performance. This patch also removes dead declarations of EQClass overloads. --- .../Core/RangeConstraintManager.cpp | 11 + clang/test/Analysis/infeasible-sink.c | 23 +-- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index 70d5a609681790..d1999c86f8ecd8 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -1939,11 +1939,8 @@ class RangeConstraintManager : public RangedConstraintManager { RangeSet::Factory F; RangeSet getRange(ProgramStateRef State, SymbolRef Sym); - RangeSet getRange(ProgramStateRef State, EquivalenceClass Class); ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym, RangeSet Range); - ProgramStateRef setRange(ProgramStateRef State, EquivalenceClass Class, - RangeSet Range); RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym, const llvm::APSInt &Int, @@ -2866,12 +2863,14 @@ ConditionTruthVal RangeConstraintManager::checkNull(ProgramStateRef State, const llvm::APSInt *RangeConstraintManager::getSymVal(ProgramStateRef St, SymbolRef Sym) const { - const RangeSet *T = getConstraint(St, Sym); - return T ? T->getConcreteValue() : nullptr; + auto &MutableSelf = const_cast(*this); + return MutableSelf.getRange(St, Sym).getConcreteValue(); } const llvm::APSInt *RangeConstraintManager::getSymMinVal(ProgramStateRef St, SymbolRef Sym) const { + // TODO: Use `getRange()` like in `getSymVal()`, but that would make some + // of the reports of `BitwiseShiftChecker` look awkward. const RangeSet *T = getConstraint(St, Sym); if (!T || T->isEmpty()) return nullptr; @@ -2880,6 +2879,8 @@ const llvm::APSInt *RangeConstraintManager::getSymMinVal(ProgramStateRef St, const llvm::APSInt *RangeConstraintManager::getSymMaxVal(ProgramStateRef St, SymbolRef Sym) const { + // TODO: Use `getRange()` like in `getSymVal()`, but that would make some + // of the reports of `BitwiseShiftChecker` look awkward. const RangeSet *T = getConstraint(St, Sym); if (!T || T->isEmpty()) return nullptr; diff --git a/clang/test/Analysis/infeasible-sink.c b/clang/test/Analysis/infeasible-sink.c index 9cb66fcac0b6be..d5b4e82268f5b6 100644 --- a/clang/test/Analysis/infeasible-sink.c +++ b/clang/test/Analysis/infeasible-sink.c @@ -38,7 +38,7 @@ void test1(int x) { } int a, b, c, d, e; -void test2() { +void test2(void) { if (a == 0) return; @@ -50,28 +50,17 @@ void test2() { b = d; a -= d; + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + if (a != 0) return; - clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}} - - /* The BASELINE passes these checks ('wrning' is used to avoid lit to match) - // The parent state is already infeasible, look at this contradiction: - clang_analyzer_eval(b > 0); // expected-wrning{{FALSE}} - clang_analyzer_eval(b <= 0); // expected-wrning{{FALSE}} - // Crashes with expensive checks. - if (b > 0) { -clang_analyzer_warnIfReached(); // no-warning, OK -return; - } - // Should not be reachable. - clang_analyzer_warnIfReached(); // expected-wrning{{REACHABLE}} - */ + clang_analyzer_warnIfReached(); // no-warning: Even the parent state is unreachable. // The parent state is already infeasible, but we realize that only if b is // constrained. - clang_analyzer_eval(b > 0); // expected-warning{{UNKNOWN}} - clang_analyzer_eval(b <= 0); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(b > 0); // no-warning + clang_analyzer_eval(b <= 0); // no-warning if (b > 0) { clang_analyzer_warnIfReached(); // no-warning return; >From be65bb0694be3353fda260ab73777aea28de9113 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Wed, 16 Oct 2024 15:53:20 +0200 Subject: [PATCH 2/7] [analyzer][Solver] Teach SymbolicRangeInferrer about commutativity This patch shou
[clang] [analyzer] Improve solver (PR #112583)
@@ -2883,22 +2883,16 @@ const llvm::APSInt *RangeConstraintManager::getSymVal(ProgramStateRef St, const llvm::APSInt *RangeConstraintManager::getSymMinVal(ProgramStateRef St, SymbolRef Sym) const { - // TODO: Use `getRange()` like in `getSymVal()`, but that would make some - // of the reports of `BitwiseShiftChecker` look awkward. - const RangeSet *T = getConstraint(St, Sym); - if (!T || T->isEmpty()) -return nullptr; - return &T->getMinValue(); + auto &MutableSelf = const_cast(*this); + RangeSet Range = MutableSelf.getRange(St, Sym); + return Range.isEmpty() ? nullptr : &Range.getMinValue(); } const llvm::APSInt *RangeConstraintManager::getSymMaxVal(ProgramStateRef St, SymbolRef Sym) const { - // TODO: Use `getRange()` like in `getSymVal()`, but that would make some - // of the reports of `BitwiseShiftChecker` look awkward. - const RangeSet *T = getConstraint(St, Sym); - if (!T || T->isEmpty()) -return nullptr; - return &T->getMaxValue(); + auto &MutableSelf = const_cast(*this); + RangeSet Range = MutableSelf.getRange(St, Sym); steakhal wrote: I had the same question. `SymbolicRangeInferrer` needs access to the private `RangeSet::Factory F` member of the `RangeConstraintManager` object. That Factor does internal caching of the RangeSets it hands out. https://github.com/llvm/llvm-project/pull/112583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Improve solver (PR #112583)
https://github.com/steakhal updated https://github.com/llvm/llvm-project/pull/112583 >From 4bf74c7f9caf89b67e6001b601f70741c1a672cc Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Wed, 16 Oct 2024 15:52:31 +0200 Subject: [PATCH 1/5] [analyzer][Solver] Improve getSymVal and friends Instead of just doing a lookup in the existing constraints, let's use `getRange()` for dissambling SymSymExprs and inferring from the constraint system what end range set we can simplify. This means that `getSymVal` gets more expensive while getting smarter. I don't expect it to be an issue as this API is only rarely used, and `getRange` is used a lot more often - yet without any observable hit on performance. This patch also removes dead declarations of EQClass overloads. --- .../Core/RangeConstraintManager.cpp | 11 + clang/test/Analysis/infeasible-sink.c | 23 +-- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index 70d5a609681790..d1999c86f8ecd8 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -1939,11 +1939,8 @@ class RangeConstraintManager : public RangedConstraintManager { RangeSet::Factory F; RangeSet getRange(ProgramStateRef State, SymbolRef Sym); - RangeSet getRange(ProgramStateRef State, EquivalenceClass Class); ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym, RangeSet Range); - ProgramStateRef setRange(ProgramStateRef State, EquivalenceClass Class, - RangeSet Range); RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym, const llvm::APSInt &Int, @@ -2866,12 +2863,14 @@ ConditionTruthVal RangeConstraintManager::checkNull(ProgramStateRef State, const llvm::APSInt *RangeConstraintManager::getSymVal(ProgramStateRef St, SymbolRef Sym) const { - const RangeSet *T = getConstraint(St, Sym); - return T ? T->getConcreteValue() : nullptr; + auto &MutableSelf = const_cast(*this); + return MutableSelf.getRange(St, Sym).getConcreteValue(); } const llvm::APSInt *RangeConstraintManager::getSymMinVal(ProgramStateRef St, SymbolRef Sym) const { + // TODO: Use `getRange()` like in `getSymVal()`, but that would make some + // of the reports of `BitwiseShiftChecker` look awkward. const RangeSet *T = getConstraint(St, Sym); if (!T || T->isEmpty()) return nullptr; @@ -2880,6 +2879,8 @@ const llvm::APSInt *RangeConstraintManager::getSymMinVal(ProgramStateRef St, const llvm::APSInt *RangeConstraintManager::getSymMaxVal(ProgramStateRef St, SymbolRef Sym) const { + // TODO: Use `getRange()` like in `getSymVal()`, but that would make some + // of the reports of `BitwiseShiftChecker` look awkward. const RangeSet *T = getConstraint(St, Sym); if (!T || T->isEmpty()) return nullptr; diff --git a/clang/test/Analysis/infeasible-sink.c b/clang/test/Analysis/infeasible-sink.c index 9cb66fcac0b6be..d5b4e82268f5b6 100644 --- a/clang/test/Analysis/infeasible-sink.c +++ b/clang/test/Analysis/infeasible-sink.c @@ -38,7 +38,7 @@ void test1(int x) { } int a, b, c, d, e; -void test2() { +void test2(void) { if (a == 0) return; @@ -50,28 +50,17 @@ void test2() { b = d; a -= d; + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + if (a != 0) return; - clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}} - - /* The BASELINE passes these checks ('wrning' is used to avoid lit to match) - // The parent state is already infeasible, look at this contradiction: - clang_analyzer_eval(b > 0); // expected-wrning{{FALSE}} - clang_analyzer_eval(b <= 0); // expected-wrning{{FALSE}} - // Crashes with expensive checks. - if (b > 0) { -clang_analyzer_warnIfReached(); // no-warning, OK -return; - } - // Should not be reachable. - clang_analyzer_warnIfReached(); // expected-wrning{{REACHABLE}} - */ + clang_analyzer_warnIfReached(); // no-warning: Even the parent state is unreachable. // The parent state is already infeasible, but we realize that only if b is // constrained. - clang_analyzer_eval(b > 0); // expected-warning{{UNKNOWN}} - clang_analyzer_eval(b <= 0); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(b > 0); // no-warning + clang_analyzer_eval(b <= 0); // no-warning if (b > 0) { clang_analyzer_warnIfReached(); // no-warning return; >From be65bb0694be3353fda260ab73777aea28de9113 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Wed, 16 Oct 2024 15:53:20 +0200 Subject: [PATCH 2/5] [analyzer][Solver] Teach SymbolicRangeInferrer about commutativity This patch shou
[clang] [analyzer] Improve solver (PR #112583)
@@ -2866,12 +2877,14 @@ ConditionTruthVal RangeConstraintManager::checkNull(ProgramStateRef State, const llvm::APSInt *RangeConstraintManager::getSymVal(ProgramStateRef St, SymbolRef Sym) const { - const RangeSet *T = getConstraint(St, Sym); - return T ? T->getConcreteValue() : nullptr; + auto &MutableSelf = const_cast(*this); + return MutableSelf.getRange(St, Sym).getConcreteValue(); } const llvm::APSInt *RangeConstraintManager::getSymMinVal(ProgramStateRef St, SymbolRef Sym) const { + // TODO: Use `getRange()` like in `getSymVal()`, but that would make some + // of the reports of `BitwiseShiftChecker` look awkward. steakhal wrote: Yea, have a look at `clang/test/Analysis/bitwise-shift-common.c` now. https://github.com/llvm/llvm-project/pull/112583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Improve solver (PR #112583)
@@ -1485,6 +1487,18 @@ class SymbolicRangeInferrer Sym->getType()); } + std::optional getRangeCommutativeSymSym(const SymSymExpr *SSE) { +bool IsCommutative = llvm::is_contained({BO_Add, BO_Mul}, SSE->getOpcode()); +if (!IsCommutative) + return std::nullopt; + +SymbolRef Commuted = State->getSymbolManager().getSymSymExpr( +SSE->getRHS(), BO_Add, SSE->getLHS(), SSE->getType()); steakhal wrote: Uhh, yes. That was a nasty bug. Thanks for spotting it! https://github.com/llvm/llvm-project/pull/112583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Improve solver (PR #112583)
https://github.com/steakhal created https://github.com/llvm/llvm-project/pull/112583 This PR should be review commit by commit, and ought to be rebase-merged. See the individual commits for their summary. Once these two commits are reviewed, I'll force push to this PR the first commit, and open a new PR for the second commit because rebase-merge is not an option for upstream llvm, but I still wanted to let you know the whole picture, and that these two commits are related. >From 4bf74c7f9caf89b67e6001b601f70741c1a672cc Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Wed, 16 Oct 2024 15:52:31 +0200 Subject: [PATCH 1/2] [analyzer][Solver] Improve getSymVal and friends Instead of just doing a lookup in the existing constraints, let's use `getRange()` for dissambling SymSymExprs and inferring from the constraint system what end range set we can simplify. This means that `getSymVal` gets more expensive while getting smarter. I don't expect it to be an issue as this API is only rarely used, and `getRange` is used a lot more often - yet without any observable hit on performance. This patch also removes dead declarations of EQClass overloads. --- .../Core/RangeConstraintManager.cpp | 11 + clang/test/Analysis/infeasible-sink.c | 23 +-- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index 70d5a609681790..d1999c86f8ecd8 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -1939,11 +1939,8 @@ class RangeConstraintManager : public RangedConstraintManager { RangeSet::Factory F; RangeSet getRange(ProgramStateRef State, SymbolRef Sym); - RangeSet getRange(ProgramStateRef State, EquivalenceClass Class); ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym, RangeSet Range); - ProgramStateRef setRange(ProgramStateRef State, EquivalenceClass Class, - RangeSet Range); RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym, const llvm::APSInt &Int, @@ -2866,12 +2863,14 @@ ConditionTruthVal RangeConstraintManager::checkNull(ProgramStateRef State, const llvm::APSInt *RangeConstraintManager::getSymVal(ProgramStateRef St, SymbolRef Sym) const { - const RangeSet *T = getConstraint(St, Sym); - return T ? T->getConcreteValue() : nullptr; + auto &MutableSelf = const_cast(*this); + return MutableSelf.getRange(St, Sym).getConcreteValue(); } const llvm::APSInt *RangeConstraintManager::getSymMinVal(ProgramStateRef St, SymbolRef Sym) const { + // TODO: Use `getRange()` like in `getSymVal()`, but that would make some + // of the reports of `BitwiseShiftChecker` look awkward. const RangeSet *T = getConstraint(St, Sym); if (!T || T->isEmpty()) return nullptr; @@ -2880,6 +2879,8 @@ const llvm::APSInt *RangeConstraintManager::getSymMinVal(ProgramStateRef St, const llvm::APSInt *RangeConstraintManager::getSymMaxVal(ProgramStateRef St, SymbolRef Sym) const { + // TODO: Use `getRange()` like in `getSymVal()`, but that would make some + // of the reports of `BitwiseShiftChecker` look awkward. const RangeSet *T = getConstraint(St, Sym); if (!T || T->isEmpty()) return nullptr; diff --git a/clang/test/Analysis/infeasible-sink.c b/clang/test/Analysis/infeasible-sink.c index 9cb66fcac0b6be..d5b4e82268f5b6 100644 --- a/clang/test/Analysis/infeasible-sink.c +++ b/clang/test/Analysis/infeasible-sink.c @@ -38,7 +38,7 @@ void test1(int x) { } int a, b, c, d, e; -void test2() { +void test2(void) { if (a == 0) return; @@ -50,28 +50,17 @@ void test2() { b = d; a -= d; + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + if (a != 0) return; - clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}} - - /* The BASELINE passes these checks ('wrning' is used to avoid lit to match) - // The parent state is already infeasible, look at this contradiction: - clang_analyzer_eval(b > 0); // expected-wrning{{FALSE}} - clang_analyzer_eval(b <= 0); // expected-wrning{{FALSE}} - // Crashes with expensive checks. - if (b > 0) { -clang_analyzer_warnIfReached(); // no-warning, OK -return; - } - // Should not be reachable. - clang_analyzer_warnIfReached(); // expected-wrning{{REACHABLE}} - */ + clang_analyzer_warnIfReached(); // no-warning: Even the parent state is unreachable. // The parent state is already infeasible, but we realize that only if b is // constrained. - clang_analyzer_eval(b > 0); // expected-warning{{UNKNOWN}} - clang_analyzer_eval(b <= 0); // expected-warning{{UNKNOWN}} + clang_an
[clang] [clang-tools-extra] [analyzer][clang-tidy][NFC] Clean up eagerly-assume handling (PR #112209)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: @@ -299,13 +299,12 @@ ANALYZER_OPTION( ANALYZER_OPTION( bool, ShouldEagerlyAssume, "eagerly-assume", -"Whether we should eagerly assume evaluations of conditionals, thus, " -"bifurcating the path. This indicates how the engine should handle " -"expressions such as: 'x = (y != 0)'. When this is true then the " -"subexpression 'y != 0' will be eagerly assumed to be true or false, thus " -"evaluating it to the integers 0 or 1 respectively. The upside is that " -"this can increase analysis precision until we have a better way to lazily " -"evaluate such logic. The downside is that it eagerly bifurcates paths.", +"If this is enabled (the default behavior), when the analyzer encounters " +"a comparison operator or logical negation, it immediately splits the " steakhal wrote: Thanks for the explanation. https://github.com/llvm/llvm-project/pull/112209 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Suppress out of bounds reports after weak loop assumptions (PR #109804)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: steakhal wrote: > * I think it's likely (although not guaranteed) that this heuristic would be > helpful for other checkers as well, and if we want to activate it for all > checkers, then it must be done in an eager way (because eagerly sinking lots > of paths is significantly better performance-wise than lazily discarding all > those results). Alright. So, if I understand you is not only to fix the OOBv2 checker but also to later expand the scope to the whole engine - and skip exploring paths that were explored before. I think that deserves a deeper discussion, but I also understand that we need implementation experience for experimentation back the RFC with data. This is wasn't clear to me initially. The PR title and summary refereed to suppression, which didn't motivate this change well enough to touch the engine. Last time I've checked the Loop hanging RFC [discuss post](https://discourse.llvm.org/t/loop-handling-improvement-plans/80417/15), I though you finally decided to take @haoNoQ's suggestion - and suppress issues instead of changing exploration strategies (if I understood that correctly). That said, if this patch only want's to suppress reports, then a bug report visitor is the right choice to me. If we shoot for something bigger (and this is just the first step), than it starts to make sense. However, achieving something like that is going to be really really tough, so the chances are slim. I wonder if you have plans for doing it incrementally and with options for disabling it while developing and preparing this new version. > * I agree with @Szelethus that this heuristic is handling an engine issue, so > its logic should be in the engine itself. This point partially resonates with me that this is both a checker and engine issue, thus a hard and important problem to solve. > * The `OrigCursor` trickery is not unacceptable by itself, but the added > complexity is one disadvantage of the visitor approach. I highly agree that that approach is highly unorthodox, and surprising. I have two observations for this. 1st, if we always had a ExplodedNode tag for taking an **eager** decision (and some similar tag when eager assumptions are disabled), then I wouldn't need that hack. You can see in my commit history there that this was my initial idea - but I realized that we may have good reasons for disabling eager assumptions, and in that case I wouldn't have those tags anymore :s 2nd, It's a convenient lie to have a stripped single-path "exploded graph" from a BugReport visitor, because then it's unambiguous what parent nodes it chooses, thus developers can focus on what matters the most. However, like in this case, it would be nice if we could do arbitrary things. If we had a better API doing this, it wouldn't feel odd anymore. So there are ways to improve this. > * The "enumerate all subexpressions of the loop condition and check the > number of times they were assumed to be true/false" approach can handle more > of my testcases than my original implementation, but it can produce very > counter-intuitive results: [...] Yes, I agree that this "try all subexpressions" (that I implemented) isn't bulletproof. However, I only did it in slightly more than a day. I'd say, if this is a concern, I can give it a bit of a polishing, and put it to the bench to see how it works in life. I think if you have concerns about specifics, let's discuss that under that PR https://github.com/llvm/llvm-project/pull/111258, such that we all have the code in front of us. I think we can get fairly far once we can settle on some concrete examples where it would be surprising with the current implementation, and refine it from there. > @steakhal @ anybody else Is my eager implementation acceptable for you (in > theory, assuming that it's cleaned up sufficiently)? If yes, I'll start to > work on implementing the suggestions of @isuckatcs and any other observations. I don't think we reached consensus yet. I'm less concerned about the details, because I know you can do it. This is why I didn't look into the details of this PR, or commented nits so far. https://github.com/llvm/llvm-project/pull/109804 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang-tools-extra] [analyzer][clang-tidy][NFC] Clean up eagerly-assume handling (PR #112209)
=?utf-8?q?Don=C3=A1t?= Nagy , =?utf-8?q?Don=C3=A1t?= Nagy , =?utf-8?q?Don=C3=A1t?= Nagy , =?utf-8?q?Don=C3=A1t?= Nagy Message-ID: In-Reply-To: https://github.com/steakhal approved this pull request. LGTM, modulo single inline unresolved comment thread. https://github.com/llvm/llvm-project/pull/112209 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang-tools-extra] [analyzer][clang-tidy][NFC] Clean up eagerly-assume handling (PR #112209)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: @@ -299,13 +299,12 @@ ANALYZER_OPTION( ANALYZER_OPTION( bool, ShouldEagerlyAssume, "eagerly-assume", -"Whether we should eagerly assume evaluations of conditionals, thus, " -"bifurcating the path. This indicates how the engine should handle " -"expressions such as: 'x = (y != 0)'. When this is true then the " -"subexpression 'y != 0' will be eagerly assumed to be true or false, thus " -"evaluating it to the integers 0 or 1 respectively. The upside is that " -"this can increase analysis precision until we have a better way to lazily " -"evaluate such logic. The downside is that it eagerly bifurcates paths.", +"If this is enabled (the default behavior), when the analyzer encounters " +"a comparison operator or logical negation, it immediately splits the " steakhal wrote: I don't get your response here. I thought we want to update the doc comments here to reflect what the function does. I raised that I think implicit conversions also trigger this code path (and do eager splits). Consequently, I figured that this behavior should be also mentioned here and now. If bool conversions don't trigger this function, then of course my suggestion is void. I didn't mean to suggest function changes by my comment. About the TODO notes: I wish we had some lightweight list of things that we as maintainers agree that is easy to do, we have a clear NFC or semantic change in mind but nobody has the time doing it. I have in mind a lot of NFC refactors, API cleanups where I never get to, but I wish would be done. https://github.com/llvm/llvm-project/pull/112209 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang-tools-extra] [analyzer][clang-tidy][NFC] Clean up eagerly-assume handling (PR #112209)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: @@ -3767,28 +3764,26 @@ void ExprEngine::evalEagerlyAssumeBinOpBifurcation(ExplodedNodeSet &Dst, continue; } -ProgramStateRef state = Pred->getState(); -SVal V = state->getSVal(Ex, Pred->getLocationContext()); +ProgramStateRef State = Pred->getState(); +SVal V = State->getSVal(Ex, Pred->getLocationContext()); std::optional SEV = V.getAs(); if (SEV && SEV->isExpression()) { - const std::pair &tags = -geteagerlyAssumeBinOpBifurcationTags(); + auto [TrueTag, FalseTag] = getEagerlyAssumeBifurcationTags(); - ProgramStateRef StateTrue, StateFalse; - std::tie(StateTrue, StateFalse) = state->assume(*SEV); + auto [StateTrue, StateFalse] = State->assume(*SEV); // First assume that the condition is true. if (StateTrue) { SVal Val = svalBuilder.makeIntVal(1U, Ex->getType()); StateTrue = StateTrue->BindExpr(Ex, Pred->getLocationContext(), Val); -Bldr.generateNode(Ex, Pred, StateTrue, tags.first); +Bldr.generateNode(Ex, Pred, StateTrue, TrueTag); } // Next, assume that the condition is false. if (StateFalse) { SVal Val = svalBuilder.makeIntVal(0U, Ex->getType()); StateFalse = StateFalse->BindExpr(Ex, Pred->getLocationContext(), Val); -Bldr.generateNode(Ex, Pred, StateFalse, tags.second); +Bldr.generateNode(Ex, Pred, StateFalse, FalseTag); steakhal wrote: I'd be fine with having a different tag there. Actually, that would be really useful at times - e.g. with tooling around egraphs that is not present upstream. So having a tag is not an issue. Having a tag that lies about that we took an eager decision here is the problem. I agree that this shouldn't be done part of this PR. And I'm also fine leaving this alone in the future. I just wanted to raise this never the less. https://github.com/llvm/llvm-project/pull/112209 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Fieldregion descript name (PR #112313)
@@ -751,12 +751,27 @@ std::string MemRegion::getDescriptiveName(bool UseQuotes) const { } // Get variable name. - if (R && R->canPrintPrettyAsExpr()) { -R->printPrettyAsExpr(os); -if (UseQuotes) - return (llvm::Twine("'") + os.str() + ArrayIndices + "'").str(); -else + if (R) { +// MemRegion can be pretty printed. +if (R->canPrintPrettyAsExpr()) { + R->printPrettyAsExpr(os); + if (UseQuotes) +return (llvm::Twine("'") + os.str() + ArrayIndices + "'").str(); return (llvm::Twine(os.str()) + ArrayIndices).str(); +} + +// FieldRegion may have ElementRegion as SuperRegion. +if (const clang::ento::FieldRegion *FR = +R->getAs()) { + std::string Super = FR->getSuperRegion()->getDescriptiveName(false); + if (Super.empty()) +return ""; + + if (UseQuotes) +return (llvm::Twine("'") + Super + "." + FR->getDecl()->getName() + "'") +.str(); + return (llvm::Twine(Super) + "." + FR->getDecl()->getName()).str(); steakhal wrote: I wish we had a `singleQuote(const Twine &Subject)` function doing `('" + Subject + "'").str()` Along with: ``` auto QuoteIfNeeded = [UseQuotes](const Twine &Subject) { if (UseQuotes) return singleQuote(Subject); return Subject.str(); }; ``` Then you could do `QuoteIfNeeded(Super + "." + FR->getDecl()->getName())` etc. To make our life easier, we could `using llvm::Twine` at the top of this function to make it less verbose. https://github.com/llvm/llvm-project/pull/112313 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Fieldregion descript name (PR #112313)
@@ -751,12 +751,27 @@ std::string MemRegion::getDescriptiveName(bool UseQuotes) const { } // Get variable name. - if (R && R->canPrintPrettyAsExpr()) { -R->printPrettyAsExpr(os); -if (UseQuotes) - return (llvm::Twine("'") + os.str() + ArrayIndices + "'").str(); -else + if (R) { +// MemRegion can be pretty printed. +if (R->canPrintPrettyAsExpr()) { + R->printPrettyAsExpr(os); + if (UseQuotes) +return (llvm::Twine("'") + os.str() + ArrayIndices + "'").str(); return (llvm::Twine(os.str()) + ArrayIndices).str(); +} + +// FieldRegion may have ElementRegion as SuperRegion. +if (const clang::ento::FieldRegion *FR = +R->getAs()) { + std::string Super = FR->getSuperRegion()->getDescriptiveName(false); steakhal wrote: ```suggestion if (const auto *FR = R->getAs()) { std::string Super = FR->getSuperRegion()->getDescriptiveName(/*UseQuotes=*/false); ``` We prefer not spelling out the type if the line already spells it. We don't need to use fully qualified names, as we are already within the `ento` namespace. We usually use named arguments for bool parameters, to make it more descriptive. Why are `FieldRegions` so special? Couldn't we do something more generic? I wonder if we could use something like a visitor to bring some structure to this function. This starts to look really messy to me, without evidence that we couldn't do something better. WDYT? https://github.com/llvm/llvm-project/pull/112313 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Fieldregion descript name (PR #112313)
@@ -143,4 +142,18 @@ void top() { EXPECT_EQ(Output, "DescriptiveNameChecker: array[x]\n"); } +TEST(MemRegionDescriptiveNameTest, FieldRegWithSuperElementReg) { + StringRef Code = R"cpp( +void reportDescriptiveName(int *p); +struct val_struct { int val; }; +extern struct val_struct val_struct_array[3]; steakhal wrote: Could you test multidimensional arrays too? ``` extern struct val_struct val_struct_array[3][4]; reportDescriptiveName(&val_struct_array[1][2].val); ``` https://github.com/llvm/llvm-project/pull/112313 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Fieldregion descript name (PR #112313)
https://github.com/steakhal requested changes to this pull request. https://github.com/llvm/llvm-project/pull/112313 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang-tools-extra] [analyzer][clang-tidy][NFC] Clean up eagerly-assume handling (PR #112209)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: @@ -3742,23 +3742,20 @@ void ExprEngine::evalLocation(ExplodedNodeSet &Dst, BldrTop.addNodes(Tmp); } -std::pair -ExprEngine::geteagerlyAssumeBinOpBifurcationTags() { - static SimpleProgramPointTag - eagerlyAssumeBinOpBifurcationTrue(TagProviderName, - "Eagerly Assume True"), - eagerlyAssumeBinOpBifurcationFalse(TagProviderName, -"Eagerly Assume False"); - return std::make_pair(&eagerlyAssumeBinOpBifurcationTrue, -&eagerlyAssumeBinOpBifurcationFalse); +std::pair +ExprEngine::getEagerlyAssumeBifurcationTags() { + static SimpleProgramPointTag TrueTag(TagProviderName, "Eagerly Assume True"), + FalseTag(TagProviderName, "Eagerly Assume False"); + + return std::make_pair(&TrueTag, &FalseTag); steakhal wrote: I'd prefer class static const members for the tags over a function like this. https://github.com/llvm/llvm-project/pull/112209 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang-tools-extra] [analyzer][clang-tidy][NFC] Clean up eagerly-assume handling (PR #112209)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: @@ -3767,28 +3764,26 @@ void ExprEngine::evalEagerlyAssumeBinOpBifurcation(ExplodedNodeSet &Dst, continue; } -ProgramStateRef state = Pred->getState(); -SVal V = state->getSVal(Ex, Pred->getLocationContext()); +ProgramStateRef State = Pred->getState(); +SVal V = State->getSVal(Ex, Pred->getLocationContext()); std::optional SEV = V.getAs(); if (SEV && SEV->isExpression()) { - const std::pair &tags = -geteagerlyAssumeBinOpBifurcationTags(); + auto [TrueTag, FalseTag] = getEagerlyAssumeBifurcationTags(); - ProgramStateRef StateTrue, StateFalse; - std::tie(StateTrue, StateFalse) = state->assume(*SEV); + auto [StateTrue, StateFalse] = State->assume(*SEV); // First assume that the condition is true. if (StateTrue) { SVal Val = svalBuilder.makeIntVal(1U, Ex->getType()); StateTrue = StateTrue->BindExpr(Ex, Pred->getLocationContext(), Val); -Bldr.generateNode(Ex, Pred, StateTrue, tags.first); +Bldr.generateNode(Ex, Pred, StateTrue, TrueTag); } // Next, assume that the condition is false. if (StateFalse) { SVal Val = svalBuilder.makeIntVal(0U, Ex->getType()); StateFalse = StateFalse->BindExpr(Ex, Pred->getLocationContext(), Val); -Bldr.generateNode(Ex, Pred, StateFalse, tags.second); +Bldr.generateNode(Ex, Pred, StateFalse, FalseTag); steakhal wrote: It always bothered me why do we pass this **eager** tag if we didn't split the states (when exactly either `StateTrue` or `StateFalse` holds)? https://github.com/llvm/llvm-project/pull/112209 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang-tools-extra] [analyzer][clang-tidy][NFC] Clean up eagerly-assume handling (PR #112209)
=?utf-8?q?Don=C3=A1t?= Nagy , =?utf-8?q?Don=C3=A1t?= Nagy , =?utf-8?q?Don=C3=A1t?= Nagy Message-ID: In-Reply-To: https://github.com/steakhal commented: Looks good to me overall. https://github.com/llvm/llvm-project/pull/112209 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang-tools-extra] [analyzer][clang-tidy][NFC] Clean up eagerly-assume handling (PR #112209)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: https://github.com/steakhal edited https://github.com/llvm/llvm-project/pull/112209 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang-tools-extra] [analyzer][clang-tidy][NFC] Clean up eagerly-assume handling (PR #112209)
=?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy , =?utf-8?q?Donát?= Nagy Message-ID: In-Reply-To: @@ -299,13 +299,12 @@ ANALYZER_OPTION( ANALYZER_OPTION( bool, ShouldEagerlyAssume, "eagerly-assume", -"Whether we should eagerly assume evaluations of conditionals, thus, " -"bifurcating the path. This indicates how the engine should handle " -"expressions such as: 'x = (y != 0)'. When this is true then the " -"subexpression 'y != 0' will be eagerly assumed to be true or false, thus " -"evaluating it to the integers 0 or 1 respectively. The upside is that " -"this can increase analysis precision until we have a better way to lazily " -"evaluate such logic. The downside is that it eagerly bifurcates paths.", +"If this is enabled (the default behavior), when the analyzer encounters " +"a comparison operator or logical negation, it immediately splits the " steakhal wrote: This should also apply to implicit bool conversions, such as ptr to bool. https://github.com/llvm/llvm-project/pull/112209 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang-tools-extra] [analyzer][clang-tidy][NFC] Clean up eagerly-assume handling (PR #112209)
steakhal wrote: I want to have a look at this PR tomorrow. https://github.com/llvm/llvm-project/pull/112209 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [analyzer] Disable graph-trim-interval by default (PR #111843)
steakhal wrote: > > Is it a possible way forward dropping that assert? > > The function that performs the assertion is not part of the static analyzer, > it's a generic graph algorithm from an LLVM support library and the assertion > seems to be a really obvious sanity check. I don't think that it's reasonable > to remove the assertion months after it was added. Ah, yes. Well, thats makes this choice easier. Somehow I misremembered. Thanks for insisting! https://github.com/llvm/llvm-project/pull/111843 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits