[clang] [clang][analyzer] Improve PointerSubChecker (PR #96501)
https://github.com/balazske closed https://github.com/llvm/llvm-project/pull/96501 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][analyzer] Improve PointerSubChecker (PR #96501)
https://github.com/NagyDonat approved this pull request. LGTM, thanks for the updates! https://github.com/llvm/llvm-project/pull/96501 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][analyzer] Improve PointerSubChecker (PR #96501)
https://github.com/balazske updated https://github.com/llvm/llvm-project/pull/96501 From b431151f83fa2980e4a132191ccf5713ab69806b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20K=C3=A9ri?= Date: Mon, 24 Jun 2024 16:48:54 +0200 Subject: [PATCH 1/2] [clang][analyzer] Improve PointerSubChecker The checker could report false positives if pointer arithmetic was done on pointers to non-array data before pointer subtraction. Another problem is fixed that could cause false positive if members of the same structure but in different memory objects are subtracted. --- .../Checkers/PointerSubChecker.cpp| 22 -- clang/test/Analysis/pointer-sub.c | 29 +-- 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp index eea93a41f1384..63ed4df67d6d5 100644 --- a/clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp @@ -49,12 +49,28 @@ class PointerSubChecker }; } +static bool isArrayVar(const MemRegion *R) { + while (R) { +if (isa(R)) + return true; +if (const auto *ER = dyn_cast(R)) + R = ER->getSuperRegion(); +else + return false; + } + return false; +} + bool PointerSubChecker::checkArrayBounds(CheckerContext &C, const Expr *E, const ElementRegion *ElemReg, const MemRegion *Reg) const { if (!ElemReg) return true; + const MemRegion *SuperReg = ElemReg->getSuperRegion(); + if (!isArrayVar(SuperReg)) +return true; + auto ReportBug = [&](const llvm::StringLiteral &Msg) { if (ExplodedNode *N = C.generateNonFatalErrorNode()) { auto R = std::make_unique(BT, Msg, N); @@ -64,7 +80,6 @@ bool PointerSubChecker::checkArrayBounds(CheckerContext &C, const Expr *E, }; ProgramStateRef State = C.getState(); - const MemRegion *SuperReg = ElemReg->getSuperRegion(); SValBuilder &SVB = C.getSValBuilder(); if (SuperReg == Reg) { @@ -121,8 +136,9 @@ void PointerSubChecker::checkPreStmt(const BinaryOperator *B, if (LR == RR) return; - // No warning if one operand is unknown. - if (isa(LR) || isa(RR)) + // No warning if one operand is unknown or resides in a region that could be + // equal to the other. + if (LR->getSymbolicBase() || RR->getSymbolicBase()) return; const auto *ElemLR = dyn_cast(LR); diff --git a/clang/test/Analysis/pointer-sub.c b/clang/test/Analysis/pointer-sub.c index 88e6dec2d172f..404b8530b89c0 100644 --- a/clang/test/Analysis/pointer-sub.c +++ b/clang/test/Analysis/pointer-sub.c @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.core.PointerSub -verify %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.core.PointerSub -analyzer-output=text-minimal -verify %s void f1(void) { int x, y, z[10]; @@ -73,15 +73,15 @@ void f4(void) { d = a[2] - a[1]; // expected-warning{{Subtraction of two pointers that}} } -typedef struct { +struct S { int a; int b; int c[10]; // expected-note2{{Array at the right-hand side of subtraction}} int d[10]; // expected-note2{{Array at the left-hand side of subtraction}} -} S; +}; void f5(void) { - S s; + struct S s; int y; int d; @@ -92,18 +92,18 @@ void f5(void) { d = &s.d[3] - &s.c[2]; // expected-warning{{Subtraction of two pointers that}} d = s.d - s.c; // expected-warning{{Subtraction of two pointers that}} - S sa[10]; + struct S sa[10]; d = &sa[2] - &sa[1]; d = &sa[2].a - &sa[1].b; // expected-warning{{Subtraction of two pointers that}} } void f6(void) { - long long l; + long long l = 2; char *a1 = (char *)&l; int d = a1[3] - l; - long long la1[3]; // expected-note{{Array at the right-hand side of subtraction}} - long long la2[3]; // expected-note{{Array at the left-hand side of subtraction}} + long long la1[3] = {1}; // expected-note{{Array at the right-hand side of subtraction}} + long long la2[3] = {1}; // expected-note{{Array at the left-hand side of subtraction}} char *pla1 = (char *)la1; char *pla2 = (char *)la2; d = pla1[1] - pla1[0]; @@ -117,6 +117,17 @@ void f7(int *p) { } void f8(int n) { - int a[10]; + int a[10] = {1}; int d = a[n] - a[0]; } + +int f9(const char *p1) { + const char *p2 = p1; + --p1; + ++p2; + return p1 - p2; // no-warning +} + +int f10(struct S *p1, struct S *p2) { + return &p1->c[5] - &p2->c[5]; // no-warning +} From ce80876f7364fba57de3cf5377d9f3673d6744b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20K=C3=A9ri?= Date: Thu, 27 Jun 2024 10:26:20 +0200 Subject: [PATCH 2/2] improved note messages --- .../Checkers/PointerSubChecker.cpp| 17 - clang/test/Analysis/pointer-sub.c | 25 ++- 2 files changed, 35 insertions(+), 7 deletions(-) diff --
[clang] [clang][analyzer] Improve PointerSubChecker (PR #96501)
dkrupp wrote: > > Even protobuf contains this type of code: > > https://codechecker-demo.eastus.cloudapp.azure.com/Default/report-detail?run=protobuf_v3.13.0_pointersub1&is-unique=on&diff-type=New&checker-name=alpha.core.PointerSub&report-id=5545776&report-hash=1bcd310fbaeccbcc13645b9b277239a2&report-filepath=%2adescriptor.pb.cc > > I still think that this (1) is undeniably undefined behavior (2) isn't > common, so won't cause "spam" problems and (3( can be replaced by > standard-compliant code (`offsetof`) so there is no need to introduce a > special case for it. I agree with @NagyDonat that we don't need special handling of this case in the code, however I think the checker [documentation ](https://clang.llvm.org/docs/analyzer/checkers.html#alpha-core-pointersub-c) should be extended with the description of this special case as it may be a surprising warning from the checker with an example. Specifically that it warns for cases where two pointers are subtracted which point to members of the same struct and suggest the usage of the standard compliant solution: offsetof. So please describe which pointer subtractions the checker accepts and which don't (with examples) and a reference to the standard where it describes the undefined behaviour. https://github.com/llvm/llvm-project/pull/96501 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][analyzer] Improve PointerSubChecker (PR #96501)
NagyDonat wrote: > Even protobuf contains this type of code: > https://codechecker-demo.eastus.cloudapp.azure.com/Default/report-detail?run=protobuf_v3.13.0_pointersub1&is-unique=on&diff-type=New&checker-name=alpha.core.PointerSub&report-id=5545776&report-hash=1bcd310fbaeccbcc13645b9b277239a2&report-filepath=%2adescriptor.pb.cc I still think that this (1) is undeniably undefined behavior (2) isn't common, so won't cause "spam" problems and (3( can be replaced by standard-compliant code (`offsetof`) so there is no need to introduce a special case for it. https://github.com/llvm/llvm-project/pull/96501 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][analyzer] Improve PointerSubChecker (PR #96501)
balazske wrote: Even protobuf contains this type of code: https://codechecker-demo.eastus.cloudapp.azure.com/Default/report-detail?run=protobuf_v3.13.0_pointersub1&is-unique=on&diff-type=New&checker-name=alpha.core.PointerSub&report-id=5545776&report-hash=1bcd310fbaeccbcc13645b9b277239a2&report-filepath=%2adescriptor.pb.cc https://github.com/llvm/llvm-project/pull/96501 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][analyzer] Improve PointerSubChecker (PR #96501)
balazske wrote: If the array bounds checker does the same job then the array bounds check it is not needed in this checker. Specially if it makes no difference if the indexing is used at pointer subtraction. https://github.com/llvm/llvm-project/pull/96501 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][analyzer] Improve PointerSubChecker (PR #96501)
NagyDonat wrote: > The warning message may be still misleading if the LHS or RHS "arrays" are > non-array variables. I think that the warning message is OK: "Subtraction of two pointers that do not point into the same array is undefined behavior." -- this also covers the case when one or both of the pointers do not point to arrays. (It doesn't mention the corner case that it's also valid to subtract two identical pointers that point to a non-array value, but that's completely irrelevant in practice, so wouldn't be a helpful suggestion.) > (or detect if `offsetof` can be used and include it in the message)? I think that would be a waste of time, because it's very rare that a project manually reimplements `offsetof` -- I think it only appears in `vim` becasue it's a very old codebase. (Also developers who play with this kind of low-level trickery should be familiar with the standard and understand what's the problem.) https://github.com/llvm/llvm-project/pull/96501 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][analyzer] Improve PointerSubChecker (PR #96501)
balazske wrote: The warning message may be still misleading if the LHS or RHS "arrays" are non-array variables. Is it better to improve the messages in this case (or detect if `offsetof` can be used and include it in the message)? https://github.com/llvm/llvm-project/pull/96501 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][analyzer] Improve PointerSubChecker (PR #96501)
NagyDonat wrote: > These results look correct according to the checker, but I am not sure if > such results are useful or really invalid: > https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?run=vim_v8.2.1920_pointersub1&is-unique=on&diff-type=New&checker-name=alpha.core.PointerSub > In these cases the address difference of an (array) member of a struct and > start of the struct is taken. According to the checker rules, taking > difference of addresses of any variables or member variables is invalid. This trickery basically reimplements `offsetof()` (or something very similar to it), and as it is in the stable `vim` repo I assume that it's accepted by practically all compilers; but it clearly violates the language standard so I think it's good that the checker reports it. (Perhaps this trick would've been acceptable thirty years ago, but now we have standard `offsetof()` that usually expands to `__builtin_offsetof()` so developers who try to use homemade alternatives deserve a warning.) https://github.com/llvm/llvm-project/pull/96501 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][analyzer] Improve PointerSubChecker (PR #96501)
balazske wrote: These results look correct according to the checker, but I am not sure if such results are useful or really invalid: https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?run=vim_v8.2.1920_pointersub1&is-unique=on&diff-type=New&checker-name=alpha.core.PointerSub In these cases the address difference of an (array) member of a struct and start of the struct is taken. According to the checker rules, taking difference of addresses of any variables or member variables is invalid. https://github.com/llvm/llvm-project/pull/96501 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][analyzer] Improve PointerSubChecker (PR #96501)
https://github.com/NagyDonat edited https://github.com/llvm/llvm-project/pull/96501 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][analyzer] Improve PointerSubChecker (PR #96501)
https://github.com/NagyDonat approved this pull request. LGTM. I'm a bit surprised to see that this checker duplicates the logic of the array bounds checkers (in the special case when the indexing operation is within a pointer subtraction). Right now this is OK but we'll need to delete this once ArrayBoundV2 becomes stable. https://github.com/llvm/llvm-project/pull/96501 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][analyzer] Improve PointerSubChecker (PR #96501)
llvmbot wrote: @llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-static-analyzer-1 Author: Balázs Kéri (balazske) Changes The checker could report false positives if pointer arithmetic was done on pointers to non-array data before pointer subtraction. Another problem is fixed that could cause false positive if members of the same structure but in different memory objects are subtracted. --- Full diff: https://github.com/llvm/llvm-project/pull/96501.diff 2 Files Affected: - (modified) clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp (+19-3) - (modified) clang/test/Analysis/pointer-sub.c (+20-9) ``diff diff --git a/clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp index eea93a41f1384..63ed4df67d6d5 100644 --- a/clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp @@ -49,12 +49,28 @@ class PointerSubChecker }; } +static bool isArrayVar(const MemRegion *R) { + while (R) { +if (isa(R)) + return true; +if (const auto *ER = dyn_cast(R)) + R = ER->getSuperRegion(); +else + return false; + } + return false; +} + bool PointerSubChecker::checkArrayBounds(CheckerContext &C, const Expr *E, const ElementRegion *ElemReg, const MemRegion *Reg) const { if (!ElemReg) return true; + const MemRegion *SuperReg = ElemReg->getSuperRegion(); + if (!isArrayVar(SuperReg)) +return true; + auto ReportBug = [&](const llvm::StringLiteral &Msg) { if (ExplodedNode *N = C.generateNonFatalErrorNode()) { auto R = std::make_unique(BT, Msg, N); @@ -64,7 +80,6 @@ bool PointerSubChecker::checkArrayBounds(CheckerContext &C, const Expr *E, }; ProgramStateRef State = C.getState(); - const MemRegion *SuperReg = ElemReg->getSuperRegion(); SValBuilder &SVB = C.getSValBuilder(); if (SuperReg == Reg) { @@ -121,8 +136,9 @@ void PointerSubChecker::checkPreStmt(const BinaryOperator *B, if (LR == RR) return; - // No warning if one operand is unknown. - if (isa(LR) || isa(RR)) + // No warning if one operand is unknown or resides in a region that could be + // equal to the other. + if (LR->getSymbolicBase() || RR->getSymbolicBase()) return; const auto *ElemLR = dyn_cast(LR); diff --git a/clang/test/Analysis/pointer-sub.c b/clang/test/Analysis/pointer-sub.c index 88e6dec2d172f..404b8530b89c0 100644 --- a/clang/test/Analysis/pointer-sub.c +++ b/clang/test/Analysis/pointer-sub.c @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.core.PointerSub -verify %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.core.PointerSub -analyzer-output=text-minimal -verify %s void f1(void) { int x, y, z[10]; @@ -73,15 +73,15 @@ void f4(void) { d = a[2] - a[1]; // expected-warning{{Subtraction of two pointers that}} } -typedef struct { +struct S { int a; int b; int c[10]; // expected-note2{{Array at the right-hand side of subtraction}} int d[10]; // expected-note2{{Array at the left-hand side of subtraction}} -} S; +}; void f5(void) { - S s; + struct S s; int y; int d; @@ -92,18 +92,18 @@ void f5(void) { d = &s.d[3] - &s.c[2]; // expected-warning{{Subtraction of two pointers that}} d = s.d - s.c; // expected-warning{{Subtraction of two pointers that}} - S sa[10]; + struct S sa[10]; d = &sa[2] - &sa[1]; d = &sa[2].a - &sa[1].b; // expected-warning{{Subtraction of two pointers that}} } void f6(void) { - long long l; + long long l = 2; char *a1 = (char *)&l; int d = a1[3] - l; - long long la1[3]; // expected-note{{Array at the right-hand side of subtraction}} - long long la2[3]; // expected-note{{Array at the left-hand side of subtraction}} + long long la1[3] = {1}; // expected-note{{Array at the right-hand side of subtraction}} + long long la2[3] = {1}; // expected-note{{Array at the left-hand side of subtraction}} char *pla1 = (char *)la1; char *pla2 = (char *)la2; d = pla1[1] - pla1[0]; @@ -117,6 +117,17 @@ void f7(int *p) { } void f8(int n) { - int a[10]; + int a[10] = {1}; int d = a[n] - a[0]; } + +int f9(const char *p1) { + const char *p2 = p1; + --p1; + ++p2; + return p1 - p2; // no-warning +} + +int f10(struct S *p1, struct S *p2) { + return &p1->c[5] - &p2->c[5]; // no-warning +} `` https://github.com/llvm/llvm-project/pull/96501 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][analyzer] Improve PointerSubChecker (PR #96501)
https://github.com/balazske created https://github.com/llvm/llvm-project/pull/96501 The checker could report false positives if pointer arithmetic was done on pointers to non-array data before pointer subtraction. Another problem is fixed that could cause false positive if members of the same structure but in different memory objects are subtracted. From b431151f83fa2980e4a132191ccf5713ab69806b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20K=C3=A9ri?= Date: Mon, 24 Jun 2024 16:48:54 +0200 Subject: [PATCH] [clang][analyzer] Improve PointerSubChecker The checker could report false positives if pointer arithmetic was done on pointers to non-array data before pointer subtraction. Another problem is fixed that could cause false positive if members of the same structure but in different memory objects are subtracted. --- .../Checkers/PointerSubChecker.cpp| 22 -- clang/test/Analysis/pointer-sub.c | 29 +-- 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp index eea93a41f1384..63ed4df67d6d5 100644 --- a/clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp @@ -49,12 +49,28 @@ class PointerSubChecker }; } +static bool isArrayVar(const MemRegion *R) { + while (R) { +if (isa(R)) + return true; +if (const auto *ER = dyn_cast(R)) + R = ER->getSuperRegion(); +else + return false; + } + return false; +} + bool PointerSubChecker::checkArrayBounds(CheckerContext &C, const Expr *E, const ElementRegion *ElemReg, const MemRegion *Reg) const { if (!ElemReg) return true; + const MemRegion *SuperReg = ElemReg->getSuperRegion(); + if (!isArrayVar(SuperReg)) +return true; + auto ReportBug = [&](const llvm::StringLiteral &Msg) { if (ExplodedNode *N = C.generateNonFatalErrorNode()) { auto R = std::make_unique(BT, Msg, N); @@ -64,7 +80,6 @@ bool PointerSubChecker::checkArrayBounds(CheckerContext &C, const Expr *E, }; ProgramStateRef State = C.getState(); - const MemRegion *SuperReg = ElemReg->getSuperRegion(); SValBuilder &SVB = C.getSValBuilder(); if (SuperReg == Reg) { @@ -121,8 +136,9 @@ void PointerSubChecker::checkPreStmt(const BinaryOperator *B, if (LR == RR) return; - // No warning if one operand is unknown. - if (isa(LR) || isa(RR)) + // No warning if one operand is unknown or resides in a region that could be + // equal to the other. + if (LR->getSymbolicBase() || RR->getSymbolicBase()) return; const auto *ElemLR = dyn_cast(LR); diff --git a/clang/test/Analysis/pointer-sub.c b/clang/test/Analysis/pointer-sub.c index 88e6dec2d172f..404b8530b89c0 100644 --- a/clang/test/Analysis/pointer-sub.c +++ b/clang/test/Analysis/pointer-sub.c @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.core.PointerSub -verify %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.core.PointerSub -analyzer-output=text-minimal -verify %s void f1(void) { int x, y, z[10]; @@ -73,15 +73,15 @@ void f4(void) { d = a[2] - a[1]; // expected-warning{{Subtraction of two pointers that}} } -typedef struct { +struct S { int a; int b; int c[10]; // expected-note2{{Array at the right-hand side of subtraction}} int d[10]; // expected-note2{{Array at the left-hand side of subtraction}} -} S; +}; void f5(void) { - S s; + struct S s; int y; int d; @@ -92,18 +92,18 @@ void f5(void) { d = &s.d[3] - &s.c[2]; // expected-warning{{Subtraction of two pointers that}} d = s.d - s.c; // expected-warning{{Subtraction of two pointers that}} - S sa[10]; + struct S sa[10]; d = &sa[2] - &sa[1]; d = &sa[2].a - &sa[1].b; // expected-warning{{Subtraction of two pointers that}} } void f6(void) { - long long l; + long long l = 2; char *a1 = (char *)&l; int d = a1[3] - l; - long long la1[3]; // expected-note{{Array at the right-hand side of subtraction}} - long long la2[3]; // expected-note{{Array at the left-hand side of subtraction}} + long long la1[3] = {1}; // expected-note{{Array at the right-hand side of subtraction}} + long long la2[3] = {1}; // expected-note{{Array at the left-hand side of subtraction}} char *pla1 = (char *)la1; char *pla2 = (char *)la2; d = pla1[1] - pla1[0]; @@ -117,6 +117,17 @@ void f7(int *p) { } void f8(int n) { - int a[10]; + int a[10] = {1}; int d = a[n] - a[0]; } + +int f9(const char *p1) { + const char *p2 = p1; + --p1; + ++p2; + return p1 - p2; // no-warning +} + +int f10(struct S *p1, struct S *p2) { + return &p1->c[5] - &p2->c[5]; // no-warning +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin