https://github.com/malavikasamak updated https://github.com/llvm/llvm-project/pull/75650
>From 809bf6f4237f634feaeb7e5b0b88be3a2e4de455 Mon Sep 17 00:00:00 2001 From: MalavikaSamak <malavi...@apple.com> Date: Fri, 15 Dec 2023 11:40:55 -0800 Subject: [PATCH 1/3] While refactoring projects to eradicate unsafe buffer accesses using -Wunsafe-buffer-usage, there maybe accidental re-introduction of new OutOfBound accesses into the code bases. One such case is invoking span::data() method on a span variable to retrieve a pointer, which is then cast to a larger type and dereferenced. Such dereferences can introduce OutOfBound accesses. To address this, a new WarningGadget is being introduced to warn against such invocations. --- .../Analysis/Analyses/UnsafeBufferUsage.h | 2 +- .../Analyses/UnsafeBufferUsageGadgets.def | 1 + .../clang/Basic/DiagnosticSemaKinds.td | 2 +- clang/lib/Analysis/UnsafeBufferUsage.cpp | 37 ++++- clang/lib/Sema/AnalysisBasedWarnings.cpp | 19 ++- ...e-buffer-usage-warning-data-invocation.cpp | 133 ++++++++++++++++++ 6 files changed, 186 insertions(+), 8 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-warning-data-invocation.cpp diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h index 8a2d56668e32f9..b28f2c6b99c50e 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -66,7 +66,7 @@ class UnsafeBufferUsageHandler { /// Invoked when an unsafe operation over raw pointers is found. virtual void handleUnsafeOperation(const Stmt *Operation, - bool IsRelatedToDecl) = 0; + bool IsRelatedToDecl, ASTContext &Ctx) = 0; /// Invoked when a fix is suggested against a variable. This function groups /// all variables that must be fixed together (i.e their types must be changed diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 757ee452ced748..c9766168836510 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -30,6 +30,7 @@ WARNING_GADGET(Decrement) WARNING_GADGET(ArraySubscript) WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) +WARNING_GADGET(DataInvocation) FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) FIXABLE_GADGET(PointerDereference) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 94e97a891baedc..9038dd879f54ae 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12058,7 +12058,7 @@ def warn_unsafe_buffer_variable : Warning< InGroup<UnsafeBufferUsage>, DefaultIgnore; def warn_unsafe_buffer_operation : Warning< "%select{unsafe pointer operation|unsafe pointer arithmetic|" - "unsafe buffer access|function introduces unsafe buffer manipulation}0">, + "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe invocation of span::data}0">, InGroup<UnsafeBufferUsage>, DefaultIgnore; def note_unsafe_buffer_operation : Note< "used%select{| in pointer arithmetic| in buffer access}0 here">; diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 70eec1cee57f8e..e4eca9939b10d5 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -721,6 +721,33 @@ class UnsafeBufferUsageAttrGadget : public WarningGadget { DeclUseList getClaimedVarUseSites() const override { return {}; } }; +// Warning gadget for unsafe invocation of span::data method. +// Triggers when the pointer returned by the invocation is immediately +// cast to a larger type. + +class DataInvocationGadget : public WarningGadget { + constexpr static const char *const OpTag = "data_invocation_expr"; + const ExplicitCastExpr *Op; + + public: + DataInvocationGadget(const MatchFinder::MatchResult &Result) + : WarningGadget(Kind::DataInvocation), + Op(Result.Nodes.getNodeAs<ExplicitCastExpr>(OpTag)) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::DataInvocation; + } + + static Matcher matcher() { + return stmt( + explicitCastExpr(has(cxxMemberCallExpr(callee( + cxxMethodDecl(hasName("data")))))).bind(OpTag)); + } + const Stmt *getBaseStmt() const override { return Op; } + + DeclUseList getClaimedVarUseSites() const override { return {}; } +}; + // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue // Context (see `isInUnspecifiedLvalueContext`). // Note here `[]` is the built-in subscript operator. @@ -2657,8 +2684,8 @@ void clang::checkUnsafeBufferUsage(const Decl *D, // every problematic operation and consider it done. No need to deal // with fixable gadgets, no need to group operations by variable. for (const auto &G : WarningGadgets) { - Handler.handleUnsafeOperation(G->getBaseStmt(), - /*IsRelatedToDecl=*/false); + Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false, + D->getASTContext()); } // This return guarantees that most of the machine doesn't run when @@ -2893,7 +2920,8 @@ void clang::checkUnsafeBufferUsage(const Decl *D, Tracker, Handler, VarGrpMgr); for (const auto &G : UnsafeOps.noVar) { - Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false); + Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false + , D->getASTContext()); } for (const auto &[VD, WarningGadgets] : UnsafeOps.byVar) { @@ -2904,7 +2932,8 @@ void clang::checkUnsafeBufferUsage(const Decl *D, : FixItList{}, D); for (const auto &G : WarningGadgets) { - Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/true); + Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/true + , D->getASTContext()); } } } diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 0947e8b0f526a3..4f8e181806957e 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -2226,8 +2226,8 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler { UnsafeBufferUsageReporter(Sema &S, bool SuggestSuggestions) : S(S), SuggestSuggestions(SuggestSuggestions) {} - void handleUnsafeOperation(const Stmt *Operation, - bool IsRelatedToDecl) override { + void handleUnsafeOperation(const Stmt *Operation, bool IsRelatedToDecl, + ASTContext &Ctx) override { SourceLocation Loc; SourceRange Range; unsigned MsgParam = 0; @@ -2261,6 +2261,21 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler { // note_unsafe_buffer_operation doesn't have this mode yet. assert(!IsRelatedToDecl && "Not implemented yet!"); MsgParam = 3; + } else if (const auto *ECE = dyn_cast<ExplicitCastExpr>(Operation)) { + QualType destType = ECE->getType(); + const uint64_t dSize = Ctx.getTypeSize(destType.getTypePtr()->getPointeeType()); + if(const auto *CE =dyn_cast<CXXMemberCallExpr>(ECE->getSubExpr())) { + + if(CE->getRecordDecl()->getQualifiedNameAsString().compare("std::span")) + return; + + QualType srcType = CE->getType(); + const uint64_t sSize = Ctx.getTypeSize(srcType.getTypePtr()->getPointeeType()); + if(sSize >= dSize) + return; + } + MsgParam = 4; + } Loc = Operation->getBeginLoc(); Range = Operation->getSourceRange(); diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-warning-data-invocation.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-warning-data-invocation.cpp new file mode 100644 index 00000000000000..5cce91ae4753de --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-warning-data-invocation.cpp @@ -0,0 +1,133 @@ +// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \ +// RUN: -fsafe-buffer-usage-suggestions \ +// RUN: -fblocks -include %s -verify %s + +// RUN: %clang -x c++ -frtti -fsyntax-only -fblocks -include %s %s 2>&1 | FileCheck --allow-empty %s +// RUN: %clang_cc1 -std=c++11 -fblocks -include %s %s 2>&1 | FileCheck --allow-empty %s +// RUN: %clang_cc1 -std=c++20 -fblocks -include %s %s 2>&1 | FileCheck --allow-empty %s +// CHECK-NOT: [-Wunsafe-buffer-usage] + +#ifndef INCLUDED +#define INCLUDED +#pragma clang system_header + +// no spanification warnings for system headers +void foo(...); // let arguments of `foo` to hold testing expressions +#else + +namespace std { + class type_info; + class bad_cast; + class bad_typeid; +} +using size_t = __typeof(sizeof(int)); +void *malloc(size_t); + +void foo(int v) { +} + +void foo(int *p){} + +namespace std{ + template <typename T> class span { + + T *elements; + + span(T *, unsigned){} + + public: + + constexpr span<T> subspan(size_t offset, size_t count) const { + return span<T> (elements+offset, count); // expected-warning{{unsafe pointer arithmetic}} + } + + constexpr T* data() const noexcept { + return elements; + } + + + constexpr T* hello() const noexcept { + return elements; + } +}; + + template <typename T> class span_duplicate { + span_duplicate(T *, unsigned){} + + T array[10]; + + public: + + T* data() { + return array; + } + +}; +} + +class span { + + int array[10]; + + public: + + int *data() { + return array; + } +}; + +class A { + int a, b, c; +}; + +struct Base { + virtual ~Base() = default; +}; + +struct Derived: Base { + int d; +}; + +void cast_without_data(int *ptr) { + A *a = (A*) ptr; + float *p = (float*) ptr; +} + +void warned_patterns(std::span<int> span_ptr, std::span<Base> base_span) { + int *p; + A *a = (A*)span_ptr.data(); // expected-warning{{unsafe invocation of span::data}} + a = (A*)span_ptr.data(); // expected-warning{{unsafe invocation of span::data}} + + // TODO:: Should we warn when we cast from base to derived type? + Derived *b = dynamic_cast<Derived*> (base_span.data());// expected-warning{{unsafe invocation of span::data}} + + // TODO:: This pattern is safe. We can add special handling for it, if we decide this + // is the recommended fixit for the unsafe invocations. + a = (A*)span_ptr.subspan(0, sizeof(A)).data(); // expected-warning{{unsafe invocation of span::data}} +} + +void not_warned_patterns(std::span<A> span_ptr, std::span<Base> base_span) { + int *p = (int*)span_ptr.data(); + p = (int*)span_ptr.data(); + A *a = (A*) span_ptr.hello(); +} + +// We do not want to warn about other types +void other_classes(std::span_duplicate<int> span_ptr, span sp) { + int *p; + A *a = (A*)span_ptr.data(); + a = (A*)span_ptr.data(); + + a = (A*)sp.data(); +} + +// Potential source for false negatives + +void false_negatives(std::span<int> span_pt) { + int *ptr = span_pt.data(); + A *a = (A*)ptr; //TODO: We want to warn here eventually. + + int k = *ptr; // TODO: Can cause OOB if span_pt is empty + +} +#endif >From e80f03a07287297b1d7965301b2a1914823a4265 Mon Sep 17 00:00:00 2001 From: MalavikaSamak <malavi...@apple.com> Date: Mon, 18 Dec 2023 15:32:42 -0800 Subject: [PATCH 2/3] Addressing some comments and format issues. --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 2 +- clang/lib/Sema/AnalysisBasedWarnings.cpp | 5 -- ...e-buffer-usage-warning-data-invocation.cpp | 46 +++++++++---------- 3 files changed, 23 insertions(+), 30 deletions(-) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index e4eca9939b10d5..5e3bd7dfb54d4b 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -741,7 +741,7 @@ class DataInvocationGadget : public WarningGadget { static Matcher matcher() { return stmt( explicitCastExpr(has(cxxMemberCallExpr(callee( - cxxMethodDecl(hasName("data")))))).bind(OpTag)); + cxxMethodDecl(hasName("data"), ofClass(hasName("std::span"))))))).bind(OpTag)); } const Stmt *getBaseStmt() const override { return Op; } diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 4f8e181806957e..60406064bb7e9d 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -2265,17 +2265,12 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler { QualType destType = ECE->getType(); const uint64_t dSize = Ctx.getTypeSize(destType.getTypePtr()->getPointeeType()); if(const auto *CE =dyn_cast<CXXMemberCallExpr>(ECE->getSubExpr())) { - - if(CE->getRecordDecl()->getQualifiedNameAsString().compare("std::span")) - return; - QualType srcType = CE->getType(); const uint64_t sSize = Ctx.getTypeSize(srcType.getTypePtr()->getPointeeType()); if(sSize >= dSize) return; } MsgParam = 4; - } Loc = Operation->getBeginLoc(); Range = Operation->getSourceRange(); diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-warning-data-invocation.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-warning-data-invocation.cpp index 5cce91ae4753de..79eb3bb4bacc6e 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-warning-data-invocation.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-warning-data-invocation.cpp @@ -12,7 +12,6 @@ #pragma clang system_header // no spanification warnings for system headers -void foo(...); // let arguments of `foo` to hold testing expressions #else namespace std { @@ -65,18 +64,13 @@ namespace std{ }; } -class span { +using namespace std; - int array[10]; - - public: - - int *data() { - return array; - } +class A { + int a, b, c; }; -class A { +class B { int a, b, c; }; @@ -93,41 +87,45 @@ void cast_without_data(int *ptr) { float *p = (float*) ptr; } -void warned_patterns(std::span<int> span_ptr, std::span<Base> base_span) { - int *p; - A *a = (A*)span_ptr.data(); // expected-warning{{unsafe invocation of span::data}} - a = (A*)span_ptr.data(); // expected-warning{{unsafe invocation of span::data}} +void warned_patterns(std::span<int> span_ptr, std::span<Base> base_span, span<int> span_without_qual) { + A *a1 = (A*)span_ptr.data(); // expected-warning{{unsafe invocation of span::data}} + a1 = (A*)span_ptr.data(); // expected-warning{{unsafe invocation of span::data}} + + A *a2 = (A*) span_without_qual.data(); // expected-warning{{unsafe invocation of span::data}} // TODO:: Should we warn when we cast from base to derived type? Derived *b = dynamic_cast<Derived*> (base_span.data());// expected-warning{{unsafe invocation of span::data}} // TODO:: This pattern is safe. We can add special handling for it, if we decide this // is the recommended fixit for the unsafe invocations. - a = (A*)span_ptr.subspan(0, sizeof(A)).data(); // expected-warning{{unsafe invocation of span::data}} + A *a3 = (A*)span_ptr.subspan(0, sizeof(A)).data(); // expected-warning{{unsafe invocation of span::data}} } void not_warned_patterns(std::span<A> span_ptr, std::span<Base> base_span) { - int *p = (int*)span_ptr.data(); - p = (int*)span_ptr.data(); - A *a = (A*) span_ptr.hello(); + int *p = (int*) span_ptr.data(); // Cast to a smaller type + + B *b = (B*) span_ptr.data(); // Cast to a type of same size. + + p = (int*) span_ptr.data(); + A *a = (A*) span_ptr.hello(); // Invoking other methods. } // We do not want to warn about other types -void other_classes(std::span_duplicate<int> span_ptr, span sp) { +void other_classes(std::span_duplicate<int> span_ptr) { int *p; A *a = (A*)span_ptr.data(); a = (A*)span_ptr.data(); - - a = (A*)sp.data(); } // Potential source for false negatives -void false_negatives(std::span<int> span_pt) { +A false_negatives(std::span<int> span_pt, span<A> span_A) { int *ptr = span_pt.data(); - A *a = (A*)ptr; //TODO: We want to warn here eventually. - int k = *ptr; // TODO: Can cause OOB if span_pt is empty + A *a1 = (A*)ptr; //TODO: We want to warn here eventually. + + A *a2= span_A.data(); + return *a2; // TODO: Can cause OOB if span_pt is empty } #endif >From 6bcbbfdafd00360349e8cc2b516c30476dce879d Mon Sep 17 00:00:00 2001 From: MalavikaSamak <malavi...@apple.com> Date: Mon, 18 Dec 2023 15:32:42 -0800 Subject: [PATCH 3/3] Addressing some comments and format issues. --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 21 +++++++++++---------- clang/lib/Sema/AnalysisBasedWarnings.cpp | 12 +++++++----- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 5e3bd7dfb54d4b..724c4304a07242 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -729,7 +729,7 @@ class DataInvocationGadget : public WarningGadget { constexpr static const char *const OpTag = "data_invocation_expr"; const ExplicitCastExpr *Op; - public: +public: DataInvocationGadget(const MatchFinder::MatchResult &Result) : WarningGadget(Kind::DataInvocation), Op(Result.Nodes.getNodeAs<ExplicitCastExpr>(OpTag)) {} @@ -737,12 +737,13 @@ class DataInvocationGadget : public WarningGadget { static bool classof(const Gadget *G) { return G->getKind() == Kind::DataInvocation; } - + static Matcher matcher() { return stmt( - explicitCastExpr(has(cxxMemberCallExpr(callee( - cxxMethodDecl(hasName("data"), ofClass(hasName("std::span"))))))).bind(OpTag)); - } + explicitCastExpr(has(cxxMemberCallExpr(callee(cxxMethodDecl( + hasName("data"), ofClass(hasName("std::span"))))))) + .bind(OpTag)); + } const Stmt *getBaseStmt() const override { return Op; } DeclUseList getClaimedVarUseSites() const override { return {}; } @@ -2684,7 +2685,7 @@ void clang::checkUnsafeBufferUsage(const Decl *D, // every problematic operation and consider it done. No need to deal // with fixable gadgets, no need to group operations by variable. for (const auto &G : WarningGadgets) { - Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false, + Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false, D->getASTContext()); } @@ -2920,8 +2921,8 @@ void clang::checkUnsafeBufferUsage(const Decl *D, Tracker, Handler, VarGrpMgr); for (const auto &G : UnsafeOps.noVar) { - Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false - , D->getASTContext()); + Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false, + D->getASTContext()); } for (const auto &[VD, WarningGadgets] : UnsafeOps.byVar) { @@ -2932,8 +2933,8 @@ void clang::checkUnsafeBufferUsage(const Decl *D, : FixItList{}, D); for (const auto &G : WarningGadgets) { - Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/true - , D->getASTContext()); + Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/true, + D->getASTContext()); } } } diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 60406064bb7e9d..9eb1df5f024059 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -2226,7 +2226,7 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler { UnsafeBufferUsageReporter(Sema &S, bool SuggestSuggestions) : S(S), SuggestSuggestions(SuggestSuggestions) {} - void handleUnsafeOperation(const Stmt *Operation, bool IsRelatedToDecl, + void handleUnsafeOperation(const Stmt *Operation, bool IsRelatedToDecl, ASTContext &Ctx) override { SourceLocation Loc; SourceRange Range; @@ -2263,11 +2263,13 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler { MsgParam = 3; } else if (const auto *ECE = dyn_cast<ExplicitCastExpr>(Operation)) { QualType destType = ECE->getType(); - const uint64_t dSize = Ctx.getTypeSize(destType.getTypePtr()->getPointeeType()); - if(const auto *CE =dyn_cast<CXXMemberCallExpr>(ECE->getSubExpr())) { + const uint64_t dSize = + Ctx.getTypeSize(destType.getTypePtr()->getPointeeType()); + if (const auto *CE = dyn_cast<CXXMemberCallExpr>(ECE->getSubExpr())) { QualType srcType = CE->getType(); - const uint64_t sSize = Ctx.getTypeSize(srcType.getTypePtr()->getPointeeType()); - if(sSize >= dSize) + const uint64_t sSize = + Ctx.getTypeSize(srcType.getTypePtr()->getPointeeType()); + if (sSize >= dSize) return; } MsgParam = 4; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits