[clang] [-Wunsafe-buffer-usage] Fix a bug and suppress libc warnings for C files (PR #109496)
https://github.com/ziqingluo-90 closed https://github.com/llvm/llvm-project/pull/109496 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Fix a bug and suppress libc warnings for C files (PR #109496)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/109496 >From e7f7f82b25eaae86623ac8f47731892b3b629d7d Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Fri, 20 Sep 2024 16:27:09 -0700 Subject: [PATCH 1/4] [-Wunsafe-buffer-usage] Fix a bug and suppress libc warnings for C files - Fix a bug in UnsafeBufferUsage.cpp related to casting to PointerType - Suppress -Wunsafe-buffer-usage-in-libc-call for C files (rdar://117182250) --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 8 +--- clang/lib/Sema/AnalysisBasedWarnings.cpp | 4 +++- .../warn-unsafe-buffer-usage-no-libc-functions-in-c.c| 9 + 3 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index a16762244b1766..110a121e71a7d2 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -250,7 +250,9 @@ AST_MATCHER_P(Stmt, ignoreUnsafeBufferInContainer, AST_MATCHER_P(Stmt, ignoreUnsafeLibcCall, const UnsafeBufferUsageHandler *, Handler) { - return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc()); + if (Finder->getASTContext().getLangOpts().CPlusPlus) +return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc()); + return true; /* Only warn about libc calls for C++ */ } AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher, innerMatcher) { @@ -789,7 +791,7 @@ AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg, if (!FristParmTy->isPointerType()) return false; // possibly some user-defined printf function - QualType FirstPteTy = (cast(FristParmTy))->getPointeeType(); + QualType FirstPteTy = FristParmTy->getAs()->getPointeeType(); if (!Ctx.getFILEType() .isNull() && //`FILE *` must be in the context if it is fprintf @@ -865,7 +867,7 @@ AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) { if (!FirstParmTy->isPointerType()) return false; // Not an snprint - QualType FirstPteTy = cast(FirstParmTy)->getPointeeType(); + QualType FirstPteTy = FirstParmTy->getAs()->getPointeeType(); const Expr *Buf = Node.getArg(0), *Size = Node.getArg(1); if (FirstPteTy.isConstQualified() || !Buf->getType()->isPointerType() || diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 117b2c8bc57935..7e0b929abea683 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -2549,6 +2549,7 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( DiagnosticOptions &DiagOpts = Diags.getDiagnosticOptions(); // UnsafeBufferUsage analysis settings. + bool IsCXXLang = S.getLangOpts().CPlusPlus; bool UnsafeBufferUsageCanEmitSuggestions = S.getLangOpts().CPlusPlus20; bool UnsafeBufferUsageShouldEmitSuggestions = // Should != Can. UnsafeBufferUsageCanEmitSuggestions && @@ -2581,7 +2582,8 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( !Diags.isIgnored(diag::warn_unsafe_buffer_variable, SourceLocation()) || !Diags.isIgnored(diag::warn_unsafe_buffer_usage_in_container, SourceLocation()) || - !Diags.isIgnored(diag::warn_unsafe_buffer_libc_call, SourceLocation())) { + (!Diags.isIgnored(diag::warn_unsafe_buffer_libc_call, SourceLocation()) && + IsCXXLang)) { CallableVisitor(CallAnalyzers).TraverseTranslationUnitDecl(TU); } } diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c b/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c new file mode 100644 index 00..e305c3e140dff9 --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -Wunsafe-buffer-usage %s -verify %s + +void* memcpy(void *dst,const void *src, unsigned long size); + +void f(int *p, int *q) { + + memcpy(p, q, 10); // no libc warn in C + ++p[5]; // expected-warning{{unsafe buffer access}} +} >From 42664f03f16a99aa17bb33f473a01db73af796f5 Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Fri, 20 Sep 2024 18:53:28 -0700 Subject: [PATCH 2/4] Add tests and fix a typo --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 6 +++--- .../warn-unsafe-buffer-usage-libc-functions.cpp| 10 ++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 110a121e71a7d2..6c1979179711b9 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -786,12 +786,12 @@ AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg, return false; // possibly some user-defined printf function ASTContext &Ctx = Finder->getASTContext(); - QualType FristParmTy = FD->getParamDecl(0)->getT
[clang] [-Wunsafe-buffer-usage] Fix a bug and suppress libc warnings for C files (PR #109496)
@@ -784,12 +786,12 @@ AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg, return false; // possibly some user-defined printf function ASTContext &Ctx = Finder->getASTContext(); - QualType FristParmTy = FD->getParamDecl(0)->getType(); + QualType FirstParmTy = FD->getParamDecl(0)->getType(); - if (!FristParmTy->isPointerType()) + if (!FirstParmTy->isPointerType()) return false; // possibly some user-defined printf function - QualType FirstPteTy = (cast(FristParmTy))->getPointeeType(); + QualType FirstPteTy = FirstParmTy->getAs()->getPointeeType(); ziqingluo-90 wrote: The difference between `castAs` and `getAs` is that `castAs` asserts the canonical type is `PointerType` while `getAs` returns null if not. I think `castAs` is more suitable for our case. https://github.com/llvm/llvm-project/pull/109496 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Fix a bug and suppress libc warnings for C files (PR #109496)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/109496 >From e7f7f82b25eaae86623ac8f47731892b3b629d7d Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Fri, 20 Sep 2024 16:27:09 -0700 Subject: [PATCH 1/3] [-Wunsafe-buffer-usage] Fix a bug and suppress libc warnings for C files - Fix a bug in UnsafeBufferUsage.cpp related to casting to PointerType - Suppress -Wunsafe-buffer-usage-in-libc-call for C files (rdar://117182250) --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 8 +--- clang/lib/Sema/AnalysisBasedWarnings.cpp | 4 +++- .../warn-unsafe-buffer-usage-no-libc-functions-in-c.c| 9 + 3 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index a16762244b1766..110a121e71a7d2 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -250,7 +250,9 @@ AST_MATCHER_P(Stmt, ignoreUnsafeBufferInContainer, AST_MATCHER_P(Stmt, ignoreUnsafeLibcCall, const UnsafeBufferUsageHandler *, Handler) { - return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc()); + if (Finder->getASTContext().getLangOpts().CPlusPlus) +return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc()); + return true; /* Only warn about libc calls for C++ */ } AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher, innerMatcher) { @@ -789,7 +791,7 @@ AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg, if (!FristParmTy->isPointerType()) return false; // possibly some user-defined printf function - QualType FirstPteTy = (cast(FristParmTy))->getPointeeType(); + QualType FirstPteTy = FristParmTy->getAs()->getPointeeType(); if (!Ctx.getFILEType() .isNull() && //`FILE *` must be in the context if it is fprintf @@ -865,7 +867,7 @@ AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) { if (!FirstParmTy->isPointerType()) return false; // Not an snprint - QualType FirstPteTy = cast(FirstParmTy)->getPointeeType(); + QualType FirstPteTy = FirstParmTy->getAs()->getPointeeType(); const Expr *Buf = Node.getArg(0), *Size = Node.getArg(1); if (FirstPteTy.isConstQualified() || !Buf->getType()->isPointerType() || diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 117b2c8bc57935..7e0b929abea683 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -2549,6 +2549,7 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( DiagnosticOptions &DiagOpts = Diags.getDiagnosticOptions(); // UnsafeBufferUsage analysis settings. + bool IsCXXLang = S.getLangOpts().CPlusPlus; bool UnsafeBufferUsageCanEmitSuggestions = S.getLangOpts().CPlusPlus20; bool UnsafeBufferUsageShouldEmitSuggestions = // Should != Can. UnsafeBufferUsageCanEmitSuggestions && @@ -2581,7 +2582,8 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( !Diags.isIgnored(diag::warn_unsafe_buffer_variable, SourceLocation()) || !Diags.isIgnored(diag::warn_unsafe_buffer_usage_in_container, SourceLocation()) || - !Diags.isIgnored(diag::warn_unsafe_buffer_libc_call, SourceLocation())) { + (!Diags.isIgnored(diag::warn_unsafe_buffer_libc_call, SourceLocation()) && + IsCXXLang)) { CallableVisitor(CallAnalyzers).TraverseTranslationUnitDecl(TU); } } diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c b/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c new file mode 100644 index 00..e305c3e140dff9 --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -Wunsafe-buffer-usage %s -verify %s + +void* memcpy(void *dst,const void *src, unsigned long size); + +void f(int *p, int *q) { + + memcpy(p, q, 10); // no libc warn in C + ++p[5]; // expected-warning{{unsafe buffer access}} +} >From 42664f03f16a99aa17bb33f473a01db73af796f5 Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Fri, 20 Sep 2024 18:53:28 -0700 Subject: [PATCH 2/3] Add tests and fix a typo --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 6 +++--- .../warn-unsafe-buffer-usage-libc-functions.cpp| 10 ++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 110a121e71a7d2..6c1979179711b9 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -786,12 +786,12 @@ AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg, return false; // possibly some user-defined printf function ASTContext &Ctx = Finder->getASTContext(); - QualType FristParmTy = FD->getParamDecl(0)->getT
[clang] [-Wunsafe-buffer-usage] Fix a bug and suppress libc warnings for C files (PR #109496)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/109496 >From e7f7f82b25eaae86623ac8f47731892b3b629d7d Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Fri, 20 Sep 2024 16:27:09 -0700 Subject: [PATCH 1/2] [-Wunsafe-buffer-usage] Fix a bug and suppress libc warnings for C files - Fix a bug in UnsafeBufferUsage.cpp related to casting to PointerType - Suppress -Wunsafe-buffer-usage-in-libc-call for C files (rdar://117182250) --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 8 +--- clang/lib/Sema/AnalysisBasedWarnings.cpp | 4 +++- .../warn-unsafe-buffer-usage-no-libc-functions-in-c.c| 9 + 3 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index a16762244b1766..110a121e71a7d2 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -250,7 +250,9 @@ AST_MATCHER_P(Stmt, ignoreUnsafeBufferInContainer, AST_MATCHER_P(Stmt, ignoreUnsafeLibcCall, const UnsafeBufferUsageHandler *, Handler) { - return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc()); + if (Finder->getASTContext().getLangOpts().CPlusPlus) +return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc()); + return true; /* Only warn about libc calls for C++ */ } AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher, innerMatcher) { @@ -789,7 +791,7 @@ AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg, if (!FristParmTy->isPointerType()) return false; // possibly some user-defined printf function - QualType FirstPteTy = (cast(FristParmTy))->getPointeeType(); + QualType FirstPteTy = FristParmTy->getAs()->getPointeeType(); if (!Ctx.getFILEType() .isNull() && //`FILE *` must be in the context if it is fprintf @@ -865,7 +867,7 @@ AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) { if (!FirstParmTy->isPointerType()) return false; // Not an snprint - QualType FirstPteTy = cast(FirstParmTy)->getPointeeType(); + QualType FirstPteTy = FirstParmTy->getAs()->getPointeeType(); const Expr *Buf = Node.getArg(0), *Size = Node.getArg(1); if (FirstPteTy.isConstQualified() || !Buf->getType()->isPointerType() || diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 117b2c8bc57935..7e0b929abea683 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -2549,6 +2549,7 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( DiagnosticOptions &DiagOpts = Diags.getDiagnosticOptions(); // UnsafeBufferUsage analysis settings. + bool IsCXXLang = S.getLangOpts().CPlusPlus; bool UnsafeBufferUsageCanEmitSuggestions = S.getLangOpts().CPlusPlus20; bool UnsafeBufferUsageShouldEmitSuggestions = // Should != Can. UnsafeBufferUsageCanEmitSuggestions && @@ -2581,7 +2582,8 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( !Diags.isIgnored(diag::warn_unsafe_buffer_variable, SourceLocation()) || !Diags.isIgnored(diag::warn_unsafe_buffer_usage_in_container, SourceLocation()) || - !Diags.isIgnored(diag::warn_unsafe_buffer_libc_call, SourceLocation())) { + (!Diags.isIgnored(diag::warn_unsafe_buffer_libc_call, SourceLocation()) && + IsCXXLang)) { CallableVisitor(CallAnalyzers).TraverseTranslationUnitDecl(TU); } } diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c b/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c new file mode 100644 index 00..e305c3e140dff9 --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -Wunsafe-buffer-usage %s -verify %s + +void* memcpy(void *dst,const void *src, unsigned long size); + +void f(int *p, int *q) { + + memcpy(p, q, 10); // no libc warn in C + ++p[5]; // expected-warning{{unsafe buffer access}} +} >From 42664f03f16a99aa17bb33f473a01db73af796f5 Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Fri, 20 Sep 2024 18:53:28 -0700 Subject: [PATCH 2/2] Add tests and fix a typo --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 6 +++--- .../warn-unsafe-buffer-usage-libc-functions.cpp| 10 ++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 110a121e71a7d2..6c1979179711b9 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -786,12 +786,12 @@ AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg, return false; // possibly some user-defined printf function ASTContext &Ctx = Finder->getASTContext(); - QualType FristParmTy = FD->getParamDecl(0)->getT
[clang] [-Wunsafe-buffer-usage] Fix a bug and suppress libc warnings for C files (PR #109496)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/109496 >From e7f7f82b25eaae86623ac8f47731892b3b629d7d Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Fri, 20 Sep 2024 16:27:09 -0700 Subject: [PATCH] [-Wunsafe-buffer-usage] Fix a bug and suppress libc warnings for C files - Fix a bug in UnsafeBufferUsage.cpp related to casting to PointerType - Suppress -Wunsafe-buffer-usage-in-libc-call for C files (rdar://117182250) --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 8 +--- clang/lib/Sema/AnalysisBasedWarnings.cpp | 4 +++- .../warn-unsafe-buffer-usage-no-libc-functions-in-c.c| 9 + 3 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index a16762244b1766..110a121e71a7d2 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -250,7 +250,9 @@ AST_MATCHER_P(Stmt, ignoreUnsafeBufferInContainer, AST_MATCHER_P(Stmt, ignoreUnsafeLibcCall, const UnsafeBufferUsageHandler *, Handler) { - return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc()); + if (Finder->getASTContext().getLangOpts().CPlusPlus) +return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc()); + return true; /* Only warn about libc calls for C++ */ } AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher, innerMatcher) { @@ -789,7 +791,7 @@ AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg, if (!FristParmTy->isPointerType()) return false; // possibly some user-defined printf function - QualType FirstPteTy = (cast(FristParmTy))->getPointeeType(); + QualType FirstPteTy = FristParmTy->getAs()->getPointeeType(); if (!Ctx.getFILEType() .isNull() && //`FILE *` must be in the context if it is fprintf @@ -865,7 +867,7 @@ AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) { if (!FirstParmTy->isPointerType()) return false; // Not an snprint - QualType FirstPteTy = cast(FirstParmTy)->getPointeeType(); + QualType FirstPteTy = FirstParmTy->getAs()->getPointeeType(); const Expr *Buf = Node.getArg(0), *Size = Node.getArg(1); if (FirstPteTy.isConstQualified() || !Buf->getType()->isPointerType() || diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 117b2c8bc57935..7e0b929abea683 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -2549,6 +2549,7 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( DiagnosticOptions &DiagOpts = Diags.getDiagnosticOptions(); // UnsafeBufferUsage analysis settings. + bool IsCXXLang = S.getLangOpts().CPlusPlus; bool UnsafeBufferUsageCanEmitSuggestions = S.getLangOpts().CPlusPlus20; bool UnsafeBufferUsageShouldEmitSuggestions = // Should != Can. UnsafeBufferUsageCanEmitSuggestions && @@ -2581,7 +2582,8 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( !Diags.isIgnored(diag::warn_unsafe_buffer_variable, SourceLocation()) || !Diags.isIgnored(diag::warn_unsafe_buffer_usage_in_container, SourceLocation()) || - !Diags.isIgnored(diag::warn_unsafe_buffer_libc_call, SourceLocation())) { + (!Diags.isIgnored(diag::warn_unsafe_buffer_libc_call, SourceLocation()) && + IsCXXLang)) { CallableVisitor(CallAnalyzers).TraverseTranslationUnitDecl(TU); } } diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c b/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c new file mode 100644 index 00..e305c3e140dff9 --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -Wunsafe-buffer-usage %s -verify %s + +void* memcpy(void *dst,const void *src, unsigned long size); + +void f(int *p, int *q) { + + memcpy(p, q, 10); // no libc warn in C + ++p[5]; // expected-warning{{unsafe buffer access}} +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Fix a bug and suppress libc warnings for C files (PR #109496)
https://github.com/ziqingluo-90 created https://github.com/llvm/llvm-project/pull/109496 - Fix a bug in UnsafeBufferUsage.cpp related to casting to PointerType (report by @hnrklssn [here](https://github.com/llvm/llvm-project/pull/101583#discussion_r1767705992)) - Suppress -Wunsafe-buffer-usage-in-libc-call for C files >From e261352db529e221876e4a0d6b255a734d26336f Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Fri, 20 Sep 2024 16:27:09 -0700 Subject: [PATCH] [-Wunsafe-buffer-usage] Fix a bug and suppress libc warnings for C files - Fix a bug in UnsafeBufferUsage.cpp related to casting to PointerType - Suppress -Wunsafe-buffer-usage-in-libc-call for C files (rdar://117182250) --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 8 +--- clang/lib/Sema/AnalysisBasedWarnings.cpp | 6 -- .../warn-unsafe-buffer-usage-no-libc-functions-in-c.c| 9 + 3 files changed, 18 insertions(+), 5 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index a16762244b1766..110a121e71a7d2 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -250,7 +250,9 @@ AST_MATCHER_P(Stmt, ignoreUnsafeBufferInContainer, AST_MATCHER_P(Stmt, ignoreUnsafeLibcCall, const UnsafeBufferUsageHandler *, Handler) { - return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc()); + if (Finder->getASTContext().getLangOpts().CPlusPlus) +return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc()); + return true; /* Only warn about libc calls for C++ */ } AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher, innerMatcher) { @@ -789,7 +791,7 @@ AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg, if (!FristParmTy->isPointerType()) return false; // possibly some user-defined printf function - QualType FirstPteTy = (cast(FristParmTy))->getPointeeType(); + QualType FirstPteTy = FristParmTy->getAs()->getPointeeType(); if (!Ctx.getFILEType() .isNull() && //`FILE *` must be in the context if it is fprintf @@ -865,7 +867,7 @@ AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) { if (!FirstParmTy->isPointerType()) return false; // Not an snprint - QualType FirstPteTy = cast(FirstParmTy)->getPointeeType(); + QualType FirstPteTy = FirstParmTy->getAs()->getPointeeType(); const Expr *Buf = Node.getArg(0), *Size = Node.getArg(1); if (FirstPteTy.isConstQualified() || !Buf->getType()->isPointerType() || diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 117b2c8bc57935..724b543cdf882c 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -2549,6 +2549,7 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( DiagnosticOptions &DiagOpts = Diags.getDiagnosticOptions(); // UnsafeBufferUsage analysis settings. + bool IsCXXLang = S.getLangOpts().CPlusPlus; bool UnsafeBufferUsageCanEmitSuggestions = S.getLangOpts().CPlusPlus20; bool UnsafeBufferUsageShouldEmitSuggestions = // Should != Can. UnsafeBufferUsageCanEmitSuggestions && @@ -2567,8 +2568,9 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( Node->getBeginLoc()) || !Diags.isIgnored(diag::warn_unsafe_buffer_usage_in_container, Node->getBeginLoc()) || -!Diags.isIgnored(diag::warn_unsafe_buffer_libc_call, - Node->getBeginLoc())) { +(!Diags.isIgnored(diag::warn_unsafe_buffer_libc_call, + Node->getBeginLoc()) && + IsCXXLang /* we only warn about libc calls in C++ files */)) { clang::checkUnsafeBufferUsage(Node, R, UnsafeBufferUsageShouldEmitSuggestions); } diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c b/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c new file mode 100644 index 00..e305c3e140dff9 --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -Wunsafe-buffer-usage %s -verify %s + +void* memcpy(void *dst,const void *src, unsigned long size); + +void f(int *p, int *q) { + + memcpy(p, q, 10); // no libc warn in C + ++p[5]; // expected-warning{{unsafe buffer access}} +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Reduce false positives with constant arrays in libc warnings (PR #108308)
https://github.com/ziqingluo-90 closed https://github.com/llvm/llvm-project/pull/108308 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Reduce false positives with constant arrays in libc warnings (PR #108308)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/108308 >From 3c8830a0e69922faf4fad190ba0b2e01a3392e62 Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Tue, 6 Aug 2024 17:54:23 -0700 Subject: [PATCH 1/2] [-Wunsafe-buffer-usage] Reduce false positives with constant arrays in libc warnings For `snprintf(a, sizeof a, ...)`, the first two arguments form a safe pattern if `a` is a constant array. In such a case, this commit will suppress the warning. (rdar://117182250) --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 27 ++- ...arn-unsafe-buffer-usage-libc-functions.cpp | 14 +- 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 21d4368151eb47..3451ba7cc34901 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -668,7 +668,7 @@ AST_MATCHER(FunctionDecl, isPredefinedUnsafeLibcFunc) { auto *II = Node.getIdentifier(); - if (!II) + if (!II) return false; StringRef Name = LibcFunNamePrefixSuffixParser().matchName( @@ -833,9 +833,16 @@ AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg, // // For the first two arguments: `ptr` and `size`, they are safe if in the // following patterns: +// +// Pattern 1: //ptr := DRE.data(); //size:= DRE.size()/DRE.size_bytes() // And DRE is a hardened container or view. +// +// Pattern 2: +//ptr := Constant-Array-DRE; +//size:= any expression that has compile-time constant value equivalent to +// sizeof (Constant-Array-DRE) AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) { const FunctionDecl *FD = Node.getDirectCallee(); @@ -856,6 +863,7 @@ AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) { !Size->getType()->isIntegerType()) return false; // not an snprintf call + // Pattern 1: static StringRef SizedObjs[] = {"span", "array", "vector", "basic_string_view", "basic_string"}; Buf = Buf->IgnoreParenImpCasts(); @@ -886,6 +894,23 @@ AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) { SizedObj) return false; // It is in fact safe } + + // Pattern 2: + if (auto *DRE = dyn_cast(Buf->IgnoreParenImpCasts())) { +ASTContext &Ctx = Finder->getASTContext(); + +if (auto *CAT = Ctx.getAsConstantArrayType(DRE->getType())) { + Expr::EvalResult ER; + // The array element type must be compatible with `char` otherwise an + // explicit cast will be needed, which will make this check unreachable. + // Therefore, the array extent is same as its' bytewise size. + if (Size->EvaluateAsConstantExpr(ER, Ctx)) { +APSInt EVal = ER.Val.getInt(); // Size must have integer type + +return APSInt::compareValues(EVal, APSInt(CAT->getSize(), true)) != 0; + } +} + } return true; // ptr and size are not in safe pattern } } // namespace libc_func_matchers diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp index a3716073609f6f..8e777a7a51c994 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp @@ -83,6 +83,12 @@ void f(char * p, char * q, std::span s, std::span s2) { sscanf(p, "%s%d", "hello", *p);// expected-warning{{function 'sscanf' is unsafe}} sscanf_s(p, "%s%d", "hello", *p); // expected-warning{{function 'sscanf_s' is unsafe}} fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function 'fprintf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}} + + char a[10], b[11]; + int c[10]; + + snprintf(a, sizeof(b), "%s", __PRETTY_FUNCTION__); // expected-warning{{function 'snprintf' is unsafe}} expected-note{{buffer pointer and size may not match}} + snprintf((char*)c, sizeof(c), "%s", __PRETTY_FUNCTION__); // expected-warning{{function 'snprintf' is unsafe}} expected-note{{buffer pointer and size may not match}} fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, "hello"); // no warn printf("%s%d", "hello", *p); // no warn snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn @@ -93,13 +99,19 @@ void f(char * p, char * q, std::span s, std::span s2) { __asan_printf();// a printf but no argument, so no warn } -void v(std::string s1, int *p) { +void safe_examples(std::string s1, int *p) { snprintf(s1.data(), s1.size_bytes(), "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str()); // no warn snprintf(s1.data(), s1.size_bytes(), s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn printf("%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str()); // no warn printf(s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str());
[clang] [-Wunsafe-buffer-usage] Reduce false positives with constant arrays in libc warnings (PR #108308)
https://github.com/ziqingluo-90 edited https://github.com/llvm/llvm-project/pull/108308 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Reduce false positives with constant arrays in libc warnings (PR #108308)
https://github.com/ziqingluo-90 created https://github.com/llvm/llvm-project/pull/108308 For `snprintf(a, sizeof a, ...)`, the first two arguments form a safe pattern if `a` is a constant array. In such a case, this commit will suppress the warning. (rdar://117182250) This pattern will effectively remove a lot of false positives in our downstream projects. >From 3c8830a0e69922faf4fad190ba0b2e01a3392e62 Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Tue, 6 Aug 2024 17:54:23 -0700 Subject: [PATCH] [-Wunsafe-buffer-usage] Reduce false positives with constant arrays in libc warnings For `snprintf(a, sizeof a, ...)`, the first two arguments form a safe pattern if `a` is a constant array. In such a case, this commit will suppress the warning. (rdar://117182250) --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 27 ++- ...arn-unsafe-buffer-usage-libc-functions.cpp | 14 +- 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 21d4368151eb47..3451ba7cc34901 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -668,7 +668,7 @@ AST_MATCHER(FunctionDecl, isPredefinedUnsafeLibcFunc) { auto *II = Node.getIdentifier(); - if (!II) + if (!II) return false; StringRef Name = LibcFunNamePrefixSuffixParser().matchName( @@ -833,9 +833,16 @@ AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg, // // For the first two arguments: `ptr` and `size`, they are safe if in the // following patterns: +// +// Pattern 1: //ptr := DRE.data(); //size:= DRE.size()/DRE.size_bytes() // And DRE is a hardened container or view. +// +// Pattern 2: +//ptr := Constant-Array-DRE; +//size:= any expression that has compile-time constant value equivalent to +// sizeof (Constant-Array-DRE) AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) { const FunctionDecl *FD = Node.getDirectCallee(); @@ -856,6 +863,7 @@ AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) { !Size->getType()->isIntegerType()) return false; // not an snprintf call + // Pattern 1: static StringRef SizedObjs[] = {"span", "array", "vector", "basic_string_view", "basic_string"}; Buf = Buf->IgnoreParenImpCasts(); @@ -886,6 +894,23 @@ AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) { SizedObj) return false; // It is in fact safe } + + // Pattern 2: + if (auto *DRE = dyn_cast(Buf->IgnoreParenImpCasts())) { +ASTContext &Ctx = Finder->getASTContext(); + +if (auto *CAT = Ctx.getAsConstantArrayType(DRE->getType())) { + Expr::EvalResult ER; + // The array element type must be compatible with `char` otherwise an + // explicit cast will be needed, which will make this check unreachable. + // Therefore, the array extent is same as its' bytewise size. + if (Size->EvaluateAsConstantExpr(ER, Ctx)) { +APSInt EVal = ER.Val.getInt(); // Size must have integer type + +return APSInt::compareValues(EVal, APSInt(CAT->getSize(), true)) != 0; + } +} + } return true; // ptr and size are not in safe pattern } } // namespace libc_func_matchers diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp index a3716073609f6f..8e777a7a51c994 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp @@ -83,6 +83,12 @@ void f(char * p, char * q, std::span s, std::span s2) { sscanf(p, "%s%d", "hello", *p);// expected-warning{{function 'sscanf' is unsafe}} sscanf_s(p, "%s%d", "hello", *p); // expected-warning{{function 'sscanf_s' is unsafe}} fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function 'fprintf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}} + + char a[10], b[11]; + int c[10]; + + snprintf(a, sizeof(b), "%s", __PRETTY_FUNCTION__); // expected-warning{{function 'snprintf' is unsafe}} expected-note{{buffer pointer and size may not match}} + snprintf((char*)c, sizeof(c), "%s", __PRETTY_FUNCTION__); // expected-warning{{function 'snprintf' is unsafe}} expected-note{{buffer pointer and size may not match}} fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, "hello"); // no warn printf("%s%d", "hello", *p); // no warn snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn @@ -93,13 +99,19 @@ void f(char * p, char * q, std::span s, std::span s2) { __asan_printf();// a printf but no argument, so no warn } -void v(std::string s1, int *p) { +void safe_examples(std::string s1, int *p) { snprintf(s1.data(), s1.size_bytes(), "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str()); // no warn snprintf(s
[clang] [WIP][-Wunsafe-buffer-usage] Add a new option for unsafe buffer warnings on libc functions (PR #105383)
https://github.com/ziqingluo-90 closed https://github.com/llvm/llvm-project/pull/105383 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
ziqingluo-90 wrote: > Btw a question about the new warning: So with > -Wunsafe-buffer-usage-in-libc-call clang now warns on the following? > > ``` > #include > > void foo(void) { > char q[10]; > snprintf(q, 10, "%s", "hello"); > } > ``` > > It says > > ``` > foo.c:5:3: warning: function 'snprintf' is unsafe > [-Wunsafe-buffer-usage-in-libc-call] > 5 | snprintf(q, 10, "%s", "hello"); > | ^~ > foo.c:5:12: note: buffer pointer and size may not match > 5 | snprintf(q, 10, "%s", "hello"); > |^ > 1 warning generated. > ``` > > Is that as expected? If so, how should snprintf be used to avoid the warning? Yes, this is expected. According to the C++ Safe Buffers programming model, buffer pointers should be changed to `std::span`. Then `snprintf(span.data(), span.size(), ...)` is considered safe and will not be warned. We may also allow the use of the form `snprintf(span.first(10).data(), 10, ...)` later. https://github.com/llvm/llvm-project/pull/101583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
ziqingluo-90 wrote: > > We're seeing a crash with this patch when compiling with -Weverything. > > ``` > > clang: ../../clang/include/clang/AST/Expr.h:3026: const clang::Expr > > *clang::CallExpr::getArg(unsigned int) const: Assertion `Arg < getNumArgs() > > && "Arg access out of range!"' failed. > > ``` > > > > > > > > > > > > > > > > > > > > > > > > I'm working on extracting a reproducer. > > `clang -c -Weverything bbi-98867.c` with bbi-98867.c being just > > ``` > void printf() { printf(); } > ``` > > (I've creduced the reproducer, I can't share the full one) Thanks for finding the bug and making a reproducer. It has been fixed in de88d7db7b77141297fbb5638ee1e18d1fba53b8. https://github.com/llvm/llvm-project/pull/101583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
ziqingluo-90 wrote: > There needs to be a flag to opt out of this to not break everybody who is > currently using unsafe-buffer-usage. #105383 seems to do that, but it really > should be in this same PR. Can this be reverted and relanded with the flag? @aeubanks you should be able to suppress the new warning with `-Wno-unsafe-buffer-usage-in-libc-call` https://github.com/llvm/llvm-project/pull/101583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
ziqingluo-90 wrote: > There needs to be a flag to opt out of this to not break everybody who is > currently using unsafe-buffer-usage. #105383 seems to do that, but it really > should be in this same PR. Can this be reverted and relanded with the flag? ok, will do https://github.com/llvm/llvm-project/pull/101583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
https://github.com/ziqingluo-90 closed https://github.com/llvm/llvm-project/pull/101583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
https://github.com/ziqingluo-90 edited https://github.com/llvm/llvm-project/pull/101583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/101583 >From cce5781733a7c294f10dc75f48372ff6ee331239 Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Thu, 1 Aug 2024 16:36:27 -0700 Subject: [PATCH 1/5] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc functions Warning about calls to libc functions involving buffer access. Warned functions are hardcoded by names. (rdar://117182250) --- .../Analysis/Analyses/UnsafeBufferUsage.h | 12 + .../Analyses/UnsafeBufferUsageGadgets.def | 1 + .../clang/Basic/DiagnosticSemaKinds.td| 7 + clang/lib/Analysis/UnsafeBufferUsage.cpp | 408 +- clang/lib/Sema/AnalysisBasedWarnings.cpp | 12 + ...-usage-libc-functions-inline-namespace.cpp | 60 +++ ...arn-unsafe-buffer-usage-libc-functions.cpp | 86 ...n-unsafe-buffer-usage-test-unreachable.cpp | 4 +- 8 files changed, 586 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h index 228b4ae1e3e115..3e0fae6db7562d 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -15,6 +15,7 @@ #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" #include "clang/AST/Stmt.h" #include "clang/Basic/SourceLocation.h" #include "llvm/Support/Debug.h" @@ -106,6 +107,17 @@ class UnsafeBufferUsageHandler { virtual void handleUnsafeOperation(const Stmt *Operation, bool IsRelatedToDecl, ASTContext &Ctx) = 0; + /// Invoked when a call to an unsafe libc function is found. + /// \param PrintfInfo + /// is 0 if the callee function is not a member of the printf family; + /// is 1 if the callee is `sprintf`; + /// is 2 if arguments of the call have `__size_by` relation but are not in a + /// safe pattern; + /// is 3 if string arguments do not guarantee null-termination + /// is 4 if the callee takes va_list + virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo, +ASTContext &Ctx) = 0; + /// Invoked when an unsafe operation with a std container is found. virtual void handleUnsafeOperationInContainer(const Stmt *Operation, bool IsRelatedToDecl, diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 242ad763ba62b9..ac01b285ae833b 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) WARNING_GADGET(UnsafeBufferUsageCtorAttr) WARNING_GADGET(DataInvocation) +WARNING_GADGET(UnsafeLibcFunctionCall) WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 554dbaff2ce0d8..7e1e3686ce6554 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12383,6 +12383,13 @@ def warn_unsafe_buffer_operation : Warning< "%select{unsafe pointer operation|unsafe pointer arithmetic|" "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe invocation of span::data}0">, InGroup, DefaultIgnore; +def warn_unsafe_buffer_libc_call : Warning< + "function %0 introduces unsafe buffer access">, + InGroup, DefaultIgnore; +def note_unsafe_buffer_printf_call : Note< + "%select{| change to 'snprintf' for explicit bounds checking | buffer pointer and size may not match" + "| use 'std::string::c_str' or string literal as string pointer to guarantee null-termination" + "| do not use va_list that cannot be checked at compile-time for bounds safety}0">; def note_unsafe_buffer_operation : Note< "used%select{| in pointer arithmetic| in buffer access}0 here">; def note_unsafe_buffer_variable_fixit_group : Note< diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 866222380974b6..751fb75f6ed602 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -9,20 +9,24 @@ #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" +#include "clang/AST/DeclC
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
https://github.com/ziqingluo-90 edited https://github.com/llvm/llvm-project/pull/101583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
@@ -12383,6 +12383,13 @@ def warn_unsafe_buffer_operation : Warning< "%select{unsafe pointer operation|unsafe pointer arithmetic|" "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe invocation of span::data}0">, InGroup, DefaultIgnore; +def warn_unsafe_buffer_libc_call : Warning< + "function %0 introduces unsafe buffer access">, + InGroup, DefaultIgnore; +def note_unsafe_buffer_printf_call : Note< + "%select{| change to 'snprintf' for explicit bounds checking | buffer pointer and size may not match" + "| use 'std::string::c_str' or string literal as string pointer to guarantee null-termination" ziqingluo-90 wrote: > I can also imagine that for some functions it won't be as simple as saying > "pointer parameter 1, 3 and 5" as it could be interplay between pointers, > integer parameters and buffer content. Not sure if I understand your concern. Could you give an example? https://github.com/llvm/llvm-project/pull/101583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
ziqingluo-90 wrote: Addressed comments. https://github.com/llvm/llvm-project/pull/101583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/101583 >From cce5781733a7c294f10dc75f48372ff6ee331239 Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Thu, 1 Aug 2024 16:36:27 -0700 Subject: [PATCH 1/4] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc functions Warning about calls to libc functions involving buffer access. Warned functions are hardcoded by names. (rdar://117182250) --- .../Analysis/Analyses/UnsafeBufferUsage.h | 12 + .../Analyses/UnsafeBufferUsageGadgets.def | 1 + .../clang/Basic/DiagnosticSemaKinds.td| 7 + clang/lib/Analysis/UnsafeBufferUsage.cpp | 408 +- clang/lib/Sema/AnalysisBasedWarnings.cpp | 12 + ...-usage-libc-functions-inline-namespace.cpp | 60 +++ ...arn-unsafe-buffer-usage-libc-functions.cpp | 86 ...n-unsafe-buffer-usage-test-unreachable.cpp | 4 +- 8 files changed, 586 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h index 228b4ae1e3e115..3e0fae6db7562d 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -15,6 +15,7 @@ #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" #include "clang/AST/Stmt.h" #include "clang/Basic/SourceLocation.h" #include "llvm/Support/Debug.h" @@ -106,6 +107,17 @@ class UnsafeBufferUsageHandler { virtual void handleUnsafeOperation(const Stmt *Operation, bool IsRelatedToDecl, ASTContext &Ctx) = 0; + /// Invoked when a call to an unsafe libc function is found. + /// \param PrintfInfo + /// is 0 if the callee function is not a member of the printf family; + /// is 1 if the callee is `sprintf`; + /// is 2 if arguments of the call have `__size_by` relation but are not in a + /// safe pattern; + /// is 3 if string arguments do not guarantee null-termination + /// is 4 if the callee takes va_list + virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo, +ASTContext &Ctx) = 0; + /// Invoked when an unsafe operation with a std container is found. virtual void handleUnsafeOperationInContainer(const Stmt *Operation, bool IsRelatedToDecl, diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 242ad763ba62b9..ac01b285ae833b 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) WARNING_GADGET(UnsafeBufferUsageCtorAttr) WARNING_GADGET(DataInvocation) +WARNING_GADGET(UnsafeLibcFunctionCall) WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 554dbaff2ce0d8..7e1e3686ce6554 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12383,6 +12383,13 @@ def warn_unsafe_buffer_operation : Warning< "%select{unsafe pointer operation|unsafe pointer arithmetic|" "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe invocation of span::data}0">, InGroup, DefaultIgnore; +def warn_unsafe_buffer_libc_call : Warning< + "function %0 introduces unsafe buffer access">, + InGroup, DefaultIgnore; +def note_unsafe_buffer_printf_call : Note< + "%select{| change to 'snprintf' for explicit bounds checking | buffer pointer and size may not match" + "| use 'std::string::c_str' or string literal as string pointer to guarantee null-termination" + "| do not use va_list that cannot be checked at compile-time for bounds safety}0">; def note_unsafe_buffer_operation : Note< "used%select{| in pointer arithmetic| in buffer access}0 here">; def note_unsafe_buffer_variable_fixit_group : Note< diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 866222380974b6..751fb75f6ed602 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -9,20 +9,24 @@ #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" +#include "clang/AST/DeclC
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
@@ -0,0 +1,101 @@ +// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \ +// RUN:-verify %s + +typedef struct {} FILE; +void memcpy(); +void __asan_memcpy(); +void strcpy(); +void strcpy_s(); +void wcscpy_s(); +unsigned strlen( const char* str ); +int fprintf( FILE* stream, const char* format, ... ); +int printf( const char* format, ... ); +int sprintf( char* buffer, const char* format, ... ); +int swprintf( char* buffer, const char* format, ... ); +int snprintf( char* buffer, unsigned buf_size, const char* format, ... ); +int snwprintf( char* buffer, unsigned buf_size, const char* format, ... ); +int snwprintf_s( char* buffer, unsigned buf_size, const char* format, ... ); +int vsnprintf( char* buffer, unsigned buf_size, const char* format, ... ); +int sscanf_s(const char * buffer, const char * format, ...); +int sscanf(const char * buffer, const char * format, ... ); + +namespace std { + template< class InputIt, class OutputIt > + OutputIt copy( InputIt first, InputIt last, +OutputIt d_first ); + + struct iterator{}; + template + struct span { +T * ptr; +T * data(); +unsigned size_bytes(); +unsigned size(); +iterator begin() const noexcept; +iterator end() const noexcept; + }; + + template + struct basic_string { +T* p; +T *c_str(); +T *data(); +unsigned size_bytes(); + }; + + typedef basic_string string; + typedef basic_string wstring; + + // C function under std: + void memcpy(); + void strcpy(); +} + +void f(char * p, char * q, std::span s, std::span s2) { + memcpy(); // expected-warning{{function 'memcpy' introduces unsafe buffer access}} + std::memcpy(); // expected-warning{{function 'memcpy' introduces unsafe buffer access}} + __builtin_memcpy(p, q, 64); // expected-warning{{function '__builtin_memcpy' introduces unsafe buffer access}} + __builtin___memcpy_chk(p, q, 8, 64); // expected-warning{{function '__builtin___memcpy_chk' introduces unsafe buffer access}} + __asan_memcpy(); // expected-warning{{function '__asan_memcpy' introduces unsafe buffer access}} + strcpy(); // expected-warning{{function 'strcpy' introduces unsafe buffer access}} + std::strcpy(); // expected-warning{{function 'strcpy' introduces unsafe buffer access}} + strcpy_s(); // expected-warning{{function 'strcpy_s' introduces unsafe buffer access}} + wcscpy_s(); // expected-warning{{function 'wcscpy_s' introduces unsafe buffer access}} + + + /* Test printfs */ + fprintf((FILE*)p, "%s%d", p, *p); // expected-warning{{function 'fprintf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}} + printf("%s%d", p, *p); // expected-warning{{function 'printf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}} + sprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'sprintf' introduces unsafe buffer access}} expected-note{{change to 'snprintf' for explicit bounds checking}} + swprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'swprintf' introduces unsafe buffer access}} expected-note{{change to 'snprintf' for explicit bounds checking}} + snprintf(q, 10, "%s%d", "hello", *p); // expected-warning{{function 'snprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} ziqingluo-90 wrote: yep, this case actually happens a lot in our downstream projects. I planed to handle it in a follow-up patch: https://github.com/llvm/llvm-project/pull/105383 https://github.com/llvm/llvm-project/pull/101583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][-Wunsafe-buffer-usage] Add a new option for unsafe buffer warnings on libc functions (PR #105383)
https://github.com/ziqingluo-90 edited https://github.com/llvm/llvm-project/pull/105383 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
@@ -12383,6 +12383,13 @@ def warn_unsafe_buffer_operation : Warning< "%select{unsafe pointer operation|unsafe pointer arithmetic|" "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe invocation of span::data}0">, InGroup, DefaultIgnore; +def warn_unsafe_buffer_libc_call : Warning< + "function %0 introduces unsafe buffer access">, + InGroup, DefaultIgnore; +def note_unsafe_buffer_printf_call : Note< + "%select{| change to 'snprintf' for explicit bounds checking | buffer pointer and size may not match" + "| use 'std::string::c_str' or string literal as string pointer to guarantee null-termination" ziqingluo-90 wrote: > Have we tried getting data from a real project? Yes! Basically these special handlings are all corresponding to actual cases in our downstream projects. >I think for this warning in particular, it's valuable to point the user to the >specific %s argument. To, at least, make sure that they know we don't mean the >snprintf's target string or something. I will let the notes point to specific argument source locations. For better note messages, we could do it in a follow-up patch? https://github.com/llvm/llvm-project/pull/101583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
@@ -443,6 +449,396 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { return false; } +AST_MATCHER_P(CallExpr, hasNumArgs, unsigned, Num) { + return Node.getNumArgs() == Num; +} + +namespace libc_func_matchers { +// Under `libc_func_matchers`, define a set of matchers that match unsafe +// functions in libc and unsafe calls to them. + +// A tiny parser to strip off common prefix and suffix of libc function names +// in real code. +// +// Given a function name, `matchName` returns `CoreName` according to the +// following grammar: +// +// LibcName := CoreName | CoreName + "_s" +// MatchingName := "__builtin_" + LibcName | +// "__builtin___" + LibcName + "_chk" | +// "__asan_" + LibcName +// +struct LibcFunNamePrefixSuffixParser { + StringRef matchName(StringRef FunName, bool isBuiltin) { +// Try to match __builtin_: +if (isBuiltin && FunName.starts_with("__builtin_")) + // Then either it is __builtin_LibcName or __builtin___LibcName_chk or + // no match: + return matchLibcNameOrBuiltinChk( + FunName.drop_front(10 /* truncate "__builtin_" */)); +// Try to match __asan_: +if (FunName.starts_with("__asan_")) + return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */)); +return matchLibcName(FunName); + } + + // Parameter `Name` is the substring after stripping off the prefix + // "__builtin_". + StringRef matchLibcNameOrBuiltinChk(StringRef Name) { +if (Name.starts_with("__") && Name.ends_with("_chk")) + return matchLibcName( + Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */); +return matchLibcName(Name); + } + + StringRef matchLibcName(StringRef Name) { +if (Name.ends_with("_s")) + return Name.drop_back(2 /* truncate "_s" */); +return Name; + } +}; + +// A pointer type expression is known to be null-terminated, if it has the +// form: E.c_str(), for any expression E of `std::string` type. +static bool isNullTermPointer(const Expr *Ptr) { + if (isa(Ptr->IgnoreParenImpCasts())) +return true; + if (isa(Ptr->IgnoreParenImpCasts())) +return true; + if (auto *MCE = dyn_cast(Ptr->IgnoreParenImpCasts())) { +const CXXMethodDecl *MD = MCE->getMethodDecl(); +const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl(); + +if (MD && RD && RD->isInStdNamespace()) + if (MD->getName() == "c_str" && RD->getName() == "basic_string") +return true; + } + return false; +} + +// Return true iff at least one of following cases holds: +// 1. Format string is a literal and there is an unsafe pointer argument +// corresponding to an `s` specifier; +// 2. Format string is not a literal and there is least an unsafe pointer +// argument (including the formatter argument). +static bool hasUnsafeFormatOrSArg(const CallExpr *Call, unsigned FmtArgIdx, + ASTContext &Ctx, bool isKprintf = false) { + class StringFormatStringHandler + : public analyze_format_string::FormatStringHandler { +const CallExpr *Call; +unsigned FmtArgIdx; + + public: +StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx) +: Call(Call), FmtArgIdx(FmtArgIdx) {} + +bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, + const char *startSpecifier, + unsigned specifierLen, + const TargetInfo &Target) override { + if (FS.getConversionSpecifier().getKind() == + analyze_printf::PrintfConversionSpecifier::sArg) { +unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx; + +if (0 < ArgIdx && ArgIdx < Call->getNumArgs()) + if (!isNullTermPointer(Call->getArg(ArgIdx))) +return false; // stop parsing + } + return true; // continue parsing +} + }; + + const Expr *Fmt = Call->getArg(FmtArgIdx); + + if (auto *SL = dyn_cast(Fmt->IgnoreParenImpCasts())) { +StringRef FmtStr = SL->getString(); +StringFormatStringHandler Handler(Call, FmtArgIdx); + +return analyze_format_string::ParsePrintfString( +Handler, FmtStr.begin(), FmtStr.end(), Ctx.getLangOpts(), +Ctx.getTargetInfo(), isKprintf); + } + // If format is not a string literal, we cannot analyze the format string. + // In this case, this call is considered unsafe if at least one argument + // (including the format argument) is unsafe pointer. + return llvm::any_of( + llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()), + [](const Expr *Arg) { +return Arg->getType()->isPointerType() && !isNullTermPointer(Arg); + }); +} + +// Matches a FunctionDecl node such that +// 1. It's name, after stripping off predefined prefix and suffix, is +// `CoreName`; and +// 2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which +// is a set of lib
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
@@ -1025,6 +1421,92 @@ class DataInvocationGadget : public WarningGadget { DeclUseList getClaimedVarUseSites() const override { return {}; } }; +class UnsafeLibcFunctionCallGadget : public WarningGadget { + const CallExpr *const Call; + constexpr static const char *const Tag = "UnsafeLibcFunctionCall"; + // Extra tags for additional information: + constexpr static const char *const UnsafeSprintfTag = + "UnsafeLibcFunctionCall_sprintf"; + constexpr static const char *const UnsafeSizedByTag = + "UnsafeLibcFunctionCall_sized_by"; + constexpr static const char *const UnsafeStringTag = + "UnsafeLibcFunctionCall_string"; + constexpr static const char *const UnsafeVaListTag = + "UnsafeLibcFunctionCall_va_list"; + + enum UnsafeKind { +OTHERS = 0, // no specific information, the callee function is unsafe +SPRINTF = 1, // never call `-sprintf`s, call `-snprintf`s instead. +SIZED_BY = 2, // a pair of function arguments have "__sized_by" relation but + // they do not conform to safe patterns +STRING = 3, // an argument is a pointer-to-char-as-string but does not + // guarantee null-termination +VA_LIST = 4, // one of the `-printf`s function that take va_list, which is + // considered unsafe as it is not compile-time check + } WarnedFunKind = OTHERS; + +public: + UnsafeLibcFunctionCallGadget(const MatchFinder::MatchResult &Result) + : WarningGadget(Kind::UnsafeLibcFunctionCall), +Call(Result.Nodes.getNodeAs(Tag)) { +if (Result.Nodes.getNodeAs(UnsafeSprintfTag)) + WarnedFunKind = SPRINTF; +else if (Result.Nodes.getNodeAs(UnsafeStringTag)) + WarnedFunKind = STRING; +else if (Result.Nodes.getNodeAs(UnsafeSizedByTag)) + WarnedFunKind = SIZED_BY; +else if (Result.Nodes.getNodeAs(UnsafeVaListTag)) + WarnedFunKind = VA_LIST; + } + + static Matcher matcher() { +return stmt( +stmt( +anyOf( +// Match a call to a predefined unsafe libc function (unless the +// call has a sole string literal argument): +callExpr(callee(functionDecl( + libc_func_matchers::isPredefinedUnsafeLibcFunc())), + unless(allOf(hasArgument(0, expr(stringLiteral())), + hasNumArgs(1, +// Match a call to one of the `v*printf` functions taking +// va-list, which cannot be checked at compile-time: +callExpr(callee(functionDecl( + libc_func_matchers::isUnsafeVaListPrintfFunc( +.bind(UnsafeVaListTag), ziqingluo-90 wrote: right, that's a good point! https://github.com/llvm/llvm-project/pull/101583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
@@ -0,0 +1,101 @@ +// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \ +// RUN:-verify %s + +typedef struct {} FILE; +void memcpy(); +void __asan_memcpy(); +void strcpy(); +void strcpy_s(); +void wcscpy_s(); +unsigned strlen( const char* str ); +int fprintf( FILE* stream, const char* format, ... ); +int printf( const char* format, ... ); +int sprintf( char* buffer, const char* format, ... ); +int swprintf( char* buffer, const char* format, ... ); +int snprintf( char* buffer, unsigned buf_size, const char* format, ... ); +int snwprintf( char* buffer, unsigned buf_size, const char* format, ... ); +int snwprintf_s( char* buffer, unsigned buf_size, const char* format, ... ); +int vsnprintf( char* buffer, unsigned buf_size, const char* format, ... ); +int sscanf_s(const char * buffer, const char * format, ...); +int sscanf(const char * buffer, const char * format, ... ); + +namespace std { + template< class InputIt, class OutputIt > + OutputIt copy( InputIt first, InputIt last, +OutputIt d_first ); + + struct iterator{}; + template + struct span { +T * ptr; +T * data(); +unsigned size_bytes(); +unsigned size(); +iterator begin() const noexcept; +iterator end() const noexcept; + }; + + template + struct basic_string { +T* p; +T *c_str(); +T *data(); +unsigned size_bytes(); + }; + + typedef basic_string string; + typedef basic_string wstring; + + // C function under std: + void memcpy(); + void strcpy(); +} + +void f(char * p, char * q, std::span s, std::span s2) { + memcpy(); // expected-warning{{function 'memcpy' introduces unsafe buffer access}} + std::memcpy(); // expected-warning{{function 'memcpy' introduces unsafe buffer access}} + __builtin_memcpy(p, q, 64); // expected-warning{{function '__builtin_memcpy' introduces unsafe buffer access}} + __builtin___memcpy_chk(p, q, 8, 64); // expected-warning{{function '__builtin___memcpy_chk' introduces unsafe buffer access}} + __asan_memcpy(); // expected-warning{{function '__asan_memcpy' introduces unsafe buffer access}} + strcpy(); // expected-warning{{function 'strcpy' introduces unsafe buffer access}} + std::strcpy(); // expected-warning{{function 'strcpy' introduces unsafe buffer access}} + strcpy_s(); // expected-warning{{function 'strcpy_s' introduces unsafe buffer access}} + wcscpy_s(); // expected-warning{{function 'wcscpy_s' introduces unsafe buffer access}} + + + /* Test printfs */ + fprintf((FILE*)p, "%s%d", p, *p); // expected-warning{{function 'fprintf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}} + printf("%s%d", p, *p); // expected-warning{{function 'printf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}} + sprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'sprintf' introduces unsafe buffer access}} expected-note{{change to 'snprintf' for explicit bounds checking}} + swprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'swprintf' introduces unsafe buffer access}} expected-note{{change to 'snprintf' for explicit bounds checking}} + snprintf(q, 10, "%s%d", "hello", *p); // expected-warning{{function 'snprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} + snprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} + snwprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snwprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} + snwprintf_s(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snwprintf_s' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} + vsnprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // expected-warning{{function 'vsnprintf' introduces unsafe buffer access}} expected-note{{do not use va_list that cannot be checked at compile-time for bounds safety}} ziqingluo-90 wrote: change to "... va_list, which cannot ..." https://github.com/llvm/llvm-project/pull/101583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
@@ -0,0 +1,101 @@ +// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \ +// RUN:-verify %s + +typedef struct {} FILE; +void memcpy(); +void __asan_memcpy(); +void strcpy(); +void strcpy_s(); +void wcscpy_s(); +unsigned strlen( const char* str ); +int fprintf( FILE* stream, const char* format, ... ); +int printf( const char* format, ... ); +int sprintf( char* buffer, const char* format, ... ); +int swprintf( char* buffer, const char* format, ... ); +int snprintf( char* buffer, unsigned buf_size, const char* format, ... ); +int snwprintf( char* buffer, unsigned buf_size, const char* format, ... ); +int snwprintf_s( char* buffer, unsigned buf_size, const char* format, ... ); +int vsnprintf( char* buffer, unsigned buf_size, const char* format, ... ); +int sscanf_s(const char * buffer, const char * format, ...); +int sscanf(const char * buffer, const char * format, ... ); + +namespace std { + template< class InputIt, class OutputIt > + OutputIt copy( InputIt first, InputIt last, +OutputIt d_first ); + + struct iterator{}; + template + struct span { +T * ptr; +T * data(); +unsigned size_bytes(); +unsigned size(); +iterator begin() const noexcept; +iterator end() const noexcept; + }; + + template + struct basic_string { +T* p; +T *c_str(); +T *data(); +unsigned size_bytes(); + }; + + typedef basic_string string; + typedef basic_string wstring; + + // C function under std: + void memcpy(); + void strcpy(); +} + +void f(char * p, char * q, std::span s, std::span s2) { + memcpy(); // expected-warning{{function 'memcpy' introduces unsafe buffer access}} + std::memcpy(); // expected-warning{{function 'memcpy' introduces unsafe buffer access}} + __builtin_memcpy(p, q, 64); // expected-warning{{function '__builtin_memcpy' introduces unsafe buffer access}} + __builtin___memcpy_chk(p, q, 8, 64); // expected-warning{{function '__builtin___memcpy_chk' introduces unsafe buffer access}} + __asan_memcpy(); // expected-warning{{function '__asan_memcpy' introduces unsafe buffer access}} + strcpy(); // expected-warning{{function 'strcpy' introduces unsafe buffer access}} + std::strcpy(); // expected-warning{{function 'strcpy' introduces unsafe buffer access}} + strcpy_s(); // expected-warning{{function 'strcpy_s' introduces unsafe buffer access}} + wcscpy_s(); // expected-warning{{function 'wcscpy_s' introduces unsafe buffer access}} + + + /* Test printfs */ + fprintf((FILE*)p, "%s%d", p, *p); // expected-warning{{function 'fprintf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}} + printf("%s%d", p, *p); // expected-warning{{function 'printf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}} ziqingluo-90 wrote: change to "... or a string literal .." https://github.com/llvm/llvm-project/pull/101583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Don't Merge][-Wunsafe-buffer-usage] Add a new option for unsafe buffer warnings on libc functions (PR #105383)
https://github.com/ziqingluo-90 created https://github.com/llvm/llvm-project/pull/105383 Depending on https://github.com/llvm/llvm-project/pull/101583 (rdar://117182250) >From b0970a849a21180315ace77e40f3db95fddac346 Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Thu, 1 Aug 2024 16:36:27 -0700 Subject: [PATCH 1/5] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc functions Warning about calls to libc functions involving buffer access. Warned functions are hardcoded by names. (rdar://117182250) --- .../Analysis/Analyses/UnsafeBufferUsage.h | 12 + .../Analyses/UnsafeBufferUsageGadgets.def | 1 + .../clang/Basic/DiagnosticSemaKinds.td| 7 + clang/lib/Analysis/UnsafeBufferUsage.cpp | 408 +- clang/lib/Sema/AnalysisBasedWarnings.cpp | 12 + ...-usage-libc-functions-inline-namespace.cpp | 60 +++ ...arn-unsafe-buffer-usage-libc-functions.cpp | 86 ...n-unsafe-buffer-usage-test-unreachable.cpp | 4 +- 8 files changed, 586 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h index 228b4ae1e3e115..3e0fae6db7562d 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -15,6 +15,7 @@ #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" #include "clang/AST/Stmt.h" #include "clang/Basic/SourceLocation.h" #include "llvm/Support/Debug.h" @@ -106,6 +107,17 @@ class UnsafeBufferUsageHandler { virtual void handleUnsafeOperation(const Stmt *Operation, bool IsRelatedToDecl, ASTContext &Ctx) = 0; + /// Invoked when a call to an unsafe libc function is found. + /// \param PrintfInfo + /// is 0 if the callee function is not a member of the printf family; + /// is 1 if the callee is `sprintf`; + /// is 2 if arguments of the call have `__size_by` relation but are not in a + /// safe pattern; + /// is 3 if string arguments do not guarantee null-termination + /// is 4 if the callee takes va_list + virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo, +ASTContext &Ctx) = 0; + /// Invoked when an unsafe operation with a std container is found. virtual void handleUnsafeOperationInContainer(const Stmt *Operation, bool IsRelatedToDecl, diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 242ad763ba62b9..ac01b285ae833b 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) WARNING_GADGET(UnsafeBufferUsageCtorAttr) WARNING_GADGET(DataInvocation) +WARNING_GADGET(UnsafeLibcFunctionCall) WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 581434d33c5c9a..0396da735e0458 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12376,6 +12376,13 @@ def warn_unsafe_buffer_operation : Warning< "%select{unsafe pointer operation|unsafe pointer arithmetic|" "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe invocation of span::data}0">, InGroup, DefaultIgnore; +def warn_unsafe_buffer_libc_call : Warning< + "function %0 introduces unsafe buffer access">, + InGroup, DefaultIgnore; +def note_unsafe_buffer_printf_call : Note< + "%select{| change to 'snprintf' for explicit bounds checking | buffer pointer and size may not match" + "| use 'std::string::c_str' or string literal as string pointer to guarantee null-termination" + "| do not use va_list that cannot be checked at compile-time for bounds safety}0">; def note_unsafe_buffer_operation : Note< "used%select{| in pointer arithmetic| in buffer access}0 here">; def note_unsafe_buffer_variable_fixit_group : Note< diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 866222380974b6..751fb75f6ed602 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -9,20 +9,24 @@ #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" #inclu
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/101583 >From cce5781733a7c294f10dc75f48372ff6ee331239 Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Thu, 1 Aug 2024 16:36:27 -0700 Subject: [PATCH 1/3] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc functions Warning about calls to libc functions involving buffer access. Warned functions are hardcoded by names. (rdar://117182250) --- .../Analysis/Analyses/UnsafeBufferUsage.h | 12 + .../Analyses/UnsafeBufferUsageGadgets.def | 1 + .../clang/Basic/DiagnosticSemaKinds.td| 7 + clang/lib/Analysis/UnsafeBufferUsage.cpp | 408 +- clang/lib/Sema/AnalysisBasedWarnings.cpp | 12 + ...-usage-libc-functions-inline-namespace.cpp | 60 +++ ...arn-unsafe-buffer-usage-libc-functions.cpp | 86 ...n-unsafe-buffer-usage-test-unreachable.cpp | 4 +- 8 files changed, 586 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h index 228b4ae1e3e115..3e0fae6db7562d 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -15,6 +15,7 @@ #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" #include "clang/AST/Stmt.h" #include "clang/Basic/SourceLocation.h" #include "llvm/Support/Debug.h" @@ -106,6 +107,17 @@ class UnsafeBufferUsageHandler { virtual void handleUnsafeOperation(const Stmt *Operation, bool IsRelatedToDecl, ASTContext &Ctx) = 0; + /// Invoked when a call to an unsafe libc function is found. + /// \param PrintfInfo + /// is 0 if the callee function is not a member of the printf family; + /// is 1 if the callee is `sprintf`; + /// is 2 if arguments of the call have `__size_by` relation but are not in a + /// safe pattern; + /// is 3 if string arguments do not guarantee null-termination + /// is 4 if the callee takes va_list + virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo, +ASTContext &Ctx) = 0; + /// Invoked when an unsafe operation with a std container is found. virtual void handleUnsafeOperationInContainer(const Stmt *Operation, bool IsRelatedToDecl, diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 242ad763ba62b9..ac01b285ae833b 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) WARNING_GADGET(UnsafeBufferUsageCtorAttr) WARNING_GADGET(DataInvocation) +WARNING_GADGET(UnsafeLibcFunctionCall) WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 554dbaff2ce0d8..7e1e3686ce6554 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12383,6 +12383,13 @@ def warn_unsafe_buffer_operation : Warning< "%select{unsafe pointer operation|unsafe pointer arithmetic|" "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe invocation of span::data}0">, InGroup, DefaultIgnore; +def warn_unsafe_buffer_libc_call : Warning< + "function %0 introduces unsafe buffer access">, + InGroup, DefaultIgnore; +def note_unsafe_buffer_printf_call : Note< + "%select{| change to 'snprintf' for explicit bounds checking | buffer pointer and size may not match" + "| use 'std::string::c_str' or string literal as string pointer to guarantee null-termination" + "| do not use va_list that cannot be checked at compile-time for bounds safety}0">; def note_unsafe_buffer_operation : Note< "used%select{| in pointer arithmetic| in buffer access}0 here">; def note_unsafe_buffer_variable_fixit_group : Note< diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 866222380974b6..751fb75f6ed602 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -9,20 +9,24 @@ #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" +#include "clang/AST/DeclC
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
@@ -443,6 +448,368 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { return false; } +namespace libc_fun_disjoint_inner_matchers { +// `libc_fun_disjoint_inner_matchers` covers a set of matchers that match +// disjoint node sets. They all take a `CoreName`, which is the substring of a +// function name after `ignoreLibcPrefixAndSuffix`. They are suppose to be used +// as an inner matcher of `ignoreLibcPrefixAndSuffix` to deal with different +// libc function calls. + +// Matches a function call node such that +// 1. It's name, after stripping off predefined prefix and suffix, is +// `CoreName`; and +// 2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which +// is a set of libc function names. +// +// Note: For predefined prefix and suffix, see `ignoreLibcPrefixAndSuffix`. +// And, the notation `CoreName[str/wcs]` means a new name obtained from replace +// string "wcs" with "str" in `CoreName`. +// +// Also note, the set of predefined function names does not include `printf` +// functions, they are checked exclusively with other matchers below. +// Maintaining the invariant that all matchers under +// `libc_fun_disjoint_inner_matchers` are disjoint. +AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) { + static const std::set PredefinedNames{ + // numeric conversion: + "atof", + "atoi", + "atol", + "atoll", + "strtol", + "strtoll", + "strtoul", + "strtoull", + "strtof", + "strtod", + "strtold", + "strtoimax", + "strtoumax", + // "strfromf", "strfromd", "strfroml", // C23? + // string manipulation: + "strcpy", + "strncpy", + "strlcpy", + "strcat", + "strncat", + "strlcat", + "strxfrm", + "strdup", + "strndup", + // string examination: + "strlen", + "strnlen", + "strcmp", + "strncmp", + "stricmp", + "strcasecmp", + "strcoll", + "strchr", + "strrchr", + "strspn", + "strcspn", + "strpbrk", + "strstr", + "strtok", + // "mem-" functions + "memchr", + "wmemchr", + "memcmp", + "wmemcmp", + "memcpy", + "memccpy", + "mempcpy", + "wmemcpy", + "memmove", + "wmemmove", + "memset", + "wmemset", + // IO: + "fread", + "fwrite", + "fgets", + "fgetws", + "gets", + "fputs", + "fputws", + "puts", + // others + "strerror_s", + "strerror_r", + "bcopy", + "bzero", + "bsearch", + "qsort", + }; + // This is safe: strlen("hello"). We don't want to be noisy on this case. + auto isSafeStrlen = [&Node](StringRef Name) -> bool { +return Name == "strlen" && Node.getNumArgs() == 1 && + isa(Node.getArg(0)->IgnoreParenImpCasts()); + }; + + // Match predefined names: + if (PredefinedNames.find(CoreName.str()) != PredefinedNames.end()) +return !isSafeStrlen(CoreName); + + std::string NameWCS = CoreName.str(); + size_t WcsPos = NameWCS.find("wcs"); + + while (WcsPos != std::string::npos) { +NameWCS[WcsPos++] = 's'; +NameWCS[WcsPos++] = 't'; +NameWCS[WcsPos++] = 'r'; +WcsPos = NameWCS.find("wcs", WcsPos); + } + if (PredefinedNames.find(NameWCS) != PredefinedNames.end()) +return !isSafeStrlen(NameWCS); + // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. They + // all should end with "scanf"): + return CoreName.ends_with("scanf"); +} + +// Match a call to one of the `-printf` functions taking `va_list`. We cannot +// check safety for these functions so they should be changed to their +// non-va_list versions. +AST_MATCHER_P(CallExpr, unsafeVaListPrintfs, StringRef, CoreName) { + StringRef Name = CoreName; + + if (!Name.ends_with("printf")) +return false; // neither printf nor scanf + return Name.starts_with("v"); +} + +// Matches a call to one of the `-sprintf` functions (excluding the ones with +// va_list) as they are always unsafe and should be changed to corresponding +// `-snprintf`s. +AST_MATCHER_P(CallExpr, unsafeSprintfs, StringRef, CoreName) { + StringRef Name = CoreName; + + if (!Name.ends_with("printf") || Name.starts_with("v")) +return false; + + StringRef Prefix = Name.drop_back(6); + + if (Prefix.ends_with("w")) +Prefix = Prefix.drop_back(1); + return Prefix == "s"; +} + +// A pointer type expression is known to be null-terminated, if it has the +// form: E.c_str(), for any expression E of `std::string` type. +static bool isNullTermPointer(const Expr *Ptr) { + if (isa(Ptr->IgnoreParenImpCasts())) +return true; + if (isa(Ptr->IgnoreParenImpCasts())) +return true; + if (auto *MCE = dyn_cast(Ptr->IgnoreParenImpCasts())) { +const CXXMethodDecl *MD = MCE->getMethodDecl(); +const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl(); + +if (MD && RD && RD->isInStdNamespace(
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
@@ -443,6 +448,368 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { return false; } +namespace libc_fun_disjoint_inner_matchers { +// `libc_fun_disjoint_inner_matchers` covers a set of matchers that match +// disjoint node sets. They all take a `CoreName`, which is the substring of a +// function name after `ignoreLibcPrefixAndSuffix`. They are suppose to be used +// as an inner matcher of `ignoreLibcPrefixAndSuffix` to deal with different +// libc function calls. + +// Matches a function call node such that +// 1. It's name, after stripping off predefined prefix and suffix, is +// `CoreName`; and +// 2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which +// is a set of libc function names. +// +// Note: For predefined prefix and suffix, see `ignoreLibcPrefixAndSuffix`. +// And, the notation `CoreName[str/wcs]` means a new name obtained from replace +// string "wcs" with "str" in `CoreName`. +// +// Also note, the set of predefined function names does not include `printf` +// functions, they are checked exclusively with other matchers below. +// Maintaining the invariant that all matchers under +// `libc_fun_disjoint_inner_matchers` are disjoint. +AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) { + static const std::set PredefinedNames{ + // numeric conversion: + "atof", + "atoi", + "atol", + "atoll", + "strtol", + "strtoll", + "strtoul", + "strtoull", + "strtof", + "strtod", + "strtold", + "strtoimax", + "strtoumax", + // "strfromf", "strfromd", "strfroml", // C23? + // string manipulation: + "strcpy", + "strncpy", + "strlcpy", + "strcat", + "strncat", + "strlcat", + "strxfrm", + "strdup", + "strndup", + // string examination: + "strlen", + "strnlen", + "strcmp", + "strncmp", + "stricmp", + "strcasecmp", + "strcoll", + "strchr", + "strrchr", + "strspn", + "strcspn", + "strpbrk", + "strstr", + "strtok", + // "mem-" functions + "memchr", + "wmemchr", + "memcmp", + "wmemcmp", + "memcpy", + "memccpy", + "mempcpy", + "wmemcpy", + "memmove", + "wmemmove", + "memset", + "wmemset", + // IO: + "fread", + "fwrite", + "fgets", + "fgetws", + "gets", + "fputs", + "fputws", + "puts", + // others + "strerror_s", + "strerror_r", + "bcopy", + "bzero", + "bsearch", + "qsort", + }; + // This is safe: strlen("hello"). We don't want to be noisy on this case. + auto isSafeStrlen = [&Node](StringRef Name) -> bool { +return Name == "strlen" && Node.getNumArgs() == 1 && + isa(Node.getArg(0)->IgnoreParenImpCasts()); + }; + + // Match predefined names: + if (PredefinedNames.find(CoreName.str()) != PredefinedNames.end()) +return !isSafeStrlen(CoreName); + + std::string NameWCS = CoreName.str(); + size_t WcsPos = NameWCS.find("wcs"); + + while (WcsPos != std::string::npos) { +NameWCS[WcsPos++] = 's'; +NameWCS[WcsPos++] = 't'; +NameWCS[WcsPos++] = 'r'; +WcsPos = NameWCS.find("wcs", WcsPos); + } + if (PredefinedNames.find(NameWCS) != PredefinedNames.end()) +return !isSafeStrlen(NameWCS); + // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. They + // all should end with "scanf"): + return CoreName.ends_with("scanf"); +} + +// Match a call to one of the `-printf` functions taking `va_list`. We cannot +// check safety for these functions so they should be changed to their +// non-va_list versions. +AST_MATCHER_P(CallExpr, unsafeVaListPrintfs, StringRef, CoreName) { + StringRef Name = CoreName; + + if (!Name.ends_with("printf")) +return false; // neither printf nor scanf + return Name.starts_with("v"); +} + +// Matches a call to one of the `-sprintf` functions (excluding the ones with +// va_list) as they are always unsafe and should be changed to corresponding +// `-snprintf`s. +AST_MATCHER_P(CallExpr, unsafeSprintfs, StringRef, CoreName) { + StringRef Name = CoreName; + + if (!Name.ends_with("printf") || Name.starts_with("v")) +return false; + + StringRef Prefix = Name.drop_back(6); + + if (Prefix.ends_with("w")) +Prefix = Prefix.drop_back(1); + return Prefix == "s"; +} + +// A pointer type expression is known to be null-terminated, if it has the +// form: E.c_str(), for any expression E of `std::string` type. +static bool isNullTermPointer(const Expr *Ptr) { + if (isa(Ptr->IgnoreParenImpCasts())) +return true; + if (isa(Ptr->IgnoreParenImpCasts())) +return true; + if (auto *MCE = dyn_cast(Ptr->IgnoreParenImpCasts())) { +const CXXMethodDecl *MD = MCE->getMethodDecl(); +const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl(); + +if (MD && RD && RD->isInStdNamespace(
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
@@ -443,6 +448,368 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { return false; } +namespace libc_fun_disjoint_inner_matchers { +// `libc_fun_disjoint_inner_matchers` covers a set of matchers that match +// disjoint node sets. They all take a `CoreName`, which is the substring of a +// function name after `ignoreLibcPrefixAndSuffix`. They are suppose to be used +// as an inner matcher of `ignoreLibcPrefixAndSuffix` to deal with different +// libc function calls. + +// Matches a function call node such that +// 1. It's name, after stripping off predefined prefix and suffix, is +// `CoreName`; and +// 2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which +// is a set of libc function names. +// +// Note: For predefined prefix and suffix, see `ignoreLibcPrefixAndSuffix`. +// And, the notation `CoreName[str/wcs]` means a new name obtained from replace +// string "wcs" with "str" in `CoreName`. +// +// Also note, the set of predefined function names does not include `printf` +// functions, they are checked exclusively with other matchers below. +// Maintaining the invariant that all matchers under +// `libc_fun_disjoint_inner_matchers` are disjoint. +AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) { + static const std::set PredefinedNames{ + // numeric conversion: + "atof", + "atoi", + "atol", + "atoll", + "strtol", + "strtoll", + "strtoul", + "strtoull", + "strtof", + "strtod", + "strtold", + "strtoimax", + "strtoumax", + // "strfromf", "strfromd", "strfroml", // C23? + // string manipulation: + "strcpy", + "strncpy", + "strlcpy", + "strcat", + "strncat", + "strlcat", + "strxfrm", + "strdup", + "strndup", + // string examination: + "strlen", + "strnlen", + "strcmp", + "strncmp", + "stricmp", + "strcasecmp", + "strcoll", + "strchr", + "strrchr", + "strspn", + "strcspn", + "strpbrk", + "strstr", + "strtok", + // "mem-" functions + "memchr", + "wmemchr", + "memcmp", + "wmemcmp", + "memcpy", + "memccpy", + "mempcpy", + "wmemcpy", + "memmove", + "wmemmove", + "memset", + "wmemset", + // IO: + "fread", + "fwrite", + "fgets", + "fgetws", + "gets", + "fputs", + "fputws", + "puts", + // others + "strerror_s", + "strerror_r", + "bcopy", + "bzero", + "bsearch", + "qsort", + }; + // This is safe: strlen("hello"). We don't want to be noisy on this case. + auto isSafeStrlen = [&Node](StringRef Name) -> bool { +return Name == "strlen" && Node.getNumArgs() == 1 && + isa(Node.getArg(0)->IgnoreParenImpCasts()); + }; + + // Match predefined names: + if (PredefinedNames.find(CoreName.str()) != PredefinedNames.end()) ziqingluo-90 wrote: Done https://github.com/llvm/llvm-project/pull/101583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
@@ -483,6 +483,34 @@ bool clang::analyze_format_string::ParseFormatStringHasSArg(const char *I, return false; } +unsigned clang::analyze_format_string::ParseFormatStringFirstSArgIndex( +const char *&I, const char *E, unsigned ArgIndex, const LangOptions &LO, +const TargetInfo &Target) { + unsigned argIndex = ArgIndex; + + // Keep looking for a %s format specifier until we have exhausted the string. + FormatStringHandler H; + while (I != E) { +const PrintfSpecifierResult &FSR = +ParsePrintfSpecifier(H, I, E, argIndex, LO, Target, false, false); +// Did a fail-stop error of any kind occur when parsing the specifier? +// If so, don't do any more processing. +if (FSR.shouldStop()) ziqingluo-90 wrote: Done https://github.com/llvm/llvm-project/pull/101583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
@@ -783,6 +783,18 @@ bool ParsePrintfString(FormatStringHandler &H, bool ParseFormatStringHasSArg(const char *beg, const char *end, const LangOptions &LO, const TargetInfo &Target); +/// Parse C format string and return index (relative to `ArgIndex`) of the first +/// found `s` specifier. Return 0 if not found. +/// \param I The start of the C format string; Updated to the first unparsed +/// position upon return. +/// \param E The end of the C format string; +/// \param ArgIndex The argument index of the last found `s` specifier; Or the +/// argument index of the formatter in initial case. +unsigned ParseFormatStringFirstSArgIndex(const char *&I, const char *E, ziqingluo-90 wrote: Done https://github.com/llvm/llvm-project/pull/101583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
@@ -443,6 +448,368 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { return false; } +namespace libc_fun_disjoint_inner_matchers { +// `libc_fun_disjoint_inner_matchers` covers a set of matchers that match +// disjoint node sets. They all take a `CoreName`, which is the substring of a +// function name after `ignoreLibcPrefixAndSuffix`. They are suppose to be used +// as an inner matcher of `ignoreLibcPrefixAndSuffix` to deal with different +// libc function calls. + +// Matches a function call node such that +// 1. It's name, after stripping off predefined prefix and suffix, is +// `CoreName`; and +// 2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which +// is a set of libc function names. +// +// Note: For predefined prefix and suffix, see `ignoreLibcPrefixAndSuffix`. +// And, the notation `CoreName[str/wcs]` means a new name obtained from replace +// string "wcs" with "str" in `CoreName`. +// +// Also note, the set of predefined function names does not include `printf` +// functions, they are checked exclusively with other matchers below. +// Maintaining the invariant that all matchers under +// `libc_fun_disjoint_inner_matchers` are disjoint. +AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) { + static const std::set PredefinedNames{ ziqingluo-90 wrote: Done https://github.com/llvm/llvm-project/pull/101583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
@@ -2292,6 +2292,18 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler { } } + void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo, +ASTContext &Ctx) override { +// We have checked that there is a direct callee with an identifier name: +StringRef Name = Call->getDirectCallee()->getName(); + +S.Diag(Call->getBeginLoc(), diag::warn_unsafe_buffer_libc_call) +<< Name << Call->getSourceRange(); ziqingluo-90 wrote: Done https://github.com/llvm/llvm-project/pull/101583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/101583 >From cce5781733a7c294f10dc75f48372ff6ee331239 Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Thu, 1 Aug 2024 16:36:27 -0700 Subject: [PATCH 1/3] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc functions Warning about calls to libc functions involving buffer access. Warned functions are hardcoded by names. (rdar://117182250) --- .../Analysis/Analyses/UnsafeBufferUsage.h | 12 + .../Analyses/UnsafeBufferUsageGadgets.def | 1 + .../clang/Basic/DiagnosticSemaKinds.td| 7 + clang/lib/Analysis/UnsafeBufferUsage.cpp | 408 +- clang/lib/Sema/AnalysisBasedWarnings.cpp | 12 + ...-usage-libc-functions-inline-namespace.cpp | 60 +++ ...arn-unsafe-buffer-usage-libc-functions.cpp | 86 ...n-unsafe-buffer-usage-test-unreachable.cpp | 4 +- 8 files changed, 586 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h index 228b4ae1e3e115..3e0fae6db7562d 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -15,6 +15,7 @@ #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" #include "clang/AST/Stmt.h" #include "clang/Basic/SourceLocation.h" #include "llvm/Support/Debug.h" @@ -106,6 +107,17 @@ class UnsafeBufferUsageHandler { virtual void handleUnsafeOperation(const Stmt *Operation, bool IsRelatedToDecl, ASTContext &Ctx) = 0; + /// Invoked when a call to an unsafe libc function is found. + /// \param PrintfInfo + /// is 0 if the callee function is not a member of the printf family; + /// is 1 if the callee is `sprintf`; + /// is 2 if arguments of the call have `__size_by` relation but are not in a + /// safe pattern; + /// is 3 if string arguments do not guarantee null-termination + /// is 4 if the callee takes va_list + virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo, +ASTContext &Ctx) = 0; + /// Invoked when an unsafe operation with a std container is found. virtual void handleUnsafeOperationInContainer(const Stmt *Operation, bool IsRelatedToDecl, diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 242ad763ba62b9..ac01b285ae833b 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) WARNING_GADGET(UnsafeBufferUsageCtorAttr) WARNING_GADGET(DataInvocation) +WARNING_GADGET(UnsafeLibcFunctionCall) WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 554dbaff2ce0d8..7e1e3686ce6554 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12383,6 +12383,13 @@ def warn_unsafe_buffer_operation : Warning< "%select{unsafe pointer operation|unsafe pointer arithmetic|" "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe invocation of span::data}0">, InGroup, DefaultIgnore; +def warn_unsafe_buffer_libc_call : Warning< + "function %0 introduces unsafe buffer access">, + InGroup, DefaultIgnore; +def note_unsafe_buffer_printf_call : Note< + "%select{| change to 'snprintf' for explicit bounds checking | buffer pointer and size may not match" + "| use 'std::string::c_str' or string literal as string pointer to guarantee null-termination" + "| do not use va_list that cannot be checked at compile-time for bounds safety}0">; def note_unsafe_buffer_operation : Note< "used%select{| in pointer arithmetic| in buffer access}0 here">; def note_unsafe_buffer_variable_fixit_group : Note< diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 866222380974b6..751fb75f6ed602 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -9,20 +9,24 @@ #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" +#include "clang/AST/DeclC
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
@@ -443,6 +448,368 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { return false; } +namespace libc_fun_disjoint_inner_matchers { +// `libc_fun_disjoint_inner_matchers` covers a set of matchers that match +// disjoint node sets. They all take a `CoreName`, which is the substring of a +// function name after `ignoreLibcPrefixAndSuffix`. They are suppose to be used +// as an inner matcher of `ignoreLibcPrefixAndSuffix` to deal with different +// libc function calls. + +// Matches a function call node such that +// 1. It's name, after stripping off predefined prefix and suffix, is +// `CoreName`; and +// 2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which +// is a set of libc function names. +// +// Note: For predefined prefix and suffix, see `ignoreLibcPrefixAndSuffix`. +// And, the notation `CoreName[str/wcs]` means a new name obtained from replace +// string "wcs" with "str" in `CoreName`. +// +// Also note, the set of predefined function names does not include `printf` +// functions, they are checked exclusively with other matchers below. +// Maintaining the invariant that all matchers under +// `libc_fun_disjoint_inner_matchers` are disjoint. +AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) { + static const std::set PredefinedNames{ + // numeric conversion: + "atof", + "atoi", + "atol", + "atoll", + "strtol", + "strtoll", + "strtoul", + "strtoull", + "strtof", + "strtod", + "strtold", + "strtoimax", + "strtoumax", + // "strfromf", "strfromd", "strfroml", // C23? + // string manipulation: + "strcpy", + "strncpy", + "strlcpy", + "strcat", + "strncat", + "strlcat", + "strxfrm", + "strdup", + "strndup", + // string examination: + "strlen", + "strnlen", + "strcmp", + "strncmp", + "stricmp", + "strcasecmp", + "strcoll", + "strchr", + "strrchr", + "strspn", + "strcspn", + "strpbrk", + "strstr", + "strtok", + // "mem-" functions + "memchr", + "wmemchr", + "memcmp", + "wmemcmp", + "memcpy", + "memccpy", + "mempcpy", + "wmemcpy", + "memmove", + "wmemmove", + "memset", + "wmemset", + // IO: + "fread", + "fwrite", + "fgets", + "fgetws", + "gets", + "fputs", + "fputws", + "puts", + // others + "strerror_s", + "strerror_r", + "bcopy", + "bzero", + "bsearch", + "qsort", + }; + // This is safe: strlen("hello"). We don't want to be noisy on this case. + auto isSafeStrlen = [&Node](StringRef Name) -> bool { +return Name == "strlen" && Node.getNumArgs() == 1 && + isa(Node.getArg(0)->IgnoreParenImpCasts()); + }; + + // Match predefined names: + if (PredefinedNames.find(CoreName.str()) != PredefinedNames.end()) +return !isSafeStrlen(CoreName); + + std::string NameWCS = CoreName.str(); + size_t WcsPos = NameWCS.find("wcs"); + + while (WcsPos != std::string::npos) { +NameWCS[WcsPos++] = 's'; +NameWCS[WcsPos++] = 't'; +NameWCS[WcsPos++] = 'r'; +WcsPos = NameWCS.find("wcs", WcsPos); + } + if (PredefinedNames.find(NameWCS) != PredefinedNames.end()) +return !isSafeStrlen(NameWCS); + // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. They + // all should end with "scanf"): + return CoreName.ends_with("scanf"); +} + +// Match a call to one of the `-printf` functions taking `va_list`. We cannot +// check safety for these functions so they should be changed to their +// non-va_list versions. +AST_MATCHER_P(CallExpr, unsafeVaListPrintfs, StringRef, CoreName) { + StringRef Name = CoreName; + + if (!Name.ends_with("printf")) +return false; // neither printf nor scanf + return Name.starts_with("v"); +} + +// Matches a call to one of the `-sprintf` functions (excluding the ones with +// va_list) as they are always unsafe and should be changed to corresponding +// `-snprintf`s. +AST_MATCHER_P(CallExpr, unsafeSprintfs, StringRef, CoreName) { + StringRef Name = CoreName; + + if (!Name.ends_with("printf") || Name.starts_with("v")) +return false; + + StringRef Prefix = Name.drop_back(6); + + if (Prefix.ends_with("w")) +Prefix = Prefix.drop_back(1); + return Prefix == "s"; +} + +// A pointer type expression is known to be null-terminated, if it has the +// form: E.c_str(), for any expression E of `std::string` type. +static bool isNullTermPointer(const Expr *Ptr) { + if (isa(Ptr->IgnoreParenImpCasts())) +return true; + if (isa(Ptr->IgnoreParenImpCasts())) +return true; + if (auto *MCE = dyn_cast(Ptr->IgnoreParenImpCasts())) { +const CXXMethodDecl *MD = MCE->getMethodDecl(); +const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl(); + +if (MD && RD && RD->isInStdNamespace(
[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)
@@ -443,6 +448,368 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { return false; } +namespace libc_fun_disjoint_inner_matchers { +// `libc_fun_disjoint_inner_matchers` covers a set of matchers that match +// disjoint node sets. They all take a `CoreName`, which is the substring of a +// function name after `ignoreLibcPrefixAndSuffix`. They are suppose to be used +// as an inner matcher of `ignoreLibcPrefixAndSuffix` to deal with different +// libc function calls. + +// Matches a function call node such that +// 1. It's name, after stripping off predefined prefix and suffix, is +// `CoreName`; and +// 2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which +// is a set of libc function names. +// +// Note: For predefined prefix and suffix, see `ignoreLibcPrefixAndSuffix`. +// And, the notation `CoreName[str/wcs]` means a new name obtained from replace +// string "wcs" with "str" in `CoreName`. +// +// Also note, the set of predefined function names does not include `printf` +// functions, they are checked exclusively with other matchers below. +// Maintaining the invariant that all matchers under +// `libc_fun_disjoint_inner_matchers` are disjoint. +AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) { + static const std::set PredefinedNames{ + // numeric conversion: + "atof", + "atoi", + "atol", + "atoll", + "strtol", + "strtoll", + "strtoul", + "strtoull", + "strtof", + "strtod", + "strtold", + "strtoimax", + "strtoumax", + // "strfromf", "strfromd", "strfroml", // C23? + // string manipulation: + "strcpy", + "strncpy", + "strlcpy", + "strcat", + "strncat", + "strlcat", + "strxfrm", + "strdup", + "strndup", + // string examination: + "strlen", + "strnlen", + "strcmp", + "strncmp", + "stricmp", + "strcasecmp", + "strcoll", + "strchr", + "strrchr", + "strspn", + "strcspn", + "strpbrk", + "strstr", + "strtok", + // "mem-" functions + "memchr", + "wmemchr", + "memcmp", + "wmemcmp", + "memcpy", + "memccpy", + "mempcpy", + "wmemcpy", + "memmove", + "wmemmove", + "memset", + "wmemset", + // IO: + "fread", + "fwrite", + "fgets", + "fgetws", + "gets", + "fputs", + "fputws", + "puts", + // others + "strerror_s", + "strerror_r", + "bcopy", + "bzero", + "bsearch", + "qsort", + }; + // This is safe: strlen("hello"). We don't want to be noisy on this case. + auto isSafeStrlen = [&Node](StringRef Name) -> bool { +return Name == "strlen" && Node.getNumArgs() == 1 && + isa(Node.getArg(0)->IgnoreParenImpCasts()); + }; + + // Match predefined names: + if (PredefinedNames.find(CoreName.str()) != PredefinedNames.end()) +return !isSafeStrlen(CoreName); + + std::string NameWCS = CoreName.str(); + size_t WcsPos = NameWCS.find("wcs"); + + while (WcsPos != std::string::npos) { +NameWCS[WcsPos++] = 's'; +NameWCS[WcsPos++] = 't'; +NameWCS[WcsPos++] = 'r'; +WcsPos = NameWCS.find("wcs", WcsPos); + } + if (PredefinedNames.find(NameWCS) != PredefinedNames.end()) +return !isSafeStrlen(NameWCS); + // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. They + // all should end with "scanf"): + return CoreName.ends_with("scanf"); +} + +// Match a call to one of the `-printf` functions taking `va_list`. We cannot +// check safety for these functions so they should be changed to their +// non-va_list versions. +AST_MATCHER_P(CallExpr, unsafeVaListPrintfs, StringRef, CoreName) { + StringRef Name = CoreName; + + if (!Name.ends_with("printf")) +return false; // neither printf nor scanf + return Name.starts_with("v"); +} + +// Matches a call to one of the `-sprintf` functions (excluding the ones with +// va_list) as they are always unsafe and should be changed to corresponding +// `-snprintf`s. +AST_MATCHER_P(CallExpr, unsafeSprintfs, StringRef, CoreName) { + StringRef Name = CoreName; + + if (!Name.ends_with("printf") || Name.starts_with("v")) +return false; + + StringRef Prefix = Name.drop_back(6); + + if (Prefix.ends_with("w")) +Prefix = Prefix.drop_back(1); + return Prefix == "s"; +} + +// A pointer type expression is known to be null-terminated, if it has the +// form: E.c_str(), for any expression E of `std::string` type. +static bool isNullTermPointer(const Expr *Ptr) { + if (isa(Ptr->IgnoreParenImpCasts())) +return true; + if (isa(Ptr->IgnoreParenImpCasts())) +return true; + if (auto *MCE = dyn_cast(Ptr->IgnoreParenImpCasts())) { +const CXXMethodDecl *MD = MCE->getMethodDecl(); +const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl(); + +if (MD && RD && RD->isInStdNamespace(
[clang] [-Wunsafe-buffer-usage] Fix a small bug recently found (PR #102953)
https://github.com/ziqingluo-90 closed https://github.com/llvm/llvm-project/pull/102953 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Safe Buffers] Fix a small bug recently found (PR #102953)
@@ -404,7 +404,7 @@ AST_MATCHER(CXXConstructExpr, isSafeSpanTwoParamConstruct) { if (Arg0Ty->isConstantArrayType()) { const APSInt ConstArrSize = -APSInt(cast(Arg0Ty)->getSize()); +APSInt(cast(Arg0Ty.getCanonicalType())->getSize()); ziqingluo-90 wrote: Have changed to use `ASTContext::getAsConstantArrayType()` https://github.com/llvm/llvm-project/pull/102953 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Safe Buffers] Fix a small bug recently found (PR #102953)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/102953 >From d6c860de3facc37f27b17a26a01e48bc02b4659b Mon Sep 17 00:00:00 2001 From: ziqingluo-90 Date: Mon, 12 Aug 2024 11:57:17 -0700 Subject: [PATCH] [-Wunsafe-buffer-usage] Fix a bug in the ASTMatcher for span constructors `QualType::isConstantArrayType()` checks canonical type. So a following cast should be applied to canonical type as well. And, a better option is to use `ASTContext::getAsArrayType()`. ``` if (Ty->isConstantArrayType()) cast(Ty); // this is incorrect if (auto *CAT = Context.getAsConstantArrayType(Ty)) ... // this is good ``` --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 14 +++--- ...fe-buffer-usage-in-container-span-construct.cpp | 3 +++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 866222380974b6..db7ec5741f91c4 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -402,9 +402,9 @@ AST_MATCHER(CXXConstructExpr, isSafeSpanTwoParamConstruct) { QualType Arg0Ty = Arg0->IgnoreImplicit()->getType(); - if (Arg0Ty->isConstantArrayType()) { -const APSInt ConstArrSize = -APSInt(cast(Arg0Ty)->getSize()); + if (auto *ConstArrTy = + Finder->getASTContext().getAsConstantArrayType(Arg0Ty)) { +const APSInt ConstArrSize = APSInt(ConstArrTy->getSize()); // Check form 4: return Arg1CV && APSInt::compareValues(ConstArrSize, *Arg1CV) == 0; @@ -2659,7 +2659,7 @@ static FixItList fixVarDeclWithArray(const VarDecl *D, const ASTContext &Ctx, // Note: the code below expects the declaration to not use any type sugar like // typedef. - if (auto CAT = dyn_cast(D->getType())) { + if (auto CAT = Ctx.getAsConstantArrayType(D->getType())) { const QualType &ArrayEltT = CAT->getElementType(); assert(!ArrayEltT.isNull() && "Trying to fix a non-array type variable!"); // FIXME: support multi-dimensional arrays @@ -2792,9 +2792,9 @@ fixVariable(const VarDecl *VD, FixitStrategy::Kind K, return {}; } case FixitStrategy::Kind::Array: { -if (VD->isLocalVarDecl() && -isa(VD->getType().getCanonicalType())) - return fixVariableWithArray(VD, Tracker, Ctx, Handler); +if (VD->isLocalVarDecl()) + if (auto CAT = Ctx.getAsConstantArrayType(VD->getType())) +return fixVariableWithArray(VD, Tracker, Ctx, Handler); DEBUG_NOTE_DECL_FAIL(VD, " : not a local const-size array"); return {}; diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp index a1ddc384e0d9b8..e97511593bbd81 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp @@ -79,6 +79,8 @@ namespace construct_wt_ptr_size { unsigned Y = 10; std::span S = std::span{&X, 1}; // no-warning int Arr[10]; +typedef int TenInts_t[10]; +TenInts_t Arr2; S = std::span{&X, 2};// expected-warning{{the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information}} S = std::span{new int[10], 10}; // no-warning @@ -90,6 +92,7 @@ namespace construct_wt_ptr_size { S = std::span{new int[10], 9}; // expected-warning{{the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information}} // not smart enough to tell its safe S = std::span{new int[10], Y}; // expected-warning{{the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information}} // not smart enough to tell its safe S = std::span{Arr, 10}; // no-warning +S = std::span{Arr2, 10}; // no-warning S = std::span{Arr, Y}; // expected-warning{{the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information}} // not smart enough to tell its safe S = std::span{p, 0}; // no-warning } ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Warning Libc functions (PR #101583)
@@ -443,6 +447,314 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { return false; } +AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) { + static const std::set PredefinedNames{ + // numeric conversion: + "atof", + "atoi", + "atol", + "atoll", + "strtol", + "strtoll", + "strtoul", + "strtoull", + "strtof", + "strtod", + "strtold", + "strtoimax", + "strtoumax", + // "strfromf", "strfromd", "strfroml", // C23? + // string manipulation: + "strcpy", + "strncpy", + "strlcpy", + "strcat", + "strncat", + "strlcat", + "strxfrm", + "strdup", + "strndup", + // string examination: + "strlen", + "strnlen", + "strcmp", + "strncmp", + "stricmp", + "strcasecmp", + "strcoll", + "strchr", + "strrchr", + "strspn", + "strcspn", + "strpbrk", + "strstr", + "strtok", + // "mem-" functions + "memchr", + "wmemchr", + "memcmp", + "wmemcmp", + "memcpy", + "memccpy", + "mempcpy", + "wmemcpy", + "memmove", + "wmemmove", + "memset", + "wmemset", + // IO: + "fread", + "fwrite", + "fgets", + "fgetws", + "gets", + "fputs", + "fputws", + "puts", + // others + "strerror_s", + "strerror_r", + "bcopy", + "bzero", + "bsearch", + "qsort", + }; + + // A tiny name parser for unsafe libc function names with additional + // checks for `printf`s: + struct FuncNameMatch { +const CallExpr *const Call; +ASTContext &Ctx; +enum ResultKind { + NO, // no match + YES, // matched a name that is not a member of the printf family + SPRINTF, // matched `sprintf` + OTHER_PRINTF, // matched a printf function that is not `sprintf` +}; +FuncNameMatch(const CallExpr *Call, ASTContext &Ctx) +: Call(Call), Ctx(Ctx) {} + +// For a name `S` in `PredefinedNames` or a member of the printf/scanf +// family, define matching function names with `S` by the grammar below: +// +// CoreName := S | S["wcs"/"str"] +// LibcName := CoreName | CoreName + "_s" +// MatchingName := "__builtin_" + LibcName | +// "__builtin___" + LibcName + "_chk" | +// "__asan_" + LibcName +// +// (Note S["wcs"/"str"] means substitute "str" with "wcs" in S.) +ResultKind matchName(StringRef FunName, bool isBuiltin) { + // Try to match __builtin_: + if (isBuiltin && FunName.starts_with("__builtin_")) +// Then either it is __builtin_LibcName or __builtin___LibcName_chk or +// no match: +return matchLibcNameOrBuiltinChk( +FunName.drop_front(10 /* truncate "__builtin_" */)); + // Try to match __asan_: + if (FunName.starts_with("__asan_")) +return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */)); + return matchLibcName(FunName); +} + +// Parameter `Name` is the substring after stripping off the prefix +// "__builtin_". +ResultKind matchLibcNameOrBuiltinChk(StringRef Name) { + if (Name.starts_with("__") && Name.ends_with("_chk")) +return matchLibcName( +Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */); + return matchLibcName(Name); +} + +ResultKind matchLibcName(StringRef Name) { + if (Name.ends_with("_s")) +return matchCoreName(Name.drop_back(2 /* truncate "_s" */)); + return matchCoreName(Name); +} + +ResultKind matchCoreName(StringRef Name) { + if (PredefinedNames.find(Name.str()) != PredefinedNames.end()) +return !isSafeStrlen(Name) ? YES + : NO; // match unless it's a safe strlen call + + std::string NameWCS = Name.str(); + size_t WcsPos = NameWCS.find("wcs"); + + while (WcsPos != std::string::npos) { +NameWCS[WcsPos++] = 's'; +NameWCS[WcsPos++] = 't'; +NameWCS[WcsPos++] = 'r'; +WcsPos = NameWCS.find("wcs", WcsPos); + } + if (PredefinedNames.find(NameWCS) != PredefinedNames.end()) +return !isSafeStrlen(NameWCS) ? YES : NO; + return matchPrintfOrScanfFamily(Name); +} + +ResultKind matchPrintfOrScanfFamily(StringRef Name) { + if (Name.ends_with("scanf")) +return YES; // simply warn about scanf functions + if (!Name.ends_with("printf")) +return NO; // neither printf nor scanf + if (Name.starts_with("v")) +// cannot check args for va_list, so `vprintf`s are treated as regular +// unsafe libc calls: +return YES; + + // Truncate "printf", focus on prefixes. There are different possible + // name prefixes: "k", "f", "s", "sn", "fw", ..., "snw". We strip off the + // 'w' and handle printfs di
[clang] Warning Libc functions (PR #101583)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/101583 >From cce5781733a7c294f10dc75f48372ff6ee331239 Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Thu, 1 Aug 2024 16:36:27 -0700 Subject: [PATCH 1/2] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc functions Warning about calls to libc functions involving buffer access. Warned functions are hardcoded by names. (rdar://117182250) --- .../Analysis/Analyses/UnsafeBufferUsage.h | 12 + .../Analyses/UnsafeBufferUsageGadgets.def | 1 + .../clang/Basic/DiagnosticSemaKinds.td| 7 + clang/lib/Analysis/UnsafeBufferUsage.cpp | 408 +- clang/lib/Sema/AnalysisBasedWarnings.cpp | 12 + ...-usage-libc-functions-inline-namespace.cpp | 60 +++ ...arn-unsafe-buffer-usage-libc-functions.cpp | 86 ...n-unsafe-buffer-usage-test-unreachable.cpp | 4 +- 8 files changed, 586 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h index 228b4ae1e3e115..3e0fae6db7562d 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -15,6 +15,7 @@ #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" #include "clang/AST/Stmt.h" #include "clang/Basic/SourceLocation.h" #include "llvm/Support/Debug.h" @@ -106,6 +107,17 @@ class UnsafeBufferUsageHandler { virtual void handleUnsafeOperation(const Stmt *Operation, bool IsRelatedToDecl, ASTContext &Ctx) = 0; + /// Invoked when a call to an unsafe libc function is found. + /// \param PrintfInfo + /// is 0 if the callee function is not a member of the printf family; + /// is 1 if the callee is `sprintf`; + /// is 2 if arguments of the call have `__size_by` relation but are not in a + /// safe pattern; + /// is 3 if string arguments do not guarantee null-termination + /// is 4 if the callee takes va_list + virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo, +ASTContext &Ctx) = 0; + /// Invoked when an unsafe operation with a std container is found. virtual void handleUnsafeOperationInContainer(const Stmt *Operation, bool IsRelatedToDecl, diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 242ad763ba62b9..ac01b285ae833b 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) WARNING_GADGET(UnsafeBufferUsageCtorAttr) WARNING_GADGET(DataInvocation) +WARNING_GADGET(UnsafeLibcFunctionCall) WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 554dbaff2ce0d8..7e1e3686ce6554 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12383,6 +12383,13 @@ def warn_unsafe_buffer_operation : Warning< "%select{unsafe pointer operation|unsafe pointer arithmetic|" "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe invocation of span::data}0">, InGroup, DefaultIgnore; +def warn_unsafe_buffer_libc_call : Warning< + "function %0 introduces unsafe buffer access">, + InGroup, DefaultIgnore; +def note_unsafe_buffer_printf_call : Note< + "%select{| change to 'snprintf' for explicit bounds checking | buffer pointer and size may not match" + "| use 'std::string::c_str' or string literal as string pointer to guarantee null-termination" + "| do not use va_list that cannot be checked at compile-time for bounds safety}0">; def note_unsafe_buffer_operation : Note< "used%select{| in pointer arithmetic| in buffer access}0 here">; def note_unsafe_buffer_variable_fixit_group : Note< diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 866222380974b6..751fb75f6ed602 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -9,20 +9,24 @@ #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" +#include "clang/AST/DeclC
[clang] Warning Libc functions (PR #101583)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/101583 >From cce5781733a7c294f10dc75f48372ff6ee331239 Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Thu, 1 Aug 2024 16:36:27 -0700 Subject: [PATCH 1/2] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc functions Warning about calls to libc functions involving buffer access. Warned functions are hardcoded by names. (rdar://117182250) --- .../Analysis/Analyses/UnsafeBufferUsage.h | 12 + .../Analyses/UnsafeBufferUsageGadgets.def | 1 + .../clang/Basic/DiagnosticSemaKinds.td| 7 + clang/lib/Analysis/UnsafeBufferUsage.cpp | 408 +- clang/lib/Sema/AnalysisBasedWarnings.cpp | 12 + ...-usage-libc-functions-inline-namespace.cpp | 60 +++ ...arn-unsafe-buffer-usage-libc-functions.cpp | 86 ...n-unsafe-buffer-usage-test-unreachable.cpp | 4 +- 8 files changed, 586 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h index 228b4ae1e3e115..3e0fae6db7562d 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -15,6 +15,7 @@ #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" #include "clang/AST/Stmt.h" #include "clang/Basic/SourceLocation.h" #include "llvm/Support/Debug.h" @@ -106,6 +107,17 @@ class UnsafeBufferUsageHandler { virtual void handleUnsafeOperation(const Stmt *Operation, bool IsRelatedToDecl, ASTContext &Ctx) = 0; + /// Invoked when a call to an unsafe libc function is found. + /// \param PrintfInfo + /// is 0 if the callee function is not a member of the printf family; + /// is 1 if the callee is `sprintf`; + /// is 2 if arguments of the call have `__size_by` relation but are not in a + /// safe pattern; + /// is 3 if string arguments do not guarantee null-termination + /// is 4 if the callee takes va_list + virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo, +ASTContext &Ctx) = 0; + /// Invoked when an unsafe operation with a std container is found. virtual void handleUnsafeOperationInContainer(const Stmt *Operation, bool IsRelatedToDecl, diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 242ad763ba62b9..ac01b285ae833b 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) WARNING_GADGET(UnsafeBufferUsageCtorAttr) WARNING_GADGET(DataInvocation) +WARNING_GADGET(UnsafeLibcFunctionCall) WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 554dbaff2ce0d8..7e1e3686ce6554 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12383,6 +12383,13 @@ def warn_unsafe_buffer_operation : Warning< "%select{unsafe pointer operation|unsafe pointer arithmetic|" "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe invocation of span::data}0">, InGroup, DefaultIgnore; +def warn_unsafe_buffer_libc_call : Warning< + "function %0 introduces unsafe buffer access">, + InGroup, DefaultIgnore; +def note_unsafe_buffer_printf_call : Note< + "%select{| change to 'snprintf' for explicit bounds checking | buffer pointer and size may not match" + "| use 'std::string::c_str' or string literal as string pointer to guarantee null-termination" + "| do not use va_list that cannot be checked at compile-time for bounds safety}0">; def note_unsafe_buffer_operation : Note< "used%select{| in pointer arithmetic| in buffer access}0 here">; def note_unsafe_buffer_variable_fixit_group : Note< diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 866222380974b6..751fb75f6ed602 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -9,20 +9,24 @@ #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" +#include "clang/AST/DeclC
[clang] [Safe Buffers] Fix a small bug recently found (PR #102953)
https://github.com/ziqingluo-90 created https://github.com/llvm/llvm-project/pull/102953 `QualType::isConstantArrayType()` checks canonical type. So a following cast should be applied to canonical type as well: ``` if (Ty->isConstantArrayType()) cast(Ty.getCanonicalType()); // cast(Ty) is incorrect ``` >From a15f22e9577154783165bdfc1021e640bbc4dcd0 Mon Sep 17 00:00:00 2001 From: ziqingluo-90 Date: Mon, 12 Aug 2024 11:57:17 -0700 Subject: [PATCH] [-Wunsafe-buffer-usage] Fix a bug in the ASTMatcher for span constructors `QualType::isConstantArrayType()` checks canonical type. So a following cast should be applied to canonical type as well: ``` if (Ty->isConstantArrayType()) cast(Ty.getCanonicalType()); // cast(Ty) is incorrect ``` --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 2 +- .../warn-unsafe-buffer-usage-in-container-span-construct.cpp | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 866222380974b..379b3ea7adf9e 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -404,7 +404,7 @@ AST_MATCHER(CXXConstructExpr, isSafeSpanTwoParamConstruct) { if (Arg0Ty->isConstantArrayType()) { const APSInt ConstArrSize = -APSInt(cast(Arg0Ty)->getSize()); +APSInt(cast(Arg0Ty.getCanonicalType())->getSize()); // Check form 4: return Arg1CV && APSInt::compareValues(ConstArrSize, *Arg1CV) == 0; diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp index a1ddc384e0d9b..f4f2a028f0b8f 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp @@ -79,6 +79,8 @@ namespace construct_wt_ptr_size { unsigned Y = 10; std::span S = std::span{&X, 1}; // no-warning int Arr[10]; +typedef int TenInts_t[10]; +TenInts_t Arr2; S = std::span{&X, 2};// expected-warning{{the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information}} S = std::span{new int[10], 10}; // no-warning @@ -90,6 +92,7 @@ namespace construct_wt_ptr_size { S = std::span{new int[10], 9}; // expected-warning{{the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information}} // not smart enough to tell its safe S = std::span{new int[10], Y}; // expected-warning{{the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information}} // not smart enough to tell its safe S = std::span{Arr, 10}; // no-warning +S = std::span{Arr2, 10}; // no-warning S = std::span{Arr, Y}; // expected-warning{{the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information}} // not smart enough to tell its safe S = std::span{p, 0}; // no-warning } ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Warning Libc functions (PR #101583)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/101583 >From 979619ea3ac0b52944468160f582d82fce3cee7d Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Thu, 1 Aug 2024 16:36:27 -0700 Subject: [PATCH] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc functions Warning about calls to libc functions involving buffer access. Warned functions are hardcoded by names. (rdar://117182250) --- .../Analysis/Analyses/UnsafeBufferUsage.h | 8 + .../Analyses/UnsafeBufferUsageGadgets.def | 1 + .../clang/Basic/DiagnosticSemaKinds.td| 5 + clang/lib/Analysis/UnsafeBufferUsage.cpp | 325 +- clang/lib/Sema/AnalysisBasedWarnings.cpp | 15 + ...-usage-libc-functions-inline-namespace.cpp | 60 ...arn-unsafe-buffer-usage-libc-functions.cpp | 87 + ...n-unsafe-buffer-usage-test-unreachable.cpp | 4 +- 8 files changed, 501 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h index 228b4ae1e3e11..62d0c6e350f37 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -15,6 +15,7 @@ #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" #include "clang/AST/Stmt.h" #include "clang/Basic/SourceLocation.h" #include "llvm/Support/Debug.h" @@ -106,6 +107,13 @@ class UnsafeBufferUsageHandler { virtual void handleUnsafeOperation(const Stmt *Operation, bool IsRelatedToDecl, ASTContext &Ctx) = 0; + /// Invoked when a call to an unsafe libc function is found. + /// \param PrintfInfo is 0 if the callee function is not a member of the + /// printf family; is 1 if the callee is `sprintf`; is 2 if + /// the callee is other printfs + virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo, +ASTContext &Ctx) = 0; + /// Invoked when an unsafe operation with a std container is found. virtual void handleUnsafeOperationInContainer(const Stmt *Operation, bool IsRelatedToDecl, diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 242ad763ba62b..ac01b285ae833 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) WARNING_GADGET(UnsafeBufferUsageCtorAttr) WARNING_GADGET(DataInvocation) +WARNING_GADGET(UnsafeLibcFunctionCall) WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 581434d33c5c9..e0fba7092dd6c 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12376,6 +12376,11 @@ def warn_unsafe_buffer_operation : Warning< "%select{unsafe pointer operation|unsafe pointer arithmetic|" "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe invocation of span::data}0">, InGroup, DefaultIgnore; +def warn_unsafe_buffer_libc_call : Warning< + "function %0 introduces unsafe buffer access">, + InGroup, DefaultIgnore; +def note_unsafe_buffer_printf_call : Note< + "%select{| change to 'snprintf' for explicit bounds checking | use 'std::string::c_str' as pointer to guarantee null-termination}0">; def note_unsafe_buffer_operation : Note< "used%select{| in pointer arithmetic| in buffer access}0 here">; def note_unsafe_buffer_variable_fixit_group : Note< diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 866222380974b..2028b931d67ea 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -9,19 +9,22 @@ #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtVisitor.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" -#include "clang/Basic/CharInfo.h" #include "clang/Basic/SourceLocation.h" #include "clan
[clang] Warning Libc functions (PR #101583)
@@ -443,6 +448,260 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { return false; } +AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) { + static const std::set PredefinedNames{ + // numeric conversion: + "atof", + "atoi", + "atol", + "atoll", + "strtol", + "strtoll", + "strtoul", + "strtoull", + "strtof", + "strtod", + "strtold", + "strtoimax", + "strtoumax", + // "strfromf", "strfromd", "strfroml", // C23? + // string manipulation: + "strcpy", + "strncpy", + "strlcpy", + "strcat", + "strncat", + "strlcat", + "strxfrm", + "strdup", + "strndup", + // string examination: + "strlen", + "strnlen", + "strcmp", + "strncmp", + "stricmp", + "strcasecmp", + "strcoll", + "strchr", + "strrchr", + "strspn", + "strcspn", + "strpbrk", + "strstr", + "strtok", + // "mem-" functions + "memchr", + "wmemchr", + "memcmp", + "wmemcmp", + "memcpy", + "memccpy", + "mempcpy", + "wmemcpy", + "memmove", + "wmemmove", + "memset", + "wmemset", + // IO: + "fread", + "fwrite", + "fgets", + "fgetws", + "gets", + "fputs", + "fputws", + "puts", + // others + "strerror_s", + "strerror_r", + "bcopy", + "bzero", + "bsearch", + "qsort", + }; + + // A tiny name parser for unsafe libc function names with additional + // checks for `printf`s: + struct FuncNameMatch { +const CallExpr *const Call; +FuncNameMatch(const CallExpr *Call) : Call(Call) {} + +// For a name `S` in `PredefinedNames` or a member of the printf/scanf +// family, define matching function names with `S` by the grammar below: +// +// CoreName := S | S["wcs"/"str"] +// LibcName := CoreName | CoreName + "_s" +// MatchingName := "__builtin_" + LibcName | +// "__builtin___" + LibcName + "_chk" | +// "__asan_" + LibcName +// +// (Note S["wcs"/"str"] means substitute "str" with "wcs" in S.) +bool matchName(StringRef FunName) { + // Try to match __builtin_: + if (FunName.starts_with("__builtin_")) +// Then either it is __builtin_LibcName or __builtin___LibcName_chk or +// no match: +return matchLibcNameOrBuiltinChk( +FunName.drop_front(10 /* truncate "__builtin_" */)); + // Try to match __asan_: + if (FunName.starts_with("__asan_")) +return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */)); + return matchLibcName(FunName); +} + +// Parameter `Name` is the substring after stripping off the prefix +// "__builtin_". +bool matchLibcNameOrBuiltinChk(StringRef Name) { + if (Name.starts_with("__") && Name.ends_with("_chk")) +return matchLibcName( +Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */); + return matchLibcName(Name); +} + +bool matchLibcName(StringRef Name) { + if (Name.ends_with("_s")) +return matchCoreName(Name.drop_back(2 /* truncate "_s" */)); + return matchCoreName(Name); +} + +bool matchCoreName(StringRef Name) { + if (PredefinedNames.find(Name.str()) != PredefinedNames.end()) +return !isSafeStrlen(Name); // match unless it's a safe strlen call + + std::string NameWCS = Name.str(); + size_t WcsPos = NameWCS.find("wcs"); + + while (WcsPos != std::string::npos) { +NameWCS[WcsPos++] = 's'; +NameWCS[WcsPos++] = 't'; +NameWCS[WcsPos++] = 'r'; +WcsPos = NameWCS.find("wcs", WcsPos); + } + if (PredefinedNames.find(NameWCS) != PredefinedNames.end()) +return !isSafeStrlen(NameWCS); + return matchPrintfOrScanfFamily(Name); +} + +bool matchPrintfOrScanfFamily(StringRef Name) { + if (Name.ends_with("scanf")) +return true; // simply warn about scanf functions + if (!Name.ends_with("printf")) +return false; // neither printf nor scanf + if (Name.starts_with("v")) +// cannot check args for va_list, so `vprintf`s are treated as regular +// unsafe libc calls: +return true; + + // Truncate "printf", focus on prefixes. There are different possible + // name prefixes: "k", "f", "s", "sn", "fw", ..., "snw". We strip off the + // 'w' and handle printfs differently by "k", "f", "s", "sn" or no prefix: + StringRef Prefix = Name.drop_back(6); + + if (Prefix.ends_with("w")) +Prefix = Prefix.drop_back(1); + return isUnsafePrintf(Prefix); +} + +// A pointer type expression is known to be null-terminated, if it has the +// form: E.c_str(), for any expression E of `std::string` type. +static bool isNullTermPointer(const Expr *Ptr) { + if (i
[clang] Warning Libc functions (PR #101583)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/101583 >From 8a8b317c2b1c73117bcbbf771a783338448724a5 Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Thu, 1 Aug 2024 16:36:27 -0700 Subject: [PATCH 1/2] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc functions Warning about calls to libc functions involving buffer access. Warned functions are hardcoded by names. (rdar://117182250) --- .../Analysis/Analyses/UnsafeBufferUsage.h | 8 + .../Analyses/UnsafeBufferUsageGadgets.def | 1 + .../clang/Basic/DiagnosticSemaKinds.td| 2 + clang/lib/Analysis/UnsafeBufferUsage.cpp | 322 +- clang/lib/Sema/AnalysisBasedWarnings.cpp | 12 + ...arn-unsafe-buffer-usage-libc-functions.cpp | 81 + ...n-unsafe-buffer-usage-test-unreachable.cpp | 4 +- 7 files changed, 425 insertions(+), 5 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h index 228b4ae1e3e11..62d0c6e350f37 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -15,6 +15,7 @@ #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" #include "clang/AST/Stmt.h" #include "clang/Basic/SourceLocation.h" #include "llvm/Support/Debug.h" @@ -106,6 +107,13 @@ class UnsafeBufferUsageHandler { virtual void handleUnsafeOperation(const Stmt *Operation, bool IsRelatedToDecl, ASTContext &Ctx) = 0; + /// Invoked when a call to an unsafe libc function is found. + /// \param PrintfInfo is 0 if the callee function is not a member of the + /// printf family; is 1 if the callee is `sprintf`; is 2 if + /// the callee is other printfs + virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo, +ASTContext &Ctx) = 0; + /// Invoked when an unsafe operation with a std container is found. virtual void handleUnsafeOperationInContainer(const Stmt *Operation, bool IsRelatedToDecl, diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 242ad763ba62b..ac01b285ae833 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) WARNING_GADGET(UnsafeBufferUsageCtorAttr) WARNING_GADGET(DataInvocation) +WARNING_GADGET(UnsafeLibcFunctionCall) WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 581434d33c5c9..ce77f459fab4f 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12376,6 +12376,8 @@ def warn_unsafe_buffer_operation : Warning< "%select{unsafe pointer operation|unsafe pointer arithmetic|" "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe invocation of span::data}0">, InGroup, DefaultIgnore; +def note_unsafe_buffer_printf_call : Note< + "%select{| change to 'snprintf' for explicit bounds checking | use 'std::string::c_str' as pointer to guarantee null-termination}0">; def note_unsafe_buffer_operation : Note< "used%select{| in pointer arithmetic| in buffer access}0 here">; def note_unsafe_buffer_variable_fixit_group : Note< diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 866222380974b..7402cf0f24c1e 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -9,19 +9,21 @@ #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" -#include "clang/AST/StmtVisitor.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" -#include "clang/Basic/CharInfo.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/APSInt.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Casting.h" #include #include @@
[clang] Warning Libc functions (PR #101583)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/101583 >From 8a8b317c2b1c73117bcbbf771a783338448724a5 Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Thu, 1 Aug 2024 16:36:27 -0700 Subject: [PATCH] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc functions Warning about calls to libc functions involving buffer access. Warned functions are hardcoded by names. (rdar://117182250) --- .../Analysis/Analyses/UnsafeBufferUsage.h | 8 + .../Analyses/UnsafeBufferUsageGadgets.def | 1 + .../clang/Basic/DiagnosticSemaKinds.td| 2 + clang/lib/Analysis/UnsafeBufferUsage.cpp | 322 +- clang/lib/Sema/AnalysisBasedWarnings.cpp | 12 + ...arn-unsafe-buffer-usage-libc-functions.cpp | 81 + ...n-unsafe-buffer-usage-test-unreachable.cpp | 4 +- 7 files changed, 425 insertions(+), 5 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h index 228b4ae1e3e11..62d0c6e350f37 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -15,6 +15,7 @@ #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" #include "clang/AST/Stmt.h" #include "clang/Basic/SourceLocation.h" #include "llvm/Support/Debug.h" @@ -106,6 +107,13 @@ class UnsafeBufferUsageHandler { virtual void handleUnsafeOperation(const Stmt *Operation, bool IsRelatedToDecl, ASTContext &Ctx) = 0; + /// Invoked when a call to an unsafe libc function is found. + /// \param PrintfInfo is 0 if the callee function is not a member of the + /// printf family; is 1 if the callee is `sprintf`; is 2 if + /// the callee is other printfs + virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo, +ASTContext &Ctx) = 0; + /// Invoked when an unsafe operation with a std container is found. virtual void handleUnsafeOperationInContainer(const Stmt *Operation, bool IsRelatedToDecl, diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 242ad763ba62b..ac01b285ae833 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) WARNING_GADGET(UnsafeBufferUsageCtorAttr) WARNING_GADGET(DataInvocation) +WARNING_GADGET(UnsafeLibcFunctionCall) WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 581434d33c5c9..ce77f459fab4f 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12376,6 +12376,8 @@ def warn_unsafe_buffer_operation : Warning< "%select{unsafe pointer operation|unsafe pointer arithmetic|" "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe invocation of span::data}0">, InGroup, DefaultIgnore; +def note_unsafe_buffer_printf_call : Note< + "%select{| change to 'snprintf' for explicit bounds checking | use 'std::string::c_str' as pointer to guarantee null-termination}0">; def note_unsafe_buffer_operation : Note< "used%select{| in pointer arithmetic| in buffer access}0 here">; def note_unsafe_buffer_variable_fixit_group : Note< diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 866222380974b..7402cf0f24c1e 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -9,19 +9,21 @@ #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" -#include "clang/AST/StmtVisitor.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" -#include "clang/Basic/CharInfo.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/APSInt.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Casting.h" #include #include @@ -44
[clang] Warning Libc functions (PR #101583)
ziqingluo-90 wrote: Though we probably don't need this warning anymore but this is really a great catch of wording issues! https://github.com/llvm/llvm-project/pull/101583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Warning Libc functions (PR #101583)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/101583 >From 0ccb5a8fc0855b2dfb948c4bb844e0394b5cedb0 Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Thu, 1 Aug 2024 16:36:27 -0700 Subject: [PATCH 1/3] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc functions Warning about calls to libc functions involving buffer access. Warned functions are hardcoded by names. --- .../Analyses/UnsafeBufferUsageGadgets.def | 1 + clang/lib/Analysis/UnsafeBufferUsage.cpp | 279 +- ...arn-unsafe-buffer-usage-libc-functions.cpp | 80 + ...n-unsafe-buffer-usage-test-unreachable.cpp | 4 +- 4 files changed, 360 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 242ad763ba62b..ac01b285ae833 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) WARNING_GADGET(UnsafeBufferUsageCtorAttr) WARNING_GADGET(DataInvocation) +WARNING_GADGET(UnsafeLibcFunctionCall) WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 866222380974b..926a78b6f0096 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -9,20 +9,25 @@ #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtVisitor.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" -#include "clang/Basic/CharInfo.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/APSInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Casting.h" +#include #include #include #include @@ -443,6 +448,260 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { return false; } +AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) { + static const std::set PredefinedNames{ + // numeric conversion: + "atof", + "atoi", + "atol", + "atoll", + "strtol", + "strtoll", + "strtoul", + "strtoull", + "strtof", + "strtod", + "strtold", + "strtoimax", + "strtoumax", + // "strfromf", "strfromd", "strfroml", // C23? + // string manipulation: + "strcpy", + "strncpy", + "strlcpy", + "strcat", + "strncat", + "strlcat", + "strxfrm", + "strdup", + "strndup", + // string examination: + "strlen", + "strnlen", + "strcmp", + "strncmp", + "stricmp", + "strcasecmp", + "strcoll", + "strchr", + "strrchr", + "strspn", + "strcspn", + "strpbrk", + "strstr", + "strtok", + // "mem-" functions + "memchr", + "wmemchr", + "memcmp", + "wmemcmp", + "memcpy", + "memccpy", + "mempcpy", + "wmemcpy", + "memmove", + "wmemmove", + "memset", + "wmemset", + // IO: + "fread", + "fwrite", + "fgets", + "fgetws", + "gets", + "fputs", + "fputws", + "puts", + // others + "strerror_s", + "strerror_r", + "bcopy", + "bzero", + "bsearch", + "qsort", + }; + + // A tiny name parser for unsafe libc function names with additional + // checks for `printf`s: + struct FuncNameMatch { +const CallExpr *const Call; +FuncNameMatch(const CallExpr *Call) : Call(Call) {} + +// For a name `S` in `PredefinedNames` or a member of the printf/scanf +// family, define matching function names with `S` by the grammar below: +// +// CoreName := S | S["wcs"/"str"] +// LibcName := CoreName | CoreName + "_s" +// MatchingName := "__builtin_" + LibcName | +// "__builtin___" + LibcName + "_chk" | +// "__asan_" + LibcName +// +// (Note S["wcs"/"str"] means substitute "str" with "wcs" in S.) +bool matchName(StringRef FunName) { + // Try to match __builtin_: + if (FunName.starts_with("__builtin_")) +
[clang] Warning Libc functions (PR #101583)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/101583 >From 0ccb5a8fc0855b2dfb948c4bb844e0394b5cedb0 Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Thu, 1 Aug 2024 16:36:27 -0700 Subject: [PATCH 1/2] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc functions Warning about calls to libc functions involving buffer access. Warned functions are hardcoded by names. --- .../Analyses/UnsafeBufferUsageGadgets.def | 1 + clang/lib/Analysis/UnsafeBufferUsage.cpp | 279 +- ...arn-unsafe-buffer-usage-libc-functions.cpp | 80 + ...n-unsafe-buffer-usage-test-unreachable.cpp | 4 +- 4 files changed, 360 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 242ad763ba62b..ac01b285ae833 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) WARNING_GADGET(UnsafeBufferUsageCtorAttr) WARNING_GADGET(DataInvocation) +WARNING_GADGET(UnsafeLibcFunctionCall) WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 866222380974b..926a78b6f0096 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -9,20 +9,25 @@ #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtVisitor.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" -#include "clang/Basic/CharInfo.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/APSInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Casting.h" +#include #include #include #include @@ -443,6 +448,260 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { return false; } +AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) { + static const std::set PredefinedNames{ + // numeric conversion: + "atof", + "atoi", + "atol", + "atoll", + "strtol", + "strtoll", + "strtoul", + "strtoull", + "strtof", + "strtod", + "strtold", + "strtoimax", + "strtoumax", + // "strfromf", "strfromd", "strfroml", // C23? + // string manipulation: + "strcpy", + "strncpy", + "strlcpy", + "strcat", + "strncat", + "strlcat", + "strxfrm", + "strdup", + "strndup", + // string examination: + "strlen", + "strnlen", + "strcmp", + "strncmp", + "stricmp", + "strcasecmp", + "strcoll", + "strchr", + "strrchr", + "strspn", + "strcspn", + "strpbrk", + "strstr", + "strtok", + // "mem-" functions + "memchr", + "wmemchr", + "memcmp", + "wmemcmp", + "memcpy", + "memccpy", + "mempcpy", + "wmemcpy", + "memmove", + "wmemmove", + "memset", + "wmemset", + // IO: + "fread", + "fwrite", + "fgets", + "fgetws", + "gets", + "fputs", + "fputws", + "puts", + // others + "strerror_s", + "strerror_r", + "bcopy", + "bzero", + "bsearch", + "qsort", + }; + + // A tiny name parser for unsafe libc function names with additional + // checks for `printf`s: + struct FuncNameMatch { +const CallExpr *const Call; +FuncNameMatch(const CallExpr *Call) : Call(Call) {} + +// For a name `S` in `PredefinedNames` or a member of the printf/scanf +// family, define matching function names with `S` by the grammar below: +// +// CoreName := S | S["wcs"/"str"] +// LibcName := CoreName | CoreName + "_s" +// MatchingName := "__builtin_" + LibcName | +// "__builtin___" + LibcName + "_chk" | +// "__asan_" + LibcName +// +// (Note S["wcs"/"str"] means substitute "str" with "wcs" in S.) +bool matchName(StringRef FunName) { + // Try to match __builtin_: + if (FunName.starts_with("__builtin_")) +
[clang] Warning Libc functions (PR #101583)
https://github.com/ziqingluo-90 edited https://github.com/llvm/llvm-project/pull/101583 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Warning Libc functions (PR #101583)
https://github.com/ziqingluo-90 created https://github.com/llvm/llvm-project/pull/101583 None >From 0ccb5a8fc0855b2dfb948c4bb844e0394b5cedb0 Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Thu, 1 Aug 2024 16:36:27 -0700 Subject: [PATCH 1/2] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc functions Warning about calls to libc functions involving buffer access. Warned functions are hardcoded by names. --- .../Analyses/UnsafeBufferUsageGadgets.def | 1 + clang/lib/Analysis/UnsafeBufferUsage.cpp | 279 +- ...arn-unsafe-buffer-usage-libc-functions.cpp | 80 + ...n-unsafe-buffer-usage-test-unreachable.cpp | 4 +- 4 files changed, 360 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 242ad763ba62b..ac01b285ae833 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) WARNING_GADGET(UnsafeBufferUsageCtorAttr) WARNING_GADGET(DataInvocation) +WARNING_GADGET(UnsafeLibcFunctionCall) WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 866222380974b..926a78b6f0096 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -9,20 +9,25 @@ #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtVisitor.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" -#include "clang/Basic/CharInfo.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/APSInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Casting.h" +#include #include #include #include @@ -443,6 +448,260 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { return false; } +AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) { + static const std::set PredefinedNames{ + // numeric conversion: + "atof", + "atoi", + "atol", + "atoll", + "strtol", + "strtoll", + "strtoul", + "strtoull", + "strtof", + "strtod", + "strtold", + "strtoimax", + "strtoumax", + // "strfromf", "strfromd", "strfroml", // C23? + // string manipulation: + "strcpy", + "strncpy", + "strlcpy", + "strcat", + "strncat", + "strlcat", + "strxfrm", + "strdup", + "strndup", + // string examination: + "strlen", + "strnlen", + "strcmp", + "strncmp", + "stricmp", + "strcasecmp", + "strcoll", + "strchr", + "strrchr", + "strspn", + "strcspn", + "strpbrk", + "strstr", + "strtok", + // "mem-" functions + "memchr", + "wmemchr", + "memcmp", + "wmemcmp", + "memcpy", + "memccpy", + "mempcpy", + "wmemcpy", + "memmove", + "wmemmove", + "memset", + "wmemset", + // IO: + "fread", + "fwrite", + "fgets", + "fgetws", + "gets", + "fputs", + "fputws", + "puts", + // others + "strerror_s", + "strerror_r", + "bcopy", + "bzero", + "bsearch", + "qsort", + }; + + // A tiny name parser for unsafe libc function names with additional + // checks for `printf`s: + struct FuncNameMatch { +const CallExpr *const Call; +FuncNameMatch(const CallExpr *Call) : Call(Call) {} + +// For a name `S` in `PredefinedNames` or a member of the printf/scanf +// family, define matching function names with `S` by the grammar below: +// +// CoreName := S | S["wcs"/"str"] +// LibcName := CoreName | CoreName + "_s" +// MatchingName := "__builtin_" + LibcName | +// "__builtin___" + LibcName + "_chk" | +// "__asan_" + LibcName +// +// (Note S["wcs"/"str"] means substitute "str" with "wcs" in S.) +bool matchName(StringRef FunName) { + // Try to match __builtin_: + if (FunName.starts_with("__builtin_")) +
[clang] [Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)
https://github.com/ziqingluo-90 closed https://github.com/llvm/llvm-project/pull/92031 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/92031 >From ac5aeb5c3a134d085320fc7fc5cf3f2c8c41a1f1 Mon Sep 17 00:00:00 2001 From: ziqingluo-90 Date: Mon, 13 May 2024 13:31:21 -0700 Subject: [PATCH 1/7] fix safe buffer opt-out region serialization --- clang/include/clang/Lex/Preprocessor.h| 22 +++- .../include/clang/Serialization/ASTBitCodes.h | 3 + clang/lib/Lex/Preprocessor.cpp| 106 ++ clang/lib/Serialization/ASTReader.cpp | 11 ++ clang/lib/Serialization/ASTWriter.cpp | 7 ++ clang/test/Modules/Inputs/SafeBuffers/base.h | 9 ++ .../SafeBuffers/safe_buffers_test.modulemap | 10 ++ .../Modules/Inputs/SafeBuffers/test_sub1.h| 20 .../Modules/Inputs/SafeBuffers/test_sub2.h| 11 ++ clang/test/Modules/safe_buffers_optout.cpp| 39 +++ ...unsafe-buffer-usage-pragma-pch-complex.cpp | 72 .../warn-unsafe-buffer-usage-pragma-pch.cpp | 27 + 12 files changed, 314 insertions(+), 23 deletions(-) create mode 100644 clang/test/Modules/Inputs/SafeBuffers/base.h create mode 100644 clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub1.h create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub2.h create mode 100644 clang/test/Modules/safe_buffers_optout.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index e89b4a2c5230e..8d6884ebe7597 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2883,11 +2883,15 @@ class Preprocessor { /// otherwise. SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the start location of an never-closed region. - // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in one - // translation unit. Each region is represented by a pair of start and end - // locations. A region is "open" if its' start and end locations are + using SafeBufferOptOutMapTy = + SmallVector, 16>; + // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this + // translation unit. Each region is represented by a pair of start and + // end locations. A region is "open" if its' start and end locations are // identical. - SmallVector, 8> SafeBufferOptOutMap; + SafeBufferOptOutMapTy SafeBufferOptOutMap; + // `SafeBufferOptOutMap`s of loaded files: + llvm::DenseMap LoadedSafeBufferOptOutMap; public: /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out @@ -2918,6 +2922,16 @@ class Preprocessor { /// opt-out region bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc); + /// \return a sequence of SourceLocations representing ordered opt-out regions + /// specified by + /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit. + SmallVector serializeSafeBufferOptOutMap() const; + + /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a + /// record of code `PP_UNSAFE_BUFFER_USAGE`. + void setDeserializedSafeBufferOptOutMap( + const SmallVectorImpl &SrcLocSeqs); + private: /// Helper functions to forward lexing to the actual lexer. They all share the /// same signature. diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index dcfa4ac0c1967..d1a0eba943039 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -775,6 +775,9 @@ enum ASTRecordTypes { /// Record code for lexical and visible block for delayed namespace in /// reduced BMI. DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD = 68, + + /// Record code for \#pragma clang unsafe_buffer_usage begin/end + PP_UNSAFE_BUFFER_USAGE = 69, }; /// Record types used within a source manager block. diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 0b70192743a39..6a41e3d4138aa 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -58,6 +58,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Capacity.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" @@ -1483,26 +1484,41 @@ void Preprocessor::emitFinalMacroWarning(const Token &Identifier, } bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr, - const SourceLocation &Loc) const { - // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in: - auto FirstRegionEndingAfterLoc = llvm::partition_point( - SafeBufferOptOutMap, - [&SourceMgr, -
[clang] [Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/92031 >From ac5aeb5c3a134d085320fc7fc5cf3f2c8c41a1f1 Mon Sep 17 00:00:00 2001 From: ziqingluo-90 Date: Mon, 13 May 2024 13:31:21 -0700 Subject: [PATCH 1/6] fix safe buffer opt-out region serialization --- clang/include/clang/Lex/Preprocessor.h| 22 +++- .../include/clang/Serialization/ASTBitCodes.h | 3 + clang/lib/Lex/Preprocessor.cpp| 106 ++ clang/lib/Serialization/ASTReader.cpp | 11 ++ clang/lib/Serialization/ASTWriter.cpp | 7 ++ clang/test/Modules/Inputs/SafeBuffers/base.h | 9 ++ .../SafeBuffers/safe_buffers_test.modulemap | 10 ++ .../Modules/Inputs/SafeBuffers/test_sub1.h| 20 .../Modules/Inputs/SafeBuffers/test_sub2.h| 11 ++ clang/test/Modules/safe_buffers_optout.cpp| 39 +++ ...unsafe-buffer-usage-pragma-pch-complex.cpp | 72 .../warn-unsafe-buffer-usage-pragma-pch.cpp | 27 + 12 files changed, 314 insertions(+), 23 deletions(-) create mode 100644 clang/test/Modules/Inputs/SafeBuffers/base.h create mode 100644 clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub1.h create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub2.h create mode 100644 clang/test/Modules/safe_buffers_optout.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index e89b4a2c5230e..8d6884ebe7597 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2883,11 +2883,15 @@ class Preprocessor { /// otherwise. SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the start location of an never-closed region. - // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in one - // translation unit. Each region is represented by a pair of start and end - // locations. A region is "open" if its' start and end locations are + using SafeBufferOptOutMapTy = + SmallVector, 16>; + // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this + // translation unit. Each region is represented by a pair of start and + // end locations. A region is "open" if its' start and end locations are // identical. - SmallVector, 8> SafeBufferOptOutMap; + SafeBufferOptOutMapTy SafeBufferOptOutMap; + // `SafeBufferOptOutMap`s of loaded files: + llvm::DenseMap LoadedSafeBufferOptOutMap; public: /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out @@ -2918,6 +2922,16 @@ class Preprocessor { /// opt-out region bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc); + /// \return a sequence of SourceLocations representing ordered opt-out regions + /// specified by + /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit. + SmallVector serializeSafeBufferOptOutMap() const; + + /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a + /// record of code `PP_UNSAFE_BUFFER_USAGE`. + void setDeserializedSafeBufferOptOutMap( + const SmallVectorImpl &SrcLocSeqs); + private: /// Helper functions to forward lexing to the actual lexer. They all share the /// same signature. diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index dcfa4ac0c1967..d1a0eba943039 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -775,6 +775,9 @@ enum ASTRecordTypes { /// Record code for lexical and visible block for delayed namespace in /// reduced BMI. DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD = 68, + + /// Record code for \#pragma clang unsafe_buffer_usage begin/end + PP_UNSAFE_BUFFER_USAGE = 69, }; /// Record types used within a source manager block. diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 0b70192743a39..6a41e3d4138aa 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -58,6 +58,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Capacity.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" @@ -1483,26 +1484,41 @@ void Preprocessor::emitFinalMacroWarning(const Token &Identifier, } bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr, - const SourceLocation &Loc) const { - // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in: - auto FirstRegionEndingAfterLoc = llvm::partition_point( - SafeBufferOptOutMap, - [&SourceMgr, -
[clang] [Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)
@@ -0,0 +1,41 @@ +// RUN: rm -rf %t +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module -fmodule-name=safe_buffers_test_base -x c++ %S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20\ +// RUN: -o %t/safe_buffers_test_base.pcm -Wunsafe-buffer-usage +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module -fmodule-name=safe_buffers_test_module -x c++ %S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20\ +// RUN: -o %t/safe_buffers_test_module.pcm +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module -fmodule-name=safe_buffers_test_optout -x c++ %S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20\ +// RUN: -o %t/safe_buffers_test_optout.pcm -fmodule-file=%t/safe_buffers_test_base.pcm -fmodule-file=%t/safe_buffers_test_module.pcm -Wunsafe-buffer-usage +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -verify -fmodule-file=%t/safe_buffers_test_optout.pcm -I %S/Inputs/SafeBuffers %s\ +// RUN: -std=c++20 -Wunsafe-buffer-usage + +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -fmodule-map-file=%S/Inputs/SafeBuffers/safe_buffers_test.modulemap -I %S/Inputs/SafeBuffers %s\ +// RUN: -x c++ -std=c++20 -Wunsafe-buffer-usage + +#include "test_sub1.h" ziqingluo-90 wrote: Thank you for the comment! `split-file` is really neat!! https://github.com/llvm/llvm-project/pull/92031 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/92031 >From ac5aeb5c3a134d085320fc7fc5cf3f2c8c41a1f1 Mon Sep 17 00:00:00 2001 From: ziqingluo-90 Date: Mon, 13 May 2024 13:31:21 -0700 Subject: [PATCH 1/5] fix safe buffer opt-out region serialization --- clang/include/clang/Lex/Preprocessor.h| 22 +++- .../include/clang/Serialization/ASTBitCodes.h | 3 + clang/lib/Lex/Preprocessor.cpp| 106 ++ clang/lib/Serialization/ASTReader.cpp | 11 ++ clang/lib/Serialization/ASTWriter.cpp | 7 ++ clang/test/Modules/Inputs/SafeBuffers/base.h | 9 ++ .../SafeBuffers/safe_buffers_test.modulemap | 10 ++ .../Modules/Inputs/SafeBuffers/test_sub1.h| 20 .../Modules/Inputs/SafeBuffers/test_sub2.h| 11 ++ clang/test/Modules/safe_buffers_optout.cpp| 39 +++ ...unsafe-buffer-usage-pragma-pch-complex.cpp | 72 .../warn-unsafe-buffer-usage-pragma-pch.cpp | 27 + 12 files changed, 314 insertions(+), 23 deletions(-) create mode 100644 clang/test/Modules/Inputs/SafeBuffers/base.h create mode 100644 clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub1.h create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub2.h create mode 100644 clang/test/Modules/safe_buffers_optout.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index e89b4a2c5230e..8d6884ebe7597 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2883,11 +2883,15 @@ class Preprocessor { /// otherwise. SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the start location of an never-closed region. - // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in one - // translation unit. Each region is represented by a pair of start and end - // locations. A region is "open" if its' start and end locations are + using SafeBufferOptOutMapTy = + SmallVector, 16>; + // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this + // translation unit. Each region is represented by a pair of start and + // end locations. A region is "open" if its' start and end locations are // identical. - SmallVector, 8> SafeBufferOptOutMap; + SafeBufferOptOutMapTy SafeBufferOptOutMap; + // `SafeBufferOptOutMap`s of loaded files: + llvm::DenseMap LoadedSafeBufferOptOutMap; public: /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out @@ -2918,6 +2922,16 @@ class Preprocessor { /// opt-out region bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc); + /// \return a sequence of SourceLocations representing ordered opt-out regions + /// specified by + /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit. + SmallVector serializeSafeBufferOptOutMap() const; + + /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a + /// record of code `PP_UNSAFE_BUFFER_USAGE`. + void setDeserializedSafeBufferOptOutMap( + const SmallVectorImpl &SrcLocSeqs); + private: /// Helper functions to forward lexing to the actual lexer. They all share the /// same signature. diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index dcfa4ac0c1967..d1a0eba943039 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -775,6 +775,9 @@ enum ASTRecordTypes { /// Record code for lexical and visible block for delayed namespace in /// reduced BMI. DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD = 68, + + /// Record code for \#pragma clang unsafe_buffer_usage begin/end + PP_UNSAFE_BUFFER_USAGE = 69, }; /// Record types used within a source manager block. diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 0b70192743a39..6a41e3d4138aa 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -58,6 +58,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Capacity.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" @@ -1483,26 +1484,41 @@ void Preprocessor::emitFinalMacroWarning(const Token &Identifier, } bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr, - const SourceLocation &Loc) const { - // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in: - auto FirstRegionEndingAfterLoc = llvm::partition_point( - SafeBufferOptOutMap, - [&SourceMgr, -
[clang] [Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)
https://github.com/ziqingluo-90 edited https://github.com/llvm/llvm-project/pull/92031 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)
ziqingluo-90 wrote: Learned more about how loaded files & ASTs are managed in clang. Now opt-out regions in a loaded AST can span over multiple files (within one TU). They now behave in a consistent way when they are from loaded files or local files. https://github.com/llvm/llvm-project/pull/92031 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/92031 >From ac5aeb5c3a134d085320fc7fc5cf3f2c8c41a1f1 Mon Sep 17 00:00:00 2001 From: ziqingluo-90 Date: Mon, 13 May 2024 13:31:21 -0700 Subject: [PATCH 1/4] fix safe buffer opt-out region serialization --- clang/include/clang/Lex/Preprocessor.h| 22 +++- .../include/clang/Serialization/ASTBitCodes.h | 3 + clang/lib/Lex/Preprocessor.cpp| 106 ++ clang/lib/Serialization/ASTReader.cpp | 11 ++ clang/lib/Serialization/ASTWriter.cpp | 7 ++ clang/test/Modules/Inputs/SafeBuffers/base.h | 9 ++ .../SafeBuffers/safe_buffers_test.modulemap | 10 ++ .../Modules/Inputs/SafeBuffers/test_sub1.h| 20 .../Modules/Inputs/SafeBuffers/test_sub2.h| 11 ++ clang/test/Modules/safe_buffers_optout.cpp| 39 +++ ...unsafe-buffer-usage-pragma-pch-complex.cpp | 72 .../warn-unsafe-buffer-usage-pragma-pch.cpp | 27 + 12 files changed, 314 insertions(+), 23 deletions(-) create mode 100644 clang/test/Modules/Inputs/SafeBuffers/base.h create mode 100644 clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub1.h create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub2.h create mode 100644 clang/test/Modules/safe_buffers_optout.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index e89b4a2c5230e..8d6884ebe7597 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2883,11 +2883,15 @@ class Preprocessor { /// otherwise. SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the start location of an never-closed region. - // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in one - // translation unit. Each region is represented by a pair of start and end - // locations. A region is "open" if its' start and end locations are + using SafeBufferOptOutMapTy = + SmallVector, 16>; + // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this + // translation unit. Each region is represented by a pair of start and + // end locations. A region is "open" if its' start and end locations are // identical. - SmallVector, 8> SafeBufferOptOutMap; + SafeBufferOptOutMapTy SafeBufferOptOutMap; + // `SafeBufferOptOutMap`s of loaded files: + llvm::DenseMap LoadedSafeBufferOptOutMap; public: /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out @@ -2918,6 +2922,16 @@ class Preprocessor { /// opt-out region bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc); + /// \return a sequence of SourceLocations representing ordered opt-out regions + /// specified by + /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit. + SmallVector serializeSafeBufferOptOutMap() const; + + /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a + /// record of code `PP_UNSAFE_BUFFER_USAGE`. + void setDeserializedSafeBufferOptOutMap( + const SmallVectorImpl &SrcLocSeqs); + private: /// Helper functions to forward lexing to the actual lexer. They all share the /// same signature. diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index dcfa4ac0c1967..d1a0eba943039 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -775,6 +775,9 @@ enum ASTRecordTypes { /// Record code for lexical and visible block for delayed namespace in /// reduced BMI. DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD = 68, + + /// Record code for \#pragma clang unsafe_buffer_usage begin/end + PP_UNSAFE_BUFFER_USAGE = 69, }; /// Record types used within a source manager block. diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 0b70192743a39..6a41e3d4138aa 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -58,6 +58,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Capacity.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" @@ -1483,26 +1484,41 @@ void Preprocessor::emitFinalMacroWarning(const Token &Identifier, } bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr, - const SourceLocation &Loc) const { - // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in: - auto FirstRegionEndingAfterLoc = llvm::partition_point( - SafeBufferOptOutMap, - [&SourceMgr, -
[clang] [WIP][Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)
ziqingluo-90 wrote: Just realized how stupid I am (O_o)---the current implementation only compares source locations with opt-out regions that are in the same source file, for all source locations from loaded files. This is not going to work for cases like below. ``` // in a pch header: #pragma clang unsafe_buffer_usage begin #include "other_header.h" // -Wunsafe-buffer-usage in "other_header.h" is NOT suppressed ! #pragma clang unsafe_buffer_usage end ``` Add [WIP] back to this PR. More work is needed. https://github.com/llvm/llvm-project/pull/92031 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)
https://github.com/ziqingluo-90 edited https://github.com/llvm/llvm-project/pull/92031 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Fix false positives for constant cases (PR #92432)
@@ -420,25 +420,64 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { //already duplicated // - call both from Sema and from here - const auto *BaseDRE = - dyn_cast(Node.getBase()->IgnoreParenImpCasts()); - if (!BaseDRE) + if (const auto *BaseDRE = + dyn_cast(Node.getBase()->IgnoreParenImpCasts())) { +if (!BaseDRE->getDecl()) + return false; +if (const auto *CATy = Finder->getASTContext().getAsConstantArrayType( + BaseDRE->getDecl()->getType())) { + if (const auto *IdxLit = dyn_cast(Node.getIdx())) { +const APInt ArrIdx = IdxLit->getValue(); +// FIXME: ArrIdx.isNegative() we could immediately emit an error as that's a +// bug +if (ArrIdx.isNonNegative() && +ArrIdx.getLimitedValue() < CATy->getLimitedSize()) + return true; + } +} + } + + if (const auto *BaseSL = + dyn_cast(Node.getBase()->IgnoreParenImpCasts())) { +if (const auto *CATy = Finder->getASTContext().getAsConstantArrayType( + BaseSL->getType())) { + if (const auto *IdxLit = dyn_cast(Node.getIdx())) { + const APInt ArrIdx = IdxLit->getValue(); + // FIXME: ArrIdx.isNegative() we could immediately emit an error as that's a + // bug + if (ArrIdx.isNonNegative() && + ArrIdx.getLimitedValue() < CATy->getLimitedSize()) +return true; + } +} + } + + return false; +} + +AST_MATCHER(BinaryOperator, isSafePtrArithmetic) { + if (Node.getOpcode() != BinaryOperatorKind::BO_Add) return false; - if (!BaseDRE->getDecl()) + + const auto *LHSDRE = ziqingluo-90 wrote: For both the AST matchers, I'm just curious if we really need to make sure the left-hand side is a DRE? Could we just try to test if its' type is a constant array type regardless of its' expression kind? https://github.com/llvm/llvm-project/pull/92432 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)
@@ -1551,6 +1567,58 @@ bool Preprocessor::isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc) { return InSafeBufferOptOutRegion; } +SmallVector +Preprocessor::serializeSafeBufferOptOutMap() const { + assert(!InSafeBufferOptOutRegion && + "Attempt to serialize safe buffer opt-out regions before file being " + "completely preprocessed"); + + SmallVector SrcSeq; + + for (const auto &[begin, end] : SafeBufferOptOutMap) { +SrcSeq.push_back(begin); +SrcSeq.push_back(end); + } + // Only `SafeBufferOptOutMap` gets serialized. No need to serialize + // `LoadedSafeBufferOptOutMap` because if this TU loads a pch/module, every + // pch/module in the pch-chain/module-DAG will be loaded one by one in order. + // It means that for each loading pch/module m, it just needs to load m's own + // `SafeBufferOptOutMap`. + return SrcSeq; +} + +void Preprocessor::setDeserializedSafeBufferOptOutMap( +const SmallVectorImpl &SourceLocations) { + auto It = SourceLocations.begin(); + + assert(SourceLocations.size() % 2 == 0 && + "ill-formed SourceLocation sequence"); + while (It != SourceLocations.end()) { +SourceLocation begin = *It++; +SourceLocation end = *It++; +SourceLocation FileLoc = SourceMgr.getFileLoc(begin); +FileID FID = SourceMgr.getDecomposedLoc(FileLoc).first; + +if (FID.isInvalid()) { + // I suppose this should not happen: + assert(false && "Attempted to read a safe buffer opt-out region whose " + "begin location is associated to an invalid File ID."); + break; +} +assert(!SourceMgr.isLocalFileID(FID) && "Expected a pch/module file"); +// Here we assume that +// `SourceMgr.getFileLoc(begin) == SourceMgr.getFileLoc(end)`. +// Though it may not hold in very rare and strange cases, i.e., a pair of ziqingluo-90 wrote: yes, we can emit front-end errors. See this newly added test: https://github.com/llvm/llvm-project/pull/92031/files#diff-697faf822726557e5d62bcf106fc6cb03ff333b30b0a42ec192d93e6e12fd8fa https://github.com/llvm/llvm-project/pull/92031 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/92031 >From ac5aeb5c3a134d085320fc7fc5cf3f2c8c41a1f1 Mon Sep 17 00:00:00 2001 From: ziqingluo-90 Date: Mon, 13 May 2024 13:31:21 -0700 Subject: [PATCH 1/3] fix safe buffer opt-out region serialization --- clang/include/clang/Lex/Preprocessor.h| 22 +++- .../include/clang/Serialization/ASTBitCodes.h | 3 + clang/lib/Lex/Preprocessor.cpp| 106 ++ clang/lib/Serialization/ASTReader.cpp | 11 ++ clang/lib/Serialization/ASTWriter.cpp | 7 ++ clang/test/Modules/Inputs/SafeBuffers/base.h | 9 ++ .../SafeBuffers/safe_buffers_test.modulemap | 10 ++ .../Modules/Inputs/SafeBuffers/test_sub1.h| 20 .../Modules/Inputs/SafeBuffers/test_sub2.h| 11 ++ clang/test/Modules/safe_buffers_optout.cpp| 39 +++ ...unsafe-buffer-usage-pragma-pch-complex.cpp | 72 .../warn-unsafe-buffer-usage-pragma-pch.cpp | 27 + 12 files changed, 314 insertions(+), 23 deletions(-) create mode 100644 clang/test/Modules/Inputs/SafeBuffers/base.h create mode 100644 clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub1.h create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub2.h create mode 100644 clang/test/Modules/safe_buffers_optout.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index e89b4a2c5230e..8d6884ebe7597 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2883,11 +2883,15 @@ class Preprocessor { /// otherwise. SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the start location of an never-closed region. - // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in one - // translation unit. Each region is represented by a pair of start and end - // locations. A region is "open" if its' start and end locations are + using SafeBufferOptOutMapTy = + SmallVector, 16>; + // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this + // translation unit. Each region is represented by a pair of start and + // end locations. A region is "open" if its' start and end locations are // identical. - SmallVector, 8> SafeBufferOptOutMap; + SafeBufferOptOutMapTy SafeBufferOptOutMap; + // `SafeBufferOptOutMap`s of loaded files: + llvm::DenseMap LoadedSafeBufferOptOutMap; public: /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out @@ -2918,6 +2922,16 @@ class Preprocessor { /// opt-out region bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc); + /// \return a sequence of SourceLocations representing ordered opt-out regions + /// specified by + /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit. + SmallVector serializeSafeBufferOptOutMap() const; + + /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a + /// record of code `PP_UNSAFE_BUFFER_USAGE`. + void setDeserializedSafeBufferOptOutMap( + const SmallVectorImpl &SrcLocSeqs); + private: /// Helper functions to forward lexing to the actual lexer. They all share the /// same signature. diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index dcfa4ac0c1967..d1a0eba943039 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -775,6 +775,9 @@ enum ASTRecordTypes { /// Record code for lexical and visible block for delayed namespace in /// reduced BMI. DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD = 68, + + /// Record code for \#pragma clang unsafe_buffer_usage begin/end + PP_UNSAFE_BUFFER_USAGE = 69, }; /// Record types used within a source manager block. diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 0b70192743a39..6a41e3d4138aa 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -58,6 +58,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Capacity.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" @@ -1483,26 +1484,41 @@ void Preprocessor::emitFinalMacroWarning(const Token &Identifier, } bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr, - const SourceLocation &Loc) const { - // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in: - auto FirstRegionEndingAfterLoc = llvm::partition_point( - SafeBufferOptOutMap, - [&SourceMgr, -
[clang] [Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)
@@ -1551,6 +1567,58 @@ bool Preprocessor::isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc) { return InSafeBufferOptOutRegion; } +SmallVector +Preprocessor::serializeSafeBufferOptOutMap() const { + assert(!InSafeBufferOptOutRegion && ziqingluo-90 wrote: Oh, I'm sorry, they are not warnings, they are errors: https://github.com/llvm/llvm-project/blob/c79690040acf5bb3d857558b0878db47f7f23dc3/clang/lib/Lex/Pragma.cpp#L1265 https://github.com/llvm/llvm-project/pull/92031 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/92031 >From ac5aeb5c3a134d085320fc7fc5cf3f2c8c41a1f1 Mon Sep 17 00:00:00 2001 From: ziqingluo-90 Date: Mon, 13 May 2024 13:31:21 -0700 Subject: [PATCH 1/2] fix safe buffer opt-out region serialization --- clang/include/clang/Lex/Preprocessor.h| 22 +++- .../include/clang/Serialization/ASTBitCodes.h | 3 + clang/lib/Lex/Preprocessor.cpp| 106 ++ clang/lib/Serialization/ASTReader.cpp | 11 ++ clang/lib/Serialization/ASTWriter.cpp | 7 ++ clang/test/Modules/Inputs/SafeBuffers/base.h | 9 ++ .../SafeBuffers/safe_buffers_test.modulemap | 10 ++ .../Modules/Inputs/SafeBuffers/test_sub1.h| 20 .../Modules/Inputs/SafeBuffers/test_sub2.h| 11 ++ clang/test/Modules/safe_buffers_optout.cpp| 39 +++ ...unsafe-buffer-usage-pragma-pch-complex.cpp | 72 .../warn-unsafe-buffer-usage-pragma-pch.cpp | 27 + 12 files changed, 314 insertions(+), 23 deletions(-) create mode 100644 clang/test/Modules/Inputs/SafeBuffers/base.h create mode 100644 clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub1.h create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub2.h create mode 100644 clang/test/Modules/safe_buffers_optout.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index e89b4a2c5230e..8d6884ebe7597 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2883,11 +2883,15 @@ class Preprocessor { /// otherwise. SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the start location of an never-closed region. - // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in one - // translation unit. Each region is represented by a pair of start and end - // locations. A region is "open" if its' start and end locations are + using SafeBufferOptOutMapTy = + SmallVector, 16>; + // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this + // translation unit. Each region is represented by a pair of start and + // end locations. A region is "open" if its' start and end locations are // identical. - SmallVector, 8> SafeBufferOptOutMap; + SafeBufferOptOutMapTy SafeBufferOptOutMap; + // `SafeBufferOptOutMap`s of loaded files: + llvm::DenseMap LoadedSafeBufferOptOutMap; public: /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out @@ -2918,6 +2922,16 @@ class Preprocessor { /// opt-out region bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc); + /// \return a sequence of SourceLocations representing ordered opt-out regions + /// specified by + /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit. + SmallVector serializeSafeBufferOptOutMap() const; + + /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a + /// record of code `PP_UNSAFE_BUFFER_USAGE`. + void setDeserializedSafeBufferOptOutMap( + const SmallVectorImpl &SrcLocSeqs); + private: /// Helper functions to forward lexing to the actual lexer. They all share the /// same signature. diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index dcfa4ac0c1967..d1a0eba943039 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -775,6 +775,9 @@ enum ASTRecordTypes { /// Record code for lexical and visible block for delayed namespace in /// reduced BMI. DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD = 68, + + /// Record code for \#pragma clang unsafe_buffer_usage begin/end + PP_UNSAFE_BUFFER_USAGE = 69, }; /// Record types used within a source manager block. diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 0b70192743a39..6a41e3d4138aa 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -58,6 +58,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Capacity.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" @@ -1483,26 +1484,41 @@ void Preprocessor::emitFinalMacroWarning(const Token &Identifier, } bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr, - const SourceLocation &Loc) const { - // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in: - auto FirstRegionEndingAfterLoc = llvm::partition_point( - SafeBufferOptOutMap, - [&SourceMgr, -
[clang] [WIP][Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)
ziqingluo-90 wrote: > I'm very happy that this is going somewhere! Everything makes sense to me but > I also don't know a lot about this stuff. > > > During serialization, it only serializes regions of the current translation > > unit. Regions from loaded files are not serialized. > > Hmm how does this work? When a precompiled header includes another > precompiled header, do they both get loaded independently and then you get to > combine the maps? For loading multiple PCHs, it is a chain. Suppose `TU` includes pch `B` and `B` includes pch `C`. So one would compile and emit PCHs starting from `C`, then do it for `B` with `C` being loaded. When compile `A`, it includes pch `B` from the command line and clang actually loads `C` and `B` separately. It is the same story for modules, except that modules form a DAG instead of a chain. Clang will load each module file in an order with respect to the DAG. So for each module/PCH emitted by clang, only their own opt-out regions need to be serialized. https://github.com/llvm/llvm-project/pull/92031 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)
@@ -1551,6 +1567,58 @@ bool Preprocessor::isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc) { return InSafeBufferOptOutRegion; } +SmallVector +Preprocessor::serializeSafeBufferOptOutMap() const { + assert(!InSafeBufferOptOutRegion && ziqingluo-90 wrote: It's a warning. https://github.com/llvm/llvm-project/pull/92031 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [WIP][Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)
https://github.com/ziqingluo-90 created https://github.com/llvm/llvm-project/pull/92031 A pair of `#pragma clang unsafe_buffer_usage begin/end` pragmas marks a warning-opt-out region. The begin and end locations (opt-out regions) are stored in preprocessor instances (PP) and will be queried by the `-Wunsafe-buffer-usage` analyzer. The commit adds serialization and de-serialization implementations for the stored regions. Basically, the serialized representation of the regions of a PP is a (ordered) sequence of source location encodings. During serialization, it only serializes regions of the current translation unit. Regions from loaded files are not serialized. During de-serialization, regions from loaded files are stored by their source files. When later one queries if a loaded location `l` is in an opt-out region, PP looks up regions by the source file of `l`. The reported issue at upstream: https://github.com/llvm/llvm-project/issues/90501 rdar://124035402 >From ac5aeb5c3a134d085320fc7fc5cf3f2c8c41a1f1 Mon Sep 17 00:00:00 2001 From: ziqingluo-90 Date: Mon, 13 May 2024 13:31:21 -0700 Subject: [PATCH] fix safe buffer opt-out region serialization --- clang/include/clang/Lex/Preprocessor.h| 22 +++- .../include/clang/Serialization/ASTBitCodes.h | 3 + clang/lib/Lex/Preprocessor.cpp| 106 ++ clang/lib/Serialization/ASTReader.cpp | 11 ++ clang/lib/Serialization/ASTWriter.cpp | 7 ++ clang/test/Modules/Inputs/SafeBuffers/base.h | 9 ++ .../SafeBuffers/safe_buffers_test.modulemap | 10 ++ .../Modules/Inputs/SafeBuffers/test_sub1.h| 20 .../Modules/Inputs/SafeBuffers/test_sub2.h| 11 ++ clang/test/Modules/safe_buffers_optout.cpp| 39 +++ ...unsafe-buffer-usage-pragma-pch-complex.cpp | 72 .../warn-unsafe-buffer-usage-pragma-pch.cpp | 27 + 12 files changed, 314 insertions(+), 23 deletions(-) create mode 100644 clang/test/Modules/Inputs/SafeBuffers/base.h create mode 100644 clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub1.h create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub2.h create mode 100644 clang/test/Modules/safe_buffers_optout.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index e89b4a2c5230e..8d6884ebe7597 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2883,11 +2883,15 @@ class Preprocessor { /// otherwise. SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the start location of an never-closed region. - // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in one - // translation unit. Each region is represented by a pair of start and end - // locations. A region is "open" if its' start and end locations are + using SafeBufferOptOutMapTy = + SmallVector, 16>; + // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this + // translation unit. Each region is represented by a pair of start and + // end locations. A region is "open" if its' start and end locations are // identical. - SmallVector, 8> SafeBufferOptOutMap; + SafeBufferOptOutMapTy SafeBufferOptOutMap; + // `SafeBufferOptOutMap`s of loaded files: + llvm::DenseMap LoadedSafeBufferOptOutMap; public: /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out @@ -2918,6 +2922,16 @@ class Preprocessor { /// opt-out region bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc); + /// \return a sequence of SourceLocations representing ordered opt-out regions + /// specified by + /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit. + SmallVector serializeSafeBufferOptOutMap() const; + + /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a + /// record of code `PP_UNSAFE_BUFFER_USAGE`. + void setDeserializedSafeBufferOptOutMap( + const SmallVectorImpl &SrcLocSeqs); + private: /// Helper functions to forward lexing to the actual lexer. They all share the /// same signature. diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index dcfa4ac0c1967..d1a0eba943039 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -775,6 +775,9 @@ enum ASTRecordTypes { /// Record code for lexical and visible block for delayed namespace in /// reduced BMI. DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD = 68, + + /// Record code for \#pragma clang unsafe_buffer_usage begin/end + PP_UNSAFE_BUFFER_USAGE = 69, }; /// Record types used within a source m
[clang] Add handling of the `-nostdlibinc` option to ccc-analyzer (PR #88017)
https://github.com/ziqingluo-90 closed https://github.com/llvm/llvm-project/pull/88017 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Add handling of the `-nostdlibinc` option to ccc-analyzer (PR #88017)
https://github.com/ziqingluo-90 created https://github.com/llvm/llvm-project/pull/88017 Compiler options recognizable to ccc-analyzer are stored in maps. An option missing in the map will be dropped by ccc-analyzer. This causes a build error in one of our projects that only happens in scan-build but not regular build, because ccc-analyzer do not recognize `-nostdlibinc`. This commit adds the option to the map. >From 188398778993244223638e8433efe2ce703772dd Mon Sep 17 00:00:00 2001 From: ziqingluo-90 Date: Mon, 8 Apr 2024 10:25:43 -0700 Subject: [PATCH] Add handling of the `-nostdlibinc` option to ccc-analyzer Compiler options recognizable to ccc-analyzer are stored in maps. An option missing in the map will be dropped by ccc-analyzer. This causes a build error in one of our projects that only happens in scan-build but not regular build, because ccc-analyzer do not recognize `-nostdlibinc`. This commit adds the option to the map. rdar://126082053 --- clang/tools/scan-build/libexec/ccc-analyzer | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/tools/scan-build/libexec/ccc-analyzer b/clang/tools/scan-build/libexec/ccc-analyzer index 60796a543fcd06..c5588814e8f0de 100755 --- a/clang/tools/scan-build/libexec/ccc-analyzer +++ b/clang/tools/scan-build/libexec/ccc-analyzer @@ -361,6 +361,7 @@ sub Analyze { my %CompileOptionMap = ( '-nostdinc' => 0, + '-nostdlibinc' => 0, '-include' => 1, '-idirafter' => 1, '-imacros' => 1, ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage][NFC] clang-format UnsafeBufferUsage.cpp (PR #82027)
https://github.com/ziqingluo-90 approved this pull request. LGTM! Thanks for re-formatting the code. https://github.com/llvm/llvm-project/pull/82027 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Minimize fixit range for pointer variables (PR #81935)
@@ -2326,15 +2312,21 @@ static FixItList fixLocalVarDeclWithSpan(const VarDecl *D, ASTContext &Ctx, return {}; FixIts.insert(FixIts.end(), std::make_move_iterator(InitFixIts->begin()), std::make_move_iterator(InitFixIts->end())); -// If the declaration has the form `T *ident = init`, we want to replace -// `T *ident = ` with `std::span ident`: -EndLocForReplacement = Init->getBeginLoc().getLocWithOffset(-1); } - SS << " " << IdentText->str(); + // For declaration of the form `T * ident = init;`, we want to replace + // `T * ` with `std::span`. + // We ignore CV-qualifiers so for `T * const ident;` we also want to replace + // just `T *` with `std::span`. + const SourceLocation EndLocForReplacement = D->getTypeSpecEndLoc(); if (!EndLocForReplacement.isValid()) { DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the end of the declaration"); return {}; } + // The only exception is that for `T *ident` we'll add a single space between + // "std::span" and "ident". + if (EndLocForReplacement.getLocWithOffset(1) == getVarDeclIdentifierLoc(D)) ziqingluo-90 wrote: In case the identifier is a macro expansion, e.g., ``` #define IDENT ident T *IDENT; ``` , the if-condition will evaluate to false because the files of the two source locations are different. But this is rare and the fix-it is still correct. https://github.com/llvm/llvm-project/pull/81935 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Minimize fixit range for pointer variables (PR #81935)
@@ -2326,15 +2312,21 @@ static FixItList fixLocalVarDeclWithSpan(const VarDecl *D, ASTContext &Ctx, return {}; FixIts.insert(FixIts.end(), std::make_move_iterator(InitFixIts->begin()), std::make_move_iterator(InitFixIts->end())); -// If the declaration has the form `T *ident = init`, we want to replace -// `T *ident = ` with `std::span ident`: -EndLocForReplacement = Init->getBeginLoc().getLocWithOffset(-1); } - SS << " " << IdentText->str(); + // For declaration of the form `T * ident = init;`, we want to replace + // `T * ` with `std::span`. + // We ignore CV-qualifiers so for `T * const ident;` we also want to replace + // just `T *` with `std::span`. + const SourceLocation EndLocForReplacement = D->getTypeSpecEndLoc(); ziqingluo-90 wrote: This looks a correct optimization to me! I probably didn't realize the existence of `getTypeSpecEndLoc()` when I did this. https://github.com/llvm/llvm-project/pull/81935 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Minimize fixit range for pointer variables (PR #81935)
https://github.com/ziqingluo-90 approved this pull request. LGTM! https://github.com/llvm/llvm-project/pull/81935 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Minimize fixit range for pointer variables (PR #81935)
https://github.com/ziqingluo-90 edited https://github.com/llvm/llvm-project/pull/81935 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Fixits for array decayed to pointer (PR #80347)
https://github.com/ziqingluo-90 approved this pull request. LGTM! (We probably do not need to use `CHECK-DAG` at most places in our tests since we have fixed the non-deterministic issue, but it's irrelevant to this PR.) https://github.com/llvm/llvm-project/pull/80347 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Introduce std::array fixits (PR #80084)
https://github.com/ziqingluo-90 approved this pull request. I left all my comments here. I didn't think quite through for some of them so feel free to ignore if they don't make sense to you. Other parts LGTM! https://github.com/llvm/llvm-project/pull/80084 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Introduce std::array fixits (PR #80084)
@@ -2495,10 +2471,100 @@ static FixItList fixVariableWithSpan(const VarDecl *VD, return fixLocalVarDeclWithSpan(VD, Ctx, getUserFillPlaceHolder(), Handler); } +static FixItList fixVarDeclWithArray(const VarDecl *D, const ASTContext &Ctx, + UnsafeBufferUsageHandler &Handler) { + FixItList FixIts{}; + + if (auto CAT = dyn_cast(D->getType())) { +const QualType &ArrayEltT = CAT->getElementType(); +assert(!ArrayEltT.isNull() && "Trying to fix a non-array type variable!"); +// FIXME: support multi-dimensional arrays +if (isa(ArrayEltT)) + return {}; + +const SourceLocation IdentifierLoc = getVarDeclIdentifierLoc(D); + +// Get the spelling of the element type as written in the source file +// (including macros, etc.). +auto MaybeElemTypeTxt = +getRangeText({D->getBeginLoc(), IdentifierLoc}, Ctx.getSourceManager(), + Ctx.getLangOpts()); +if (!MaybeElemTypeTxt) + return {}; +const llvm::StringRef ElemTypeTxt = MaybeElemTypeTxt->trim(); + +// Find the '[' token. +std::optional NextTok = Lexer::findNextToken( +IdentifierLoc, Ctx.getSourceManager(), Ctx.getLangOpts()); +while (NextTok && !NextTok->is(tok::l_square)) + NextTok = Lexer::findNextToken(NextTok->getLocation(), + Ctx.getSourceManager(), Ctx.getLangOpts()); +if (!NextTok) + return {}; +const SourceLocation LSqBracketLoc = NextTok->getLocation(); + +// Get the spelling of the array size as written in the source file +// (including macros, etc.). +auto MaybeArraySizeTxt = getRangeText( +{LSqBracketLoc.getLocWithOffset(1), D->getTypeSpecEndLoc()}, +Ctx.getSourceManager(), Ctx.getLangOpts()); +if (!MaybeArraySizeTxt) + return {}; +const llvm::StringRef ArraySizeTxt = MaybeArraySizeTxt->trim(); +if (ArraySizeTxt.empty()) { + // FIXME: Support array size getting determined from the initializer. + // Examples: + //int arr1[] = {0, 1, 2}; + //int arr2{3, 4, 5}; + // We might be able to preserve the non-specified size with `auto` and + // `std::to_array`: + //auto arr1 = std::to_array({0, 1, 2}); + return {}; +} + +std::optional IdentText = +getVarDeclIdentifierText(D, Ctx.getSourceManager(), Ctx.getLangOpts()); + +if (!IdentText) { + DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the identifier"); + return {}; +} + +SmallString<32> Replacement; +raw_svector_ostream OS(Replacement); +OS << "std::array<" << ElemTypeTxt << ", " << ArraySizeTxt << "> " + << IdentText->str(); + +FixIts.push_back(FixItHint::CreateReplacement( +SourceRange{D->getBeginLoc(), D->getTypeSpecEndLoc()}, OS.str())); ziqingluo-90 wrote: and this maybe? ``` typedef int ArrayT[10]; ArrayT a; ``` https://github.com/llvm/llvm-project/pull/80084 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Introduce std::array fixits (PR #80084)
@@ -2495,10 +2471,100 @@ static FixItList fixVariableWithSpan(const VarDecl *VD, return fixLocalVarDeclWithSpan(VD, Ctx, getUserFillPlaceHolder(), Handler); } +static FixItList fixVarDeclWithArray(const VarDecl *D, const ASTContext &Ctx, + UnsafeBufferUsageHandler &Handler) { + FixItList FixIts{}; + + if (auto CAT = dyn_cast(D->getType())) { ziqingluo-90 wrote: nitpick: can we replace this check with an assertion? Because it is already checked at the caller: ``` case FixitStrategy::Kind::Array: { if (VD->isLocalVarDecl() && isa(VD->getType())) return fixVariableWithArray(VD, Tracker, Ctx, Handler); ``` https://github.com/llvm/llvm-project/pull/80084 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Introduce std::array fixits (PR #80084)
@@ -2495,10 +2471,100 @@ static FixItList fixVariableWithSpan(const VarDecl *VD, return fixLocalVarDeclWithSpan(VD, Ctx, getUserFillPlaceHolder(), Handler); } +static FixItList fixVarDeclWithArray(const VarDecl *D, const ASTContext &Ctx, + UnsafeBufferUsageHandler &Handler) { + FixItList FixIts{}; + + if (auto CAT = dyn_cast(D->getType())) { +const QualType &ArrayEltT = CAT->getElementType(); +assert(!ArrayEltT.isNull() && "Trying to fix a non-array type variable!"); +// FIXME: support multi-dimensional arrays +if (isa(ArrayEltT)) + return {}; + +const SourceLocation IdentifierLoc = getVarDeclIdentifierLoc(D); + +// Get the spelling of the element type as written in the source file +// (including macros, etc.). +auto MaybeElemTypeTxt = +getRangeText({D->getBeginLoc(), IdentifierLoc}, Ctx.getSourceManager(), + Ctx.getLangOpts()); +if (!MaybeElemTypeTxt) + return {}; +const llvm::StringRef ElemTypeTxt = MaybeElemTypeTxt->trim(); + +// Find the '[' token. +std::optional NextTok = Lexer::findNextToken( +IdentifierLoc, Ctx.getSourceManager(), Ctx.getLangOpts()); +while (NextTok && !NextTok->is(tok::l_square)) + NextTok = Lexer::findNextToken(NextTok->getLocation(), + Ctx.getSourceManager(), Ctx.getLangOpts()); +if (!NextTok) + return {}; +const SourceLocation LSqBracketLoc = NextTok->getLocation(); + +// Get the spelling of the array size as written in the source file +// (including macros, etc.). +auto MaybeArraySizeTxt = getRangeText( +{LSqBracketLoc.getLocWithOffset(1), D->getTypeSpecEndLoc()}, +Ctx.getSourceManager(), Ctx.getLangOpts()); +if (!MaybeArraySizeTxt) + return {}; +const llvm::StringRef ArraySizeTxt = MaybeArraySizeTxt->trim(); +if (ArraySizeTxt.empty()) { + // FIXME: Support array size getting determined from the initializer. + // Examples: + //int arr1[] = {0, 1, 2}; + //int arr2{3, 4, 5}; + // We might be able to preserve the non-specified size with `auto` and + // `std::to_array`: + //auto arr1 = std::to_array({0, 1, 2}); + return {}; +} + +std::optional IdentText = +getVarDeclIdentifierText(D, Ctx.getSourceManager(), Ctx.getLangOpts()); + +if (!IdentText) { + DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the identifier"); + return {}; +} + +SmallString<32> Replacement; +raw_svector_ostream OS(Replacement); +OS << "std::array<" << ElemTypeTxt << ", " << ArraySizeTxt << "> " + << IdentText->str(); + +FixIts.push_back(FixItHint::CreateReplacement( +SourceRange{D->getBeginLoc(), D->getTypeSpecEndLoc()}, OS.str())); ziqingluo-90 wrote: I'm wondering if it is possible to declare a constant array in a way that the type specifier is completely at the left-hand side of the identifier? Like the form `T a;` while `T` represents a constant array type. How about this? ``` int a[10]; decltype(a) b; ``` https://github.com/llvm/llvm-project/pull/80084 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Introduce std::array fixits (PR #80084)
@@ -2495,10 +2471,100 @@ static FixItList fixVariableWithSpan(const VarDecl *VD, return fixLocalVarDeclWithSpan(VD, Ctx, getUserFillPlaceHolder(), Handler); } +static FixItList fixVarDeclWithArray(const VarDecl *D, const ASTContext &Ctx, + UnsafeBufferUsageHandler &Handler) { + FixItList FixIts{}; + + if (auto CAT = dyn_cast(D->getType())) { +const QualType &ArrayEltT = CAT->getElementType(); +assert(!ArrayEltT.isNull() && "Trying to fix a non-array type variable!"); +// FIXME: support multi-dimensional arrays +if (isa(ArrayEltT)) + return {}; + +const SourceLocation IdentifierLoc = getVarDeclIdentifierLoc(D); + +// Get the spelling of the element type as written in the source file +// (including macros, etc.). +auto MaybeElemTypeTxt = +getRangeText({D->getBeginLoc(), IdentifierLoc}, Ctx.getSourceManager(), + Ctx.getLangOpts()); +if (!MaybeElemTypeTxt) + return {}; +const llvm::StringRef ElemTypeTxt = MaybeElemTypeTxt->trim(); + +// Find the '[' token. +std::optional NextTok = Lexer::findNextToken( +IdentifierLoc, Ctx.getSourceManager(), Ctx.getLangOpts()); +while (NextTok && !NextTok->is(tok::l_square)) + NextTok = Lexer::findNextToken(NextTok->getLocation(), + Ctx.getSourceManager(), Ctx.getLangOpts()); +if (!NextTok) + return {}; +const SourceLocation LSqBracketLoc = NextTok->getLocation(); + +// Get the spelling of the array size as written in the source file +// (including macros, etc.). +auto MaybeArraySizeTxt = getRangeText( +{LSqBracketLoc.getLocWithOffset(1), D->getTypeSpecEndLoc()}, +Ctx.getSourceManager(), Ctx.getLangOpts()); +if (!MaybeArraySizeTxt) + return {}; +const llvm::StringRef ArraySizeTxt = MaybeArraySizeTxt->trim(); +if (ArraySizeTxt.empty()) { + // FIXME: Support array size getting determined from the initializer. ziqingluo-90 wrote: In this case, maybe we can get the concrete size value from the array type directly since there is no spelling text for us to preserve? https://github.com/llvm/llvm-project/pull/80084 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Introduce std::array fixits (PR #80084)
@@ -2495,10 +2471,100 @@ static FixItList fixVariableWithSpan(const VarDecl *VD, return fixLocalVarDeclWithSpan(VD, Ctx, getUserFillPlaceHolder(), Handler); } +static FixItList fixVarDeclWithArray(const VarDecl *D, const ASTContext &Ctx, + UnsafeBufferUsageHandler &Handler) { + FixItList FixIts{}; + + if (auto CAT = dyn_cast(D->getType())) { +const QualType &ArrayEltT = CAT->getElementType(); +assert(!ArrayEltT.isNull() && "Trying to fix a non-array type variable!"); +// FIXME: support multi-dimensional arrays +if (isa(ArrayEltT)) ziqingluo-90 wrote: is it possible that the element type is an array type but not a constant array type? https://github.com/llvm/llvm-project/pull/80084 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Fix debug notes for unclaimed DREs (PR #80787)
https://github.com/ziqingluo-90 approved this pull request. LGTM! I think I did something similar locally when I collected data of unclaimed DREs on those big adopter projects. So I guess the data that we used to find major missing patterns for fix-its is still valid. https://github.com/llvm/llvm-project/pull/80787 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Introduce std::array fixits (PR #80084)
@@ -2495,10 +2470,97 @@ static FixItList fixVariableWithSpan(const VarDecl *VD, return fixLocalVarDeclWithSpan(VD, Ctx, getUserFillPlaceHolder(), Handler); } +static FixItList fixVarDeclWithArray(const VarDecl *D, const ASTContext &Ctx, + UnsafeBufferUsageHandler &Handler) { + FixItList FixIts{}; + + if (auto CAT = dyn_cast(D->getType())) { +const QualType &ArrayEltT = CAT->getElementType(); +assert(!ArrayEltT.isNull() && "Trying to fix a non-array type variable!"); + +const SourceLocation IdentifierLoc = getVarDeclIdentifierLoc(D); + +// Get the spelling of the element type as written in the source file +// (including macros, etc.). +auto MaybeElemTypeTxt = +getRangeText({D->getBeginLoc(), IdentifierLoc}, Ctx.getSourceManager(), + Ctx.getLangOpts()); +if (!MaybeElemTypeTxt) + return {}; +const llvm::StringRef ElemTypeTxt = MaybeElemTypeTxt->trim(); + +// Find the '[' token. +std::optional NextTok = Lexer::findNextToken( +IdentifierLoc, Ctx.getSourceManager(), Ctx.getLangOpts()); +while (NextTok && !NextTok->is(tok::l_square)) + NextTok = Lexer::findNextToken(NextTok->getLocation(), + Ctx.getSourceManager(), Ctx.getLangOpts()); +if (!NextTok) + return {}; +const SourceLocation LSqBracketLoc = NextTok->getLocation(); + +// Get the spelling of the array size as written in the source file +// (including macros, etc.). ziqingluo-90 wrote: I wish we could have a more elegant way to get the spelling of the array size. I feel like `TypeLoc` is suppose to offer it but it does not behave as I expect in this case. (`TypeLoc` also disappointed me in dealing with cv-qualifiers. `¯\_(ツ)_/¯`) https://github.com/llvm/llvm-project/pull/80084 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Introduce std::array fixits (PR #80084)
@@ -0,0 +1,164 @@ +// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage \ +// RUN:-fsafe-buffer-usage-suggestions \ +// RUN:-fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s +typedef int * Int_ptr_t; +typedef int Int_t; + +void simple(unsigned idx) { + int buffer[10]; +// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:17}:"std::array buffer" + buffer[idx] = 0; +} + +void whitespace_in_declaration(unsigned idx) { + int buffer_w [ 10 ]; +// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:35}:"std::array buffer_w" + buffer_w[idx] = 0; +} + +void comments_in_declaration(unsigned idx) { + int /* [A] */ buffer_w /* [B] */ [ /* [C] */ 10 /* [D] */ ] ; +// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:69}:"std::array buffer_w" + buffer_w[idx] = 0; +} + +void initializer(unsigned idx) { + int buffer[3] = {0}; +// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:16}:"std::array buffer" + + int buffer2[3] = {0, 1, 2}; +// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:17}:"std::array buffer2" + + buffer[idx] = 0; + buffer2[idx] = 0; +} + +void auto_size(unsigned idx) { + int buffer[] = {0, 1, 2}; +// CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]] +// FIXME: implement support + + buffer[idx] = 0; +} + +void universal_initialization(unsigned idx) { + int buffer[] {0, 1, 2}; +// CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]] +// FIXME: implement support + + buffer[idx] = 0; +} + +void multi_decl1(unsigned idx) { + int a, buffer[10]; +// CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]] +// FIXME: implement support + + buffer[idx] = 0; +} + +void multi_decl2(unsigned idx) { + int buffer[10], b; +// CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]] +// FIXME: implement support + + buffer[idx] = 0; +} + +void local_array_ptr_to_const(unsigned idx, const int*& a) { + const int * buffer[10] = {a}; +// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:25}:"std::array buffer" + a = buffer[idx]; +} + +void local_array_const_ptr(unsigned idx, int*& a) { + int * const buffer[10] = {a}; +// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:25}:"std::array buffer" + + a = buffer[idx]; +} + +void local_array_const_ptr_via_typedef(unsigned idx, int*& a) { + typedef int * const my_const_ptr; + my_const_ptr buffer[10] = {a}; +// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:26}:"std::array buffer" + + a = buffer[idx]; +} + +void local_array_const_ptr_to_const(unsigned idx, const int*& a) { + const int * const buffer[10] = {a}; +// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:31}:"std::array buffer" + + a = buffer[idx]; + +} + +template +void unsupported_local_array_in_template(unsigned idx) { + T buffer[10]; +// CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:.*-[[@LINE-1]]:.*} + buffer[idx] = 0; +} +// Instantiate the template function to force its analysis. +template void unsupported_local_array_in_template(unsigned); + +typedef unsigned int my_uint; +void typedef_as_elem_type(unsigned idx) { + my_uint buffer[10]; +// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:21}:"std::array buffer" + buffer[idx] = 0; +} + +void macro_as_elem_type(unsigned idx) { +#define MY_INT int + MY_INT buffer[10]; +// CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:.*-[[@LINE-1]]:.*} +// FIXME: implement support + + buffer[idx] = 0; +#undef MY_INT +} + +void macro_as_identifier(unsigned idx) { +#define MY_BUFFER buffer + int MY_BUFFER[10]; +// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:20}:"std::array MY_BUFFER" + MY_BUFFER[idx] = 0; +#undef MY_BUFFER +} + +void macro_as_size(unsigned idx) { +#define MY_TEN 10 + int buffer[MY_TEN]; +// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:21}:"std::array buffer" + buffer[idx] = 0; +#undef MY_TEN +} + +void constant_as_size(unsigned idx) { + const unsigned my_const = 10; + int buffer[my_const]; +// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:23}:"std::array buffer" + buffer[idx] = 0; +} + +void subscript_negative() { + int buffer[10]; +// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:17}:"std::array buffer" + + // For constant-size arrays any negative index will lead to buffer underflow. + // std::array::operator[] has unsigned parameter so the value will be casted to unsigned. + // This will very likely be buffer overflow but hardened std::array catch these at runtime. + buffer[-5] = 0; +} + +void subscript_signed(int signed_idx) { + int buffer[10]; +// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:17}:"std::array buffer" + + // For constant-size arrays any negative index will lead to buffer underflow. + // std::array::operator[] has unsigned parameter so the value will be casted to unsigned. + // This will very likely be buffer overflow but hardened std::array catches these at runtime. + buffer[signed_idx] = 0; +} ziqingluo-90 wrote: maybe we can have a test showing that multi-dimentional arrays will not be fixed? https://github.com/llvm/llvm-project/pull/80084 __
[clang] [-Wunsafe-buffer-usage] Add a new warning for uses of std::span two-parameter constructors (PR #77148)
https://github.com/ziqingluo-90 closed https://github.com/llvm/llvm-project/pull/77148 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wcompletion-handler] Fix a non-termination issue (PR #78380)
https://github.com/ziqingluo-90 closed https://github.com/llvm/llvm-project/pull/78380 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wcompletion-handler] Fix a non-termination issue (PR #78380)
ziqingluo-90 wrote: @SavchenkoValeriy, sorry for the late response. Your suggested fix works for my minimal example as well as my local project where this bug was originated. And I failed to come up with a new counter-example for the fix so I believe it will also prevent future non-termination issues. I have updated the PR with respect to the simpler fix. https://github.com/llvm/llvm-project/pull/78380 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wcompletion-handler] Fix a non-termination issue (PR #78380)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/78380 >From e7c3a3fc2a4f5d5714044a1c407bfe56f328680d Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Tue, 16 Jan 2024 17:45:01 -0800 Subject: [PATCH 1/4] [-Wcompletion-handler] Fix a non-termination issue The Called-Once dataflow analysis could never terminate as a consequence of non-monotonic update on states. States of kind Escape can override states leading to non-monotonic update. The fix uses finite state sets instead of single states to represent CFG block outputs, which grows in uni-direction. To preserve the behavior of the analyzer, a single state can be extracted from a state set. The extraction follows the idea that Escape can override error states within one block but obeys to the partial-order for join. rdar://119671856 --- clang/lib/Analysis/CalledOnceCheck.cpp | 120 ++--- clang/test/SemaObjC/warn-called-once.m | 10 +++ 2 files changed, 120 insertions(+), 10 deletions(-) diff --git a/clang/lib/Analysis/CalledOnceCheck.cpp b/clang/lib/Analysis/CalledOnceCheck.cpp index 04c5f6aa9c7450..3fab93b1d09cdf 100644 --- a/clang/lib/Analysis/CalledOnceCheck.cpp +++ b/clang/lib/Analysis/CalledOnceCheck.cpp @@ -163,7 +163,7 @@ class ParameterStatus { NotVisited = 0x8, /* 1000 */ // We already reported a violation and stopped tracking calls for this // parameter. -Reported = 0x15, /* */ +Reported = 0xF, /* */ LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Reported) }; @@ -215,6 +215,55 @@ class ParameterStatus { const Expr *Call = nullptr; }; +// Represents a set of `ParameterStatus`s collected in a single CFGBlock. +class ParamStatusSet { +private: + // The kinds of `States` spans from 0x0 to 0x15, so 16 bits are enough: + llvm::BitVector Set{16, false}; + // Following the exisitng idea: if there are multiple calls of interest in one + // block, recording one of them is good enough. + const Expr *Call = nullptr; + +public: + ParamStatusSet() {} + + // Add a new `ParameterStatus` to the set. Returns true iff the adding status + // was new to the set. + bool add(ParameterStatus PS) { +assert(PS.getKind() != ParameterStatus::Kind::NotVisited && + "the status cannot be NotVisited after visiting a block"); +if (Set.test(PS.getKind())) + return false; +Set.set(PS.getKind()); +if (PS.seenAnyCalls()) + Call = &PS.getCall(); +return true; + } + + // Get one `ParameterStatus` to represent the set of them. The idea is to + // take a join on them but let ESCAPE overrides error statuses. + ParameterStatus summaryStatus() { +unsigned Summary = 0x0; + +for (unsigned Idx : + llvm::make_range(Set.set_bits_begin(), Set.set_bits_end())) + Summary |= Idx; +assert(Summary == 0x0 || Summary == 0x1 || Summary == 0x3 || + Summary == 0x5 || Summary == 0x7 || + Summary == 0xF && "expecting a defined value"); + +ParameterStatus::Kind SummaryKind = ParameterStatus::Kind(Summary); + +if (SummaryKind > ParameterStatus::Kind::NON_ERROR_STATUS && +Set.test(ParameterStatus::Kind::Escaped)) { + SummaryKind = ParameterStatus::Kind::Escaped; +} +if (ParameterStatus::seenAnyCalls(SummaryKind)) + return {SummaryKind, Call}; +return {SummaryKind}; + } +}; + /// State aggregates statuses of all tracked parameters. class State { public: @@ -274,6 +323,53 @@ class State { private: ParamSizedVector ParamData; + + friend class CFGBlockOutputState; + + State(ParamSizedVector ParamData) : ParamData(ParamData) {} + + unsigned size() const { return ParamData.size(); } +}; + +// A different kind of "state" in addition to `State`. `CFGBlockOutputState` +// are created for dealing with the non-termination issue due to `State`s are +// not being updated monotonically at the output of each CFGBlock. +// A `CFGBlockOutputState` is in fact a finite set of `State`s that +// grows monotonically. One can extract a `State` from a `CFGBlockOutputState`. +// Note that the `State`-extraction does NOT guarantee monotone but it +// respects the existing semantics. Specifically, ESCAPE is "greater than" +// other error states in a single path but is "less than" them at JOIN. +// +// `CFGBlockOutputState` will be used to terminate the fix-point computation. +class CFGBlockOutputState { +private: + ParamSizedVector StatusSets; + +public: + CFGBlockOutputState(unsigned Size) : StatusSets{Size} {}; + + // Update this `CFGBlockOutputState` with a newly computed `State`. Return + // true iff `CFGBlockOutputState` changed. + bool update(const State &NewState) { +bool Changed = false; + +for (unsigned Idx = 0; Idx < NewState.size(); ++Idx) { + if (StatusSets[Idx].add(NewState.getStatusFor(Idx))) { +Changed = true; + } +} +return Changed; + } + + // Return a `State` that represents the `CFGBlockOutputState`. + State ge
[clang] [-Wcompletion-handler] Fix a non-termination issue (PR #78380)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/78380 >From e7c3a3fc2a4f5d5714044a1c407bfe56f328680d Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Tue, 16 Jan 2024 17:45:01 -0800 Subject: [PATCH 1/3] [-Wcompletion-handler] Fix a non-termination issue The Called-Once dataflow analysis could never terminate as a consequence of non-monotonic update on states. States of kind Escape can override states leading to non-monotonic update. The fix uses finite state sets instead of single states to represent CFG block outputs, which grows in uni-direction. To preserve the behavior of the analyzer, a single state can be extracted from a state set. The extraction follows the idea that Escape can override error states within one block but obeys to the partial-order for join. rdar://119671856 --- clang/lib/Analysis/CalledOnceCheck.cpp | 120 ++--- clang/test/SemaObjC/warn-called-once.m | 10 +++ 2 files changed, 120 insertions(+), 10 deletions(-) diff --git a/clang/lib/Analysis/CalledOnceCheck.cpp b/clang/lib/Analysis/CalledOnceCheck.cpp index 04c5f6aa9c7450..3fab93b1d09cdf 100644 --- a/clang/lib/Analysis/CalledOnceCheck.cpp +++ b/clang/lib/Analysis/CalledOnceCheck.cpp @@ -163,7 +163,7 @@ class ParameterStatus { NotVisited = 0x8, /* 1000 */ // We already reported a violation and stopped tracking calls for this // parameter. -Reported = 0x15, /* */ +Reported = 0xF, /* */ LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Reported) }; @@ -215,6 +215,55 @@ class ParameterStatus { const Expr *Call = nullptr; }; +// Represents a set of `ParameterStatus`s collected in a single CFGBlock. +class ParamStatusSet { +private: + // The kinds of `States` spans from 0x0 to 0x15, so 16 bits are enough: + llvm::BitVector Set{16, false}; + // Following the exisitng idea: if there are multiple calls of interest in one + // block, recording one of them is good enough. + const Expr *Call = nullptr; + +public: + ParamStatusSet() {} + + // Add a new `ParameterStatus` to the set. Returns true iff the adding status + // was new to the set. + bool add(ParameterStatus PS) { +assert(PS.getKind() != ParameterStatus::Kind::NotVisited && + "the status cannot be NotVisited after visiting a block"); +if (Set.test(PS.getKind())) + return false; +Set.set(PS.getKind()); +if (PS.seenAnyCalls()) + Call = &PS.getCall(); +return true; + } + + // Get one `ParameterStatus` to represent the set of them. The idea is to + // take a join on them but let ESCAPE overrides error statuses. + ParameterStatus summaryStatus() { +unsigned Summary = 0x0; + +for (unsigned Idx : + llvm::make_range(Set.set_bits_begin(), Set.set_bits_end())) + Summary |= Idx; +assert(Summary == 0x0 || Summary == 0x1 || Summary == 0x3 || + Summary == 0x5 || Summary == 0x7 || + Summary == 0xF && "expecting a defined value"); + +ParameterStatus::Kind SummaryKind = ParameterStatus::Kind(Summary); + +if (SummaryKind > ParameterStatus::Kind::NON_ERROR_STATUS && +Set.test(ParameterStatus::Kind::Escaped)) { + SummaryKind = ParameterStatus::Kind::Escaped; +} +if (ParameterStatus::seenAnyCalls(SummaryKind)) + return {SummaryKind, Call}; +return {SummaryKind}; + } +}; + /// State aggregates statuses of all tracked parameters. class State { public: @@ -274,6 +323,53 @@ class State { private: ParamSizedVector ParamData; + + friend class CFGBlockOutputState; + + State(ParamSizedVector ParamData) : ParamData(ParamData) {} + + unsigned size() const { return ParamData.size(); } +}; + +// A different kind of "state" in addition to `State`. `CFGBlockOutputState` +// are created for dealing with the non-termination issue due to `State`s are +// not being updated monotonically at the output of each CFGBlock. +// A `CFGBlockOutputState` is in fact a finite set of `State`s that +// grows monotonically. One can extract a `State` from a `CFGBlockOutputState`. +// Note that the `State`-extraction does NOT guarantee monotone but it +// respects the existing semantics. Specifically, ESCAPE is "greater than" +// other error states in a single path but is "less than" them at JOIN. +// +// `CFGBlockOutputState` will be used to terminate the fix-point computation. +class CFGBlockOutputState { +private: + ParamSizedVector StatusSets; + +public: + CFGBlockOutputState(unsigned Size) : StatusSets{Size} {}; + + // Update this `CFGBlockOutputState` with a newly computed `State`. Return + // true iff `CFGBlockOutputState` changed. + bool update(const State &NewState) { +bool Changed = false; + +for (unsigned Idx = 0; Idx < NewState.size(); ++Idx) { + if (StatusSets[Idx].add(NewState.getStatusFor(Idx))) { +Changed = true; + } +} +return Changed; + } + + // Return a `State` that represents the `CFGBlockOutputState`. + State ge
[clang] [-Wunsafe-buffer-usage] Fix AST matcher of UUCAddAssignGadget (PR #79392)
https://github.com/ziqingluo-90 approved this pull request. LGTM, It's great to catch and fix this bug! https://github.com/llvm/llvm-project/pull/79392 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [-Wunsafe-buffer-usage] Add a new warning for uses of std::span two-parameter constructors (PR #77148)
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/77148 >From 1b169f49a129a5a69a82386b486c8a46ecfc815a Mon Sep 17 00:00:00 2001 From: ziqingluo-90 Date: Fri, 5 Jan 2024 13:39:39 -0800 Subject: [PATCH 1/3] [-Wunsafe-buffer-usage] Add a new warning for use of two-parameter std::span constructors Constructing `std::span` objects with the two parameter constructors could introduce mismatched bounds information, which defeats the purpose of using `std::span`. Therefore, we warn every use of such constructors. We also plan to incrementally teach `-Wunsafe-buffer-usage` about benign usages of those constructors. rdar://115817781 --- .../Analysis/Analyses/UnsafeBufferUsage.h | 8 +- .../Analyses/UnsafeBufferUsageGadgets.def | 8 ++ .../clang/Basic/DiagnosticSemaKinds.td| 3 + clang/lib/Analysis/UnsafeBufferUsage.cpp | 46 ++ clang/lib/Sema/AnalysisBasedWarnings.cpp | 15 +- ...ffer-usage-in-container-span-construct.cpp | 136 ++ ...e-buffer-usage-warning-data-invocation.cpp | 2 +- 7 files changed, 214 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h index b28f2c6b99c50e..aca1ad998822c5 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -16,6 +16,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/Stmt.h" +#include "clang/Basic/SourceLocation.h" #include "llvm/Support/Debug.h" namespace clang { @@ -98,9 +99,14 @@ class UnsafeBufferUsageHandler { #endif public: - /// Returns a reference to the `Preprocessor`: + /// \return true iff buffer safety is opt-out at `Loc`; false otherwise. virtual bool isSafeBufferOptOut(const SourceLocation &Loc) const = 0; + /// \return true iff unsafe uses in containers should NOT be reported at + /// `Loc`; false otherwise. + virtual bool + ignoreUnsafeBufferInContainer(const SourceLocation &Loc) const = 0; + virtual std::string getUnsafeBufferUsageAttributeTextAt(SourceLocation Loc, StringRef WSSuffix = "") const = 0; diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index c9766168836510..07f805ebb11013 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -18,6 +18,12 @@ #define WARNING_GADGET(name) GADGET(name) #endif +/// A `WARNING_GADGET` subset, where the code pattern of each gadget +/// corresponds uses of a (possibly hardened) contatiner (e.g., `std::span`). +#ifndef WARNING_CONTAINER_GADGET +#define WARNING_CONTAINER_GADGET(name) WARNING_GADGET(name) +#endif + /// Safe gadgets correspond to code patterns that aren't unsafe but need to be /// properly recognized in order to emit correct warnings and fixes over unsafe /// gadgets. @@ -31,6 +37,7 @@ WARNING_GADGET(ArraySubscript) WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) WARNING_GADGET(DataInvocation) +WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) FIXABLE_GADGET(PointerDereference) @@ -43,4 +50,5 @@ FIXABLE_GADGET(PointerInit) #undef FIXABLE_GADGET #undef WARNING_GADGET +#undef WARNING_CONTAINER_GADGET #undef GADGET diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index e54f969c19039d..438c2fcdb1c77b 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12075,6 +12075,9 @@ def note_unsafe_buffer_variable_fixit_together : Note< "%select{|, and change %2 to safe types to make function %4 bounds-safe}3">; def note_safe_buffer_usage_suggestions_disabled : Note< "pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions">; +def warn_unsafe_buffer_usage_in_container : Warning< + "the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information">, + InGroup, DefaultIgnore; #ifndef NDEBUG // Not a user-facing diagnostic. Useful for debugging false negatives in // -fsafe-buffer-usage-suggestions (i.e. lack of -Wunsafe-buffer-usage fixits). diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 724c4304a07242..3b6d69cac1afd8 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -232,6 +232,11 @@ AST_MATCHER_P(Stmt, notInSafeBufferOptOut, const Unsa