https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/145862
>From 28cda3e7ea3f14305342d3ea663646df164ff044 Mon Sep 17 00:00:00 2001 From: Ziqing Luo <ziq...@udel.edu> Date: Tue, 24 Jun 2025 16:45:30 +0800 Subject: [PATCH 1/2] [-Wunsafe-buffer-usage] Support safe patterns of "%.*s" in printf functions The character buffer passed to a "%s" specifier may be safely bound if the precision is specified, even if the buffer is not null-terminated. For example, ``` void f(std::span<char> span) { printf("%.*s", (int)span.size(), span.data()); // `span.data()` may not be null-terminated but is safely bound by `span.size()`, // so this call is safe } ``` rdar://154072130 --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 84 +++++++++++++++---- ...arn-unsafe-buffer-usage-libc-functions.cpp | 36 ++++++++ 2 files changed, 105 insertions(+), 15 deletions(-) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index ac47b12cc8d72..580ae583bc642 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -25,6 +25,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Lex/Preprocessor.h" +#include "llvm/ADT/APInt.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallSet.h" @@ -809,28 +810,81 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg, const CallExpr *Call; unsigned FmtArgIdx; const Expr *&UnsafeArg; + ASTContext &Ctx; + + // Returns an `Expr` representing the precision if specified, null + // otherwise: + const Expr * + getPrecisionAsExpr(const analyze_printf::OptionalAmount &Precision, + const CallExpr *Call) { + unsigned PArgIdx = -1; + + if (Precision.hasDataArgument()) + PArgIdx = Precision.getPositionalArgIndex() + FmtArgIdx; + if (0 < PArgIdx && PArgIdx < Call->getNumArgs()) { + const Expr *PArg = Call->getArg(PArgIdx); + + // Strip the cast if `PArg` is a cast-to-int expression: + if (auto *CE = dyn_cast<CastExpr>(PArg); + CE && CE->getType()->isSignedIntegerType()) + PArg = CE->getSubExpr(); + return PArg; + } + if (Precision.getHowSpecified() == + analyze_printf::OptionalAmount::HowSpecified::Constant) { + auto SizeTy = Ctx.getSizeType(); + llvm::APSInt PArgVal = llvm::APSInt( + llvm::APInt(Ctx.getTypeSize(SizeTy), Precision.getConstantAmount()), + true); + + return IntegerLiteral::Create(Ctx, PArgVal, Ctx.getSizeType(), {}); + } + return nullptr; + } public: StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx, - const Expr *&UnsafeArg) - : Call(Call), FmtArgIdx(FmtArgIdx), UnsafeArg(UnsafeArg) {} + const Expr *&UnsafeArg, ASTContext &Ctx) + : Call(Call), FmtArgIdx(FmtArgIdx), UnsafeArg(UnsafeArg), Ctx(Ctx) {} bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, const char *startSpecifier, unsigned specifierLen, const TargetInfo &Target) override { - if (FS.getConversionSpecifier().getKind() == - analyze_printf::PrintfConversionSpecifier::sArg) { - unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx; - - if (0 < ArgIdx && ArgIdx < Call->getNumArgs()) - if (!isNullTermPointer(Call->getArg(ArgIdx))) { - UnsafeArg = Call->getArg(ArgIdx); // output - // returning false stops parsing immediately - return false; - } - } - return true; // continue parsing + if (FS.getConversionSpecifier().getKind() != + analyze_printf::PrintfConversionSpecifier::sArg) + return true; // continue parsing + + unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx; + + if (!(0 < ArgIdx && ArgIdx < Call->getNumArgs())) + // If the `ArgIdx` is invalid, give up. + return true; // continue parsing + + const Expr *Arg = Call->getArg(ArgIdx); + + if (isNullTermPointer(Arg)) + // If Arg is a null-terminated pointer, it is safe anyway. + return true; // continue parsing + + // Otherwise, check if the specifier has a precision and if the character + // pointer is safely bound by the precision: + auto LengthModifier = FS.getLengthModifier(); + QualType ArgType = Arg->getType(); + bool IsArgTypeValid = // Is ArgType a character pointer type? + ArgType->isPointerType() && + (LengthModifier.getKind() == LengthModifier.AsWideChar + ? ArgType->getPointeeType()->isWideCharType() + : ArgType->getPointeeType()->isCharType()); + + ArgType.dump(); + if (auto *Precision = getPrecisionAsExpr(FS.getPrecision(), Call); + Precision && IsArgTypeValid) + if (isPtrBufferSafe(Arg, Precision, Ctx)) + return true; + // Handle unsafe case: + UnsafeArg = Call->getArg(ArgIdx); // output + return false; // returning false stops parsing immediately } }; @@ -846,7 +900,7 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg, else goto CHECK_UNSAFE_PTR; - StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg); + StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg, Ctx); return analyze_format_string::ParsePrintfString( Handler, FmtStr.begin(), FmtStr.end(), Ctx.getLangOpts(), diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp index d6c32ca7baa5d..6ae329a736d91 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp @@ -47,11 +47,24 @@ namespace std { T *c_str(); T *data(); unsigned size_bytes(); + unsigned size(); }; typedef basic_string<char> string; typedef basic_string<wchar_t> wstring; + template<typename T> + struct basic_string_view { + T *c_str() const noexcept; + T *data() const noexcept; + unsigned size(); + const T* begin() const noexcept; + const T* end() const noexcept; + }; + + typedef basic_string_view<char> string_view; + typedef basic_string_view<wchar_t> wstring_view; + // C function under std: void memcpy(); void strcpy(); @@ -134,6 +147,29 @@ void safe_examples(std::string s1, int *p) { snprintf(nullptr, 0, "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn } +void test_sarg_precision(std::string Str, std::string_view Sv, std::wstring_view WSv, + std::span<char> SpC, std::span<int> SpI) { + printf("%.*s"); + printf("%.*s", (int)Str.size(), Str.data()); + printf("%.*s", (int)Str.size_bytes(), Str.data()); + printf("%.*s", (int)Sv.size(), Sv.data()); + printf("%.*s", (int)SpC.size(), SpC.data()); + printf("%.*s", SpC.size(), SpC.data()); + printf("%.*ls", WSv.size(), WSv.data()); + printf("%.*s", SpC.data()); // no warn because `SpC.data()` is passed to the precision while the actually string pointer is not given + + printf("%.*s", SpI.size(), SpI.data()); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}} + printf("%.*s", SpI.size(), SpC.data()); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}} + printf("%.*s", WSv.size(), WSv.data()); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}} + + char a[10]; + int b[10]; + + printf("%.10s", a); + printf("%.11s", a); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}} + printf("%.10s", b); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}} +} + void g(char *begin, char *end, char *p, std::span<char> s) { std::copy(begin, end, p); // no warn >From 470e9d95bd06f5a9f55247deead9206e79f1af8d Mon Sep 17 00:00:00 2001 From: Ziqing Luo <ziq...@udel.edu> Date: Thu, 17 Jul 2025 17:50:59 +0800 Subject: [PATCH 2/2] Address comments --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 580ae583bc642..40dff7eaf06bc 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -813,7 +813,13 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg, ASTContext &Ctx; // Returns an `Expr` representing the precision if specified, null - // otherwise: + // otherwise. + // The parameter `Call` is a printf call and the parameter `Precision` is + // the precision of a format specifier of the `Call`. + // + // For example, for the `printf("%d, %.10s", 10, p)` call + // `Precision` can be the precision of either "%d" or "%.10s". The former + // one will have `NotSpecified` kind. const Expr * getPrecisionAsExpr(const analyze_printf::OptionalAmount &Precision, const CallExpr *Call) { @@ -877,7 +883,6 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg, ? ArgType->getPointeeType()->isWideCharType() : ArgType->getPointeeType()->isCharType()); - ArgType.dump(); if (auto *Precision = getPrecisionAsExpr(FS.getPrecision(), Call); Precision && IsArgTypeValid) if (isPtrBufferSafe(Arg, Precision, Ctx)) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits