https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/174683
>From ebdb94fc1a23afa41498a1152d7ed3adf1ba36cc Mon Sep 17 00:00:00 2001 From: Ziqing Luo <[email protected]> Date: Tue, 6 Jan 2026 18:16:40 -0800 Subject: [PATCH 1/3] [-Wunsafe-buffer-usage] Fix crashes in "Add check for custom printf/scanf functions #173096" The previous PR #173096 assumes that format attribute parameters always refer to valid indices of arguments. It is a wrong assumption in itself because the second attribute parameter could specify the index after the last named parameter for variadic functions and no actual arguments passed beyond named parameters. In addition, clang (possibly incorrectly) allows the following uses of the attribute: ``` void f(const char *) __attribute__((__format__ (__printf__, 1, 2))); // The second attribute argument 2 will not refer to any valid argument at any call of 'f' void g(const char *) __attribute__((__format__ (__printf__, 1, 99))); // Clang is even quiet on this, if assertions are disabled :( ``` --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 37 ++++++++-- ...arn-unsafe-buffer-usage-libc-functions.cpp | 72 ++++++++++++++++++- 2 files changed, 101 insertions(+), 8 deletions(-) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index da19b9d3902f1..7be08fe274c1a 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -29,7 +29,6 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/STLFunctionalExtras.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include <cstddef> @@ -2221,17 +2220,43 @@ class UnsafeFormatAttributedFunctionCallGadget : public WarningGadget { }); const Expr *UnsafeArg; - if (AnyAttr && !IsPrintf && - (CE->getNumArgs() >= static_cast<unsigned>(Attr->getFirstArg()))) { - // for scanf-like functions, any format argument is considered unsafe: + if (!AnyAttr) + return false; + + // FormatAttribute indexes are 1-based: + unsigned FmtIdx = Attr->getFormatIdx() - 1; + std::optional<unsigned> FmtArgIdx = Attr->getFirstArg() - 1; + + if (isa<CXXMemberCallExpr>(CE)) { + // For CXX member calls, attribute parameters are specified as if there is + // an implicit "this". The implicit "this" is invisible through CallExpr + // `CE`. (What makes it even less ergonomic is that the + // implicit "this" is visible through CallExpr `CE` for CXX operator + // calls!) + --FmtIdx; + --*FmtArgIdx; + } else if (CE->getStmtClass() != Stmt::CallExprClass && + !isa<CXXOperatorCallExpr>(CE)) + return false; // Ignore unsupported CallExpr subclasses + if (*FmtArgIdx >= CE->getNumArgs()) + // Format arguments are allowed to be absent when variadic parameter is + // used. So we need to check if those arguments exist. Moreover, when + // variadic parameter is NOT used, `Attr->getFirstArg()` could be an + // out-of-bound value. E.g., + // clang does not complain about `__attribute__((__format__(__printf__, 2, + // 99))) void f(int, char *);`. + FmtArgIdx = std::nullopt; + + if (AnyAttr && !IsPrintf && FmtArgIdx) { + // For scanf-like functions, any format argument is considered unsafe: Result.addNode(Tag, DynTypedNode::create(*CE)); return true; } + // For printf-like functions: if (AnyAttr && libc_func_matchers::hasUnsafeFormatOrSArg( Ctx, CE, UnsafeArg, // FormatAttribute indexes are 1-based: - /* FmtIdx= */ Attr->getFormatIdx() - 1, - /* FmtArgIdx= */ Attr->getFirstArg() - 1)) { + FmtIdx, FmtArgIdx)) { Result.addNode(Tag, DynTypedNode::create(*CE)); Result.addNode(UnsafeStringTag, DynTypedNode::create(*UnsafeArg)); return true; diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp index facbb0eb995e9..138f13039a8a1 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp @@ -136,6 +136,10 @@ void f(char * p, char * q, std::span<char> s, std::span<char> s2) { snprintf(s.data(), s.size_bytes(), "%s%d", __PRETTY_FUNCTION__, *p); // no warn snwprintf(s.data(), s.size_bytes(), "%s%d", __PRETTY_FUNCTION__, *p); // no warn snwprintf_s(s.data(), s.size_bytes(), "%s%d", __PRETTY_FUNCTION__, *p); // no warn + snprintf(s.data(), s.size_bytes(), "%s%d"); // no warn + snprintf(s.data(), s.size_bytes(), "%s%d"); // no warn + snwprintf(s.data(), s.size_bytes(), "%s%d"); // no warn + snwprintf_s(s.data(), s.size_bytes(), "%s%d"); // no warn wprintf(L"hello %ls", L"world"); // no warn wprintf(L"hello %ls", WS.c_str()); // no warn strlen("hello");// no warn @@ -265,7 +269,21 @@ void myprintf_2(const char *, int, const char *) __attribute__((__format__ (__pr void myprintf_3(const char *, const char *, int, const char *) __attribute__((__format__ (__printf__, 2, 4))); void myscanf(const char *, ...) __attribute__((__format__ (__scanf__, 1, 2))); -void test_myprintf(char * Str, std::string StdStr) { +struct FormatAttrTestMember { + void myprintf(const char *, ...) __attribute__((__format__ (__printf__, 2, 3))); + void myprintf_2(const char *, int, const char *) __attribute__((__format__ (__printf__, 2, 4))); + void myprintf_3(const char *, const char *, int, const char *) __attribute__((__format__ (__printf__, 3, 5))); + void myscanf(const char *, ...) __attribute__((__format__ (__scanf__, 2, 3))); + + void operator()(const char * Fmt, ...) __attribute__((__format__ (__printf__, 2, 3))); + void operator[](const char * Fmt) __attribute__((__format__ (__printf__, 2, 3))); +}; + +struct FormatAttrTestMember2 { + void operator()(const char * Fmt, char *) __attribute__((__format__ (__printf__, 2, 3))); +}; + +void test_format_attr(char * Str, std::string StdStr) { myprintf("hello", Str); myprintf("hello %s", StdStr.c_str()); myprintf("hello %s", Str); // expected-warning{{function 'myprintf' is unsafe}} \ @@ -280,11 +298,61 @@ void test_myprintf(char * Str, std::string StdStr) { myprintf_3("irrelevant", "hello %s", 0, StdStr.c_str()); myprintf_3("irrelevant", "hello %s", 0, Str); // expected-warning{{function 'myprintf_3' is unsafe}} \ expected-note{{string argument is not guaranteed to be null-terminated}} - + myscanf("hello %s"); myscanf("hello %s", Str); // expected-warning{{function 'myscanf' is unsafe}} int X; myscanf("hello %d", &X); // expected-warning{{function 'myscanf' is unsafe}} + + // Test member functions: + FormatAttrTestMember Obj; + + Obj.myprintf("hello", Str); + Obj.myprintf("hello %s", StdStr.c_str()); + Obj.myprintf("hello %s", Str); // expected-warning{{function 'myprintf' is unsafe}} \ + expected-note{{string argument is not guaranteed to be null-terminated}} + + Obj.myprintf_2("hello", 0, Str); + Obj.myprintf_2("hello %s", 0, StdStr.c_str()); + Obj.myprintf_2("hello %s", 0, Str); // expected-warning{{function 'myprintf_2' is unsafe}} \ + expected-note{{string argument is not guaranteed to be null-terminated}} + + Obj.myprintf_3("irrelevant", "hello", 0, Str); + Obj.myprintf_3("irrelevant", "hello %s", 0, StdStr.c_str()); + Obj.myprintf_3("irrelevant", "hello %s", 0, Str); // expected-warning{{function 'myprintf_3' is unsafe}} \ + expected-note{{string argument is not guaranteed to be null-terminated}} + + Obj.myscanf("hello %s"); + Obj.myscanf("hello %s", Str); // expected-warning{{function 'myscanf' is unsafe}} + + Obj.myscanf("hello %d", &X); // expected-warning{{function 'myscanf' is unsafe}} + + Obj("hello", Str); + Obj("hello %s", StdStr.c_str()); + Obj("hello %s", Str); // expected-warning{{function 'operator()' is unsafe}} \ + expected-note{{string argument is not guaranteed to be null-terminated}} + Obj["hello"]; + Obj["hello %s"]; + + FormatAttrTestMember2 Obj2; + + Obj2("hello", Str); + Obj2("hello %s", StdStr.c_str()); + Obj2("hello %s", Str); // expected-warning{{function 'operator()' is unsafe}} \ + expected-note{{string argument is not guaranteed to be null-terminated}} +} + +// The second attribute argument, which points the starting index of +// format string arguments, may not be a valid argument index: +void myprintf_arg_idx_oob(const char *) __attribute__((__format__ (__printf__, 1, 2))); + +void test_format_attr_invalid_arg_idx(char * Str, std::string StdStr) { + myprintf_arg_idx_oob("hello"); + myprintf_arg_idx_oob(Str); // expected-warning{{function 'myprintf_arg_idx_oob' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}} + myprintf_arg_idx_oob(StdStr.c_str()); + myprintf("hello"); + myprintf(Str); // expected-warning{{function 'myprintf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}} + myprintf(StdStr.c_str()); } >From 5b5cd2cee7245fb01a96148fd1b8b44cc78d9fb5 Mon Sep 17 00:00:00 2001 From: Ziqing Luo <[email protected]> Date: Wed, 7 Jan 2026 12:03:39 -0800 Subject: [PATCH 2/3] Address comments. Add tests for default arguments. --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 4 +++- .../warn-unsafe-buffer-usage-libc-functions.cpp | 15 ++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 7be08fe274c1a..6b700355c651c 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -755,6 +755,9 @@ static const Expr *tryConstantFoldConditionalExpr(const Expr *E, // A pointer type expression is known to be null-terminated, if it has the // form: E.c_str(), for any expression E of `std::string` type. static bool isNullTermPointer(const Expr *Ptr, ASTContext &Ctx) { + // Strip CXXDefaultArgExpr before check: + if (const auto *DefaultArgE = dyn_cast<CXXDefaultArgExpr>(Ptr)) + Ptr = DefaultArgE->getExpr(); Ptr = tryConstantFoldConditionalExpr(Ptr, Ctx); if (isa<clang::StringLiteral>(Ptr->IgnoreParenImpCasts())) return true; @@ -2255,7 +2258,6 @@ class UnsafeFormatAttributedFunctionCallGadget : public WarningGadget { // For printf-like functions: if (AnyAttr && libc_func_matchers::hasUnsafeFormatOrSArg( Ctx, CE, UnsafeArg, - // FormatAttribute indexes are 1-based: FmtIdx, FmtArgIdx)) { Result.addNode(Tag, DynTypedNode::create(*CE)); Result.addNode(UnsafeStringTag, DynTypedNode::create(*UnsafeArg)); diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp index 138f13039a8a1..d824463ad9618 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp @@ -269,6 +269,9 @@ void myprintf_2(const char *, int, const char *) __attribute__((__format__ (__pr void myprintf_3(const char *, const char *, int, const char *) __attribute__((__format__ (__printf__, 2, 4))); void myscanf(const char *, ...) __attribute__((__format__ (__scanf__, 1, 2))); +void myprintf_default(const char *, const char * Fmt = "hello %s", + int X = 0, const char * Str = "world") __attribute__((__format__ (__printf__, 2, 4))); + struct FormatAttrTestMember { void myprintf(const char *, ...) __attribute__((__format__ (__printf__, 2, 3))); void myprintf_2(const char *, int, const char *) __attribute__((__format__ (__printf__, 2, 4))); @@ -277,6 +280,12 @@ struct FormatAttrTestMember { void operator()(const char * Fmt, ...) __attribute__((__format__ (__printf__, 2, 3))); void operator[](const char * Fmt) __attribute__((__format__ (__printf__, 2, 3))); + + + static const char * StrField; + + void myprintf_default(const char *, const char * Fmt = "hello %s", + int X = 0, const char * Str = StrField) __attribute__((__format__ (__printf__, 3, 5))); }; struct FormatAttrTestMember2 { @@ -298,10 +307,11 @@ void test_format_attr(char * Str, std::string StdStr) { myprintf_3("irrelevant", "hello %s", 0, StdStr.c_str()); myprintf_3("irrelevant", "hello %s", 0, Str); // expected-warning{{function 'myprintf_3' is unsafe}} \ expected-note{{string argument is not guaranteed to be null-terminated}} - myscanf("hello %s"); myscanf("hello %s", Str); // expected-warning{{function 'myscanf' is unsafe}} + myprintf_default("irrelevant"); + int X; myscanf("hello %d", &X); // expected-warning{{function 'myscanf' is unsafe}} @@ -329,6 +339,9 @@ void test_format_attr(char * Str, std::string StdStr) { Obj.myscanf("hello %d", &X); // expected-warning{{function 'myscanf' is unsafe}} + Obj.myprintf_default("irrelevant"); // expected-warning{{function 'myprintf_default' is unsafe}} + // expected-note@*{{string argument is not guaranteed to be null-terminated}} + Obj("hello", Str); Obj("hello %s", StdStr.c_str()); Obj("hello %s", Str); // expected-warning{{function 'operator()' is unsafe}} \ >From 1a418944488bfac012d00cfffd40e4c674e3dd38 Mon Sep 17 00:00:00 2001 From: Ziqing Luo <[email protected]> Date: Wed, 7 Jan 2026 13:18:58 -0800 Subject: [PATCH 3/3] fix code style --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 6b700355c651c..7c0eaa2e589f5 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -2257,8 +2257,7 @@ class UnsafeFormatAttributedFunctionCallGadget : public WarningGadget { } // For printf-like functions: if (AnyAttr && libc_func_matchers::hasUnsafeFormatOrSArg( - Ctx, CE, UnsafeArg, - FmtIdx, FmtArgIdx)) { + Ctx, CE, UnsafeArg, FmtIdx, FmtArgIdx)) { Result.addNode(Tag, DynTypedNode::create(*CE)); Result.addNode(UnsafeStringTag, DynTypedNode::create(*UnsafeArg)); return true; _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
