a-tarasyuk (https://github.com/a-tarasyuk) created pull request https://github.com/llvm/llvm-project/pull/199991
This patch adds `-Wformat` support for the C23 `wN` and `wfN` length modifiers in `printf`/`scanf` format strings. #116962 >From 9d51580ef67e65562ef973dbb7b6792c51431c2b Mon Sep 17 00:00:00 2001 From: Oleksandr Tarasiuk <[email protected]> Date: Wed, 27 May 2026 16:28:30 +0300 Subject: [PATCH] [Clang] support C23 printf width length modifiers --- clang/docs/ReleaseNotes.rst | 1 + clang/include/clang/AST/ASTContext.h | 3 + clang/include/clang/AST/FormatString.h | 31 ++++++- clang/lib/AST/ASTContext.cpp | 6 ++ clang/lib/AST/FormatString.cpp | 117 ++++++++++++++++++++++++- clang/lib/AST/PrintfFormatString.cpp | 11 +++ clang/lib/AST/ScanfFormatString.cpp | 9 ++ clang/lib/Sema/SemaChecking.cpp | 14 +-- clang/test/Sema/format-strings-c23.c | 50 +++++++++++ clang/test/Sema/format-strings.c | 5 ++ 10 files changed, 234 insertions(+), 13 deletions(-) create mode 100644 clang/test/Sema/format-strings-c23.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index cef93e25f1e7d..1acfb155b974c 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -227,6 +227,7 @@ C2y Feature Support C23 Feature Support ^^^^^^^^^^^^^^^^^^^ - Clang now allows C23 ``constexpr`` struct member access through the dot operator in constant expressions. (#GH178349) +- Clang now supports the C23 ``wN`` and ``wfN`` length modifiers. (#GH116962) Objective-C Language Changes ----------------------------- diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 9ef27cc1eb58e..fa7f531248526 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -949,6 +949,9 @@ class ASTContext : public RefCountedBase<ASTContext> { QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const; + QualType getLeastIntTypeForBitwidth(unsigned DestWidth, + unsigned Signed) const; + /// getRealTypeForBitwidth - /// sets floating point QualTy according to specified bitwidth. 
/// Returns empty type if there is no appropriate target types. diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h index a3382e1a1d007..239d88ea33c7c 100644 --- a/clang/include/clang/AST/FormatString.h +++ b/clang/include/clang/AST/FormatString.h @@ -19,6 +19,7 @@ #define LLVM_CLANG_AST_FORMATSTRING_H #include "clang/AST/CanonicalType.h" +#include "llvm/ADT/StringRef.h" #include <optional> namespace clang { @@ -80,6 +81,8 @@ class LengthModifier { AsInt3264, // 'I' (MSVCRT, like __int3264 from MIDL) AsInt64, // 'I64' (MSVCRT, like __int64) AsLongDouble, // 'L' + AsIntN, // 'wN' + AsFastIntN, // 'wfN' AsAllocate, // for '%as', GNU extension to C90 scanf AsMAllocate, // for '%ms', GNU extension to scanf AsWide, // 'w' (MSVCRT, like l but only for c, C, s, S, or Z @@ -88,6 +91,8 @@ class LengthModifier { LengthModifier() : Position(nullptr), kind(None) {} LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {} + LengthModifier(const char *pos, Kind k, unsigned bitWidth, unsigned length) + : Position(pos), kind(k), BitWidth(bitWidth), ModifierLength(length) {} const char *getStart() const { return Position; } @@ -98,6 +103,9 @@ class LengthModifier { case AsLongLong: case AsChar: return 2; + case AsIntN: + case AsFastIntN: + return ModifierLength; case AsInt32: case AsInt64: return 3; @@ -109,11 +117,15 @@ class LengthModifier { Kind getKind() const { return kind; } void setKind(Kind k) { kind = k; } - const char *toString() const; + unsigned getBitWidth() const { return BitWidth; } + + StringRef toString() const; private: const char *Position; Kind kind; + unsigned BitWidth = 0; + unsigned ModifierLength = 0; }; class ConversionSpecifier { @@ -301,10 +313,18 @@ class ArgType { const char *Name = nullptr; bool Ptr = false; - /// The TypeKind identifies certain well-known types like size_t and - /// ptrdiff_t. - enum class TypeKind { DontCare, SizeT, PtrdiffT }; + /// The TypeKind identifies certain well-known types. 
+ enum class TypeKind { + DontCare, + SizeT, + PtrdiffT, + IntN, + UIntN, + FastIntN, + FastUIntN, + }; TypeKind TK = TypeKind::DontCare; + unsigned BitWidth = 0; public: ArgType(Kind K = UnknownTy, const char *N = nullptr) : K(K), Name(N) {} @@ -341,6 +361,9 @@ class ArgType { return Res; } + static ArgType makeIntNT(ASTContext &Ctx, const LengthModifier &LengthMod, + bool Signed); + MatchKind matchesType(ASTContext &C, QualType argTy) const; MatchKind matchesArgType(ASTContext &C, const ArgType &other) const; diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index a401a7471e6fc..ec483df8aa61b 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -13541,6 +13541,12 @@ QualType ASTContext::getIntTypeForBitwidth(unsigned DestWidth, return QualTy; } +QualType ASTContext::getLeastIntTypeForBitwidth(unsigned DestWidth, + unsigned Signed) const { + return getFromTargetType( + getTargetInfo().getLeastIntTypeByWidth(DestWidth, Signed)); +} + /// getRealTypeForBitwidth - /// sets floating point QualTy according to specified bitwidth. /// Returns empty type if there is no appropriate target types. 
diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp index 7e1ac0de6dcaf..81a77a89268aa 100644 --- a/clang/lib/AST/FormatString.cpp +++ b/clang/lib/AST/FormatString.cpp @@ -14,7 +14,9 @@ #include "FormatStringParsing.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/ConvertUTF.h" +#include <limits> #include <optional> using clang::analyze_format_string::ArgType; @@ -59,6 +61,21 @@ OptionalAmount clang::analyze_format_string::ParseAmount(const char *&Beg, return OptionalAmount(); } +static bool ParseWidthModifier(const char *&I, const char *E, + unsigned &BitWidth, unsigned &ModifierLength) { + StringRef W = StringRef(I, E - I).take_while(llvm::isDigit); + if (W.empty() || W.front() == '0') + return false; + + if (W.getAsInteger(10, BitWidth)) + BitWidth = std::numeric_limits<unsigned>::max(); + + for (const char *End = W.end(); I != End; ++I) + ++ModifierLength; + + return true; +} + OptionalAmount clang::analyze_format_string::ParseNonPositionAmount( const char *&Beg, const char *E, unsigned &argIndex) { if (*Beg == '*') { @@ -287,6 +304,25 @@ bool clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, lmKind = LengthModifier::AsInt3264; break; case 'w': + if (LO.C23) { + const char *WidthModifier = I + 1; + unsigned BitWidth = 0; + unsigned ModifierLength = 1; + + LengthModifier::Kind WidthKind = LengthModifier::AsIntN; + if (WidthModifier != E && *WidthModifier == 'f') { + WidthModifier = I + 2; + ModifierLength = 2; + WidthKind = LengthModifier::AsFastIntN; + } + + if (ParseWidthModifier(WidthModifier, E, BitWidth, ModifierLength)) { + I = WidthModifier; + FS.setLengthModifier( + LengthModifier(lmPosition, WidthKind, BitWidth, ModifierLength)); + return true; + } + } lmKind = LengthModifier::AsWide; ++I; break; @@ -774,6 +810,22 @@ ArgType ArgType::makeVectorType(ASTContext &C, unsigned NumElts) const { return ArgType(Vec, Name); } 
+ArgType ArgType::makeIntNT(ASTContext &Ctx, const LengthModifier &LengthMod, + bool Signed) { + bool IsFast = LengthMod.getKind() == LengthModifier::AsFastIntN; + QualType Ty = + IsFast ? Ctx.getLeastIntTypeForBitwidth(LengthMod.getBitWidth(), Signed) + : Ctx.getIntTypeForBitwidth(LengthMod.getBitWidth(), Signed); + if (Ty.isNull()) + return ArgType::Invalid(); + + ArgType Res(Ty); + Res.TK = IsFast ? (Signed ? TypeKind::FastIntN : TypeKind::FastUIntN) + : (Signed ? TypeKind::IntN : TypeKind::UIntN); + Res.BitWidth = LengthMod.getBitWidth(); + return Res; +} + QualType ArgType::getRepresentativeType(ASTContext &C) const { QualType Res; switch (K) { @@ -820,6 +872,33 @@ std::string ArgType::getRepresentativeTypeName(ASTContext &C) const { if (Name) { // Use a specific name for this type, e.g. "size_t". Alias = Name; + } else { + const char *Prefix = nullptr; + switch (TK) { + case TypeKind::IntN: + Prefix = "int"; + break; + case TypeKind::UIntN: + Prefix = "uint"; + break; + case TypeKind::FastIntN: + Prefix = "int_fast"; + break; + case TypeKind::FastUIntN: + Prefix = "uint_fast"; + break; + case TypeKind::DontCare: + case TypeKind::SizeT: + case TypeKind::PtrdiffT: + break; + } + if (Prefix) { + Alias = Prefix; + Alias += std::to_string(BitWidth); + Alias += "_t"; + } + } + if (!Alias.empty()) { if (Ptr) { // If ArgType is actually a pointer to T, append an asterisk. Alias += (Alias[Alias.size() - 1] == '*') ? "*" : " *"; @@ -847,7 +926,7 @@ analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const { // Methods on LengthModifier. 
//===----------------------------------------------------------------------===// -const char *analyze_format_string::LengthModifier::toString() const { +StringRef analyze_format_string::LengthModifier::toString() const { switch (kind) { case AsChar: return "hh"; @@ -875,6 +954,9 @@ const char *analyze_format_string::LengthModifier::toString() const { return "I64"; case AsLongDouble: return "L"; + case AsIntN: + case AsFastIntN: + return StringRef(Position, getLength()); case AsAllocate: return "a"; case AsMAllocate: @@ -884,7 +966,7 @@ const char *analyze_format_string::LengthModifier::toString() const { case None: return ""; } - return nullptr; + llvm_unreachable("Invalid LengthModifier Kind!"); } //===----------------------------------------------------------------------===// @@ -1156,6 +1238,35 @@ bool FormatSpecifier::hasValidLengthModifier(const TargetInfo &Target, return false; } + case LengthModifier::AsIntN: + case LengthModifier::AsFastIntN: { + if (!LO.C23) + return false; + + TargetInfo::IntType TargetType = + LM.getKind() == LengthModifier::AsIntN + ? 
Target.getIntTypeByWidth(LM.getBitWidth(), /*IsSigned=*/true) + : Target.getLeastIntTypeByWidth(LM.getBitWidth(), + /*IsSigned=*/true); + if (TargetType == TargetInfo::NoInt) + return false; + + switch (CS.getKind()) { + case ConversionSpecifier::bArg: + case ConversionSpecifier::BArg: + case ConversionSpecifier::dArg: + case ConversionSpecifier::iArg: + case ConversionSpecifier::oArg: + case ConversionSpecifier::uArg: + case ConversionSpecifier::xArg: + case ConversionSpecifier::XArg: + case ConversionSpecifier::nArg: + return true; + default: + return false; + } + } + case LengthModifier::AsAllocate: switch (CS.getKind()) { case ConversionSpecifier::sArg: @@ -1217,6 +1328,8 @@ bool FormatSpecifier::hasStandardLengthModifier() const { case LengthModifier::AsSizeT: case LengthModifier::AsPtrDiff: case LengthModifier::AsLongDouble: + case LengthModifier::AsIntN: + case LengthModifier::AsFastIntN: return true; case LengthModifier::AsAllocate: case LengthModifier::AsMAllocate: diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp index 6610a2de9e083..64310193e1057 100644 --- a/clang/lib/AST/PrintfFormatString.cpp +++ b/clang/lib/AST/PrintfFormatString.cpp @@ -601,6 +601,11 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx, case LengthModifier::AsPtrDiff: return ArgType::makePtrdiffT( ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); + case LengthModifier::AsIntN: + case LengthModifier::AsFastIntN: + return ArgType::makeIntNT(Ctx, LM, + CS.getKind() != ConversionSpecifier::bArg && + CS.getKind() != ConversionSpecifier::BArg); case LengthModifier::AsAllocate: case LengthModifier::AsMAllocate: case LengthModifier::AsWide: @@ -639,6 +644,9 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx, case LengthModifier::AsPtrDiff: return ArgType::makePtrdiffT( ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t")); + case LengthModifier::AsIntN: + case LengthModifier::AsFastIntN: + return ArgType::makeIntNT(Ctx, LM, 
/*Signed=*/false); case LengthModifier::AsAllocate: case LengthModifier::AsMAllocate: case LengthModifier::AsWide: @@ -684,6 +692,9 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx, case LengthModifier::AsPtrDiff: return ArgType::PtrTo(ArgType::makePtrdiffT( ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"))); + case LengthModifier::AsIntN: + case LengthModifier::AsFastIntN: + return ArgType::PtrTo(ArgType::makeIntNT(Ctx, LM, /*Signed=*/true)); case LengthModifier::AsLongDouble: return ArgType(); // FIXME: Is this a known extension? case LengthModifier::AsAllocate: diff --git a/clang/lib/AST/ScanfFormatString.cpp b/clang/lib/AST/ScanfFormatString.cpp index 90cbbd60bbcf5..9b832376983d2 100644 --- a/clang/lib/AST/ScanfFormatString.cpp +++ b/clang/lib/AST/ScanfFormatString.cpp @@ -300,6 +300,9 @@ ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const { case LengthModifier::AsPtrDiff: return ArgType::PtrTo(ArgType::makePtrdiffT( ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"))); + case LengthModifier::AsIntN: + case LengthModifier::AsFastIntN: + return ArgType::PtrTo(ArgType::makeIntNT(Ctx, LM, /*Signed=*/true)); case LengthModifier::AsLongDouble: // GNU extension. return ArgType::PtrTo(Ctx.LongLongTy); @@ -344,6 +347,9 @@ ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const { case LengthModifier::AsPtrDiff: return ArgType::PtrTo(ArgType::makePtrdiffT( ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"))); + case LengthModifier::AsIntN: + case LengthModifier::AsFastIntN: + return ArgType::PtrTo(ArgType::makeIntNT(Ctx, LM, /*Signed=*/false)); case LengthModifier::AsLongDouble: // GNU extension. 
return ArgType::PtrTo(Ctx.UnsignedLongLongTy); @@ -443,6 +449,9 @@ ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const { case LengthModifier::AsPtrDiff: return ArgType::PtrTo(ArgType::makePtrdiffT( ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"))); + case LengthModifier::AsIntN: + case LengthModifier::AsFastIntN: + return ArgType::PtrTo(ArgType::makeIntNT(Ctx, LM, /*Signed=*/true)); case LengthModifier::AsLongDouble: return ArgType(); // FIXME: Is this a known extension? case LengthModifier::AsAllocate: diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 2309196ee1696..671ac62bfcce0 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -8322,7 +8322,7 @@ class EquatableFormatArgument { private: analyze_format_string::ArgType ArgType; - analyze_format_string::LengthModifier::Kind LengthMod; + analyze_format_string::LengthModifier LengthMod; StringRef SpecifierLetter; CharSourceRange Range; SourceLocation ElementLoc; @@ -8336,7 +8336,7 @@ class EquatableFormatArgument { public: EquatableFormatArgument(CharSourceRange Range, SourceLocation ElementLoc, - analyze_format_string::LengthModifier::Kind LengthMod, + analyze_format_string::LengthModifier LengthMod, StringRef SpecifierLetter, analyze_format_string::ArgType ArgType, FormatArgumentRole Role, @@ -8351,7 +8351,7 @@ class EquatableFormatArgument { SourceLocation getSourceLocation() const { return ElementLoc; } CharSourceRange getSourceRange() const { return Range; } analyze_format_string::LengthModifier getLengthModifier() const { - return analyze_format_string::LengthModifier(nullptr, LengthMod); + return LengthMod; } void setModifierFor(unsigned V) { ModifierFor = V; } @@ -8699,7 +8699,7 @@ bool DecomposePrintfHandler::HandlePrintfSpecifier( Specs.emplace_back( getSpecifierRange(startSpecifier, specifierLen), getLocationOfByte(FieldWidth.getStart()), - analyze_format_string::LengthModifier::None, FieldWidth.getCharacters(), + 
analyze_format_string::LengthModifier(), FieldWidth.getCharacters(), FieldWidth.getArgType(S.Context), EquatableFormatArgument::FAR_FieldWidth, EquatableFormatArgument::SS_None, @@ -8714,7 +8714,7 @@ bool DecomposePrintfHandler::HandlePrintfSpecifier( Specs.emplace_back( getSpecifierRange(startSpecifier, specifierLen), getLocationOfByte(Precision.getStart()), - analyze_format_string::LengthModifier::None, Precision.getCharacters(), + analyze_format_string::LengthModifier(), Precision.getCharacters(), Precision.getArgType(S.Context), EquatableFormatArgument::FAR_Precision, EquatableFormatArgument::SS_None, Precision.usesPositionalArg() ? Precision.getPositionalArgIndex() - 1 @@ -8742,7 +8742,7 @@ bool DecomposePrintfHandler::HandlePrintfSpecifier( Specs.emplace_back( getSpecifierRange(startSpecifier, specifierLen), - getLocationOfByte(CS.getStart()), FS.getLengthModifier().getKind(), + getLocationOfByte(CS.getStart()), FS.getLengthModifier(), CS.getCharacters(), FS.getArgType(S.Context, isObjCContext()), EquatableFormatArgument::FAR_Data, Sensitivity, SpecIndex, 0); @@ -8751,7 +8751,7 @@ bool DecomposePrintfHandler::HandlePrintfSpecifier( CS.getKind() == analyze_format_string::ConversionSpecifier::FreeBSDDArg) { Specs.emplace_back(getSpecifierRange(startSpecifier, specifierLen), getLocationOfByte(CS.getStart()), - analyze_format_string::LengthModifier::None, + analyze_format_string::LengthModifier(), CS.getCharacters(), analyze_format_string::ArgType::CStrTy, EquatableFormatArgument::FAR_Auxiliary, Sensitivity, diff --git a/clang/test/Sema/format-strings-c23.c b/clang/test/Sema/format-strings-c23.c new file mode 100644 index 0000000000000..7156dc21981de --- /dev/null +++ b/clang/test/Sema/format-strings-c23.c @@ -0,0 +1,50 @@ +// RUN: %clang_cc1 -std=c23 -fsyntax-only -verify %s + +typedef __INT32_TYPE__ int32_t; +typedef __UINT32_TYPE__ uint32_t; +typedef __INT_FAST32_TYPE__ int_fast32_t; +typedef __UINT_FAST32_TYPE__ uint_fast32_t; + +int printf(const char 
*restrict, ...); +int scanf(const char *restrict, ...); + +void t1(int32_t i32, uint32_t u32, int_fast32_t if32, uint_fast32_t uf32, int32_t *i_ptr, int_fast32_t *if_ptr, double *d_ptr) { + printf("%w32d", i32); + printf("%w32i", i32); + printf("%w32u", u32); + printf("%w32x", u32); + printf("%w32b", u32); + printf("%wf32d", if32); + printf("%wf32u", uf32); + printf("%wf32B", uf32); + + printf("%w32d", 1.0); // expected-warning{{format specifies type 'int32_t' (aka 'int') but the argument has type 'double'}} + printf("%w32u", 1.0); // expected-warning{{format specifies type 'uint32_t' (aka 'unsigned int') but the argument has type 'double'}} + printf("%wf32d", 1.0); // expected-warning{{format specifies type 'int_fast32_t' (aka 'int') but the argument has type 'double'}} + printf("%wf32u", 1.0); // expected-warning{{format specifies type 'uint_fast32_t' (aka 'unsigned int') but the argument has type 'double'}} + + printf("%w32n", i_ptr); + printf("%wf32n", if_ptr); + printf("%w32n", d_ptr); // expected-warning{{format specifies type 'int32_t *' (aka 'int *') but the argument has type 'double *'}} +} + +void t2(int32_t *i_ptr, uint32_t *u_ptr, int_fast32_t *if_ptr, uint_fast32_t *uf_ptr, double *d_ptr) { + scanf("%w32d", i_ptr); + scanf("%w32i", i_ptr); + scanf("%w32u", u_ptr); + scanf("%w32x", u_ptr); + scanf("%w32b", u_ptr); + scanf("%wf32d", if_ptr); + scanf("%wf32u", uf_ptr); + + scanf("%w32d", d_ptr); // expected-warning{{format specifies type 'int32_t *' (aka 'int *') but the argument has type 'double *'}} + scanf("%w32u", d_ptr); // expected-warning{{format specifies type 'uint32_t *' (aka 'unsigned int *') but the argument has type 'double *'}} + scanf("%wf32d", d_ptr); // expected-warning{{format specifies type 'int_fast32_t *' (aka 'int *') but the argument has type 'double *'}} + scanf("%wf32u", d_ptr); // expected-warning{{format specifies type 'uint_fast32_t *' (aka 'unsigned int *') but the argument has type 'double *'}} +} + +void t3(const char *fmt) 
__attribute__((format_matches(printf, 1, "%w32d"))); // expected-note{{comparing with this specifier}} +void t4(void) { + t3("%w32d"); + t3("%w64d"); // expected-warning{{format specifier 'w64d' is incompatible with 'w32d'}} +} diff --git a/clang/test/Sema/format-strings.c b/clang/test/Sema/format-strings.c index bdb4466dc6ae8..59cf5562e91ef 100644 --- a/clang/test/Sema/format-strings.c +++ b/clang/test/Sema/format-strings.c @@ -790,6 +790,11 @@ void test_opencl_vector_format(int x) { printf("%hld", x); // expected-warning{{invalid conversion specifier 'l'}} } +void test_int_width_modifiers(int x) { + printf("%w32d", x); // expected-warning {{invalid conversion specifier '3'}} + printf("%wf32d", 1.0); // expected-warning {{length modifier 'w' results in undefined behavior or no effect with 'f' conversion specifier}} +} + // Test that we correctly merge the format in both orders. extern void test14_foo(const char *, const char *, ...) __attribute__((__format__(__printf__, 1, 3))); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
