https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/143487
>From 5825b324e47c78a939d8e62d1101e1574fd04638 Mon Sep 17 00:00:00 2001 From: Ziqing Luo <ziq...@udel.edu> Date: Tue, 10 Jun 2025 15:50:10 +0800 Subject: [PATCH 1/7] [-Wunterminated-string-initialization] Handle C string literals ending with explicit '\0' In C, a char array needs no "nonstring" attribute, if its initializer is a string literal that 1) explicitly ends with '\0' and 2) fits in the array after a possible truncation. For example `char a[4] = "ABC\0"; // fine, needs no "nonstring" attr` rdar://152506883 --- clang/lib/Sema/SemaInit.cpp | 5 +++++ clang/test/Sema/attr-nonstring_safe.c | 28 +++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 clang/test/Sema/attr-nonstring_safe.c diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index da56225b2f926..f7592688e0327 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -260,6 +260,11 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT, diag::ext_initializer_string_for_char_array_too_long) << Str->getSourceRange(); else if (StrLength - 1 == ArrayLen) { + // If the string literal is null-terminated explicitly, e.g., `char a[4] = + // "ABC\0"`, there should be no warn: + if (const auto *SL = dyn_cast<StringLiteral>(Str->IgnoreParens())) + if (SL->isOrdinary() && SL->getBytes().back() == 0) + return; // If the entity being initialized has the nonstring attribute, then // silence the "missing nonstring" diagnostic. If there's no entity, // check whether we're initializing an array of arrays; if so, walk the diff --git a/clang/test/Sema/attr-nonstring_safe.c b/clang/test/Sema/attr-nonstring_safe.c new file mode 100644 index 0000000000000..3ea441e033dba --- /dev/null +++ b/clang/test/Sema/attr-nonstring_safe.c @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -Wunterminated-string-initialization %s -x c +// RUN: %clang_cc1 -fsyntax-only -verify -Wunterminated-string-initialization %s -x c++ + + +// In C, the following examples are fine: +#if __cplusplus +char foo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} + +struct S { + char buf[3]; + char fub[3]; +} s = { "ba\0", "bo\0" }; // expected-error 2{{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} + +signed char scfoo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} +unsigned char ucfoo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} + +#else +//expected-no-diagnostics +char foo[3] = "fo\0"; + +struct S { + char buf[3]; + char fub[3]; +} s = { "ba\0", "bo\0" }; + +signed char scfoo[3] = "fo\0"; +unsigned char ucfoo[3] = "fo\0"; +#endif >From afb909341d4c5152f1d5ac6f2deef5385901bb61 Mon Sep 17 00:00:00 2001 From: Ziqing Luo <ziq...@udel.edu> Date: Wed, 11 Jun 2025 14:47:30 +0800 Subject: [PATCH 2/7] address comments --- clang/lib/Sema/SemaInit.cpp | 9 +++--- clang/test/Sema/attr-nonstring_safe.c | 45 +++++++++++++++++++++------ 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index f7592688e0327..ac611aed6d581 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -261,10 +261,11 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT, << Str->getSourceRange(); else if (StrLength - 1 == ArrayLen) { // If the string literal is null-terminated explicitly, e.g., `char a[4] = - // "ABC\0"`, there should be no warn: - if (const auto *SL = dyn_cast<StringLiteral>(Str->IgnoreParens())) - if (SL->isOrdinary() && SL->getBytes().back() == 0) - return; + // "ABC\0"`, there should be no warning: + if (const auto *SL = dyn_cast<StringLiteral>(Str->IgnoreParens()); + SL && SL->getLength() > 0 && + SL->getCodeUnit(SL->getLength() - 1) == 0) + return; // If the entity being initialized has the nonstring attribute, then // silence the "missing nonstring" diagnostic. If there's no entity, // check whether we're initializing an array of arrays; if so, walk the diff --git a/clang/test/Sema/attr-nonstring_safe.c b/clang/test/Sema/attr-nonstring_safe.c index 3ea441e033dba..93715d18db5a8 100644 --- a/clang/test/Sema/attr-nonstring_safe.c +++ b/clang/test/Sema/attr-nonstring_safe.c @@ -1,28 +1,53 @@ // RUN: %clang_cc1 -fsyntax-only -verify -Wunterminated-string-initialization %s -x c -// RUN: %clang_cc1 -fsyntax-only -verify -Wunterminated-string-initialization %s -x c++ +// RUN: %clang_cc1 -fsyntax-only -verify=cxx,expected -Wunterminated-string-initialization %s -x c++ -// In C, the following examples are fine: -#if __cplusplus -char foo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} +#ifdef __cplusplus +// C++ is stricter so the following cases should be warned about: + +char foo3[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} +char foo1[1] = "\0"; // cxx-error {{initializer-string for char array is too long, array size is 1 but initializer has size 2 (including the null terminating character)}} struct S { char buf[3]; char fub[3]; -} s = { "ba\0", "bo\0" }; // expected-error 2{{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} - -signed char scfoo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} -unsigned char ucfoo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} +} s = { "ba\0", "bo\0" }; // cxx-error 2{{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} +signed char scfoo[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} +unsigned char ucfoo[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} +wchar_t wcfoo[3] = L"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} +char16_t c16foo[3] = u"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} +char32_t c32foo[3] = U"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} #else -//expected-no-diagnostics -char foo[3] = "fo\0"; + +// In C, the following examples are fine: +#include <stddef.h> +typedef unsigned short char16_t; +typedef unsigned int char32_t; + +char foo3[3] = "fo\0"; +char foo1[1] = "\0"; struct S { char buf[3]; char fub[3]; } s = { "ba\0", "bo\0" }; +// Test different encodings: signed char scfoo[3] = "fo\0"; unsigned char ucfoo[3] = "fo\0"; +wchar_t wcfoo[3] = L"fo\0"; +char16_t c16foo[3] = u"fo\0"; +char32_t c32foo[3] = U"fo\0"; + +// Test list initializer: +signed char scfoo_lst[3] = {'f', 'o', '\0'}; +unsigned char ucfoo_lst[3] = {'f', 'o', '\0'}; +wchar_t wcfoo_lst[3] = {L'f', L'o', L'\0'}; +char16_t c16foo_lst[3] = {u'f', u'o', u'\0'}; +char32_t c32foo_lst[3] = {U'f', U'o', U'\0'}; + +// Declaring an array of size 0 is invalid by C standard but compilers +// may allow it: +char a[0] = ""; // expected-warning {{initializer-string for character array is too long, array size is 0 but initializer has size 1 (including the null terminating character); did you mean to use the 'nonstring' attribute?}} #endif >From fca602a4c18fdfe1ada285ea096e3436d7fa8253 Mon Sep 17 00:00:00 2001 From: Ziqing Luo <ziq...@udel.edu> Date: Wed, 11 Jun 2025 14:53:19 +0800 Subject: [PATCH 3/7] add release notes --- clang/docs/ReleaseNotes.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index beed0da6883d6..13e4db89e5dc8 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -220,6 +220,7 @@ C Language Changes char buf1[3] = "foo"; // -Wunterminated-string-initialization char buf2[3] = "flarp"; // -Wexcess-initializers + char buf3[3] = "fo\0"; // This is fine, no warning. This diagnostic can be suppressed by adding the new ``nonstring`` attribute to the field or variable being initialized. #GH137705 >From 6cfbbb36b0b79ab72f7d2772723a429fa996156a Mon Sep 17 00:00:00 2001 From: Ziqing Luo <ziq...@udel.edu> Date: Thu, 12 Jun 2025 11:43:03 +0800 Subject: [PATCH 4/7] remove '#include <stddef.h>' --- clang/test/Sema/attr-nonstring_safe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Sema/attr-nonstring_safe.c b/clang/test/Sema/attr-nonstring_safe.c index 93715d18db5a8..68e153d90cb42 100644 --- a/clang/test/Sema/attr-nonstring_safe.c +++ b/clang/test/Sema/attr-nonstring_safe.c @@ -21,9 +21,9 @@ char32_t c32foo[3] = U"fo\0"; // cxx-error {{initializer-string for char array i #else // In C, the following examples are fine: -#include <stddef.h> typedef unsigned short char16_t; typedef unsigned int char32_t; +typedef int wchar_t; char foo3[3] = "fo\0"; char foo1[1] = "\0"; >From ebb85743af08f4547a7416bb6e1098a899ca1b5b Mon Sep 17 00:00:00 2001 From: Ziqing Luo <ziq...@udel.edu> Date: Fri, 13 Jun 2025 10:12:19 +0800 Subject: [PATCH 5/7] fix test --- clang/test/Sema/attr-nonstring_safe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Sema/attr-nonstring_safe.c b/clang/test/Sema/attr-nonstring_safe.c index 68e153d90cb42..f3f3b3e0758d7 100644 --- a/clang/test/Sema/attr-nonstring_safe.c +++ b/clang/test/Sema/attr-nonstring_safe.c @@ -23,7 +23,7 @@ char32_t c32foo[3] = U"fo\0"; // cxx-error {{initializer-string for char array i // In C, the following examples are fine: typedef unsigned short char16_t; typedef unsigned int char32_t; -typedef int wchar_t; +typedef unsigned int wchar_t; char foo3[3] = "fo\0"; char foo1[1] = "\0"; >From 40d654385da4157cff8dd666e5d9ca7b721d5702 Mon Sep 17 00:00:00 2001 From: Ziqing Luo <ziq...@udel.edu> Date: Fri, 13 Jun 2025 15:49:08 +0800 Subject: [PATCH 6/7] fix test --- clang/test/Sema/attr-nonstring_safe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Sema/attr-nonstring_safe.c b/clang/test/Sema/attr-nonstring_safe.c index f3f3b3e0758d7..86e5441520456 100644 --- a/clang/test/Sema/attr-nonstring_safe.c +++ b/clang/test/Sema/attr-nonstring_safe.c @@ -23,7 +23,7 @@ char32_t c32foo[3] = U"fo\0"; // cxx-error {{initializer-string for char array i // In C, the following examples are fine: typedef unsigned short char16_t; typedef unsigned int char32_t; -typedef unsigned int wchar_t; +typedef __WCHAR_TYPE__ wchar_t; char foo3[3] = "fo\0"; char foo1[1] = "\0"; >From 5239320b1f2ad25ba22306e434898126f6dafbd2 Mon Sep 17 00:00:00 2001 From: Ziqing Luo <ziq...@udel.edu> Date: Mon, 23 Jun 2025 13:13:07 +0800 Subject: [PATCH 7/7] address comments --- clang/lib/Sema/SemaInit.cpp | 58 +++++++++++++------------- clang/test/Sema/attr-nonstring_safe.c | 59 +++++++++++++-------------- 2 files changed, 58 insertions(+), 59 deletions(-) diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index ac611aed6d581..0844cb4d6c3cd 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -260,35 +260,37 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT, diag::ext_initializer_string_for_char_array_too_long) << Str->getSourceRange(); else if (StrLength - 1 == ArrayLen) { - // If the string literal is null-terminated explicitly, e.g., `char a[4] = - // "ABC\0"`, there should be no warning: - if (const auto *SL = dyn_cast<StringLiteral>(Str->IgnoreParens()); - SL && SL->getLength() > 0 && - SL->getCodeUnit(SL->getLength() - 1) == 0) - return; - // If the entity being initialized has the nonstring attribute, then - // silence the "missing nonstring" diagnostic. If there's no entity, - // check whether we're initializing an array of arrays; if so, walk the - // parents to find an entity. - auto FindCorrectEntity = - [](const InitializedEntity *Entity) -> const ValueDecl * { - while (Entity) { - if (const ValueDecl *VD = Entity->getDecl()) - return VD; - if (!Entity->getType()->isArrayType()) - return nullptr; - Entity = Entity->getParent(); - } - - return nullptr; - }; - if (const ValueDecl *D = FindCorrectEntity(&Entity); - !D || !D->hasAttr<NonStringAttr>()) - S.Diag( - Str->getBeginLoc(), - diag::warn_initializer_string_for_char_array_too_long_no_nonstring) - << ArrayLen << StrLength << Str->getSourceRange(); + // In C, if the string literal is null-terminated explicitly, e.g., `char + // a[4] = "ABC\0"`, there should be no warning: + const auto *SL = dyn_cast<StringLiteral>(Str->IgnoreParens()); + bool IsSLSafe = SL && SL->getLength() > 0 && + SL->getCodeUnit(SL->getLength() - 1) == 0; + + if (!IsSLSafe) { + // If the entity being initialized has the nonstring attribute, then + // silence the "missing nonstring" diagnostic. If there's no entity, + // check whether we're initializing an array of arrays; if so, walk the + // parents to find an entity. + auto FindCorrectEntity = + [](const InitializedEntity *Entity) -> const ValueDecl * { + while (Entity) { + if (const ValueDecl *VD = Entity->getDecl()) + return VD; + if (!Entity->getType()->isArrayType()) + return nullptr; + Entity = Entity->getParent(); + } + return nullptr; + }; + if (const ValueDecl *D = FindCorrectEntity(&Entity); + !D || !D->hasAttr<NonStringAttr>()) + S.Diag( + Str->getBeginLoc(), + diag:: + warn_initializer_string_for_char_array_too_long_no_nonstring) + << ArrayLen << StrLength << Str->getSourceRange(); + } // Always emit the C++ compatibility diagnostic. S.Diag(Str->getBeginLoc(), diag::warn_initializer_string_for_char_array_too_long_for_cpp) diff --git a/clang/test/Sema/attr-nonstring_safe.c b/clang/test/Sema/attr-nonstring_safe.c index 86e5441520456..b59e2bfc7f691 100644 --- a/clang/test/Sema/attr-nonstring_safe.c +++ b/clang/test/Sema/attr-nonstring_safe.c @@ -1,9 +1,14 @@ -// RUN: %clang_cc1 -fsyntax-only -verify -Wunterminated-string-initialization %s -x c -// RUN: %clang_cc1 -fsyntax-only -verify=cxx,expected -Wunterminated-string-initialization %s -x c++ +// RUN: %clang_cc1 -fsyntax-only -verify=compat,expected -Wunterminated-string-initialization %s -x c +// RUN: %clang_cc1 -fsyntax-only -verify=cxx -Wunterminated-string-initialization %s -x c++ +#ifndef __cplusplus +typedef unsigned short char16_t; +typedef unsigned int char32_t; +typedef __WCHAR_TYPE__ wchar_t; +#endif -#ifdef __cplusplus -// C++ is stricter so the following cases should be warned about: +// C++ is stricter so the following cases should be warned about. In +// C, the following examples are fine. char foo3[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} char foo1[1] = "\0"; // cxx-error {{initializer-string for char array is too long, array size is 1 but initializer has size 2 (including the null terminating character)}} @@ -13,32 +18,23 @@ struct S { char fub[3]; } s = { "ba\0", "bo\0" }; // cxx-error 2{{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} -signed char scfoo[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} -unsigned char ucfoo[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} -wchar_t wcfoo[3] = L"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} -char16_t c16foo[3] = u"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} -char32_t c32foo[3] = U"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} -#else - -// In C, the following examples are fine: -typedef unsigned short char16_t; -typedef unsigned int char32_t; -typedef __WCHAR_TYPE__ wchar_t; - -char foo3[3] = "fo\0"; -char foo1[1] = "\0"; - -struct S { - char buf[3]; - char fub[3]; -} s = { "ba\0", "bo\0" }; - +#pragma clang diagnostic push +#pragma clang diagnostic warning "-Wc++-compat" // Test different encodings: -signed char scfoo[3] = "fo\0"; -unsigned char ucfoo[3] = "fo\0"; -wchar_t wcfoo[3] = L"fo\0"; -char16_t c16foo[3] = u"fo\0"; -char32_t c32foo[3] = U"fo\0"; +signed char scfoo[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} \ + compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} +unsigned char ucfoo[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} \ + compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} +wchar_t wcfoo[3] = L"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} \ + compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} \ + compat-warning {{identifier 'wchar_t' conflicts with a C++ keyword}} +char16_t c16foo[3] = u"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} \ + compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} \ + compat-warning {{identifier 'char16_t' conflicts with a C++ keyword}} +char32_t c32foo[3] = U"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} \ + compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} \ + compat-warning {{identifier 'char32_t' conflicts with a C++ keyword}} +#pragma clang diagnostic pop // Test list initializer: signed char scfoo_lst[3] = {'f', 'o', '\0'}; @@ -49,5 +45,6 @@ char32_t c32foo_lst[3] = {U'f', U'o', U'\0'}; // Declaring an array of size 0 is invalid by C standard but compilers // may allow it: -char a[0] = ""; // expected-warning {{initializer-string for character array is too long, array size is 0 but initializer has size 1 (including the null terminating character); did you mean to use the 'nonstring' attribute?}} -#endif +char a[0] = ""; // expected-warning {{initializer-string for character array is too long, array size is 0 but initializer has size 1 (including the null terminating character); did you mean to use the 'nonstring' attribute?}} \ + cxx-error {{initializer-string for char array is too long, array size is 0 but initializer has size 1 (including the null terminating character)}} +char b[1] = ""; // no warn _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits