https://github.com/zeyi2 updated https://github.com/llvm/llvm-project/pull/169215
>From 65513c1712bf0d62ec02b6f7c8fae723b9d0f877 Mon Sep 17 00:00:00 2001 From: mtx <[email protected]> Date: Sun, 23 Nov 2025 22:15:15 +0800 Subject: [PATCH 1/5] [clang-tidy] Fix OOB access in `FormatStringConverter` with signed chars --- .../clang-tidy/utils/FormatStringConverter.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp b/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp index 23dae04916e9b..a3af9504e6542 100644 --- a/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp +++ b/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp @@ -700,6 +700,7 @@ void FormatStringConverter::finalizeFormatText() { /// Append literal parts of the format text, reinstating escapes as required. void FormatStringConverter::appendFormatText(const StringRef Text) { for (const char Ch : Text) { + const unsigned char UCh = static_cast<unsigned char>(Ch); if (Ch == '\a') StandardFormatString += "\\a"; else if (Ch == '\b') @@ -724,10 +725,10 @@ void FormatStringConverter::appendFormatText(const StringRef Text) { } else if (Ch == '}') { StandardFormatString += "}}"; FormatStringNeededRewriting = true; - } else if (Ch < 32) { + } else if (UCh < 32) { StandardFormatString += "\\x"; - StandardFormatString += llvm::hexdigit(Ch >> 4, true); - StandardFormatString += llvm::hexdigit(Ch & 0xf, true); + StandardFormatString += llvm::hexdigit(UCh >> 4, true); + StandardFormatString += llvm::hexdigit(UCh & 0xf, true); } else StandardFormatString += Ch; } >From 785cf305295e09e4838a9b1514397d176f8f6b24 Mon Sep 17 00:00:00 2001 From: mtx <[email protected]> Date: Sun, 23 Nov 2025 22:30:55 +0800 Subject: [PATCH 2/5] ~ --- clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp b/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp index a3af9504e6542..d210b000dfd33 100644 --- a/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp +++ b/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp @@ -700,7 +700,7 @@ void FormatStringConverter::finalizeFormatText() { /// Append literal parts of the format text, reinstating escapes as required. void FormatStringConverter::appendFormatText(const StringRef Text) { for (const char Ch : Text) { - const unsigned char UCh = static_cast<unsigned char>(Ch); + const auto UCh = static_cast<unsigned char>(Ch); if (Ch == '\a') StandardFormatString += "\\a"; else if (Ch == '\b') >From 574e84a3a2fdb39fa9e89118f99d43e05055e792 Mon Sep 17 00:00:00 2001 From: mtx <[email protected]> Date: Sun, 23 Nov 2025 23:03:19 +0800 Subject: [PATCH 3/5] Add testcase and release notes --- clang-tools-extra/docs/ReleaseNotes.rst | 7 ++++--- .../checkers/modernize/use-std-print.cpp | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index a6f80e3721db1..644c5cb573cf7 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -69,7 +69,7 @@ Potentially Breaking Changes - `CharTypdefsToIgnore` to `CharTypedefsToIgnore` in :doc:`bugprone-signed-char-misuse <clang-tidy/checks/bugprone/signed-char-misuse>` - + - Modified the custom message format of :doc:`bugprone-unsafe-functions <clang-tidy/checks/bugprone/unsafe-functions>` by assigning a special meaning to the character ``>`` at the start of the value of the option @@ -394,7 +394,7 @@ Changes in existing checks <clang-tidy/checks/bugprone/unhandled-self-assignment>` check by adding an additional matcher that generalizes the copy-and-swap idiom pattern detection. - + - Improved :doc:`bugprone-unsafe-functions <clang-tidy/checks/bugprone/unsafe-functions>` check by hiding the default suffix when the reason starts with the character `>` in the `CustomFunctions` @@ -497,7 +497,8 @@ Changes in existing checks - Improved :doc:`modernize-use-std-print <clang-tidy/checks/modernize/use-std-print>` check to correctly match when the format string is converted to a different type by an implicit - constructor call. + constructor call, and fixed a crash when handling format strings + containing non-ASCII characters. - Improved :doc:`performance-unnecessary-copy-initialization <clang-tidy/checks/performance/unnecessary-copy-initialization>` by printing diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp index ec37f077df7fc..a48f4dcb98b86 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp @@ -54,6 +54,13 @@ void printf_deceptive_newline() { // CHECK-FIXES: std::println("Hello"); } +void printf_utf8_text() { + // Non-ASCII UTF-8 in format string should not crash. + printf("你好世界\n"); + // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 'printf' [modernize-use-std-print] + // CHECK-FIXES: std::println("你好世界"); +} + void printf_crlf_newline() { printf("Hello\r\n"); // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::print' instead of 'printf' [modernize-use-std-print] @@ -303,6 +310,13 @@ void fprintf_simple() { // CHECK-FIXES: std::print(stderr, "Hello"); } +void fprintf_utf8_text() { + // Non-ASCII UTF-8 in format string should not crash. + fprintf(stderr, "你好世界\n"); + // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 'fprintf' [modernize-use-std-print] + // CHECK-FIXES: std::println(stderr, "你好世界"); +} + void std_printf_simple() { std::printf("std::Hello"); // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::print' instead of 'printf' [modernize-use-std-print] >From 88e6348cf7bd3e0cc78fca2008ae202a258b2e11 Mon Sep 17 00:00:00 2001 From: mtx <[email protected]> Date: Sun, 23 Nov 2025 23:35:07 +0800 Subject: [PATCH 4/5] Fix encoding --- .../clang-tidy/checkers/modernize/use-std-print.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp index a48f4dcb98b86..8c62d90f5dacc 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp @@ -55,10 +55,10 @@ void printf_deceptive_newline() { } void printf_utf8_text() { - // Non-ASCII UTF-8 in format string should not crash. - printf("你好世界\n"); + // Hex encodes U+4F60 U+597D U+4E16 U+754C (Hello world) in UTF-8 + printf("\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C\n"); // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 'printf' [modernize-use-std-print] - // CHECK-FIXES: std::println("你好世界"); + // CHECK-FIXES: std::println("{{.+}}"); } void printf_crlf_newline() { @@ -311,10 +311,10 @@ void fprintf_simple() { } void fprintf_utf8_text() { - // Non-ASCII UTF-8 in format string should not crash. - fprintf(stderr, "你好世界\n"); + // Hex encodes U+4F60 U+597D U+4E16 U+754C (Hello world) in UTF-8 + fprintf(stderr, "\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C\n"); // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 'fprintf' [modernize-use-std-print] - // CHECK-FIXES: std::println(stderr, "你好世界"); + // CHECK-FIXES: std::println(stderr, "{{.+}}"); } void std_printf_simple() { >From 855d4b5ca096fa6826d92309c47fba26416177e0 Mon Sep 17 00:00:00 2001 From: mtx <[email protected]> Date: Wed, 26 Nov 2025 15:36:45 +0800 Subject: [PATCH 5/5] Try skipping the test on Windows --- .../test/clang-tidy/checkers/modernize/use-std-print.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp index 8c62d90f5dacc..7ffe662adc67f 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp @@ -55,10 +55,11 @@ void printf_deceptive_newline() { } void printf_utf8_text() { + // UNSUPPORTED: system-windows // Hex encodes U+4F60 U+597D U+4E16 U+754C (Hello world) in UTF-8 printf("\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C\n"); // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 'printf' [modernize-use-std-print] - // CHECK-FIXES: std::println("{{.+}}"); + // CHECK-FIXES: std::println("你好世界"); } void printf_crlf_newline() { @@ -311,10 +312,11 @@ void fprintf_simple() { } void fprintf_utf8_text() { + // UNSUPPORTED: system-windows // Hex encodes U+4F60 U+597D U+4E16 U+754C (Hello world) in UTF-8 fprintf(stderr, "\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C\n"); // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 'fprintf' [modernize-use-std-print] - // CHECK-FIXES: std::println(stderr, "{{.+}}"); + // CHECK-FIXES: std::println(stderr, "你好世界"); } void std_printf_simple() { _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
