https://github.com/zeyi2 updated https://github.com/llvm/llvm-project/pull/169215
>From 65513c1712bf0d62ec02b6f7c8fae723b9d0f877 Mon Sep 17 00:00:00 2001 From: mtx <[email protected]> Date: Sun, 23 Nov 2025 22:15:15 +0800 Subject: [PATCH 1/6] [clang-tidy] Fix OOB access in `FormatStringConverter` with signed chars --- .../clang-tidy/utils/FormatStringConverter.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp b/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp index 23dae04916e9b..a3af9504e6542 100644 --- a/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp +++ b/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp @@ -700,6 +700,7 @@ void FormatStringConverter::finalizeFormatText() { /// Append literal parts of the format text, reinstating escapes as required. void FormatStringConverter::appendFormatText(const StringRef Text) { for (const char Ch : Text) { + const unsigned char UCh = static_cast<unsigned char>(Ch); if (Ch == '\a') StandardFormatString += "\\a"; else if (Ch == '\b') @@ -724,10 +725,10 @@ void FormatStringConverter::appendFormatText(const StringRef Text) { } else if (Ch == '}') { StandardFormatString += "}}"; FormatStringNeededRewriting = true; - } else if (Ch < 32) { + } else if (UCh < 32) { StandardFormatString += "\\x"; - StandardFormatString += llvm::hexdigit(Ch >> 4, true); - StandardFormatString += llvm::hexdigit(Ch & 0xf, true); + StandardFormatString += llvm::hexdigit(UCh >> 4, true); + StandardFormatString += llvm::hexdigit(UCh & 0xf, true); } else StandardFormatString += Ch; } >From 785cf305295e09e4838a9b1514397d176f8f6b24 Mon Sep 17 00:00:00 2001 From: mtx <[email protected]> Date: Sun, 23 Nov 2025 22:30:55 +0800 Subject: [PATCH 2/6] ~ --- clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp b/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp index a3af9504e6542..d210b000dfd33 100644 --- a/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp +++ b/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp @@ -700,7 +700,7 @@ void FormatStringConverter::finalizeFormatText() { /// Append literal parts of the format text, reinstating escapes as required. void FormatStringConverter::appendFormatText(const StringRef Text) { for (const char Ch : Text) { - const unsigned char UCh = static_cast<unsigned char>(Ch); + const auto UCh = static_cast<unsigned char>(Ch); if (Ch == '\a') StandardFormatString += "\\a"; else if (Ch == '\b') >From 574e84a3a2fdb39fa9e89118f99d43e05055e792 Mon Sep 17 00:00:00 2001 From: mtx <[email protected]> Date: Sun, 23 Nov 2025 23:03:19 +0800 Subject: [PATCH 3/6] Add testcase and release notes --- clang-tools-extra/docs/ReleaseNotes.rst | 7 ++++--- .../checkers/modernize/use-std-print.cpp | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index a6f80e3721db1..644c5cb573cf7 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -69,7 +69,7 @@ Potentially Breaking Changes - `CharTypdefsToIgnore` to `CharTypedefsToIgnore` in :doc:`bugprone-signed-char-misuse <clang-tidy/checks/bugprone/signed-char-misuse>` - + - Modified the custom message format of :doc:`bugprone-unsafe-functions <clang-tidy/checks/bugprone/unsafe-functions>` by assigning a special meaning to the character ``>`` at the start of the value of the option @@ -394,7 +394,7 @@ Changes in existing checks <clang-tidy/checks/bugprone/unhandled-self-assignment>` check by adding an additional matcher that generalizes the copy-and-swap idiom pattern detection. - + - Improved :doc:`bugprone-unsafe-functions <clang-tidy/checks/bugprone/unsafe-functions>` check by hiding the default suffix when the reason starts with the character `>` in the `CustomFunctions` @@ -497,7 +497,8 @@ Changes in existing checks - Improved :doc:`modernize-use-std-print <clang-tidy/checks/modernize/use-std-print>` check to correctly match when the format string is converted to a different type by an implicit - constructor call. + constructor call, and fixed a crash when handling format strings + containing non-ASCII characters. - Improved :doc:`performance-unnecessary-copy-initialization <clang-tidy/checks/performance/unnecessary-copy-initialization>` by printing diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp index ec37f077df7fc..a48f4dcb98b86 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp @@ -54,6 +54,13 @@ void printf_deceptive_newline() { // CHECK-FIXES: std::println("Hello"); } +void printf_utf8_text() { + // Non-ASCII UTF-8 in format string should not crash. + printf("你好世界\n"); + // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 'printf' [modernize-use-std-print] + // CHECK-FIXES: std::println("你好世界"); +} + void printf_crlf_newline() { printf("Hello\r\n"); // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::print' instead of 'printf' [modernize-use-std-print] @@ -303,6 +310,13 @@ void fprintf_simple() { // CHECK-FIXES: std::print(stderr, "Hello"); } +void fprintf_utf8_text() { + // Non-ASCII UTF-8 in format string should not crash. + fprintf(stderr, "你好世界\n"); + // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 'fprintf' [modernize-use-std-print] + // CHECK-FIXES: std::println(stderr, "你好世界"); +} + void std_printf_simple() { std::printf("std::Hello"); // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::print' instead of 'printf' [modernize-use-std-print] >From 9ce0e7d4d6987e39fe7b0d6d280d5118700d1eab Mon Sep 17 00:00:00 2001 From: mtx <[email protected]> Date: Sun, 23 Nov 2025 23:35:07 +0800 Subject: [PATCH 4/6] Fix encoding --- .../clang-tidy/checkers/modernize/use-std-print.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp index a48f4dcb98b86..184d8aa09639a 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp @@ -55,10 +55,10 @@ void printf_deceptive_newline() { } void printf_utf8_text() { - // Non-ASCII UTF-8 in format string should not crash. - printf("你好世界\n"); + // Hex encodes U+4F60 U+597D U+4E16 U+754C (你好世界) in UTF-8 + printf("\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C\n"); // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 'printf' [modernize-use-std-print] - // CHECK-FIXES: std::println("你好世界"); + // CHECK-FIXES: std::println("\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C"); } void printf_crlf_newline() { @@ -311,10 +311,10 @@ void fprintf_simple() { } void fprintf_utf8_text() { - // Non-ASCII UTF-8 in format string should not crash. - fprintf(stderr, "你好世界\n"); + // Hex encodes U+4F60 U+597D U+4E16 U+754C (你好世界) in UTF-8 + fprintf(stderr, "\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C\n"); // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 'fprintf' [modernize-use-std-print] - // CHECK-FIXES: std::println(stderr, "你好世界"); + // CHECK-FIXES: std::println(stderr, "\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C"); } void std_printf_simple() { >From 1b2e271ea393f8241d4d45e1e8038d530ec04c35 Mon Sep 17 00:00:00 2001 From: mtx <[email protected]> Date: Mon, 24 Nov 2025 10:19:08 +0800 Subject: [PATCH 5/6] Fix test under Linux --- .../test/clang-tidy/checkers/modernize/use-std-print.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp index 184d8aa09639a..9cf88f1a69364 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp @@ -58,7 +58,7 @@ void printf_utf8_text() { // Hex encodes U+4F60 U+597D U+4E16 U+754C (你好世界) in UTF-8 printf("\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C\n"); // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 'printf' [modernize-use-std-print] - // CHECK-FIXES: std::println("\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C"); + // CHECK-FIXES: std::println("你好世界"); } void printf_crlf_newline() { @@ -314,7 +314,7 @@ void fprintf_utf8_text() { // Hex encodes U+4F60 U+597D U+4E16 U+754C (你好世界) in UTF-8 fprintf(stderr, "\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C\n"); // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 'fprintf' [modernize-use-std-print] - // CHECK-FIXES: std::println(stderr, "\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C"); + // CHECK-FIXES: std::println(stderr, "你好世界"); } void std_printf_simple() { >From 5dd20c61acec4e26321828f099c79033a1059254 Mon Sep 17 00:00:00 2001 From: mtx <[email protected]> Date: Mon, 24 Nov 2025 10:48:37 +0800 Subject: [PATCH 6/6] Fix CI --- .github/workflows/premerge.yaml | 6 ++++-- .../test/clang-tidy/checkers/modernize/use-std-print.cpp | 6 ++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/premerge.yaml b/.github/workflows/premerge.yaml index daf88b5b22125..8c3f644fc360f 100644 --- a/.github/workflows/premerge.yaml +++ b/.github/workflows/premerge.yaml @@ -145,7 +145,7 @@ jobs: - name: Compute Projects id: vars run: | - source <(git diff --name-only HEAD~1...HEAD | python .ci/compute_projects.py) + source <(git diff --name-only HEAD~1...HEAD | python -X utf8 .ci/compute_projects.py) if [[ "${projects_to_build}" == "" ]]; then echo "No projects to build" @@ -169,9 +169,11 @@ jobs: GITHUB_PR_NUMBER: ${{ github.event.pull_request.number }} run: | call C:\\BuildTools\\Common7\\Tools\\VsDevCmd.bat -arch=amd64 -host_arch=amd64 + set PYTHONUTF8=1 + set PYTHONIOENCODING=utf-8 # See the comments above in the Linux job for why we define each of # these environment variables. - bash -c "export SCCACHE_GCS_BUCKET=$CACHE_GCS_BUCKET; export SCCACHE_GCS_RW_MODE=READ_WRITE; export SCCACHE_IDLE_TIMEOUT=0; mkdir artifacts; SCCACHE_LOG=info SCCACHE_ERROR_LOG=$(pwd)/artifacts/sccache.log sccache --start-server; .ci/monolithic-windows.sh \"${{ steps.vars.outputs.windows-projects }}\" \"${{ steps.vars.outputs.windows-check-targets }}\" \"${{ steps.vars.outputs.windows-runtimes }}\" \"${{ steps.vars.outputs.windows-runtimes-check-targets }}\"" + bash -c "export PYTHONUTF8=1; export PYTHONIOENCODING=utf-8; export SCCACHE_GCS_BUCKET=$CACHE_GCS_BUCKET; export SCCACHE_GCS_RW_MODE=READ_WRITE; export SCCACHE_IDLE_TIMEOUT=0; mkdir artifacts; SCCACHE_LOG=info SCCACHE_ERROR_LOG=$(pwd)/artifacts/sccache.log sccache --start-server; .ci/monolithic-windows.sh \"${{ steps.vars.outputs.windows-projects }}\" \"${{ steps.vars.outputs.windows-check-targets }}\" \"${{ steps.vars.outputs.windows-runtimes }}\" \"${{ steps.vars.outputs.windows-runtimes-check-targets }}\"" - name: Upload Artifacts # In some cases, Github will fail to upload the artifact. We want to # continue anyways as a failed artifact upload is an infra failure, not diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp index 9cf88f1a69364..63972cc0fd25e 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp @@ -55,8 +55,7 @@ void printf_deceptive_newline() { } void printf_utf8_text() { - // Hex encodes U+4F60 U+597D U+4E16 U+754C (你好世界) in UTF-8 - printf("\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C\n"); + printf("你好世界\n"); // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 'printf' [modernize-use-std-print] // CHECK-FIXES: std::println("你好世界"); } @@ -311,8 +310,7 @@ void fprintf_simple() { } void fprintf_utf8_text() { - // Hex encodes U+4F60 U+597D U+4E16 U+754C (你好世界) in UTF-8 - fprintf(stderr, "\xE4\xBD\xA0\xE5\xA5\xBD\xE4\xB8\x96\xE7\x95\x8C\n"); + fprintf(stderr, "你好世界\n"); // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 'fprintf' [modernize-use-std-print] // CHECK-FIXES: std::println(stderr, "你好世界"); } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
