Author: mitchell
Date: 2025-11-29T10:36:01+08:00
New Revision: 5dd2b06d60d3eb9b07c7513358ad8b04386f79bc

URL: 
https://github.com/llvm/llvm-project/commit/5dd2b06d60d3eb9b07c7513358ad8b04386f79bc
DIFF: 
https://github.com/llvm/llvm-project/commit/5dd2b06d60d3eb9b07c7513358ad8b04386f79bc.diff

LOG: [clang-tidy] Fix OOB access in `FormatStringConverter` with signed chars 
(#169215)

`FormatStringConverter::appendFormatText` incorrectly treated non-ASCII
characters (e.g. UTF-8) as negative values when using signed chars. This
caused them to pass the `< 32` check for control characters.

The negative values were passed to `llvm::hexdigit`, resulting in an OOB
access and a crash.

This closes
[#169198](https://github.com/llvm/llvm-project/issues/169198)

Added: 
    

Modified: 
    clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp
    clang-tools-extra/docs/ReleaseNotes.rst
    clang-tools-extra/test/clang-tidy/check_clang_tidy.py
    clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp 
b/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp
index 23dae04916e9b..d210b000dfd33 100644
--- a/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp
+++ b/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp
@@ -700,6 +700,7 @@ void FormatStringConverter::finalizeFormatText() {
 /// Append literal parts of the format text, reinstating escapes as required.
 void FormatStringConverter::appendFormatText(const StringRef Text) {
   for (const char Ch : Text) {
+    const auto UCh = static_cast<unsigned char>(Ch);
     if (Ch == '\a')
       StandardFormatString += "\\a";
     else if (Ch == '\b')
@@ -724,10 +725,10 @@ void FormatStringConverter::appendFormatText(const 
StringRef Text) {
     } else if (Ch == '}') {
       StandardFormatString += "}}";
       FormatStringNeededRewriting = true;
-    } else if (Ch < 32) {
+    } else if (UCh < 32) {
       StandardFormatString += "\\x";
-      StandardFormatString += llvm::hexdigit(Ch >> 4, true);
-      StandardFormatString += llvm::hexdigit(Ch & 0xf, true);
+      StandardFormatString += llvm::hexdigit(UCh >> 4, true);
+      StandardFormatString += llvm::hexdigit(UCh & 0xf, true);
     } else
       StandardFormatString += Ch;
   }

diff  --git a/clang-tools-extra/docs/ReleaseNotes.rst 
b/clang-tools-extra/docs/ReleaseNotes.rst
index a6f80e3721db1..644c5cb573cf7 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -69,7 +69,7 @@ Potentially Breaking Changes
   - `CharTypdefsToIgnore` to `CharTypedefsToIgnore` in
     :doc:`bugprone-signed-char-misuse
     <clang-tidy/checks/bugprone/signed-char-misuse>`
-  
+
 - Modified the custom message format of :doc:`bugprone-unsafe-functions
   <clang-tidy/checks/bugprone/unsafe-functions>` by assigning a special meaning
   to the character ``>`` at the start of the value of the option
@@ -394,7 +394,7 @@ Changes in existing checks
   <clang-tidy/checks/bugprone/unhandled-self-assignment>` check by adding
   an additional matcher that generalizes the copy-and-swap idiom pattern
   detection.
-  
+
 - Improved :doc:`bugprone-unsafe-functions
   <clang-tidy/checks/bugprone/unsafe-functions>` check by hiding the default
   suffix when the reason starts with the character `>` in the `CustomFunctions`
@@ -497,7 +497,8 @@ Changes in existing checks
 - Improved :doc:`modernize-use-std-print
   <clang-tidy/checks/modernize/use-std-print>` check to correctly match
   when the format string is converted to a 
diff erent type by an implicit
-  constructor call.
+  constructor call, and fixed a crash when handling format strings
+  containing non-ASCII characters.
 
 - Improved :doc:`performance-unnecessary-copy-initialization
   <clang-tidy/checks/performance/unnecessary-copy-initialization>` by printing

diff  --git a/clang-tools-extra/test/clang-tidy/check_clang_tidy.py 
b/clang-tools-extra/test/clang-tidy/check_clang_tidy.py
index 183b33f135be8..b173ecf4fbdca 100755
--- a/clang-tools-extra/test/clang-tidy/check_clang_tidy.py
+++ b/clang-tools-extra/test/clang-tidy/check_clang_tidy.py
@@ -398,6 +398,8 @@ def parse_arguments() -> Tuple[argparse.Namespace, 
List[str]]:
 
 
 def main() -> None:
+    sys.stdout.reconfigure(encoding="utf-8")
+    sys.stderr.reconfigure(encoding="utf-8")
     args, extra_args = parse_arguments()
 
     abbreviated_stds = args.std

diff  --git 
a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp 
b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp
index ec37f077df7fc..63972cc0fd25e 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-print.cpp
@@ -54,6 +54,12 @@ void printf_deceptive_newline() {
   // CHECK-FIXES: std::println("Hello");
 }
 
+void printf_utf8_text() {
+  printf("你好世界\n");
+  // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 
'printf' [modernize-use-std-print]
+  // CHECK-FIXES: std::println("你好世界");
+}
+
 void printf_crlf_newline() {
   printf("Hello\r\n");
   // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::print' instead of 
'printf' [modernize-use-std-print]
@@ -303,6 +309,12 @@ void fprintf_simple() {
   // CHECK-FIXES: std::print(stderr, "Hello");
 }
 
+void fprintf_utf8_text() {
+  fprintf(stderr, "你好世界\n");
+  // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::println' instead of 
'fprintf' [modernize-use-std-print]
+  // CHECK-FIXES: std::println(stderr, "你好世界");
+}
+
 void std_printf_simple() {
   std::printf("std::Hello");
   // CHECK-MESSAGES: [[@LINE-1]]:3: warning: use 'std::print' instead of 
'printf' [modernize-use-std-print]


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to