https://github.com/ojhunt updated 
https://github.com/llvm/llvm-project/pull/173212

>From 6eff318fdc6aafb6c853e4e5fc35b34bc6d45232 Mon Sep 17 00:00:00 2001
From: Oliver Hunt <[email protected]>
Date: Sun, 21 Dec 2025 20:20:50 -0800
Subject: [PATCH 1/3] [clang][bytecode] Fix incorrect handling of arithmetic on
 string literals

Fixes #173175.

The fast path for string literals fails to consider the offset of the
pointer into an array of chars. This change adds the required checks
and bounds corrections prior to calling strnlen.
---
 clang/lib/AST/ByteCode/Context.cpp            |   6 +-
 ...e-interpreter-constant-string-evaluation.c | 160 ++++++++++++++++++
 2 files changed, 164 insertions(+), 2 deletions(-)
 create mode 100644 
clang/test/Sema/bytecode-interpreter-constant-string-evaluation.c

diff --git a/clang/lib/AST/ByteCode/Context.cpp 
b/clang/lib/AST/ByteCode/Context.cpp
index 74ec986e49ca7..a64c1e3756062 100644
--- a/clang/lib/AST/ByteCode/Context.cpp
+++ b/clang/lib/AST/ByteCode/Context.cpp
@@ -294,12 +294,14 @@ bool Context::evaluateStrlen(State &Parent, const Expr 
*E, uint64_t &Result) {
     if (!FieldDesc->isPrimitiveArray())
       return false;
 
-    if (Ptr.isDummy() || Ptr.isUnknownSizeArray())
+    if (Ptr.isDummy() || Ptr.isUnknownSizeArray() || Ptr.isPastEnd())
       return false;
 
     unsigned N = Ptr.getNumElems();
     if (Ptr.elemSize() == 1) {
-      Result = strnlen(reinterpret_cast<const char *>(Ptr.getRawAddress()), N);
+      unsigned Size = N - Ptr.getIndex();
+      Result =
+          strnlen(reinterpret_cast<const char *>(Ptr.getRawAddress()), Size);
       return Result != N;
     }
 
diff --git a/clang/test/Sema/bytecode-interpreter-constant-string-evaluation.c 
b/clang/test/Sema/bytecode-interpreter-constant-string-evaluation.c
new file mode 100644
index 0000000000000..d8b191743698b
--- /dev/null
+++ b/clang/test/Sema/bytecode-interpreter-constant-string-evaluation.c
@@ -0,0 +1,160 @@
+// RUN: %clang_cc1 -x c   -std=c2x   -fsyntax-only -verify 
-Wno-string-plus-int -Wno-unused-value %s
+// RUN: %clang_cc1 -x c   -std=c2x   -fsyntax-only -verify 
-Wno-string-plus-int -Wno-unused-value %s 
-fexperimental-new-constant-interpreter
+// RUN: %clang_cc1 -x c++ -std=c++23 -fsyntax-only -verify 
-Wno-string-plus-int -Wno-unused-value %s
+// RUN: %clang_cc1 -x c++ -std=c++23 -fsyntax-only -verify 
-Wno-string-plus-int -Wno-unused-value %s 
-fexperimental-new-constant-interpreter
+
+void test(char* c) {
+  __builtin_strcat(c, "42" + 0);
+  __builtin_strcat(c, "42" + 1);
+  __builtin_strcat(c, "42" + 2);
+  __builtin_strcat(c, "42" + 3);
+  __builtin_strcat(c, "42" + 4);
+  char buffer[10];
+  __builtin_sprintf(buffer, "%%d%d%d"+0, 1);
+  // expected-warning@-1 {{more '%' conversions than data arguments}}
+  __builtin_sprintf(buffer, "%%d%d%d"+1, 1);
+  // expected-warning@-1 {{more '%' conversions than data arguments}}
+  __builtin_sprintf(buffer, "%%d%d%d"+2, 1);
+  // expected-warning@-1 {{more '%' conversions than data arguments}}
+  __builtin_sprintf(buffer, "%%d%d%d"+3, 1);
+  // expected-warning@-1 {{more '%' conversions than data arguments}}
+  __builtin_sprintf(buffer, "%%d%d%d"+4, 1);
+  __builtin_sprintf(buffer, "%%d%d%d"+5, 1);
+  __builtin_sprintf(buffer, "%%d%d%d"+6, 1);
+  // expected-warning@-1 {{data argument not used by format string}}
+  __builtin_sprintf(buffer, "%%d%d%d"+7, 1);
+  // expected-warning@-1 {{format string is empty}}
+  __builtin_sprintf(buffer, "%%d%d%d"+8, 1);
+  // TODO: we should probably warn about the format string being out of 
bounds
+
+  __builtin_sprintf(buffer, "%%d%d%d"+0, 1, 2);
+  __builtin_sprintf(buffer, "%%d%d%d"+1, 1, 2);
+  // expected-warning@-1 {{more '%' conversions than data arguments}}
+  __builtin_sprintf(buffer, "%%d%d%d"+2, 1, 2);
+  __builtin_sprintf(buffer, "%%d%d%d"+3, 1, 2);
+  __builtin_sprintf(buffer, "%%d%d%d"+4, 1, 2);
+  // expected-warning@-1 {{data argument not used by format string}}
+  __builtin_sprintf(buffer, "%%d%d%d"+5, 1, 2);
+  // expected-warning@-1 {{data argument not used by format string}}
+  __builtin_sprintf(buffer, "%%d%d%d"+6, 1, 2);
+  // expected-warning@-1 {{data argument not used by format string}}
+  __builtin_sprintf(buffer, "%%d%d%d"+7, 1, 2);
+  // expected-warning@-1 {{format string is empty}}
+  __builtin_sprintf(buffer, "%%d%d%d"+8, 1, 2);
+  __builtin_sprintf(buffer, "%%d%d%d"+9, 1, 2);
+
+  __builtin_sprintf(buffer, "%%d%d%d"+0, 1, 2, 3);
+  // expected-warning@-1 {{data argument not used by format string}}
+  __builtin_sprintf(buffer, "%%d%d%d"+1, 1, 2, 3);
+  __builtin_sprintf(buffer, "%%d%d%d"+2, 1, 2, 3);
+  // expected-warning@-1 {{data argument not used by format string}}
+  __builtin_sprintf(buffer, "%%d%d%d"+3, 1, 2, 3);
+  // expected-warning@-1 {{data argument not used by format string}}
+  __builtin_sprintf(buffer, "%%d%d%d"+4, 1, 2, 3);
+  // expected-warning@-1 {{data argument not used by format string}}
+  __builtin_sprintf(buffer, "%%d%d%d"+5, 1, 2, 3);
+  // expected-warning@-1 {{data argument not used by format string}}
+  __builtin_sprintf(buffer, "%%d%d%d"+6, 1, 2, 3);
+  // expected-warning@-1 {{data argument not used by format string}}
+  __builtin_sprintf(buffer, "%%d%d%d"+7, 1, 2, 3);
+  // expected-warning@-1 {{format string is empty}}
+  __builtin_sprintf(buffer, "%%d%d%d"+8, 1, 2, 3);
+  __builtin_sprintf(buffer, "%%d%d%d"+9, 1, 2, 3);
+  static const char format_string[] = {'%', '%', 'd', '%', 'd', '%', 'd'};
+  __builtin_sprintf(buffer, format_string+0, 1);
+  __builtin_sprintf(buffer, format_string+1, 1);
+  __builtin_sprintf(buffer, format_string+2, 1);
+  __builtin_sprintf(buffer, format_string+3, 1);
+  __builtin_sprintf(buffer, format_string+4, 1);
+  __builtin_sprintf(buffer, format_string+5, 1);
+  __builtin_sprintf(buffer, format_string+6, 1);
+  __builtin_sprintf(buffer, format_string+7, 1);
+  #ifdef __cplusplus
+  static constexpr char ce_format_string[] = {'%', '%', 'd', '%', 'd', '%', 
'd'};
+  __builtin_sprintf(buffer, ce_format_string+0, 1);
+  __builtin_sprintf(buffer, ce_format_string+1, 1);
+  __builtin_sprintf(buffer, ce_format_string+2, 1);
+  __builtin_sprintf(buffer, ce_format_string+3, 1);
+  __builtin_sprintf(buffer, ce_format_string+4, 1);
+  __builtin_sprintf(buffer, ce_format_string+5, 1);
+  __builtin_sprintf(buffer, ce_format_string+6, 1);
+  __builtin_sprintf(buffer, ce_format_string+7, 1);
+  #endif
+}
+
+#ifdef __cplusplus
+template <class FormatStringSource> bool test_template() {
+  char buffer[10];
+  __builtin_sprintf(buffer, FormatStringSource::format(0), 1); // 
#template_test1
+  __builtin_sprintf(buffer, FormatStringSource::format(1), 1); // 
#template_test2
+  __builtin_sprintf(buffer, FormatStringSource::format(2), 1); // 
#template_test3
+  __builtin_sprintf(buffer, FormatStringSource::format(3), 1); // 
#template_test4
+  __builtin_sprintf(buffer, FormatStringSource::format(4), 1); // 
#template_test5
+  __builtin_sprintf(buffer, FormatStringSource::format(5), 1); // 
#template_test6
+  __builtin_sprintf(buffer, FormatStringSource::format(6), 1); // 
#template_test7
+  __builtin_sprintf(buffer, FormatStringSource::format(7), 1); // 
#template_test8
+  __builtin_sprintf(buffer, FormatStringSource::format(8), 1); // 
#template_test9
+  return true;
+}
+
+struct LiteralFormatStr {
+  static consteval const char *format(int N) {
+    return "%%d%d%d" + N; // #LiteralFormatStrLiteral
+  }
+};
+
+struct ConstLiteralFormatStr {
+  static constexpr const char *formatStr = "%%d%d%d"; // 
#ConstLiteralFormatStrLiteral
+  static consteval const char *format(int N) {
+    return formatStr + N;
+  }
+};
+
+struct NullTerminatedArrayFormatStr {
+  static constexpr char formatStr[] = {'%', '%', 'd', '%', 'd', '%', 'd', 0};
+  static consteval const char *format(int N) {
+    return formatStr + N;
+  }
+};
+
+struct NoNullTerminatedArrayFormatStr {
+  static constexpr char formatStr[] = {'%', '%', 'd', '%', 'd', '%', 'd'};
+  static consteval const char *format(int N) {
+    return formatStr + N; // #NoNullTerminatedArrayFormatStr_format
+  }
+};
+
+void test_templates() {
+  test_template<LiteralFormatStr>();
+  // expected-note@-1 {{in instantiation of function template specialization 
'test_template<LiteralFormatStr>' requested here}}
+  // expected-warning@#template_test1 {{more '%' conversions than data 
arguments}}
+  // expected-warning@#template_test2 {{more '%' conversions than data 
arguments}}
+  // expected-warning@#template_test3 {{more '%' conversions than data 
arguments}}
+  // expected-warning@#template_test4 {{more '%' conversions than data 
arguments}}
+  // expected-warning@#template_test5 {{more '%' conversions than data 
arguments}}
+  // expected-warning@#template_test6 {{more '%' conversions than data 
arguments}}
+  // expected-warning@#template_test7 {{more '%' conversions than data 
arguments}}
+  // expected-warning@#template_test8 {{more '%' conversions than data 
arguments}}
+  // expected-warning@#template_test9 {{more '%' conversions than data 
arguments}}
+  // expected-note@#LiteralFormatStrLiteral 9 {{format string is defined here}}
+  test_template<ConstLiteralFormatStr>();
+  // expected-note@-1 {{in instantiation of function template specialization 
'test_template<ConstLiteralFormatStr>' requested here}}
+  // expected-warning@#template_test1 {{more '%' conversions than data 
arguments}}
+  // expected-warning@#template_test2 {{more '%' conversions than data 
arguments}}
+  // expected-warning@#template_test3 {{more '%' conversions than data 
arguments}}
+  // expected-warning@#template_test4 {{more '%' conversions than data 
arguments}}
+  // expected-warning@#template_test5 {{more '%' conversions than data 
arguments}}
+  // expected-warning@#template_test6 {{more '%' conversions than data 
arguments}}
+  // expected-warning@#template_test7 {{more '%' conversions than data 
arguments}}
+  // expected-warning@#template_test8 {{more '%' conversions than data 
arguments}}
+  // expected-warning@#template_test9 {{more '%' conversions than data 
arguments}}
+  // expected-note@#ConstLiteralFormatStrLiteral 9 {{format string is defined 
here}}
+  test_template<NullTerminatedArrayFormatStr>();
+  test_template<NoNullTerminatedArrayFormatStr>();
+  // expected-note@-1 {{in instantiation of function template specialization 
'test_template<NoNullTerminatedArrayFormatStr>' requested here}}
+  // expected-note@#NoNullTerminatedArrayFormatStr_format {{cannot refer to 
element 8 of array of 7 elements in a constant expression}}
+  // expected-error@#template_test9 {{call to consteval function 
'NoNullTerminatedArrayFormatStr::format' is not a constant expression}}
+  // expected-note@#template_test9 {{in call to 'format(8)'}}
+}
+
+#endif

>From 19e17632b72147dbc8b498b63fa2b9e1aaa9451d Mon Sep 17 00:00:00 2001
From: Oliver Hunt <[email protected]>
Date: Tue, 23 Dec 2025 14:55:55 -0800
Subject: [PATCH 2/3] split apart the tests, rename the bigger general one

---
 .../AST/ByteCode/char-buffer-arithmetic.c     | 24 +++++++++++++++++++
 ...luation.c => constant-string-evaluation.c} |  7 +-----
 2 files changed, 25 insertions(+), 6 deletions(-)
 create mode 100644 clang/test/AST/ByteCode/char-buffer-arithmetic.c
 rename clang/test/Sema/{bytecode-interpreter-constant-string-evaluation.c => 
constant-string-evaluation.c} (97%)

diff --git a/clang/test/AST/ByteCode/char-buffer-arithmetic.c 
b/clang/test/AST/ByteCode/char-buffer-arithmetic.c
new file mode 100644
index 0000000000000..0e95c2b4e9293
--- /dev/null
+++ b/clang/test/AST/ByteCode/char-buffer-arithmetic.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -std=c23 -triple x86_64 -verify -fsyntax-only 
-Wno-string-plus-int %s
+// RUN: %clang_cc1 -std=c23 -triple x86_64 -verify -fsyntax-only 
-Wno-string-plus-int -fexperimental-new-constant-interpreter %s
+
+void test(char *c) {
+  // We test offsets 0 to 4.
+  // 0: a no op of course
+  // 1,2: these result in a different length string than the buffer size
+  // 3: the last position: this hits ptr+object_size being a valid pointer,
+  //    but not dereferenceable
+  // 4: completely invalid pointer
+  __builtin_strcat(c, "42" + 0);
+  __builtin_strcat(c, "42" + 1);
+  __builtin_strcat(c, "42" + 2);
+  __builtin_strcat(c, "42" + 3);
+  __builtin_strcat(c, "42" + 4);
+  _Static_assert(__builtin_strlen("42" + 0) == 2);
+  _Static_assert(__builtin_strlen("42" + 1) == 1);
+  _Static_assert(__builtin_strlen("42" + 2) == 0);
+  _Static_assert(__builtin_strlen("42" + 3));
+  // expected-error@-1 {{static assertion expression is not an integral 
constant expression}}
+  _Static_assert(__builtin_strlen("42" + 4));
+  // expected-error@-1 {{static assertion expression is not an integral 
constant expression}}
+  // expected-note@-2 {{cannot refer to element 4 of array of 3 elements in a 
constant expression}}
+}
diff --git a/clang/test/Sema/bytecode-interpreter-constant-string-evaluation.c 
b/clang/test/Sema/constant-string-evaluation.c
similarity index 97%
rename from clang/test/Sema/bytecode-interpreter-constant-string-evaluation.c
rename to clang/test/Sema/constant-string-evaluation.c
index d8b191743698b..7cc5fc888933e 100644
--- a/clang/test/Sema/bytecode-interpreter-constant-string-evaluation.c
+++ b/clang/test/Sema/constant-string-evaluation.c
@@ -3,12 +3,7 @@
 // RUN: %clang_cc1 -x c++ -std=c++23 -fsyntax-only -verify 
-Wno-string-plus-int -Wno-unused-value %s
 // RUN: %clang_cc1 -x c++ -std=c++23 -fsyntax-only -verify 
-Wno-string-plus-int -Wno-unused-value %s 
-fexperimental-new-constant-interpreter
 
-void test(char* c) {
-  __builtin_strcat(c, "42" + 0);
-  __builtin_strcat(c, "42" + 1);
-  __builtin_strcat(c, "42" + 2);
-  __builtin_strcat(c, "42" + 3);
-  __builtin_strcat(c, "42" + 4);
+void test(void) {
   char buffer[10];
   __builtin_sprintf(buffer, "%%d%d%d"+0, 1);
   // expected-warning@-1 {{more '%' conversions than data arguments}}

>From c91cfd47a1e35e5de7eebef1e8e45b4da268e7f0 Mon Sep 17 00:00:00 2001
From: Oliver Hunt <[email protected]>
Date: Tue, 23 Dec 2025 18:05:49 -0800
Subject: [PATCH 3/3] Finally worked out how to test the string size
 computation

---
 clang/lib/AST/ByteCode/Context.cpp              |  2 +-
 .../test/AST/ByteCode/char-buffer-arithmetic.c  | 17 +++++++++++++++--
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/clang/lib/AST/ByteCode/Context.cpp 
b/clang/lib/AST/ByteCode/Context.cpp
index a64c1e3756062..208fcb2a2732e 100644
--- a/clang/lib/AST/ByteCode/Context.cpp
+++ b/clang/lib/AST/ByteCode/Context.cpp
@@ -302,7 +302,7 @@ bool Context::evaluateStrlen(State &Parent, const Expr *E, 
uint64_t &Result) {
       unsigned Size = N - Ptr.getIndex();
       Result =
           strnlen(reinterpret_cast<const char *>(Ptr.getRawAddress()), Size);
-      return Result != N;
+      return Result != Size;
     }
 
     PrimType ElemT = FieldDesc->getPrimType();
diff --git a/clang/test/AST/ByteCode/char-buffer-arithmetic.c 
b/clang/test/AST/ByteCode/char-buffer-arithmetic.c
index 0e95c2b4e9293..cf3aa70689163 100644
--- a/clang/test/AST/ByteCode/char-buffer-arithmetic.c
+++ b/clang/test/AST/ByteCode/char-buffer-arithmetic.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -std=c23 -triple x86_64 -verify -fsyntax-only 
-Wno-string-plus-int %s
-// RUN: %clang_cc1 -std=c23 -triple x86_64 -verify -fsyntax-only 
-Wno-string-plus-int -fexperimental-new-constant-interpreter %s
+// RUN: %clang_cc1 -std=c23 -triple x86_64 -verify -fsyntax-only 
-Wfortify-source -Wno-string-plus-int %s
+// RUN: %clang_cc1 -std=c23 -triple x86_64 -verify -fsyntax-only 
-Wfortify-source -Wno-string-plus-int -fexperimental-new-constant-interpreter %s
 
 void test(char *c) {
   // We test offsets 0 to 4.
@@ -14,6 +14,19 @@ void test(char *c) {
   __builtin_strcat(c, "42" + 3);
   __builtin_strcat(c, "42" + 4);
   _Static_assert(__builtin_strlen("42" + 0) == 2);
+
+  // A test without a null terminator; this captures incorrect size computation
+  // and incorrectly specifying the buffer size to strlen.
+  char buffer[1];
+  static const char test_buffer[] = {'4','2'};
+  __builtin_strcpy(buffer, test_buffer + 0);
+  __builtin_strcpy(buffer, test_buffer + 1);
+  // Note: these show that we will not issue a fortify warning when the source
+  // buffer is not null terminated.
+  __builtin_strcpy(buffer, test_buffer + 2);
+  __builtin_strcpy(buffer, test_buffer + 3);
+
+  // Verifying strlen computes from the correct starting point.
   _Static_assert(__builtin_strlen("42" + 1) == 1);
   _Static_assert(__builtin_strlen("42" + 2) == 0);
   _Static_assert(__builtin_strlen("42" + 3));

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to