Author: Timm Baeder
Date: 2026-06-08T12:28:50+02:00
New Revision: 2e34b799749fd8c8446a3d27eee4317e78441d3c

URL: 
https://github.com/llvm/llvm-project/commit/2e34b799749fd8c8446a3d27eee4317e78441d3c
DIFF: 
https://github.com/llvm/llvm-project/commit/2e34b799749fd8c8446a3d27eee4317e78441d3c.diff

LOG: [clang][bytecode] Only count taken jumps as steps (#201845)

There are several large array declarations in llvm-libc. They usually
look similar to this:
```c++
alignas(16) inline constexpr LogRR LOG_TABLE = {
    {
        {Sign::POS, 0, 0_u128},
        {Sign::POS, -134, 0x8080abac'46f38946'662d417c'ed007a46_u128},
        {Sign::POS, -133, 0x8102b2c4'9ac23a4f'91d082dc'e3ddcd38_u128},
        {Sign::POS, -133, 0xc2492946'4655f45c'da5f3cc0'b3251dbd_u128},
        {Sign::POS, -132, 0x820aec4f'3a222380'b9e3aea6'c444ef07_u128},
// ...
```
The `_u128` suffix is a user-defined literal (UDL), so the hex constant to
its left is actually a `StringLiteral`, and the UDL converts it to a
different type by iterating over all of its characters. It calls one
function per character, and that function contains the usual switch
statement over all ASCII characters.

This was problematic with the bytecode interpreter. Support for
`-fconstexpr-steps` is implemented by counting the number of jumps, but
switch statements are implemented by comparing the switch condition to
all case values and jumping to the case body if the two values match.
This caused the number of steps to increase rapidly, as we were _also_
counting jumps we didn't take.

This commit changes this to only count the jumps we take.

The attached test case uses roughly 4'000 steps in the current
interpreter, but used to use over 8'000 with the bytecode interpreter. It
now uses only 400 in the bytecode interpreter (which might in turn be too
low, but that is not addressed here).

This fixes compiling LLVM with the bytecode interpreter.

Added: 
    clang/test/AST/ByteCode/switch-case-steps.cpp

Modified: 
    clang/lib/AST/ByteCode/Interp.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/AST/ByteCode/Interp.cpp 
b/clang/lib/AST/ByteCode/Interp.cpp
index 699b034c3c683..f6cac7aeb9fb5 100644
--- a/clang/lib/AST/ByteCode/Interp.cpp
+++ b/clang/lib/AST/ByteCode/Interp.cpp
@@ -65,15 +65,17 @@ static bool Jmp(InterpState &S, CodePtr &PC, int32_t 
Offset) {
 static bool Jt(InterpState &S, CodePtr &PC, int32_t Offset) {
   if (S.Stk.pop<bool>()) {
     PC += Offset;
+    return S.noteStep(PC);
   }
-  return S.noteStep(PC);
+  return true;
 }
 
 static bool Jf(InterpState &S, CodePtr &PC, int32_t Offset) {
   if (!S.Stk.pop<bool>()) {
     PC += Offset;
+    return S.noteStep(PC);
   }
-  return S.noteStep(PC);
+  return true;
 }
 
 static void diagnoseMissingInitializer(InterpState &S, CodePtr OpPC,

diff  --git a/clang/test/AST/ByteCode/switch-case-steps.cpp 
b/clang/test/AST/ByteCode/switch-case-steps.cpp
new file mode 100644
index 0000000000000..c3fb10be300bb
--- /dev/null
+++ b/clang/test/AST/ByteCode/switch-case-steps.cpp
@@ -0,0 +1,120 @@
+// RUN: %clang_cc1 -fexperimental-new-constant-interpreter -std=c++23 %s 
-fconstexpr-steps=2000
+// RUN: %clang_cc1                                         -std=c++23 %s 
-fconstexpr-steps=2000
+
+
+
+constexpr int char_to_int(char ch) {
+  switch (ch) {
+  case '0':
+    return 0;
+  case '1':
+    return 1;
+  case '2':
+    return 2;
+  case '3':
+    return 3;
+  case '4':
+    return 4;
+  case '5':
+    return 5;
+  case '6':
+    return 6;
+  case '7':
+    return 7;
+  case '8':
+    return 8;
+  case '9':
+    return 9;
+  case 'a':
+  case 'A':
+    return 10;
+  case 'b':
+  case 'B':
+    return 11;
+  case 'c':
+  case 'C':
+    return 12;
+  case 'd':
+  case 'D':
+    return 13;
+  case 'e':
+  case 'E':
+    return 14;
+  case 'f':
+  case 'F':
+    return 15;
+  case 'g':
+  case 'G':
+    return 16;
+  case 'h':
+  case 'H':
+    return 17;
+  case 'i':
+  case 'I':
+    return 18;
+  case 'j':
+  case 'J':
+    return 19;
+  case 'k':
+  case 'K':
+    return 20;
+  case 'l':
+  case 'L':
+    return 21;
+  case 'm':
+  case 'M':
+    return 22;
+  case 'n':
+  case 'N':
+    return 23;
+  case 'o':
+  case 'O':
+    return 24;
+  case 'p':
+  case 'P':
+    return 25;
+  case 'q':
+  case 'Q':
+    return 26;
+  case 'r':
+  case 'R':
+    return 27;
+  case 's':
+  case 'S':
+    return 28;
+  case 't':
+  case 'T':
+    return 29;
+  case 'u':
+  case 'U':
+    return 30;
+  case 'v':
+  case 'V':
+    return 31;
+  case 'w':
+  case 'W':
+    return 32;
+  case 'x':
+  case 'X':
+    return 33;
+  case 'y':
+  case 'Y':
+    return 34;
+  case 'z':
+  case 'Z':
+    return 35;
+  default:
+    return 0;
+  }
+}
+
+constexpr bool check() {
+  const char *str = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n";
+  unsigned sum = 0;
+  for (const char *p = str; *p != '\0'; ++p) {
+    sum+= char_to_int(*p);
+  }
+
+  return sum != 0;
+}
+static_assert(check());


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to