[clang] [Clang] Fix offsetof sign-extending unsigned array indices >= 128 (PR #204139)

Marlus Cadanus da Costa via cfe-commits Thu, 25 Jun 2026 06:55:43 -0700

https://github.com/marlus updated 
https://github.com/llvm/llvm-project/pull/204139


>From 055db081f85fe617fa1025e2bf582338e4d747bd Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Tue, 16 Jun 2026 08:42:14 -0400
Subject: [PATCH 01/17] [Clang] Fix offsetof sign-extending unsigned array
 indices >= 128

When evaluating __builtin_offsetof with an unsigned integer array index
(e.g. uint8_t, uint16_t) whose value has the high bit set, Clang was
calling getSExtValue() on the APSInt index, which sign-extends the value
and produces a large bogus offset.

Fix this by checking whether the APSInt is unsigned and using
getZExtValue() in that case instead.

Fixes #199319
---
 clang/lib/AST/ExprConstant.cpp                |  3 +-
 clang/test/Sema/offsetof-unsigned-index.c     | 25 +++++++++++++++
 .../test/SemaCXX/offsetof-unsigned-index.cpp  | 31 +++++++++++++++++++
 3 files changed, 58 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/Sema/offsetof-unsigned-index.c
 create mode 100644 clang/test/SemaCXX/offsetof-unsigned-index.cpp

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index bc98c0d86bb65..a2a3349b21baf 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -19284,7 +19284,8 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const 
OffsetOfExpr *OOE) {
         return Error(OOE);
       CurrentType = AT->getElementType();
       CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType);
-      Result += IdxResult.getSExtValue() * ElementSize;
+      Result += (IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue()
+                                        : IdxResult.getSExtValue()) * 
ElementSize;
       break;
     }
 
diff --git a/clang/test/Sema/offsetof-unsigned-index.c 
b/clang/test/Sema/offsetof-unsigned-index.c
new file mode 100644
index 0000000000000..480e486fbad72
--- /dev/null
+++ b/clang/test/Sema/offsetof-unsigned-index.c
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu
+// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu 
-fexperimental-new-constant-interpreter
+
+// expected-no-diagnostics
+
+// Test that offsetof correctly zero-extends unsigned array indices >= 128.
+// Previously, Clang would sign-extend uint8_t indices >= 128, producing
+// a large bogus offset value instead of the correct one.
+// https://github.com/llvm/llvm-project/issues/199319
+
+#include <stdint.h>
+#include <stddef.h>
+
+struct MyStruct {
+    void *ptrs[256];
+};
+
+_Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)127]) == 127 
* sizeof(void *),
+               "offsetof with uint8_t index 127 should be correct");
+
+_Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)128]) == 128 
* sizeof(void *),
+               "offsetof with uint8_t index 128 should be correctly 
zero-extended, not sign-extended");
+
+_Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)255]) == 255 
* sizeof(void *),
+               "offsetof with uint8_t index 255 should be correctly 
zero-extended, not sign-extended");
diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp 
b/clang/test/SemaCXX/offsetof-unsigned-index.cpp
new file mode 100644
index 0000000000000..50bf50ef0fc35
--- /dev/null
+++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -std=c++11
+// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu 
-std=c++11 -fexperimental-new-constant-interpreter
+
+// expected-no-diagnostics
+
+// Test that offsetof correctly zero-extends unsigned array indices >= 128.
+// Previously, Clang would sign-extend uint8_t/uint16_t indices whose high bit
+// was set, producing a large bogus offset value instead of the correct one.
+// https://github.com/llvm/llvm-project/issues/199319
+
+#include <cstdint>
+#include <cstddef>
+
+struct MyStruct {
+    void *ptrs[256];
+};
+
+// uint8_t index: values >= 128 were incorrectly sign-extended
+static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)127]) == 127 * 
sizeof(void *),
+              "offsetof with uint8_t index 127 should be correct");
+static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)128]) == 128 * 
sizeof(void *),
+              "offsetof with uint8_t index 128 should be correctly 
zero-extended");
+static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)255]) == 255 * 
sizeof(void *),
+              "offsetof with uint8_t index 255 should be correctly 
zero-extended");
+
+// uint16_t index: values >= 32768 were also affected
+struct BigStruct {
+    char data[65536];
+};
+static_assert(__builtin_offsetof(BigStruct, data[(uint16_t)32768]) == 32768,
+              "offsetof with uint16_t index 32768 should be correctly 
zero-extended");

>From 71f80764449046ef26d2ad399fcd2853359a2a6a Mon Sep 17 00:00:00 2001
From: Cadanus da Costa <[email protected]>
Date: Tue, 16 Jun 2026 14:52:53 -0400
Subject: [PATCH 02/17] Apply clang-format

---
 clang/lib/AST/ExprConstant.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index a2a3349b21baf..eb76c10076669 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -19285,7 +19285,8 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const 
OffsetOfExpr *OOE) {
       CurrentType = AT->getElementType();
       CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType);
       Result += (IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue()
-                                        : IdxResult.getSExtValue()) * 
ElementSize;
+                                        : IdxResult.getSExtValue()) *
+                ElementSize;
       break;
     }
 

>From d23b24d816caca1c90bf7b279736f05eb059a1f8 Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Tue, 16 Jun 2026 15:02:17 -0400
Subject: [PATCH 03/17] Keep ByteCode compiler in sync: fix offsetof
 sign-extending unsigned array indices

---
 clang/lib/AST/ByteCode/Compiler.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/clang/lib/AST/ByteCode/Compiler.cpp 
b/clang/lib/AST/ByteCode/Compiler.cpp
index 638e6ecafb295..48425d26aeb22 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -3801,8 +3801,14 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const 
OffsetOfExpr *E) {
 
       if (!this->visit(ArrayIndexExpr))
         return false;
-      // Cast to Sint64.
+      // Cast to Sint64. For unsigned types, cast to Uint64 first to
+      // avoid sign-extending values with the high bit set (e.g. uint8_t >= 
128).
       if (IndexT != PT_Sint64) {
+        if (!isSignedType(IndexT) && IndexT != PT_Uint64) {
+          if (!this->emitCast(IndexT, PT_Uint64, E))
+            return false;
+          IndexT = PT_Uint64;
+        }
         if (!this->emitCast(IndexT, PT_Sint64, E))
           return false;
       }

>From fcd22f3cf909345718f2820c9f6d3ad3ef806250 Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Tue, 16 Jun 2026 16:00:44 -0400
Subject: [PATCH 04/17] Reject negative and oversized offsetof array indices

---
 clang/lib/AST/ExprConstant.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index eb76c10076669..578aba948c1d7 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -19284,6 +19284,11 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const 
OffsetOfExpr *OOE) {
         return Error(OOE);
       CurrentType = AT->getElementType();
       CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType);
+      // Reject negative indices and indices too large to fit in int64_t,
+      // to avoid sign-extension issues or crashes in getZExtValue().
+      if (IdxResult.isSigned() ? IdxResult.isNegative()
+                               : IdxResult.ugt(APSInt::getMaxValue(64, 
/*Unsigned=*/false)))
+        return Error(OOE);
       Result += (IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue()
                                         : IdxResult.getSExtValue()) *
                 ElementSize;

>From 224d40a02ca3ae64857122cf0279e37a817cb5b0 Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Tue, 16 Jun 2026 16:09:59 -0400
Subject: [PATCH 05/17] Reject negative and oversized offsetof array indices in
 ByteCode interpreter

Apply the same guards to InterpretOffsetOf in the ByteCode interpreter
(clang/lib/AST/ByteCode/InterpBuiltin.cpp) as were added to ExprConstant.cpp:
reject negative indices and unsigned indices that exceed INT64_MAX.

In the ByteCode path, indices arrive as int64_t after the Uint64->Sint64 cast
chain in Compiler.cpp. A negative int64_t covers both explicitly negative
signed indices and unsigned values >= 0x8000000000000000 (which wrap to
negative after the cast), so a single Index < 0 guard handles both cases.

Also extend the test files with expected-error cases for negative and
__uint128_t indices.
---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp       |  8 ++++++++
 clang/test/Sema/offsetof-unsigned-index.c      | 11 +++++++++--
 clang/test/SemaCXX/offsetof-unsigned-index.cpp | 10 ++++++++--
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 55907bf11506b..91432e204d3e2 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -6544,6 +6544,14 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, 
const OffsetOfExpr *E,
       // When generating bytecode, we put all the index expressions as Sint64 
on
       // the stack.
       int64_t Index = ArrayIndices[ArrayIndex];
+      // Reject negative indices and unsigned indices that wrapped to negative
+      // after the Uint64->Sint64 cast (e.g. __uint128_t >= 
0x8000000000000000).
+      if (Index < 0) {
+        S.FFDiag(S.Current->getLocation(OpPC),
+                 diag::note_invalid_subexpr_in_const_expr)
+            << S.Current->getRange(OpPC);
+        return false;
+      }
       const ArrayType *AT = S.getASTContext().getAsArrayType(CurrentType);
       if (!AT)
         return false;
diff --git a/clang/test/Sema/offsetof-unsigned-index.c 
b/clang/test/Sema/offsetof-unsigned-index.c
index 480e486fbad72..55f1e08f3a535 100644
--- a/clang/test/Sema/offsetof-unsigned-index.c
+++ b/clang/test/Sema/offsetof-unsigned-index.c
@@ -1,11 +1,10 @@
 // RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu
 // RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu 
-fexperimental-new-constant-interpreter
 
-// expected-no-diagnostics
-
 // Test that offsetof correctly zero-extends unsigned array indices >= 128.
 // Previously, Clang would sign-extend uint8_t indices >= 128, producing
 // a large bogus offset value instead of the correct one.
+// Also tests that negative indices and oversized __uint128_t indices are 
rejected.
 // https://github.com/llvm/llvm-project/issues/199319
 
 #include <stdint.h>
@@ -15,6 +14,7 @@ struct MyStruct {
     void *ptrs[256];
 };
 
+// Unsigned indices that were previously sign-extended must be zero-extended.
 _Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)127]) == 127 
* sizeof(void *),
                "offsetof with uint8_t index 127 should be correct");
 
@@ -23,3 +23,10 @@ _Static_assert(__builtin_offsetof(struct MyStruct, 
ptrs[(uint8_t)128]) == 128 *
 
 _Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)255]) == 255 
* sizeof(void *),
                "offsetof with uint8_t index 255 should be correctly 
zero-extended, not sign-extended");
+
+// Negative indices must be rejected.
+struct NegIdxStruct { int a; int x[1]; };
+_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[-1]) == 0, ""); // 
expected-error {{not an integral constant expression}} expected-note 
{{subexpression not valid in a constant expression}}
+
+// __uint128_t indices >= 0x8000000000000000 must be rejected.
+_Static_assert(__builtin_offsetof(struct NegIdxStruct, 
x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}
diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp 
b/clang/test/SemaCXX/offsetof-unsigned-index.cpp
index 50bf50ef0fc35..2cea1f32be472 100644
--- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp
+++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp
@@ -1,11 +1,10 @@
 // RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -std=c++11
 // RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu 
-std=c++11 -fexperimental-new-constant-interpreter
 
-// expected-no-diagnostics
-
 // Test that offsetof correctly zero-extends unsigned array indices >= 128.
 // Previously, Clang would sign-extend uint8_t/uint16_t indices whose high bit
 // was set, producing a large bogus offset value instead of the correct one.
+// Also tests that negative indices and oversized __uint128_t indices are 
rejected.
 // https://github.com/llvm/llvm-project/issues/199319
 
 #include <cstdint>
@@ -29,3 +28,10 @@ struct BigStruct {
 };
 static_assert(__builtin_offsetof(BigStruct, data[(uint16_t)32768]) == 32768,
               "offsetof with uint16_t index 32768 should be correctly 
zero-extended");
+
+// Negative indices must be rejected.
+struct NegIdxStruct { int a; int x[1]; };
+static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, ""); // 
expected-error {{not an integral constant expression}} expected-note 
{{subexpression not valid in a constant expression}}
+
+// __uint128_t indices >= 0x8000000000000000 must be rejected.
+static_assert(__builtin_offsetof(NegIdxStruct, 
x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}

>From 1861378a79242dd66140bcd1042d12374eaad18d Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Tue, 16 Jun 2026 16:20:17 -0400
Subject: [PATCH 06/17] clang-format: break long line in VisitOffsetOfExpr

---
 clang/lib/AST/ExprConstant.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 578aba948c1d7..307b5db0b231f 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -19286,8 +19286,9 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const 
OffsetOfExpr *OOE) {
       CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType);
       // Reject negative indices and indices too large to fit in int64_t,
       // to avoid sign-extension issues or crashes in getZExtValue().
+      APSInt MaxIdx = APSInt::getMaxValue(64, /*Unsigned=*/false);
       if (IdxResult.isSigned() ? IdxResult.isNegative()
-                               : IdxResult.ugt(APSInt::getMaxValue(64, 
/*Unsigned=*/false)))
+                               : IdxResult.ugt(MaxIdx))
         return Error(OOE);
       Result += (IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue()
                                         : IdxResult.getSExtValue()) *

>From bee3053414d1ff74bb12366112f62008233e1e15 Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Wed, 17 Jun 2026 09:40:26 -0400
Subject: [PATCH 07/17] Fix offsetof overflow and AP index handling

---
 clang/lib/AST/ByteCode/Compiler.cpp      |  7 +++++--
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 12 +++++++-----
 clang/lib/AST/ExprConstant.cpp           | 23 ++++++++++++++++-------
 3 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/clang/lib/AST/ByteCode/Compiler.cpp 
b/clang/lib/AST/ByteCode/Compiler.cpp
index 48425d26aeb22..159b910f82302 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -3801,8 +3801,11 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const 
OffsetOfExpr *E) {
 
       if (!this->visit(ArrayIndexExpr))
         return false;
-      // Cast to Sint64. For unsigned types, cast to Uint64 first to
-      // avoid sign-extending values with the high bit set (e.g. uint8_t >= 
128).
+      // Cast to Sint64. For unsigned types, cast to Uint64 first to avoid
+      // sign-extending values with the high bit set (e.g. uint8_t >= 128).
+      // AP types cannot be safely narrowed to Sint64; fail constant 
evaluation.
+      if (IndexT == PT_IntAP || IndexT == PT_IntAPS)
+        return false;
       if (IndexT != PT_Sint64) {
         if (!isSignedType(IndexT) && IndexT != PT_Uint64) {
           if (!this->emitCast(IndexT, PT_Uint64, E))
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 91432e204d3e2..07af289f99243 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -6541,11 +6541,7 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, 
const OffsetOfExpr *E,
       break;
     }
     case OffsetOfNode::Array: {
-      // When generating bytecode, we put all the index expressions as Sint64 
on
-      // the stack.
       int64_t Index = ArrayIndices[ArrayIndex];
-      // Reject negative indices and unsigned indices that wrapped to negative
-      // after the Uint64->Sint64 cast (e.g. __uint128_t >= 
0x8000000000000000).
       if (Index < 0) {
         S.FFDiag(S.Current->getLocation(OpPC),
                  diag::note_invalid_subexpr_in_const_expr)
@@ -6557,7 +6553,13 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, 
const OffsetOfExpr *E,
         return false;
       CurrentType = AT->getElementType();
       CharUnits ElementSize = 
S.getASTContext().getTypeSizeInChars(CurrentType);
-      Result += Index * ElementSize;
+      int64_t ElemSize = ElementSize.getQuantity();
+      if (Index != 0 && ElemSize > llvm::maxIntN(64) / Index)
+        return false;
+      int64_t Offset = Index * ElemSize;
+      if (Result.getQuantity() > llvm::maxIntN(64) - Offset)
+        return false;
+      Result += CharUnits::fromQuantity(Offset);
       ++ArrayIndex;
       break;
     }
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 307b5db0b231f..3b96c884a50f0 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -19284,15 +19284,24 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const 
OffsetOfExpr *OOE) {
         return Error(OOE);
       CurrentType = AT->getElementType();
       CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType);
-      // Reject negative indices and indices too large to fit in int64_t,
-      // to avoid sign-extension issues or crashes in getZExtValue().
+      // Reject negative indices, indices too large to fit in int64_t,
+      // and overflow in the offset computation.
       APSInt MaxIdx = APSInt::getMaxValue(64, /*Unsigned=*/false);
-      if (IdxResult.isSigned() ? IdxResult.isNegative()
-                               : IdxResult.ugt(MaxIdx))
+      if (IdxResult.isSigned()
+              ? (IdxResult.isNegative() || IdxResult.sgt(MaxIdx))
+              : IdxResult.ugt(MaxIdx))
         return Error(OOE);
-      Result += (IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue()
-                                        : IdxResult.getSExtValue()) *
-                ElementSize;
+      int64_t IdxVal = IdxResult.isUnsigned()
+                           ? (int64_t)IdxResult.getZExtValue()
+                           : IdxResult.getSExtValue();
+      int64_t ElemSize = ElementSize.getQuantity();
+      if (IdxVal != 0 &&
+          ElemSize > std::numeric_limits<int64_t>::max() / IdxVal)
+        return Error(OOE);
+      int64_t Offset = IdxVal * ElemSize;
+      if (Result.getQuantity() > std::numeric_limits<int64_t>::max() - Offset)
+        return Error(OOE);
+      Result += CharUnits::fromQuantity(Offset);
       break;
     }
 

>From e526016bdeeb2662fc6c0864f26ef3af8d1bf379 Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Wed, 17 Jun 2026 10:06:38 -0400
Subject: [PATCH 08/17] Add tests for __uint128_t > UINT64_MAX and multiply
 overflow

---
 clang/test/Sema/offsetof-unsigned-index.c      | 10 ++++++++++
 clang/test/SemaCXX/offsetof-unsigned-index.cpp | 10 ++++++++++
 2 files changed, 20 insertions(+)

diff --git a/clang/test/Sema/offsetof-unsigned-index.c 
b/clang/test/Sema/offsetof-unsigned-index.c
index 55f1e08f3a535..1b34ebe60443f 100644
--- a/clang/test/Sema/offsetof-unsigned-index.c
+++ b/clang/test/Sema/offsetof-unsigned-index.c
@@ -30,3 +30,13 @@ _Static_assert(__builtin_offsetof(struct NegIdxStruct, 
x[-1]) == 0, ""); // expe
 
 // __uint128_t indices >= 0x8000000000000000 must be rejected.
 _Static_assert(__builtin_offsetof(struct NegIdxStruct, 
x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}
+
+// __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero:
+// old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a
+// wrong result instead of an error).
+_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[((__uint128_t)1 << 
64)]) == 0, ""); // expected-error {{not an integral constant expression}} 
expected-note {{subexpression not valid in a constant expression}}
+
+// A uint64_t index that causes index*sizeof(element) to overflow int64_t must
+// be rejected.  4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX.
+struct ShortArray { short data[2]; };
+_Static_assert(__builtin_offsetof(struct ShortArray, 
data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}
diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp 
b/clang/test/SemaCXX/offsetof-unsigned-index.cpp
index 2cea1f32be472..2b3995d405705 100644
--- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp
+++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp
@@ -35,3 +35,13 @@ static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, 
""); // expected-err
 
 // __uint128_t indices >= 0x8000000000000000 must be rejected.
 static_assert(__builtin_offsetof(NegIdxStruct, 
x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}
+
+// __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero:
+// old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a
+// wrong result instead of an error).
+static_assert(__builtin_offsetof(NegIdxStruct, x[((__uint128_t)1 << 64)]) == 
0, ""); // expected-error {{not an integral constant expression}} expected-note 
{{subexpression not valid in a constant expression}}
+
+// A uint64_t index that causes index*sizeof(element) to overflow int64_t must
+// be rejected.  4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX.
+struct ShortArray { short data[2]; };
+static_assert(__builtin_offsetof(ShortArray, 
data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}

>From b30608ccf58263ec66b266ac6064c1b04a9942c7 Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Fri, 19 Jun 2026 09:54:59 -0400
Subject: [PATCH 09/17] Handle small AP-typed offsetof indices correctly

---
 clang/lib/AST/ByteCode/Compiler.cpp            | 16 +++++++++++++---
 clang/test/Sema/offsetof-unsigned-index.c      |  5 +++++
 clang/test/SemaCXX/offsetof-unsigned-index.cpp |  5 +++++
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/clang/lib/AST/ByteCode/Compiler.cpp 
b/clang/lib/AST/ByteCode/Compiler.cpp
index 159b910f82302..e954e2915bfc0 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -3799,13 +3799,23 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const 
OffsetOfExpr *E) {
         continue;
       }
 
+      if (IndexT == PT_IntAP || IndexT == PT_IntAPS) {
+        // AP types (e.g. __uint128_t, __int128) cannot be safely cast to
+        // Sint64. Evaluate the constant and push it directly as Sint64.
+        Expr::EvalResult EvalResult;
+        if (!ArrayIndexExpr->EvaluateAsInt(EvalResult, Ctx.getASTContext()))
+          return false;
+        llvm::APSInt IdxVal = EvalResult.Val.getInt();
+        if (IdxVal.isNegative() || !IdxVal.isSignedIntN(64))
+          return false;
+        if (!this->emitConstSint64((int64_t)IdxVal.getZExtValue(), E))
+          return false;
+        continue;
+      }
       if (!this->visit(ArrayIndexExpr))
         return false;
       // Cast to Sint64. For unsigned types, cast to Uint64 first to avoid
       // sign-extending values with the high bit set (e.g. uint8_t >= 128).
-      // AP types cannot be safely narrowed to Sint64; fail constant 
evaluation.
-      if (IndexT == PT_IntAP || IndexT == PT_IntAPS)
-        return false;
       if (IndexT != PT_Sint64) {
         if (!isSignedType(IndexT) && IndexT != PT_Uint64) {
           if (!this->emitCast(IndexT, PT_Uint64, E))
diff --git a/clang/test/Sema/offsetof-unsigned-index.c 
b/clang/test/Sema/offsetof-unsigned-index.c
index 1b34ebe60443f..57bce06a25c98 100644
--- a/clang/test/Sema/offsetof-unsigned-index.c
+++ b/clang/test/Sema/offsetof-unsigned-index.c
@@ -31,6 +31,11 @@ _Static_assert(__builtin_offsetof(struct NegIdxStruct, 
x[-1]) == 0, ""); // expe
 // __uint128_t indices >= 0x8000000000000000 must be rejected.
 _Static_assert(__builtin_offsetof(struct NegIdxStruct, 
x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}
 
+// Small __uint128_t values that fit in int64_t must work correctly.
+_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0]) ==
+               __builtin_offsetof(struct NegIdxStruct, x),
+               "offsetof with __uint128_t index 0 should work");
+
 // __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero:
 // old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a
 // wrong result instead of an error).
diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp 
b/clang/test/SemaCXX/offsetof-unsigned-index.cpp
index 2b3995d405705..880c45dec0e3e 100644
--- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp
+++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp
@@ -36,6 +36,11 @@ static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, 
""); // expected-err
 // __uint128_t indices >= 0x8000000000000000 must be rejected.
 static_assert(__builtin_offsetof(NegIdxStruct, 
x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}
 
+// Small __uint128_t values that fit in int64_t must work correctly.
+static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0]) ==
+              __builtin_offsetof(NegIdxStruct, x),
+              "offsetof with __uint128_t index 0 should work");
+
 // __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero:
 // old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a
 // wrong result instead of an error).

>From aa622ff06d46ee74c81e9069eaca274236508bb7 Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Tue, 23 Jun 2026 09:49:51 -0400
Subject: [PATCH 10/17] Fix offsetof AP index handling: add CastNoOverflow
 opcode and APInt crash fix

- ExprConstant.cpp: replace sgt/ugt(MaxIdx) with getActiveBits() > 63 to
  avoid APInt bit-width assertion crash when comparing 128-bit values
- Opcodes.td: add APOnlyTypeClass (IntAP, IntAPS) and CastNoOverflow opcode
- Interp.h: implement CastNoOverflow -- pops AP value, rejects negative or
  values that don't fit in int64_t (activeBits > 63), pushes as Sint64
- Compiler.cpp: for AP types, emit visit + CastNoOverflow instead of
  calling EvaluateAsInt (which is not allowed in the compiler phase)
---
 clang/lib/AST/ByteCode/Compiler.cpp            | 10 ++--------
 clang/lib/AST/ByteCode/Interp.h                | 17 +++++++++++++++++
 clang/lib/AST/ByteCode/Opcodes.td              | 10 ++++++++++
 clang/lib/AST/ExprConstant.cpp                 |  5 +----
 clang/test/Sema/offsetof-unsigned-index.c      | 11 +++++++----
 clang/test/SemaCXX/offsetof-unsigned-index.cpp | 15 +++++++++------
 6 files changed, 46 insertions(+), 22 deletions(-)

diff --git a/clang/lib/AST/ByteCode/Compiler.cpp 
b/clang/lib/AST/ByteCode/Compiler.cpp
index e954e2915bfc0..23c4bd4108a2f 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -3800,15 +3800,9 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const 
OffsetOfExpr *E) {
       }
 
       if (IndexT == PT_IntAP || IndexT == PT_IntAPS) {
-        // AP types (e.g. __uint128_t, __int128) cannot be safely cast to
-        // Sint64. Evaluate the constant and push it directly as Sint64.
-        Expr::EvalResult EvalResult;
-        if (!ArrayIndexExpr->EvaluateAsInt(EvalResult, Ctx.getASTContext()))
+        if (!this->visit(ArrayIndexExpr))
           return false;
-        llvm::APSInt IdxVal = EvalResult.Val.getInt();
-        if (IdxVal.isNegative() || !IdxVal.isSignedIntN(64))
-          return false;
-        if (!this->emitConstSint64((int64_t)IdxVal.getZExtValue(), E))
+        if (!this->emitCastNoOverflow(IndexT, E))
           return false;
         continue;
       }
diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h
index ad807816aa904..c8709ce0f960b 100644
--- a/clang/lib/AST/ByteCode/Interp.h
+++ b/clang/lib/AST/ByteCode/Interp.h
@@ -2877,6 +2877,23 @@ bool CastAPS(InterpState &S, CodePtr OpPC, uint32_t 
BitWidth) {
   return true;
 }
 
+// Cast an AP integer to Sint64, failing constant evaluation if the value is
+// negative or too large to fit (i.e. truncation would change the value).
+template <PrimType Name, class T = typename PrimConv<Name>::T>
+bool CastNoOverflow(InterpState &S, CodePtr OpPC) {
+  T Source = S.Stk.pop<T>();
+  APSInt Val = Source.toAPSInt();
+  if (Val.isNegative() || Val.getActiveBits() > 63) {
+    S.FFDiag(S.Current->getLocation(OpPC),
+             diag::note_invalid_subexpr_in_const_expr)
+        << S.Current->getRange(OpPC);
+    return false;
+  }
+  S.Stk.push<Integral<64, true>>(
+      Integral<64, true>::from((int64_t)Val.getZExtValue()));
+  return true;
+}
+
 template <PrimType Name, class T = typename PrimConv<Name>::T>
 bool CastIntegralFloating(InterpState &S, CodePtr OpPC,
                           const llvm::fltSemantics *Sem, uint32_t FPOI) {
diff --git a/clang/lib/AST/ByteCode/Opcodes.td 
b/clang/lib/AST/ByteCode/Opcodes.td
index e350d7b2e547d..b375cb5f6b34d 100644
--- a/clang/lib/AST/ByteCode/Opcodes.td
+++ b/clang/lib/AST/ByteCode/Opcodes.td
@@ -748,6 +748,16 @@ def CastAPS : Opcode {
   let HasGroup = 1;
 }
 
+def APOnlyTypeClass : TypeClass {
+  let Types = [IntAP, IntAPS];
+}
+
+// Cast from an AP integer type to Sint64, failing if the value doesn't fit.
+def CastNoOverflow : Opcode {
+  let Types = [APOnlyTypeClass];
+  let HasGroup = 1;
+}
+
 // Cast an integer to a floating type
 def CastIntegralFloating : Opcode {
   let Types = [AluTypeClass];
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 3b96c884a50f0..efdc777127bef 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -19286,10 +19286,7 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const 
OffsetOfExpr *OOE) {
       CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType);
       // Reject negative indices, indices too large to fit in int64_t,
       // and overflow in the offset computation.
-      APSInt MaxIdx = APSInt::getMaxValue(64, /*Unsigned=*/false);
-      if (IdxResult.isSigned()
-              ? (IdxResult.isNegative() || IdxResult.sgt(MaxIdx))
-              : IdxResult.ugt(MaxIdx))
+      if (IdxResult.isNegative() || IdxResult.getActiveBits() > 63)
         return Error(OOE);
       int64_t IdxVal = IdxResult.isUnsigned()
                            ? (int64_t)IdxResult.getZExtValue()
diff --git a/clang/test/Sema/offsetof-unsigned-index.c 
b/clang/test/Sema/offsetof-unsigned-index.c
index 57bce06a25c98..075a96b0e8163 100644
--- a/clang/test/Sema/offsetof-unsigned-index.c
+++ b/clang/test/Sema/offsetof-unsigned-index.c
@@ -26,22 +26,25 @@ _Static_assert(__builtin_offsetof(struct MyStruct, 
ptrs[(uint8_t)255]) == 255 *
 
 // Negative indices must be rejected.
 struct NegIdxStruct { int a; int x[1]; };
-_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[-1]) == 0, ""); // 
expected-error {{not an integral constant expression}} expected-note 
{{subexpression not valid in a constant expression}}
+_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[-1]) == 0, ""); // 
expected-error {{not an integral constant expression}}
 
 // __uint128_t indices >= 0x8000000000000000 must be rejected.
-_Static_assert(__builtin_offsetof(struct NegIdxStruct, 
x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}
+_Static_assert(__builtin_offsetof(struct NegIdxStruct, 
x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an 
integral constant expression}}
 
 // Small __uint128_t values that fit in int64_t must work correctly.
 _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0]) ==
                __builtin_offsetof(struct NegIdxStruct, x),
                "offsetof with __uint128_t index 0 should work");
+_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)1]) ==
+               __builtin_offsetof(struct NegIdxStruct, x) + sizeof(int),
+               "offsetof with __uint128_t index 1 should work");
 
 // __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero:
 // old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a
 // wrong result instead of an error).
-_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[((__uint128_t)1 << 
64)]) == 0, ""); // expected-error {{not an integral constant expression}} 
expected-note {{subexpression not valid in a constant expression}}
+_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[((__uint128_t)1 << 
64)]) == 0, ""); // expected-error {{not an integral constant expression}}
 
 // A uint64_t index that causes index*sizeof(element) to overflow int64_t must
 // be rejected.  4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX.
 struct ShortArray { short data[2]; };
-_Static_assert(__builtin_offsetof(struct ShortArray, 
data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}
+_Static_assert(__builtin_offsetof(struct ShortArray, 
data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an 
integral constant expression}}
diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp 
b/clang/test/SemaCXX/offsetof-unsigned-index.cpp
index 880c45dec0e3e..22d0eda636f08 100644
--- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp
+++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp
@@ -7,8 +7,8 @@
 // Also tests that negative indices and oversized __uint128_t indices are 
rejected.
 // https://github.com/llvm/llvm-project/issues/199319
 
-#include <cstdint>
-#include <cstddef>
+#include <stdint.h>
+#include <stddef.h>
 
 struct MyStruct {
     void *ptrs[256];
@@ -31,22 +31,25 @@ static_assert(__builtin_offsetof(BigStruct, 
data[(uint16_t)32768]) == 32768,
 
 // Negative indices must be rejected.
 struct NegIdxStruct { int a; int x[1]; };
-static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, ""); // 
expected-error {{not an integral constant expression}} expected-note 
{{subexpression not valid in a constant expression}}
+static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, ""); // 
expected-error {{not an integral constant expression}}
 
 // __uint128_t indices >= 0x8000000000000000 must be rejected.
-static_assert(__builtin_offsetof(NegIdxStruct, 
x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}
+static_assert(__builtin_offsetof(NegIdxStruct, 
x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an 
integral constant expression}}
 
 // Small __uint128_t values that fit in int64_t must work correctly.
 static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0]) ==
               __builtin_offsetof(NegIdxStruct, x),
               "offsetof with __uint128_t index 0 should work");
+static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)1]) ==
+              __builtin_offsetof(NegIdxStruct, x) + sizeof(int),
+              "offsetof with __uint128_t index 1 should work");
 
 // __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero:
 // old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a
 // wrong result instead of an error).
-static_assert(__builtin_offsetof(NegIdxStruct, x[((__uint128_t)1 << 64)]) == 
0, ""); // expected-error {{not an integral constant expression}} expected-note 
{{subexpression not valid in a constant expression}}
+static_assert(__builtin_offsetof(NegIdxStruct, x[((__uint128_t)1 << 64)]) == 
0, ""); // expected-error {{not an integral constant expression}}
 
 // A uint64_t index that causes index*sizeof(element) to overflow int64_t must
 // be rejected.  4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX.
 struct ShortArray { short data[2]; };
-static_assert(__builtin_offsetof(ShortArray, 
data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}
+static_assert(__builtin_offsetof(ShortArray, 
data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an 
integral constant expression}}

>From 4bc3f2d4018a6fa6308bb61169753a4b1ba32d86 Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Tue, 23 Jun 2026 14:36:29 -0400
Subject: [PATCH 11/17] Use APSInt::getExtValue() in offsetof index evaluation

---
 clang/lib/AST/ExprConstant.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index efdc777127bef..7156ed2332c28 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -19288,9 +19288,7 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const 
OffsetOfExpr *OOE) {
       // and overflow in the offset computation.
       if (IdxResult.isNegative() || IdxResult.getActiveBits() > 63)
         return Error(OOE);
-      int64_t IdxVal = IdxResult.isUnsigned()
-                           ? (int64_t)IdxResult.getZExtValue()
-                           : IdxResult.getSExtValue();
+      int64_t IdxVal = IdxResult.getExtValue();
       int64_t ElemSize = ElementSize.getQuantity();
       if (IdxVal != 0 &&
           ElemSize > std::numeric_limits<int64_t>::max() / IdxVal)

>From b8c4b580dcb1c469507cde8c82625450ff9ba847 Mon Sep 17 00:00:00 2001
From: Cadanus da Costa <[email protected]>
Date: Thu, 25 Jun 2026 09:24:34 -0400
Subject: [PATCH 12/17] Move BigStruct/uint16_t test to .c file and drop .cpp
 test

No C++-specific logic in the test; move uint16_t/BigStruct case to the
.c test file and delete the redundant SemaCXX variant.

Addresses reviewer comment from efriedma-quic.
---
 clang/test/Sema/offsetof-unsigned-index.c     |  5 ++
 .../test/SemaCXX/offsetof-unsigned-index.cpp  | 55 -------------------
 2 files changed, 5 insertions(+), 55 deletions(-)
 delete mode 100644 clang/test/SemaCXX/offsetof-unsigned-index.cpp

diff --git a/clang/test/Sema/offsetof-unsigned-index.c 
b/clang/test/Sema/offsetof-unsigned-index.c
index 075a96b0e8163..d873175c22eb8 100644
--- a/clang/test/Sema/offsetof-unsigned-index.c
+++ b/clang/test/Sema/offsetof-unsigned-index.c
@@ -24,6 +24,11 @@ _Static_assert(__builtin_offsetof(struct MyStruct, 
ptrs[(uint8_t)128]) == 128 *
 _Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)255]) == 255 
* sizeof(void *),
                "offsetof with uint8_t index 255 should be correctly 
zero-extended, not sign-extended");
 
+// uint16_t index: values >= 32768 were also affected by sign-extension.
+struct BigStruct { char data[65536]; };
+_Static_assert(__builtin_offsetof(struct BigStruct, data[(uint16_t)32768]) == 
32768,
+               "offsetof with uint16_t index 32768 should be correctly 
zero-extended");
+
 // Negative indices must be rejected.
 struct NegIdxStruct { int a; int x[1]; };
 _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[-1]) == 0, ""); // 
expected-error {{not an integral constant expression}}
diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp 
b/clang/test/SemaCXX/offsetof-unsigned-index.cpp
deleted file mode 100644
index 22d0eda636f08..0000000000000
--- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -std=c++11
-// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu 
-std=c++11 -fexperimental-new-constant-interpreter
-
-// Test that offsetof correctly zero-extends unsigned array indices >= 128.
-// Previously, Clang would sign-extend uint8_t/uint16_t indices whose high bit
-// was set, producing a large bogus offset value instead of the correct one.
-// Also tests that negative indices and oversized __uint128_t indices are 
rejected.
-// https://github.com/llvm/llvm-project/issues/199319
-
-#include <stdint.h>
-#include <stddef.h>
-
-struct MyStruct {
-    void *ptrs[256];
-};
-
-// uint8_t index: values >= 128 were incorrectly sign-extended
-static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)127]) == 127 * 
sizeof(void *),
-              "offsetof with uint8_t index 127 should be correct");
-static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)128]) == 128 * 
sizeof(void *),
-              "offsetof with uint8_t index 128 should be correctly 
zero-extended");
-static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)255]) == 255 * 
sizeof(void *),
-              "offsetof with uint8_t index 255 should be correctly 
zero-extended");
-
-// uint16_t index: values >= 32768 were also affected
-struct BigStruct {
-    char data[65536];
-};
-static_assert(__builtin_offsetof(BigStruct, data[(uint16_t)32768]) == 32768,
-              "offsetof with uint16_t index 32768 should be correctly 
zero-extended");
-
-// Negative indices must be rejected.
-struct NegIdxStruct { int a; int x[1]; };
-static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, ""); // 
expected-error {{not an integral constant expression}}
-
-// __uint128_t indices >= 0x8000000000000000 must be rejected.
-static_assert(__builtin_offsetof(NegIdxStruct, 
x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an 
integral constant expression}}
-
-// Small __uint128_t values that fit in int64_t must work correctly.
-static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0]) ==
-              __builtin_offsetof(NegIdxStruct, x),
-              "offsetof with __uint128_t index 0 should work");
-static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)1]) ==
-              __builtin_offsetof(NegIdxStruct, x) + sizeof(int),
-              "offsetof with __uint128_t index 1 should work");
-
-// __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero:
-// old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a
-// wrong result instead of an error).
-static_assert(__builtin_offsetof(NegIdxStruct, x[((__uint128_t)1 << 64)]) == 
0, ""); // expected-error {{not an integral constant expression}}
-
-// A uint64_t index that causes index*sizeof(element) to overflow int64_t must
-// be rejected.  4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX.
-struct ShortArray { short data[2]; };
-static_assert(__builtin_offsetof(ShortArray, 
data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an 
integral constant expression}}

>From a99a8bfd2a9da6f8feae5fe32b03ccc71e8ac330 Mon Sep 17 00:00:00 2001
From: Cadanus da Costa <[email protected]>
Date: Thu, 25 Jun 2026 09:33:22 -0400
Subject: [PATCH 13/17] Add specific diagnostic for overflow in offsetof
 computation

Instead of the generic 'subexpression not valid in a constant expression'
note, emit 'overflow in offsetof' when the multiply or add step of the
offsetof calculation overflows int64_t.

Add note_constexpr_offsetof_overflow to DiagnosticASTKinds.td and use it
in both the ExprConstant and ByteCode (InterpBuiltin) paths. Update the
test to expect the new note.

Addresses reviewer comment from efriedma-quic.
---
 clang/include/clang/Basic/DiagnosticASTKinds.td |  2 ++
 clang/lib/AST/ByteCode/InterpBuiltin.cpp        | 12 ++++++++++--
 clang/lib/AST/ExprConstant.cpp                  |  4 ++--
 clang/test/Sema/offsetof-unsigned-index.c       |  2 +-
 4 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td 
b/clang/include/clang/Basic/DiagnosticASTKinds.td
index bde418695f647..f86f0157b2b1f 100644
--- a/clang/include/clang/Basic/DiagnosticASTKinds.td
+++ b/clang/include/clang/Basic/DiagnosticASTKinds.td
@@ -24,6 +24,8 @@ def note_constexpr_invalid_downcast : Note<
   "cannot cast object of dynamic type %0 to type %1">;
 def note_constexpr_overflow : Note<
   "value %0 is outside the range of representable values of type %1">;
+def note_constexpr_offsetof_overflow : Note<
+  "overflow in offsetof">;
 def note_constexpr_negative_shift : Note<"negative shift count %0">;
 def note_constexpr_large_shift : Note<
   "shift count %0 >= width of type %1 (%2 bit%s2)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 07af289f99243..540da055eb37f 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -6554,11 +6554,19 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, 
const OffsetOfExpr *E,
       CurrentType = AT->getElementType();
       CharUnits ElementSize = 
S.getASTContext().getTypeSizeInChars(CurrentType);
       int64_t ElemSize = ElementSize.getQuantity();
-      if (Index != 0 && ElemSize > llvm::maxIntN(64) / Index)
+      if (Index != 0 && ElemSize > llvm::maxIntN(64) / Index) {
+        S.FFDiag(S.Current->getLocation(OpPC),
+                 diag::note_constexpr_offsetof_overflow)
+            << S.Current->getRange(OpPC);
         return false;
+      }
       int64_t Offset = Index * ElemSize;
-      if (Result.getQuantity() > llvm::maxIntN(64) - Offset)
+      if (Result.getQuantity() > llvm::maxIntN(64) - Offset) {
+        S.FFDiag(S.Current->getLocation(OpPC),
+                 diag::note_constexpr_offsetof_overflow)
+            << S.Current->getRange(OpPC);
         return false;
+      }
       Result += CharUnits::fromQuantity(Offset);
       ++ArrayIndex;
       break;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 7156ed2332c28..d58e3ec84c8d5 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -19292,10 +19292,10 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const 
OffsetOfExpr *OOE) {
       int64_t ElemSize = ElementSize.getQuantity();
       if (IdxVal != 0 &&
           ElemSize > std::numeric_limits<int64_t>::max() / IdxVal)
-        return Error(OOE);
+        return Error(OOE, diag::note_constexpr_offsetof_overflow);
       int64_t Offset = IdxVal * ElemSize;
       if (Result.getQuantity() > std::numeric_limits<int64_t>::max() - Offset)
-        return Error(OOE);
+        return Error(OOE, diag::note_constexpr_offsetof_overflow);
       Result += CharUnits::fromQuantity(Offset);
       break;
     }
diff --git a/clang/test/Sema/offsetof-unsigned-index.c 
b/clang/test/Sema/offsetof-unsigned-index.c
index d873175c22eb8..cafd2ddff58f8 100644
--- a/clang/test/Sema/offsetof-unsigned-index.c
+++ b/clang/test/Sema/offsetof-unsigned-index.c
@@ -52,4 +52,4 @@ _Static_assert(__builtin_offsetof(struct NegIdxStruct, 
x[((__uint128_t)1 << 64)]
 // A uint64_t index that causes index*sizeof(element) to overflow int64_t must
 // be rejected.  4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX.
 struct ShortArray { short data[2]; };
-_Static_assert(__builtin_offsetof(struct ShortArray, 
data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an 
integral constant expression}}
+_Static_assert(__builtin_offsetof(struct ShortArray, 
data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{overflow in offsetof}}

>From 073670118f8fe8ff879855b9430cd47d807f2133 Mon Sep 17 00:00:00 2001
From: Cadanus da Costa <[email protected]>
Date: Thu, 25 Jun 2026 09:35:54 -0400
Subject: [PATCH 14/17] Replace standard includes in test with manual typedefs

Remove #include <stdint.h> and <stddef.h> from offsetof-unsigned-index.c
and add manual typedef declarations instead, following the convention that
clang test files should not use standard includes.

Addresses reviewer comment from tbaederr.
---
 clang/test/Sema/offsetof-unsigned-index.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/clang/test/Sema/offsetof-unsigned-index.c 
b/clang/test/Sema/offsetof-unsigned-index.c
index cafd2ddff58f8..56396eb2a12e6 100644
--- a/clang/test/Sema/offsetof-unsigned-index.c
+++ b/clang/test/Sema/offsetof-unsigned-index.c
@@ -7,8 +7,9 @@
 // Also tests that negative indices and oversized __uint128_t indices are 
rejected.
 // https://github.com/llvm/llvm-project/issues/199319
 
-#include <stdint.h>
-#include <stddef.h>
+typedef unsigned char      uint8_t;
+typedef unsigned short     uint16_t;
+typedef unsigned long long uint64_t;
 
 struct MyStruct {
     void *ptrs[256];

>From ce0c44a2cc03412ae074551847a8ff8d522477ca Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Thu, 25 Jun 2026 09:37:34 -0400
Subject: [PATCH 15/17] Update clang/lib/AST/ByteCode/Interp.h

Co-authored-by: Timm Baeder <[email protected]>
---
 clang/lib/AST/ByteCode/Interp.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h
index c8709ce0f960b..b149710bba522 100644
--- a/clang/lib/AST/ByteCode/Interp.h
+++ b/clang/lib/AST/ByteCode/Interp.h
@@ -2884,10 +2884,7 @@ bool CastNoOverflow(InterpState &S, CodePtr OpPC) {
   T Source = S.Stk.pop<T>();
   APSInt Val = Source.toAPSInt();
   if (Val.isNegative() || Val.getActiveBits() > 63) {
-    S.FFDiag(S.Current->getLocation(OpPC),
-             diag::note_invalid_subexpr_in_const_expr)
-        << S.Current->getRange(OpPC);
-    return false;
+return Invalid(S, OpPC);
   }
   S.Stk.push<Integral<64, true>>(
       Integral<64, true>::from((int64_t)Val.getZExtValue()));

>From f76d6076c08b0d06e967906cf1cc0b807fe59597 Mon Sep 17 00:00:00 2001
From: Cadanus da Costa <[email protected]>
Date: Thu, 25 Jun 2026 09:38:51 -0400
Subject: [PATCH 16/17] Fix indentation of Invalid(S, OpPC) in CastNoOverflow

---
 clang/lib/AST/ByteCode/Interp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h
index b149710bba522..cf8e045561f25 100644
--- a/clang/lib/AST/ByteCode/Interp.h
+++ b/clang/lib/AST/ByteCode/Interp.h
@@ -2884,7 +2884,7 @@ bool CastNoOverflow(InterpState &S, CodePtr OpPC) {
   T Source = S.Stk.pop<T>();
   APSInt Val = Source.toAPSInt();
   if (Val.isNegative() || Val.getActiveBits() > 63) {
-return Invalid(S, OpPC);
+    return Invalid(S, OpPC);
   }
   S.Stk.push<Integral<64, true>>(
       Integral<64, true>::from((int64_t)Val.getZExtValue()));

>From f5c9c7bc3d0c98d8a2ef76a8ce1b425a0ec16c49 Mon Sep 17 00:00:00 2001
From: Cadanus da Costa <[email protected]>
Date: Thu, 25 Jun 2026 09:45:14 -0400
Subject: [PATCH 17/17] Remove unnecessary two-step cast in VisitOffsetOfExpr
 ByteCode path

The intermediate Uint64 step was not needed: Cast<PT_UintN, PT_Sint64>
uses a C++ implicit conversion which already correctly zero-extends
unsigned values. Revert the non-AP path to the original direct
emitCast(IndexT, PT_Sint64).

Addresses reviewer comment from tbaederr.
---
 clang/lib/AST/ByteCode/Compiler.cpp | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/clang/lib/AST/ByteCode/Compiler.cpp 
b/clang/lib/AST/ByteCode/Compiler.cpp
index 23c4bd4108a2f..91a1659985265 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -3808,14 +3808,8 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const 
OffsetOfExpr *E) {
       }
       if (!this->visit(ArrayIndexExpr))
         return false;
-      // Cast to Sint64. For unsigned types, cast to Uint64 first to avoid
-      // sign-extending values with the high bit set (e.g. uint8_t >= 128).
+      // Cast to Sint64.
       if (IndexT != PT_Sint64) {
-        if (!isSignedType(IndexT) && IndexT != PT_Uint64) {
-          if (!this->emitCast(IndexT, PT_Uint64, E))
-            return false;
-          IndexT = PT_Uint64;
-        }
         if (!this->emitCast(IndexT, PT_Sint64, E))
           return false;
       }

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang] Fix offsetof sign-extending unsigned array indices >= 128 (PR #204139)

Reply via email to