https://github.com/marlus updated https://github.com/llvm/llvm-project/pull/204139
>From 055db081f85fe617fa1025e2bf582338e4d747bd Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Tue, 16 Jun 2026 08:42:14 -0400 Subject: [PATCH 01/17] [Clang] Fix offsetof sign-extending unsigned array indices >= 128 When evaluating __builtin_offsetof with an unsigned integer array index (e.g. uint8_t, uint16_t) whose value has the high bit set, Clang was calling getSExtValue() on the APSInt index, which sign-extends the value and produces a large bogus offset. Fix this by checking whether the APSInt is unsigned and using getZExtValue() in that case instead. Fixes #199319 --- clang/lib/AST/ExprConstant.cpp | 3 +- clang/test/Sema/offsetof-unsigned-index.c | 25 +++++++++++++++ .../test/SemaCXX/offsetof-unsigned-index.cpp | 31 +++++++++++++++++++ 3 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 clang/test/Sema/offsetof-unsigned-index.c create mode 100644 clang/test/SemaCXX/offsetof-unsigned-index.cpp diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index bc98c0d86bb65..a2a3349b21baf 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -19284,7 +19284,8 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { return Error(OOE); CurrentType = AT->getElementType(); CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType); - Result += IdxResult.getSExtValue() * ElementSize; + Result += (IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue() + : IdxResult.getSExtValue()) * ElementSize; break; } diff --git a/clang/test/Sema/offsetof-unsigned-index.c b/clang/test/Sema/offsetof-unsigned-index.c new file mode 100644 index 0000000000000..480e486fbad72 --- /dev/null +++ b/clang/test/Sema/offsetof-unsigned-index.c @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu +// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -fexperimental-new-constant-interpreter + +// expected-no-diagnostics + +// Test that offsetof correctly zero-extends unsigned array indices >= 128. +// Previously, Clang would sign-extend uint8_t indices >= 128, producing +// a large bogus offset value instead of the correct one. +// https://github.com/llvm/llvm-project/issues/199319 + +#include <stdint.h> +#include <stddef.h> + +struct MyStruct { + void *ptrs[256]; +}; + +_Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)127]) == 127 * sizeof(void *), + "offsetof with uint8_t index 127 should be correct"); + +_Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)128]) == 128 * sizeof(void *), + "offsetof with uint8_t index 128 should be correctly zero-extended, not sign-extended"); + +_Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)255]) == 255 * sizeof(void *), + "offsetof with uint8_t index 255 should be correctly zero-extended, not sign-extended"); diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp b/clang/test/SemaCXX/offsetof-unsigned-index.cpp new file mode 100644 index 0000000000000..50bf50ef0fc35 --- /dev/null +++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -std=c++11 +// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -std=c++11 -fexperimental-new-constant-interpreter + +// expected-no-diagnostics + +// Test that offsetof correctly zero-extends unsigned array indices >= 128. +// Previously, Clang would sign-extend uint8_t/uint16_t indices whose high bit +// was set, producing a large bogus offset value instead of the correct one. +// https://github.com/llvm/llvm-project/issues/199319 + +#include <cstdint> +#include <cstddef> + +struct MyStruct { + void *ptrs[256]; +}; + +// uint8_t index: values >= 128 were incorrectly sign-extended +static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)127]) == 127 * sizeof(void *), + "offsetof with uint8_t index 127 should be correct"); +static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)128]) == 128 * sizeof(void *), + "offsetof with uint8_t index 128 should be correctly zero-extended"); +static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)255]) == 255 * sizeof(void *), + "offsetof with uint8_t index 255 should be correctly zero-extended"); + +// uint16_t index: values >= 32768 were also affected +struct BigStruct { + char data[65536]; +}; +static_assert(__builtin_offsetof(BigStruct, data[(uint16_t)32768]) == 32768, + "offsetof with uint16_t index 32768 should be correctly zero-extended"); >From 71f80764449046ef26d2ad399fcd2853359a2a6a Mon Sep 17 00:00:00 2001 From: Cadanus da Costa <[email protected]> Date: Tue, 16 Jun 2026 14:52:53 -0400 Subject: [PATCH 02/17] Apply clang-format --- clang/lib/AST/ExprConstant.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index a2a3349b21baf..eb76c10076669 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -19285,7 +19285,8 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { CurrentType = AT->getElementType(); CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType); Result += (IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue() - : IdxResult.getSExtValue()) * ElementSize; + : IdxResult.getSExtValue()) * + ElementSize; break; } >From d23b24d816caca1c90bf7b279736f05eb059a1f8 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Tue, 16 Jun 2026 15:02:17 -0400 Subject: [PATCH 03/17] Keep ByteCode compiler in sync: fix offsetof sign-extending unsigned array indices --- clang/lib/AST/ByteCode/Compiler.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 638e6ecafb295..48425d26aeb22 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -3801,8 +3801,14 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const OffsetOfExpr *E) { if (!this->visit(ArrayIndexExpr)) return false; - // Cast to Sint64. + // Cast to Sint64. For unsigned types, cast to Uint64 first to + // avoid sign-extending values with the high bit set (e.g. uint8_t >= 128). if (IndexT != PT_Sint64) { + if (!isSignedType(IndexT) && IndexT != PT_Uint64) { + if (!this->emitCast(IndexT, PT_Uint64, E)) + return false; + IndexT = PT_Uint64; + } if (!this->emitCast(IndexT, PT_Sint64, E)) return false; } >From fcd22f3cf909345718f2820c9f6d3ad3ef806250 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Tue, 16 Jun 2026 16:00:44 -0400 Subject: [PATCH 04/17] Reject negative and oversized offsetof array indices --- clang/lib/AST/ExprConstant.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index eb76c10076669..578aba948c1d7 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -19284,6 +19284,11 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { return Error(OOE); CurrentType = AT->getElementType(); CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType); + // Reject negative indices and indices too large to fit in int64_t, + // to avoid sign-extension issues or crashes in getZExtValue(). + if (IdxResult.isSigned() ? IdxResult.isNegative() + : IdxResult.ugt(APSInt::getMaxValue(64, /*Unsigned=*/false))) + return Error(OOE); Result += (IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue() : IdxResult.getSExtValue()) * ElementSize; >From 224d40a02ca3ae64857122cf0279e37a817cb5b0 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Tue, 16 Jun 2026 16:09:59 -0400 Subject: [PATCH 05/17] Reject negative and oversized offsetof array indices in ByteCode interpreter Apply the same guards to InterpretOffsetOf in the ByteCode interpreter (clang/lib/AST/ByteCode/InterpBuiltin.cpp) as were added to ExprConstant.cpp: reject negative indices and unsigned indices that exceed INT64_MAX. In the ByteCode path, indices arrive as int64_t after the Uint64->Sint64 cast chain in Compiler.cpp. A negative int64_t covers both explicitly negative signed indices and unsigned values >= 0x8000000000000000 (which wrap to negative after the cast), so a single Index < 0 guard handles both cases. Also extend the test files with expected-error cases for negative and __uint128_t indices. --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++++++++ clang/test/Sema/offsetof-unsigned-index.c | 11 +++++++++-- clang/test/SemaCXX/offsetof-unsigned-index.cpp | 10 ++++++++-- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 55907bf11506b..91432e204d3e2 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -6544,6 +6544,14 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E, // When generating bytecode, we put all the index expressions as Sint64 on // the stack. int64_t Index = ArrayIndices[ArrayIndex]; + // Reject negative indices and unsigned indices that wrapped to negative + // after the Uint64->Sint64 cast (e.g. __uint128_t >= 0x8000000000000000). + if (Index < 0) { + S.FFDiag(S.Current->getLocation(OpPC), + diag::note_invalid_subexpr_in_const_expr) + << S.Current->getRange(OpPC); + return false; + } const ArrayType *AT = S.getASTContext().getAsArrayType(CurrentType); if (!AT) return false; diff --git a/clang/test/Sema/offsetof-unsigned-index.c b/clang/test/Sema/offsetof-unsigned-index.c index 480e486fbad72..55f1e08f3a535 100644 --- a/clang/test/Sema/offsetof-unsigned-index.c +++ b/clang/test/Sema/offsetof-unsigned-index.c @@ -1,11 +1,10 @@ // RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu // RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -fexperimental-new-constant-interpreter -// expected-no-diagnostics - // Test that offsetof correctly zero-extends unsigned array indices >= 128. // Previously, Clang would sign-extend uint8_t indices >= 128, producing // a large bogus offset value instead of the correct one. +// Also tests that negative indices and oversized __uint128_t indices are rejected. // https://github.com/llvm/llvm-project/issues/199319 #include <stdint.h> @@ -15,6 +14,7 @@ struct MyStruct { void *ptrs[256]; }; +// Unsigned indices that were previously sign-extended must be zero-extended. _Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)127]) == 127 * sizeof(void *), "offsetof with uint8_t index 127 should be correct"); @@ -23,3 +23,10 @@ _Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)128]) == 128 * _Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)255]) == 255 * sizeof(void *), "offsetof with uint8_t index 255 should be correctly zero-extended, not sign-extended"); + +// Negative indices must be rejected. +struct NegIdxStruct { int a; int x[1]; }; +_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[-1]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} + +// __uint128_t indices >= 0x8000000000000000 must be rejected. +_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp b/clang/test/SemaCXX/offsetof-unsigned-index.cpp index 50bf50ef0fc35..2cea1f32be472 100644 --- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp +++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp @@ -1,11 +1,10 @@ // RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -std=c++11 // RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -std=c++11 -fexperimental-new-constant-interpreter -// expected-no-diagnostics - // Test that offsetof correctly zero-extends unsigned array indices >= 128. // Previously, Clang would sign-extend uint8_t/uint16_t indices whose high bit // was set, producing a large bogus offset value instead of the correct one. +// Also tests that negative indices and oversized __uint128_t indices are rejected. // https://github.com/llvm/llvm-project/issues/199319 #include <cstdint> @@ -29,3 +28,10 @@ struct BigStruct { }; static_assert(__builtin_offsetof(BigStruct, data[(uint16_t)32768]) == 32768, "offsetof with uint16_t index 32768 should be correctly zero-extended"); + +// Negative indices must be rejected. +struct NegIdxStruct { int a; int x[1]; }; +static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} + +// __uint128_t indices >= 0x8000000000000000 must be rejected. +static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} >From 1861378a79242dd66140bcd1042d12374eaad18d Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Tue, 16 Jun 2026 16:20:17 -0400 Subject: [PATCH 06/17] clang-format: break long line in VisitOffsetOfExpr --- clang/lib/AST/ExprConstant.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 578aba948c1d7..307b5db0b231f 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -19286,8 +19286,9 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType); // Reject negative indices and indices too large to fit in int64_t, // to avoid sign-extension issues or crashes in getZExtValue(). + APSInt MaxIdx = APSInt::getMaxValue(64, /*Unsigned=*/false); if (IdxResult.isSigned() ? IdxResult.isNegative() - : IdxResult.ugt(APSInt::getMaxValue(64, /*Unsigned=*/false))) + : IdxResult.ugt(MaxIdx)) return Error(OOE); Result += (IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue() : IdxResult.getSExtValue()) * >From bee3053414d1ff74bb12366112f62008233e1e15 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Wed, 17 Jun 2026 09:40:26 -0400 Subject: [PATCH 07/17] Fix offsetof overflow and AP index handling --- clang/lib/AST/ByteCode/Compiler.cpp | 7 +++++-- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 12 +++++++----- clang/lib/AST/ExprConstant.cpp | 23 ++++++++++++++++------- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 48425d26aeb22..159b910f82302 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -3801,8 +3801,11 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const OffsetOfExpr *E) { if (!this->visit(ArrayIndexExpr)) return false; - // Cast to Sint64. For unsigned types, cast to Uint64 first to - // avoid sign-extending values with the high bit set (e.g. uint8_t >= 128). + // Cast to Sint64. For unsigned types, cast to Uint64 first to avoid + // sign-extending values with the high bit set (e.g. uint8_t >= 128). + // AP types cannot be safely narrowed to Sint64; fail constant evaluation. + if (IndexT == PT_IntAP || IndexT == PT_IntAPS) + return false; if (IndexT != PT_Sint64) { if (!isSignedType(IndexT) && IndexT != PT_Uint64) { if (!this->emitCast(IndexT, PT_Uint64, E)) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 91432e204d3e2..07af289f99243 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -6541,11 +6541,7 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E, break; } case OffsetOfNode::Array: { - // When generating bytecode, we put all the index expressions as Sint64 on - // the stack. int64_t Index = ArrayIndices[ArrayIndex]; - // Reject negative indices and unsigned indices that wrapped to negative - // after the Uint64->Sint64 cast (e.g. __uint128_t >= 0x8000000000000000). if (Index < 0) { S.FFDiag(S.Current->getLocation(OpPC), diag::note_invalid_subexpr_in_const_expr) @@ -6557,7 +6553,13 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E, return false; CurrentType = AT->getElementType(); CharUnits ElementSize = S.getASTContext().getTypeSizeInChars(CurrentType); - Result += Index * ElementSize; + int64_t ElemSize = ElementSize.getQuantity(); + if (Index != 0 && ElemSize > llvm::maxIntN(64) / Index) + return false; + int64_t Offset = Index * ElemSize; + if (Result.getQuantity() > llvm::maxIntN(64) - Offset) + return false; + Result += CharUnits::fromQuantity(Offset); ++ArrayIndex; break; } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 307b5db0b231f..3b96c884a50f0 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -19284,15 +19284,24 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { return Error(OOE); CurrentType = AT->getElementType(); CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType); - // Reject negative indices and indices too large to fit in int64_t, - // to avoid sign-extension issues or crashes in getZExtValue(). + // Reject negative indices, indices too large to fit in int64_t, + // and overflow in the offset computation. APSInt MaxIdx = APSInt::getMaxValue(64, /*Unsigned=*/false); - if (IdxResult.isSigned() ? IdxResult.isNegative() - : IdxResult.ugt(MaxIdx)) + if (IdxResult.isSigned() + ? (IdxResult.isNegative() || IdxResult.sgt(MaxIdx)) + : IdxResult.ugt(MaxIdx)) return Error(OOE); - Result += (IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue() - : IdxResult.getSExtValue()) * - ElementSize; + int64_t IdxVal = IdxResult.isUnsigned() + ? (int64_t)IdxResult.getZExtValue() + : IdxResult.getSExtValue(); + int64_t ElemSize = ElementSize.getQuantity(); + if (IdxVal != 0 && + ElemSize > std::numeric_limits<int64_t>::max() / IdxVal) + return Error(OOE); + int64_t Offset = IdxVal * ElemSize; + if (Result.getQuantity() > std::numeric_limits<int64_t>::max() - Offset) + return Error(OOE); + Result += CharUnits::fromQuantity(Offset); break; } >From e526016bdeeb2662fc6c0864f26ef3af8d1bf379 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Wed, 17 Jun 2026 10:06:38 -0400 Subject: [PATCH 08/17] Add tests for __uint128_t > UINT64_MAX and multiply overflow --- clang/test/Sema/offsetof-unsigned-index.c | 10 ++++++++++ clang/test/SemaCXX/offsetof-unsigned-index.cpp | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/clang/test/Sema/offsetof-unsigned-index.c b/clang/test/Sema/offsetof-unsigned-index.c index 55f1e08f3a535..1b34ebe60443f 100644 --- a/clang/test/Sema/offsetof-unsigned-index.c +++ b/clang/test/Sema/offsetof-unsigned-index.c @@ -30,3 +30,13 @@ _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[-1]) == 0, ""); // expe // __uint128_t indices >= 0x8000000000000000 must be rejected. _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} + +// __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero: +// old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a +// wrong result instead of an error). +_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[((__uint128_t)1 << 64)]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} + +// A uint64_t index that causes index*sizeof(element) to overflow int64_t must +// be rejected. 4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX. +struct ShortArray { short data[2]; }; +_Static_assert(__builtin_offsetof(struct ShortArray, data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp b/clang/test/SemaCXX/offsetof-unsigned-index.cpp index 2cea1f32be472..2b3995d405705 100644 --- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp +++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp @@ -35,3 +35,13 @@ static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, ""); // expected-err // __uint128_t indices >= 0x8000000000000000 must be rejected. static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} + +// __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero: +// old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a +// wrong result instead of an error). +static_assert(__builtin_offsetof(NegIdxStruct, x[((__uint128_t)1 << 64)]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} + +// A uint64_t index that causes index*sizeof(element) to overflow int64_t must +// be rejected. 4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX. +struct ShortArray { short data[2]; }; +static_assert(__builtin_offsetof(ShortArray, data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} >From b30608ccf58263ec66b266ac6064c1b04a9942c7 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Fri, 19 Jun 2026 09:54:59 -0400 Subject: [PATCH 09/17] Handle small AP-typed offsetof indices correctly --- clang/lib/AST/ByteCode/Compiler.cpp | 16 +++++++++++++--- clang/test/Sema/offsetof-unsigned-index.c | 5 +++++ clang/test/SemaCXX/offsetof-unsigned-index.cpp | 5 +++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 159b910f82302..e954e2915bfc0 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -3799,13 +3799,23 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const OffsetOfExpr *E) { continue; } + if (IndexT == PT_IntAP || IndexT == PT_IntAPS) { + // AP types (e.g. __uint128_t, __int128) cannot be safely cast to + // Sint64. Evaluate the constant and push it directly as Sint64. + Expr::EvalResult EvalResult; + if (!ArrayIndexExpr->EvaluateAsInt(EvalResult, Ctx.getASTContext())) + return false; + llvm::APSInt IdxVal = EvalResult.Val.getInt(); + if (IdxVal.isNegative() || !IdxVal.isSignedIntN(64)) + return false; + if (!this->emitConstSint64((int64_t)IdxVal.getZExtValue(), E)) + return false; + continue; + } if (!this->visit(ArrayIndexExpr)) return false; // Cast to Sint64. For unsigned types, cast to Uint64 first to avoid // sign-extending values with the high bit set (e.g. uint8_t >= 128). - // AP types cannot be safely narrowed to Sint64; fail constant evaluation. - if (IndexT == PT_IntAP || IndexT == PT_IntAPS) - return false; if (IndexT != PT_Sint64) { if (!isSignedType(IndexT) && IndexT != PT_Uint64) { if (!this->emitCast(IndexT, PT_Uint64, E)) diff --git a/clang/test/Sema/offsetof-unsigned-index.c b/clang/test/Sema/offsetof-unsigned-index.c index 1b34ebe60443f..57bce06a25c98 100644 --- a/clang/test/Sema/offsetof-unsigned-index.c +++ b/clang/test/Sema/offsetof-unsigned-index.c @@ -31,6 +31,11 @@ _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[-1]) == 0, ""); // expe // __uint128_t indices >= 0x8000000000000000 must be rejected. _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} +// Small __uint128_t values that fit in int64_t must work correctly. +_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0]) == + __builtin_offsetof(struct NegIdxStruct, x), + "offsetof with __uint128_t index 0 should work"); + // __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero: // old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a // wrong result instead of an error). diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp b/clang/test/SemaCXX/offsetof-unsigned-index.cpp index 2b3995d405705..880c45dec0e3e 100644 --- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp +++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp @@ -36,6 +36,11 @@ static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, ""); // expected-err // __uint128_t indices >= 0x8000000000000000 must be rejected. static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} +// Small __uint128_t values that fit in int64_t must work correctly. +static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0]) == + __builtin_offsetof(NegIdxStruct, x), + "offsetof with __uint128_t index 0 should work"); + // __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero: // old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a // wrong result instead of an error). >From aa622ff06d46ee74c81e9069eaca274236508bb7 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Tue, 23 Jun 2026 09:49:51 -0400 Subject: [PATCH 10/17] Fix offsetof AP index handling: add CastNoOverflow opcode and APInt crash fix - ExprConstant.cpp: replace sgt/ugt(MaxIdx) with getActiveBits() > 63 to avoid APInt bit-width assertion crash when comparing 128-bit values - Opcodes.td: add APOnlyTypeClass (IntAP, IntAPS) and CastNoOverflow opcode - Interp.h: implement CastNoOverflow -- pops AP value, rejects negative or values that don't fit in int64_t (activeBits > 63), pushes as Sint64 - Compiler.cpp: for AP types, emit visit + CastNoOverflow instead of calling EvaluateAsInt (which is not allowed in the compiler phase) --- clang/lib/AST/ByteCode/Compiler.cpp | 10 ++-------- clang/lib/AST/ByteCode/Interp.h | 17 +++++++++++++++++ clang/lib/AST/ByteCode/Opcodes.td | 10 ++++++++++ clang/lib/AST/ExprConstant.cpp | 5 +---- clang/test/Sema/offsetof-unsigned-index.c | 11 +++++++---- clang/test/SemaCXX/offsetof-unsigned-index.cpp | 15 +++++++++------ 6 files changed, 46 insertions(+), 22 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index e954e2915bfc0..23c4bd4108a2f 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -3800,15 +3800,9 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const OffsetOfExpr *E) { } if (IndexT == PT_IntAP || IndexT == PT_IntAPS) { - // AP types (e.g. __uint128_t, __int128) cannot be safely cast to - // Sint64. Evaluate the constant and push it directly as Sint64. - Expr::EvalResult EvalResult; - if (!ArrayIndexExpr->EvaluateAsInt(EvalResult, Ctx.getASTContext())) + if (!this->visit(ArrayIndexExpr)) return false; - llvm::APSInt IdxVal = EvalResult.Val.getInt(); - if (IdxVal.isNegative() || !IdxVal.isSignedIntN(64)) - return false; - if (!this->emitConstSint64((int64_t)IdxVal.getZExtValue(), E)) + if (!this->emitCastNoOverflow(IndexT, E)) return false; continue; } diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index ad807816aa904..c8709ce0f960b 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -2877,6 +2877,23 @@ bool CastAPS(InterpState &S, CodePtr OpPC, uint32_t BitWidth) { return true; } +// Cast an AP integer to Sint64, failing constant evaluation if the value is +// negative or too large to fit (i.e. truncation would change the value). +template <PrimType Name, class T = typename PrimConv<Name>::T> +bool CastNoOverflow(InterpState &S, CodePtr OpPC) { + T Source = S.Stk.pop<T>(); + APSInt Val = Source.toAPSInt(); + if (Val.isNegative() || Val.getActiveBits() > 63) { + S.FFDiag(S.Current->getLocation(OpPC), + diag::note_invalid_subexpr_in_const_expr) + << S.Current->getRange(OpPC); + return false; + } + S.Stk.push<Integral<64, true>>( + Integral<64, true>::from((int64_t)Val.getZExtValue())); + return true; +} + template <PrimType Name, class T = typename PrimConv<Name>::T> bool CastIntegralFloating(InterpState &S, CodePtr OpPC, const llvm::fltSemantics *Sem, uint32_t FPOI) { diff --git a/clang/lib/AST/ByteCode/Opcodes.td b/clang/lib/AST/ByteCode/Opcodes.td index e350d7b2e547d..b375cb5f6b34d 100644 --- a/clang/lib/AST/ByteCode/Opcodes.td +++ b/clang/lib/AST/ByteCode/Opcodes.td @@ -748,6 +748,16 @@ def CastAPS : Opcode { let HasGroup = 1; } +def APOnlyTypeClass : TypeClass { + let Types = [IntAP, IntAPS]; +} + +// Cast from an AP integer type to Sint64, failing if the value doesn't fit. +def CastNoOverflow : Opcode { + let Types = [APOnlyTypeClass]; + let HasGroup = 1; +} + // Cast an integer to a floating type def CastIntegralFloating : Opcode { let Types = [AluTypeClass]; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 3b96c884a50f0..efdc777127bef 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -19286,10 +19286,7 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType); // Reject negative indices, indices too large to fit in int64_t, // and overflow in the offset computation. - APSInt MaxIdx = APSInt::getMaxValue(64, /*Unsigned=*/false); - if (IdxResult.isSigned() - ? (IdxResult.isNegative() || IdxResult.sgt(MaxIdx)) - : IdxResult.ugt(MaxIdx)) + if (IdxResult.isNegative() || IdxResult.getActiveBits() > 63) return Error(OOE); int64_t IdxVal = IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue() diff --git a/clang/test/Sema/offsetof-unsigned-index.c b/clang/test/Sema/offsetof-unsigned-index.c index 57bce06a25c98..075a96b0e8163 100644 --- a/clang/test/Sema/offsetof-unsigned-index.c +++ b/clang/test/Sema/offsetof-unsigned-index.c @@ -26,22 +26,25 @@ _Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)255]) == 255 * // Negative indices must be rejected. struct NegIdxStruct { int a; int x[1]; }; -_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[-1]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} +_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[-1]) == 0, ""); // expected-error {{not an integral constant expression}} // __uint128_t indices >= 0x8000000000000000 must be rejected. -_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} +_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} // Small __uint128_t values that fit in int64_t must work correctly. _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0]) == __builtin_offsetof(struct NegIdxStruct, x), "offsetof with __uint128_t index 0 should work"); +_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)1]) == + __builtin_offsetof(struct NegIdxStruct, x) + sizeof(int), + "offsetof with __uint128_t index 1 should work"); // __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero: // old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a // wrong result instead of an error). -_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[((__uint128_t)1 << 64)]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} +_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[((__uint128_t)1 << 64)]) == 0, ""); // expected-error {{not an integral constant expression}} // A uint64_t index that causes index*sizeof(element) to overflow int64_t must // be rejected. 4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX. struct ShortArray { short data[2]; }; -_Static_assert(__builtin_offsetof(struct ShortArray, data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} +_Static_assert(__builtin_offsetof(struct ShortArray, data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an integral constant expression}} diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp b/clang/test/SemaCXX/offsetof-unsigned-index.cpp index 880c45dec0e3e..22d0eda636f08 100644 --- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp +++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp @@ -7,8 +7,8 @@ // Also tests that negative indices and oversized __uint128_t indices are rejected. // https://github.com/llvm/llvm-project/issues/199319 -#include <cstdint> -#include <cstddef> +#include <stdint.h> +#include <stddef.h> struct MyStruct { void *ptrs[256]; @@ -31,22 +31,25 @@ static_assert(__builtin_offsetof(BigStruct, data[(uint16_t)32768]) == 32768, // Negative indices must be rejected. struct NegIdxStruct { int a; int x[1]; }; -static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} +static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, ""); // expected-error {{not an integral constant expression}} // __uint128_t indices >= 0x8000000000000000 must be rejected. -static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} +static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} // Small __uint128_t values that fit in int64_t must work correctly. static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0]) == __builtin_offsetof(NegIdxStruct, x), "offsetof with __uint128_t index 0 should work"); +static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)1]) == + __builtin_offsetof(NegIdxStruct, x) + sizeof(int), + "offsetof with __uint128_t index 1 should work"); // __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero: // old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a // wrong result instead of an error). -static_assert(__builtin_offsetof(NegIdxStruct, x[((__uint128_t)1 << 64)]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} +static_assert(__builtin_offsetof(NegIdxStruct, x[((__uint128_t)1 << 64)]) == 0, ""); // expected-error {{not an integral constant expression}} // A uint64_t index that causes index*sizeof(element) to overflow int64_t must // be rejected. 4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX. struct ShortArray { short data[2]; }; -static_assert(__builtin_offsetof(ShortArray, data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} +static_assert(__builtin_offsetof(ShortArray, data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an integral constant expression}} >From 4bc3f2d4018a6fa6308bb61169753a4b1ba32d86 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Tue, 23 Jun 2026 14:36:29 -0400 Subject: [PATCH 11/17] Use APSInt::getExtValue() in offsetof index evaluation --- clang/lib/AST/ExprConstant.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index efdc777127bef..7156ed2332c28 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -19288,9 +19288,7 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { // and overflow in the offset computation. if (IdxResult.isNegative() || IdxResult.getActiveBits() > 63) return Error(OOE); - int64_t IdxVal = IdxResult.isUnsigned() - ? (int64_t)IdxResult.getZExtValue() - : IdxResult.getSExtValue(); + int64_t IdxVal = IdxResult.getExtValue(); int64_t ElemSize = ElementSize.getQuantity(); if (IdxVal != 0 && ElemSize > std::numeric_limits<int64_t>::max() / IdxVal) >From b8c4b580dcb1c469507cde8c82625450ff9ba847 Mon Sep 17 00:00:00 2001 From: Cadanus da Costa <[email protected]> Date: Thu, 25 Jun 2026 09:24:34 -0400 Subject: [PATCH 12/17] Move BigStruct/uint16_t test to .c file and drop .cpp test No C++-specific logic in the test; move uint16_t/BigStruct case to the .c test file and delete the redundant SemaCXX variant. Addresses reviewer comment from efriedma-quic. --- clang/test/Sema/offsetof-unsigned-index.c | 5 ++ .../test/SemaCXX/offsetof-unsigned-index.cpp | 55 ------------------- 2 files changed, 5 insertions(+), 55 deletions(-) delete mode 100644 clang/test/SemaCXX/offsetof-unsigned-index.cpp diff --git a/clang/test/Sema/offsetof-unsigned-index.c b/clang/test/Sema/offsetof-unsigned-index.c index 075a96b0e8163..d873175c22eb8 100644 --- a/clang/test/Sema/offsetof-unsigned-index.c +++ b/clang/test/Sema/offsetof-unsigned-index.c @@ -24,6 +24,11 @@ _Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)128]) == 128 * _Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)255]) == 255 * sizeof(void *), "offsetof with uint8_t index 255 should be correctly zero-extended, not sign-extended"); +// uint16_t index: values >= 32768 were also affected by sign-extension. +struct BigStruct { char data[65536]; }; +_Static_assert(__builtin_offsetof(struct BigStruct, data[(uint16_t)32768]) == 32768, + "offsetof with uint16_t index 32768 should be correctly zero-extended"); + // Negative indices must be rejected. struct NegIdxStruct { int a; int x[1]; }; _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[-1]) == 0, ""); // expected-error {{not an integral constant expression}} diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp b/clang/test/SemaCXX/offsetof-unsigned-index.cpp deleted file mode 100644 index 22d0eda636f08..0000000000000 --- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp +++ /dev/null @@ -1,55 +0,0 @@ -// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -std=c++11 -// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -std=c++11 -fexperimental-new-constant-interpreter - -// Test that offsetof correctly zero-extends unsigned array indices >= 128. -// Previously, Clang would sign-extend uint8_t/uint16_t indices whose high bit -// was set, producing a large bogus offset value instead of the correct one. -// Also tests that negative indices and oversized __uint128_t indices are rejected. -// https://github.com/llvm/llvm-project/issues/199319 - -#include <stdint.h> -#include <stddef.h> - -struct MyStruct { - void *ptrs[256]; -}; - -// uint8_t index: values >= 128 were incorrectly sign-extended -static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)127]) == 127 * sizeof(void *), - "offsetof with uint8_t index 127 should be correct"); -static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)128]) == 128 * sizeof(void *), - "offsetof with uint8_t index 128 should be correctly zero-extended"); -static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)255]) == 255 * sizeof(void *), - "offsetof with uint8_t index 255 should be correctly zero-extended"); - -// uint16_t index: values >= 32768 were also affected -struct BigStruct { - char data[65536]; -}; -static_assert(__builtin_offsetof(BigStruct, data[(uint16_t)32768]) == 32768, - "offsetof with uint16_t index 32768 should be correctly zero-extended"); - -// Negative indices must be rejected. -struct NegIdxStruct { int a; int x[1]; }; -static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, ""); // expected-error {{not an integral constant expression}} - -// __uint128_t indices >= 0x8000000000000000 must be rejected. -static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} - -// Small __uint128_t values that fit in int64_t must work correctly. -static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0]) == - __builtin_offsetof(NegIdxStruct, x), - "offsetof with __uint128_t index 0 should work"); -static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)1]) == - __builtin_offsetof(NegIdxStruct, x) + sizeof(int), - "offsetof with __uint128_t index 1 should work"); - -// __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero: -// old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a -// wrong result instead of an error). -static_assert(__builtin_offsetof(NegIdxStruct, x[((__uint128_t)1 << 64)]) == 0, ""); // expected-error {{not an integral constant expression}} - -// A uint64_t index that causes index*sizeof(element) to overflow int64_t must -// be rejected. 4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX. -struct ShortArray { short data[2]; }; -static_assert(__builtin_offsetof(ShortArray, data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an integral constant expression}} >From a99a8bfd2a9da6f8feae5fe32b03ccc71e8ac330 Mon Sep 17 00:00:00 2001 From: Cadanus da Costa <[email protected]> Date: Thu, 25 Jun 2026 09:33:22 -0400 Subject: [PATCH 13/17] Add specific diagnostic for overflow in offsetof computation Instead of the generic 'subexpression not valid in a constant expression' note, emit 'overflow in offsetof' when the multiply or add step of the offsetof calculation overflows int64_t. Add note_constexpr_offsetof_overflow to DiagnosticASTKinds.td and use it in both the ExprConstant and ByteCode (InterpBuiltin) paths. Update the test to expect the new note. Addresses reviewer comment from efriedma-quic. --- clang/include/clang/Basic/DiagnosticASTKinds.td | 2 ++ clang/lib/AST/ByteCode/InterpBuiltin.cpp | 12 ++++++++++-- clang/lib/AST/ExprConstant.cpp | 4 ++-- clang/test/Sema/offsetof-unsigned-index.c | 2 +- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td b/clang/include/clang/Basic/DiagnosticASTKinds.td index bde418695f647..f86f0157b2b1f 100644 --- a/clang/include/clang/Basic/DiagnosticASTKinds.td +++ b/clang/include/clang/Basic/DiagnosticASTKinds.td @@ -24,6 +24,8 @@ def note_constexpr_invalid_downcast : Note< "cannot cast object of dynamic type %0 to type %1">; def note_constexpr_overflow : Note< "value %0 is outside the range of representable values of type %1">; +def note_constexpr_offsetof_overflow : Note< + "overflow in offsetof">; def note_constexpr_negative_shift : Note<"negative shift count %0">; def note_constexpr_large_shift : Note< "shift count %0 >= width of type %1 (%2 bit%s2)">; diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 07af289f99243..540da055eb37f 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -6554,11 +6554,19 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E, CurrentType = AT->getElementType(); CharUnits ElementSize = S.getASTContext().getTypeSizeInChars(CurrentType); int64_t ElemSize = ElementSize.getQuantity(); - if (Index != 0 && ElemSize > llvm::maxIntN(64) / Index) + if (Index != 0 && ElemSize > llvm::maxIntN(64) / Index) { + S.FFDiag(S.Current->getLocation(OpPC), + diag::note_constexpr_offsetof_overflow) + << S.Current->getRange(OpPC); return false; + } int64_t Offset = Index * ElemSize; - if (Result.getQuantity() > llvm::maxIntN(64) - Offset) + if (Result.getQuantity() > llvm::maxIntN(64) - Offset) { + S.FFDiag(S.Current->getLocation(OpPC), + diag::note_constexpr_offsetof_overflow) + << S.Current->getRange(OpPC); return false; + } Result += CharUnits::fromQuantity(Offset); ++ArrayIndex; break; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 7156ed2332c28..d58e3ec84c8d5 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -19292,10 +19292,10 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { int64_t ElemSize = ElementSize.getQuantity(); if (IdxVal != 0 && ElemSize > std::numeric_limits<int64_t>::max() / IdxVal) - return Error(OOE); + return Error(OOE, diag::note_constexpr_offsetof_overflow); int64_t Offset = IdxVal * ElemSize; if (Result.getQuantity() > std::numeric_limits<int64_t>::max() - Offset) - return Error(OOE); + return Error(OOE, diag::note_constexpr_offsetof_overflow); Result += CharUnits::fromQuantity(Offset); break; } diff --git a/clang/test/Sema/offsetof-unsigned-index.c b/clang/test/Sema/offsetof-unsigned-index.c index d873175c22eb8..cafd2ddff58f8 100644 --- a/clang/test/Sema/offsetof-unsigned-index.c +++ b/clang/test/Sema/offsetof-unsigned-index.c @@ -52,4 +52,4 @@ _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[((__uint128_t)1 << 64)] // A uint64_t index that causes index*sizeof(element) to overflow int64_t must // be rejected. 4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX. struct ShortArray { short data[2]; }; -_Static_assert(__builtin_offsetof(struct ShortArray, data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an integral constant expression}} +_Static_assert(__builtin_offsetof(struct ShortArray, data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{overflow in offsetof}} >From 073670118f8fe8ff879855b9430cd47d807f2133 Mon Sep 17 00:00:00 2001 From: Cadanus da Costa <[email protected]> Date: Thu, 25 Jun 2026 09:35:54 -0400 Subject: [PATCH 14/17] Replace standard includes in test with manual typedefs Remove #include <stdint.h> and <stddef.h> from offsetof-unsigned-index.c and add manual typedef declarations instead, following the convention that clang test files should not use standard includes. Addresses reviewer comment from tbaederr. --- clang/test/Sema/offsetof-unsigned-index.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/clang/test/Sema/offsetof-unsigned-index.c b/clang/test/Sema/offsetof-unsigned-index.c index cafd2ddff58f8..56396eb2a12e6 100644 --- a/clang/test/Sema/offsetof-unsigned-index.c +++ b/clang/test/Sema/offsetof-unsigned-index.c @@ -7,8 +7,9 @@ // Also tests that negative indices and oversized __uint128_t indices are rejected. // https://github.com/llvm/llvm-project/issues/199319 -#include <stdint.h> -#include <stddef.h> +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned long long uint64_t; struct MyStruct { void *ptrs[256]; >From ce0c44a2cc03412ae074551847a8ff8d522477ca Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Thu, 25 Jun 2026 09:37:34 -0400 Subject: [PATCH 15/17] Update clang/lib/AST/ByteCode/Interp.h Co-authored-by: Timm Baeder <[email protected]> --- clang/lib/AST/ByteCode/Interp.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index c8709ce0f960b..b149710bba522 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -2884,10 +2884,7 @@ bool CastNoOverflow(InterpState &S, CodePtr OpPC) { T Source = S.Stk.pop<T>(); APSInt Val = Source.toAPSInt(); if (Val.isNegative() || Val.getActiveBits() > 63) { - S.FFDiag(S.Current->getLocation(OpPC), - diag::note_invalid_subexpr_in_const_expr) - << S.Current->getRange(OpPC); - return false; +return Invalid(S, OpPC); } S.Stk.push<Integral<64, true>>( Integral<64, true>::from((int64_t)Val.getZExtValue())); >From f76d6076c08b0d06e967906cf1cc0b807fe59597 Mon Sep 17 00:00:00 2001 From: Cadanus da Costa <[email protected]> Date: Thu, 25 Jun 2026 09:38:51 -0400 Subject: [PATCH 16/17] Fix indentation of Invalid(S, OpPC) in CastNoOverflow --- clang/lib/AST/ByteCode/Interp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index b149710bba522..cf8e045561f25 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -2884,7 +2884,7 @@ bool CastNoOverflow(InterpState &S, CodePtr OpPC) { T Source = S.Stk.pop<T>(); APSInt Val = Source.toAPSInt(); if (Val.isNegative() || Val.getActiveBits() > 63) { -return Invalid(S, OpPC); + return Invalid(S, OpPC); } S.Stk.push<Integral<64, true>>( Integral<64, true>::from((int64_t)Val.getZExtValue())); >From f5c9c7bc3d0c98d8a2ef76a8ce1b425a0ec16c49 Mon Sep 17 00:00:00 2001 From: Cadanus da Costa <[email protected]> Date: Thu, 25 Jun 2026 09:45:14 -0400 Subject: [PATCH 17/17] Remove unnecessary two-step cast in VisitOffsetOfExpr ByteCode path The intermediate Uint64 step was not needed: Cast<PT_UintN, PT_Sint64> uses a C++ implicit conversion which already correctly zero-extends unsigned values. Revert the non-AP path to the original direct emitCast(IndexT, PT_Sint64). Addresses reviewer comment from tbaederr. --- clang/lib/AST/ByteCode/Compiler.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 23c4bd4108a2f..91a1659985265 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -3808,14 +3808,8 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const OffsetOfExpr *E) { } if (!this->visit(ArrayIndexExpr)) return false; - // Cast to Sint64. For unsigned types, cast to Uint64 first to avoid - // sign-extending values with the high bit set (e.g. uint8_t >= 128). + // Cast to Sint64. if (IndexT != PT_Sint64) { - if (!isSignedType(IndexT) && IndexT != PT_Uint64) { - if (!this->emitCast(IndexT, PT_Uint64, E)) - return false; - IndexT = PT_Uint64; - } if (!this->emitCast(IndexT, PT_Sint64, E)) return false; } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
