llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Jan Schultke (eisenwave) <details> <summary>Changes</summary> This warning diagnoses cases where `__builtin_bit_cast` (and therefore `std::bit_cast`) maps padding bits in the source to non-padding bits in the destination. Such a cast has undefined behavior for every possible argument. The warning is currently only enabled for little-endian platforms. This basically implements the behavior proposed in: - https://github.com/cplusplus/papers/issues/2627 We're going to see that paper in 2 weeks, and if it gets accepted, what is currently a warning would actually turn into a hard error. Even if it's not accepted, it's useful to warn the user about this case because it's basically like writing `/ 0` or `<< 100000`. Even without knowing the operand ... just don't write that. Implementing the warning turned out to be more difficult than I thought though: - I still haven't done the proper calculations for big-endian platforms - I'm not 100% sure if it handles the layout of data member pointers, member function pointers, etc. properly. There are currently some cases like bit-fields where the detection simply bails out and you don't get the warning, but I guess we're not going to be able to do that if it's require to be a hard error in the future. CC @<!-- -->cor3ntin --- Patch is 26.55 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/200362.diff 8 Files Affected: - (modified) clang/docs/ReleaseNotes.rst (+5) - (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+4) - (modified) clang/lib/Sema/SemaCast.cpp (+169) - (modified) clang/test/AST/ByteCode/builtin-bit-cast-long-double.cpp (+3-1) - (modified) clang/test/AST/ByteCode/builtin-bit-cast.cpp (+6-5) - (added) clang/test/SemaCXX/builtin-bit-cast-padding.cpp (+144) - (modified) clang/test/SemaCXX/constexpr-builtin-bit-cast-fp80.cpp (+10-5) - (modified) clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp (+23-14) ``````````diff diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 11cce36a0906c..85f44a13257fc 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -456,6 +456,11 @@ Attribute Changes in Clang Improvements to Clang's diagnostics ----------------------------------- +- Added ``-Wbit-cast-padding`` to diagnose uses of ``__builtin_bit_cast`` (and + therefore ``std::bit_cast``) where padding bits in the source map to + non-padding bits in the destination. Such a cast has undefined behavior for + every possible argument. + - Fixed bug in ``-Wdocumentation`` so that it correctly handles explicit function template instantiations (#64087). diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 077aace321264..175c9a3e68799 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -13417,6 +13417,10 @@ def err_bit_cast_non_trivially_copyable : Error< "'__builtin_bit_cast' %select{source|destination}0 type must be trivially copyable">; def err_bit_cast_type_size_mismatch : Error< "size of '__builtin_bit_cast' source type %0 does not match destination type %1 (%2 vs %3 bytes)">; +def warn_bit_cast_unconditional_padding_ub : Warning< + "'__builtin_bit_cast' from %0 to %1 is always undefined " + "because it unconditionally maps a padding bit onto a non-padding bit">, + InGroup<DiagGroup<"bit-cast-padding">>; def err_get_vtable_pointer_incorrect_type : Error<"__builtin_get_vtable_pointer requires an argument of%select{| " diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index 0040f3aa1a891..ceda7ed894782 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -27,6 +27,7 @@ #include "clang/Sema/SemaHLSL.h" #include "clang/Sema/SemaObjC.h" #include "clang/Sema/SemaRISCV.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include <set> @@ -3375,6 +3376,170 @@ void CastOperation::CheckCStyleCast() { checkCastAlign(); } +namespace { +class BitCastBitClassifier { + const ASTContext &Ctx; + + static void setRange(llvm::BitVector &BV, uint64_t Start, uint64_t Len) { + if (Len != 0) + BV.set(Start, Start + Len); + } + +public: + /// Bits in the value representation. + llvm::BitVector Value; + /// Bits enclosed by a union. + llvm::BitVector Union; + /// Bits enclosed by ``std::byte`` or unsigned ordinary character type. + llvm::BitVector ByteLike; + + explicit BitCastBitClassifier(const ASTContext &Ctx) : Ctx(Ctx) {} + + /// Classify the bits of \p T. Returns ``false`` if the type contains + /// constructs that this analysis does not model precisely (e.g. bit-fields), + /// in which case the result is unusable. + [[nodiscard]] bool classify(QualType T) { + // TODO: Implement classification on big endian. + if (Ctx.getTargetInfo().isBigEndian()) + return false; + + uint64_t Bits = Ctx.getTypeSize(T); + Value.resize(Bits, false); + Union.resize(Bits, false); + ByteLike.resize(Bits, false); + return visit(T, 0); + } + +private: + [[nodiscard]] bool visit(QualType T, uint64_t StartBit) { + if (T.isNull() || T->isDependentType() || T->isFunctionType() || + T->isReferenceType()) + return false; + + T = T.getCanonicalType().getUnqualifiedType(); + uint64_t Bits = Ctx.getTypeSize(T); + + // [bit.cast] permits casting padding to ``std::byte`` and to an unsigned + // ordinary character type. + { + QualType U = T; + bool IsByteLike = U->isStdByteType(); + if (const auto *BT = U->getAs<BuiltinType>()) + IsByteLike = BT->getKind() == BuiltinType::UChar || + BT->getKind() == BuiltinType::Char_U; + if (IsByteLike) { + setRange(Value, StartBit, Bits); + setRange(ByteLike, StartBit, Bits); + return true; + } + } + + if (const auto *AT = T->getAs<AtomicType>()) + return visit(AT->getValueType(), StartBit); + + if (const auto *CAT = Ctx.getAsConstantArrayType(T)) { + QualType Elem = CAT->getElementType(); + uint64_t ElemBits = Ctx.getTypeSize(Elem); + uint64_t N = CAT->getSize().getZExtValue(); + for (uint64_t I = 0; I < N; ++I) + if (!visit(Elem, StartBit + I * ElemBits)) + return false; + return true; + } + + if (const auto *CT = T->getAs<ComplexType>()) { + QualType Elem = CT->getElementType(); + uint64_t ElemBits = Ctx.getTypeSize(Elem); + return visit(Elem, StartBit) && visit(Elem, StartBit + ElemBits); + } + + if (const auto *VT = T->getAs<VectorType>()) { + // Packed bool vectors store one bit per element, followd by padding. + if (T->isPackedVectorBoolType(Ctx)) { + setRange(Value, StartBit, Bits); + return true; + } + // Otherwise treat vectors as arrays of their element type. + QualType Elem = VT->getElementType(); + uint64_t ElemBits = Ctx.getTypeSize(Elem); + for (unsigned I = 0; I < VT->getNumElements(); ++I) + if (!visit(Elem, StartBit + I * ElemBits)) + return false; + return true; + } + + if (const auto *RT = T->getAs<RecordType>()) { + const RecordDecl *RD = RT->getDecl(); + if (!RD || !RD->getDefinition() || RD->isInvalidDecl()) + return false; + RD = RD->getDefinition(); + + if (RD->isUnion()) { + setRange(Union, StartBit, Bits); + return true; + } + + const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD); + if (const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { + for (const CXXBaseSpecifier &BS : CXXRD->bases()) { + const CXXRecordDecl *BD = BS.getType()->getAsCXXRecordDecl(); + if (!BD) + return false; + uint64_t Off = Ctx.toBits(Layout.getBaseClassOffset(BD)); + if (!visit(BS.getType(), StartBit + Off)) + return false; + } + } + + unsigned Idx = 0; + for (const FieldDecl *FD : RD->fields()) { + if (FD->isBitField()) + // The value representation of a bit-field is a strict subset of + // its storage unit; skip rather than risk false positives. + return false; + uint64_t Off = Layout.getFieldOffset(Idx++); + if (!visit(FD->getType(), StartBit + Off)) + return false; + } + return true; + } + + if (const auto *BIT = T->getAs<BitIntType>()) + setRange(Value, StartBit, BIT->getNumBits()); + else if (T->isFloatingType()) + setRange(Value, StartBit, + llvm::APFloat::getSizeInBits(Ctx.getFloatTypeSemantics(T))); + else + // We assume that any other scalar type is made entirely of value bits. + setRange(Value, StartBit, Bits); + return true; + } +}; + +/// Returns true if every input to a ``__builtin_bit_cast`` from \p SrcType to +/// \p DestType maps at least one source padding bit onto a destination value +/// bit. +static bool IsDegenerateBitCast(const ASTContext &Ctx, QualType SrcType, + QualType DestType) { + assert(SrcType.isTriviallyCopyableType(Ctx) && + DestType.isTriviallyCopyableType(Ctx)); + + BitCastBitClassifier Src(Ctx); + BitCastBitClassifier Dst(Ctx); + if (!Src.classify(SrcType) || !Dst.classify(DestType)) + return false; + assert(Src.Value.size() == Dst.Value.size() && + "type sizes already checked to match"); + + for (unsigned I = 0; I < Src.Value.size(); ++I) + if (!Src.Value[I] && !Src.Union[I] && Dst.Value[I] && !Dst.Union[I] && + !Dst.ByteLike[I]) + return true; + return false; +} + +} // namespace + void CastOperation::CheckBuiltinBitCast() { QualType SrcType = SrcExpr.get()->getType(); @@ -3414,6 +3579,10 @@ void CastOperation::CheckBuiltinBitCast() { return; } + if (IsDegenerateBitCast(Self.Context, SrcType, DestType)) + Self.Diag(OpRange.getBegin(), diag::warn_bit_cast_unconditional_padding_ub) + << SrcType << DestType; + Kind = CK_LValueToRValueBitCast; } diff --git a/clang/test/AST/ByteCode/builtin-bit-cast-long-double.cpp b/clang/test/AST/ByteCode/builtin-bit-cast-long-double.cpp index 7e34c3ff9d755..3efbd7a46334f 100644 --- a/clang/test/AST/ByteCode/builtin-bit-cast-long-double.cpp +++ b/clang/test/AST/ByteCode/builtin-bit-cast-long-double.cpp @@ -23,6 +23,7 @@ constexpr To bit_cast(const From &from) { return __builtin_bit_cast(To, from); #if __x86_64 // both-note@-2 {{indeterminate value can only initialize an object of type}} + // both-warning@-3 {{is always undefined because it unconditionally maps a padding bit onto a non-padding bit}} #endif } @@ -42,7 +43,8 @@ constexpr Init round_trip(const Init &init) { namespace test_long_double { #if __x86_64 constexpr __int128_t test_cast_to_int128 = bit_cast<__int128_t>((long double)0); // both-error{{must be initialized by a constant expression}}\ - // both-note{{in call}} + // both-note{{in call}}\ + // both-note{{in instantiation}} constexpr long double ld = 3.1425926539; struct bytes { diff --git a/clang/test/AST/ByteCode/builtin-bit-cast.cpp b/clang/test/AST/ByteCode/builtin-bit-cast.cpp index aea5d9d712c80..3c55e4d2cb4d3 100644 --- a/clang/test/AST/ByteCode/builtin-bit-cast.cpp +++ b/clang/test/AST/ByteCode/builtin-bit-cast.cpp @@ -1,11 +1,11 @@ -// RUN: %clang_cc1 -verify=ref,both -std=c++2a -fsyntax-only %s +// RUN: %clang_cc1 -verify=ref,both,le -std=c++2a -fsyntax-only %s // RUN: %clang_cc1 -verify=ref,both -std=c++2a -fsyntax-only -triple aarch64_be-linux-gnu %s -// RUN: %clang_cc1 -verify=ref,both -std=c++2a -fsyntax-only -triple powerpc64le-unknown-unknown -mabi=ieeelongdouble %s +// RUN: %clang_cc1 -verify=ref,both,le -std=c++2a -fsyntax-only -triple powerpc64le-unknown-unknown -mabi=ieeelongdouble %s // RUN: %clang_cc1 -verify=ref,both -std=c++2a -fsyntax-only -triple powerpc64-unknown-unknown -mabi=ieeelongdouble %s -// RUN: %clang_cc1 -verify=expected,both -std=c++2a -fsyntax-only -fexperimental-new-constant-interpreter %s +// RUN: %clang_cc1 -verify=expected,both,le -std=c++2a -fsyntax-only -fexperimental-new-constant-interpreter %s // RUN: %clang_cc1 -verify=expected,both -std=c++2a -fsyntax-only -triple aarch64_be-linux-gnu -fexperimental-new-constant-interpreter %s -// RUN: %clang_cc1 -verify=expected,both -std=c++2a -fsyntax-only -fexperimental-new-constant-interpreter -triple powerpc64le-unknown-unknown -mabi=ieeelongdouble %s +// RUN: %clang_cc1 -verify=expected,both,le -std=c++2a -fsyntax-only -fexperimental-new-constant-interpreter -triple powerpc64le-unknown-unknown -mabi=ieeelongdouble %s // RUN: %clang_cc1 -verify=expected,both -std=c++2a -fsyntax-only -fexperimental-new-constant-interpreter -triple powerpc64-unknown-unknown -mabi=ieeelongdouble %s #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ @@ -514,7 +514,8 @@ namespace Discarded { int b; }; constexpr int bad_my_byte = (__builtin_bit_cast(my_byte[8], pad{1, 2}), 0); // both-error {{must be initialized by a constant expression}} \ - // both-note {{indeterminate value can only initialize an object of type 'unsigned char' or 'std::byte';}} + // both-note {{indeterminate value can only initialize an object of type 'unsigned char' or 'std::byte';}} \ + // le-warning {{is always undefined because it unconditionally maps a padding bit onto a non-padding bit}} } typedef bool bool9 __attribute__((ext_vector_type(9))); diff --git a/clang/test/SemaCXX/builtin-bit-cast-padding.cpp b/clang/test/SemaCXX/builtin-bit-cast-padding.cpp new file mode 100644 index 0000000000000..d565cc470a3f9 --- /dev/null +++ b/clang/test/SemaCXX/builtin-bit-cast-padding.cpp @@ -0,0 +1,144 @@ +// Tests for the diagnostic implementing the static check proposed by +// P3969R1 ("Fixing std::bit_cast of types with padding bits"). +// +// RUN: %clang_cc1 -verify -std=c++20 -fsyntax-only -triple x86_64-pc-linux-gnu %s +// RUN: %clang_cc1 -verify -std=c++20 -fsyntax-only -triple x86_64-pc-linux-gnu -Wno-bit-cast-padding -DNO_WARN %s + +#ifdef NO_WARN +// expected-no-diagnostics +#endif + +namespace long_double_to_int128 { +#ifndef NO_WARN +// expected-warning@+1 {{is always undefined because it unconditionally maps a padding bit onto a non-padding bit}} +__int128 a = __builtin_bit_cast(__int128, (long double)0); + +// expected-warning@+1 {{is always undefined because it unconditionally maps a padding bit onto a non-padding bit}} +unsigned __int128 b = __builtin_bit_cast(unsigned __int128, (long double)0); +#endif +} // namespace long_double_to_int128 + +namespace bitint_to_int { +#ifndef NO_WARN +// expected-warning@+1 {{is always undefined because it unconditionally maps a padding bit onto a non-padding bit}} +unsigned u = __builtin_bit_cast(unsigned, (_BitInt(31))0); + +// expected-warning@+1 {{is always undefined because it unconditionally maps a padding bit onto a non-padding bit}} +unsigned long long ull = __builtin_bit_cast(unsigned long long, (_BitInt(63))0); +#endif +} // namespace bitint_to_int + +namespace via_struct { +#ifndef NO_WARN +struct holds_ld { long double v; }; +// Mapping padded long double through a struct wrapper is still degenerate. +// expected-warning@+1 {{is always undefined because it unconditionally maps a padding bit onto a non-padding bit}} +__int128 a = __builtin_bit_cast(__int128, holds_ld{0.0L}); +#endif +} // namespace via_struct + +namespace via_array { +#ifndef NO_WARN +struct two_int128 { __int128 a, b; }; +struct ld2 { long double v[2]; }; +// expected-warning@+1 {{is always undefined because it unconditionally maps a padding bit onto a non-padding bit}} +two_int128 a = __builtin_bit_cast(two_int128, ld2{}); + +struct four_int128 { __int128 a, b, c, d; }; +struct ld_2x2 { long double v[2][2]; }; +// expected-warning@+1 {{is always undefined because it unconditionally maps a padding bit onto a non-padding bit}} +four_int128 b = __builtin_bit_cast(four_int128, ld_2x2{}); + +struct i128_2 { __int128 v[2]; }; +// expected-warning@+1 {{is always undefined because it unconditionally maps a padding bit onto a non-padding bit}} +i128_2 c = __builtin_bit_cast(i128_2, ld2{}); +#endif +} // namespace via_array + +namespace alignment_padding { +#ifndef NO_WARN +struct internal_pad { char a; int b; }; +// expected-warning@+1 {{is always undefined because it unconditionally maps a padding bit onto a non-padding bit}} +unsigned long long a = __builtin_bit_cast(unsigned long long, internal_pad{}); + +struct trailing_pad { int a; char b; }; +// expected-warning@+1 {{is always undefined because it unconditionally maps a padding bit onto a non-padding bit}} +unsigned long long b = __builtin_bit_cast(unsigned long long, trailing_pad{}); + +struct alignas(sizeof(__int128)) overaligned { long long v; }; +// expected-warning@+1 {{is always undefined because it unconditionally maps a padding bit onto a non-padding bit}} +__int128 c = __builtin_bit_cast(__int128, overaligned{}); +#endif +} // namespace alignment_padding + +namespace empty_struct { +#ifndef NO_WARN +struct empty {}; +// expected-warning@+1 {{is always undefined because it unconditionally maps a padding bit onto a non-padding bit}} +signed char from_empty = __builtin_bit_cast(signed char, empty{}); + +struct wraps_empty { empty e; }; +// expected-warning@+1 {{is always undefined because it unconditionally maps a padding bit onto a non-padding bit}} +signed char from_wrapped = __builtin_bit_cast(signed char, wraps_empty{}); +#endif + +struct empty2 {}; +empty2 to_empty = __builtin_bit_cast(empty2, (unsigned char)0); +} // namespace empty_struct + +namespace no_warning { +unsigned u1 = __builtin_bit_cast(unsigned, 1); +float f1 = __builtin_bit_cast(float, 1u); + +long double ld = __builtin_bit_cast(long double, (unsigned __int128)0); + +long double ld2 = __builtin_bit_cast(long double, (long double)0); + +struct u4 { unsigned v[4]; }; +struct f4 { float v[4]; }; +u4 u4_a = __builtin_bit_cast(u4, f4{}); +f4 f4_a = __builtin_bit_cast(f4, u4{}); + +struct bytes16 { unsigned char b[16]; }; +bytes16 b16 = __builtin_bit_cast(bytes16, (long double)0); + +union U { + long double ld; + unsigned char bytes[16]; +}; +U u = __builtin_bit_cast(U, (long double)0); +__int128 from_union = __builtin_bit_cast(__int128, U{}); + +// Bit-fields disable the analysis entirely (no false positives). +struct with_bitfield { unsigned long long x : 5; }; +with_bitfield wbf = __builtin_bit_cast(with_bitfield, (unsigned long long)0); +} // namespace no_warning + +namespace not_byte_like { +#ifndef NO_WARN +struct sc16 { signed char b[16]; }; +// expected-warning@+1 {{is always undefined because it unconditionally maps a padding bit onto a non-padding bit}} +sc16 a = __builtin_bit_cast(sc16, (long double)0); + +enum my_byte : unsigned char {}; +struct mb_array { my_byte b[16]; }; +// expected-warning@+1 {{is always undefined because it unconditionally maps a padding bit onto a non-padding bit}} +mb_array mb = __builtin_bit_cast(mb_array, (long double)0); +#endif +} // namespace not_byte_like + +namespace templates { +template <class To, class From> +constexpr To my_bit_cast(const From &from) { + return __builtin_bit_cast(To, from); // #cast +} + +#ifndef NO_WARN +// expected-warning@#cast {{is always undefined because it unconditionally maps a padding bit onto a non-padding bit}} +// expected-note@+1 {{in instantiation}} +__int128 instantiated = my_bit_cast<__int128>((long double)0); +#endif + +// Same template instantiated with non-degenerate types does not warn. +unsigned ok = my_bit_cast<unsigned>(1); +} // namespace templates diff --git a/clang/test/SemaCXX/constexpr-builtin-bit-cast-fp80.cpp b/clang/test/SemaCXX/constexpr-builtin-bit-cast-fp80.cpp index b37b362c81e75..ab1a215fa8f90 100644 --- a/clang/test/SemaCXX/constexpr-builtin-bit-cast-fp80.cpp +++ b/clang/test/SemaCXX/constexpr-builtin-bit-cast-fp80.cpp @@ -24,23 +24,28 @@ namespace builtin_bit_cast { constexpr static fp80x2_v test_vec_fp80 = { 1, 2 }; constexpr static fp80x2_s test_str_fp80 = { { 0, 0, 0, 0, 0, 0, 0, -128, -1, 63, 0, 0, 0, 0, 0, 0, 0, -128, 0, 64 }, {} }; - // expected-error@+2 {{static assertion expression is not an integral constant expression}} + // expected-error@+3 {{static assertion expression is not an integral constant expression}} + // expected-warning@+2 {{is always undefined because it unconditionally maps a padding bit onto a non-padding bit}} // expected-note@+1 {{constexpr bit cast involving type 'long double' is not yet supported}} static_assert(__builtin_bit_cast(fp80x2_s, test_vec_fp80) == test_str_fp80, ""); - // expected-error@+2 {{static assertion expression is not an integral constant expression}} + // expected-error@+3 {{static assertion expression is not an integral constant expression}} + // expected-warning@+2 {{is always undefined because it unconditionally maps a padding bit onto a non-padding bit}} // expected-note@+1 {{constexpr bit cast involving type 'long double' is not yet supported}} static_assert(__builtin_bit_cast(fp80x2_s, __builtin_bit_cast(fp80x2_v, test_str_fp80)) == test_str_fp80, ""); - // expected-error@+2 {{constexpr variable 'bad_str_fp80_0' must be initialized by a constant expression}} + // expected-error@+3 {{constexpr variable 'bad_str_fp80_0' must be initialized by a constant expression}} + // expected-warning@+2 {{is always undefined because it unconditionally maps a padding bit onto a non-padding bit}} // expected-note@+1 {{constexpr bit cast involving type 'long double' is not yet supported}} constexpr static char bad_str_fp80_0 = __builtin_bit_cast(fp80x2_s, test_vec_fp80)._pad[0]; - // expected-error@+2 {{constexpr variable 'bad_str_fp80_1' must be initialized by a constant expression}} + // expected-error@+3 {{constexpr variable 'bad_str_fp80_1' must be initialized by a constant expression}} + // expec... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/200362 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
