hazohelet updated this revision to Diff 546832. hazohelet marked 2 inline comments as done. hazohelet retitled this revision from "[Clang][ExprConstant] Print integer instead of character on static assertion failure" to "[Clang][Sema] Fix display of characters on static assertion failure". hazohelet edited the summary of this revision. hazohelet added a comment.
Address comments from Corentin - Remove printing of character type prefix - Added code example in release note - Removed unnecessary static_cast CHANGES SINCE LAST ACTION https://reviews.llvm.org/D155610/new/ https://reviews.llvm.org/D155610 Files: clang/docs/ReleaseNotes.rst clang/lib/Sema/SemaDeclCXX.cpp clang/test/Lexer/cxx1z-trigraphs.cpp clang/test/SemaCXX/static-assert-cxx26.cpp clang/test/SemaCXX/static-assert.cpp
Index: clang/test/SemaCXX/static-assert.cpp =================================================================== --- clang/test/SemaCXX/static-assert.cpp +++ clang/test/SemaCXX/static-assert.cpp @@ -262,7 +262,29 @@ return 'c'; } static_assert(getChar() == 'a', ""); // expected-error {{failed}} \ - // expected-note {{evaluates to ''c' == 'a''}} + // expected-note {{evaluates to ''c' (99) == 'a' (97)'}} + static_assert((char)9 == '\x61', ""); // expected-error {{failed}} \ + // expected-note {{evaluates to ''\t' (9) == 'a' (97)'}} + static_assert((char)10 == '\0', ""); // expected-error {{failed}} \ + // expected-note {{n' (10) == '<U+0000>' (0)'}} + // The note above is intended to match "evaluates to '\n' (10) == '<U+0000>' (0)'", but if we write it as it is, + // the "\n" cannot be consumed by the diagnostic consumer. + static_assert((signed char)10 == (char)-123, ""); // expected-error {{failed}} \ + // expected-note {{evaluates to '10 == '<85>' (-123)'}} + static_assert((char)-4 == (unsigned char)-8, ""); // expected-error {{failed}} \ + // expected-note {{evaluates to ''<FC>' (-4) == 248'}} + static_assert((char)-128 == (char)-123, ""); // expected-error {{failed}} \ + // expected-note {{evaluates to ''<80>' (-128) == '<85>' (-123)'}} + static_assert('\xA0' == (char)'\x20', ""); // expected-error {{failed}} \ + // expected-note {{evaluates to ''<A0>' (-96) == ' ' (32)'}} +static_assert((char16_t)L'ゆ' == L"C̵ÌÌ¯Ì ÌÍÍ Å¥Ìº"[1], ""); // expected-error {{failed}} \ + // expected-note {{evaluates to ''ゆ' (12422) == '̵' (821)'}} +static_assert(L"＼／"[1] == u'\xFFFD', ""); // expected-error {{failed}} \ + // expected-note {{evaluates to ''／' (65295) == '�' (65533)'}} +static_assert(L"⚾"[0] == U'🌍', ""); // expected-error {{failed}} \ + // expected-note {{evaluates to ''⚾' (9918) == '🌍' (127757)'}} +static_assert(U"\a"[0] == (wchar_t)9, ""); // expected-error {{failed}} \ + // expected-note {{evaluates to ''\a' (7) == '\t' (9)'}} /// Bools are printed as bools. 
constexpr bool invert(bool b) { Index: clang/test/SemaCXX/static-assert-cxx26.cpp =================================================================== --- clang/test/SemaCXX/static-assert-cxx26.cpp +++ clang/test/SemaCXX/static-assert-cxx26.cpp @@ -298,3 +298,12 @@ Bad<int> b; // expected-note {{in instantiation}} } + +namespace EscapeInDiagnostic { +static_assert('\u{9}' == (char)1, ""); // expected-error {{failed}} \ + // expected-note {{evaluates to ''\t' (9) == '<U+0001>' (1)'}} +static_assert((char8_t)-128 == (char8_t)-123, ""); // expected-error {{failed}} \ + // expected-note {{evaluates to ''<80>' (128) == '<85>' (133)'}} +static_assert((char16_t)0xFEFF == (char16_t)0xDB93, ""); // expected-error {{failed}} \ + // expected-note {{evaluates to ''' (65279) == '\xDB93' (56211)'}} +} Index: clang/test/Lexer/cxx1z-trigraphs.cpp =================================================================== --- clang/test/Lexer/cxx1z-trigraphs.cpp +++ clang/test/Lexer/cxx1z-trigraphs.cpp @@ -21,7 +21,7 @@ #if !ENABLED_TRIGRAPHS // expected-error@11 {{}} expected-warning@11 {{trigraph ignored}} -// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' == '#''}} +// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' (63) == '#' (35)'}} // expected-error@16 {{}} // expected-error@20 {{}} #else Index: clang/lib/Sema/SemaDeclCXX.cpp =================================================================== --- clang/lib/Sema/SemaDeclCXX.cpp +++ clang/lib/Sema/SemaDeclCXX.cpp @@ -46,6 +46,7 @@ #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Support/ConvertUTF.h" #include "llvm/Support/SaveAndRestore.h" #include <map> #include <optional> @@ -16799,10 +16800,47 @@ AssertMessageExpr, RParenLoc, false); } +/// Convert character's code point value to a string. +/// The code point needs to be zero-extended to 32-bits. 
+void ConvertCharToString(uint32_t CodePoint, const BuiltinType *BTy, + unsigned TyWidth, llvm::raw_ostream &OS) { + char Arr[UNI_MAX_UTF8_BYTES_PER_CODE_POINT]; + char *Ptr = Arr; + + // This should catch Char_S, Char_U, Char8, and use of escaped characters in + // other types. + if (CodePoint <= UCHAR_MAX) { + StringRef Escaped = escapeCStyle<EscapeChar::Single>(CodePoint); + if (!Escaped.empty()) + OS << Escaped; + else + OS << static_cast<char>(CodePoint); + return; + } + + switch (BTy->getKind()) { + case BuiltinType::Char16: + case BuiltinType::Char32: + case BuiltinType::WChar_S: + case BuiltinType::WChar_U: { + if (llvm::ConvertCodePointToUTF8(CodePoint, Ptr)) { + for (char *I = Arr; I != Ptr; ++I) + OS << *I; + } else { + OS << "\\x" << llvm::format_hex_no_prefix(CodePoint, TyWidth / 4, true); + } + break; + } + default: + llvm_unreachable("Non-character type is passed"); + } +} + /// Convert \V to a string we can present to the user in a diagnostic /// \T is the type of the expression that has been evaluated into \V static bool ConvertAPValueToString(const APValue &V, QualType T, - SmallVectorImpl<char> &Str) { + SmallVectorImpl<char> &Str, + ASTContext &Context) { if (!V.hasValue()) return false; @@ -16817,13 +16855,34 @@ "Bool type, but value is not 0 or 1"); llvm::raw_svector_ostream OS(Str); OS << (BoolValue ? "true" : "false"); - } else if (T->isCharType()) { + } else { + llvm::raw_svector_ostream OS(Str); // Same is true for chars. 
- Str.push_back('\''); - Str.push_back(V.getInt().getExtValue()); - Str.push_back('\''); - } else + // We want to print the character representation for textual types + const auto *BTy = T->getAs<BuiltinType>(); + if (BTy) { + switch (BTy->getKind()) { + case BuiltinType::Char_S: + case BuiltinType::Char_U: + case BuiltinType::Char8: + case BuiltinType::Char16: + case BuiltinType::Char32: + case BuiltinType::WChar_S: + case BuiltinType::WChar_U: { + unsigned TyWidth = Context.getIntWidth(T); + assert(8 <= TyWidth && TyWidth <= 32 && "Unexpected integer width"); + uint32_t CodePoint = static_cast<uint32_t>(V.getInt().getZExtValue()); + OS << '\''; + ConvertCharToString(CodePoint, BTy, TyWidth, OS); + OS << "' (" << V.getInt() << ')'; + return true; + } + default: + break; + } + } V.getInt().toString(Str); + } break; @@ -16920,8 +16979,9 @@ Side->EvaluateAsRValue(DiagSide[I].Result, Context, true); - DiagSide[I].Print = ConvertAPValueToString( - DiagSide[I].Result.Val, Side->getType(), DiagSide[I].ValueString); + DiagSide[I].Print = + ConvertAPValueToString(DiagSide[I].Result.Val, Side->getType(), + DiagSide[I].ValueString, Context); } if (DiagSide[0].Print && DiagSide[1].Print) { Diag(Op->getExprLoc(), diag::note_expr_evaluates_to) Index: clang/docs/ReleaseNotes.rst =================================================================== --- clang/docs/ReleaseNotes.rst +++ clang/docs/ReleaseNotes.rst @@ -100,6 +100,41 @@ ----------------------------------- - Clang constexpr evaluator now prints template arguments when displaying template-specialization function calls. +- When describing the failure of a static assertion of an `==` expression, clang prints the integer + representation of the value as well as its character representation when + the user-provided expression is of character type. If the character is + non-printable, clang now shows the escaped character. + Clang also prints multi-byte characters if the user-provided expression + is of multi-byte character type. 
+ + *Example Code*: + + .. code-block:: c++ + + static_assert("A\n"[1] == U'🌍'); + + *BEFORE*: + + .. code-block:: text + + source:1:15: error: static assertion failed due to requirement '"A\n"[1] == U'\U0001f30d'' + 1 | static_assert("A\n"[1] == U'🌍'); + | ^~~~~~~~~~~~~~~~~ + source:1:24: note: expression evaluates to '' + ' == 127757' + 1 | static_assert("A\n"[1] == U'🌍'); + | ~~~~~~~~~^~~~~~~~ + + *AFTER*: + + .. code-block:: text + + source:1:15: error: static assertion failed due to requirement '"A\n"[1] == U'\U0001f30d'' + 1 | static_assert("A\n"[1] == U'🌍'); + | ^~~~~~~~~~~~~~~~~ + source:1:24: note: expression evaluates to ''\n' (10) == '🌍' (127757)' + 1 | static_assert("A\n"[1] == U'🌍'); + | ~~~~~~~~~^~~~~~~~ Bug Fixes in This Version -------------------------
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits