hazohelet updated this revision to Diff 546782.
hazohelet added a comment.

Address comments from Corentin

- Use default `pushEscapedString` escaping (`<U+0001>`) instead of UCN 
representation `\u0001`
- Convert multi-byte characters (`wchar_t`, `char16_t`, `char32_t`) to UTF-8 
and prints them.
- Added `CharToString` utility function


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

Files:
  clang/docs/ReleaseNotes.rst
  clang/lib/Sema/SemaDeclCXX.cpp
  clang/test/Lexer/cxx1z-trigraphs.cpp
  clang/test/SemaCXX/static-assert-cxx26.cpp
  clang/test/SemaCXX/static-assert.cpp

Index: clang/test/SemaCXX/static-assert.cpp
===================================================================
--- clang/test/SemaCXX/static-assert.cpp
+++ clang/test/SemaCXX/static-assert.cpp
@@ -262,7 +262,29 @@
     return 'c';
   }
   static_assert(getChar() == 'a', ""); // expected-error {{failed}} \
-                                       // expected-note {{evaluates to ''c' == 'a''}}
+                                       // expected-note {{evaluates to ''c' (99) == 'a' (97)'}}
+  static_assert((char)9 == '\x61', ""); // expected-error {{failed}} \
+                                        // expected-note {{evaluates to ''\t' (9) == 'a' (97)'}}
+  static_assert((char)10 == '\0', ""); // expected-error {{failed}} \
+                                       // expected-note {{n' (10) == '<U+0000>' (0)'}}
+  // The note above is intended to match "evaluates to '\n' (10) == '<U+0000>' (0)'", but if we write it as it is,
+  // the "\n" cannot be consumed by the diagnostic consumer.
+  static_assert((signed char)10 == (char)-123, ""); // expected-error {{failed}} \
+                                                    // expected-note {{evaluates to '10 == '<85>' (-123)'}}
+  static_assert((char)-4 == (unsigned char)-8, ""); // expected-error {{failed}} \
+                                                    // expected-note {{evaluates to ''<FC>' (-4) == 248'}}
+  static_assert((char)-128 == (char)-123, ""); // expected-error {{failed}} \
+                                               // expected-note {{evaluates to ''<80>' (-128) == '<85>' (-123)'}}
+  static_assert('\xA0' == (char)'\x20', ""); // expected-error {{failed}} \
+                                             // expected-note {{evaluates to ''<A0>' (-96) == ' ' (32)'}}
+static_assert((char16_t)L'ゆ' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error {{failed}} \
+                                                // expected-note {{evaluates to 'u'ゆ' (12422) == L'̵' (821)'}}
+static_assert(L"\/"[1] == u'\xFFFD', ""); // expected-error {{failed}} \
+                                           // expected-note {{evaluates to 'L'/' (65295) == u'�' (65533)'}}
+static_assert(L"⚾"[0] == U'🌍', ""); // expected-error {{failed}} \
+                                       // expected-note {{evaluates to 'L'⚾' (9918) == U'🌍' (127757)'}}
+static_assert(U"\a"[0] == (wchar_t)9, ""); // expected-error {{failed}} \
+                                           // expected-note {{evaluates to 'U'\a' (7) == L'\t' (9)'}}
 
   /// Bools are printed as bools.
   constexpr bool invert(bool b) {
Index: clang/test/SemaCXX/static-assert-cxx26.cpp
===================================================================
--- clang/test/SemaCXX/static-assert-cxx26.cpp
+++ clang/test/SemaCXX/static-assert-cxx26.cpp
@@ -298,3 +298,12 @@
 Bad<int> b; // expected-note {{in instantiation}}
 
 }
+
+namespace EscapeInDiagnostic {
+static_assert('\u{9}' == (char)1, ""); // expected-error {{failed}} \
+                                       // expected-note {{evaluates to ''\t' (9) == '<U+0001>' (1)'}}
+static_assert((char8_t)-128 == (char8_t)-123, ""); // expected-error {{failed}} \
+                                                   // expected-note {{evaluates to 'u8'<80>' (128) == u8'<85>' (133)'}}
+static_assert((char16_t)0xFEFF == (char16_t)0xDB93, ""); // expected-error {{failed}} \
+                                                         // expected-note {{evaluates to 'u'' (65279) == u'\xDB93' (56211)'}}
+}
Index: clang/test/Lexer/cxx1z-trigraphs.cpp
===================================================================
--- clang/test/Lexer/cxx1z-trigraphs.cpp
+++ clang/test/Lexer/cxx1z-trigraphs.cpp
@@ -21,7 +21,7 @@
 
 #if !ENABLED_TRIGRAPHS
 // expected-error@11 {{}} expected-warning@11 {{trigraph ignored}}
-// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' == '#''}}
+// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' (63) == '#' (35)'}}
 // expected-error@16 {{}}
 // expected-error@20 {{}}
 #else
Index: clang/lib/Sema/SemaDeclCXX.cpp
===================================================================
--- clang/lib/Sema/SemaDeclCXX.cpp
+++ clang/lib/Sema/SemaDeclCXX.cpp
@@ -46,6 +46,7 @@
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/SaveAndRestore.h"
 #include <map>
 #include <optional>
@@ -16799,10 +16800,71 @@
                                       AssertMessageExpr, RParenLoc, false);
 }
 
+/// Convert character's code point value to a string.
+/// The code point needs to be zero-extended to 32-bits.
+void ConvertCharToString(uint32_t CodePoint, const BuiltinType *BTy,
+                         unsigned TyWidth, llvm::raw_ostream &OS) {
+  char Arr[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
+  char *Ptr = Arr;
+
+  // This should catch Char_S, Char_U, Char8, and use of escaped characters in
+  // other types.
+  if (CodePoint <= UCHAR_MAX) {
+    StringRef Escaped = escapeCStyle<EscapeChar::Single>(CodePoint);
+    if (!Escaped.empty())
+      OS << Escaped;
+    else
+      OS << static_cast<char>(CodePoint);
+    return;
+  }
+
+  switch (BTy->getKind()) {
+  case BuiltinType::Char16:
+  case BuiltinType::Char32:
+  case BuiltinType::WChar_S:
+  case BuiltinType::WChar_U: {
+    if (llvm::ConvertCodePointToUTF8(CodePoint, Ptr)) {
+      for (char *I = Arr; I != Ptr; ++I)
+        OS << static_cast<char>(*I);
+    } else {
+      OS << "\\x" << llvm::format_hex_no_prefix(CodePoint, TyWidth / 4, true);
+    }
+    break;
+  }
+  default:
+    llvm_unreachable("Non-character type is passed");
+  }
+}
+
+static void PrintCharLiteralPrefix(BuiltinType::Kind BTK,
+                                   llvm::raw_ostream &OS) {
+  switch (BTK) {
+  case BuiltinType::Char_S:
+  case BuiltinType::Char_U:
+    break;
+  case BuiltinType::Char8:
+    OS << "u8";
+    break;
+  case BuiltinType::Char16:
+    OS << "u";
+    break;
+  case BuiltinType::Char32:
+    OS << "U";
+    break;
+  case BuiltinType::WChar_S:
+  case BuiltinType::WChar_U:
+    OS << "L";
+    break;
+  default:
+    llvm_unreachable("Non-character type is passed");
+  }
+}
+
 /// Convert \V to a string we can present to the user in a diagnostic
 /// \T is the type of the expression that has been evaluated into \V
 static bool ConvertAPValueToString(const APValue &V, QualType T,
-                                   SmallVectorImpl<char> &Str) {
+                                   SmallVectorImpl<char> &Str,
+                                   ASTContext &Context) {
   if (!V.hasValue())
     return false;
 
@@ -16817,13 +16879,35 @@
              "Bool type, but value is not 0 or 1");
       llvm::raw_svector_ostream OS(Str);
       OS << (BoolValue ? "true" : "false");
-    } else if (T->isCharType()) {
+    } else {
+      llvm::raw_svector_ostream OS(Str);
       // Same is true for chars.
-      Str.push_back('\'');
-      Str.push_back(V.getInt().getExtValue());
-      Str.push_back('\'');
-    } else
+      // We want to print the character representation for textual types
+      const auto *BTy = T->getAs<BuiltinType>();
+      if (BTy) {
+        switch (BTy->getKind()) {
+        case BuiltinType::Char_S:
+        case BuiltinType::Char_U:
+        case BuiltinType::Char8:
+        case BuiltinType::Char16:
+        case BuiltinType::Char32:
+        case BuiltinType::WChar_S:
+        case BuiltinType::WChar_U: {
+          unsigned TyWidth = Context.getIntWidth(T);
+          assert(8 <= TyWidth && TyWidth <= 32 && "Unexpected integer width");
+          uint32_t CodePoint = static_cast<uint32_t>(V.getInt().getZExtValue());
+          PrintCharLiteralPrefix(BTy->getKind(), OS);
+          OS << '\'';
+          ConvertCharToString(CodePoint, BTy, TyWidth, OS);
+          OS << "' (" << V.getInt() << ')';
+          return true;
+        }
+        default:
+          break;
+        }
+      }
       V.getInt().toString(Str);
+    }
 
     break;
 
@@ -16920,8 +17004,9 @@
 
       Side->EvaluateAsRValue(DiagSide[I].Result, Context, true);
 
-      DiagSide[I].Print = ConvertAPValueToString(
-          DiagSide[I].Result.Val, Side->getType(), DiagSide[I].ValueString);
+      DiagSide[I].Print =
+          ConvertAPValueToString(DiagSide[I].Result.Val, Side->getType(),
+                                 DiagSide[I].ValueString, Context);
     }
     if (DiagSide[0].Print && DiagSide[1].Print) {
       Diag(Op->getExprLoc(), diag::note_expr_evaluates_to)
Index: clang/docs/ReleaseNotes.rst
===================================================================
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -100,6 +100,12 @@
 -----------------------------------
 - Clang constexpr evaluator now prints template arguments when displaying
   template-specialization function calls.
+- When describing the failure of static assertion, clang prints the integer
+  representation of the value as well as its character representation when
+  the user-provided expression is of character type. If the character is
+  non-printable, clang now shows the escpaed character.
+  Clang also prints multi-byte characters if the user-provided expression
+  is of multi-byte character type.
 
 Bug Fixes in This Version
 -------------------------
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to