ljmf00 updated this revision to Diff 395777.
ljmf00 retitled this revision from "[lldb] Add support for custom char8_t types with different name" to "[lldb] Add support for UTF-8 unicode formatting".
ljmf00 edited the summary of this revision.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112564/new/

https://reviews.llvm.org/D112564

Files:
  lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
  lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py


Index: lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py
===================================================================
--- lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py
+++ lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py
@@ -115,8 +115,7 @@
         self.assertIn('= \\0\\e90zaZA\\v\\t\\r\\n\\f\\b\\a \n', self.getFormatted("character array", string_expr))
         self.assertIn('= \\0\\e90zaZA\\v\\t\\r\\n\\f\\b\\a \n', self.getFormatted("character", string_expr))
         self.assertIn('= ..90zaZA....... \n', self.getFormatted("printable character", string_expr))
-        # FIXME: This should probably print the characters in the uint128_t.
-        self.assertIn('= 0x2007080c0a0d090b415a617a30391b00\n', self.getFormatted("unicode8", string_expr))
+        self.assertIn('= 0x00 0x1b 0x39 0x30 0x7a 0x61 0x5a 0x41 0x0b 0x09 0x0d 0x0a 0x0c 0x08 0x07 0x20\n', self.getFormatted("unicode8", string_expr))
 
         # OSType
         ostype_expr = "(__UINT64_TYPE__)0x"
@@ -137,6 +136,9 @@
         # bytes with ASCII
         self.assertIn(r'= " \U0000001b\a\b\f\n\r\t\vaA09\0"', self.getFormatted("bytes with ASCII", "cstring"))
 
+        # unicode8
+        self.assertIn('= 0x78 0x56 0x34 0x12\n', self.getFormatted("unicode8", "0x12345678"))
+
         # unicode16
         self.assertIn('= U+5678 U+1234\n', self.getFormatted("unicode16", "0x12345678"))
 
Index: lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
===================================================================
--- lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
+++ lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
@@ -5149,6 +5149,8 @@
     case clang::BuiltinType::UChar:
     case clang::BuiltinType::WChar_U:
       return lldb::eFormatChar;
+    case clang::BuiltinType::Char8:
+      return lldb::eFormatUnicode8;
     case clang::BuiltinType::Char16:
       return lldb::eFormatUnicode16;
     case clang::BuiltinType::Char32:
@@ -8957,6 +8959,7 @@
         case eFormatCharPrintable:
         case eFormatCharArray:
         case eFormatBytes:
+        case eFormatUnicode8:
         case eFormatBytesWithASCII:
           item_count = byte_size;
           byte_size = 1;


Index: lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py
===================================================================
--- lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py
+++ lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py
@@ -115,8 +115,7 @@
         self.assertIn('= \\0\\e90zaZA\\v\\t\\r\\n\\f\\b\\a \n', self.getFormatted("character array", string_expr))
         self.assertIn('= \\0\\e90zaZA\\v\\t\\r\\n\\f\\b\\a \n', self.getFormatted("character", string_expr))
         self.assertIn('= ..90zaZA....... \n', self.getFormatted("printable character", string_expr))
-        # FIXME: This should probably print the characters in the uint128_t.
-        self.assertIn('= 0x2007080c0a0d090b415a617a30391b00\n', self.getFormatted("unicode8", string_expr))
+        self.assertIn('= 0x00 0x1b 0x39 0x30 0x7a 0x61 0x5a 0x41 0x0b 0x09 0x0d 0x0a 0x0c 0x08 0x07 0x20\n', self.getFormatted("unicode8", string_expr))
 
         # OSType
         ostype_expr = "(__UINT64_TYPE__)0x"
@@ -137,6 +136,9 @@
         # bytes with ASCII
         self.assertIn(r'= " \U0000001b\a\b\f\n\r\t\vaA09\0"', self.getFormatted("bytes with ASCII", "cstring"))
 
+        # unicode8
+        self.assertIn('= 0x78 0x56 0x34 0x12\n', self.getFormatted("unicode8", "0x12345678"))
+
         # unicode16
         self.assertIn('= U+5678 U+1234\n', self.getFormatted("unicode16", "0x12345678"))
 
Index: lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
===================================================================
--- lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
+++ lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
@@ -5149,6 +5149,8 @@
     case clang::BuiltinType::UChar:
     case clang::BuiltinType::WChar_U:
       return lldb::eFormatChar;
+    case clang::BuiltinType::Char8:
+      return lldb::eFormatUnicode8;
     case clang::BuiltinType::Char16:
       return lldb::eFormatUnicode16;
     case clang::BuiltinType::Char32:
@@ -8957,6 +8959,7 @@
         case eFormatCharPrintable:
         case eFormatCharArray:
         case eFormatBytes:
+        case eFormatUnicode8:
         case eFormatBytesWithASCII:
           item_count = byte_size;
           byte_size = 1;
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to