vsk updated this revision to Diff 242661.
vsk marked 4 inline comments as done.
vsk added a comment.

- `s/cap/capacity/`
- Remove the `checkedAdd` pointer overflow check as it's not necessary.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73860/new/

https://reviews.llvm.org/D73860

Files:
  
lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
  
lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
  lldb/source/DataFormatters/StringPrinter.cpp
  lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp

Index: lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
===================================================================
--- lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
+++ lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
@@ -8,7 +8,6 @@
 
 #include "LibCxx.h"
 
-#include "llvm/ADT/ScopeExit.h"
 #include "lldb/Core/Debugger.h"
 #include "lldb/Core/FormatEntity.h"
 #include "lldb/Core/ValueObject.h"
@@ -527,6 +526,17 @@
     size = (layout == eLibcxxStringLayoutModeDSC)
                ? size_mode_value
                : ((size_mode_value >> 1) % 256);
+
+    // When the small-string optimization takes place, the data must fit in the
+    // inline string buffer (23 bytes on x86_64/Darwin). If it doesn't, it's
+    // likely that the string isn't initialized and we're reading garbage.
+    ExecutionContext exe_ctx(location_sp->GetExecutionContextRef());
+    const llvm::Optional<uint64_t> max_bytes =
+        location_sp->GetCompilerType().GetByteSize(
+            exe_ctx.GetBestExecutionContextScope());
+    if (!max_bytes || size > *max_bytes)
+      return false;
+
     return (location_sp.get() != nullptr);
   } else {
     ValueObjectSP l(D->GetChildAtIndex(0, true));
@@ -537,9 +547,15 @@
                       ? layout_decider
                       : l->GetChildAtIndex(2, true);
     ValueObjectSP size_vo(l->GetChildAtIndex(1, true));
-    if (!size_vo || !location_sp)
+    const unsigned capacity_index =
+        (layout == eLibcxxStringLayoutModeDSC) ? 2 : 0;
+    ValueObjectSP capacity_vo(l->GetChildAtIndex(capacity_index, true));
+    if (!size_vo || !location_sp || !capacity_vo)
+      return false;
+    size = size_vo->GetValueAsUnsigned(LLDB_INVALID_OFFSET);
+    const uint64_t cap = capacity_vo->GetValueAsUnsigned(LLDB_INVALID_OFFSET);
+    if (size == LLDB_INVALID_OFFSET || cap == LLDB_INVALID_OFFSET || cap < size)
       return false;
-    size = size_vo->GetValueAsUnsigned(0);
     return true;
   }
 }
@@ -569,7 +585,9 @@
       options.SetIsTruncated(true);
     }
   }
-  location_sp->GetPointeeData(extractor, 0, size);
+  const size_t bytes_read = location_sp->GetPointeeData(extractor, 0, size);
+  if (bytes_read < size)
+    return false;
 
   // std::wstring::size() is measured in 'characters', not bytes
   TypeSystemClang *ast_context =
@@ -591,41 +609,33 @@
 
   switch (*wchar_t_size) {
   case 1:
-    StringPrinter::ReadBufferAndDumpToStream<
+    return StringPrinter::ReadBufferAndDumpToStream<
         lldb_private::formatters::StringPrinter::StringElementType::UTF8>(
         options);
     break;
 
   case 2:
-    lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStream<
+    return StringPrinter::ReadBufferAndDumpToStream<
         lldb_private::formatters::StringPrinter::StringElementType::UTF16>(
         options);
     break;
 
   case 4:
-    lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStream<
+    return StringPrinter::ReadBufferAndDumpToStream<
         lldb_private::formatters::StringPrinter::StringElementType::UTF32>(
         options);
-    break;
-
-  default:
-    stream.Printf("size for wchar_t is not valid");
-    return true;
   }
-
-  return true;
+  return false;
 }
 
 template <StringPrinter::StringElementType element_type>
 bool LibcxxStringSummaryProvider(ValueObject &valobj, Stream &stream,
                                  const TypeSummaryOptions &summary_options,
-                                 std::string prefix_token = "") {
+                                 std::string prefix_token) {
   uint64_t size = 0;
   ValueObjectSP location_sp;
-
   if (!ExtractLibcxxStringInfo(valobj, location_sp, size))
     return false;
-
   if (size == 0) {
     stream.Printf("\"\"");
     return true;
@@ -644,7 +654,9 @@
       options.SetIsTruncated(true);
     }
   }
-  location_sp->GetPointeeData(extractor, 0, size);
+  const size_t bytes_read = location_sp->GetPointeeData(extractor, 0, size);
+  if (bytes_read < size)
+    return false;
 
   options.SetData(extractor);
   options.SetStream(&stream);
@@ -657,28 +669,40 @@
   options.SetQuote('"');
   options.SetSourceSize(size);
   options.SetBinaryZeroIsTerminator(false);
-  StringPrinter::ReadBufferAndDumpToStream<element_type>(options);
+  return StringPrinter::ReadBufferAndDumpToStream<element_type>(options);
+}
 
+template <StringPrinter::StringElementType element_type>
+static bool formatStringImpl(ValueObject &valobj, Stream &stream,
+                             const TypeSummaryOptions &summary_options,
+                             std::string prefix_token) {
+  StreamString scratch_stream;
+  const bool success = LibcxxStringSummaryProvider<element_type>(
+      valobj, scratch_stream, summary_options, prefix_token);
+  if (success)
+    stream << scratch_stream.GetData();
+  else
+    stream << "Summary Unavailable";
   return true;
 }
 
 bool lldb_private::formatters::LibcxxStringSummaryProviderASCII(
     ValueObject &valobj, Stream &stream,
     const TypeSummaryOptions &summary_options) {
-  return LibcxxStringSummaryProvider<StringPrinter::StringElementType::ASCII>(
-      valobj, stream, summary_options);
+  return formatStringImpl<StringPrinter::StringElementType::ASCII>(
+      valobj, stream, summary_options, "");
 }
 
 bool lldb_private::formatters::LibcxxStringSummaryProviderUTF16(
     ValueObject &valobj, Stream &stream,
     const TypeSummaryOptions &summary_options) {
-  return LibcxxStringSummaryProvider<StringPrinter::StringElementType::UTF16>(
+  return formatStringImpl<StringPrinter::StringElementType::UTF16>(
       valobj, stream, summary_options, "u");
 }
 
 bool lldb_private::formatters::LibcxxStringSummaryProviderUTF32(
     ValueObject &valobj, Stream &stream,
     const TypeSummaryOptions &summary_options) {
-  return LibcxxStringSummaryProvider<StringPrinter::StringElementType::UTF32>(
+  return formatStringImpl<StringPrinter::StringElementType::UTF32>(
       valobj, stream, summary_options, "U");
 }
Index: lldb/source/DataFormatters/StringPrinter.cpp
===================================================================
--- lldb/source/DataFormatters/StringPrinter.cpp
+++ lldb/source/DataFormatters/StringPrinter.cpp
@@ -131,14 +131,15 @@
                                                          uint8_t *&next) {
   StringPrinter::StringPrinterBufferPointer<> retval{nullptr};
 
-  unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer);
+  const unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer);
 
-  if (1u + std::distance(buffer, buffer_end) < utf8_encoded_len) {
-    // I don't have enough bytes - print whatever I have left
-    retval = {buffer, static_cast<size_t>(1 + buffer_end - buffer)};
-    next = buffer_end + 1;
+  // If the utf8 encoded length is invalid, or if there aren't enough bytes to
+  // print, this is some kind of corrupted string.
+  if (utf8_encoded_len == 0 || utf8_encoded_len > 4)
+    return retval;
+  if ((buffer_end - buffer) < utf8_encoded_len)
+    // There's no room in the buffer for the utf8 sequence.
     return retval;
-  }
 
   char32_t codepoint = 0;
   switch (utf8_encoded_len) {
@@ -160,12 +161,6 @@
         (unsigned char)*buffer, (unsigned char)*(buffer + 1),
         (unsigned char)*(buffer + 2), (unsigned char)*(buffer + 3));
     break;
-  default:
-    // this is probably some bogus non-character thing just print it as-is and
-    // hope to sync up again soon
-    retval = {buffer, 1};
-    next = buffer + 1;
-    return retval;
   }
 
   if (codepoint) {
@@ -215,9 +210,7 @@
     return retval;
   }
 
-  // this should not happen - but just in case.. try to resync at some point
-  retval = {buffer, 1};
-  next = buffer + 1;
+  // We couldn't figure out how to print this string.
   return retval;
 }
 
@@ -227,7 +220,7 @@
 static StringPrinter::StringPrinterBufferPointer<>
 GetPrintable(StringPrinter::StringElementType type, uint8_t *buffer,
              uint8_t *buffer_end, uint8_t *&next) {
-  if (!buffer)
+  if (!buffer || buffer >= buffer_end)
     return {nullptr};
 
   switch (type) {
@@ -354,13 +347,11 @@
             escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data);
         auto printable_bytes = printable.GetBytes();
         auto printable_size = printable.GetSize();
-        if (!printable_bytes || !next_data) {
-          // GetPrintable() failed on us - print one byte in a desperate resync
-          // attempt
-          printable_bytes = utf8_data_ptr;
-          printable_size = 1;
-          next_data = utf8_data_ptr + 1;
-        }
+
+        // We failed to figure out how to print this string.
+        if (!printable_bytes || !next_data)
+          return false;
+
         for (unsigned c = 0; c < printable_size; c++)
           stream.Printf("%c", *(printable_bytes + c));
         utf8_data_ptr = (uint8_t *)next_data;
@@ -478,13 +469,11 @@
       auto printable = escaping_callback(data, data_end, next_data);
       auto printable_bytes = printable.GetBytes();
       auto printable_size = printable.GetSize();
-      if (!printable_bytes || !next_data) {
-        // GetPrintable() failed on us - print one byte in a desperate resync
-        // attempt
-        printable_bytes = data;
-        printable_size = 1;
-        next_data = data + 1;
-      }
+
+      // We failed to figure out how to print this string.
+      if (!printable_bytes || !next_data)
+        return false;
+
       for (unsigned c = 0; c < printable_size; c++)
         options.GetStream()->Printf("%c", *(printable_bytes + c));
       data = (uint8_t *)next_data;
Index: lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
===================================================================
--- lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
+++ lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
@@ -1,4 +1,58 @@
 #include <string>
+#include <stdint.h>
+
+// For more information about libc++'s std::string ABI, see:
+//
+//   https://joellaity.com/2020/01/31/string.html
+
+// A corrupt string which hits the SSO code path, but has an invalid size.
+static struct {
+  // Set the size of this short-mode string to 116. Note that in short mode,
+  // the size is encoded as `size << 1`.
+  unsigned char size = 232;
+
+  // 23 garbage bytes for the inline string payload.
+  char inline_buf[23] = {0};
+} garbage_string_short_mode;
+
+// A corrupt libcxx string in long mode with a payload that contains a utf8
+// sequence that's inherently too long.
+static unsigned char garbage_utf8_payload1[] = {
+  250 // This means that we expect a 5-byte sequence, this is invalid.
+};
+static struct {
+  uint64_t cap = 5;
+  uint64_t size = 4;
+  unsigned char *data = &garbage_utf8_payload1[0];
+} garbage_string_long_mode1;
+
+// A corrupt libcxx string in long mode with a payload that contains a utf8
+// sequence that's too long to fit in the buffer.
+static unsigned char garbage_utf8_payload2[] = {
+  240 // This means that we expect a 4-byte sequence, but the buffer is too
+      // small for this.
+};
+static struct {
+  uint64_t cap = 3;
+  uint64_t size = 2;
+  unsigned char *data = &garbage_utf8_payload1[0];
+} garbage_string_long_mode2;
+
+// A corrupt libcxx string which has an invalid size (i.e. a size greater than
+// the capacity of the string).
+static struct {
+  uint64_t cap = 5;
+  uint64_t size = 7;
+  const char *data = "foo";
+} garbage_string_long_mode3;
+
+// A corrupt libcxx string in long mode with a payload that would trigger a
+// buffer overflow.
+static struct {
+  uint64_t cap = 5;
+  uint64_t size = 2;
+  uint64_t data = 0xfffffffffffffffeULL;
+} garbage_string_long_mode4;
 
 int main()
 {
@@ -17,6 +71,23 @@
     std::u32string u32_string(U"🍄🍅🍆🍌");
     std::u32string u32_empty(U"");
     std::basic_string<unsigned char> uchar(5, 'a');
+
+#if _LIBCPP_ABI_VERSION == 1
+    std::string garbage1, garbage2, garbage3, garbage4, garbage5;
+    if (sizeof(std::string) == sizeof(garbage_string_short_mode))
+      memcpy((void *)&garbage1, &garbage_string_short_mode, sizeof(std::string));
+    if (sizeof(std::string) == sizeof(garbage_string_long_mode1))
+      memcpy((void *)&garbage2, &garbage_string_long_mode1, sizeof(std::string));
+    if (sizeof(std::string) == sizeof(garbage_string_long_mode2))
+      memcpy((void *)&garbage3, &garbage_string_long_mode2, sizeof(std::string));
+    if (sizeof(std::string) == sizeof(garbage_string_long_mode3))
+      memcpy((void *)&garbage4, &garbage_string_long_mode3, sizeof(std::string));
+    if (sizeof(std::string) == sizeof(garbage_string_long_mode4))
+      memcpy((void *)&garbage5, &garbage_string_long_mode4, sizeof(std::string));
+#else
+#error "Test potentially needs to be updated for a new std::string ABI."
+#endif
+
     S.assign(L"!!!!!"); // Set break point at this line.
     return 0;
 }
Index: lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
===================================================================
--- lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
+++ lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
@@ -51,6 +51,8 @@
                 "settings set target.max-children-count 256",
                 check=False)
 
+        is_64_bit = self.process().GetAddressByteSize() == 8
+
         # Execute the cleanup function during test case tear down.
         self.addTearDownHook(cleanup)
 
@@ -114,3 +116,10 @@
                 '(%s::basic_string<unsigned char, %s::char_traits<unsigned char>, '
                 '%s::allocator<unsigned char> >) uchar = "aaaaa"'%(ns,ns,ns),
         ])
+
+        if is_64_bit:
+            self.expect("frame variable garbage1", substrs=['garbage1 = Summary Unavailable'])
+            self.expect("frame variable garbage2", substrs=['garbage2 = Summary Unavailable'])
+            self.expect("frame variable garbage3", substrs=['garbage3 = Summary Unavailable'])
+            self.expect("frame variable garbage4", substrs=['garbage4 = Summary Unavailable'])
+            self.expect("frame variable garbage5", substrs=['garbage5 = Summary Unavailable'])
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to