This revision was automatically updated to reflect the committed changes.
hans marked 2 inline comments as done.
Closed by commit rC336415: [ms] Fix mangling of string literals used to
initialize arrays larger or smaller than the literal (authored by hans).

Changed prior to commit:
  https://reviews.llvm.org/D48928?vs=154082&id=154359#toc

Repository:
  rC Clang

https://reviews.llvm.org/D48928

Files:
  lib/AST/MicrosoftMangle.cpp
  test/CodeGen/mangle-ms-string-literals.c

Index: lib/AST/MicrosoftMangle.cpp
===================================================================
--- lib/AST/MicrosoftMangle.cpp
+++ lib/AST/MicrosoftMangle.cpp
@@ -3171,7 +3171,7 @@
   // <literal-length> ::= <non-negative integer>  # the length of the literal
   //
   // <encoded-crc>    ::= <hex digit>+ @          # crc of the literal including
-  //                                              # null-terminator
+  //                                              # trailing null bytes
   //
   // <encoded-string> ::= <simple character>           # uninteresting character
   //                  ::= '?$' <hex digit> <hex digit> # these two nibbles
@@ -3186,44 +3186,51 @@
   MicrosoftCXXNameMangler Mangler(*this, Out);
   Mangler.getStream() << "??_C@_";
 
+  // The actual string length might be different from that of the string literal
+  // in cases like:
+  // char foo[3] = "foobar";
+  // char bar[42] = "foobar";
+  // Where it is truncated or zero-padded to fit the array. This is the length
+  // used for mangling, and any trailing null-bytes also need to be mangled.
+  unsigned StringLength = getASTContext()
+                              .getAsConstantArrayType(SL->getType())
+                              ->getSize()
+                              .getZExtValue();
+  unsigned StringByteLength = StringLength * SL->getCharByteWidth();
+
   // <char-type>: The "kind" of string literal is encoded into the mangled name.
   if (SL->isWide())
     Mangler.getStream() << '1';
   else
     Mangler.getStream() << '0';
 
   // <literal-length>: The next part of the mangled name consists of the length
-  // of the string.
-  // The StringLiteral does not consider the NUL terminator byte(s) but the
-  // mangling does.
-  // N.B. The length is in terms of bytes, not characters.
-  Mangler.mangleNumber(SL->getByteLength() + SL->getCharByteWidth());
+  // of the string in bytes.
+  Mangler.mangleNumber(StringByteLength);
 
   auto GetLittleEndianByte = [&SL](unsigned Index) {
     unsigned CharByteWidth = SL->getCharByteWidth();
+    if (Index / CharByteWidth >= SL->getLength())
+      return static_cast<char>(0);
     uint32_t CodeUnit = SL->getCodeUnit(Index / CharByteWidth);
     unsigned OffsetInCodeUnit = Index % CharByteWidth;
     return static_cast<char>((CodeUnit >> (8 * OffsetInCodeUnit)) & 0xff);
   };
 
   auto GetBigEndianByte = [&SL](unsigned Index) {
     unsigned CharByteWidth = SL->getCharByteWidth();
+    if (Index / CharByteWidth >= SL->getLength())
+      return static_cast<char>(0);
     uint32_t CodeUnit = SL->getCodeUnit(Index / CharByteWidth);
     unsigned OffsetInCodeUnit = (CharByteWidth - 1) - (Index % CharByteWidth);
     return static_cast<char>((CodeUnit >> (8 * OffsetInCodeUnit)) & 0xff);
   };
 
   // CRC all the bytes of the StringLiteral.
   llvm::JamCRC JC;
-  for (unsigned I = 0, E = SL->getByteLength(); I != E; ++I)
+  for (unsigned I = 0, E = StringByteLength; I != E; ++I)
     JC.update(GetLittleEndianByte(I));
 
-  // The NUL terminator byte(s) were not present earlier,
-  // we need to manually process those bytes into the CRC.
-  for (unsigned NullTerminator = 0; NullTerminator < SL->getCharByteWidth();
-       ++NullTerminator)
-    JC.update('\x00');
-
   // <encoded-crc>: The CRC is encoded utilizing the standard number mangling
   // scheme.
   Mangler.mangleNumber(JC.getCRC());
@@ -3260,18 +3267,13 @@
 
   // Enforce our 32 bytes max, except wchar_t which gets 32 chars instead.
   unsigned MaxBytesToMangle = SL->isWide() ? 64U : 32U;
-  unsigned NumBytesToMangle = std::min(MaxBytesToMangle, SL->getByteLength());
-  for (unsigned I = 0; I != NumBytesToMangle; ++I)
+  unsigned NumBytesToMangle = std::min(MaxBytesToMangle, StringByteLength);
+  for (unsigned I = 0; I != NumBytesToMangle; ++I) {
     if (SL->isWide())
       MangleByte(GetBigEndianByte(I));
     else
       MangleByte(GetLittleEndianByte(I));
-
-  // Encode the NUL terminator if there is room.
-  if (NumBytesToMangle < MaxBytesToMangle)
-    for (unsigned NullTerminator = 0; NullTerminator < SL->getCharByteWidth();
-         ++NullTerminator)
-      MangleByte(0);
+  }
 
   Mangler.getStream() << '@';
 }
Index: test/CodeGen/mangle-ms-string-literals.c
===================================================================
--- test/CodeGen/mangle-ms-string-literals.c
+++ test/CodeGen/mangle-ms-string-literals.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -x c -emit-llvm %s -o - -triple=i386-pc-win32 | FileCheck %s
+// RUN: %clang_cc1 -x c -emit-llvm %s -o - -triple=x86_64-pc-win32 | FileCheck %s
+
+void crbug857442(int x) {
+  // Make sure to handle truncated or padded literals. The truncation is only valid in C.
+  struct {int x; char s[2]; } truncatedAscii = {x, "hello"};
+  // CHECK: "??_C@_01CONKJJHI@he@"
+  struct {int x; char s[16]; } paddedAscii = {x, "hello"};
+  // CHECK: "??_C@_0BA@EAAINDNC@hello?$AA?$AA?$AA?$AA?$AA?$AA?$AA?$AA?$AA?$AA?$AA@"
+}
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to