MarcusJohnson91 created this revision.
MarcusJohnson91 added reviewers: aaron.ballman, efriedma.
MarcusJohnson91 added a project: clang.
MarcusJohnson91 requested review of this revision.

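Split from D103426 <https://reviews.llvm.org/D103426>. This part adds the Clang-extension length modifiers '%l16(c|s)' and '%l32(c|s)' (LengthModifier::AsUTF16 / AsUTF32) to the printf/scanf format-string handling, together with the corresponding Sema tests.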


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D106756

Files:
  clang/include/clang/AST/FormatString.h
  clang/lib/AST/FormatString.cpp
  clang/lib/AST/PrintfFormatString.cpp
  clang/lib/AST/ScanfFormatString.cpp
  clang/test/Sema/format-strings-int-typedefs.c

Index: clang/test/Sema/format-strings-int-typedefs.c
===================================================================
--- clang/test/Sema/format-strings-int-typedefs.c
+++ clang/test/Sema/format-strings-int-typedefs.c
@@ -12,6 +12,10 @@
   printf("%td", 42.0); // expected-warning {{format specifies type 'ptrdiff_t' (aka 'int')}}
   printf("%lc", 42.0); // expected-warning {{format specifies type 'wint_t' (aka 'int')}}
   printf("%ls", 42.0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}}
+  printf("%l16c", 42.0); // expected-warning {{format specifies type 'char16_t' (aka 'int')}}
+  printf("%l16s", 42.0); // expected-warning {{format specifies type 'char16_t *' (aka 'int *')}}
+  printf("%l32c", 42.0); // expected-warning {{format specifies type 'char32_t' (aka 'int')}}
+  printf("%l32s", 42.0); // expected-warning {{format specifies type 'char32_t *' (aka 'int *')}}
   printf("%S", 42.0);  // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}}
   printf("%C", 42.0);  // expected-warning {{format specifies type 'wchar_t' (aka 'int')}}
   
@@ -21,6 +25,10 @@
   wprintf(L"%td", 42.0); // expected-warning {{format specifies type 'ptrdiff_t' (aka 'int')}}
   wprintf(L"%lc", 42.0); // expected-warning {{format specifies type 'wint_t' (aka 'int')}}
   wprintf(L"%ls", 42.0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}}
+  wprintf(L"%l16c", 42.0); // expected-warning {{format specifies type 'char16_t' (aka 'int')}}
+  wprintf(L"%l16s", 42.0); // expected-warning {{format specifies type 'char16_t *' (aka 'int *')}}
+  wprintf(L"%l32c", 42.0); // expected-warning {{format specifies type 'char32_t' (aka 'int')}}
+  wprintf(L"%l32s", 42.0); // expected-warning {{format specifies type 'char32_t *' (aka 'int *')}}
   wprintf(L"%S", 42.0);  // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}}
   wprintf(L"%C", 42.0);  // expected-warning {{format specifies type 'wchar_t' (aka 'int')}}
 
@@ -30,6 +38,10 @@
   scanf("%td", 0); // expected-warning {{format specifies type 'ptrdiff_t *' (aka 'int *')}}
   scanf("%lc", 0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}}
   scanf("%ls", 0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}}
+  scanf("%l16c", 0); // expected-warning {{format specifies type 'char16_t *' (aka 'int *')}}
+  scanf("%l16s", 0); // expected-warning {{format specifies type 'char16_t *' (aka 'int *')}}
+  scanf("%l32c", 0); // expected-warning {{format specifies type 'char32_t *' (aka 'int *')}}
+  scanf("%l32s", 0); // expected-warning {{format specifies type 'char32_t *' (aka 'int *')}}
   scanf("%S",  0);  // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}}
   scanf("%C",  0);  // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}}
   
@@ -39,6 +51,10 @@
   wscanf("%td", 0); // expected-warning {{format specifies type 'ptrdiff_t *' (aka 'int *')}}
   wscanf("%lc", 0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}}
   wscanf("%ls", 0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}}
+  wscanf("%l16c", 0); // expected-warning {{format specifies type 'char16_t *' (aka 'int *')}}
+  wscanf("%l16s", 0); // expected-warning {{format specifies type 'char16_t *' (aka 'int *')}}
+  wscanf("%l32c", 0); // expected-warning {{format specifies type 'char32_t *' (aka 'int *')}}
+  wscanf("%l32s", 0); // expected-warning {{format specifies type 'char32_t *' (aka 'int *')}}
   wscanf("%S",  0);  // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}}
   wscanf("%C",  0);  // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}}
 
Index: clang/lib/AST/ScanfFormatString.cpp
===================================================================
--- clang/lib/AST/ScanfFormatString.cpp
+++ clang/lib/AST/ScanfFormatString.cpp
@@ -261,6 +261,8 @@
         case LengthModifier::AsInt32:
         case LengthModifier::AsInt3264:
         case LengthModifier::AsWide:
+        case LengthModifier::AsUTF16:
+        case LengthModifier::AsUTF32:
         case LengthModifier::AsShortLong:
           return ArgType::Invalid();
       }
@@ -302,6 +304,8 @@
         case LengthModifier::AsInt32:
         case LengthModifier::AsInt3264:
         case LengthModifier::AsWide:
+        case LengthModifier::AsUTF16:
+        case LengthModifier::AsUTF32:
         case LengthModifier::AsShortLong:
           return ArgType::Invalid();
       }
@@ -329,14 +333,18 @@
 
     // Char, string and scanlist.
     case ConversionSpecifier::cArg:
-    case ConversionSpecifier::sArg:
+    case ConversionSpecifier::CArg:
     case ConversionSpecifier::ScanListArg:
       switch (LM.getKind()) {
         case LengthModifier::None:
           return ArgType::PtrTo(ArgType::AnyCharTy);
         case LengthModifier::AsLong:
         case LengthModifier::AsWide:
-          return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
+          return ArgType::PtrTo(ArgType::WCStrTy);
+        case LengthModifier::AsUTF16:
+          return ArgType(ArgType::Char16Ty);
+        case LengthModifier::AsUTF32:
+          return ArgType(ArgType::Char32Ty);
         case LengthModifier::AsAllocate:
         case LengthModifier::AsMAllocate:
           return ArgType::PtrTo(ArgType::CStrTy);
@@ -347,13 +355,17 @@
         default:
           return ArgType::Invalid();
       }
-    case ConversionSpecifier::CArg:
+    case ConversionSpecifier::sArg:
     case ConversionSpecifier::SArg:
       // FIXME: Mac OS X specific?
       switch (LM.getKind()) {
         case LengthModifier::None:
         case LengthModifier::AsWide:
-          return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
+          return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t *"));
+        case LengthModifier::AsUTF16:
+          return ArgType::PtrTo(ArgType(Ctx.getChar16Type(), "char16_t *"));
+        case LengthModifier::AsUTF32:
+          return ArgType::PtrTo(ArgType(Ctx.getChar32Type(), "char32_t *"));
         case LengthModifier::AsAllocate:
         case LengthModifier::AsMAllocate:
           return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
@@ -398,6 +410,8 @@
         case LengthModifier::AsInt32:
         case LengthModifier::AsInt3264:
         case LengthModifier::AsWide:
+        case LengthModifier::AsUTF16:
+        case LengthModifier::AsUTF32:
         case LengthModifier::AsShortLong:
           return ArgType::Invalid();
         }
@@ -435,10 +449,14 @@
     return false;
 
   // Pointer to a character.
-  if (PT->isAnyCharacterType()) {
+  if (PT->isAnyCharacterType(LangOpt)) {
     CS.setKind(ConversionSpecifier::sArg);
     if (PT->isWideCharType())
       LM.setKind(LengthModifier::AsWideChar);
+    else if (PT->isChar16Type(LangOpt))
+      LM.setKind(LengthModifier::AsUTF16);
+    else if (PT->isChar32Type(LangOpt))
+      LM.setKind(LengthModifier::AsUTF32);
     else
       LM.setKind(LengthModifier::None);
 
Index: clang/lib/AST/PrintfFormatString.cpp
===================================================================
--- clang/lib/AST/PrintfFormatString.cpp
+++ clang/lib/AST/PrintfFormatString.cpp
@@ -494,6 +494,10 @@
       case LengthModifier::AsLong:
       case LengthModifier::AsWide:
         return ArgType(ArgType::WIntTy, "wint_t");
+      case LengthModifier::AsUTF16:
+        return ArgType(ArgType::Char16Ty, "char16_t");
+      case LengthModifier::AsUTF32:
+        return ArgType(ArgType::Char32Ty, "char32_t");
       case LengthModifier::AsShort:
         if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
           return Ctx.IntTy;
@@ -535,6 +539,8 @@
       case LengthModifier::AsAllocate:
       case LengthModifier::AsMAllocate:
       case LengthModifier::AsWide:
+      case LengthModifier::AsUTF16:
+      case LengthModifier::AsUTF32:
         return ArgType::Invalid();
     }
 
@@ -567,6 +573,8 @@
       case LengthModifier::AsPtrDiff:
         return ArgType::makePtrdiffT(
             ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
+      case LengthModifier::AsUTF16:
+      case LengthModifier::AsUTF32:
       case LengthModifier::AsAllocate:
       case LengthModifier::AsMAllocate:
       case LengthModifier::AsWide:
@@ -618,6 +626,8 @@
       case LengthModifier::AsInt3264:
       case LengthModifier::AsInt64:
       case LengthModifier::AsWide:
+      case LengthModifier::AsUTF16:
+      case LengthModifier::AsUTF32:
         return ArgType::Invalid();
       case LengthModifier::AsShortLong:
         llvm_unreachable("only used for OpenCL which doesn not handle nArg");
@@ -632,9 +642,15 @@
                          "const unichar *");
         return ArgType(ArgType::WCStrTy, "wchar_t *");
       }
-      if (LM.getKind() == LengthModifier::AsWide)
+      if (LM.getKind() == LengthModifier::AsWide) {
         return ArgType(ArgType::WCStrTy, "wchar_t *");
-      return ArgType::CStrTy;
+      }
+      if (LM.getKind() == LengthModifier::AsUTF16)
+        return ArgType(ArgType::Char16Ty, "char16_t *");
+      if (LM.getKind() == LengthModifier::AsUTF32)
+        return ArgType(ArgType::Char32Ty, "char32_t *");
+      else
+        return ArgType::CStrTy;
     case ConversionSpecifier::SArg:
       if (IsObjCLiteral)
         return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()),
@@ -642,13 +658,22 @@
       if (Ctx.getTargetInfo().getTriple().isOSMSVCRT() &&
           LM.getKind() == LengthModifier::AsShort)
         return ArgType::CStrTy;
+      if (LM.getKind() == LengthModifier::AsUTF16)
+        return ArgType(ArgType::Char16Ty, "char16_t *");
+      if (LM.getKind() == LengthModifier::AsUTF32)
+        return ArgType(ArgType::Char32Ty, "char32_t *");
       return ArgType(ArgType::WCStrTy, "wchar_t *");
+    case ConversionSpecifier::cArg:
     case ConversionSpecifier::CArg:
       if (IsObjCLiteral)
         return ArgType(Ctx.UnsignedShortTy, "unichar");
       if (Ctx.getTargetInfo().getTriple().isOSMSVCRT() &&
           LM.getKind() == LengthModifier::AsShort)
         return Ctx.IntTy;
+      if (LM.getKind() == LengthModifier::AsUTF16)
+        return ArgType(ArgType::Char16Ty, "char16_t");
+      if (LM.getKind() == LengthModifier::AsUTF32)
+        return ArgType(ArgType::Char32Ty, "char32_t");
       return ArgType(Ctx.WideCharTy, "wchar_t");
     case ConversionSpecifier::pArg:
     case ConversionSpecifier::PArg:
@@ -706,17 +731,21 @@
     return true;
   }
 
-  // Handle strings next (char *, wchar_t *)
-  if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
+  // Handle strings next (char *, wchar_t *, char16_t *, char32_t *)
+  if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType(LangOpt))) {
     CS.setKind(ConversionSpecifier::sArg);
 
     // Disable irrelevant flags
     HasAlternativeForm = 0;
     HasLeadingZeroes = 0;
 
-    // Set the long length modifier for wide characters
+    // Set the length modifier for characters
     if (QT->getPointeeType()->isWideCharType())
       LM.setKind(LengthModifier::AsWideChar);
+    else if (QT->getPointeeType()->isChar16Type(LangOpt))
+      LM.setKind(LengthModifier::AsUTF16);
+    else if (QT->getPointeeType()->isChar32Type(LangOpt))
+      LM.setKind(LengthModifier::AsUTF32);
     else
       LM.setKind(LengthModifier::None);
 
@@ -736,19 +765,10 @@
       VectorNumElts = OptionalAmount(VT->getNumElements());
     }
   }
-
-  // We can only work with builtin types.
-  if (!BT)
-    return false;
-
+  
   // Set length modifier
   switch (BT->getKind()) {
   case BuiltinType::Bool:
-  case BuiltinType::WChar_U:
-  case BuiltinType::WChar_S:
-  case BuiltinType::Char8: // FIXME: Treat like 'char'?
-  case BuiltinType::Char16:
-  case BuiltinType::Char32:
   case BuiltinType::UInt128:
   case BuiltinType::Int128:
   case BuiltinType::Half:
@@ -819,6 +839,7 @@
   case BuiltinType::UChar:
   case BuiltinType::Char_S:
   case BuiltinType::SChar:
+  case BuiltinType::Char8:
     LM.setKind(LengthModifier::AsChar);
     break;
 
@@ -840,6 +861,19 @@
   case BuiltinType::LongDouble:
     LM.setKind(LengthModifier::AsLongDouble);
     break;
+      
+  case BuiltinType::Char16:
+    LM.setKind(LengthModifier::AsUTF16);
+    break;
+      
+  case BuiltinType::Char32:
+    LM.setKind(LengthModifier::AsUTF32);
+    break;
+      
+  case BuiltinType::WChar_S:
+  case BuiltinType::WChar_U:
+    LM.setKind(LengthModifier::AsWide);
+    break;
   }
 
   // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
@@ -873,7 +907,7 @@
 
   // Set conversion specifier and disable any flags which do not apply to it.
   // Let typedefs to char fall through to int, as %c is silly for uint8_t.
-  if (!isa<TypedefType>(QT) && QT->isCharType()) {
+  if (!isa<TypedefType>(QT) && QT->isAnyCharacterType(LangOpt)) {
     CS.setKind(ConversionSpecifier::cArg);
     LM.setKind(LengthModifier::None);
     Precision.setHowSpecified(OptionalAmount::NotSpecified);
Index: clang/lib/AST/FormatString.cpp
===================================================================
--- clang/lib/AST/FormatString.cpp
+++ clang/lib/AST/FormatString.cpp
@@ -232,7 +232,17 @@
       break;
     case 'l':
       ++I;
-      if (I != E && *I == 'l') {
+      if (I + 1 != E && I[0] == '1' && I[1] == '6') {
+        ++I;
+        ++I;
+        lmKind = LengthModifier::AsUTF16;
+        break;
+      } else if (I + 1 != E && I[0] == '3' && I[1] == '2') {
+        ++I;
+        ++I;
+        lmKind = LengthModifier::AsUTF32;
+        break;
+      } else if (I + 1 != E && I[0] == 'l') {
         ++I;
         lmKind = LengthModifier::AsLongLong;
       } else {
@@ -459,6 +469,24 @@
 
       return WInt == PromoArg ? Match : NoMatch;
     }
+      
+    case Char16Ty: {
+      const PointerType *PT = argTy->getAs<PointerType>();
+      if (!PT)
+        return NoMatch;
+      QualType pointeeTy =
+        C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType();
+      return pointeeTy == C.getChar16Type() ? Match : NoMatch;
+    }
+      
+    case Char32Ty: {
+      const PointerType *PT = argTy->getAs<PointerType>();
+      if (!PT)
+        return NoMatch;
+      QualType pointeeTy =
+        C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType();
+      return pointeeTy == C.getChar32Type() ? Match : NoMatch;
+    }
 
     case CPointerTy:
       if (argTy->isVoidPointerType()) {
@@ -520,6 +548,12 @@
     case WCStrTy:
       Res = C.getPointerType(C.getWideCharType());
       break;
+    case Char16Ty:
+      Res = C.getPointerType(C.getChar16Type());
+      break;
+    case Char32Ty:
+      Res = C.getPointerType(C.getChar32Type());
+      break;
     case ObjCPointerTy:
       Res = C.ObjCBuiltinIdTy;
       break;
@@ -607,6 +641,10 @@
     return "m";
   case AsWide:
     return "w";
+  case AsUTF16:
+    return "l16";
+  case AsUTF32:
+    return "l32";
   case None:
     return "";
   }
@@ -860,6 +898,17 @@
         default:
           return false;
       }
+    case LengthModifier::AsUTF16:
+    case LengthModifier::AsUTF32:
+      switch (CS.getKind()) {
+        case ConversionSpecifier::cArg:
+        case ConversionSpecifier::CArg:
+        case ConversionSpecifier::sArg:
+        case ConversionSpecifier::SArg:
+          return true;
+        default:
+          return false;
+      }
     case LengthModifier::AsWide:
       switch (CS.getKind()) {
         case ConversionSpecifier::cArg:
@@ -886,6 +935,9 @@
     case LengthModifier::AsSizeT:
     case LengthModifier::AsPtrDiff:
     case LengthModifier::AsLongDouble:
+    case LengthModifier::AsWide:
+    case LengthModifier::AsUTF16:
+    case LengthModifier::AsUTF32:
       return true;
     case LengthModifier::AsAllocate:
     case LengthModifier::AsMAllocate:
@@ -893,7 +945,6 @@
     case LengthModifier::AsInt32:
     case LengthModifier::AsInt3264:
     case LengthModifier::AsInt64:
-    case LengthModifier::AsWide:
     case LengthModifier::AsShortLong: // ???
       return false;
   }
@@ -997,6 +1048,12 @@
     } else if (Identifier->getName() == "ptrdiff_t") {
       LM.setKind(LengthModifier::AsPtrDiff);
       return true;
+    } else if (Identifier->getName() == "char16_t") {
+      LM.setKind(LengthModifier::AsUTF16);
+      return true;
+    } else if (Identifier->getName() == "char32_t") {
+      LM.setKind(LengthModifier::AsUTF32);
+      return true;
     }
 
     QualType T = Typedef->getUnderlyingType();
Index: clang/include/clang/AST/FormatString.h
===================================================================
--- clang/include/clang/AST/FormatString.h
+++ clang/include/clang/AST/FormatString.h
@@ -80,6 +80,8 @@
     AsLongDouble, // 'L'
     AsAllocate,   // for '%as', GNU extension to C90 scanf
     AsMAllocate,  // for '%ms', GNU extension to scanf
+    AsUTF16,      // for '%l16(c|s)', Clang extension
+    AsUTF32,      // for '%l32(c|s)', Clang extension
     AsWide,       // 'w' (MSVCRT, like l but only for c, C, s, S, or Z
     AsWideChar = AsLong // for '%ls', only makes sense for printf
   };
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to