https://github.com/abhina-sree created 
https://github.com/llvm/llvm-project/pull/169803

This patch builds upon https://github.com/llvm/llvm-project/pull/138895 and
introduces a ParserConversionAction, which controls which charset is used for
various string literals. It also introduces a FormatStrConverter, which is
used for format string checking.

>From ec0efa55da6e216ae811ff97279c1cd2024b549f Mon Sep 17 00:00:00 2001
From: Abhina Sreeskantharajan <[email protected]>
Date: Mon, 24 Nov 2025 11:00:04 -0500
Subject: [PATCH 1/2] add ParserConversionAction

(cherry picked from commit c2647a73957921d3f7a53c6f25a69f1cc2725aa3)
---
 clang/include/clang/Parse/Parser.h |  1 +
 clang/include/clang/Sema/Sema.h    |  8 ++++++--
 clang/lib/Parse/ParseDecl.cpp      | 13 +++++++++++++
 clang/lib/Parse/ParseDeclCXX.cpp   | 10 +++++++---
 clang/lib/Parse/ParseExpr.cpp      |  9 +++++----
 clang/lib/Parse/Parser.cpp         |  4 ++++
 clang/lib/Sema/SemaExpr.cpp        | 12 +++++++-----
 7 files changed, 43 insertions(+), 14 deletions(-)

diff --git a/clang/include/clang/Parse/Parser.h 
b/clang/include/clang/Parse/Parser.h
index 58eb1c0a7c114..97867183b5a1d 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -5633,6 +5633,7 @@ class Parser : public CodeCompletionHandler {
     bool Finished;
   };
   ObjCImplParsingDataRAII *CurParsedObjCImpl;
+  ConversionAction ParserConversionAction;
 
   /// StashAwayMethodOrFunctionBodyTokens -  Consume the tokens and store them
   /// for later parsing.
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index cbfcc9bc0ea99..65567e367dea4 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -54,6 +54,7 @@
 #include "clang/Basic/TemplateKinds.h"
 #include "clang/Basic/TokenKinds.h"
 #include "clang/Basic/TypeTraits.h"
+#include "clang/Lex/LiteralConverter.h"
 #include "clang/Sema/AnalysisBasedWarnings.h"
 #include "clang/Sema/Attr.h"
 #include "clang/Sema/CleanupInfo.h"
@@ -7272,9 +7273,12 @@ class Sema final : public SemaBase {
   /// from multiple tokens.  However, the common case is that StringToks points
   /// to one string.
   ExprResult ActOnStringLiteral(ArrayRef<Token> StringToks,
-                                Scope *UDLScope = nullptr);
+                                Scope *UDLScope = nullptr,
+                                ConversionAction Action = CA_ToExecEncoding);
 
-  ExprResult ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks);
+  ExprResult
+  ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks,
+                                ConversionAction Action = CA_ToExecEncoding);
 
   /// ControllingExprOrType is either an opaque pointer coming out of a
   /// ParsedType or an Expr *. FIXME: it'd be better to split this interface
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 8688ccf41acb5..fd537618a3c83 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -555,6 +555,9 @@ unsigned Parser::ParseAttributeArgsCommon(
               nullptr,
               Sema::ExpressionEvaluationContextRecord::EK_AttrArgument);
 
+          SaveAndRestore<ConversionAction> SavedTranslationState(
+              ParserConversionAction, CA_NoConversion);
+
           ExprResult ArgExpr = ParseAssignmentExpression();
           if (ArgExpr.isInvalid()) {
             SkipUntil(tok::r_paren, StopAtSemi);
@@ -634,6 +637,9 @@ void Parser::ParseGNUAttributeArgs(
   ParsedAttr::Kind AttrKind =
       ParsedAttr::getParsedKind(AttrName, ScopeName, Form.getSyntax());
 
+  SaveAndRestore<ConversionAction> 
SavedTranslationState(ParserConversionAction,
+                                                         CA_NoConversion);
+
   if (AttrKind == ParsedAttr::AT_Availability) {
     ParseAvailabilityAttribute(*AttrName, AttrNameLoc, Attrs, EndLoc, 
ScopeName,
                                ScopeLoc, Form);
@@ -699,6 +705,9 @@ unsigned Parser::ParseClangAttributeArgs(
   ParsedAttr::Kind AttrKind =
       ParsedAttr::getParsedKind(AttrName, ScopeName, Form.getSyntax());
 
+  SaveAndRestore<ConversionAction> 
SavedTranslationState(ParserConversionAction,
+                                                         CA_NoConversion);
+
   switch (AttrKind) {
   default:
     return ParseAttributeArgsCommon(AttrName, AttrNameLoc, Attrs, EndLoc,
@@ -1521,6 +1530,10 @@ void Parser::ParseExternalSourceSymbolAttribute(
       SkipUntil(tok::comma, tok::r_paren, StopAtSemi | StopBeforeMatch);
       continue;
     }
+
+    SaveAndRestore<ConversionAction> SavedTranslationState(
+        ParserConversionAction, CA_NoConversion);
+
     if (Keyword == Ident_language) {
       if (HadLanguage) {
         Diag(KeywordLoc, diag::err_external_source_symbol_duplicate_clause)
diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp
index d8ed7e3ff96bd..40bf409124711 100644
--- a/clang/lib/Parse/ParseDeclCXX.cpp
+++ b/clang/lib/Parse/ParseDeclCXX.cpp
@@ -314,7 +314,9 @@ Decl *Parser::ParseNamespaceAlias(SourceLocation 
NamespaceLoc,
 
 Decl *Parser::ParseLinkage(ParsingDeclSpec &DS, DeclaratorContext Context) {
   assert(isTokenStringLiteral() && "Not a string literal!");
-  ExprResult Lang = ParseUnevaluatedStringLiteralExpression();
+  ExprResult Lang = (SaveAndRestore<ConversionAction>(ParserConversionAction,
+                                                      CA_NoConversion),
+                     ParseUnevaluatedStringLiteralExpression());
 
   ParseScope LinkageScope(this, Scope::DeclScope);
   Decl *LinkageSpec =
@@ -995,9 +997,11 @@ Decl *Parser::ParseStaticAssertDeclaration(SourceLocation 
&DeclEnd) {
                ? diag::warn_cxx20_compat_static_assert_user_generated_message
                : diag::ext_cxx_static_assert_user_generated_message);
       AssertMessage = ParseConstantExpressionInExprEvalContext();
-    } else if (tokenIsLikeStringLiteral(Tok, getLangOpts()))
+    } else if (tokenIsLikeStringLiteral(Tok, getLangOpts())) {
+      SaveAndRestore<ConversionAction> SavedTranslationState(
+          ParserConversionAction, CA_NoConversion);
       AssertMessage = ParseUnevaluatedStringLiteralExpression();
-    else {
+    } else {
       Diag(Tok, diag::err_expected_string_literal)
           << /*Source='static_assert'*/ 1;
       SkipMalformedDecl();
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 3515343202de1..0b820532ffb81 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -2988,13 +2988,14 @@ ExprResult Parser::ParseStringLiteralExpression(bool 
AllowUserDefinedLiteral,
 
   if (Unevaluated) {
     assert(!AllowUserDefinedLiteral && "UDL are always evaluated");
-    return Actions.ActOnUnevaluatedStringLiteral(StringToks);
+    return Actions.ActOnUnevaluatedStringLiteral(StringToks,
+                                                 ParserConversionAction);
   }
 
   // Pass the set of string tokens, ready for concatenation, to the actions.
-  return Actions.ActOnStringLiteral(StringToks,
-                                    AllowUserDefinedLiteral ? getCurScope()
-                                                            : nullptr);
+  return Actions.ActOnStringLiteral(
+      StringToks, AllowUserDefinedLiteral ? getCurScope() : nullptr,
+      ParserConversionAction);
 }
 
 ExprResult Parser::ParseGenericSelectionExpression() {
diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp
index a6fc676f23a51..7bfe95bf995d8 100644
--- a/clang/lib/Parse/Parser.cpp
+++ b/clang/lib/Parse/Parser.cpp
@@ -67,6 +67,8 @@ Parser::Parser(Preprocessor &pp, Sema &actions, bool 
skipFunctionBodies)
   NumCachedScopes = 0;
   CurParsedObjCImpl = nullptr;
 
+  ParserConversionAction = CA_ToExecEncoding;
+
   // Add #pragma handlers. These are removed and destroyed in the
   // destructor.
   initializePragmaHandlers();
@@ -1584,6 +1586,8 @@ void Parser::ParseKNRParamDeclarations(Declarator &D) {
 }
 
 ExprResult Parser::ParseAsmStringLiteral(bool ForAsmLabel) {
+  SaveAndRestore<ConversionAction> 
SavedTranslationState(ParserConversionAction,
+                                                         CA_NoConversion);
 
   ExprResult AsmString;
   if (isTokenStringLiteral()) {
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 10f0ec3010c6c..550f1fbdf0ffc 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -2064,14 +2064,15 @@ static ExprResult BuildCookedLiteralOperatorCall(Sema 
&S, Scope *Scope,
   return S.BuildLiteralOperatorCall(R, OpNameInfo, Args, LitEndLoc);
 }
 
-ExprResult Sema::ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks) {
+ExprResult Sema::ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks,
+                                               ConversionAction Action) {
   // StringToks needs backing storage as it doesn't hold array elements itself
   std::vector<Token> ExpandedToks;
   if (getLangOpts().MicrosoftExt)
     StringToks = ExpandedToks = 
ExpandFunctionLocalPredefinedMacros(StringToks);
 
   StringLiteralParser Literal(StringToks, PP,
-                              StringLiteralEvalMethod::Unevaluated);
+                              StringLiteralEvalMethod::Unevaluated, Action);
   if (Literal.hadError)
     return ExprError();
 
@@ -2142,8 +2143,8 @@ Sema::ExpandFunctionLocalPredefinedMacros(ArrayRef<Token> 
Toks) {
   return ExpandedToks;
 }
 
-ExprResult
-Sema::ActOnStringLiteral(ArrayRef<Token> StringToks, Scope *UDLScope) {
+ExprResult Sema::ActOnStringLiteral(ArrayRef<Token> StringToks, Scope 
*UDLScope,
+                                    ConversionAction Action) {
   assert(!StringToks.empty() && "Must have at least one string!");
 
   // StringToks needs backing storage as it doesn't hold array elements itself
@@ -2151,7 +2152,8 @@ Sema::ActOnStringLiteral(ArrayRef<Token> StringToks, 
Scope *UDLScope) {
   if (getLangOpts().MicrosoftExt)
     StringToks = ExpandedToks = 
ExpandFunctionLocalPredefinedMacros(StringToks);
 
-  StringLiteralParser Literal(StringToks, PP);
+  StringLiteralParser Literal(StringToks, PP,
+                              StringLiteralEvalMethod::Evaluated, Action);
   if (Literal.hadError)
     return ExprError();
 

>From 079f4517f693f1b290b5802bb49daec42add5943 Mon Sep 17 00:00:00 2001
From: Abhina Sreeskantharajan <[email protected]>
Date: Thu, 27 Nov 2025 08:45:45 -0500
Subject: [PATCH 2/2] add format string handling

(cherry picked from commit 20a6fdfe3045eebaf1acc4fff7269c66e85e10c3)
---
 clang/include/clang/AST/Expr.h             |   6 +
 clang/include/clang/AST/FormatString.h     |  13 +-
 clang/include/clang/Basic/TargetInfo.h     |   3 +
 clang/include/clang/Lex/LiteralConverter.h |   2 +-
 clang/lib/AST/Expr.cpp                     |  15 ++
 clang/lib/AST/FormatString.cpp             | 242 +++++++++++----------
 clang/lib/AST/FormatStringParsing.h        |  39 ++--
 clang/lib/AST/PrintfFormatString.cpp       | 149 +++++++------
 clang/lib/AST/ScanfFormatString.cpp        |  31 ++-
 clang/lib/Basic/TargetInfo.cpp             |   3 +
 clang/lib/Lex/LiteralConverter.cpp         |  10 +-
 clang/lib/Sema/SemaChecking.cpp            |  76 ++++---
 clang/lib/Sema/SemaExpr.cpp                |   5 +-
 clang/test/CodeGen/systemz-charset.c       |   8 +
 llvm/include/llvm/Support/TextEncoding.h   |  10 +
 llvm/lib/Support/TextEncoding.cpp          |  19 ++
 16 files changed, 376 insertions(+), 255 deletions(-)

diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 573cc72db35c6..7d1ac3193812f 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -28,6 +28,7 @@
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/SyncScope.h"
 #include "clang/Basic/TypeTraits.h"
+#include "clang/Lex/LiteralConverter.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/SmallVector.h"
@@ -2063,6 +2064,11 @@ class PredefinedExpr final
     return getIdentKindName(getIdentKind());
   }
 
+  static std::string
+  ComputeNameAndTranslate(PredefinedIdentKind IK, const Decl *CurrentDecl,
+                          LiteralConverter &LiteralConv,
+                          bool ForceElaboratedPrinting = false);
+
   static std::string ComputeName(PredefinedIdentKind IK,
                                  const Decl *CurrentDecl,
                                  bool ForceElaboratedPrinting = false);
diff --git a/clang/include/clang/AST/FormatString.h 
b/clang/include/clang/AST/FormatString.h
index a284f2c44d633..12083a0d00b4b 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -19,6 +19,7 @@
 #define LLVM_CLANG_AST_FORMATSTRING_H
 
 #include "clang/AST/CanonicalType.h"
+#include "llvm/Support/TextEncoding.h"
 #include <optional>
 
 namespace clang {
@@ -744,9 +745,9 @@ class FormatStringHandler {
   // Printf-specific handlers.
 
   virtual bool HandleInvalidPrintfConversionSpecifier(
-                                      const analyze_printf::PrintfSpecifier 
&FS,
-                                      const char *startSpecifier,
-                                      unsigned specifierLen) {
+      const analyze_printf::PrintfSpecifier &FS, const char *startSpecifier,
+      unsigned specifierLen,
+      const llvm::TextEncodingConverter &FormatStrConverter) {
     return true;
   }
 
@@ -763,9 +764,9 @@ class FormatStringHandler {
     // Scanf-specific handlers.
 
   virtual bool HandleInvalidScanfConversionSpecifier(
-                                        const analyze_scanf::ScanfSpecifier 
&FS,
-                                        const char *startSpecifier,
-                                        unsigned specifierLen) {
+      const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier,
+      unsigned specifierLen,
+      const llvm::TextEncodingConverter &FormatStrConverter) {
     return true;
   }
 
diff --git a/clang/include/clang/Basic/TargetInfo.h 
b/clang/include/clang/Basic/TargetInfo.h
index 1c16f9f79ae68..b3d507e1170dc 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -38,6 +38,7 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/TextEncoding.h"
 #include "llvm/Support/VersionTuple.h"
 #include "llvm/TargetParser/Triple.h"
 #include <cassert>
@@ -320,6 +321,8 @@ class TargetInfo : public TransferrableTargetInfo,
 
   virtual ~TargetInfo();
 
+  llvm::TextEncodingConverter *FormatStrConverter;
+
   /// Retrieve the target options.
   TargetOptions &getTargetOpts() const {
     assert(TargetOpts && "Missing target options");
diff --git a/clang/include/clang/Lex/LiteralConverter.h 
b/clang/include/clang/Lex/LiteralConverter.h
index 6a66d2d0ff707..ba6fb6c87a782 100644
--- a/clang/include/clang/Lex/LiteralConverter.h
+++ b/clang/include/clang/Lex/LiteralConverter.h
@@ -34,7 +34,7 @@ class LiteralConverter {
   static std::error_code
   setConvertersFromOptions(LiteralConverter &LiteralConv,
                            const clang::LangOptions &Opts,
-                           const clang::TargetInfo &TInfo);
+                           clang::TargetInfo &TInfo);
 };
 
 #endif
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 1d914fa876759..d9765f4a73fcd 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -667,6 +667,21 @@ StringRef 
PredefinedExpr::getIdentKindName(PredefinedIdentKind IK) {
   llvm_unreachable("Unknown ident kind for PredefinedExpr");
 }
 
+std::string PredefinedExpr::ComputeNameAndTranslate(
+    PredefinedIdentKind IK, const Decl *CurrentDecl,
+    LiteralConverter &LiteralConv, bool ForceElaboratedPrinting) {
+  using namespace clang::charinfo;
+  std::string Result = ComputeName(IK, CurrentDecl, ForceElaboratedPrinting);
+  llvm::TextEncodingConverter *Converter =
+      LiteralConv.getConverter(CA_ToExecEncoding);
+  if (Converter) {
+    SmallString<128> Converted;
+    Converter->convert(Result, Converted);
+    Result = std::string(Converted);
+  }
+  return Result;
+}
+
 // FIXME: Maybe this should use DeclPrinter with a special "print predefined
 // expr" policy instead.
 std::string PredefinedExpr::ComputeName(PredefinedIdentKind IK,
diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp
index d4cb89b43ae87..be0f527da92e5 100644
--- a/clang/lib/AST/FormatString.cpp
+++ b/clang/lib/AST/FormatString.cpp
@@ -33,8 +33,9 @@ FormatStringHandler::~FormatStringHandler() {}
 // scanf format strings.
 
//===----------------------------------------------------------------------===//
 
-OptionalAmount
-clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) {
+OptionalAmount clang::analyze_format_string::ParseAmount(
+    const char *&Beg, const char *E,
+    const llvm::TextEncodingConverter &FormatStrConverter) {
   const char *I = Beg;
   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
 
@@ -42,7 +43,7 @@ clang::analyze_format_string::ParseAmount(const char *&Beg, 
const char *E) {
   bool hasDigits = false;
 
   for ( ; I != E; ++I) {
-    char c = *I;
+    char c = FormatStrConverter.convert(*I);
     if (c >= '0' && c <= '9') {
       hasDigits = true;
       accumulator = (accumulator * 10) + (c - '0');
@@ -59,27 +60,23 @@ clang::analyze_format_string::ParseAmount(const char *&Beg, 
const char *E) {
   return OptionalAmount();
 }
 
-OptionalAmount
-clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg,
-                                                     const char *E,
-                                                     unsigned &argIndex) {
-  if (*Beg == '*') {
+OptionalAmount clang::analyze_format_string::ParseNonPositionAmount(
+    const char *&Beg, const char *E, unsigned &argIndex,
+    const llvm::TextEncodingConverter &FormatStrConverter) {
+  if (FormatStrConverter.convert(*Beg) == '*') {
     ++Beg;
     return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false);
   }
 
-  return ParseAmount(Beg, E);
+  return ParseAmount(Beg, E, FormatStrConverter);
 }
 
-OptionalAmount
-clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H,
-                                                  const char *Start,
-                                                  const char *&Beg,
-                                                  const char *E,
-                                                  PositionContext p) {
-  if (*Beg == '*') {
+OptionalAmount clang::analyze_format_string::ParsePositionAmount(
+    FormatStringHandler &H, const char *Start, const char *&Beg, const char *E,
+    PositionContext p, const llvm::TextEncodingConverter &FormatStrConverter) {
+  if (FormatStrConverter.convert(*Beg) == '*') {
     const char *I = Beg + 1;
-    const OptionalAmount &Amt = ParseAmount(I, E);
+    const OptionalAmount &Amt = ParseAmount(I, E, FormatStrConverter);
 
     if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) {
       H.HandleInvalidPosition(Beg, I - Beg, p);
@@ -94,7 +91,7 @@ 
clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H,
 
     assert(Amt.getHowSpecified() == OptionalAmount::Constant);
 
-    if (*I == '$') {
+    if (FormatStrConverter.convert(*I) == '$') {
       // Handle positional arguments
 
       // Special case: '*0$', since this is an easy mistake.
@@ -114,24 +111,22 @@ 
clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H,
     return OptionalAmount(false);
   }
 
-  return ParseAmount(Beg, E);
+  return ParseAmount(Beg, E, FormatStrConverter);
 }
 
-
-bool
-clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H,
-                                              FormatSpecifier &CS,
-                                              const char *Start,
-                                              const char *&Beg, const char *E,
-                                              unsigned *argIndex) {
+bool clang::analyze_format_string::ParseFieldWidth(
+    FormatStringHandler &H, FormatSpecifier &CS, const char *Start,
+    const char *&Beg, const char *E, unsigned *argIndex,
+    const llvm::TextEncodingConverter &FormatStrConverter) {
   // FIXME: Support negative field widths.
   if (argIndex) {
-    CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
+    CS.setFieldWidth(
+        ParseNonPositionAmount(Beg, E, *argIndex, FormatStrConverter));
   }
   else {
-    const OptionalAmount Amt =
-      ParsePositionAmount(H, Start, Beg, E,
-                          analyze_format_string::FieldWidthPos);
+    const OptionalAmount Amt = ParsePositionAmount(
+        H, Start, Beg, E, analyze_format_string::FieldWidthPos,
+        FormatStrConverter);
 
     if (Amt.isInvalid())
       return true;
@@ -140,15 +135,13 @@ 
clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H,
   return false;
 }
 
-bool
-clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H,
-                                               FormatSpecifier &FS,
-                                               const char *Start,
-                                               const char *&Beg,
-                                               const char *E) {
+bool clang::analyze_format_string::ParseArgPosition(
+    FormatStringHandler &H, FormatSpecifier &FS, const char *Start,
+    const char *&Beg, const char *E,
+    const llvm::TextEncodingConverter &FormatStrConverter) {
   const char *I = Beg;
 
-  const OptionalAmount &Amt = ParseAmount(I, E);
+  const OptionalAmount &Amt = ParseAmount(I, E, FormatStrConverter);
 
   if (I == E) {
     // No more characters left?
@@ -156,7 +149,8 @@ 
clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H,
     return true;
   }
 
-  if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') {
+  if (Amt.getHowSpecified() == OptionalAmount::Constant &&
+      FormatStrConverter.convert(*(I++)) == '$') {
     // Warn that positional arguments are non-standard.
     H.HandlePosition(Start, I - Start);
 
@@ -177,17 +171,15 @@ 
clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H,
   return false;
 }
 
-bool
-clang::analyze_format_string::ParseVectorModifier(FormatStringHandler &H,
-                                                  FormatSpecifier &FS,
-                                                  const char *&I,
-                                                  const char *E,
-                                                  const LangOptions &LO) {
+bool clang::analyze_format_string::ParseVectorModifier(
+    FormatStringHandler &H, FormatSpecifier &FS, const char *&I, const char *E,
+    const LangOptions &LO,
+    const llvm::TextEncodingConverter &FormatStrConverter) {
   if (!LO.OpenCL)
     return false;
 
   const char *Start = I;
-  if (*I == 'v') {
+  if (FormatStrConverter.convert(*I) == 'v') {
     ++I;
 
     if (I == E) {
@@ -195,7 +187,7 @@ 
clang::analyze_format_string::ParseVectorModifier(FormatStringHandler &H,
       return true;
     }
 
-    OptionalAmount NumElts = ParseAmount(I, E);
+    OptionalAmount NumElts = ParseAmount(I, E, FormatStrConverter);
     if (NumElts.getHowSpecified() != OptionalAmount::Constant) {
       H.HandleIncompleteSpecifier(Start, E - Start);
       return true;
@@ -207,86 +199,104 @@ 
clang::analyze_format_string::ParseVectorModifier(FormatStringHandler &H,
   return false;
 }
 
-bool
-clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
-                                                  const char *&I,
-                                                  const char *E,
-                                                  const LangOptions &LO,
-                                                  bool IsScanf) {
+bool clang::analyze_format_string::ParseLengthModifier(
+    FormatSpecifier &FS, const char *&I, const char *E, const LangOptions &LO,
+    const llvm::TextEncodingConverter &FormatStrConverter, bool IsScanf) {
   LengthModifier::Kind lmKind = LengthModifier::None;
   const char *lmPosition = I;
-  switch (*I) {
-    default:
-      return false;
-    case 'h':
+  switch (FormatStrConverter.convert(*I)) {
+  default:
+    return false;
+  case 'h':
+    ++I;
+    if (I != E && FormatStrConverter.convert(*I) == 'h') {
       ++I;
-      if (I != E && *I == 'h') {
-        ++I;
-        lmKind = LengthModifier::AsChar;
-      } else if (I != E && *I == 'l' && LO.OpenCL) {
-        ++I;
-        lmKind = LengthModifier::AsShortLong;
-      } else {
-        lmKind = LengthModifier::AsShort;
-      }
-      break;
-    case 'l':
+      lmKind = LengthModifier::AsChar;
+    } else if (I != E && FormatStrConverter.convert(*I) == 'l' && LO.OpenCL) {
+      ++I;
+      lmKind = LengthModifier::AsShortLong;
+    } else {
+      lmKind = LengthModifier::AsShort;
+    }
+    break;
+  case 'l':
+    ++I;
+    if (I != E && FormatStrConverter.convert(*I) == 'l') {
+      ++I;
+      lmKind = LengthModifier::AsLongLong;
+    } else {
+      lmKind = LengthModifier::AsLong;
+    }
+    break;
+  case 'j':
+    lmKind = LengthModifier::AsIntMax;
+    ++I;
+    break;
+  case 'z':
+    lmKind = LengthModifier::AsSizeT;
+    ++I;
+    break;
+  case 't':
+    lmKind = LengthModifier::AsPtrDiff;
+    ++I;
+    break;
+  case 'L':
+    lmKind = LengthModifier::AsLongDouble;
+    ++I;
+    break;
+  case 'q':
+    lmKind = LengthModifier::AsQuad;
+    ++I;
+    break;
+  case 'a':
+    if (IsScanf && !LO.C99 && !LO.CPlusPlus11) {
+      // For scanf in C90, look at the next character to see if this should
+      // be parsed as the GNU extension 'a' length modifier. If not, this
+      // will be parsed as a conversion specifier.
       ++I;
-      if (I != E && *I == 'l') {
-        ++I;
-        lmKind = LengthModifier::AsLongLong;
-      } else {
-        lmKind = LengthModifier::AsLong;
+      if (I != E && (FormatStrConverter.convert(*I) == 's' ||
+                     FormatStrConverter.convert(*I) == 'S' ||
+                     FormatStrConverter.convert(*I) == '[')) {
+        lmKind = LengthModifier::AsAllocate;
+        break;
       }
+      --I;
+    }
+    return false;
+  case 'm':
+    if (IsScanf) {
+      lmKind = LengthModifier::AsMAllocate;
+      ++I;
       break;
-    case 'j': lmKind = LengthModifier::AsIntMax;     ++I; break;
-    case 'z': lmKind = LengthModifier::AsSizeT;      ++I; break;
-    case 't': lmKind = LengthModifier::AsPtrDiff;    ++I; break;
-    case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break;
-    case 'q': lmKind = LengthModifier::AsQuad;       ++I; break;
-    case 'a':
-      if (IsScanf && !LO.C99 && !LO.CPlusPlus11) {
-        // For scanf in C90, look at the next character to see if this should
-        // be parsed as the GNU extension 'a' length modifier. If not, this
-        // will be parsed as a conversion specifier.
-        ++I;
-        if (I != E && (*I == 's' || *I == 'S' || *I == '[')) {
-          lmKind = LengthModifier::AsAllocate;
-          break;
-        }
-        --I;
-      }
-      return false;
-    case 'm':
-      if (IsScanf) {
-        lmKind = LengthModifier::AsMAllocate;
-        ++I;
+    }
+    return false;
+  // printf: AsInt64, AsInt32, AsInt3264
+  // scanf:  AsInt64
+  case 'I':
+    if (I + 1 != E && I + 2 != E) {
+      if (FormatStrConverter.convert(I[1]) == '6' &&
+          FormatStrConverter.convert(I[2]) == '4') {
+        I += 3;
+        lmKind = LengthModifier::AsInt64;
         break;
       }
-      return false;
-    // printf: AsInt64, AsInt32, AsInt3264
-    // scanf:  AsInt64
-    case 'I':
-      if (I + 1 != E && I + 2 != E) {
-        if (I[1] == '6' && I[2] == '4') {
-          I += 3;
-          lmKind = LengthModifier::AsInt64;
-          break;
-        }
-        if (IsScanf)
-          return false;
+      if (IsScanf)
+        return false;
 
-        if (I[1] == '3' && I[2] == '2') {
-          I += 3;
-          lmKind = LengthModifier::AsInt32;
-          break;
-        }
+      if (FormatStrConverter.convert(I[1]) == '3' &&
+          FormatStrConverter.convert(I[2]) == '2') {
+        I += 3;
+        lmKind = LengthModifier::AsInt32;
+        break;
       }
-      ++I;
-      lmKind = LengthModifier::AsInt3264;
-      break;
-    case 'w':
-      lmKind = LengthModifier::AsWide; ++I; break;
+    }
+    ++I;
+    lmKind = LengthModifier::AsInt3264;
+    break;
+  case 'w':
+    lmKind = LengthModifier::AsWide;
+    ++I;
+    break;
   }
   LengthModifier lm(lmPosition, lmKind);
   FS.setLengthModifier(lm);
diff --git a/clang/lib/AST/FormatStringParsing.h 
b/clang/lib/AST/FormatStringParsing.h
index 764e5d46394d7..7ad6d4b98d2ac 100644
--- a/clang/lib/AST/FormatStringParsing.h
+++ b/clang/lib/AST/FormatStringParsing.h
@@ -37,31 +37,38 @@ class UpdateOnReturn {
 
 namespace analyze_format_string {
 
-OptionalAmount ParseAmount(const char *&Beg, const char *E);
-OptionalAmount ParseNonPositionAmount(const char *&Beg, const char *E,
-                                      unsigned &argIndex);
+OptionalAmount
+ParseAmount(const char *&Beg, const char *E,
+            const llvm::TextEncodingConverter &FormatStrConverter);
 
-OptionalAmount ParsePositionAmount(FormatStringHandler &H,
-                                   const char *Start, const char *&Beg,
-                                   const char *E, PositionContext p);
+OptionalAmount
+ParseNonPositionAmount(const char *&Beg, const char *E, unsigned &argIndex,
+                       const llvm::TextEncodingConverter &FormatStrConverter);
 
-bool ParseFieldWidth(FormatStringHandler &H,
-                     FormatSpecifier &CS,
+OptionalAmount
+ParsePositionAmount(FormatStringHandler &H, const char *Start, const char 
*&Beg,
+                    const char *E, PositionContext p,
+                    const llvm::TextEncodingConverter &FormatStrConverter);
+
+bool ParseFieldWidth(FormatStringHandler &H, FormatSpecifier &CS,
                      const char *Start, const char *&Beg, const char *E,
-                     unsigned *argIndex);
+                     unsigned *argIndex,
+                     const llvm::TextEncodingConverter &FormatStrConverter);
 
-bool ParseArgPosition(FormatStringHandler &H,
-                      FormatSpecifier &CS, const char *Start,
-                      const char *&Beg, const char *E);
+bool ParseArgPosition(FormatStringHandler &H, FormatSpecifier &CS,
+                      const char *Start, const char *&Beg, const char *E,
+                      const llvm::TextEncodingConverter &FormatStrConverter);
 
-bool ParseVectorModifier(FormatStringHandler &H,
-                         FormatSpecifier &FS, const char *&Beg, const char *E,
-                         const LangOptions &LO);
+bool ParseVectorModifier(FormatStringHandler &H, FormatSpecifier &FS,
+                         const char *&Beg, const char *E, const LangOptions 
&LO,
+                         const llvm::TextEncodingConverter 
&FormatStrConverter);
 
 /// Returns true if a LengthModifier was parsed and installed in the
 /// FormatSpecifier& argument, and false otherwise.
 bool ParseLengthModifier(FormatSpecifier &FS, const char *&Beg, const char *E,
-                         const LangOptions &LO, bool IsScanf = false);
+                         const LangOptions &LO,
+                         const llvm::TextEncodingConverter &FormatStrConverter,
+                         bool IsScanf = false);
 
 /// Returns true if the invalid specifier in \p SpecifierBegin is a UTF-8
 /// string; check that it won't go further than \p FmtStrEnd and write
diff --git a/clang/lib/AST/PrintfFormatString.cpp 
b/clang/lib/AST/PrintfFormatString.cpp
index 855550475721a..dc32e3d020f7e 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -35,14 +35,17 @@ typedef 
clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
 
 using analyze_format_string::ParseNonPositionAmount;
 
-static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
-                           const char *Start, const char *&Beg, const char *E,
-                           unsigned *argIndex) {
+static bool
+ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS, const char *Start,
+               const char *&Beg, const char *E, unsigned *argIndex,
+               const llvm::TextEncodingConverter &FormatStrConverter) {
   if (argIndex) {
-    FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
+    FS.setPrecision(
+        ParseNonPositionAmount(Beg, E, *argIndex, FormatStrConverter));
   } else {
-    const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
-                                           analyze_format_string::PrecisionPos);
+    const OptionalAmount Amt = ParsePositionAmount(
+        H, Start, Beg, E, analyze_format_string::PrecisionPos,
+        FormatStrConverter);
     if (Amt.isInvalid())
       return true;
     FS.setPrecision(Amt);
@@ -50,32 +53,32 @@ static bool ParsePrecision(FormatStringHandler &H, 
PrintfSpecifier &FS,
   return false;
 }
 
-static bool ParseObjCFlags(FormatStringHandler &H, PrintfSpecifier &FS,
-                           const char *FlagBeg, const char *E, bool Warn) {
-   StringRef Flag(FlagBeg, E - FlagBeg);
-   // Currently there is only one flag.
-   if (Flag == "tt") {
-     FS.setHasObjCTechnicalTerm(FlagBeg);
-     return false;
-   }
-   // Handle either the case of no flag or an invalid flag.
-   if (Warn) {
-     if (Flag == "")
-       H.HandleEmptyObjCModifierFlag(FlagBeg, E  - FlagBeg);
-     else
-       H.HandleInvalidObjCModifierFlag(FlagBeg, E  - FlagBeg);
-   }
-   return true;
+static bool
+ParseObjCFlags(FormatStringHandler &H, PrintfSpecifier &FS, const char *FlagBeg,
+               const char *E, bool Warn,
+               const llvm::TextEncodingConverter &FormatStrConverter) {
+    StringRef Flag(FlagBeg, E - FlagBeg);
+    // Currently there is only one flag.
+    if (Flag.size() == 2 && FormatStrConverter.convert(FlagBeg[0]) == 't' &&
+        FormatStrConverter.convert(FlagBeg[1]) == 't') {
+      FS.setHasObjCTechnicalTerm(FlagBeg);
+      return false;
+    }
+    // Handle either the case of no flag or an invalid flag.
+    if (Warn) {
+      if (Flag == "")
+        H.HandleEmptyObjCModifierFlag(FlagBeg, E - FlagBeg);
+      else
+        H.HandleInvalidObjCModifierFlag(FlagBeg, E - FlagBeg);
+    }
+    return true;
 }
 
-static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
-                                                  const char *&Beg,
-                                                  const char *E,
-                                                  unsigned &argIndex,
-                                                  const LangOptions &LO,
-                                                  const TargetInfo &Target,
-                                                  bool Warn,
-                                                  bool isFreeBSDKPrintf) {
+static PrintfSpecifierResult
+ParsePrintfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E,
+                     unsigned &argIndex, const LangOptions &LO,
+                     const TargetInfo &Target, bool Warn,
+                     bool isFreeBSDKPrintf) {
 
   using namespace clang::analyze_format_string;
   using namespace clang::analyze_printf;
@@ -84,6 +87,8 @@ static PrintfSpecifierResult 
ParsePrintfSpecifier(FormatStringHandler &H,
   const char *Start = nullptr;
   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
 
+  const llvm::TextEncodingConverter &FormatStrConverter =
+      *Target.FormatStrConverter;
   // Look for a '%' character that indicates the start of a format specifier.
   for ( ; I != E ; ++I) {
     char c = *I;
@@ -92,7 +97,7 @@ static PrintfSpecifierResult 
ParsePrintfSpecifier(FormatStringHandler &H,
       H.HandleNullChar(I);
       return true;
     }
-    if (c == '%') {
+    if (FormatStrConverter.convert(c) == '%') {
       Start = I++;  // Record the start of the format specifier.
       break;
     }
@@ -110,7 +115,7 @@ static PrintfSpecifierResult 
ParsePrintfSpecifier(FormatStringHandler &H,
   }
 
   PrintfSpecifier FS;
-  if (ParseArgPosition(H, FS, Start, I, E))
+  if (ParseArgPosition(H, FS, Start, I, E, FormatStrConverter))
     return true;
 
   if (I == E) {
@@ -120,13 +125,17 @@ static PrintfSpecifierResult 
ParsePrintfSpecifier(FormatStringHandler &H,
     return true;
   }
 
-  if (*I == '{') {
+  if (FormatStrConverter.convert(*I) == '{') {
     ++I;
     unsigned char PrivacyFlags = 0;
     StringRef MatchedStr;
 
     do {
-      StringRef Str(I, E - I);
+      const char *II;
+      std::string S(I, E - I);
+      for (unsigned long i = 0; i < S.length(); ++i)
+        S[i] = FormatStrConverter.convert(S[i]);
+      StringRef Str(S);
       std::string Match = "^[[:space:]]*"
                           "(private|public|sensitive|mask\\.[^[:space:],}]*)"
                           "[[:space:]]*(,|})";
@@ -135,25 +144,38 @@ static PrintfSpecifierResult 
ParsePrintfSpecifier(FormatStringHandler &H,
 
       if (R.match(Str, &Matches)) {
         MatchedStr = Matches[1];
+        II = I;
         I += Matches[0].size();
 
+        while (FormatStrConverter.convert(*II) == ' ')
+          ++II;
+
         // Set the privacy flag if the privacy annotation in the
         // comma-delimited segment is at least as strict as the privacy
         // annotations in previous comma-delimited segments.
         if (MatchedStr.starts_with("mask")) {
-          StringRef MaskType = MatchedStr.substr(sizeof("mask.") - 1);
+          StringRef MaskType(II + sizeof("mask.") - 1,
+                             MatchedStr.size() - sizeof("mask.") + 1);
           unsigned Size = MaskType.size();
+
           if (Warn && (Size == 0 || Size > 8))
             H.handleInvalidMaskType(MaskType);
           FS.setMaskType(MaskType);
-        } else if (MatchedStr == "sensitive")
+        } else if (MatchedStr == "sensitive") {
+          StringRef ProxyMatchedStr(II, sizeof("sensitive") - 1);
+          MatchedStr = ProxyMatchedStr;
           PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsSensitive;
-        else if (PrivacyFlags !=
-                     clang::analyze_os_log::OSLogBufferItem::IsSensitive &&
-                 MatchedStr == "private")
+        } else if (PrivacyFlags !=
+                       clang::analyze_os_log::OSLogBufferItem::IsSensitive &&
+                   MatchedStr == "private") {
+          StringRef ProxyMatchedStr(II, sizeof("private") - 1);
+          MatchedStr = ProxyMatchedStr;
           PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsPrivate;
-        else if (PrivacyFlags == 0 && MatchedStr == "public")
+        } else if (PrivacyFlags == 0 && MatchedStr == "public") {
+          StringRef ProxyMatchedStr(II, sizeof("public") - 1);
+          MatchedStr = ProxyMatchedStr;
           PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsPublic;
+        }
       } else {
         size_t CommaOrBracePos =
             Str.find_if([](char c) { return c == ',' || c == '}'; });
@@ -168,7 +190,7 @@ static PrintfSpecifierResult 
ParsePrintfSpecifier(FormatStringHandler &H,
         I += CommaOrBracePos + 1;
       }
       // Continue until the closing brace is found.
-    } while (*(I - 1) == ',');
+    } while (FormatStrConverter.convert(*(I - 1)) == ',');
 
     // Set the privacy flag.
     switch (PrivacyFlags) {
@@ -191,7 +213,7 @@ static PrintfSpecifierResult 
ParsePrintfSpecifier(FormatStringHandler &H,
   // Look for flags (if any).
   bool hasMore = true;
   for ( ; I != E; ++I) {
-    switch (*I) {
+    switch (FormatStrConverter.convert(*I)) {
       default: hasMore = false; break;
       case '\'':
         // FIXME: POSIX specific.  Always accept?
@@ -216,7 +238,8 @@ static PrintfSpecifierResult 
ParsePrintfSpecifier(FormatStringHandler &H,
 
   // Look for the field width (if any).
   if (ParseFieldWidth(H, FS, Start, I, E,
-                      FS.usesPositionalArg() ? nullptr : &argIndex))
+                      FS.usesPositionalArg() ? nullptr : &argIndex,
+                      FormatStrConverter))
     return true;
 
   if (I == E) {
@@ -227,7 +250,7 @@ static PrintfSpecifierResult 
ParsePrintfSpecifier(FormatStringHandler &H,
   }
 
   // Look for the precision (if any).
-  if (*I == '.') {
+  if (FormatStrConverter.convert(*I) == '.') {
     ++I;
     if (I == E) {
       if (Warn)
@@ -236,7 +259,8 @@ static PrintfSpecifierResult 
ParsePrintfSpecifier(FormatStringHandler &H,
     }
 
     if (ParsePrecision(H, FS, Start, I, E,
-                       FS.usesPositionalArg() ? nullptr : &argIndex))
+                       FS.usesPositionalArg() ? nullptr : &argIndex,
+                       FormatStrConverter))
       return true;
 
     if (I == E) {
@@ -247,11 +271,11 @@ static PrintfSpecifierResult 
ParsePrintfSpecifier(FormatStringHandler &H,
     }
   }
 
-  if (ParseVectorModifier(H, FS, I, E, LO))
+  if (ParseVectorModifier(H, FS, I, E, LO, FormatStrConverter))
     return true;
 
   // Look for the length modifier.
-  if (ParseLengthModifier(FS, I, E, LO) && I == E) {
+  if (ParseLengthModifier(FS, I, E, LO, FormatStrConverter) && I == E) {
     // No more characters left?
     if (Warn)
       H.HandleIncompleteSpecifier(Start, E - Start);
@@ -266,7 +290,7 @@ static PrintfSpecifierResult 
ParsePrintfSpecifier(FormatStringHandler &H,
   // these flags are applicable until later.
   const char *ObjCModifierFlagsStart = nullptr,
              *ObjCModifierFlagsEnd = nullptr;
-  if (*I == '[') {
+  if (FormatStrConverter.convert(*I) == '[') {
     ObjCModifierFlagsStart = I;
     ++I;
     auto flagStart = I;
@@ -278,8 +302,8 @@ static PrintfSpecifierResult 
ParsePrintfSpecifier(FormatStringHandler &H,
         return true;
       }
       // Did we find the closing ']'?
-      if (*I == ']') {
-        if (ParseObjCFlags(H, FS, flagStart, I, Warn))
+      if (FormatStrConverter.convert(*I) == ']') {
+        if (ParseObjCFlags(H, FS, flagStart, I, Warn, FormatStrConverter))
           return true;
         ++I;
         break;
@@ -299,7 +323,7 @@ static PrintfSpecifierResult 
ParsePrintfSpecifier(FormatStringHandler &H,
   // Finally, look for the conversion specifier.
   const char *conversionPosition = I++;
   ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
-  switch (*conversionPosition) {
+  switch (FormatStrConverter.convert(*conversionPosition)) {
     default:
       break;
     // C99: 7.19.6.1 (section 8).
@@ -418,25 +442,21 @@ static PrintfSpecifierResult 
ParsePrintfSpecifier(FormatStringHandler &H,
       FS.setConversionSpecifier(CS);
     }
     // Assume the conversion takes one argument.
-    return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, Len);
+    return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, Len,
+                                                     FormatStrConverter);
   }
   return PrintfSpecifierResult(Start, FS);
 }
 
-bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
-                                                     const char *I,
-                                                     const char *E,
-                                                     const LangOptions &LO,
-                                                     const TargetInfo &Target,
-                                                     bool isFreeBSDKPrintf) {
+bool clang::analyze_format_string::ParsePrintfString(
+    FormatStringHandler &H, const char *I, const char *E, const LangOptions &LO,
+    const TargetInfo &Target, bool isFreeBSDKPrintf) {
 
   unsigned argIndex = 0;
-
   // Keep looking for a format specifier until we have exhausted the string.
   while (I != E) {
-    const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
-                                                            LO, Target, true,
-                                                            isFreeBSDKPrintf);
+    const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(
+        H, I, E, argIndex, LO, Target, true, isFreeBSDKPrintf);
     // Did a fail-stop error of any kind occur when parsing the specifier?
     // If so, don't do any more processing.
     if (FSR.shouldStop())
@@ -464,9 +484,8 @@ bool 
clang::analyze_format_string::ParseFormatStringHasSArg(const char *I,
   // Keep looking for a %s format specifier until we have exhausted the string.
   FormatStringHandler H;
   while (I != E) {
-    const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
-                                                            LO, Target, false,
-                                                            false);
+    const PrintfSpecifierResult &FSR =
+        ParsePrintfSpecifier(H, I, E, argIndex, LO, Target, false, false);
     // Did a fail-stop error of any kind occur when parsing the specifier?
     // If so, don't do any more processing.
     if (FSR.shouldStop())
diff --git a/clang/lib/AST/ScanfFormatString.cpp 
b/clang/lib/AST/ScanfFormatString.cpp
index 41cf71a3e042d..1954beeed371c 100644
--- a/clang/lib/AST/ScanfFormatString.cpp
+++ b/clang/lib/AST/ScanfFormatString.cpp
@@ -83,8 +83,9 @@ static ScanfSpecifierResult 
ParseScanfSpecifier(FormatStringHandler &H,
   const char *I = Beg;
   const char *Start = nullptr;
   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
-
-    // Look for a '%' character that indicates the start of a format specifier.
+  const llvm::TextEncodingConverter &FormatStrConverter =
+      *Target.FormatStrConverter;
+  // Look for a '%' character that indicates the start of a format specifier.
   for ( ; I != E ; ++I) {
     char c = *I;
     if (c == '\0') {
@@ -92,7 +93,9 @@ static ScanfSpecifierResult 
ParseScanfSpecifier(FormatStringHandler &H,
       H.HandleNullChar(I);
       return true;
     }
-    if (c == '%') {
+    SmallString<1> ConvertedChar;
+    FormatStrConverter.convert(StringRef(&c, 1), ConvertedChar);
+    if (ConvertedChar[0] == '%') {
       Start = I++;  // Record the start of the format specifier.
       break;
     }
@@ -109,7 +112,7 @@ static ScanfSpecifierResult 
ParseScanfSpecifier(FormatStringHandler &H,
   }
 
   ScanfSpecifier FS;
-  if (ParseArgPosition(H, FS, Start, I, E))
+  if (ParseArgPosition(H, FS, Start, I, E, FormatStrConverter))
     return true;
 
   if (I == E) {
@@ -119,7 +122,7 @@ static ScanfSpecifierResult 
ParseScanfSpecifier(FormatStringHandler &H,
   }
 
   // Look for '*' flag if it is present.
-  if (*I == '*') {
+  if (FormatStrConverter.convert(*I) == '*') {
     FS.setSuppressAssignment(I);
     if (++I == E) {
       H.HandleIncompleteSpecifier(Start, E - Start);
@@ -129,7 +132,8 @@ static ScanfSpecifierResult 
ParseScanfSpecifier(FormatStringHandler &H,
 
   // Look for the field width (if any).  Unlike printf, this is either
   // a fixed integer or isn't present.
-  const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
+  const OptionalAmount &Amt =
+      clang::analyze_format_string::ParseAmount(I, E, FormatStrConverter);
   if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
     assert(Amt.getHowSpecified() == OptionalAmount::Constant);
     FS.setFieldWidth(Amt);
@@ -142,8 +146,10 @@ static ScanfSpecifierResult 
ParseScanfSpecifier(FormatStringHandler &H,
   }
 
   // Look for the length modifier.
-  if (ParseLengthModifier(FS, I, E, LO, /*IsScanf=*/true) && I == E) {
-      // No more characters left?
+  if (ParseLengthModifier(FS, I, E, LO, FormatStrConverter,
+                          /*IsScanf=*/true) &&
+      I == E) {
+    // No more characters left?
     H.HandleIncompleteSpecifier(Start, E - Start);
     return true;
   }
@@ -157,7 +163,7 @@ static ScanfSpecifierResult 
ParseScanfSpecifier(FormatStringHandler &H,
   // Finally, look for the conversion specifier.
   const char *conversionPosition = I++;
   ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
-  switch (*conversionPosition) {
+  switch (FormatStrConverter.convert(*conversionPosition)) {
     default:
       break;
     case '%': k = ConversionSpecifier::PercentArg;   break;
@@ -218,7 +224,8 @@ static ScanfSpecifierResult 
ParseScanfSpecifier(FormatStringHandler &H,
       FS.setConversionSpecifier(CS);
     }
     // Assume the conversion takes one argument.
-    return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
+    return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len,
+                                                    FormatStrConverter);
   }
   return ScanfSpecifierResult(Start, FS);
 }
@@ -551,8 +558,8 @@ bool 
clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
 
   // Keep looking for a format specifier until we have exhausted the string.
   while (I != E) {
-    const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
-                                                          LO, Target);
+    const ScanfSpecifierResult &FSR =
+        ParseScanfSpecifier(H, I, E, argIndex, LO, Target);
     // Did a fail-stop error of any kind occur when parsing the specifier?
     // If so, don't do any more processing.
     if (FSR.shouldStop())
diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
index ffaf98bf9c366..6aa80cab88bdb 100644
--- a/clang/lib/Basic/TargetInfo.cpp
+++ b/clang/lib/Basic/TargetInfo.cpp
@@ -191,6 +191,9 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) {
   MaxOpenCLWorkGroupSize = 1024;
 
   MaxBitIntWidth.reset();
+
+  FormatStrConverter = new llvm::TextEncodingConverter(
+      std::move(*llvm::TextEncodingConverter::createNoopConverter()));
 }
 
 // Out of line virtual dtor for TargetInfo.
diff --git a/clang/lib/Lex/LiteralConverter.cpp 
b/clang/lib/Lex/LiteralConverter.cpp
index 2bd177d499b87..9fca9f7c2dc87 100644
--- a/clang/lib/Lex/LiteralConverter.cpp
+++ b/clang/lib/Lex/LiteralConverter.cpp
@@ -24,7 +24,7 @@ LiteralConverter::getConverter(ConversionAction Action) {
 std::error_code
 LiteralConverter::setConvertersFromOptions(LiteralConverter &LiteralConv,
                                            const clang::LangOptions &Opts,
-                                           const clang::TargetInfo &TInfo) {
+                                           clang::TargetInfo &TInfo) {
   using namespace llvm;
   LiteralConv.InternalEncoding = "UTF-8";
   LiteralConv.SystemEncoding = TInfo.getTriple().getDefaultNarrowTextEncoding();
@@ -56,5 +56,13 @@ LiteralConverter::setConvertersFromOptions(LiteralConverter 
&LiteralConv,
         new TextEncodingConverter(std::move(*ErrorOrConverter));
   } else
     return ErrorOrConverter.getError();
+
+  ErrorOrConverter = llvm::TextEncodingConverter::create(
+      LiteralConv.SystemEncoding, LiteralConv.InternalEncoding);
+
+  if (ErrorOrConverter)
+    TInfo.FormatStrConverter =
+        new TextEncodingConverter(std::move(*ErrorOrConverter));
+
   return std::error_code();
 }
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 3e1edc4548034..a0729fb4b18ec 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -102,6 +102,7 @@
 #include "llvm/Support/Locale.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/SaveAndRestore.h"
+#include "llvm/Support/TextEncoding.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TargetParser/RISCVTargetParser.h"
 #include "llvm/TargetParser/Triple.h"
@@ -7258,10 +7259,10 @@ class CheckFormatHandler : public 
analyze_format_string::FormatStringHandler {
                        ArrayRef<FixItHint> Fixit = {});
 
 protected:
-  bool HandleInvalidConversionSpecifier(unsigned argIndex, SourceLocation Loc,
-                                        const char *startSpec,
-                                        unsigned specifierLen,
-                                        const char *csStart, unsigned csLen);
+  bool HandleInvalidConversionSpecifier(
+      unsigned argIndex, SourceLocation Loc, const char *startSpec,
+      unsigned specifierLen, const char *csStart, unsigned csLen,
+      const llvm::TextEncodingConverter &FormatStrConverter);
 
   void HandlePositionalNonpositionalArgs(SourceLocation Loc,
                                          const char *startSpec,
@@ -7492,13 +7493,10 @@ void UncoveredArgHandler::Diagnose(Sema &S, bool 
IsFunctionCall,
                                   DiagnosticExprs[0]->getSourceRange());
 }
 
-bool
-CheckFormatHandler::HandleInvalidConversionSpecifier(unsigned argIndex,
-                                                     SourceLocation Loc,
-                                                     const char *startSpec,
-                                                     unsigned specifierLen,
-                                                     const char *csStart,
-                                                     unsigned csLen) {
+bool CheckFormatHandler::HandleInvalidConversionSpecifier(
+    unsigned argIndex, SourceLocation Loc, const char *startSpec,
+    unsigned specifierLen, const char *csStart, unsigned csLen,
+    const llvm::TextEncodingConverter &FormatStrConverter) {
   bool keepGoing = true;
   if (argIndex < NumDataArgs) {
     // Consider the argument coverered, even though the specifier doesn't
@@ -7514,7 +7512,13 @@ 
CheckFormatHandler::HandleInvalidConversionSpecifier(unsigned argIndex,
     keepGoing = false;
   }
 
-  StringRef Specifier(csStart, csLen);
+  // csStart points to characters that have already been converted to the
+  // exec charset, so we have to reverse the conversion to allow the diagnostic
+  // message to match the expected value when using the -verify option.
+  std::string RS(csStart, csLen);
+  for (unsigned int i = 0; i < RS.size(); ++i)
+    RS[i] = FormatStrConverter.convert(RS[i]);
+  StringRef Specifier(RS);
 
   // If the specifier in non-printable, it could be the first byte of a UTF-8
   // sequence. In that case, print the UTF-8 code point. If not, print the byte
@@ -7670,9 +7674,9 @@ class CheckPrintfHandler : public CheckFormatHandler {
   }
 
   bool HandleInvalidPrintfConversionSpecifier(
-                                      const analyze_printf::PrintfSpecifier &FS,
-                                      const char *startSpecifier,
-                                      unsigned specifierLen) override;
+      const analyze_printf::PrintfSpecifier &FS, const char *startSpecifier,
+      unsigned specifierLen,
+      const llvm::TextEncodingConverter &FormatStrConverter) override;
 
   void handleInvalidMaskType(StringRef MaskType) override;
 
@@ -7813,14 +7817,14 @@ class DecomposePrintfHandler : public 
CheckPrintfHandler {
 
 bool CheckPrintfHandler::HandleInvalidPrintfConversionSpecifier(
     const analyze_printf::PrintfSpecifier &FS, const char *startSpecifier,
-    unsigned specifierLen) {
+    unsigned specifierLen,
+    const llvm::TextEncodingConverter &FormatStrConverter) {
   const analyze_printf::PrintfConversionSpecifier &CS =
     FS.getConversionSpecifier();
 
-  return HandleInvalidConversionSpecifier(FS.getArgIndex(),
-                                          getLocationOfByte(CS.getStart()),
-                                          startSpecifier, specifierLen,
-                                          CS.getStart(), CS.getLength());
+  return HandleInvalidConversionSpecifier(
+      FS.getArgIndex(), getLocationOfByte(CS.getStart()), startSpecifier,
+      specifierLen, CS.getStart(), CS.getLength(), FormatStrConverter);
 }
 
 void CheckPrintfHandler::handleInvalidMaskType(StringRef MaskType) {
@@ -8333,15 +8337,15 @@ bool CheckPrintfHandler::HandlePrintfSpecifier(
   // Check for using an Objective-C specific conversion specifier
   // in a non-ObjC literal.
   if (!allowsObjCArg() && CS.isObjCArg()) {
-    return HandleInvalidPrintfConversionSpecifier(FS, startSpecifier,
-                                                  specifierLen);
+    return HandleInvalidPrintfConversionSpecifier(
+        FS, startSpecifier, specifierLen, *Target.FormatStrConverter);
   }
 
   // %P can only be used with os_log.
   if (FSType != FormatStringType::OSLog &&
       CS.getKind() == ConversionSpecifier::PArg) {
-    return HandleInvalidPrintfConversionSpecifier(FS, startSpecifier,
-                                                  specifierLen);
+    return HandleInvalidPrintfConversionSpecifier(
+        FS, startSpecifier, specifierLen, *Target.FormatStrConverter);
   }
 
   // %n is not allowed with os_log.
@@ -8360,8 +8364,8 @@ bool CheckPrintfHandler::HandlePrintfSpecifier(
       (CS.getKind() == ConversionSpecifier::PArg ||
        CS.getKind() == ConversionSpecifier::sArg ||
        CS.getKind() == ConversionSpecifier::ObjCObjArg)) {
-    return HandleInvalidPrintfConversionSpecifier(FS, startSpecifier,
-                                                  specifierLen);
+    return HandleInvalidPrintfConversionSpecifier(
+        FS, startSpecifier, specifierLen, *Target.FormatStrConverter);
   }
 
   // Check for use of public/private annotation outside of os_log().
@@ -9022,9 +9026,9 @@ class CheckScanfHandler : public CheckFormatHandler {
                             unsigned specifierLen) override;
 
   bool HandleInvalidScanfConversionSpecifier(
-          const analyze_scanf::ScanfSpecifier &FS,
-          const char *startSpecifier,
-          unsigned specifierLen) override;
+      const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier,
+      unsigned specifierLen,
+      const llvm::TextEncodingConverter &FormatStrConverter) override;
 
   void HandleIncompleteScanList(const char *start, const char *end) override;
 };
@@ -9039,16 +9043,16 @@ void CheckScanfHandler::HandleIncompleteScanList(const 
char *start,
 }
 
 bool CheckScanfHandler::HandleInvalidScanfConversionSpecifier(
-                                        const analyze_scanf::ScanfSpecifier &FS,
-                                        const char *startSpecifier,
-                                        unsigned specifierLen) {
+    const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier,
+    unsigned specifierLen,
+    const llvm::TextEncodingConverter &FormatStrConverter) {
+
   const analyze_scanf::ScanfConversionSpecifier &CS =
     FS.getConversionSpecifier();
 
-  return HandleInvalidConversionSpecifier(FS.getArgIndex(),
-                                          getLocationOfByte(CS.getStart()),
-                                          startSpecifier, specifierLen,
-                                          CS.getStart(), CS.getLength());
+  return HandleInvalidConversionSpecifier(
+      FS.getArgIndex(), getLocationOfByte(CS.getStart()), startSpecifier,
+      specifierLen, CS.getStart(), CS.getLength(), FormatStrConverter);
 }
 
 bool CheckScanfHandler::HandleScanfSpecifier(
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 550f1fbdf0ffc..9e795f8352679 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -3541,8 +3541,9 @@ ExprResult Sema::BuildPredefinedExpr(SourceLocation Loc,
     // the string.
     bool ForceElaboratedPrinting =
         IK == PredefinedIdentKind::Function && getLangOpts().MSVCCompat;
-    auto Str =
-        PredefinedExpr::ComputeName(IK, currentDecl, ForceElaboratedPrinting);
+    auto Str = PredefinedExpr::ComputeNameAndTranslate(
+        IK, currentDecl, getPreprocessor().getLiteralConverter(),
+        ForceElaboratedPrinting);
     unsigned Length = Str.length();
 
     llvm::APInt LengthI(32, Length + 1);
diff --git a/clang/test/CodeGen/systemz-charset.c 
b/clang/test/CodeGen/systemz-charset.c
index aab43157b1be4..771000c94d381 100644
--- a/clang/test/CodeGen/systemz-charset.c
+++ b/clang/test/CodeGen/systemz-charset.c
@@ -1,6 +1,8 @@
 // RUN: %clang_cc1 %s -emit-llvm -triple s390x-none-zos -fexec-charset IBM-1047 -o - | FileCheck %s
 // RUN: %clang %s -emit-llvm -S -target s390x-ibm-zos -o - | FileCheck %s
 
+int printf(char const *, ...);
+
 const char *UpperCaseLetters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
 // CHECK: c"\C1\C2\C3\C4\C5\C6\C7\C8\C9\D1\D2\D3\D4\D5\D6\D7\D8\D9\E2\E3\E4\E5\E6\E7\E8\E9\00"
 
@@ -33,3 +35,9 @@ const char *UcnCharacters = "\u00E2\u00AC\U000000DF";
 
 const char *Unicode = "ÿ";
 //CHECK: c"\DF\00"
+
+void test1() {
+  printf(__FUNCTION__);
+}
+//CHECK: @__FUNCTION__.test1 = private unnamed_addr constant [6 x i8] c"\A3\85\A2\A3\F1\00"
+
diff --git a/llvm/include/llvm/Support/TextEncoding.h 
b/llvm/include/llvm/Support/TextEncoding.h
index bda6f2a088eb2..c95983d0aadce 100644
--- a/llvm/include/llvm/Support/TextEncoding.h
+++ b/llvm/include/llvm/Support/TextEncoding.h
@@ -105,6 +105,8 @@ class TextEncodingConverter {
   LLVM_ABI static ErrorOr<TextEncodingConverter> create(StringRef From,
                                                         StringRef To);
 
+  LLVM_ABI static ErrorOr<TextEncodingConverter> createNoopConverter();
+
   TextEncodingConverter(const TextEncodingConverter &) = delete;
   TextEncodingConverter &operator=(const TextEncodingConverter &) = delete;
 
@@ -136,6 +138,14 @@ class TextEncodingConverter {
     return EC;
   }
 
+  char convert(char SingleChar) const {
+    SmallString<1> Result;
+    auto EC = Converter->convert(StringRef(&SingleChar, 1), Result);
+    if (!EC)
+      return Result[0];
+    return '\0';
+  }
+
   // Maps the encoding name to enum constant if possible.
   static std::optional<TextEncoding> getKnownEncoding(StringRef Name);
 };
diff --git a/llvm/lib/Support/TextEncoding.cpp 
b/llvm/lib/Support/TextEncoding.cpp
index b7d73ff5b8412..26e9643770f89 100644
--- a/llvm/lib/Support/TextEncoding.cpp
+++ b/llvm/lib/Support/TextEncoding.cpp
@@ -353,3 +353,22 @@ ErrorOr<TextEncodingConverter> 
TextEncodingConverter::create(StringRef From,
   return std::make_error_code(std::errc::invalid_argument);
 #endif
 }
+
+class TextEncodingConverterNoop final
+    : public details::TextEncodingConverterImplBase {
+
+public:
+  TextEncodingConverterNoop() {}
+
+  std::error_code convertString(StringRef Source,
+                                SmallVectorImpl<char> &Result) override {
+    Result.assign(Source.begin(), Source.end());
+    return std::error_code();
+  }
+
+  void reset() override {}
+};
+
+ErrorOr<TextEncodingConverter> TextEncodingConverter::createNoopConverter() {
+  return TextEncodingConverter(std::make_unique<TextEncodingConverterNoop>());
+}

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to