https://github.com/abhina-sree updated https://github.com/llvm/llvm-project/pull/169803
>From 98fd359c7a62bc1fa3617455fb99296f075b8f54 Mon Sep 17 00:00:00 2001 From: Abhina Sreeskantharajan <[email protected]> Date: Mon, 24 Nov 2025 11:00:04 -0500 Subject: [PATCH 1/4] add ParserConversionAction (cherry picked from commit c2647a73957921d3f7a53c6f25a69f1cc2725aa3) (cherry picked from commit 2533cb58131041dee287897c5af79d6b5147a81b) (cherry picked from commit 8eb9d339a95bdd9fd67b40105d3ab9c5130b6453) --- clang/include/clang/Parse/Parser.h | 1 + clang/include/clang/Sema/Sema.h | 8 ++++++-- clang/lib/Parse/ParseDecl.cpp | 13 +++++++++++++ clang/lib/Parse/ParseDeclCXX.cpp | 6 +++++- clang/lib/Parse/ParseExpr.cpp | 9 +++++---- clang/lib/Parse/Parser.cpp | 4 ++++ clang/lib/Sema/SemaExpr.cpp | 12 +++++++----- 7 files changed, 41 insertions(+), 12 deletions(-) diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index cec1dc99e90d8..fa465f9e83efe 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -5662,6 +5662,7 @@ class Parser : public CodeCompletionHandler { bool Finished; }; ObjCImplParsingDataRAII *CurParsedObjCImpl; + ConversionAction ParserConversionAction; /// StashAwayMethodOrFunctionBodyTokens - Consume the tokens and store them /// for later parsing. diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 72beac7526dc5..662fc2fd6ae82 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -55,6 +55,7 @@ #include "clang/Basic/TemplateKinds.h" #include "clang/Basic/TokenKinds.h" #include "clang/Basic/TypeTraits.h" +#include "clang/Lex/LiteralConverter.h" #include "clang/Sema/AnalysisBasedWarnings.h" #include "clang/Sema/Attr.h" #include "clang/Sema/CleanupInfo.h" @@ -7372,9 +7373,12 @@ class Sema final : public SemaBase { /// from multiple tokens. However, the common case is that StringToks points /// to one string. 
ExprResult ActOnStringLiteral(ArrayRef<Token> StringToks, - Scope *UDLScope = nullptr); + Scope *UDLScope = nullptr, + ConversionAction Action = CA_ToExecEncoding); - ExprResult ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks); + ExprResult + ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks, + ConversionAction Action = CA_ToExecEncoding); /// ControllingExprOrType is either an opaque pointer coming out of a /// ParsedType or an Expr *. FIXME: it'd be better to split this interface diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 55ea562faacaa..7d042f2212f67 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -564,6 +564,9 @@ unsigned Parser::ParseAttributeArgsCommon( nullptr, Sema::ExpressionEvaluationContextRecord::EK_AttrArgument); + SaveAndRestore<ConversionAction> SavedTranslationState( + ParserConversionAction, CA_NoConversion); + ExprResult ArgExpr = ParseAssignmentExpression(); if (ArgExpr.isInvalid()) { SkipUntil(tok::r_paren, StopAtSemi); @@ -644,6 +647,9 @@ void Parser::ParseGNUAttributeArgs( ParsedAttr::Kind AttrKind = ParsedAttr::getParsedKind(AttrName, ScopeName, Form.getSyntax()); + SaveAndRestore<ConversionAction> SavedTranslationState(ParserConversionAction, + CA_NoConversion); + if (AttrKind == ParsedAttr::AT_Availability) { ParseAvailabilityAttribute(*AttrName, AttrNameLoc, Attrs, EndLoc, ScopeName, ScopeLoc, Form); @@ -723,6 +729,9 @@ unsigned Parser::ParseClangAttributeArgs( ParsedAttr::Kind AttrKind = ParsedAttr::getParsedKind(AttrName, ScopeName, Form.getSyntax()); + SaveAndRestore<ConversionAction> SavedTranslationState(ParserConversionAction, + CA_NoConversion); + switch (AttrKind) { default: return ParseAttributeArgsCommon(AttrName, AttrNameLoc, Attrs, EndLoc, @@ -1546,6 +1555,10 @@ void Parser::ParseExternalSourceSymbolAttribute( SkipUntil(tok::comma, tok::r_paren, StopAtSemi | StopBeforeMatch); continue; } + + SaveAndRestore<ConversionAction> 
SavedTranslationState( + ParserConversionAction, CA_NoConversion); + if (Keyword == Ident_language) { if (HadLanguage) { Diag(KeywordLoc, diag::err_external_source_symbol_duplicate_clause) diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index 877161b65b6c3..ac42b8a127897 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -314,7 +314,9 @@ Decl *Parser::ParseNamespaceAlias(SourceLocation NamespaceLoc, Decl *Parser::ParseLinkage(ParsingDeclSpec &DS, DeclaratorContext Context) { assert(isTokenStringLiteral() && "Not a string literal!"); - ExprResult Lang = ParseUnevaluatedStringLiteralExpression(); + ExprResult Lang = (SaveAndRestore<ConversionAction>(ParserConversionAction, + CA_NoConversion), + ParseUnevaluatedStringLiteralExpression()); ParseScope LinkageScope(this, Scope::DeclScope); Decl *LinkageSpec = @@ -1001,6 +1003,8 @@ Decl *Parser::ParseStaticAssertDeclaration(SourceLocation &DeclEnd) { return nullptr; } } else if (tokenIsLikeStringLiteral(Tok, getLangOpts())) { + SaveAndRestore<ConversionAction> SavedTranslationState( + ParserConversionAction, CA_NoConversion); AssertMessage = ParseUnevaluatedStringLiteralExpression(); } else { Diag(Tok, diag::err_expected_string_literal) diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index c3ac8d7e6eb74..574192b6a98b6 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -3019,13 +3019,14 @@ ExprResult Parser::ParseStringLiteralExpression(bool AllowUserDefinedLiteral, if (Unevaluated) { assert(!AllowUserDefinedLiteral && "UDL are always evaluated"); - return Actions.ActOnUnevaluatedStringLiteral(StringToks); + return Actions.ActOnUnevaluatedStringLiteral(StringToks, + ParserConversionAction); } // Pass the set of string tokens, ready for concatenation, to the actions. - return Actions.ActOnStringLiteral(StringToks, - AllowUserDefinedLiteral ? 
getCurScope() - : nullptr); + return Actions.ActOnStringLiteral( + StringToks, AllowUserDefinedLiteral ? getCurScope() : nullptr, + ParserConversionAction); } ExprResult Parser::ParseGenericSelectionExpression() { diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index 5d87453cf219e..2ef6eb2be63b8 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -70,6 +70,8 @@ Parser::Parser(Preprocessor &pp, Sema &actions, bool skipFunctionBodies) NumCachedScopes = 0; CurParsedObjCImpl = nullptr; + ParserConversionAction = CA_ToExecEncoding; + // Add #pragma handlers. These are removed and destroyed in the // destructor. initializePragmaHandlers(); @@ -1551,6 +1553,8 @@ void Parser::ParseKNRParamDeclarations(Declarator &D) { } ExprResult Parser::ParseAsmStringLiteral(bool ForAsmLabel) { + SaveAndRestore<ConversionAction> SavedTranslationState(ParserConversionAction, + CA_NoConversion); ExprResult AsmString; if (isTokenStringLiteral()) { diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index c494669420282..c47e095d3b132 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -2153,14 +2153,15 @@ static ExprResult BuildCookedLiteralOperatorCall(Sema &S, Scope *Scope, return S.BuildLiteralOperatorCall(R, OpNameInfo, Args, LitEndLoc); } -ExprResult Sema::ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks) { +ExprResult Sema::ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks, + ConversionAction Action) { // StringToks needs backing storage as it doesn't hold array elements itself std::vector<Token> ExpandedToks; if (getLangOpts().MicrosoftExt) StringToks = ExpandedToks = ExpandFunctionLocalPredefinedMacros(StringToks); StringLiteralParser Literal(StringToks, PP, - StringLiteralEvalMethod::Unevaluated); + StringLiteralEvalMethod::Unevaluated, Action); if (Literal.hadError) return ExprError(); @@ -2231,8 +2232,8 @@ Sema::ExpandFunctionLocalPredefinedMacros(ArrayRef<Token> Toks) { return 
ExpandedToks; } -ExprResult -Sema::ActOnStringLiteral(ArrayRef<Token> StringToks, Scope *UDLScope) { +ExprResult Sema::ActOnStringLiteral(ArrayRef<Token> StringToks, Scope *UDLScope, + ConversionAction Action) { assert(!StringToks.empty() && "Must have at least one string!"); // StringToks needs backing storage as it doesn't hold array elements itself @@ -2240,7 +2241,8 @@ Sema::ActOnStringLiteral(ArrayRef<Token> StringToks, Scope *UDLScope) { if (getLangOpts().MicrosoftExt) StringToks = ExpandedToks = ExpandFunctionLocalPredefinedMacros(StringToks); - StringLiteralParser Literal(StringToks, PP); + StringLiteralParser Literal(StringToks, PP, + StringLiteralEvalMethod::Evaluated, Action); if (Literal.hadError) return ExprError(); >From 2201975a32c3b09aecb9b430c58821b190696d05 Mon Sep 17 00:00:00 2001 From: Abhina Sreeskantharajan <[email protected]> Date: Wed, 17 Dec 2025 16:29:42 -0500 Subject: [PATCH 2/4] do not translate unevaluated strings --- clang/include/clang/Sema/Sema.h | 4 +--- clang/lib/Parse/ParseDecl.cpp | 3 --- clang/lib/Parse/ParseDeclCXX.cpp | 4 +--- clang/lib/Parse/ParseExpr.cpp | 3 +-- clang/lib/Sema/SemaExpr.cpp | 7 +++---- clang/test/CodeGen/systemz-charset-diag.cpp | 9 +++++++++ clang/test/CodeGen/systemz-charset.c | 15 +++++++++++++++ 7 files changed, 30 insertions(+), 15 deletions(-) create mode 100644 clang/test/CodeGen/systemz-charset-diag.cpp diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 662fc2fd6ae82..e2bc5593efa97 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -7376,9 +7376,7 @@ class Sema final : public SemaBase { Scope *UDLScope = nullptr, ConversionAction Action = CA_ToExecEncoding); - ExprResult - ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks, - ConversionAction Action = CA_ToExecEncoding); + ExprResult ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks); /// ControllingExprOrType is either an opaque pointer coming out of a /// ParsedType or 
an Expr *. FIXME: it'd be better to split this interface diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 7d042f2212f67..d9ee9de4f5377 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -1556,9 +1556,6 @@ void Parser::ParseExternalSourceSymbolAttribute( continue; } - SaveAndRestore<ConversionAction> SavedTranslationState( - ParserConversionAction, CA_NoConversion); - if (Keyword == Ident_language) { if (HadLanguage) { Diag(KeywordLoc, diag::err_external_source_symbol_duplicate_clause) diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index ac42b8a127897..61c4df70693de 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -314,9 +314,7 @@ Decl *Parser::ParseNamespaceAlias(SourceLocation NamespaceLoc, Decl *Parser::ParseLinkage(ParsingDeclSpec &DS, DeclaratorContext Context) { assert(isTokenStringLiteral() && "Not a string literal!"); - ExprResult Lang = (SaveAndRestore<ConversionAction>(ParserConversionAction, - CA_NoConversion), - ParseUnevaluatedStringLiteralExpression()); + ExprResult Lang = ParseUnevaluatedStringLiteralExpression(); ParseScope LinkageScope(this, Scope::DeclScope); Decl *LinkageSpec = diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 574192b6a98b6..2d0f621a9d39b 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -3019,8 +3019,7 @@ ExprResult Parser::ParseStringLiteralExpression(bool AllowUserDefinedLiteral, if (Unevaluated) { assert(!AllowUserDefinedLiteral && "UDL are always evaluated"); - return Actions.ActOnUnevaluatedStringLiteral(StringToks, - ParserConversionAction); + return Actions.ActOnUnevaluatedStringLiteral(StringToks); } // Pass the set of string tokens, ready for concatenation, to the actions. 
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index c47e095d3b132..bc35f81f006a1 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -2153,15 +2153,14 @@ static ExprResult BuildCookedLiteralOperatorCall(Sema &S, Scope *Scope, return S.BuildLiteralOperatorCall(R, OpNameInfo, Args, LitEndLoc); } -ExprResult Sema::ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks, - ConversionAction Action) { +ExprResult Sema::ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks) { // StringToks needs backing storage as it doesn't hold array elements itself std::vector<Token> ExpandedToks; if (getLangOpts().MicrosoftExt) StringToks = ExpandedToks = ExpandFunctionLocalPredefinedMacros(StringToks); - StringLiteralParser Literal(StringToks, PP, - StringLiteralEvalMethod::Unevaluated, Action); + StringLiteralParser Literal( + StringToks, PP, StringLiteralEvalMethod::Unevaluated, CA_NoConversion); if (Literal.hadError) return ExprError(); diff --git a/clang/test/CodeGen/systemz-charset-diag.cpp b/clang/test/CodeGen/systemz-charset-diag.cpp new file mode 100644 index 0000000000000..11d60e1ac2793 --- /dev/null +++ b/clang/test/CodeGen/systemz-charset-diag.cpp @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -triple s390x-none-zos -fexec-charset IBM-1047 %s -std=c++17 -emit-llvm -o - -verify + +static_assert(false, "Error string"); // expected-error {{static assertion failed: Error string}} + +[[deprecated("message")]] void test_deprecated() {return;} // expected-note {{'test_deprecated' has been explicitly marked deprecated here}} + +int main() { + test_deprecated(); // expected-warning {{'test_deprecated' is deprecated: message}} +} diff --git a/clang/test/CodeGen/systemz-charset.c b/clang/test/CodeGen/systemz-charset.c index fa5c2ea5ef8d5..a77b19e9aff22 100644 --- a/clang/test/CodeGen/systemz-charset.c +++ b/clang/test/CodeGen/systemz-charset.c @@ -57,3 +57,18 @@ const char *Unicode = "ΓΏ"; // RUN: not %clang_cc1 -fexec-charset invalid %s 
2>&1 | FileCheck %s --check-prefix=CHECK-ERROR // CHECK-ERROR: error: failed to set fexec-charset to 'invalid' +void test1() { + printf(__FUNCTION__); +} +//CHECK: @__FUNCTION__.test1 = private unnamed_addr constant [6 x i8] c"\A3\85\A2\A3\F1\00" + +#define HELLO "Hello " +#define WORLD "World!" +#define HELLO_WORLD HELLO WORLD +const char* hello_macro = HELLO; +//CHECK: c"\C8\85\93\93\96@\00" +//CHECK-UTF8: c"Hello \00" + +const char* preprocessor_concatenation = HELLO_WORLD; +//CHECK: c"\C8\85\93\93\96@\E6\96\99\93\84Z\00" +//CHECK-UTF8: c"Hello World!\00" >From 8dbeb92bab829fc8c55fa9f277c83ffc50904acb Mon Sep 17 00:00:00 2001 From: Abhina Sreeskantharajan <[email protected]> Date: Thu, 27 Nov 2025 08:45:45 -0500 Subject: [PATCH 3/4] add format string handling (cherry picked from commit 20a6fdfe3045eebaf1acc4fff7269c66e85e10c3) (cherry picked from commit f416afce1fda8733c693aaef3f15c8b099154c20) (cherry picked from commit 0eb9577efcfdde493aaa8dd884b6c33d40166236) --- clang/include/clang/AST/Expr.h | 6 ++ clang/include/clang/AST/FormatString.h | 12 +-- clang/include/clang/Basic/TargetInfo.h | 3 + clang/include/clang/Lex/TextEncodingConfig.h | 3 +- clang/include/clang/Options/Options.td | 7 +- clang/include/clang/Sema/Sema.h | 2 +- clang/lib/AST/Expr.cpp | 14 +++ clang/lib/AST/FormatString.cpp | 86 ++++++++++--------- clang/lib/AST/FormatStringParsing.h | 36 +++++--- clang/lib/AST/PrintfFormatString.cpp | 89 +++++++++++++------- clang/lib/AST/ScanfFormatString.cpp | 23 +++-- clang/lib/Basic/TargetInfo.cpp | 3 + clang/lib/Frontend/CompilerInstance.cpp | 2 +- clang/lib/Lex/TextEncodingConfig.cpp | 11 ++- clang/lib/Sema/SemaChecking.cpp | 54 +++++++----- clang/lib/Sema/SemaExpr.cpp | 5 +- clang/test/CodeGen/systemz-charset.c | 2 + llvm/include/llvm/Support/TextEncoding.h | 10 +++ llvm/lib/Support/TextEncoding.cpp | 19 +++++ 19 files changed, 261 insertions(+), 126 deletions(-) diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index
a0ab599fa82d2..3d42b99382f2e 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -28,6 +28,7 @@ #include "clang/Basic/LangOptions.h" #include "clang/Basic/SyncScope.h" #include "clang/Basic/TypeTraits.h" +#include "clang/Lex/TextEncodingConfig.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/SmallVector.h" @@ -2066,6 +2067,11 @@ class PredefinedExpr final return getIdentKindName(getIdentKind()); } + static std::string + ComputeNameAndTranslate(PredefinedIdentKind IK, const Decl *CurrentDecl, + TextEncodingConfig &TEC, + bool ForceElaboratedPrinting = false); + static std::string ComputeName(PredefinedIdentKind IK, const Decl *CurrentDecl, bool ForceElaboratedPrinting = false); diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h index 586d9f0f8feb0..c0f70493a93c8 100644 --- a/clang/include/clang/AST/FormatString.h +++ b/clang/include/clang/AST/FormatString.h @@ -19,6 +19,7 @@ #define LLVM_CLANG_AST_FORMATSTRING_H #include "clang/AST/CanonicalType.h" +#include "llvm/Support/TextEncoding.h" #include <optional> namespace clang { @@ -724,7 +725,8 @@ class FormatStringHandler { virtual bool HandleInvalidPrintfConversionSpecifier( const analyze_printf::PrintfSpecifier &FS, const char *startSpecifier, - unsigned specifierLen) { + unsigned specifierLen, + const llvm::TextEncodingConverter &FormatStrConverter) { return true; } @@ -740,10 +742,10 @@ class FormatStringHandler { // Scanf-specific handlers. 
- virtual bool - HandleInvalidScanfConversionSpecifier(const analyze_scanf::ScanfSpecifier &FS, - const char *startSpecifier, - unsigned specifierLen) { + virtual bool HandleInvalidScanfConversionSpecifier( + const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier, + unsigned specifierLen, + const llvm::TextEncodingConverter &FormatStrConverter) { return true; } diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 9f7d2a17a0f8a..ec7d4fcd4d8e3 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -38,6 +38,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Error.h" +#include "llvm/Support/TextEncoding.h" #include "llvm/Support/VersionTuple.h" #include "llvm/TargetParser/Triple.h" #include <cassert> @@ -323,6 +324,8 @@ class TargetInfo : public TransferrableTargetInfo, virtual ~TargetInfo(); + llvm::TextEncodingConverter *FormatStrConverter; + /// Retrieve the target options. 
TargetOptions &getTargetOpts() const { assert(TargetOpts && "Missing target options"); diff --git a/clang/include/clang/Lex/TextEncodingConfig.h b/clang/include/clang/Lex/TextEncodingConfig.h index 09967a81beeed..f4ef578eb2991 100644 --- a/clang/include/clang/Lex/TextEncodingConfig.h +++ b/clang/include/clang/Lex/TextEncodingConfig.h @@ -26,7 +26,8 @@ class TextEncodingConfig { llvm::TextEncodingConverter *getConverter(ConversionAction Action) const; static std::error_code setConvertersFromOptions(TextEncodingConfig &TEC, - const clang::LangOptions &Opts); + const clang::LangOptions &Opts, + clang::TargetInfo &TInfo); llvm::StringRef getExecEncoding() { return ExecEncoding; } }; diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index f79b4a07ce326..e71192d4ef260 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -3153,8 +3153,7 @@ def finput_charset_EQ : Joined<["-"], "finput-charset=">, Visibility<[ClangOption, FlangOption, FC1Option]>, Group<f_Group>, HelpText<"Specify the default character set for source files">; def fexec_charset_EQ - : Joined<["-"], "fexec-charset=">, - Group<f_Group>, + : Joined<["-"], "fexec-charset=">, Group<f_Group>, HelpText< "Set the execution <encoding> for string and character literals. " "Supported character encodings include ISO-8859-1, UTF-8, IBM1047, " @@ -7506,8 +7505,8 @@ let Visibility = [CC1Option, CC1AsOption, FC1Option] in { def tune_cpu : Separate<["-"], "tune-cpu">, HelpText<"Tune for a specific cpu type">, MarshallingInfoString<TargetOpts<"TuneCPU">>; -def fexec_charset : Separate<["-"], "fexec-charset">, MetaVarName<"<charset>">, - HelpText<"Set the execution <charset> for string and character literals. " +def fexec_charset : Separate<["-"], "fexec-charset">, MetaVarName<"<encoding>">, + HelpText<"Set the execution <encoding> for string and character literals. 
" "Supported character encodings include ISO-8859-1, UTF-8, IBM1047, " "and possibly those supported by ICU or the host iconv library.">, MarshallingInfoString<LangOpts<"ExecEncoding">>; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index e2bc5593efa97..8ac5cc175fd2f 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -55,7 +55,7 @@ #include "clang/Basic/TemplateKinds.h" #include "clang/Basic/TokenKinds.h" #include "clang/Basic/TypeTraits.h" -#include "clang/Lex/LiteralConverter.h" +#include "clang/Lex/TextEncodingConfig.h" #include "clang/Sema/AnalysisBasedWarnings.h" #include "clang/Sema/Attr.h" #include "clang/Sema/CleanupInfo.h" diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 64d61dbc3d128..e067df4cefd7b 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -668,6 +668,20 @@ StringRef PredefinedExpr::getIdentKindName(PredefinedIdentKind IK) { llvm_unreachable("Unknown ident kind for PredefinedExpr"); } +std::string PredefinedExpr::ComputeNameAndTranslate( + PredefinedIdentKind IK, const Decl *CurrentDecl, TextEncodingConfig &TEC, + bool ForceElaboratedPrinting) { + using namespace clang::charinfo; + std::string Result = ComputeName(IK, CurrentDecl, ForceElaboratedPrinting); + llvm::TextEncodingConverter *Converter = TEC.getConverter(CA_ToExecEncoding); + if (Converter) { + SmallString<128> Converted; + Converter->convert(Result, Converted); + Result = std::string(Converted); + } + return Result; +} + // FIXME: Maybe this should use DeclPrinter with a special "print predefined // expr" policy instead. std::string PredefinedExpr::ComputeName(PredefinedIdentKind IK, diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp index 7e1ac0de6dcaf..0d449fb5f0904 100644 --- a/clang/lib/AST/FormatString.cpp +++ b/clang/lib/AST/FormatString.cpp @@ -33,8 +33,9 @@ FormatStringHandler::~FormatStringHandler() {} // scanf format strings. 
//===----------------------------------------------------------------------===// -OptionalAmount clang::analyze_format_string::ParseAmount(const char *&Beg, - const char *E) { +OptionalAmount clang::analyze_format_string::ParseAmount( + const char *&Beg, const char *E, + const llvm::TextEncodingConverter &FormatStrConverter) { const char *I = Beg; UpdateOnReturn<const char *> UpdateBeg(Beg, I); @@ -42,7 +43,7 @@ OptionalAmount clang::analyze_format_string::ParseAmount(const char *&Beg, bool hasDigits = false; for (; I != E; ++I) { - char c = *I; + char c = FormatStrConverter.convert(*I); if (c >= '0' && c <= '9') { hasDigits = true; accumulator = (accumulator * 10) + (c - '0'); @@ -60,21 +61,22 @@ OptionalAmount clang::analyze_format_string::ParseAmount(const char *&Beg, } OptionalAmount clang::analyze_format_string::ParseNonPositionAmount( - const char *&Beg, const char *E, unsigned &argIndex) { - if (*Beg == '*') { + const char *&Beg, const char *E, unsigned &argIndex, + const llvm::TextEncodingConverter &FormatStrConverter) { + if (FormatStrConverter.convert(*Beg) == '*') { ++Beg; return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false); } - return ParseAmount(Beg, E); + return ParseAmount(Beg, E, FormatStrConverter); } OptionalAmount clang::analyze_format_string::ParsePositionAmount( FormatStringHandler &H, const char *Start, const char *&Beg, const char *E, - PositionContext p) { - if (*Beg == '*') { + PositionContext p, const llvm::TextEncodingConverter &FormatStrConverter) { + if (FormatStrConverter.convert(*Beg) == '*') { const char *I = Beg + 1; - const OptionalAmount &Amt = ParseAmount(I, E); + const OptionalAmount &Amt = ParseAmount(I, E, FormatStrConverter); if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { H.HandleInvalidPosition(Beg, I - Beg, p); @@ -89,7 +91,7 @@ OptionalAmount clang::analyze_format_string::ParsePositionAmount( assert(Amt.getHowSpecified() == OptionalAmount::Constant); - if (*I == '$') { + if 
(FormatStrConverter.convert(*I) == '$') { // Handle positional arguments // Special case: '*0$', since this is an easy mistake. @@ -109,18 +111,21 @@ OptionalAmount clang::analyze_format_string::ParsePositionAmount( return OptionalAmount(false); } - return ParseAmount(Beg, E); + return ParseAmount(Beg, E, FormatStrConverter); } bool clang::analyze_format_string::ParseFieldWidth( FormatStringHandler &H, FormatSpecifier &CS, const char *Start, - const char *&Beg, const char *E, unsigned *argIndex) { + const char *&Beg, const char *E, unsigned *argIndex, + const llvm::TextEncodingConverter &FormatStrConverter) { // FIXME: Support negative field widths. if (argIndex) { - CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex)); + CS.setFieldWidth( + ParseNonPositionAmount(Beg, E, *argIndex, FormatStrConverter)); } else { const OptionalAmount Amt = ParsePositionAmount( - H, Start, Beg, E, analyze_format_string::FieldWidthPos); + H, Start, Beg, E, analyze_format_string::FieldWidthPos, + FormatStrConverter); if (Amt.isInvalid()) return true; @@ -129,14 +134,13 @@ bool clang::analyze_format_string::ParseFieldWidth( return false; } -bool clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H, - FormatSpecifier &FS, - const char *Start, - const char *&Beg, - const char *E) { +bool clang::analyze_format_string::ParseArgPosition( + FormatStringHandler &H, FormatSpecifier &FS, const char *Start, + const char *&Beg, const char *E, + const llvm::TextEncodingConverter &FormatStrConverter) { const char *I = Beg; - const OptionalAmount &Amt = ParseAmount(I, E); + const OptionalAmount &Amt = ParseAmount(I, E, FormatStrConverter); if (I == E) { // No more characters left? 
@@ -144,7 +148,8 @@ bool clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H, return true; } - if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { + if (Amt.getHowSpecified() == OptionalAmount::Constant && + FormatStrConverter.convert(*(I++)) == '$') { // Warn that positional arguments are non-standard. H.HandlePosition(Start, I - Start); @@ -165,16 +170,15 @@ bool clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H, return false; } -bool clang::analyze_format_string::ParseVectorModifier(FormatStringHandler &H, - FormatSpecifier &FS, - const char *&I, - const char *E, - const LangOptions &LO) { +bool clang::analyze_format_string::ParseVectorModifier( + FormatStringHandler &H, FormatSpecifier &FS, const char *&I, const char *E, + const LangOptions &LO, + const llvm::TextEncodingConverter &FormatStrConverter) { if (!LO.OpenCL) return false; const char *Start = I; - if (*I == 'v') { + if (FormatStrConverter.convert(*I) == 'v') { ++I; if (I == E) { @@ -182,7 +186,7 @@ bool clang::analyze_format_string::ParseVectorModifier(FormatStringHandler &H, return true; } - OptionalAmount NumElts = ParseAmount(I, E); + OptionalAmount NumElts = ParseAmount(I, E, FormatStrConverter); if (NumElts.getHowSpecified() != OptionalAmount::Constant) { H.HandleIncompleteSpecifier(Start, E - Start); return true; @@ -194,22 +198,20 @@ bool clang::analyze_format_string::ParseVectorModifier(FormatStringHandler &H, return false; } -bool clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, - const char *&I, - const char *E, - const LangOptions &LO, - bool IsScanf) { +bool clang::analyze_format_string::ParseLengthModifier( + FormatSpecifier &FS, const char *&I, const char *E, const LangOptions &LO, + const llvm::TextEncodingConverter &FormatStrConverter, bool IsScanf) { LengthModifier::Kind lmKind = LengthModifier::None; const char *lmPosition = I; - switch (*I) { + switch (FormatStrConverter.convert(*I)) { default: return 
false; case 'h': ++I; - if (I != E && *I == 'h') { + if (I != E && FormatStrConverter.convert(*I) == 'h') { ++I; lmKind = LengthModifier::AsChar; - } else if (I != E && *I == 'l' && LO.OpenCL) { + } else if (I != E && FormatStrConverter.convert(*I) == 'l' && LO.OpenCL) { ++I; lmKind = LengthModifier::AsShortLong; } else { @@ -218,7 +220,7 @@ bool clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, break; case 'l': ++I; - if (I != E && *I == 'l') { + if (I != E && FormatStrConverter.convert(*I) == 'l') { ++I; lmKind = LengthModifier::AsLongLong; } else { @@ -251,7 +253,9 @@ bool clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, // be parsed as the GNU extension 'a' length modifier. If not, this // will be parsed as a conversion specifier. ++I; - if (I != E && (*I == 's' || *I == 'S' || *I == '[')) { + if (I != E && (FormatStrConverter.convert(*I) == 's' || + FormatStrConverter.convert(*I) == 'S' || + FormatStrConverter.convert(*I) == '[')) { lmKind = LengthModifier::AsAllocate; break; } @@ -269,7 +273,8 @@ bool clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, // scanf: AsInt64 case 'I': if (I + 1 != E && I + 2 != E) { - if (I[1] == '6' && I[2] == '4') { + if (FormatStrConverter.convert(I[1]) == '6' && + FormatStrConverter.convert(I[2]) == '4') { I += 3; lmKind = LengthModifier::AsInt64; break; @@ -277,7 +282,8 @@ bool clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, if (IsScanf) return false; - if (I[1] == '3' && I[2] == '2') { + if (FormatStrConverter.convert(I[1]) == '3' && + FormatStrConverter.convert(I[2]) == '2') { I += 3; lmKind = LengthModifier::AsInt32; break; diff --git a/clang/lib/AST/FormatStringParsing.h b/clang/lib/AST/FormatStringParsing.h index 401528481a9d6..531bc291e0b5b 100644 --- a/clang/lib/AST/FormatStringParsing.h +++ b/clang/lib/AST/FormatStringParsing.h @@ -35,29 +35,43 @@ template <typename T> class UpdateOnReturn { namespace analyze_format_string { 
-OptionalAmount ParseAmount(const char *&Beg, const char *E); -OptionalAmount ParseNonPositionAmount(const char *&Beg, const char *E, - unsigned &argIndex); +OptionalAmount +ParseAmount(const char *&Beg, const char *E, + const llvm::TextEncodingConverter &FormatStrConverter); -OptionalAmount ParsePositionAmount(FormatStringHandler &H, const char *Start, - const char *&Beg, const char *E, - PositionContext p); +OptionalAmount +ParseNonPositionAmount(const char *&Beg, const char *E, unsigned &argIndex, + const llvm::TextEncodingConverter &FormatStrConverter); + +OptionalAmount +ParsePositionAmount(FormatStringHandler &H, const char *Start, const char *&Beg, + const char *E, PositionContext p, + const llvm::TextEncodingConverter &FormatStrConverter); + +OptionalAmount +ParsePositionAmount(FormatStringHandler &H, const char *Start, const char *&Beg, + const char *E, PositionContext p, + const llvm::TextEncodingConverter &FormatStrConverter); bool ParseFieldWidth(FormatStringHandler &H, FormatSpecifier &CS, const char *Start, const char *&Beg, const char *E, - unsigned *argIndex); + unsigned *argIndex, + const llvm::TextEncodingConverter &FormatStrConverter); bool ParseArgPosition(FormatStringHandler &H, FormatSpecifier &CS, - const char *Start, const char *&Beg, const char *E); + const char *Start, const char *&Beg, const char *E, + const llvm::TextEncodingConverter &FormatStrConverter); bool ParseVectorModifier(FormatStringHandler &H, FormatSpecifier &FS, - const char *&Beg, const char *E, - const LangOptions &LO); + const char *&Beg, const char *E, const LangOptions &LO, + const llvm::TextEncodingConverter &FormatStrConverter); /// Returns true if a LengthModifier was parsed and installed in the /// FormatSpecifier& argument, and false otherwise. 
bool ParseLengthModifier(FormatSpecifier &FS, const char *&Beg, const char *E, - const LangOptions &LO, bool IsScanf = false); + const LangOptions &LO, + const llvm::TextEncodingConverter &FormatStrConverter, + bool IsScanf = false); /// Returns true if the invalid specifier in \p SpecifierBegin is a UTF-8 /// string; check that it won't go further than \p FmtStrEnd and write diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp index 6610a2de9e083..7efcc554ec136 100644 --- a/clang/lib/AST/PrintfFormatString.cpp +++ b/clang/lib/AST/PrintfFormatString.cpp @@ -35,14 +35,17 @@ typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier> using analyze_format_string::ParseNonPositionAmount; -static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS, - const char *Start, const char *&Beg, const char *E, - unsigned *argIndex) { +static bool +ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS, const char *Start, + const char *&Beg, const char *E, unsigned *argIndex, + const llvm::TextEncodingConverter &FormatStrConverter) { if (argIndex) { - FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex)); + FS.setPrecision( + ParseNonPositionAmount(Beg, E, *argIndex, FormatStrConverter)); } else { const OptionalAmount Amt = ParsePositionAmount( - H, Start, Beg, E, analyze_format_string::PrecisionPos); + H, Start, Beg, E, analyze_format_string::PrecisionPos, + FormatStrConverter); if (Amt.isInvalid()) return true; FS.setPrecision(Amt); @@ -50,11 +53,14 @@ static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS, return false; } -static bool ParseObjCFlags(FormatStringHandler &H, PrintfSpecifier &FS, - const char *FlagBeg, const char *E, bool Warn) { +static bool +ParseObjCFlags(FormatStringHandler &H, PrintfSpecifier &FS, const char *FlagBeg, + const char *E, bool Warn, + const llvm::TextEncodingConverter &FormatStrConverter) { StringRef Flag(FlagBeg, E - FlagBeg); // Currently there is only one 
flag. - if (Flag == "tt") { + if (Flag.size() == 2 && FormatStrConverter.convert(FlagBeg[0]) == 't' && + FormatStrConverter.convert(FlagBeg[1]) == 't') { FS.setHasObjCTechnicalTerm(FlagBeg); return false; } @@ -81,6 +87,8 @@ ParsePrintfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, const char *Start = nullptr; UpdateOnReturn<const char *> UpdateBeg(Beg, I); + const llvm::TextEncodingConverter &FormatStrConverter = + *Target.FormatStrConverter; // Look for a '%' character that indicates the start of a format specifier. for (; I != E; ++I) { char c = *I; @@ -89,7 +97,7 @@ ParsePrintfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, H.HandleNullChar(I); return true; } - if (c == '%') { + if (FormatStrConverter.convert(c) == '%') { Start = I++; // Record the start of the format specifier. break; } @@ -107,7 +115,7 @@ ParsePrintfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, } PrintfSpecifier FS; - if (ParseArgPosition(H, FS, Start, I, E)) + if (ParseArgPosition(H, FS, Start, I, E, FormatStrConverter)) return true; if (I == E) { @@ -117,13 +125,17 @@ ParsePrintfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, return true; } - if (*I == '{') { + if (FormatStrConverter.convert(*I) == '{') { ++I; unsigned char PrivacyFlags = 0; StringRef MatchedStr; do { - StringRef Str(I, E - I); + const char *II; + std::string S(I, E - I); + for (unsigned long i = 0; i < S.length(); ++i) + S[i] = FormatStrConverter.convert(S[i]); + StringRef Str(S); std::string Match = "^[[:space:]]*" "(private|public|sensitive|mask\\.[^[:space:],}]*)" "[[:space:]]*(,|})"; @@ -132,25 +144,38 @@ ParsePrintfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, if (R.match(Str, &Matches)) { MatchedStr = Matches[1]; + II = I; I += Matches[0].size(); + while (FormatStrConverter.convert(*II) == ' ') + ++II; + // Set the privacy flag if the privacy annotation in the // comma-delimited segment is at least as strict as the 
privacy // annotations in previous comma-delimited segments. if (MatchedStr.starts_with("mask")) { - StringRef MaskType = MatchedStr.substr(sizeof("mask.") - 1); + StringRef MaskType(II + sizeof("mask.") - 1, + MatchedStr.size() - sizeof("mask.") + 1); unsigned Size = MaskType.size(); + if (Warn && (Size == 0 || Size > 8)) H.handleInvalidMaskType(MaskType); FS.setMaskType(MaskType); - } else if (MatchedStr == "sensitive") + } else if (MatchedStr == "sensitive") { + StringRef ProxyMatchedStr(II, sizeof("sensitive") - 1); + MatchedStr = ProxyMatchedStr; PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsSensitive; - else if (PrivacyFlags != - clang::analyze_os_log::OSLogBufferItem::IsSensitive && - MatchedStr == "private") + } else if (PrivacyFlags != + clang::analyze_os_log::OSLogBufferItem::IsSensitive && + MatchedStr == "private") { + StringRef ProxyMatchedStr(II, sizeof("private") - 1); + MatchedStr = ProxyMatchedStr; PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsPrivate; - else if (PrivacyFlags == 0 && MatchedStr == "public") + } else if (PrivacyFlags == 0 && MatchedStr == "public") { + StringRef ProxyMatchedStr(II, sizeof("public") - 1); + MatchedStr = ProxyMatchedStr; PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsPublic; + } } else { size_t CommaOrBracePos = Str.find_if([](char c) { return c == ',' || c == '}'; }); @@ -165,7 +190,7 @@ ParsePrintfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, I += CommaOrBracePos + 1; } // Continue until the closing brace is found. - } while (*(I - 1) == ','); + } while (FormatStrConverter.convert(*(I - 1)) == ','); // Set the privacy flag. switch (PrivacyFlags) { @@ -188,7 +213,7 @@ ParsePrintfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, // Look for flags (if any). 
bool hasMore = true; for (; I != E; ++I) { - switch (*I) { + switch (FormatStrConverter.convert(*I)) { default: hasMore = false; break; @@ -225,7 +250,8 @@ ParsePrintfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, // Look for the field width (if any). if (ParseFieldWidth(H, FS, Start, I, E, - FS.usesPositionalArg() ? nullptr : &argIndex)) + FS.usesPositionalArg() ? nullptr : &argIndex, + FormatStrConverter)) return true; if (I == E) { @@ -236,7 +262,7 @@ ParsePrintfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, } // Look for the precision (if any). - if (*I == '.') { + if (FormatStrConverter.convert(*I) == '.') { ++I; if (I == E) { if (Warn) @@ -245,7 +271,8 @@ ParsePrintfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, } if (ParsePrecision(H, FS, Start, I, E, - FS.usesPositionalArg() ? nullptr : &argIndex)) + FS.usesPositionalArg() ? nullptr : &argIndex, + FormatStrConverter)) return true; if (I == E) { @@ -256,11 +283,11 @@ ParsePrintfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, } } - if (ParseVectorModifier(H, FS, I, E, LO)) + if (ParseVectorModifier(H, FS, I, E, LO, FormatStrConverter)) return true; // Look for the length modifier. - if (ParseLengthModifier(FS, I, E, LO) && I == E) { + if (ParseLengthModifier(FS, I, E, LO, FormatStrConverter) && I == E) { // No more characters left? if (Warn) H.HandleIncompleteSpecifier(Start, E - Start); @@ -274,7 +301,7 @@ ParsePrintfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, // enables better recovery, and we don't know if // these flags are applicable until later. const char *ObjCModifierFlagsStart = nullptr, *ObjCModifierFlagsEnd = nullptr; - if (*I == '[') { + if (FormatStrConverter.convert(*I) == '[') { ObjCModifierFlagsStart = I; ++I; auto flagStart = I; @@ -286,8 +313,8 @@ ParsePrintfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, return true; } // Did we find the closing ']'? 
- if (*I == ']') { - if (ParseObjCFlags(H, FS, flagStart, I, Warn)) + if (FormatStrConverter.convert(*I) == ']') { + if (ParseObjCFlags(H, FS, flagStart, I, Warn, FormatStrConverter)) return true; ++I; break; @@ -307,7 +334,7 @@ ParsePrintfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, // Finally, look for the conversion specifier. const char *conversionPosition = I++; ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier; - switch (*conversionPosition) { + switch (FormatStrConverter.convert(*conversionPosition)) { default: break; // C99: 7.19.6.1 (section 8). @@ -470,7 +497,8 @@ ParsePrintfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, FS.setConversionSpecifier(CS); } // Assume the conversion takes one argument. - return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, Len); + return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, Len, + FormatStrConverter); } return PrintfSpecifierResult(Start, FS); } @@ -480,7 +508,6 @@ bool clang::analyze_format_string::ParsePrintfString( const TargetInfo &Target, bool isFreeBSDKPrintf) { unsigned argIndex = 0; - // Keep looking for a format specifier until we have exhausted the string. while (I != E) { const PrintfSpecifierResult &FSR = ParsePrintfSpecifier( diff --git a/clang/lib/AST/ScanfFormatString.cpp b/clang/lib/AST/ScanfFormatString.cpp index 90cbbd60bbcf5..c63171844d90d 100644 --- a/clang/lib/AST/ScanfFormatString.cpp +++ b/clang/lib/AST/ScanfFormatString.cpp @@ -81,7 +81,8 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, const char *I = Beg; const char *Start = nullptr; UpdateOnReturn<const char *> UpdateBeg(Beg, I); - + const llvm::TextEncodingConverter &FormatStrConverter = + *Target.FormatStrConverter; // Look for a '%' character that indicates the start of a format specifier. 
for (; I != E; ++I) { char c = *I; @@ -90,7 +91,9 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, H.HandleNullChar(I); return true; } - if (c == '%') { + SmallString<1> ConvertedChar; + FormatStrConverter.convert(StringRef(&c, 1), ConvertedChar); + if (ConvertedChar[0] == '%') { Start = I++; // Record the start of the format specifier. break; } @@ -107,7 +110,7 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, } ScanfSpecifier FS; - if (ParseArgPosition(H, FS, Start, I, E)) + if (ParseArgPosition(H, FS, Start, I, E, FormatStrConverter)) return true; if (I == E) { @@ -117,7 +120,7 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, } // Look for '*' flag if it is present. - if (*I == '*') { + if (FormatStrConverter.convert(*I) == '*') { FS.setSuppressAssignment(I); if (++I == E) { H.HandleIncompleteSpecifier(Start, E - Start); @@ -127,7 +130,8 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, // Look for the field width (if any). Unlike printf, this is either // a fixed integer or isn't present. - const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E); + const OptionalAmount &Amt = + clang::analyze_format_string::ParseAmount(I, E, FormatStrConverter); if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) { assert(Amt.getHowSpecified() == OptionalAmount::Constant); FS.setFieldWidth(Amt); @@ -140,7 +144,9 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, } // Look for the length modifier. - if (ParseLengthModifier(FS, I, E, LO, /*IsScanf=*/true) && I == E) { + if (ParseLengthModifier(FS, I, E, LO, FormatStrConverter, + /*IsScanf=*/true) && + I == E) { // No more characters left? H.HandleIncompleteSpecifier(Start, E - Start); return true; @@ -155,7 +161,7 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, // Finally, look for the conversion specifier. 
const char *conversionPosition = I++; ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier; - switch (*conversionPosition) { + switch (FormatStrConverter.convert(*conversionPosition)) { default: break; case '%': @@ -262,7 +268,8 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, FS.setConversionSpecifier(CS); } // Assume the conversion takes one argument. - return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len); + return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len, + FormatStrConverter); } return ScanfSpecifierResult(Start, FS); } diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index e6ae89e0948c5..43efca42886cc 100644 --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -194,6 +194,9 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) { MaxOpenCLWorkGroupSize = 1024; MaxBitIntWidth.reset(); + + FormatStrConverter = new llvm::TextEncodingConverter( + std::move(*llvm::TextEncodingConverter::createNoopConverter())); } // Out of line virtual dtor for TargetInfo. 
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index c9b5342b7e8d9..83945d203762c 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -550,7 +550,7 @@ void CompilerInstance::createPreprocessor(TranslationUnitKind TUKind) { PP->setDependencyDirectivesGetter(*GetDependencyDirectives); if (auto EC = TextEncodingConfig::setConvertersFromOptions( - PP->getTextEncodingConfig(), getLangOpts())) + PP->getTextEncodingConfig(), getLangOpts(), getTarget())) PP->getDiagnostics().Report(clang::diag::err_fe_text_encoding_config) << PP->getTextEncodingConfig().getExecEncoding(); } diff --git a/clang/lib/Lex/TextEncodingConfig.cpp b/clang/lib/Lex/TextEncodingConfig.cpp index b89d5baefcc23..427b75a1c0a8b 100644 --- a/clang/lib/Lex/TextEncodingConfig.cpp +++ b/clang/lib/Lex/TextEncodingConfig.cpp @@ -23,7 +23,8 @@ TextEncodingConfig::getConverter(ConversionAction Action) const { std::error_code TextEncodingConfig::setConvertersFromOptions(TextEncodingConfig &TEC, - const clang::LangOptions &Opts) { + const clang::LangOptions &Opts, + clang::TargetInfo &TInfo) { using namespace llvm; const char *UTF8 = "UTF-8"; @@ -41,5 +42,13 @@ TextEncodingConfig::setConvertersFromOptions(TextEncodingConfig &TEC, new TextEncodingConverter(std::move(*ErrorOrConverter)); else return ErrorOrConverter.getError(); + + ErrorOrConverter = llvm::TextEncodingConverter::create(TEC.SystemEncoding, + TEC.InternalEncoding); + + if (ErrorOrConverter) + TInfo.FormatStrConverter = + new TextEncodingConverter(std::move(*ErrorOrConverter)); + return std::error_code(); } diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 12f77d021eb0d..3ed1593f16a3e 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -104,6 +104,7 @@ #include "llvm/Support/Locale.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SaveAndRestore.h" +#include 
"llvm/Support/TextEncoding.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/RISCVTargetParser.h" #include "llvm/TargetParser/Triple.h" @@ -7872,10 +7873,10 @@ class CheckFormatHandler : public analyze_format_string::FormatStringHandler { ArrayRef<FixItHint> Fixit = {}); protected: - bool HandleInvalidConversionSpecifier(unsigned argIndex, SourceLocation Loc, - const char *startSpec, - unsigned specifierLen, - const char *csStart, unsigned csLen); + bool HandleInvalidConversionSpecifier( + unsigned argIndex, SourceLocation Loc, const char *startSpec, + unsigned specifierLen, const char *csStart, unsigned csLen, + const llvm::TextEncodingConverter &FormatStrConverter); void HandlePositionalNonpositionalArgs(SourceLocation Loc, const char *startSpec, @@ -8105,7 +8106,8 @@ void UncoveredArgHandler::Diagnose(Sema &S, bool IsFunctionCall, bool CheckFormatHandler::HandleInvalidConversionSpecifier( unsigned argIndex, SourceLocation Loc, const char *startSpec, - unsigned specifierLen, const char *csStart, unsigned csLen) { + unsigned specifierLen, const char *csStart, unsigned csLen, + const llvm::TextEncodingConverter &FormatStrConverter) { bool keepGoing = true; if (argIndex < NumDataArgs) { // Consider the argument coverered, even though the specifier doesn't @@ -8120,7 +8122,13 @@ bool CheckFormatHandler::HandleInvalidConversionSpecifier( keepGoing = false; } - StringRef Specifier(csStart, csLen); + // The csStart points to a character that has already been converted to the + // exec charset, so we have to reverse the conversion to allow diagnostic + // message to match an expected value when using -verify option, + std::string RS(csStart, csLen); + for (unsigned int i = 0; i < RS.size(); ++i) + RS[i] = FormatStrConverter.convert(RS[i]); + StringRef Specifier(RS); // If the specifier in non-printable, it could be the first byte of a UTF-8 // sequence. In that case, print the UTF-8 code point. 
If not, print the byte @@ -8274,7 +8282,8 @@ class CheckPrintfHandler : public CheckFormatHandler { bool HandleInvalidPrintfConversionSpecifier( const analyze_printf::PrintfSpecifier &FS, const char *startSpecifier, - unsigned specifierLen) override; + unsigned specifierLen, + const llvm::TextEncodingConverter &FormatStrConverter) override; void handleInvalidMaskType(StringRef MaskType) override; @@ -8414,13 +8423,14 @@ class DecomposePrintfHandler : public CheckPrintfHandler { bool CheckPrintfHandler::HandleInvalidPrintfConversionSpecifier( const analyze_printf::PrintfSpecifier &FS, const char *startSpecifier, - unsigned specifierLen) { + unsigned specifierLen, + const llvm::TextEncodingConverter &FormatStrConverter) { const analyze_printf::PrintfConversionSpecifier &CS = FS.getConversionSpecifier(); return HandleInvalidConversionSpecifier( FS.getArgIndex(), getLocationOfByte(CS.getStart()), startSpecifier, - specifierLen, CS.getStart(), CS.getLength()); + specifierLen, CS.getStart(), CS.getLength(), FormatStrConverter); } void CheckPrintfHandler::handleInvalidMaskType(StringRef MaskType) { @@ -8928,15 +8938,15 @@ bool CheckPrintfHandler::HandlePrintfSpecifier( // Check for using an Objective-C specific conversion specifier // in a non-ObjC literal. if (!allowsObjCArg() && CS.isObjCArg()) { - return HandleInvalidPrintfConversionSpecifier(FS, startSpecifier, - specifierLen); + return HandleInvalidPrintfConversionSpecifier( + FS, startSpecifier, specifierLen, *Target.FormatStrConverter); } // %P can only be used with os_log. if (FSType != FormatStringType::OSLog && CS.getKind() == ConversionSpecifier::PArg) { - return HandleInvalidPrintfConversionSpecifier(FS, startSpecifier, - specifierLen); + return HandleInvalidPrintfConversionSpecifier( + FS, startSpecifier, specifierLen, *Target.FormatStrConverter); } // %n is not allowed with os_log. 
@@ -8955,8 +8965,8 @@ bool CheckPrintfHandler::HandlePrintfSpecifier( (CS.getKind() == ConversionSpecifier::PArg || CS.getKind() == ConversionSpecifier::sArg || CS.getKind() == ConversionSpecifier::ObjCObjArg)) { - return HandleInvalidPrintfConversionSpecifier(FS, startSpecifier, - specifierLen); + return HandleInvalidPrintfConversionSpecifier( + FS, startSpecifier, specifierLen, *Target.FormatStrConverter); } // Check for use of public/private annotation outside of os_log(). @@ -9614,10 +9624,10 @@ class CheckScanfHandler : public CheckFormatHandler { const char *startSpecifier, unsigned specifierLen) override; - bool - HandleInvalidScanfConversionSpecifier(const analyze_scanf::ScanfSpecifier &FS, - const char *startSpecifier, - unsigned specifierLen) override; + bool HandleInvalidScanfConversionSpecifier( + const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier, + unsigned specifierLen, + const llvm::TextEncodingConverter &FormatStrConverter) override; void HandleIncompleteScanList(const char *start, const char *end) override; }; @@ -9633,13 +9643,15 @@ void CheckScanfHandler::HandleIncompleteScanList(const char *start, bool CheckScanfHandler::HandleInvalidScanfConversionSpecifier( const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier, - unsigned specifierLen) { + unsigned specifierLen, + const llvm::TextEncodingConverter &FormatStrConverter) { + const analyze_scanf::ScanfConversionSpecifier &CS = FS.getConversionSpecifier(); return HandleInvalidConversionSpecifier( FS.getArgIndex(), getLocationOfByte(CS.getStart()), startSpecifier, - specifierLen, CS.getStart(), CS.getLength()); + specifierLen, CS.getStart(), CS.getLength(), FormatStrConverter); } bool CheckScanfHandler::HandleScanfSpecifier( diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index bc35f81f006a1..0413b88a76d0a 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -3636,8 +3636,9 @@ ExprResult 
Sema::BuildPredefinedExpr(SourceLocation Loc, // the string. bool ForceElaboratedPrinting = IK == PredefinedIdentKind::Function && getLangOpts().MSVCCompat; - auto Str = - PredefinedExpr::ComputeName(IK, currentDecl, ForceElaboratedPrinting); + auto Str = PredefinedExpr::ComputeNameAndTranslate( + IK, currentDecl, getPreprocessor().getTextEncodingConfig(), + ForceElaboratedPrinting); unsigned Length = Str.length(); llvm::APInt LengthI(32, Length + 1); diff --git a/clang/test/CodeGen/systemz-charset.c b/clang/test/CodeGen/systemz-charset.c index a77b19e9aff22..dbb36aed49990 100644 --- a/clang/test/CodeGen/systemz-charset.c +++ b/clang/test/CodeGen/systemz-charset.c @@ -2,6 +2,8 @@ // RUN: %clang %s -emit-llvm -S -target s390x-ibm-zos -o - | FileCheck %s // RUN: %clang_cc1 %s -emit-llvm -triple s390x-none-zos -fexec-charset UTF-8 -DIBM1047_ONLY=1 -o - | FileCheck %s --check-prefix=CHECK-UTF8 +int printf(char const *, ...); + const char *UpperCaseLetters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; //CHECK: c"\C1\C2\C3\C4\C5\C6\C7\C8\C9\D1\D2\D3\D4\D5\D6\D7\D8\D9\E2\E3\E4\E5\E6\E7\E8\E9\00" //CHECK-UTF8: c"ABCDEFGHIJKLMNOPQRSTUVWXYZ\00" diff --git a/llvm/include/llvm/Support/TextEncoding.h b/llvm/include/llvm/Support/TextEncoding.h index 3d31505c5bc6b..414ca1f87d9ee 100644 --- a/llvm/include/llvm/Support/TextEncoding.h +++ b/llvm/include/llvm/Support/TextEncoding.h @@ -105,6 +105,8 @@ class TextEncodingConverter { LLVM_ABI static ErrorOr<TextEncodingConverter> create(StringRef From, StringRef To); + LLVM_ABI static ErrorOr<TextEncodingConverter> createNoopConverter(); + TextEncodingConverter(const TextEncodingConverter &) = delete; TextEncodingConverter &operator=(const TextEncodingConverter &) = delete; @@ -137,6 +139,14 @@ class TextEncodingConverter { } LLVM_ABI static bool isEncodingSupported(StringRef Name); + + char convert(char SingleChar) const { + SmallString<1> Result; + auto EC = Converter->convert(StringRef(&SingleChar, 1), Result); + if (!EC) + return Result[0]; + 
return '\0'; + } }; } // namespace llvm diff --git a/llvm/lib/Support/TextEncoding.cpp b/llvm/lib/Support/TextEncoding.cpp index adc474ff03c50..475799df9070b 100644 --- a/llvm/lib/Support/TextEncoding.cpp +++ b/llvm/lib/Support/TextEncoding.cpp @@ -366,3 +366,22 @@ ErrorOr<TextEncodingConverter> TextEncodingConverter::create(StringRef From, return std::make_error_code(std::errc::invalid_argument); #endif } + +class TextEncodingConverterNoop final + : public details::TextEncodingConverterImplBase { + +public: + TextEncodingConverterNoop() {} + + std::error_code convertString(StringRef Source, + SmallVectorImpl<char> &Result) override { + Result.assign(Source.begin(), Source.end()); + return std::error_code(); + } + + void reset() override {} +}; + +ErrorOr<TextEncodingConverter> TextEncodingConverter::createNoopConverter() { + return TextEncodingConverter(std::make_unique<TextEncodingConverterNoop>()); +} >From 4d6c6012ea0540c0bf3bd847654aacca3fb2548c Mon Sep 17 00:00:00 2001 From: Abhina Sreeskantharajan <[email protected]> Date: Mon, 2 Feb 2026 11:42:09 -0500 Subject: [PATCH 4/4] convert to exec-charset inside getPredefinedStringLiteralFromCache, test __builtin_FILE() (cherry picked from commit 9e4f1d155064ed3cef04ce15df068ca906aa3c55) (cherry picked from commit 08cd2e95bb0c5e246a63fb3cb51fc1a8ecdee8c9) --- clang/include/clang/Basic/TargetInfo.h | 2 ++ clang/lib/AST/ASTContext.cpp | 10 +++++++++ clang/lib/Basic/TargetInfo.cpp | 3 +++ clang/lib/Lex/TextEncodingConfig.cpp | 9 +++++---- clang/test/CodeGen/systemz-charset.cpp | 28 ++++++++++++++++++++++++++ 5 files changed, 48 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index ec7d4fcd4d8e3..6c0e65a85ee13 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -326,6 +326,8 @@ class TargetInfo : public TransferrableTargetInfo, llvm::TextEncodingConverter *FormatStrConverter; + 
llvm::TextEncodingConverter *ExecStrConverter; + /// Retrieve the target options. TargetOptions &getTargetOpts() const { assert(TargetOpts && "Missing target options"); diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index a0894318dbd53..80e073385ce82 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -13752,6 +13752,16 @@ ASTContext::getPredefinedStringLiteralFromCache(StringRef Key) const { *this, Key, StringLiteralKind::Ordinary, /*Pascal*/ false, getStringLiteralArrayType(CharTy, Key.size()), SourceLocation()); + + llvm::TextEncodingConverter *Converter = getTargetInfo().ExecStrConverter; + if (Converter) { + SmallString<128> Converted; + Converter->convert(Result->getString(), Converted); + Result = StringLiteral::Create( + *this, Converted, StringLiteralKind::Ordinary, /*Pascal*/ false, + getStringLiteralArrayType(CharTy, Converted.size()), SourceLocation()); + } + return Result; } diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index 43efca42886cc..0c553033ad069 100644 --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -197,6 +197,9 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) { FormatStrConverter = new llvm::TextEncodingConverter( std::move(*llvm::TextEncodingConverter::createNoopConverter())); + + ExecStrConverter = new llvm::TextEncodingConverter( + std::move(*llvm::TextEncodingConverter::createNoopConverter())); } // Out of line virtual dtor for TargetInfo. 
diff --git a/clang/lib/Lex/TextEncodingConfig.cpp b/clang/lib/Lex/TextEncodingConfig.cpp index 427b75a1c0a8b..6df88e258ffde 100644 --- a/clang/lib/Lex/TextEncodingConfig.cpp +++ b/clang/lib/Lex/TextEncodingConfig.cpp @@ -37,14 +37,15 @@ TextEncodingConfig::setConvertersFromOptions(TextEncodingConfig &TEC, return std::error_code(); ErrorOr<TextEncodingConverter> ErrorOrConverter = llvm::TextEncodingConverter::create(UTF8, TEC.ExecEncoding); - if (ErrorOrConverter) + if (ErrorOrConverter) { TEC.ToExecEncodingConverter = new TextEncodingConverter(std::move(*ErrorOrConverter)); - else + TInfo.ExecStrConverter = TEC.ToExecEncodingConverter; + } else return ErrorOrConverter.getError(); - ErrorOrConverter = llvm::TextEncodingConverter::create(TEC.SystemEncoding, - TEC.InternalEncoding); + ErrorOrConverter = llvm::TextEncodingConverter::create( + TInfo.getTriple().getDefaultNarrowTextEncoding(), UTF8); if (ErrorOrConverter) TInfo.FormatStrConverter = diff --git a/clang/test/CodeGen/systemz-charset.cpp b/clang/test/CodeGen/systemz-charset.cpp index 7e66407fd2ff1..9db4260615a9c 100644 --- a/clang/test/CodeGen/systemz-charset.cpp +++ b/clang/test/CodeGen/systemz-charset.cpp @@ -1,46 +1,74 @@ // RUN: %clang %s -std=c++17 -emit-llvm -S -target s390x-ibm-zos -o - | FileCheck %s +// RUN: %clang_cc1 %s -emit-llvm -triple s390x-none-zos -fexec-charset UTF-8 -o - | FileCheck %s --check-prefix=CHECK-UTF8 const char *RawString = R"(Hello\n)"; //CHECK: c"\C8\85\93\93\96\E0\95\00" +//CHECK-UTF8: c"Hello\\n\00" const char *MultiLineRawString = R"( Hello There)"; //CHECK: c"\15\C8\85\93\93\96\15\E3\88\85\99\85\00" +//CHECK-UTF8: c"\0AHello\0AThere\00" char UnicodeChar8 = u8'1'; //CHECK: i8 49 +//CHECK-UTF8: i8 49 char16_t UnicodeChar16 = u'1'; //CHECK: i16 49 +//CHECK-UTF8: i16 49 char32_t UnicodeChar32 = U'1'; //CHECK: i32 49 +//CHECK-UTF8: i32 49 const char *EscapeCharacters8 = u8"\a\b\f\n\r\t\v\\\'\"\?"; //CHECK: c"\07\08\0C\0A\0D\09\0B\\'\22?\00" +//CHECK-UTF8: 
c"\07\08\0C\0A\0D\09\0B\\'\22?\00" const char16_t *EscapeCharacters16 = u"\a\b\f\n\r\t\v\\\'\"\?"; //CHECK: [12 x i16] [i16 7, i16 8, i16 12, i16 10, i16 13, i16 9, i16 11, i16 92, i16 39, i16 34, i16 63, i16 0] +//CHECK-UTF8: [12 x i16] [i16 7, i16 8, i16 12, i16 10, i16 13, i16 9, i16 11, i16 92, i16 39, i16 34, i16 63, i16 0] const char32_t *EscapeCharacters32 = U"\a\b\f\n\r\t\v\\\'\"\?"; //CHECK: [12 x i32] [i32 7, i32 8, i32 12, i32 10, i32 13, i32 9, i32 11, i32 92, i32 39, i32 34, i32 63, i32 0] +//CHECK-UTF8: [12 x i32] [i32 7, i32 8, i32 12, i32 10, i32 13, i32 9, i32 11, i32 92, i32 39, i32 34, i32 63, i32 0] const char *UnicodeString8 = u8"Hello"; //CHECK: c"Hello\00" +//CHECK-UTF8: c"Hello\00" + const char16_t *UnicodeString16 = u"Hello"; //CHECK: [6 x i16] [i16 72, i16 101, i16 108, i16 108, i16 111, i16 0] +//CHECK-UTF8: [6 x i16] [i16 72, i16 101, i16 108, i16 108, i16 111, i16 0] + const char32_t *UnicodeString32 = U"Hello"; //CHECK: [6 x i32] [i32 72, i32 101, i32 108, i32 108, i32 111, i32 0] +//CHECK-UTF8: [6 x i32] [i32 72, i32 101, i32 108, i32 108, i32 111, i32 0] const char *UnicodeRawString8 = u8R"("Hello\")"; //CHECK: c"\22Hello\\\22\00" +//CHECK-UTF8: c"\22Hello\\\22\00" + const char16_t *UnicodeRawString16 = uR"("Hello\")"; //CHECK: [9 x i16] [i16 34, i16 72, i16 101, i16 108, i16 108, i16 111, i16 92, i16 34, i16 0] +//CHECK-UTF8: [9 x i16] [i16 34, i16 72, i16 101, i16 108, i16 108, i16 111, i16 92, i16 34, i16 0] + const char32_t *UnicodeRawString32 = UR"("Hello\")"; //CHECK: [9 x i32] [i32 34, i32 72, i32 101, i32 108, i32 108, i32 111, i32 92, i32 34, i32 0] +//CHECK-UTF8: [9 x i32] [i32 34, i32 72, i32 101, i32 108, i32 108, i32 111, i32 92, i32 34, i32 0] const char *UnicodeUCNString8 = u8"\u00E2\u00AC\U000000DF"; //CHECK: c"\C3\A2\C2\AC\C3\9F\00" +//CHECK-UTF8: c"\C3\A2\C2\AC\C3\9F\00" + const char16_t *UnicodeUCNString16 = u"\u00E2\u00AC\U000000DF"; //CHECK: [4 x i16] [i16 226, i16 172, i16 223, i16 0] +//CHECK-UTF8: [4 x i16] 
[i16 226, i16 172, i16 223, i16 0] + const char32_t *UnicodeUCNString32 = U"\u00E2\u00AC\U000000DF"; //CHECK: [4 x i32] [i32 226, i32 172, i32 223, i32 0] +//CHECK-UTF8: [4 x i32] [i32 226, i32 172, i32 223, i32 0] + +const char *file = __builtin_FILE(); +//CHECK: {{.*}}\A2\A8\A2\A3\85\94\A9`\83\88\81\99\A2\85\A3K\83\97\97\00" +//CHECK-UTF8: {{.*}}systemz-charset.cpp\00" _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
