https://github.com/abhina-sree updated https://github.com/llvm/llvm-project/pull/169803
>From 6e3ddca29305863eb309ad8b60b44934f7405f93 Mon Sep 17 00:00:00 2001 From: Abhina Sreeskantharajan <[email protected]> Date: Fri, 8 May 2026 12:17:22 -0400 Subject: [PATCH 1/5] add ParserConversionAction, do not translate unevaluated strings --- clang/include/clang/Parse/Parser.h | 1 + clang/include/clang/Sema/Sema.h | 4 +++- clang/lib/Parse/ParseDecl.cpp | 10 ++++++++++ clang/lib/Parse/ParseDeclCXX.cpp | 2 ++ clang/lib/Parse/ParseExpr.cpp | 6 +++--- clang/lib/Parse/Parser.cpp | 4 ++++ clang/lib/Sema/SemaExpr.cpp | 12 ++++++------ clang/test/CodeGen/systemz-charset-diag.cpp | 8 ++++++++ clang/test/CodeGen/systemz-charset.c | 5 +++++ 9 files changed, 42 insertions(+), 10 deletions(-) diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 5e7af97feeb6c..21210f3aa41d9 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -5715,6 +5715,7 @@ class Parser : public CodeCompletionHandler { bool Finished; }; ObjCImplParsingDataRAII *CurParsedObjCImpl; + ConversionAction ParserConversionAction; /// StashAwayMethodOrFunctionBodyTokens - Consume the tokens and store them /// for later parsing. diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index b8d760e7e0975..d54e4ce19166a 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -55,6 +55,7 @@ #include "clang/Basic/TemplateKinds.h" #include "clang/Basic/TokenKinds.h" #include "clang/Basic/TypeTraits.h" +#include "clang/Lex/LiteralConverter.h" #include "clang/Sema/AnalysisBasedWarnings.h" #include "clang/Sema/Attr.h" #include "clang/Sema/CleanupInfo.h" @@ -7374,7 +7375,8 @@ class Sema final : public SemaBase { /// from multiple tokens. However, the common case is that StringToks points /// to one string. ExprResult ActOnStringLiteral(ArrayRef<Token> StringToks, - Scope *UDLScope = nullptr); + Scope *UDLScope = nullptr, + ConversionAction Action = CA_ToExecEncoding); ExprResult ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks); diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 405dddf7991b4..97e0721c02b1b 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -564,6 +564,9 @@ unsigned Parser::ParseAttributeArgsCommon( nullptr, Sema::ExpressionEvaluationContextRecord::EK_AttrArgument); + SaveAndRestore<ConversionAction> SavedTranslationState( + ParserConversionAction, CA_NoConversion); + ExprResult ArgExpr = ParseAssignmentExpression(); if (ArgExpr.isInvalid()) { SkipUntil(tok::r_paren, StopAtSemi); @@ -644,6 +647,9 @@ void Parser::ParseGNUAttributeArgs( ParsedAttr::Kind AttrKind = ParsedAttr::getParsedKind(AttrName, ScopeName, Form.getSyntax()); + SaveAndRestore<ConversionAction> SavedTranslationState(ParserConversionAction, + CA_NoConversion); + if (AttrKind == ParsedAttr::AT_Availability) { ParseAvailabilityAttribute(*AttrName, AttrNameLoc, Attrs, EndLoc, ScopeName, ScopeLoc, Form); @@ -723,6 +729,9 @@ unsigned Parser::ParseClangAttributeArgs( ParsedAttr::Kind AttrKind = ParsedAttr::getParsedKind(AttrName, ScopeName, Form.getSyntax()); + SaveAndRestore<ConversionAction> SavedTranslationState(ParserConversionAction, + CA_NoConversion); + switch (AttrKind) { default: return ParseAttributeArgsCommon(AttrName, AttrNameLoc, Attrs, EndLoc, @@ -1546,6 +1555,7 @@ void Parser::ParseExternalSourceSymbolAttribute( SkipUntil(tok::comma, tok::r_paren, StopAtSemi | StopBeforeMatch); continue; } + if (Keyword == Ident_language) { if (HadLanguage) { Diag(KeywordLoc, diag::err_external_source_symbol_duplicate_clause) diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index 893989bd2398f..388cfa662068a 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -1001,6 +1001,8 @@ Decl *Parser::ParseStaticAssertDeclaration(SourceLocation &DeclEnd) { return nullptr; } } else if (tokenIsLikeStringLiteral(Tok, getLangOpts())) { + SaveAndRestore<ConversionAction> SavedTranslationState( + ParserConversionAction, CA_NoConversion); AssertMessage = ParseUnevaluatedStringLiteralExpression(); } else { Diag(Tok, diag::err_expected_string_literal) diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 2987d32d6e0d2..f8855d06fa343 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -3060,9 +3060,9 @@ ExprResult Parser::ParseStringLiteralExpression(bool AllowUserDefinedLiteral, } // Pass the set of string tokens, ready for concatenation, to the actions. - return Actions.ActOnStringLiteral(StringToks, - AllowUserDefinedLiteral ? getCurScope() - : nullptr); + return Actions.ActOnStringLiteral( + StringToks, AllowUserDefinedLiteral ? getCurScope() : nullptr, + ParserConversionAction); } ExprResult Parser::ParseGenericSelectionExpression() { diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index 5e1fd4df1a3f0..7ac5e0a36d60e 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -70,6 +70,8 @@ Parser::Parser(Preprocessor &pp, Sema &actions, bool skipFunctionBodies) NumCachedScopes = 0; CurParsedObjCImpl = nullptr; + ParserConversionAction = CA_ToExecEncoding; + // Add #pragma handlers. These are removed and destroyed in the // destructor. initializePragmaHandlers(); @@ -1551,6 +1553,8 @@ void Parser::ParseKNRParamDeclarations(Declarator &D) { } ExprResult Parser::ParseAsmStringLiteral(bool ForAsmLabel) { + SaveAndRestore<ConversionAction> SavedTranslationState(ParserConversionAction, + CA_NoConversion); ExprResult AsmString; if (isTokenStringLiteral()) { diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 56d36237375b7..cb8184b39afd6 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -2160,8 +2160,8 @@ ExprResult Sema::ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks) { if (getLangOpts().MicrosoftExt) StringToks = ExpandedToks = ExpandFunctionLocalPredefinedMacros(StringToks); - StringLiteralParser Literal(StringToks, PP, - StringLiteralEvalMethod::Unevaluated); + StringLiteralParser Literal( + StringToks, PP, StringLiteralEvalMethod::Unevaluated, CA_NoConversion); if (Literal.hadError) return ExprError(); @@ -2232,8 +2232,8 @@ Sema::ExpandFunctionLocalPredefinedMacros(ArrayRef<Token> Toks) { return ExpandedToks; } -ExprResult -Sema::ActOnStringLiteral(ArrayRef<Token> StringToks, Scope *UDLScope) { +ExprResult Sema::ActOnStringLiteral(ArrayRef<Token> StringToks, Scope *UDLScope, + ConversionAction Action) { assert(!StringToks.empty() && "Must have at least one string!"); // StringToks needs backing storage as it doesn't hold array elements itself @@ -2241,8 +2241,8 @@ Sema::ActOnStringLiteral(ArrayRef<Token> StringToks, Scope *UDLScope) { if (getLangOpts().MicrosoftExt) StringToks = ExpandedToks = ExpandFunctionLocalPredefinedMacros(StringToks); - StringLiteralParser Literal( - StringToks, PP, StringLiteralEvalMethod::Evaluated, CA_ToLiteralEncoding); + StringLiteralParser Literal(StringToks, PP, + StringLiteralEvalMethod::Evaluated, Action); if (Literal.hadError) return ExprError(); diff --git a/clang/test/CodeGen/systemz-charset-diag.cpp b/clang/test/CodeGen/systemz-charset-diag.cpp index 4ed94810150a3..ad08e1f391214 100644 --- a/clang/test/CodeGen/systemz-charset-diag.cpp +++ b/clang/test/CodeGen/systemz-charset-diag.cpp @@ -1,3 +1,11 @@ // RUN: %clang_cc1 -triple s390x-none-zos -fexec-charset IBM-1047 %s -std=c++17 -emit-llvm -o - -verify const char* Computer = "🖥️"; // expected-error-re {{conversion to literal encoding failed: {{.*}}}} + +static_assert(false, "Error string"); // expected-error {{static assertion failed: Error string}} + +[[deprecated("message")]] void test_deprecated() {return;} // expected-note {{'test_deprecated' has been explicitly marked deprecated here}} + +int main() { + test_deprecated(); // expected-warning {{'test_deprecated' is deprecated: message}} +} diff --git a/clang/test/CodeGen/systemz-charset.c b/clang/test/CodeGen/systemz-charset.c index 766b6a83f00ff..618b0cc203ab6 100644 --- a/clang/test/CodeGen/systemz-charset.c +++ b/clang/test/CodeGen/systemz-charset.c @@ -66,3 +66,8 @@ const char* hello_macro = HELLO; const char* preprocessor_concatenation = HELLO_WORLD; //CHECK: c"\C8\85\93\93\96@\E6\96\99\93\84Z\00" //CHECK-UTF8: c"Hello World!\00" + +void test1() { + printf(__FUNCTION__); +} +//CHECK: @__FUNCTION__.test1 = private unnamed_addr constant [6 x i8] c"\A3\85\A2\A3\F1\00" >From e71999cdfbd4db60b311f42090699c9a64e963fb Mon Sep 17 00:00:00 2001 From: Abhina Sreeskantharajan <[email protected]> Date: Fri, 8 May 2026 12:29:23 -0400 Subject: [PATCH 2/5] Remove old include --- clang/include/clang/Sema/Sema.h | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index d54e4ce19166a..aecd0d1c2f5dd 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -55,7 +55,6 @@ #include "clang/Basic/TemplateKinds.h" #include "clang/Basic/TokenKinds.h" #include "clang/Basic/TypeTraits.h" -#include "clang/Lex/LiteralConverter.h" #include "clang/Sema/AnalysisBasedWarnings.h" #include "clang/Sema/Attr.h" #include "clang/Sema/CleanupInfo.h" >From 16017648a2cf90023dafad53917b88a79c0cfaf5 Mon Sep 17 00:00:00 2001 From: Abhina Sreeskantharajan <[email protected]> Date: Mon, 11 May 2026 09:27:48 -0400 Subject: [PATCH 3/5] Fix build failure --- clang/include/clang/Sema/Sema.h | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index aecd0d1c2f5dd..5d00b0c94daa3 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -55,6 +55,7 @@ #include "clang/Basic/TemplateKinds.h" #include "clang/Basic/TokenKinds.h" #include "clang/Basic/TypeTraits.h" +#include "clang/Lex/TextEncodingConfig.h" #include "clang/Sema/AnalysisBasedWarnings.h" #include "clang/Sema/Attr.h" #include "clang/Sema/CleanupInfo.h" >From 8cd93a5ddc26b0e34218132c93935d25269168f5 Mon Sep 17 00:00:00 2001 From: Abhina Sreeskantharajan <[email protected]> Date: Tue, 12 May 2026 08:07:08 -0400 Subject: [PATCH 4/5] fix CI --- clang/test/CodeGen/systemz-charset.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang/test/CodeGen/systemz-charset.c b/clang/test/CodeGen/systemz-charset.c index 618b0cc203ab6..16f269f8fb2f5 100644 --- a/clang/test/CodeGen/systemz-charset.c +++ b/clang/test/CodeGen/systemz-charset.c @@ -1,6 +1,8 @@ // RUN: %clang_cc1 %s -emit-llvm -triple s390x-none-zos -fexec-charset IBM-1047 -o - | FileCheck %s // RUN: %clang_cc1 %s -emit-llvm -triple s390x-none-zos -fexec-charset UTF-8 -DIBM1047_ONLY=1 -o - | FileCheck %s --check-prefix=CHECK-UTF8 +int printf(char const *, ...); + const char *UpperCaseLetters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; //CHECK: c"\C1\C2\C3\C4\C5\C6\C7\C8\C9\D1\D2\D3\D4\D5\D6\D7\D8\D9\E2\E3\E4\E5\E6\E7\E8\E9\00" //CHECK-UTF8: c"ABCDEFGHIJKLMNOPQRSTUVWXYZ\00" >From 5b71164a724d9e35d91aab05a7643fb71e9fb3d6 Mon Sep 17 00:00:00 2001 From: Abhina Sreeskantharajan <[email protected]> Date: Tue, 12 May 2026 15:21:15 -0400 Subject: [PATCH 5/5] fix CI --- clang/include/clang/AST/Expr.h | 6 ++++++ clang/include/clang/Sema/Sema.h | 4 ++-- clang/lib/AST/Expr.cpp | 15 +++++++++++++++ clang/lib/Parse/ParseDecl.cpp | 1 - clang/lib/Parse/Parser.cpp | 2 +- clang/lib/Sema/SemaExpr.cpp | 5 +++-- 6 files changed, 27 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 7c94c4d35641c..fa71b39432171 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -28,6 +28,7 @@ #include "clang/Basic/LangOptions.h" #include "clang/Basic/SyncScope.h" #include "clang/Basic/TypeTraits.h" +#include "clang/Lex/TextEncoding.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/SmallVector.h" @@ -2069,6 +2070,11 @@ class PredefinedExpr final return getIdentKindName(getIdentKind()); } + static std::string + ComputeNameAndTranslate(PredefinedIdentKind IK, const Decl *CurrentDecl, + TextEncoding &TE, + bool ForceElaboratedPrinting = false); + static std::string ComputeName(PredefinedIdentKind IK, const Decl *CurrentDecl, bool ForceElaboratedPrinting = false); diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 5d00b0c94daa3..f78455769a082 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -55,7 +55,7 @@ #include "clang/Basic/TemplateKinds.h" #include "clang/Basic/TokenKinds.h" #include "clang/Basic/TypeTraits.h" -#include "clang/Lex/TextEncodingConfig.h" +#include "clang/Lex/TextEncoding.h" #include "clang/Sema/AnalysisBasedWarnings.h" #include "clang/Sema/Attr.h" #include "clang/Sema/CleanupInfo.h" @@ -7376,7 +7376,7 @@ class Sema final : public SemaBase { /// to one string. ExprResult ActOnStringLiteral(ArrayRef<Token> StringToks, Scope *UDLScope = nullptr, - ConversionAction Action = CA_ToExecEncoding); + ConversionAction Action = CA_ToLiteralEncoding); ExprResult ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks); diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 1bafc4708a30b..78ea536ac50dd 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -674,6 +674,21 @@ StringRef PredefinedExpr::getIdentKindName(PredefinedIdentKind IK) { llvm_unreachable("Unknown ident kind for PredefinedExpr"); } +std::string PredefinedExpr::ComputeNameAndTranslate( + PredefinedIdentKind IK, const Decl *CurrentDecl, TextEncoding &TE, + bool ForceElaboratedPrinting) { + using namespace clang::charinfo; + std::string Result = ComputeName(IK, CurrentDecl, ForceElaboratedPrinting); + llvm::TextEncodingConverter *Converter = + TE.getConverter(CA_ToLiteralEncoding); + if (Converter) { + SmallString<128> Converted; + Converter->convert(Result, Converted); + Result = std::string(Converted); + } + return Result; +} + // FIXME: Maybe this should use DeclPrinter with a special "print predefined // expr" policy instead. std::string PredefinedExpr::ComputeName(PredefinedIdentKind IK, diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 97e0721c02b1b..3aa41ebc05397 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -1555,7 +1555,6 @@ void Parser::ParseExternalSourceSymbolAttribute( SkipUntil(tok::comma, tok::r_paren, StopAtSemi | StopBeforeMatch); continue; } - if (Keyword == Ident_language) { if (HadLanguage) { Diag(KeywordLoc, diag::err_external_source_symbol_duplicate_clause) diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index 7ac5e0a36d60e..5a199b842fe8e 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -70,7 +70,7 @@ Parser::Parser(Preprocessor &pp, Sema &actions, bool skipFunctionBodies) NumCachedScopes = 0; CurParsedObjCImpl = nullptr; - ParserConversionAction = CA_ToExecEncoding; + ParserConversionAction = CA_ToLiteralEncoding; // Add #pragma handlers. These are removed and destroyed in the // destructor. diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index cb8184b39afd6..73bc6761d3e29 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -3637,8 +3637,9 @@ ExprResult Sema::BuildPredefinedExpr(SourceLocation Loc, // the string. bool ForceElaboratedPrinting = IK == PredefinedIdentKind::Function && getLangOpts().MSVCCompat; - auto Str = - PredefinedExpr::ComputeName(IK, currentDecl, ForceElaboratedPrinting); + auto Str = PredefinedExpr::ComputeNameAndTranslate( + IK, currentDecl, getPreprocessor().getTextEncoding(), + ForceElaboratedPrinting); unsigned Length = Str.length(); llvm::APInt LengthI(32, Length + 1); _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
