https://github.com/abhina-sree updated https://github.com/llvm/llvm-project/pull/196569
>From 1b89edc902bdd2fd2328c55473ef7eb01161202b Mon Sep 17 00:00:00 2001 From: Abhina Sreeskantharajan <[email protected]> Date: Fri, 8 May 2026 12:20:45 -0400 Subject: [PATCH] convert to exec-charset inside getPredefinedStringLiteralFromCache, test __builtin_FILE() --- clang/include/clang/Basic/TargetInfo.h | 2 ++ clang/lib/AST/ASTContext.cpp | 10 +++++++++ clang/lib/Basic/TargetInfo.cpp | 3 +++ clang/lib/Lex/TextEncodingConfig.cpp | 9 +++++---- clang/test/CodeGen/systemz-charset.cpp | 28 ++++++++++++++++++++++++++ 5 files changed, 48 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index ec7d4fcd4d8e3..6c0e65a85ee13 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -326,6 +326,8 @@ class TargetInfo : public TransferrableTargetInfo, llvm::TextEncodingConverter *FormatStrConverter; + llvm::TextEncodingConverter *ExecStrConverter; + /// Retrieve the target options. TargetOptions &getTargetOpts() const { assert(TargetOpts && "Missing target options"); diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index a0894318dbd53..80e073385ce82 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -13752,6 +13752,16 @@ ASTContext::getPredefinedStringLiteralFromCache(StringRef Key) const { *this, Key, StringLiteralKind::Ordinary, /*Pascal*/ false, getStringLiteralArrayType(CharTy, Key.size()), SourceLocation()); + + llvm::TextEncodingConverter *Converter = getTargetInfo().ExecStrConverter; + if (Converter) { + SmallString<128> Converted; + Converter->convert(Result->getString(), Converted); + Result = StringLiteral::Create( + *this, Converted, StringLiteralKind::Ordinary, /*Pascal*/ false, + getStringLiteralArrayType(CharTy, Converted.size()), SourceLocation()); + } + return Result; } diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index 43efca42886cc..0c553033ad069 100644 --- 
a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -197,6 +197,9 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) { FormatStrConverter = new llvm::TextEncodingConverter( std::move(*llvm::TextEncodingConverter::createNoopConverter())); + + ExecStrConverter = new llvm::TextEncodingConverter( + std::move(*llvm::TextEncodingConverter::createNoopConverter())); } // Out of line virtual dtor for TargetInfo. diff --git a/clang/lib/Lex/TextEncodingConfig.cpp b/clang/lib/Lex/TextEncodingConfig.cpp index 427b75a1c0a8b..6df88e258ffde 100644 --- a/clang/lib/Lex/TextEncodingConfig.cpp +++ b/clang/lib/Lex/TextEncodingConfig.cpp @@ -37,14 +37,15 @@ TextEncodingConfig::setConvertersFromOptions(TextEncodingConfig &TEC, return std::error_code(); ErrorOr<TextEncodingConverter> ErrorOrConverter = llvm::TextEncodingConverter::create(UTF8, TEC.ExecEncoding); - if (ErrorOrConverter) + if (ErrorOrConverter) { TEC.ToExecEncodingConverter = new TextEncodingConverter(std::move(*ErrorOrConverter)); - else + TInfo.ExecStrConverter = TEC.ToExecEncodingConverter; + } else return ErrorOrConverter.getError(); - ErrorOrConverter = llvm::TextEncodingConverter::create(TEC.SystemEncoding, - TEC.InternalEncoding); + ErrorOrConverter = llvm::TextEncodingConverter::create( + TInfo.getTriple().getDefaultNarrowTextEncoding(), UTF8); if (ErrorOrConverter) TInfo.FormatStrConverter = diff --git a/clang/test/CodeGen/systemz-charset.cpp b/clang/test/CodeGen/systemz-charset.cpp index 8ce4e906325dd..ffab1c50ed3d5 100644 --- a/clang/test/CodeGen/systemz-charset.cpp +++ b/clang/test/CodeGen/systemz-charset.cpp @@ -1,49 +1,77 @@ // RUN: %clang_cc1 %s -emit-llvm -triple s390x-none-zos -std=c++17 -fexec-charset IBM-1047 -o - | FileCheck %s +// RUN: %clang_cc1 %s -emit-llvm -triple s390x-none-zos -fexec-charset UTF-8 -o - | FileCheck %s --check-prefix=CHECK-UTF8 const char *RawString = R"(Hello\n)"; //CHECK: c"\C8\85\93\93\96\E0\95\00" +//CHECK-UTF8: c"Hello\\n\00" const char 
*MultiLineRawString = R"( Hello There)"; //CHECK: c"\15\C8\85\93\93\96\15\E3\88\85\99\85\00" +//CHECK-UTF8: c"\0AHello\0AThere\00" char UnicodeChar8 = u8'1'; //CHECK: i8 49 +//CHECK-UTF8: i8 49 char16_t UnicodeChar16 = u'1'; //CHECK: i16 49 +//CHECK-UTF8: i16 49 char32_t UnicodeChar32 = U'1'; //CHECK: i32 49 +//CHECK-UTF8: i32 49 int FourChar = '1234'; //CHECK: i32 -235736076 const char *EscapeCharacters8 = u8"\a\b\f\n\r\t\v\\\'\"\?"; //CHECK: c"\07\08\0C\0A\0D\09\0B\\'\22?\00" +//CHECK-UTF8: c"\07\08\0C\0A\0D\09\0B\\'\22?\00" const char16_t *EscapeCharacters16 = u"\a\b\f\n\r\t\v\\\'\"\?"; //CHECK: [12 x i16] [i16 7, i16 8, i16 12, i16 10, i16 13, i16 9, i16 11, i16 92, i16 39, i16 34, i16 63, i16 0] +//CHECK-UTF8: [12 x i16] [i16 7, i16 8, i16 12, i16 10, i16 13, i16 9, i16 11, i16 92, i16 39, i16 34, i16 63, i16 0] const char32_t *EscapeCharacters32 = U"\a\b\f\n\r\t\v\\\'\"\?"; //CHECK: [12 x i32] [i32 7, i32 8, i32 12, i32 10, i32 13, i32 9, i32 11, i32 92, i32 39, i32 34, i32 63, i32 0] +//CHECK-UTF8: [12 x i32] [i32 7, i32 8, i32 12, i32 10, i32 13, i32 9, i32 11, i32 92, i32 39, i32 34, i32 63, i32 0] const char *UnicodeString8 = u8"Hello"; //CHECK: c"Hello\00" +//CHECK-UTF8: c"Hello\00" + const char16_t *UnicodeString16 = u"Hello"; //CHECK: [6 x i16] [i16 72, i16 101, i16 108, i16 108, i16 111, i16 0] +//CHECK-UTF8: [6 x i16] [i16 72, i16 101, i16 108, i16 108, i16 111, i16 0] + const char32_t *UnicodeString32 = U"Hello"; //CHECK: [6 x i32] [i32 72, i32 101, i32 108, i32 108, i32 111, i32 0] +//CHECK-UTF8: [6 x i32] [i32 72, i32 101, i32 108, i32 108, i32 111, i32 0] const char *UnicodeRawString8 = u8R"("Hello\")"; //CHECK: c"\22Hello\\\22\00" +//CHECK-UTF8: c"\22Hello\\\22\00" + const char16_t *UnicodeRawString16 = uR"("Hello\")"; //CHECK: [9 x i16] [i16 34, i16 72, i16 101, i16 108, i16 108, i16 111, i16 92, i16 34, i16 0] +//CHECK-UTF8: [9 x i16] [i16 34, i16 72, i16 101, i16 108, i16 108, i16 111, i16 92, i16 34, i16 0] + const char32_t 
*UnicodeRawString32 = UR"("Hello\")"; //CHECK: [9 x i32] [i32 34, i32 72, i32 101, i32 108, i32 108, i32 111, i32 92, i32 34, i32 0] +//CHECK-UTF8: [9 x i32] [i32 34, i32 72, i32 101, i32 108, i32 108, i32 111, i32 92, i32 34, i32 0] const char *UnicodeUCNString8 = u8"\u00E2\u00AC\U000000DF"; //CHECK: c"\C3\A2\C2\AC\C3\9F\00" +//CHECK-UTF8: c"\C3\A2\C2\AC\C3\9F\00" + const char16_t *UnicodeUCNString16 = u"\u00E2\u00AC\U000000DF"; //CHECK: [4 x i16] [i16 226, i16 172, i16 223, i16 0] +//CHECK-UTF8: [4 x i16] [i16 226, i16 172, i16 223, i16 0] + const char32_t *UnicodeUCNString32 = U"\u00E2\u00AC\U000000DF"; //CHECK: [4 x i32] [i32 226, i32 172, i32 223, i32 0] +//CHECK-UTF8: [4 x i32] [i32 226, i32 172, i32 223, i32 0] + +const char *file = __builtin_FILE(); +//CHECK: {{.*}}\A2\A8\A2\A3\85\94\A9`\83\88\81\99\A2\85\A3K\83\97\97\00" +//CHECK-UTF8: {{.*}}systemz-charset.cpp\00" _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
