abhina-sree (https://github.com/abhina-sree) created pull request https://github.com/llvm/llvm-project/pull/195890
This enables driver changes for fexec-charset >From 696f807cfd557144b997f28fa9047854b9a4d2ca Mon Sep 17 00:00:00 2001 From: Abhina Sreeskantharajan <[email protected]> Date: Tue, 5 May 2026 13:47:57 -0400 Subject: [PATCH] Enable driver changes for fexec-charset This enables driver changes for fexec-charset --- clang/docs/LanguageExtensions.rst | 6 +++--- .../clang/Basic/DiagnosticDriverKinds.td | 1 + clang/include/clang/Options/Options.td | 18 +++++++++++++---- clang/lib/Driver/ToolChains/Clang.cpp | 20 +++++++++++++------ clang/lib/Frontend/InitPreprocessor.cpp | 12 +++++++---- clang/test/CodeGen/systemz-charset.c | 1 + clang/test/CodeGen/systemz-charset.cpp | 2 +- clang/test/Driver/cl-options.c | 7 ++++--- clang/test/Driver/clang_f_opts.c | 14 ++++++++++--- clang/test/Preprocessor/init-s390x.c | 1 + llvm/include/llvm/Support/TextEncoding.h | 2 ++ llvm/lib/Support/TextEncoding.cpp | 10 ++++++++++ 12 files changed, 70 insertions(+), 24 deletions(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 03cb02deb5e7f..f2aca70d9d57d 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -421,9 +421,9 @@ Builtin Macros ``__clang_literal_encoding__`` Defined to a narrow string literal that represents the current encoding of - narrow string literals, e.g., ``"hello"``. This macro typically expands to - "UTF-8" (but may change in the future if the - ``-fexec-charset="Encoding-Name"`` option is implemented.) + narrow string literals, e.g., ``"hello"``. This macro expands to the text + encoding specified by ``-fexec-charset`` if any, or a system-specific default + otherwise: ``"IBM-1047"`` on z/OS and ``"UTF-8"`` on all other systems. 
``__clang_wide_literal_encoding__`` Defined to a narrow string literal that represents the current encoding of diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 114ee475c371f..16460583b3770 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -149,6 +149,7 @@ def warn_drv_unsupported_option_part_for_target : Warning< InGroup<OptionIgnored>; def err_drv_unsupported_option_part_for_target : Error< "'%0' in '%1' option is not currently supported for target '%2'">; +def err_drv_unsupported_encoding : Error<"'%0' is not a supported encoding">; def warn_drv_invalid_argument_for_flang : Warning< "'%0' is not valid for Fortran">, InGroup<OptionIgnored>; diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 9236eab8be137..f79b4a07ce326 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -3152,7 +3152,13 @@ def fexperimental_strict_floating_point : Flag<["-"], "fexperimental-strict-floa def finput_charset_EQ : Joined<["-"], "finput-charset=">, Visibility<[ClangOption, FlangOption, FC1Option]>, Group<f_Group>, HelpText<"Specify the default character set for source files">; -def fexec_charset_EQ : Joined<["-"], "fexec-charset=">, Group<f_Group>; +def fexec_charset_EQ + : Joined<["-"], "fexec-charset=">, + Group<f_Group>, + HelpText< + "Set the execution <encoding> for string and character literals. 
" + "Supported character encodings include ISO-8859-1, UTF-8, IBM1047, " + "and possibly those supported by ICU or the host iconv library.">; def finstrument_functions : Flag<["-"], "finstrument-functions">, Group<f_Group>, @@ -9159,9 +9165,13 @@ def _SLASH_showFilenames_ : CLFlag<"showFilenames-">, def _SLASH_source_charset : CLCompileJoined<"source-charset:">, HelpText<"Set source encoding, supports only UTF-8">, Alias<finput_charset_EQ>; -def _SLASH_execution_charset : CLCompileJoined<"execution-charset:">, - HelpText<"Set runtime encoding, supports only UTF-8">, - Alias<fexec_charset_EQ>; +def _SLASH_execution_charset + : CLCompileJoined<"execution-charset:">, + HelpText< + "Set the execution <encoding> for string and character literals. " + "Supported character encodings include ISO-8859-1, UTF-8, IBM1047, " + "and possibly those supported by ICU or the host iconv library.">, + Alias<fexec_charset_EQ>; def _SLASH_std : CLCompileJoined<"std:">, HelpText<"Set language version (c++14,c++17,c++20,c++23preview,c++latest,c11,c17)">; def _SLASH_U : CLJoinedOrSeparate<"U">, HelpText<"Undefine macro">, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index bdffa4fdd7e6b..fe78b60096aa3 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -52,6 +52,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" +#include "llvm/Support/TextEncoding.h" #include "llvm/Support/YAMLParser.h" #include "llvm/TargetParser/AArch64TargetParser.h" #include "llvm/TargetParser/ARMTargetParserCommon.h" @@ -7755,12 +7756,19 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, << value; } - // -fexec_charset=UTF-8 is default. 
Reject others - if (Arg *execCharset = Args.getLastArg(options::OPT_fexec_charset_EQ)) { - StringRef value = execCharset->getValue(); - if (!value.equals_insensitive("utf-8")) - D.Diag(diag::err_drv_invalid_value) << execCharset->getAsString(Args) - << value; + if (Arg *ExecEncoding = Args.getLastArg(options::OPT_fexec_charset_EQ)) { + StringRef Value = ExecEncoding->getValue(); + if (llvm::TextEncodingConverter::isEncodingSupported(Value)) { + CmdArgs.push_back("-fexec-charset"); + CmdArgs.push_back(Args.MakeArgString(Value)); + } else { + D.Diag(diag::err_drv_unsupported_encoding) << Value; + } + } else { + // Set the default fexec-charset as the system charset. + CmdArgs.push_back("-fexec-charset"); + CmdArgs.push_back( + Args.MakeArgString(Triple.getDefaultNarrowTextEncoding())); } RenderDiagnosticsOptions(D, Args, CmdArgs); diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 3f0468a938149..687e590f5ecf2 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -1033,10 +1033,14 @@ static void InitializePredefinedMacros(const TargetInfo &TI, } } - // Macros to help identify the narrow and wide character sets - // FIXME: clang currently ignores -fexec-charset=. If this changes, - // then this may need to be updated. - Builder.defineMacro("__clang_literal_encoding__", "\"UTF-8\""); + // Macros to help identify the narrow and wide character sets. This is set + // to fexec-charset. If fexec-charset is not specified, the default is the + // system charset. + if (!LangOpts.ExecEncoding.empty()) + Builder.defineMacro("__clang_literal_encoding__", LangOpts.ExecEncoding); + else + Builder.defineMacro("__clang_literal_encoding__", + TI.getTriple().getDefaultNarrowTextEncoding()); if (TI.getTypeWidth(TI.getWCharType()) >= 32) { // FIXME: 32-bit wchar_t signals UTF-32. This may change // if -fwide-exec-charset= is ever supported. 
diff --git a/clang/test/CodeGen/systemz-charset.c b/clang/test/CodeGen/systemz-charset.c index 897b9d2eeefa1..fa5c2ea5ef8d5 100644 --- a/clang/test/CodeGen/systemz-charset.c +++ b/clang/test/CodeGen/systemz-charset.c @@ -1,4 +1,5 @@ // RUN: %clang_cc1 %s -emit-llvm -triple s390x-none-zos -fexec-charset IBM-1047 -o - | FileCheck %s +// RUN: %clang %s -emit-llvm -S -target s390x-ibm-zos -o - | FileCheck %s // RUN: %clang_cc1 %s -emit-llvm -triple s390x-none-zos -fexec-charset UTF-8 -DIBM1047_ONLY=1 -o - | FileCheck %s --check-prefix=CHECK-UTF8 const char *UpperCaseLetters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; diff --git a/clang/test/CodeGen/systemz-charset.cpp b/clang/test/CodeGen/systemz-charset.cpp index f7becd5b39492..7e66407fd2ff1 100644 --- a/clang/test/CodeGen/systemz-charset.cpp +++ b/clang/test/CodeGen/systemz-charset.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -emit-llvm -triple s390x-none-zos -std=c++17 -fexec-charset IBM-1047 -o - | FileCheck %s +// RUN: %clang %s -std=c++17 -emit-llvm -S -target s390x-ibm-zos -o - | FileCheck %s const char *RawString = R"(Hello\n)"; //CHECK: c"\C8\85\93\93\96\E0\95\00" diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c index c0f57ae768252..1a2827012549d 100644 --- a/clang/test/Driver/cl-options.c +++ b/clang/test/Driver/cl-options.c @@ -250,10 +250,11 @@ // RUN: not %clang_cl /source-charset:utf-16 -### -- %s 2>&1 | FileCheck -check-prefix=source-charset-utf-16 %s // source-charset-utf-16: invalid value 'utf-16' in '/source-charset:utf-16' -// /execution-charset: should warn on everything except UTF-8. -// RUN: not %clang_cl /execution-charset:utf-16 -### -- %s 2>&1 | FileCheck -check-prefix=execution-charset-utf-16 %s -// execution-charset-utf-16: invalid value 'utf-16' in '/execution-charset:utf-16' +// /execution-charset: should warn on invalid charsets. 
+// RUN: not %clang_cl /execution-charset:invalid-charset -### -- %s 2>&1 | FileCheck -check-prefix=execution-charset-invalid %s +// execution-charset-invalid: 'invalid-charset' is not a supported encoding // + // RUN: %clang_cl /Umymacro -### -- %s 2>&1 | FileCheck -check-prefix=U %s // RUN: %clang_cl /U mymacro -### -- %s 2>&1 | FileCheck -check-prefix=U %s // U: "-U" "mymacro" diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c index 5871f1580d6b7..7fe67068118fc 100644 --- a/clang/test/Driver/clang_f_opts.c +++ b/clang/test/Driver/clang_f_opts.c @@ -232,8 +232,16 @@ // RUN: not %clang -### -S -finput-charset=iso-8859-1 -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-INPUT-CHARSET %s // CHECK-INVALID-INPUT-CHARSET: error: invalid value 'iso-8859-1' in '-finput-charset=iso-8859-1' -// RUN: not %clang -### -S -fexec-charset=iso-8859-1 -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-EXEC-CHARSET %s -// CHECK-INVALID-EXEC-CHARSET: error: invalid value 'iso-8859-1' in '-fexec-charset=iso-8859-1' +// RUN: not %clang -### -S -fexec-charset=invalid-charset -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-EXEC-CHARSET %s +// CHECK-INVALID-EXEC-CHARSET: error: 'invalid-charset' is not a supported encoding + +// Test that we support the following exec charsets. The preferred MIME name is +// `IBM1047`, but `IBM-1047` is the name used by z/OS USS utilities such as +// `chtag`. +// RUN: %clang -### -S -fexec-charset=UTF-8 -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-EXEC-CHARSET-UTF-8 %s +// RUN: %clang -### -S -fexec-charset=IBM-1047 -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-EXEC-CHARSET-IBM-1047 %s +// CHECK-EXEC-CHARSET-UTF-8: "-fexec-charset" "UTF-8" +// CHECK-EXEC-CHARSET-IBM-1047: "-fexec-charset" "IBM-1047" // Test that we don't error on these. 
// RUN: not %clang -### -S -Werror \ @@ -247,7 +255,7 @@ // RUN: -fident -fno-ident \ // RUN: -fimplicit-templates -fno-implicit-templates \ // RUN: -finput-charset=UTF-8 \ -// RUN: -fexec-charset=UTF-8 \ +// RUN: -fexec-charset=UTF-8 \ // RUN: -fivopts -fno-ivopts \ // RUN: -fnon-call-exceptions -fno-non-call-exceptions \ // RUN: -fpermissive -fno-permissive \ diff --git a/clang/test/Preprocessor/init-s390x.c b/clang/test/Preprocessor/init-s390x.c index a8fbde46cbb75..9ff122def913f 100644 --- a/clang/test/Preprocessor/init-s390x.c +++ b/clang/test/Preprocessor/init-s390x.c @@ -206,4 +206,5 @@ // S390X-ZOS: #define __TOS_390__ 1 // S390X-ZOS: #define __TOS_MVS__ 1 // S390X-ZOS: #define __XPLINK__ 1 +// S390X-ZOS: #define __clang_literal_encoding__ IBM-1047 // S390X-ZOS-GNUXX: #define __wchar_t 1 diff --git a/llvm/include/llvm/Support/TextEncoding.h b/llvm/include/llvm/Support/TextEncoding.h index 8a304910aa5dd..3d31505c5bc6b 100644 --- a/llvm/include/llvm/Support/TextEncoding.h +++ b/llvm/include/llvm/Support/TextEncoding.h @@ -135,6 +135,8 @@ class TextEncodingConverter { return std::string(Result); return EC; } + + LLVM_ABI static bool isEncodingSupported(StringRef Name); }; } // namespace llvm diff --git a/llvm/lib/Support/TextEncoding.cpp b/llvm/lib/Support/TextEncoding.cpp index d36f02c1300b9..adc474ff03c50 100644 --- a/llvm/lib/Support/TextEncoding.cpp +++ b/llvm/lib/Support/TextEncoding.cpp @@ -58,6 +58,16 @@ static std::optional<TextEncoding> getKnownEncoding(StringRef Name) { return std::nullopt; } +bool TextEncodingConverter::isEncodingSupported(StringRef Name) { + if (getKnownEncoding(Name)) + return true; + llvm::ErrorOr<llvm::TextEncodingConverter> ErrorOrConverter = + llvm::TextEncodingConverter::create("UTF-8", Name.data()); + if (ErrorOrConverter) + return true; + return false; +} + [[maybe_unused]] static void HandleOverflow(size_t &Capacity, char *&Output, size_t &OutputLength, SmallVectorImpl<char> &Result) { 
_______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
