https://github.com/abhina-sree updated https://github.com/llvm/llvm-project/pull/195890
>From 691ad51514a7a0a4ad973aaeeafa8971be4770c2 Mon Sep 17 00:00:00 2001 From: Abhina Sreeskantharajan <[email protected]> Date: Fri, 8 May 2026 12:21:38 -0400 Subject: [PATCH] Enable driver changes for fexec-charset --- clang/docs/LanguageExtensions.rst | 6 +++--- .../clang/Basic/DiagnosticDriverKinds.td | 1 + clang/include/clang/Options/Options.td | 18 +++++++++++++---- clang/lib/Driver/ToolChains/Clang.cpp | 20 +++++++++++++------ clang/test/CodeGen/systemz-charset.c | 1 + clang/test/Driver/cl-options.c | 7 ++++--- clang/test/Driver/clang_f_opts.c | 14 ++++++++++--- llvm/include/llvm/Support/TextEncoding.h | 2 ++ llvm/lib/Support/TextEncoding.cpp | 10 ++++++++++ 9 files changed, 60 insertions(+), 19 deletions(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 03cb02deb5e7f..f2aca70d9d57d 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -421,9 +421,9 @@ Builtin Macros ``__clang_literal_encoding__`` Defined to a narrow string literal that represents the current encoding of - narrow string literals, e.g., ``"hello"``. This macro typically expands to - "UTF-8" (but may change in the future if the - ``-fexec-charset="Encoding-Name"`` option is implemented.) + narrow string literals, e.g., ``"hello"``. This macro expands to the text + encoding specified by ``-fexec-charset`` if any, or a system-specific default + otherwise: ``"IBM-1047"`` on z/OS and ``"UTF-8"`` on all other systems. 
``__clang_wide_literal_encoding__`` Defined to a narrow string literal that represents the current encoding of diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 114ee475c371f..16460583b3770 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -149,6 +149,7 @@ def warn_drv_unsupported_option_part_for_target : Warning< InGroup<OptionIgnored>; def err_drv_unsupported_option_part_for_target : Error< "'%0' in '%1' option is not currently supported for target '%2'">; +def err_drv_unsupported_encoding : Error<"'%0' is not a supported encoding">; def warn_drv_invalid_argument_for_flang : Warning< "'%0' is not valid for Fortran">, InGroup<OptionIgnored>; diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 73bce00b921ea..95163962da647 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -3152,7 +3152,13 @@ def fexperimental_strict_floating_point : Flag<["-"], "fexperimental-strict-floa def finput_charset_EQ : Joined<["-"], "finput-charset=">, Visibility<[ClangOption, FlangOption, FC1Option]>, Group<f_Group>, HelpText<"Specify the default character set for source files">; -def fexec_charset_EQ : Joined<["-"], "fexec-charset=">, Group<f_Group>; +def fexec_charset_EQ + : Joined<["-"], "fexec-charset=">, + Group<f_Group>, + HelpText< + "Set the execution <encoding> for string and character literals. 
" + "Supported character encodings include ISO-8859-1, UTF-8, IBM1047, " + "and possibly those supported by ICU or the host iconv library.">; def finstrument_functions : Flag<["-"], "finstrument-functions">, Group<f_Group>, @@ -9163,9 +9169,13 @@ def _SLASH_showFilenames_ : CLFlag<"showFilenames-">, def _SLASH_source_charset : CLCompileJoined<"source-charset:">, HelpText<"Set source encoding, supports only UTF-8">, Alias<finput_charset_EQ>; -def _SLASH_execution_charset : CLCompileJoined<"execution-charset:">, - HelpText<"Set runtime encoding, supports only UTF-8">, - Alias<fexec_charset_EQ>; +def _SLASH_execution_charset + : CLCompileJoined<"execution-charset:">, + HelpText< + "Set the execution <encoding> for string and character literals. " + "Supported character encodings include ISO-8859-1, UTF-8, IBM1047, " + "and possibly those supported by ICU or the host iconv library.">, + Alias<fexec_charset_EQ>; def _SLASH_std : CLCompileJoined<"std:">, HelpText<"Set language version (c++14,c++17,c++20,c++23preview,c++latest,c11,c17)">; def _SLASH_U : CLJoinedOrSeparate<"U">, HelpText<"Undefine macro">, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index bdffa4fdd7e6b..fe78b60096aa3 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -52,6 +52,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" +#include "llvm/Support/TextEncoding.h" #include "llvm/Support/YAMLParser.h" #include "llvm/TargetParser/AArch64TargetParser.h" #include "llvm/TargetParser/ARMTargetParserCommon.h" @@ -7755,12 +7756,19 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, << value; } - // -fexec_charset=UTF-8 is default. 
Reject others - if (Arg *execCharset = Args.getLastArg(options::OPT_fexec_charset_EQ)) { - StringRef value = execCharset->getValue(); - if (!value.equals_insensitive("utf-8")) - D.Diag(diag::err_drv_invalid_value) << execCharset->getAsString(Args) - << value; + if (Arg *ExecEncoding = Args.getLastArg(options::OPT_fexec_charset_EQ)) { + StringRef Value = ExecEncoding->getValue(); + if (llvm::TextEncodingConverter::isEncodingSupported(Value)) { + CmdArgs.push_back("-fexec-charset"); + CmdArgs.push_back(Args.MakeArgString(Value)); + } else { + D.Diag(diag::err_drv_unsupported_encoding) << Value; + } + } else { + // Set the default fexec-charset as the system charset. + CmdArgs.push_back("-fexec-charset"); + CmdArgs.push_back( + Args.MakeArgString(Triple.getDefaultNarrowTextEncoding())); } RenderDiagnosticsOptions(D, Args, CmdArgs); diff --git a/clang/test/CodeGen/systemz-charset.c b/clang/test/CodeGen/systemz-charset.c index 78ae3353224af..dbb36aed49990 100644 --- a/clang/test/CodeGen/systemz-charset.c +++ b/clang/test/CodeGen/systemz-charset.c @@ -1,4 +1,5 @@ // RUN: %clang_cc1 %s -emit-llvm -triple s390x-none-zos -fexec-charset IBM-1047 -o - | FileCheck %s +// RUN: %clang %s -emit-llvm -S -target s390x-ibm-zos -o - | FileCheck %s // RUN: %clang_cc1 %s -emit-llvm -triple s390x-none-zos -fexec-charset UTF-8 -DIBM1047_ONLY=1 -o - | FileCheck %s --check-prefix=CHECK-UTF8 int printf(char const *, ...); diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c index c0f57ae768252..1a2827012549d 100644 --- a/clang/test/Driver/cl-options.c +++ b/clang/test/Driver/cl-options.c @@ -250,10 +250,11 @@ // RUN: not %clang_cl /source-charset:utf-16 -### -- %s 2>&1 | FileCheck -check-prefix=source-charset-utf-16 %s // source-charset-utf-16: invalid value 'utf-16' in '/source-charset:utf-16' -// /execution-charset: should warn on everything except UTF-8. 
-// RUN: not %clang_cl /execution-charset:utf-16 -### -- %s 2>&1 | FileCheck -check-prefix=execution-charset-utf-16 %s -// execution-charset-utf-16: invalid value 'utf-16' in '/execution-charset:utf-16' +// /execution-charset: should error on unsupported charsets. +// RUN: not %clang_cl /execution-charset:invalid-charset -### -- %s 2>&1 | FileCheck -check-prefix=execution-charset-invalid %s +// execution-charset-invalid: 'invalid-charset' is not a supported encoding // + // RUN: %clang_cl /Umymacro -### -- %s 2>&1 | FileCheck -check-prefix=U %s // RUN: %clang_cl /U mymacro -### -- %s 2>&1 | FileCheck -check-prefix=U %s // U: "-U" "mymacro" diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c index 5871f1580d6b7..7fe67068118fc 100644 --- a/clang/test/Driver/clang_f_opts.c +++ b/clang/test/Driver/clang_f_opts.c @@ -232,8 +232,16 @@ // RUN: not %clang -### -S -finput-charset=iso-8859-1 -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-INPUT-CHARSET %s // CHECK-INVALID-INPUT-CHARSET: error: invalid value 'iso-8859-1' in '-finput-charset=iso-8859-1' -// RUN: not %clang -### -S -fexec-charset=iso-8859-1 -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-EXEC-CHARSET %s -// CHECK-INVALID-EXEC-CHARSET: error: invalid value 'iso-8859-1' in '-fexec-charset=iso-8859-1' +// RUN: not %clang -### -S -fexec-charset=invalid-charset -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-EXEC-CHARSET %s +// CHECK-INVALID-EXEC-CHARSET: error: 'invalid-charset' is not a supported encoding + +// Test that we support the following exec charsets. The preferred MIME name is +// `IBM1047`, but `IBM-1047` is the name used by z/OS USS utilities such as +// `chtag`.
+// RUN: %clang -### -S -fexec-charset=UTF-8 -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-EXEC-CHARSET-UTF-8 %s +// RUN: %clang -### -S -fexec-charset=IBM-1047 -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-EXEC-CHARSET-IBM-1047 %s +// CHECK-EXEC-CHARSET-UTF-8: "-fexec-charset" "UTF-8" +// CHECK-EXEC-CHARSET-IBM-1047: "-fexec-charset" "IBM-1047" // Test that we don't error on these. // RUN: not %clang -### -S -Werror \ @@ -247,7 +255,7 @@ // RUN: -fident -fno-ident \ // RUN: -fimplicit-templates -fno-implicit-templates \ // RUN: -finput-charset=UTF-8 \ -// RUN: -fexec-charset=UTF-8 \ +// RUN: -fexec-charset=UTF-8 \ // RUN: -fivopts -fno-ivopts \ // RUN: -fnon-call-exceptions -fno-non-call-exceptions \ // RUN: -fpermissive -fno-permissive \ diff --git a/llvm/include/llvm/Support/TextEncoding.h b/llvm/include/llvm/Support/TextEncoding.h index 8f5a6122ede45..c120e36d1de6c 100644 --- a/llvm/include/llvm/Support/TextEncoding.h +++ b/llvm/include/llvm/Support/TextEncoding.h @@ -145,6 +145,8 @@ class TextEncodingConverter { return Result[0]; return '\0'; } + + LLVM_ABI static bool isEncodingSupported(StringRef Name); }; } // namespace llvm diff --git a/llvm/lib/Support/TextEncoding.cpp b/llvm/lib/Support/TextEncoding.cpp index 5c1d9696686a2..475799df9070b 100644 --- a/llvm/lib/Support/TextEncoding.cpp +++ b/llvm/lib/Support/TextEncoding.cpp @@ -58,6 +58,16 @@ static std::optional<TextEncoding> getKnownEncoding(StringRef Name) { return std::nullopt; } +bool TextEncodingConverter::isEncodingSupported(StringRef Name) { + if (getKnownEncoding(Name)) + return true; + // Pass the StringRef itself: Name.data() is not guaranteed to be + // null-terminated, so it must not be re-read as a C string. + llvm::ErrorOr<llvm::TextEncodingConverter> ErrorOrConverter = + llvm::TextEncodingConverter::create("UTF-8", Name); + return static_cast<bool>(ErrorOrConverter); +} + [[maybe_unused]] static void HandleOverflow(size_t &Capacity, char *&Output, size_t &OutputLength, SmallVectorImpl<char> &Result) { _______________________________________________ llvm-branch-commits mailing list
[email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
