https://github.com/abhina-sree updated 
https://github.com/llvm/llvm-project/pull/195890

>From 691ad51514a7a0a4ad973aaeeafa8971be4770c2 Mon Sep 17 00:00:00 2001
From: Abhina Sreeskantharajan <[email protected]>
Date: Fri, 8 May 2026 12:21:38 -0400
Subject: [PATCH] Enable driver changes for fexec-charset

---
 clang/docs/LanguageExtensions.rst             |  6 +++---
 .../clang/Basic/DiagnosticDriverKinds.td      |  1 +
 clang/include/clang/Options/Options.td        | 18 +++++++++++++----
 clang/lib/Driver/ToolChains/Clang.cpp         | 20 +++++++++++++------
 clang/test/CodeGen/systemz-charset.c          |  1 +
 clang/test/Driver/cl-options.c                |  7 ++++---
 clang/test/Driver/clang_f_opts.c              | 14 ++++++++++---
 llvm/include/llvm/Support/TextEncoding.h      |  2 ++
 llvm/lib/Support/TextEncoding.cpp             | 10 ++++++++++
 9 files changed, 60 insertions(+), 19 deletions(-)

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 03cb02deb5e7f..f2aca70d9d57d 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -421,9 +421,9 @@ Builtin Macros
 
 ``__clang_literal_encoding__``
   Defined to a narrow string literal that represents the current encoding of
-  narrow string literals, e.g., ``"hello"``. This macro typically expands to
-  "UTF-8" (but may change in the future if the
-  ``-fexec-charset="Encoding-Name"`` option is implemented.)
+  narrow string literals, e.g., ``"hello"``. This macro expands to the text
+  encoding specified by ``-fexec-charset`` if any, or a system-specific default
+  otherwise: ``"IBM-1047"`` on z/OS and ``"UTF-8"`` on all other systems.
 
 ``__clang_wide_literal_encoding__``
   Defined to a narrow string literal that represents the current encoding of
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 114ee475c371f..16460583b3770 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -149,6 +149,7 @@ def warn_drv_unsupported_option_part_for_target : Warning<
   InGroup<OptionIgnored>;
 def err_drv_unsupported_option_part_for_target : Error<
   "'%0' in '%1' option is not currently supported for target '%2'">;
+def err_drv_unsupported_encoding : Error<"'%0' is not a supported encoding">;
 def warn_drv_invalid_argument_for_flang : Warning<
   "'%0' is not valid for Fortran">,
   InGroup<OptionIgnored>;
diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td
index 73bce00b921ea..95163962da647 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -3152,7 +3152,13 @@ def fexperimental_strict_floating_point : Flag<["-"], "fexperimental-strict-floa
 def finput_charset_EQ : Joined<["-"], "finput-charset=">,
   Visibility<[ClangOption, FlangOption, FC1Option]>, Group<f_Group>,
   HelpText<"Specify the default character set for source files">;
-def fexec_charset_EQ : Joined<["-"], "fexec-charset=">, Group<f_Group>;
+def fexec_charset_EQ
+    : Joined<["-"], "fexec-charset=">,
+      Group<f_Group>,
+      HelpText<
+          "Set the execution <encoding> for string and character literals. "
+          "Supported character encodings include ISO-8859-1, UTF-8, IBM1047, "
+          "and possibly those supported by ICU or the host iconv library.">;
 def finstrument_functions
     : Flag<["-"], "finstrument-functions">,
       Group<f_Group>,
@@ -9163,9 +9169,13 @@ def _SLASH_showFilenames_ : CLFlag<"showFilenames-">,
 def _SLASH_source_charset : CLCompileJoined<"source-charset:">,
   HelpText<"Set source encoding, supports only UTF-8">,
   Alias<finput_charset_EQ>;
-def _SLASH_execution_charset : CLCompileJoined<"execution-charset:">,
-  HelpText<"Set runtime encoding, supports only UTF-8">,
-  Alias<fexec_charset_EQ>;
+def _SLASH_execution_charset
+    : CLCompileJoined<"execution-charset:">,
+      HelpText<
+          "Set the execution <encoding> for string and character literals. "
+          "Supported character encodings include ISO-8859-1, UTF-8, IBM1047, "
+          "and possibly those supported by ICU or the host iconv library.">,
+      Alias<fexec_charset_EQ>;
 def _SLASH_std : CLCompileJoined<"std:">,
   HelpText<"Set language version (c++14,c++17,c++20,c++23preview,c++latest,c11,c17)">;
 def _SLASH_U : CLJoinedOrSeparate<"U">, HelpText<"Undefine macro">,
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index bdffa4fdd7e6b..fe78b60096aa3 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -52,6 +52,7 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Process.h"
+#include "llvm/Support/TextEncoding.h"
 #include "llvm/Support/YAMLParser.h"
 #include "llvm/TargetParser/AArch64TargetParser.h"
 #include "llvm/TargetParser/ARMTargetParserCommon.h"
@@ -7755,12 +7756,19 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
                                           << value;
   }
 
-  // -fexec_charset=UTF-8 is default. Reject others
-  if (Arg *execCharset = Args.getLastArg(options::OPT_fexec_charset_EQ)) {
-    StringRef value = execCharset->getValue();
-    if (!value.equals_insensitive("utf-8"))
-      D.Diag(diag::err_drv_invalid_value) << execCharset->getAsString(Args)
-                                          << value;
+  if (Arg *ExecEncoding = Args.getLastArg(options::OPT_fexec_charset_EQ)) {
+    StringRef Value = ExecEncoding->getValue();
+    if (llvm::TextEncodingConverter::isEncodingSupported(Value)) {
+      CmdArgs.push_back("-fexec-charset");
+      CmdArgs.push_back(Args.MakeArgString(Value));
+    } else {
+      D.Diag(diag::err_drv_unsupported_encoding) << Value;
+    }
+  } else {
+    // No -fexec-charset given: use the triple's default narrow text encoding.
+    CmdArgs.push_back("-fexec-charset");
+    CmdArgs.push_back(
+        Args.MakeArgString(Triple.getDefaultNarrowTextEncoding()));
   }
 
   RenderDiagnosticsOptions(D, Args, CmdArgs);
diff --git a/clang/test/CodeGen/systemz-charset.c b/clang/test/CodeGen/systemz-charset.c
index 78ae3353224af..dbb36aed49990 100644
--- a/clang/test/CodeGen/systemz-charset.c
+++ b/clang/test/CodeGen/systemz-charset.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 %s -emit-llvm -triple s390x-none-zos -fexec-charset IBM-1047 -o - | FileCheck %s
+// RUN: %clang %s -emit-llvm -S -target s390x-ibm-zos -o - | FileCheck %s
 // RUN: %clang_cc1 %s -emit-llvm -triple s390x-none-zos -fexec-charset UTF-8 -DIBM1047_ONLY=1 -o - | FileCheck %s --check-prefix=CHECK-UTF8
 
 int printf(char const *, ...);
diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c
index c0f57ae768252..1a2827012549d 100644
--- a/clang/test/Driver/cl-options.c
+++ b/clang/test/Driver/cl-options.c
@@ -250,10 +250,11 @@
 // RUN: not %clang_cl /source-charset:utf-16 -### -- %s 2>&1 | FileCheck -check-prefix=source-charset-utf-16 %s
 // source-charset-utf-16: invalid value 'utf-16' in '/source-charset:utf-16'
 
-// /execution-charset: should warn on everything except UTF-8.
-// RUN: not %clang_cl /execution-charset:utf-16 -### -- %s 2>&1 | FileCheck -check-prefix=execution-charset-utf-16 %s
-// execution-charset-utf-16: invalid value 'utf-16' in '/execution-charset:utf-16'
+// /execution-charset: should error on unsupported charsets.
+// RUN: not %clang_cl /execution-charset:invalid-charset -### -- %s 2>&1 | FileCheck -check-prefix=execution-charset-invalid %s
+// execution-charset-invalid: 'invalid-charset' is not a supported encoding
 //
+
 // RUN: %clang_cl /Umymacro -### -- %s 2>&1 | FileCheck -check-prefix=U %s
 // RUN: %clang_cl /U mymacro -### -- %s 2>&1 | FileCheck -check-prefix=U %s
 // U: "-U" "mymacro"
diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c
index 5871f1580d6b7..7fe67068118fc 100644
--- a/clang/test/Driver/clang_f_opts.c
+++ b/clang/test/Driver/clang_f_opts.c
@@ -232,8 +232,16 @@
 // RUN: not %clang -### -S -finput-charset=iso-8859-1 -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-INPUT-CHARSET %s
 // CHECK-INVALID-INPUT-CHARSET: error: invalid value 'iso-8859-1' in '-finput-charset=iso-8859-1'
 
-// RUN: not %clang -### -S -fexec-charset=iso-8859-1 -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-EXEC-CHARSET %s
-// CHECK-INVALID-EXEC-CHARSET: error: invalid value 'iso-8859-1' in '-fexec-charset=iso-8859-1'
+// RUN: not %clang -### -S -fexec-charset=invalid-charset -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-EXEC-CHARSET %s
+// CHECK-INVALID-EXEC-CHARSET: error: 'invalid-charset' is not a supported encoding
+
+// Test that we support the following exec charsets. The preferred MIME name is
+// `IBM1047`, but `IBM-1047` is the name used by z/OS USS utilities such as
+// `chtag`.
+// RUN: %clang -### -S -fexec-charset=UTF-8 -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-EXEC-CHARSET-UTF-8 %s
+// RUN: %clang -### -S -fexec-charset=IBM-1047 -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-EXEC-CHARSET-IBM-1047 %s
+// CHECK-EXEC-CHARSET-UTF-8: "-fexec-charset" "UTF-8"
+// CHECK-EXEC-CHARSET-IBM-1047: "-fexec-charset" "IBM-1047"
 
 // Test that we don't error on these.
 // RUN: not %clang -### -S -Werror                                             
   \
@@ -247,7 +255,7 @@
 // RUN:     -fident -fno-ident                                                \
 // RUN:     -fimplicit-templates -fno-implicit-templates                      \
 // RUN:     -finput-charset=UTF-8                                             \
-// RUN:     -fexec-charset=UTF-8                                             \
+// RUN:     -fexec-charset=UTF-8                                              \
 // RUN:     -fivopts -fno-ivopts                                              \
 // RUN:     -fnon-call-exceptions -fno-non-call-exceptions                    \
 // RUN:     -fpermissive -fno-permissive                                      \
diff --git a/llvm/include/llvm/Support/TextEncoding.h b/llvm/include/llvm/Support/TextEncoding.h
index 8f5a6122ede45..c120e36d1de6c 100644
--- a/llvm/include/llvm/Support/TextEncoding.h
+++ b/llvm/include/llvm/Support/TextEncoding.h
@@ -145,6 +145,8 @@ class TextEncodingConverter {
       return Result[0];
     return '\0';
   }
+
+  LLVM_ABI static bool isEncodingSupported(StringRef Name);
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Support/TextEncoding.cpp b/llvm/lib/Support/TextEncoding.cpp
index 5c1d9696686a2..475799df9070b 100644
--- a/llvm/lib/Support/TextEncoding.cpp
+++ b/llvm/lib/Support/TextEncoding.cpp
@@ -58,6 +58,16 @@ static std::optional<TextEncoding> getKnownEncoding(StringRef Name) {
   return std::nullopt;
 }
 
+// Returns true for known encodings or ones a UTF-8 converter can target.
+bool TextEncodingConverter::isEncodingSupported(StringRef Name) {
+  if (getKnownEncoding(Name))
+    return true;
+  // Pass Name itself: Name.data() is not guaranteed to be null-terminated.
+  llvm::ErrorOr<llvm::TextEncodingConverter> ErrorOrConverter =
+      llvm::TextEncodingConverter::create("UTF-8", Name);
+  return static_cast<bool>(ErrorOrConverter);
+}
+
 [[maybe_unused]] static void HandleOverflow(size_t &Capacity, char *&Output,
                                             size_t &OutputLength,
                                             SmallVectorImpl<char> &Result) {

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to