The TPF operating system uses the GCC S/390 backend and sets an EBCDIC exec charset for compilation using -fexec-charset. However, certain libraries require ASCII strings instead. To be able to place calls to such libraries in otherwise normal code, it must be possible to switch the exec charset within a compilation unit.
This is an attempt to implement it by adding a new pragma which could be used like in the following example: int foo () { call_with_utf8("hello world"); #pragma GCC exec_charset("UTF16") call_with_utf16("hello world"); #pragma GCC exec_charset(pop) call_with_utf8("hello world"); } Does this look reasonable? Bye, -Andreas- --- gcc/c-family/c-pragma.c | 50 ++++++++++++++++++++++++++++ gcc/doc/extend.texi | 26 +++++++++++++++ gcc/testsuite/gcc.dg/pragma-exec_charset-1.c | 26 +++++++++++++++ libcpp/charset.c | 2 +- libcpp/include/cpplib.h | 3 ++ libcpp/init.c | 2 +- libcpp/internal.h | 1 - 7 files changed, 107 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pragma-exec_charset-1.c diff --git a/gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c index f7b59b3..db281b9 100644 --- a/gcc/c-family/c-pragma.c +++ b/gcc/c-family/c-pragma.c @@ -34,6 +34,8 @@ along with GCC; see the file COPYING3. If not see #include "opts.h" #include "plugin.h" +extern cpp_options *cpp_opts; + #define GCC_BAD(gmsgid) \ do { warning (OPT_Wpragmas, gmsgid); return; } while (0) #define GCC_BAD2(gmsgid, arg) \ @@ -1141,6 +1143,52 @@ handle_pragma_message (cpp_reader *ARG_UNUSED(dummy)) inform (input_location, "#pragma message: %s", TREE_STRING_POINTER (message)); } +static void +handle_pragma_exec_charset (cpp_reader *ARG_UNUSED(dummy)) +{ + enum cpp_ttype token; + tree x; + static const char* previous_charset = NULL; + + token = pragma_lex (&x); + if (token == CPP_OPEN_PAREN) + { + token = pragma_lex (&x); + if (token == CPP_STRING) + { + previous_charset = cpp_opts->narrow_charset; + cpp_opts->narrow_charset = TREE_STRING_POINTER (x); + } + else if (token == CPP_NAME + && strncmp (IDENTIFIER_POINTER (x), "pop", 3) == 0) + { + if (previous_charset == NULL) + { + warning (OPT_Wpragmas, + "pop without previous exec_charset use - ignored"); + return; + } + cpp_opts->narrow_charset = previous_charset; + previous_charset = NULL; + } + else + GCC_BAD ("expected a charset string or 
pop after %<#pragma exec_charset%>"); + + if (pragma_lex (&x) != CPP_CLOSE_PAREN) + GCC_BAD ("malformed %<#pragma exec_charset%>, ignored"); + } + else + GCC_BAD ("expected a string after %<#pragma exec_charset%>"); + + if (pragma_lex (&x) != CPP_EOF) + warning (OPT_Wpragmas, "junk at end of %<#pragma exec_charset%>"); + + inform (input_location, "switching to exec charset: %s", + cpp_opts->narrow_charset); + cpp_destroy_iconv (parse_in); + cpp_init_iconv (parse_in); +} + /* Mark whether the current location is valid for a STDC pragma. */ static bool valid_location_for_stdc_pragma; @@ -1571,6 +1619,8 @@ init_pragma (void) handle_pragma_redefine_extname); c_register_pragma_with_expansion (0, "message", handle_pragma_message); + c_register_pragma_with_expansion ("GCC", "exec_charset", + handle_pragma_exec_charset); #ifdef REGISTER_TARGET_PRAGMAS REGISTER_TARGET_PRAGMAS (); diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index d9b7a54..b67993a 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -21611,6 +21611,7 @@ for further explanation. * Push/Pop Macro Pragmas:: * Function Specific Option Pragmas:: * Loop-Specific Pragmas:: +* Charset-Specific Pragmas:: @end menu @node AArch64 Pragmas @@ -22209,6 +22210,31 @@ void ignore_vec_dep (int *a, int k, int c, int m) @} @end smallexample +@node Charset-Specific Pragmas +@subsection Charset-Specific Pragmas + +@table @code +@item #pragma GCC exec_charset(@var{"charset"}) +@cindex pragma GCC exec_charset + +Set the execution character set, used for string and character +constants. The default is the exec charset specified with +@option{-fexec-charset} or UTF-8 if @option{-fexec-charset} isn't used. +charset can be any encoding supported by the system's "iconv" library +routine. The special value @var{pop} (without ``) can be +used to switch back to the exec charset before the last @code{#pragma +GCC exec_charset} setting. 
+@end table + +@smallexample +call_with_utf8("hello world"); + +#pragma GCC exec_charset("UTF16") +call_with_utf16("hello world"); + +#pragma GCC exec_charset(pop) +call_with_utf8("hello world"); +@end smallexample @node Unnamed Fields @section Unnamed Structure and Union Fields diff --git a/gcc/testsuite/gcc.dg/pragma-exec_charset-1.c b/gcc/testsuite/gcc.dg/pragma-exec_charset-1.c new file mode 100644 index 0000000..5c695aa --- /dev/null +++ b/gcc/testsuite/gcc.dg/pragma-exec_charset-1.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include <stdio.h> + +char t1[] = "hello world"; +#pragma GCC exec_charset("EBCDIC-US") +char t2[] = "hello world"; +#pragma GCC exec_charset(pop) +char t3[] = "hello world"; + +char hello_world_utf8[12] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x00 }; +char hello_world_ebcdic[12] = { 0x88, 0x85, 0x93, 0x93, 0x96, 0x40, 0xa6, 0x96, 0x99, 0x93, 0x84, 0x00 }; + +int +main () +{ + if (__builtin_memcmp (t1, hello_world_utf8, 12) != 0) + __builtin_abort (); + + if (__builtin_memcmp (t2, hello_world_ebcdic, 12) != 0) + __builtin_abort (); + + if (__builtin_memcmp (t3, hello_world_utf8, 12) != 0) + __builtin_abort (); + +} diff --git a/libcpp/charset.c b/libcpp/charset.c index 6a3bbbc..47fa406 100644 --- a/libcpp/charset.c +++ b/libcpp/charset.c @@ -737,7 +737,7 @@ cpp_init_iconv (cpp_reader *pfile) /* Destroy iconv(3) descriptors set up by cpp_init_iconv, if necessary. */ void -_cpp_destroy_iconv (cpp_reader *pfile) +cpp_destroy_iconv (cpp_reader *pfile) { if (HAVE_ICONV) { diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index 804132a..acbdf5a 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -861,6 +861,9 @@ extern void cpp_post_options (cpp_reader *); /* Set up translation to the target character set. */ extern void cpp_init_iconv (cpp_reader *); +/* Cleanup translation to the target character set. 
*/ +extern void cpp_destroy_iconv (cpp_reader *); + /* Call this to finish preprocessing. If you requested dependency generation, pass an open stream to write the information to, otherwise NULL. It is your responsibility to close the stream. */ diff --git a/libcpp/init.c b/libcpp/init.c index 16ff202..4e68645 100644 --- a/libcpp/init.c +++ b/libcpp/init.c @@ -314,7 +314,7 @@ cpp_destroy (cpp_reader *pfile) _cpp_destroy_hashtable (pfile); _cpp_cleanup_files (pfile); - _cpp_destroy_iconv (pfile); + cpp_destroy_iconv (pfile); _cpp_free_buff (pfile->a_buff); _cpp_free_buff (pfile->u_buff); diff --git a/libcpp/internal.h b/libcpp/internal.h index f24e85c..ce2d902 100644 --- a/libcpp/internal.h +++ b/libcpp/internal.h @@ -757,7 +757,6 @@ extern bool _cpp_valid_ucn (cpp_reader *, const unsigned char **, cppchar_t *, source_range *char_range, cpp_string_location_reader *loc_reader); -extern void _cpp_destroy_iconv (cpp_reader *); extern unsigned char *_cpp_convert_input (cpp_reader *, const char *, unsigned char *, size_t, size_t, const unsigned char **, off_t *); -- 2.9.1