[PATCH 1/3] C: Implement C2X N2653 char8_t and UTF-8 string literal changes

2022-07-25 Thread Tom Honermann via Gcc-patches
This patch implements the core language and compiler dependent library
changes adopted for C2X via WG14 N2653.  The changes include:
- Change of type for UTF-8 string literals from array of char to
  array of char8_t (unsigned char).
- A new atomic_char8_t typedef.
- A new ATOMIC_CHAR8_T_LOCK_FREE macro defined in terms of the existing
  __GCC_ATOMIC_CHAR8_T_LOCK_FREE predefined macro.

gcc/ChangeLog:

* ginclude/stdatomic.h (atomic_char8_t,
ATOMIC_CHAR8_T_LOCK_FREE): New typedef and macro.

gcc/c/ChangeLog:

* c-parser.cc (c_parser_string_literal): Use char8_t as the type
of CPP_UTF8STRING when char8_t support is enabled.
* c-typeck.cc (digest_init): Allow initialization of an array
of character type by a string literal with type array of
char8_t.

gcc/c-family/ChangeLog:

* c-lex.cc (lex_string, lex_charconst): Use char8_t as the type
of CPP_UTF8CHAR and CPP_UTF8STRING when char8_t support is
enabled.
* c-opts.cc (c_common_post_options): Set flag_char8_t if
targeting C2X.
---
 gcc/c-family/c-lex.cc    | 13 +++++++++----
 gcc/c-family/c-opts.cc   |  4 ++--
 gcc/c/c-parser.cc        | 16 ++++++++++++++--
 gcc/c/c-typeck.cc        |  2 +-
 gcc/ginclude/stdatomic.h |  8 ++++++++
 5 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/gcc/c-family/c-lex.cc b/gcc/c-family/c-lex.cc
index 8bfa4f4024f..0b6f94e18a8 100644
--- a/gcc/c-family/c-lex.cc
+++ b/gcc/c-family/c-lex.cc
@@ -1352,7 +1352,14 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
default:
case CPP_STRING:
case CPP_UTF8STRING:
- value = build_string (1, "");
+ if (type == CPP_UTF8STRING && flag_char8_t)
+   {
+ value = build_string (TYPE_PRECISION (char8_type_node)
+   / TYPE_PRECISION (char_type_node),
+   "");  /* char8_t is 8 bits */
+   }
+ else
+   value = build_string (1, "");
  break;
case CPP_STRING16:
  value = build_string (TYPE_PRECISION (char16_type_node)
@@ -1425,9 +1432,7 @@ lex_charconst (const cpp_token *token)
 type = char16_type_node;
   else if (token->type == CPP_UTF8CHAR)
 {
-  if (!c_dialect_cxx ())
-   type = unsigned_char_type_node;
-  else if (flag_char8_t)
+  if (flag_char8_t)
 type = char8_type_node;
   else
 type = char_type_node;
diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
index b9f01a65ed7..108adc5caf8 100644
--- a/gcc/c-family/c-opts.cc
+++ b/gcc/c-family/c-opts.cc
@@ -1059,9 +1059,9 @@ c_common_post_options (const char **pfilename)
   if (flag_sized_deallocation == -1)
 flag_sized_deallocation = (cxx_dialect >= cxx14);
 
-  /* char8_t support is new in C++20.  */
+  /* char8_t support is implicitly enabled in C++20 and C2X.  */
   if (flag_char8_t == -1)
-flag_char8_t = (cxx_dialect >= cxx20);
+flag_char8_t = (cxx_dialect >= cxx20) || flag_isoc2x;
 
   if (flag_extern_tls_init)
 {
diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 92049d1a101..fa9395986de 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -7447,7 +7447,14 @@ c_parser_string_literal (c_parser *parser, bool translate, bool wide_ok)
default:
case CPP_STRING:
case CPP_UTF8STRING:
- value = build_string (1, "");
+ if (type == CPP_UTF8STRING && flag_char8_t)
+   {
+ value = build_string (TYPE_PRECISION (char8_type_node)
+   / TYPE_PRECISION (char_type_node),
+   "");  /* char8_t is 8 bits */
+   }
+ else
+   value = build_string (1, "");
  break;
case CPP_STRING16:
  value = build_string (TYPE_PRECISION (char16_type_node)
@@ -7472,9 +7479,14 @@ c_parser_string_literal (c_parser *parser, bool translate, bool wide_ok)
 {
 default:
 case CPP_STRING:
-case CPP_UTF8STRING:
   TREE_TYPE (value) = char_array_type_node;
   break;
+case CPP_UTF8STRING:
+  if (flag_char8_t)
+   TREE_TYPE (value) = char8_array_type_node;
+  else
+   TREE_TYPE (value) = char_array_type_node;
+  break;
 case CPP_STRING16:
   TREE_TYPE (value) = char16_array_type_node;
   break;
diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index fd0a7f81a7a..231f4e980b6 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -8045,7 +8045,7 @@ digest_init (location_t init_loc, tree type, tree init, tree origtype,
 
  if (char_array)
{
- if (typ2 != char_type_node)
+ if (typ2 != char_type_node && typ2 != char8_type_node)
incompat_string_cst = true;
}
  else if (!comptypes (typ1, typ2))
diff --git a/gcc/ginclude/stdatomic.h b/gcc/ginclude/stdatomic.h
index bfcfdf664c7..75ed7965689 100644

Re: [PATCH 1/3] C: Implement C2X N2653 char8_t and UTF-8 string literal changes

2022-07-27 Thread Joseph Myers
On Mon, 25 Jul 2022, Tom Honermann via Gcc-patches wrote:

> diff --git a/gcc/ginclude/stdatomic.h b/gcc/ginclude/stdatomic.h
> index bfcfdf664c7..75ed7965689 100644
> --- a/gcc/ginclude/stdatomic.h
> +++ b/gcc/ginclude/stdatomic.h
> @@ -49,6 +49,10 @@ typedef _Atomic long atomic_long;
>  typedef _Atomic unsigned long atomic_ulong;
>  typedef _Atomic long long atomic_llong;
>  typedef _Atomic unsigned long long atomic_ullong;
> +#if (defined(__CHAR8_TYPE__) \
> + && (defined(_GNU_SOURCE) || defined(_ISOC2X_SOURCE)))
> +typedef _Atomic __CHAR8_TYPE__ atomic_char8_t;
> +#endif
>  typedef _Atomic __CHAR16_TYPE__ atomic_char16_t;
>  typedef _Atomic __CHAR32_TYPE__ atomic_char32_t;
>  typedef _Atomic __WCHAR_TYPE__ atomic_wchar_t;

GCC headers don't test glibc feature test macros such as _GNU_SOURCE and 
_ISOC2X_SOURCE; they base things only on the standard version (whether 
directly, or indirectly as via __CHAR8_TYPE__) and standard-defined 
feature test macros.

(There's one exception in glimits.h - testing __USE_GNU, the macro defined 
internally by glibc's headers - but I don't think that's something we want 
to emulate in new code.)

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH 1/3] C: Implement C2X N2653 char8_t and UTF-8 string literal changes

2022-07-30 Thread Tom Honermann via Gcc-patches

On 7/27/22 7:20 PM, Joseph Myers wrote:

> On Mon, 25 Jul 2022, Tom Honermann via Gcc-patches wrote:
>
> > diff --git a/gcc/ginclude/stdatomic.h b/gcc/ginclude/stdatomic.h
> > index bfcfdf664c7..75ed7965689 100644
> > --- a/gcc/ginclude/stdatomic.h
> > +++ b/gcc/ginclude/stdatomic.h
> > @@ -49,6 +49,10 @@ typedef _Atomic long atomic_long;
> >  typedef _Atomic unsigned long atomic_ulong;
> >  typedef _Atomic long long atomic_llong;
> >  typedef _Atomic unsigned long long atomic_ullong;
> > +#if (defined(__CHAR8_TYPE__) \
> > + && (defined(_GNU_SOURCE) || defined(_ISOC2X_SOURCE)))
> > +typedef _Atomic __CHAR8_TYPE__ atomic_char8_t;
> > +#endif
> >  typedef _Atomic __CHAR16_TYPE__ atomic_char16_t;
> >  typedef _Atomic __CHAR32_TYPE__ atomic_char32_t;
> >  typedef _Atomic __WCHAR_TYPE__ atomic_wchar_t;
>
> GCC headers don't test glibc feature test macros such as _GNU_SOURCE and
> _ISOC2X_SOURCE; they base things only on the standard version (whether
> directly, or indirectly as via __CHAR8_TYPE__) and standard-defined
> feature test macros.

Ok, thank you, that makes sense. I'll follow up with a revised patch
that removes the additional conditions.

Tom.

> (There's one exception in glimits.h - testing __USE_GNU, the macro defined
> internally by glibc's headers - but I don't think that's something we want
> to emulate in new code.)