On Mon, Aug 29, 2022 at 11:31:54PM -0400, Jason Merrill wrote:
> > The pedwarn on -std=c++23 -finput-charset=UTF-8 or just documenting that
> > "conforming" UTF-8 is only -finput-charset=UTF-8 -Werror=invalid-utf8 ?
> 
> The former.

Ok.

So, here is an updated patch that implements that, and also warns/pedwarns
about invalid UTF-8 in string/character literals.
For these, we'd sometimes get errors on conversion to execution charset
(e.g. in [uU]['"] literals), but e.g. for 0x110000 .... 0x7FFFFFFF there
were no diagnostics, and similarly nothing for u8" or " literals (when we
were doing a noop conversion).

So far only tested on the new testcases, ok for trunk if it passes full
bootstrap/regtest?

2022-08-30  Jakub Jelinek  <ja...@redhat.com>

        PR c++/106655
libcpp/
        * include/cpplib.h (struct cpp_options): Implement C++23
        P2295R6 - Support for UTF-8 as a portable source file encoding.
        Add cpp_warn_invalid_utf8 and cpp_input_charset_explicit fields.
        (enum cpp_warning_reason): Add CPP_W_INVALID_UTF8 enumerator.
        * init.cc (cpp_create_reader): Initialize cpp_warn_invalid_utf8
        and cpp_input_charset_explicit.
        * lex.cc (UCS_LIMIT): Define.
        (utf8_continuation): New const variable.
        (utf8_signifier): Move earlier in the file.
        (_cpp_warn_invalid_utf8): New function.
        (_cpp_skip_block_comment): Handle -Winvalid-utf8 warning.
        (skip_line_comment): Likewise.
        (lex_raw_string, lex_string): Likewise.
gcc/
        * doc/invoke.texi (-Winvalid-utf8): Document it.
gcc/c-family/
        * c.opt (-Winvalid-utf8): New warning.
        * c-opts.cc (c_common_handle_option) <case OPT_finput_charset_>:
        Set cpp_opts->cpp_input_charset_explicit.
        (c_common_post_options): If -finput-charset=UTF-8 is explicit
        in C++23, enable -Winvalid-utf8 by default and if -pedantic
        or -pedantic-errors, make it a pedwarn.
gcc/testsuite/
        * c-c++-common/cpp/Winvalid-utf8-1.c: New test.
        * c-c++-common/cpp/Winvalid-utf8-2.c: New test.
        * g++.dg/cpp23/Winvalid-utf8-1.C: New test.
        * g++.dg/cpp23/Winvalid-utf8-2.C: New test.
        * g++.dg/cpp23/Winvalid-utf8-3.C: New test.
        * g++.dg/cpp23/Winvalid-utf8-4.C: New test.
        * g++.dg/cpp23/Winvalid-utf8-5.C: New test.
        * g++.dg/cpp23/Winvalid-utf8-6.C: New test.
        * g++.dg/cpp23/Winvalid-utf8-7.C: New test.
        * g++.dg/cpp23/Winvalid-utf8-8.C: New test.

--- libcpp/include/cpplib.h.jj  2022-08-29 14:58:32.645982297 +0200
+++ libcpp/include/cpplib.h     2022-08-30 13:38:46.618815415 +0200
@@ -560,6 +560,13 @@ struct cpp_options
      cpp_bidirectional_level.  */
   unsigned char cpp_warn_bidirectional;
 
+  /* True if libcpp should warn about invalid UTF-8 characters in comments
+     and literals.  2 if it should be a pedwarn.  */
+  unsigned char cpp_warn_invalid_utf8;
+
+  /* True if -finput-charset= option has been used explicitly.  */
+  bool cpp_input_charset_explicit;
+
   /* Dependency generation.  */
   struct
   {
@@ -666,7 +673,8 @@ enum cpp_warning_reason {
   CPP_W_CXX11_COMPAT,
   CPP_W_CXX20_COMPAT,
   CPP_W_EXPANSION_TO_DEFINED,
-  CPP_W_BIDIRECTIONAL
+  CPP_W_BIDIRECTIONAL,
+  CPP_W_INVALID_UTF8
 };
 
 /* Callback for header lookup for HEADER, which is the name of a
--- libcpp/init.cc.jj   2022-08-29 14:58:32.646982284 +0200
+++ libcpp/init.cc      2022-08-30 13:39:49.784965506 +0200
@@ -227,6 +227,8 @@ cpp_create_reader (enum c_lang lang, cpp
   CPP_OPTION (pfile, ext_numeric_literals) = 1;
   CPP_OPTION (pfile, warn_date_time) = 0;
   CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired;
+  CPP_OPTION (pfile, cpp_warn_invalid_utf8) = 0;
+  CPP_OPTION (pfile, cpp_input_charset_explicit) = 0;
 
   /* Default CPP arithmetic to something sensible for the host for the
      benefit of dumb users like fix-header.  */
--- libcpp/lex.cc.jj    2022-08-29 14:58:32.669981977 +0200
+++ libcpp/lex.cc       2022-08-30 16:56:12.793868707 +0200
@@ -50,6 +50,9 @@ static const struct token_spelling token
 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
 
+/* ISO 10646 defines the UCS codespace as the range 0-0x10FFFF inclusive.  */
+#define UCS_LIMIT 0x10FFFF
+
 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
 static int skip_line_comment (cpp_reader *);
 static void skip_whitespace (cpp_reader *, cppchar_t);
@@ -1704,6 +1707,116 @@ maybe_warn_bidi_on_char (cpp_reader *pfi
   bidi::on_char (kind, ucn_p, loc);
 }
 
+static const cppchar_t utf8_continuation = 0x80;
+static const cppchar_t utf8_signifier = 0xC0;
+
+/* Emit -Winvalid-utf8 warning or pedwarn for the invalid UTF-8 character at
+   PFILE->buffer->cur, found in a comment if COMMENT_P, else in a literal.
+   Return a pointer just past the diagnosed bytes.  */
+
+static const uchar *
+_cpp_warn_invalid_utf8 (cpp_reader *pfile, bool comment_p)
+{
+  cpp_buffer *buffer = pfile->buffer;
+  const uchar *cur = buffer->cur;
+  bool pedantic = (CPP_PEDANTIC (pfile)
+                  && CPP_OPTION (pfile, cpp_warn_invalid_utf8) == 2);
+
+  if (cur[0] < utf8_signifier
+      || cur[1] < utf8_continuation || cur[1] >= utf8_signifier)
+    {
+      if (pedantic)
+       cpp_error_with_line (pfile, CPP_DL_PEDWARN,
+                            pfile->line_table->highest_line,
+                            CPP_BUF_COL (buffer),
+                            comment_p
+                            ? N_("invalid UTF-8 character <%x> in comment")
+                            : N_("invalid UTF-8 character <%x> in literal"),
+                            cur[0]);
+      else
+       cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
+                              pfile->line_table->highest_line,
+                              CPP_BUF_COL (buffer),
+                              comment_p
+                              ? N_("invalid UTF-8 character <%x> in comment")
+                              : N_("invalid UTF-8 character <%x> in literal"),
+                              cur[0]);
+      return cur + 1;
+    }
+  else if (cur[2] < utf8_continuation || cur[2] >= utf8_signifier)
+    {
+      if (pedantic)
+       cpp_error_with_line (pfile, CPP_DL_PEDWARN,
+                            pfile->line_table->highest_line,
+                            CPP_BUF_COL (buffer),
+                            comment_p
+                            ? N_("invalid UTF-8 character "
+                                 "<%x><%x> in comment")
+                            : N_("invalid UTF-8 character "
+                                 "<%x><%x> in literal"),
+                            cur[0], cur[1]);
+      else
+       cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
+                              pfile->line_table->highest_line,
+                              CPP_BUF_COL (buffer),
+                              comment_p
+                              ? N_("invalid UTF-8 character "
+                                   "<%x><%x> in comment")
+                              : N_("invalid UTF-8 character "
+                                   "<%x><%x> in literal"),
+                              cur[0], cur[1]);
+      return cur + 2;
+    }
+  else if (cur[3] < utf8_continuation || cur[3] >= utf8_signifier)
+    {
+      if (pedantic)
+       cpp_error_with_line (pfile, CPP_DL_PEDWARN,
+                            pfile->line_table->highest_line,
+                            CPP_BUF_COL (buffer),
+                            comment_p
+                            ? N_("invalid UTF-8 character "
+                                 "<%x><%x><%x> in comment")
+                            : N_("invalid UTF-8 character "
+                                 "<%x><%x><%x> in literal"),
+                            cur[0], cur[1], cur[2]);
+      else
+       cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
+                              pfile->line_table->highest_line,
+                              CPP_BUF_COL (buffer),
+                              comment_p
+                              ? N_("invalid UTF-8 character "
+                                   "<%x><%x><%x> in comment")
+                              : N_("invalid UTF-8 character "
+                                   "<%x><%x><%x> in literal"),
+                              cur[0], cur[1], cur[2]);
+      return cur + 3;
+    }
+  else
+    {
+      if (pedantic)
+       cpp_error_with_line (pfile, CPP_DL_PEDWARN,
+                            pfile->line_table->highest_line,
+                            CPP_BUF_COL (buffer),
+                            comment_p
+                            ? N_("invalid UTF-8 character "
+                                 "<%x><%x><%x><%x> in comment")
+                            : N_("invalid UTF-8 character "
+                                 "<%x><%x><%x><%x> in literal"),
+                            cur[0], cur[1], cur[2], cur[3]);
+      else
+       cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
+                              pfile->line_table->highest_line,
+                              CPP_BUF_COL (buffer),
+                              comment_p
+                              ? N_("invalid UTF-8 character "
+                                   "<%x><%x><%x><%x> in comment")
+                              : N_("invalid UTF-8 character "
+                                   "<%x><%x><%x><%x> in literal"),
+                              cur[0], cur[1], cur[2], cur[3]);
+      return cur + 4;
+    }
+}
+
 /* Skip a C-style block comment.  We find the end of the comment by
    seeing if an asterisk is before every '/' we encounter.  Returns
    nonzero if comment terminated by EOF, zero otherwise.
@@ -1716,6 +1829,8 @@ _cpp_skip_block_comment (cpp_reader *pfi
   const uchar *cur = buffer->cur;
   uchar c;
   const bool warn_bidi_p = pfile->warn_bidi_p ();
+  const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8);
+  const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p | warn_invalid_utf8_p;
 
   cur++;
   if (*cur == '/')
@@ -1765,13 +1880,32 @@ _cpp_skip_block_comment (cpp_reader *pfi
 
          cur = buffer->cur;
        }
-      /* If this is a beginning of a UTF-8 encoding, it might be
-        a bidirectional control character.  */
-      else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
-       {
-         location_t loc;
-         bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc);
-         maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
+      else if (__builtin_expect (c >= utf8_continuation, 0)
+              && warn_bidi_or_invalid_utf8_p)
+       {
+         /* If this is a beginning of a UTF-8 encoding, it might be
+            a bidirectional control character.  */
+         if (c == bidi::utf8_start && warn_bidi_p)
+           {
+             location_t loc;
+             bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc);
+             maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
+           }
+         if (!warn_invalid_utf8_p)
+           continue;
+         if (c >= utf8_signifier)
+           {
+             cppchar_t s;
+             const uchar *pstr = cur - 1;
+             if (_cpp_valid_utf8 (pfile, &pstr, buffer->rlimit, 0, NULL, &s)
+                 && s <= UCS_LIMIT)
+               {
+                 cur = pstr;
+                 continue;
+               }
+           }
+         buffer->cur = cur - 1;
+         cur = _cpp_warn_invalid_utf8 (pfile, true);
        }
     }
 
@@ -1789,11 +1923,13 @@ skip_line_comment (cpp_reader *pfile)
   cpp_buffer *buffer = pfile->buffer;
   location_t orig_line = pfile->line_table->highest_line;
   const bool warn_bidi_p = pfile->warn_bidi_p ();
+  const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8);
+  const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p | warn_invalid_utf8_p;
 
-  if (!warn_bidi_p)
+  if (!warn_bidi_or_invalid_utf8_p)
     while (*buffer->cur != '\n')
       buffer->cur++;
-  else
+  else if (!warn_invalid_utf8_p)
     {
       while (*buffer->cur != '\n'
             && *buffer->cur != bidi::utf8_start)
@@ -1813,6 +1949,38 @@ skip_line_comment (cpp_reader *pfile)
          maybe_warn_bidi_on_close (pfile, buffer->cur);
        }
     }
+  else
+    {
+      while (*buffer->cur != '\n')
+       {
+         if (*buffer->cur < utf8_continuation)
+           {
+             buffer->cur++;
+             continue;
+           }
+         if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)
+             && warn_bidi_p)
+           {
+             location_t loc;
+             bidi::kind kind = get_bidi_utf8 (pfile, buffer->cur, &loc);
+             maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
+           }
+         if (*buffer->cur >= utf8_signifier)
+           {
+             cppchar_t s;
+             const uchar *pstr = buffer->cur;
+             if (_cpp_valid_utf8 (pfile, &pstr, buffer->rlimit, 0, NULL, &s)
+                 && s <= UCS_LIMIT)
+               {
+                 buffer->cur = pstr;
+                 continue;
+               }
+           }
+         buffer->cur = _cpp_warn_invalid_utf8 (pfile, true);
+       }
+      if (warn_bidi_p)
+       maybe_warn_bidi_on_close (pfile, buffer->cur);
+    }
 
   _cpp_process_line_notes (pfile, true);
   return orig_line != pfile->line_table->highest_line;
@@ -1919,8 +2087,6 @@ warn_about_normalization (cpp_reader *pf
     }
 }
 
-static const cppchar_t utf8_signifier = 0xC0;
-
 /* Returns TRUE if the sequence starting at buffer->cur is valid in
    an identifier.  FIRST is TRUE if this starts an identifier.  */
 
@@ -2361,6 +2527,8 @@ lex_raw_string (cpp_reader *pfile, cpp_t
 {
   const uchar *pos = base;
   const bool warn_bidi_p = pfile->warn_bidi_p ();
+  const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8);
+  const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p | warn_invalid_utf8_p;
 
   /* 'tis a pity this information isn't passed down from the lexer's
      initial categorization of the token.  */
@@ -2597,12 +2765,31 @@ lex_raw_string (cpp_reader *pfile, cpp_t
          pos = base = pfile->buffer->cur;
          note = &pfile->buffer->notes[pfile->buffer->cur_note];
        }
-      else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0)
-              && warn_bidi_p)
+      else if (__builtin_expect ((unsigned char) c >= utf8_continuation, 0)
+              && warn_bidi_or_invalid_utf8_p)
        {
-         location_t loc;
-         bidi::kind kind = get_bidi_utf8 (pfile, pos - 1, &loc);
-         maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
+         if ((unsigned char) c == bidi::utf8_start && warn_bidi_p)
+           {
+             location_t loc;
+             bidi::kind kind = get_bidi_utf8 (pfile, pos - 1, &loc);
+             maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
+           }
+         if (!warn_invalid_utf8_p)
+           continue;
+         if ((unsigned char) c >= utf8_signifier)
+           {
+             cppchar_t s;
+             const uchar *pstr = pos - 1;
+             if (_cpp_valid_utf8 (pfile, &pstr, pfile->buffer->rlimit, 0,
+                                  NULL, &s)
+                 && s <= UCS_LIMIT)
+               {
+                 pos = pstr;
+                 continue;
+               }
+           }
+         pfile->buffer->cur = pos - 1;
+         pos = _cpp_warn_invalid_utf8 (pfile, false);
        }
     }
 
@@ -2704,6 +2891,8 @@ lex_string (cpp_reader *pfile, cpp_token
     terminator = '>', type = CPP_HEADER_NAME;
 
   const bool warn_bidi_p = pfile->warn_bidi_p ();
+  const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8);
+  const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p | warn_invalid_utf8_p;
   for (;;)
     {
       cppchar_t c = *cur++;
@@ -2745,11 +2934,31 @@ lex_string (cpp_reader *pfile, cpp_token
        }
       else if (c == '\0')
        saw_NUL = true;
-      else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
+      else if (__builtin_expect (c >= utf8_continuation, 0)
+              && warn_bidi_or_invalid_utf8_p)
        {
-         location_t loc;
-         bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc);
-         maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
+         if (c == bidi::utf8_start && warn_bidi_p)
+           {
+             location_t loc;
+             bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc);
+             maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
+           }
+         if (!warn_invalid_utf8_p)
+           continue;
+         if (c >= utf8_signifier)
+           {
+             cppchar_t s;
+             const uchar *pstr = cur - 1;
+             if (_cpp_valid_utf8 (pfile, &pstr, pfile->buffer->rlimit, 0,
+                                  NULL, &s)
+                 && s <= UCS_LIMIT)
+               {
+                 cur = pstr;
+                 continue;
+               }
+           }
+         pfile->buffer->cur = cur - 1;
+         cur = _cpp_warn_invalid_utf8 (pfile, false);
        }
     }
 
--- gcc/doc/invoke.texi.jj      2022-08-29 14:58:32.619982644 +0200
+++ gcc/doc/invoke.texi 2022-08-30 16:42:52.129579000 +0200
@@ -365,9 +365,9 @@ Objective-C and Objective-C++ Dialects}.
 -Winfinite-recursion @gol
 -Winit-self  -Winline  -Wno-int-conversion  -Wint-in-bool-context @gol
 -Wno-int-to-pointer-cast  -Wno-invalid-memory-model @gol
--Winvalid-pch  -Wjump-misses-init  -Wlarger-than=@var{byte-size} @gol
--Wlogical-not-parentheses  -Wlogical-op  -Wlong-long @gol
--Wno-lto-type-mismatch -Wmain  -Wmaybe-uninitialized @gol
+-Winvalid-pch  -Winvalid-utf8 -Wjump-misses-init  @gol
+-Wlarger-than=@var{byte-size}  -Wlogical-not-parentheses  -Wlogical-op  @gol
+-Wlong-long  -Wno-lto-type-mismatch -Wmain  -Wmaybe-uninitialized @gol
 -Wmemset-elt-size  -Wmemset-transposed-args @gol
 -Wmisleading-indentation  -Wmissing-attributes  -Wmissing-braces @gol
 -Wmissing-field-initializers  -Wmissing-format-attribute @gol
@@ -9569,6 +9569,14 @@ different size.
 Warn if a precompiled header (@pxref{Precompiled Headers}) is found in
 the search path but cannot be used.
 
+@item -Winvalid-utf8
+@opindex Winvalid-utf8
+@opindex Wno-invalid-utf8
+Warn if an invalid UTF-8 character is found inside a comment or a
+character or string literal.
+This warning is on by default for C++23 if @option{-finput-charset=UTF-8}
+is used and turned into an error with @option{-pedantic-errors}.
+
 @item -Wlong-long
 @opindex Wlong-long
 @opindex Wno-long-long
--- gcc/c-family/c.opt.jj       2022-08-29 14:58:32.586983084 +0200
+++ gcc/c-family/c.opt  2022-08-30 12:59:01.971768141 +0200
@@ -821,6 +821,10 @@ Winvalid-pch
 C ObjC C++ ObjC++ CPP(warn_invalid_pch) CppReason(CPP_W_INVALID_PCH) 
Var(cpp_warn_invalid_pch) Init(0) Warning
 Warn about PCH files that are found but not used.
 
+Winvalid-utf8
+C ObjC C++ ObjC++ CPP(cpp_warn_invalid_utf8) CppReason(CPP_W_INVALID_UTF8) Var(warn_invalid_utf8) Init(0) Warning
+Warn about invalid UTF-8 characters in comments and literals.
+
 Wjump-misses-init
 C ObjC Var(warn_jump_misses_init) Warning LangEnabledby(C ObjC,Wc++-compat)
 Warn when a jump misses a variable initialization.
--- gcc/c-family/c-opts.cc.jj   2022-08-17 16:59:08.619552629 +0200
+++ gcc/c-family/c-opts.cc      2022-08-30 14:27:27.111675228 +0200
@@ -534,6 +534,7 @@ c_common_handle_option (size_t scode, co
 
     case OPT_finput_charset_:
       cpp_opts->input_charset = arg;
+      cpp_opts->cpp_input_charset_explicit = 1;
       break;
 
     case OPT_ftemplate_depth_:
@@ -1152,6 +1153,17 @@ c_common_post_options (const char **pfil
     lang_hooks.preprocess_options (parse_in);
   cpp_post_options (parse_in);
   init_global_opts_from_cpp (&global_options, cpp_get_options (parse_in));
+  /* For C++23 and explicit -finput-charset=UTF-8, turn on -Winvalid-utf8
+     by default (as a pedwarn if pedantic) unless -Wno-invalid-utf8.  */
+  if (cxx_dialect >= cxx23
+      && cpp_opts->cpp_input_charset_explicit
+      && strcmp (cpp_opts->input_charset, "UTF-8") == 0
+      && (cpp_opts->cpp_warn_invalid_utf8
+         || !global_options_set.x_warn_invalid_utf8))
+    {
+      global_options.x_warn_invalid_utf8 = 1;
+      cpp_opts->cpp_warn_invalid_utf8 = cpp_opts->cpp_pedantic ? 2 : 1;
+    }
 
   /* Let diagnostics infrastructure know how to convert input files the same
      way libcpp will do it, namely using the configured input charset and
--- gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-1.c.jj 2022-08-30 
12:59:01.971768141 +0200
+++ gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-1.c    2022-08-30 
15:05:39.011892953 +0200
@@ -0,0 +1,43 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid 
UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8 -Winvalid-utf8" }
+
+// a€߿ࠀ퟿𐀀�a           { dg-bogus "invalid UTF-8 character" }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<80> in comment" }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<bf> in comment" }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<c0> in comment" }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<c1> in comment" }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<f5> in comment" }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<ff> in comment" }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<c2> in comment" }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<e0> in comment" }
+// a���a                               { dg-warning "invalid UTF-8 character 
<e0><80><bf> in comment" }
+// a���a                               { dg-warning "invalid UTF-8 character 
<e0><9f><80> in comment" }
+// a��a                                        { dg-warning "invalid UTF-8 
character <e0><bf> in comment" }
+// a��a                                        { dg-warning "invalid UTF-8 
character <ec><80> in comment" }
+// a���a                               { dg-warning "invalid UTF-8 character 
<ed><a0><80> in comment" }
+// a����a                              { dg-warning "invalid UTF-8 character 
<f0><80><80><80> in comment" }
+// a����a                              { dg-warning "invalid UTF-8 character 
<f0><8f><bf><bf> in comment" }
+// a����a                              { dg-warning "invalid UTF-8 character 
<f4><90><80><80> in comment" }
+// a������a                            { dg-warning "invalid UTF-8 character 
<fd><bf><bf><bf> in comment" }
+//                                     { dg-warning "invalid UTF-8 character 
<bf> in comment" "" { target *-*-* } .-1 }
+/* a€߿ࠀ퟿𐀀�a           { dg-bogus "invalid UTF-8 character" } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<80> in comment" } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<bf> in comment" } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<c0> in comment" } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<c1> in comment" } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<f5> in comment" } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<ff> in comment" } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<c2> in comment" } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<e0> in comment" } */
+/* a���a                               { dg-warning "invalid UTF-8 character 
<e0><80><bf> in comment" } */
+/* a���a                               { dg-warning "invalid UTF-8 character 
<e0><9f><80> in comment" } */
+/* a��a                                        { dg-warning "invalid UTF-8 
character <e0><bf> in comment" } */
+/* a��a                                        { dg-warning "invalid UTF-8 
character <ec><80> in comment" } */
+/* a���a                               { dg-warning "invalid UTF-8 character 
<ed><a0><80> in comment" } */
+/* a����a                              { dg-warning "invalid UTF-8 character 
<f0><80><80><80> in comment" } */
+/* a����a                              { dg-warning "invalid UTF-8 character 
<f0><8f><bf><bf> in comment" } */
+/* a����a                              { dg-warning "invalid UTF-8 character 
<f4><90><80><80> in comment" } */
+/* a������a                            { dg-warning "invalid UTF-8 character 
<fd><bf><bf><bf> in comment" } */
+/*                                     { dg-warning "invalid UTF-8 character 
<bf> in comment" "" { target *-*-* } .-1 } */
--- gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-2.c.jj 2022-08-30 
16:59:42.943057445 +0200
+++ gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-2.c    2022-08-30 
17:03:02.189390222 +0200
@@ -0,0 +1,88 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid 
UTF-8
+// { dg-do preprocess { target { c || c++11 } } }
+// { dg-require-effective-target wchar }
+// { dg-options "-finput-charset=UTF-8 -Winvalid-utf8" }
+// { dg-additional-options "-std=gnu99" { target c } }
+
+#ifndef __cplusplus
+#include <wchar.h>
+typedef __CHAR16_TYPE__ char16_t;
+typedef __CHAR32_TYPE__ char32_t;
+#endif
+
+char32_t a = U'�';                             // { dg-warning "invalid UTF-8 
character <80> in literal" }
+char32_t b = U'�';                             // { dg-warning "invalid UTF-8 
character <bf> in literal" }
+char32_t c = U'�';                             // { dg-warning "invalid UTF-8 
character <c0> in literal" }
+char32_t d = U'�';                             // { dg-warning "invalid UTF-8 
character <c1> in literal" }
+char32_t e = U'�';                             // { dg-warning "invalid UTF-8 
character <f5> in literal" }
+char32_t f = U'�';                             // { dg-warning "invalid UTF-8 
character <ff> in literal" }
+char32_t g = U'�';                             // { dg-warning "invalid UTF-8 
character <c2> in literal" }
+char32_t h = U'�';                             // { dg-warning "invalid UTF-8 
character <e0> in literal" }
+char32_t i = U'���';                           // { dg-warning "invalid UTF-8 
character <e0><80><bf> in literal" }
+char32_t j = U'���';                           // { dg-warning "invalid UTF-8 
character <e0><9f><80> in literal" }
+char32_t k = U'��';                            // { dg-warning "invalid UTF-8 
character <e0><bf> in literal" }
+char32_t l = U'��';                            // { dg-warning "invalid UTF-8 
character <ec><80> in literal" }
+char32_t m = U'���';                           // { dg-warning "invalid UTF-8 
character <ed><a0><80> in literal" }
+char32_t n = U'����';                          // { dg-warning "invalid UTF-8 
character <f0><80><80><80> in literal" }
+char32_t o = U'����';                          // { dg-warning "invalid UTF-8 
character <f0><8f><bf><bf> in literal" }
+char32_t p = U'����';                          // { dg-warning "invalid UTF-8 
character <f4><90><80><80> in literal" }
+char32_t q = U'������';                                // { dg-warning 
"invalid UTF-8 character <fd><bf><bf><bf> in literal" }
+                                               // { dg-warning "invalid UTF-8 
character <bf> in literal" "" { target *-*-* } .-1 }
+const char32_t *A = U"€߿ࠀ퟿𐀀�";        // { dg-bogus "invalid UTF-8 character" 
}
+const char32_t *B = U"�";                      // { dg-warning "invalid UTF-8 
character <80> in literal" }
+const char32_t *C = U"�";                      // { dg-warning "invalid UTF-8 
character <bf> in literal" }
+const char32_t *D = U"�";                      // { dg-warning "invalid UTF-8 
character <c0> in literal" }
+const char32_t *E = U"�";                      // { dg-warning "invalid UTF-8 
character <c1> in literal" }
+const char32_t *F = U"�";                      // { dg-warning "invalid UTF-8 
character <f5> in literal" }
+const char32_t *G = U"�";                      // { dg-warning "invalid UTF-8 
character <ff> in literal" }
+const char32_t *H = U"�";                      // { dg-warning "invalid UTF-8 
character <c2> in literal" }
+const char32_t *I = U"�";                      // { dg-warning "invalid UTF-8 
character <e0> in literal" }
+const char32_t *J = U"���";                    // { dg-warning "invalid UTF-8 
character <e0><80><bf> in literal" }
+const char32_t *K = U"���";                    // { dg-warning "invalid UTF-8 
character <e0><9f><80> in literal" }
+const char32_t *L = U"��";                     // { dg-warning "invalid UTF-8 
character <e0><bf> in literal" }
+const char32_t *M = U"��";                     // { dg-warning "invalid UTF-8 
character <ec><80> in literal" }
+const char32_t *N = U"���";                    // { dg-warning "invalid UTF-8 
character <ed><a0><80> in literal" }
+const char32_t *O = U"����";                   // { dg-warning "invalid UTF-8 
character <f0><80><80><80> in literal" }
+const char32_t *P = U"����";                   // { dg-warning "invalid UTF-8 
character <f0><8f><bf><bf> in literal" }
+const char32_t *Q = U"����";                   // { dg-warning "invalid UTF-8 
character <f4><90><80><80> in literal" }
+const char32_t *R = U"������";                 // { dg-warning "invalid UTF-8 
character <fd><bf><bf><bf> in literal" }
+                                               // { dg-warning "invalid UTF-8 
character <bf> in literal" "" { target *-*-* } .-1 }
+const char32_t *A1 = UR"(€߿ࠀ퟿𐀀�)"; // { dg-bogus "invalid UTF-8 character" }
+const char32_t *B1 = UR"(�)";                  // { dg-warning "invalid UTF-8 
character <80> in literal" }
+const char32_t *C1 = UR"(�)";                  // { dg-warning "invalid UTF-8 
character <bf> in literal" }
+const char32_t *D1 = UR"(�)";                  // { dg-warning "invalid UTF-8 
character <c0> in literal" }
+const char32_t *E1 = UR"(�)";                  // { dg-warning "invalid UTF-8 
character <c1> in literal" }
+const char32_t *F1 = UR"(�)";                  // { dg-warning "invalid UTF-8 
character <f5> in literal" }
+const char32_t *G1 = UR"(�)";                  // { dg-warning "invalid UTF-8 
character <ff> in literal" }
+const char32_t *H1 = UR"(�)";                  // { dg-warning "invalid UTF-8 
character <c2> in literal" }
+const char32_t *I1 = UR"(�)";                  // { dg-warning "invalid UTF-8 
character <e0> in literal" }
+const char32_t *J1 = UR"(���)";                        // { dg-warning 
"invalid UTF-8 character <e0><80><bf> in literal" }
+const char32_t *K1 = UR"(���)";                        // { dg-warning 
"invalid UTF-8 character <e0><9f><80> in literal" }
+const char32_t *L1 = UR"(��)";                 // { dg-warning "invalid UTF-8 
character <e0><bf> in literal" }
+const char32_t *M1 = UR"(��)";                 // { dg-warning "invalid UTF-8 
character <ec><80> in literal" }
+const char32_t *N1 = UR"(���)";                        // { dg-warning 
"invalid UTF-8 character <ed><a0><80> in literal" }
+const char32_t *O1 = UR"(����)";               // { dg-warning "invalid UTF-8 
character <f0><80><80><80> in literal" }
+const char32_t *P1 = UR"(����)";               // { dg-warning "invalid UTF-8 
character <f0><8f><bf><bf> in literal" }
+const char32_t *Q1 = UR"(����)";               // { dg-warning "invalid UTF-8 
character <f4><90><80><80> in literal" }
+const char32_t *R1 = UR"(������)";             // { dg-warning "invalid UTF-8 
character <fd><bf><bf><bf> in literal" }
+                                               // { dg-warning "invalid UTF-8 
character <bf> in literal" "" { target *-*-* } .-1 }
+const char *A2 = u8"€߿ࠀ퟿𐀀�";  // { dg-bogus "invalid UTF-8 character" }
+const char *B2 = u8"�";                                // { dg-warning 
"invalid UTF-8 character <80> in literal" }
+const char *C2 = u8"�";                                // { dg-warning 
"invalid UTF-8 character <bf> in literal" }
+const char *D2 = u8"�";                                // { dg-warning 
"invalid UTF-8 character <c0> in literal" }
+const char *E2 = u8"�";                                // { dg-warning 
"invalid UTF-8 character <c1> in literal" }
+const char *F2 = u8"�";                                // { dg-warning 
"invalid UTF-8 character <f5> in literal" }
+const char *G2 = u8"�";                                // { dg-warning 
"invalid UTF-8 character <ff> in literal" }
+const char *H2 = u8"�";                                // { dg-warning 
"invalid UTF-8 character <c2> in literal" }
+const char *I2 = u8"�";                                // { dg-warning 
"invalid UTF-8 character <e0> in literal" }
+const char *J2 = u8"���";                      // { dg-warning "invalid UTF-8 
character <e0><80><bf> in literal" }
+const char *K2 = u8"���";                      // { dg-warning "invalid UTF-8 
character <e0><9f><80> in literal" }
+const char *L2 = u8"��";                       // { dg-warning "invalid UTF-8 
character <e0><bf> in literal" }
+const char *M2 = u8"��";                       // { dg-warning "invalid UTF-8 
character <ec><80> in literal" }
+const char *N2 = u8"���";                      // { dg-warning "invalid UTF-8 
character <ed><a0><80> in literal" }
+const char *O2 = u8"���?";                     // { dg-warning "invalid UTF-8 
character <f0><80><80><80> in literal" }
+const char *P2 = u8"����";                     // { dg-warning "invalid UTF-8 
character <f0><8f><bf><bf> in literal" }
+const char *Q2 = u8"����";                     // { dg-warning "invalid UTF-8 
character <f4><90><80><80> in literal" }
+const char *R2 = u8"������";                   // { dg-warning "invalid UTF-8 
character <fd><bf><bf><bf> in literal" }
+                                               // { dg-warning "invalid UTF-8 
character <bf> in literal" "" { target *-*-* } .-1 }
--- gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-1.C.jj     2022-08-30 
14:08:42.515672782 +0200
+++ gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-1.C        2022-08-30 
15:13:08.844830193 +0200
@@ -0,0 +1,43 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid 
UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8" }
+
+// a€߿ࠀ퟿𐀀�a           { dg-bogus "invalid UTF-8 character" }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<80> in comment" "" { target c++23 } }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<bf> in comment" "" { target c++23 } }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<c0> in comment" "" { target c++23 } }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<c1> in comment" "" { target c++23 } }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<f5> in comment" "" { target c++23 } }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<ff> in comment" "" { target c++23 } }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<c2> in comment" "" { target c++23 } }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<e0> in comment" "" { target c++23 } }
+// a���a                               { dg-warning "invalid UTF-8 character 
<e0><80><bf> in comment" "" { target c++23 } }
+// a���a                               { dg-warning "invalid UTF-8 character 
<e0><9f><80> in comment" "" { target c++23 } }
+// a��a                                        { dg-warning "invalid UTF-8 
character <e0><bf> in comment" "" { target c++23 } }
+// a��a                                        { dg-warning "invalid UTF-8 
character <ec><80> in comment" "" { target c++23 } }
+// a���a                               { dg-warning "invalid UTF-8 character 
<ed><a0><80> in comment" "" { target c++23 } }
+// a����a                              { dg-warning "invalid UTF-8 character 
<f0><80><80><80> in comment" "" { target c++23 } }
+// a����a                              { dg-warning "invalid UTF-8 character 
<f0><8f><bf><bf> in comment" "" { target c++23 } }
+// a����a                              { dg-warning "invalid UTF-8 character 
<f4><90><80><80> in comment" "" { target c++23 } }
+// a������a                            { dg-warning "invalid UTF-8 character 
<fd><bf><bf><bf> in comment" "" { target c++23 } }
+//                                     { dg-warning "invalid UTF-8 character 
<bf> in comment" "" { target c++23 } .-1 }
+/* a€߿ࠀ퟿𐀀�a           { dg-bogus "invalid UTF-8 character" } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<80> in comment" "" { target c++23 } } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<bf> in comment" "" { target c++23 } } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<c0> in comment" "" { target c++23 } } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<c1> in comment" "" { target c++23 } } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<f5> in comment" "" { target c++23 } } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<ff> in comment" "" { target c++23 } } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<c2> in comment" "" { target c++23 } } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<e0> in comment" "" { target c++23 } } */
+/* a���a                               { dg-warning "invalid UTF-8 character 
<e0><80><bf> in comment" "" { target c++23 } } */
+/* a���a                               { dg-warning "invalid UTF-8 character 
<e0><9f><80> in comment" "" { target c++23 } } */
+/* a��a                                        { dg-warning "invalid UTF-8 
character <e0><bf> in comment" "" { target c++23 } } */
+/* a��a                                        { dg-warning "invalid UTF-8 
character <ec><80> in comment" "" { target c++23 } } */
+/* a���a                               { dg-warning "invalid UTF-8 character 
<ed><a0><80> in comment" "" { target c++23 } } */
+/* a����a                              { dg-warning "invalid UTF-8 character 
<f0><80><80><80> in comment" "" { target c++23 } } */
+/* a����a                              { dg-warning "invalid UTF-8 character 
<f0><8f><bf><bf> in comment" "" { target c++23 } } */
+/* a����a                              { dg-warning "invalid UTF-8 character 
<f4><90><80><80> in comment" "" { target c++23 } } */
+/* a������a                            { dg-warning "invalid UTF-8 character 
<fd><bf><bf><bf> in comment" "" { target c++23 } } */
+/*                                     { dg-warning "invalid UTF-8 character 
<bf> in comment" "" { target c++23 } .-1 } */
--- gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-2.C.jj     2022-08-30 
14:09:30.498029302 +0200
+++ gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-2.C        2022-08-30 
15:13:40.887397995 +0200
@@ -0,0 +1,43 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid 
UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8 -pedantic" }
+
+// a€߿ࠀ퟿𐀀�a           { dg-bogus "invalid UTF-8 character" }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<80> in comment" "" { target c++23 } }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<bf> in comment" "" { target c++23 } }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<c0> in comment" "" { target c++23 } }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<c1> in comment" "" { target c++23 } }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<f5> in comment" "" { target c++23 } }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<ff> in comment" "" { target c++23 } }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<c2> in comment" "" { target c++23 } }
+// a�a                                 { dg-warning "invalid UTF-8 character 
<e0> in comment" "" { target c++23 } }
+// a���a                               { dg-warning "invalid UTF-8 character 
<e0><80><bf> in comment" "" { target c++23 } }
+// a���a                               { dg-warning "invalid UTF-8 character 
<e0><9f><80> in comment" "" { target c++23 } }
+// a��a                                        { dg-warning "invalid UTF-8 
character <e0><bf> in comment" "" { target c++23 } }
+// a��a                                        { dg-warning "invalid UTF-8 
character <ec><80> in comment" "" { target c++23 } }
+// a���a                               { dg-warning "invalid UTF-8 character 
<ed><a0><80> in comment" "" { target c++23 } }
+// a����a                              { dg-warning "invalid UTF-8 character 
<f0><80><80><80> in comment" "" { target c++23 } }
+// a����a                              { dg-warning "invalid UTF-8 character 
<f0><8f><bf><bf> in comment" "" { target c++23 } }
+// a����a                              { dg-warning "invalid UTF-8 character 
<f4><90><80><80> in comment" "" { target c++23 } }
+// a������a                            { dg-warning "invalid UTF-8 character 
<fd><bf><bf><bf> in comment" "" { target c++23 } }
+//                                     { dg-warning "invalid UTF-8 character 
<bf> in comment" "" { target c++23 } .-1 }
+/* a€߿ࠀ퟿𐀀�a           { dg-bogus "invalid UTF-8 character" } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<80> in comment" "" { target c++23 } } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<bf> in comment" "" { target c++23 } } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<c0> in comment" "" { target c++23 } } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<c1> in comment" "" { target c++23 } } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<f5> in comment" "" { target c++23 } } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<ff> in comment" "" { target c++23 } } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<c2> in comment" "" { target c++23 } } */
+/* a�a                                 { dg-warning "invalid UTF-8 character 
<e0> in comment" "" { target c++23 } } */
+/* a���a                               { dg-warning "invalid UTF-8 character 
<e0><80><bf> in comment" "" { target c++23 } } */
+/* a���a                               { dg-warning "invalid UTF-8 character 
<e0><9f><80> in comment" "" { target c++23 } } */
+/* a��a                                        { dg-warning "invalid UTF-8 
character <e0><bf> in comment" "" { target c++23 } } */
+/* a��a                                        { dg-warning "invalid UTF-8 
character <ec><80> in comment" "" { target c++23 } } */
+/* a���a                               { dg-warning "invalid UTF-8 character 
<ed><a0><80> in comment" "" { target c++23 } } */
+/* a����a                              { dg-warning "invalid UTF-8 character 
<f0><80><80><80> in comment" "" { target c++23 } } */
+/* a����a                              { dg-warning "invalid UTF-8 character 
<f0><8f><bf><bf> in comment" "" { target c++23 } } */
+/* a����a                              { dg-warning "invalid UTF-8 character 
<f4><90><80><80> in comment" "" { target c++23 } } */
+/* a������a                            { dg-warning "invalid UTF-8 character 
<fd><bf><bf><bf> in comment" "" { target c++23 } } */
+/*                                     { dg-warning "invalid UTF-8 character 
<bf> in comment" "" { target c++23 } .-1 } */
--- gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-3.C.jj     2022-08-30 
14:10:17.278404614 +0200
+++ gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-3.C        2022-08-30 
15:15:30.277922541 +0200
@@ -0,0 +1,43 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid 
UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8 -pedantic-errors" }
+
+// a€߿ࠀ퟿𐀀�a           { dg-bogus "invalid UTF-8 character" }
+// a�a                                 { dg-error "invalid UTF-8 character 
<80> in comment" "" { target c++23 } }
+// a�a                                 { dg-error "invalid UTF-8 character 
<bf> in comment" "" { target c++23 } }
+// a�a                                 { dg-error "invalid UTF-8 character 
<c0> in comment" "" { target c++23 } }
+// a�a                                 { dg-error "invalid UTF-8 character 
<c1> in comment" "" { target c++23 } }
+// a�a                                 { dg-error "invalid UTF-8 character 
<f5> in comment" "" { target c++23 } }
+// a�a                                 { dg-error "invalid UTF-8 character 
<ff> in comment" "" { target c++23 } }
+// a�a                                 { dg-error "invalid UTF-8 character 
<c2> in comment" "" { target c++23 } }
+// a�a                                 { dg-error "invalid UTF-8 character 
<e0> in comment" "" { target c++23 } }
+// a���a                               { dg-error "invalid UTF-8 character 
<e0><80><bf> in comment" "" { target c++23 } }
+// a���a                               { dg-error "invalid UTF-8 character 
<e0><9f><80> in comment" "" { target c++23 } }
+// a��a                                        { dg-error "invalid UTF-8 
character <e0><bf> in comment" "" { target c++23 } }
+// a��a                                        { dg-error "invalid UTF-8 
character <ec><80> in comment" "" { target c++23 } }
+// a���a                               { dg-error "invalid UTF-8 character 
<ed><a0><80> in comment" "" { target c++23 } }
+// a����a                              { dg-error "invalid UTF-8 character 
<f0><80><80><80> in comment" "" { target c++23 } }
+// a����a                              { dg-error "invalid UTF-8 character 
<f0><8f><bf><bf> in comment" "" { target c++23 } }
+// a����a                              { dg-error "invalid UTF-8 character 
<f4><90><80><80> in comment" "" { target c++23 } }
+// a������a                            { dg-error "invalid UTF-8 character 
<fd><bf><bf><bf> in comment" "" { target c++23 } }
+//                                     { dg-error "invalid UTF-8 character 
<bf> in comment" "" { target c++23 } .-1 }
+/* a€߿ࠀ퟿𐀀�a           { dg-bogus "invalid UTF-8 character" } */
+/* a�a                                 { dg-error "invalid UTF-8 character 
<80> in comment" "" { target c++23 } } */
+/* a�a                                 { dg-error "invalid UTF-8 character 
<bf> in comment" "" { target c++23 } } */
+/* a�a                                 { dg-error "invalid UTF-8 character 
<c0> in comment" "" { target c++23 } } */
+/* a�a                                 { dg-error "invalid UTF-8 character 
<c1> in comment" "" { target c++23 } } */
+/* a�a                                 { dg-error "invalid UTF-8 character 
<f5> in comment" "" { target c++23 } } */
+/* a�a                                 { dg-error "invalid UTF-8 character 
<ff> in comment" "" { target c++23 } } */
+/* a�a                                 { dg-error "invalid UTF-8 character 
<c2> in comment" "" { target c++23 } } */
+/* a�a                                 { dg-error "invalid UTF-8 character 
<e0> in comment" "" { target c++23 } } */
+/* a���a                               { dg-error "invalid UTF-8 character 
<e0><80><bf> in comment" "" { target c++23 } } */
+/* a���a                               { dg-error "invalid UTF-8 character 
<e0><9f><80> in comment" "" { target c++23 } } */
+/* a��a                                        { dg-error "invalid UTF-8 
character <e0><bf> in comment" "" { target c++23 } } */
+/* a��a                                        { dg-error "invalid UTF-8 
character <ec><80> in comment" "" { target c++23 } } */
+/* a���a                               { dg-error "invalid UTF-8 character 
<ed><a0><80> in comment" "" { target c++23 } } */
+/* a����a                              { dg-error "invalid UTF-8 character 
<f0><80><80><80> in comment" "" { target c++23 } } */
+/* a����a                              { dg-error "invalid UTF-8 character 
<f0><8f><bf><bf> in comment" "" { target c++23 } } */
+/* a����a                              { dg-error "invalid UTF-8 character 
<f4><90><80><80> in comment" "" { target c++23 } } */
+/* a������a                            { dg-error "invalid UTF-8 character 
<fd><bf><bf><bf> in comment" "" { target c++23 } } */
+/*                                     { dg-error "invalid UTF-8 character 
<bf> in comment" "" { target c++23 } .-1 } */
--- gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-4.C.jj     2022-08-30 
14:25:33.367204912 +0200
+++ gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-4.C        2022-08-30 
15:15:56.213572726 +0200
@@ -0,0 +1,43 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid 
UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8 -pedantic-errors -Wno-invalid-utf8" }
+
+// a€߿ࠀ퟿𐀀�a           { dg-bogus "invalid UTF-8 character" }
+// a�a                                 { dg-bogus "invalid UTF-8 character 
<80> in comment" }
+// a�a                                 { dg-bogus "invalid UTF-8 character 
<bf> in comment" }
+// a�a                                 { dg-bogus "invalid UTF-8 character 
<c0> in comment" }
+// a�a                                 { dg-bogus "invalid UTF-8 character 
<c1> in comment" }
+// a�a                                 { dg-bogus "invalid UTF-8 character 
<f5> in comment" }
+// a�a                                 { dg-bogus "invalid UTF-8 character 
<ff> in comment" }
+// a�a                                 { dg-bogus "invalid UTF-8 character 
<c2> in comment" }
+// a�a                                 { dg-bogus "invalid UTF-8 character 
<e0> in comment" }
+// a���a                               { dg-bogus "invalid UTF-8 character 
<e0><80><bf> in comment" }
+// a���a                               { dg-bogus "invalid UTF-8 character 
<e0><9f><80> in comment" }
+// a��a                                        { dg-bogus "invalid UTF-8 
character <e0><bf> in comment" }
+// a��a                                        { dg-bogus "invalid UTF-8 
character <ec><80> in comment" }
+// a���a                               { dg-bogus "invalid UTF-8 character 
<ed><a0><80> in comment" }
+// a����a                              { dg-bogus "invalid UTF-8 character 
<f0><80><80><80> in comment" }
+// a����a                              { dg-bogus "invalid UTF-8 character 
<f0><8f><bf><bf> in comment" }
+// a����a                              { dg-bogus "invalid UTF-8 character 
<f4><90><80><80> in comment" }
+// a������a                            { dg-bogus "invalid UTF-8 character 
<fd><bf><bf><bf> in comment" }
+//                                     { dg-bogus "invalid UTF-8 character 
<bf> in comment" "" { target *-*-* } .-1 }
+/* a€߿ࠀ퟿𐀀�a           { dg-bogus "invalid UTF-8 character" } */
+/* a�a                                 { dg-bogus "invalid UTF-8 character 
<80> in comment" } */
+/* a�a                                 { dg-bogus "invalid UTF-8 character 
<bf> in comment" } */
+/* a�a                                 { dg-bogus "invalid UTF-8 character 
<c0> in comment" } */
+/* a�a                                 { dg-bogus "invalid UTF-8 character 
<c1> in comment" } */
+/* a�a                                 { dg-bogus "invalid UTF-8 character 
<f5> in comment" } */
+/* a�a                                 { dg-bogus "invalid UTF-8 character 
<ff> in comment" } */
+/* a�a                                 { dg-bogus "invalid UTF-8 character 
<c2> in comment" } */
+/* a�a                                 { dg-bogus "invalid UTF-8 character 
<e0> in comment" } */
+/* a���a                               { dg-bogus "invalid UTF-8 character 
<e0><80><bf> in comment" } */
+/* a���a                               { dg-bogus "invalid UTF-8 character 
<e0><9f><80> in comment" } */
+/* a��a                                        { dg-bogus "invalid UTF-8 
character <e0><bf> in comment" } */
+/* a��a                                        { dg-bogus "invalid UTF-8 
character <ec><80> in comment" } */
+/* a���a                               { dg-bogus "invalid UTF-8 character 
<ed><a0><80> in comment" } */
+/* a����a                              { dg-bogus "invalid UTF-8 character 
<f0><80><80><80> in comment" } */
+/* a����a                              { dg-bogus "invalid UTF-8 character 
<f0><8f><bf><bf> in comment" } */
+/* a����a                              { dg-bogus "invalid UTF-8 character 
<f4><90><80><80> in comment" } */
+/* a������a                            { dg-bogus "invalid UTF-8 character 
<fd><bf><bf><bf> in comment" } */
+/*                                     { dg-bogus "invalid UTF-8 character 
<bf> in comment" "" { target *-*-* } .-1 } */
--- gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-5.C.jj     2022-08-30 
15:23:19.780598450 +0200
+++ gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-5.C        2022-08-30 
16:53:40.250909343 +0200
@@ -0,0 +1,80 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid 
UTF-8
+// { dg-do preprocess { target c++11 } }
+// { dg-options "-finput-charset=UTF-8" }
+
+char32_t a = U'�';                             // { dg-warning "invalid UTF-8 
character <80> in literal" "" { target c++23 } }
+char32_t b = U'�';                             // { dg-warning "invalid UTF-8 
character <bf> in literal" "" { target c++23 } }
+char32_t c = U'�';                             // { dg-warning "invalid UTF-8 
character <c0> in literal" "" { target c++23 } }
+char32_t d = U'�';                             // { dg-warning "invalid UTF-8 
character <c1> in literal" "" { target c++23 } }
+char32_t e = U'�';                             // { dg-warning "invalid UTF-8 
character <f5> in literal" "" { target c++23 } }
+char32_t f = U'�';                             // { dg-warning "invalid UTF-8 
character <ff> in literal" "" { target c++23 } }
+char32_t g = U'�';                             // { dg-warning "invalid UTF-8 
character <c2> in literal" "" { target c++23 } }
+char32_t h = U'�';                             // { dg-warning "invalid UTF-8 
character <e0> in literal" "" { target c++23 } }
+char32_t i = U'���';                           // { dg-warning "invalid UTF-8 
character <e0><80><bf> in literal" "" { target c++23 } }
+char32_t j = U'���';                           // { dg-warning "invalid UTF-8 
character <e0><9f><80> in literal" "" { target c++23 } }
+char32_t k = U'��';                            // { dg-warning "invalid UTF-8 
character <e0><bf> in literal" "" { target c++23 } }
+char32_t l = U'��';                            // { dg-warning "invalid UTF-8 
character <ec><80> in literal" "" { target c++23 } }
+char32_t m = U'���';                           // { dg-warning "invalid UTF-8 
character <ed><a0><80> in literal" "" { target c++23 } }
+char32_t n = U'����';                          // { dg-warning "invalid UTF-8 
character <f0><80><80><80> in literal" "" { target c++23 } }
+char32_t o = U'����';                          // { dg-warning "invalid UTF-8 
character <f0><8f><bf><bf> in literal" "" { target c++23 } }
+char32_t p = U'����';                          // { dg-warning "invalid UTF-8 
character <f4><90><80><80> in literal" "" { target c++23 } }
+char32_t q = U'������';                                // { dg-warning 
"invalid UTF-8 character <fd><bf><bf><bf> in literal" "" { target c++23 } }
+                                               // { dg-warning "invalid UTF-8 
character <bf> in literal" "" { target c++23 } .-1 }
+auto A = U"€߿ࠀ퟿𐀀�";           // { dg-bogus "invalid UTF-8 character" }
+auto B = U"�";                                 // { dg-warning "invalid UTF-8 
character <80> in literal" "" { target c++23 } }
+auto C = U"�";                                 // { dg-warning "invalid UTF-8 
character <bf> in literal" "" { target c++23 } }
+auto D = U"�";                                 // { dg-warning "invalid UTF-8 
character <c0> in literal" "" { target c++23 } }
+auto E = U"�";                                 // { dg-warning "invalid UTF-8 
character <c1> in literal" "" { target c++23 } }
+auto F = U"�";                                 // { dg-warning "invalid UTF-8 
character <f5> in literal" "" { target c++23 } }
+auto G = U"�";                                 // { dg-warning "invalid UTF-8 
character <ff> in literal" "" { target c++23 } }
+auto H = U"�";                                 // { dg-warning "invalid UTF-8 
character <c2> in literal" "" { target c++23 } }
+auto I = U"�";                                 // { dg-warning "invalid UTF-8 
character <e0> in literal" "" { target c++23 } }
+auto J = U"���";                               // { dg-warning "invalid UTF-8 
character <e0><80><bf> in literal" "" { target c++23 } }
+auto K = U"���";                               // { dg-warning "invalid UTF-8 
character <e0><9f><80> in literal" "" { target c++23 } }
+auto L = U"��";                                        // { dg-warning 
"invalid UTF-8 character <e0><bf> in literal" "" { target c++23 } }
+auto M = U"��";                                        // { dg-warning 
"invalid UTF-8 character <ec><80> in literal" "" { target c++23 } }
+auto N = U"���";                               // { dg-warning "invalid UTF-8 
character <ed><a0><80> in literal" "" { target c++23 } }
+auto O = U"����";                              // { dg-warning "invalid UTF-8 
character <f0><80><80><80> in literal" "" { target c++23 } }
+auto P = U"����";                              // { dg-warning "invalid UTF-8 
character <f0><8f><bf><bf> in literal" "" { target c++23 } }
+auto Q = U"����";                              // { dg-warning "invalid UTF-8 
character <f4><90><80><80> in literal" "" { target c++23 } }
+auto R = U"������";                            // { dg-warning "invalid UTF-8 
character <fd><bf><bf><bf> in literal" "" { target c++23 } }
+                                               // { dg-warning "invalid UTF-8 
character <bf> in literal" "" { target c++23 } .-1 }
+auto A1 = UR"(€߿ࠀ퟿𐀀�)";               // { dg-bogus "invalid UTF-8 character" 
}
+auto B1 = UR"(�)";                             // { dg-warning "invalid UTF-8 
character <80> in literal" "" { target c++23 } }
+auto C1 = UR"(�)";                             // { dg-warning "invalid UTF-8 
character <bf> in literal" "" { target c++23 } }
+auto D1 = UR"(�)";                             // { dg-warning "invalid UTF-8 
character <c0> in literal" "" { target c++23 } }
+auto E1 = UR"(�)";                             // { dg-warning "invalid UTF-8 
character <c1> in literal" "" { target c++23 } }
+auto F1 = UR"(�)";                             // { dg-warning "invalid UTF-8 
character <f5> in literal" "" { target c++23 } }
+auto G1 = UR"(�)";                             // { dg-warning "invalid UTF-8 
character <ff> in literal" "" { target c++23 } }
+auto H1 = UR"(�)";                             // { dg-warning "invalid UTF-8 
character <c2> in literal" "" { target c++23 } }
+auto I1 = UR"(�)";                             // { dg-warning "invalid UTF-8 
character <e0> in literal" "" { target c++23 } }
+auto J1 = UR"(���)";                           // { dg-warning "invalid UTF-8 
character <e0><80><bf> in literal" "" { target c++23 } }
+auto K1 = UR"(���)";                           // { dg-warning "invalid UTF-8 
character <e0><9f><80> in literal" "" { target c++23 } }
+auto L1 = UR"(��)";                            // { dg-warning "invalid UTF-8 
character <e0><bf> in literal" "" { target c++23 } }
+auto M1 = UR"(��)";                            // { dg-warning "invalid UTF-8 
character <ec><80> in literal" "" { target c++23 } }
+auto N1 = UR"(���)";                           // { dg-warning "invalid UTF-8 
character <ed><a0><80> in literal" "" { target c++23 } }
+auto O1 = UR"(����)";                          // { dg-warning "invalid UTF-8 
character <f0><80><80><80> in literal" "" { target c++23 } }
+auto P1 = UR"(����)";                          // { dg-warning "invalid UTF-8 
character <f0><8f><bf><bf> in literal" "" { target c++23 } }
+auto Q1 = UR"(����)";                          // { dg-warning "invalid UTF-8 
character <f4><90><80><80> in literal" "" { target c++23 } }
+auto R1 = UR"(������)";                                // { dg-warning 
"invalid UTF-8 character <fd><bf><bf><bf> in literal" "" { target c++23 } }
+                                               // { dg-warning "invalid UTF-8 
character <bf> in literal" "" { target c++23 } .-1 }
+auto A2 = u8"€߿ࠀ퟿𐀀�";         // { dg-bogus "invalid UTF-8 character" }
+auto B2 = u8"�";                               // { dg-warning "invalid UTF-8 
character <80> in literal" "" { target c++23 } }
+auto C2 = u8"�";                               // { dg-warning "invalid UTF-8 
character <bf> in literal" "" { target c++23 } }
+auto D2 = u8"�";                               // { dg-warning "invalid UTF-8 
character <c0> in literal" "" { target c++23 } }
+auto E2 = u8"�";                               // { dg-warning "invalid UTF-8 
character <c1> in literal" "" { target c++23 } }
+auto F2 = u8"�";                               // { dg-warning "invalid UTF-8 
character <f5> in literal" "" { target c++23 } }
+auto G2 = u8"�";                               // { dg-warning "invalid UTF-8 
character <ff> in literal" "" { target c++23 } }
+auto H2 = u8"�";                               // { dg-warning "invalid UTF-8 
character <c2> in literal" "" { target c++23 } }
+auto I2 = u8"�";                               // { dg-warning "invalid UTF-8 
character <e0> in literal" "" { target c++23 } }
+auto J2 = u8"���";                             // { dg-warning "invalid UTF-8 
character <e0><80><bf> in literal" "" { target c++23 } }
+auto K2 = u8"���";                             // { dg-warning "invalid UTF-8 
character <e0><9f><80> in literal" "" { target c++23 } }
+auto L2 = u8"��";                              // { dg-warning "invalid UTF-8 
character <e0><bf> in literal" "" { target c++23 } }
+auto M2 = u8"��";                              // { dg-warning "invalid UTF-8 
character <ec><80> in literal" "" { target c++23 } }
+auto N2 = u8"���";                             // { dg-warning "invalid UTF-8 
character <ed><a0><80> in literal" "" { target c++23 } }
+auto O2 = u8"����";                            // { dg-warning "invalid UTF-8 
character <f0><80><80><80> in literal" "" { target c++23 } }
+auto P2 = u8"����";                            // { dg-warning "invalid UTF-8 
character <f0><8f><bf><bf> in literal" "" { target c++23 } }
+auto Q2 = u8"����";                            // { dg-warning "invalid UTF-8 
character <f4><90><80><80> in literal" "" { target c++23 } }
+auto R2 = u8"������";                          // { dg-warning "invalid UTF-8 
character <fd><bf><bf><bf> in literal" "" { target c++23 } }
+                                               // { dg-warning "invalid UTF-8 
character <bf> in literal" "" { target c++23 } .-1 }
--- gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-6.C.jj     2022-08-30 
16:54:35.458170811 +0200
+++ gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-6.C        2022-08-30 
16:53:31.846021778 +0200
@@ -0,0 +1,80 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess { target c++11 } }
+// { dg-options "-finput-charset=UTF-8 -pedantic" }
+
+char32_t a = U'�';                             // { dg-warning "invalid UTF-8 
character <80> in literal" "" { target c++23 } }
+char32_t b = U'�';                             // { dg-warning "invalid UTF-8 
character <bf> in literal" "" { target c++23 } }
+char32_t c = U'�';                             // { dg-warning "invalid UTF-8 
character <c0> in literal" "" { target c++23 } }
+char32_t d = U'�';                             // { dg-warning "invalid UTF-8 
character <c1> in literal" "" { target c++23 } }
+char32_t e = U'�';                             // { dg-warning "invalid UTF-8 
character <f5> in literal" "" { target c++23 } }
+char32_t f = U'�';                             // { dg-warning "invalid UTF-8 
character <ff> in literal" "" { target c++23 } }
+char32_t g = U'�';                             // { dg-warning "invalid UTF-8 
character <c2> in literal" "" { target c++23 } }
+char32_t h = U'�';                             // { dg-warning "invalid UTF-8 
character <e0> in literal" "" { target c++23 } }
+char32_t i = U'���';                           // { dg-warning "invalid UTF-8 
character <e0><80><bf> in literal" "" { target c++23 } }
+char32_t j = U'���';                           // { dg-warning "invalid UTF-8 
character <e0><9f><80> in literal" "" { target c++23 } }
+char32_t k = U'��';                            // { dg-warning "invalid UTF-8 
character <e0><bf> in literal" "" { target c++23 } }
+char32_t l = U'��';                            // { dg-warning "invalid UTF-8 
character <ec><80> in literal" "" { target c++23 } }
+char32_t m = U'���';                           // { dg-warning "invalid UTF-8 
character <ed><a0><80> in literal" "" { target c++23 } }
+char32_t n = U'����';                          // { dg-warning "invalid UTF-8 
character <f0><80><80><80> in literal" "" { target c++23 } }
+char32_t o = U'����';                          // { dg-warning "invalid UTF-8 
character <f0><8f><bf><bf> in literal" "" { target c++23 } }
+char32_t p = U'����';                          // { dg-warning "invalid UTF-8 
character <f4><90><80><80> in literal" "" { target c++23 } }
+char32_t q = U'������';                                // { dg-warning 
"invalid UTF-8 character <fd><bf><bf><bf> in literal" "" { target c++23 } }
+                                               // { dg-warning "invalid UTF-8 
character <bf> in literal" "" { target c++23 } .-1 }
+auto A = U"€߿ࠀ퟿𐀀�";           // { dg-bogus "invalid UTF-8 character" }
+auto B = U"�";                                 // { dg-warning "invalid UTF-8 
character <80> in literal" "" { target c++23 } }
+auto C = U"�";                                 // { dg-warning "invalid UTF-8 
character <bf> in literal" "" { target c++23 } }
+auto D = U"�";                                 // { dg-warning "invalid UTF-8 
character <c0> in literal" "" { target c++23 } }
+auto E = U"�";                                 // { dg-warning "invalid UTF-8 
character <c1> in literal" "" { target c++23 } }
+auto F = U"�";                                 // { dg-warning "invalid UTF-8 
character <f5> in literal" "" { target c++23 } }
+auto G = U"�";                                 // { dg-warning "invalid UTF-8 
character <ff> in literal" "" { target c++23 } }
+auto H = U"�";                                 // { dg-warning "invalid UTF-8 
character <c2> in literal" "" { target c++23 } }
+auto I = U"�";                                 // { dg-warning "invalid UTF-8 
character <e0> in literal" "" { target c++23 } }
+auto J = U"���";                               // { dg-warning "invalid UTF-8 
character <e0><80><bf> in literal" "" { target c++23 } }
+auto K = U"���";                               // { dg-warning "invalid UTF-8 
character <e0><9f><80> in literal" "" { target c++23 } }
+auto L = U"��";                                        // { dg-warning 
"invalid UTF-8 character <e0><bf> in literal" "" { target c++23 } }
+auto M = U"��";                                        // { dg-warning 
"invalid UTF-8 character <ec><80> in literal" "" { target c++23 } }
+auto N = U"���";                               // { dg-warning "invalid UTF-8 
character <ed><a0><80> in literal" "" { target c++23 } }
+auto O = U"����";                              // { dg-warning "invalid UTF-8 
character <f0><80><80><80> in literal" "" { target c++23 } }
+auto P = U"����";                              // { dg-warning "invalid UTF-8 
character <f0><8f><bf><bf> in literal" "" { target c++23 } }
+auto Q = U"����";                              // { dg-warning "invalid UTF-8 
character <f4><90><80><80> in literal" "" { target c++23 } }
+auto R = U"������";                            // { dg-warning "invalid UTF-8 
character <fd><bf><bf><bf> in literal" "" { target c++23 } }
+                                               // { dg-warning "invalid UTF-8 
character <bf> in literal" "" { target c++23 } .-1 }
+auto A1 = UR"(€߿ࠀ퟿𐀀�)";               // { dg-bogus "invalid UTF-8 character" 
}
+auto B1 = UR"(�)";                             // { dg-warning "invalid UTF-8 
character <80> in literal" "" { target c++23 } }
+auto C1 = UR"(�)";                             // { dg-warning "invalid UTF-8 
character <bf> in literal" "" { target c++23 } }
+auto D1 = UR"(�)";                             // { dg-warning "invalid UTF-8 
character <c0> in literal" "" { target c++23 } }
+auto E1 = UR"(�)";                             // { dg-warning "invalid UTF-8 
character <c1> in literal" "" { target c++23 } }
+auto F1 = UR"(�)";                             // { dg-warning "invalid UTF-8 
character <f5> in literal" "" { target c++23 } }
+auto G1 = UR"(�)";                             // { dg-warning "invalid UTF-8 
character <ff> in literal" "" { target c++23 } }
+auto H1 = UR"(�)";                             // { dg-warning "invalid UTF-8 
character <c2> in literal" "" { target c++23 } }
+auto I1 = UR"(�)";                             // { dg-warning "invalid UTF-8 
character <e0> in literal" "" { target c++23 } }
+auto J1 = UR"(���)";                           // { dg-warning "invalid UTF-8 
character <e0><80><bf> in literal" "" { target c++23 } }
+auto K1 = UR"(���)";                           // { dg-warning "invalid UTF-8 
character <e0><9f><80> in literal" "" { target c++23 } }
+auto L1 = UR"(��)";                            // { dg-warning "invalid UTF-8 
character <e0><bf> in literal" "" { target c++23 } }
+auto M1 = UR"(��)";                            // { dg-warning "invalid UTF-8 
character <ec><80> in literal" "" { target c++23 } }
+auto N1 = UR"(���)";                           // { dg-warning "invalid UTF-8 
character <ed><a0><80> in literal" "" { target c++23 } }
+auto O1 = UR"(����)";                          // { dg-warning "invalid UTF-8 
character <f0><80><80><80> in literal" "" { target c++23 } }
+auto P1 = UR"(����)";                          // { dg-warning "invalid UTF-8 
character <f0><8f><bf><bf> in literal" "" { target c++23 } }
+auto Q1 = UR"(����)";                          // { dg-warning "invalid UTF-8 
character <f4><90><80><80> in literal" "" { target c++23 } }
+auto R1 = UR"(������)";                                // { dg-warning 
"invalid UTF-8 character <fd><bf><bf><bf> in literal" "" { target c++23 } }
+                                               // { dg-warning "invalid UTF-8 
character <bf> in literal" "" { target c++23 } .-1 }
+auto A2 = u8"€߿ࠀ퟿𐀀�";         // { dg-bogus "invalid UTF-8 character" }
+auto B2 = u8"�";                               // { dg-warning "invalid UTF-8 
character <80> in literal" "" { target c++23 } }
+auto C2 = u8"�";                               // { dg-warning "invalid UTF-8 
character <bf> in literal" "" { target c++23 } }
+auto D2 = u8"�";                               // { dg-warning "invalid UTF-8 
character <c0> in literal" "" { target c++23 } }
+auto E2 = u8"�";                               // { dg-warning "invalid UTF-8 
character <c1> in literal" "" { target c++23 } }
+auto F2 = u8"�";                               // { dg-warning "invalid UTF-8 
character <f5> in literal" "" { target c++23 } }
+auto G2 = u8"�";                               // { dg-warning "invalid UTF-8 
character <ff> in literal" "" { target c++23 } }
+auto H2 = u8"�";                               // { dg-warning "invalid UTF-8 
character <c2> in literal" "" { target c++23 } }
+auto I2 = u8"�";                               // { dg-warning "invalid UTF-8 
character <e0> in literal" "" { target c++23 } }
+auto J2 = u8"���";                             // { dg-warning "invalid UTF-8 
character <e0><80><bf> in literal" "" { target c++23 } }
+auto K2 = u8"���";                             // { dg-warning "invalid UTF-8 
character <e0><9f><80> in literal" "" { target c++23 } }
+auto L2 = u8"��";                              // { dg-warning "invalid UTF-8 
character <e0><bf> in literal" "" { target c++23 } }
+auto M2 = u8"��";                              // { dg-warning "invalid UTF-8 
character <ec><80> in literal" "" { target c++23 } }
+auto N2 = u8"���";                             // { dg-warning "invalid UTF-8 
character <ed><a0><80> in literal" "" { target c++23 } }
+auto O2 = u8"����";                            // { dg-warning "invalid UTF-8 
character <f0><80><80><80> in literal" "" { target c++23 } }
+auto P2 = u8"����";                            // { dg-warning "invalid UTF-8 
character <f0><8f><bf><bf> in literal" "" { target c++23 } }
+auto Q2 = u8"����";                            // { dg-warning "invalid UTF-8 
character <f4><90><80><80> in literal" "" { target c++23 } }
+auto R2 = u8"������";                          // { dg-warning "invalid UTF-8 
character <fd><bf><bf><bf> in literal" "" { target c++23 } }
+                                               // { dg-warning "invalid UTF-8 
character <bf> in literal" "" { target c++23 } .-1 }
--- gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-7.C.jj     2022-08-30 16:54:32.891205153 +0200
+++ gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-7.C        2022-08-30 16:54:06.422559241 +0200
@@ -0,0 +1,80 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess { target c++11 } }
+// { dg-options "-finput-charset=UTF-8 -pedantic-errors" }
+
+char32_t a = U'�';                             // { dg-error "invalid UTF-8 
character <80> in literal" "" { target c++23 } }
+char32_t b = U'�';                             // { dg-error "invalid UTF-8 
character <bf> in literal" "" { target c++23 } }
+char32_t c = U'�';                             // { dg-error "invalid UTF-8 
character <c0> in literal" "" { target c++23 } }
+char32_t d = U'�';                             // { dg-error "invalid UTF-8 
character <c1> in literal" "" { target c++23 } }
+char32_t e = U'�';                             // { dg-error "invalid UTF-8 
character <f5> in literal" "" { target c++23 } }
+char32_t f = U'�';                             // { dg-error "invalid UTF-8 
character <ff> in literal" "" { target c++23 } }
+char32_t g = U'�';                             // { dg-error "invalid UTF-8 
character <c2> in literal" "" { target c++23 } }
+char32_t h = U'�';                             // { dg-error "invalid UTF-8 
character <e0> in literal" "" { target c++23 } }
+char32_t i = U'���';                           // { dg-error "invalid UTF-8 
character <e0><80><bf> in literal" "" { target c++23 } }
+char32_t j = U'���';                           // { dg-error "invalid UTF-8 
character <e0><9f><80> in literal" "" { target c++23 } }
+char32_t k = U'��';                            // { dg-error "invalid UTF-8 
character <e0><bf> in literal" "" { target c++23 } }
+char32_t l = U'��';                            // { dg-error "invalid UTF-8 
character <ec><80> in literal" "" { target c++23 } }
+char32_t m = U'���';                           // { dg-error "invalid UTF-8 
character <ed><a0><80> in literal" "" { target c++23 } }
+char32_t n = U'����';                          // { dg-error "invalid UTF-8 
character <f0><80><80><80> in literal" "" { target c++23 } }
+char32_t o = U'����';                          // { dg-error "invalid UTF-8 
character <f0><8f><bf><bf> in literal" "" { target c++23 } }
+char32_t p = U'����';                          // { dg-error "invalid UTF-8 
character <f4><90><80><80> in literal" "" { target c++23 } }
+char32_t q = U'������';                                // { dg-error "invalid 
UTF-8 character <fd><bf><bf><bf> in literal" "" { target c++23 } }
+                                               // { dg-error "invalid UTF-8 
character <bf> in literal" "" { target c++23 } .-1 }
+auto A = U"€߿ࠀ퟿𐀀�";           // { dg-bogus "invalid UTF-8 character" }
+auto B = U"�";                                 // { dg-error "invalid UTF-8 
character <80> in literal" "" { target c++23 } }
+auto C = U"�";                                 // { dg-error "invalid UTF-8 
character <bf> in literal" "" { target c++23 } }
+auto D = U"�";                                 // { dg-error "invalid UTF-8 
character <c0> in literal" "" { target c++23 } }
+auto E = U"�";                                 // { dg-error "invalid UTF-8 
character <c1> in literal" "" { target c++23 } }
+auto F = U"�";                                 // { dg-error "invalid UTF-8 
character <f5> in literal" "" { target c++23 } }
+auto G = U"�";                                 // { dg-error "invalid UTF-8 
character <ff> in literal" "" { target c++23 } }
+auto H = U"�";                                 // { dg-error "invalid UTF-8 
character <c2> in literal" "" { target c++23 } }
+auto I = U"�";                                 // { dg-error "invalid UTF-8 
character <e0> in literal" "" { target c++23 } }
+auto J = U"���";                               // { dg-error "invalid UTF-8 
character <e0><80><bf> in literal" "" { target c++23 } }
+auto K = U"���";                               // { dg-error "invalid UTF-8 
character <e0><9f><80> in literal" "" { target c++23 } }
+auto L = U"��";                                        // { dg-error "invalid 
UTF-8 character <e0><bf> in literal" "" { target c++23 } }
+auto M = U"��";                                        // { dg-error "invalid 
UTF-8 character <ec><80> in literal" "" { target c++23 } }
+auto N = U"���";                               // { dg-error "invalid UTF-8 
character <ed><a0><80> in literal" "" { target c++23 } }
+auto O = U"����";                              // { dg-error "invalid UTF-8 
character <f0><80><80><80> in literal" "" { target c++23 } }
+auto P = U"����";                              // { dg-error "invalid UTF-8 
character <f0><8f><bf><bf> in literal" "" { target c++23 } }
+auto Q = U"����";                              // { dg-error "invalid UTF-8 
character <f4><90><80><80> in literal" "" { target c++23 } }
+auto R = U"������";                            // { dg-error "invalid UTF-8 
character <fd><bf><bf><bf> in literal" "" { target c++23 } }
+                                               // { dg-error "invalid UTF-8 
character <bf> in literal" "" { target c++23 } .-1 }
+auto A1 = UR"(€߿ࠀ퟿𐀀�)";               // { dg-bogus "invalid UTF-8 character" 
}
+auto B1 = UR"(�)";                             // { dg-error "invalid UTF-8 
character <80> in literal" "" { target c++23 } }
+auto C1 = UR"(�)";                             // { dg-error "invalid UTF-8 
character <bf> in literal" "" { target c++23 } }
+auto D1 = UR"(�)";                             // { dg-error "invalid UTF-8 
character <c0> in literal" "" { target c++23 } }
+auto E1 = UR"(�)";                             // { dg-error "invalid UTF-8 
character <c1> in literal" "" { target c++23 } }
+auto F1 = UR"(�)";                             // { dg-error "invalid UTF-8 
character <f5> in literal" "" { target c++23 } }
+auto G1 = UR"(�)";                             // { dg-error "invalid UTF-8 
character <ff> in literal" "" { target c++23 } }
+auto H1 = UR"(�)";                             // { dg-error "invalid UTF-8 
character <c2> in literal" "" { target c++23 } }
+auto I1 = UR"(�)";                             // { dg-error "invalid UTF-8 
character <e0> in literal" "" { target c++23 } }
+auto J1 = UR"(���)";                           // { dg-error "invalid UTF-8 
character <e0><80><bf> in literal" "" { target c++23 } }
+auto K1 = UR"(���)";                           // { dg-error "invalid UTF-8 
character <e0><9f><80> in literal" "" { target c++23 } }
+auto L1 = UR"(��)";                            // { dg-error "invalid UTF-8 
character <e0><bf> in literal" "" { target c++23 } }
+auto M1 = UR"(��)";                            // { dg-error "invalid UTF-8 
character <ec><80> in literal" "" { target c++23 } }
+auto N1 = UR"(���)";                           // { dg-error "invalid UTF-8 
character <ed><a0><80> in literal" "" { target c++23 } }
+auto O1 = UR"(����)";                          // { dg-error "invalid UTF-8 
character <f0><80><80><80> in literal" "" { target c++23 } }
+auto P1 = UR"(����)";                          // { dg-error "invalid UTF-8 
character <f0><8f><bf><bf> in literal" "" { target c++23 } }
+auto Q1 = UR"(����)";                          // { dg-error "invalid UTF-8 
character <f4><90><80><80> in literal" "" { target c++23 } }
+auto R1 = UR"(������)";                                // { dg-error "invalid 
UTF-8 character <fd><bf><bf><bf> in literal" "" { target c++23 } }
+                                               // { dg-error "invalid UTF-8 
character <bf> in literal" "" { target c++23 } .-1 }
+auto A2 = u8"€߿ࠀ퟿𐀀�";         // { dg-bogus "invalid UTF-8 character" }
+auto B2 = u8"�";                               // { dg-error "invalid UTF-8 
character <80> in literal" "" { target c++23 } }
+auto C2 = u8"�";                               // { dg-error "invalid UTF-8 
character <bf> in literal" "" { target c++23 } }
+auto D2 = u8"�";                               // { dg-error "invalid UTF-8 
character <c0> in literal" "" { target c++23 } }
+auto E2 = u8"�";                               // { dg-error "invalid UTF-8 
character <c1> in literal" "" { target c++23 } }
+auto F2 = u8"�";                               // { dg-error "invalid UTF-8 
character <f5> in literal" "" { target c++23 } }
+auto G2 = u8"�";                               // { dg-error "invalid UTF-8 
character <ff> in literal" "" { target c++23 } }
+auto H2 = u8"�";                               // { dg-error "invalid UTF-8 
character <c2> in literal" "" { target c++23 } }
+auto I2 = u8"�";                               // { dg-error "invalid UTF-8 
character <e0> in literal" "" { target c++23 } }
+auto J2 = u8"���";                             // { dg-error "invalid UTF-8 
character <e0><80><bf> in literal" "" { target c++23 } }
+auto K2 = u8"���";                             // { dg-error "invalid UTF-8 
character <e0><9f><80> in literal" "" { target c++23 } }
+auto L2 = u8"��";                              // { dg-error "invalid UTF-8 
character <e0><bf> in literal" "" { target c++23 } }
+auto M2 = u8"��";                              // { dg-error "invalid UTF-8 
character <ec><80> in literal" "" { target c++23 } }
+auto N2 = u8"���";                             // { dg-error "invalid UTF-8 
character <ed><a0><80> in literal" "" { target c++23 } }
+auto O2 = u8"����";                            // { dg-error "invalid UTF-8 
character <f0><80><80><80> in literal" "" { target c++23 } }
+auto P2 = u8"����";                            // { dg-error "invalid UTF-8 
character <f0><8f><bf><bf> in literal" "" { target c++23 } }
+auto Q2 = u8"����";                            // { dg-error "invalid UTF-8 
character <f4><90><80><80> in literal" "" { target c++23 } }
+auto R2 = u8"������";                          // { dg-error "invalid UTF-8 
character <fd><bf><bf><bf> in literal" "" { target c++23 } }
+                                               // { dg-error "invalid UTF-8 
character <bf> in literal" "" { target c++23 } .-1 }
--- gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-8.C.jj     2022-08-30 16:54:29.867245604 +0200
+++ gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-8.C        2022-08-30 16:54:50.737966411 +0200
@@ -0,0 +1,80 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess { target c++11 } }
+// { dg-options "-finput-charset=UTF-8 -pedantic-errors -Wno-invalid-utf8" }
+
+char32_t a = U'�';                             // { dg-bogus "invalid UTF-8 
character <80> in literal" "" { target c++23 } }
+char32_t b = U'�';                             // { dg-bogus "invalid UTF-8 
character <bf> in literal" "" { target c++23 } }
+char32_t c = U'�';                             // { dg-bogus "invalid UTF-8 
character <c0> in literal" "" { target c++23 } }
+char32_t d = U'�';                             // { dg-bogus "invalid UTF-8 
character <c1> in literal" "" { target c++23 } }
+char32_t e = U'�';                             // { dg-bogus "invalid UTF-8 
character <f5> in literal" "" { target c++23 } }
+char32_t f = U'�';                             // { dg-bogus "invalid UTF-8 
character <ff> in literal" "" { target c++23 } }
+char32_t g = U'�';                             // { dg-bogus "invalid UTF-8 
character <c2> in literal" "" { target c++23 } }
+char32_t h = U'�';                             // { dg-bogus "invalid UTF-8 
character <e0> in literal" "" { target c++23 } }
+char32_t i = U'���';                           // { dg-bogus "invalid UTF-8 
character <e0><80><bf> in literal" "" { target c++23 } }
+char32_t j = U'���';                           // { dg-bogus "invalid UTF-8 
character <e0><9f><80> in literal" "" { target c++23 } }
+char32_t k = U'��';                            // { dg-bogus "invalid UTF-8 
character <e0><bf> in literal" "" { target c++23 } }
+char32_t l = U'��';                            // { dg-bogus "invalid UTF-8 
character <ec><80> in literal" "" { target c++23 } }
+char32_t m = U'���';                           // { dg-bogus "invalid UTF-8 
character <ed><a0><80> in literal" "" { target c++23 } }
+char32_t n = U'����';                          // { dg-bogus "invalid UTF-8 
character <f0><80><80><80> in literal" "" { target c++23 } }
+char32_t o = U'����';                          // { dg-bogus "invalid UTF-8 
character <f0><8f><bf><bf> in literal" "" { target c++23 } }
+char32_t p = U'����';                          // { dg-bogus "invalid UTF-8 
character <f4><90><80><80> in literal" "" { target c++23 } }
+char32_t q = U'������';                                // { dg-bogus "invalid 
UTF-8 character <fd><bf><bf><bf> in literal" "" { target c++23 } }
+                                               // { dg-bogus "invalid UTF-8 
character <bf> in literal" "" { target c++23 } .-1 }
+auto A = U"€߿ࠀ퟿𐀀�";           // { dg-bogus "invalid UTF-8 character" }
+auto B = U"�";                                 // { dg-bogus "invalid UTF-8 
character <80> in literal" "" { target c++23 } }
+auto C = U"�";                                 // { dg-bogus "invalid UTF-8 
character <bf> in literal" "" { target c++23 } }
+auto D = U"�";                                 // { dg-bogus "invalid UTF-8 
character <c0> in literal" "" { target c++23 } }
+auto E = U"�";                                 // { dg-bogus "invalid UTF-8 
character <c1> in literal" "" { target c++23 } }
+auto F = U"�";                                 // { dg-bogus "invalid UTF-8 
character <f5> in literal" "" { target c++23 } }
+auto G = U"�";                                 // { dg-bogus "invalid UTF-8 
character <ff> in literal" "" { target c++23 } }
+auto H = U"�";                                 // { dg-bogus "invalid UTF-8 
character <c2> in literal" "" { target c++23 } }
+auto I = U"�";                                 // { dg-bogus "invalid UTF-8 
character <e0> in literal" "" { target c++23 } }
+auto J = U"���";                               // { dg-bogus "invalid UTF-8 
character <e0><80><bf> in literal" "" { target c++23 } }
+auto K = U"���";                               // { dg-bogus "invalid UTF-8 
character <e0><9f><80> in literal" "" { target c++23 } }
+auto L = U"��";                                        // { dg-bogus "invalid 
UTF-8 character <e0><bf> in literal" "" { target c++23 } }
+auto M = U"��";                                        // { dg-bogus "invalid 
UTF-8 character <ec><80> in literal" "" { target c++23 } }
+auto N = U"���";                               // { dg-bogus "invalid UTF-8 
character <ed><a0><80> in literal" "" { target c++23 } }
+auto O = U"����";                              // { dg-bogus "invalid UTF-8 
character <f0><80><80><80> in literal" "" { target c++23 } }
+auto P = U"����";                              // { dg-bogus "invalid UTF-8 
character <f0><8f><bf><bf> in literal" "" { target c++23 } }
+auto Q = U"����";                              // { dg-bogus "invalid UTF-8 
character <f4><90><80><80> in literal" "" { target c++23 } }
+auto R = U"������";                            // { dg-bogus "invalid UTF-8 
character <fd><bf><bf><bf> in literal" "" { target c++23 } }
+                                               // { dg-bogus "invalid UTF-8 
character <bf> in literal" "" { target c++23 } .-1 }
+auto A1 = UR"(€߿ࠀ퟿𐀀�)";               // { dg-bogus "invalid UTF-8 character" 
}
+auto B1 = UR"(�)";                             // { dg-bogus "invalid UTF-8 
character <80> in literal" "" { target c++23 } }
+auto C1 = UR"(�)";                             // { dg-bogus "invalid UTF-8 
character <bf> in literal" "" { target c++23 } }
+auto D1 = UR"(�)";                             // { dg-bogus "invalid UTF-8 
character <c0> in literal" "" { target c++23 } }
+auto E1 = UR"(�)";                             // { dg-bogus "invalid UTF-8 
character <c1> in literal" "" { target c++23 } }
+auto F1 = UR"(�)";                             // { dg-bogus "invalid UTF-8 
character <f5> in literal" "" { target c++23 } }
+auto G1 = UR"(�)";                             // { dg-bogus "invalid UTF-8 
character <ff> in literal" "" { target c++23 } }
+auto H1 = UR"(�)";                             // { dg-bogus "invalid UTF-8 
character <c2> in literal" "" { target c++23 } }
+auto I1 = UR"(�)";                             // { dg-bogus "invalid UTF-8 
character <e0> in literal" "" { target c++23 } }
+auto J1 = UR"(���)";                           // { dg-bogus "invalid UTF-8 
character <e0><80><bf> in literal" "" { target c++23 } }
+auto K1 = UR"(���)";                           // { dg-bogus "invalid UTF-8 
character <e0><9f><80> in literal" "" { target c++23 } }
+auto L1 = UR"(��)";                            // { dg-bogus "invalid UTF-8 
character <e0><bf> in literal" "" { target c++23 } }
+auto M1 = UR"(��)";                            // { dg-bogus "invalid UTF-8 
character <ec><80> in literal" "" { target c++23 } }
+auto N1 = UR"(���)";                           // { dg-bogus "invalid UTF-8 
character <ed><a0><80> in literal" "" { target c++23 } }
+auto O1 = UR"(����)";                          // { dg-bogus "invalid UTF-8 
character <f0><80><80><80> in literal" "" { target c++23 } }
+auto P1 = UR"(����)";                          // { dg-bogus "invalid UTF-8 
character <f0><8f><bf><bf> in literal" "" { target c++23 } }
+auto Q1 = UR"(����)";                          // { dg-bogus "invalid UTF-8 
character <f4><90><80><80> in literal" "" { target c++23 } }
+auto R1 = UR"(������)";                                // { dg-bogus "invalid 
UTF-8 character <fd><bf><bf><bf> in literal" "" { target c++23 } }
+                                               // { dg-bogus "invalid UTF-8 
character <bf> in literal" "" { target c++23 } .-1 }
+auto A2 = u8"€߿ࠀ퟿𐀀�";         // { dg-bogus "invalid UTF-8 character" }
+auto B2 = u8"�";                               // { dg-bogus "invalid UTF-8 
character <80> in literal" "" { target c++23 } }
+auto C2 = u8"�";                               // { dg-bogus "invalid UTF-8 
character <bf> in literal" "" { target c++23 } }
+auto D2 = u8"�";                               // { dg-bogus "invalid UTF-8 
character <c0> in literal" "" { target c++23 } }
+auto E2 = u8"�";                               // { dg-bogus "invalid UTF-8 
character <c1> in literal" "" { target c++23 } }
+auto F2 = u8"�";                               // { dg-bogus "invalid UTF-8 
character <f5> in literal" "" { target c++23 } }
+auto G2 = u8"�";                               // { dg-bogus "invalid UTF-8 
character <ff> in literal" "" { target c++23 } }
+auto H2 = u8"�";                               // { dg-bogus "invalid UTF-8 
character <c2> in literal" "" { target c++23 } }
+auto I2 = u8"�";                               // { dg-bogus "invalid UTF-8 
character <e0> in literal" "" { target c++23 } }
+auto J2 = u8"���";                             // { dg-bogus "invalid UTF-8 
character <e0><80><bf> in literal" "" { target c++23 } }
+auto K2 = u8"���";                             // { dg-bogus "invalid UTF-8 
character <e0><9f><80> in literal" "" { target c++23 } }
+auto L2 = u8"��";                              // { dg-bogus "invalid UTF-8 
character <e0><bf> in literal" "" { target c++23 } }
+auto M2 = u8"��";                              // { dg-bogus "invalid UTF-8 
character <ec><80> in literal" "" { target c++23 } }
+auto N2 = u8"���";                             // { dg-bogus "invalid UTF-8 
character <ed><a0><80> in literal" "" { target c++23 } }
+auto O2 = u8"����";                            // { dg-bogus "invalid UTF-8 
character <f0><80><80><80> in literal" "" { target c++23 } }
+auto P2 = u8"����";                            // { dg-bogus "invalid UTF-8 
character <f0><8f><bf><bf> in literal" "" { target c++23 } }
+auto Q2 = u8"����";                            // { dg-bogus "invalid UTF-8 
character <f4><90><80><80> in literal" "" { target c++23 } }
+auto R2 = u8"������";                          // { dg-bogus "invalid UTF-8 
character <fd><bf><bf><bf> in literal" "" { target c++23 } }
+                                               // { dg-bogus "invalid UTF-8 
character <bf> in literal" "" { target c++23 } .-1 }


        Jakub

Reply via email to