This flags rich_locations associated with -Wbidi-chars= so that
non-ASCII bytes will be escaped when printing the source lines
(using the diagnostics support I added in
r12-4825-gbd5e882cf6e0def3dd1bc106075d59a303fe0d1e).

In particular, this ensures that the printed source lines will
be pure ASCII, and thus the visual ordering of the characters
will be the same as the logical ordering.

Before:

  Wbidi-chars-1.c: In function ‘main’:
  Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
      6 |     /*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
        |                                           ^
  Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
      9 |     /* end admins only ‮ { ⁦*/
        |                            ^

  Wbidi-chars-11.c:6:15: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
      6 | int LRE_‪_PDF_\u202c;
        |               ^
  Wbidi-chars-11.c:8:19: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
      8 | int LRE_\u202a_PDF_‬_;
        |                   ^
  Wbidi-chars-11.c:10:28: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
     10 | const char *s1 = "LRE_‪_PDF_\u202c";
        |                            ^
  Wbidi-chars-11.c:12:33: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
     12 | const char *s2 = "LRE_\u202a_PDF_‬";
        |                                 ^

After:

  Wbidi-chars-1.c: In function ‘main’:
  Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
      6 |     /*<U+202E> } <U+2066>if (isAdmin)<U+2069> <U+2066> begin admins only */
        |                                                                           ^
  Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
      9 |     /* end admins only <U+202E> { <U+2066>*/
        |                                            ^

  Wbidi-chars-11.c:6:15: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
      6 | int LRE_<U+202A>_PDF_\u202c;
        |                       ^
  Wbidi-chars-11.c:8:19: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
      8 | int LRE_\u202a_PDF_<U+202C>_;
        |                   ^
  Wbidi-chars-11.c:10:28: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
     10 | const char *s1 = "LRE_<U+202A>_PDF_\u202c";
        |                                    ^
  Wbidi-chars-11.c:12:33: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
     12 | const char *s2 = "LRE_\u202a_PDF_<U+202C>";
        |                                 ^

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r12-5355-g1a7f2c0774129750fdf73e9f1b78f0ce983c9ab3.

libcpp/ChangeLog:
        PR preprocessor/103026
        * lex.c (maybe_warn_bidi_on_close): Use a rich_location
        and call set_escape_on_output (true) on it.
        (maybe_warn_bidi_on_char): Likewise.

Signed-off-by: David Malcolm <dmalc...@redhat.com>
---
 libcpp/lex.c | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/libcpp/lex.c b/libcpp/lex.c
index 6a4fbce6030..8290bc637cd 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1427,9 +1427,11 @@ maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p)
       const location_t loc
        = linemap_position_for_column (pfile->line_table,
                                       CPP_BUF_COLUMN (pfile->buffer, p));
-      cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
-                            "unpaired UTF-8 bidirectional control character "
-                            "detected");
+      rich_location rich_loc (pfile->line_table, loc);
+      rich_loc.set_escape_on_output (true);
+      cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
+                     "unpaired UTF-8 bidirectional control character "
+                     "detected");
     }
   /* We're done with this context.  */
   bidi::on_close ();
@@ -1454,6 +1456,9 @@ maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind,
       const location_t loc
        = linemap_position_for_column (pfile->line_table,
                                       CPP_BUF_COLUMN (pfile->buffer, p));
+      rich_location rich_loc (pfile->line_table, loc);
+      rich_loc.set_escape_on_output (true);
+
       /* It seems excessive to warn about a PDI/PDF that is closing
         an opened context because we've already warned about the
         opening character.  Except warn when we have a UCN x UTF-8
@@ -1462,20 +1467,20 @@ maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind,
        {
          if (warn_bidi == bidirectional_unpaired
              && bidi::current_ctx_ucn_p () != ucn_p)
-           cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
-                                  "UTF-8 vs UCN mismatch when closing "
-                                  "a context by \"%s\"", bidi::to_str (kind));
+           cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
+                           "UTF-8 vs UCN mismatch when closing "
+                           "a context by \"%s\"", bidi::to_str (kind));
        }
       else if (warn_bidi == bidirectional_any)
        {
          if (kind == bidi::kind::PDF || kind == bidi::kind::PDI)
-           cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
-                                  "\"%s\" is closing an unopened context",
-                                  bidi::to_str (kind));
+           cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
+                           "\"%s\" is closing an unopened context",
+                           bidi::to_str (kind));
          else
-           cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
-                                  "found problematic Unicode character \"%s\"",
-                                  bidi::to_str (kind));
+           cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
+                           "found problematic Unicode character \"%s\"",
+                           bidi::to_str (kind));
        }
     }
   /* We're done with this context.  */
-- 
2.26.3

Reply via email to