This patch marks the rich_locations used for -Wbidi-chars= diagnostics so that non-ASCII bytes are escaped when the quoted source lines are printed (using the diagnostics support I added in r12-4825-gbd5e882cf6e0def3dd1bc106075d59a303fe0d1e).
In particular, this ensures that the printed source lines will be pure ASCII, and thus the visual ordering of the characters will be the same as the logical ordering. Before: Wbidi-chars-1.c: In function ‘main’: Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=] 6 | /* } if (isAdmin) begin admins only */ | ^ Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=] 9 | /* end admins only { */ | ^ Wbidi-chars-11.c:6:15: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=] 6 | int LRE__PDF_\u202c; | ^ Wbidi-chars-11.c:8:19: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=] 8 | int LRE_\u202a_PDF__; | ^ Wbidi-chars-11.c:10:28: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=] 10 | const char *s1 = "LRE__PDF_\u202c"; | ^ Wbidi-chars-11.c:12:33: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=] 12 | const char *s2 = "LRE_\u202a_PDF_"; | ^ After: Wbidi-chars-1.c: In function ‘main’: Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=] 6 | /*<U+202E> } <U+2066>if (isAdmin)<U+2069> <U+2066> begin admins only */ | ^ Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=] 9 | /* end admins only <U+202E> { <U+2066>*/ | ^ Wbidi-chars-11.c:6:15: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=] 6 | int LRE_<U+202A>_PDF_\u202c; | ^ Wbidi-chars-11.c:8:19: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=] 8 | int LRE_\u202a_PDF_<U+202C>_; | ^ Wbidi-chars-11.c:10:28: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP 
DIRECTIONAL FORMATTING)" [-Wbidi-chars=] 10 | const char *s1 = "LRE_<U+202A>_PDF_\u202c"; | ^ Wbidi-chars-11.c:12:33: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=] 12 | const char *s2 = "LRE_\u202a_PDF_<U+202C>"; | ^ Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu. Pushed to trunk as r12-5355-g1a7f2c0774129750fdf73e9f1b78f0ce983c9ab3. libcpp/ChangeLog: PR preprocessor/103026 * lex.c (maybe_warn_bidi_on_close): Use a rich_location and call set_escape_on_output (true) on it. (maybe_warn_bidi_on_char): Likewise. Signed-off-by: David Malcolm <dmalc...@redhat.com> --- libcpp/lex.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/libcpp/lex.c b/libcpp/lex.c index 6a4fbce6030..8290bc637cd 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -1427,9 +1427,11 @@ maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p) const location_t loc = linemap_position_for_column (pfile->line_table, CPP_BUF_COLUMN (pfile->buffer, p)); - cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, - "unpaired UTF-8 bidirectional control character " - "detected"); + rich_location rich_loc (pfile->line_table, loc); + rich_loc.set_escape_on_output (true); + cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc, + "unpaired UTF-8 bidirectional control character " + "detected"); } /* We're done with this context. */ bidi::on_close (); @@ -1454,6 +1456,9 @@ maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind, const location_t loc = linemap_position_for_column (pfile->line_table, CPP_BUF_COLUMN (pfile->buffer, p)); + rich_location rich_loc (pfile->line_table, loc); + rich_loc.set_escape_on_output (true); + /* It seems excessive to warn about a PDI/PDF that is closing an opened context because we've already warned about the opening character. 
Except warn when we have a UCN x UTF-8 @@ -1462,20 +1467,20 @@ maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind, { if (warn_bidi == bidirectional_unpaired && bidi::current_ctx_ucn_p () != ucn_p) - cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, - "UTF-8 vs UCN mismatch when closing " - "a context by \"%s\"", bidi::to_str (kind)); + cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc, + "UTF-8 vs UCN mismatch when closing " + "a context by \"%s\"", bidi::to_str (kind)); } else if (warn_bidi == bidirectional_any) { if (kind == bidi::kind::PDF || kind == bidi::kind::PDI) - cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, - "\"%s\" is closing an unopened context", - bidi::to_str (kind)); + cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc, + "\"%s\" is closing an unopened context", + bidi::to_str (kind)); else - cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, - "found problematic Unicode character \"%s\"", - bidi::to_str (kind)); + cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc, + "found problematic Unicode character \"%s\"", + bidi::to_str (kind)); } } /* We're done with this context. */ -- 2.26.3