https://gcc.gnu.org/g:e07df053031e109c50387c92d689950de1d193ab

commit r13-8732-ge07df053031e109c50387c92d689950de1d193ab
Author: Jakub Jelinek <ja...@redhat.com>
Date:   Tue Apr 30 11:22:32 2024 +0200

    gimple-ssa-sprintf: Use [0, 1] range for %lc with (wint_t) 0 argument [PR114876]
    
    It seems that when Martin S. implemented this, he followed a strict reading
    of the standard, which said that %lc with a (wint_t) 0 argument is handled
    as wchar_t[2] temp = { arg, 0 }; %ls with temp as the argument, and so
    shouldn't print any characters.  However, most libc implementations
    actually handle that case like %c with a '\0' argument, emitting a single
    NUL character; the only known exception is musl.
    Recently, C23 changed this in response to GB-141 and POSIX in
    https://austingroupbugs.net/view.php?id=1647
    so that it should have the same behavior as %c with '\0'.
    
    Because there is implementation divergence, the following patch uses
    a range rather than hardcoding it to all 1s (i.e. the %c behavior),
    though the likely case is still 1 (forward looking plus most of
    implementations).
    The res.knownrange = true; assignment removed is redundant due to
    the same assignment done unconditionally before the if statement,
    rest is formatting fixes.
    
    I don't think the min >= 0 && min < 128 case is right either; I'd think
    it should be min >= 0 && max < 128, because otherwise only some of the
    possible inputs are (maybe) ASCII and there can be others.  But this code
    is a total mess anyway, with min, max, likely (somewhere in [min, max]?)
    and then unlikely, which is possibly larger than max.  I'm not sure;
    perhaps for at least some chars in the ASCII range the likely value could
    be the one for the ASCII case, so perhaps the one_2_one_ascii case just
    shouldn't set max to 1, and mayfail should be true for max >= 128.
    Anyway, I didn't feel I should touch that right now.
    
    2024-04-30  Jakub Jelinek  <ja...@redhat.com>
    
            PR tree-optimization/114876
            * gimple-ssa-sprintf.cc (format_character): For min == 0 && max == 0,
            set max, likely and unlikely members to 1 rather than 0.  Remove
            useless res.knownrange = true;.  Formatting fixes.
    
            * gcc.dg/pr114876.c: New test.
            * gcc.dg/tree-ssa/builtin-sprintf-warn-1.c: Adjust expected
            diagnostics.
    
    (cherry picked from commit 6c6b70f07208ca14ba783933988c04c6fc2fff42)

Diff:
---
 gcc/gimple-ssa-sprintf.cc                          | 20 +++++++------
 gcc/testsuite/gcc.dg/pr114876.c                    | 34 ++++++++++++++++++++++
 .../gcc.dg/tree-ssa/builtin-sprintf-warn-1.c       | 12 ++++----
 3 files changed, 51 insertions(+), 15 deletions(-)

diff --git a/gcc/gimple-ssa-sprintf.cc b/gcc/gimple-ssa-sprintf.cc
index 18975708d2c2..e02977f0ac39 100644
--- a/gcc/gimple-ssa-sprintf.cc
+++ b/gcc/gimple-ssa-sprintf.cc
@@ -2170,8 +2170,7 @@ format_character (const directive &dir, tree arg, pointer_query &ptr_qry)
 
   res.knownrange = true;
 
-  if (dir.specifier == 'C'
-      || dir.modifier == FMT_LEN_l)
+  if (dir.specifier == 'C' || dir.modifier == FMT_LEN_l)
     {
       /* A wide character can result in as few as zero bytes.  */
       res.range.min = 0;
@@ -2182,10 +2181,13 @@ format_character (const directive &dir, tree arg, pointer_query &ptr_qry)
        {
          if (min == 0 && max == 0)
            {
-             /* The NUL wide character results in no bytes.  */
-             res.range.max = 0;
-             res.range.likely = 0;
-             res.range.unlikely = 0;
+             /* In strict reading of older ISO C or POSIX, this required
+                no characters to be emitted.  ISO C23 changes that, so
+                does POSIX, to match what has been implemented in most of the
+                implementations, namely emitting a single NUL character.
+                Let's use 0 for minimum and 1 for all the other values.  */
+             res.range.max = 1;
+             res.range.likely = res.range.unlikely = 1;
            }
          else if (min >= 0 && min < 128)
            {
@@ -2193,11 +2195,12 @@ format_character (const directive &dir, tree arg, pointer_query &ptr_qry)
                 is not a 1-to-1 mapping to the source character set or
                 if the source set is not ASCII.  */
              bool one_2_one_ascii
-               = (target_to_host_charmap[0] == 1 && target_to_host ('a') == 97);
+               = (target_to_host_charmap[0] == 1
+                  && target_to_host ('a') == 97);
 
              /* A wide character in the ASCII range most likely results
                 in a single byte, and only unlikely in up to MB_LEN_MAX.  */
-             res.range.max = one_2_one_ascii ? 1 : target_mb_len_max ();;
+             res.range.max = one_2_one_ascii ? 1 : target_mb_len_max ();
              res.range.likely = 1;
              res.range.unlikely = target_mb_len_max ();
              res.mayfail = !one_2_one_ascii;
@@ -2228,7 +2231,6 @@ format_character (const directive &dir, tree arg, pointer_query &ptr_qry)
       /* A plain '%c' directive.  Its output is exactly 1.  */
       res.range.min = res.range.max = 1;
       res.range.likely = res.range.unlikely = 1;
-      res.knownrange = true;
     }
 
   /* Bump up the byte counters if WIDTH is greater.  */
diff --git a/gcc/testsuite/gcc.dg/pr114876.c b/gcc/testsuite/gcc.dg/pr114876.c
new file mode 100644
index 000000000000..7bb380aa29e5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr114876.c
@@ -0,0 +1,34 @@
+/* PR tree-optimization/114876 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not "return \[01\];" "optimized" } } */
+/* { dg-final { scan-tree-dump "return 3;" "optimized" } } */
+/* { dg-final { scan-tree-dump "return 4;" "optimized" } } */
+
+int
+foo (void)
+{
+  char buf[64];
+  return __builtin_sprintf (buf, "%lc%lc%lc", (__WINT_TYPE__) 0, (__WINT_TYPE__) 0, (__WINT_TYPE__) 0);
+}
+
+int
+bar (void)
+{
+  char buf[64];
+  return __builtin_sprintf (buf, "%c%c%c", 0, 0, 0);
+}
+
+int
+baz (void)
+{
+  char buf[64];
+  return __builtin_sprintf (buf, "%lc%lc%lca", (__WINT_TYPE__) 0, (__WINT_TYPE__) 0, (__WINT_TYPE__) 0);
+}
+
+int
+qux (void)
+{
+  char buf[64];
+  return __builtin_sprintf (buf, "%c%c%ca", 0, 0, 0);
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-1.c b/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-1.c
index 0cc03ff2de8d..6cb2acb7f871 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-1.c
@@ -200,11 +200,11 @@ void test_sprintf_chk_c_const (void)
   T (3, "%c%c", '1', '2');
 
   /* Wide characters.  */
-  T (0, "%lc",     (wint_t)0);   /* { dg-warning "nul past the end" } */
-  T (1, "%lc",     (wint_t)0);
-  T (1, "%lc%lc",  (wint_t)0, (wint_t)0);
+  T (0, "%lc",     (wint_t)0);   /* { dg-warning ".%lc. directive writing up to 1 bytes into a region of size 0" } */
+  T (1, "%lc",     (wint_t)0);   /* { dg-warning "nul past the end" } */
+  T (1, "%lc%lc",  (wint_t)0, (wint_t)0);   /* { dg-warning ".%lc. directive writing up to 1 bytes into a region of size between 0 and 1" } */
   T (2, "%lc",     (wint_t)0);
-  T (2, "%lc%lc",  (wint_t)0, (wint_t)0);
+  T (2, "%lc%lc",  (wint_t)0, (wint_t)0);   /* { dg-warning "nul past the end" } */
 
   /* The following could result in as few as no bytes and in as many as
      MB_CUR_MAX, but since the MB_CUR_MAX value is a runtime property
@@ -1550,7 +1550,7 @@ void test_snprintf_c_const (char *d)
 
   /* Wide characters.  */
   T (0, "%lc",  (wint_t)0);
-  T (1, "%lc",  (wint_t)0);
+  T (1, "%lc",  (wint_t)0);      /* { dg-warning "output may be truncated before the last format character" } */
   T (2, "%lc",  (wint_t)0);
 
   /* The following could result in as few as a single byte and in as many
@@ -1603,7 +1603,7 @@ void test_snprintf_chk_c_const (void)
 
   /* Wide characters.  */
   T (0, "%lc",  (wint_t)0);
-  T (1, "%lc",  (wint_t)0);
+  T (1, "%lc",  (wint_t)0);      /* { dg-warning "output may be truncated before the last format character" } */
   T (2, "%lc",  (wint_t)0);
 
   /* The following could result in as few as a single byte and in as many

Reply via email to