On Tue, Feb 20, 2018 at 01:13:13PM -0700, Martin Sebor wrote:
> A safer and even more conservative alternative that should be
> equivalent to your approach while avoiding the sprintf regressions
> is to add another mode to the function and have it clear *minlen
> as an option.  This lets the strlen code obtain the conservative
> lower bound without compromising the sprintf warnings.

I fail to see what good it would do to set *MINLEN to zero and
*MAXLEN to all ones for the non-warning use cases; we simply don't know
anything about the length there, so leaving both as NULL_TREE, i.e.
returning false, is better.  I'm offering two alternative patches which use
fuzzy == 0 for the previous !fuzzy, fuzzy == 1 for conservatively correct
code that assumes strlen can't cross field/variable boundaries in
compliant programs, and fuzzy == 2 which does that plus whatever the warning
code wants.  Additionally, I've rewritten the COND_EXPR handling so that
it matches the PHI handling exactly.

The first patch doesn't change the signature of the 2 argument
get_range_strlen and changes gimple_fold_builtin_strlen to use the
6 argument one; the second patch also changes the 2 argument
get_range_strlen, similarly to what you've done in your patch.

Tested on x86_64-linux and i686-linux, ok for trunk if it passes
bootstrap/regtest?  Which one?

        Jakub
2018-02-20  Jakub Jelinek  <ja...@redhat.com>
            Martin Sebor  <mse...@redhat.com>

        PR tree-optimization/84478
        * gimple-fold.c (get_range_strlen): Make minlen const and assume it
        can't be NULL.  Change FUZZY from bool to int, for 1 add PHI/COND_EXPR
        support which is conservatively correct, for 2 only stay conservative
        for maxlen.  Formatting and comment capitalization fixes.  Add warning
        that the 2 argument get_range_strlen is only usable for warnings, adjust
        6 arg get_range_strlen caller and clear minmaxlen[0] and [1] if it
        returned false.
        (get_maxval_strlen): Adjust 6 arg get_range_strlen caller.
        (gimple_fold_builtin_strlen): Use the 6 arg get_range_strlen overload
        rather than 2 arg, use it only if it returns true and flexarray is
        false, pass 1 as type and 1 as fuzzy to it.

        * gcc.c-torture/execute/pr84478.c: New test.

--- gcc/gimple-fold.c.jj        2018-02-19 19:57:03.424279589 +0100
+++ gcc/gimple-fold.c   2018-02-20 22:03:47.595265756 +0100
@@ -1283,13 +1283,16 @@ gimple_fold_builtin_memset (gimple_stmt_
    value of ARG in LENGTH[0] and LENGTH[1], respectively.
    If ARG is an SSA name variable, follow its use-def chains.  When
    TYPE == 0, if LENGTH[1] is not equal to the length we determine or
-   if we are unable to determine the length or value, return False.
+   if we are unable to determine the length or value, return false.
    VISITED is a bitmap of visited variables.
    TYPE is 0 if string length should be obtained, 1 for maximum string
    length and 2 for maximum value ARG can have.
-   When FUZZY is set and the length of a string cannot be determined,
+   When FUZZY is non-zero and the length of a string cannot be determined,
    the function instead considers as the maximum possible length the
-   size of a character array it may refer to.
+   size of a character array it may refer to.  If FUZZY is 2, it will handle
+   PHIs and COND_EXPRs optimistically, if we can determine string length
+   minimum and maximum, it will use the minimum from the ones where it
+   can be determined.
    Set *FLEXP to true if the range of the string lengths has been
    obtained from the upper bound of an array at the end of a struct.
    Such an array may hold a string that's longer than its upper bound
@@ -1297,14 +1300,13 @@ gimple_fold_builtin_memset (gimple_stmt_
 
 static bool
 get_range_strlen (tree arg, tree length[2], bitmap *visited, int type,
-                 bool fuzzy, bool *flexp)
+                 int fuzzy, bool *flexp)
 {
   tree var, val = NULL_TREE;
   gimple *def_stmt;
 
-  /* The minimum and maximum length.  The MAXLEN pointer stays unchanged
-     but MINLEN may be cleared during the execution of the function.  */
-  tree *minlen = length;
+  /* The minimum and maximum length.  */
+  tree *const minlen = length;
   tree *const maxlen = length + 1;
 
   if (TREE_CODE (arg) != SSA_NAME)
@@ -1445,12 +1447,11 @@ get_range_strlen (tree arg, tree length[
       if (!val)
        return false;
 
-      if (minlen
-         && (!*minlen
-             || (type > 0
-                 && TREE_CODE (*minlen) == INTEGER_CST
-                 && TREE_CODE (val) == INTEGER_CST
-                 && tree_int_cst_lt (val, *minlen))))
+      if (!*minlen
+         || (type > 0
+             && TREE_CODE (*minlen) == INTEGER_CST
+             && TREE_CODE (val) == INTEGER_CST
+             && tree_int_cst_lt (val, *minlen)))
        *minlen = val;
 
       if (*maxlen)
@@ -1501,20 +1502,26 @@ get_range_strlen (tree arg, tree length[
           }
        else if (gimple_assign_rhs_code (def_stmt) == COND_EXPR)
          {
-           tree op2 = gimple_assign_rhs2 (def_stmt);
-           tree op3 = gimple_assign_rhs3 (def_stmt);
-           return get_range_strlen (op2, length, visited, type, fuzzy, flexp)
-             && get_range_strlen (op3, length, visited, type, fuzzy, flexp);
+           tree ops[2] = { gimple_assign_rhs2 (def_stmt),
+                           gimple_assign_rhs3 (def_stmt) };
+
+           for (unsigned int i = 0; i < 2; i++)
+             if (!get_range_strlen (ops[i], length, visited, type, fuzzy,
+                                    flexp))
+               {
+                 if (fuzzy == 2)
+                   *maxlen = build_all_ones_cst (size_type_node);
+                 else
+                   return false;
+               }
+           return true;
          }
         return false;
 
       case GIMPLE_PHI:
-       {
-         /* All the arguments of the PHI node must have the same constant
-            length.  */
-         unsigned i;
-
-         for (i = 0; i < gimple_phi_num_args (def_stmt); i++)
+       /* All the arguments of the PHI node must have the same constant
+          length.  */
+       for (unsigned i = 0; i < gimple_phi_num_args (def_stmt); i++)
           {
             tree arg = gimple_phi_arg (def_stmt, i)->def;
 
@@ -1529,13 +1536,12 @@ get_range_strlen (tree arg, tree length[
 
            if (!get_range_strlen (arg, length, visited, type, fuzzy, flexp))
              {
-               if (fuzzy)
+               if (fuzzy == 2)
                  *maxlen = build_all_ones_cst (size_type_node);
                else
                  return false;
              }
           }
-        }
         return true;
 
       default:
@@ -1549,12 +1555,15 @@ get_range_strlen (tree arg, tree length[
    character arrays, use the upper bound of the array as the maximum
    length.  For example, given an expression like 'x ? array : "xyz"'
    and array declared as 'char array[8]', MINMAXLEN[0] will be set
-   to 3 and MINMAXLEN[1] to 7, the longest string that could be
+   to 0 and MINMAXLEN[1] to 7, the longest string that could be
    stored in array.
    Return true if the range of the string lengths has been obtained
    from the upper bound of an array at the end of a struct.  Such
    an array may hold a string that's longer than its upper bound
-   due to it being used as a poor-man's flexible array member.  */
+   due to it being used as a poor-man's flexible array member.
+
+   This function should be only used for warning code, as it doesn't
+   handle PHIs in a conservatively correct way.  */
 
 bool
 get_range_strlen (tree arg, tree minmaxlen[2])
@@ -1565,7 +1574,11 @@ get_range_strlen (tree arg, tree minmaxl
   minmaxlen[1] = NULL_TREE;
 
   bool flexarray = false;
-  get_range_strlen (arg, minmaxlen, &visited, 1, true, &flexarray);
+  if (!get_range_strlen (arg, minmaxlen, &visited, 1, 2, &flexarray))
+    {
+      minmaxlen[0] = NULL_TREE;
+      minmaxlen[1] = NULL_TREE;
+    }
 
   if (visited)
     BITMAP_FREE (visited);
@@ -1580,7 +1593,7 @@ get_maxval_strlen (tree arg, int type)
   tree len[2] = { NULL_TREE, NULL_TREE };
 
   bool dummy;
-  if (!get_range_strlen (arg, len, &visited, type, false, &dummy))
+  if (!get_range_strlen (arg, len, &visited, type, 0, &dummy))
     len[1] = NULL_TREE;
   if (visited)
     BITMAP_FREE (visited);
@@ -3533,8 +3546,12 @@ gimple_fold_builtin_strlen (gimple_stmt_
   wide_int minlen;
   wide_int maxlen;
 
-  tree lenrange[2];
-  if (!get_range_strlen (gimple_call_arg (stmt, 0), lenrange)
+  tree lenrange[2] = { NULL_TREE, NULL_TREE };
+  bitmap visited = NULL;
+  bool flexarray = false;
+  if (get_range_strlen (gimple_call_arg (stmt, 0), lenrange, &visited,
+                       1, 1, &flexarray)
+      && !flexarray
       && lenrange[0] && TREE_CODE (lenrange[0]) == INTEGER_CST
       && lenrange[1] && TREE_CODE (lenrange[1]) == INTEGER_CST)
     {
@@ -3554,6 +3571,9 @@ gimple_fold_builtin_strlen (gimple_stmt_
       maxlen = wi::to_wide (max_object_size (), prec) - 2;
     }
 
+  if (visited)
+    BITMAP_FREE (visited);
+
   if (minlen == maxlen)
     {
       lenrange[0] = force_gimple_operand_gsi (gsi, lenrange[0], true, NULL,
--- gcc/testsuite/gcc.c-torture/execute/pr84478.c.jj    2018-02-20 16:32:00.683086212 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr84478.c       2018-02-20 16:31:33.497081640 +0100
@@ -0,0 +1,49 @@
+/* PR tree-optimization/84478 */
+
+long poolptr;
+unsigned char *strpool;
+static const char *poolfilearr[] = {
+  "mu",
+  "",
+#define A "x",
+#define B A "xx", A A "xxx", A A A A A
+#define C B B B B B B B B B B
+#define D C C C C C C C C C C
+  D C C C C C C C B B B
+ ((void *)0) 
+};
+
+__attribute__((noipa)) long
+makestring (void)
+{
+  return 1;
+}
+
+__attribute__((noipa)) long
+loadpoolstrings (long spare_size)
+{
+  const char *s;
+  long g = 0;
+  int i = 0, j = 0;
+  while ((s = poolfilearr[j++]))
+    {
+      int l = __builtin_strlen (s);
+      i += l;
+      if (i >= spare_size) return 0;
+      while (l-- > 0) strpool[poolptr++] = *s++;
+      g = makestring ();
+    }
+  return g;
+}
+
+int
+main ()
+{
+  strpool = __builtin_malloc (4000);
+  if (!strpool)
+    return 0;
+  asm volatile ("" : : : "memory");
+  volatile int r = loadpoolstrings (4000);
+  __builtin_free (strpool);
+  return 0;
+}
2018-02-20  Jakub Jelinek  <ja...@redhat.com>
            Martin Sebor  <mse...@redhat.com>

        PR tree-optimization/84478
        * gimple-fold.h (get_range_strlen): Add a bool argument defaulted to
        false.
        * gimple-fold.c (get_range_strlen): Make minlen const and assume it
        can't be NULL.  Change FUZZY from bool to int, for 1 add PHI/COND_EXPR
        support which is conservatively correct, for 2 only stay conservative
        for maxlen.  Formatting and comment capitalization fixes.  Add STRICT
        argument to the 2 argument get_range_strlen, adjust 6 arg
        get_range_strlen caller and clear minmaxlen[0] and [1] if it returned
        false.
        (get_maxval_strlen): Adjust 6 arg get_range_strlen caller.
        (gimple_fold_builtin_strlen): Pass true as last argument to
        get_range_strlen.

        * gcc.c-torture/execute/pr84478.c: New test.

--- gcc/gimple-fold.h.jj        2018-01-03 10:19:55.771534056 +0100
+++ gcc/gimple-fold.h   2018-02-20 22:13:24.012326866 +0100
@@ -25,7 +25,7 @@ along with GCC; see the file COPYING3.
 extern tree create_tmp_reg_or_ssa_name (tree, gimple *stmt = NULL);
 extern tree canonicalize_constructor_val (tree, tree);
 extern tree get_symbol_constant_value (tree);
-extern bool get_range_strlen (tree, tree[2]);
+extern bool get_range_strlen (tree, tree[2], bool = false);
 extern tree get_maxval_strlen (tree, int);
 extern void gimplify_and_update_call_from_tree (gimple_stmt_iterator *, tree);
 extern bool fold_stmt (gimple_stmt_iterator *);
--- gcc/gimple-fold.c.jj        2018-02-19 19:57:03.424279589 +0100
+++ gcc/gimple-fold.c   2018-02-20 22:17:32.460373162 +0100
@@ -1283,13 +1283,16 @@ gimple_fold_builtin_memset (gimple_stmt_
    value of ARG in LENGTH[0] and LENGTH[1], respectively.
    If ARG is an SSA name variable, follow its use-def chains.  When
    TYPE == 0, if LENGTH[1] is not equal to the length we determine or
-   if we are unable to determine the length or value, return False.
+   if we are unable to determine the length or value, return false.
    VISITED is a bitmap of visited variables.
    TYPE is 0 if string length should be obtained, 1 for maximum string
    length and 2 for maximum value ARG can have.
-   When FUZZY is set and the length of a string cannot be determined,
+   When FUZZY is non-zero and the length of a string cannot be determined,
    the function instead considers as the maximum possible length the
-   size of a character array it may refer to.
+   size of a character array it may refer to.  If FUZZY is 2, it will handle
+   PHIs and COND_EXPRs optimistically, if we can determine string length
+   minimum and maximum, it will use the minimum from the ones where it
+   can be determined.
    Set *FLEXP to true if the range of the string lengths has been
    obtained from the upper bound of an array at the end of a struct.
    Such an array may hold a string that's longer than its upper bound
@@ -1297,14 +1300,13 @@ gimple_fold_builtin_memset (gimple_stmt_
 
 static bool
 get_range_strlen (tree arg, tree length[2], bitmap *visited, int type,
-                 bool fuzzy, bool *flexp)
+                 int fuzzy, bool *flexp)
 {
   tree var, val = NULL_TREE;
   gimple *def_stmt;
 
-  /* The minimum and maximum length.  The MAXLEN pointer stays unchanged
-     but MINLEN may be cleared during the execution of the function.  */
-  tree *minlen = length;
+  /* The minimum and maximum length.  */
+  tree *const minlen = length;
   tree *const maxlen = length + 1;
 
   if (TREE_CODE (arg) != SSA_NAME)
@@ -1445,12 +1447,11 @@ get_range_strlen (tree arg, tree length[
       if (!val)
        return false;
 
-      if (minlen
-         && (!*minlen
-             || (type > 0
-                 && TREE_CODE (*minlen) == INTEGER_CST
-                 && TREE_CODE (val) == INTEGER_CST
-                 && tree_int_cst_lt (val, *minlen))))
+      if (!*minlen
+         || (type > 0
+             && TREE_CODE (*minlen) == INTEGER_CST
+             && TREE_CODE (val) == INTEGER_CST
+             && tree_int_cst_lt (val, *minlen)))
        *minlen = val;
 
       if (*maxlen)
@@ -1501,20 +1502,26 @@ get_range_strlen (tree arg, tree length[
           }
        else if (gimple_assign_rhs_code (def_stmt) == COND_EXPR)
          {
-           tree op2 = gimple_assign_rhs2 (def_stmt);
-           tree op3 = gimple_assign_rhs3 (def_stmt);
-           return get_range_strlen (op2, length, visited, type, fuzzy, flexp)
-             && get_range_strlen (op3, length, visited, type, fuzzy, flexp);
+           tree ops[2] = { gimple_assign_rhs2 (def_stmt),
+                           gimple_assign_rhs3 (def_stmt) };
+
+           for (unsigned int i = 0; i < 2; i++)
+             if (!get_range_strlen (ops[i], length, visited, type, fuzzy,
+                                    flexp))
+               {
+                 if (fuzzy == 2)
+                   *maxlen = build_all_ones_cst (size_type_node);
+                 else
+                   return false;
+               }
+           return true;
          }
         return false;
 
       case GIMPLE_PHI:
-       {
-         /* All the arguments of the PHI node must have the same constant
-            length.  */
-         unsigned i;
-
-         for (i = 0; i < gimple_phi_num_args (def_stmt); i++)
+       /* All the arguments of the PHI node must have the same constant
+          length.  */
+       for (unsigned i = 0; i < gimple_phi_num_args (def_stmt); i++)
           {
             tree arg = gimple_phi_arg (def_stmt, i)->def;
 
@@ -1529,13 +1536,12 @@ get_range_strlen (tree arg, tree length[
 
            if (!get_range_strlen (arg, length, visited, type, fuzzy, flexp))
              {
-               if (fuzzy)
+               if (fuzzy == 2)
                  *maxlen = build_all_ones_cst (size_type_node);
                else
                  return false;
              }
           }
-        }
         return true;
 
       default:
@@ -1549,15 +1555,21 @@ get_range_strlen (tree arg, tree length[
    character arrays, use the upper bound of the array as the maximum
    length.  For example, given an expression like 'x ? array : "xyz"'
    and array declared as 'char array[8]', MINMAXLEN[0] will be set
-   to 3 and MINMAXLEN[1] to 7, the longest string that could be
+   to 0 and MINMAXLEN[1] to 7, the longest string that could be
    stored in array.
    Return true if the range of the string lengths has been obtained
    from the upper bound of an array at the end of a struct.  Such
    an array may hold a string that's longer than its upper bound
-   due to it being used as a poor-man's flexible array member.  */
+   due to it being used as a poor-man's flexible array member.
+
+   STRICT is true if it will handle PHIs and COND_EXPRs conservatively
+   and false if PHIs and COND_EXPRs are to be handled optimistically,
+   if we can determine string length minimum and maximum; it will use
+   the minimum from the ones where it can be determined.
+   STRICT false should be only used for warning code.  */
 
 bool
-get_range_strlen (tree arg, tree minmaxlen[2])
+get_range_strlen (tree arg, tree minmaxlen[2], bool strict)
 {
   bitmap visited = NULL;
 
@@ -1565,7 +1577,12 @@ get_range_strlen (tree arg, tree minmaxl
   minmaxlen[1] = NULL_TREE;
 
   bool flexarray = false;
-  get_range_strlen (arg, minmaxlen, &visited, 1, true, &flexarray);
+  if (!get_range_strlen (arg, minmaxlen, &visited, 1, strict ? 1 : 2,
+                        &flexarray))
+    {
+      minmaxlen[0] = NULL_TREE;
+      minmaxlen[1] = NULL_TREE;
+    }
 
   if (visited)
     BITMAP_FREE (visited);
@@ -1580,7 +1597,7 @@ get_maxval_strlen (tree arg, int type)
   tree len[2] = { NULL_TREE, NULL_TREE };
 
   bool dummy;
-  if (!get_range_strlen (arg, len, &visited, type, false, &dummy))
+  if (!get_range_strlen (arg, len, &visited, type, 0, &dummy))
     len[1] = NULL_TREE;
   if (visited)
     BITMAP_FREE (visited);
@@ -3534,7 +3551,7 @@ gimple_fold_builtin_strlen (gimple_stmt_
   wide_int maxlen;
 
   tree lenrange[2];
-  if (!get_range_strlen (gimple_call_arg (stmt, 0), lenrange)
+  if (!get_range_strlen (gimple_call_arg (stmt, 0), lenrange, true)
       && lenrange[0] && TREE_CODE (lenrange[0]) == INTEGER_CST
       && lenrange[1] && TREE_CODE (lenrange[1]) == INTEGER_CST)
     {
--- gcc/testsuite/gcc.c-torture/execute/pr84478.c.jj    2018-02-20 16:32:00.683086212 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr84478.c       2018-02-20 16:31:33.497081640 +0100
@@ -0,0 +1,49 @@
+/* PR tree-optimization/84478 */
+
+long poolptr;
+unsigned char *strpool;
+static const char *poolfilearr[] = {
+  "mu",
+  "",
+#define A "x",
+#define B A "xx", A A "xxx", A A A A A
+#define C B B B B B B B B B B
+#define D C C C C C C C C C C
+  D C C C C C C C B B B
+ ((void *)0) 
+};
+
+__attribute__((noipa)) long
+makestring (void)
+{
+  return 1;
+}
+
+__attribute__((noipa)) long
+loadpoolstrings (long spare_size)
+{
+  const char *s;
+  long g = 0;
+  int i = 0, j = 0;
+  while ((s = poolfilearr[j++]))
+    {
+      int l = __builtin_strlen (s);
+      i += l;
+      if (i >= spare_size) return 0;
+      while (l-- > 0) strpool[poolptr++] = *s++;
+      g = makestring ();
+    }
+  return g;
+}
+
+int
+main ()
+{
+  strpool = __builtin_malloc (4000);
+  if (!strpool)
+    return 0;
+  asm volatile ("" : : : "memory");
+  volatile int r = loadpoolstrings (4000);
+  __builtin_free (strpool);
+  return 0;
+}

Reply via email to