On Tue, Feb 20, 2018 at 01:13:13PM -0700, Martin Sebor wrote:
> A safer and even more conservative alternative that should be
> equivalent to your approach while avoiding the sprintf regressions
> is to add another mode to the function and have it clear *minlen
> as an option. This lets the strlen code obtain the conservative
> lower bound without compromising the sprintf warnings.
I fail to see what setting *MINLEN to zero and *MAXLEN to all ones would be good for in the non-warning use cases: we simply don't know anything about the length there, so leaving both as NULL_TREE, i.e. returning false, is better. I'm offering two alternative patches, which use fuzzy == 0 for the previous !fuzzy, fuzzy == 1 for conservatively correct code that assumes strlen can't cross field/variable boundaries in compliant programs, and fuzzy == 2 for that plus whatever the warning code wants. Additionally, I've rewritten the COND_EXPR handling so that it matches the PHI handling exactly. The first patch doesn't change the 2-argument get_range_strlen and changes gimple_fold_builtin_strlen to use the 6-argument one; the second patch also changes the 2-argument get_range_strlen, similarly to what you've done in your patch. Tested on x86_64-linux and i686-linux; ok for trunk if it passes bootstrap/regtest? Which one? Jakub
2018-02-20 Jakub Jelinek <ja...@redhat.com> Martin Sebor <mse...@redhat.com> PR tree-optimization/84478 * gimple-fold.c (get_range_strlen): Make minlen const and assume it can't be NULL. Change FUZZY from bool to int, for 1 add PHI/COND_EXPR support which is conservatively correct, for 2 only stay conservative for maxlen. Formatting and comment capitalization fixes. Add warning that the 2 argument get_range_strlen is only usable for warnings, adjust 6 arg get_range_strlen caller and clear minmaxlen[0] and [1] if it returned false. (get_maxval_strlen): Adjust 6 arg get_range_strlen caller. (gimple_fold_builtin_strlen): Use the 6 arg get_range_strlen overload rather than 2 arg, use it only if it returns true and flexarray is false, pass 3 as type to it. * gcc.c-torture/execute/pr84478.c: New test. --- gcc/gimple-fold.c.jj 2018-02-19 19:57:03.424279589 +0100 +++ gcc/gimple-fold.c 2018-02-20 22:03:47.595265756 +0100 @@ -1283,13 +1283,16 @@ gimple_fold_builtin_memset (gimple_stmt_ value of ARG in LENGTH[0] and LENGTH[1], respectively. If ARG is an SSA name variable, follow its use-def chains. When TYPE == 0, if LENGTH[1] is not equal to the length we determine or - if we are unable to determine the length or value, return False. + if we are unable to determine the length or value, return false. VISITED is a bitmap of visited variables. TYPE is 0 if string length should be obtained, 1 for maximum string length and 2 for maximum value ARG can have. - When FUZZY is set and the length of a string cannot be determined, + When FUZZY is non-zero and the length of a string cannot be determined, the function instead considers as the maximum possible length the - size of a character array it may refer to. + size of a character array it may refer to. If FUZZY is 2, it will handle + PHIs and COND_EXPRs optimistically, if we can determine string length + minimum and maximum, it will use the minimum from the ones where it + can be determined. 
Set *FLEXP to true if the range of the string lengths has been obtained from the upper bound of an array at the end of a struct. Such an array may hold a string that's longer than its upper bound @@ -1297,14 +1300,13 @@ gimple_fold_builtin_memset (gimple_stmt_ static bool get_range_strlen (tree arg, tree length[2], bitmap *visited, int type, - bool fuzzy, bool *flexp) + int fuzzy, bool *flexp) { tree var, val = NULL_TREE; gimple *def_stmt; - /* The minimum and maximum length. The MAXLEN pointer stays unchanged - but MINLEN may be cleared during the execution of the function. */ - tree *minlen = length; + /* The minimum and maximum length. */ + tree *const minlen = length; tree *const maxlen = length + 1; if (TREE_CODE (arg) != SSA_NAME) @@ -1445,12 +1447,11 @@ get_range_strlen (tree arg, tree length[ if (!val) return false; - if (minlen - && (!*minlen - || (type > 0 - && TREE_CODE (*minlen) == INTEGER_CST - && TREE_CODE (val) == INTEGER_CST - && tree_int_cst_lt (val, *minlen)))) + if (!*minlen + || (type > 0 + && TREE_CODE (*minlen) == INTEGER_CST + && TREE_CODE (val) == INTEGER_CST + && tree_int_cst_lt (val, *minlen))) *minlen = val; if (*maxlen) @@ -1501,20 +1502,26 @@ get_range_strlen (tree arg, tree length[ } else if (gimple_assign_rhs_code (def_stmt) == COND_EXPR) { - tree op2 = gimple_assign_rhs2 (def_stmt); - tree op3 = gimple_assign_rhs3 (def_stmt); - return get_range_strlen (op2, length, visited, type, fuzzy, flexp) - && get_range_strlen (op3, length, visited, type, fuzzy, flexp); + tree ops[2] = { gimple_assign_rhs2 (def_stmt), + gimple_assign_rhs3 (def_stmt) }; + + for (unsigned int i = 0; i < 2; i++) + if (!get_range_strlen (ops[i], length, visited, type, fuzzy, + flexp)) + { + if (fuzzy == 2) + *maxlen = build_all_ones_cst (size_type_node); + else + return false; + } + return true; } return false; case GIMPLE_PHI: - { - /* All the arguments of the PHI node must have the same constant - length. 
*/ - unsigned i; - - for (i = 0; i < gimple_phi_num_args (def_stmt); i++) + /* All the arguments of the PHI node must have the same constant + length. */ + for (unsigned i = 0; i < gimple_phi_num_args (def_stmt); i++) { tree arg = gimple_phi_arg (def_stmt, i)->def; @@ -1529,13 +1536,12 @@ get_range_strlen (tree arg, tree length[ if (!get_range_strlen (arg, length, visited, type, fuzzy, flexp)) { - if (fuzzy) + if (fuzzy == 2) *maxlen = build_all_ones_cst (size_type_node); else return false; } } - } return true; default: @@ -1549,12 +1555,15 @@ get_range_strlen (tree arg, tree length[ character arrays, use the upper bound of the array as the maximum length. For example, given an expression like 'x ? array : "xyz"' and array declared as 'char array[8]', MINMAXLEN[0] will be set - to 3 and MINMAXLEN[1] to 7, the longest string that could be + to 0 and MINMAXLEN[1] to 7, the longest string that could be stored in array. Return true if the range of the string lengths has been obtained from the upper bound of an array at the end of a struct. Such an array may hold a string that's longer than its upper bound - due to it being used as a poor-man's flexible array member. */ + due to it being used as a poor-man's flexible array member. + + This function should be only used for warning code, as it doesn't + handle PHIs in a conservatively correct way. 
*/ bool get_range_strlen (tree arg, tree minmaxlen[2]) @@ -1565,7 +1574,11 @@ get_range_strlen (tree arg, tree minmaxl minmaxlen[1] = NULL_TREE; bool flexarray = false; - get_range_strlen (arg, minmaxlen, &visited, 1, true, &flexarray); + if (!get_range_strlen (arg, minmaxlen, &visited, 1, 2, &flexarray)) + { + minmaxlen[0] = NULL_TREE; + minmaxlen[1] = NULL_TREE; + } if (visited) BITMAP_FREE (visited); @@ -1580,7 +1593,7 @@ get_maxval_strlen (tree arg, int type) tree len[2] = { NULL_TREE, NULL_TREE }; bool dummy; - if (!get_range_strlen (arg, len, &visited, type, false, &dummy)) + if (!get_range_strlen (arg, len, &visited, type, 0, &dummy)) len[1] = NULL_TREE; if (visited) BITMAP_FREE (visited); @@ -3533,8 +3546,12 @@ gimple_fold_builtin_strlen (gimple_stmt_ wide_int minlen; wide_int maxlen; - tree lenrange[2]; - if (!get_range_strlen (gimple_call_arg (stmt, 0), lenrange) + tree lenrange[2] = { NULL_TREE, NULL_TREE }; + bitmap visited = NULL; + bool flexarray = false; + if (get_range_strlen (gimple_call_arg (stmt, 0), lenrange, &visited, + 1, 1, &flexarray) + && !flexarray && lenrange[0] && TREE_CODE (lenrange[0]) == INTEGER_CST && lenrange[1] && TREE_CODE (lenrange[1]) == INTEGER_CST) { @@ -3554,6 +3571,9 @@ gimple_fold_builtin_strlen (gimple_stmt_ maxlen = wi::to_wide (max_object_size (), prec) - 2; } + if (visited) + BITMAP_FREE (visited); + if (minlen == maxlen) { lenrange[0] = force_gimple_operand_gsi (gsi, lenrange[0], true, NULL, --- gcc/testsuite/gcc.c-torture/execute/pr84478.c.jj 2018-02-20 16:32:00.683086212 +0100 +++ gcc/testsuite/gcc.c-torture/execute/pr84478.c 2018-02-20 16:31:33.497081640 +0100 @@ -0,0 +1,49 @@ +/* PR tree-optimization/84478 */ + +long poolptr; +unsigned char *strpool; +static const char *poolfilearr[] = { + "mu", + "", +#define A "x", +#define B A "xx", A A "xxx", A A A A A +#define C B B B B B B B B B B +#define D C C C C C C C C C C + D C C C C C C C B B B + ((void *)0) +}; + +__attribute__((noipa)) long +makestring (void) +{ + 
return 1; +} + +__attribute__((noipa)) long +loadpoolstrings (long spare_size) +{ + const char *s; + long g = 0; + int i = 0, j = 0; + while ((s = poolfilearr[j++])) + { + int l = __builtin_strlen (s); + i += l; + if (i >= spare_size) return 0; + while (l-- > 0) strpool[poolptr++] = *s++; + g = makestring (); + } + return g; +} + +int +main () +{ + strpool = __builtin_malloc (4000); + if (!strpool) + return 0; + asm volatile ("" : : : "memory"); + volatile int r = loadpoolstrings (4000); + __builtin_free (strpool); + return 0; +}
2018-02-20 Jakub Jelinek <ja...@redhat.com> Martin Sebor <mse...@redhat.com> PR tree-optimization/84478 * gimple-fold.h (get_range_strlen): Add a bool argument defaulted to false. * gimple-fold.c (get_range_strlen): Make minlen const and assume it can't be NULL. Change FUZZY from bool to int, for 1 add PHI/COND_EXPR support which is conservatively correct, for 2 only stay conservative for maxlen. Formatting and comment capitalization fixes. Add STRICT argument to the 2 argument get_range_strlen, adjust 6 arg get_range_strlen caller and clear minmaxlen[0] and [1] if it returned false. (get_maxval_strlen): Adjust 6 arg get_range_strlen caller. (gimple_fold_builtin_strlen): Pass true as last argument to get_range_strlen. * gcc.c-torture/execute/pr84478.c: New test. --- gcc/gimple-fold.h.jj 2018-01-03 10:19:55.771534056 +0100 +++ gcc/gimple-fold.h 2018-02-20 22:13:24.012326866 +0100 @@ -25,7 +25,7 @@ along with GCC; see the file COPYING3. extern tree create_tmp_reg_or_ssa_name (tree, gimple *stmt = NULL); extern tree canonicalize_constructor_val (tree, tree); extern tree get_symbol_constant_value (tree); -extern bool get_range_strlen (tree, tree[2]); +extern bool get_range_strlen (tree, tree[2], bool = false); extern tree get_maxval_strlen (tree, int); extern void gimplify_and_update_call_from_tree (gimple_stmt_iterator *, tree); extern bool fold_stmt (gimple_stmt_iterator *); --- gcc/gimple-fold.c.jj 2018-02-19 19:57:03.424279589 +0100 +++ gcc/gimple-fold.c 2018-02-20 22:17:32.460373162 +0100 @@ -1283,13 +1283,16 @@ gimple_fold_builtin_memset (gimple_stmt_ value of ARG in LENGTH[0] and LENGTH[1], respectively. If ARG is an SSA name variable, follow its use-def chains. When TYPE == 0, if LENGTH[1] is not equal to the length we determine or - if we are unable to determine the length or value, return False. + if we are unable to determine the length or value, return false. VISITED is a bitmap of visited variables. 
TYPE is 0 if string length should be obtained, 1 for maximum string length and 2 for maximum value ARG can have. - When FUZZY is set and the length of a string cannot be determined, + When FUZZY is non-zero and the length of a string cannot be determined, the function instead considers as the maximum possible length the - size of a character array it may refer to. + size of a character array it may refer to. If FUZZY is 2, it will handle + PHIs and COND_EXPRs optimistically, if we can determine string length + minimum and maximum, it will use the minimum from the ones where it + can be determined. Set *FLEXP to true if the range of the string lengths has been obtained from the upper bound of an array at the end of a struct. Such an array may hold a string that's longer than its upper bound @@ -1297,14 +1300,13 @@ gimple_fold_builtin_memset (gimple_stmt_ static bool get_range_strlen (tree arg, tree length[2], bitmap *visited, int type, - bool fuzzy, bool *flexp) + int fuzzy, bool *flexp) { tree var, val = NULL_TREE; gimple *def_stmt; - /* The minimum and maximum length. The MAXLEN pointer stays unchanged - but MINLEN may be cleared during the execution of the function. */ - tree *minlen = length; + /* The minimum and maximum length. 
*/ + tree *const minlen = length; tree *const maxlen = length + 1; if (TREE_CODE (arg) != SSA_NAME) @@ -1445,12 +1447,11 @@ get_range_strlen (tree arg, tree length[ if (!val) return false; - if (minlen - && (!*minlen - || (type > 0 - && TREE_CODE (*minlen) == INTEGER_CST - && TREE_CODE (val) == INTEGER_CST - && tree_int_cst_lt (val, *minlen)))) + if (!*minlen + || (type > 0 + && TREE_CODE (*minlen) == INTEGER_CST + && TREE_CODE (val) == INTEGER_CST + && tree_int_cst_lt (val, *minlen))) *minlen = val; if (*maxlen) @@ -1501,20 +1502,26 @@ get_range_strlen (tree arg, tree length[ } else if (gimple_assign_rhs_code (def_stmt) == COND_EXPR) { - tree op2 = gimple_assign_rhs2 (def_stmt); - tree op3 = gimple_assign_rhs3 (def_stmt); - return get_range_strlen (op2, length, visited, type, fuzzy, flexp) - && get_range_strlen (op3, length, visited, type, fuzzy, flexp); + tree ops[2] = { gimple_assign_rhs2 (def_stmt), + gimple_assign_rhs3 (def_stmt) }; + + for (unsigned int i = 0; i < 2; i++) + if (!get_range_strlen (ops[i], length, visited, type, fuzzy, + flexp)) + { + if (fuzzy == 2) + *maxlen = build_all_ones_cst (size_type_node); + else + return false; + } + return true; } return false; case GIMPLE_PHI: - { - /* All the arguments of the PHI node must have the same constant - length. */ - unsigned i; - - for (i = 0; i < gimple_phi_num_args (def_stmt); i++) + /* All the arguments of the PHI node must have the same constant + length. */ + for (unsigned i = 0; i < gimple_phi_num_args (def_stmt); i++) { tree arg = gimple_phi_arg (def_stmt, i)->def; @@ -1529,13 +1536,12 @@ get_range_strlen (tree arg, tree length[ if (!get_range_strlen (arg, length, visited, type, fuzzy, flexp)) { - if (fuzzy) + if (fuzzy == 2) *maxlen = build_all_ones_cst (size_type_node); else return false; } } - } return true; default: @@ -1549,15 +1555,21 @@ get_range_strlen (tree arg, tree length[ character arrays, use the upper bound of the array as the maximum length. 
For example, given an expression like 'x ? array : "xyz"' and array declared as 'char array[8]', MINMAXLEN[0] will be set - to 3 and MINMAXLEN[1] to 7, the longest string that could be + to 0 and MINMAXLEN[1] to 7, the longest string that could be stored in array. Return true if the range of the string lengths has been obtained from the upper bound of an array at the end of a struct. Such an array may hold a string that's longer than its upper bound - due to it being used as a poor-man's flexible array member. */ + due to it being used as a poor-man's flexible array member. + + STRICT is true if it will handle PHIs and COND_EXPRs conservatively + and false if PHIs and COND_EXPRs are to be handled optimistically, + if we can determine string length minimum and maximum; it will use + the minimum from the ones where it can be determined. + STRICT false should be only used for warning code. */ bool -get_range_strlen (tree arg, tree minmaxlen[2]) +get_range_strlen (tree arg, tree minmaxlen[2], bool strict) { bitmap visited = NULL; @@ -1565,7 +1577,12 @@ get_range_strlen (tree arg, tree minmaxl minmaxlen[1] = NULL_TREE; bool flexarray = false; - get_range_strlen (arg, minmaxlen, &visited, 1, true, &flexarray); + if (!get_range_strlen (arg, minmaxlen, &visited, 1, strict ? 
1 : 2, + &flexarray)) + { + minmaxlen[0] = NULL_TREE; + minmaxlen[1] = NULL_TREE; + } if (visited) BITMAP_FREE (visited); @@ -1580,7 +1597,7 @@ get_maxval_strlen (tree arg, int type) tree len[2] = { NULL_TREE, NULL_TREE }; bool dummy; - if (!get_range_strlen (arg, len, &visited, type, false, &dummy)) + if (!get_range_strlen (arg, len, &visited, type, 0, &dummy)) len[1] = NULL_TREE; if (visited) BITMAP_FREE (visited); @@ -3534,7 +3551,7 @@ gimple_fold_builtin_strlen (gimple_stmt_ wide_int maxlen; tree lenrange[2]; - if (!get_range_strlen (gimple_call_arg (stmt, 0), lenrange) + if (!get_range_strlen (gimple_call_arg (stmt, 0), lenrange, true) && lenrange[0] && TREE_CODE (lenrange[0]) == INTEGER_CST && lenrange[1] && TREE_CODE (lenrange[1]) == INTEGER_CST) { --- gcc/testsuite/gcc.c-torture/execute/pr84478.c.jj 2018-02-20 16:32:00.683086212 +0100 +++ gcc/testsuite/gcc.c-torture/execute/pr84478.c 2018-02-20 16:31:33.497081640 +0100 @@ -0,0 +1,49 @@ +/* PR tree-optimization/84478 */ + +long poolptr; +unsigned char *strpool; +static const char *poolfilearr[] = { + "mu", + "", +#define A "x", +#define B A "xx", A A "xxx", A A A A A +#define C B B B B B B B B B B +#define D C C C C C C C C C C + D C C C C C C C B B B + ((void *)0) +}; + +__attribute__((noipa)) long +makestring (void) +{ + return 1; +} + +__attribute__((noipa)) long +loadpoolstrings (long spare_size) +{ + const char *s; + long g = 0; + int i = 0, j = 0; + while ((s = poolfilearr[j++])) + { + int l = __builtin_strlen (s); + i += l; + if (i >= spare_size) return 0; + while (l-- > 0) strpool[poolptr++] = *s++; + g = makestring (); + } + return g; +} + +int +main () +{ + strpool = __builtin_malloc (4000); + if (!strpool) + return 0; + asm volatile ("" : : : "memory"); + volatile int r = loadpoolstrings (4000); + __builtin_free (strpool); + return 0; +}