Hi,

I am not sure whether it’s proper to send this patch during this late stage.
however, since the patch itself is quite straightforward, I decided to send it 
now. 

=================

2nd Patch for PR78009
Patch for PR83026

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78809 
<https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78809>
Inline strcmp with small constant strings

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83026 
<https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83026>
missing strlen optimization for strcmp of unequal strings

The design doc for PR78809 is at:
https://www.mail-archive.com/gcc@gcc.gnu.org/msg83822.html 
<https://www.mail-archive.com/gcc@gcc.gnu.org/msg83822.html>

this patch is for the second part of change of PR78809 and PR83026:

B. for strncmp (s1, s2, n) (!)= 0 or strcmp (s1, s2) (!)= 0

  B.1. (PR83026) When both arguments are constant strings and it's a strcmp:
     * if the length of the strings are NOT equal, we can safely fold the call
       to a non-zero value.
     * otherwise, do nothing now.

  B.2. (PR78809) When one of the arguments is a constant string, try to replace
  the call with a __builtin_memcmp_eq call where possible, i.e:

  strncmp (s, STR, C) (!)= 0 in which, s is a pointer to a string, STR is a
  constant string, C is a constant.
    if (C <= strlen(STR) && sizeof_array(s) > C)
      {
        replace this call with
        memcmp (s, STR, C) (!)= 0
      }
    if (C > strlen(STR)
      {
        it can be safely treated as a call to strcmp (s, STR) (!)= 0
        can handled by the following strcmp.
      }

  strcmp (s, STR) (!)= 0 in which, s is a pointer to a string, STR is a
  constant string.
    if  (sizeof_array(s) > strlen(STR))
      {
        replace this call with
        memcmp (s, STR, strlen(STR)+1) (!)= 0
      }

  (NOTE, currently, memcmp(s1, s2, N) (!)=0 has been optimized to a simple
   sequence to access all bytes and accumulate the overall result in GCC by
   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52171 
<https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52171>

   as a result, such str(n)cmp call would like to be replaced by simple
   sequences to access all types and accumulate the overall results.

adding test case strcmpopt_2.c into gcc.dg for part B of PR78809
adding test case strcmpopt_3.c into gcc.dg for PR83026

bootstraped and tested on both X86 and Aarch64. no regression.

Okay for trunk?

thanks.

Qing

================
gcc/ChangeLog:

2017-12-11  Qing Zhao  <qing.z...@oracle.com <mailto:qing.z...@oracle.com>>

        PR middle-end/78809
        PR middle-end/83026
        * tree-ssa-strlen.c (compute_string_length): New function.
        (handle_builtin_string_cmp): New function to handle calls to
        string compare functions.
        (strlen_optimize_stmt): Add handling to builtin string compare
        calls. 

gcc/testsuite/ChangeLog:

2017-12-11  Qing Zhao  <qing.z...@oracle.com <mailto:qing.z...@oracle.com>>

        PR middle-end/78809
        * gcc.dg/strcmpopt_2.c: New testcase.

        PR middle-end/83026
        * gcc.dg/strcmpopt_3.c: New testcase.
        
============
---
 gcc/testsuite/gcc.dg/strcmpopt_2.c |  67 +++++++++++++
 gcc/testsuite/gcc.dg/strcmpopt_3.c |  27 +++++
 gcc/tree-ssa-strlen.c              | 197 +++++++++++++++++++++++++++++++++++++
 3 files changed, 291 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/strcmpopt_2.c
 create mode 100644 gcc/testsuite/gcc.dg/strcmpopt_3.c

diff --git a/gcc/testsuite/gcc.dg/strcmpopt_2.c 
b/gcc/testsuite/gcc.dg/strcmpopt_2.c
new file mode 100644
index 0000000..ac8ff39
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strcmpopt_2.c
@@ -0,0 +1,67 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-tree-strlen" } */
+
+char s[100] = {'a','b','c','d'};
+typedef struct { int x; char s[8]; } S;
+
+__attribute__ ((noinline)) int 
+f1 (S *s) 
+{ 
+  return __builtin_strcmp (s->s, "abc") != 0; 
+}
+
+__attribute__ ((noinline)) int 
+f2 (void) 
+{ 
+  return __builtin_strcmp (s, "abc") != 0; 
+}
+
+__attribute__ ((noinline)) int 
+f3 (S *s) 
+{ 
+  return __builtin_strcmp ("abc", s->s) != 0; 
+}
+
+__attribute__ ((noinline)) int 
+f4 (void) 
+{ 
+  return __builtin_strcmp ("abc", s) != 0; 
+}
+
+__attribute__ ((noinline)) int 
+f5 (S *s) 
+{ 
+  return __builtin_strncmp (s->s, "abc", 3) != 0; 
+}
+
+__attribute__ ((noinline)) int 
+f6 (void) 
+{ 
+  return __builtin_strncmp (s, "abc", 2) != 0; 
+}
+
+__attribute__ ((noinline)) int 
+f7 (S *s) 
+{ 
+  return __builtin_strncmp ("abc", s->s, 3) != 0; 
+}
+
+__attribute__ ((noinline)) int 
+f8 (void) 
+{ 
+  return __builtin_strncmp ("abc", s, 2) != 0; 
+}
+
+int main (void)
+{
+  S ss = {2, {'a','b','c'}};
+
+  if (f1 (&ss) != 0 || f2 () != 1 || f3 (&ss) != 0 ||
+      f4 () != 1 || f5 (&ss) != 0 || f6 () != 0 ||
+      f7 (&ss) != 0 || f8 () != 0)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "memcmp_eq \\(" 8 "strlen" } } */
diff --git a/gcc/testsuite/gcc.dg/strcmpopt_3.c 
b/gcc/testsuite/gcc.dg/strcmpopt_3.c
new file mode 100644
index 0000000..a3be431
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strcmpopt_3.c
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-tree-strlen" } */
+
+__attribute__ ((noinline)) int 
+f1 (void) 
+{ 
+  char *s= "abcd";
+  return __builtin_strcmp(s, "abc") != 0; 
+}
+
+__attribute__ ((noinline)) int
+f2 (void) 
+{ 
+  char *s = "ab";
+  return __builtin_strcmp("abc", s) != 0; 
+}
+
+int main (void)
+{
+  if (f1 () != 1 
+      || f2 () != 1)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "strcmp" 0 "strlen" } } */
diff --git a/gcc/tree-ssa-strlen.c b/gcc/tree-ssa-strlen.c
index 48b9241..aafa574 100644
--- a/gcc/tree-ssa-strlen.c
+++ b/gcc/tree-ssa-strlen.c
@@ -2541,6 +2541,198 @@ handle_builtin_memcmp (gimple_stmt_iterator *gsi)
   return false;
 }
 
+/* Given an index to the strinfo vector, compute the string length for the
+   corresponding string. Return -1 when unknown.  */
+ 
+static HOST_WIDE_INT 
+compute_string_length (int idx)
+{
+  HOST_WIDE_INT string_leni = -1; 
+  gcc_assert (idx != 0);
+
+  if (idx < 0)
+    string_leni = ~idx;
+  else
+    {
+      strinfo *si = get_strinfo (idx);
+      if (si)
+       {
+         tree const_string_len = get_string_length (si);
+         string_leni
+           = (const_string_len && tree_fits_uhwi_p (const_string_len)
+              ? tree_to_uhwi(const_string_len) : -1); 
+       }
+    }
+  return string_leni;
+}
+
+/* Handle a call to strcmp or strncmp. When the result is ONLY used to do 
+   equality test against zero:
+
+   A. When both arguments are constant strings and it's a strcmp:
+      * if the length of the strings are NOT equal, we can safely fold the call
+        to a non-zero value.
+      * otherwise, do nothing now.
+  
+   B. When one of the arguments is constant string, try to replace the call 
with
+   a __builtin_memcmp_eq call where possible, i.e:
+
+   strncmp (s, STR, C) (!)= 0 in which, s is a pointer to a string, STR is a 
+   constant string, C is a constant.
+     if (C <= strlen(STR) && sizeof_array(s) > C)
+       {
+         replace this call with
+         memcmp (s, STR, C) (!)= 0
+       }
+     if (C > strlen(STR)
+       {
+         it can be safely treated as a call to strcmp (s, STR) (!)= 0
+         can handled by the following strcmp.
+       }
+
+   strcmp (s, STR) (!)= 0 in which, s is a pointer to a string, STR is a 
+   constant string.
+     if  (sizeof_array(s) > strlen(STR))
+       {
+         replace this call with
+         memcmp (s, STR, strlen(STR)+1) (!)= 0
+       }
+ */ 
+
+static bool
+handle_builtin_string_cmp (gimple_stmt_iterator *gsi)
+{
+  gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
+  tree res = gimple_call_lhs (stmt);
+  use_operand_p use_p;
+  imm_use_iterator iter;
+  tree arg1 = gimple_call_arg (stmt, 0);
+  tree arg2 = gimple_call_arg (stmt, 1);
+  int idx1 = get_stridx (arg1);
+  int idx2 = get_stridx (arg2);
+  HOST_WIDE_INT length = -1;
+  bool is_ncmp = false;
+
+  if (!res)
+    return true;
+
+  /* When both arguments are unknown, do nothing.  */
+  if (idx1 == 0 && idx2 == 0)
+    return true;
+
+  /* Handle strncmp function.  */
+  if (gimple_call_num_args (stmt) == 3)
+    {
+      tree len = gimple_call_arg (stmt, 2);
+      if (tree_fits_uhwi_p (len))
+        length = tree_to_uhwi (len);
+
+      is_ncmp = true;
+    }
+
+  /* For strncmp, if the length argument is NOT known, do nothing.  */
+  if (is_ncmp && length < 0)
+    return true;
+
+  /* When the result is ONLY used to do equality test against zero.  */
+  FOR_EACH_IMM_USE_FAST (use_p, iter, res) 
+    {    
+      gimple *ustmt = USE_STMT (use_p);
+
+      if (is_gimple_debug (ustmt))
+        continue;
+      if (gimple_code (ustmt) == GIMPLE_ASSIGN)
+       {    
+         gassign *asgn = as_a <gassign *> (ustmt);
+         tree_code code = gimple_assign_rhs_code (asgn);
+         if ((code != EQ_EXPR && code != NE_EXPR)
+             || !integer_zerop (gimple_assign_rhs2 (asgn)))
+           return true;
+       }
+      else if (gimple_code (ustmt) == GIMPLE_COND)
+       {
+         tree_code code = gimple_cond_code (ustmt);
+         if ((code != EQ_EXPR && code != NE_EXPR)
+             || !integer_zerop (gimple_cond_rhs (ustmt)))
+           return true;
+       }
+      else
+        return true;
+    }
+  
+  /* When both arguments are known, and their strlens are unequal, we can 
+     safely fold the call to a non-zero value for strcmp;
+     othewise, do nothing now.  */
+  if (idx1 != 0 && idx2 != 0)
+    {
+      HOST_WIDE_INT const_string_leni1 = -1;
+      HOST_WIDE_INT const_string_leni2 = -1;
+      const_string_leni1 = compute_string_length (idx1);
+      const_string_leni2 = compute_string_length (idx2);
+      if (!is_ncmp 
+         && const_string_leni1 != -1
+         && const_string_leni2 != -1
+         && const_string_leni1 != const_string_leni2) 
+       {
+         replace_call_with_value (gsi, integer_one_node); 
+         return false;
+       }
+      return true;
+    }
+
+  /* When one of args is constant string.  */
+  tree var_string;
+  HOST_WIDE_INT const_string_leni = -1;
+  
+  if (idx1)
+    {
+      const_string_leni = compute_string_length (idx1);
+      var_string = arg2;
+    } 
+  else if (idx2)
+    {
+      const_string_leni = compute_string_length (idx2);
+      var_string = arg1;
+    } 
+
+  if (const_string_leni < 0) 
+    return true;
+ 
+  tree size[2];
+  unsigned HOST_WIDE_INT var_sizei = 0;
+
+  /* Try to get the min and max string length for var_string, the max length is
+     the size of the array - 1, recorded in size[1].  */ 
+  get_range_strlen (var_string, size);
+  if (size[1] && tree_fits_uhwi_p (size[1]))
+    var_sizei = tree_to_uhwi (size[1]) + 1;
+
+  if (var_sizei == 0)
+    return true;
+
+  /* For strncmp, if length > const_string_leni , this call can be safely 
+     transformed to a strcmp.  */
+  if (is_ncmp && length > const_string_leni)
+    is_ncmp = false;
+
+  unsigned HOST_WIDE_INT final_length 
+    = is_ncmp ? length : (const_string_leni + 1);
+
+  /* Replace strcmp or strncmp with the corresponding memcmp.  */
+  if (var_sizei > final_length) 
+    {
+      tree fn = builtin_decl_implicit (BUILT_IN_MEMCMP_EQ);
+      if (!fn)
+       return true;
+      tree const_string_len = build_int_cst (size_type_node, final_length); 
+      update_gimple_call (gsi, fn, 3, arg1, arg2, const_string_len);
+    }
+  else 
+    return true;
+
+  return false;
+}
+
 /* Handle a POINTER_PLUS_EXPR statement.
    For p = "abcd" + 2; compute associated length, or if
    p = q + off is pointing to a '\0' character of a string, call
@@ -2940,6 +3132,11 @@ strlen_optimize_stmt (gimple_stmt_iterator *gsi)
            if (!handle_builtin_memcmp (gsi))
              return false;
            break;
+         case BUILT_IN_STRCMP:
+         case BUILT_IN_STRNCMP:
+           if (!handle_builtin_string_cmp (gsi))
+             return false;
+           break;
          default:
            break;
          }
-- 
1.9.1

Reply via email to