The recently-added logic for -finline-stringops=memset introduced an
assumption that doesn't necessarily hold, namely, that
can_store_by_pieces of a larger size implies can_store_by_pieces of
smaller sizes.  This patch checks every size the by-multiple-pieces
machinery might use before committing to an expansion pattern.
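
The gist of the fix, as a stand-alone sketch (ok_to_store and
ok_to_store_by_multiple_pieces are hypothetical names standing in for
can_store_by_pieces and the new helper; this is illustration only, not
part of the patch): the total is tested once, and then each
power-of-two piece the expansion might emit is tested individually.

  #include <stdbool.h>

  /* Hypothetical stand-in for can_store_by_pieces: whether a single
     constant store of SIZE bytes is acceptable.  Crucially, its being
     true for a larger SIZE need not make it true for a smaller one.  */
  extern bool ok_to_store (unsigned long size);

  /* Accept only if the total (BITS + LEN) is acceptable *and* so is
     every power-of-two block size set in BITS; LEN is assumed to have
     been vetted separately.  */
  bool
  ok_to_store_by_multiple_pieces (unsigned long bits, unsigned long len)
  {
    if (bits && !ok_to_store (bits + len))
      return false;

    while (bits)
      {
        unsigned long bit = bits & -bits;  /* lowest set bit */
        bits &= ~bit;                      /* clear it */
        if (!ok_to_store (bit))
          return false;
      }
    return true;
  }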

Regstrapped a slightly different version, and am now regstrapping this
one on x86_64-linux-gnu.  Ok to install?

(FWIW, for completeness, I've just launched bootstraps with
-finline-stringops on ppc64le-linux-gnu and aarch64-linux-gnu, and will
do so on x86_64-linux-gnu as soon as my retesting completes.)


for  gcc/ChangeLog

        PR target/112778
        * builtins.cc (can_store_by_multiple_pieces): New.
        (try_store_by_multiple_pieces): Call it.

for  gcc/testsuite/ChangeLog

        PR target/112778
        * gcc.dg/inline-mem-cmp-pr112778.c: New.
---
 gcc/builtins.cc                                |   57 ++++++++++++++++++++----
 gcc/testsuite/gcc.dg/inline-mem-cmp-pr112778.c |   10 ++++
 2 files changed, 58 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/inline-mem-cmp-pr112778.c

diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index 12a535d313f12..ad8497192a2dd 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -4284,6 +4284,40 @@ expand_builtin_memset (tree exp, rtx target, machine_mode mode)
   return expand_builtin_memset_args (dest, val, len, target, mode, exp);
 }
 
+/* Check that store_by_pieces allows BITS + LEN (so that we don't
+   expand something too unreasonably long), and every power of 2 in
+   BITS.  It is assumed that LEN has already been tested by
+   itself.  */
+static bool
+can_store_by_multiple_pieces (unsigned HOST_WIDE_INT bits,
+                             by_pieces_constfn constfun,
+                             void *constfundata, unsigned int align,
+                             bool memsetp,
+                             unsigned HOST_WIDE_INT len)
+{
+  if (bits
+      && !can_store_by_pieces (bits + len, constfun, constfundata,
+                              align, memsetp))
+    return false;
+
+  /* Avoid the loop if we're just going to repeat the same single
+     test.  */
+  if (!len && popcount_hwi (bits) == 1)
+    return true;
+
+  for (int i = ctz_hwi (bits); bits; i = ctz_hwi (bits))
+    {
+      unsigned HOST_WIDE_INT bit = 1;
+      bit <<= i;
+      bits &= ~bit;
+      if (!can_store_by_pieces (bit, constfun, constfundata,
+                               align, memsetp))
+       return false;
+    }
+
+  return true;
+}
+
 /* Try to store VAL (or, if NULL_RTX, VALC) in LEN bytes starting at TO.
    Return TRUE if successful, FALSE otherwise.  TO is assumed to be
    aligned at an ALIGN-bits boundary.  LEN must be a multiple of
@@ -4341,7 +4375,11 @@ try_store_by_multiple_pieces (rtx to, rtx len, unsigned int ctz_len,
   else
     /* Huh, max_len < min_len?  Punt.  See pr100843.c.  */
     return false;
-  if (min_len >= blksize)
+  if (min_len >= blksize
+      /* ??? Maybe try smaller fixed-prefix blksizes before
+        punting?  */
+      && can_store_by_pieces (blksize, builtin_memset_read_str,
+                             &valc, align, true))
     {
       min_len -= blksize;
       min_bits = floor_log2 (min_len);
@@ -4367,8 +4405,9 @@ try_store_by_multiple_pieces (rtx to, rtx len, unsigned int ctz_len,
      happen because of the way max_bits and blksize are related, but
      it doesn't hurt to test.  */
   if (blksize > xlenest
-      || !can_store_by_pieces (xlenest, builtin_memset_read_str,
-                              &valc, align, true))
+      || !can_store_by_multiple_pieces (xlenest - blksize,
+                                       builtin_memset_read_str,
+                                       &valc, align, true, blksize))
     {
       if (!(flag_inline_stringops & ILSOP_MEMSET))
        return false;
@@ -4386,17 +4425,17 @@ try_store_by_multiple_pieces (rtx to, rtx len, unsigned int ctz_len,
             of overflow.  */
          if (max_bits < orig_max_bits
              && xlenest + blksize >= xlenest
-             && can_store_by_pieces (xlenest + blksize,
-                                     builtin_memset_read_str,
-                                     &valc, align, true))
+             && can_store_by_multiple_pieces (xlenest,
+                                              builtin_memset_read_str,
+                                              &valc, align, true, blksize))
            {
              max_loop = true;
              break;
            }
          if (blksize
-             && can_store_by_pieces (xlenest,
-                                     builtin_memset_read_str,
-                                     &valc, align, true))
+             && can_store_by_multiple_pieces (xlenest,
+                                              builtin_memset_read_str,
+                                              &valc, align, true, 0))
            {
              max_len += blksize;
              min_len += blksize;
diff --git a/gcc/testsuite/gcc.dg/inline-mem-cmp-pr112778.c b/gcc/testsuite/gcc.dg/inline-mem-cmp-pr112778.c
new file mode 100644
index 0000000000000..fdfc5b6f28c8e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/inline-mem-cmp-pr112778.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-finline-stringops" } */
+
+char buf[3];
+
+int
+f ()
+{
+  __builtin_memset (buf, 'v', 3);
+}

-- 
Alexandre Oliva, happy hacker            https://FSFLA.org/blogs/lxo/
   Free Software Activist                   GNU Toolchain Engineer
More tolerance and less prejudice are key for inclusion and diversity
Excluding neuro-others for not behaving ""normal"" is *not* inclusive