https://gcc.gnu.org/g:a177be05f6952c3f7e62186d2e138d96c475b81a

commit r15-2055-ga177be05f6952c3f7e62186d2e138d96c475b81a
Author: Richard Biener <rguent...@suse.de>
Date:   Mon Jul 15 13:50:58 2024 +0200

    tree-optimization/115843 - fix wrong-code with fully-masked loop and peeling
    
    When AVX512 uses a fully masked loop and peeling we fail to create the
    correct initial loop mask when the mask is composed of multiple
    components in some cases.  The following fixes this by properly applying
    the bias for the component to the shift amount.
    
            PR tree-optimization/115843
            * tree-vect-loop-manip.cc
            (vect_set_loop_condition_partial_vectors_avx512): Properly
            bias the shift of the initial mask for alignment peeling.
    
            * gcc.dg/vect/pr115843.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr115843.c | 41 ++++++++++++++++++++++++++++++++++++
 gcc/tree-vect-loop-manip.cc          |  8 +++++--
 2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr115843.c b/gcc/testsuite/gcc.dg/vect/pr115843.c
new file mode 100644
index 000000000000..3dbb6c792788
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr115843.c
@@ -0,0 +1,41 @@
+/* { dg-additional-options "-mavx512f --param vect-partial-vector-usage=2" { target avx512f_runtime } } */
+
+#include "tree-vect.h"
+
+typedef __UINT64_TYPE__ BITBOARD;
+BITBOARD KingPressureMask1[64], KingSafetyMask1[64];
+
+void __attribute__((noinline))
+foo()
+{
+  for (int i = 0; i < 64; i++)
+    {
+      if ((i & 7) == 0)
+       KingPressureMask1[i] = KingSafetyMask1[i + 1];
+      else if ((i & 7) == 7)
+       KingPressureMask1[i] = KingSafetyMask1[i - 1];
+      else
+       KingPressureMask1[i] = KingSafetyMask1[i];
+    }
+}
+
+BITBOARD verify[64]
+  = {1, 1, 2, 3, 4, 5, 6, 6, 9, 9, 10, 11, 12, 13, 14, 14, 17, 17, 18, 19,
+    20, 21, 22, 22, 25, 25, 26, 27, 28, 29, 30, 30, 33, 33, 34, 35, 36, 37, 38,
+    38, 41, 41, 42, 43, 44, 45, 46, 46, 49, 49, 50, 51, 52, 53, 54, 54, 57, 57,
+    58, 59, 60, 61, 62, 62};
+
+int main()
+{
+  check_vect ();
+
+#pragma GCC novector
+  for (int i = 0; i < 64; ++i)
+    KingSafetyMask1[i] = i;
+  foo ();
+#pragma GCC novector
+  for (int i = 0; i < 64; ++i)
+    if (KingPressureMask1[i] != verify[i])
+      __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index ac13873cd88d..57dbcbe862cd 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -1149,10 +1149,14 @@ vect_set_loop_condition_partial_vectors_avx512 (class loop *loop,
              /* ???  But when the shift amount isn't constant this requires
                 a round-trip to GRPs.  We could apply the bias to either
                 side of the compare instead.  */
-             tree shift = gimple_build (&preheader_seq, MULT_EXPR,
+             tree shift = gimple_build (&preheader_seq, MINUS_EXPR,
                                         TREE_TYPE (niters_skip), niters_skip,
                                         build_int_cst (TREE_TYPE (niters_skip),
-                                                       rgc.max_nscalars_per_iter));
+                                                       bias));
+             shift = gimple_build (&preheader_seq, MULT_EXPR,
+                                   TREE_TYPE (niters_skip), shift,
+                                   build_int_cst (TREE_TYPE (niters_skip),
+                                                  rgc.max_nscalars_per_iter));
              init_ctrl = gimple_build (&preheader_seq, LSHIFT_EXPR,
                                        TREE_TYPE (init_ctrl),
                                        init_ctrl, shift);

Reply via email to