Hi!

As the following testcase shows, for V64QImode expand_vec_perm_blend
needs to build a 64-bit mask, so using the plain unsigned type for it
results in wrong code.
While the testcase in the PR which also requires AVX512VBMI regressed
recently, the actual bug as shown by this different testcase is there
since the introduction of AVX512BW support.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux,
committed as obvious to trunk, queued for backporting.

2019-07-30  Jakub Jelinek  <ja...@redhat.com>

        PR target/91150
        * config/i386/i386-expand.c (expand_vec_perm_blend): Change mask type
        from unsigned to unsigned HOST_WIDE_INT.  For E_V64QImode cast
        comparison to unsigned HOST_WIDE_INT before shifting it left.

        * gcc.target/i386/avx512bw-pr91150.c: New test.

--- gcc/config/i386/i386-expand.c.jj    2019-07-16 13:35:01.213083117 +0200
+++ gcc/config/i386/i386-expand.c       2019-07-29 15:11:09.553633265 +0200
@@ -16385,7 +16385,8 @@ static bool
 expand_vec_perm_blend (struct expand_vec_perm_d *d)
 {
   machine_mode mmode, vmode = d->vmode;
-  unsigned i, mask, nelt = d->nelt;
+  unsigned i, nelt = d->nelt;
+  unsigned HOST_WIDE_INT mask;
   rtx target, op0, op1, maskop, x;
   rtx rperm[32], vperm;
 
@@ -16439,7 +16440,7 @@ expand_vec_perm_blend (struct expand_vec
     case E_V16SImode:
     case E_V8DImode:
       for (i = 0; i < nelt; ++i)
-       mask |= (d->perm[i] >= nelt) << i;
+       mask |= ((unsigned HOST_WIDE_INT) (d->perm[i] >= nelt)) << i;
       break;
 
     case E_V2DImode:
--- gcc/testsuite/gcc.target/i386/avx512bw-pr91150.c.jj 2019-07-29 15:47:09.750324258 +0200
+++ gcc/testsuite/gcc.target/i386/avx512bw-pr91150.c    2019-07-29 15:47:15.369242808 +0200
@@ -0,0 +1,37 @@
+/* PR target/91150 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512bw-check.h"
+
+typedef unsigned char V __attribute__((vector_size (64)));
+
+__attribute__((noipa)) void
+foo (V *x, V *y, V *z)
+{
+  *x = __builtin_shuffle (*y, *z, (V) { 0, 1, 2, 3, 4, 5, 6, 7, 8,
+                                       9, 10, 11, 12, 13, 14, 15,
+                                       80, 81, 82, 83, 84, 85, 86, 87,
+                                       88, 89, 90, 91, 92, 93, 94, 95,
+                                       96, 97, 98, 99, 100, 101, 102, 103,
+                                       104, 105, 106, 107, 108, 109, 110, 111,
+                                       112, 113, 114, 115, 116, 117, 118, 119,
+                                       120, 121, 122, 123, 124, 125, 126, 127 });
+}
+
+static void
+avx512bw_test (void)
+{
+  union U { unsigned char a[64]; V v; } a, b, c;
+  int i;
+  for (i = 0; i < 64; i++)
+    {
+      b.a[i] = i + 1;
+      c.a[i] = i + 65;
+    }
+  foo (&a.v, &b.v, &c.v);
+  for (i = 0; i < 64; i++)
+    if (a.a[i] != (i < 16 ? i + 1 : i + 65))
+      __builtin_abort ();
+}

        Jakub

Reply via email to