The attached patch fixes PR 66174.  In expand_vec_perm_blend, the immediate
mask operand has to be forced into a mask register for AVX512F modes.

The patch also enables AVX512BW modes in the above function.

2015-05-17  Uros Bizjak  <ubiz...@gmail.com>

    PR target/66174
    * config/i386/i386.c (expand_vec_perm_blend): Enable HImode and
    QImode inner modes for TARGET_AVX512BW.  Force mask operand
    to a register for AVX512F modes.

testsuite/ChangeLog:

2015-05-17  Uros Bizjak  <ubiz...@gmail.com>

    PR target/66174
    * gcc.target/i386/pr66174.c: New test.

Tested on an x86_64-linux-gnu{,-m32} non-AVX512F target and committed to
mainline SVN.

I plan to backport the patch to the release branches in a couple of
days, so a runtime test would be much appreciated.

Uros.
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c  (revision 223248)
+++ config/i386/i386.c  (working copy)
@@ -46777,15 +46777,16 @@ expand_vselect_vconcat (rtx target, rtx op0, rtx o
 static bool
 expand_vec_perm_blend (struct expand_vec_perm_d *d)
 {
-  machine_mode vmode = d->vmode;
+  machine_mode mmode, vmode = d->vmode;
   unsigned i, mask, nelt = d->nelt;
-  rtx target, op0, op1, x;
+  rtx target, op0, op1, maskop, x;
   rtx rperm[32], vperm;
 
   if (d->one_operand_p)
     return false;
   if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
-      && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
+      && (TARGET_AVX512BW
+         || GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4))
     ;
   else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
     ;
@@ -46959,8 +46960,33 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d
       gcc_unreachable ();
     }
 
+  switch (vmode)
+    {
+    case V8DFmode:
+    case V8DImode:
+      mmode = QImode;
+      break;
+    case V16SFmode:
+    case V16SImode:
+      mmode = HImode;
+      break;
+    case V32HImode:
+      mmode = SImode;
+      break;
+    case V64QImode:
+      mmode = DImode;
+      break;
+    default:
+      mmode = VOIDmode;
+    }
+
+  if (mmode != VOIDmode)
+    maskop = force_reg (mmode, gen_int_mode (mask, mmode));
+  else
+    maskop = GEN_INT (mask);
+
   /* This matches five different patterns with the different modes.  */
-  x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
+  x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
   x = gen_rtx_SET (target, x);
   emit_insn (x);
   if (target != d->target)
Index: testsuite/gcc.target/i386/pr66174.c
===================================================================
--- testsuite/gcc.target/i386/pr66174.c (revision 0)
+++ testsuite/gcc.target/i386/pr66174.c (working copy)
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O -ftree-vectorize -mavx512f" } */
+
+extern void abort (void);
+
+typedef struct {
+   unsigned int a;
+   unsigned int b;
+} ii;
+
+void foo (unsigned short *arr, ii *iarr)  /* Fill ires from iarr, then re-check it; abort on mismatch.  */
+{
+  ii *iptr = iarr;
+  unsigned short res[128];  /* Never assigned in this function but read below; presumably harmless because the test is compile-only (dg-do compile) -- confirm.  */
+  ii ires[128];
+  int i;
+  for (i = 0; i < 128; i++)  /* Store the difference (.a) and sum (.b) of each input pair.  */
+    {
+      ires[i].a = iptr->b - iptr->a;
+      ires[i].b = iptr->b + iptr->a;
+      iptr++;
+    }
+  for (i = 0; i < 128; i++)  /* Recompute from iarr and compare with the stored results.  */
+    {
+      if (res[i] != arr[i]
+          || ires[i].a != iarr[i].b - iarr[i].a
+          || ires[i].b != iarr[i].b + iarr[i].a)
+        abort ();
+    }
+}

Reply via email to