The attached patch fixes PR 66174. For AVX512F modes, the immediate mask operand in expand_vec_perm_blend has to be forced into a mask register.
The patch also enables AVX512BW modes in the above function. 2015-05-17 Uros Bizjak <ubiz...@gmail.com> PR target/66174 * config/i386/i386.c (expand_vec_perm_blend): Enable HImode and QImode inner modes for TARGET_AVX512BW. Force mask operand to a register for AVX512F modes. testsuite/ChangeLog: 2015-05-17 Uros Bizjak <ubiz...@gmail.com> PR target/66174 * gcc.target/i386/pr66174.c: New test. Tested on an x86_64-linux-gnu{,-m32} non-AVX512F target and committed to mainline SVN. I plan to backport the patch to the release branches in a couple of days, so a runtime test would be much appreciated. Uros.
Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 223248) +++ config/i386/i386.c (working copy) @@ -46777,15 +46777,16 @@ expand_vselect_vconcat (rtx target, rtx op0, rtx o static bool expand_vec_perm_blend (struct expand_vec_perm_d *d) { - machine_mode vmode = d->vmode; + machine_mode mmode, vmode = d->vmode; unsigned i, mask, nelt = d->nelt; - rtx target, op0, op1, x; + rtx target, op0, op1, maskop, x; rtx rperm[32], vperm; if (d->one_operand_p) return false; if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64 - && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4) + && (TARGET_AVX512BW + || GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)) ; else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32) ; @@ -46959,8 +46960,33 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d gcc_unreachable (); } + switch (vmode) + { + case V8DFmode: + case V8DImode: + mmode = QImode; + break; + case V16SFmode: + case V16SImode: + mmode = HImode; + break; + case V32HImode: + mmode = SImode; + break; + case V64QImode: + mmode = DImode; + break; + default: + mmode = VOIDmode; + } + + if (mmode != VOIDmode) + maskop = force_reg (mmode, gen_int_mode (mask, mmode)); + else + maskop = GEN_INT (mask); + /* This matches five different patterns with the different modes. 
*/ - x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask)); + x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop); x = gen_rtx_SET (target, x); emit_insn (x); if (target != d->target) Index: testsuite/gcc.target/i386/pr66174.c =================================================================== --- testsuite/gcc.target/i386/pr66174.c (revision 0) +++ testsuite/gcc.target/i386/pr66174.c (working copy) @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O -ftree-vectorize -mavx512f" } */ + +extern void abort (void); + +typedef struct { + unsigned int a; + unsigned int b; +} ii; + +void foo (unsigned short *arr, ii *iarr) +{ + ii *iptr = iarr; + unsigned short res[128]; + ii ires[128]; + int i; + for (i = 0; i < 128; i++) + { + ires[i].a = iptr->b - iptr->a; + ires[i].b = iptr->b + iptr->a; + iptr++; + } + for (i = 0; i < 128; i++) + { + if (res[i] != arr[i] + || ires[i].a != iarr[i].b - iarr[i].a + || ires[i].b != iarr[i].b + iarr[i].a) + abort (); + } +}