On Wed, Oct 01, 2014 at 03:09:59PM +0200, Uros Bizjak wrote:
> > 2014-10-01  Jakub Jelinek  <ja...@redhat.com>
> >
> >         * config/i386/i386.c (expand_vec_perm_palignr): Handle
> >         256-bit vectors for TARGET_AVX2.
> 
> Please mention PR 62128 and include the testcase from the PR. Also,
> please add a version of gcc.target/i386/pr52252-atom.c, compiled with
> -mavx2 (perhaps named pr52252-avx2.c)

This patch doesn't fix PR62128, and it is already tested (even without
GCC_RUN_EXPENSIVE_TESTS=1) in the vshuf*.c torture tests (several of them).
If you want coverage not just for the default flags, I'd say we should
say for -O2 only just add gcc.target/i386/{avx,avx2,avx512}-vshuf-*.c
tests that would include ../../gcc.dg/torture/vshuf-*.c and be compiled/run
with -mavx/-mavx2/-mavx512 options instead of the default ones.

For PR62128, IMHO the right fix is attached.  Note, this is again covered
in vshuf-*.c tests (test 22 in both vshuf-v32*.c and vshuf-v16*.c).
With that attached patch, pr62128.c (aka test_22 in vshuf-v32qi.c), changes:
-       vpshufb .LC0(%rip), %ymm0, %ymm1
-       vpshufb .LC1(%rip), %ymm0, %ymm0
-       vpermq  $78, %ymm1, %ymm1
-       vpor    %ymm1, %ymm0, %ymm0
+       vpermq  $78, %ymm0, %ymm1
+       vpalignr        $1, %ymm0, %ymm1, %ymm0
        ret

> > --- gcc/config/i386/i386.c.jj   2014-10-01 14:24:16.483138899 +0200
> > +++ gcc/config/i386/i386.c      2014-10-01 14:27:53.577222011 +0200
> > @@ -43297,44 +43297,75 @@ expand_vec_perm_palignr (struct expand_v
> >    rtx shift, target;
> >    struct expand_vec_perm_d dcopy;
> >
> > -  /* Even with AVX, palignr only operates on 128-bit vectors.  */
> > -  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
> > +  /* Even with AVX, palignr only operates on 128-bit vectors,
> > +     in AVX2 palignr operates on both 128-bit lanes.  */
> > +  if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
> > +      && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
> 
> Please simplify the above condition ...

How?  I don't see how that can be simplified, it can
be transformed into
  if (!((TARGET_SSSE3 && GET_MODE_SIZE (d->vmode) == 16)
        || (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)))
but I don't find that simpler.

        Jakub
2014-10-01  Jakub Jelinek  <ja...@redhat.com>

        PR target/62128
        * config/i386/i386.c (expand_vec_perm_1): Try expand_vec_perm_palignr
        if it expands to a single insn only.
        (expand_vec_perm_palignr): Add SINGLE_INSN_ONLY_P argument.  If true,
        fail unless in_order is true.  Add forward declaration.
        (expand_vec_perm_vperm2f128): Fix up comment about which permutation
        is useful for one_operand_p.
        (ix86_expand_vec_perm_const_1): Adjust expand_vec_perm_palignr caller.

--- gcc/config/i386/i386.c.jj   2014-10-01 15:42:28.000000000 +0200
+++ gcc/config/i386/i386.c      2014-10-01 15:50:06.234079887 +0200
@@ -39636,6 +39636,7 @@ struct expand_vec_perm_d
 static bool canonicalize_perm (struct expand_vec_perm_d *d);
 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
+static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
 
 /* Get a vector mode of the same size as the original but with elements
    twice as wide.  This is only guaranteed to apply to integral vectors.  */
@@ -43225,6 +43226,10 @@ expand_vec_perm_1 (struct expand_vec_per
   if (expand_vec_perm_pshufb (d))
     return true;
 
+  /* Try the AVX2 vpalignr instruction.  */
+  if (expand_vec_perm_palignr (d, true))
+    return true;
+
   /* Try the AVX512F vpermi2 instructions.  */
   rtx vec[64];
   enum machine_mode mode = d->vmode;
@@ -43286,10 +43291,11 @@ expand_vec_perm_pshuflw_pshufhw (struct
    the permutation using the SSSE3 palignr instruction.  This succeeds
    when all of the elements in PERM fit within one vector and we merely
    need to shift them down so that a single vector permutation has a
-   chance to succeed.  */
+   chance to succeed.  If SINGLE_INSN_ONLY_P, succeed if only
+   the vpalignr instruction itself can perform the requested permutation.  */
 
 static bool
-expand_vec_perm_palignr (struct expand_vec_perm_d *d)
+expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
 {
   unsigned i, nelt = d->nelt;
   unsigned min, max;
@@ -43320,8 +43326,9 @@ expand_vec_perm_palignr (struct expand_v
 
   /* Given that we have SSSE3, we know we'll be able to implement the
      single operand permutation after the palignr with pshufb for
-     128-bit vectors.  */
-  if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16)
+     128-bit vectors.  If SINGLE_INSN_ONLY_P, in_order has to be computed
+     first.  */
+  if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
     return true;
 
   dcopy = *d;
@@ -43342,6 +43349,9 @@ expand_vec_perm_palignr (struct expand_v
     }
   dcopy.one_operand_p = true;
 
+  if (single_insn_only_p && !in_order)
+    return false;
+
   /* For AVX2, test whether we can permute the result in one instruction.  */
   if (d->testing_p)
     {
@@ -43922,7 +43932,8 @@ expand_vec_perm_vperm2f128 (struct expan
          return true;
        }
 
-      /* For one operand, the only useful vperm2f128 permutation is 0x10.  */
+      /* For one operand, the only useful vperm2f128 permutation is 0x01
+        aka lanes swap.  */
       if (d->one_operand_p)
        return false;
     }
@@ -44811,7 +44822,7 @@ ix86_expand_vec_perm_const_1 (struct exp
   if (expand_vec_perm_pshuflw_pshufhw (d))
     return true;
 
-  if (expand_vec_perm_palignr (d))
+  if (expand_vec_perm_palignr (d, false))
     return true;
 
   if (expand_vec_perm_interleave2 (d))

Reply via email to