GCC 6.1 added support for the XXPERM instruction for the PowerPC ISA 3.0.  The
XXPERM instruction is essentially a 4 operand instruction, with only 3 operands
in the instruction (the target register overlaps with the first input
register).  The Power9 hardware has fusion support where if the instruction
that precedes the XXPERM is a XXLOR move instruction to set the first input
argument, it is fused with the XXPERM.  I added code to support this fusion.

Unfortunately, in running the testsuite on the power9 simulator, we discovered
that the test gcc.c-torture/execute/pr56866.c would fail because the fusion
alternatives confused the register allocator and/or the passes after the
register allocator.  This patch removes the explicit fusion support from
XXPERM.

In addition, ISA 3.0 added XXPERMR and VPERMR instructions for little endian
support where the permute vector reverses the bytes.  This patch adds support
for XXPERMR/VPERMR.

[gcc]
2016-05-19  Michael Meissner  <meiss...@linux.vnet.ibm.com>
            Kelvin Nilsen  <kel...@gcc.gnu.org>

        PR target/71201
        * config/rs6000/rs6000.c (rs6000_expand_vector_set): Generate
        vpermr/xxpermr on ISA 3.0.
        (altivec_expand_vec_perm_le): Likewise.
        * config/rs6000/altivec.md (UNSPEC_VPERMR): New unspec.
        (altivec_vperm_<mode>_internal): Drop ISA 3.0 xxperm fusion
        alternative.
        (altivec_vperm_v8hiv16qi): Likewise.
        (altivec_vperm_<mode>_uns_internal): Likewise.
        (vperm_v8hiv4si): Likewise.
        (vperm_v16qiv8hi): Likewise.
        (altivec_vpermr_<mode>_internal): Add VPERMR/XXPERMR support for
        ISA 3.0.

[gcc/testsuite]
2016-05-19  Michael Meissner  <meiss...@linux.vnet.ibm.com>
            Kelvin Nilsen  <kel...@gcc.gnu.org>

        * gcc.target/powerpc/p9-permute.c: Run test on big endian as well
        as little endian.
        * gcc.target/powerpc/p9-vpermr.c: New test for ISA 3.0 vpermr
        support.


-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c  
(.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000)    
(revision 236423)
+++ gcc/config/rs6000/rs6000.c  (.../gcc/config/rs6000) (working copy)
@@ -6858,19 +6858,27 @@ rs6000_expand_vector_set (rtx target, rt
                        UNSPEC_VPERM);
   else 
     {
-      /* Invert selector.  We prefer to generate VNAND on P8 so
-         that future fusion opportunities can kick in, but must
-         generate VNOR elsewhere.  */
-      rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
-      rtx iorx = (TARGET_P8_VECTOR
-                 ? gen_rtx_IOR (V16QImode, notx, notx)
-                 : gen_rtx_AND (V16QImode, notx, notx));
-      rtx tmp = gen_reg_rtx (V16QImode);
-      emit_insn (gen_rtx_SET (tmp, iorx));
-
-      /* Permute with operands reversed and adjusted selector.  */
-      x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
-                         UNSPEC_VPERM);
+      if (TARGET_P9_VECTOR)
+       x = gen_rtx_UNSPEC (mode,
+                           gen_rtvec (3, target, reg, 
+                                      force_reg (V16QImode, x)),
+                           UNSPEC_VPERMR);
+      else
+       {
+         /* Invert selector.  We prefer to generate VNAND on P8 so
+            that future fusion opportunities can kick in, but must
+            generate VNOR elsewhere.  */
+         rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
+         rtx iorx = (TARGET_P8_VECTOR
+                     ? gen_rtx_IOR (V16QImode, notx, notx)
+                     : gen_rtx_AND (V16QImode, notx, notx));
+         rtx tmp = gen_reg_rtx (V16QImode);
+         emit_insn (gen_rtx_SET (tmp, iorx));
+         
+         /* Permute with operands reversed and adjusted selector.  */
+         x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
+                             UNSPEC_VPERM);
+       }
     }
 
   emit_insn (gen_rtx_SET (target, x));
@@ -34238,17 +34246,25 @@ altivec_expand_vec_perm_le (rtx operands
   if (!REG_P (target))
     tmp = gen_reg_rtx (mode);
 
-  /* Invert the selector with a VNAND if available, else a VNOR.
-     The VNAND is preferred for future fusion opportunities.  */
-  notx = gen_rtx_NOT (V16QImode, sel);
-  iorx = (TARGET_P8_VECTOR
-         ? gen_rtx_IOR (V16QImode, notx, notx)
-         : gen_rtx_AND (V16QImode, notx, notx));
-  emit_insn (gen_rtx_SET (norreg, iorx));
-
-  /* Permute with operands reversed and adjusted selector.  */
-  unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
-                          UNSPEC_VPERM);
+  if (TARGET_P9_VECTOR)
+    {
+      unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel), 
+                              UNSPEC_VPERMR);
+    }
+  else
+    {
+      /* Invert the selector with a VNAND if available, else a VNOR.
+        The VNAND is preferred for future fusion opportunities.  */
+      notx = gen_rtx_NOT (V16QImode, sel);
+      iorx = (TARGET_P8_VECTOR
+             ? gen_rtx_IOR (V16QImode, notx, notx)
+             : gen_rtx_AND (V16QImode, notx, notx));
+      emit_insn (gen_rtx_SET (norreg, iorx));
+      
+      /* Permute with operands reversed and adjusted selector.  */
+      unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
+                              UNSPEC_VPERM);
+    }
 
   /* Copy into target, possibly by way of a register.  */
   if (!REG_P (target))
Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md        
(.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000)    
(revision 236423)
+++ gcc/config/rs6000/altivec.md        (.../gcc/config/rs6000) (working copy)
@@ -58,6 +58,7 @@ (define_c_enum "unspec"
    UNSPEC_VSUM2SWS
    UNSPEC_VSUMSWS
    UNSPEC_VPERM
+   UNSPEC_VPERMR
    UNSPEC_VPERM_UNS
    UNSPEC_VRFIN
    UNSPEC_VCFUX
@@ -1949,32 +1950,30 @@ (define_expand "altivec_vperm_<mode>"
 
 ;; Slightly prefer vperm, since the target does not overlap the source
 (define_insn "*altivec_vperm_<mode>_internal"
-  [(set (match_operand:VM 0 "register_operand" "=v,?wo,?&wo")
-       (unspec:VM [(match_operand:VM 1 "register_operand" "v,0,wo")
-                   (match_operand:VM 2 "register_operand" "v,wo,wo")
-                   (match_operand:V16QI 3 "register_operand" "v,wo,wo")]
+  [(set (match_operand:VM 0 "register_operand" "=v,?wo")
+       (unspec:VM [(match_operand:VM 1 "register_operand" "v,0")
+                   (match_operand:VM 2 "register_operand" "v,wo")
+                   (match_operand:V16QI 3 "register_operand" "v,wo")]
                   UNSPEC_VPERM))]
   "TARGET_ALTIVEC"
   "@
    vperm %0,%1,%2,%3
-   xxperm %x0,%x2,%x3
-   xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3"
+   xxperm %x0,%x2,%x3"
   [(set_attr "type" "vecperm")
-   (set_attr "length" "4,4,8")])
+   (set_attr "length" "4")])
 
 (define_insn "altivec_vperm_v8hiv16qi"
-  [(set (match_operand:V16QI 0 "register_operand" "=v,?wo,?&wo")
-       (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v,0,wo")
-                      (match_operand:V8HI 2 "register_operand" "v,wo,wo")
-                      (match_operand:V16QI 3 "register_operand" "v,wo,wo")]
+  [(set (match_operand:V16QI 0 "register_operand" "=v,?wo")
+       (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v,0")
+                      (match_operand:V8HI 2 "register_operand" "v,wo")
+                      (match_operand:V16QI 3 "register_operand" "v,wo")]
                   UNSPEC_VPERM))]
   "TARGET_ALTIVEC"
   "@
    vperm %0,%1,%2,%3
-   xxperm %x0,%x2,%x3
-   xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3"
+   xxperm %x0,%x2,%x3"
   [(set_attr "type" "vecperm")
-   (set_attr "length" "4,4,8")])
+   (set_attr "length" "4")])
 
 (define_expand "altivec_vperm_<mode>_uns"
   [(set (match_operand:VM 0 "register_operand" "")
@@ -1992,18 +1991,17 @@ (define_expand "altivec_vperm_<mode>_uns
 })
 
 (define_insn "*altivec_vperm_<mode>_uns_internal"
-  [(set (match_operand:VM 0 "register_operand" "=v,?wo,?&wo")
-       (unspec:VM [(match_operand:VM 1 "register_operand" "v,0,wo")
-                   (match_operand:VM 2 "register_operand" "v,wo,wo")
-                   (match_operand:V16QI 3 "register_operand" "v,wo,wo")]
+  [(set (match_operand:VM 0 "register_operand" "=v,?wo")
+       (unspec:VM [(match_operand:VM 1 "register_operand" "v,0")
+                   (match_operand:VM 2 "register_operand" "v,wo")
+                   (match_operand:V16QI 3 "register_operand" "v,wo")]
                   UNSPEC_VPERM_UNS))]
   "TARGET_ALTIVEC"
   "@
    vperm %0,%1,%2,%3
-   xxperm %x0,%x2,%x3
-   xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3"
+   xxperm %x0,%x2,%x3"
   [(set_attr "type" "vecperm")
-   (set_attr "length" "4,4,8")])
+   (set_attr "length" "4")])
 
 (define_expand "vec_permv16qi"
   [(set (match_operand:V16QI 0 "register_operand" "")
@@ -2032,6 +2030,19 @@ (define_expand "vec_perm_constv16qi"
     FAIL;
 })
 
+(define_insn "*altivec_vpermr_<mode>_internal"
+  [(set (match_operand:VM 0 "register_operand" "=v,?wo")
+       (unspec:VM [(match_operand:VM 1 "register_operand" "v,0")
+                   (match_operand:VM 2 "register_operand" "v,wo")
+                   (match_operand:V16QI 3 "register_operand" "v,wo")]
+                  UNSPEC_VPERMR))]
+  "TARGET_P9_VECTOR"
+  "@
+   vpermr %0,%1,%2,%3
+   xxpermr %x0,%x2,%x3"
+  [(set_attr "type" "vecperm")
+   (set_attr "length" "4")])
+
 (define_insn "altivec_vrfip"           ; ceil
   [(set (match_operand:V4SF 0 "register_operand" "=v")
         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")]
@@ -2841,32 +2852,30 @@ (define_expand "vec_unpacks_lo_<VP_small
   "")
 
 (define_insn "vperm_v8hiv4si"
-  [(set (match_operand:V4SI 0 "register_operand" "=v,?wo,?&wo")
-        (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v,0,wo")
-                     (match_operand:V4SI 2 "register_operand" "v,wo,wo")
-                     (match_operand:V16QI 3 "register_operand" "v,wo,wo")]
+  [(set (match_operand:V4SI 0 "register_operand" "=v,?wo")
+        (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v,0")
+                     (match_operand:V4SI 2 "register_operand" "v,wo")
+                     (match_operand:V16QI 3 "register_operand" "v,wo")]
                   UNSPEC_VPERMSI))]
   "TARGET_ALTIVEC"
   "@
    vperm %0,%1,%2,%3
-   xxperm %x0,%x2,%x3
-   xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3"
+   xxperm %x0,%x2,%x3"
   [(set_attr "type" "vecperm")
-   (set_attr "length" "4,4,8")])
+   (set_attr "length" "4")])
 
 (define_insn "vperm_v16qiv8hi"
-  [(set (match_operand:V8HI 0 "register_operand" "=v,?wo,?&wo")
-        (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v,0,wo")
-                     (match_operand:V8HI 2 "register_operand" "v,wo,wo")
-                     (match_operand:V16QI 3 "register_operand" "v,wo,wo")]
+  [(set (match_operand:V8HI 0 "register_operand" "=v,?wo")
+        (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v,0")
+                     (match_operand:V8HI 2 "register_operand" "v,wo")
+                     (match_operand:V16QI 3 "register_operand" "v,wo")]
                   UNSPEC_VPERMHI))]
   "TARGET_ALTIVEC"
   "@
    vperm %0,%1,%2,%3
-   xxperm %x0,%x2,%x3
-   xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3"
+   xxperm %x0,%x2,%x3"
   [(set_attr "type" "vecperm")
-   (set_attr "length" "4,4,8")])
+   (set_attr "length" "4")])
 
 
 (define_expand "vec_unpacku_hi_v16qi"
Index: gcc/testsuite/gcc.target/powerpc/p9-permute.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/p9-permute.c       
(.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc)
     (revision 236423)
+++ gcc/testsuite/gcc.target/powerpc/p9-permute.c       
(.../gcc/testsuite/gcc.target/powerpc)  (working copy)
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-do compile { target { powerpc64*-*-* } } } */
 /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { 
"-mcpu=power9" } } */
 /* { dg-options "-mcpu=power9 -O2" } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
@@ -17,5 +17,6 @@ permute (vector long long *p, vector lon
   return vec_perm (a, b, mask);
 }
 
+/* expect xxpermr on little-endian, xxperm on big-endian */
 /* { dg-final { scan-assembler    "xxperm" } } */
 /* { dg-final { scan-assembler-not "vperm"  } } */
Index: gcc/testsuite/gcc.target/powerpc/p9-vpermr.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/p9-vpermr.c        
(.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc)
     (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/p9-vpermr.c        
(.../gcc/testsuite/gcc.target/powerpc)  (revision 0)
@@ -0,0 +1,21 @@
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { 
"-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O2" } */
+
+/* Test generation of VPERMR/XXPERMR on ISA 3.0 in little endian.  */
+
+#include <altivec.h>
+
+vector long long
+permute (vector long long *p, vector long long *q, vector unsigned char mask)
+{
+  vector long long a = *p;
+  vector long long b = *q;
+
+  /* Force a, b to be in altivec registers to select vpermr insn.  */
+  __asm__ (" # a: %x0, b: %x1" : "+v" (a), "+v" (b));
+
+  return vec_perm (a, b, mask);
+}
+
+/* { dg-final { scan-assembler    "vpermr\|xxpermr" } } */

Reply via email to