arch12 provides vector population count instructions for halfword,
word and doubleword elements, not just for bytes (chars).

gcc/testsuite/ChangeLog:

2017-03-24  Andreas Krebbel  <kreb...@linux.vnet.ibm.com>

        * gcc.target/s390/vxe/popcount-1.c: New test.

gcc/ChangeLog:

2017-03-24  Andreas Krebbel  <kreb...@linux.vnet.ibm.com>

        * config/s390/vector.md ("popcountv16qi2", "popcountv8hi2")
        ("popcountv4si2", "popcountv2di2"): Rename to ...
        ("popcount<mode>2", "popcountv8hi2_vx", "popcountv4si2_vx")
        ("popcountv2di2_vx"): ... these and add !TARGET_VXE to the
        condition.
        ("popcount<mode>2_vxe"): New pattern.
---
 gcc/ChangeLog                                  |  9 +++
 gcc/config/s390/vector.md                      | 38 ++++++++---
 gcc/testsuite/ChangeLog                        |  4 ++
 gcc/testsuite/gcc.target/s390/vxe/popcount-1.c | 88 ++++++++++++++++++++++++++
 4 files changed, 131 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vxe/popcount-1.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 89e7906..d516b4d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,14 @@
 2017-03-24  Andreas Krebbel  <kreb...@linux.vnet.ibm.com>
 
+       * config/s390/vector.md ("popcountv16qi2", "popcountv8hi2")
+       ("popcountv4si2", "popcountv2di2"): Rename to ...
+       ("popcount<mode>2", "popcountv8hi2_vx", "popcountv4si2_vx")
+       ("popcountv2di2_vx"): ... these and add !TARGET_VXE to the
+       condition.
+       ("popcount<mode>2_vxe"): New pattern.
+
+2017-03-24  Andreas Krebbel  <kreb...@linux.vnet.ibm.com>
+
        * common/config/s390/s390-common.c (processor_flags_table): Add
        arch12.
        * config.gcc: Add arch12.
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 68a8ed0..d4c0e95 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -715,11 +715,33 @@
 
 ; Vector population count
 
-(define_insn "popcountv16qi2"
+(define_expand "popcount<mode>2" ; Dispatches to the _vxe or _vx variant.
+  [(set (match_operand:VI_HW                0 "register_operand" "=v")
+       (unspec:VI_HW [(match_operand:VI_HW 1 "register_operand"  "v")]
+                     UNSPEC_POPCNT))]
+  "TARGET_VX"
+{
+  if (TARGET_VXE) /* Single instruction per element size (vpopct[bhfg]).  */
+    emit_insn (gen_popcount<mode>2_vxe (operands[0], operands[1]));
+  else /* Plain VX: use the mode specific popcount<mode>2_vx patterns.  */
+    emit_insn (gen_popcount<mode>2_vx (operands[0], operands[1]));
+  DONE;
+})
+
+; vpopctb, vpopcth, vpopctf, vpopctg: element-wise popcount for each VI_HW mode (VXE only)
+(define_insn "popcount<mode>2_vxe"
+  [(set (match_operand:VI_HW                0 "register_operand" "=v")
+       (unspec:VI_HW [(match_operand:VI_HW 1 "register_operand"  "v")]
+                     UNSPEC_POPCNT))]
+  "TARGET_VXE"
+  "vpopct<bhfgq>\t%v0,%v1"
+  [(set_attr "op_type" "VRR")])
+
+(define_insn "popcountv16qi2_vx"
   [(set (match_operand:V16QI                0 "register_operand" "=v")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand"  "v")]
                      UNSPEC_POPCNT))]
-  "TARGET_VX"
+  "TARGET_VX && !TARGET_VXE" ; With VXE, popcount<mode>2_vxe is used instead.
   "vpopct\t%v0,%v1,0"
   [(set_attr "op_type" "VRR")])
 
@@ -729,7 +751,7 @@
 ; of the result, add it to the result and extend it to halfword
 ; element size (unpack).
 
-(define_expand "popcountv8hi2"
+(define_expand "popcountv8hi2_vx"
   [(set (match_dup 2)
        (unspec:V16QI [(subreg:V16QI (match_operand:V8HI 1 "register_operand" "v") 0)]
                      UNSPEC_POPCNT))
@@ -761,7 +783,7 @@
        (and:V8HI (subreg:V8HI (match_dup 2) 0)
                  (subreg:V8HI (match_dup 3) 0)))
 ]
-  "TARGET_VX"
+  "TARGET_VX && !TARGET_VXE"
 {
   operands[2] = gen_reg_rtx (V16QImode);
   operands[3] = gen_reg_rtx (V16QImode);
@@ -769,20 +791,20 @@
   operands[5] = CONST0_RTX (V16QImode);
 })
 
-(define_expand "popcountv4si2"
+(define_expand "popcountv4si2_vx"
   [(set (match_dup 2)
        (unspec:V16QI [(subreg:V16QI (match_operand:V4SI 1 "register_operand" "v") 0)]
                      UNSPEC_POPCNT))
    (set (match_operand:V4SI 0 "register_operand" "=v")
        (unspec:V4SI [(match_dup 2) (match_dup 3)]
                     UNSPEC_VEC_VSUM))]
-  "TARGET_VX"
+  "TARGET_VX && !TARGET_VXE" ; With VXE, popcount<mode>2_vxe is used instead.
 {
   operands[2] = gen_reg_rtx (V16QImode);
   operands[3] = force_reg (V16QImode, CONST0_RTX (V16QImode));
 })
 
-(define_expand "popcountv2di2"
+(define_expand "popcountv2di2_vx"
   [(set (match_dup 2)
        (unspec:V16QI [(subreg:V16QI (match_operand:V2DI 1 "register_operand" "v") 0)]
                      UNSPEC_POPCNT))
@@ -792,7 +814,7 @@
    (set (match_operand:V2DI 0 "register_operand" "=v")
        (unspec:V2DI [(match_dup 3) (match_dup 5)]
                     UNSPEC_VEC_VSUMG))]
-  "TARGET_VX"
+  "TARGET_VX && !TARGET_VXE"
 {
   operands[2] = gen_reg_rtx (V16QImode);
   operands[3] = gen_reg_rtx (V4SImode);
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index bbdd3c8..6d178c5 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,9 @@
 2017-03-24  Andreas Krebbel  <kreb...@linux.vnet.ibm.com>
 
+       * gcc.target/s390/vxe/popcount-1.c: New test.
+
+2017-03-24  Andreas Krebbel  <kreb...@linux.vnet.ibm.com>
+
        * gcc.target/s390/vxe/bitops-1.c: New test.
 
 2017-03-24  Andreas Krebbel  <kreb...@linux.vnet.ibm.com>
diff --git a/gcc/testsuite/gcc.target/s390/vxe/popcount-1.c b/gcc/testsuite/gcc.target/s390/vxe/popcount-1.c
new file mode 100644
index 0000000..9ea835a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vxe/popcount-1.c
@@ -0,0 +1,88 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -mzarch -march=arch12 --save-temps" } */
+/* { dg-require-effective-target s390_vxe } */
+
+/* Vectorization currently only works for v4si.  v8hi at least uses 2x
+   vpopctf but no vpopcth.  */
+
+typedef unsigned char     uv16qi __attribute__((vector_size(16)));
+typedef unsigned short     uv8hi __attribute__((vector_size(16)));
+typedef unsigned int       uv4si __attribute__((vector_size(16)));
+typedef unsigned long long uv2di __attribute__((vector_size(16)));
+
+uv16qi __attribute__((noinline)) /* Popcount of each of the 16 byte elements.  */
+vpopctb (uv16qi a)
+{
+  uv16qi r;
+  int i;
+
+  for (i = 0; i < 16; i++) /* Scalar loop; the scan below wants one vpopctb.  */
+    r[i] = __builtin_popcount (a[i]);
+
+  return r;
+}
+/* { dg-final { scan-assembler "vpopctb\t%v24,%v24" { xfail *-*-* } } } */
+
+uv8hi __attribute__((noinline)) /* Popcount of each of the 8 halfword elements.  */
+vpopcth (uv8hi a)
+{
+  uv8hi r;
+  int i;
+
+  for (i = 0; i < 8; i++) /* Scalar loop; the scan below wants one vpopcth.  */
+    r[i] = __builtin_popcount (a[i]);
+
+  return r;
+}
+/* { dg-final { scan-assembler "vpopcth\t%v24,%v24" { xfail *-*-* } } } */
+
+uv4si __attribute__((noinline)) /* Popcount of each of the 4 word elements.  */
+vpopctf (uv4si a)
+{
+  uv4si r;
+  int i;
+
+  for (i = 0; i < 4; i++) /* Scalar loop; the scan below wants one vpopctf.  */
+    r[i] = __builtin_popcount (a[i]);
+
+  return r;
+}
+/* { dg-final { scan-assembler "vpopctf\t%v24,%v24" } } */
+
+uv2di __attribute__((noinline)) /* Popcount of each of the 2 doubleword elements.  */
+vpopctg (uv2di a)
+{
+  uv2di r;
+  int i;
+
+  for (i = 0; i < 2; i++) /* ll variant: plain popcount takes unsigned int.  */
+    r[i] = __builtin_popcountll (a[i]);
+
+  return r;
+}
+/* { dg-final { scan-assembler "vpopctg\t%v24,%v24" { xfail *-*-* } } } */
+
+int
+main () /* Runtime check: aborts on the first popcount mismatch.  */
+{ /* 42 has 3 bits set, 1 and 2 have one; ~0 sets every bit of an element.  */
+  uv16qi a = (uv16qi){ 42, 1, ~0, 2, 42, 1, ~0, 2, 42, 1, ~0, 2, 42, 1, ~0, 2 };
+  if (__builtin_s390_vec_any_ne (vpopctb (a),
+                                (uv16qi){ 3, 1, 8, 1, 3, 1, 8, 1,
+                                          3, 1, 8, 1, 3, 1, 8, 1 }))
+    __builtin_abort ();
+
+  if (__builtin_s390_vec_any_ne (vpopcth ((uv8hi){ 42, 1, ~0, 2, 42, 1, ~0, 2 }),
+                                (uv8hi){ 3, 1, 16, 1, 3, 1, 16, 1 }))
+    __builtin_abort ();
+
+  if (__builtin_s390_vec_any_ne (vpopctf ((uv4si){ 42, 1, ~0, 2 }),
+                                (uv4si){ 3, 1, 32, 1 }))
+    __builtin_abort ();
+
+  if (__builtin_s390_vec_any_ne (vpopctg ((uv2di){ 42, 1 }),
+                                (uv2di){ 3, 1 }))
+    __builtin_abort ();
+
+
+  return 0;
+}
-- 
2.9.1

Reply via email to