This patch improves the handling of 128 bit vectors residing in GPRs
by adding more alternatives to the move pattern.

Regression tested on s390x.

gcc/ChangeLog:

2017-03-24  Andreas Krebbel  <kreb...@linux.vnet.ibm.com>

        * config/s390/constraints.md: Add comments.
        (jKK): Reject element sizes > 8 bytes.
        * config/s390/s390.c (s390_split_ok_p): Enable splitting also for
        s_operands.
        * config/s390/s390.md: Add the s_operand checks formerly in
        s390_split_ok_p to various splitters where they are still
        required.
        * config/s390/vector.md ("mov<mode>" V_128): Add GPR alternatives
        for 128 bit vectors.  Plus two splitters.
---
 gcc/ChangeLog                  | 12 ++++++++++
 gcc/config/s390/constraints.md | 10 +++++++--
 gcc/config/s390/s390.c         |  4 ----
 gcc/config/s390/s390.md        | 16 +++++++++++++
 gcc/config/s390/vector.md      | 51 ++++++++++++++++++++++++++++++++++++++----
 5 files changed, 83 insertions(+), 10 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 7a83d1b..292e946 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,17 @@
 2017-03-24  Andreas Krebbel  <kreb...@linux.vnet.ibm.com>
 
+       * config/s390/constraints.md: Add comments.
+       (jKK): Reject element sizes > 8 bytes.
+       * config/s390/s390.c (s390_split_ok_p): Enable splitting also for
+       s_operands.
+       * config/s390/s390.md: Add the s_operand checks formerly in
+       s390_split_ok_p to various splitters where they are still
+       required.
+       * config/s390/vector.md ("mov<mode>" V_128): Add GPR alternatives
+       for 128 bit vectors.  Plus two splitters.
+
+2017-03-24  Andreas Krebbel  <kreb...@linux.vnet.ibm.com>
+
        * config/s390/s390.md: Rename the cpu facilty vec to vx throughout
        the file.
 
diff --git a/gcc/config/s390/constraints.md b/gcc/config/s390/constraints.md
index 536f485..95c6a8f 100644
--- a/gcc/config/s390/constraints.md
+++ b/gcc/config/s390/constraints.md
@@ -410,20 +410,26 @@
   "All one bit scalar or vector constant"
   (match_test "op == CONSTM1_RTX (GET_MODE (op))"))
 
+; vector generate mask operand - support for up to 64 bit elements
 (define_constraint "jxx"
   "@internal"
   (and (match_code "const_vector")
        (match_test "s390_contiguous_bitmask_vector_p (op, NULL, NULL)")))
 
+; vector generate byte mask operand - this is only supposed to deal
+; with real vectors 128 bit values of being either 0 or -1 are handled
+; with j00 and jm1
 (define_constraint "jyy"
   "@internal"
   (and (match_code "const_vector")
        (match_test "s390_bytemask_vector_p (op, NULL)")))
 
+; vector replicate immediate operand - support for up to 64 bit elements
 (define_constraint "jKK"
   "@internal"
-  (and (and (match_code "const_vector")
-           (match_test "const_vec_duplicate_p (op)"))
+  (and (and (and (match_code "const_vector")
+                (match_test "const_vec_duplicate_p (op)"))
+           (match_test "GET_MODE_UNIT_SIZE (GET_MODE (op)) <= 8"))
        (match_test "satisfies_constraint_K (XVECEXP (op, 0, 0))")))
 
 (define_constraint "jm6"
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 27640ad..f3cebd6 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -2494,10 +2494,6 @@ s390_split_ok_p (rtx dst, rtx src, machine_mode mode, 
int first_subword)
   if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P 
(dst))
     return false;
 
-  /* We don't need to split if operands are directly accessible.  */
-  if (s_operand (src, mode) || s_operand (dst, mode))
-    return false;
-
   /* Non-offsettable memory references cannot be split.  */
   if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
       || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 660b5f9..555a779 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -1490,6 +1490,8 @@
   [(set (match_operand:TI 0 "nonimmediate_operand" "")
         (match_operand:TI 1 "general_operand" ""))]
   "TARGET_ZARCH && reload_completed
+   && !s_operand (operands[0], TImode)
+   && !s_operand (operands[1], TImode)
    && s390_split_ok_p (operands[0], operands[1], TImode, 0)"
   [(set (match_dup 2) (match_dup 4))
    (set (match_dup 3) (match_dup 5))]
@@ -1504,6 +1506,8 @@
   [(set (match_operand:TI 0 "nonimmediate_operand" "")
         (match_operand:TI 1 "general_operand" ""))]
   "TARGET_ZARCH && reload_completed
+   && !s_operand (operands[0], TImode)
+   && !s_operand (operands[1], TImode)
    && s390_split_ok_p (operands[0], operands[1], TImode, 1)"
   [(set (match_dup 2) (match_dup 4))
    (set (match_dup 3) (match_dup 5))]
@@ -1824,6 +1828,8 @@
   [(set (match_operand:DI 0 "nonimmediate_operand" "")
         (match_operand:DI 1 "general_operand" ""))]
   "!TARGET_ZARCH && reload_completed
+   && !s_operand (operands[0], DImode)
+   && !s_operand (operands[1], DImode)
    && s390_split_ok_p (operands[0], operands[1], DImode, 0)"
   [(set (match_dup 2) (match_dup 4))
    (set (match_dup 3) (match_dup 5))]
@@ -1838,6 +1844,8 @@
   [(set (match_operand:DI 0 "nonimmediate_operand" "")
         (match_operand:DI 1 "general_operand" ""))]
   "!TARGET_ZARCH && reload_completed
+   && !s_operand (operands[0], DImode)
+   && !s_operand (operands[1], DImode)
    && s390_split_ok_p (operands[0], operands[1], DImode, 1)"
   [(set (match_dup 2) (match_dup 4))
    (set (match_dup 3) (match_dup 5))]
@@ -2364,6 +2372,8 @@
   [(set (match_operand:TD_TF 0 "nonimmediate_operand" "")
         (match_operand:TD_TF 1 "general_operand"      ""))]
   "TARGET_ZARCH && reload_completed
+   && !s_operand (operands[0], <MODE>mode)
+   && !s_operand (operands[1], <MODE>mode)
    && s390_split_ok_p (operands[0], operands[1], <MODE>mode, 0)"
   [(set (match_dup 2) (match_dup 4))
    (set (match_dup 3) (match_dup 5))]
@@ -2378,6 +2388,8 @@
   [(set (match_operand:TD_TF 0 "nonimmediate_operand" "")
         (match_operand:TD_TF 1 "general_operand"      ""))]
   "TARGET_ZARCH && reload_completed
+   && !s_operand (operands[0], <MODE>mode)
+   && !s_operand (operands[1], <MODE>mode)
    && s390_split_ok_p (operands[0], operands[1], <MODE>mode, 1)"
   [(set (match_dup 2) (match_dup 4))
    (set (match_dup 3) (match_dup 5))]
@@ -2532,6 +2544,8 @@
   [(set (match_operand:DD_DF 0 "nonimmediate_operand" "")
         (match_operand:DD_DF 1 "general_operand" ""))]
   "!TARGET_ZARCH && reload_completed
+   && !s_operand (operands[0], <MODE>mode)
+   && !s_operand (operands[1], <MODE>mode)
    && s390_split_ok_p (operands[0], operands[1], <MODE>mode, 0)"
   [(set (match_dup 2) (match_dup 4))
    (set (match_dup 3) (match_dup 5))]
@@ -2546,6 +2560,8 @@
   [(set (match_operand:DD_DF 0 "nonimmediate_operand" "")
         (match_operand:DD_DF 1 "general_operand" ""))]
   "!TARGET_ZARCH && reload_completed
+   && !s_operand (operands[0], <MODE>mode)
+   && !s_operand (operands[1], <MODE>mode)
    && s390_split_ok_p (operands[0], operands[1], <MODE>mode, 1)"
   [(set (match_dup 2) (match_dup 4))
    (set (match_dup 3) (match_dup 5))]
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 4b5d43b..38905e8 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -144,11 +144,18 @@
 (include "vx-builtins.md")
 
 ; Full HW vector size moves
+
+; We don't use lm/stm for 128 bit moves since these are slower than
+; splitting it into separate moves.
+
+; FIXME: More constants are possible by enabling jxx, jyy constraints
+; for TImode (use double-int for the calculations)
+
 ; vgmb, vgmh, vgmf, vgmg, vrepib, vrepih, vrepif, vrepig
 (define_insn "mov<mode>"
-  [(set (match_operand:V_128 0 "nonimmediate_operand" "=v,v,R,  v,  v,  v,  v, 
 v,v,d")
-       (match_operand:V_128 1 "general_operand"      " 
v,R,v,j00,jm1,jyy,jxx,jKK,d,v"))]
-  "TARGET_VX"
+  [(set (match_operand:V_128 0 "nonimmediate_operand" "=v,v,R,  v,  v,  v,  v, 
 v,v,*d,*d,?o")
+       (match_operand:V_128 1 "general_operand"      " 
v,R,v,j00,jm1,jyy,jxx,jKK,d, v,dT,*d"))]
+  ""
   "@
    vlr\t%v0,%v1
    vl\t%v0,%1
@@ -159,9 +166,13 @@
    vgm<bhfgq>\t%v0,%s1,%e1
    vrepi<bhfgq>\t%v0,%h1
    vlvgp\t%v0,%1,%N1
+   #
+   #
    #"
-  [(set_attr "op_type" "VRR,VRX,VRX,VRI,VRI,VRI,VRI,VRI,VRR,*")])
+  [(set_attr "cpu_facility" "vx,vx,vx,vx,vx,vx,vx,vx,vx,vx,*,*")
+   (set_attr "op_type"      "VRR,VRX,VRX,VRI,VRI,VRI,VRI,VRI,VRR,*,*,*")])
 
+; VR -> GPR, no instruction so split it into 64 element sets.
 (define_split
   [(set (match_operand:V_128 0 "register_operand" "")
        (match_operand:V_128 1 "register_operand" ""))]
@@ -177,6 +188,38 @@
   operands[3] = operand_subword (operands[0], 1, 0, <MODE>mode);
 })
 
+; Split the 128 bit GPR move into two word mode moves
+; s390_split_ok_p decides which part needs to be moved first.
+
+(define_split
+  [(set (match_operand:V_128 0 "nonimmediate_operand" "")
+        (match_operand:V_128 1 "general_operand" ""))]
+  "reload_completed
+   && s390_split_ok_p (operands[0], operands[1], <MODE>mode, 0)"
+  [(set (match_dup 2) (match_dup 4))
+   (set (match_dup 3) (match_dup 5))]
+{
+  operands[2] = operand_subword (operands[0], 0, 0, <MODE>mode);
+  operands[3] = operand_subword (operands[0], 1, 0, <MODE>mode);
+  operands[4] = operand_subword (operands[1], 0, 0, <MODE>mode);
+  operands[5] = operand_subword (operands[1], 1, 0, <MODE>mode);
+})
+
+(define_split
+  [(set (match_operand:V_128 0 "nonimmediate_operand" "")
+        (match_operand:V_128 1 "general_operand" ""))]
+  "reload_completed
+   && s390_split_ok_p (operands[0], operands[1], <MODE>mode, 1)"
+  [(set (match_dup 2) (match_dup 4))
+   (set (match_dup 3) (match_dup 5))]
+{
+  operands[2] = operand_subword (operands[0], 1, 0, <MODE>mode);
+  operands[3] = operand_subword (operands[0], 0, 0, <MODE>mode);
+  operands[4] = operand_subword (operands[1], 1, 0, <MODE>mode);
+  operands[5] = operand_subword (operands[1], 0, 0, <MODE>mode);
+})
+
+
 ; Moves for smaller vector modes.
 
 ; In these patterns only the vlr, vone, and vzero instructions write
-- 
2.9.1

Reply via email to