This patch improves the handling of 128 bit vectors residing in GPRs by adding more alternatives to the move pattern.
Regression tested on s390x. gcc/ChangeLog: 2017-03-24 Andreas Krebbel <kreb...@linux.vnet.ibm.com> * config/s390/constraints.md: Add comments. (jKK): Reject element sizes > 8 bytes. * config/s390/s390.c (s390_split_ok_p): Enable splitting also for s_operands. * config/s390/s390.md: Add the s_operand checks formerly in s390_split_ok_p to various splitters where they are still required. * config/s390/vector.md ("mov<mode>" V_128): Add GPR alternatives for 128 bit vectors. Plus two splitters. --- gcc/ChangeLog | 12 ++++++++++ gcc/config/s390/constraints.md | 10 +++++++-- gcc/config/s390/s390.c | 4 ---- gcc/config/s390/s390.md | 16 +++++++++++++ gcc/config/s390/vector.md | 51 ++++++++++++++++++++++++++++++++++++++---- 5 files changed, 83 insertions(+), 10 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7a83d1b..292e946 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,17 @@ 2017-03-24 Andreas Krebbel <kreb...@linux.vnet.ibm.com> + * config/s390/constraints.md: Add comments. + (jKK): Reject element sizes > 8 bytes. + * config/s390/s390.c (s390_split_ok_p): Enable splitting also for + s_operands. + * config/s390/s390.md: Add the s_operand checks formerly in + s390_split_ok_p to various splitters where they are still + required. + * config/s390/vector.md ("mov<mode>" V_128): Add GPR alternatives + for 128 bit vectors. Plus two splitters. + +2017-03-24 Andreas Krebbel <kreb...@linux.vnet.ibm.com> + * config/s390/s390.md: Rename the cpu facilty vec to vx throughout the file. 
diff --git a/gcc/config/s390/constraints.md b/gcc/config/s390/constraints.md index 536f485..95c6a8f 100644 --- a/gcc/config/s390/constraints.md +++ b/gcc/config/s390/constraints.md @@ -410,20 +410,26 @@ "All one bit scalar or vector constant" (match_test "op == CONSTM1_RTX (GET_MODE (op))")) +; vector generate mask operand - support for up to 64 bit elements (define_constraint "jxx" "@internal" (and (match_code "const_vector") (match_test "s390_contiguous_bitmask_vector_p (op, NULL, NULL)"))) +; vector generate byte mask operand - this is only supposed to deal +; with real vectors. 128 bit values being either 0 or -1 are handled +; with j00 and jm1 (define_constraint "jyy" "@internal" (and (match_code "const_vector") (match_test "s390_bytemask_vector_p (op, NULL)"))) +; vector replicate immediate operand - support for up to 64 bit elements (define_constraint "jKK" "@internal" - (and (and (match_code "const_vector") - (match_test "const_vec_duplicate_p (op)")) + (and (and (and (match_code "const_vector") + (match_test "const_vec_duplicate_p (op)")) + (match_test "GET_MODE_UNIT_SIZE (GET_MODE (op)) <= 8")) (match_test "satisfies_constraint_K (XVECEXP (op, 0, 0))"))) (define_constraint "jm6" diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 27640ad..f3cebd6 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -2494,10 +2494,6 @@ s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword) if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst)) return false; - /* We don't need to split if operands are directly accessible. */ - if (s_operand (src, mode) || s_operand (dst, mode)) - return false; - /* Non-offsettable memory references cannot be split. 
*/ if ((GET_CODE (src) == MEM && !offsettable_memref_p (src)) || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst))) diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 660b5f9..555a779 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -1490,6 +1490,8 @@ [(set (match_operand:TI 0 "nonimmediate_operand" "") (match_operand:TI 1 "general_operand" ""))] "TARGET_ZARCH && reload_completed + && !s_operand (operands[0], TImode) + && !s_operand (operands[1], TImode) && s390_split_ok_p (operands[0], operands[1], TImode, 0)" [(set (match_dup 2) (match_dup 4)) (set (match_dup 3) (match_dup 5))] @@ -1504,6 +1506,8 @@ [(set (match_operand:TI 0 "nonimmediate_operand" "") (match_operand:TI 1 "general_operand" ""))] "TARGET_ZARCH && reload_completed + && !s_operand (operands[0], TImode) + && !s_operand (operands[1], TImode) && s390_split_ok_p (operands[0], operands[1], TImode, 1)" [(set (match_dup 2) (match_dup 4)) (set (match_dup 3) (match_dup 5))] @@ -1824,6 +1828,8 @@ [(set (match_operand:DI 0 "nonimmediate_operand" "") (match_operand:DI 1 "general_operand" ""))] "!TARGET_ZARCH && reload_completed + && !s_operand (operands[0], DImode) + && !s_operand (operands[1], DImode) && s390_split_ok_p (operands[0], operands[1], DImode, 0)" [(set (match_dup 2) (match_dup 4)) (set (match_dup 3) (match_dup 5))] @@ -1838,6 +1844,8 @@ [(set (match_operand:DI 0 "nonimmediate_operand" "") (match_operand:DI 1 "general_operand" ""))] "!TARGET_ZARCH && reload_completed + && !s_operand (operands[0], DImode) + && !s_operand (operands[1], DImode) && s390_split_ok_p (operands[0], operands[1], DImode, 1)" [(set (match_dup 2) (match_dup 4)) (set (match_dup 3) (match_dup 5))] @@ -2364,6 +2372,8 @@ [(set (match_operand:TD_TF 0 "nonimmediate_operand" "") (match_operand:TD_TF 1 "general_operand" ""))] "TARGET_ZARCH && reload_completed + && !s_operand (operands[0], <MODE>mode) + && !s_operand (operands[1], <MODE>mode) && s390_split_ok_p (operands[0], operands[1], 
<MODE>mode, 0)" [(set (match_dup 2) (match_dup 4)) (set (match_dup 3) (match_dup 5))] @@ -2378,6 +2388,8 @@ [(set (match_operand:TD_TF 0 "nonimmediate_operand" "") (match_operand:TD_TF 1 "general_operand" ""))] "TARGET_ZARCH && reload_completed + && !s_operand (operands[0], <MODE>mode) + && !s_operand (operands[1], <MODE>mode) && s390_split_ok_p (operands[0], operands[1], <MODE>mode, 1)" [(set (match_dup 2) (match_dup 4)) (set (match_dup 3) (match_dup 5))] @@ -2532,6 +2544,8 @@ [(set (match_operand:DD_DF 0 "nonimmediate_operand" "") (match_operand:DD_DF 1 "general_operand" ""))] "!TARGET_ZARCH && reload_completed + && !s_operand (operands[0], <MODE>mode) + && !s_operand (operands[1], <MODE>mode) && s390_split_ok_p (operands[0], operands[1], <MODE>mode, 0)" [(set (match_dup 2) (match_dup 4)) (set (match_dup 3) (match_dup 5))] @@ -2546,6 +2560,8 @@ [(set (match_operand:DD_DF 0 "nonimmediate_operand" "") (match_operand:DD_DF 1 "general_operand" ""))] "!TARGET_ZARCH && reload_completed + && !s_operand (operands[0], <MODE>mode) + && !s_operand (operands[1], <MODE>mode) && s390_split_ok_p (operands[0], operands[1], <MODE>mode, 1)" [(set (match_dup 2) (match_dup 4)) (set (match_dup 3) (match_dup 5))] diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index 4b5d43b..38905e8 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -144,11 +144,18 @@ (include "vx-builtins.md") ; Full HW vector size moves + +; We don't use lm/stm for 128 bit moves since these are slower than +; splitting it into separate moves. 
+ +; FIXME: More constants are possible by enabling jxx, jyy constraints +; for TImode (use double-int for the calculations) + ; vgmb, vgmh, vgmf, vgmg, vrepib, vrepih, vrepif, vrepig (define_insn "mov<mode>" - [(set (match_operand:V_128 0 "nonimmediate_operand" "=v,v,R, v, v, v, v, v,v,d") - (match_operand:V_128 1 "general_operand" " v,R,v,j00,jm1,jyy,jxx,jKK,d,v"))] - "TARGET_VX" + [(set (match_operand:V_128 0 "nonimmediate_operand" "=v,v,R, v, v, v, v, v,v,*d,*d,?o") + (match_operand:V_128 1 "general_operand" " v,R,v,j00,jm1,jyy,jxx,jKK,d, v,dT,*d"))] + "" "@ vlr\t%v0,%v1 vl\t%v0,%1 @@ -159,9 +166,13 @@ vgm<bhfgq>\t%v0,%s1,%e1 vrepi<bhfgq>\t%v0,%h1 vlvgp\t%v0,%1,%N1 + # + # #" - [(set_attr "op_type" "VRR,VRX,VRX,VRI,VRI,VRI,VRI,VRI,VRR,*")]) + [(set_attr "cpu_facility" "vx,vx,vx,vx,vx,vx,vx,vx,vx,vx,*,*") + (set_attr "op_type" "VRR,VRX,VRX,VRI,VRI,VRI,VRI,VRI,VRR,*,*,*")]) +; VR -> GPR, no instruction so split it into two 64 bit element sets. (define_split [(set (match_operand:V_128 0 "register_operand" "") (match_operand:V_128 1 "register_operand" ""))] @@ -177,6 +188,38 @@ operands[3] = operand_subword (operands[0], 1, 0, <MODE>mode); }) +; Split the 128 bit GPR move into two word mode moves. +; s390_split_ok_p decides which part needs to be moved first. 
+ +(define_split + [(set (match_operand:V_128 0 "nonimmediate_operand" "") + (match_operand:V_128 1 "general_operand" ""))] + "reload_completed + && s390_split_ok_p (operands[0], operands[1], <MODE>mode, 0)" + [(set (match_dup 2) (match_dup 4)) + (set (match_dup 3) (match_dup 5))] +{ + operands[2] = operand_subword (operands[0], 0, 0, <MODE>mode); + operands[3] = operand_subword (operands[0], 1, 0, <MODE>mode); + operands[4] = operand_subword (operands[1], 0, 0, <MODE>mode); + operands[5] = operand_subword (operands[1], 1, 0, <MODE>mode); +}) + +(define_split + [(set (match_operand:V_128 0 "nonimmediate_operand" "") + (match_operand:V_128 1 "general_operand" ""))] + "reload_completed + && s390_split_ok_p (operands[0], operands[1], <MODE>mode, 1)" + [(set (match_dup 2) (match_dup 4)) + (set (match_dup 3) (match_dup 5))] +{ + operands[2] = operand_subword (operands[0], 1, 0, <MODE>mode); + operands[3] = operand_subword (operands[0], 0, 0, <MODE>mode); + operands[4] = operand_subword (operands[1], 1, 0, <MODE>mode); + operands[5] = operand_subword (operands[1], 0, 0, <MODE>mode); +}) + + ; Moves for smaller vector modes. ; In these patterns only the vlr, vone, and vzero instructions write -- 2.9.1