Hi, In patch https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612168.html, we improved the bitcast from lowpart/highpart of DI to SF by using mtvsrws or mtvsrd.
While investigating this functionality, we found that we may improve the related code by using a bitcast subreg from SI to SF, and avoid generating UNSPEC_SF_FROM_SI. We can also improve cases like "subreg:SI(reg:SF)=reg:SI", which casts SI to SF (e.g. pr48335-1.c). This patch also reduces clobber usage, adding a clobber only for p8, where an additional register is required. This patch passes bootstrap and regtest for ppc64(p7,p8 and p9) and ppc64le(p10,p9). Is this patch ok for trunk (or maybe stage1)? Thanks for comments and suggestions! BR, Jeff (Jiufu) gcc/ChangeLog: * config/rs6000/predicates.md: Rename TARGET_NO_SF_SUBREG to BITCAST_SI_SF_IN_REGS, and rename TARGET_ALLOW_SF_SUBREG to BITCAST_SI_SF_IN_MEM. * config/rs6000/rs6000.cc (valid_sf_si_move): Likewise. (is_lfs_stfs_insn): Split to is_stfs_insn and is_lfs_insn. (is_stfs_insn): Split from is_lfs_stfs_insn. (is_lfs_insn): Split from is_lfs_stfs_insn. (prefixed_load_p): Call is_lfs_insn. (prefixed_store_p): Call is_stfs_insn. * config/rs6000/rs6000.h (TARGET_NO_SF_SUBREG): Rename to ... (BITCAST_SI_SF_IN_REGS): ... this. (TARGET_ALLOW_SF_SUBREG): Rename to ... (BITCAST_SI_SF_IN_MEM): ... this. * config/rs6000/rs6000.md (movsf_from_si_p8): New define_insn_and_split. 
--- gcc/config/rs6000/predicates.md | 16 +++--- gcc/config/rs6000/rs6000.cc | 36 ++++++++---- gcc/config/rs6000/rs6000.h | 4 +- gcc/config/rs6000/rs6000.md | 98 +++++++++++++++++++++------------ 4 files changed, 97 insertions(+), 57 deletions(-) diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index e57c9d99c6b..4a7d5893126 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -47,7 +47,7 @@ (define_predicate "sf_subreg_operand" rtx inner_reg = SUBREG_REG (op); machine_mode inner_mode = GET_MODE (inner_reg); - if (TARGET_ALLOW_SF_SUBREG || !REG_P (inner_reg)) + if (BITCAST_SI_SF_IN_MEM || !REG_P (inner_reg)) return 0; if ((mode == SFmode && GET_MODE_CLASS (inner_mode) == MODE_INT) @@ -67,7 +67,7 @@ (define_predicate "altivec_register_operand" { if (SUBREG_P (op)) { - if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode)) + if (BITCAST_SI_SF_IN_REGS && sf_subreg_operand (op, mode)) return 0; op = SUBREG_REG (op); @@ -88,7 +88,7 @@ (define_predicate "vsx_register_operand" { if (SUBREG_P (op)) { - if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode)) + if (BITCAST_SI_SF_IN_REGS && sf_subreg_operand (op, mode)) return 0; op = SUBREG_REG (op); @@ -126,7 +126,7 @@ (define_predicate "vfloat_operand" { if (SUBREG_P (op)) { - if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode)) + if (BITCAST_SI_SF_IN_REGS && sf_subreg_operand (op, mode)) return 0; op = SUBREG_REG (op); @@ -148,7 +148,7 @@ (define_predicate "vint_operand" { if (SUBREG_P (op)) { - if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode)) + if (BITCAST_SI_SF_IN_REGS && sf_subreg_operand (op, mode)) return 0; op = SUBREG_REG (op); @@ -170,7 +170,7 @@ (define_predicate "vlogical_operand" { if (SUBREG_P (op)) { - if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode)) + if (BITCAST_SI_SF_IN_REGS && sf_subreg_operand (op, mode)) return 0; op = SUBREG_REG (op); @@ -346,7 +346,7 @@ (define_predicate "gpc_reg_operand" { if (SUBREG_P (op)) { - if 
(TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode)) + if (BITCAST_SI_SF_IN_REGS && sf_subreg_operand (op, mode)) return 0; op = SUBREG_REG (op); @@ -375,7 +375,7 @@ (define_predicate "int_reg_operand" { if (SUBREG_P (op)) { - if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode)) + if (BITCAST_SI_SF_IN_REGS && sf_subreg_operand (op, mode)) return 0; op = SUBREG_REG (op); diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 16ca3a31757..b8a9f01cbfa 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -10565,7 +10565,7 @@ rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode) bool valid_sf_si_move (rtx dest, rtx src, machine_mode mode) { - if (TARGET_ALLOW_SF_SUBREG) + if (BITCAST_SI_SF_IN_MEM) return true; if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT) @@ -26425,13 +26425,10 @@ pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem) - stfs: - SET is from UNSPEC_SI_FROM_SF to MEM:SI - CLOBBER is a V4SF - - lfs: - - SET is from UNSPEC_SF_FROM_SI to REG:SF - - CLOBBER is a DI */ static bool -is_lfs_stfs_insn (rtx_insn *insn) +is_stfs_insn (rtx_insn *insn) { rtx pattern = PATTERN (insn); if (GET_CODE (pattern) != PARALLEL) @@ -26466,10 +26463,27 @@ is_lfs_stfs_insn (rtx_insn *insn) && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode) return true; - /* lfs case. */ - if (XINT (src, 1) == UNSPEC_SF_FROM_SI - && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode - && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode) + return false; +} + + +static bool +is_lfs_insn (rtx_insn *insn) +{ + rtx set = PATTERN (insn); + if (GET_CODE (set) != SET) + return false; + + rtx dest = SET_DEST (set); + rtx src = SET_SRC (set); + + if (!SUBREG_P (src)) + return false; + + /* lfs case. 
*/ + if (GET_CODE (dest) == REG && GET_MODE (dest) == SFmode + && GET_MODE (SUBREG_REG (src)) == SImode + && GET_CODE (SUBREG_REG (src)) == MEM) return true; return false; @@ -26585,7 +26599,7 @@ prefixed_load_p (rtx_insn *insn) else non_prefixed = reg_to_non_prefixed (reg, mem_mode); - if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn)) + if (non_prefixed == NON_PREFIXED_X && is_lfs_insn (insn)) return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT); else return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed); @@ -26623,7 +26637,7 @@ prefixed_store_p (rtx_insn *insn) /* Need to make sure we aren't looking at a stfs which doesn't look like the other things reg_to_non_prefixed/address_is_prefixed looks for. */ - if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn)) + if (non_prefixed == NON_PREFIXED_X && is_stfs_insn (insn)) return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT); else return address_is_prefixed (addr, mem_mode, non_prefixed); diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 3503614efbd..03b20fb8d66 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -480,8 +480,8 @@ extern int rs6000_vector_align[]; && TARGET_POWERPC64) /* Whether we should avoid (SUBREG:SI (REG:SF) and (SUBREG:SF (REG:SI). */ -#define TARGET_NO_SF_SUBREG TARGET_DIRECT_MOVE_64BIT -#define TARGET_ALLOW_SF_SUBREG (!TARGET_DIRECT_MOVE_64BIT) +#define BITCAST_SI_SF_IN_REGS TARGET_DIRECT_MOVE_64BIT +#define BITCAST_SI_SF_IN_MEM (!TARGET_DIRECT_MOVE_64BIT) /* This wants to be set for p8 and newer. On p7, overlapping unaligned loads are slow. 
*/ diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 74b1c9cee6a..90ee0d566ab 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -145,7 +145,6 @@ (define_c_enum "unspec" UNSPEC_SQRT_ROUND_TO_ODD UNSPEC_TRUNC_ROUND_TO_ODD UNSPEC_SIGNBIT - UNSPEC_SF_FROM_SI UNSPEC_SI_FROM_SF UNSPEC_PLTSEQ UNSPEC_PLT16_HA @@ -7655,7 +7654,7 @@ (define_insn_and_split "movsi_from_sf" "=X, X, X, X, X, X, X, X, wa, X, X"))] - "TARGET_NO_SF_SUBREG + "BITCAST_SI_SF_IN_REGS && (register_operand (operands[0], SImode) || register_operand (operands[1], SFmode))" "@ @@ -7761,7 +7760,7 @@ (define_insn "*movsi_from_df" (unspec:SI [(float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "wa"))] UNSPEC_SI_FROM_SF))] - "TARGET_NO_SF_SUBREG" + "BITCAST_SI_SF_IN_REGS" "xscvdpsp %x0,%x1" [(set_attr "type" "fp")]) @@ -8053,7 +8052,7 @@ (define_insn "movsf_hardfloat" "(register_operand (operands[0], SFmode) || register_operand (operands[1], SFmode)) && TARGET_HARD_FLOAT - && (TARGET_ALLOW_SF_SUBREG + && (BITCAST_SI_SF_IN_MEM || valid_sf_si_move (operands[0], operands[1], SFmode))" "@ lwz%U1%X1 %0,%1 @@ -8171,14 +8170,10 @@ (define_insn_and_split "movsf_from_si" [(set (match_operand:SF 0 "nonimmediate_operand" "=!r, f, v, wa, m, Z, Z, wa, ?r, !r") - (unspec:SF [(match_operand:SI 1 "input_operand" + (subreg:SF (match_operand:SI 1 "input_operand" "m, m, wY, Z, r, f, - wa, r, wa, r")] - UNSPEC_SF_FROM_SI)) - (clobber (match_scratch:DI 2 - "=X, X, X, X, X, X, - X, r, X, X"))] - "TARGET_NO_SF_SUBREG + wa, r, wa, r") 0))] + "BITCAST_SI_SF_IN_REGS && (register_operand (operands[0], SFmode) || register_operand (operands[1], SImode))" "@ @@ -8192,31 +8187,25 @@ (define_insn_and_split "movsf_from_si" # mfvsrwz %0,%x1 mr %0,%1" - - "&& reload_completed - && vsx_reg_sfsubreg_ok (operands[0], SFmode) - && int_reg_operand_not_pseudo (operands[1], SImode)" + "&& ((!reload_completed && !TARGET_P9_VECTOR + && gpc_reg_operand (operands[0], SFmode) + && gpc_reg_operand 
(operands[1], SImode)) + || (reload_completed && TARGET_P9_VECTOR + && vsx_reg_sfsubreg_ok (operands[0], SFmode) + && int_reg_operand_not_pseudo (operands[1], SImode)))" [(const_int 0)] { - rtx op0 = operands[0]; - rtx op1 = operands[1]; - - if (TARGET_P9_VECTOR) + if (reload_completed) { + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op0_v = gen_rtx_REG (V4SImode, REGNO (op0)); emit_insn (gen_vsx_splat_v4si (op0_v, op1)); emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0)); } else - { - rtx op2 = operands[2]; - rtx op1_di = gen_rtx_REG (DImode, REGNO (op1)); - - /* Move SF value to upper 32-bits for xscvspdpn. */ - emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32))); - emit_insn (gen_p8_mtvsrd_sf (op0, op2)); - emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0)); - } + emit_insn (gen_movsf_from_si_p8 (operands[0], operands[1])); DONE; } @@ -8230,6 +8219,46 @@ (define_insn_and_split "movsf_from_si" "*, *, p9v, p8v, *, *, p8v, p8v, p8v, *")]) +(define_insn_and_split "movsf_from_si_p8" + [(set (match_operand:SF 0 "gpc_reg_operand" "=wa") + (subreg:SF (match_operand:SI 1 "gpc_reg_operand" "r") 0)) + (clobber (match_scratch:DI 2 "=r"))] + "BITCAST_SI_SF_IN_REGS" + "#" + "&& reload_completed + && vsx_reg_sfsubreg_ok (operands[0], SFmode) + && int_reg_operand_not_pseudo (operands[1], SImode)" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op1_di = gen_rtx_REG (DImode, REGNO (op1)); + + /* Move SF value to upper 32-bits for xscvspdpn. 
*/ + emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32))); + emit_insn (gen_p8_mtvsrd_sf (op0, op2)); + emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0)); + + DONE; +} + [(set_attr "length" "12") + (set_attr "type" "vecfloat") + (set_attr "isa" "p8v")]) + +(define_split + [(set (subreg:SI (match_operand:SF 0 "gpc_reg_operand") 0) + (match_operand:SI 1 "gpc_reg_operand"))] + "BITCAST_SI_SF_IN_REGS" + [(const_int 0)] +{ + if (TARGET_P9_VECTOR) + emit_insn (gen_movsf_from_si (operands[0], operands[1])); + else + emit_insn (gen_movsf_from_si_p8 (operands[0], operands[1])); + DONE; +}) + (define_code_iterator any_rshift [ashiftrt lshiftrt]) ;; For extracting high part element from DImode register like: @@ -8237,15 +8266,12 @@ (define_code_iterator any_rshift [ashiftrt lshiftrt]) ;; split it before reload with "and mask" to avoid generating shift right ;; 32 bit then shift left 32 bit. (define_insn_and_split "movsf_from_si2_<code>" - [(set (match_operand:SF 0 "gpc_reg_operand" "=wa") - (unspec:SF - [(match_operator:SI 3 "lowpart_subreg_operator" - [(any_rshift:DI - (match_operand:DI 1 "input_operand" "r") - (const_int 32))])] - UNSPEC_SF_FROM_SI)) + [(set (subreg:SI (match_operand:SF 0 "gpc_reg_operand" "=wa") 0) + (match_operator:SI 3 "lowpart_subreg_operator" + [(any_rshift:DI (match_operand:DI 1 "input_operand" "r") + (const_int 32))])) (clobber (match_scratch:DI 2 "=r"))] - "TARGET_NO_SF_SUBREG" + "BITCAST_SI_SF_IN_REGS" "#" "&& 1" [(const_int 0)] -- 2.31.1