Now that TImode support is enabled on 64-bit SPARC, let's implement it. :-) This is modeled on the TFmode support and, consequently, inherits its relative verbosity. A future cleanup could simplify it a little and unify it with the TFmode support, as was done for Alpha, for example.
Bootstrapped/regtested on SPARC/Solaris and SPARC64/Solaris, applied on the mainline.

2012-09-13  Eric Botcazou  <ebotca...@adacore.com>

	* config/sparc/predicates.md (input_operand): Do not consider TImode
	constants as 1-instruction integer constants.  Use
	register_or_zero_operand instead of register_operand and tidy up.
	* config/sparc/sparc.md (movti): New expander.
	(movti_insn_sp64): New instruction.
	(movti_insn_sp64_hq): Likewise.
	(TImode splitters): New splitters.
	* config/sparc/sparc.c (sparc_expand_move) <TImode>: New case.
	(sparc_legitimate_address_p): Return 0 for REG+REG in TImode.
	* config/sparc/sparc-protos.h (arith_double_4096_operand): Delete.
	(arith_4096_operand): Likewise.
	(zero_operand): Likewise.
	(fp_zero_operand): Likewise.
	(reg_or_0_operand): Likewise.

--
Eric Botcazou
Index: config/sparc/predicates.md =================================================================== --- config/sparc/predicates.md (revision 191198) +++ config/sparc/predicates.md (working copy) @@ -357,7 +357,7 @@ (define_predicate "arith_double_operand" (define_predicate "arith_add_operand" (ior (match_operand 0 "arith_operand") (match_operand 0 "const_4096_operand"))) - + ;; Return true if OP is suitable as second double operand for add/sub. (define_predicate "arith_double_add_operand" (match_code "const_int,const_double,reg,subreg") @@ -427,6 +427,7 @@ (define_predicate "input_operand" /* Allow any 1-instruction integer constant. */ if (mclass == MODE_INT + && mode != TImode && (small_int_operand (op, mode) || const_high_operand (op, mode))) return true; @@ -440,12 +441,10 @@ (define_predicate "input_operand" if (mclass == MODE_FLOAT && GET_CODE (op) == CONST_DOUBLE) return true; - if (mclass == MODE_VECTOR_INT && GET_CODE (op) == CONST_VECTOR - && (const_zero_operand (op, mode) - || const_all_ones_operand (op, mode))) + if (mclass == MODE_VECTOR_INT && const_all_ones_operand (op, mode)) return true; - if (register_operand (op, mode)) + if (register_or_zero_operand (op, mode)) return true; /* If this is a SUBREG, look inside so that we handle paradoxical ones. */ Index: config/sparc/sparc.md =================================================================== --- config/sparc/sparc.md (revision 191198) +++ config/sparc/sparc.md (working copy) @@ -2034,6 +2034,164 @@ (define_split DONE; }) +(define_expand "movti" + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "general_operand" ""))] + "TARGET_ARCH64" +{ + if (sparc_expand_move (TImode, operands)) + DONE; +}) + +;; We need to prevent reload from splitting TImode moves, because it +;; might decide to overwrite a pointer with the value it points to. +;; In that case we have to do the loads in the appropriate order so +;; that the pointer is not destroyed too early. 
+ +(define_insn "*movti_insn_sp64" + [(set (match_operand:TI 0 "nonimmediate_operand" "=r , o,?*e,?o,b") + (match_operand:TI 1 "input_operand" "roJ,rJ, eo, e,J"))] + "TARGET_ARCH64 + && ! TARGET_HARD_QUAD + && (register_operand (operands[0], TImode) + || register_or_zero_operand (operands[1], TImode))" + "#" + [(set_attr "length" "2,2,2,2,2") + (set_attr "cpu_feature" "*,*,fpu,fpu,vis")]) + +(define_insn "*movti_insn_sp64_hq" + [(set (match_operand:TI 0 "nonimmediate_operand" "=r , o,?*e,?*e,?m,b") + (match_operand:TI 1 "input_operand" "roJ,rJ, e, m, e,J"))] + "TARGET_ARCH64 + && TARGET_HARD_QUAD + && (register_operand (operands[0], TImode) + || register_or_zero_operand (operands[1], TImode))" + "@ + # + # + fmovq\t%1, %0 + ldq\t%1, %0 + stq\t%1, %0 + #" + [(set_attr "type" "*,*,fpmove,fpload,fpstore,*") + (set_attr "length" "2,2,*,*,*,2")]) + +;; Now all the splits to handle multi-insn TI mode moves. +(define_split + [(set (match_operand:TI 0 "register_operand" "") + (match_operand:TI 1 "register_operand" ""))] + "reload_completed + && ((TARGET_FPU + && ! TARGET_HARD_QUAD) + || (! fp_register_operand (operands[0], TImode) + && ! fp_register_operand (operands[1], TImode)))" + [(clobber (const_int 0))] +{ + rtx set_dest = operands[0]; + rtx set_src = operands[1]; + rtx dest1, dest2; + rtx src1, src2; + + dest1 = gen_highpart (DImode, set_dest); + dest2 = gen_lowpart (DImode, set_dest); + src1 = gen_highpart (DImode, set_src); + src2 = gen_lowpart (DImode, set_src); + + /* Now emit using the real source and destination we found, swapping + the order if we detect overlap. 
*/ + if (reg_overlap_mentioned_p (dest1, src2)) + { + emit_insn (gen_movdi (dest2, src2)); + emit_insn (gen_movdi (dest1, src1)); + } + else + { + emit_insn (gen_movdi (dest1, src1)); + emit_insn (gen_movdi (dest2, src2)); + } + DONE; +}) + +(define_split + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "const_zero_operand" ""))] + "reload_completed" + [(clobber (const_int 0))] +{ + rtx set_dest = operands[0]; + rtx dest1, dest2; + + switch (GET_CODE (set_dest)) + { + case REG: + dest1 = gen_highpart (DImode, set_dest); + dest2 = gen_lowpart (DImode, set_dest); + break; + case MEM: + dest1 = adjust_address (set_dest, DImode, 0); + dest2 = adjust_address (set_dest, DImode, 8); + break; + default: + gcc_unreachable (); + } + + emit_insn (gen_movdi (dest1, const0_rtx)); + emit_insn (gen_movdi (dest2, const0_rtx)); + DONE; +}) + +(define_split + [(set (match_operand:TI 0 "register_operand" "") + (match_operand:TI 1 "memory_operand" ""))] + "reload_completed + && offsettable_memref_p (operands[1]) + && (! TARGET_HARD_QUAD + || ! fp_register_operand (operands[0], TImode))" + [(clobber (const_int 0))] +{ + rtx word0 = adjust_address (operands[1], DImode, 0); + rtx word1 = adjust_address (operands[1], DImode, 8); + rtx set_dest, dest1, dest2; + + set_dest = operands[0]; + + dest1 = gen_highpart (DImode, set_dest); + dest2 = gen_lowpart (DImode, set_dest); + + /* Now output, ordering such that we don't clobber any registers + mentioned in the address. */ + if (reg_overlap_mentioned_p (dest1, word1)) + + { + emit_insn (gen_movdi (dest2, word1)); + emit_insn (gen_movdi (dest1, word0)); + } + else + { + emit_insn (gen_movdi (dest1, word0)); + emit_insn (gen_movdi (dest2, word1)); + } + DONE; +}) + +(define_split + [(set (match_operand:TI 0 "memory_operand" "") + (match_operand:TI 1 "register_operand" ""))] + "reload_completed + && offsettable_memref_p (operands[0]) + && (! TARGET_HARD_QUAD + || ! 
fp_register_operand (operands[1], TImode))" + [(clobber (const_int 0))] +{ + rtx set_src = operands[1]; + + emit_insn (gen_movdi (adjust_address (operands[0], DImode, 0), + gen_highpart (DImode, set_src))); + emit_insn (gen_movdi (adjust_address (operands[0], DImode, 8), + gen_lowpart (DImode, set_src))); + DONE; +}) + ;; Floating point move instructions @@ -2477,7 +2635,7 @@ (define_split dest2 = adjust_address (set_dest, DFmode, 8); break; default: - gcc_unreachable (); + gcc_unreachable (); } emit_insn (gen_movdf (dest1, CONST0_RTX (DFmode))); Index: config/sparc/sparc-protos.h =================================================================== --- config/sparc/sparc-protos.h (revision 191198) +++ config/sparc/sparc-protos.h (working copy) @@ -82,11 +82,6 @@ extern const char *output_probe_stack_ra extern bool emit_scc_insn (rtx []); extern void emit_conditional_branch_insn (rtx []); extern int mems_ok_for_ldd_peep (rtx, rtx, rtx); -extern int arith_double_4096_operand (rtx, enum machine_mode); -extern int arith_4096_operand (rtx, enum machine_mode); -extern int zero_operand (rtx, enum machine_mode); -extern int fp_zero_operand (rtx, enum machine_mode); -extern int reg_or_0_operand (rtx, enum machine_mode); extern int empty_delay_slot (rtx); extern int eligible_for_return_delay (rtx); extern int eligible_for_sibcall_delay (rtx); Index: config/sparc/sparc.c =================================================================== --- config/sparc/sparc.c (revision 191198) +++ config/sparc/sparc.c (working copy) @@ -1465,6 +1465,18 @@ sparc_expand_move (enum machine_mode mod sparc_emit_set_const64 (operands[0], operands[1]); return true; + case TImode: + { + rtx high, low; + /* TImode isn't available in 32-bit mode. 
*/ + split_double (operands[1], &high, &low); + emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode), + high)); + emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode), + low)); + } + return true; + default: gcc_unreachable (); } @@ -3492,6 +3504,10 @@ sparc_legitimate_address_p (enum machine && ! (TARGET_ARCH64 && TARGET_HARD_QUAD)) return 0; + /* Likewise for TImode, but in all cases. */ + if (mode == TImode) + return 0; + /* We prohibit REG + REG on ARCH32 if not optimizing for DFmode/DImode because then mem_min_alignment is likely to be zero after reload and the forced split would lack a matching splitter