Wilco Dijkstra <[email protected]> writes:
> Hi Richard,
>
>> The idea was that, if we did the split during expand, the movsf/df
>> define_insns would then only accept the immediates that their
>> constraints can handle.
>
> Right, always disallowing these immediates works fine too (it seems
> reload doesn't require all immediates to be valid), and then the split is
> redundant. I've updated the patch:
>
>
> v2: split during expand, remove movsf/df splitter
>
> The IRA combine_and_move pass runs if the scheduler is disabled and
> aggressively combines moves.
> combines moves. The movsf/df patterns allow all FP immediates since they rely
> on a split pattern. However splits do not happen during IRA, so the result is
> extra literal loads. To avoid this, split early during expand and block
> creation of FP immediates that need this split.
>
> double f(void) { return 128.0; }
>
> -O2 -fno-schedule-insns gives:
>
> adrp x0, .LC0
> ldr d0, [x0, #:lo12:.LC0]
> ret
>
> After patch:
>
> mov x0, 4638707616191610880
> fmov d0, x0
> ret
>
> Passes bootstrap & regress, OK for commit?
>
> gcc/ChangeLog:
> * config/aarch64/aarch64.md (movhf_aarch64): Use
> aarch64_valid_fp_move.
> (movsf_aarch64): Likewise.
> (movdf_aarch64): Likewise.
> * config/aarch64/aarch64.cc (aarch64_valid_fp_move): New function.
> * config/aarch64/aarch64-protos.h (aarch64_valid_fp_move): Likewise.
Thanks, this mostly looks good, but...
> ---
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h
> b/gcc/config/aarch64/aarch64-protos.h
> index
> 05d3258abf7b43342d9058dd1b365a1a0870cdc2..6da81556110c978a9de6f6fad5775c9d77771b10
> 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -843,6 +843,7 @@ bool aarch64_advsimd_struct_mode_p (machine_mode mode);
> opt_machine_mode aarch64_vq_mode (scalar_mode);
> opt_machine_mode aarch64_full_sve_mode (scalar_mode);
> bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode);
> +bool aarch64_valid_fp_move (rtx, rtx, machine_mode);
> bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
> bool aarch64_const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT,
> HOST_WIDE_INT);
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index
> 00bcf18ae97cea94227c00798b7951daa255d360..ec2d391d1e3eb9bd28f66fb6ee85311b4ced4c94
> 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -11175,6 +11175,36 @@ aarch64_can_const_movi_rtx_p (rtx x, machine_mode
> mode)
> return aarch64_simd_valid_mov_imm (v_op);
> }
>
> +/* Return TRUE if DST and SRC with mode MODE are a valid fp move.  */
> +bool
> +aarch64_valid_fp_move (rtx dst, rtx src, machine_mode mode)
> +{
> + if (!TARGET_FLOAT)
> + return false;
> +
> + if (aarch64_reg_or_fp_zero (src, mode))
> + return true;
> +
> + if (!register_operand (dst, mode))
> + return false;
> +
> + if (MEM_P (src))
> + return true;
> +
> + if (!DECIMAL_FLOAT_MODE_P (mode))
> + {
> + if (aarch64_can_const_movi_rtx_p (src, mode)
> + || aarch64_float_const_representable_p (src)
> + || aarch64_float_const_zero_rtx_p (src))
> + return true;
> +
> + /* Block FP immediates which are split during expand. */
> + if (aarch64_float_const_rtx_p (src))
> + return false;
> + }
> +
> + return can_create_pseudo_p ();
...I still think we should avoid testing can_create_pseudo_p.
Does it work with the last part replaced by:
if (!DECIMAL_FLOAT_MODE_P (mode))
{
if (aarch64_can_const_movi_rtx_p (src, mode)
|| aarch64_float_const_representable_p (src)
|| aarch64_float_const_zero_rtx_p (src))
return true;
}
return false;
? If so, the patch is OK with that change from my POV, but please wait
till Thursday morning for others' opinions.
Richard
> +}
>
> /* Return the fixed registers used for condition codes. */
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index
> 8d10197c9e8dd66b7a30a1034b629297b9992661..b865ae2ff5e23edc4d8990e1efd4a51dd195f41f
> 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1636,14 +1636,33 @@ (define_expand "mov<mode>"
> && ! (GET_CODE (operands[1]) == CONST_DOUBLE
> && aarch64_float_const_zero_rtx_p (operands[1])))
> operands[1] = force_reg (<MODE>mode, operands[1]);
> +
> + if (!DECIMAL_FLOAT_MODE_P (<MODE>mode)
> + && GET_CODE (operands[1]) == CONST_DOUBLE
> + && can_create_pseudo_p ()
> + && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
> + && !aarch64_float_const_representable_p (operands[1])
> + && !aarch64_float_const_zero_rtx_p (operands[1])
> + && aarch64_float_const_rtx_p (operands[1]))
> + {
> + unsigned HOST_WIDE_INT ival;
> + bool res = aarch64_reinterpret_float_as_int (operands[1], &ival);
> + gcc_assert (res);
> +
> + machine_mode intmode
> + = int_mode_for_size (GET_MODE_BITSIZE (<MODE>mode), 0).require ();
> + rtx tmp = gen_reg_rtx (intmode);
> + emit_move_insn (tmp, gen_int_mode (ival, intmode));
> + emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
> + DONE;
> + }
> }
> )
>
> (define_insn "*mov<mode>_aarch64"
> [(set (match_operand:HFBF 0 "nonimmediate_operand")
> (match_operand:HFBF 1 "general_operand"))]
> - "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
> - || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
> + "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)"
> {@ [ cons: =0 , 1 ; attrs: type , arch ]
> [ w , Y ; neon_move , simd ] movi\t%0.4h, #0
> [ w , ?rY ; f_mcr , fp16 ] fmov\t%h0, %w1
> @@ -1666,8 +1685,7 @@ (define_insn "*mov<mode>_aarch64"
> (define_insn "*mov<mode>_aarch64"
> [(set (match_operand:SFD 0 "nonimmediate_operand")
> (match_operand:SFD 1 "general_operand"))]
> - "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
> - || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
> + "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)"
> {@ [ cons: =0 , 1 ; attrs: type , arch ]
> [ w , Y ; neon_move , simd ] movi\t%0.2s, #0
> [ w , ?rY ; f_mcr , * ] fmov\t%s0, %w1
> @@ -1687,8 +1705,7 @@ (define_insn "*mov<mode>_aarch64"
> (define_insn "*mov<mode>_aarch64"
> [(set (match_operand:DFD 0 "nonimmediate_operand")
> (match_operand:DFD 1 "general_operand"))]
> - "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
> - || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
> + "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)"
> {@ [ cons: =0 , 1 ; attrs: type , arch ]
> [ w , Y ; neon_move , simd ] movi\t%d0, #0
> [ w , ?rY ; f_mcr , * ] fmov\t%d0, %x1
> @@ -1705,27 +1722,6 @@ (define_insn "*mov<mode>_aarch64"
> }
> )
>
> -(define_split
> - [(set (match_operand:GPF_HF 0 "nonimmediate_operand")
> - (match_operand:GPF_HF 1 "const_double_operand"))]
> - "can_create_pseudo_p ()
> - && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
> - && !aarch64_float_const_representable_p (operands[1])
> - && !aarch64_float_const_zero_rtx_p (operands[1])
> - && aarch64_float_const_rtx_p (operands[1])"
> - [(const_int 0)]
> - {
> - unsigned HOST_WIDE_INT ival;
> - if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> - FAIL;
> -
> - rtx tmp = gen_reg_rtx (<FCVT_TARGET>mode);
> - emit_move_insn (tmp, gen_int_mode (ival, <FCVT_TARGET>mode));
> - emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
> - DONE;
> - }
> -)
> -
> (define_insn "*mov<mode>_aarch64"
> [(set (match_operand:TFD 0
> "nonimmediate_operand" "=w,w,?r ,w ,?r,w,?w,w,m,?r,m ,m")