On Mon, Jun 26, 2017 at 11:49:42AM +0100, Tamar Christina wrote: > Hi All, > > I've updated patch accordingly. > > This mostly involves removing the loop to create the ival > and removing the *2 code and instead defaulting to 64bit > and switching to 128 when needed. > > Regression tested on aarch64-none-linux-gnu and no regressions. > > OK for trunk?
Almost, with a couple of small changes and clarifications. > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index > 04417dcd609f6e8ff594a9c5853b3143696d3208..efb027f7fa9b9750b019c529bbcfc8b73dbaf804 > 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -4668,6 +4670,62 @@ aarch64_legitimize_address_displacement (rtx *disp, > rtx *off, machine_mode mode) > return true; > } > > +/* Return the binary representation of floating point constant VALUE in > INTVAL. > + If the value cannot be converted, return false without setting INTVAL. > + The conversion is done in the given MODE. */ > +bool > +aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval) > +{ > + machine_mode mode = GET_MODE (value); > + if (GET_CODE (value) != CONST_DOUBLE > + || !SCALAR_FLOAT_MODE_P (mode) > + || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT) > + return false; > + > + unsigned HOST_WIDE_INT ival = 0; > + > + /* Only support up to DF mode. */ > + gcc_assert (GET_MODE_BITSIZE (mode) <= 64); GET_MODE_BITSIZE (DFmode) would be more self-documenting. > + > + long res[2]; > + real_to_target (res, > + CONST_DOUBLE_REAL_VALUE (value), > + REAL_MODE_FORMAT (mode)); > + > + ival = zext_hwi (res[0], 32); > + if (GET_MODE_BITSIZE (mode) == 64) Likewise here. > + ival |= (zext_hwi (res[1], 32) << 32); > + > + *intval = ival; > + return true; > +} > + > @@ -4680,6 +4738,46 @@ aarch64_float_const_zero_rtx_p (rtx x) > return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0); > } > > +/* Return TRUE if rtx X is immediate constant that fits in a single > + MOVI immediate operation. */ > +bool > +aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode) > +{ > + if (!TARGET_SIMD) > + return false; > + > + machine_mode vmode, imode; > + unsigned HOST_WIDE_INT ival; > + > + /* Don't write float constants out to memory. */ This comment seems (only a little) out of line with the code below - "Don't write float constants out to memory if we can represent them as integers." > + if (GET_CODE (x) == CONST_DOUBLE > + && SCALAR_FLOAT_MODE_P (mode)) > + { > + if (!aarch64_reinterpret_float_as_int (x, &ival)) > + return false; > + > + imode = int_mode_for_mode (mode); > + } > + else if (GET_CODE (x) == CONST_INT > + && SCALAR_INT_MODE_P (mode)) > + { > + imode = mode; > + ival = INTVAL (x); > + } > + else > + return false; > + > + /* use a 64 bit mode for everything except for DI/DF mode, where we use > + a 128 bit vector mode. */ > + int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64; > + > + vmode = aarch64_simd_container_mode (imode, width); > + rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival); > + > + return aarch64_simd_valid_immediate (v_op, vmode, false, NULL); I still wonder whether we could rewrite aarch64_simd_valid)_immediate to avoid the need for a 128-bit vector here - 64-bits are good enough. This doesn't need fixed for this patch, but it would make for a small optimisation. > +} > + > + > /* Return the fixed registers used for condition codes. */ > > static bool > @@ -5857,12 +5955,6 @@ aarch64_preferred_reload_class (rtx x, reg_class_t > regclass) > return NO_REGS; > } > > - /* If it's an integer immediate that MOVI can't handle, then > - FP_REGS is not an option, so we return NO_REGS instead. */ > - if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS) > - && !aarch64_simd_imm_scalar_p (x, GET_MODE (x))) > - return NO_REGS; > - I don't understand this relaxation could you explain what this achieves and why it is safe in this patch? > /* Register eliminiation can result in a request for > SP+constant->FP_REGS. We cannot support such operations which > use SP as source and an FP_REG as destination, so reject out > @@ -6773,26 +6865,44 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int > outer ATTRIBUTE_UNUSED, > return true; > > case CONST_DOUBLE: > + > + /* First determine number of instructions to do the move > + as an integer constant. */ > + if (!aarch64_float_const_representable_p (x) > + && !aarch64_can_const_movi_rtx_p (x, mode) > + && aarch64_float_const_rtx_p (x)) > + { > + unsigned HOST_WIDE_INT ival; > + bool succeed = aarch64_reinterpret_float_as_int (x, &ival); > + gcc_assert (succeed); > + > + machine_mode imode = mode == HFmode ? SImode : int_mode_for_mode > (mode); > + int ncost = aarch64_internal_mov_immediate > + (NULL_RTX, gen_int_mode (ival, imode), false, imode); > + *cost += COSTS_N_INSNS (ncost); > + return true; > + } > + > if (speed) > { > - /* mov[df,sf]_aarch64. */ > - if (aarch64_float_const_representable_p (x)) > - /* FMOV (scalar immediate). */ > - *cost += extra_cost->fp[mode == DFmode].fpconst; > - else if (!aarch64_float_const_zero_rtx_p (x)) > - { > - /* This will be a load from memory. */ > - if (mode == DFmode) > + /* mov[df,sf]_aarch64. */ > + if (aarch64_float_const_representable_p (x)) > + /* FMOV (scalar immediate). */ > + *cost += extra_cost->fp[mode == DFmode].fpconst; > + else if (!aarch64_float_const_zero_rtx_p (x)) > + { > + /* This will be a load from memory. */ > + if (mode == DFmode) > *cost += extra_cost->ldst.loadd; > - else > + else > *cost += extra_cost->ldst.loadf; > - } > - else > - /* Otherwise this is +0.0. We get this using MOVI d0, #0 > - or MOV v0.s[0], wzr - neither of which are modeled by the > - cost tables. Just use the default cost. */ > - { > - } > + } > + else > + /* Otherwise this is +0.0. We get this using MOVI d0, #0 > + or MOV v0.s[0], wzr - neither of which are modeled by the > + cost tables. Just use the default cost. */ > + { > + } Why reindent this code? From what I can tell this makes the code less conformant to GNU style. > } > > return true; > @@ -6974,7 +7084,7 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer > ATTRIBUTE_UNUSED, > if (speed) > *cost += extra_cost->fp[mode == DFmode].compare; > > - if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1)) > + if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1)) Likewise here? > { > *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed); > /* FCMP supports constant 0.0 for no extra cost. */ > diff --git a/gcc/config/aarch64/predicates.md > b/gcc/config/aarch64/predicates.md > index > cd7ded986630c14ed6d42618b2a1f9baa0cbd192..6992c82fa790eac34669fcc5b030e395ad332201 > 100644 > --- a/gcc/config/aarch64/predicates.md > +++ b/gcc/config/aarch64/predicates.md > @@ -53,6 +53,11 @@ > (ior (match_operand 0 "register_operand") > (match_test "op == const0_rtx")))) > > +(define_predicate "aarch64_reg_or_fp_float" > + (ior (match_operand 0 "register_operand") > + (and (match_code "const_double") > + (match_test "aarch64_float_const_rtx_p (op)")))) > + Unused? > (define_predicate "aarch64_reg_or_fp_zero" > (ior (match_operand 0 "register_operand") > (and (match_code "const_double") Thanks, James