For this simple testcase:

double sub (void) { return 0.0; }
Without the attached patch, an ARM compiler with NEON support enabled gives

vldr.64 d0, .L2

With the attached patch, an ARM compiler with NEON enabled gives

vmov.i64 d0, #0@ float

which is faster and smaller, as there is no load from a constant pool entry.

There are a few ways to implement this. I added a "neon" value to the "arch" attribute, which is enabled when TARGET_NEON is set. Another way to do this would be a new constraint, like Dg, that tests for both NEON and a constant 0. I don't see any mention of targets that only support single-float in the ARM Architecture Reference Manual (ARM ARM), so it isn't obvious how to handle that. I see no targets that support both NEON and single-float, but maybe I need to check for that anyway?

Most of the patch involves renumbering constraints and matching attributes. The new alternative w/G must come before w/UvF, or else we still get a constant pool reference. Otherwise the patch is pretty small and simple.

We can do the same thing in the movdi pattern. I haven't tried writing that yet.

This patch was tested with a bootstrap and make check in an armhf schroot on an xgene box. There were no regressions.

OK to check in?

Jim
* config/arm/arm.md: (arch): Add neon. (arch_enabled): Return yes for arch neon when TARGET_NEON. * config/arm/vfp.md (movdf_vfp): Add w/G as alternative 3. Add neon_move as type for alt 3. Add arch attr enabling alt 3 for neon. Emit vmov.i64 for alt 3. Renumber alternatives 3 to 8. Adjust attributes for alt renumbering. Mark alt 3 as non-predicable. (thumb2_movdf_vfp): Likewise. Index: config/arm/arm.md =================================================================== --- config/arm/arm.md (revision 235793) +++ config/arm/arm.md (working copy) @@ -121,7 +121,7 @@ ; arm_arch6. "v6t2" for Thumb-2 with arm_arch6. This attribute is ; used to compute attribute "enabled", use type "any" to enable an ; alternative in all cases. -(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2,armv6_or_vfpv3" +(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2,armv6_or_vfpv3,neon" (const_string "any")) (define_attr "arch_enabled" "no,yes" @@ -177,6 +177,10 @@ (and (eq_attr "arch" "armv6_or_vfpv3") (match_test "arm_arch6 || TARGET_VFP3")) (const_string "yes") + + (and (eq_attr "arch" "neon") + (match_test "TARGET_NEON")) + (const_string "yes") ] (const_string "no"))) Index: config/arm/vfp.md =================================================================== --- config/arm/vfp.md (revision 235793) +++ config/arm/vfp.md (working copy) @@ -394,8 +394,8 @@ ;; DFmode moves (define_insn "*movdf_vfp" - [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r, m,w,r") - (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w ,mF,r,w,r"))] + [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w,w ,Uv,r, m,w,r") + (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,G,UvF,w ,mF,r,w,r"))] "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP && ( register_operand (operands[0], DFmode) || register_operand (operands[1], DFmode))" @@ -410,16 +410,18 @@ case 2: gcc_assert 
(TARGET_VFP_DOUBLE); return \"vmov%?.f64\\t%P0, %1\"; - case 3: case 4: + case 3: + return \"vmov.i64\\t%P0, #0@ float\"; + case 4: case 5: return output_move_vfp (operands); - case 5: case 6: + case 6: case 7: return output_move_double (operands, true, NULL); - case 7: + case 8: if (TARGET_VFP_SINGLE) return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\"; else return \"vmov%?.f64\\t%P0, %P1\"; - case 8: + case 9: return \"#\"; default: gcc_unreachable (); @@ -426,23 +428,24 @@ } } " - [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,f_stored,\ + [(set_attr "type" "f_mcrr,f_mrrc,fconstd,neon_move,f_loadd,f_stored,\ load2,store2,ffarithd,multiple") - (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8) - (eq_attr "alternative" "7") + (set (attr "length") (cond [(eq_attr "alternative" "6,7,9") (const_int 8) + (eq_attr "alternative" "8") (if_then_else (match_test "TARGET_VFP_SINGLE") (const_int 8) (const_int 4))] (const_int 4))) - (set_attr "predicable" "yes") - (set_attr "pool_range" "*,*,*,1020,*,1020,*,*,*") - (set_attr "neg_pool_range" "*,*,*,1004,*,1004,*,*,*")] + (set_attr "predicable" "yes,yes,yes,no,yes,yes,yes,yes,yes,yes") + (set_attr "pool_range" "*,*,*,*,1020,*,1020,*,*,*") + (set_attr "neg_pool_range" "*,*,*,*,1004,*,1004,*,*,*") + (set_attr "arch" "any,any,any,neon,any,any,any,any,any,any")] ) (define_insn "*thumb2_movdf_vfp" - [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r ,m,w,r") - (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w, mF,r, w,r"))] + [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w,w ,Uv,r ,m,w,r") + (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,G,UvF,w, mF,r, w,r"))] "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP && ( register_operand (operands[0], DFmode) || register_operand (operands[1], DFmode))" @@ -457,11 +460,13 @@ case 2: gcc_assert (TARGET_VFP_DOUBLE); return \"vmov%?.f64\\t%P0, %1\"; - case 3: case 4: + case 3: + return \"vmov.i64\\t%P0, #0@ float\"; 
+ case 4: case 5: return output_move_vfp (operands); - case 5: case 6: case 8: + case 6: case 7: case 9: return output_move_double (operands, true, NULL); - case 7: + case 8: if (TARGET_VFP_SINGLE) return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\"; else @@ -471,17 +476,18 @@ } } " - [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,\ + [(set_attr "type" "f_mcrr,f_mrrc,fconstd,neon_move,f_loadd,\ f_stored,load2,store2,ffarithd,multiple") - (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8) - (eq_attr "alternative" "7") + (set (attr "length") (cond [(eq_attr "alternative" "6,7,9") (const_int 8) + (eq_attr "alternative" "8") (if_then_else (match_test "TARGET_VFP_SINGLE") (const_int 8) (const_int 4))] (const_int 4))) - (set_attr "pool_range" "*,*,*,1018,*,4094,*,*,*") - (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")] + (set_attr "pool_range" "*,*,*,*,1018,*,4094,*,*,*") + (set_attr "neg_pool_range" "*,*,*,*,1008,*,0,*,*,*") + (set_attr "arch" "any,any,any,neon,any,any,any,any,any,any")] )