Hi Kyrill, Many thanks for the review!
The 06/19/2018 16:47, Kyrill Tkachov wrote: > Hi Tamar, > > On 19/06/18 15:07, Tamar Christina wrote: > > Hi All, > > > > This fixes a regression where we don't have an instruction for pre Armv8.2-a > > to do a move of an fp16 value from a GP reg to a SIMD reg. > > > > This patch adds that pattern to movhf_aarch64 using a dup and only selectes > > it > > using a very low priority. > > > > This fixes an ICE at -O0. > > > > Regtested on aarch64-none-elf and no issues. > > Bootstrapped on aarch64-none-linux-gnu and no issues. > > > > Ok for master? > > > > gcc/ > > 2018-06-19 Tamar Christina <tamar.christ...@arm.com> > > > > PR target/85769 > > * config/aarch64/aarch64.md (*movhf_aarch64): Add dup v0.4h pattern. > > > > gcc/testsuite/ > > 2018-06-19 Tamar Christina <tamar.christ...@arm.com> > > > > PR target/85769 > > * gcc.target/aarch64/f16_mov_immediate_3.c: New. > > Thanks, > > Tamar > > > > -- > new file mode 100644 > index > 0000000000000000000000000000000000000000..8bf9695e708ad35d9592fb667c8305438e9e031a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/f16_mov_immediate_3.c > @@ -0,0 +1,20 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O0" } */ > +/* { dg-add-options arm_v8_neon } */ > + > > This is an arm testsuite directive. I believe you don't want it here... Ah indeed, it works because arm_v8_neon also has an empty case version. I have removed it. Sorry for missing this. Kind Regards, Tamar > > Thanks, > Kyrill > > +extern __fp16 foo (); > + > +__fp16 f4 () > +{ > + __fp16 a = 0; > + __fp16 b = 1; > + __fp16 c = 2; > + __fp16 d = 4; > + > + __fp16 z = a + b; > + z = z + c; > + z = z - d; > + return z; > +} > + > +/* { dg-final { scan-assembler-times {dup\tv[0-9]+.4h, w[0-9]+} 1 } } */ > > --
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 32a0e1f3685746b9a7d70239745586d0f0fa7ee1..c1c12d46729ada6ed147602b8d3fbbf9f797db81 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1155,13 +1155,14 @@ ) (define_insn "*movhf_aarch64" - [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r") - (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r"))] + [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w , w,?r,w,w ,w ,w,m,r,m ,r") + (match_operand:HF 1 "general_operand" "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))] "TARGET_FLOAT && (register_operand (operands[0], HFmode) || aarch64_reg_or_fp_zero (operands[1], HFmode))" "@ movi\\t%0.4h, #0 fmov\\t%h0, %w1 + dup\\t%w0.4h, %w1 umov\\t%w0, %1.h[0] mov\\t%0.h[0], %1.h[0] fmov\\t%h0, %1 @@ -1171,10 +1172,10 @@ ldrh\\t%w0, %1 strh\\t%w1, %0 mov\\t%w0, %w1" - [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \ + [(set_attr "type" "neon_move,f_mcr,neon_move,neon_to_gp, neon_move,fconsts, \ neon_move,f_loads,f_stores,load_4,store_4,mov_reg") - (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*") - (set_attr "fp16" "*,yes,*,*,yes,*,*,*,*,*,*")] + (set_attr "simd" "yes,*,yes,yes,yes,*,yes,*,*,*,*,*") + (set_attr "fp16" "*,yes,*,*,*,yes,*,*,*,*,*,*")] ) (define_insn "*movsf_aarch64" diff --git a/gcc/testsuite/gcc.target/aarch64/f16_mov_immediate_3.c b/gcc/testsuite/gcc.target/aarch64/f16_mov_immediate_3.c new file mode 100644 index 0000000000000000000000000000000000000000..6dc325b10b76b2a04f7081f892ce175622eaf49d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/f16_mov_immediate_3.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O0" } */ + +extern __fp16 foo (); + +__fp16 f4 () +{ + __fp16 a = 0; + __fp16 b = 1; + __fp16 c = 2; + __fp16 d = 4; + + __fp16 z = a + b; + z = z + c; + z = z - d; + return z; +} + +/* { dg-final { scan-assembler-times {dup\tv[0-9]+.4h, w[0-9]+} 1 } } */