Hi Kyrill,

Many thanks for the review!

The 06/19/2018 16:47, Kyrill Tkachov wrote:
> Hi Tamar,
> 
> On 19/06/18 15:07, Tamar Christina wrote:
> > Hi All,
> >
> > This fixes a regression where we don't have an instruction for pre Armv8.2-a
> > to do a move of an fp16 value from a GP reg to a SIMD reg.
> >
> > This patch adds that pattern to movhf_aarch64 using a dup and only selectes 
> > it
> > using a very low priority.
> >
> > This fixes an ICE at -O0.
> >
> > Regtested on aarch64-none-elf and no issues.
> > Bootstrapped on aarch64-none-linux-gnu and no issues.
> >
> > Ok for master?
> >
> > gcc/
> > 2018-06-19  Tamar Christina  <tamar.christ...@arm.com>
> >
> >         PR target/85769
> >         * config/aarch64/aarch64.md (*movhf_aarch64): Add dup v0.4h pattern.
> >
> > gcc/testsuite/
> > 2018-06-19  Tamar Christina  <tamar.christ...@arm.com>
> >
> >         PR target/85769
> >         * gcc.target/aarch64/f16_mov_immediate_3.c: New.
> > Thanks,
> > Tamar
> >
> > -- 

> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..8bf9695e708ad35d9592fb667c8305438e9e031a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/f16_mov_immediate_3.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O0" } */
> +/* { dg-add-options arm_v8_neon } */
> +
> 
> This is an arm testsuite directive. I believe you don't want it here...

Ah indeed, it works because arm_v8_neon also has an empty case version.

I have removed it. Sorry for missing this.

Kind Regards,
Tamar

> 
> Thanks,
> Kyrill
> 
> +extern __fp16 foo ();
> +
> +__fp16 f4 ()
> +{
> +  __fp16 a = 0;
> +  __fp16 b = 1;
> +  __fp16 c = 2;
> +  __fp16 d = 4;
> +
> +  __fp16 z = a + b;
> +  z = z + c;
> +  z = z - d;
> +  return z;
> +}
> +
> +/* { dg-final { scan-assembler-times {dup\tv[0-9]+.4h, w[0-9]+} 1 } } */
> 
> 

-- 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 32a0e1f3685746b9a7d70239745586d0f0fa7ee1..c1c12d46729ada6ed147602b8d3fbbf9f797db81 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1155,13 +1155,14 @@
 )
 
 (define_insn "*movhf_aarch64"
-  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w  ,w,m,r,m ,r")
-	(match_operand:HF 1 "general_operand"      "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r"))]
+  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w  , w,?r,w,w  ,w  ,w,m,r,m ,r")
+	(match_operand:HF 1 "general_operand"      "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))]
   "TARGET_FLOAT && (register_operand (operands[0], HFmode)
     || aarch64_reg_or_fp_zero (operands[1], HFmode))"
   "@
    movi\\t%0.4h, #0
    fmov\\t%h0, %w1
+   dup\\t%w0.4h, %w1
    umov\\t%w0, %1.h[0]
    mov\\t%0.h[0], %1.h[0]
    fmov\\t%h0, %1
@@ -1171,10 +1172,10 @@
    ldrh\\t%w0, %1
    strh\\t%w1, %0
    mov\\t%w0, %w1"
-  [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \
+  [(set_attr "type" "neon_move,f_mcr,neon_move,neon_to_gp, neon_move,fconsts, \
 		     neon_move,f_loads,f_stores,load_4,store_4,mov_reg")
-   (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")
-   (set_attr "fp16"   "*,yes,*,*,yes,*,*,*,*,*,*")]
+   (set_attr "simd" "yes,*,yes,yes,yes,*,yes,*,*,*,*,*")
+   (set_attr "fp16"   "*,yes,*,*,*,yes,*,*,*,*,*,*")]
 )
 
 (define_insn "*movsf_aarch64"
diff --git a/gcc/testsuite/gcc.target/aarch64/f16_mov_immediate_3.c b/gcc/testsuite/gcc.target/aarch64/f16_mov_immediate_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..6dc325b10b76b2a04f7081f892ce175622eaf49d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/f16_mov_immediate_3.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O0" } */
+
+extern __fp16 foo ();
+
+__fp16 f4 ()
+{
+  __fp16 a = 0;
+  __fp16 b = 1;
+  __fp16 c = 2;
+  __fp16 d = 4;
+
+  __fp16 z = a + b;
+  z = z + c;
+  z = z - d;
+  return z;
+}
+
+/* { dg-final { scan-assembler-times {dup\tv[0-9]+.4h, w[0-9]+} 1 } } */

Reply via email to