movti lacked a way of zeroing an FPR, meaning that we'd do:

        mov     x0, 0
        mov     x1, 0
        fmov    d0, x0
        fmov    v0.d[1], x1

instead of just:

        movi    v0.2d, #0

movtf had the opposite problem for GPRs; we'd generate:

        movi    v0.2d, #0
        fmov    x0, d0
        fmov    x1, v0.d[1]

instead of just:

        mov     x0, 0
        mov     x1, 0

Also, there was an unnecessary earlyclobber on the GPR<-GPR movtf
alternative (but not the movti one).  The splitter handles overlap
correctly.

The TF splitter used aarch64_reg_or_imm, but the _imm part only
accepts integer constants, not floating-point ones.  The patch
changes it to nonmemory_operand instead.

Tested on aarch64-linux-gnu, pushed.

Richard


gcc/
        * config/aarch64/aarch64.c (aarch64_split_128bit_move_p): Add a
        function comment.  Tighten check for FP moves.
        * config/aarch64/aarch64.md (*movti_aarch64): Add a w<-Z alternative.
        (*movtf_aarch64): Handle r<-Y like r<-r.  Remove unnecessary
        earlyclobber.  Change splitter predicate from aarch64_reg_or_imm
        to nonmemory_operand.

gcc/testsuite/
        * gcc.target/aarch64/movtf_1.c: New test.
        * gcc.target/aarch64/movti_1.c: Likewise.
---
 gcc/config/aarch64/aarch64.c               |  9 ++-
 gcc/config/aarch64/aarch64.md              | 17 +++--
 gcc/testsuite/gcc.target/aarch64/movtf_1.c | 87 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/movti_1.c | 87 ++++++++++++++++++++++
 4 files changed, 190 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_1.c

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 491fc582dab..9e88438b3c3 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3422,11 +3422,16 @@ aarch64_split_128bit_move (rtx dst, rtx src)
     }
 }
 
+/* Return true if we should split a move from 128-bit value SRC
+   to 128-bit register DST.  */
+
 bool
 aarch64_split_128bit_move_p (rtx dst, rtx src)
 {
-  return (! REG_P (src)
-         || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
+  if (FP_REGNUM_P (REGNO (dst)))
+    return REG_P (src) && !FP_REGNUM_P (REGNO (src));
+  /* All moves to GPRs need to be split.  */
+  return true;
 }
 
 /* Split a complex SIMD combine.  */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 19ec9e33f9f..78fe7c43a00 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1361,13 +1361,14 @@ (define_expand "movti"
 
 (define_insn "*movti_aarch64"
   [(set (match_operand:TI 0
-        "nonimmediate_operand"  "=   r,w, r,w,r,m,m,w,m")
+        "nonimmediate_operand"  "=   r,w,w, r,w,r,m,m,w,m")
        (match_operand:TI 1
-        "aarch64_movti_operand" " rUti,r, w,w,m,r,Z,m,w"))]
+        "aarch64_movti_operand" " rUti,Z,r, w,w,m,r,Z,m,w"))]
   "(register_operand (operands[0], TImode)
     || aarch64_reg_or_zero (operands[1], TImode))"
   "@
    #
+   movi\\t%0.2d, #0
    #
    #
    mov\\t%0.16b, %1.16b
@@ -1376,11 +1377,11 @@ (define_insn "*movti_aarch64"
    stp\\txzr, xzr, %0
    ldr\\t%q0, %1
    str\\t%q1, %0"
-  [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
+  [(set_attr "type" "multiple,neon_move,f_mcr,f_mrc,neon_logic_q, \
                             load_16,store_16,store_16,\
                              load_16,store_16")
-   (set_attr "length" "8,8,8,4,4,4,4,4,4")
-   (set_attr "arch" "*,*,*,simd,*,*,*,fp,fp")]
+   (set_attr "length" "8,4,8,8,4,4,4,4,4,4")
+   (set_attr "arch" "*,simd,*,*,simd,*,*,*,fp,fp")]
 )
 
 ;; Split a TImode register-register or register-immediate move into
@@ -1511,9 +1512,9 @@ (define_split
 
 (define_insn "*movtf_aarch64"
   [(set (match_operand:TF 0
-        "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r,m ,m")
+        "nonimmediate_operand" "=w,?r ,w ,?r,w,?w,w,m,?r,m ,m")
        (match_operand:TF 1
-        "general_operand"      " w,?r, ?r,w ,Y,Y ,m,w,m ,?r,Y"))]
+        "general_operand"      " w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y"))]
   "TARGET_FLOAT && (register_operand (operands[0], TFmode)
     || aarch64_reg_or_fp_zero (operands[1], TFmode))"
   "@
@@ -1536,7 +1537,7 @@ (define_insn "*movtf_aarch64"
 
 (define_split
    [(set (match_operand:TF 0 "register_operand" "")
-        (match_operand:TF 1 "aarch64_reg_or_imm" ""))]
+        (match_operand:TF 1 "nonmemory_operand" ""))]
   "reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])"
   [(const_int 0)]
   {
diff --git a/gcc/testsuite/gcc.target/aarch64/movtf_1.c b/gcc/testsuite/gcc.target/aarch64/movtf_1.c
new file mode 100644
index 00000000000..570de931389
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movtf_1.c
@@ -0,0 +1,87 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** zero_q:
+**     movi    v0.2d, #0
+**     ret
+*/
+void
+zero_q ()
+{
+  register _Float128 q0 asm ("q0");
+  q0 = 0;
+  asm volatile ("" :: "w" (q0));
+}
+
+/*
+** load_q:
+**     ldr     q0, \[x0\]
+**     ret
+*/
+void
+load_q (_Float128 *ptr)
+{
+  register _Float128 q0 asm ("q0");
+  q0 = *ptr;
+  asm volatile ("" :: "w" (q0));
+}
+
+/*
+** store_q:
+**     str     q0, \[x0\]
+**     ret
+*/
+void
+store_q (_Float128 *ptr)
+{
+  register _Float128 q0 asm ("q0");
+  asm volatile ("" : "=w" (q0));
+  *ptr = q0;
+}
+
+/*
+** zero_x:
+** (
+**     mov     x0, #?0
+**     mov     x1, #?0
+** |
+**     mov     x1, #?0
+**     mov     x0, #?0
+** )
+**     ret
+*/
+void
+zero_x ()
+{
+  register _Float128 x0 asm ("x0");
+  x0 = 0;
+  asm volatile ("" :: "r" (x0));
+}
+
+/*
+** load_x:
+**     ldp     x2, x3, \[x0\]
+**     ret
+*/
+void
+load_x (_Float128 *ptr)
+{
+  register _Float128 x2 asm ("x2");
+  x2 = *ptr;
+  asm volatile ("" :: "r" (x2));
+}
+
+/*
+** store_x:
+**     stp     x2, x3, \[x0\]
+**     ret
+*/
+void
+store_x (_Float128 *ptr)
+{
+  register _Float128 x2 asm ("x2");
+  asm volatile ("" : "=r" (x2));
+  *ptr = x2;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/movti_1.c b/gcc/testsuite/gcc.target/aarch64/movti_1.c
new file mode 100644
index 00000000000..160e1acd281
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movti_1.c
@@ -0,0 +1,87 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** zero_q:
+**     movi    v0.2d, #0
+**     ret
+*/
+void
+zero_q ()
+{
+  register __int128_t q0 asm ("q0");
+  q0 = 0;
+  asm volatile ("" :: "w" (q0));
+}
+
+/*
+** load_q:
+**     ldr     q0, \[x0\]
+**     ret
+*/
+void
+load_q (__int128_t *ptr)
+{
+  register __int128_t q0 asm ("q0");
+  q0 = *ptr;
+  asm volatile ("" :: "w" (q0));
+}
+
+/*
+** store_q:
+**     str     q0, \[x0\]
+**     ret
+*/
+void
+store_q (__int128_t *ptr)
+{
+  register __int128_t q0 asm ("q0");
+  asm volatile ("" : "=w" (q0));
+  *ptr = q0;
+}
+
+/*
+** zero_x:
+** (
+**     mov     x0, #?0
+**     mov     x1, #?0
+** |
+**     mov     x1, #?0
+**     mov     x0, #?0
+** )
+**     ret
+*/
+void
+zero_x ()
+{
+  register __int128_t x0 asm ("x0");
+  x0 = 0;
+  asm volatile ("" :: "r" (x0));
+}
+
+/*
+** load_x:
+**     ldp     x2, x3, \[x0\]
+**     ret
+*/
+void
+load_x (__int128_t *ptr)
+{
+  register __int128_t x2 asm ("x2");
+  x2 = *ptr;
+  asm volatile ("" :: "r" (x2));
+}
+
+/*
+** store_x:
+**     stp     x2, x3, \[x0\]
+**     ret
+*/
+void
+store_x (__int128_t *ptr)
+{
+  register __int128_t x2 asm ("x2");
+  asm volatile ("" : "=r" (x2));
+  *ptr = x2;
+}

Reply via email to