A MOPS memmove may corrupt registers because the input operands are not
copied to temporary registers.
registers.  Fix this by calling aarch64_expand_cpymem which does this.  Also 
fix an issue with
STRICT_ALIGNMENT being ignored if TARGET_MOPS is true, and avoid crashing or 
generating a huge
expansion if aarch64_mops_memcpy_size_threshold is large.

Passes regress/bootstrap, OK for commit?

gcc/ChangeLog:
        PR target/111121
        * config/aarch64/aarch64.md (cpymemdi): Remove STRICT_ALIGNMENT, add 
param for memmove.
        (aarch64_movmemdi): Add new expander similar to aarch64_cpymemdi.
        (movmemdi): Like cpymemdi, call aarch64_expand_cpymem for correct
expansion.
        * config/aarch64/aarch64.cc (aarch64_expand_cpymem_mops): Add support 
for memmove.
        (aarch64_expand_cpymem): Add support for memmove. Handle 
STRICT_ALIGNMENT correctly.
        Handle TARGET_MOPS size selection correctly.
        * config/aarch64/aarch64-protos.h (aarch64_expand_cpymem): Update
prototype.

gcc/testsuite/ChangeLog:
        PR target/111121
        * gcc.target/aarch64/mops_4.c: Add memmove testcases.

---
diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 
70303d6fd953e0c397b9138ede8858c2db2e53db..97375e81cbda078847af83bf5dd4e0d7673d6af4
 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -765,7 +765,7 @@ bool aarch64_emit_approx_div (rtx, rtx, rtx);
 bool aarch64_emit_approx_sqrt (rtx, rtx, bool);
 tree aarch64_vector_load_decl (tree);
 void aarch64_expand_call (rtx, rtx, rtx, bool);
-bool aarch64_expand_cpymem (rtx *);
+bool aarch64_expand_cpymem (rtx *, bool);
 bool aarch64_expand_setmem (rtx *);
 bool aarch64_float_const_zero_rtx_p (rtx);
 bool aarch64_float_const_rtx_p (rtx);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
eba5d4a7e04b7af82437453a691d5607d98133c9..5e8d0a0c91bc7719de2a8c5627b354cf905a4db0
 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -25135,10 +25135,11 @@ aarch64_copy_one_block_and_progress_pointers (rtx 
*src, rtx *dst,
   *dst = aarch64_progress_pointer (*dst);
 }
 
-/* Expand a cpymem using the MOPS extension.  OPERANDS are taken
-   from the cpymem pattern.  Return true iff we succeeded.  */
+/* Expand a cpymem/movmem using the MOPS extension.  OPERANDS are taken
+   from the cpymem/movmem pattern.  IS_MEMMOVE is true if this is a memmove
+   rather than memcpy.  Return true iff we succeeded.  */
 static bool
-aarch64_expand_cpymem_mops (rtx *operands)
+aarch64_expand_cpymem_mops (rtx *operands, bool is_memmove)
 {
   if (!TARGET_MOPS)
     return false;
@@ -25150,17 +25151,19 @@ aarch64_expand_cpymem_mops (rtx *operands)
   rtx dst_mem = replace_equiv_address (operands[0], dst_addr);
   rtx src_mem = replace_equiv_address (operands[1], src_addr);
   rtx sz_reg = copy_to_mode_reg (DImode, operands[2]);
-  emit_insn (gen_aarch64_cpymemdi (dst_mem, src_mem, sz_reg));
-
+  if (is_memmove)
+    emit_insn (gen_aarch64_movmemdi (dst_mem, src_mem, sz_reg));
+  else
+    emit_insn (gen_aarch64_cpymemdi (dst_mem, src_mem, sz_reg));
   return true;
 }
 
-/* Expand cpymem, as if from a __builtin_memcpy.  Return true if
-   we succeed, otherwise return false, indicating that a libcall to
-   memcpy should be emitted.  */
-
+/* Expand cpymem/movmem, as if from a __builtin_memcpy/memmove.
+   OPERANDS are taken from the cpymem/movmem pattern.  IS_MEMMOVE is true
+   if this is a memmove rather than memcpy.  Return true if we succeed,
+   otherwise return false, indicating that a libcall should be emitted.  */
 bool
-aarch64_expand_cpymem (rtx *operands)
+aarch64_expand_cpymem (rtx *operands, bool is_memmove)
 {
   int mode_bits;
   rtx dst = operands[0];
@@ -25168,25 +25171,23 @@ aarch64_expand_cpymem (rtx *operands)
   rtx base;
   machine_mode cur_mode = BLKmode;
 
-  /* Variable-sized memcpy can go through the MOPS expansion if available.  */
-  if (!CONST_INT_P (operands[2]))
-    return aarch64_expand_cpymem_mops (operands);
+  /* Variable-sized or strict align copies may use the MOPS expansion.  */
+  if (!CONST_INT_P (operands[2]) || STRICT_ALIGNMENT)
+    return aarch64_expand_cpymem_mops (operands, is_memmove);
 
   unsigned HOST_WIDE_INT size = INTVAL (operands[2]);
 
-  /* Try to inline up to 256 bytes or use the MOPS threshold if available.  */
-  unsigned HOST_WIDE_INT max_copy_size
-    = TARGET_MOPS ? aarch64_mops_memcpy_size_threshold : 256;
+  /* Set inline limits for memmove/memcpy.  MOPS has a separate threshold.  */
+  unsigned HOST_WIDE_INT max_copy_size = is_memmove ? 0 : 256;
+  unsigned HOST_WIDE_INT max_mops_size = max_copy_size;
 
-  bool size_p = optimize_function_for_size_p (cfun);
+  if (TARGET_MOPS)
+    max_mops_size = is_memmove ? aarch64_mops_memmove_size_threshold
+                              : aarch64_mops_memcpy_size_threshold;
 
-  /* Large constant-sized cpymem should go through MOPS when possible.
-     It should be a win even for size optimization in the general case.
-     For speed optimization the choice between MOPS and the SIMD sequence
-     depends on the size of the copy, rather than number of instructions,
-     alignment etc.  */
-  if (size > max_copy_size)
-    return aarch64_expand_cpymem_mops (operands);
+  /* Large copies use library call or MOPS when available.  */
+  if (size > max_copy_size || size > max_mops_size)
+    return aarch64_expand_cpymem_mops (operands, is_memmove);
 
   int copy_bits = 256;
 
@@ -25254,6 +25255,8 @@ aarch64_expand_cpymem (rtx *operands)
      the constant size into a register.  */
   unsigned mops_cost = 3 + 1;
 
+  bool size_p = optimize_function_for_size_p (cfun);
+
   /* If MOPS is available at this point we don't consider the libcall as it's
      not a win even on code size.  At this point only consider MOPS if
      optimizing for size.  For speed optimizations we will have chosen between
@@ -25261,7 +25264,7 @@ aarch64_expand_cpymem (rtx *operands)
   if (TARGET_MOPS)
     {
       if (size_p && mops_cost < nops)
-       return aarch64_expand_cpymem_mops (operands);
+       return aarch64_expand_cpymem_mops (operands, is_memmove);
       emit_insn (seq);
       return true;
     }
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 
01cf989641fce8e6c3828f6cfef62e101c4142df..97f70d39cc0ddeb330e044bae0544d85a695567d
 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1609,15 +1609,30 @@ (define_expand "cpymemdi"
    (match_operand:BLK 1 "memory_operand")
    (match_operand:DI 2 "general_operand")
    (match_operand:DI 3 "immediate_operand")]
-   "!STRICT_ALIGNMENT || TARGET_MOPS"
+   ""
 {
-  if (aarch64_expand_cpymem (operands))
+  if (aarch64_expand_cpymem (operands, false))
     DONE;
   FAIL;
 }
 )
 
-(define_insn "aarch64_movmemdi"
+(define_expand "aarch64_movmemdi"
+  [(parallel
+     [(set (match_operand 2) (const_int 0))
+      (clobber (match_dup 3))
+      (clobber (match_dup 4))
+      (clobber (reg:CC CC_REGNUM))
+      (set (match_operand 0)
+          (unspec:BLK [(match_operand 1) (match_dup 2)] UNSPEC_MOVMEM))])]
+  "TARGET_MOPS"
+  {
+    operands[3] = XEXP (operands[0], 0);
+    operands[4] = XEXP (operands[1], 0);
+  }
+)
+
+(define_insn "*aarch64_movmemdi"
   [(parallel [
    (set (match_operand:DI 2 "register_operand" "+&r") (const_int 0))
    (clobber (match_operand:DI 0 "register_operand" "+&r"))
@@ -1640,27 +1655,11 @@ (define_expand "movmemdi"
    (match_operand:BLK 1 "memory_operand")
    (match_operand:DI 2 "general_operand")
    (match_operand:DI 3 "immediate_operand")]
-   "TARGET_MOPS"
+   ""
 {
-   rtx sz_reg = operands[2];
-   /* For constant-sized memmoves check the threshold.
-      FIXME: We should add a non-MOPS memmove expansion for smaller,
-      constant-sized memmove to avoid going to a libcall.  */
-   if (CONST_INT_P (sz_reg)
-       && INTVAL (sz_reg) < aarch64_mops_memmove_size_threshold)
-     FAIL;
-
-   rtx addr_dst = XEXP (operands[0], 0);
-   rtx addr_src = XEXP (operands[1], 0);
-
-   if (!REG_P (sz_reg))
-     sz_reg = force_reg (DImode, sz_reg);
-   if (!REG_P (addr_dst))
-     addr_dst = force_reg (DImode, addr_dst);
-   if (!REG_P (addr_src))
-     addr_src = force_reg (DImode, addr_src);
-   emit_insn (gen_aarch64_movmemdi (addr_dst, addr_src, sz_reg));
-   DONE;
+  if (aarch64_expand_cpymem (operands, true))
+    DONE;
+  FAIL;
 }
 )
 
diff --git a/gcc/testsuite/gcc.target/aarch64/mops_4.c 
b/gcc/testsuite/gcc.target/aarch64/mops_4.c
index 
1b87759cb5e8bbcbb58cf63404d1d579d44b2818..dd796115cb4093251964d881e93bf4b98ade0c32
 100644
--- a/gcc/testsuite/gcc.target/aarch64/mops_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/mops_4.c
@@ -50,6 +50,54 @@ copy3 (int *x, int *y, long z, long *res)
   *res = z;
 }
 
+/*
+** move1:
+**     mov     (x[0-9]+), x0
+**     cpyp    \[\1\]!, \[x1\]!, x2!
+**     cpym    \[\1\]!, \[x1\]!, x2!
+**     cpye    \[\1\]!, \[x1\]!, x2!
+**     str     x0, \[x3\]
+**     ret
+*/
+void
+move1 (int *x, int *y, long z, int **res)
+{
+  __builtin_memmove (x, y, z);
+  *res = x;
+}
+
+/*
+** move2:
+**     mov     (x[0-9]+), x1
+**     cpyp    \[x0\]!, \[\1\]!, x2!
+**     cpym    \[x0\]!, \[\1\]!, x2!
+**     cpye    \[x0\]!, \[\1\]!, x2!
+**     str     x1, \[x3\]
+**     ret
+*/
+void
+move2 (int *x, int *y, long z, int **res)
+{
+  __builtin_memmove (x, y, z);
+  *res = y;
+}
+
+/*
+** move3:
+**     mov     (x[0-9]+), x2
+**     cpyp    \[x0\]!, \[x1\]!, \1!
+**     cpym    \[x0\]!, \[x1\]!, \1!
+**     cpye    \[x0\]!, \[x1\]!, \1!
+**     str     x2, \[x3\]
+**     ret
+*/
+void
+move3 (int *x, int *y, long z, long *res)
+{
+  __builtin_memmove (x, y, z);
+  *res = z;
+}
+
 /*
 ** set1:
 **     mov     (x[0-9]+), x0

Reply via email to