Hi,
The function output_move_neon currently generates vld1.64 for a memory
reference like "dx <- [r1:SI]".  This is bogus, because vld1.64 requires at
least 64-bit alignment while the memory reference is only 32-bit aligned.
It works today only because GCC doesn't generate such insns in the first
place, but that is going to change if memset/memcpy calls are inlined using
NEON instructions.

This patch fixes the issue by generating ldr for such instructions.

Bootstrapped on cortex-a15 with neon.
Is it OK?

Thanks,
bin


2014-04-29  Bin Cheng  <bin.ch...@arm.com>

        * config/arm/arm.c (output_move_neon): Handle REG explicitly.
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c        (revision 209852)
+++ gcc/config/arm/arm.c        (working copy)
@@ -18427,6 +18453,20 @@ output_move_neon (rtx *operands)
       /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
       gcc_unreachable ();
 
+    case REG:
+      /* We have to use vldm / vstm for too-large modes.  */
+      if (nregs > 1)
+       {
+         if (nregs > 4)
+           templ = "v%smia%%?\t%%m0, %%h1";
+         else
+           templ = "v%s1.64\t%%h1, %%A0";
+
+         ops[0] = mem;
+         ops[1] = reg;
+         break;
+       }
+      /* Fall through.  */
     case LABEL_REF:
     case PLUS:
       {
@@ -18460,14 +18500,7 @@ output_move_neon (rtx *operands)
       }
 
     default:
-      /* We have to use vldm / vstm for too-large modes.  */
-      if (nregs > 4)
-       templ = "v%smia%%?\t%%m0, %%h1";
-      else
-       templ = "v%s1.64\t%%h1, %%A0";
-
-      ops[0] = mem;
-      ops[1] = reg;
+      gcc_unreachable ();
     }
 
   sprintf (buff, templ, load ? "ld" : "st");

Reply via email to