Hi,
The function output_move_neon currently generates vld1.64 for a memory
reference like "dx <- [r1:SI]". This is bogus because vld1.64 requires at
least 64-bit alignment, whereas the memory reference is only 32-bit aligned.
It works today only because GCC doesn't generate such insns in the first
place, but that is going to change if memset/memcpy calls are inlined using
NEON instructions.
This patch fixes the issue by generating ldr for such instructions.
Bootstrapped on Cortex-A15 with NEON.
Is it OK?
Thanks,
bin
2014-04-29 Bin Cheng <bin.ch...@arm.com>
* config/arm/arm.c (output_move_neon): Handle REG explicitly.
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c (revision 209852)
+++ gcc/config/arm/arm.c (working copy)
@@ -18427,6 +18453,20 @@ output_move_neon (rtx *operands)
/* FIXME: Not currently enabled in neon_vector_mem_operand. */
gcc_unreachable ();
+ case REG:
+ /* We have to use vldm / vstm for too-large modes. */
+ if (nregs > 1)
+ {
+ if (nregs > 4)
+ templ = "v%smia%%?\t%%m0, %%h1";
+ else
+ templ = "v%s1.64\t%%h1, %%A0";
+
+ ops[0] = mem;
+ ops[1] = reg;
+ break;
+ }
+ /* Fall through. */
case LABEL_REF:
case PLUS:
{
@@ -18460,14 +18500,7 @@ output_move_neon (rtx *operands)
}
default:
- /* We have to use vldm / vstm for too-large modes. */
- if (nregs > 4)
- templ = "v%smia%%?\t%%m0, %%h1";
- else
- templ = "v%s1.64\t%%h1, %%A0";
-
- ops[0] = mem;
- ops[1] = reg;
+ gcc_unreachable ();
}
sprintf (buff, templ, load ? "ld" : "st");