Our movdi_aarch64 pattern allows moving a constant into an FP_REG,
but uses the constraint Dd, which is stricter than the one for
moving a constant into a CORE_REG. This is because MOVI instructions
accept only a restricted set of immediate values.
Because the predicate for the pattern allows any constant that is
valid for the CORE_REGs, we can run into situations where IRA/reload
has decided to use FP_REGs but the value is not actually valid for
MOVI.
This patch introduces a secondary reload to handle this case.
Supplied with a testcase that highlighted the original problem.
Tested for regressions on GNU/Linux.
OK for trunk?
Cheers,
Ian
2013-07-30 Ian Bolton <ian.bol...@arm.com>
gcc/
* config/aarch64/aarch64.c (aarch64_secondary_reload): Handle
constant into FP_REGs that is not valid for MOVI.
testsuite/
* gcc.target/aarch64/movdi_1.c: New test.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 9941d7c..f16988e 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4070,6 +4070,15 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
return CORE_REGS;
+ /* Only a subset of the DImode immediate values valid for CORE_REGS are
+ valid for FP_REGS. Where we have an immediate value that isn't valid
+ for FP_REGS, and RCLASS is FP_REGS, we return CORE_REGS to cause the
+ value to be generated into there first and later copied to FP_REGS to be
+ used. */
+ if (rclass == FP_REGS && mode == DImode && CONST_INT_P (x)
+ && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
+ return CORE_REGS;
+
return NO_REGS;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/movdi_1.c b/gcc/testsuite/gcc.target/aarch64/movdi_1.c
new file mode 100644
index 0000000..1decd99
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movdi_1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-inline" } */
+
+#include <arm_neon.h>
+
+void
+foo (uint64_t *a)
+{
+ uint64x1_t val18;
+ uint32x2_t val19;
+ uint64x1_t val20;
+ val19 = vcreate_u32 (0x800000004cf3dffbUL);
+ val20 = vrsra_n_u64 (val18, vreinterpret_u64_u32 (val19), 34);
+ vst1_u64 (a, val20);
+}