Our movdi_aarch64 pattern allows moving a constant into an FP_REG,
but has the constraint Dd, which is stricter than the one for
moving a constant into a CORE_REG.  This is due to restricted values
allowed for MOVI instructions.

Due to the predicate for the pattern allowing any constant that is
valid for the CORE_REGs, we can run into situations where IRA/reload
has decided to use FP_REGs but the value is not actually valid for
MOVI.

This patch introduces a secondary reload to handle this case.

A testcase that highlighted the original problem is included.
Regression tested on GNU/Linux.

OK for trunk?

Cheers,
Ian


2013-07-30  Ian Bolton  <ian.bol...@arm.com>

gcc/
        * config/aarch64/aarch64.c (aarch64_secondary_reload): Handle
        constant into FP_REGs that is not valid for MOVI.

testsuite/
        * gcc.target/aarch64/movdi_1.c: New test.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 9941d7c..f16988e 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4070,6 +4070,15 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
   if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
       return CORE_REGS;
 
+  /* Only a subset of the DImode immediate values valid for CORE_REGS are
+     valid for FP_REGS.  Where we have an immediate value that isn't valid
+     for FP_REGS, and RCLASS is FP_REGS, we return CORE_REGS to cause the
+     value to be generated into there first and later copied to FP_REGS to be
+     used.  */
+  if (rclass == FP_REGS && mode == DImode && CONST_INT_P (x)
+      && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
+    return CORE_REGS;
+
   return NO_REGS;
 }
 
diff --git a/gcc/testsuite/gcc.target/aarch64/movdi_1.c b/gcc/testsuite/gcc.target/aarch64/movdi_1.c
new file mode 100644
index 0000000..1decd99
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movdi_1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-inline" } */
+
+#include <arm_neon.h>
+
+void
+foo (uint64_t *a)
+{
+  uint64x1_t val18;
+  uint32x2_t val19;
+  uint64x1_t val20;
+  val19 = vcreate_u32 (0x800000004cf3dffbUL);
+  val20 = vrsra_n_u64 (val18, vreinterpret_u64_u32 (val19), 34);
+  vst1_u64 (a, val20);
+}

Reply via email to