Re: [PATCH][ARM] Optimize copysign/copysignf for soft-float using BFI

Jiong Wang Wed, 29 Oct 2014 03:21:13 -0700

On 26/08/14 13:36, Richard Earnshaw wrote:

On 29/07/14 15:49, Jiong Wang wrote:

test done
===
no regression on the full toolchain test on arm-none-eabi.



ok to install?

Hmm, I think this is wrong for DF mode.  The principle the patch works
on is by tying the output to the value containing the sign bit, and then
copying the rest of the other value into that value.  However, for DF
mode it only copies 31 of the 63 bits needed; the least significant 32
bits of the mantissa are not copied over.

R.


updated the patch. fixed the DF mode bug.

no regression on arm-none-eabi multi-lib test.

ok to trunk?

gcc/
  * config/arm/arm.md (copysignsf3): New define_expand for SImode.
  (copysigndf3): New define_expand for DImode.

gcc/testsuite/
  * gcc.target/arm/copysign_softfloat_1.c: New copysign/copysignf testcase for 
soft-float.

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index cd9ab6c..2a7dc11 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -11131,6 +11131,47 @@
   [(set_attr "predicable" "yes")]
 )

+(define_expand "copysignsf3"
+  [(match_operand:SF 0 "register_operand")
+   (match_operand:SF 1 "register_operand")
+   (match_operand:SF 2 "register_operand")]
+  "TARGET_SOFT_FLOAT && arm_arch_thumb2"
+  "{
+     emit_move_insn (operands[0], operands[2]);
+     emit_insn (gen_insv_t2 (simplify_gen_subreg (SImode, operands[0], SFmode, 0),
+		GEN_INT (31), GEN_INT (0),
+		simplify_gen_subreg (SImode, operands[1], SFmode, 0)));
+     DONE;
+  }"
+)
+
+(define_expand "copysigndf3"
+  [(match_operand:DF 0 "register_operand")
+   (match_operand:DF 1 "register_operand")
+   (match_operand:DF 2 "register_operand")]
+  "TARGET_SOFT_FLOAT && arm_arch_thumb2"
+  "{
+     rtx op0_low = gen_lowpart (SImode, operands[0]);
+     rtx op0_high = gen_highpart (SImode, operands[0]);
+     rtx op1_low = gen_lowpart (SImode, operands[1]);
+     rtx op1_high = gen_highpart (SImode, operands[1]);
+     rtx op2_high = gen_highpart (SImode, operands[2]);
+
+     rtx scratch1 = gen_reg_rtx (SImode);
+     rtx scratch2 = gen_reg_rtx (SImode);
+     emit_move_insn (scratch1, op2_high);
+     emit_move_insn (scratch2, op1_high);
+
+     emit_insn(gen_rtx_SET(SImode, scratch1,
+			   gen_rtx_LSHIFTRT (SImode, op2_high, GEN_INT(31))));
+     emit_insn(gen_insv_t2(scratch2, GEN_INT(1), GEN_INT(31), scratch1));
+     emit_move_insn (op0_low, op1_low);
+     emit_move_insn (op0_high, scratch2);
+
+     DONE;
+  }"
+)
+
 ;; Vector bits common to IWMMXT and Neon
 (include "vec-common.md")
 ;; Load the Intel Wireless Multimedia Extension patterns
diff --git a/gcc/testsuite/gcc.target/arm/copysign_softfloat_1.c b/gcc/testsuite/gcc.target/arm/copysign_softfloat_1.c
new file mode 100644
index 0000000..990c46e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/copysign_softfloat_1.c
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-skip-if "skip override" { *-*-* } { "-mfloat-abi=softfp" "-mfloat-abi=hard" } { "" } } */
+/* { dg-options "-O2 -mfloat-abi=soft --save-temps" } */
+extern void abort (void);
+
+#define N 16
+
+float a_f[N] = {-0.1f, -3.2f, -6.3f, -9.4f,
+		-12.5f, -15.6f, -18.7f, -21.8f,
+		24.9f, 27.1f, 30.2f, 33.3f,
+		36.4f, 39.5f, 42.6f, 45.7f};
+
+float b_f[N] = {-1.2f, 3.4f, -5.6f, 7.8f,
+		-9.0f, 1.0f, -2.0f, 3.0f,
+		-4.0f, -5.0f, 6.0f, 7.0f,
+		-8.0f, -9.0f, 10.0f, 11.0f};
+
+float c_f[N] = {-0.1f, 3.2f, -6.3f, 9.4f,
+		-12.5f, 15.6f, -18.7f, 21.8f,
+		-24.9f, -27.1f, 30.2f, 33.3f,
+		-36.4f, -39.5f, 42.6f, 45.7f};
+
+double a_d[N] = {-0.1, -3.2, -6.3, -9.4,
+		 -12.5, -15.6, -18.7, -21.8,
+		 24.9, 27.1, 30.2, 33.3,
+		 36.4, 39.5, 42.6, 45.7};
+
+double b_d[N] = {-1.2, 3.4, -5.6, 7.8,
+		 -9.0, 1.0, -2.0, 3.0,
+		 -4.0, -5.0, 6.0, 7.0,
+		 -8.0, -9.0, 10.0, 11.0};
+
+double c_d[N] = {-0.1, 3.2, -6.3, 9.4,
+		 -12.5, 15.6, -18.7, 21.8,
+		 -24.9, -27.1, 30.2, 33.3,
+		 -36.4, -39.5, 42.6, 45.7};
+
+int
+main (int argc, char **argv)
+{
+  int index = 0;
+
+/* { dg-final { scan-assembler-times "bfi" 2 } } */
+/* { dg-final { scan-assembler-times "lsr" 1 } } */
+  for (index; index < N; index++)
+    {
+      if (__builtin_copysignf (a_f[index], b_f[index]) != c_f[index])
+	abort();
+    }
+
+  for (index = 0; index < N; index++)
+    {
+      if (__builtin_copysign (a_d[index], b_d[index]) != c_d[index])
+	abort();
+    }
+
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */

Re: [PATCH][ARM] Optimize copysign/copysignf for soft-float using BFI

Reply via email to