Hi All,
This patch adjusts the cost model so that, when both sdiv and udiv are possible,
udiv is preferred over sdiv. This is done by making sdiv slightly more expensive
rather than making udiv cheaper, so that the baseline cost of a division stays
the same as before.
For aarch64, this patch, along with my other two related mid-end changes,
makes a big difference for division by constants.
Given:
int f2(int x)
{
return ((x * x) % 300) + ((x * x) / 300);
}
we now generate
f2:
mul w0, w0, w0
mov w1, 33205
movk w1, 0x1b4e, lsl 16
mov w2, 300
umull x1, w0, w1
lsr x1, x1, 37
msub w0, w1, w2, w0
add w0, w0, w1
ret
as opposed to
f2:
mul w0, w0, w0
mov w2, 33205
movk w2, 0x1b4e, lsl 16
mov w3, 300
smull x1, w0, w2
umull x2, w0, w2
asr x1, x1, 37
sub w1, w1, w0, asr 31
lsr x2, x2, 37
msub w0, w1, w3, w0
add w0, w0, w2
ret
Bootstrapped and regression tested on aarch64-none-linux-gnu with no regressions.
OK for trunk?
Thanks,
Tamar
gcc/
2017-05-02 Tamar Christina <tamar.christ...@arm.com>
* config/aarch64/aarch64.c (aarch64_rtx_costs): Make sdiv more
expensive than udiv.
Remove floating point cases from mod.
gcc/testsuite/
2017-05-02 Tamar Christina <tamar.christ...@arm.com>
* gcc.target/aarch64/sdiv_costs_1.c: New.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 4f769a40a4e9de83cb5aacfd3ff58301c2feeb78..1f4fe51eda9057f1ccaded8e0d5ccd4bc3bc11ab 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7484,17 +7484,13 @@ cost_plus:
case UMOD:
if (speed)
{
+ /* Slightly prefer UMOD over MOD. */
if (VECTOR_MODE_P (mode))
*cost += extra_cost->vect.alu;
else if (GET_MODE_CLASS (mode) == MODE_INT)
*cost += (extra_cost->mult[mode == DImode].add
- + extra_cost->mult[mode == DImode].idiv);
- else if (mode == DFmode)
- *cost += (extra_cost->fp[1].mult
- + extra_cost->fp[1].div);
- else if (mode == SFmode)
- *cost += (extra_cost->fp[0].mult
- + extra_cost->fp[0].div);
+ + extra_cost->mult[mode == DImode].idiv
+ + (code == MOD ? 1 : 0));
}
return false; /* All arguments need to be in registers. */
@@ -7508,7 +7504,9 @@ cost_plus:
else if (GET_MODE_CLASS (mode) == MODE_INT)
/* There is no integer SQRT, so only DIV and UDIV can get
here. */
- *cost += extra_cost->mult[mode == DImode].idiv;
+ *cost += (extra_cost->mult[mode == DImode].idiv
+ /* Slightly prefer UDIV over DIV. */
+ + (code == DIV ? 1 : 0));
else
*cost += extra_cost->fp[mode == DFmode].div;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sdiv_costs_1.c b/gcc/testsuite/gcc.target/aarch64/sdiv_costs_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..24d7f7df2089398288bdf67a489eb71d733a4450
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sdiv_costs_1.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+/* Both sdiv and udiv can be used here, so prefer udiv. */
+int f1 (unsigned char *p)
+{
+ return 100 / p[1];
+}
+
+int f2 (unsigned char *p, unsigned short x)
+{
+ return x / p[0];
+}
+
+int f3 (unsigned char *p, int x)
+{
+ x &= 0x7fffffff;
+ return x / p[0];
+}
+
+int f5 (unsigned char *p, unsigned short x)
+{
+ return x % p[0];
+}
+
+/* This should only generate signed divisions. */
+int f4 (unsigned char *p)
+{
+ return -100 / p[1];
+}
+
+int f6 (unsigned char *p, short x)
+{
+ return x % p[0];
+}
+
+/* { dg-final { scan-assembler-times "udiv\tw\[0-9\]+, w\[0-9\]+" 4 } } */
+/* { dg-final { scan-assembler-times "sdiv\tw\[0-9\]+, w\[0-9\]+" 2 } } */