Hi All, 

This patch adjusts the cost model so that when both sdiv and udiv are possible
it prefers udiv over sdiv. This was done by making sdiv slightly more expensive
instead of making udiv cheaper to keep the baseline costs of a division the same
as before.

For aarch64 this patch along with my other two related mid-end changes
makes a big difference in division by constants.

Given:

int f2(int x)
{
  return ((x * x) % 300) + ((x * x) / 300);
}

we now generate

f2:
        mul     w0, w0, w0
        mov     w1, 33205
        movk    w1, 0x1b4e, lsl 16
        mov     w2, 300
        umull   x1, w0, w1
        lsr     x1, x1, 37
        msub    w0, w1, w2, w0
        add     w0, w0, w1
        ret

as opposed to

f2:
        mul     w0, w0, w0
        mov     w2, 33205
        movk    w2, 0x1b4e, lsl 16
        mov     w3, 300
        smull   x1, w0, w2
        umull   x2, w0, w2
        asr     x1, x1, 37
        sub     w1, w1, w0, asr 31
        lsr     x2, x2, 37
        msub    w0, w1, w3, w0
        add     w0, w0, w2
        ret

Bootstrapped and reg tested on aarch64-none-linux-gnu with no regressions.

OK for trunk?

Thanks,
Tamar


gcc/
2017-05-02  Tamar Christina  <tamar.christ...@arm.com>

        * config/aarch64/aarch64.c (aarch64_rtx_costs): Make sdiv more 
expensive than udiv.
        Remove floating point cases from mod.

gcc/testsuite/
2017-05-02  Tamar Christina  <tamar.christ...@arm.com>

        * gcc.target/aarch64/sdiv_costs_1.c: New.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 4f769a40a4e9de83cb5aacfd3ff58301c2feeb78..1f4fe51eda9057f1ccaded8e0d5ccd4bc3bc11ab 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7484,17 +7484,13 @@ cost_plus:
     case UMOD:
       if (speed)
 	{
+	  /* Slighly prefer UMOD over SMOD.  */
 	  if (VECTOR_MODE_P (mode))
 	    *cost += extra_cost->vect.alu;
 	  else if (GET_MODE_CLASS (mode) == MODE_INT)
 	    *cost += (extra_cost->mult[mode == DImode].add
-		      + extra_cost->mult[mode == DImode].idiv);
-	  else if (mode == DFmode)
-	    *cost += (extra_cost->fp[1].mult
-		      + extra_cost->fp[1].div);
-	  else if (mode == SFmode)
-	    *cost += (extra_cost->fp[0].mult
-		      + extra_cost->fp[0].div);
+		      + extra_cost->mult[mode == DImode].idiv
+		      + (code == MOD ? 1 : 0));
 	}
       return false;  /* All arguments need to be in registers.  */
 
@@ -7508,7 +7504,9 @@ cost_plus:
 	  else if (GET_MODE_CLASS (mode) == MODE_INT)
 	    /* There is no integer SQRT, so only DIV and UDIV can get
 	       here.  */
-	    *cost += extra_cost->mult[mode == DImode].idiv;
+	    *cost += (extra_cost->mult[mode == DImode].idiv
+		     /* Slighly prefer UDIV over SDIV.  */
+		     + (code == DIV ? 1 : 0));
 	  else
 	    *cost += extra_cost->fp[mode == DFmode].div;
 	}
diff --git a/gcc/testsuite/gcc.target/aarch64/sdiv_costs_1.c b/gcc/testsuite/gcc.target/aarch64/sdiv_costs_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..24d7f7df2089398288bdf67a489eb71d733a4450
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sdiv_costs_1.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+/* Both sdiv and udiv can be used here, so prefer udiv.  */
+int f1 (unsigned char *p)
+{
+  return 100 / p[1];
+}
+
+int f2 (unsigned char *p, unsigned short x)
+{
+  return x / p[0];
+}
+
+int f3 (unsigned char *p, int x)
+{
+  x &= 0x7fffffff;
+  return x / p[0];
+}
+
+int f5 (unsigned char *p, unsigned short x)
+{
+  return x % p[0];
+}
+
+/* This should only generate signed divisions.  */
+int f4 (unsigned char *p)
+{
+  return -100 / p[1];
+}
+
+int f6 (unsigned char *p, short x)
+{
+  return x % p[0];
+}
+
+/* { dg-final { scan-assembler-times "udiv\tw\[0-9\]+, w\[0-9\]+" 4 } } */
+/* { dg-final { scan-assembler-times "sdiv\tw\[0-9\]+, w\[0-9\]+" 2 } } */

Reply via email to