416.gamess regressed 4-6% on x86_64 since my r15-882-g1d6199e5f8c1c0.
The commit adjust rtx_cost of mem to reduce cost of (add op0 disp).
But Cost of ADDR could be cheaper than XEXP (addr, 0) when it's a lea.
It is the case in the PR, the patch uses lower cost to enable more
simplication and fix the regression.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

gcc/ChangeLog:

        PR target/115462
        * config/i386/i386.cc (ix86_rtx_costs): Use cost of addr when
        it's lower than rtx_cost (XEXP (addr, 0)) + 1.

gcc/testsuite/ChangeLog:
        * gcc.target/i386/pr115462.c: New test.
---
 gcc/config/i386/i386.cc                  |  9 +++++++--
 gcc/testsuite/gcc.target/i386/pr115462.c | 22 ++++++++++++++++++++++
 2 files changed, 29 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr115462.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index d4ccc24be6e..83dab8220dd 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22341,8 +22341,13 @@ ix86_rtx_costs (rtx x, machine_mode mode, int 
outer_code_i, int opno,
          if (GET_CODE (addr) == PLUS
              && x86_64_immediate_operand (XEXP (addr, 1), Pmode))
            {
-             *total += 1;
-             *total += rtx_cost (XEXP (addr, 0), Pmode, PLUS, 0, speed);
+             /* PR115462: Cost of ADDR could be cheaper than XEXP (addr, 0)
+                when it's a lea, use lower cost to enable more
+                simplification.  */
+             unsigned cost1 = rtx_cost (addr, Pmode, MEM, 0, speed);
+             unsigned cost2 = rtx_cost (XEXP (addr, 0), Pmode,
+                                        PLUS, 0, speed) + 1;
+             *total += MIN (cost1, cost2);
              return true;
            }
        }
diff --git a/gcc/testsuite/gcc.target/i386/pr115462.c 
b/gcc/testsuite/gcc.target/i386/pr115462.c
new file mode 100644
index 00000000000..ad50a6382bc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr115462.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2 -fno-tree-vectorize -fno-pic" } */
+/* { dg-final { scan-assembler-times {(?n)movl[ \t]+.*, p1\.0\+[0-9]*\(,} 3 } 
} */
+
+int
+foo (long indx, long indx2, long indx3, long indx4, long indx5, long indx6, 
long n, int* q)
+{
+  static int p1[10000];
+  int* p2 = p1 + 1000;
+  int* p3 = p1 + 4000;
+  int* p4 = p1 + 8000;
+
+  for (long i = 0; i != n; i++)
+    {
+      /* scan for      movl    %edi, p1.0+3996(,%rax,4),
+        p1.0+3996 should be propagted into the loop.  */
+      p2[indx++] = q[indx++];
+      p3[indx2++] = q[indx2++];
+      p4[indx3++] = q[indx3++];
+    }
+  return p1[indx6] + p1[indx5];
+}
-- 
2.31.1

Reply via email to