Hi,
this patch fixes ix86_adjust_cost for Zen support.  In particular, the original
code accounted for memory latencies incorrectly (3 for the integer unit, 2 for
the FP unit), while they are actually 4 for integer and 7 for FP on this CPU.

Using lower latencies makes the scheduler overly pessimistic about the CPU's
ability to execute sequences involving loads effectively.

I have decided to split the code into a new switch case, even though it is
currently similar to the Athlon-Bulldozer tuning.  The reason is that some extra
special cases will appear here, and Zen is probably a good place to cut away
from sharing the implementation with older AMD designs.

Bootstrapped/regtested x86_64-linux, will commit it shortly.

        * x86-tune-sched.c (ix86_adjust_cost): Fix Zen support.
Index: config/i386/x86-tune-sched.c
===================================================================
--- config/i386/x86-tune-sched.c        (revision 253651)
+++ config/i386/x86-tune-sched.c        (working copy)
@@ -352,7 +352,6 @@ ix86_adjust_cost (rtx_insn *insn, int de
     case PROCESSOR_BDVER2:
     case PROCESSOR_BDVER3:
     case PROCESSOR_BDVER4:
-    case PROCESSOR_ZNVER1:
     case PROCESSOR_BTVER1:
     case PROCESSOR_BTVER2:
     case PROCESSOR_GENERIC:
@@ -387,6 +386,35 @@ ix86_adjust_cost (rtx_insn *insn, int de
 
          if (cost >= loadcost)
            cost -= loadcost;
+         else
+           cost = 0;
+       }
+      break;
+
+    case PROCESSOR_ZNVER1:
+      /* Stack engine allows to execute push&pop instructions in parall.  */
+      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
+         && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
+       return 0;
+
+      memory = get_attr_memory (insn);
+
+      /* Show ability of reorder buffer to hide latency of load by executing
+        in parallel with previous instruction in case
+        previous instruction is not needed to compute the address.  */
+      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+         && !ix86_agi_dependent (dep_insn, insn))
+       {
+         enum attr_unit unit = get_attr_unit (insn);
+         int loadcost;
+
+         if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
+           loadcost = 4;
+         else
+           loadcost = 7;
+
+         if (cost >= loadcost)
+           cost -= loadcost;
          else
            cost = 0;
        }

Reply via email to