The following patch corrects some Power9 resource requirements and
instruction latencies. Bootstrapped and regression-tested on
powerpc64le-linux with no new regressions. OK for trunk?

-Pat


2017-09-27  Pat Haugen  <pthau...@us.ibm.com>

        * config/rs6000/power9.md (DU_C2_3_power9): Remove an incorrect
        combination.
        (power9-alu): Split out insert/shift types...
        (power9-rot): ... to here. Correct dispatch resources.
        (power9-cracked-alu): Correct dispatch resources.
        (power9-mul): Likewise.
        (power9-mul-compare): Likewise.
        (power9-fp): Correct latency.
        (power9-ddiv): Likewise.
        (power9-vecfdiv): Likewise.
        (power9-vecdiv): Likewise.
Index: gcc/config/rs6000/power9.md
===================================================================
--- gcc/config/rs6000/power9.md	(revision 252029)
+++ gcc/config/rs6000/power9.md	(working copy)
@@ -80,7 +80,6 @@ (define_reservation "DU_C2_power9" "x0_p
 ; 2-way cracked plus 3rd slot
 (define_reservation "DU_C2_3_power9" "x0_power9+x1_power9+xa0_power9|
 				      x1_power9+x2_power9+xa0_power9|
-				      x1_power9+x2_power9+xb0_power9|
 				      x2_power9+x3_power9+xb0_power9")
 
 ; 3-way cracked (consumes whole decode/dispatch cycle)
@@ -243,21 +242,29 @@ (define_insn_reservation "power9-sync" 4
 
 ; Most ALU insns are simple 2 cycle, including record form
 (define_insn_reservation "power9-alu" 2
-  (and (ior (eq_attr "type" "add,exts,integer,logical,isel")
-	    (and (eq_attr "type" "insert,shift")
-		 (eq_attr "dot" "no")))
+  (and (eq_attr "type" "add,exts,integer,logical,isel")
        (eq_attr "cpu" "power9"))
   "DU_any_power9,VSU_power9")
 ; 5 cycle CR latency
 (define_bypass 5 "power9-alu"
 		 "power9-crlogical,power9-mfcr,power9-mfcrf")
 
+; Rotate/shift prevent use of third slot
+(define_insn_reservation "power9-rot" 2
+  (and (eq_attr "type" "insert,shift")
+       (eq_attr "dot" "no")
+       (eq_attr "cpu" "power9"))
+  "DU_slice_3_power9,VSU_power9")
+; 5 cycle CR latency
+(define_bypass 5 "power9-rot"
+		 "power9-crlogical,power9-mfcr,power9-mfcrf")
+
 ; Record form rotate/shift are cracked
 (define_insn_reservation "power9-cracked-alu" 2
   (and (eq_attr "type" "insert,shift")
        (eq_attr "dot" "yes")
        (eq_attr "cpu" "power9"))
-  "DU_C2_power9,VSU_power9")
+  "DU_C2_3_power9,VSU_power9")
 ; 7 cycle CR latency
 (define_bypass 7 "power9-cracked-alu"
 		 "power9-crlogical,power9-mfcr,power9-mfcrf")
@@ -291,13 +298,13 @@ (define_insn_reservation "power9-mul" 5
   (and (eq_attr "type" "mul")
        (eq_attr "dot" "no")
        (eq_attr "cpu" "power9"))
-  "DU_any_power9,VSU_power9")
+  "DU_slice_3_power9,VSU_power9")
 
 (define_insn_reservation "power9-mul-compare" 5
   (and (eq_attr "type" "mul")
        (eq_attr "dot" "yes")
        (eq_attr "cpu" "power9"))
-  "DU_C2_power9,VSU_power9")
+  "DU_C2_3_power9,VSU_power9")
 ; 10 cycle CR latency
 (define_bypass 10 "power9-mul-compare"
 		 "power9-crlogical,power9-mfcr,power9-mfcrf")
@@ -349,7 +356,7 @@ (define_insn_reservation "power9-fpsimpl
        (eq_attr "cpu" "power9"))
   "DU_slice_3_power9,VSU_power9")
 
-(define_insn_reservation "power9-fp" 7
+(define_insn_reservation "power9-fp" 5
   (and (eq_attr "type" "fp,dmul")
        (eq_attr "cpu" "power9"))
   "DU_slice_3_power9,VSU_power9")
@@ -366,7 +373,7 @@ (define_insn_reservation "power9-sdiv" 2
        (eq_attr "cpu" "power9"))
   "DU_slice_3_power9,VSU_power9")
 
-(define_insn_reservation "power9-ddiv" 33
+(define_insn_reservation "power9-ddiv" 27
   (and (eq_attr "type" "ddiv")
        (eq_attr "cpu" "power9"))
   "DU_slice_3_power9,VSU_power9")
@@ -419,12 +426,12 @@ (define_insn_reservation "power9-veccomp
        (eq_attr "cpu" "power9"))
   "DU_super_power9,VSU_super_power9")
 
-(define_insn_reservation "power9-vecfdiv" 28
+(define_insn_reservation "power9-vecfdiv" 24
   (and (eq_attr "type" "vecfdiv")
        (eq_attr "cpu" "power9"))
   "DU_super_power9,VSU_super_power9")
 
-(define_insn_reservation "power9-vecdiv" 32
+(define_insn_reservation "power9-vecdiv" 27
   (and (eq_attr "type" "vecdiv")
        (eq_attr "size" "!128")
        (eq_attr "cpu" "power9"))

Reply via email to