The following patch corrects some Power9 resource requirements and instruction latencies. Bootstrapped and regression-tested on powerpc64le-linux with no new regressions. OK for trunk?
-Pat

2017-09-27  Pat Haugen  <pthau...@us.ibm.com>

	* config/rs6000/power9.md (DU_C2_3_power9): Remove an incorrect combination.
	(power9-alu): Split out insert/shift types...
	(power9-rot): ... to here.  Correct dispatch resources.
	(power9-cracked-alu): Correct dispatch resources.
	(power9-mul): Likewise.
	(power9-mul-compare): Likewise.
	(power9-fp): Correct latency.
	(power9-ddiv): Likewise.
	(power9-vecfdiv): Likewise.
	(power9-vecdiv): Likewise.
Index: gcc/config/rs6000/power9.md
===================================================================
--- gcc/config/rs6000/power9.md (revision 252029)
+++ gcc/config/rs6000/power9.md (working copy)
@@ -80,7 +80,6 @@ (define_reservation "DU_C2_power9" "x0_p
 ; 2-way cracked plus 3rd slot
 (define_reservation "DU_C2_3_power9" "x0_power9+x1_power9+xa0_power9|
                                       x1_power9+x2_power9+xa0_power9|
-                                      x1_power9+x2_power9+xb0_power9|
                                       x2_power9+x3_power9+xb0_power9")
 
 ; 3-way cracked (consumes whole decode/dispatch cycle)
@@ -243,21 +242,29 @@ (define_insn_reservation "power9-sync" 4
 
 ; Most ALU insns are simple 2 cycle, including record form
 (define_insn_reservation "power9-alu" 2
-  (and (ior (eq_attr "type" "add,exts,integer,logical,isel")
-            (and (eq_attr "type" "insert,shift")
-                 (eq_attr "dot" "no")))
+  (and (eq_attr "type" "add,exts,integer,logical,isel")
        (eq_attr "cpu" "power9"))
   "DU_any_power9,VSU_power9")
 ; 5 cycle CR latency
 (define_bypass 5 "power9-alu"
                "power9-crlogical,power9-mfcr,power9-mfcrf")
 
+; Rotate/shift prevent use of third slot
+(define_insn_reservation "power9-rot" 2
+  (and (eq_attr "type" "insert,shift")
+       (eq_attr "dot" "no")
+       (eq_attr "cpu" "power9"))
+  "DU_slice_3_power9,VSU_power9")
+; 5 cycle CR latency
+(define_bypass 5 "power9-rot"
+               "power9-crlogical,power9-mfcr,power9-mfcrf")
+
 ; Record form rotate/shift are cracked
 (define_insn_reservation "power9-cracked-alu" 2
   (and (eq_attr "type" "insert,shift")
        (eq_attr "dot" "yes")
        (eq_attr "cpu" "power9"))
-  "DU_C2_power9,VSU_power9")
+  "DU_C2_3_power9,VSU_power9")
 ; 7 cycle CR latency
 (define_bypass 7 "power9-cracked-alu"
                "power9-crlogical,power9-mfcr,power9-mfcrf")
@@ -291,13 +298,13 @@ (define_insn_reservation "power9-mul" 5
   (and (eq_attr "type" "mul")
        (eq_attr "dot" "no")
        (eq_attr "cpu" "power9"))
-  "DU_any_power9,VSU_power9")
+  "DU_slice_3_power9,VSU_power9")
 
 (define_insn_reservation "power9-mul-compare" 5
   (and (eq_attr "type" "mul")
        (eq_attr "dot" "yes")
        (eq_attr "cpu" "power9"))
-  "DU_C2_power9,VSU_power9")
+  "DU_C2_3_power9,VSU_power9")
 ; 10 cycle CR latency
 (define_bypass 10 "power9-mul-compare"
                "power9-crlogical,power9-mfcr,power9-mfcrf")
@@ -349,7 +356,7 @@ (define_insn_reservation "power9-fpsimpl
        (eq_attr "cpu" "power9"))
   "DU_slice_3_power9,VSU_power9")
 
-(define_insn_reservation "power9-fp" 7
+(define_insn_reservation "power9-fp" 5
   (and (eq_attr "type" "fp,dmul")
        (eq_attr "cpu" "power9"))
   "DU_slice_3_power9,VSU_power9")
@@ -366,7 +373,7 @@ (define_insn_reservation "power9-sdiv" 2
        (eq_attr "cpu" "power9"))
   "DU_slice_3_power9,VSU_power9")
 
-(define_insn_reservation "power9-ddiv" 33
+(define_insn_reservation "power9-ddiv" 27
   (and (eq_attr "type" "ddiv")
        (eq_attr "cpu" "power9"))
   "DU_slice_3_power9,VSU_power9")
@@ -419,12 +426,12 @@ (define_insn_reservation "power9-veccomp
        (eq_attr "cpu" "power9"))
   "DU_super_power9,VSU_super_power9")
 
-(define_insn_reservation "power9-vecfdiv" 28
+(define_insn_reservation "power9-vecfdiv" 24
   (and (eq_attr "type" "vecfdiv")
        (eq_attr "cpu" "power9"))
   "DU_super_power9,VSU_super_power9")
 
-(define_insn_reservation "power9-vecdiv" 32
+(define_insn_reservation "power9-vecdiv" 27
   (and (eq_attr "type" "vecdiv")
        (eq_attr "size" "!128")
        (eq_attr "cpu" "power9"))