Hi, please find attached the patch that implements automodify load and store insn reservations for thunderx2t99. The patch doesn't change SPEC results but improves other benchmarks.
Bootstrapped and regression tested on aarch64-thunder-linux. Please review the patch and let us know if it's okay for Stage-1. Thanks, Naveen 2017-03-06 Julian Brown <jul...@codesourcery.com> Naveen H.S <naveen.hurugalaw...@cavium.com> * config/aarch64/aarch64-protos.h (aarch64_automod_addr_only_dep): Add prototype. * config/aarch64/aarch64.c (aarch64_automod_addr_only_dep): New function. * config/aarch64/thunderx2t99.md (thunderx2t99_load_basic) (thunderx2t99_store_basic, thunderx2t99_storepair_basic) (thunderx2t99_fp_load_basic, thunderx2t99_fp_loadpair_basic) (thunderx2t99_fp_storepair_basic): Add aarch64_mem_type_p test. (thunderx2t99_load_automod, thunderx2t99_load_regoffset) (thunderx2t99_load_scale_ext, thunderx2t99_store_automod) (thunderx2t99_store_regoffset_scale_ext, thunderx2t99_fp_load_automod) (thunderx2t99_storepair_automod, thunderx2t99_fp_load_regoffset) (thunderx2t99_fp_load_scale_ext, thunderx2t99_fp_loadpair_automod) (thunderx2t99_fp_store_automod, thunderx2t99_fp_storepair_automod) (thunderx2t99_fp_store_regoffset_scale_ext): New insn reservations. (thunderx2t99_load_automod, thunderx2t99_fp_load_automod) (thunderx2t99_fp_loadpair_automod): Add bypass for output address-only dependencies.
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index e045df8..7472d98 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -488,5 +488,6 @@ std::string aarch64_get_extension_string_for_isa_flags (unsigned long, unsigned long); rtl_opt_pass *make_pass_fma_steering (gcc::context *ctxt); +int aarch64_automod_addr_only_dep (rtx_insn *, rtx_insn *); #endif /* GCC_AARCH64_PROTOS_H */ diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 62f5461..c674c51 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -14875,6 +14875,94 @@ aarch64_run_selftests (void) #endif /* #if CHECKING_P */ +/* Return nonzero if the CONSUMER has a dependency only on an automodify + address in PRODUCER (a load instruction, i.e. the dependency is not on the + loaded value). */ + +int +aarch64_automod_addr_only_dep (rtx_insn *producer, rtx_insn *consumer) +{ + rtx prod_set = single_set (producer); + + if (prod_set) + { + rtx dst, src = SET_SRC (prod_set); + + if (GET_CODE (src) == ZERO_EXTEND || GET_CODE (src) == SIGN_EXTEND) + src = XEXP (src, 0); + + gcc_assert (MEM_P (src)); + + dst = XEXP (prod_set, 0); + + rtx cons_set = single_set (consumer); + rtx cons_pat = PATTERN (consumer); + + if (cons_set) + return !reg_overlap_mentioned_p (dst, cons_set); + else if (GET_CODE (cons_pat) == PARALLEL) + { + for (int i = 0; i < XVECLEN (cons_pat, 0); i++) + { + rtx set = XVECEXP (cons_pat, 0, i); + + if (GET_CODE (set) != SET) + continue; + + if (reg_overlap_mentioned_p (dst, set)) + return 0; + } + } + else + return 0; + } + else if (GET_CODE (PATTERN (producer)) == PARALLEL) + { + rtx prod_pat = PATTERN (producer); + rtx cons_set = single_set (consumer); + rtx cons_pat = PATTERN (consumer); + + for (int i = 0; i < XVECLEN (prod_pat, 0); i++) + { + prod_set = XVECEXP (prod_pat, 0, i); + + if (GET_CODE (prod_set) == SET) + { + rtx src = XEXP (prod_set, 1), dst = XEXP 
(prod_set, 0); + + if (GET_CODE (src) == ZERO_EXTEND + || GET_CODE (src) == SIGN_EXTEND) + src = XEXP (src, 0); + + gcc_assert (MEM_P (src)); + + if (cons_set) + { + if (reg_overlap_mentioned_p (dst, cons_set)) + return 0; + } + else if (GET_CODE (cons_pat) == PARALLEL) + { + for (int i = 0; i < XVECLEN (cons_pat, 0); i++) + { + rtx set = XVECEXP (cons_pat, 0, i); + + if (GET_CODE (set) != SET) + continue; + + if (reg_overlap_mentioned_p (dst, set)) + return 0; + } + } + else + return 0; + } + } + } + + return 1; +} + #undef TARGET_ADDRESS_COST #define TARGET_ADDRESS_COST aarch64_address_cost diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md index 936078c..add3707 100644 --- a/gcc/config/aarch64/thunderx2t99.md +++ b/gcc/config/aarch64/thunderx2t99.md @@ -123,24 +123,73 @@ (define_insn_reservation "thunderx2t99_load_basic" 4 (and (eq_attr "tune" "thunderx2t99") - (eq_attr "type" "load1")) + (eq_attr "type" "load1") + (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_SYMBOLIC + | AARCH64_ADDR_REG_IMM + | AARCH64_ADDR_LO_SUM)")) "thunderx2t99_ls01") +(define_insn_reservation "thunderx2t99_load_automod" 4 + (and (eq_attr "tune" "thunderx2t99") + (eq_attr "type" "load1") + (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)")) + "thunderx2t99_ls01+thunderx2t99_i012") + +(define_insn_reservation "thunderx2t99_load_regoffset" 5 + (and (eq_attr "tune" "thunderx2t99") + (eq_attr "type" "load1") + (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_REG)")) + "thunderx2t99_i012,thunderx2t99_ls01") + +(define_insn_reservation "thunderx2t99_load_scale_ext" 6 + (and (eq_attr "tune" "thunderx2t99") + (eq_attr "type" "load1") + (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_SHIFT + | AARCH64_ADDR_REG_EXT + | AARCH64_ADDR_REG_SHIFT_EXT)")) + "thunderx2t99_i012,thunderx2t99_i012,thunderx2t99_ls01") + (define_insn_reservation "thunderx2t99_loadpair" 5 (and (eq_attr "tune" "thunderx2t99") (eq_attr "type" "load2")) 
"thunderx2t99_i012,thunderx2t99_ls01") -(define_insn_reservation "thunderx2t99_store_basic" 1 +(define_insn_reservation "thunderx2t99_store_basic" 0 (and (eq_attr "tune" "thunderx2t99") - (eq_attr "type" "store1")) + (eq_attr "type" "store1") + (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_SYMBOLIC + | AARCH64_ADDR_REG_IMM + | AARCH64_ADDR_LO_SUM)")) "thunderx2t99_ls01,thunderx2t99_sd") -(define_insn_reservation "thunderx2t99_storepair_basic" 1 +(define_insn_reservation "thunderx2t99_store_automod" 0 (and (eq_attr "tune" "thunderx2t99") - (eq_attr "type" "store2")) + (eq_attr "type" "store1") + (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)")) + "thunderx2t99_ls01,(thunderx2t99_sd+thunderx2t99_i012)") + +(define_insn_reservation "thunderx2t99_store_regoffset_scale_ext" 0 + (and (eq_attr "tune" "thunderx2t99") + (eq_attr "type" "store1") + (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_REG + | AARCH64_ADDR_REG_SHIFT + | AARCH64_ADDR_REG_EXT + | AARCH64_ADDR_REG_SHIFT_EXT)")) + "thunderx2t99_i012,thunderx2t99_ls01,thunderx2t99_sd") + +(define_insn_reservation "thunderx2t99_storepair_basic" 0 + (and (eq_attr "tune" "thunderx2t99") + (eq_attr "type" "store2") + (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_IMM + | AARCH64_ADDR_LO_SUM)")) "thunderx2t99_ls01,thunderx2t99_sd") +(define_insn_reservation "thunderx2t99_storepair_automod" 0 + (and (eq_attr "tune" "thunderx2t99") + (eq_attr "type" "store2") + (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)")) + "thunderx2t99_ls01,(thunderx2t99_sd+thunderx2t99_i012)") + ;; FP data processing instructions. 
(define_insn_reservation "thunderx2t99_fp_simple" 5 @@ -204,24 +253,81 @@ (define_insn_reservation "thunderx2t99_fp_load_basic" 4 (and (eq_attr "tune" "thunderx2t99") - (eq_attr "type" "f_loads,f_loadd")) + (eq_attr "type" "f_loads,f_loadd") + (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_SYMBOLIC + | AARCH64_ADDR_REG_IMM + | AARCH64_ADDR_LO_SUM)")) "thunderx2t99_ls01") +(define_insn_reservation "thunderx2t99_fp_load_automod" 4 + (and (eq_attr "tune" "thunderx2t99") + (eq_attr "type" "f_loads,f_loadd") + (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)")) + "thunderx2t99_ls01,thunderx2t99_i012") + +(define_insn_reservation "thunderx2t99_fp_load_regoffset" 5 + (and (eq_attr "tune" "thunderx2t99") + (eq_attr "type" "f_loads,f_loadd") + (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_REG)")) + "thunderx2t99_ls01,thunderx2t99_i012") + +(define_insn_reservation "thunderx2t99_fp_load_scale_ext" 6 + (and (eq_attr "tune" "thunderx2t99") + (eq_attr "type" "f_loads,f_loadd") + (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_SHIFT + | AARCH64_ADDR_REG_EXT + | AARCH64_ADDR_REG_SHIFT_EXT)")) + "thunderx2t99_ls01,thunderx2t99_i012") + (define_insn_reservation "thunderx2t99_fp_loadpair_basic" 4 (and (eq_attr "tune" "thunderx2t99") - (eq_attr "type" "neon_load1_2reg")) + (eq_attr "type" "neon_load1_2reg") + (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_IMM + | AARCH64_ADDR_LO_SUM)")) "thunderx2t99_ls01*2") -(define_insn_reservation "thunderx2t99_fp_store_basic" 1 +(define_insn_reservation "thunderx2t99_fp_loadpair_automod" 4 (and (eq_attr "tune" "thunderx2t99") - (eq_attr "type" "f_stores,f_stored")) + (eq_attr "type" "neon_load1_2reg") + (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)")) + "(thunderx2t99_ls01+thunderx2t99_i012),thunderx2t99_ls01") + +(define_insn_reservation "thunderx2t99_fp_store_basic" 0 + (and (eq_attr "tune" "thunderx2t99") + (eq_attr "type" "f_stores,f_stored") + (match_test "aarch64_mem_type_p (insn, 
AARCH64_ADDR_SYMBOLIC + | AARCH64_ADDR_REG_IMM + | AARCH64_ADDR_LO_SUM)")) "thunderx2t99_ls01,thunderx2t99_sd") -(define_insn_reservation "thunderx2t99_fp_storepair_basic" 1 +(define_insn_reservation "thunderx2t99_fp_store_automod" 0 + (and (eq_attr "tune" "thunderx2t99") + (eq_attr "type" "f_stores,f_stored") + (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)")) + "thunderx2t99_ls01,(thunderx2t99_sd+thunderx2t99_i012)") + +(define_insn_reservation "thunderx2t99_fp_store_regoffset_scale_ext" 0 (and (eq_attr "tune" "thunderx2t99") - (eq_attr "type" "neon_store1_2reg")) + (eq_attr "type" "f_stores,f_stored") + (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_REG + | AARCH64_ADDR_REG_SHIFT + | AARCH64_ADDR_REG_EXT + | AARCH64_ADDR_REG_SHIFT_EXT)")) + "thunderx2t99_i012,thunderx2t99_ls01,thunderx2t99_sd") + +(define_insn_reservation "thunderx2t99_fp_storepair_basic" 0 + (and (eq_attr "tune" "thunderx2t99") + (eq_attr "type" "neon_store1_2reg") + (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_IMM + | AARCH64_ADDR_LO_SUM)")) "thunderx2t99_ls01,(thunderx2t99_ls01+thunderx2t99_sd),thunderx2t99_sd") +(define_insn_reservation "thunderx2t99_fp_storepair_automod" 0 + (and (eq_attr "tune" "thunderx2t99") + (eq_attr "type" "neon_store1_2reg") + (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)")) + "thunderx2t99_ls01,(thunderx2t99_ls01+thunderx2t99_sd+thunderx2t99_i012),thunderx2t99_sd") + ;; ASIMD integer instructions. (define_insn_reservation "thunderx2t99_asimd_int" 7 @@ -443,6 +549,16 @@ (eq_attr "type" "neon_store2_one_lane,neon_store2_one_lane_q")) "thunderx2t99_ls01,thunderx2t99_f01") +;; Bypasses for automodify load insns. + +; For automodify loads, the address should be available before the loaded data. + +(define_bypass 1 + "thunderx2t99_load_automod,thunderx2t99_fp_load_automod,\ + thunderx2t99_fp_loadpair_automod" + "thunderx2t99_*" + "aarch64_automod_addr_only_dep") + ;; Crypto extensions. 
; FIXME: Forwarding path for aese/aesmc or aesd/aesimc pairs?