Minor tweaks to the cost and scheduling models for Exynos M1.
Committed as r233646 and r233647. -- Evandro Menezes
From ab6127823e706361315f1c8b87fb4c32bc299b65 Mon Sep 17 00:00:00 2001 From: evandro <evandro@138bc75d-0d04-0410-961f-82ee72b054a4> Date: Tue, 23 Feb 2016 20:21:23 +0000 Subject: [PATCH 1/2] * gcc/config/aarch64/aarch64.c (exynosm1_tunings): Enable the Newton series for reciprocal square root in Exynos M1. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@233646 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 5 +++++ gcc/config/aarch64/aarch64.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3c629ef..22dd022 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2016-02-23 Evandro Menezes <e.mene...@samsung.com> + + * config/aarch64/aarch64.c (exynosm1_tunings): Enable the Newton + series for reciprocal square root in Exynos M1. + 2016-02-23 Martin Sebor <mse...@redhat.com> PR c/69759 diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 923a4b3..dc3dfea 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -538,7 +538,7 @@ static const struct tune_params exynosm1_tunings = 48, /* max_case_values. */ 64, /* cache_line_size. */ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */ - (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ + (AARCH64_EXTRA_TUNE_RECIP_SQRT) /* tune_flags. */ }; static const struct tune_params thunderx_tunings = -- 1.9.1
From 01cadc5b883a2613f847aa7a88b86aed454d9413 Mon Sep 17 00:00:00 2001 From: evandro <evandro@138bc75d-0d04-0410-961f-82ee72b054a4> Date: Tue, 23 Feb 2016 21:31:00 +0000 Subject: [PATCH 2/2] Tweak the pipeline model for Exynos M1 gcc/ * config/aarch64/aarch64.c (exynosm1_tunings): Enable fusion of AES{D,E} and AESMC pairs. * config/arm/exynos-m1.md: Change cost of STP, fix bypass for stores and add bypass for AES{D,E} and AESMC pairs. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@233647 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 7 +++++++ gcc/config/aarch64/aarch64.c | 2 +- gcc/config/arm/exynos-m1.md | 26 +++++++++++++++++--------- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 22dd022..07b50b5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,12 @@ 2016-02-23 Evandro Menezes <e.mene...@samsung.com> + * config/arm/exynos-m1.md: Change cost of STP, fix bypass for stores + and add bypass for AES{D,E} and AESMC pairs. + * config/aarch64/aarch64.c (exynosm1_tunings): Enable fusion of AES{D,E} + and AESMC pairs. + +2016-02-23 Evandro Menezes <e.mene...@samsung.com> + * config/aarch64/aarch64.c (exynosm1_tunings): Enable the Newton series for reciprocal square root in Exynos M1. diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index dc3dfea..6dc8330 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -526,7 +526,7 @@ static const struct tune_params exynosm1_tunings = &generic_branch_cost, 4, /* memmov_cost */ 3, /* issue_rate */ - (AARCH64_FUSE_NOTHING), /* fusible_ops */ + (AARCH64_FUSE_AES_AESMC), /* fusible_ops */ 4, /* function_align. */ 4, /* jump_align. */ 4, /* loop_align. 
*/ diff --git a/gcc/config/arm/exynos-m1.md b/gcc/config/arm/exynos-m1.md index 2f52b22..318b151 100644 --- a/gcc/config/arm/exynos-m1.md +++ b/gcc/config/arm/exynos-m1.md @@ -248,10 +248,6 @@ (eq_attr "type" "neon_load4_all_lanes, neon_load4_all_lanes_q") (const_string "neon_load4_all") - (eq_attr "type" "f_stores, f_stored,\ - neon_stp, neon_stp_q") - (const_string "neon_store") - (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q") (const_string "neon_store1_1") @@ -730,8 +726,14 @@ (define_insn_reservation "exynos_m1_neon_store" 1 (and (eq_attr "tune" "exynosm1") - (eq_attr "exynos_m1_neon_type" "neon_store")) - "(em1_fst, em1_st)") + (eq_attr "type" "f_stores, f_stored, neon_stp")) + "em1_sfst") + +(define_insn_reservation + "exynos_m1_neon_store_q" 3 + (and (eq_attr "tune" "exynosm1") + (eq_attr "type" "neon_stp_q")) + "(em1_sfst * 2)") (define_insn_reservation "exynos_m1_neon_store1_1" 1 @@ -761,7 +763,7 @@ "exynos_m1_neon_store1_one" 7 (and (eq_attr "tune" "exynosm1") (eq_attr "exynos_m1_neon_type" "neon_store1_one")) - "(em1_fst, em1_st)") + "em1_sfst") (define_insn_reservation "exynos_m1_neon_store2" 7 @@ -892,7 +894,9 @@ ;; Pre-decrement and post-increment addressing modes update the register quickly. ;; TODO: figure out how to tell the addressing mode register from the loaded one. -(define_bypass 1 "exynos_m1_store*" "exynos_m1_store*") +(define_bypass 1 "exynos_m1_store*, exynos_m1_neon_store*" + "exynos_m1_store*, exynos_m1_neon_store*, + exynos_m1_load*, exynos_m1_neon_load*") ;; MLAs can feed other MLAs quickly. 
(define_bypass 1 "exynos_m1_mla*" "exynos_m1_mla*") @@ -908,7 +912,6 @@ (define_bypass 5 "exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step" "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\ exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step") - (define_bypass 3 "exynos_m1_fp_add" "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac") (define_bypass 3 "exynos_m1_neon_fp_add" @@ -947,6 +950,11 @@ "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\ exynos_m1_crypto_poly*") +;; AES{D,E}/AESMC pairs can feed each other instantly. +(define_bypass 0 "exynos_m1_crypto_simple" + "exynos_m1_crypto_simple" + "aarch_crypto_can_dual_issue") + ;; Predicted branches take no time, but mispredicted ones take forever anyway. (define_bypass 1 "exynos_m1_*" "exynos_m1_call, exynos_m1_branch") -- 1.9.1