Hi, The FMA steering pass should be enabled through the tuning structures rather than being an intrinsic property of the core. This patch moves control of the pass to the tuning structures, turning it off for everything other than a Cortex-A57 system (i.e. -mcpu=cortex-a57 or -mcpu=cortex-a57.cortex-a53).
Some CPUs share the cortexa57 tuning structs, but do not use this steering pass. For those I've taken a copy of the cortexa57 tuning structures and called it cortexa72_tunings. Tested with a compiler build and all known values of -mcpu to make sure the pass runs in the expected configurations. OK? Thanks, James --- 2015-06-23 James Greenhalgh <james.greenha...@arm.com> * config/aarch64/aarch64.h (AARCH64_FL_USE_FMA_STEERING_PASS): Delete. (aarch64_tune_flags): Likewise. (AARCH64_TUNE_FMA_STEERING): Likewise. * config/aarch64/aarch64-cores.def (cortex-a57): Remove reference to AARCH64_FL_USE_FMA_STEERING_PASS. (cortex-a57.cortex-a53): Likewise. (cortex-a72): Use cortexa72_tunings. (cortex-a72.cortex-a53): Likewise. (exynos-m1): Likewise. * config/aarch64/aarch64-protos.h (tune_params): Add a field: extra_tuning_flags. * config/aarch64/aarch64-tuning-flags.def: New. * config/aarch64/aarch64-protos.h (AARCH64_EXTRA_TUNING_OPTION): New. (aarch64_extra_tuning_flags): Likewise. (aarch64_tune_params): Declare here. * config/aarch64/aarch64.c (generic_tunings): Set extra_tuning_flags. (cortexa53_tunings): Likewise. (cortexa57_tunings): Likewise. (thunderx_tunings): Likewise. (xgene1_tunings): Likewise. (cortexa72_tunings): New. * config/aarch64/cortex-a57-fma-steering.c: Include aarch64-protos.h. (gate): Check against aarch64_tune_params. * config/aarch64/t-aarch64 (cortex-a57-fma-steering.o): Depend on aarch64-protos.h.
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index dfc9cc8..c4e22fe 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -40,13 +40,13 @@ /* V8 Architecture Processors. */ AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, "0x41", "0xd03") -AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_USE_FMA_STEERING_PASS, cortexa57, "0x41", "0xd07") -AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd08") -AARCH64_CORE("exynos-m1", exynosm1, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, "0x53", "0x001") +AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07") +AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08") +AARCH64_CORE("exynos-m1", exynosm1, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, "0x53", "0x001") AARCH64_CORE("thunderx", thunderx, thunderx, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, "0x43", "0x0a1") AARCH64_CORE("xgene1", xgene1, xgene1, 8, AARCH64_FL_FOR_ARCH8, xgene1, "0x50", "0x000") /* V8 big.LITTLE implementations. 
*/ -AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_USE_FMA_STEERING_PASS, cortexa57, "0x41", "0xd07.0xd03") -AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd08.0xd03") +AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07.0xd03") +AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08.0xd03") diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 4bdcc46..7ece346 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -187,6 +187,7 @@ struct tune_params const int vec_reassoc_width; const int min_div_recip_mul_sf; const int min_div_recip_mul_df; + const unsigned int extra_tuning_flags; }; #define AARCH64_FUSION_PAIR(x, name, index) \ @@ -209,6 +210,26 @@ enum aarch64_fusion_pairs }; #undef AARCH64_FUSION_PAIR +#define AARCH64_EXTRA_TUNING_OPTION(x, name, index) \ + AARCH64_EXTRA_TUNE_##name = (1 << index), +/* Supported tuning flags. */ +enum aarch64_extra_tuning_flags +{ + AARCH64_EXTRA_TUNE_NONE = 0, +#include "aarch64-tuning-flags.def" + +/* Hacky macro to build the "all" flag mask. + Expands to 0 | AARCH64_TUNE_index0 | AARCH64_TUNE_index1 , etc. 
*/ +#undef AARCH64_EXTRA_TUNING_OPTION +#define AARCH64_EXTRA_TUNING_OPTION(x, name, y) \ + | AARCH64_EXTRA_TUNE_##name + AARCH64_EXTRA_TUNE_ALL = 0 +#include "aarch64-tuning-flags.def" +}; +#undef AARCH64_EXTRA_TUNING_OPTION + +extern const struct tune_params *aarch64_tune_params; + HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned); int aarch64_get_condition_code (rtx); bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode); diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def new file mode 100644 index 0000000..01aaca8 --- /dev/null +++ b/gcc/config/aarch64/aarch64-tuning-flags.def @@ -0,0 +1,34 @@ +/* Copyright (C) 2015 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* Additional control over certain tuning parameters. Before including + this file, define a macro: + + AARCH64_EXTRA_TUNING_OPTION (name, internal_name, index_bit) + + Where: + + NAME is a string giving a friendly name for the tuning flag. + INTERNAL_NAME gives the internal name suitable for appending to + AARCH64_TUNE_ to give an enum name. + INDEX_BIT is the bit to set in the bitmask of supported tuning + flags. 
*/ + +AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS, 0) + diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 5fe487b..96327a2 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -343,7 +343,8 @@ static const struct tune_params generic_tunings = 4, /* fp_reassoc_width. */ 1, /* vec_reassoc_width. */ 2, /* min_div_recip_mul_sf. */ - 2 /* min_div_recip_mul_df. */ + 2, /* min_div_recip_mul_df. */ + (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ }; static const struct tune_params cortexa53_tunings = @@ -364,7 +365,8 @@ static const struct tune_params cortexa53_tunings = 4, /* fp_reassoc_width. */ 1, /* vec_reassoc_width. */ 2, /* min_div_recip_mul_sf. */ - 2 /* min_div_recip_mul_df. */ + 2, /* min_div_recip_mul_df. */ + (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ }; static const struct tune_params cortexa57_tunings = @@ -385,7 +387,30 @@ static const struct tune_params cortexa57_tunings = 4, /* fp_reassoc_width. */ 1, /* vec_reassoc_width. */ 2, /* min_div_recip_mul_sf. */ - 2 /* min_div_recip_mul_df. */ + 2, /* min_div_recip_mul_df. */ + (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) /* tune_flags. */ +}; + +static const struct tune_params cortexa72_tunings = +{ + &cortexa57_extra_costs, + &cortexa57_addrcost_table, + &cortexa57_regmove_cost, + &cortexa57_vector_cost, + &generic_branch_cost, + 4, /* memmov_cost */ + 3, /* issue_rate */ + (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD + | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */ + 16, /* function_align. */ + 8, /* jump_align. */ + 4, /* loop_align. */ + 2, /* int_reassoc_width. */ + 4, /* fp_reassoc_width. */ + 1, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. */ + 2, /* min_div_recip_mul_df. */ + (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ }; static const struct tune_params thunderx_tunings = @@ -405,7 +430,8 @@ static const struct tune_params thunderx_tunings = 4, /* fp_reassoc_width. */ 1, /* vec_reassoc_width. */ 2, /* min_div_recip_mul_sf. 
*/ - 2 /* min_div_recip_mul_df. */ + 2, /* min_div_recip_mul_df. */ + (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ }; static const struct tune_params xgene1_tunings = @@ -425,7 +451,8 @@ static const struct tune_params xgene1_tunings = 4, /* fp_reassoc_width. */ 1, /* vec_reassoc_width. */ 2, /* min_div_recip_mul_sf. */ - 2 /* min_div_recip_mul_df. */ + 2, /* min_div_recip_mul_df. */ + (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ }; /* A processor implementing AArch64. */ diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index a22c6e4..a99beaf 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -199,13 +199,11 @@ extern unsigned aarch64_architecture_version; #define AARCH64_FL_FP (1 << 1) /* Has FP. */ #define AARCH64_FL_CRYPTO (1 << 2) /* Has crypto. */ #define AARCH64_FL_CRC (1 << 3) /* Has CRC. */ -/* Has static dispatch of FMA. */ -#define AARCH64_FL_USE_FMA_STEERING_PASS (1 << 4) /* ARMv8.1 architecture extensions. */ -#define AARCH64_FL_LSE (1 << 5) /* Has Large System Extensions. */ -#define AARCH64_FL_PAN (1 << 6) /* Has Privileged Access Never. */ -#define AARCH64_FL_LOR (1 << 7) /* Has Limited Ordering regions. */ -#define AARCH64_FL_RDMA (1 << 8) /* Has ARMv8.1 Adv.SIMD. */ +#define AARCH64_FL_LSE (1 << 4) /* Has Large System Extensions. */ +#define AARCH64_FL_PAN (1 << 5) /* Has Privileged Access Never. */ +#define AARCH64_FL_LOR (1 << 6) /* Has Limited Ordering regions. */ +#define AARCH64_FL_RDMA (1 << 7) /* Has ARMv8.1 Adv.SIMD. */ /* Has FP and SIMD. */ #define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) @@ -226,11 +224,6 @@ extern unsigned long aarch64_isa_flags; #define AARCH64_ISA_FP (aarch64_isa_flags & AARCH64_FL_FP) #define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD) -/* Macros to test tuning flags. 
*/ -extern unsigned long aarch64_tune_flags; -#define AARCH64_TUNE_FMA_STEERING \ - (aarch64_tune_flags & AARCH64_FL_USE_FMA_STEERING_PASS) - /* Crypto is an optional extension to AdvSIMD. */ #define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO) diff --git a/gcc/config/aarch64/cortex-a57-fma-steering.c b/gcc/config/aarch64/cortex-a57-fma-steering.c index 648a88c..07bf8de 100644 --- a/gcc/config/aarch64/cortex-a57-fma-steering.c +++ b/gcc/config/aarch64/cortex-a57-fma-steering.c @@ -43,6 +43,7 @@ #include "tree-pass.h" #include "regrename.h" #include "cortex-a57-fma-steering.h" +#include "aarch64-protos.h" #include <list> @@ -1051,7 +1052,9 @@ public: /* opt_pass methods: */ virtual bool gate (function *) { - return AARCH64_TUNE_FMA_STEERING && optimize >= 2; + return (aarch64_tune_params->extra_tuning_flags + & AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) + && optimize >= 2; } virtual unsigned int execute (function *) diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64 index 0371203..af154f4 100644 --- a/gcc/config/aarch64/t-aarch64 +++ b/gcc/config/aarch64/t-aarch64 @@ -53,7 +53,8 @@ cortex-a57-fma-steering.o: $(srcdir)/config/aarch64/cortex-a57-fma-steering.c \ dominance.h cfg.h cfganal.h $(BASIC_BLOCK_H) $(INSN_ATTR_H) $(RECOG_H) \ output.h hash-map.h $(DF_H) $(OBSTACK_H) $(TARGET_H) $(RTL_H) \ $(CONTEXT_H) $(TREE_PASS_H) regrename.h \ - $(srcdir)/config/aarch64/cortex-a57-fma-steering.h + $(srcdir)/config/aarch64/cortex-a57-fma-steering.h \ + $(srcdir)/config/aarch64/aarch64-protos.h $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/aarch64/cortex-a57-fma-steering.c