Hi, As ARM supports unaligned vector accesses with almost no penalty, I'd like to disable the vectorizer's loop peeling for alignment on ARM targets.
I have run benchmarks on Cortex-A9 (hard-float) and noticed these significant improvements: * 1.5% improvement on a popular embedded benchmark (with peaks at +20% and +29%) * 2.1% on spec2k mesa * 9.2% on spec2k eon * up to 3.4% on some parts of another embedded benchmark The largest regression I noticed is 1%. I have attached a preliminary patch to discuss how acceptable it would be, and to discuss the needed changes in the testsuite. Indeed, quite a few tests now fail because they count the number of "vectorizing an unaligned access" and "alignment of access forced using peeling" occurrences in the vectorizer traces. I could add a property to target-supports.exp, which would currently be true only on ARM, to select whether to rely on peeling or not, and update all the affected tests accordingly. As there are quite a few tests to update, I'd like opinions first. Thanks, Christophe.
2012-12-07 Christophe Lyon <christophe.l...@linaro.org> gcc/ * config/arm/arm.c (arm_vector_worth_peeling): New function. (TARGET_VECTORIZE_VECTOR_WORTH_PEELING): New define. * doc/tm.texi.in (TARGET_VECTORIZE_VECTOR_WORTH_PEELING): Add documentation. * doc/tm.texi (TARGET_VECTORIZE_VECTOR_WORTH_PEELING): Likewise. * target.def (vector_worth_peeling): New hook. * targhooks.c (default_vector_worth_peeling): New function. * targhooks.h (default_vector_worth_peeling): Declare. * tree-vect-data-refs.c (vector_alignment_reachable_p): Call vector_worth_peeling hook.
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 1470602..ebbf594 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -261,6 +261,7 @@ static bool arm_builtin_support_vector_misalignment (enum machine_mode mode, const_tree type, int misalignment, bool is_packed); +static bool arm_vector_worth_peeling (int misalignment); static void arm_conditional_register_usage (void); static reg_class_t arm_preferred_rename_class (reg_class_t rclass); static unsigned int arm_autovectorize_vector_sizes (void); @@ -618,6 +619,10 @@ static const struct attribute_spec arm_attribute_table[] = #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \ arm_builtin_support_vector_misalignment +#undef TARGET_VECTORIZE_VECTOR_WORTH_PEELING +#define TARGET_VECTORIZE_VECTOR_WORTH_PEELING \ + arm_vector_worth_peeling + #undef TARGET_PREFERRED_RENAME_CLASS #define TARGET_PREFERRED_RENAME_CLASS \ arm_preferred_rename_class @@ -25200,6 +25205,14 @@ arm_builtin_support_vector_misalignment (enum machine_mode mode, is_packed); } +/* ARM supports misaligned accesses with low penalty. It's not worth + peeling. */ +static bool +arm_vector_worth_peeling (int misalignment) +{ + return false; +} + static void arm_conditional_register_usage (void) { diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index b36c764..05b1f67 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -5755,6 +5755,12 @@ the elements in the vectors should be of type @var{type}. @var{is_packed} parameter is true if the memory access is defined in a packed struct. @end deftypefn +@deftypefn {Target Hook} bool TARGET_VECTORIZE_VECTOR_WORTH_PEELING (int @var{misalignment}) +This hook should return true if the cost of peeling is cheaper than a +misaligned access of a specific factor denoted in the +@var{misalignment} parameter. 
+@end deftypefn + @deftypefn {Target Hook} {enum machine_mode} TARGET_VECTORIZE_PREFERRED_SIMD_MODE (enum machine_mode @var{mode}) This hook should return the preferred mode for vectorizing scalar mode @var{mode}. The default is diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 4858d97..452a929 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -5679,6 +5679,12 @@ the elements in the vectors should be of type @var{type}. @var{is_packed} parameter is true if the memory access is defined in a packed struct. @end deftypefn +@hook TARGET_VECTORIZE_VECTOR_WORTH_PEELING +This hook should return true if the cost of peeling is cheaper than a +misaligned access of a specific factor denoted in the +@var{misalignment} parameter. +@end deftypefn + @hook TARGET_VECTORIZE_PREFERRED_SIMD_MODE This hook should return the preferred mode for vectorizing scalar mode @var{mode}. The default is diff --git a/gcc/target.def b/gcc/target.def index 2d79290..d3a2671 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -1005,6 +1005,15 @@ DEFHOOK (enum machine_mode mode, const_tree type, int misalignment, bool is_packed), default_builtin_support_vector_misalignment) +/* Return true if peeling is worth its cost compared to misaligned + accesses on the target. */ +DEFHOOK +(vector_worth_peeling, + "", + bool, + (int misalignment), + default_vector_worth_peeling) + /* Return the builtin decl needed to load a vector of TYPE. */ DEFHOOK (builtin_tm_load, diff --git a/gcc/targhooks.c b/gcc/targhooks.c index 265fc98..4f9b6cc 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -985,6 +985,14 @@ default_builtin_support_vector_misalignment (enum machine_mode mode, return false; } +/* By default, assume that the cost of misaligned accesses is + sufficiently high so that peeling is worth its cost. 
*/ +bool +default_vector_worth_peeling (int misalignment) +{ + return true; +} + /* By default, only attempt to parallelize bitwise operations, and possibly adds/subtracts using bit-twiddling. */ diff --git a/gcc/targhooks.h b/gcc/targhooks.h index e89f096..a60d6b7 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -90,6 +90,7 @@ extern bool default_builtin_support_vector_misalignment (enum machine_mode mode, const_tree, int, bool); +extern bool default_vector_worth_peeling (int); extern enum machine_mode default_preferred_simd_mode (enum machine_mode mode); extern unsigned int default_autovectorize_vector_sizes (void); extern void *default_init_cost (struct loop *); diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index dc6e1e7..c43c468 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -1209,6 +1209,11 @@ vector_alignment_reachable_p (struct data_reference *dr) return false; } + /* Check if peeling is worth the cost in case misaligned accesses + are cheap on this target. */ + if (!targetm.vectorize.vector_worth_peeling (DR_MISALIGNMENT (dr))) + return false; + /* If misalignment is known at the compile time then allow peeling only if natural alignment is reachable through peeling. */ if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))