Hi,

As ARM supports unaligned vector accesses for almost no penalty, I'd
like to disable loop peeling on ARM targets.

I have ran benchmarks on cortex-A9 (hard-float) and noticed these
significant improvements:
* 1.5% improvement on a popular embedded benchmark (with peaks at +20% and +29%)
* 2.1% on spec2k mesa
* 9.2% on spec2k eon
* up to 3.4% on some part of another embedded benchmark

The largest regression I noticed is 1%.

I have attached a preliminary patch to discuss how acceptable it would
be, and to discuss the needed changes in the testsuite. Indeed; quite
a few tests now fail because they count the number of "vectorizing an
unaligned access" and "alignment of access forced using peeling"
occurrences in the vectorizer traces.

I could add a property to target-supports.exp, which would currently
be only true on ARM to select whether to rely on peeling or not, and
updated all the affected tests accordingly.

As there are quite a few tests to update, I'd like opinions first.

Thanks,

Christophe.
2012-12-07  Christophe Lyon <christophe.l...@linaro.org>

        gcc/
        * config/arm/arm.c (arm_vector_worth_peeling): New function.
        (TARGET_VECTORIZE_VECTOR_WORTH_PEELING): New define.
        * doc/tm.texi.in (TARGET_VECTORIZE_VECTOR_WORTH_PEELING): Add
        documentation.
        * doc/tm.texi (TARGET_VECTORIZE_VECTOR_WORTH_PEELING): Likewise.
        * target.def (vector_worth_peeling): New hook.
        * targhooks.c (default_vector_worth_peeling): New function.
        * targhooks.h (default_vector_worth_peeling): Declare.
        * tree-vect-data-refs.c (vector_alignment_reachable_p): Call
        vector_worth_peeling hook.

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 1470602..ebbf594 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -261,6 +261,7 @@ static bool arm_builtin_support_vector_misalignment (enum 
machine_mode mode,
                                                     const_tree type,
                                                     int misalignment,
                                                     bool is_packed);
+static bool arm_vector_worth_peeling (int misalignment);
 static void arm_conditional_register_usage (void);
 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
 static unsigned int arm_autovectorize_vector_sizes (void);
@@ -618,6 +619,10 @@ static const struct attribute_spec arm_attribute_table[] =
 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
   arm_builtin_support_vector_misalignment
 
+#undef TARGET_VECTORIZE_VECTOR_WORTH_PEELING
+#define TARGET_VECTORIZE_VECTOR_WORTH_PEELING \
+  arm_vector_worth_peeling
+
 #undef TARGET_PREFERRED_RENAME_CLASS
 #define TARGET_PREFERRED_RENAME_CLASS \
   arm_preferred_rename_class
@@ -25200,6 +25205,14 @@ arm_builtin_support_vector_misalignment (enum 
machine_mode mode,
                                                      is_packed);
 }
 
+/* ARM supports misaligned accesses with low penalty. It's not worth
+   peeling.  */
+static bool
+arm_vector_worth_peeling (int misalignment)
+{
+  return false;
+}
+
 static void
 arm_conditional_register_usage (void)
 {
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index b36c764..05b1f67 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -5755,6 +5755,12 @@ the elements in the vectors should be of type 
@var{type}.  @var{is_packed}
 parameter is true if the memory access is defined in a packed struct.
 @end deftypefn
 
+@deftypefn {Target Hook} bool TARGET_VECTORIZE_VECTOR_WORTH_PEELING (int 
@var{misalignment})
+This hook should return true if the cost of peeling is cheaper than a
+misaligned access of a specific factor denoted in the
+@var{misalignment} parameter.
+@end deftypefn
+
 @deftypefn {Target Hook} {enum machine_mode} 
TARGET_VECTORIZE_PREFERRED_SIMD_MODE (enum machine_mode @var{mode})
 This hook should return the preferred mode for vectorizing scalar
 mode @var{mode}.  The default is
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 4858d97..452a929 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -5679,6 +5679,12 @@ the elements in the vectors should be of type 
@var{type}.  @var{is_packed}
 parameter is true if the memory access is defined in a packed struct.
 @end deftypefn
 
+@hook TARGET_VECTORIZE_VECTOR_WORTH_PEELING
+This hook should return true if the cost of peeling is cheaper than a
+misaligned access of a specific factor denoted in the
+@var{misalignment} parameter.
+@end deftypefn
+
 @hook TARGET_VECTORIZE_PREFERRED_SIMD_MODE
 This hook should return the preferred mode for vectorizing scalar
 mode @var{mode}.  The default is
diff --git a/gcc/target.def b/gcc/target.def
index 2d79290..d3a2671 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1005,6 +1005,15 @@ DEFHOOK
  (enum machine_mode mode, const_tree type, int misalignment, bool is_packed),
  default_builtin_support_vector_misalignment)
 
+/* Return true if peeling is worth its cost compared to misaligned
+   accesses on the target.  */
+DEFHOOK
+(vector_worth_peeling,
+ "",
+ bool,
+ (int misalignment),
+ default_vector_worth_peeling)
+
 /* Return the builtin decl needed to load a vector of TYPE.  */
 DEFHOOK
 (builtin_tm_load,
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 265fc98..4f9b6cc 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -985,6 +985,14 @@ default_builtin_support_vector_misalignment (enum 
machine_mode mode,
   return false;
 }
 
+/* By default, assume that the cost of misaligned accesses is
+   sufficiently high so that peeling is worth its cost.  */
+bool
+default_vector_worth_peeling (int misalignment)
+{
+  return true;
+}
+
 /* By default, only attempt to parallelize bitwise operations, and
    possibly adds/subtracts using bit-twiddling.  */
 
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index e89f096..a60d6b7 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -90,6 +90,7 @@ extern bool
 default_builtin_support_vector_misalignment (enum machine_mode mode,
                                             const_tree,
                                             int, bool);
+extern bool default_vector_worth_peeling (int);
 extern enum machine_mode default_preferred_simd_mode (enum machine_mode mode);
 extern unsigned int default_autovectorize_vector_sizes (void);
 extern void *default_init_cost (struct loop *);
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index dc6e1e7..c43c468 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -1209,6 +1209,11 @@ vector_alignment_reachable_p (struct data_reference *dr)
        return false;
     }
 
+  /* Check if peeling is worth the cost in case misaligned accesses
+     are cheap on this target.  */
+  if (!targetm.vectorize.vector_worth_peeling (DR_MISALIGNMENT (dr)))
+    return false;
+
   /* If misalignment is known at the compile time then allow peeling
      only if natural alignment is reachable through peeling.  */
   if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))

Reply via email to