Hi,
The attached patch implements a generic vector cost model for aarch64.
Regression tested on aarch64-none-elf.
OK?
Thanks,
Tejas Belagod.
ARM.
2013-06-25 Tejas Belagod <tejas.bela...@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (cpu_vector_cost): New.
(tune_params): New member 'const vec_costs'.
* config/aarch64/aarch64.c (generic_vector_cost): New.
(generic_tunings): Initialize new member 'vec_costs' with generic_vector_cost.
(aarch64_builtin_vectorization_cost): New.
(aarch64_add_stmt_cost): New.
(TARGET_VECTORIZE_ADD_STMT_COST): New.
(TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): New.
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 001842e..28d8bae 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -126,11 +126,34 @@ struct cpu_regmove_cost
const int FP2FP;
};
+/* Cost for vector insn classes.  Consumed by the vectorizer cost-model
+   hooks (targetm.vectorize.*).  Values are relative weights, not cycle
+   counts; each tuning supplies its own table via tune_params.  */
+struct cpu_vector_cost
+{
+ const int scalar_stmt_cost; /* Cost of any scalar operation,
+ excluding load and store. */
+ const int scalar_load_cost; /* Cost of scalar load. */
+ const int scalar_store_cost; /* Cost of scalar store. */
+ const int vec_stmt_cost; /* Cost of any vector operation,
+ excluding load, store,
+ vector-to-scalar and
+ scalar-to-vector operation. */
+ const int vec_to_scalar_cost; /* Cost of vec-to-scalar
+ operation. */
+ const int scalar_to_vec_cost; /* Cost of scalar-to-vector
+ operation. */
+ const int vec_align_load_cost; /* Cost of aligned vector load. */
+ const int vec_unalign_load_cost; /* Cost of unaligned vector load. */
+ const int vec_unalign_store_cost; /* Cost of unaligned vector store. */
+ const int vec_store_cost; /* Cost of vector store. */
+ const int cond_taken_branch_cost; /* Cost of taken branch. */
+ const int cond_not_taken_branch_cost; /* Cost of not taken branch. */
+};
+
struct tune_params
{
const struct cpu_rtx_cost_table *const insn_extra_cost;
const struct cpu_addrcost_table *const addr_cost;
const struct cpu_regmove_cost *const regmove_cost;
+ const struct cpu_vector_cost *const vec_costs; /* Vectorizer cost table. */
const int memmov_cost;
};
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index d32563d..05eae32 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -45,6 +45,8 @@
#include "gimple.h"
#include "optabs.h"
#include "dwarf2.h"
+#include "cfgloop.h"
+#include "tree-vectorizer.h"
/* Classifies an address.
@@ -178,6 +180,26 @@ static const struct cpu_regmove_cost generic_regmove_cost =
NAMED_PARAM (FP2FP, 4)
};
+/* Generic costs for vector insn classes.  Every class is weighted 1
+   except taken branches (3), so vector and scalar code are costed
+   alike by default; CPU-specific tunings can supply their own table
+   through tune_params::vec_costs.  */
+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
+__extension__
+#endif
+static const struct cpu_vector_cost generic_vector_cost =
+{
+ NAMED_PARAM (scalar_stmt_cost, 1),
+ NAMED_PARAM (scalar_load_cost, 1),
+ NAMED_PARAM (scalar_store_cost, 1),
+ NAMED_PARAM (vec_stmt_cost, 1),
+ NAMED_PARAM (vec_to_scalar_cost, 1),
+ NAMED_PARAM (scalar_to_vec_cost, 1),
+ NAMED_PARAM (vec_align_load_cost, 1),
+ NAMED_PARAM (vec_unalign_load_cost, 1),
+ NAMED_PARAM (vec_unalign_store_cost, 1),
+ NAMED_PARAM (vec_store_cost, 1),
+ NAMED_PARAM (cond_taken_branch_cost, 3),
+ NAMED_PARAM (cond_not_taken_branch_cost, 1)
+};
+
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
@@ -186,6 +208,7 @@ static const struct tune_params generic_tunings =
&generic_rtx_cost_table,
&generic_addrcost_table,
&generic_regmove_cost,
+ &generic_vector_cost,
NAMED_PARAM (memmov_cost, 4)
};
@@ -4641,6 +4664,101 @@ aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
return aarch64_tune_params->memmov_cost;
}
+/* Vectorizer cost model target hooks. */
+
+/* Implement targetm.vectorize.builtin_vectorization_cost.
+   Return the cost of one statement of kind TYPE_OF_COST, looked up
+   in the active tuning's vector cost table.  VECTYPE is consulted
+   only for vec_construct; MISALIGN is currently unused.  */
+static int
+aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype,
+ int misalign ATTRIBUTE_UNUSED)
+{
+ unsigned elements;
+
+ switch (type_of_cost)
+ {
+ case scalar_stmt:
+ return aarch64_tune_params->vec_costs->scalar_stmt_cost;
+
+ case scalar_load:
+ return aarch64_tune_params->vec_costs->scalar_load_cost;
+
+ case scalar_store:
+ return aarch64_tune_params->vec_costs->scalar_store_cost;
+
+ case vector_stmt:
+ return aarch64_tune_params->vec_costs->vec_stmt_cost;
+
+ /* A plain vector_load is costed as an aligned load.  */
+ case vector_load:
+ return aarch64_tune_params->vec_costs->vec_align_load_cost;
+
+ case vector_store:
+ return aarch64_tune_params->vec_costs->vec_store_cost;
+
+ case vec_to_scalar:
+ return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
+
+ case scalar_to_vec:
+ return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
+
+ case unaligned_load:
+ return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
+
+ case unaligned_store:
+ return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
+
+ case cond_branch_taken:
+ return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
+
+ case cond_branch_not_taken:
+ return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
+
+ /* Permutes and promote/demote are costed as generic vector ops.  */
+ case vec_perm:
+ case vec_promote_demote:
+ return aarch64_tune_params->vec_costs->vec_stmt_cost;
+
+ /* Building a vector from scalars: cost grows with element count.
+    NOTE(review): this heuristic is hard-coded rather than taken from
+    the cost table, unlike every other case above.  */
+ case vec_construct:
+ elements = TYPE_VECTOR_SUBPARTS (vectype);
+ return elements / 2 + 1;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Implement targetm.vectorize.add_stmt_cost.
+   Accumulate the cost of COUNT statements of kind KIND into the
+   bucket WHERE of DATA -- an unsigned array indexed by
+   vect_cost_model_location -- and return the amount added.  When the
+   vectorizer cost model is disabled, accumulate nothing and return
+   0.  */
+static unsigned
+aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
+ struct _stmt_vec_info *stmt_info, int misalign,
+ enum vect_cost_model_location where)
+{
+ unsigned *cost = (unsigned *) data;
+ unsigned retval = 0;
+
+ if (flag_vect_cost_model)
+ {
+ /* STMT_INFO may be null (e.g. for costs queried without a concrete
+ statement); fall back to a null vectype in that case.  */
+ tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
+ int stmt_cost =
+ aarch64_builtin_vectorization_cost (kind, vectype, misalign);
+
+ /* Statements in an inner loop relative to the loop being
+ vectorized are weighted more heavily. The value here is
+ a function (linear for now) of the loop nest level. */
+ if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
+ {
+ loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
+ struct loop *loop = LOOP_VINFO_LOOP (loop_info);
+ unsigned nest_level = loop_depth (loop);
+
+ count *= nest_level;
+ }
+
+ retval = (unsigned) (count * stmt_cost);
+ cost[where] += retval;
+ }
+
+ return retval;
+}
+
static void initialize_aarch64_code_model (void);
/* Parse the architecture extension string. */
@@ -8021,6 +8139,13 @@ aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
+#undef TARGET_VECTORIZE_ADD_STMT_COST
+#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
+
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
+ aarch64_builtin_vectorization_cost
+
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode