Hi,
The attached patch implements a generic vector cost model for aarch64.
Regression tested on aarch64-none-elf.
OK?
Thanks,
Tejas Belagod.
ARM.
2013-06-25 Tejas Belagod <tejas.bela...@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (cpu_vector_cost): New.
(tune_params): New member 'const vec_costs'.
* config/aarch64/aarch64.c (generic_vector_cost): New.
(generic_tunings): Initialize new member 'vec_costs' with generic_vector_cost.
(aarch64_builtin_vectorization_cost): New.
(aarch64_add_stmt_cost): New.
(TARGET_VECTORIZE_ADD_STMT_COST): New.
(TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): New.
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 001842e..28d8bae 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -126,11 +126,34 @@ struct cpu_regmove_cost
const int FP2FP;
};
+/* Cost for vector insn classes.  Consumed by the vectorizer cost-model
+   hooks (targetm.vectorize.*).  Values are relative weights, not cycle
+   counts; each tuning supplies its own table via tune_params.  */
+struct cpu_vector_cost
+{
+ const int scalar_stmt_cost; /* Cost of any scalar operation,
+ excluding load and store. */
+ const int scalar_load_cost; /* Cost of scalar load. */
+ const int scalar_store_cost; /* Cost of scalar store. */
+ const int vec_stmt_cost; /* Cost of any vector operation,
+ excluding load, store,
+ vector-to-scalar and
+ scalar-to-vector operation. */
+ const int vec_to_scalar_cost; /* Cost of vec-to-scalar
+ operation. */
+ const int scalar_to_vec_cost; /* Cost of scalar-to-vector
+ operation. */
+ const int vec_align_load_cost; /* Cost of aligned vector load. */
+ const int vec_unalign_load_cost; /* Cost of unaligned vector load. */
+ const int vec_unalign_store_cost; /* Cost of unaligned vector store. */
+ const int vec_store_cost; /* Cost of vector store. */
+ const int cond_taken_branch_cost; /* Cost of taken branch. */
+ const int cond_not_taken_branch_cost; /* Cost of not taken branch. */
+};
+
struct tune_params
{
const struct cpu_rtx_cost_table *const insn_extra_cost;
const struct cpu_addrcost_table *const addr_cost;
const struct cpu_regmove_cost *const regmove_cost;
+ const struct cpu_vector_cost *const vec_costs; /* Vectorizer cost table. */
const int memmov_cost;
};
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index d32563d..05eae32 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -45,6 +45,8 @@
#include "gimple.h"
#include "optabs.h"
#include "dwarf2.h"
+#include "cfgloop.h"
+#include "tree-vectorizer.h"
/* Classifies an address.
@@ -178,6 +180,26 @@ static const struct cpu_regmove_cost generic_regmove_cost =
NAMED_PARAM (FP2FP, 4)
};
+/* Generic costs for vector insn classes.  Every class is weighted 1
+   except taken branches (3), so vector and scalar code are costed
+   alike by default; CPU-specific tunings can supply their own table
+   through tune_params::vec_costs.  */
+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
+__extension__
+#endif
+static const struct cpu_vector_cost generic_vector_cost =
+{
+ NAMED_PARAM (scalar_stmt_cost, 1),
+ NAMED_PARAM (scalar_load_cost, 1),
+ NAMED_PARAM (scalar_store_cost, 1),
+ NAMED_PARAM (vec_stmt_cost, 1),
+ NAMED_PARAM (vec_to_scalar_cost, 1),
+ NAMED_PARAM (scalar_to_vec_cost, 1),
+ NAMED_PARAM (vec_align_load_cost, 1),
+ NAMED_PARAM (vec_unalign_load_cost, 1),
+ NAMED_PARAM (vec_unalign_store_cost, 1),
+ NAMED_PARAM (vec_store_cost, 1),
+ NAMED_PARAM (cond_taken_branch_cost, 3),
+ NAMED_PARAM (cond_not_taken_branch_cost, 1)
+};
+
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
@@ -186,6 +208,7 @@ static const struct tune_params generic_tunings =
&generic_rtx_cost_table,
&generic_addrcost_table,
&generic_regmove_cost,
+ &generic_vector_cost,
NAMED_PARAM (memmov_cost, 4)
};
@@ -4641,6 +4664,101 @@ aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
return aarch64_tune_params->memmov_cost;
}
+/* Vectorizer cost model target hooks. */
+
+/* Implement targetm.vectorize.builtin_vectorization_cost.
+   Return the cost of one statement of kind TYPE_OF_COST, looked up
+   in the active tuning's vector cost table.  VECTYPE is consulted
+   only for vec_construct; MISALIGN is currently unused.  */
+static int
+aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype,
+ int misalign ATTRIBUTE_UNUSED)
+{
+ unsigned elements;
+
+ switch (type_of_cost)
+ {
+ case scalar_stmt:
+ return aarch64_tune_params->vec_costs->scalar_stmt_cost;
+
+ case scalar_load:
+ return aarch64_tune_params->vec_costs->scalar_load_cost;
+
+ case scalar_store:
+ return aarch64_tune_params->vec_costs->scalar_store_cost;
+
+ case vector_stmt:
+ return aarch64_tune_params->vec_costs->vec_stmt_cost;
+
+ /* A plain vector_load is costed as an aligned load.  */
+ case vector_load:
+ return aarch64_tune_params->vec_costs->vec_align_load_cost;
+
+ case vector_store:
+ return aarch64_tune_params->vec_costs->vec_store_cost;
+
+ case vec_to_scalar:
+ return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
+
+ case scalar_to_vec:
+ return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
+
+ case unaligned_load:
+ return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
+
+ case unaligned_store:
+ return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
+
+ case cond_branch_taken:
+ return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
+
+ case cond_branch_not_taken:
+ return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
+
+ /* Permutes and promote/demote are costed as generic vector ops.  */
+ case vec_perm:
+ case vec_promote_demote:
+ return aarch64_tune_params->vec_costs->vec_stmt_cost;
+
+ /* Building a vector from scalars: cost grows with element count.
+    NOTE(review): this heuristic is hard-coded rather than taken from
+    the cost table, unlike every other case above.  */
+ case vec_construct:
+ elements = TYPE_VECTOR_SUBPARTS (vectype);
+ return elements / 2 + 1;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Implement targetm.vectorize.add_stmt_cost.
+   Accumulate the cost of COUNT statements of kind KIND into the
+   bucket WHERE of DATA -- an unsigned array indexed by
+   vect_cost_model_location -- and return the amount added.  When the
+   vectorizer cost model is disabled, accumulate nothing and return
+   0.  */
+static unsigned
+aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
+ struct _stmt_vec_info *stmt_info, int misalign,
+ enum vect_cost_model_location where)
+{
+ unsigned *cost = (unsigned *) data;
+ unsigned retval = 0;
+
+ if (flag_vect_cost_model)
+ {
+ /* STMT_INFO may be null (e.g. for costs queried without a concrete
+ statement); fall back to a null vectype in that case.  */
+ tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
+ int stmt_cost =
+ aarch64_builtin_vectorization_cost (kind, vectype, misalign);
+
+ /* Statements in an inner loop relative to the loop being
+ vectorized are weighted more heavily. The value here is
+ a function (linear for now) of the loop nest level. */
+ if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
+ {
+ loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
+ struct loop *loop = LOOP_VINFO_LOOP (loop_info);
+ unsigned nest_level = loop_depth (loop);
+
+ count *= nest_level;
+ }
+
+ retval = (unsigned) (count * stmt_cost);
+ cost[where] += retval;
+ }
+
+ return retval;
+}
+
static void initialize_aarch64_code_model (void);
/* Parse the architecture extension string. */
@@ -8021,6 +8139,13 @@ aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
+#undef TARGET_VECTORIZE_ADD_STMT_COST
+#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
+
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
+ aarch64_builtin_vectorization_cost
+
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode