gcc-patches-ow...@gcc.gnu.org wrote on 13/12/2011 04:05:57 AM:
> On Core 2, an unaligned vector load/store using movdqu is a very slow
> operation.  Experiments show it is six times slower than movdqa (aligned),
> and this is irrespective of whether the resulting data happens to be
> aligned or not.  On Core i7 there is no performance difference between the
> two, and on AMD processors movdqu is only about 10% slower.
>
> This patch stops the vectorizer from vectorizing loops that need to
> generate the slow unaligned vector loads/stores on Core 2.
>
>
> Do not vectorize loops on Core2 that need to use unaligned
> vector load/stores.
>
> 	* tree-vect-stmts.c (is_slow_vect_unaligned_load_store): New function.
> 	(vect_analyze_stmt): Check if the vectorizable load/store is slow.
> 	* target.def (TARGET_SLOW_UNALIGNED_VECTOR_MEMOP): New target hook.
> 	* doc/tm.texi.in: Document new target hook
> 	TARGET_SLOW_UNALIGNED_VECTOR_MEMOP.
> 	* doc/tm.texi: Regenerate.
> 	* config/i386/i386.c (ix86_slow_unaligned_vector_memop): New function.
> 	(TARGET_SLOW_UNALIGNED_VECTOR_MEMOP): New macro.
>
> @@ -5065,27 +5112,43 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vect
>    if (!bb_vinfo
>        && (STMT_VINFO_RELEVANT_P (stmt_info)
>            || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
> +    {
>        ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
>              || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
>              || vectorizable_conversion (stmt, NULL, NULL, NULL)
>              || vectorizable_shift (stmt, NULL, NULL, NULL)
>              || vectorizable_operation (stmt, NULL, NULL, NULL)
>              || vectorizable_assignment (stmt, NULL, NULL, NULL)
> -            || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
>              || vectorizable_call (stmt, NULL, NULL)
> -            || vectorizable_store (stmt, NULL, NULL, NULL)
> -            || vectorizable_reduction (stmt, NULL, NULL, NULL)
> +            || vectorizable_reduction (stmt, NULL, NULL, NULL)
>              || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
> +
> +      if (!ok)
> +        {
> +          ok = (vectorizable_load (stmt, NULL, NULL, NULL, NULL)
> +                || vectorizable_store (stmt, NULL, NULL, NULL));
> +
> +          if (ok && is_slow_vect_unaligned_load_store (stmt))
> +            ok = false;

Why not call is_slow_vect_unaligned_load_store from vectorizable_load/store
themselves?  (A rough sketch of what I mean is at the end of this mail.)

Ira

> +        }
> +    }
>    else
>      {
>        if (bb_vinfo)
> -        ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
> -              || vectorizable_type_demotion (stmt, NULL, NULL, node)
> -              || vectorizable_shift (stmt, NULL, NULL, node)
> -              || vectorizable_operation (stmt, NULL, NULL, node)
> -              || vectorizable_assignment (stmt, NULL, NULL, node)
> -              || vectorizable_load (stmt, NULL, NULL, node, NULL)
> -              || vectorizable_store (stmt, NULL, NULL, node));
> +        {
> +          ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
> +                || vectorizable_type_demotion (stmt, NULL, NULL, node)
> +                || vectorizable_shift (stmt, NULL, NULL, node)
> +                || vectorizable_operation (stmt, NULL, NULL, node)
> +                || vectorizable_assignment (stmt, NULL, NULL, node));
> +          if (!ok)
> +            {
> +              ok = (vectorizable_load (stmt, NULL, NULL, node, NULL)
> +                    || vectorizable_store (stmt, NULL, NULL, node));
> +              if (ok && is_slow_vect_unaligned_load_store (stmt))
> +                ok = false;
> +            }
> +        }
>      }
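
Concretely, what I have in mind is something along these lines inside the
analysis phase of vectorizable_load (and the analogous spot in
vectorizable_store).  This is only a sketch: the helper is the one your patch
adds, but the exact placement and the dump message are my guess at how it
would look, not code from the patch.

  /* Sketch: in vectorizable_load, during analysis (vec_stmt == NULL),
     after the data-ref and alignment checks have succeeded.  Reject the
     statement here if the target considers the resulting unaligned
     vector access too slow to be worth vectorizing.  */
  if (is_slow_vect_unaligned_load_store (stmt))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: unaligned vector access is "
                            "slow on this target.");
      return false;
    }

That way the check lives next to the code that already knows how the access
will be realized, and vect_analyze_stmt would not need to special-case
loads/stores after the vectorizable_* calls at all.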