The following patch fixes PR66917: the vectorizer assumes element alignment even for unaligned accesses, which is not a valid assumption for "packed" accesses. This breaks testcases at least on ARM, which can do unaligned (but not "completely" unaligned) loads and stores.
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk sofar. Richard. 2015-08-03 Richard Biener <rguent...@suse.de> PR tree-optimization/66917 * tree-vectorizer.h (struct dataref_aux): Add base_element_aligned field. (DR_VECT_AUX): New macro. (set_dr_misalignment): Adjust. (dr_misalignment): Likewise. * tree-vect-data-refs.c (vect_compute_data_ref_alignment): Compute whether the base is at least element aligned. * tree-vect-stmts.c (ensure_base_align): Adjust. (vectorizable_store): If the base is not element aligned preserve alignment of the original access if misalignment is unknown. (vectorizable_load): Likewise. Index: gcc/tree-vectorizer.h =================================================================== --- gcc/tree-vectorizer.h (revision 226396) +++ gcc/tree-vectorizer.h (working copy) @@ -707,11 +707,16 @@ typedef struct _stmt_vec_info { #define STMT_SLP_TYPE(S) (S)->slp_type struct dataref_aux { - tree base_decl; - bool base_misaligned; int misalignment; + /* If true the alignment of base_decl needs to be increased. */ + bool base_misaligned; + /* If true we know the base is at least vector element alignment aligned. */ + bool base_element_aligned; + tree base_decl; }; +#define DR_VECT_AUX(dr) ((dataref_aux *)(dr)->aux) + #define VECT_MAX_COST 1000 /* The maximum number of intermediate steps required in multi-step type @@ -910,14 +915,13 @@ destroy_cost_data (void *data) targetm.vectorize.destroy_cost_data (data); } - /*-----------------------------------------------------------------*/ /* Info on data references alignment. 
*/ /*-----------------------------------------------------------------*/ inline void set_dr_misalignment (struct data_reference *dr, int val) { - dataref_aux *data_aux = (dataref_aux *) dr->aux; + dataref_aux *data_aux = DR_VECT_AUX (dr); if (!data_aux) { @@ -931,8 +935,7 @@ set_dr_misalignment (struct data_referen inline int dr_misalignment (struct data_reference *dr) { - gcc_assert (dr->aux); - return ((dataref_aux *) dr->aux)->misalignment; + return DR_VECT_AUX (dr)->misalignment; } /* Reflects actual alignment of first access in the vectorized loop, Index: gcc/tree-vect-data-refs.c =================================================================== --- gcc/tree-vect-data-refs.c (revision 226396) +++ gcc/tree-vect-data-refs.c (working copy) @@ -622,7 +622,6 @@ vect_compute_data_ref_alignment (struct tree ref = DR_REF (dr); tree vectype; tree base, base_addr; - bool base_aligned; tree misalign = NULL_TREE; tree aligned_to; unsigned HOST_WIDE_INT alignment; @@ -698,6 +697,19 @@ vect_compute_data_ref_alignment (struct } } + /* To look at alignment of the base we have to preserve an inner MEM_REF + as that carries alignment information of the actual access. */ + base = ref; + while (handled_component_p (base)) + base = TREE_OPERAND (base, 0); + if (TREE_CODE (base) == MEM_REF) + base = build2 (MEM_REF, TREE_TYPE (base), base_addr, + build_int_cst (TREE_TYPE (TREE_OPERAND (base, 1)), 0)); + unsigned int base_alignment = get_object_alignment (base); + + if (base_alignment >= TYPE_ALIGN (TREE_TYPE (vectype))) + DR_VECT_AUX (dr)->base_element_aligned = true; + alignment = TYPE_ALIGN_UNIT (vectype); if ((compare_tree_int (aligned_to, alignment) < 0) @@ -713,21 +725,7 @@ vect_compute_data_ref_alignment (struct return true; } - /* To look at alignment of the base we have to preserve an inner MEM_REF - as that carries alignment information of the actual access. 
*/ - base = ref; - while (handled_component_p (base)) - base = TREE_OPERAND (base, 0); - if (TREE_CODE (base) == MEM_REF) - base = build2 (MEM_REF, TREE_TYPE (base), base_addr, - build_int_cst (TREE_TYPE (TREE_OPERAND (base, 1)), 0)); - - if (get_object_alignment (base) >= TYPE_ALIGN (vectype)) - base_aligned = true; - else - base_aligned = false; - - if (!base_aligned) + if (base_alignment < TYPE_ALIGN (vectype)) { /* Strip an inner MEM_REF to a bare decl if possible. */ if (TREE_CODE (base) == MEM_REF @@ -757,8 +755,9 @@ vect_compute_data_ref_alignment (struct dump_printf (MSG_NOTE, "\n"); } - ((dataref_aux *)dr->aux)->base_decl = base; - ((dataref_aux *)dr->aux)->base_misaligned = true; + DR_VECT_AUX (dr)->base_decl = base; + DR_VECT_AUX (dr)->base_misaligned = true; + DR_VECT_AUX (dr)->base_element_aligned = true; } /* If this is a backward running DR then first access in the larger Index: gcc/tree-vect-stmts.c =================================================================== --- gcc/tree-vect-stmts.c (revision 226396) +++ gcc/tree-vect-stmts.c (working copy) @@ -5056,10 +5056,10 @@ ensure_base_align (stmt_vec_info stmt_in if (!dr->aux) return; - if (((dataref_aux *)dr->aux)->base_misaligned) + if (DR_VECT_AUX (dr)->base_misaligned) { tree vectype = STMT_VINFO_VECTYPE (stmt_info); - tree base_decl = ((dataref_aux *)dr->aux)->base_decl; + tree base_decl = DR_VECT_AUX (dr)->base_decl; if (decl_in_symtab_p (base_decl)) symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype)); @@ -5068,7 +5068,7 @@ ensure_base_align (stmt_vec_info stmt_in DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype); DECL_USER_ALIGN (base_decl) = 1; } - ((dataref_aux *)dr->aux)->base_misaligned = false; + DR_VECT_AUX (dr)->base_misaligned = false; } } @@ -5739,11 +5739,15 @@ vectorizable_store (gimple stmt, gimple_ misalign = 0; else if (DR_MISALIGNMENT (first_dr) == -1) { + if (DR_VECT_AUX (first_dr)->base_element_aligned) + align = TYPE_ALIGN_UNIT (elem_type); + else + align = 
get_object_alignment (DR_REF (first_dr)) + / BITS_PER_UNIT; + misalign = 0; TREE_TYPE (data_ref) = build_aligned_type (TREE_TYPE (data_ref), - TYPE_ALIGN (elem_type)); - align = TYPE_ALIGN_UNIT (elem_type); - misalign = 0; + align * BITS_PER_UNIT); } else { @@ -6824,11 +6828,15 @@ vectorizable_load (gimple stmt, gimple_s } else if (DR_MISALIGNMENT (first_dr) == -1) { + if (DR_VECT_AUX (first_dr)->base_element_aligned) + align = TYPE_ALIGN_UNIT (elem_type); + else + align = (get_object_alignment (DR_REF (first_dr)) + / BITS_PER_UNIT); + misalign = 0; TREE_TYPE (data_ref) = build_aligned_type (TREE_TYPE (data_ref), - TYPE_ALIGN (elem_type)); - align = TYPE_ALIGN_UNIT (elem_type); - misalign = 0; + align * BITS_PER_UNIT); } else {