On 6/12/18 11:56 AM, Will Schmidt wrote:
> Hi,
> Gimple folding for unaligned vector loads and stores.
> Regtest completed across variety of systems, P6,P7,P8,P9.
>
> [v2] Added the type for the MEM_REF, per feedback.
> Testcases for gimple-folding of the same are currently in-tree
> as powerpc/fold-vec-load-*.c and powerpc/fold-vec-store-*.c.
> Re-tested, still looks good. :-)
>
> Thanks
> -Will
>
> [gcc]
>
> 2018-06-12 Will Schmidt <will_schm...@vnet.ibm.com>
>
> * config/rs6000/rs6000.c (rs6000_builtin_valid_without_lhs): Add
> vec_xst variants to the list.
> (rs6000_gimple_fold_builtin): Add support for folding unaligned
> vector loads and stores.
>
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index d62abdf..374666c 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -15360,10 +15360,16 @@ rs6000_builtin_valid_without_lhs (enum
> rs6000_builtins fn_code)
> case ALTIVEC_BUILTIN_STVX_V8HI:
> case ALTIVEC_BUILTIN_STVX_V4SI:
> case ALTIVEC_BUILTIN_STVX_V4SF:
> case ALTIVEC_BUILTIN_STVX_V2DI:
> case ALTIVEC_BUILTIN_STVX_V2DF:
> + case VSX_BUILTIN_STXVW4X_V16QI:
> + case VSX_BUILTIN_STXVW4X_V8HI:
> + case VSX_BUILTIN_STXVW4X_V4SF:
> + case VSX_BUILTIN_STXVW4X_V4SI:
> + case VSX_BUILTIN_STXVD2X_V2DF:
> + case VSX_BUILTIN_STXVD2X_V2DI:
> return true;
> default:
> return false;
> }
> }
> @@ -15869,10 +15875,78 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator
> *gsi)
> gimple_set_location (g, loc);
> gsi_replace (gsi, g, true);
> return true;
> }
>
> + /* unaligned Vector loads. */
> + case VSX_BUILTIN_LXVW4X_V16QI:
> + case VSX_BUILTIN_LXVW4X_V8HI:
> + case VSX_BUILTIN_LXVW4X_V4SF:
> + case VSX_BUILTIN_LXVW4X_V4SI:
> + case VSX_BUILTIN_LXVD2X_V2DF:
> + case VSX_BUILTIN_LXVD2X_V2DI:
> + {
> + arg0 = gimple_call_arg (stmt, 0); // offset
> + arg1 = gimple_call_arg (stmt, 1); // address
> + lhs = gimple_call_lhs (stmt);
> + location_t loc = gimple_location (stmt);
> + /* Since arg1 may be cast to a different type, just use ptr_type_node
> + here instead of trying to enforce TBAA on pointer types. */
> + tree arg1_type = ptr_type_node;
> + tree lhs_type = TREE_TYPE (lhs);
> + /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
> + the tree using the value from arg0. The resulting type will match
> + the type of arg1. */
> + gimple_seq stmts = NULL;
> + tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
> + tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
> + arg1_type, arg1, temp_offset);
> + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> + /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
> + take an offset, but since we've already incorporated the offset
> + above, here we just pass in a zero. */
> + gimple *g;
> + tree align_ltype = build_aligned_type (lhs_type, TYPE_ALIGN
> ((lhs_type)));

This alignment is too strong (assuming lhs is a vector type). lxvd2x and
lxvw4x only require word alignment, so

  tree align_ltype = build_aligned_type (lhs_type, 4);

seems like what you need. See my earlier discussion with Richard.

Sorry I didn't notice this before!

> + g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr,
> + build_int_cst (arg1_type, 0)));
> + gimple_set_location (g, loc);
> + gsi_replace (gsi, g, true);
> + return true;
> + }
> +
> + /* unaligned Vector stores. */
> + case VSX_BUILTIN_STXVW4X_V16QI:
> + case VSX_BUILTIN_STXVW4X_V8HI:
> + case VSX_BUILTIN_STXVW4X_V4SF:
> + case VSX_BUILTIN_STXVW4X_V4SI:
> + case VSX_BUILTIN_STXVD2X_V2DF:
> + case VSX_BUILTIN_STXVD2X_V2DI:
> + {
> + arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
> + arg1 = gimple_call_arg (stmt, 1); /* Offset. */
> + tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
> + location_t loc = gimple_location (stmt);
> + tree arg0_type = TREE_TYPE (arg0);
> + /* Use ptr_type_node (no TBAA) for the arg2_type. */
> + tree arg2_type = ptr_type_node;
> + /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
> + the tree using the value from arg0. The resulting type will match
> + the type of arg2. */
> + gimple_seq stmts = NULL;
> + tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
> + tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
> + arg2_type, arg2, temp_offset);
> + /* Mask off any lower bits from the address. */

Spurious comment? You're not doing that, nor do you want to...

Thanks,
Bill

> + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> + gimple *g;
> + g = gimple_build_assign (build2 (MEM_REF, arg0_type, temp_addr,
> + build_int_cst (arg2_type, 0)), arg0);
> + gimple_set_location (g, loc);
> + gsi_replace (gsi, g, true);
> + return true;
> + }
> +
> /* Vector Fused multiply-add (fma). */
> case ALTIVEC_BUILTIN_VMADDFP:
> case VSX_BUILTIN_XVMADDDP:
> case ALTIVEC_BUILTIN_VMLADDUHM:
> {
>
>