On Sat, 26 Oct 2013, Jakub Jelinek wrote:

> Hi!
>
> And here is a patch that allows vectorization without peeling for alignment
> and without a scalar loop for the bound even for fn2, fn3 and fn4 in the
> following testcase, though as with the range __builtin_unreachable () notes,
> it is quite fragile, because it only works if there are no immediate uses of
> the tested SSA_NAME before the assertion.  Perhaps a more reliable way would
> be to convert those assertions into __builtin_assume_aligned, but that has
> the disadvantage that its first argument is a pointer and it returns a
> pointer, so we'd need to cast integers to pointers and back, or add an
> ASSUME_ALIGNED internal function.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux.
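(For illustration only: the __builtin_assume_aligned conversion mentioned above could look roughly like the sketch below for fn2.  This is a hypothetical rewrite, not part of the patch; fn2_assume_aligned is an invented name, and the round-trips through intptr_t are exactly the integer-to-pointer-and-back casts the cover letter objects to.  __builtin_assume_aligned is GCC-specific.)

    #include <stdint.h>

    extern int a[1024];

    void
    fn2_assume_aligned (int x, int y)
    {
      int i;
      /* Attach the "multiple of 32" information by pretending the
	 integers are 32-byte aligned pointers and casting back.  */
      x = (int) (intptr_t) __builtin_assume_aligned ((void *) (intptr_t) x, 32);
      y = (int) (intptr_t) __builtin_assume_aligned ((void *) (intptr_t) y, 32);
      for (i = x + 32; i < x + y; i++)
	a[i]++;
    }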
Ok.

Thanks,
Richard.

> int a[1024];
>
> void
> fn1 (int x, int y)
> {
>   int i;
>   x &= -32;
>   y &= -32;
>   for (i = x + 32; i < y; i++)
>     a[i]++;
> }
>
> void
> fn2 (int x, int y)
> {
>   int i;
>   if (x & 31)
>     __builtin_unreachable ();
>   if (y & 31)
>     __builtin_unreachable ();
>   for (i = x + 32; i < x + y; i++)
>     a[i]++;
> }
>
> void
> fn3 (int x, int y)
> {
>   int i;
>   if (x % 32)
>     __builtin_unreachable ();
>   if (y % 32)
>     __builtin_unreachable ();
>   for (i = x + 32; i < x + y; i++)
>     a[i]++;
> }
>
> void
> fn4 (int x, int y)
> {
>   int i;
>   if ((x % 32) != 0)
>     __builtin_unreachable ();
>   if ((y % 32) != 0)
>     __builtin_unreachable ();
>   for (i = x + 32; i < x + y; i++)
>     a[i]++;
> }
>
> 2013-10-25  Jakub Jelinek  <ja...@redhat.com>
>
> 	* tree-vrp.c (maybe_set_nonzero_bits): New function.
> 	(remove_range_assertions): Call it.
>
> --- gcc/tree-vrp.c.jj	2013-10-24 14:32:29.000000000 +0200
> +++ gcc/tree-vrp.c	2013-10-25 21:21:35.183092937 +0200
> @@ -6459,6 +6459,60 @@ check_all_array_refs (void)
>      }
>  }
>  
> +/* Handle
> +     _4 = x_3 & 31;
> +     if (_4 != 0)
> +       goto <bb 6>;
> +     else
> +       goto <bb 7>;
> +     <bb 6>:
> +     __builtin_unreachable ();
> +     <bb 7>:
> +     x_5 = ASSERT_EXPR <x_3, ...>;
> +   If x_3 has no other immediate uses (checked by caller),
> +   var is the x_3 var from ASSERT_EXPR, we can clear low 5 bits
> +   from the non-zero bitmask.  */
> +
> +static void
> +maybe_set_nonzero_bits (basic_block bb, tree var)
> +{
> +  edge e = single_pred_edge (bb);
> +  basic_block cond_bb = e->src;
> +  gimple stmt = last_stmt (cond_bb);
> +  tree cst;
> +
> +  if (stmt == NULL
> +      || gimple_code (stmt) != GIMPLE_COND
> +      || gimple_cond_code (stmt) != ((e->flags & EDGE_TRUE_VALUE)
> +				     ? EQ_EXPR : NE_EXPR)
> +      || TREE_CODE (gimple_cond_lhs (stmt)) != SSA_NAME
> +      || !integer_zerop (gimple_cond_rhs (stmt)))
> +    return;
> +
> +  stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (stmt));
> +  if (!is_gimple_assign (stmt)
> +      || gimple_assign_rhs_code (stmt) != BIT_AND_EXPR
> +      || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST)
> +    return;
> +  if (gimple_assign_rhs1 (stmt) != var)
> +    {
> +      gimple stmt2;
> +
> +      if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME)
> +	return;
> +      stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
> +      if (!gimple_assign_cast_p (stmt2)
> +	  || gimple_assign_rhs1 (stmt2) != var
> +	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt2))
> +	  || (TYPE_PRECISION (TREE_TYPE (gimple_assign_rhs1 (stmt)))
> +	      != TYPE_PRECISION (TREE_TYPE (var))))
> +	return;
> +    }
> +  cst = gimple_assign_rhs2 (stmt);
> +  set_nonzero_bits (var, (get_nonzero_bits (var)
> +			  & ~tree_to_double_int (cst)));
> +}
> +
>  /* Convert range assertion expressions into the implied copies and
>     copy propagate away the copies.  Doing the trivial copy propagation
>     here avoids the need to run the full copy propagation pass after
> @@ -6576,8 +6630,11 @@ remove_range_assertions (void)
>  	    }
>  	}
>        if (ok)
> -	set_range_info (var, SSA_NAME_RANGE_INFO (lhs)->min,
> -			SSA_NAME_RANGE_INFO (lhs)->max);
> +	{
> +	  set_range_info (var, SSA_NAME_RANGE_INFO (lhs)->min,
> +			  SSA_NAME_RANGE_INFO (lhs)->max);
> +	  maybe_set_nonzero_bits (bb, var);
> +	}
>      }
>  }
>
> 	Jakub

-- 
Richard Biener <rguent...@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imend
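(A small standalone illustration, plain C rather than GCC internals, of what the new set_nonzero_bits call records: with the INTEGER_CST 31 from the "_4 = x_3 & 31" test, the low five bits are removed from the may-be-nonzero mask, i.e. the value is known to be a multiple of 32, which is what later lets the vectorizer skip peeling for alignment.)

    #include <inttypes.h>
    #include <stdio.h>

    int
    main (void)
    {
      /* Conservative starting mask: every bit of the value may be nonzero.  */
      uint64_t nonzero_bits = ~UINT64_C (0);
      uint64_t cst = 31;	/* the INTEGER_CST from "_4 = x_3 & 31" */

      /* Mirrors set_nonzero_bits (var, get_nonzero_bits (var) & ~cst):
	 the branch where (x & 31) != 0 ends in __builtin_unreachable (),
	 so on the surviving path the low five bits are known to be zero.  */
      nonzero_bits &= ~cst;

      /* Prints 0xffffffffffffffe0.  */
      printf ("may-be-nonzero mask: 0x%" PRIx64 "\n", nonzero_bits);
      return 0;
    }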