On Sat, 26 Oct 2013, Jakub Jelinek wrote:

> Hi!
> 
> And here is a patch that allows vectorization without peeling for alignment
> and scalar loop for bound even for fn2, fn3 and fn4 in the following
> testcase, though as with the range __builtin_unreachable () notes, it is
> quite fragile, because it only works if there are no immediate uses of the
> tested SSA_NAME before the assertion.  Perhaps more reliable way would be to
> convert those assertions into __builtin_assume_aligned, but that has the
> disadvantage that its first argument is a pointer and it returns a pointer,
> so we'd need to cast integers to pointers and back, or add ASSUME_ALIGNED
> internal function.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux.

Ok.

Thanks,
Richard.

> int a[1024];
> 
> void
> fn1 (int x, int y)
> {
>   int i;
>   x &= -32;
>   y &= -32;
>   for (i = x + 32; i < y; i++)
>     a[i]++;
> }
> 
> void
> fn2 (int x, int y)
> {
>   int i;
>   if (x & 31)
>     __builtin_unreachable ();
>   if (y & 31)
>     __builtin_unreachable ();
>   for (i = x + 32; i < x + y; i++)
>     a[i]++;
> }
> 
> void
> fn3 (int x, int y)
> {
>   int i;
>   if (x % 32)
>     __builtin_unreachable ();
>   if (y % 32)
>     __builtin_unreachable ();
>   for (i = x + 32; i < x + y; i++)
>     a[i]++;
> }
> 
> void
> fn4 (int x, int y)
> {
>   int i;
>   if ((x % 32) != 0)
>     __builtin_unreachable ();
>   if ((y % 32) != 0)
>     __builtin_unreachable ();
>   for (i = x + 32; i < x + y; i++)
>     a[i]++;
> }
> 
> 2013-10-25  Jakub Jelinek  <ja...@redhat.com>
> 
>       * tree-vrp.c (maybe_set_nonzero_bits): New function.
>       (remove_range_assertions): Call it.
> 
> --- gcc/tree-vrp.c.jj 2013-10-24 14:32:29.000000000 +0200
> +++ gcc/tree-vrp.c    2013-10-25 21:21:35.183092937 +0200
> @@ -6459,6 +6459,60 @@ check_all_array_refs (void)
>      }
>  }
>  
> +/* Handle
> +   _4 = x_3 & 31;
> +   if (_4 != 0)
> +     goto <bb 6>;
> +   else
> +     goto <bb 7>;
> +   <bb 6>:
> +   __builtin_unreachable ();
> +   <bb 7>:
> +   x_5 = ASSERT_EXPR <x_3, ...>;
> +   If x_3 has no other immediate uses (checked by caller),
> +   var is the x_3 var from ASSERT_EXPR, we can clear low 5 bits
> +   from the non-zero bitmask.  */
> +
> +static void
> +maybe_set_nonzero_bits (basic_block bb, tree var)
> +{
> +  edge e = single_pred_edge (bb);
> +  basic_block cond_bb = e->src;
> +  gimple stmt = last_stmt (cond_bb);
> +  tree cst;
> +
> +  if (stmt == NULL
> +      || gimple_code (stmt) != GIMPLE_COND
> +      || gimple_cond_code (stmt) != ((e->flags & EDGE_TRUE_VALUE)
> +                                  ? EQ_EXPR : NE_EXPR)
> +      || TREE_CODE (gimple_cond_lhs (stmt)) != SSA_NAME
> +      || !integer_zerop (gimple_cond_rhs (stmt)))
> +    return;
> +
> +  stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (stmt));
> +  if (!is_gimple_assign (stmt)
> +      || gimple_assign_rhs_code (stmt) != BIT_AND_EXPR
> +      || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST)
> +    return;
> +  if (gimple_assign_rhs1 (stmt) != var)
> +    {
> +      gimple stmt2;
> +
> +      if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME)
> +     return;
> +      stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
> +      if (!gimple_assign_cast_p (stmt2)
> +       || gimple_assign_rhs1 (stmt2) != var
> +       || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt2))
> +       || (TYPE_PRECISION (TREE_TYPE (gimple_assign_rhs1 (stmt)))
> +                           != TYPE_PRECISION (TREE_TYPE (var))))
> +     return;
> +    }
> +  cst = gimple_assign_rhs2 (stmt);
> +  set_nonzero_bits (var, (get_nonzero_bits (var)
> +                       & ~tree_to_double_int (cst)));
> +}
> +
>  /* Convert range assertion expressions into the implied copies and
>     copy propagate away the copies.  Doing the trivial copy propagation
>     here avoids the need to run the full copy propagation pass after
> @@ -6576,8 +6630,11 @@ remove_range_assertions (void)
>                           }
>                       }
>                   if (ok)
> -                   set_range_info (var, SSA_NAME_RANGE_INFO (lhs)->min,
> -                                   SSA_NAME_RANGE_INFO (lhs)->max);
> +                   {
> +                     set_range_info (var, SSA_NAME_RANGE_INFO (lhs)->min,
> +                                     SSA_NAME_RANGE_INFO (lhs)->max);
> +                     maybe_set_nonzero_bits (bb, var);
> +                   }
>                 }
>             }
>  
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer

Reply via email to