Re: [PATCH] Handle different bit_not_p in store merging (PR tree-optimization/78821)

2017-11-13 Thread Richard Biener
On Fri, 10 Nov 2017, Jakub Jelinek wrote:

> Hi!
> 
> This is something Uros requested in the PR, at least with BIT_NOT_EXPRs
> it is easy.  Previous store merging changes required that bit_not_p
> is equal on all stores in the group (in all 3 spots, i.e. on the result
> of BIT_{AND,IOR,XOR}_EXPR and on both of the operands).
> 
> This patch handles mixed values of that flag.  If none of the
> orig_stores have a particular bit_not_p set, then as previously nothing
> is inverted, if all of them have it set, then as previously we BIT_NOT_EXPR
> the particular SSA_NAME, and newly if there is a mix of false and true
> in a particular bit_not_p, we compute a mask and BIT_XOR_EXPR it; this
> inverts only the bits that were bit_not_p and not the others.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Thanks,
Richard.

> 2017-11-10  Jakub Jelinek  
> 
>   PR tree-optimization/78821
>   * gimple-ssa-store-merging.c (compatible_load_p): Don't require
>   that bit_not_p is the same.
>   (imm_store_chain_info::coalesce_immediate_stores): Likewise.
>   (split_group): Count precisely bit_not_p bits in each statement.
>   (invert_op): New function.
>   (imm_store_chain_info::output_merged_store): Use invert_op to
>   emit BIT_XOR_EXPR with a xor_mask instead of BIT_NOT_EXPR if some
>   but not all orig_stores have BIT_NOT_EXPR in the corresponding spots.
> 
>   * gcc.dg/store_merging_15.c: New test.
> 
> --- gcc/gimple-ssa-store-merging.c.jj 2017-11-10 08:04:49.0 +0100
> +++ gcc/gimple-ssa-store-merging.c2017-11-10 10:53:13.152731543 +0100
> @@ -1036,7 +1036,6 @@ compatible_load_p (merged_store_group *m
>  {
>store_immediate_info *infof = merged_store->stores[0];
>if (!info->ops[idx].base_addr
> -  || info->ops[idx].bit_not_p != infof->ops[idx].bit_not_p
>|| (info->ops[idx].bitpos - infof->ops[idx].bitpos
> != info->bitpos - infof->bitpos)
>|| !operand_equal_p (info->ops[idx].base_addr,
> @@ -1176,8 +1175,7 @@ imm_store_chain_info::coalesce_immediate
>Merge it into the current store group.  There can be gaps in between
>the stores, but there can't be gaps in between bitregions.  */
>else if (info->bitregion_start <= merged_store->bitregion_end
> -&& info->rhs_code == merged_store->stores[0]->rhs_code
> -&& info->bit_not_p == merged_store->stores[0]->bit_not_p)
> +&& info->rhs_code == merged_store->stores[0]->rhs_code)
>   {
> store_immediate_info *infof = merged_store->stores[0];
>  
> @@ -1496,16 +1494,14 @@ split_group (merged_store_group *group,
>total_orig[0] = 1; /* The orig store.  */
>info = group->stores[0];
>if (info->ops[0].base_addr)
> - total_orig[0] += 1 + info->ops[0].bit_not_p;
> + total_orig[0]++;
>if (info->ops[1].base_addr)
> - total_orig[0] += 1 + info->ops[1].bit_not_p;
> + total_orig[0]++;
>switch (info->rhs_code)
>   {
>   case BIT_AND_EXPR:
>   case BIT_IOR_EXPR:
>   case BIT_XOR_EXPR:
> -   if (info->bit_not_p)
> - total_orig[0]++; /* The orig BIT_NOT_EXPR stmt.  */
> total_orig[0]++; /* The orig BIT_*_EXPR stmt.  */
> break;
>   default:
> @@ -1514,7 +1510,12 @@ split_group (merged_store_group *group,
>total_orig[0] *= group->stores.length ();
>  
>FOR_EACH_VEC_ELT (group->stores, i, info)
> - total_new[0] += count_multiple_uses (info);
> + {
> +   total_new[0] += count_multiple_uses (info);
> +   total_orig[0] += (info->bit_not_p
> + + info->ops[0].bit_not_p
> + + info->ops[1].bit_not_p);
> + }
>  }
>  
>if (!allow_unaligned_load)
> @@ -1654,13 +1655,13 @@ split_group (merged_store_group *group,
>  
>if (total_orig)
>  {
> +  unsigned int i;
> +  struct split_store *store;
>/* If we are reusing some original stores and any of the
>original SSA_NAMEs had multiple uses, we need to subtract
>those now before we add the new ones.  */
>if (total_new[0] && any_orig)
>   {
> -   unsigned int i;
> -   struct split_store *store;
> FOR_EACH_VEC_ELT (*split_stores, i, store)
>   if (store->orig)
> total_new[0] -= count_multiple_uses (store->orig_stores[0]);
> @@ -1668,26 +1669,105 @@ split_group (merged_store_group *group,
>total_new[0] += ret; /* The new store.  */
>store_immediate_info *info = group->stores[0];
>if (info->ops[0].base_addr)
> - total_new[0] += ret * (1 + info->ops[0].bit_not_p);
> + total_new[0] += ret;
>if (info->ops[1].base_addr)
> - total_new[0] += ret * (1 + info->ops[1].bit_not_p);
> + total_new[0] += ret;
>switch (info->rhs_code)
>   {
>   case BIT_AND_EXPR:
>   case BIT_IOR_EXPR:
>   case BIT_XOR_EXPR:
> -   if 

Re: [PATCH] Handle different bit_not_p in store merging (PR tree-optimization/78821)

2017-11-12 Thread Jakub Jelinek
On Fri, Nov 10, 2017 at 04:51:19PM +, Kyrill Tkachov wrote:
> Hi Jakub,
> 
> On 10/11/17 13:59, Jakub Jelinek wrote:
> > This is something Uros requested in the PR, at least with BIT_NOT_EXPRs
> > it is easy.  Previous store merging changes required that bit_not_p
> > is equal on all stores in the group (in all 3 spots, i.e. on the result
> > of BIT_{AND,IOR,XOR}_EXPR and on both of the operands).
> > 
> > This patch handles mixed values of that flag.  If none of the
> > orig_stores have a particular bit_not_p set, then as previously nothing
> > is inverted, if all of them have it set, then as previously we
> > BIT_NOT_EXPR
> > the particular SSA_NAME, and newly if there is a mix of false and true
> > in a particular bit_not_p, we compute a mask and BIT_XOR_EXPR it; this
> > inverts only the bits that were bit_not_p and not the others.
> > 
> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> > 
> 
> Might be worth doing a sanity check test run on a big-endian target
> since there is a bit of BYTES_BIG_ENDIAN logic in the patch (though it looks
> correct to me).

Indeed.  Successfully bootstrapped/regtested also on powerpc64{,le}-linux
now.

Jakub


Re: [PATCH] Handle different bit_not_p in store merging (PR tree-optimization/78821)

2017-11-10 Thread Kyrill Tkachov

Hi Jakub,

On 10/11/17 13:59, Jakub Jelinek wrote:

Hi!

This is something Uros requested in the PR, at least with BIT_NOT_EXPRs
it is easy.  Previous store merging changes required that bit_not_p
is equal on all stores in the group (in all 3 spots, i.e. on the result
of BIT_{AND,IOR,XOR}_EXPR and on both of the operands).

This patch handles mixed values of that flag.  If none of the
orig_stores have a particular bit_not_p set, then as previously nothing
is inverted, if all of them have it set, then as previously we 
BIT_NOT_EXPR

the particular SSA_NAME, and newly if there is a mix of false and true
in a particular bit_not_p, we compute a mask and BIT_XOR_EXPR it; this
inverts only the bits that were bit_not_p and not the others.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?



Might be worth doing a sanity check test run on a big-endian target
since there is a bit of BYTES_BIG_ENDIAN logic in the patch (though it 
looks correct to me).


Thanks,
Kyrill


2017-11-10  Jakub Jelinek  

PR tree-optimization/78821
* gimple-ssa-store-merging.c (compatible_load_p): Don't require
that bit_not_p is the same.
(imm_store_chain_info::coalesce_immediate_stores): Likewise.
(split_group): Count precisely bit_not_p bits in each statement.
(invert_op): New function.
(imm_store_chain_info::output_merged_store): Use invert_op to
emit BIT_XOR_EXPR with a xor_mask instead of BIT_NOT_EXPR if some
but not all orig_stores have BIT_NOT_EXPR in the corresponding 
spots.


* gcc.dg/store_merging_15.c: New test.

--- gcc/gimple-ssa-store-merging.c.jj   2017-11-10 08:04:49.0 
+0100
+++ gcc/gimple-ssa-store-merging.c  2017-11-10 10:53:13.152731543 
+0100

@@ -1036,7 +1036,6 @@ compatible_load_p (merged_store_group *m
 {
   store_immediate_info *infof = merged_store->stores[0];
   if (!info->ops[idx].base_addr
-  || info->ops[idx].bit_not_p != infof->ops[idx].bit_not_p
   || (info->ops[idx].bitpos - infof->ops[idx].bitpos
   != info->bitpos - infof->bitpos)
   || !operand_equal_p (info->ops[idx].base_addr,
@@ -1176,8 +1175,7 @@ imm_store_chain_info::coalesce_immediate
  Merge it into the current store group.  There can be gaps in 
between

  the stores, but there can't be gaps in between bitregions.  */
   else if (info->bitregion_start <= merged_store->bitregion_end
-  && info->rhs_code == merged_store->stores[0]->rhs_code
-  && info->bit_not_p == merged_store->stores[0]->bit_not_p)
+  && info->rhs_code == merged_store->stores[0]->rhs_code)
 {
   store_immediate_info *infof = merged_store->stores[0];

@@ -1496,16 +1494,14 @@ split_group (merged_store_group *group,
   total_orig[0] = 1; /* The orig store.  */
   info = group->stores[0];
   if (info->ops[0].base_addr)
-   total_orig[0] += 1 + info->ops[0].bit_not_p;
+   total_orig[0]++;
   if (info->ops[1].base_addr)
-   total_orig[0] += 1 + info->ops[1].bit_not_p;
+   total_orig[0]++;
   switch (info->rhs_code)
 {
 case BIT_AND_EXPR:
 case BIT_IOR_EXPR:
 case BIT_XOR_EXPR:
- if (info->bit_not_p)
-   total_orig[0]++; /* The orig BIT_NOT_EXPR stmt. */
   total_orig[0]++; /* The orig BIT_*_EXPR stmt.  */
   break;
 default:
@@ -1514,7 +1510,12 @@ split_group (merged_store_group *group,
   total_orig[0] *= group->stores.length ();

   FOR_EACH_VEC_ELT (group->stores, i, info)
-   total_new[0] += count_multiple_uses (info);
+   {
+ total_new[0] += count_multiple_uses (info);
+ total_orig[0] += (info->bit_not_p
+   + info->ops[0].bit_not_p
+   + info->ops[1].bit_not_p);
+   }
 }

   if (!allow_unaligned_load)
@@ -1654,13 +1655,13 @@ split_group (merged_store_group *group,

   if (total_orig)
 {
+  unsigned int i;
+  struct split_store *store;
   /* If we are reusing some original stores and any of the
  original SSA_NAMEs had multiple uses, we need to subtract
  those now before we add the new ones.  */
   if (total_new[0] && any_orig)
 {
- unsigned int i;
- struct split_store *store;
   FOR_EACH_VEC_ELT (*split_stores, i, store)
 if (store->orig)
   total_new[0] -= count_multiple_uses 
(store->orig_stores[0]);

@@ -1668,26 +1669,105 @@ split_group (merged_store_group *group,
   total_new[0] += ret; /* The new store.  */
   store_immediate_info *info = group->stores[0];
   if (info->ops[0].base_addr)
-   total_new[0] += ret * (1 + info->ops[0].bit_not_p);
+   total_new[0] += ret;
   if (info->ops[1].base_addr)
-   total_new[0] += ret * (1 + info->ops[1].bit_not_p);
+   total_new[0] += ret;
   switch (info->rhs_code)
 {
 case 

[PATCH] Handle different bit_not_p in store merging (PR tree-optimization/78821)

2017-11-10 Thread Jakub Jelinek
Hi!

This is something Uros requested in the PR, at least with BIT_NOT_EXPRs
it is easy.  Previous store merging changes required that bit_not_p
is equal on all stores in the group (in all 3 spots, i.e. on the result
of BIT_{AND,IOR,XOR}_EXPR and on both of the operands).

This patch handles mixed values of that flag.  If none of the
orig_stores have a particular bit_not_p set, then as previously nothing
is inverted, if all of them have it set, then as previously we BIT_NOT_EXPR
the particular SSA_NAME, and newly if there is a mix of false and true
in a particular bit_not_p, we compute a mask and BIT_XOR_EXPR it; this
inverts only the bits that were bit_not_p and not the others.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2017-11-10  Jakub Jelinek  

PR tree-optimization/78821
* gimple-ssa-store-merging.c (compatible_load_p): Don't require
that bit_not_p is the same.
(imm_store_chain_info::coalesce_immediate_stores): Likewise.
(split_group): Count precisely bit_not_p bits in each statement.
(invert_op): New function.
(imm_store_chain_info::output_merged_store): Use invert_op to
emit BIT_XOR_EXPR with a xor_mask instead of BIT_NOT_EXPR if some
but not all orig_stores have BIT_NOT_EXPR in the corresponding spots.

* gcc.dg/store_merging_15.c: New test.

--- gcc/gimple-ssa-store-merging.c.jj   2017-11-10 08:04:49.0 +0100
+++ gcc/gimple-ssa-store-merging.c  2017-11-10 10:53:13.152731543 +0100
@@ -1036,7 +1036,6 @@ compatible_load_p (merged_store_group *m
 {
   store_immediate_info *infof = merged_store->stores[0];
   if (!info->ops[idx].base_addr
-  || info->ops[idx].bit_not_p != infof->ops[idx].bit_not_p
   || (info->ops[idx].bitpos - infof->ops[idx].bitpos
  != info->bitpos - infof->bitpos)
   || !operand_equal_p (info->ops[idx].base_addr,
@@ -1176,8 +1175,7 @@ imm_store_chain_info::coalesce_immediate
 Merge it into the current store group.  There can be gaps in between
 the stores, but there can't be gaps in between bitregions.  */
   else if (info->bitregion_start <= merged_store->bitregion_end
-  && info->rhs_code == merged_store->stores[0]->rhs_code
-  && info->bit_not_p == merged_store->stores[0]->bit_not_p)
+  && info->rhs_code == merged_store->stores[0]->rhs_code)
{
  store_immediate_info *infof = merged_store->stores[0];
 
@@ -1496,16 +1494,14 @@ split_group (merged_store_group *group,
   total_orig[0] = 1; /* The orig store.  */
   info = group->stores[0];
   if (info->ops[0].base_addr)
-   total_orig[0] += 1 + info->ops[0].bit_not_p;
+   total_orig[0]++;
   if (info->ops[1].base_addr)
-   total_orig[0] += 1 + info->ops[1].bit_not_p;
+   total_orig[0]++;
   switch (info->rhs_code)
{
case BIT_AND_EXPR:
case BIT_IOR_EXPR:
case BIT_XOR_EXPR:
- if (info->bit_not_p)
-   total_orig[0]++; /* The orig BIT_NOT_EXPR stmt.  */
  total_orig[0]++; /* The orig BIT_*_EXPR stmt.  */
  break;
default:
@@ -1514,7 +1510,12 @@ split_group (merged_store_group *group,
   total_orig[0] *= group->stores.length ();
 
   FOR_EACH_VEC_ELT (group->stores, i, info)
-   total_new[0] += count_multiple_uses (info);
+   {
+ total_new[0] += count_multiple_uses (info);
+ total_orig[0] += (info->bit_not_p
+   + info->ops[0].bit_not_p
+   + info->ops[1].bit_not_p);
+   }
 }
 
   if (!allow_unaligned_load)
@@ -1654,13 +1655,13 @@ split_group (merged_store_group *group,
 
   if (total_orig)
 {
+  unsigned int i;
+  struct split_store *store;
   /* If we are reusing some original stores and any of the
 original SSA_NAMEs had multiple uses, we need to subtract
 those now before we add the new ones.  */
   if (total_new[0] && any_orig)
{
- unsigned int i;
- struct split_store *store;
  FOR_EACH_VEC_ELT (*split_stores, i, store)
if (store->orig)
  total_new[0] -= count_multiple_uses (store->orig_stores[0]);
@@ -1668,26 +1669,105 @@ split_group (merged_store_group *group,
   total_new[0] += ret; /* The new store.  */
   store_immediate_info *info = group->stores[0];
   if (info->ops[0].base_addr)
-   total_new[0] += ret * (1 + info->ops[0].bit_not_p);
+   total_new[0] += ret;
   if (info->ops[1].base_addr)
-   total_new[0] += ret * (1 + info->ops[1].bit_not_p);
+   total_new[0] += ret;
   switch (info->rhs_code)
{
case BIT_AND_EXPR:
case BIT_IOR_EXPR:
case BIT_XOR_EXPR:
- if (info->bit_not_p)
-   total_new[0] += ret; /* The new BIT_NOT_EXPR stmt.  */
  total_new[0] += ret; /* The new BIT_*_EXPR stmt.  */
  break;
default: