Hi,

the previous patch makes it possible to merge bit-field stores whose RHS is a 
constant or a SSA name, but there is a hitch: if the SSA name is the result of 
an "interesting" load, then the optimization is blocked.  That's because the 
GIMPLE store-merging pass not only attempts to merge stores but also loads if 
they directly feed subsequent stores.  Therefore the code generated for:

struct S {
  unsigned int flag : 1;
  unsigned int size : 31;
};

void foo (struct S *s, struct S *m)
{
  s->flag = 1;
  s->size = m->size;
}

is still abysmal at -O2:

        orb     $1, (%rdi)
        movl    (%rsi), %eax
        andl    $-2, %eax
        movl    %eax, %edx
        movl    (%rdi), %eax
        andl    $1, %eax
        orl     %edx, %eax
        movl    %eax, (%rdi)
        ret

The attached patch changes it into the optimal:

        movl    (%rsi), %eax
        orl     $1, %eax
        movl    %eax, (%rdi)
        ret

The patch doesn't modify the overall logic of the pass but just turns MEM_REF 
stores into BIT_INSERT_EXPR stores when there is a preceding or subsequent 
BIT_INSERT_EXPR or INTEGER_CST store in the same bit-field region.

Tested on x86-64/Linux, OK for the mainline?


2018-06-04  Eric Botcazou  <ebotca...@adacore.com>

        * gimple-ssa-store-merging.c (struct merged_store_group): Move up
        bit_insertion field and declare can_be_merged_into method.
        (merged_store_group::can_be_merged_into): New method.
        (imm_store_chain_info::coalesce_immediate): Call it to decide whether
        consecutive non-overlapping stores can be merged.  Turn MEM_REF stores
        into BIT_INSERT_EXPR stores if the group contains a non-MEM_REF store.


2018-06-04  Eric Botcazou  <ebotca...@adacore.com>

        * gcc.dg/store_merging_21.c: New test.
        * gnat.dg/opt71b.adb: Likewise.

-- 
Eric Botcazou
Index: gimple-ssa-store-merging.c
===================================================================
--- gimple-ssa-store-merging.c	(revision 261128)
+++ gimple-ssa-store-merging.c	(working copy)
@@ -1426,6 +1426,7 @@ struct merged_store_group
   unsigned int load_align[2];
   unsigned int first_order;
   unsigned int last_order;
+  bool bit_insertion;
 
   auto_vec<store_immediate_info *> stores;
   /* We record the first and last original statements in the sequence because
@@ -1435,10 +1436,10 @@ struct merged_store_group
   gimple *first_stmt;
   unsigned char *val;
   unsigned char *mask;
-  bool bit_insertion;
 
   merged_store_group (store_immediate_info *);
   ~merged_store_group ();
+  bool can_be_merged_into (store_immediate_info *);
   void merge_into (store_immediate_info *);
   void merge_overlapping (store_immediate_info *);
   bool apply_stores ();
@@ -1851,8 +1852,47 @@ merged_store_group::~merged_store_group
     XDELETEVEC (val);
 }
 
+/* Return true if the store described by INFO can be merged into the group.  */
+
+bool
+merged_store_group::can_be_merged_into (store_immediate_info *info)
+{
+  /* Do not merge bswap patterns.  */
+  if (info->rhs_code == LROTATE_EXPR)
+    return false;
+
+  /* The canonical case.  */
+  if (info->rhs_code == stores[0]->rhs_code)
+    return true;
+
+  /* BIT_INSERT_EXPR is compatible with INTEGER_CST.  */
+  if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
+    return true;
+
+  if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
+    return true;
+
+  /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores.  */
+  if (info->rhs_code == MEM_REF
+      && (stores[0]->rhs_code == INTEGER_CST
+	  || stores[0]->rhs_code == BIT_INSERT_EXPR)
+      && info->bitregion_start == stores[0]->bitregion_start
+      && info->bitregion_end == stores[0]->bitregion_end)
+    return true;
+
+  if (stores[0]->rhs_code == MEM_REF
+      && (info->rhs_code == INTEGER_CST
+	  || info->rhs_code == BIT_INSERT_EXPR)
+      && info->bitregion_start == stores[0]->bitregion_start
+      && info->bitregion_end == stores[0]->bitregion_end)
+    return true;
+
+  return false;
+}
+
 /* Helper method for merge_into and merge_overlapping to do
    the common part.  */
+
 void
 merged_store_group::do_merge (store_immediate_info *info)
 {
@@ -2673,12 +2713,7 @@ imm_store_chain_info::coalesce_immediate
 	 Merge it into the current store group.  There can be gaps in between
 	 the stores, but there can't be gaps in between bitregions.  */
       else if (info->bitregion_start <= merged_store->bitregion_end
-	       && info->rhs_code != LROTATE_EXPR
-	       && (info->rhs_code == merged_store->stores[0]->rhs_code
-		   || (info->rhs_code == INTEGER_CST
-		       && merged_store->stores[0]->rhs_code == BIT_INSERT_EXPR)
-		   || (info->rhs_code == BIT_INSERT_EXPR
-		       && merged_store->stores[0]->rhs_code == INTEGER_CST)))
+	       && merged_store->can_be_merged_into (info))
 	{
 	  store_immediate_info *infof = merged_store->stores[0];
 
@@ -2696,21 +2731,41 @@ imm_store_chain_info::coalesce_immediate
 	      std::swap (info->ops[0], info->ops[1]);
 	      info->ops_swapped_p = true;
 	    }
-	  if ((infof->ops[0].base_addr
-	       ? compatible_load_p (merged_store, info, base_addr, 0)
-	       : !info->ops[0].base_addr)
-	      && (infof->ops[1].base_addr
-		  ? compatible_load_p (merged_store, info, base_addr, 1)
-		  : !info->ops[1].base_addr)
-	      && check_no_overlap (m_store_info, i, info->rhs_code,
-				   MAX (merged_store->last_order,
-					info->order),
-				   MAX (merged_store->start
-					+ merged_store->width,
-					info->bitpos + info->bitsize)))
+	  if (check_no_overlap (m_store_info, i, info->rhs_code,
+				MAX (merged_store->last_order,
+				     info->order),
+				MAX (merged_store->start
+				     + merged_store->width,
+				     info->bitpos + info->bitsize)))
 	    {
-	      merged_store->merge_into (info);
-	      goto done;
+	      /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores.  */
+	      if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
+		{
+		  info->rhs_code = BIT_INSERT_EXPR;
+		  info->ops[0].val = gimple_assign_rhs1 (info->stmt);
+		  info->ops[0].base_addr = NULL_TREE;
+		}
+	      else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
+		{
+		  store_immediate_info *infoj;
+		  unsigned int j;
+		  FOR_EACH_VEC_ELT (merged_store->stores, j, infoj)
+		    {
+		      infoj->rhs_code = BIT_INSERT_EXPR;
+		      infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
+		      infoj->ops[0].base_addr = NULL_TREE;
+		    }
+		}
+	      if ((infof->ops[0].base_addr
+		   ? compatible_load_p (merged_store, info, base_addr, 0)
+		   : !info->ops[0].base_addr)
+		  && (infof->ops[1].base_addr
+		      ? compatible_load_p (merged_store, info, base_addr, 1)
+		      : !info->ops[1].base_addr))
+		{
+		  merged_store->merge_into (info);
+		  goto done;
+		}
 	    }
 	}
 
/* { dg-do compile } */
/* { dg-require-effective-target store_merge } */
/* { dg-options "-O2 -fdump-tree-store-merging" } */

extern void abort (void);

struct S1 {
  unsigned int flag : 1;
  unsigned int size : 31;
};

void foo1 (struct S1 *s, struct S1 *m)
{
  s->flag = 1;
  s->size = m->size;
}

void bar1 (struct S1 *s, struct S1 *m, _Bool flag)
{
  s->flag = flag;
  s->size = m->size;
}

struct S2 {
  unsigned int size : 31;
  unsigned int flag : 1;
};

void foo2 (struct S2 *s, struct S2 *m)
{
  s->size = m->size;
  s->flag = 1;
}

void bar2 (struct S2 *s, struct S2 *m, _Bool flag)
{
  s->flag = flag;
  s->size = m->size;
}

/* { dg-final { scan-tree-dump-times "Merging successful" 4 "store-merging" } } */
-- { dg-do compile }
-- { dg-require-effective-target store_merge }
-- { dg-options "-O2 -fdump-tree-store-merging" }

with Opt71_Pkg; use Opt71_Pkg;

procedure Opt71b (X : not null access Rec; Y : not null access Rec) is
begin
   X.all := (Flag => True, Size => Y.Size);
end;

-- { dg-final { scan-tree-dump "Merging successful" "store-merging" } }

Reply via email to