The patch below extends memset recognition to cover a few more
non-byte-size store loops and all byte-size store loops.  This exposes
issues with our builtins.exp testsuite which has custom memset
routines like

void *
my_memset (void *d, int c, size_t n)
{
  char *dst = (char *) d;
  while (n--)
    *dst++ = c;
  return (char *) d;
}

Now, for LTO we have papered over similar issues by attaching
the used attribute to the functions.  But the general question is - when
can we be sure the function we are dealing with is not the actual
implementation for the builtin call we want to generate?  A few
things come to my mind:

 1) the function already calls the function we want to generate (well,
    it might be a tail-recursive memset implementation ...)

 2) the function availability is AVAIL_LOCAL

 3) ... ?

For sure 2) would work, but it would severely restrict the transform
(do we care?).

We have a similar issue with sin/cos -> sincos transform and a
trivial sincos implementation.

Any ideas?

Bootstrapped (with memset recognition enabled by default) and tested
on x86_64-unknown-linux-gnu with the aforementioned issues.

Thanks,
Richard.

2012-05-30  Richard Guenther  <rguent...@suse.de>

        PR tree-optimization/53081
        * tree-data-ref.h (stores_zero_from_loop): Rename to ...
        (stores_bytes_from_loop): ... this.
        (stmt_with_adjacent_zero_store_dr_p): Rename to ...
        (stmt_with_adjacent_byte_store_dr_p): ... this.
        * tree-data-ref.c (stmt_with_adjacent_zero_store_dr_p): Rename to ...
        (stmt_with_adjacent_byte_store_dr_p): ... this.  Handle all kinds
        of byte-sized stores.
        (stores_zero_from_loop): Rename to ...
        (stores_bytes_from_loop): ... this.
        * tree-loop-distribution.c (generate_memset_zero): Rename to ...
        (generate_memset): ... this.  Handle all kinds of byte-sized
        stores.
        (generate_builtin): Adjust.
        (can_generate_builtin): Likewise.
        (tree_loop_distribution): Likewise.

Index: gcc/tree-data-ref.h
===================================================================
*** gcc/tree-data-ref.h (revision 188004)
--- gcc/tree-data-ref.h (working copy)
*************** index_in_loop_nest (int var, VEC (loop_p
*** 606,616 ****
  }
  
  void stores_from_loop (struct loop *, VEC (gimple, heap) **);
! void stores_zero_from_loop (struct loop *, VEC (gimple, heap) **);
  void remove_similar_memory_refs (VEC (gimple, heap) **);
  bool rdg_defs_used_in_other_loops_p (struct graph *, int);
  bool have_similar_memory_accesses (gimple, gimple);
! bool stmt_with_adjacent_zero_store_dr_p (gimple);
  
  /* Returns true when STRIDE is equal in absolute value to the size of
     the unit type of TYPE.  */
--- 606,616 ----
  }
  
  void stores_from_loop (struct loop *, VEC (gimple, heap) **);
! void stores_bytes_from_loop (struct loop *, VEC (gimple, heap) **);
  void remove_similar_memory_refs (VEC (gimple, heap) **);
  bool rdg_defs_used_in_other_loops_p (struct graph *, int);
  bool have_similar_memory_accesses (gimple, gimple);
! bool stmt_with_adjacent_byte_store_dr_p (gimple);
  
  /* Returns true when STRIDE is equal in absolute value to the size of
     the unit type of TYPE.  */
Index: gcc/tree-data-ref.c
===================================================================
*** gcc/tree-data-ref.c (revision 188004)
--- gcc/tree-data-ref.c (working copy)
*************** stores_from_loop (struct loop *loop, VEC
*** 5248,5259 ****
    free (bbs);
  }
  
! /* Returns true when the statement at STMT is of the form "A[i] = 0"
     that contains a data reference on its LHS with a stride of the same
!    size as its unit type.  */
  
  bool
! stmt_with_adjacent_zero_store_dr_p (gimple stmt)
  {
    tree lhs, rhs;
    bool res;
--- 5248,5260 ----
    free (bbs);
  }
  
! /* Returns true when the statement at STMT is of the form "A[i] = x"
     that contains a data reference on its LHS with a stride of the same
!    size as its unit type that can be rewritten as a series of byte
!    stores with the same value.  */
  
  bool
! stmt_with_adjacent_byte_store_dr_p (gimple stmt)
  {
    tree lhs, rhs;
    bool res;
*************** stmt_with_adjacent_zero_store_dr_p (gimp
*** 5272,5278 ****
        && DECL_BIT_FIELD (TREE_OPERAND (lhs, 1)))
      return false;
  
!   if (!(integer_zerop (rhs) || real_zerop (rhs)))
      return false;
  
    dr = XCNEW (struct data_reference);
--- 5273,5286 ----
        && DECL_BIT_FIELD (TREE_OPERAND (lhs, 1)))
      return false;
  
!   if (!(integer_zerop (rhs)
!       || integer_all_onesp (rhs)
!       || real_zerop (rhs)
!       || (TREE_CODE (rhs) == CONSTRUCTOR
!           && !TREE_CLOBBER_P (rhs))
!       || (INTEGRAL_TYPE_P (TREE_TYPE (rhs))
!           && (TYPE_MODE (TREE_TYPE (lhs))
!               == TYPE_MODE (unsigned_char_type_node)))))
      return false;
  
    dr = XCNEW (struct data_reference);
*************** stmt_with_adjacent_zero_store_dr_p (gimp
*** 5291,5297 ****
     store to memory of the form "A[i] = 0".  */
  
  void
! stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
  {
    unsigned int i;
    basic_block bb;
--- 5299,5305 ----
     store to memory of the form "A[i] = 0".  */
  
  void
! stores_bytes_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
  {
    unsigned int i;
    basic_block bb;
*************** stores_zero_from_loop (struct loop *loop
*** 5302,5308 ****
    for (i = 0; i < loop->num_nodes; i++)
      for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        if ((stmt = gsi_stmt (si))
!         && stmt_with_adjacent_zero_store_dr_p (stmt))
        VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
  
    free (bbs);
--- 5310,5316 ----
    for (i = 0; i < loop->num_nodes; i++)
      for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        if ((stmt = gsi_stmt (si))
!         && stmt_with_adjacent_byte_store_dr_p (stmt))
        VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
  
    free (bbs);
Index: gcc/tree-loop-distribution.c
===================================================================
*** gcc/tree-loop-distribution.c        (revision 188004)
--- gcc/tree-loop-distribution.c        (working copy)
*************** build_size_arg_loc (location_t loc, tree
*** 300,307 ****
  /* Generate a call to memset.  Return true when the operation succeeded.  */
  
  static void
! generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
!                     gimple_stmt_iterator bsi)
  {
    tree addr_base, nb_bytes;
    bool res = false;
--- 300,307 ----
  /* Generate a call to memset.  Return true when the operation succeeded.  */
  
  static void
! generate_memset (gimple stmt, tree op0, tree nb_iter,
!                gimple_stmt_iterator bsi)
  {
    tree addr_base, nb_bytes;
    bool res = false;
*************** generate_memset_zero (gimple stmt, tree
*** 310,315 ****
--- 310,316 ----
    tree mem, fn;
    struct data_reference *dr = XCNEW (struct data_reference);
    location_t loc = gimple_location (stmt);
+   tree val;
  
    DR_STMT (dr) = stmt;
    DR_REF (dr) = op0;
*************** generate_memset_zero (gimple stmt, tree
*** 334,346 ****
    mem = force_gimple_operand (addr_base, &stmts, true, NULL);
    gimple_seq_add_seq (&stmt_list, stmts);
  
    fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
!   fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
    gimple_seq_add_stmt (&stmt_list, fn_call);
    gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING);
  
    if (dump_file && (dump_flags & TDF_DETAILS))
!     fprintf (dump_file, "generated memset zero\n");
  
    free_data_ref (dr);
  }
--- 335,379 ----
    mem = force_gimple_operand (addr_base, &stmts, true, NULL);
    gimple_seq_add_seq (&stmt_list, stmts);
  
+   /* This exactly matches stmt_with_adjacent_byte_store_dr_p which detects
+      stores of zero or byte-size integer stores.  */
+   val = gimple_assign_rhs1 (stmt);
+   if (integer_zerop (val)
+       || real_zerop (val)
+       || TREE_CODE (val) == CONSTRUCTOR)
+     val = integer_zero_node;
+   else if (integer_all_onesp (val))
+     val = build_int_cst (integer_type_node, -1);
+   else
+     {
+       if (TREE_CODE (val) == INTEGER_CST)
+       val = fold_convert (integer_type_node, val);
+       else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
+       {
+         gimple cstmt;
+         tree tem = create_tmp_reg (integer_type_node, NULL);
+         tem = make_ssa_name (tem, NULL);
+         cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
+         gimple_seq_add_stmt (&stmt_list, cstmt);
+         val = tem;
+       }
+     }
+ 
    fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
!   fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
    gimple_seq_add_stmt (&stmt_list, fn_call);
    gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING);
  
    if (dump_file && (dump_flags & TDF_DETAILS))
!     {
!       fprintf (dump_file, "generated memset");
!       if (integer_zerop (val))
!       fprintf (dump_file, " zero\n");
!       else if (integer_all_onesp (val))
!       fprintf (dump_file, " minus one\n");
!       else
!       fprintf (dump_file, "\n");
!     }
  
    free_data_ref (dr);
  }
*************** generate_builtin (struct loop *loop, bit
*** 386,399 ****
          if (stmt_has_scalar_dependences_outside_loop (stmt))
            goto end;
  
!         if (is_gimple_assign (stmt)
              && !is_gimple_reg (gimple_assign_lhs (stmt)))
            {
              /* Don't generate the builtins when there are more than
                 one memory write.  */
              if (write != NULL)
                goto end;
  
              write = stmt;
              if (bb == loop->latch)
                nb_iter = number_of_latch_executions (loop);
--- 419,443 ----
          if (stmt_has_scalar_dependences_outside_loop (stmt))
            goto end;
  
!         if (gimple_assign_single_p (stmt)
              && !is_gimple_reg (gimple_assign_lhs (stmt)))
            {
+             tree rhs;
+ 
              /* Don't generate the builtins when there are more than
                 one memory write.  */
              if (write != NULL)
                goto end;
  
+             /* If the store is from a non-constant, verify the value
+                is defined outside of the loop.  */
+             rhs = gimple_assign_rhs1 (stmt);
+             if (TREE_CODE (rhs) == SSA_NAME
+                 && !SSA_NAME_IS_DEFAULT_DEF (rhs)
+                 && flow_bb_inside_loop_p
+                      (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
+               goto end;
+ 
              write = stmt;
              if (bb == loop->latch)
                nb_iter = number_of_latch_executions (loop);
*************** generate_builtin (struct loop *loop, bit
*** 401,412 ****
        }
      }
  
!   if (!stmt_with_adjacent_zero_store_dr_p (write))
      goto end;
  
    /* The new statements will be placed before LOOP.  */
    bsi = gsi_last_bb (loop_preheader_edge (loop)->src);
!   generate_memset_zero (write, gimple_assign_lhs (write), nb_iter, bsi);
    res = true;
  
    /* If this is the last partition for which we generate code, we have
--- 445,456 ----
        }
      }
  
!   if (!stmt_with_adjacent_byte_store_dr_p (write))
      goto end;
  
    /* The new statements will be placed before LOOP.  */
    bsi = gsi_last_bb (loop_preheader_edge (loop)->src);
!   generate_memset (write, gimple_assign_lhs (write), nb_iter, bsi);
    res = true;
  
    /* If this is the last partition for which we generate code, we have
*************** can_generate_builtin (struct graph *rdg,
*** 825,831 ****
        gimple stmt = RDG_STMT (rdg, i);
        nb_writes++;
        if (!gimple_has_volatile_ops (stmt)
!           && stmt_with_adjacent_zero_store_dr_p (stmt))
          stores_zero++;
        }
  
--- 869,875 ----
        gimple stmt = RDG_STMT (rdg, i);
        nb_writes++;
        if (!gimple_has_volatile_ops (stmt)
!           && stmt_with_adjacent_byte_store_dr_p (stmt))
          stores_zero++;
        }
  
*************** tree_loop_distribution (void)
*** 1266,1272 ****
          /* With the following working list, we're asking
             distribute_loop to separate from the rest of the loop the
             stores of the form "A[i] = 0".  */
!         stores_zero_from_loop (loop, &work_list);
  
          /* Do nothing if there are no patterns to be distributed.  */
          if (VEC_length (gimple, work_list) > 0)
--- 1310,1316 ----
          /* With the following working list, we're asking
             distribute_loop to separate from the rest of the loop the
             stores of the form "A[i] = 0".  */
!         stores_bytes_from_loop (loop, &work_list);
  
          /* Do nothing if there are no patterns to be distributed.  */
          if (VEC_length (gimple, work_list) > 0)
Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-19.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/ldist-19.c    (revision 0)
--- gcc/testsuite/gcc.dg/tree-ssa/ldist-19.c    (revision 0)
***************
*** 0 ****
--- 1,49 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O3 -fdump-tree-ldist-details" } */
+ 
+ struct Foo
+ {
+   char a;
+ };
+ 
+ struct Foo x[256];
+ 
+ void foo()
+ {
+   int i;
+   for (i = 0; i < 256; ++i)
+     x[i] = (struct Foo){};
+ }
+ 
+ void bar()
+ {
+   int i;
+   for (i = 0; i < 256; ++i)
+     x[i].a = 1;
+ }
+ 
+ void foobar(unsigned char c)
+ {
+   int i;
+   for (i = 0; i < 256; ++i)
+     x[i].a = c;
+ }
+ 
+ struct Baz
+ {
+   short a;
+ };
+ 
+ struct Baz y[256];
+ 
+ void baz()
+ {
+   int i;
+   for (i = 0; i < 256; ++i)
+     y[i].a = -1;
+ }
+ 
+ /* { dg-final { scan-tree-dump-times "generated memset zero" 1 "ldist" } } */
+ /* { dg-final { scan-tree-dump-times "generated memset minus one" 1 "ldist" } } */
+ /* { dg-final { scan-tree-dump-times "generated memset" 4 "ldist" } } */
+ /* { dg-final { cleanup-tree-dump "ldist" } } */

Reply via email to