On Thu, 31 May 2012, Richard Guenther wrote: > On Wed, 30 May 2012, Richard Guenther wrote: > > > > > The patch below extends memset recognition to cover a few more > > non-byte-size store loops and all byte-size store loops. This exposes > > issues with our builtins.exp testsuite which has custom memset > > routines like > > > > void * > > my_memset (void *d, int c, size_t n) > > { > > char *dst = (char *) d; > > while (n--) > > *dst++ = c; > > return (char *) d; > > } > > > > Now, for LTO we have papered over similar issues by attaching > > the used attribute to the functions. But the general question is - when > > can we be sure the function we are dealing with is not the actual > > implementation for the builtin call we want to generate? A few > > things come to my mind: > > > > 1) the function already calls the function we want to generate (well, > > it might be a tail-recursive memset implementation ...) > > > > 2) the function availability is AVAIL_LOCAL > > > > 3) ... ? > > > > For sure 2) would work, but it would severely restrict the transform > > (do we care?). > > > > We have a similar issue with sin/cos -> sincos transform and a > > trivial sincos implementation. > > > > Any ideas? > > > > Bootstrapped (with memset recognition enabled by default) and tested > > on x86_64-unknown-linux-gnu with the aforementioned issues. > > The following fixes it by simply always adding > -fno-tree-loop-distribute-patterns to builtins.exp. > > Bootstrapped and tested on x86_64-unknown-linux-gnu. > > If there are no further comments I'll go with the local advice from > Micha who says "who cares".
Now done with the much simpler patch below (after all the loop distribution TLC). Bootstrapped and tested on x86_64-unknown-linux-gnu, applied. Richard. 2012-06-05 Richard Guenther <rguent...@suse.de> PR tree-optimization/53081 * tree-loop-distribution.c (generate_memset_builtin): Handle all kinds of byte-sized stores. (classify_partition): Likewise. (tree_loop_distribution): Adjust seed statements used for !flag_tree_loop_distribution. * gcc.dg/tree-ssa/ldist-19.c: New testcase. * gcc.c-torture/execute/builtins/builtins.exp: Always pass -fno-tree-loop-distribute-patterns. Index: gcc/tree-loop-distribution.c =================================================================== *** gcc/tree-loop-distribution.c.orig 2012-06-04 17:05:14.000000000 +0200 --- gcc/tree-loop-distribution.c 2012-06-04 17:32:38.829355831 +0200 *************** generate_memset_builtin (struct loop *lo *** 332,337 **** --- 332,338 ---- gimple_seq stmt_list = NULL, stmts; struct data_reference *dr = XCNEW (struct data_reference); location_t loc; + tree val; stmt = partition->main_stmt; loc = gimple_location (stmt); *************** generate_memset_builtin (struct loop *lo *** 364,376 **** mem = force_gimple_operand (addr_base, &stmts, true, NULL); gimple_seq_add_seq (&stmt_list, stmts); fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET)); ! fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes); gimple_seq_add_stmt (&stmt_list, fn_call); gsi_insert_seq_after (&gsi, stmt_list, GSI_CONTINUE_LINKING); if (dump_file && (dump_flags & TDF_DETAILS)) ! fprintf (dump_file, "generated memset zero\n"); } /* Remove and destroy the loop LOOP. */ --- 365,408 ---- mem = force_gimple_operand (addr_base, &stmts, true, NULL); gimple_seq_add_seq (&stmt_list, stmts); + /* This exactly matches the pattern recognition in classify_partition. 
*/ + val = gimple_assign_rhs1 (stmt); + if (integer_zerop (val) + || real_zerop (val) + || TREE_CODE (val) == CONSTRUCTOR) + val = integer_zero_node; + else if (integer_all_onesp (val)) + val = build_int_cst (integer_type_node, -1); + else + { + if (TREE_CODE (val) == INTEGER_CST) + val = fold_convert (integer_type_node, val); + else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val))) + { + gimple cstmt; + tree tem = create_tmp_reg (integer_type_node, NULL); + tem = make_ssa_name (tem, NULL); + cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE); + gimple_seq_add_stmt (&stmt_list, cstmt); + val = tem; + } + } + fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET)); ! fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes); gimple_seq_add_stmt (&stmt_list, fn_call); gsi_insert_seq_after (&gsi, stmt_list, GSI_CONTINUE_LINKING); if (dump_file && (dump_flags & TDF_DETAILS)) ! { ! fprintf (dump_file, "generated memset"); ! if (integer_zerop (val)) ! fprintf (dump_file, " zero\n"); ! else if (integer_all_onesp (val)) ! fprintf (dump_file, " minus one\n"); ! else ! fprintf (dump_file, "\n"); ! } } /* Remove and destroy the loop LOOP. */ *************** classify_partition (loop_p loop, struct *** 865,871 **** return; partition->main_stmt = stmt; rhs = gimple_assign_rhs1 (stmt); ! if (!(integer_zerop (rhs) || real_zerop (rhs))) return; if (VEC_length (data_reference_p, RDG_DATAREFS (rdg, i)) != 1) return; --- 897,915 ---- return; partition->main_stmt = stmt; rhs = gimple_assign_rhs1 (stmt); ! if (!(integer_zerop (rhs) ! || integer_all_onesp (rhs) ! || real_zerop (rhs) ! || (TREE_CODE (rhs) == CONSTRUCTOR ! && !TREE_CLOBBER_P (rhs)) ! || (INTEGRAL_TYPE_P (TREE_TYPE (rhs)) ! && (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) ! == TYPE_MODE (unsigned_char_type_node))))) ! return; ! if (TREE_CODE (rhs) == SSA_NAME ! && !SSA_NAME_IS_DEFAULT_DEF (rhs) ! && flow_bb_inside_loop_p ! 
(loop, gimple_bb (SSA_NAME_DEF_STMT (rhs)))) return; if (VEC_length (data_reference_p, RDG_DATAREFS (rdg, i)) != 1) return; *************** tree_loop_distribution (void) *** 1346,1354 **** /* If we are only performing pattern detection restrict what we try to distribute to stores from constants. */ ! if (!flag_tree_loop_distribution ! && !is_gimple_min_invariant (gimple_assign_rhs1 (stmt))) ! continue; VEC_safe_push (gimple, heap, work_list, stmt); } --- 1390,1408 ---- /* If we are only performing pattern detection restrict what we try to distribute to stores from constants. */ ! if (!flag_tree_loop_distribution) ! { ! tree rhs = gimple_assign_rhs1 (stmt); ! if (!is_gimple_min_invariant (rhs) ! && TREE_CODE (rhs) != CONSTRUCTOR ! && TREE_CODE (rhs) != SSA_NAME) ! continue; ! if (TREE_CODE (rhs) == SSA_NAME ! && !SSA_NAME_IS_DEFAULT_DEF (rhs) ! && flow_bb_inside_loop_p ! (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs)))) ! continue; ! } VEC_safe_push (gimple, heap, work_list, stmt); } Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-19.c =================================================================== *** /dev/null 1970-01-01 00:00:00.000000000 +0000 --- gcc/testsuite/gcc.dg/tree-ssa/ldist-19.c 2012-06-04 17:27:29.305366537 +0200 *************** *** 0 **** --- 1,72 ---- + /* { dg-do compile } */ + /* { dg-options "-O3 -fdump-tree-ldist-details" } */ + + struct Foo + { + char a; + }; + + struct Foo x[256]; + + static void __attribute__((noinline,noclone)) + foo() + { + int i; + for (i = 0; i < 256; ++i) + x[i] = (struct Foo){}; + } + + static void __attribute__((noinline,noclone)) + bar() + { + int i; + for (i = 0; i < 256; ++i) + x[i].a = 1; + } + + static void __attribute__((noinline,noclone)) + foobar(unsigned char c) + { + int i; + for (i = 0; i < 256; ++i) + x[i].a = c; + } + + static void __attribute__((noinline,noclone)) + foobar2(char c) + { + int i; + for (i = 0; i < 256; ++i) + x[i].a = c; + } + + struct Baz + { + short a; + }; + + struct Baz y[256]; + + static void 
__attribute__((noinline,noclone)) + baz() + { + int i; + for (i = 0; i < 256; ++i) + y[i].a = -1; + } + + int main() + { + volatile int x; + foo(); + bar(); + foobar(x); + foobar2(x); + baz(); + return 0; + } + + /* { dg-final { scan-tree-dump-times "generated memset zero" 1 "ldist" } } */ + /* { dg-final { scan-tree-dump-times "generated memset minus one" 1 "ldist" } } */ + /* { dg-final { scan-tree-dump-times "generated memset" 5 "ldist" } } */ + /* { dg-final { cleanup-tree-dump "ldist" } } */ Index: gcc/testsuite/gcc.c-torture/execute/builtins/builtins.exp =================================================================== *** gcc/testsuite/gcc.c-torture/execute/builtins/builtins.exp.orig 2012-06-04 13:47:31.000000000 +0200 --- gcc/testsuite/gcc.c-torture/execute/builtins/builtins.exp 2012-06-04 17:06:46.662409562 +0200 *************** load_lib c-torture.exp *** 37,43 **** torture-init set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS ! set additional_flags "" if [istarget "powerpc-*-darwin*"] { lappend additional_flags "-Wl,-multiply_defined,suppress" } --- 37,43 ---- torture-init set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS ! set additional_flags "-fno-tree-loop-distribute-patterns" if [istarget "powerpc-*-darwin*"] { lappend additional_flags "-Wl,-multiply_defined,suppress" }