On Fri, 14 Dec 2018, Jakub Jelinek wrote: > On Fri, Dec 14, 2018 at 10:40:19AM +0100, Richard Biener wrote: > > This looks OK to me - the only comment I have is on the two magic > > constants (64 and 8) which are used twice in the patch. Can you > > either see to hoist the common condition into sth like > > > > bool prefer_loop_initializer_p = ... > > > > or add #defines for those constants? I suppose the hoisting > > might be tricky as int_size_in_bytes can return -1 and the > > workarounds are different in both places right now (maybe that's > > a bug as well...). OTOH using (unsigned)int_size_in_bytes > > looks reasonable in general. > > So like this?
Yes. Thanks, Richard. > Still need to wait for the FE patch if I want to commit the testcases, those > depend on both patches. > I've added size32plus effective target to the larger test, as 384MB is too > much for 16 or 20 bit address targets. > And, I'll gather statistics on how often this makes a difference during > gimplification during my next bootstraps/regtests. > > 2018-12-14 Jakub Jelinek <ja...@redhat.com> > > PR c++/82294 > PR c++/87436 > * expr.h (categorize_ctor_elements): Add p_unique_nz_elts argument. > * expr.c (categorize_ctor_elements_1): Likewise. Compute it like > p_nz_elts, except don't multiply it by mult. Adjust recursive call. > Fix up COMPLEX_CST handling. > (categorize_ctor_elements): Add p_unique_nz_elts argument, initialize > it and pass it through to categorize_ctor_elements_1. > (mostly_zeros_p, all_zeros_p): Adjust categorize_ctor_elements callers. > * gimplify.c (gimplify_init_constructor): Likewise. Don't force > ctor into readonly data section if num_unique_nonzero_elements is > smaller or equal to 1/8 of num_nonzero_elements and size is >= 64 > bytes. > > * g++.dg/tree-ssa/pr82294.C: New test. > * g++.dg/tree-ssa/pr87436.C: New test. > > --- gcc/expr.h.jj 2018-12-13 18:00:10.527301479 +0100 > +++ gcc/expr.h 2018-12-14 11:52:31.941071185 +0100 > @@ -309,7 +309,8 @@ extern bool can_move_by_pieces (unsigned > extern unsigned HOST_WIDE_INT highest_pow2_factor (const_tree); > > extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *, > - HOST_WIDE_INT *, bool *); > + HOST_WIDE_INT *, HOST_WIDE_INT *, > + bool *); > > extern void expand_operands (tree, tree, rtx, rtx*, rtx*, > enum expand_modifier); > --- gcc/expr.c.jj 2018-12-13 18:00:10.426303121 +0100 > +++ gcc/expr.c 2018-12-14 11:52:31.945071118 +0100 > @@ -5945,10 +5945,11 @@ count_type_elements (const_tree type, bo > > static bool > categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts, > + HOST_WIDE_INT *p_unique_nz_elts, > HOST_WIDE_INT *p_init_elts, bool *p_complete) > { > unsigned HOST_WIDE_INT idx; > - HOST_WIDE_INT nz_elts, init_elts, num_fields; > + HOST_WIDE_INT nz_elts, unique_nz_elts, init_elts, num_fields; > tree value, purpose, elt_type; > > /* Whether CTOR is a valid constant initializer, in accordance with what > @@ -5958,6 +5959,7 @@ categorize_ctor_elements_1 (const_tree c > bool const_p = const_from_elts_p ? true : TREE_STATIC (ctor); > > nz_elts = 0; > + unique_nz_elts = 0; > init_elts = 0; > num_fields = 0; > elt_type = NULL_TREE; > @@ -5982,12 +5984,13 @@ categorize_ctor_elements_1 (const_tree c > { > case CONSTRUCTOR: > { > - HOST_WIDE_INT nz = 0, ic = 0; > + HOST_WIDE_INT nz = 0, unz = 0, ic = 0; > > - bool const_elt_p = categorize_ctor_elements_1 (value, &nz, &ic, > - p_complete); > + bool const_elt_p = categorize_ctor_elements_1 (value, &nz, &unz, > + &ic, p_complete); > > nz_elts += mult * nz; > + unique_nz_elts += unz; > init_elts += mult * ic; > > if (const_from_elts_p && const_p) > @@ -5999,21 +6002,31 @@ categorize_ctor_elements_1 (const_tree c > case REAL_CST: > case FIXED_CST: > if (!initializer_zerop (value)) > - nz_elts += mult; > + { > + nz_elts += mult; > + unique_nz_elts++; > + } > init_elts += mult; > break; > > case STRING_CST: > nz_elts += mult * TREE_STRING_LENGTH (value); > + unique_nz_elts += TREE_STRING_LENGTH (value); > init_elts += mult * TREE_STRING_LENGTH (value); > break; > > case COMPLEX_CST: > if (!initializer_zerop (TREE_REALPART (value))) > - nz_elts += mult; > + { > + nz_elts += mult; > + unique_nz_elts++; > + } > if (!initializer_zerop (TREE_IMAGPART (value))) > - nz_elts += mult; > - init_elts += mult; > + { > + nz_elts += mult; > + unique_nz_elts++; > + } > + init_elts += 2 * mult; > break; > > case VECTOR_CST: > @@ -6025,7 +6038,10 @@ categorize_ctor_elements_1 (const_tree c > { > tree v = VECTOR_CST_ELT (value, i); > if (!initializer_zerop (v)) > - nz_elts += mult; > + { > + nz_elts += mult; > + unique_nz_elts++; > + } > init_elts += mult; > } > } > @@ -6035,6 +6051,7 @@ categorize_ctor_elements_1 (const_tree c > { > HOST_WIDE_INT tc = count_type_elements (elt_type, false); > nz_elts += mult * tc; > + unique_nz_elts += tc; > init_elts += mult * tc; > > if (const_from_elts_p && const_p) > @@ -6054,6 +6071,7 @@ categorize_ctor_elements_1 (const_tree c > *p_complete = false; > > *p_nz_elts += nz_elts; > + *p_unique_nz_elts += unique_nz_elts; > *p_init_elts += init_elts; > > return const_p; > @@ -6062,6 +6080,11 @@ categorize_ctor_elements_1 (const_tree c > /* Examine CTOR to discover: > * how many scalar fields are set to nonzero values, > and place it in *P_NZ_ELTS; > + * the same, but counting RANGE_EXPRs as multiplier of 1 instead of > + high - low + 1 (this can be useful for callers to determine ctors > + that could be cheaply initialized with - perhaps nested - loops > + compared to copied from huge read-only data), > + and place it in *P_UNIQUE_NZ_ELTS; > * how many scalar fields in total are in CTOR, > and place it in *P_ELT_COUNT. > * whether the constructor is complete -- in the sense that every > @@ -6073,13 +6096,16 @@ categorize_ctor_elements_1 (const_tree c > > bool > categorize_ctor_elements (const_tree ctor, HOST_WIDE_INT *p_nz_elts, > + HOST_WIDE_INT *p_unique_nz_elts, > HOST_WIDE_INT *p_init_elts, bool *p_complete) > { > *p_nz_elts = 0; > + *p_unique_nz_elts = 0; > *p_init_elts = 0; > *p_complete = true; > > - return categorize_ctor_elements_1 (ctor, p_nz_elts, p_init_elts, > p_complete); > + return categorize_ctor_elements_1 (ctor, p_nz_elts, p_unique_nz_elts, > + p_init_elts, p_complete); > } > > /* TYPE is initialized by a constructor with NUM_ELTS elements, the last > @@ -6110,17 +6136,18 @@ complete_ctor_at_level_p (const_tree typ > return count_type_elements (type, true) == num_elts; > } > > -/* Return 1 if EXP contains mostly (3/4) zeros. */ > +/* Return 1 if EXP contains mostly (3/4) zeros. */ > > static int > mostly_zeros_p (const_tree exp) > { > if (TREE_CODE (exp) == CONSTRUCTOR) > { > - HOST_WIDE_INT nz_elts, init_elts; > + HOST_WIDE_INT nz_elts, unz_elts, init_elts; > bool complete_p; > > - categorize_ctor_elements (exp, &nz_elts, &init_elts, &complete_p); > + categorize_ctor_elements (exp, &nz_elts, &unz_elts, &init_elts, > + &complete_p); > return !complete_p || nz_elts < init_elts / 4; > } > > @@ -6134,10 +6161,11 @@ all_zeros_p (const_tree exp) > { > if (TREE_CODE (exp) == CONSTRUCTOR) > { > - HOST_WIDE_INT nz_elts, init_elts; > + HOST_WIDE_INT nz_elts, unz_elts, init_elts; > bool complete_p; > > - categorize_ctor_elements (exp, &nz_elts, &init_elts, &complete_p); > + categorize_ctor_elements (exp, &nz_elts, &unz_elts, &init_elts, > + &complete_p); > return nz_elts == 0; > } > > --- gcc/gimplify.c.jj 2018-12-13 18:00:10.597300341 +0100 > +++ gcc/gimplify.c 2018-12-14 12:01:17.491402703 +0100 > @@ -4778,7 +4778,15 @@ gimplify_init_constructor (tree *expr_p, > { > struct gimplify_init_ctor_preeval_data preeval_data; > HOST_WIDE_INT num_ctor_elements, num_nonzero_elements; > + HOST_WIDE_INT num_unique_nonzero_elements; > bool cleared, complete_p, valid_const_initializer; > + /* Use readonly data for initializers of this or smaller size > + regardless of the num_nonzero_elements / num_unique_nonzero_elements > + ratio. */ > + const HOST_WIDE_INT min_unique_size = 64; > + /* If num_nonzero_elements / num_unique_nonzero_elements ratio > + is smaller than this, use readonly data. */ > + const int unique_nonzero_ratio = 8; > > /* Aggregate types must lower constructors to initialization of > individual elements. The exception is that a CONSTRUCTOR node > @@ -4795,6 +4803,7 @@ gimplify_init_constructor (tree *expr_p, > can only do so if it known to be a valid constant initializer. */ > valid_const_initializer > = categorize_ctor_elements (ctor, &num_nonzero_elements, > + &num_unique_nonzero_elements, > &num_ctor_elements, &complete_p); > > /* If a const aggregate variable is being initialized, then it > @@ -4803,7 +4812,15 @@ gimplify_init_constructor (tree *expr_p, > && num_nonzero_elements > 1 > && TREE_READONLY (object) > && VAR_P (object) > - && (flag_merge_constants >= 2 || !TREE_ADDRESSABLE (object))) > + && (flag_merge_constants >= 2 || !TREE_ADDRESSABLE (object)) > + /* For ctors that have many repeated nonzero elements > + represented through RANGE_EXPRs, prefer initializing > + those through runtime loops over copies of large amounts > + of data from readonly data section. */ > + && (num_unique_nonzero_elements > + > num_nonzero_elements / unique_nonzero_ratio > + || ((unsigned HOST_WIDE_INT) int_size_in_bytes (type) > + <= (unsigned HOST_WIDE_INT) min_unique_size))) > { > if (notify_temp_creation) > return GS_ERROR; > @@ -4896,6 +4913,13 @@ gimplify_init_constructor (tree *expr_p, > is so large as to make individual moves inefficient. */ > if (size > 0 > && num_nonzero_elements > 1 > + /* For ctors that have many repeated nonzero elements > + represented through RANGE_EXPRs, prefer initializing > + those through runtime loops over copies of large amounts > + of data from readonly data section. */ > + && (num_unique_nonzero_elements > + > num_nonzero_elements / unique_nonzero_ratio > + || size < min_unique_size) > && (size < num_nonzero_elements > || !can_move_by_pieces (size, align))) > { > --- gcc/testsuite/g++.dg/tree-ssa/pr82294.C.jj 2018-12-14 > 11:52:31.949071052 +0100 > +++ gcc/testsuite/g++.dg/tree-ssa/pr82294.C 2018-12-14 11:52:31.949071052 > +0100 > @@ -0,0 +1,13 @@ > +// PR c++/82294 > +// { dg-do compile { target c++11 } } > +// { dg-options "-O2 -fdump-tree-gimple" } > + > +// Verify we don't "optimize" the ctor as copying a 1KB .rodata > +// object into the variable. It is better to initialize it through > +// a loop. > +// { dg-final { scan-tree-dump-not "this->arr = " "gimple" } } > + > +struct S { int x; explicit constexpr S (); }; > +constexpr S::S () : x{7} {} > +struct T { S arr[256]; explicit T (); }; > +T::T () {} > --- gcc/testsuite/g++.dg/tree-ssa/pr87436.C.jj 2018-12-14 > 11:52:31.949071052 +0100 > +++ gcc/testsuite/g++.dg/tree-ssa/pr87436.C 2018-12-14 11:52:31.949071052 > +0100 > @@ -0,0 +1,25 @@ > +// PR c++/87436 > +// { dg-do compile { target { c++11 && size32plus } } } > +// { dg-options "-O2 -fdump-tree-gimple" } > + > +// Verify we don't "optimize" the ctor as copying a 384MB .rodata > +// object into the variable. It is better to initialize it through > +// two nested loops. > +// { dg-final { scan-tree-dump-not "this->arr = " "gimple" } } > + > +struct S { > + int a = -1; > + short b = 3; > + int x = 0; > + int y = 1; > + int z = 42; > + float f = 0.123f; > +}; > + > +struct T { S arr[4096][4096]; }; > + > +T * > +foo () > +{ > + return new T; > +} > > > Jakub > > -- Richard Biener <rguent...@suse.de> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)