Hi!

The following patch attempts to improve both compile time and generated
code for array initializers if there are many consecutive same elements.

Without the patch even T10000 instead of T100000 takes a minute to compile
and generates hundreds of kilobytes large routine.

Without the patch, we generate (from gimple dump):
              A::A (D.2035, 3);
              D.2035 = D.2035 + 1;
              D.2036 = D.2036 + -1;
times 100000, with the patch we generate a simple loop that does the same
thing 100000 times.  Of course the compiler can then decide how many times
it wants to unroll it etc.

Bootstrapped/regtested on x86_64-linux and i686-linux.

Not sure if cp_tree_equal (elt, elt2) is the best check, perhaps
operand_equal_p instead, though wasn't sure if it wouldn't be upset about
C++ FE trees.  Or perhaps just limit it to the case where both elt and elt2
are same constants rather than arbitrary trees?

And, the patch hardcodes threshold 10 - 10 consecutive same ctor values are
initialized already with a loop, 9 still with straightforward code.  Wonder
if it is ok to use a constant, or do we want a param, or another constant?

2014-01-10  Jakub Jelinek  <ja...@redhat.com>

        PR c++/59659
        * init.c (build_vec_init): If there are 10+ elements with the
        same value in the CONSTRUCTOR, construct them using a runtime loop
        rather than one by one.

        * g++.dg/opt/pr59659.C: New test.

--- gcc/cp/init.c.jj    2014-01-09 19:09:42.086613999 +0100
+++ gcc/cp/init.c       2014-01-10 12:14:11.434068915 +0100
@@ -3565,6 +3565,9 @@ build_vec_init (tree base, tree maxindex
        {
          tree baseref = build1 (INDIRECT_REF, type, base);
          tree one_init;
+         tree value = NULL_TREE;
+         tree for_stmt = NULL_TREE;
+         unsigned int cnt = 0;
 
          num_initialized_elts++;
 
@@ -3589,7 +3592,7 @@ build_vec_init (tree base, tree maxindex
              e = maybe_constant_init (e);
              if (reduced_constant_expression_p (e))
                {
-                 CONSTRUCTOR_APPEND_ELT (new_vec, field, e);
+                 value = e;
                  if (do_static_init)
                    one_init = NULL_TREE;
                  else
@@ -3599,16 +3602,44 @@ build_vec_init (tree base, tree maxindex
              else
                {
                  if (do_static_init)
-                   {
-                     tree value = build_zero_init (TREE_TYPE (e), NULL_TREE,
-                                                   true);
-                     if (value)
-                       CONSTRUCTOR_APPEND_ELT (new_vec, field, value);
-                   }
+                   value = build_zero_init (TREE_TYPE (e), NULL_TREE, true);
                  saw_non_const = true;
                }
            }
 
+         cnt = 1;
+         if (one_init)
+           {
+             unsigned len = CONSTRUCTOR_NELTS (init);
+
+             /* See if we don't have 10 or more elts with the same value
+                in a row.  In that case create a loop rather than straight
+                line code.  */
+             for (cnt = 1; idx + cnt < len; cnt++)
+               {
+                 tree elt2 = CONSTRUCTOR_ELT (init, idx + cnt)->value;
+                 if (!cp_tree_equal (elt, elt2)
+                     || !same_type_p (TREE_TYPE (elt), TREE_TYPE (elt2)))
+                   break;
+               }
+
+             if (cnt < 10)
+               cnt = 1;
+             else
+               {
+                 current_stmt_tree ()->stmts_are_full_exprs_p = 0;
+                 tree lim = build_int_cst (TREE_TYPE (iterator), cnt);
+                 lim = cp_build_binary_op (UNKNOWN_LOCATION, MINUS_EXPR,
+                                           iterator, lim, complain);
+                 lim = get_temp_regvar (ptrdiff_type_node, lim);
+                 for_stmt = begin_for_stmt (NULL_TREE, NULL_TREE);
+                 finish_for_init_stmt (for_stmt);
+                 finish_for_cond (build2 (NE_EXPR, boolean_type_node,
+                                          iterator, lim), for_stmt, false);
+                 current_stmt_tree ()->stmts_are_full_exprs_p = 1;
+               }
+           }
+
          if (one_init)
            finish_expr_stmt (one_init);
          current_stmt_tree ()->stmts_are_full_exprs_p = 0;
@@ -3625,6 +3656,22 @@ build_vec_init (tree base, tree maxindex
            errors = true;
          else
            finish_expr_stmt (one_init);
+
+         if (value)
+           CONSTRUCTOR_APPEND_ELT (new_vec, field, value);
+
+         if (cnt > 1)
+           {
+             if (value)
+               for (unsigned int i = 1; i < cnt; i++)
+                 CONSTRUCTOR_APPEND_ELT (new_vec,
+                                         CONSTRUCTOR_ELT (init,
+                                                          idx + i)->index,
+                                         value);
+             num_initialized_elts += cnt - 1;
+             idx += cnt - 1;
+             finish_for_stmt (for_stmt);
+           }
        }
 
       if (try_const)
--- gcc/testsuite/g++.dg/opt/pr59659.C.jj       2014-01-10 12:24:55.633712582 
+0100
+++ gcc/testsuite/g++.dg/opt/pr59659.C  2014-01-10 12:25:48.468432311 +0100
@@ -0,0 +1,60 @@
+// PR c++/59659
+// { dg-do run }
+// { dg-options "-O2" }
+
+struct A { A (); A (int); ~A (); };
+
+int cnt;
+
+__attribute__((noinline))
+A::A ()
+{
+  cnt++;
+}
+
+__attribute__((noinline))
+A::A (int)
+{
+  cnt++;
+}
+
+__attribute__((noinline))
+A::~A ()
+{
+  cnt--;
+}
+
+__attribute__((noinline, noclone)) void
+bar (A *a)
+{
+  __asm volatile ("" : : "r" (a) : "memory");
+  if (cnt != 102004)
+    __builtin_abort ();
+}
+
+#define T10(N) N, N, N, N, N, N, N, N, N, N
+#define T100(N) T10(N), T10(N), T10(N), T10(N), T10(N), \
+               T10(N), T10(N), T10(N), T10(N), T10(N)
+#define T1000(N) T100(N), T100(N), T100(N), T100(N), T100(N), \
+                T100(N), T100(N), T100(N), T100(N), T100(N)
+#define T10000(N) T1000(N), T1000(N), T1000(N), T1000(N), T1000(N), \
+                 T1000(N), T1000(N), T1000(N), T1000(N), T1000(N)
+#define T100000(N) T10000(N), T10000(N), T10000(N), T10000(N), T10000(N), \
+                  T10000(N), T10000(N), T10000(N), T10000(N), T10000(N)
+
+__attribute__((noinline, noclone)) void
+foo ()
+{
+  A a[] = { 1, 2, T1000 (3), T100000 (4), T1000 (3), 2, 1 };
+  bar (a);
+}
+
+int
+main ()
+{
+  if (cnt != 0)
+    __builtin_abort ();
+  foo ();
+  if (cnt != 0)
+    __builtin_abort ();
+}

        Jakub

Reply via email to