On Thu, Nov 21, 2013 at 07:43:35AM +1000, Richard Henderson wrote:
> On 11/20/2013 07:44 PM, Jakub Jelinek wrote:
> > On Wed, Nov 20, 2013 at 10:31:38AM +0100, Richard Biener wrote:
> >> Aww ;)  Nice improvement.  Generally when I see this I always wonder
> >> whether we want to do this kind of stuff pre RTL expansion.
> >> 1st to not rely on being able to TER, 2nd to finally eventually
> >> get rid of TER.
> >>
> >> These patches are unfortunately a step backward for #2.
> >>
> >> As of the patch, do we have a way to query whether the target
> >> can efficiently broadcast?  If so this IMHO belongs in generic
> > 
> > We don't.  Perhaps if we'd add optab for vec_dup<mode> and mentioned
> > clearly in the documentation that it should be used only if it is reasonably
> > efficient.  But still, even with optab, it would probably better to do it
> > in the veclower* passes than in the vectorizer itself.
> 
> I think we can assume that broadcast is relatively efficient, whether or not
> vec_dup is present.  I'd lean to making the transformation generic to start
> with, so that you don't need extra handling in the i386 backend.

Ok, here is a generic veclower implementation without looking at any optabs,
so far only handles PLUS_EXPR, what operation other than MULT_EXPR would
make sense here?  Though, handling MULT_EXPR also would complicate the code
slightly (it would need to handle say:
  _2 = _1(D) + 1;
  _3 = _2 + 2;
  _4 = _3 * 2;
  _5 = _4 * 3;
  _6 = { _3, _4, _5, _4 };
where we could start thinking first the operation is PLUS_EXPR, but it
actually is MULT_EXPR with _3 as base).  Also, for MULT_EXPR, supposedly
we could handle some values to be constant 0, like in:
  _2 = _1(D) * 5;
  _3 = _2 * 2;
  _4 = _1(D) * 10;
  _5 = { _3, 0, _4, _2, _1(D), 0, _4, _2 };

Bootstrap/regtest pending, ok at least for this for the start and can be
improved later on?

2013-11-21  Jakub Jelinek  <ja...@redhat.com>

        * tree-vect-generic.c (optimize_vector_constructor): New function.
        (expand_vector_operations_1): Call it.

        * gcc.dg/vect/vect-124.c: New test.

--- gcc/tree-vect-generic.c.jj  2013-11-19 21:56:40.000000000 +0100
+++ gcc/tree-vect-generic.c     2013-11-21 11:17:55.146118161 +0100
@@ -988,6 +988,84 @@ expand_vector_operation (gimple_stmt_ite
                                    gimple_assign_rhs1 (assign),
                                    gimple_assign_rhs2 (assign), code);
 }
+
+/* Try to optimize
+   a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
+   style stmts into:
+   _9 = { b_7, b_7, b_7, b_7 };
+   a_5 = _9 + { 0, 3, 6, 9 };
+   because vector splat operation is usually more efficient
+   than piecewise initialization of the vector.  */
+
+static void
+optimize_vector_constructor (gimple_stmt_iterator *gsi)
+{
+  gimple stmt = gsi_stmt (*gsi);
+  tree lhs = gimple_assign_lhs (stmt);
+  tree rhs = gimple_assign_rhs1 (stmt);
+  tree type = TREE_TYPE (rhs);
+  unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type);
+  bool all_same = true;
+  constructor_elt *elt;
+  tree *cst;
+  gimple g;
+  tree base = NULL_TREE;
+
+  if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts)
+    return;
+  FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
+    if (TREE_CODE (elt->value) != SSA_NAME
+       || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
+      return;
+    else
+      {
+       tree this_base = elt->value;
+       if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
+         all_same = false;
+       for (j = 0; j < nelts + 1; j++)
+         {
+           g = SSA_NAME_DEF_STMT (this_base);
+           if (is_gimple_assign (g)
+               && gimple_assign_rhs_code (g) == PLUS_EXPR
+               && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
+               && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
+               && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
+             this_base = gimple_assign_rhs1 (g);
+           else
+             break;
+         }
+       if (i == 0)
+         base = this_base;
+       else if (this_base != base)
+         return;
+      }
+  if (all_same)
+    return;
+  cst = XALLOCAVEC (tree, nelts);
+  for (i = 0; i < nelts; i++)
+    {
+      tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;;
+      cst[i] = build_zero_cst (TREE_TYPE (base));
+      while (this_base != base)
+       {
+         g = SSA_NAME_DEF_STMT (this_base);
+         cst[i] = fold_binary (PLUS_EXPR, TREE_TYPE (base),
+                               cst[i], gimple_assign_rhs2 (g));
+         if (cst[i] == NULL_TREE
+             || TREE_CODE (cst[i]) != INTEGER_CST
+             || TREE_OVERFLOW (cst[i]))
+           return;
+         this_base = gimple_assign_rhs1 (g);
+       }
+    }
+  for (i = 0; i < nelts; i++)
+    CONSTRUCTOR_ELT (rhs, i)->value = base;
+  g = gimple_build_assign (make_ssa_name (type, NULL), rhs);
+  gsi_insert_before (gsi, g, GSI_SAME_STMT);
+  g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, gimple_assign_lhs (g),
+                                   build_vector (type, cst));
+  gsi_replace (gsi, g, false);
+}
 
 /* Return a type for the widest vector mode whose components are of type
    TYPE, or NULL_TREE if none is found.  */
@@ -1278,6 +1356,17 @@ expand_vector_operations_1 (gimple_stmt_
       expand_vector_condition (gsi);
       return;
     }
+
+  if (code == CONSTRUCTOR
+      && TREE_CODE (lhs) == SSA_NAME
+      && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
+      && !gimple_clobber_p (stmt)
+      && optimize)
+    {
+      optimize_vector_constructor (gsi);
+      return;
+    }
+
   if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
     return;
 
--- gcc/testsuite/gcc.dg/vect/vect-124.c.jj     2013-11-20 09:27:35.760798649 
+0100
+++ gcc/testsuite/gcc.dg/vect/vect-124.c        2013-11-20 09:29:32.219199794 
+0100
@@ -0,0 +1,28 @@
+#include "tree-vect.h"
+
+#ifndef N
+#define N 64
+#endif
+
+int a[N];
+
+__attribute__((noinline, noclone)) void
+foo (int x)
+{
+  int i;
+  for (i = 0; i < N; i++, x += 3)
+    a[i] = x;
+}
+
+int
+main ()
+{
+  int i;
+  
+  check_vect ();
+  foo (6);
+  for (i = 0; i < N; i++)
+    if (a[i] != i * 3 + 6)
+      abort ();
+  return 0;
+}


        Jakub

Reply via email to