The following adds optab checks to see whether the target prefers
vector from vector extracts or integer from punned integer vector
extracts.  But instead of falling back to elementwise operation
we fall back to the vector from vector extract path as spilling
the vector to extract from and then doing the extract via loads
which then feed (and fewer) stores should be always profitable
(N element extracts plus N element stores vs. at most one spill
plus N/2 loads from spill slot plus N/2 stores).
Usually archs do not show store-to-load forwarding issues for
this case.

Bootstrap and regtest ongoing on x86_64-unknown-linux-gnu.

Richard.

2017-08-01  Richard Biener  <rguent...@suse.de>

        * tree-vect-stmts.c (vectorizable_store): Perform vector extracts
        via vectors if supported, integer extracts via punning if supported
        or otherwise vector extracts.

Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c       (revision 250773)
+++ gcc/tree-vect-stmts.c       (working copy)
@@ -6002,6 +6002,7 @@ vectorizable_store (gimple *stmt, gimple
       unsigned nstores = nunits;
       unsigned lnel = 1;
       tree ltype = elem_type;
+      tree lvectype = vectype;
       if (slp)
        {
          if (group_size < nunits
@@ -6010,6 +6011,45 @@ vectorizable_store (gimple *stmt, gimple
              nstores = nunits / group_size;
              lnel = group_size;
              ltype = build_vector_type (elem_type, group_size);
+             lvectype = vectype;
+
+             /* First check if vec_extract optab doesn't support extraction
+                of vector elts directly.  */
+             machine_mode elmode = TYPE_MODE (elem_type);
+             machine_mode vmode = mode_for_vector (elmode, group_size);
+             if (! VECTOR_MODE_P (vmode)
+                 || (convert_optab_handler (vec_extract_optab,
+                                            TYPE_MODE (vectype), vmode)
+                     == CODE_FOR_nothing))
+               {
+                 /* Try to avoid emitting an extract of vector elements
+                    by performing the extracts using an integer type of the
+                    same size, extracting from a vector of those and then
+                    re-interpreting it as the original vector type if
+                    supported.  */
+                 unsigned lsize
+                   = group_size * GET_MODE_BITSIZE (elmode);
+                 elmode = mode_for_size (lsize, MODE_INT, 0);
+                 vmode = mode_for_vector (elmode, nunits / group_size);
+                 /* If we can't construct such a vector fall back to
+                    element extracts from the original vector type and
+                    element size stores.  */
+                 if (VECTOR_MODE_P (vmode)
+                     && (convert_optab_handler (vec_extract_optab,
+                                                vmode, elmode)
+                         != CODE_FOR_nothing))
+                   {
+                     nstores = nunits / group_size;
+                     lnel = group_size;
+                     ltype = build_nonstandard_integer_type (lsize, 1);
+                     lvectype = build_vector_type (ltype, nstores);
+                   }
+                 /* Else fall back to vector extraction anyway.
+                    Fewer stores are more important than avoiding spilling
+                    of the vector we extract from.  Compared to the
+                    construction case in vectorizable_load no store-forwarding
+                    issue exists here for reasonable archs.  */
+               }
            }
          else if (group_size >= nunits
                   && group_size % nunits == 0)
@@ -6017,6 +6057,7 @@ vectorizable_store (gimple *stmt, gimple
              nstores = 1;
              lnel = nunits;
              ltype = vectype;
+             lvectype = vectype;
            }
          ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
          ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
@@ -6087,7 +6128,15 @@ vectorizable_store (gimple *stmt, gimple
                      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, 
vec_oprnd);
                    }
                }
-
+             /* Pun the vector to extract from if necessary.  */
+             if (lvectype != vectype)
+               {
+                 tree tem = make_ssa_name (lvectype);
+                 gimple *pun= gimple_build_assign (tem, build1
+                               (VIEW_CONVERT_EXPR, lvectype, vec_oprnd));
+                 vect_finish_stmt_generation (stmt, pun, gsi);
+                 vec_oprnd = tem;
+               }
              for (i = 0; i < nstores; i++)
                {
                  tree newref, newoff;

Reply via email to