Hi,

Like r14-3317, which moved the handling of memory access type
VMAT_GATHER_SCATTER out of the final loop nest in vectorizable_load
and into its own loop, this patch does the same for the
vectorizable_store side.
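
For reference, the shape of the change is roughly as below (a
simplified sketch only, not the actual code -- the real loops take
many more operands):

  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* Scatter stores are neither SLP nor grouped here, so
	 group_size == 1 and each copy J emits one scatter.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* ... build offset/mask/len, emit the scatter via the
	     IFN if available, otherwise emulate it ...  */
	}
      return true;
    }

  /* Final loop nest: only non-gather/scatter accesses remain.  */
  for (j = 0; j < ncopies; j++)
    for (i = 0; i < vec_num; i++)
      {
	/* ... contiguous/strided store handling ...  */
      }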

Bootstrapped and regtested on x86_64-redhat-linux,
aarch64-linux-gnu and powerpc64{,le}-linux-gnu.

Is it ok for trunk?

BR,
Kewen
-----

gcc/ChangeLog:

        * tree-vect-stmts.cc (vectorizable_store): Move the handling of
        VMAT_GATHER_SCATTER in the final loop nest to its own loop,
        and update the final nest accordingly.
---
 gcc/tree-vect-stmts.cc | 258 +++++++++++++++++++++++++----------------
 1 file changed, 159 insertions(+), 99 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 18f5ebcc09c..b959c1861ad 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8930,44 +8930,23 @@ vectorizable_store (vec_info *vinfo,
       return true;
     }

-  auto_vec<tree> result_chain (group_size);
-  auto_vec<tree> vec_offsets;
-  auto_vec<tree, 1> vec_oprnds;
-  for (j = 0; j < ncopies; j++)
+  if (memory_access_type == VMAT_GATHER_SCATTER)
     {
-      gimple *new_stmt;
-      if (j == 0)
+      gcc_assert (!slp && !grouped_store);
+      auto_vec<tree> vec_offsets;
+      for (j = 0; j < ncopies; j++)
        {
-         if (slp)
-           {
-             /* Get vectorized arguments for SLP_NODE.  */
-             vect_get_vec_defs (vinfo, stmt_info, slp_node, 1, op,
-                                &vec_oprnds);
-             vec_oprnd = vec_oprnds[0];
-           }
-         else
+         gimple *new_stmt;
+         if (j == 0)
            {
-             /* For interleaved stores we collect vectorized defs for all the
-                stores in the group in DR_CHAIN. DR_CHAIN is then used as an
-                input to vect_permute_store_chain().
-
-                If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
-                is of size 1.  */
-             stmt_vec_info next_stmt_info = first_stmt_info;
-             for (i = 0; i < group_size; i++)
-               {
-                 /* Since gaps are not supported for interleaved stores,
-                    DR_GROUP_SIZE is the exact number of stmts in the chain.
-                    Therefore, NEXT_STMT_INFO can't be NULL_TREE.  In case
-                    that there is no interleaving, DR_GROUP_SIZE is 1,
-                    and only one iteration of the loop will be executed.  */
-                 op = vect_get_store_rhs (next_stmt_info);
-                 vect_get_vec_defs_for_operand (vinfo, next_stmt_info, ncopies,
-                                                op, gvec_oprnds[i]);
-                 vec_oprnd = (*gvec_oprnds[i])[0];
-                 dr_chain.quick_push (vec_oprnd);
-                 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
-               }
+             /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
+                DR_CHAIN is of size 1.  */
+             gcc_assert (group_size == 1);
+             op = vect_get_store_rhs (first_stmt_info);
+             vect_get_vec_defs_for_operand (vinfo, first_stmt_info, ncopies,
+                                            op, gvec_oprnds[0]);
+             vec_oprnd = (*gvec_oprnds[0])[0];
+             dr_chain.quick_push (vec_oprnd);
              if (mask)
                {
                  vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
@@ -8975,91 +8954,55 @@ vectorizable_store (vec_info *vinfo,
                                                 mask_vectype);
                  vec_mask = vec_masks[0];
                }
-           }

-         /* We should have catched mismatched types earlier.  */
-         gcc_assert (useless_type_conversion_p (vectype,
-                                                TREE_TYPE (vec_oprnd)));
-         bool simd_lane_access_p
-           = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
-         if (simd_lane_access_p
-             && !loop_masks
-             && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
-             && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
-             && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
-             && integer_zerop (DR_INIT (first_dr_info->dr))
-             && alias_sets_conflict_p (get_alias_set (aggr_type),
-                                       get_alias_set (TREE_TYPE (ref_type))))
-           {
-             dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
-             dataref_offset = build_int_cst (ref_type, 0);
+             /* We should have catched mismatched types earlier.  */
+             gcc_assert (useless_type_conversion_p (vectype,
+                                                    TREE_TYPE (vec_oprnd)));
+             if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+               vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
+                                            slp_node, &gs_info, &dataref_ptr,
+                                            &vec_offsets);
+             else
+               dataref_ptr
+                 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
+                                             NULL, offset, &dummy, gsi,
+                                             &ptr_incr, false, bump);
            }
-         else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-           vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info, slp_node,
-                                        &gs_info, &dataref_ptr, &vec_offsets);
          else
-           dataref_ptr
-             = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
-                                         simd_lane_access_p ? loop : NULL,
-                                         offset, &dummy, gsi, &ptr_incr,
-                                         simd_lane_access_p, bump);
-       }
-      else
-       {
-         gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
-         /* DR_CHAIN is then used as an input to vect_permute_store_chain().
-            If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
-            of size 1.  */
-         for (i = 0; i < group_size; i++)
            {
-             vec_oprnd = (*gvec_oprnds[i])[j];
-             dr_chain[i] = vec_oprnd;
+             gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
+             vec_oprnd = (*gvec_oprnds[0])[j];
+             dr_chain[0] = vec_oprnd;
+             if (mask)
+               vec_mask = vec_masks[j];
+             if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+               dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
+                                              gsi, stmt_info, bump);
            }
-         if (mask)
-           vec_mask = vec_masks[j];
-         if (dataref_offset)
-           dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
-         else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-           dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
-                                          stmt_info, bump);
-       }
-
-      new_stmt = NULL;
-      if (grouped_store)
-       /* Permute.  */
-       vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info, gsi,
-                                 &result_chain);

-      stmt_vec_info next_stmt_info = first_stmt_info;
-      for (i = 0; i < vec_num; i++)
-       {
-         unsigned misalign;
+         new_stmt = NULL;
          unsigned HOST_WIDE_INT align;
-
          tree final_mask = NULL_TREE;
          tree final_len = NULL_TREE;
          tree bias = NULL_TREE;
          if (loop_masks)
            final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
-                                            vec_num * ncopies, vectype,
-                                            vec_num * j + i);
+                                            ncopies, vectype, j);
          if (vec_mask)
            final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
                                           vec_mask, gsi);

-         if (memory_access_type == VMAT_GATHER_SCATTER
-             && gs_info.ifn != IFN_LAST)
+         if (gs_info.ifn != IFN_LAST)
            {
              if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-               vec_offset = vec_offsets[vec_num * j + i];
+               vec_offset = vec_offsets[j];
              tree scale = size_int (gs_info.scale);

              if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
                {
                  if (loop_lens)
                    final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
-                                                  vec_num * ncopies, vectype,
-                                                  vec_num * j + i, 1);
+                                                  ncopies, vectype, j, 1);
                  else
                    final_len = build_int_cst (sizetype,
                                               TYPE_VECTOR_SUBPARTS (vectype));
@@ -9091,9 +9034,8 @@ vectorizable_store (vec_info *vinfo,
              gimple_call_set_nothrow (call, true);
              vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
              new_stmt = call;
-             break;
            }
-         else if (memory_access_type == VMAT_GATHER_SCATTER)
+         else
            {
              /* Emulated scatter.  */
              gcc_assert (!final_mask);
@@ -9142,8 +9084,126 @@ vectorizable_store (vec_info *vinfo,
                  new_stmt = gimple_build_assign (ref, elt);
                  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
                }
-             break;
            }
+         if (j == 0)
+           *vec_stmt = new_stmt;
+         STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+       }
+      return true;
+    }
+
+  auto_vec<tree> result_chain (group_size);
+  auto_vec<tree, 1> vec_oprnds;
+  for (j = 0; j < ncopies; j++)
+    {
+      gimple *new_stmt;
+      if (j == 0)
+       {
+         if (slp)
+           {
+             /* Get vectorized arguments for SLP_NODE.  */
+             vect_get_vec_defs (vinfo, stmt_info, slp_node, 1, op,
+                                &vec_oprnds);
+             vec_oprnd = vec_oprnds[0];
+           }
+         else
+           {
+             /* For interleaved stores we collect vectorized defs for all the
+                stores in the group in DR_CHAIN. DR_CHAIN is then used as an
+                input to vect_permute_store_chain().
+
+                If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
+                is of size 1.  */
+             stmt_vec_info next_stmt_info = first_stmt_info;
+             for (i = 0; i < group_size; i++)
+               {
+                 /* Since gaps are not supported for interleaved stores,
+                    DR_GROUP_SIZE is the exact number of stmts in the chain.
+                    Therefore, NEXT_STMT_INFO can't be NULL_TREE.  In case
+                    that there is no interleaving, DR_GROUP_SIZE is 1,
+                    and only one iteration of the loop will be executed.  */
+                 op = vect_get_store_rhs (next_stmt_info);
+                 vect_get_vec_defs_for_operand (vinfo, next_stmt_info, ncopies,
+                                                op, gvec_oprnds[i]);
+                 vec_oprnd = (*gvec_oprnds[i])[0];
+                 dr_chain.quick_push (vec_oprnd);
+                 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
+               }
+             if (mask)
+               {
+                 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
+                                                mask, &vec_masks,
+                                                mask_vectype);
+                 vec_mask = vec_masks[0];
+               }
+           }
+
+         /* We should have catched mismatched types earlier.  */
+         gcc_assert (useless_type_conversion_p (vectype,
+                                                TREE_TYPE (vec_oprnd)));
+         bool simd_lane_access_p
+           = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
+         if (simd_lane_access_p
+             && !loop_masks
+             && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
+             && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
+             && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
+             && integer_zerop (DR_INIT (first_dr_info->dr))
+             && alias_sets_conflict_p (get_alias_set (aggr_type),
+                                       get_alias_set (TREE_TYPE (ref_type))))
+           {
+             dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
+             dataref_offset = build_int_cst (ref_type, 0);
+           }
+         else
+           dataref_ptr
+             = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
+                                         simd_lane_access_p ? loop : NULL,
+                                         offset, &dummy, gsi, &ptr_incr,
+                                         simd_lane_access_p, bump);
+       }
+      else
+       {
+         gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
+         /* DR_CHAIN is then used as an input to vect_permute_store_chain().
+            If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
+            of size 1.  */
+         for (i = 0; i < group_size; i++)
+           {
+             vec_oprnd = (*gvec_oprnds[i])[j];
+             dr_chain[i] = vec_oprnd;
+           }
+         if (mask)
+           vec_mask = vec_masks[j];
+         if (dataref_offset)
+           dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
+         else
+           dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
+                                          stmt_info, bump);
+       }
+
+      new_stmt = NULL;
+      if (grouped_store)
+       /* Permute.  */
+       vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info, gsi,
+                                 &result_chain);
+
+      stmt_vec_info next_stmt_info = first_stmt_info;
+      for (i = 0; i < vec_num; i++)
+       {
+         unsigned misalign;
+         unsigned HOST_WIDE_INT align;
+
+         tree final_mask = NULL_TREE;
+         tree final_len = NULL_TREE;
+         tree bias = NULL_TREE;
+         if (loop_masks)
+           final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
+                                            vec_num * ncopies, vectype,
+                                            vec_num * j + i);
+         if (vec_mask)
+           final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
+                                          vec_mask, gsi);

          if (i > 0)
            /* Bump the vector pointer.  */
--
2.31.1
