On Fri, 20 Oct 2023, Richard Biener wrote:

> I went a little bit too simple with implementing SLP gather support
> for emulated and builtin based gathers.  The following fixes the
> conflict that appears when running into .MASK_LOAD where we rely
> on vect_get_operand_map and the bolted-on STMT_VINFO_GATHER_SCATTER_P
> checking wrecks that.  The following properly integrates this with
> vect_get_operand_map, adding another special index refering to
> the vect_check_gather_scatter analyzed offset.
> 
> This unbreaks aarch64 (and hopefully riscv), I'll followup with
> more fixes and testsuite coverage for x86 where I think I got
> masked gather SLP support wrong.
> 
> Boostrap and regtest running on x86_64-unknown-linux-gnu.

The following is what I have installed.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Richard.

>From a3feee109136696c5593d9bc9efd0d145f89a81d Mon Sep 17 00:00:00 2001
From: Richard Biener <rguent...@suse.de>
Date: Fri, 20 Oct 2023 09:30:45 +0200
Subject: [PATCH] Fixup vect_get_and_check_slp_defs for gathers and .MASK_LOAD
To: gcc-patches@gcc.gnu.org

I went a little bit too simple with implementing SLP gather support
for emulated and builtin based gathers.  The following fixes the
conflict that appears when running into .MASK_LOAD where we rely
on vect_get_operand_map and the bolted-on STMT_VINFO_GATHER_SCATTER_P
checking wrecks that.  The following properly integrates this with
vect_get_operand_map, adding another special index refering to
the vect_check_gather_scatter analyzed offset.

This unbreaks aarch64 (and hopefully riscv), I'll followup with
more fixes and testsuite coverage for x86 where I think I got
masked gather SLP support wrong.

        * tree-vect-slp.cc (off_map, off_op0_map, off_arg2_map,
        off_arg3_arg2_map): New.
        (vect_get_operand_map): Get flag whether the stmt was
        recognized as gather or scatter and use the above
        accordingly.
        (vect_get_and_check_slp_defs): Adjust.
        (vect_build_slp_tree_2): Likewise.
---
 gcc/tree-vect-slp.cc | 74 +++++++++++++++++++++++++++++++-------------
 1 file changed, 52 insertions(+), 22 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 8efff2e912d..24bf6582f8d 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -287,6 +287,7 @@ typedef struct _slp_oprnd_info
   tree first_op_type;
   enum vect_def_type first_dt;
   bool any_pattern;
+  bool first_gs_p;
   gather_scatter_info first_gs_info;
 } *slp_oprnd_info;
 
@@ -309,6 +310,7 @@ vect_create_oprnd_info (int nops, int group_size)
       oprnd_info->first_dt = vect_uninitialized_def;
       oprnd_info->first_op_type = NULL_TREE;
       oprnd_info->any_pattern = false;
+      oprnd_info->first_gs_p = false;
       oprnds_info.quick_push (oprnd_info);
     }
 
@@ -508,6 +510,10 @@ static const int arg2_map[] = { 1, 2 };
 static const int arg1_arg4_map[] = { 2, 1, 4 };
 static const int arg3_arg2_map[] = { 2, 3, 2 };
 static const int op1_op0_map[] = { 2, 1, 0 };
+static const int off_map[] = { 1, -3 };
+static const int off_op0_map[] = { 2, -3, 0 };
+static const int off_arg2_map[] = { 2, -3, 2 };
+static const int off_arg3_arg2_map[] = { 3, -3, 3, 2 };
 static const int mask_call_maps[6][7] = {
   { 1, 1, },
   { 2, 1, 2, },
@@ -525,11 +531,14 @@ static const int mask_call_maps[6][7] = {
    - for each child node, the index of the argument associated with that node.
      The special index -1 is the first operand of an embedded comparison and
      the special index -2 is the second operand of an embedded comparison.
+     The special indes -3 is the offset of a gather as analyzed by
+     vect_check_gather_scatter.
 
    SWAP is as for vect_get_and_check_slp_defs.  */
 
 static const int *
-vect_get_operand_map (const gimple *stmt, unsigned char swap = 0)
+vect_get_operand_map (const gimple *stmt, bool gather_scatter_p = false,
+                     unsigned char swap = 0)
 {
   if (auto assign = dyn_cast<const gassign *> (stmt))
     {
@@ -539,6 +548,8 @@ vect_get_operand_map (const gimple *stmt, unsigned char 
swap = 0)
       if (TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison
          && swap)
        return op1_op0_map;
+      if (gather_scatter_p)
+       return gimple_vdef (stmt) ? off_op0_map : off_map;
     }
   gcc_assert (!swap);
   if (auto call = dyn_cast<const gcall *> (stmt))
@@ -547,7 +558,7 @@ vect_get_operand_map (const gimple *stmt, unsigned char 
swap = 0)
        switch (gimple_call_internal_fn (call))
          {
          case IFN_MASK_LOAD:
-           return arg2_map;
+           return gather_scatter_p ? off_arg2_map : arg2_map;
 
          case IFN_GATHER_LOAD:
            return arg1_map;
@@ -556,7 +567,7 @@ vect_get_operand_map (const gimple *stmt, unsigned char 
swap = 0)
            return arg1_arg4_map;
 
          case IFN_MASK_STORE:
-           return arg3_arg2_map;
+           return gather_scatter_p ? off_arg3_arg2_map : arg3_arg2_map;
 
          case IFN_MASK_CALL:
            {
@@ -611,6 +622,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char 
swap,
   enum vect_def_type dt = vect_uninitialized_def;
   slp_oprnd_info oprnd_info;
   gather_scatter_info gs_info;
+  unsigned int gs_op = -1u;
   unsigned int commutative_op = -1U;
   bool first = stmt_num == 0;
 
@@ -620,7 +632,9 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char 
swap,
     return -1;
 
   number_of_oprnds = gimple_num_args (stmt_info->stmt);
-  const int *map = vect_get_operand_map (stmt_info->stmt, swap);
+  const int *map
+    = vect_get_operand_map (stmt_info->stmt,
+                           STMT_VINFO_GATHER_SCATTER_P (stmt_info), swap);
   if (map)
     number_of_oprnds = *map++;
   if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
@@ -642,8 +656,30 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned 
char swap,
   enum vect_def_type *dts = XALLOCAVEC (enum vect_def_type, number_of_oprnds);
   for (i = 0; i < number_of_oprnds; i++)
     {
+      oprnd_info = (*oprnds_info)[i];
       int opno = map ? map[i] : int (i);
-      if (opno < 0)
+      if (opno == -3)
+       {
+         gcc_assert (STMT_VINFO_GATHER_SCATTER_P (stmt_info));
+         if (!is_a <loop_vec_info> (vinfo)
+             || !vect_check_gather_scatter (stmt_info,
+                                            as_a <loop_vec_info> (vinfo),
+                                            first ? &oprnd_info->first_gs_info
+                                            : &gs_info))
+           return -1;
+
+         if (first)
+           {
+             oprnd_info->first_gs_p = true;
+             oprnd = oprnd_info->first_gs_info.offset;
+           }
+         else
+           {
+             gs_op = i;
+             oprnd = gs_info.offset;
+           }
+       }
+      else if (opno < 0)
        oprnd = TREE_OPERAND (gimple_arg (stmt_info->stmt, 0), -1 - opno);
       else
        {
@@ -660,21 +696,6 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned 
char swap,
       if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
        oprnd = TREE_OPERAND (oprnd, 0);
 
-      oprnd_info = (*oprnds_info)[i];
-
-      if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-       {
-         gcc_assert (number_of_oprnds == 1);
-         if (!is_a <loop_vec_info> (vinfo)
-             || !vect_check_gather_scatter (stmt_info,
-                                            as_a <loop_vec_info> (vinfo),
-                                            first ? &oprnd_info->first_gs_info
-                                            : &gs_info))
-           return -1;
-
-         oprnd = first ? oprnd_info->first_gs_info.offset : gs_info.offset;
-       }
-
       stmt_vec_info def_stmt_info;
       if (!vect_is_simple_use (oprnd, vinfo, &dts[i], &def_stmt_info))
        {
@@ -807,7 +828,14 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned 
char swap,
          return 1;
        }
 
-      if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+      if ((gs_op == i) != oprnd_info->first_gs_p)
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "Build SLP failed: mixed gather and non-gather\n");
+         return 1;
+       }
+      else if (gs_op == i)
        {
          if (!operand_equal_p (oprnd_info->first_gs_info.base,
                                gs_info.base))
@@ -1817,7 +1845,9 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
     return NULL;
 
   nops = gimple_num_args (stmt_info->stmt);
-  if (const int *map = vect_get_operand_map (stmt_info->stmt))
+  if (const int *map = vect_get_operand_map (stmt_info->stmt,
+                                            STMT_VINFO_GATHER_SCATTER_P
+                                              (stmt_info)))
     nops = map[0];
 
   /* If the SLP node is a PHI (induction or reduction), terminate
-- 
2.35.3

Reply via email to