This patch fixes this following FAILs in RISC-V regression:

FAIL: gcc.dg/vect/vect-gather-1.c -flto -ffat-lto-objects  scan-tree-dump vect 
"Loop contains only SLP stmts"
FAIL: gcc.dg/vect/vect-gather-1.c scan-tree-dump vect "Loop contains only SLP 
stmts"
FAIL: gcc.dg/vect/vect-gather-3.c -flto -ffat-lto-objects  scan-tree-dump vect 
"Loop contains only SLP stmts"
FAIL: gcc.dg/vect/vect-gather-3.c scan-tree-dump vect "Loop contains only SLP 
stmts"

The root cause of these FAIL is that GCC SLP failed on MASK_LEN_GATHER_LOAD.

We have 2 following situations of scalar recognized MASK_LEN_GATHER_LOAD:

1. conditional gather load: MASK_LEN_GATHER_LOAD (base, offset, scale, zero, 
condtional mask).
   
   This situation we just need to leverage the current MASK_GATHER_LOAD which 
can achieve SLP MASK_LEN_GATHER_LOAD.

2. un-conditional gather load: MASK_LEN_GATHER_LOAD (base, offset, scale, zero, 
-1)
   
   Current SLP check will failed on dummy mask -1, so we relax the check in 
tree-vect-slp.cc and allow it to be materialized.
    
Consider this following case:

void __attribute__((noipa))
f (int *restrict y, int *restrict x, int *restrict indices, int n)
{
  for (int i = 0; i < n; ++i)
    {
      y[i * 2] = x[indices[i * 2]] + 1;
      y[i * 2 + 1] = x[indices[i * 2 + 1]] + 2;
    }
}

https://godbolt.org/z/WG3M3n7Mo

GCC unable to SLP using VEC_LOAD_LANES/VEC_STORE_LANES:

f:
        ble     a3,zero,.L5
.L3:
        vsetvli a5,a3,e8,mf4,ta,ma
        vsetvli zero,a5,e32,m1,ta,ma
        vlseg2e32.v     v6,(a2)
        vsetvli a4,zero,e64,m2,ta,ma
        vsext.vf2       v2,v6
        vsll.vi v2,v2,2
        vsetvli zero,a5,e32,m1,ta,ma
        vluxei64.v      v1,(a1),v2
        vsetvli a4,zero,e64,m2,ta,ma
        vsext.vf2       v2,v7
        vsetvli zero,zero,e32,m1,ta,ma
        vadd.vi v4,v1,1
        vsetvli zero,zero,e64,m2,ta,ma
        vsll.vi v2,v2,2
        vsetvli zero,a5,e32,m1,ta,ma
        vluxei64.v      v2,(a1),v2
        vsetvli a4,zero,e32,m1,ta,ma
        slli    a6,a5,3
        vadd.vi v5,v2,2
        sub     a3,a3,a5
        vsetvli zero,a5,e32,m1,ta,ma
        vsseg2e32.v     v4,(a0)
        add     a2,a2,a6
        add     a0,a0,a6
        bne     a3,zero,.L3
.L5:
        ret

After this patch:

f:
        ble     a3,zero,.L5
        li      a5,1
        csrr    t1,vlenb
        slli    a5,a5,33
        srli    a7,t1,2
        addi    a5,a5,1
        slli    a3,a3,1
        neg     t3,a7
        vsetvli a4,zero,e64,m1,ta,ma
        vmv.v.x v4,a5
.L3:
        minu    a5,a3,a7
        vsetvli zero,a5,e32,m1,ta,ma
        vle32.v v1,0(a2)
        vsetvli a4,zero,e64,m2,ta,ma
        vsext.vf2       v2,v1
        vsll.vi v2,v2,2
        vsetvli zero,a5,e32,m1,ta,ma
        vluxei64.v      v2,(a1),v2
        vsetvli a4,zero,e32,m1,ta,ma
        mv      a6,a3
        vadd.vv v2,v2,v4
        vsetvli zero,a5,e32,m1,ta,ma
        vse32.v v2,0(a0)
        add     a2,a2,t1
        add     a0,a0,t1
        add     a3,a3,t3
        bgtu    a6,a7,.L3
.L5:
        ret

Note that I found we are missing conditional mask gather_load SLP test, Append 
a test for it in this patch.

Tested on RISC-V and Bootstrap && Regression on X86 passed.

Ok for trunk ?

gcc/ChangeLog:

        * tree-vect-slp.cc (vect_get_operand_map): Add MASK_LEN_GATHER_LOAD.
        (vect_get_and_check_slp_defs): Ditto.
        (vect_build_slp_tree_1): Ditto.
        (vect_build_slp_tree_2): Ditto.
        * tree-vect-stmts.cc (vectorizable_load): Ditto.

gcc/testsuite/ChangeLog:

        * gcc.dg/vect/vect-gather-6.c: New test.

---
 gcc/testsuite/gcc.dg/vect/vect-gather-6.c | 15 +++++++++++++++
 gcc/tree-vect-slp.cc                      | 22 ++++++++++++++++++----
 gcc/tree-vect-stmts.cc                    | 10 +++++++++-
 3 files changed, 42 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-gather-6.c

diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-6.c 
b/gcc/testsuite/gcc.dg/vect/vect-gather-6.c
new file mode 100644
index 00000000000..ff55f321854
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-gather-6.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+
+void
+f (int *restrict y, int *restrict x, int *restrict indices, int *restrict 
cond, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      if (cond[i * 2])
+       y[i * 2] = x[indices[i * 2]] + 1;
+      if (cond[i * 2 + 1])
+       y[i * 2 + 1] = x[indices[i * 2 + 1]] + 2;
+    }
+}
+
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect { target 
vect_gather_load_ifn } } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index fa098f9ff4e..38fe6ba6296 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -542,6 +542,7 @@ vect_get_operand_map (const gimple *stmt, unsigned char 
swap = 0)
            return arg1_map;
 
          case IFN_MASK_GATHER_LOAD:
+         case IFN_MASK_LEN_GATHER_LOAD:
            return arg1_arg4_map;
 
          case IFN_MASK_STORE:
@@ -700,8 +701,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char 
swap,
        {
          tree type = TREE_TYPE (oprnd);
          dt = dts[i];
-         if ((dt == vect_constant_def
-              || dt == vect_external_def)
+         if (dt == vect_external_def
              && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
              && (TREE_CODE (type) == BOOLEAN_TYPE
                  || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
@@ -713,6 +713,17 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned 
char swap,
                                 "for variable-length SLP %T\n", oprnd);
              return -1;
            }
+         if (dt == vect_constant_def
+             && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
+             && !can_duplicate_and_interleave_p (vinfo, stmts.length (), type))
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "Build SLP failed: invalid type of def "
+                                "for variable-length SLP %T\n",
+                                oprnd);
+             return -1;
+           }
 
          /* For the swapping logic below force vect_reduction_def
             for the reduction op in a SLP reduction group.  */
@@ -1077,7 +1088,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char 
*swap,
 
          if (cfn == CFN_MASK_LOAD
              || cfn == CFN_GATHER_LOAD
-             || cfn == CFN_MASK_GATHER_LOAD)
+             || cfn == CFN_MASK_GATHER_LOAD
+             || cfn == CFN_MASK_LEN_GATHER_LOAD)
            ldst_p = true;
          else if (cfn == CFN_MASK_STORE)
            {
@@ -1337,6 +1349,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char 
*swap,
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))
              && rhs_code != CFN_GATHER_LOAD
              && rhs_code != CFN_MASK_GATHER_LOAD
+             && rhs_code != CFN_MASK_LEN_GATHER_LOAD
              /* Not grouped loads are handled as externals for BB
                 vectorization.  For loop vectorization we can handle
                 splats the same we handle single element interleaving.  */
@@ -1837,7 +1850,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
       if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
        gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
                    || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
-                   || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD));
+                   || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD)
+                   || gimple_call_internal_p (stmt, IFN_MASK_LEN_GATHER_LOAD));
       else
        {
          *max_nunits = this_max_nunits;
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index ce925cc1d53..994234eec08 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -9738,12 +9738,20 @@ vectorizable_load (vec_info *vinfo,
        return false;
 
       mask_index = internal_fn_mask_index (ifn);
+      slp_tree slp_op = NULL;
       if (mask_index >= 0 && slp_node)
        mask_index = vect_slp_child_index_for_operand (call, mask_index);
       if (mask_index >= 0
          && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
-                                     &mask, NULL, &mask_dt, &mask_vectype))
+                                     &mask, &slp_op, &mask_dt, &mask_vectype))
        return false;
+      if (slp_node && !vect_maybe_update_slp_op_vectype (slp_op, mask_vectype))
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "incompatible vector types for invariants\n");
+         return false;
+       }
     }
 
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-- 
2.36.3

Reply via email to