This improves SLP discovery in the face of existing vectors by allowing
punning of the vector shape (or even punning from an integer type).
For punning from integer types, this does not yet handle lane-zero
extraction being represented as a conversion rather than a BIT_FIELD_REF.

On x86, for example, this improves the code generated for the added testcase from

foo:
.LFB0:
        .cfi_startproc
        movdqa  (%rsi), %xmm0
        movdqa  %xmm0, %xmm2
        movdqa  %xmm0, %xmm1
        punpckhdq       %xmm0, %xmm2
        movdqa  %xmm2, %xmm3
        pshufd  $85, %xmm0, %xmm2
        pshufd  $255, %xmm0, %xmm0
        punpckldq       %xmm0, %xmm2
        movdqa  %xmm1, %xmm0
        punpckldq       %xmm3, %xmm0
        punpcklqdq      %xmm2, %xmm0
        movaps  %xmm0, (%rdi)
        ret

and

bar:
.LFB1:
        .cfi_startproc
        movq    (%rsi), %rax
        movq    8(%rsi), %rdx
        sarq    $32, %rax
        movd    %edx, %xmm3
        movq    %rax, %xmm0
        movq    %rdx, %rax
        sarq    $32, %rax
        movdqa  %xmm0, %xmm1
        punpckldq       %xmm3, %xmm0
        movd    %eax, %xmm2
        punpckldq       %xmm2, %xmm1
        punpcklqdq      %xmm1, %xmm0
        movaps  %xmm0, (%rdi)
        ret

to just

foo:
.LFB0:
        .cfi_startproc
        pshufd  $216, (%esi), %xmm0
        movaps  %xmm0, (%edi)
        ret

and

bar:
.LFB1:
        .cfi_startproc
        pshufd  $217, (%esi), %xmm0
        movaps  %xmm0, (%edi)
        ret

Bootstrap & regtest running on x86_64-unknown-linux-gnu.

2021-01-13  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/92645
        * tree-vect-slp.c (vect_build_slp_tree_1): Relax supported
        BIT_FIELD_REF argument.
        (vect_build_slp_tree_2): Record the desired vector type
        on the external vector def.
        (vectorizable_slp_permutation): Handle required punning
        of existing vector defs.

        * gcc.target/i386/pr92645-6.c: New testcase.
---
 gcc/testsuite/gcc.target/i386/pr92645-6.c | 34 +++++++++++++++++++++++
 gcc/tree-vect-slp.c                       | 31 +++++++++++++++++++--
 2 files changed, 63 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr92645-6.c

diff --git a/gcc/testsuite/gcc.target/i386/pr92645-6.c b/gcc/testsuite/gcc.target/i386/pr92645-6.c
new file mode 100644
index 00000000000..c5c5f8f8df2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92645-6.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O3 -msse2" } */
+
+typedef long v2di __attribute__((vector_size(16)));
+typedef int v4si __attribute__((vector_size(16)));
+
+void foo (v4si *p, v2di *q)
+{
+  union { v2di a; v4si b; } u;
+  u.a = *q;
+  (*p)[0] = u.b[0];
+  (*p)[1] = u.b[2];
+  (*p)[2] = u.b[1];
+  (*p)[3] = u.b[3];
+}
+
+void bar (v4si *p, __int128_t *q)
+{
+  union { __int128_t a; v4si b; } u;
+  u.a = *q;
+  (*p)[0] = u.b[1];
+  (*p)[1] = u.b[2];
+  (*p)[2] = u.b[1];
+  (*p)[3] = u.b[3];
+}
+
+/* Both functions should end up with sth like
+     [v]pshufd $val, (%esi), %xmm0
+     [v]movdqa %xmm0, (%edi)
+     ret
+   recognized by SLP vectorization involving an existing "vector".  */
+/* { dg-final { scan-assembler-not "punpck" } } */
+/* { dg-final { scan-assembler-times "pshufd" 2 } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 8670d5455b9..c2a3d46c6e7 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1109,7 +1109,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
              tree vec = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
              if (!is_a <bb_vec_info> (vinfo)
                  || TREE_CODE (vec) != SSA_NAME
-                 || !types_compatible_p (vectype, TREE_TYPE (vec)))
+                 || !operand_equal_p (TYPE_SIZE (vectype),
+                                      TYPE_SIZE (TREE_TYPE (vec))))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -1721,7 +1722,11 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
          lperm.safe_push (std::make_pair (0, (unsigned)lane));
        }
       slp_tree vnode = vect_create_new_slp_node (vNULL);
-      SLP_TREE_VECTYPE (vnode) = TREE_TYPE (vec);
+      /* ???  We record vectype here but we hide eventually necessary
+        punning and instead rely on code generation to materialize
+        VIEW_CONVERT_EXPRs as necessary.  We instead should make
+        this explicit somehow.  */
+      SLP_TREE_VECTYPE (vnode) = vectype;
       SLP_TREE_VEC_DEFS (vnode).safe_push (vec);
       /* We are always building a permutation node even if it is an identity
         permute to shield the rest of the vectorizer from the odd node
@@ -6114,6 +6119,18 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
              slp_tree first_node = SLP_TREE_CHILDREN (node)[first_vec.first];
              tree first_def
                = vect_get_slp_vect_def (first_node, first_vec.second);
+             /* ???  We SLP match existing vector element extracts but
+                allow punning which we need to re-instantiate at uses
+                but have no good way of explicitly representing.  */
+             if (!types_compatible_p (TREE_TYPE (first_def), vectype))
+               {
+                 gassign *conv_stmt;
+                 conv_stmt = gimple_build_assign (make_ssa_name (vectype),
+                                                  build1 (VIEW_CONVERT_EXPR,
+                                                          vectype, first_def));
+                 vect_finish_stmt_generation (vinfo, NULL, conv_stmt, gsi);
+                 first_def = gimple_assign_lhs (conv_stmt);
+               }
              gassign *perm_stmt;
              tree perm_dest = make_ssa_name (vectype);
              if (!identity_p)
@@ -6122,6 +6139,16 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
                    = SLP_TREE_CHILDREN (node)[second_vec.first];
                  tree second_def
                    = vect_get_slp_vect_def (second_node, second_vec.second);
+                 if (!types_compatible_p (TREE_TYPE (second_def), vectype))
+                   {
+                     gassign *conv_stmt;
+                     conv_stmt = gimple_build_assign (make_ssa_name (vectype),
+                                                      build1
+                                                        (VIEW_CONVERT_EXPR,
+                                                         vectype, second_def));
+                     vect_finish_stmt_generation (vinfo, NULL, conv_stmt, gsi);
+                     second_def = gimple_assign_lhs (conv_stmt);
+                   }
                  tree mask_vec = vect_gen_perm_mask_checked (vectype, indices);
                  perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
                                                   first_def, second_def,
-- 
2.26.2

Reply via email to