On Wed, 2 Oct 2019, Jakub Jelinek wrote:

> On Wed, Nov 09, 2016 at 09:14:55AM +0100, Richard Biener wrote:
> > The following implements vectorization of bswap via VEC_PERM_EXPR
> > on the corresponding QImode vector.
> > 
> > ARM already has backend handling via the builtin_vectorized_call
> > hook and thus there were already testcases available.  It doesn't
> > end up working for vect-bswap16.c because we have a promoted
> > argument to __builtin_bswap16 which confuses vectorization.
> 
> Indeed.  The following patch handles that in tree-vect-patterns.c.
> If it sees a __builtin_bswap16 with a promoted argument, it checks whether
> we'd vectorize the builtin if the argument weren't promoted; if so, it
> emits a pattern_stmt that either uses the unpromoted argument directly or
> first casts the argument to the right type.  This works e.g. for the SSE4
> case.
> Otherwise, it treats __builtin_bswap16 (x) as the rotate x r<< 8: if that
> rotate is vectorizable, it emits a pattern_stmt with x r<< 8; if it isn't,
> it falls back to (x << 8) | (x >> 8), provided that can be vectorized.
> The last case matters for SSE2.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2019-10-02  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR tree-optimization/91940
>       * tree-vect-patterns.c: Include tree-vector-builder.h and
>       vec-perm-indices.h.
>       (vect_recog_rotate_pattern): Also handle __builtin_bswap16, either by
>       unpromoting the argument back to uint16_t, or by converting into a
>       rotate, or into shifts plus ior.
> 
>       * gcc.dg/vect/vect-bswap16.c: Add -msse4 on x86, run on all targets,
>       expect vectorized 1 loops message on both vect_bswap and sse4_runtime
>       targets.
>       * gcc.dg/vect/vect-bswap16a.c: New test.
> 
> --- gcc/tree-vect-patterns.c.jj       2019-09-20 12:25:48.186387075 +0200
> +++ gcc/tree-vect-patterns.c  2019-10-01 11:29:18.229215895 +0200
> @@ -46,6 +46,8 @@ along with GCC; see the file COPYING3.
>  #include "cgraph.h"
>  #include "omp-simd-clone.h"
>  #include "predict.h"
> +#include "tree-vector-builder.h"
> +#include "vec-perm-indices.h"
>  
>  /* Return true if we have a useful VR_RANGE range for VAR, storing it
>     in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */
> @@ -2168,24 +2170,107 @@ vect_recog_rotate_pattern (stmt_vec_info
>    enum vect_def_type dt;
>    optab optab1, optab2;
>    edge ext_def = NULL;
> +  bool bswap16_p = false;
>  
> -  if (!is_gimple_assign (last_stmt))
> -    return NULL;
> -
> -  rhs_code = gimple_assign_rhs_code (last_stmt);
> -  switch (rhs_code)
> +  if (is_gimple_assign (last_stmt))
>      {
> -    case LROTATE_EXPR:
> -    case RROTATE_EXPR:
> -      break;
> -    default:
> -      return NULL;
> +      rhs_code = gimple_assign_rhs_code (last_stmt);
> +      switch (rhs_code)
> +     {
> +     case LROTATE_EXPR:
> +     case RROTATE_EXPR:
> +       break;
> +     default:
> +       return NULL;
> +     }
> +
> +      lhs = gimple_assign_lhs (last_stmt);
> +      oprnd0 = gimple_assign_rhs1 (last_stmt);
> +      type = TREE_TYPE (oprnd0);
> +      oprnd1 = gimple_assign_rhs2 (last_stmt);
> +    }
> +  else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
> +    {
> +      /* __builtin_bswap16 (x) is another form of x r>> 8.
> +      The vectorizer has bswap support, but only if the argument isn't
> +      promoted.  */
> +      lhs = gimple_call_lhs (last_stmt);
> +      oprnd0 = gimple_call_arg (last_stmt, 0);
> +      type = TREE_TYPE (oprnd0);
> +      if (TYPE_PRECISION (TREE_TYPE (lhs)) != 16
> +       || TYPE_PRECISION (type) <= 16
> +       || TREE_CODE (oprnd0) != SSA_NAME
> +       || BITS_PER_UNIT != 8
> +       || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
> +     return NULL;
> +
> +      stmt_vec_info def_stmt_info;
> +      if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
> +     return NULL;
> +
> +      if (dt != vect_internal_def)
> +     return NULL;
> +
> +      if (gimple_assign_cast_p (def_stmt))
> +     {
> +       def = gimple_assign_rhs1 (def_stmt);
> +       if (INTEGRAL_TYPE_P (TREE_TYPE (def))
> +           && TYPE_PRECISION (TREE_TYPE (def)) == 16)
> +         oprnd0 = def;
> +     }
> +
> +      type = TREE_TYPE (lhs);
> +      vectype = get_vectype_for_scalar_type (type);
> +      if (vectype == NULL_TREE)
> +     return NULL;
> +
> +      if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
> +     {
> +       /* The encoding uses one stepped pattern for each byte in the
> +          16-bit word.  */
> +       vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
> +       for (unsigned i = 0; i < 3; ++i)
> +         for (unsigned j = 0; j < 2; ++j)
> +           elts.quick_push ((i + 1) * 2 - j - 1);
> +
> +       vec_perm_indices indices (elts, 1,
> +                                 TYPE_VECTOR_SUBPARTS (char_vectype));
> +       if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
> +         {
> +           /* vectorizable_bswap can handle the __builtin_bswap16 if we
> +              undo the argument promotion.  */
> +           if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
> +             {
> +               def = vect_recog_temp_ssa_var (type, NULL);
> +               def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
> +               append_pattern_def_seq (stmt_vinfo, def_stmt);
> +               oprnd0 = def;
> +             }
> +
> +           /* Pattern detected.  */
> +           vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
> +
> +           *type_out = vectype;
> +
> +           /* Pattern supported.  Create a stmt to be used to replace the
> +              pattern, with the unpromoted argument.  */
> +           var = vect_recog_temp_ssa_var (type, NULL);
> +           pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
> +                                             1, oprnd0);
> +           gimple_call_set_lhs (pattern_stmt, var);
> +           gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
> +                                   gimple_call_fntype (last_stmt));
> +           return pattern_stmt;
> +         }
> +     }
> +
> +      oprnd1 = build_int_cst (integer_type_node, 8);
> +      rhs_code = LROTATE_EXPR;
> +      bswap16_p = true;
>      }
> +  else
> +    return NULL;
>  
> -  lhs = gimple_assign_lhs (last_stmt);
> -  oprnd0 = gimple_assign_rhs1 (last_stmt);
> -  type = TREE_TYPE (oprnd0);
> -  oprnd1 = gimple_assign_rhs2 (last_stmt);
>    if (TREE_CODE (oprnd0) != SSA_NAME
>        || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
>        || !INTEGRAL_TYPE_P (type)
> @@ -2210,14 +2295,39 @@ vect_recog_rotate_pattern (stmt_vec_info
>    optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
>    if (optab1
>        && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
> -    return NULL;
> +    {
> +     use_rotate:
> +      if (bswap16_p)
> +     {
> +       if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
> +         {
> +           def = vect_recog_temp_ssa_var (type, NULL);
> +           def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
> +           append_pattern_def_seq (stmt_vinfo, def_stmt);
> +           oprnd0 = def;
> +         }
> +
> +       /* Pattern detected.  */
> +       vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
> +
> +       *type_out = vectype;
> +
> +       /* Pattern supported.  Create a stmt to be used to replace the
> +          pattern.  */
> +       var = vect_recog_temp_ssa_var (type, NULL);
> +       pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
> +                                           oprnd1);
> +       return pattern_stmt;
> +     }
> +      return NULL;
> +    }
>  
>    if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
>      {
>        optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
>        if (optab2
>         && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
> -     return NULL;
> +     goto use_rotate;
>      }
>  
>    /* If vector/vector or vector/scalar shifts aren't supported by the target,
> @@ -2242,6 +2352,14 @@ vect_recog_rotate_pattern (stmt_vec_info
>  
>    *type_out = vectype;
>  
> +  if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
> +    {
> +      def = vect_recog_temp_ssa_var (type, NULL);
> +      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
> +      append_pattern_def_seq (stmt_vinfo, def_stmt);
> +      oprnd0 = def;
> +    }
> +
>    if (dt == vect_external_def
>        && TREE_CODE (oprnd1) == SSA_NAME)
>      ext_def = vect_get_external_def_edge (vinfo, oprnd1);
> --- gcc/testsuite/gcc.dg/vect/vect-bswap16.c.jj       2017-11-09 20:24:05.957112733 +0100
> +++ gcc/testsuite/gcc.dg/vect/vect-bswap16.c  2019-10-01 12:39:56.993268063 +0200
> @@ -1,4 +1,4 @@
> -/* { dg-require-effective-target vect_bswap } */
> +/* { dg-additional-options "-msse4" { target sse4_runtime } } */
>  
>  #include "tree-vect.h"
>  
> @@ -39,4 +39,4 @@ main (void)
>    return 0;
>  }
>  
> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_bswap || sse4_runtime } } } } */
> --- gcc/testsuite/gcc.dg/vect/vect-bswap16a.c.jj      2019-10-01 12:35:29.965294527 +0200
> +++ gcc/testsuite/gcc.dg/vect/vect-bswap16a.c 2019-10-01 12:40:45.700533603 +0200
> @@ -0,0 +1,5 @@
> +/* { dg-additional-options "-msse2 -mno-sse3" { target sse2_runtime } } */
> +
> +#include "vect-bswap16.c"
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_shift } } } } */
> 
> 
>       Jakub
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 247165 (AG München)

Reply via email to