On Mon, 2020-08-31 at 04:06 -0500, Xiong Hu Luo via Gcc-patches wrote:
> vec_insert accepts 3 arguments, arg0 is input vector, arg1 is the value
> to be insert, arg2 is the place to insert arg1 to arg0.  This patch adds
> __builtin_vec_insert_v4si[v4sf,v2di,v2df,v8hi,v16qi] for vec_insert to
> not expand too early in gimple stage if arg2 is variable, to avoid generate
> store hit load instructions.
> 
> For Power9 V4SI:
>       addi 9,1,-16
>       rldic 6,6,2,60
>       stxv 34,-16(1)
>       stwx 5,9,6
>       lxv 34,-16(1)
> =>
>       addis 9,2,.LC0@toc@ha
>       addi 9,9,.LC0@toc@l
>       mtvsrwz 33,5
>       lxv 32,0(9)
>       sradi 9,6,2
>       addze 9,9
>       sldi 9,9,2
>       subf 9,9,6
>       subfic 9,9,3
>       sldi 9,9,2
>       subfic 9,9,20
>       lvsl 13,0,9
>       xxperm 33,33,45
>       xxperm 32,32,45
>       xxsel 34,34,33,32
> 
> Though instructions increase from 5 to 15, the performance is improved
> 60% in typical cases.

Ok.  :-)


(A bunch of nits below; no issues with the gist of the patch.)


> gcc/ChangeLog:
> 
>       * config/rs6000/altivec.md (altivec_lvsl_reg_<mode>2): Extend to
>       SDI mode.

(altivec_lvsl_reg): Rename to (altivec_lvsl_reg_<mode>2) and extend to SDI mode.


>       * config/rs6000/rs6000-builtin.def (BU_VSX_X): Add support
>       macros for vec_insert built-in functions.

Should this entry list the new VEC_INSERT_V16QI, VEC_INSERT_V8HI, ...
values instead of BU_VSX_X?  (I'd like a second opinion on that.)


>       * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
>       Generate built-in calls for vec_insert.

>       * config/rs6000/rs6000-call.c (altivec_expand_vec_insert_builtin):
>       New function.

>       (altivec_expand_builtin): Add case entry for
>       VSX_BUILTIN_VEC_INSERT_V16QI, VSX_BUILTIN_VEC_INSERT_V8HI,
>       VSX_BUILTIN_VEC_INSERT_V4SF,  VSX_BUILTIN_VEC_INSERT_V4SI,
>       VSX_BUILTIN_VEC_INSERT_V2DF,  VSX_BUILTIN_VEC_INSERT_V2DI.

Plural: "Add case entries for ...", since several are listed.  :-)


>       (altivec_init_builtins):

Add defines for __builtin_vec_insert_v16qi, __builtin_vec_insert_v8hi, ...


>       * config/rs6000/rs6000-protos.h (rs6000_expand_vector_insert):
>       New declear.

Typo: "declear" -> "declare" (or simply "New declaration.").

>       * config/rs6000/rs6000.c (rs6000_expand_vector_insert):
>       New function.



>       * config/rs6000/rs6000.md (FQHS): New mode iterator.
>       (FD): New mode iterator.
>       p8_mtvsrwz_v16qi<mode>2: New define_insn.
>       p8_mtvsrd_v16qi<mode>2: New define_insn.
>       * config/rs6000/vsx.md: Call gen_altivec_lvsl_reg_di2.
ok

> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.target/powerpc/pr79251.c: New test.
> ---
>  gcc/config/rs6000/altivec.md               |   4 +-
>  gcc/config/rs6000/rs6000-builtin.def       |   6 +
>  gcc/config/rs6000/rs6000-c.c               |  61 +++++++++
>  gcc/config/rs6000/rs6000-call.c            |  74 +++++++++++
>  gcc/config/rs6000/rs6000-protos.h          |   1 +
>  gcc/config/rs6000/rs6000.c                 | 146 +++++++++++++++++++++
>  gcc/config/rs6000/rs6000.md                |  19 +++
>  gcc/config/rs6000/vsx.md                   |   2 +-
>  gcc/testsuite/gcc.target/powerpc/pr79251.c |  23 ++++
>  9 files changed, 333 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79251.c
> 
> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
> index 0a2e634d6b0..66b636059a6 100644
> --- a/gcc/config/rs6000/altivec.md
> +++ b/gcc/config/rs6000/altivec.md
> @@ -2772,10 +2772,10 @@
>    DONE;
>  })
> 
> -(define_insn "altivec_lvsl_reg"
> +(define_insn "altivec_lvsl_reg_<mode>2"
>    [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
>       (unspec:V16QI
> -     [(match_operand:DI 1 "gpc_reg_operand" "b")]
> +     [(match_operand:SDI 1 "gpc_reg_operand" "b")]
>       UNSPEC_LVSL_REG))]
>    "TARGET_ALTIVEC"
>    "lvsl %0,0,%1"
> diff --git a/gcc/config/rs6000/rs6000-builtin.def 
> b/gcc/config/rs6000/rs6000-builtin.def
> index f9f0fece549..d095b365c14 100644
> --- a/gcc/config/rs6000/rs6000-builtin.def
> +++ b/gcc/config/rs6000/rs6000-builtin.def
> @@ -2047,6 +2047,12 @@ BU_VSX_X (VEC_INIT_V2DI,      "vec_init_v2di", CONST)
>  BU_VSX_X (VEC_SET_V1TI,            "vec_set_v1ti",   CONST)
>  BU_VSX_X (VEC_SET_V2DF,            "vec_set_v2df",   CONST)
>  BU_VSX_X (VEC_SET_V2DI,            "vec_set_v2di",   CONST)
> +BU_VSX_X (VEC_INSERT_V16QI,        "vec_insert_v16qi",       CONST)
> +BU_VSX_X (VEC_INSERT_V8HI,         "vec_insert_v8hi",        CONST)
> +BU_VSX_X (VEC_INSERT_V4SI,         "vec_insert_v4si",        CONST)
> +BU_VSX_X (VEC_INSERT_V4SF,         "vec_insert_v4sf",        CONST)
> +BU_VSX_X (VEC_INSERT_V2DI,         "vec_insert_v2di",        CONST)
> +BU_VSX_X (VEC_INSERT_V2DF,         "vec_insert_v2df",        CONST)
>  BU_VSX_X (VEC_EXT_V1TI,            "vec_ext_v1ti",   CONST)
>  BU_VSX_X (VEC_EXT_V2DF,            "vec_ext_v2df",   CONST)
>  BU_VSX_X (VEC_EXT_V2DI,            "vec_ext_v2di",   CONST)
> diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
> index 2fad3d94706..03b00738a5e 100644
> --- a/gcc/config/rs6000/rs6000-c.c
> +++ b/gcc/config/rs6000/rs6000-c.c
> @@ -1563,6 +1563,67 @@ altivec_resolve_overloaded_builtin (location_t loc, 
> tree fndecl,
>         return build_call_expr (call, 3, arg1, arg0, arg2);
>       }
> 
> +      else if (VECTOR_MEM_VSX_P (mode))
> +     {
> +       tree call = NULL_TREE;
> +
> +       arg2 = fold_for_warn (arg2);
> +
> +       /* If the second argument is variable, we can optimize it if we are
> +          generating 64-bit code on a machine with direct move.  */
> +       if (TREE_CODE (arg2) != INTEGER_CST && TARGET_DIRECT_MOVE_64BIT)
> +         {
> +           switch (mode)
> +             {
> +             default:
> +               break;
> +
> +             case E_V2DImode:
> +               call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V2DI];
> +               break;
> +
> +             case E_V2DFmode:
> +               call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V2DF];
> +               break;
> +
> +             case E_V4SFmode:
> +               call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V4SF];
> +               break;
> +
> +             case E_V4SImode:
> +               call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V4SI];
> +               break;
> +
> +             case E_V8HImode:
> +               call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V8HI];
> +               break;
> +
> +             case E_V16QImode:
> +               call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V16QI];
> +               break;
> +             }
> +         }
> +
> +       if (call)
> +         {
> +           if (TYPE_VECTOR_SUBPARTS (arg1_type) == 1)
> +             arg2 = build_int_cst (TREE_TYPE (arg2), 0);
> +           else
> +             arg2 = build_binary_op (
> +               loc, BIT_AND_EXPR, arg2,
> +               build_int_cst (TREE_TYPE (arg2),
> +                              TYPE_VECTOR_SUBPARTS (arg1_type) - 1),
> +               0);

                                                
Indentation nit there: the "loc, BIT_AND_EXPR, ..." arguments should start
on the previous line, right after "build_binary_op (".  If that greatly
messes up the indentation of the rest of the statement, use your judgement.


> +           tree result
> +             = build_call_expr (call, 3, arg1,
> +                                convert (TREE_TYPE (arg1_type), arg0),
> +                                convert (integer_type_node, arg2));
> +           /* Coerce the result to vector element type.  May be no-op.  */
> +           result = fold_convert (TREE_TYPE (arg1), result);
> +           return result;
> +         }
> +     }
> +
>        /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2) = arg0. */
>        arg1_inner_type = TREE_TYPE (arg1_type);
>        if (TYPE_VECTOR_SUBPARTS (arg1_type) == 1)
> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
> index e39cfcf672b..339e9ae87e3 100644
> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -10660,6 +10660,40 @@ altivec_expand_vec_set_builtin (tree exp)
>    return op0;
>  }
> 
> +/* Expand vec_insert builtin.  */
> +static rtx
> +altivec_expand_vec_insert_builtin (tree exp, rtx target)
> +{
> +  machine_mode tmode, mode1, mode2;
> +  tree arg0, arg1, arg2;
> +  rtx op0 = NULL_RTX, op1, op2;
> +
> +  arg0 = CALL_EXPR_ARG (exp, 0);
> +  arg1 = CALL_EXPR_ARG (exp, 1);
> +  arg2 = CALL_EXPR_ARG (exp, 2);
> +
> +  tmode = TYPE_MODE (TREE_TYPE (arg0));
> +  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
> +  mode2 = TYPE_MODE ((TREE_TYPE (arg2)));
> +  gcc_assert (VECTOR_MODE_P (tmode));
> +
> +  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
> +  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
> +  op2 = expand_expr (arg2, NULL_RTX, mode2, EXPAND_NORMAL);
> +
> +  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
> +    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
> +
> +  op0 = force_reg (tmode, op0);
> +  op1 = force_reg (mode1, op1);
> +  op2 = force_reg (mode2, op2);
> +
> +  target = gen_reg_rtx (V16QImode);

Should that be tmode, or is V16QImode always correct here?

> +  rs6000_expand_vector_insert (target, op0, op1, op2);
> +
> +  return target;
> +}
> +
>  /* Expand vec_ext builtin.  */
>  static rtx
>  altivec_expand_vec_ext_builtin (tree exp, rtx target)
> @@ -10922,6 +10956,14 @@ altivec_expand_builtin (tree exp, rtx target, bool 
> *expandedp)
>      case VSX_BUILTIN_VEC_SET_V1TI:
>        return altivec_expand_vec_set_builtin (exp);
> 
> +    case VSX_BUILTIN_VEC_INSERT_V16QI:
> +    case VSX_BUILTIN_VEC_INSERT_V8HI:
> +    case VSX_BUILTIN_VEC_INSERT_V4SF:
> +    case VSX_BUILTIN_VEC_INSERT_V4SI:
> +    case VSX_BUILTIN_VEC_INSERT_V2DF:
> +    case VSX_BUILTIN_VEC_INSERT_V2DI:
> +      return altivec_expand_vec_insert_builtin (exp, target);
> +
>      case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
>      case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
>      case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
> @@ -13681,6 +13723,38 @@ altivec_init_builtins (void)
>                                   integer_type_node, NULL_TREE);
>    def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
> 
> +  /* Access to the vec_insert patterns.  */
> +  ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
> +                                 intQI_type_node,
> +                                 integer_type_node, NULL_TREE);
> +  def_builtin ("__builtin_vec_insert_v16qi", ftype,
> +            VSX_BUILTIN_VEC_INSERT_V16QI);
> +
> +  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
> +                                 intHI_type_node,
> +                                 integer_type_node, NULL_TREE);
> +  def_builtin ("__builtin_vec_insert_v8hi", ftype, 
> VSX_BUILTIN_VEC_INSERT_V8HI);
> +
> +  ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
> +                                 integer_type_node,
> +                                 integer_type_node, NULL_TREE);
> +  def_builtin ("__builtin_vec_insert_v4si", ftype, 
> VSX_BUILTIN_VEC_INSERT_V4SI);
> +
> +  ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
> +                                 float_type_node,
> +                                 integer_type_node, NULL_TREE);
> +  def_builtin ("__builtin_vec_insert_v4sf", ftype, 
> VSX_BUILTIN_VEC_INSERT_V4SF);
> +
> +  ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
> +                                 intDI_type_node,
> +                                 integer_type_node, NULL_TREE);
> +  def_builtin ("__builtin_vec_insert_v2di", ftype, 
> VSX_BUILTIN_VEC_INSERT_V2DI);
> +
> +  ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
> +                                 double_type_node,
> +                                 integer_type_node, NULL_TREE);
> +  def_builtin ("__builtin_vec_insert_v2df", ftype, 
> VSX_BUILTIN_VEC_INSERT_V2DF);
> +
>    /* Access to the vec_extract patterns.  */
>    ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
>                                   integer_type_node, NULL_TREE);
> diff --git a/gcc/config/rs6000/rs6000-protos.h 
> b/gcc/config/rs6000/rs6000-protos.h
> index 28e859f4381..78b5b31d79f 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -58,6 +58,7 @@ extern bool rs6000_split_128bit_ok_p (rtx []);
>  extern void rs6000_expand_float128_convert (rtx, rtx, bool);
>  extern void rs6000_expand_vector_init (rtx, rtx);
>  extern void rs6000_expand_vector_set (rtx, rtx, int);
> +extern void rs6000_expand_vector_insert (rtx, rtx, rtx, rtx);
>  extern void rs6000_expand_vector_extract (rtx, rtx, rtx);
>  extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx);
>  extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode);
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index fe93cf6ff2b..afa845f3dff 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -6788,6 +6788,152 @@ rs6000_expand_vector_set (rtx target, rtx val, int 
> elt)
>    emit_insn (gen_rtx_SET (target, x));
>  }
> 
> +/* Insert value from VEC into idx of TARGET.  */
> +
> +void
> +rs6000_expand_vector_insert (rtx target, rtx vec, rtx val, rtx idx)
> +{
> +  machine_mode mode = GET_MODE (vec);
> +
> +  if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (idx))
> +      gcc_unreachable ();

Nit: the gcc_unreachable () line should be indented only 2 spaces relative
to the "if".

(My mailer has suddenly gotten confused about tabs and spaces, so here
and below the code may need spaces replaced with tabs, or it may just be
a problem on my end.)

> +  else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx)
> +        && TARGET_DIRECT_MOVE_64BIT)
> +    {
> +      gcc_assert (GET_MODE (idx) == E_SImode);
> +      machine_mode inner_mode = GET_MODE (val);
> +      HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
> +
> +      rtx tmp = gen_reg_rtx (GET_MODE (idx));
> +      if (GET_MODE_SIZE (inner_mode) == 8)
> +     {
> +       if (!BYTES_BIG_ENDIAN)
> +         {
> +           /*  idx = 1 - idx.  */
> +           emit_insn (gen_subsi3 (tmp, GEN_INT (1), idx));
> +           /*  idx = idx * 8.  */
> +           emit_insn (gen_ashlsi3 (tmp, tmp, GEN_INT (3)));
> +           /*  idx = 16 - idx.  */
> +           emit_insn (gen_subsi3 (tmp, GEN_INT (16), tmp));
> +         }
> +       else
> +         {
> +           emit_insn (gen_ashlsi3 (tmp, idx, GEN_INT (3)));
> +           emit_insn (gen_subsi3 (tmp, GEN_INT (16), tmp));
> +         }
> +     }
> +      else if (GET_MODE_SIZE (inner_mode) == 4)
> +     {
> +       if (!BYTES_BIG_ENDIAN)
> +         {
> +           /*  idx = 3 - idx.  */
> +           emit_insn (gen_subsi3 (tmp, GEN_INT (3), idx));
> +           /*  idx = idx * 4.  */
> +           emit_insn (gen_ashlsi3 (tmp, tmp, GEN_INT (2)));
> +           /*  idx = 20 - idx.  */
> +           emit_insn (gen_subsi3 (tmp, GEN_INT (20), tmp));
> +         }
> +       else
> +       {
> +           emit_insn (gen_ashlsi3 (tmp, idx, GEN_INT (2)));
> +           emit_insn (gen_subsi3 (tmp, GEN_INT (20), tmp));
> +       }
> +     }
> +      else if (GET_MODE_SIZE (inner_mode) == 2)
> +     {
> +       if (!BYTES_BIG_ENDIAN)
> +         {
> +           /*  idx = 7 - idx.  */
> +           emit_insn (gen_subsi3 (tmp, GEN_INT (7), idx));
> +           /*  idx = idx * 2.  */
> +           emit_insn (gen_ashlsi3 (tmp, tmp, GEN_INT (1)));
> +           /*  idx = 22 - idx.  */
> +           emit_insn (gen_subsi3 (tmp, GEN_INT (22), tmp));
> +         }
> +       else
> +         {
> +           emit_insn (gen_ashlsi3 (tmp, tmp, GEN_INT (1)));
> +           emit_insn (gen_subsi3 (tmp, GEN_INT (22), idx));
> +         }
> +     }
> +      else if (GET_MODE_SIZE (inner_mode) == 1)
> +     if (!BYTES_BIG_ENDIAN)
> +       emit_insn (gen_addsi3 (tmp, idx, GEN_INT (8)));
> +     else
> +       emit_insn (gen_subsi3 (tmp, GEN_INT (23), idx));
> +      else
> +     gcc_unreachable ();
> +
> +      /*  lxv vs32, mask.
> +       DImode: 0xffffffffffffffff0000000000000000
> +       SImode: 0x00000000ffffffff0000000000000000
> +       HImode: 0x000000000000ffff0000000000000000.
> +       QImode: 0x00000000000000ff0000000000000000.  */
good. :-)

> +      rtx mask = gen_reg_rtx (V16QImode);
> +      rtx mask_v2di = gen_reg_rtx (V2DImode);
> +      rtvec v = rtvec_alloc (2);
> +      if (!BYTES_BIG_ENDIAN)
> +     {
> +       RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
> +       RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
> +     }
> +      else
> +      {
> +       RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
> +       RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
> +     }
> +      emit_insn (
> +     gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v)));
> +      rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0);
> +      emit_insn (gen_rtx_SET (mask, sub_mask));
> +
> +      /*  mtvsrd[wz] f0,val.  */
> +      rtx val_v16qi = gen_reg_rtx (V16QImode);
> +      switch (inner_mode)
> +     {
> +     default:
> +       gcc_unreachable ();
> +       break;
> +     case E_QImode:
> +       emit_insn (gen_p8_mtvsrwz_v16qiqi2 (val_v16qi, val));
> +       break;
> +     case E_HImode:
> +       emit_insn (gen_p8_mtvsrwz_v16qihi2 (val_v16qi, val));
> +       break;
> +     case E_SImode:
> +       emit_insn (gen_p8_mtvsrwz_v16qisi2 (val_v16qi, val));
> +       break;
> +     case E_SFmode:
> +       emit_insn (gen_p8_mtvsrwz_v16qisf2 (val_v16qi, val));
> +       break;
> +     case E_DImode:
> +       emit_insn (gen_p8_mtvsrd_v16qidi2 (val_v16qi, val));
> +       break;
> +     case E_DFmode:
> +       emit_insn (gen_p8_mtvsrd_v16qidf2 (val_v16qi, val));
> +       break;
> +     }
> +
> +      /*  lvsl    v1,0,idx.  */
> +      rtx pcv = gen_reg_rtx (V16QImode);
> +      emit_insn (gen_altivec_lvsl_reg_si2 (pcv, tmp));
> +
> +      /*  xxperm  vs0,vs0,vs33.  */
> +      /*  xxperm  vs32,vs32,vs33.  */
> +      rtx val_perm = gen_reg_rtx (V16QImode);
> +      rtx mask_perm = gen_reg_rtx (V16QImode);
> +      emit_insn (
> +     gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv));
> +      emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
> +
> +      rtx sub_target = simplify_gen_subreg (V16QImode, vec, mode, 0);
> +      emit_insn (gen_rtx_SET (target, sub_target));
> +
> +      /*  xxsel   vs34,vs34,vs0,vs32.  */
> +      emit_insn (gen_vector_select_v16qi (target, target, val_perm, 
> mask_perm));
> +    }
> +}
> +
>  /* Extract field ELT from VEC into TARGET.  */
> 
>  void
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 43b620ae1c0..b02fda836d4 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -8713,6 +8713,25 @@
>    "mtvsrwz %x0,%1"
>    [(set_attr "type" "mftgpr")])
> 
> +(define_mode_iterator FQHS [SF QI HI SI])
> +(define_mode_iterator FD [DF DI])
> +
> +(define_insn "p8_mtvsrwz_v16qi<mode>2"
> +  [(set (match_operand:V16QI 0 "register_operand" "=wa")
> +     (unspec:V16QI [(match_operand:FQHS 1 "register_operand" "r")]
> +                UNSPEC_P8V_MTVSRWZ))]
> +  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
> +  "mtvsrwz %x0,%1"
> +  [(set_attr "type" "mftgpr")])
> +
> +(define_insn "p8_mtvsrd_v16qi<mode>2"
> +  [(set (match_operand:V16QI 0 "register_operand" "=wa")
> +     (unspec:V16QI [(match_operand:FD 1 "register_operand" "r")]
> +                UNSPEC_P8V_MTVSRD))]
> +  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
> +  "mtvsrd %x0,%1"
> +  [(set_attr "type" "mftgpr")])
> +
>  (define_insn_and_split "reload_fpr_from_gpr<mode>"
>    [(set (match_operand:FMOVE64X 0 "register_operand" "=d")
>       (unspec:FMOVE64X [(match_operand:FMOVE64X 1 "register_operand" "r")]
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index dd750210758..7e82690d12d 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -5349,7 +5349,7 @@
>    rtx rtx_vtmp = gen_reg_rtx (V16QImode);
>    rtx tmp = gen_reg_rtx (DImode);
> 
> -  emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
> +  emit_insn (gen_altivec_lvsl_reg_di2 (shift_mask, operands[2]));
>    emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
>    emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
>    emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr79251.c 
> b/gcc/testsuite/gcc.target/powerpc/pr79251.c
> new file mode 100644
> index 00000000000..877659a0146
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr79251.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_p9vector_ok } */
> +/* { dg-require-effective-target lp64 } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power9 -maltivec" } */
> +
> +#include <stddef.h>
> +#include <altivec.h>
> +
> +#define TYPE int

Is testing only the int type sufficient coverage?  (Are there other
existing tests that cover the remaining element types?)

thanks,
-Will

> +  
> +__attribute__ ((noinline))
> +vector TYPE test (vector TYPE v, TYPE i, size_t n)
> +{
> +  vector TYPE v1 = v;
> +  v1 = vec_insert (i, v, n);
> +
> +  return v1;
> +}
> +
> +/* { dg-final { scan-assembler-not {\mstxw\M} } } */
> +/* { dg-final { scan-assembler-times {\mlvsl\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mxxperm\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mxxsel\M} 1 } } */


Reply via email to