On Tue, 2021-01-26 at 01:46 -0600, Xionghu Luo via Gcc-patches wrote:
> From: "luo...@cn.ibm.com" <luo...@cn.ibm.com>
> 
> UNSPEC_SI_FROM_SF is not supported when TARGET_DIRECT_MOVE_64BIT
> is false for -m32, don't generate VIEW_CONVERT_EXPR(ARRAY_REF) for
> variable vector insert.  Remove rs6000_expand_vector_set_var helper
> function, adjust the p8 and p9 definitions position and make them
> static.
> 
> The previous commit r11-6858 missed check m32, This patch is tested pass
> on P7BE{m32,m64}/P8BE{m32,m64}/P8LE/P9LE with
> RUNTESTFLAGS="--target_board =unix'{-m32,-m64}" for BE targets.
> 
> gcc/ChangeLog:
> 
>       2021-01-26  Xionghu Luo  <luo...@linux.ibm.com>
>           David Edelsohn <dje....@gmail.com>
> 
>       PR target/98799
>       * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
>       Don't generate VIEW_CONVERT_EXPR for m32.

This is hinted at in the description, but it would be good to be clear
in the changelog too.

Consider something like "Don't generate VIEW_CONVERT_EXPR for fcode
ALTIVEC_BUILTIN_VEC_INSERT when -m32."


>       * config/rs6000/rs6000-protos.h (rs6000_expand_vector_set_var):
>       Delete.


>       * config/rs6000/rs6000.c (rs6000_expand_vector_set): Remove the
>       wrapper call rs6000_expand_vector_set_var. Call
>       rs6000_expand_vector_set_var_p9 and rs6000_expand_vector_set_var_p8
>       directly.
>       (rs6000_expand_vector_set_var): Delete.

The diff conflates the deleted function with the changes to an existing
function, making it harder to sort out...   

Was/is deleting the rs6000_expand_vector_set_var() helper necessary for
this fix, or just cleanup?   


Add:
        (rs6000_expand_vector_set_var_p9): Make static.
        (rs6000_expand_vector_set_var_p8): Make static.





> 
> gcc/testsuite/ChangeLog:
> 
>       2021-01-26  Xionghu Luo  <luo...@linux.ibm.com>
> 
>       PR target/98827
>       * gcc.target/powerpc/fold-vec-insert-char-p8.c: Adjust ilp32.
>       * gcc.target/powerpc/fold-vec-insert-char-p9.c: Likewise.
>       * gcc.target/powerpc/fold-vec-insert-double.c: Likewise.
>       * gcc.target/powerpc/fold-vec-insert-float-p8.c: Likewise.
>       * gcc.target/powerpc/fold-vec-insert-float-p9.c: Likewise.
>       * gcc.target/powerpc/fold-vec-insert-int-p8.c: Likewise.
>       * gcc.target/powerpc/fold-vec-insert-int-p9.c: Likewise.
>       * gcc.target/powerpc/fold-vec-insert-longlong.c: Likewise.
>       * gcc.target/powerpc/fold-vec-insert-short-p8.c: Likewise.
>       * gcc.target/powerpc/fold-vec-insert-short-p9.c: Likewise.
>       * gcc.target/powerpc/pr79251.p8.c: Likewise.
>       * gcc.target/powerpc/pr79251.p9.c: Likewise.
>       * gcc.target/powerpc/vsx-builtin-7.c: Likewise.

At a glance, those changes look OK.


> ---
>  gcc/config/rs6000/rs6000-c.c                  |   2 +-
>  gcc/config/rs6000/rs6000-protos.h             |   1 -
>  gcc/config/rs6000/rs6000.c                    | 236 +++++++++---------
>  .../powerpc/fold-vec-insert-char-p8.c         |  14 +-
>  .../powerpc/fold-vec-insert-char-p9.c         |   6 +-
>  .../powerpc/fold-vec-insert-double.c          |  10 +-
>  .../powerpc/fold-vec-insert-float-p8.c        |  12 +-
>  .../powerpc/fold-vec-insert-float-p9.c        |   6 +-
>  .../powerpc/fold-vec-insert-int-p8.c          |  13 +-
>  .../powerpc/fold-vec-insert-int-p9.c          |   9 +-
>  .../powerpc/fold-vec-insert-longlong.c        |   8 +-
>  .../powerpc/fold-vec-insert-short-p8.c        |  10 +-
>  .../powerpc/fold-vec-insert-short-p9.c        |  13 +-
>  gcc/testsuite/gcc.target/powerpc/pr79251.p8.c |  17 +-
>  gcc/testsuite/gcc.target/powerpc/pr79251.p9.c |  16 +-
>  .../gcc.target/powerpc/vsx-builtin-7.c        |   2 +-
>  16 files changed, 203 insertions(+), 172 deletions(-)
> 
> diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
> index f6ee1e61b56..656cdb39f3f 100644
> --- a/gcc/config/rs6000/rs6000-c.c
> +++ b/gcc/config/rs6000/rs6000-c.c
> @@ -1600,7 +1600,7 @@ altivec_resolve_overloaded_builtin (location_t loc, 
> tree fndecl,
>         stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
>       }
> 
> -      if (TARGET_P8_VECTOR)
> +      if (TARGET_P8_VECTOR && TARGET_DIRECT_MOVE_64BIT)
>       {
>         stmt = build_array_ref (loc, stmt, arg2);
>         stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt,
> diff --git a/gcc/config/rs6000/rs6000-protos.h 
> b/gcc/config/rs6000/rs6000-protos.h
> index 9a46a414743..9cca7325d0d 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -58,7 +58,6 @@ extern bool rs6000_split_128bit_ok_p (rtx []);
>  extern void rs6000_expand_float128_convert (rtx, rtx, bool);
>  extern void rs6000_expand_vector_init (rtx, rtx);
>  extern void rs6000_expand_vector_set (rtx, rtx, rtx);
> -extern void rs6000_expand_vector_set_var (rtx, rtx, rtx);
>  extern void rs6000_expand_vector_extract (rtx, rtx, rtx);
>  extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx);
>  extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode);
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index f5565a1a253..471bf5660bd 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -6977,122 +6977,10 @@ rs6000_expand_vector_init (rtx target, rtx vals)
>    emit_move_insn (target, mem);
>  }
> 
> -/* Set field ELT_RTX of TARGET to VAL.  */
> -
> -void
> -rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
> -{
> -  machine_mode mode = GET_MODE (target);
> -  machine_mode inner_mode = GET_MODE_INNER (mode);
> -  rtx reg = gen_reg_rtx (mode);
> -  rtx mask, mem, x;
> -  int width = GET_MODE_SIZE (inner_mode);
> -  int i;
> -
> -  val = force_reg (GET_MODE (val), val);
> -
> -  if (VECTOR_MEM_VSX_P (mode))
> -    {
> -      if (!CONST_INT_P (elt_rtx))
> -     {
> -       rs6000_expand_vector_set_var (target, val, elt_rtx);
> -       return;
> -     }
> -
> -      rtx insn = NULL_RTX;
> -
> -      if (mode == V2DFmode)
> -     insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
> -
> -      else if (mode == V2DImode)
> -     insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
> -
> -      else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
> -     {
> -       if (mode == V4SImode)
> -         insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
> -       else if (mode == V8HImode)
> -         insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
> -       else if (mode == V16QImode)
> -         insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
> -       else if (mode == V4SFmode)
> -         insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
> -     }
> -
> -      if (insn)
> -     {
> -       emit_insn (insn);
> -       return;
> -     }
> -    }
> -
> -  gcc_assert (CONST_INT_P (elt_rtx));
> -
> -  /* Simplify setting single element vectors like V1TImode.  */
> -  if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode)
> -      && INTVAL (elt_rtx) == 0)
> -    {
> -      emit_move_insn (target, gen_lowpart (mode, val));
> -      return;
> -    }
> -
> -  /* Load single variable value.  */
> -  mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
> -  emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
> -  x = gen_rtx_UNSPEC (VOIDmode,
> -                   gen_rtvec (1, const0_rtx), UNSPEC_LVE);
> -  emit_insn (gen_rtx_PARALLEL (VOIDmode,
> -                            gen_rtvec (2,
> -                                       gen_rtx_SET (reg, mem),
> -                                       x)));
> -
> -  /* Linear sequence.  */
> -  mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
> -  for (i = 0; i < 16; ++i)
> -    XVECEXP (mask, 0, i) = GEN_INT (i);
> -
> -  /* Set permute mask to insert element into target.  */
> -  for (i = 0; i < width; ++i)
> -    XVECEXP (mask, 0, INTVAL (elt_rtx) * width + i) = GEN_INT (i + 0x10);
> -  x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
> -
> -  if (BYTES_BIG_ENDIAN)
> -    x = gen_rtx_UNSPEC (mode,
> -                     gen_rtvec (3, target, reg,
> -                                force_reg (V16QImode, x)),
> -                     UNSPEC_VPERM);
> -  else
> -    {
> -      if (TARGET_P9_VECTOR)
> -     x = gen_rtx_UNSPEC (mode,
> -                         gen_rtvec (3, reg, target,
> -                                    force_reg (V16QImode, x)),
> -                         UNSPEC_VPERMR);
> -      else
> -     {
> -       /* Invert selector.  We prefer to generate VNAND on P8 so
> -          that future fusion opportunities can kick in, but must
> -          generate VNOR elsewhere.  */
> -       rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
> -       rtx iorx = (TARGET_P8_VECTOR
> -                   ? gen_rtx_IOR (V16QImode, notx, notx)
> -                   : gen_rtx_AND (V16QImode, notx, notx));
> -       rtx tmp = gen_reg_rtx (V16QImode);
> -       emit_insn (gen_rtx_SET (tmp, iorx));
> -
> -       /* Permute with operands reversed and adjusted selector.  */
> -       x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
> -                           UNSPEC_VPERM);
> -     }
> -    }
> -
> -  emit_insn (gen_rtx_SET (target, x));
> -}
> -
>  /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
>     is variable and also counts by vector element size for p9 and above.  */
> 
> -void
> +static void
>  rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
>  {
>    machine_mode mode = GET_MODE (target);
> @@ -7139,7 +7027,7 @@ rs6000_expand_vector_set_var_p9 (rtx target, rtx val, 
> rtx idx)
>  /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
>     is variable and also counts by vector element size for p8.  */
> 
> -void
> +static void
>  rs6000_expand_vector_set_var_p8 (rtx target, rtx val, rtx idx)
>  {
>    machine_mode mode = GET_MODE (target);
> @@ -7235,18 +7123,126 @@ rs6000_expand_vector_set_var_p8 (rtx target, rtx 
> val, rtx idx)
>      gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, 
> mask_perm));
>  }
> 
> -/* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
> -   is variable and also counts by vector element size.  */
> +/* Set field ELT_RTX of TARGET to VAL.  */
> 
>  void
> -rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx)
> +rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
>  {
>    machine_mode mode = GET_MODE (target);
>    machine_mode inner_mode = GET_MODE_INNER (mode);
> -  if (TARGET_P9_VECTOR || GET_MODE_SIZE (inner_mode) == 8)
> -    rs6000_expand_vector_set_var_p9 (target, val, idx);
> +  rtx reg = gen_reg_rtx (mode);
> +  rtx mask, mem, x;
> +  int width = GET_MODE_SIZE (inner_mode);
> +  int i;
> +
> +  val = force_reg (GET_MODE (val), val);
> +
> +  if (VECTOR_MEM_VSX_P (mode))
> +    {
> +      if (!CONST_INT_P (elt_rtx))
> +     {
> +       /* For V2DI/V2DF, could leverage the P9 version to generate xxpermdi
> +          when elt_rtx is variable.  */
> +       if ((TARGET_P9_VECTOR && TARGET_POWERPC64) || width == 8)
> +         {
> +           rs6000_expand_vector_set_var_p9 (target, val, elt_rtx);
> +           return;
> +         }
> +       else if (TARGET_P8_VECTOR && TARGET_DIRECT_MOVE_64BIT)
> +         {
> +           rs6000_expand_vector_set_var_p8 (target, val, elt_rtx);
> +           return;
> +         }
> +     }
> +
> +      rtx insn = NULL_RTX;
> +
> +      if (mode == V2DFmode)
> +     insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
> +
> +      else if (mode == V2DImode)
> +     insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
> +
> +      else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
> +     {
> +       if (mode == V4SImode)
> +         insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
> +       else if (mode == V8HImode)
> +         insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
> +       else if (mode == V16QImode)
> +         insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
> +       else if (mode == V4SFmode)
> +         insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
> +     }
> +
> +      if (insn)
> +     {
> +       emit_insn (insn);
> +       return;
> +     }
> +    }
> +
> +  gcc_assert (CONST_INT_P (elt_rtx));
> +
> +  /* Simplify setting single element vectors like V1TImode.  */
> +  if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode)
> +      && INTVAL (elt_rtx) == 0)
> +    {
> +      emit_move_insn (target, gen_lowpart (mode, val));
> +      return;
> +    }
> +
> +  /* Load single variable value.  */
> +  mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
> +  emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
> +  x = gen_rtx_UNSPEC (VOIDmode,
> +                   gen_rtvec (1, const0_rtx), UNSPEC_LVE);
> +  emit_insn (gen_rtx_PARALLEL (VOIDmode,
> +                            gen_rtvec (2,
> +                                       gen_rtx_SET (reg, mem),
> +                                       x)));
> +
> +  /* Linear sequence.  */
> +  mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
> +  for (i = 0; i < 16; ++i)
> +    XVECEXP (mask, 0, i) = GEN_INT (i);
> +
> +  /* Set permute mask to insert element into target.  */
> +  for (i = 0; i < width; ++i)
> +    XVECEXP (mask, 0, INTVAL (elt_rtx) * width + i) = GEN_INT (i + 0x10);
> +  x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
> +
> +  if (BYTES_BIG_ENDIAN)
> +    x = gen_rtx_UNSPEC (mode,
> +                     gen_rtvec (3, target, reg,
> +                                force_reg (V16QImode, x)),
> +                     UNSPEC_VPERM);
>    else
> -    rs6000_expand_vector_set_var_p8 (target, val, idx);
> +    {
> +      if (TARGET_P9_VECTOR)
> +     x = gen_rtx_UNSPEC (mode,
> +                         gen_rtvec (3, reg, target,
> +                                    force_reg (V16QImode, x)),
> +                         UNSPEC_VPERMR);
> +      else
> +     {
> +       /* Invert selector.  We prefer to generate VNAND on P8 so
> +          that future fusion opportunities can kick in, but must
> +          generate VNOR elsewhere.  */
> +       rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
> +       rtx iorx = (TARGET_P8_VECTOR
> +                   ? gen_rtx_IOR (V16QImode, notx, notx)
> +                   : gen_rtx_AND (V16QImode, notx, notx));
> +       rtx tmp = gen_reg_rtx (V16QImode);
> +       emit_insn (gen_rtx_SET (tmp, iorx));
> +
> +       /* Permute with operands reversed and adjusted selector.  */
> +       x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
> +                           UNSPEC_VPERM);
> +     }
> +    }
> +
> +  emit_insn (gen_rtx_SET (target, x));
>  }
> 
>  /* Extract field ELT from VEC into TARGET.  */
> diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-char-p8.c 
> b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-char-p8.c
> index 8f94e8c80b8..4149d5240c8 100644
> --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-char-p8.c
> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-char-p8.c
> @@ -45,15 +45,21 @@ vector unsigned char testuu_cst (unsigned char x, vector 
> unsigned char v)
>  }
> 
>  /* no store per _var test */
> -/* { dg-final { scan-assembler-times {\mstvx\M|\mstxvw4x\M} 0 } } */
> +/* { dg-final { scan-assembler-times {\mstvx\M|\mstxvw4x\M} 0 { target lp64 
> } } } */
>  /* one store-byte per test */
> -/* { dg-final { scan-assembler-times {\mstb\M} 4 } } */
> +/* { dg-final { scan-assembler-times {\mstb\M} 4 { target lp64 } } } */
>  /* one load per test */
>  /* { dg-final { scan-assembler-times {\mlvx\M|\mlxvw4x\M} 8 { target le } } 
> } */
> -/* { dg-final { scan-assembler-times {\mlvx\M|\mlxvw4x\M} 4 { target be } } 
> } */
> +/* { dg-final { scan-assembler-times {\mlvx\M|\mlxvw4x\M} 4 { target { be && 
> lp64 } } } } */
> 
>  /* one lvebx per _cst test.*/
>  /* { dg-final { scan-assembler-times {\mlvebx\M} 4 } } */
>  /* one vperm per _cst test.*/
> -/* { dg-final { scan-assembler-times {\mvperm\M} 12 } } */
> +/* { dg-final { scan-assembler-times {\mvperm\M} 12 { target lp64 } } } */
> +
> +/* -m32 codegen. */
> +/* { dg-final { scan-assembler-times {\mstvx\M|\mstxvw4x\M} 4 { target ilp32 
> } } } */
> +/* { dg-final { scan-assembler-times {\mstb\M} 8 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mlvx\M|\mlxvw4x\M} 8 { target { be && 
> ilp32 } } } } */
> +/* { dg-final { scan-assembler-times {\mvperm\M} 4 { target ilp32 } } } */
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-char-p9.c 
> b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-char-p9.c
> index e8f8ba39731..b8e751fa2c1 100644
> --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-char-p9.c
> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-char-p9.c
> @@ -56,6 +56,6 @@ vector unsigned char testuu_cst (unsigned char x, vector 
> unsigned char v)
>  /* { dg-final { scan-assembler-times {\mrlwinm\M} 4 { target ilp32 } } } */
>  /* { dg-final { scan-assembler-times {\mstb\M} 8 { target ilp32 } } } */
>  /* { dg-final { scan-assembler-times {\mlxv\M} 8 { target ilp32 } } } */
> -/* { dg-final { scan-assembler-times {\mlvebx\M} 8 { target ilp32 } } } */
> -/* { dg-final { scan-assembler-times {\mvperm\M} 8 { target ilp32 } } } */
> -/* { dg-final { scan-assembler-times {\mxxperm\M} 8 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mlvebx\M} 4 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mvperm\M} 4 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mxxperm\M} 0 { target ilp32 } } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-double.c 
> b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-double.c
> index 5afadc9aa91..1286010bbaf 100644
> --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-double.c
> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-double.c
> @@ -28,7 +28,11 @@ testd_cst (double d, vector double vd)
>  /* { dg-final { scan-assembler-times {\mstfdx\M|\mstfd\M} 1 { target { ! 
> has_arch_pwr8 } } } } */
>  /* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxv\M|\mlvx\M} 1 { target 
> { ! has_arch_pwr8 } } } } */
> 
> -/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxv\M|\mstvx\M} 0 { 
> target { has_arch_pwr8 } } } } */
> -/* { dg-final { scan-assembler-times {\mstfdx\M|\mstfd\M} 0 { target { 
> has_arch_pwr8 } } } } */
> -/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxv\M|\mlvx\M} 0 { target 
> { has_arch_pwr8 } } } } */
> +/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxv\M|\mstvx\M} 0 { 
> target { has_arch_pwr8 && lp64 } } } } */
> +/* { dg-final { scan-assembler-times {\mstfdx\M|\mstfd\M} 0 { target { 
> has_arch_pwr8 && lp64 } } } } */
> +
> +/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxv\M|\mlvx\M} 0 { target 
> { has_arch_pwr8 && lp64 } } } } */
> +/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxv\M|\mlvx\M} 1 { target 
> { has_arch_pwr8 && ilp32 } } } } */
> +/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxv\M|\mstvx\M} 1 { 
> target { has_arch_pwr8 && ilp32 } } } } */
> +/* { dg-final { scan-assembler-times {\mstfdx\M|\mstfd\M} 1 { target { 
> has_arch_pwr8 && ilp32 } } } } */
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-float-p8.c 
> b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-float-p8.c
> index 834f1d39579..e458d8f9171 100644
> --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-float-p8.c
> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-float-p8.c
> @@ -19,12 +19,18 @@ testf_cst (float f, vector float vf)
>    return vec_insert (f, vf, 12);
>  }
> 
> -/* { dg-final { scan-assembler-times {\mstvx\M|\mstxv\M|\mstxvd2x\M} 0 } } */
> +/* { dg-final { scan-assembler-times {\mstvx\M|\mstxv\M|\mstxvd2x\M} 0 { 
> target lp64 } } } */
>  /* cst tests has stfs instead of stfsx. */
> -/* { dg-final { scan-assembler-times {\mstfs\M|\mstfsx\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mstfs\M|\mstfsx\M} 1 { target lp64 } 
> } } */
>  /* { dg-final { scan-assembler-times {\mlvx\M|\mlxv\M|\mlxvd2x\M|\mlxvw4x\M} 
> 2 } } */
> 
>  /* cst test has a lvewx,vperm combo */
>  /* { dg-final { scan-assembler-times {\mlvewx\M} 1 } } */
> -/* { dg-final { scan-assembler-times {\mvperm\M} 3 } } */
> +/* { dg-final { scan-assembler-times {\mvperm\M} 3 { target lp64 } } } */
> +
> +/* -m32 codegen. */
> +/* { dg-final { scan-assembler-times {\mstvx\M|\mstxv\M|\mstxvd2x\M} 1 { 
> target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mstfs\M|\mstfsx\M} 2 { target ilp32 } 
> } } */
> +/* { dg-final { scan-assembler-times {\mvperm\M} 1 { target ilp32 } } } */
> +
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-float-p9.c 
> b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-float-p9.c
> index dfca9fd04ef..9684c9f26ba 100644
> --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-float-p9.c
> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-float-p9.c
> @@ -30,6 +30,6 @@ testf_cst (float f, vector float vf)
> 
>  /* { dg-final { scan-assembler-times {\mstfs\M} 2 { target ilp32 } } } */
>  /* { dg-final { scan-assembler-times {\mlxv\M} 2 { target ilp32 } } } */
> -/* { dg-final { scan-assembler-times {\mlvewx\M} 2 { target ilp32 } } } */
> -/* { dg-final { scan-assembler-times {\mvperm\M} 2 { target ilp32 } } } */
> -/* { dg-final { scan-assembler-times {\mxxperm\M} 2 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mlvewx\M} 1 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mvperm\M} 1 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mxxperm\M} 0 { target ilp32 } } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-int-p8.c 
> b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-int-p8.c
> index 37502417c7f..d5fa422c9c1 100644
> --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-int-p8.c
> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-int-p8.c
> @@ -50,9 +50,14 @@ testui2_cst(unsigned int x, vector unsigned int v)
> 
>  /* Each test has lvx (8).  cst tests have additional lvewx. (4) */
>  /* var tests have no stwx and stvx.  cst tests have stw (4).*/
> -/* { dg-final { scan-assembler-times {\mstvx\M|\mstwx\M|\mstw\M|\mstxvw4x\M} 
> 4 } } */
> +/* { dg-final { scan-assembler-times {\mstvx\M|\mstwx\M|\mstw\M|\mstxvw4x\M} 
> 4 { target lp64 } } } */
>  /* { dg-final { scan-assembler-times {\mlvx\M|\mlxvw4x\M} 8 { target le } } 
> } */
> -/* { dg-final { scan-assembler-times {\mlvx\M|\mlxvw4x\M} 4 { target be } } 
> } */
> +/* { dg-final { scan-assembler-times {\mlvx\M|\mlxvw4x\M} 4 { target { be && 
> lp64 } } } } */
> 
> -/* { dg-final { scan-assembler-times {\mlvewx\M} 4 } } */
> -/* { dg-final { scan-assembler-times {\mvperm\M} 12 } } */
> +/* { dg-final { scan-assembler-times {\mlvewx\M} 4 { target lp64 } } } */
> +/* { dg-final { scan-assembler-times {\mvperm\M} 12 { target lp64 } } } */
> +
> +/* { dg-final { scan-assembler-times {\mvperm\M} 4 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mstvx\M|\mstwx\M|\mstw\M|\mstxvw4x\M} 
> 12 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mlvx\M|\mlxvw4x\M} 8 { target { be && 
> ilp32 } } } } */
> +/* { dg-final { scan-assembler-times {\mlvewx\M} 4 { target ilp32 } } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-int-p9.c 
> b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-int-p9.c
> index 21f0d9a0272..5cd6b3e0148 100644
> --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-int-p9.c
> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-int-p9.c
> @@ -49,7 +49,7 @@ testui2_cst(unsigned int x, vector unsigned int v)
>  }
> 
> 
> -/* { dg-final { scan-assembler-times {\mstxv\M|\mstvx\M} 0 } } */
> +/* { dg-final { scan-assembler-times {\mstxv\M|\mstvx\M} 0 { target lp64 } } 
> } */
>  /* { dg-final { scan-assembler-times {\mstwx\M} 0 { target lp64 } } } */
>  /* { dg-final { scan-assembler-times {\mlxv\M|\mlvx\M} 0 { target lp64 } } } 
> */
> 
> @@ -57,8 +57,9 @@ testui2_cst(unsigned int x, vector unsigned int v)
>  /* { dg-final { scan-assembler-times {\mmtvsrwz\M} 8 { target lp64 } } } */
>  /* { dg-final { scan-assembler-times {\mxxinsertw\M} 8 { target lp64 } } } */
> 
> +/* { dg-final { scan-assembler-times {\mstxv\M|\mstvx\M} 4 { target ilp32 } 
> } } */
>  /* { dg-final { scan-assembler-times {\mstw\M} 8 { target ilp32 } } } */
>  /* { dg-final { scan-assembler-times {\mlxv\M} 8 { target ilp32 } } } */
> -/* { dg-final { scan-assembler-times {\mlvewx\M} 8 { target ilp32 } } } */
> -/* { dg-final { scan-assembler-times {\mvperm\M} 8 { target ilp32 } } } */
> -/* { dg-final { scan-assembler-times {\mxxperm\M} 8 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mlvewx\M} 4 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mvperm\M} 4 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mxxperm\M} 0 { target ilp32 } } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-longlong.c 
> b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-longlong.c
> index b8d5528a4e0..0a0ee31a411 100644
> --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-longlong.c
> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-longlong.c
> @@ -60,9 +60,11 @@ testul2_cst(unsigned long long x, vector unsigned long 
> long v)
> 
>  /* { dg-final { scan-assembler-times {\mrldic\M|\mrlwinm\M} 4 } } */
> 
> -/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstvx\M|\mstxv\M} 0 } } */
> +/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstvx\M|\mstxv\M} 0 { 
> target lp64 } } } */
>  /* { dg-final { scan-assembler-times {\mstdx\M} 0 { target lp64 } } } */
> -/* { dg-final { scan-assembler-times {\mvperm\M} 8 { target ilp32 } } } */
> 
> -/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxv\M|\mlvx\M} 0 } } */
> +/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxv\M|\mlvx\M} 0 { target 
> lp64 } } } */
> 
> +/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstvx\M|\mstxv\M} 4 { 
> target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mvperm\M} 0 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxv\M|\mlvx\M} 4 { target 
> ilp32 } } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-short-p8.c 
> b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-short-p8.c
> index 8dc98f78ad4..a479d987bf0 100644
> --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-short-p8.c
> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-short-p8.c
> @@ -49,10 +49,14 @@ testus2_cst(unsigned short x, vector unsigned short v)
>  }
> 
>  /* { dg-final { scan-assembler-times {\mlhz\M|\mlvx\M|\mlxv\M|\mlxvw4x\M} 8 
> { target le } } } */
> -/* { dg-final { scan-assembler-times {\mlhz\M|\mlvx\M|\mlxv\M|\mlxvw4x\M} 4 
> { target be } } } */
> +/* { dg-final { scan-assembler-times {\mlhz\M|\mlvx\M|\mlxv\M|\mlxvw4x\M} 4 
> { target {  be && lp64 } } } } */
>  /* stores.. 0 per variable tests, 1 each per cst test. */
> -/* { dg-final { scan-assembler-times {\msthx\M|\mstvx\M|\msth\M|\mstxvw4x\M} 
> 4 } } */
> +/* { dg-final { scan-assembler-times {\msthx\M|\mstvx\M|\msth\M|\mstxvw4x\M} 
> 4 { target lp64 } } } */
> 
>  /* { dg-final { scan-assembler-times {\mlvehx\M} 4 } } */
> -/* { dg-final { scan-assembler-times {\mvperm\M} 12 } } */
> +/* { dg-final { scan-assembler-times {\mvperm\M} 12 { target lp64 } } } */
> +
> +/* { dg-final { scan-assembler-times {\mlhz\M|\mlvx\M|\mlxv\M|\mlxvw4x\M} 8 
> { target { be && ilp32 } } } } */
> +/* { dg-final { scan-assembler-times {\msthx\M|\mstvx\M|\msth\M|\mstxvw4x\M} 
> 12 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mvperm\M} 4 { target ilp32 } } } */
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-short-p9.c 
> b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-short-p9.c
> index dbb43a7929a..cea7488b3c6 100644
> --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-short-p9.c
> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-short-p9.c
> @@ -51,13 +51,14 @@ testus2_cst(unsigned short x, vector unsigned short v)
>  /* { dg-final { scan-assembler-times {\mmtvsrwz\M} 8 { target lp64 } } } */
>  /* { dg-final { scan-assembler-times {\mvinserth\M} 8 { target lp64 } } } */
> 
> -/* { dg-final { scan-assembler-times {\mstxv\M|\mstvx\M} 0 } } */
> +/* { dg-final { scan-assembler-times {\mstxv\M|\mstvx\M} 0 { target lp64 } } 
> } */
>  /* { dg-final { scan-assembler-times {\mlxv\M|\mlvx\M} 0 { target lp64 }} } 
> */
> 
>  /* -m32 uses sth/lvehx as part of the sequence. */
> -/* { dg-final { scan-assembler-times {\msth\M} 8 { target ilp32 }} } */
> -/* { dg-final { scan-assembler-times {\mlvehx\M} 8 { target ilp32 }} } */
> -/* { dg-final { scan-assembler-times {\mvperm\M} 8 { target ilp32 }} } */
> -/* { dg-final { scan-assembler-times {\mxxperm\M} 8 { target ilp32 }} } */
> -/* { dg-final { scan-assembler-times {\mlxv\M|\mlvx\M} 8 { target ilp32 }} } 
> */
> +/* { dg-final { scan-assembler-times {\mstxv\M|\mstvx\M} 4 { target ilp32 } 
> } } */
> +/* { dg-final { scan-assembler-times {\msth\M} 8 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mlvehx\M} 4 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mvperm\M} 4 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mxxperm\M} 0 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mlxv\M|\mlvx\M} 8 { target ilp32 } } 
> } */
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c 
> b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c
> index 06da47b7758..c4ee898eef0 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c
> @@ -9,9 +9,16 @@
>  TEST_VEC_INSERT_ALL (test)
> 
>  /* { dg-final { scan-assembler-not {\mstxw\M} } } */
> -/* { dg-final { scan-assembler-times {\mlvsl\M} 10 } } */
> -/* { dg-final { scan-assembler-times {\mlvsr\M} 3 } } */
> -/* { dg-final { scan-assembler-times {\mvperm\M} 20 } } */
> -/* { dg-final { scan-assembler-times {\mxxpermdi\M} 10 } } */
> -/* { dg-final { scan-assembler-times {\mxxsel\M} 7 } } */
> +/* { dg-final { scan-assembler-times {\mlvsl\M} 10 { target lp64 } } } */
> +/* { dg-final { scan-assembler-times {\mlvsr\M} 3 { target lp64 } } } */
> +/* { dg-final { scan-assembler-times {\mvperm\M} 20 { target lp64 } } } */
> +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 10 { target lp64 } } } */
> +/* { dg-final { scan-assembler-times {\mxxsel\M} 7 { target lp64 } } } */
> +
> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 10 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mstxvw4x\M} 6 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mstxvd2x\M} 4 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mstb|sth|stw|stfs|stfd\M} 22 { target 
> ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mlxvw4x\M} 6 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mlxvd2x\M} 4 { target ilp32 } } } */
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr79251.p9.c 
> b/gcc/testsuite/gcc.target/powerpc/pr79251.p9.c
> index 8ebeab425ff..9af5982efb5 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr79251.p9.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr79251.p9.c
> @@ -9,16 +9,16 @@
>  TEST_VEC_INSERT_ALL (test)
> 
>  /* { dg-final { scan-assembler-not {\mstxw\M} } } */
> -/* { dg-final { scan-assembler-times {\mlvsl\M} 10 } } */
> -/* { dg-final { scan-assembler-times {\mlvsr\M} 10 } } */
> -/* { dg-final { scan-assembler-times {\mxxperm\M} 20 } } */
> +/* { dg-final { scan-assembler-times {\mlvsl\M} 10 { target lp64 } } } */
> +/* { dg-final { scan-assembler-times {\mlvsr\M} 10 { target lp64 } } } */
> +/* { dg-final { scan-assembler-times {\mxxperm\M} 20 { target lp64 } } } */
>  /* { dg-final { scan-assembler-times {\mxxinsertw\M} 3 { target lp64 } } } */
>  /* { dg-final { scan-assembler-times {\mvinserth\M} 2 { target lp64 } } } */
>  /* { dg-final { scan-assembler-times {\mvinsertb\M} 2 { target lp64 } } } */
> -/* { dg-final { scan-assembler-times {\mxxpermdi\M} 3 } } */
> +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 3 { target lp64 } } } */
> 
>  /* { dg-final { scan-assembler-times {\mrlwinm\M} 10 { target ilp32 } } } */
> -/* { dg-final { scan-assembler-times {\mvperm\M} 7 { target ilp32 } } } */
> -/* { dg-final { scan-assembler-times {\mlvebx\M} 2 { target ilp32 } } } */
> -/* { dg-final { scan-assembler-times {\mlvehx\M} 2 { target ilp32 } } } */
> -/* { dg-final { scan-assembler-times {\mlvewx\M} 3 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mstxv\M} 10 { target ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mstb|sth|stw|stfs|stfd\M} 22 { target 
> ilp32 } } } */
> +/* { dg-final { scan-assembler-times {\mlxv\M} 10 { target ilp32 } } } */
> +
> diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c 
> b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c
> index 6fffb7eb098..0780b01ffab 100644
> --- a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c
> +++ b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c
> @@ -193,7 +193,7 @@ vector unsigned __int128 splat_uint128 (unsigned __int128 
> x) { return vec_splats
>  /* { dg-final { scan-assembler-times {\mrldic\M} 0  { target { be && ilp32 } 
> } } } */
>  /* { dg-final { scan-assembler-times {\mrldic\M} 64 { target { be && lp64 } 
> } } } */
>  /* { dg-final { scan-assembler-times {\mrldic\M} 64 { target le } } } */
> -/* { dg-final { scan-assembler-times "xxpermdi" 11 { target be } } } */
> +/* { dg-final { scan-assembler-times "xxpermdi" 4 { target be } } } */
>  /* { dg-final { scan-assembler-times "xxpermdi" 6 { target le } } } */
>  /* { dg-final { scan-assembler-times "vspltisb" 2 } } */
>  /* { dg-final { scan-assembler-times "vspltish" 2 } } */

Reply via email to