Hi,

I'd like to gentle ping this:

https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612213.html

It's to fix one regression, I think it's stage 4 content.

BR,
Kewen

on 2023/2/17 17:55, Kewen.Lin via Gcc-patches wrote:
> Hi,
> 
> As PR108807 exposes, the current handling in function
> rs6000_expand_vector_set_var_p9 doesn't take care of big
> endianness.  Currently the function is to rotate the
> target vector by moving element to-be-set to element 0,
> set element 0 with the given val, then rotate back.  To
> get the permutation control vector for the rotation, it
> makes use of lvsr and lvsl, but the element ordering is
> different for BE and LE (like element 0 is the most
> significant one on BE while the least significant one on
> LE), this patch is to add consideration for BE and make
> sure permutation control vectors for rotations are expected.
> 
> As tested, it helped to fix the below failures:
> 
> FAIL: gcc.target/powerpc/pr79251-run.p9.c execution test
> FAIL: gcc.target/powerpc/pr89765-mc.c execution test
> FAIL: gcc.target/powerpc/vsx-builtin-10d.c execution test
> FAIL: gcc.target/powerpc/vsx-builtin-11d.c execution test
> FAIL: gcc.target/powerpc/vsx-builtin-14d.c execution test
> FAIL: gcc.target/powerpc/vsx-builtin-16d.c execution test
> FAIL: gcc.target/powerpc/vsx-builtin-18d.c execution test
> FAIL: gcc.target/powerpc/vsx-builtin-9d.c execution test
> 
> Bootstrapped and regtested on powerpc64-linux-gnu P{8,9}
> and powerpc64le-linux-gnu P10.
> 
> Is it ok for trunk?
> 
> BR,
> Kewen
> -----
>       PR target/108807
> 
> gcc/ChangeLog:
> 
>       * config/rs6000/rs6000.cc (rs6000_expand_vector_set_var_p9): Fix gen
>       function for permutation control vector by considering big endianness.
> ---
>  gcc/config/rs6000/rs6000.cc | 48 +++++++++++++++++++++----------------
>  1 file changed, 28 insertions(+), 20 deletions(-)
> 
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index 16ca3a31757..774eb2963d9 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -7235,22 +7235,26 @@ rs6000_expand_vector_set_var_p9 (rtx target, rtx val, 
> rtx idx)
> 
>    machine_mode shift_mode;
>    rtx (*gen_ashl)(rtx, rtx, rtx);
> -  rtx (*gen_lvsl)(rtx, rtx);
> -  rtx (*gen_lvsr)(rtx, rtx);
> +  rtx (*gen_pcvr1)(rtx, rtx);
> +  rtx (*gen_pcvr2)(rtx, rtx);
> 
>    if (TARGET_POWERPC64)
>      {
>        shift_mode = DImode;
>        gen_ashl = gen_ashldi3;
> -      gen_lvsl = gen_altivec_lvsl_reg_di;
> -      gen_lvsr = gen_altivec_lvsr_reg_di;
> +      gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_di
> +                                : gen_altivec_lvsr_reg_di;
> +      gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_di
> +                                : gen_altivec_lvsl_reg_di;
>      }
>    else
>      {
>        shift_mode = SImode;
>        gen_ashl = gen_ashlsi3;
> -      gen_lvsl = gen_altivec_lvsl_reg_si;
> -      gen_lvsr = gen_altivec_lvsr_reg_si;
> +      gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_si
> +                                : gen_altivec_lvsr_reg_si;
> +      gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_si
> +                                : gen_altivec_lvsl_reg_si;
>      }
>    /* Generate the IDX for permute shift, width is the vector element size.
>       idx = idx * width.  */
> @@ -7259,25 +7263,29 @@ rs6000_expand_vector_set_var_p9 (rtx target, rtx val, 
> rtx idx)
> 
>    emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
> 
> -  /*  lvsr    v1,0,idx.  */
> -  rtx pcvr = gen_reg_rtx (V16QImode);
> -  emit_insn (gen_lvsr (pcvr, tmp));
> -
> -  /*  lvsl    v2,0,idx.  */
> -  rtx pcvl = gen_reg_rtx (V16QImode);
> -  emit_insn (gen_lvsl (pcvl, tmp));
> +  /* Generate one permutation control vector used for rotating the element
> +     at to-insert position to element zero in target vector.  lvsl is
> +     used for big endianness while lvsr is used for little endianness:
> +     lvs[lr]    v1,0,idx.  */
> +  rtx pcvr1 = gen_reg_rtx (V16QImode);
> +  emit_insn (gen_pcvr1 (pcvr1, tmp));
> 
>    rtx sub_target = simplify_gen_subreg (V16QImode, target, mode, 0);
> +  rtx perm1 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, 
> sub_target,
> +                                        pcvr1);
> +  emit_insn (perm1);
> 
> -  rtx permr
> -    = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, pcvr);
> -  emit_insn (permr);
> -
> +  /* Insert val into element 0 of target vector.  */
>    rs6000_expand_vector_set (target, val, const0_rtx);
> 
> -  rtx perml
> -    = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, pcvl);
> -  emit_insn (perml);
> +  /* Rotate back with a reversed permutation control vector generated from:
> +     lvs[rl]   v2,0,idx.  */
> +  rtx pcvr2 = gen_reg_rtx (V16QImode);
> +  emit_insn (gen_pcvr2 (pcvr2, tmp));
> +
> +  rtx perm2 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, 
> sub_target,
> +                                        pcvr2);
> +  emit_insn (perm2);
>  }
> 
>  /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
> --
> 2.39.1

Reply via email to