On Mon, Jan 18, 2021 at 6:18 PM Richard Sandiford
<richard.sandif...@arm.com> wrote:
>
> Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> > Hi:
> >   If SRC had been assigned a mode narrower than the copy, we can't link
> > DEST into the chain even they have same
> > hard_regno_nregs(i.e. HImode/SImode in i386 backend).
>
> In general, changes between modes within the same hard register are OK.
> Could you explain in more detail what's going wrong?
>

cprop hardreg change

(insn 457 499 460 33 (set (reg:SI 39 r11 [orig:86 _11 ] [86])
        (reg:SI 37 r9 [orig:86 _11 ] [86])) "test.c":29:36 75 {*movsi_internal}
     (expr_list:REG_DEAD (reg:SI 37 r9 [orig:86 _11 ] [86])
        (nil)))

to

(insn 457 499 460 33 (set (reg:SI 39 r11 [orig:86 _11 ] [86])
        (reg:SI 22 xmm2 [orig:86 _11 ] [86])) "test.c":29:36 75
{*movsi_internal}
     (expr_list:REG_DEAD (reg:SI 22 xmm2 [orig:86 _11 ] [86])
        (nil)))

since (reg:SI 22 xmm2) and (reg:SI r9) are in the same value chain in
which the oldest regno is k0.

but with xmm2 defined as

kmovw %k0, %edi  # 69 [c=4 l=4] *movhi_internal/6----- kmovw move the
lower 16bits to %edi, and clear the upper 16 bits.
vmovd %edi, %xmm2 # 489 *movsi_internal  --- vmovd move 32bits from
%edi to %xmm2.

(insn 69 68 70 12 (set (reg:HI 5 di [orig:96 _52 ] [96])
        (reg:HI 68 k0 [orig:82 var_6.0_1 ] [82])) "test.c":21:23 76
{*movhi_internal}
     (nil))

(insn 489 75 78 12 (set (reg:SI 22 xmm2 [297])
        (reg:SI 5 di [orig:96 _52 ] [96])) 75 {*movsi_internal}
     (nil))
...
kmovd %k0, %r9d (movsi) ---- kmovd move 32bits from %k0 to %r9d.

for %edi, bit 16-31 is cleared by kmovw which means %r9d is not equal
to %xmm2 as a SImode value.

> Thanks,
> Richard
>
>
> >
> > i.e
> >         kmovw   %k0, %edi
> >         vmovd   %edi, %xmm2
> >         vpshuflw        $0, %xmm2, %xmm0
> >         kmovw   %k0, %r8d
> >         kmovd   %k0, %r9d
> > ...
> > -        movl %r9d, %r11d
> > +        vmovd %xmm2, %r11d
> >
> >   Bootstrap and regtested on x86_64-linux-gnu{-m32,}.
> >   Ok for trunk?
> >
> > gcc/ChangeLog:
> >
> >         PR rtl-optimization/98694
> >         * regcprop.c (copy_value): If SRC had been assigned a mode
> >         narrower than the copy, we can't link DEST into the chain even
> >         they have same hard_regno_nregs(i.e. HImode/SImode in i386
> >         backend).
> >
> > gcc/testsuite/ChangeLog:
> >
> >         PR rtl-optimization/98694
> >         * gcc.target/i386/pr98694.c: New test.
> >
> >   ---
> >  gcc/regcprop.c                          |  3 +-
> >  gcc/testsuite/gcc.target/i386/pr98694.c | 38 +++++++++++++++++++++++++
> >  2 files changed, 40 insertions(+), 1 deletion(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr98694.c
> >
> > diff --git a/gcc/regcprop.c b/gcc/regcprop.c
> > index dd62cb36013..997516eca07 100644
> > --- a/gcc/regcprop.c
> > +++ b/gcc/regcprop.c
> > @@ -355,7 +355,8 @@ copy_value (rtx dest, rtx src, struct value_data *vd)
> >    /* If SRC had been assigned a mode narrower than the copy, we can't
> >       link DEST into the chain, because not all of the pieces of the
> >       copy came from oldest_regno.  */
> > -  else if (sn > hard_regno_nregs (sr, vd->e[sr].mode))
> > +  else if (sn > hard_regno_nregs (sr, vd->e[sr].mode)
> > +          || partial_subreg_p (vd->e[sr].mode, GET_MODE (src)))
> >      return;
> >
> >    /* Link DR at the end of the value chain used by SR.  */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr98694.c
> > b/gcc/testsuite/gcc.target/i386/pr98694.c
> > new file mode 100644
> > index 00000000000..611f9e77627
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr98694.c
> > @@ -0,0 +1,38 @@
> > +/* PR rtl-optimization/98694 */
> > +/* { dg-do run { target { ! ia32 } } } */
> > +/* { dg-options "-O2 -mavx512bw" } */
> > +/* { dg-require-effective-target avx512bw } */
> > +
> > +#include<immintrin.h>
> > +typedef short v4hi __attribute__ ((vector_size (8)));
> > +typedef int v2si __attribute__ ((vector_size (8)));
> > +v4hi b;
> > +
> > +__attribute__ ((noipa))
> > +v2si
> > +foo (__m512i src1, __m512i src2)
> > +{
> > +  __mmask64 m = _mm512_cmpeq_epu8_mask (src1, src2);
> > +  short s = (short) m;
> > +  int i = (int)m;
> > +  b = __extension__ (v4hi) {s, s, s, s};
> > +  return __extension__ (v2si) {i, i};
> > +}
> > +
> > +int main ()
> > +{
> > +  __m512i src1 = _mm512_setzero_si512 ();
> > +  __m512i src2 = _mm512_set_epi8 (0, 1, 0, 1, 0, 1, 0, 1,
> > +                                 0, 1, 0, 1, 0, 1, 0, 1,
> > +                                 0, 1, 0, 1, 0, 1, 0, 1,
> > +                                 0, 1, 0, 1, 0, 1, 0, 1,
> > +                                 0, 1, 0, 1, 0, 1, 0, 1,
> > +                                 0, 1, 0, 1, 0, 1, 0, 1,
> > +                                 0, 1, 0, 1, 0, 1, 0, 1,
> > +                                 0, 1, 0, 1, 0, 1, 0, 1);
> > +  __mmask64 m = _mm512_cmpeq_epu8_mask (src1, src2);
> > +  v2si a = foo (src1, src2);
> > +  if (a[0] != (int)m)
> > +    __builtin_abort ();
> > +  return 0;
> > +}
> > --



--
BR,
Hongtao

Reply via email to