On Mon, Jan 18, 2021 at 6:18 PM Richard Sandiford <richard.sandif...@arm.com> wrote: > > Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > > Hi: > > If SRC had been assigned a mode narrower than the copy, we can't link > > DEST into the chain even they have same > > hard_regno_nregs(i.e. HImode/SImode in i386 backend). > > In general, changes between modes within the same hard register are OK. > Could you explain in more detail what's going wrong? >
cprop hardreg change (insn 457 499 460 33 (set (reg:SI 39 r11 [orig:86 _11 ] [86]) (reg:SI 37 r9 [orig:86 _11 ] [86])) "test.c":29:36 75 {*movsi_internal} (expr_list:REG_DEAD (reg:SI 37 r9 [orig:86 _11 ] [86]) (nil))) to (insn 457 499 460 33 (set (reg:SI 39 r11 [orig:86 _11 ] [86]) (reg:SI 22 xmm2 [orig:86 _11 ] [86])) "test.c":29:36 75 {*movsi_internal} (expr_list:REG_DEAD (reg:SI 22 xmm2 [orig:86 _11 ] [86]) (nil))) since (reg:SI 22 xmm2) and (reg:SI r9) are in the same value chain in which the oldest regno is k0. but with xmm2 defined as kmovw %k0, %edi # 69 [c=4 l=4] *movhi_internal/6----- kmovw move the lower 16bits to %edi, and clear the upper 16 bits. vmovd %edi, %xmm2 # 489 *movsi_internal --- vmovd move 32bits from %edi to %xmm2. (insn 69 68 70 12 (set (reg:HI 5 di [orig:96 _52 ] [96]) (reg:HI 68 k0 [orig:82 var_6.0_1 ] [82])) "test.c":21:23 76 {*movhi_internal} (nil)) (insn 489 75 78 12 (set (reg:SI 22 xmm2 [297]) (reg:SI 5 di [orig:96 _52 ] [96])) 75 {*movsi_internal} (nil)) ... kmovd %k0, %r9d (movsi) ---- kmovd move 32bits from %k0 to %r9d. for %edi, bit 16-31 is cleared by kmovw which means %r9d is not equal to %xmm2 as a SImode value. > Thanks, > Richard > > > > > > i.e > > kmovw %k0, %edi > > vmovd %edi, %xmm2 > > vpshuflw $0, %xmm2, %xmm0 > > kmovw %k0, %r8d > > kmovd %k0, %r9d > > ... > > - movl %r9d, %r11d > > + vmovd %xmm2, %r11d > > > > Bootstrap and regtested on x86_64-linux-gnu{-m32,}. > > Ok for trunk? > > > > gcc/ChangeLog: > > > > PR rtl-optimization/98694 > > * regcprop.c (copy_value): If SRC had been assigned a mode > > narrower than the copy, we can't link DEST into the chain even > > they have same hard_regno_nregs(i.e. HImode/SImode in i386 > > backend). > > > > gcc/testsuite/ChangeLog: > > > > PR rtl-optimization/98694 > > * gcc.target/i386/pr98694.c: New test. > > > > --- > > gcc/regcprop.c | 3 +- > > gcc/testsuite/gcc.target/i386/pr98694.c | 38 +++++++++++++++++++++++++ > > 2 files changed, 40 insertions(+), 1 deletion(-) > > create mode 100644 gcc/testsuite/gcc.target/i386/pr98694.c > > > > diff --git a/gcc/regcprop.c b/gcc/regcprop.c > > index dd62cb36013..997516eca07 100644 > > --- a/gcc/regcprop.c > > +++ b/gcc/regcprop.c > > @@ -355,7 +355,8 @@ copy_value (rtx dest, rtx src, struct value_data *vd) > > /* If SRC had been assigned a mode narrower than the copy, we can't > > link DEST into the chain, because not all of the pieces of the > > copy came from oldest_regno. */ > > - else if (sn > hard_regno_nregs (sr, vd->e[sr].mode)) > > + else if (sn > hard_regno_nregs (sr, vd->e[sr].mode) > > + || partial_subreg_p (vd->e[sr].mode, GET_MODE (src))) > > return; > > > > /* Link DR at the end of the value chain used by SR. */ > > diff --git a/gcc/testsuite/gcc.target/i386/pr98694.c > > b/gcc/testsuite/gcc.target/i386/pr98694.c > > new file mode 100644 > > index 00000000000..611f9e77627 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr98694.c > > @@ -0,0 +1,38 @@ > > +/* PR rtl-optimization/98694 */ > > +/* { dg-do run { target { ! ia32 } } } */ > > +/* { dg-options "-O2 -mavx512bw" } */ > > +/* { dg-require-effective-target avx512bw } */ > > + > > +#include<immintrin.h> > > +typedef short v4hi __attribute__ ((vector_size (8))); > > +typedef int v2si __attribute__ ((vector_size (8))); > > +v4hi b; > > + > > +__attribute__ ((noipa)) > > +v2si > > +foo (__m512i src1, __m512i src2) > > +{ > > + __mmask64 m = _mm512_cmpeq_epu8_mask (src1, src2); > > + short s = (short) m; > > + int i = (int)m; > > + b = __extension__ (v4hi) {s, s, s, s}; > > + return __extension__ (v2si) {i, i}; > > +} > > + > > +int main () > > +{ > > + __m512i src1 = _mm512_setzero_si512 (); > > + __m512i src2 = _mm512_set_epi8 (0, 1, 0, 1, 0, 1, 0, 1, > > + 0, 1, 0, 1, 0, 1, 0, 1, > > + 0, 1, 0, 1, 0, 1, 0, 1, > > + 0, 1, 0, 1, 0, 1, 0, 1, > > + 0, 1, 0, 1, 0, 1, 0, 1, > > + 0, 1, 0, 1, 0, 1, 0, 1, > > + 0, 1, 0, 1, 0, 1, 0, 1, > > + 0, 1, 0, 1, 0, 1, 0, 1); > > + __mmask64 m = _mm512_cmpeq_epu8_mask (src1, src2); > > + v2si a = foo (src1, src2); > > + if (a[0] != (int)m) > > + __builtin_abort (); > > + return 0; > > +} > > -- -- BR, Hongtao