Hi: If SRC had been assigned a mode narrower than the copy, we can't link DEST into the chain, even if the two modes have the same hard_regno_nregs (e.g. HImode/SImode in the i386 backend).
For example, in the code below regcprop replaces the SImode copy from %r9d with a copy from %xmm2, which was only set from the HImode value: kmovw %k0, %edi vmovd %edi, %xmm2 vpshuflw $0, %xmm2, %xmm0 kmovw %k0, %r8d kmovd %k0, %r9d ... - movl %r9d, %r11d + vmovd %xmm2, %r11d Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. Ok for trunk? gcc/ChangeLog: PR rtl-optimization/98694 * regcprop.c (copy_value): If SRC had been assigned a mode narrower than the copy, we can't link DEST into the chain, even if the two modes have the same hard_regno_nregs (e.g. HImode/SImode in the i386 backend). gcc/testsuite/ChangeLog: PR rtl-optimization/98694 * gcc.target/i386/pr98694.c: New test. --- gcc/regcprop.c | 3 +- gcc/testsuite/gcc.target/i386/pr98694.c | 38 +++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr98694.c diff --git a/gcc/regcprop.c b/gcc/regcprop.c index dd62cb36013..997516eca07 100644 --- a/gcc/regcprop.c +++ b/gcc/regcprop.c @@ -355,7 +355,8 @@ copy_value (rtx dest, rtx src, struct value_data *vd) /* If SRC had been assigned a mode narrower than the copy, we can't link DEST into the chain, because not all of the pieces of the copy came from oldest_regno. */ - else if (sn > hard_regno_nregs (sr, vd->e[sr].mode)) + else if (sn > hard_regno_nregs (sr, vd->e[sr].mode) + || partial_subreg_p (vd->e[sr].mode, GET_MODE (src))) return; /* Link DR at the end of the value chain used by SR. */ diff --git a/gcc/testsuite/gcc.target/i386/pr98694.c b/gcc/testsuite/gcc.target/i386/pr98694.c new file mode 100644 index 00000000000..611f9e77627 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr98694.c @@ -0,0 +1,38 @@ +/* PR rtl-optimization/98694 */ +/* { dg-do run { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mavx512bw" } */ +/* { dg-require-effective-target avx512bw } */ + +#include<immintrin.h> +typedef short v4hi __attribute__ ((vector_size (8))); +typedef int v2si __attribute__ ((vector_size (8))); +v4hi b; + +__attribute__ ((noipa)) +v2si +foo (__m512i src1, __m512i src2) +{ + __mmask64 m = _mm512_cmpeq_epu8_mask (src1, src2); + short s = (short) m; + int i = (int)m; + b = __extension__ (v4hi) {s, s, s, s}; + return __extension__ (v2si) {i, i}; +} + +int main () +{ + __m512i src1 = _mm512_setzero_si512 (); + __m512i src2 = _mm512_set_epi8 (0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1); + __mmask64 m = _mm512_cmpeq_epu8_mask (src1, src2); + v2si a = foo (src1, src2); + if (a[0] != (int)m) + __builtin_abort (); + return 0; +} -- -- BR, Hongtao