Hi:
  If SRC had been assigned a mode narrower than the copy, we can't link
DEST into the chain even they have same
hard_regno_nregs(i.e. HImode/SImode in i386 backend).

i.e
        kmovw   %k0, %edi
        vmovd   %edi, %xmm2
        vpshuflw        $0, %xmm2, %xmm0
        kmovw   %k0, %r8d
        kmovd   %k0, %r9d
...
-        movl %r9d, %r11d
+        vmovd %xmm2, %r11d

  Bootstrap and regtested on x86_64-linux-gnu{-m32,}.
  Ok for trunk?

gcc/ChangeLog:

        PR rtl-optimization/98694
        * regcprop.c (copy_value): If SRC had been assigned a mode
        narrower than the copy, we can't link DEST into the chain even
        they have same hard_regno_nregs(i.e. HImode/SImode in i386
        backend).

gcc/testsuite/ChangeLog:

        PR rtl-optimization/98694
        * gcc.target/i386/pr98694.c: New test.

  ---
 gcc/regcprop.c                          |  3 +-
 gcc/testsuite/gcc.target/i386/pr98694.c | 38 +++++++++++++++++++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr98694.c

diff --git a/gcc/regcprop.c b/gcc/regcprop.c
index dd62cb36013..997516eca07 100644
--- a/gcc/regcprop.c
+++ b/gcc/regcprop.c
@@ -355,7 +355,8 @@ copy_value (rtx dest, rtx src, struct value_data *vd)
   /* If SRC had been assigned a mode narrower than the copy, we can't
      link DEST into the chain, because not all of the pieces of the
      copy came from oldest_regno.  */
-  else if (sn > hard_regno_nregs (sr, vd->e[sr].mode))
+  else if (sn > hard_regno_nregs (sr, vd->e[sr].mode)
+          || partial_subreg_p (vd->e[sr].mode, GET_MODE (src)))
     return;

   /* Link DR at the end of the value chain used by SR.  */
diff --git a/gcc/testsuite/gcc.target/i386/pr98694.c
b/gcc/testsuite/gcc.target/i386/pr98694.c
new file mode 100644
index 00000000000..611f9e77627
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98694.c
@@ -0,0 +1,38 @@
+/* PR rtl-optimization/98694 */
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512bw" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include<immintrin.h>
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef int v2si __attribute__ ((vector_size (8)));
+v4hi b;
+
+__attribute__ ((noipa))
+v2si
+foo (__m512i src1, __m512i src2)
+{
+  __mmask64 m = _mm512_cmpeq_epu8_mask (src1, src2);
+  short s = (short) m;
+  int i = (int)m;
+  b = __extension__ (v4hi) {s, s, s, s};
+  return __extension__ (v2si) {i, i};
+}
+
+int main ()
+{
+  __m512i src1 = _mm512_setzero_si512 ();
+  __m512i src2 = _mm512_set_epi8 (0, 1, 0, 1, 0, 1, 0, 1,
+                                 0, 1, 0, 1, 0, 1, 0, 1,
+                                 0, 1, 0, 1, 0, 1, 0, 1,
+                                 0, 1, 0, 1, 0, 1, 0, 1,
+                                 0, 1, 0, 1, 0, 1, 0, 1,
+                                 0, 1, 0, 1, 0, 1, 0, 1,
+                                 0, 1, 0, 1, 0, 1, 0, 1,
+                                 0, 1, 0, 1, 0, 1, 0, 1);
+  __mmask64 m = _mm512_cmpeq_epu8_mask (src1, src2);
+  v2si a = foo (src1, src2);
+  if (a[0] != (int)m)
+    __builtin_abort ();
+  return 0;
+}
-- 


-- 
BR,
Hongtao

Reply via email to