Hi:
  When programmers explicitly use mask loaded intrinsics, don't
transform the instruction to vpblend{b,w,d,q} since If mem_addr points
to a memory region with less than whole vector size of accessible
memory,  the mask would prevent reading the inaccessible bytes which
could avoid fault.

  Bootstrap is ok, gcc regress test for i386/x86_64 backend is ok.
  Ok for trunk?

gcc/ChangeLog:

        PR target/97642
        * config/i386/sse.md (UNSPEC_MASKLOAD): New unspec.
        (*<avx512>_load<mode>_mask): New define_insns for masked load
        instructions.
        (<avx512>_load<mode>_mask): Changed to define_expands which
        specifically handle memory operands.
        (<avx512>_blendm<mode>): Changed to define_insns which are same
        as original <avx512>_load<mode>_mask with adjustment of
        operands order.
        (*<avx512>_load<mode>): New define_insn_and_split which is
        used to optimize for masked load with all one mask.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/avx512bw-vmovdqu16-1.c: Adjust testcase to
        make sure only masked load instruction is generated.
        * gcc.target/i386/avx512bw-vmovdqu8-1.c: Ditto.
        * gcc.target/i386/avx512f-vmovapd-1.c: Ditto.
        * gcc.target/i386/avx512f-vmovaps-1.c: Ditto.
        * gcc.target/i386/avx512f-vmovdqa32-1.c: Ditto.
        * gcc.target/i386/avx512f-vmovdqa64-1.c: Ditto.
        * gcc.target/i386/avx512vl-vmovapd-1.c: Ditto.
        * gcc.target/i386/avx512vl-vmovaps-1.c: Ditto.
        * gcc.target/i386/avx512vl-vmovdqa32-1.c: Ditto.
        * gcc.target/i386/avx512vl-vmovdqa64-1.c: Ditto.
        * gcc.target/i386/pr97642-1.c: New test.
        * gcc.target/i386/pr97642-2.c: New test.

-- 
BR,
Hongtao
From 48cf0adcd55395653891888f4768b8bdc19786f2 Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao....@intel.com>
Date: Tue, 3 Nov 2020 17:26:43 +0800
Subject: [PATCH] Fix incorrect replacement of vmovdqu32 with vpblendd which
 can cause fault.

gcc/ChangeLog:

	PR target/97642
	* config/i386/sse.md (UNSPEC_MASKLOAD): New unspec.
	(*<avx512>_load<mode>_mask): New define_insns for masked load
	instructions.
	(<avx512>_load<mode>_mask): Changed to define_expands which
	specifically handle memory operands.
	(<avx512>_blendm<mode>): Changed to define_insns which are same
	as original <avx512>_load<mode>_mask with adjustment of
	operands order.
	(*<avx512>_load<mode>): New define_insn_and_split which is
	used to optimize for masked load with all one mask.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx512bw-vmovdqu16-1.c: Adjust testcase to
	make sure only masked load instruction is generated.
	* gcc.target/i386/avx512bw-vmovdqu8-1.c: Ditto.
	* gcc.target/i386/avx512f-vmovapd-1.c: Ditto.
	* gcc.target/i386/avx512f-vmovaps-1.c: Ditto.
	* gcc.target/i386/avx512f-vmovdqa32-1.c: Ditto.
	* gcc.target/i386/avx512f-vmovdqa64-1.c: Ditto.
	* gcc.target/i386/avx512vl-vmovapd-1.c: Ditto.
	* gcc.target/i386/avx512vl-vmovaps-1.c: Ditto.
	* gcc.target/i386/avx512vl-vmovdqa32-1.c: Ditto.
	* gcc.target/i386/avx512vl-vmovdqa64-1.c: Ditto.
	* gcc.target/i386/pr97642-1.c: New test.
	* gcc.target/i386/pr97642-2.c: New test.
---
 gcc/config/i386/sse.md                        | 138 ++++++++++++++----
 .../gcc.target/i386/avx512bw-vmovdqu16-1.c    |   6 +-
 .../gcc.target/i386/avx512bw-vmovdqu8-1.c     |   6 +-
 .../gcc.target/i386/avx512f-vmovapd-1.c       |   2 +-
 .../gcc.target/i386/avx512f-vmovaps-1.c       |   2 +-
 .../gcc.target/i386/avx512f-vmovdqa32-1.c     |   2 +-
 .../gcc.target/i386/avx512f-vmovdqa64-1.c     |   2 +-
 .../gcc.target/i386/avx512vl-vmovapd-1.c      |   4 +-
 .../gcc.target/i386/avx512vl-vmovaps-1.c      |   4 +-
 .../gcc.target/i386/avx512vl-vmovdqa32-1.c    |   4 +-
 .../gcc.target/i386/avx512vl-vmovdqa64-1.c    |   4 +-
 gcc/testsuite/gcc.target/i386/pr97642-1.c     |  23 +++
 gcc/testsuite/gcc.target/i386/pr97642-2.c     |  77 ++++++++++
 13 files changed, 228 insertions(+), 46 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr97642-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr97642-2.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 12e83df3010..0025aba4ad1 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -111,6 +111,8 @@ (define_c_enum "unspec" [
   UNSPEC_MASKOP
   UNSPEC_KORTEST
   UNSPEC_KTEST
+  ;; Mask load
+  UNSPEC_MASKLOAD
 
   ;; For embed. rounding feature
   UNSPEC_EMBEDDED_ROUNDING
@@ -1065,18 +1067,34 @@ (define_insn "mov<mode>_internal"
 	      ]
 	      (symbol_ref "true")))])
 
-(define_insn "<avx512>_load<mode>_mask"
-  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
+;; If mem_addr points to a memory region with less than whole vector size bytes
+;; of accessible memory and k is a mask that would prevent reading the inaccessible
+;; bytes from mem_addr, add UNSPEC_MASKLOAD to prevent it to be transformed to vpblendd
+;; See pr97642.
+(define_expand "<avx512>_load<mode>_mask"
+  [(set (match_operand:V48_AVX512VL 0 "register_operand")
 	(vec_merge:V48_AVX512VL
-	  (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "vm,vm")
-	  (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C,v")
-	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
+	  (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
+	  (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand")
+	  (match_operand:<avx512fmaskmode> 3 "register_operand")))]
   "TARGET_AVX512F"
 {
-  if (REG_P (operands[2])
-     && REGNO (operands[2]) != REGNO (operands[0]))
-    return "v<sseintprefix>blendm<ssemodesuffix>\t{%1, %2, %0%{%3%}|%0%{%3%}, %2, %1}";
+  if (MEM_P (operands[1]))
+    operands[1] = gen_rtx_UNSPEC (<MODE>mode,
+				 gen_rtvec(1, operands[1]),
+				 UNSPEC_MASKLOAD);
+})
 
+(define_insn "*<avx512>_load<mode>_mask"
+  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
+	(vec_merge:V48_AVX512VL
+	  (unspec:V48_AVX512VL
+	    [(match_operand:V48_AVX512VL 1 "memory_operand" "m")]
+	    UNSPEC_MASKLOAD)
+	  (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C")
+	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
+  "TARGET_AVX512F"
+{
   if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
     {
       if (misaligned_operand (operands[1], <MODE>mode))
@@ -1096,20 +1114,55 @@ (define_insn "<avx512>_load<mode>_mask"
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "<avx512>_load<mode>_mask"
-  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
+(define_insn_and_split "*<avx512>_load<mode>"
+  [(set (match_operand:V48_AVX512VL 0 "register_operand")
+	(unspec:V48_AVX512VL
+	  [(match_operand:V48_AVX512VL 1 "memory_operand")]
+	  UNSPEC_MASKLOAD))]
+  "TARGET_AVX512F"
+  "#"
+  "&& 1"
+  [(set (match_dup 0) (match_dup 1))])
+
+(define_expand "<avx512>_load<mode>_mask"
+  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
 	(vec_merge:VI12_AVX512VL
-	  (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm,vm")
-	  (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C,v")
-	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
+	  (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
+	  (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
+	  (match_operand:<avx512fmaskmode> 3 "register_operand")))]
   "TARGET_AVX512BW"
-  "@
-    vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}
-    vpblendm<ssemodesuffix>\t{%1, %2, %0%{%3%}|%0%{%3%}, %2, %1}"
+{
+  if (MEM_P (operands[1]))
+    operands[1] = gen_rtx_UNSPEC (<MODE>mode,
+				 gen_rtvec(1, operands[1]),
+				 UNSPEC_MASKLOAD);
+
+})
+
+(define_insn "*<avx512>_load<mode>_mask"
+  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
+	(vec_merge:VI12_AVX512VL
+	  (unspec:VI12_AVX512VL
+	    [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
+	    UNSPEC_MASKLOAD)
+	  (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
+	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
+  "TARGET_AVX512BW"
+  "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn_and_split "*<avx512>_load<mode>"
+  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
+	(unspec:VI12_AVX512VL
+	  [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
+	  UNSPEC_MASKLOAD))]
+  "TARGET_AVX512BW"
+  "#"
+  "&& 1"
+  [(set (match_dup 0) (match_dup 1))])
+
 (define_insn "avx512f_mov<ssescalarmodelower>_mask"
   [(set (match_operand:VF_128 0 "register_operand" "=v")
 	(vec_merge:VF_128
@@ -1171,21 +1224,50 @@ (define_insn "avx512f_store<mode>_mask"
    (set_attr "memory" "store")
    (set_attr "mode" "<MODE>")])
 
-(define_expand "<avx512>_blendm<mode>"
-  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
+(define_insn "<avx512>_blendm<mode>"
+  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
 	(vec_merge:V48_AVX512VL
-	  (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
-	  (match_operand:V48_AVX512VL 1 "register_operand" "v")
-	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
-  "TARGET_AVX512F")
+	  (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm,vm")
+	  (match_operand:V48_AVX512VL 1 "nonimm_or_0_operand" "0C,v")
+	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
+  "TARGET_AVX512F"
+{
+  if (REG_P (operands[1])
+     && REGNO (operands[1]) != REGNO (operands[0]))
+    return "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}";
 
-(define_expand "<avx512>_blendm<mode>"
-  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
+  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
+    {
+      if (misaligned_operand (operands[2], <MODE>mode))
+	return "vmovu<ssemodesuffix>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
+      else
+	return "vmova<ssemodesuffix>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
+    }
+  else
+    {
+      if (misaligned_operand (operands[2], <MODE>mode))
+	return "vmovdqu<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
+      else
+	return "vmovdqa<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
+    }
+}
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<avx512>_blendm<mode>"
+  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
 	(vec_merge:VI12_AVX512VL
-	  (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
-	  (match_operand:VI12_AVX512VL 1 "register_operand" "v")
-	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
-  "TARGET_AVX512BW")
+	  (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm,vm")
+	  (match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "0C,v")
+	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
+  "TARGET_AVX512BW"
+  "@
+    vmovdqu<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}
+    vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "<avx512>_store<mode>_mask"
   [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c
index dcb8caaa73e..8603a1909c7 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c
@@ -1,8 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512bw -mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovdqu16|vpblendmw)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "(?:vmovdqu16|vpblendmw)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "(?:vmovdqu16|vpblendmw)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c
index a335bcab3b2..d1e33926c81 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c
@@ -1,8 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512bw -mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovdqu8|vpblendmb)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "(?:vmovdqu8|vpblendmb)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "(?:vmovdqu8|vpblendmb)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovapd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovapd-1.c
index 7fc84b16e2b..e869f70665a 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vmovapd-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovapd-1.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovapd|vblendmpd)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovaps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovaps-1.c
index c2e2655fda6..a7635a3ebf2 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vmovaps-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovaps-1.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovaps|vblendmps)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa32-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa32-1.c
index 8fb816c1317..b93727d9ef2 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa32-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa32-1.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovdqa32|vpblendmd)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa64-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa64-1.c
index 4352b12b6e7..1c372c4f92a 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa64-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa64-1.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovdqa64|vpblendmq)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovapd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovapd-1.c
index fd59660f932..89c3ebefe35 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-vmovapd-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovapd-1.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovapd|vblendmpd)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "(?:vmovapd|vblendmpd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovaps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovaps-1.c
index 455b1a9dc37..2196ebb55d9 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-vmovaps-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovaps-1.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovaps|vblendmps)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "(?:vmovaps|vblendmps)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa32-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa32-1.c
index 5c6a3d0bfb4..9f991dbaca2 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa32-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa32-1.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovdqa32|vpblendmd)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "(?:vmovdqa32|vpblendmd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c
index 592541aeb8e..d20b4a7b997 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovdqa64|vpblendmq)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "(?:vmovdqa64|vpblendmq)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\\(\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 { target nonpic } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr97642-1.c b/gcc/testsuite/gcc.target/i386/pr97642-1.c
new file mode 100644
index 00000000000..26c6b26af4d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr97642-1.c
@@ -0,0 +1,23 @@
+/* PR target/97642 */
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-not { k[0-8] } } } */
+
+#include <immintrin.h>
+__m128i
+foo1 (__m128i src, void const* P)
+{
+  return _mm_mask_loadu_epi32 (src, 15, P);
+}
+
+__m256i
+foo2 (__m256i src, void const* P)
+{
+  return _mm256_mask_loadu_epi32 (src, 255, P);
+}
+
+__m512i
+foo3 (__m512i src, void const* P)
+{
+  return _mm512_mask_loadu_epi32 (src, 65535 , P);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr97642-2.c b/gcc/testsuite/gcc.target/i386/pr97642-2.c
new file mode 100644
index 00000000000..eb06a2739b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr97642-2.c
@@ -0,0 +1,77 @@
+/* PR target/97642 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -mavx512bw" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512dq } */
+/* { dg-require-effective-target avx512bw } */
+
+#include <assert.h>
+#include <immintrin.h>
+#include <stdint.h>
+#include <sys/mman.h>
+
+#define N 5
+
+// Faults with GCC because usage of vpblendd
+__m256i __attribute__((noinline)) mask_load(uint32_t * arr) {
+  __m256i tmp;
+  return _mm256_mask_loadu_epi32(tmp, (1 << N) - 1, arr);
+}
+
+// Faults
+__m256i __attribute__((noinline)) blend_load_asm(uint32_t * arr) {
+  __m256i tmp = _mm256_set1_epi64x(0);
+  asm volatile("vpblendd %[m], (%[arr]), %[tmp], %[tmp]\n\t"
+	       : [ tmp ] "+x"(tmp)
+	       : [ arr ] "r"(arr), [ m ] "i"(((1 << N) - 1))
+	       :);
+  return tmp;
+}
+
+// Does not fault
+__m256i __attribute__((noinline)) mask_load_asm(uint32_t * arr) {
+  __m256i           tmp;
+  asm volatile(
+	       "movb %[m], %%al\n\t"
+	       "kmovb %%eax, %%k1\n\t"
+	       "vmovdqu32 (%[arr]), %[tmp] %{%%k1} %{z%}\n\t"
+	       : [ tmp ] "+x"(tmp)
+	       : [ arr ] "r"(arr), [ m ] "i"(((1 << N) - 1))
+	       : "eax", "k1");
+  return tmp;
+}
+
+
+void __attribute__((noinline)) mask_store(uint32_t * arr, __m256i v) {
+  return _mm256_mask_storeu_epi32(arr, (1 << N) - 1, v);
+}
+
+
+#define NPAGES      (2)
+#define END_OF_PAGE (1024 - N)
+
+#ifndef LOAD_METHOD
+#define LOAD_METHOD mask_load // mask_load_asm does not fault
+#endif
+
+
+int
+main() {
+  if (!(__builtin_cpu_supports ("avx512dq")
+	&& __builtin_cpu_supports ("avx512vl")
+	&& __builtin_cpu_supports ("avx512bw")))
+    return 0;
+
+  uint32_t * addr =
+    (uint32_t *)mmap(NULL, NPAGES * 4096, PROT_READ | PROT_WRITE,
+		     MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+
+  for (uint32_t i = 0; i < NPAGES; i += 2) {
+
+    uint32_t page_offset      = 1024 * i + END_OF_PAGE;
+    uint32_t next_page_offset = 1024 * (i + 1);
+
+    assert(!mprotect(addr + next_page_offset, 4096, PROT_NONE));
+    mask_store(addr + page_offset, LOAD_METHOD(addr + page_offset));
+  }
+}
-- 
2.18.1

Reply via email to