Hi: When programmers explicitly use mask loaded intrinsics, don't transform the instruction to vpblend{b,w,d,q} since If mem_addr points to a memory region with less than whole vector size of accessible memory, the mask would prevent reading the inaccessible bytes which could avoid fault.
Bootstrap is ok, gcc regress test for i386/x86_64 backend is ok. Ok for trunk? gcc/ChangeLog: PR target/97642 * config/i386/sse.md (UNSPEC_MASKLOAD): New unspec. (*<avx512>_load<mode>_mask): New define_insns for masked load instructions. (<avx512>_load<mode>_mask): Changed to define_expands which specifically handle memory operands. (<avx512>_blendm<mode>): Changed to define_insns which are same as original <avx512>_load<mode>_mask with adjustment of operands order. (*<avx512>_load<mode>): New define_insn_and_split which is used to optimize for masked load with all one mask. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512bw-vmovdqu16-1.c: Adjust testcase to make sure only masked load instruction is generated. * gcc.target/i386/avx512bw-vmovdqu8-1.c: Ditto. * gcc.target/i386/avx512f-vmovapd-1.c: Ditto. * gcc.target/i386/avx512f-vmovaps-1.c: Ditto. * gcc.target/i386/avx512f-vmovdqa32-1.c: Ditto. * gcc.target/i386/avx512f-vmovdqa64-1.c: Ditto. * gcc.target/i386/avx512vl-vmovapd-1.c: Ditto. * gcc.target/i386/avx512vl-vmovaps-1.c: Ditto. * gcc.target/i386/avx512vl-vmovdqa32-1.c: Ditto. * gcc.target/i386/avx512vl-vmovdqa64-1.c: Ditto. * gcc.target/i386/pr97642-1.c: New test. * gcc.target/i386/pr97642-2.c: New test. -- BR, Hongtao
From 48cf0adcd55395653891888f4768b8bdc19786f2 Mon Sep 17 00:00:00 2001 From: liuhongt <hongtao....@intel.com> Date: Tue, 3 Nov 2020 17:26:43 +0800 Subject: [PATCH] Fix incorrect replacement of vmovdqu32 with vpblendd which can cause fault. gcc/ChangeLog: PR target/97642 * config/i386/sse.md (UNSPEC_MASKLOAD): New unspec. (*<avx512>_load<mode>_mask): New define_insns for masked load instructions. (<avx512>_load<mode>_mask): Changed to define_expands which specifically handle memory operands. (<avx512>_blendm<mode>): Changed to define_insns which are same as original <avx512>_load<mode>_mask with adjustment of operands order. (*<avx512>_load<mode>): New define_insn_and_split which is used to optimize for masked load with all one mask. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512bw-vmovdqu16-1.c: Adjust testcase to make sure only masked load instruction is generated. * gcc.target/i386/avx512bw-vmovdqu8-1.c: Ditto. * gcc.target/i386/avx512f-vmovapd-1.c: Ditto. * gcc.target/i386/avx512f-vmovaps-1.c: Ditto. * gcc.target/i386/avx512f-vmovdqa32-1.c: Ditto. * gcc.target/i386/avx512f-vmovdqa64-1.c: Ditto. * gcc.target/i386/avx512vl-vmovapd-1.c: Ditto. * gcc.target/i386/avx512vl-vmovaps-1.c: Ditto. * gcc.target/i386/avx512vl-vmovdqa32-1.c: Ditto. * gcc.target/i386/avx512vl-vmovdqa64-1.c: Ditto. * gcc.target/i386/pr97642-1.c: New test. * gcc.target/i386/pr97642-2.c: New test. --- gcc/config/i386/sse.md | 138 ++++++++++++++---- .../gcc.target/i386/avx512bw-vmovdqu16-1.c | 6 +- .../gcc.target/i386/avx512bw-vmovdqu8-1.c | 6 +- .../gcc.target/i386/avx512f-vmovapd-1.c | 2 +- .../gcc.target/i386/avx512f-vmovaps-1.c | 2 +- .../gcc.target/i386/avx512f-vmovdqa32-1.c | 2 +- .../gcc.target/i386/avx512f-vmovdqa64-1.c | 2 +- .../gcc.target/i386/avx512vl-vmovapd-1.c | 4 +- .../gcc.target/i386/avx512vl-vmovaps-1.c | 4 +- .../gcc.target/i386/avx512vl-vmovdqa32-1.c | 4 +- .../gcc.target/i386/avx512vl-vmovdqa64-1.c | 4 +- gcc/testsuite/gcc.target/i386/pr97642-1.c | 23 +++ gcc/testsuite/gcc.target/i386/pr97642-2.c | 77 ++++++++++ 13 files changed, 228 insertions(+), 46 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr97642-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr97642-2.c diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 12e83df3010..0025aba4ad1 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -111,6 +111,8 @@ (define_c_enum "unspec" [ UNSPEC_MASKOP UNSPEC_KORTEST UNSPEC_KTEST + ;; Mask load + UNSPEC_MASKLOAD ;; For embed. rounding feature UNSPEC_EMBEDDED_ROUNDING @@ -1065,18 +1067,34 @@ (define_insn "mov<mode>_internal" ] (symbol_ref "true")))]) -(define_insn "<avx512>_load<mode>_mask" - [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v") +;; If mem_addr points to a memory region with less than whole vector size bytes +;; of accessible memory and k is a mask that would prevent reading the inaccessible +;; bytes from mem_addr, add UNSPEC_MASKLOAD to prevent it to be transformed to vpblendd +;; See pr97642. +(define_expand "<avx512>_load<mode>_mask" + [(set (match_operand:V48_AVX512VL 0 "register_operand") (vec_merge:V48_AVX512VL - (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "vm,vm") - (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C,v") - (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))] + (match_operand:V48_AVX512VL 1 "nonimmediate_operand") + (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand") + (match_operand:<avx512fmaskmode> 3 "register_operand")))] "TARGET_AVX512F" { - if (REG_P (operands[2]) - && REGNO (operands[2]) != REGNO (operands[0])) - return "v<sseintprefix>blendm<ssemodesuffix>\t{%1, %2, %0%{%3%}|%0%{%3%}, %2, %1}"; + if (MEM_P (operands[1])) + operands[1] = gen_rtx_UNSPEC (<MODE>mode, + gen_rtvec(1, operands[1]), + UNSPEC_MASKLOAD); +}) +(define_insn "*<avx512>_load<mode>_mask" + [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v") + (vec_merge:V48_AVX512VL + (unspec:V48_AVX512VL + [(match_operand:V48_AVX512VL 1 "memory_operand" "m")] + UNSPEC_MASKLOAD) + (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C") + (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))] + "TARGET_AVX512F" +{ if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode))) { if (misaligned_operand (operands[1], <MODE>mode)) @@ -1096,20 +1114,55 @@ (define_insn "<avx512>_load<mode>_mask" (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "<avx512>_load<mode>_mask" - [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v") +(define_insn_and_split "*<avx512>_load<mode>" + [(set (match_operand:V48_AVX512VL 0 "register_operand") + (unspec:V48_AVX512VL + [(match_operand:V48_AVX512VL 1 "memory_operand")] + UNSPEC_MASKLOAD))] + "TARGET_AVX512F" + "#" + "&& 1" + [(set (match_dup 0) (match_dup 1))]) + +(define_expand "<avx512>_load<mode>_mask" + [(set (match_operand:VI12_AVX512VL 0 "register_operand") (vec_merge:VI12_AVX512VL - (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm,vm") - (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C,v") - (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))] + (match_operand:VI12_AVX512VL 1 "nonimmediate_operand") + (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand") + (match_operand:<avx512fmaskmode> 3 "register_operand")))] "TARGET_AVX512BW" - "@ - vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1} - vpblendm<ssemodesuffix>\t{%1, %2, %0%{%3%}|%0%{%3%}, %2, %1}" +{ + if (MEM_P (operands[1])) + operands[1] = gen_rtx_UNSPEC (<MODE>mode, + gen_rtvec(1, operands[1]), + UNSPEC_MASKLOAD); + +}) + +(define_insn "*<avx512>_load<mode>_mask" + [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") + (vec_merge:VI12_AVX512VL + (unspec:VI12_AVX512VL + [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")] + UNSPEC_MASKLOAD) + (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C") + (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))] + "TARGET_AVX512BW" + "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) +(define_insn_and_split "*<avx512>_load<mode>" + [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") + (unspec:VI12_AVX512VL + [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")] + UNSPEC_MASKLOAD))] + "TARGET_AVX512BW" + "#" + "&& 1" + [(set (match_dup 0) (match_dup 1))]) + (define_insn "avx512f_mov<ssescalarmodelower>_mask" [(set (match_operand:VF_128 0 "register_operand" "=v") (vec_merge:VF_128 @@ -1171,21 +1224,50 @@ (define_insn "avx512f_store<mode>_mask" (set_attr "memory" "store") (set_attr "mode" "<MODE>")]) -(define_expand "<avx512>_blendm<mode>" - [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v") +(define_insn "<avx512>_blendm<mode>" + [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v") (vec_merge:V48_AVX512VL - (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm") - (match_operand:V48_AVX512VL 1 "register_operand" "v") - (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))] - "TARGET_AVX512F") + (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm,vm") + (match_operand:V48_AVX512VL 1 "nonimm_or_0_operand" "0C,v") + (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))] + "TARGET_AVX512F" +{ + if (REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0])) + return "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"; -(define_expand "<avx512>_blendm<mode>" - [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") + if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode))) + { + if (misaligned_operand (operands[2], <MODE>mode)) + return "vmovu<ssemodesuffix>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}"; + else + return "vmova<ssemodesuffix>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}"; + } + else + { + if (misaligned_operand (operands[2], <MODE>mode)) + return "vmovdqu<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}"; + else + return "vmovdqa<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}"; + } +} + [(set_attr "type" "ssemov") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "<avx512>_blendm<mode>" + [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v") (vec_merge:VI12_AVX512VL - (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm") - (match_operand:VI12_AVX512VL 1 "register_operand" "v") - (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))] - "TARGET_AVX512BW") + (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm,vm") + (match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "0C,v") + (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))] + "TARGET_AVX512BW" + "@ + vmovdqu<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2} + vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}" + [(set_attr "type" "ssemov") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) (define_insn "<avx512>_store<mode>_mask" [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m") diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c index dcb8caaa73e..8603a1909c7 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c @@ -1,8 +1,8 @@ /* { dg-do compile } */ /* { dg-options "-mavx512bw -mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "(?:vmovdqu16|vpblendmw)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "(?:vmovdqu16|vpblendmw)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "(?:vmovdqu16|vpblendmw)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c index a335bcab3b2..d1e33926c81 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c @@ -1,8 +1,8 @@ /* { dg-do compile } */ /* { dg-options "-mavx512bw -mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "(?:vmovdqu8|vpblendmb)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "(?:vmovdqu8|vpblendmb)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "(?:vmovdqu8|vpblendmb)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovapd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovapd-1.c index 7fc84b16e2b..e869f70665a 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vmovapd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovapd-1.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "(?:vmovapd|vblendmpd)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovaps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovaps-1.c index c2e2655fda6..a7635a3ebf2 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vmovaps-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovaps-1.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "(?:vmovaps|vblendmps)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa32-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa32-1.c index 8fb816c1317..b93727d9ef2 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa32-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa32-1.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "(?:vmovdqa32|vpblendmd)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa64-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa64-1.c index 4352b12b6e7..1c372c4f92a 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa64-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa64-1.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "(?:vmovdqa64|vpblendmq)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovapd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovapd-1.c index fd59660f932..89c3ebefe35 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vmovapd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovapd-1.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "(?:vmovapd|vblendmpd)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "(?:vmovapd|vblendmpd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovaps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovaps-1.c index 455b1a9dc37..2196ebb55d9 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vmovaps-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovaps-1.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "(?:vmovaps|vblendmps)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "(?:vmovaps|vblendmps)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa32-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa32-1.c index 5c6a3d0bfb4..9f991dbaca2 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa32-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa32-1.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "(?:vmovdqa32|vpblendmd)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "(?:vmovdqa32|vpblendmd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c index 592541aeb8e..d20b4a7b997 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "(?:vmovdqa64|vpblendmq)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "(?:vmovdqa64|vpblendmq)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\\(\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 { target nonpic } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr97642-1.c b/gcc/testsuite/gcc.target/i386/pr97642-1.c new file mode 100644 index 00000000000..26c6b26af4d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr97642-1.c @@ -0,0 +1,23 @@ +/* PR target/97642 */ +/* { dg-do compile } */ +/* { dg-options "-mavx512vl -O2" } */ +/* { dg-final { scan-assembler-not { k[0-8] } } } */ + +#include <immintrin.h> +__m128i +foo1 (__m128i src, void const* P) +{ + return _mm_mask_loadu_epi32 (src, 15, P); +} + +__m256i +foo2 (__m256i src, void const* P) +{ + return _mm256_mask_loadu_epi32 (src, 255, P); +} + +__m512i +foo3 (__m512i src, void const* P) +{ + return _mm512_mask_loadu_epi32 (src, 65535 , P); +} diff --git a/gcc/testsuite/gcc.target/i386/pr97642-2.c b/gcc/testsuite/gcc.target/i386/pr97642-2.c new file mode 100644 index 00000000000..eb06a2739b4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr97642-2.c @@ -0,0 +1,77 @@ +/* PR target/97642 */ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512dq -mavx512vl -mavx512bw" } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-require-effective-target avx512dq } */ +/* { dg-require-effective-target avx512bw } */ + +#include <assert.h> +#include <immintrin.h> +#include <stdint.h> +#include <sys/mman.h> + +#define N 5 + +// Faults with GCC because usage of vpblendd +__m256i __attribute__((noinline)) mask_load(uint32_t * arr) { + __m256i tmp; + return _mm256_mask_loadu_epi32(tmp, (1 << N) - 1, arr); +} + +// Faults +__m256i __attribute__((noinline)) blend_load_asm(uint32_t * arr) { + __m256i tmp = _mm256_set1_epi64x(0); + asm volatile("vpblendd %[m], (%[arr]), %[tmp], %[tmp]\n\t" + : [ tmp ] "+x"(tmp) + : [ arr ] "r"(arr), [ m ] "i"(((1 << N) - 1)) + :); + return tmp; +} + +// Does not fault +__m256i __attribute__((noinline)) mask_load_asm(uint32_t * arr) { + __m256i tmp; + asm volatile( + "movb %[m], %%al\n\t" + "kmovb %%eax, %%k1\n\t" + "vmovdqu32 (%[arr]), %[tmp] %{%%k1} %{z%}\n\t" + : [ tmp ] "+x"(tmp) + : [ arr ] "r"(arr), [ m ] "i"(((1 << N) - 1)) + : "eax", "k1"); + return tmp; +} + + +void __attribute__((noinline)) mask_store(uint32_t * arr, __m256i v) { + return _mm256_mask_storeu_epi32(arr, (1 << N) - 1, v); +} + + +#define NPAGES (2) +#define END_OF_PAGE (1024 - N) + +#ifndef LOAD_METHOD +#define LOAD_METHOD mask_load // mask_load_asm does not fault +#endif + + +int +main() { + if (!(__builtin_cpu_supports ("avx512dq") + && __builtin_cpu_supports ("avx512vl") + && __builtin_cpu_supports ("avx512bw"))) + return 0; + + uint32_t * addr = + (uint32_t *)mmap(NULL, NPAGES * 4096, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + + for (uint32_t i = 0; i < NPAGES; i += 2) { + + uint32_t page_offset = 1024 * i + END_OF_PAGE; + uint32_t next_page_offset = 1024 * (i + 1); + + assert(!mprotect(addr + next_page_offset, 4096, PROT_NONE)); + mask_store(addr + page_offset, LOAD_METHOD(addr + page_offset)); + } +} -- 2.18.1