The define_insns <avx512>_load<mode>_mask and <avx512>_blendm<mode> have the same pattern, so <avx512>_load<mode>_mask is always matched because it appears earlier in the md file. This can lose optimization opportunities in pass_reload, since <avx512>_load<mode>_mask only has the constraint "0C" for operand 2, and the "v" constraint in <avx512>_blendm<mode> is never matched.
2020-07-21 Hongtao Liu <hongtao....@intel.com> gcc/ PR target/96246 * config/i386/sse.md (<avx512>_load<mode>_mask, <avx512>_load<mode>_mask): Extend to generate blendm instructions. (<avx512>_blendm<mode>, <avx512>_blendm<mode>): Change define_insn to define_expand. gcc/testsuite/ * gcc.target/i386/avx512bw-pr96246-1.c: New test. * gcc.target/i386/avx512bw-pr96246-2.c: New test. * gcc.target/i386/avx512vl-pr96246-1.c: New test. * gcc.target/i386/avx512vl-pr96246-2.c: New test. * gcc.target/i386/avx512bw-vmovdqu16-1.c: Adjust test. * gcc.target/i386/avx512bw-vmovdqu8-1.c: Adjust test. * gcc.target/i386/avx512f-vmovapd-1.c: Adjust test. * gcc.target/i386/avx512f-vmovaps-1.c: Adjust test. * gcc.target/i386/avx512f-vmovdqa32-1.c: Adjust test. * gcc.target/i386/avx512f-vmovdqa64-1.c: Adjust test. * gcc.target/i386/avx512vl-pr92686-movcc-1.c: Adjust test. * gcc.target/i386/avx512vl-vmovapd-1.c: Adjust test. * gcc.target/i386/avx512vl-vmovaps-1.c: Adjust test. * gcc.target/i386/avx512vl-vmovdqa32-1.c: Adjust test. * gcc.target/i386/avx512vl-vmovdqa64-1.c: Adjust test. -- BR, Hongtao
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index d3ad5833e1f..35801a847ef 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1057,11 +1057,15 @@ (define_insn "<avx512>_load<mode>_mask" [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v") (vec_merge:V48_AVX512VL - (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m") - (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C,0C") + (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "vm,vm") + (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C,v") (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))] "TARGET_AVX512F" { + if (REG_P (operands[2]) + && REGNO (operands[2]) != REGNO (operands[0])) + return "v<sseintprefix>blendm<ssemodesuffix>\t{%1, %2, %0%{%3%}|%0%{%3%}, %2, %1}"; + if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode))) { if (misaligned_operand (operands[1], <MODE>mode)) @@ -1079,20 +1083,20 @@ } [(set_attr "type" "ssemov") (set_attr "prefix" "evex") - (set_attr "memory" "none,load") (set_attr "mode" "<sseinsnmode>")]) (define_insn "<avx512>_load<mode>_mask" [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v") (vec_merge:VI12_AVX512VL - (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m") - (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C,0C") + (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm,vm") + (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C,v") (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))] "TARGET_AVX512BW" - "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + "@ + vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1} + vpblendm<ssemodesuffix>\t{%1, %2, %0%{%3%}|%0%{%3%}, %2, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") - (set_attr "memory" "none,load") (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx512f_mov<ssescalarmodelower>_mask" @@ -1156,29 +1160,21 @@ (set_attr "memory" "store") (set_attr "mode" "<MODE>")]) -(define_insn 
"<avx512>_blendm<mode>" +(define_expand "<avx512>_blendm<mode>" [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v") (vec_merge:V48_AVX512VL (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm") (match_operand:V48_AVX512VL 1 "register_operand" "v") (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))] - "TARGET_AVX512F" - "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}" - [(set_attr "type" "ssemov") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) + "TARGET_AVX512F") -(define_insn "<avx512>_blendm<mode>" +(define_expand "<avx512>_blendm<mode>" [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") (vec_merge:VI12_AVX512VL (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm") (match_operand:VI12_AVX512VL 1 "register_operand" "v") (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))] - "TARGET_AVX512BW" - "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}" - [(set_attr "type" "ssemov") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) + "TARGET_AVX512BW") (define_insn "<avx512>_store<mode>_mask" [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m") diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr96246-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr96246-1.c new file mode 100644 index 00000000000..2bfcc840a91 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr96246-1.c @@ -0,0 +1,30 @@ +/* PR target/96246 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -mavx512bw" } */ +/* { dg-final { scan-assembler-times "vpblendm\[bwdq\]\[\t ]" 4 } } */ +/* { dg-final { scan-assembler-times "vblendmp\[sd\]\[\t ]" 2 } } */ + +typedef char v64qi __attribute__((vector_size (64))); +typedef short v32hi __attribute__((vector_size (64))); +typedef int v16si __attribute__((vector_size (64))); +typedef long long v8di __attribute__((vector_size (64))); +typedef float v16sf __attribute__((vector_size (64))); +typedef double v8df 
__attribute__((vector_size (64))); + +#define COMPILE_TEST(vtype, num) \ + vtype \ + __attribute__ ((noipa)) \ + foo_##vtype (vtype a, vtype b, vtype c, vtype d) \ + { \ + vtype e; \ + for (int i = 0; i != num; i++) \ + e[i] = a[i] > b[i] ? c[i] : d[i]; \ + return e; \ + } + +COMPILE_TEST (v64qi, 64); +COMPILE_TEST (v32hi, 32); +COMPILE_TEST (v16si, 16); +COMPILE_TEST (v8di, 8); +COMPILE_TEST (v16sf, 16); +COMPILE_TEST (v8df, 8); diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr96246-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr96246-2.c new file mode 100644 index 00000000000..422fcfe4ea8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr96246-2.c @@ -0,0 +1,47 @@ +/* PR target/96246 */ +/* { dg-do run } */ +/* { dg-require-effective-target avx512bw } */ +/* { dg-options "-Ofast -mavx512bw" } */ + +#ifndef CHECK +#define CHECK "avx512f-helper.h" +#endif + +#include CHECK + +#ifndef TEST +#define TEST avx512bw_test +#endif + +#include "avx512bw-pr96246-1.c" + +#define RUNTIME_TEST(vtype, num) \ + do \ + { \ + vtype a, b, c, d; \ + vtype res; \ + for (int i = 0; i != num; i++) \ + { \ + a[i] = i * 2; \ + b[i] = i * i - 5; \ + c[i] = 1; \ + d[i] = 0; \ + } \ + res = foo_##vtype (a, b, c, d); \ + for (int i = 0; i != num; i++) \ + if (res [i] != (a[i] > b[i] ? 
c[i] : d[i])) \ + __builtin_abort (); \ + } \ + while (0) + +static void +__attribute__ ((optimize (0))) +TEST (void) +{ + RUNTIME_TEST (v64qi, 64); + RUNTIME_TEST (v32hi, 32); + RUNTIME_TEST (v16si, 16); + RUNTIME_TEST (v8di, 8); + RUNTIME_TEST (v16sf, 16); + RUNTIME_TEST (v8df, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c index 06550420360..a0d0e36389b 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c @@ -1,8 +1,8 @@ /* { dg-do compile } */ /* { dg-options "-mavx512bw -mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:vmovdqu16|vpblendmw)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:vmovdqu16|vpblendmw)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:vmovdqu16|vpblendmw)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git 
a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c index 7f2a964bf0b..6d24e79bf66 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c @@ -1,8 +1,8 @@ /* { dg-do compile } */ /* { dg-options "-mavx512bw -mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:vmovdqu8|vpblendmb)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:vmovdqu8|vpblendmb)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:vmovdqu8|vpblendmb)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovapd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovapd-1.c index e869f70665a..7fc84b16e2b 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vmovapd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovapd-1.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ 
-/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:vmovapd|vblendmpd)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovaps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovaps-1.c index a7635a3ebf2..c2e2655fda6 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vmovaps-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovaps-1.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:vmovaps|vblendmps)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa32-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa32-1.c index b93727d9ef2..8fb816c1317 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa32-1.c +++ 
b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa32-1.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:vmovdqa32|vpblendmd)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa64-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa64-1.c index 1c372c4f92a..4352b12b6e7 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa64-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa64-1.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:vmovdqa64|vpblendmq)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-1.c 
b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-1.c index 1b9644a3790..8d0b2e94001 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-1.c @@ -3,10 +3,10 @@ /* { dg-options "-Ofast -mavx512bw -mavx512vl -mno-xop -mprefer-vector-width=256" } */ /* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */ /* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */ -/* { dg-final { scan-assembler-times "vmovdq\[au\]8\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */ -/* { dg-final { scan-assembler-times "vmovdq\[au\]16\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */ -/* { dg-final { scan-assembler-times "vmovdq\[au\]32\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */ -/* { dg-final { scan-assembler-times "vmovdq\[au\]64\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */ +/* { dg-final { scan-assembler-times "(?:vmovdq\[au\]8|vpblendmb)\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */ +/* { dg-final { scan-assembler-times "(?:vmovdq\[au\]16|vpblendmw)\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */ +/* { dg-final { scan-assembler-times "(?:vmovdq\[au\]32|vpblendmd)\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */ +/* { dg-final { scan-assembler-times "(?:vmovdq\[au\]64|vpblendmq)\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */ __attribute__((noipa)) void f1 (char *__restrict dst, char *__restrict src1, char *__restrict src2) diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr96246-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr96246-1.c new file mode 100644 index 00000000000..95357d6fc84 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr96246-1.c @@ -0,0 +1,36 @@ +/* PR target/96246 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -mavx512bw -mavx512vl" } */ +/* { dg-final { scan-assembler-times "vpblendm\[bwdq\]\[\t ]" 6 } } */ +/* { dg-final { scan-assembler-times "vblendmp\[sd\]\[\t ]" 3 } } */ + +typedef char v16qi __attribute__ ((vector_size (16))); 
+typedef char v32qi __attribute__ ((vector_size (32))); +typedef char v16hi __attribute__ ((vector_size (32))); +typedef int v4si __attribute__((vector_size (16))); +typedef int v8si __attribute__((vector_size (32))); +typedef long long v4di __attribute__((vector_size (32))); +typedef float v4sf __attribute__((vector_size (16))); +typedef float v8sf __attribute__((vector_size (32))); +typedef double v4df __attribute__((vector_size (32))); + +#define COMPILE_TEST(vtype, num) \ + vtype \ + __attribute__ ((noipa)) \ + foo_##vtype (vtype a, vtype b, vtype c, vtype d) \ + { \ + vtype e; \ + for (int i = 0; i != num; i++) \ + e[i] = a[i] > b[i] ? c[i] : d[i]; \ + return e; \ + } + +COMPILE_TEST (v16qi, 16); +COMPILE_TEST (v32qi, 32); +COMPILE_TEST (v16hi, 16); +COMPILE_TEST (v4si, 4); +COMPILE_TEST (v8si, 8); +COMPILE_TEST (v4sf, 4); +COMPILE_TEST (v8sf, 8); +COMPILE_TEST (v4di, 4); +COMPILE_TEST (v4df, 4); diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr96246-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr96246-2.c new file mode 100644 index 00000000000..d219f7c15ad --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr96246-2.c @@ -0,0 +1,51 @@ +/* PR target/96246 */ +/* { dg-do run } */ +/* { dg-require-effective-target avx512bw } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-options "-Ofast -mavx512bw -mavx512vl" } */ + +#ifndef CHECK +#define CHECK "avx512f-helper.h" +#endif + +#include CHECK + +#ifndef TEST +#define TEST avx512bw_test +#endif + +#include "avx512vl-pr96246-1.c" + +#define RUNTIME_TEST(vtype, num) \ + do \ + { \ + vtype a, b, c, d; \ + vtype res; \ + for (int i = 0; i != num; i++) \ + { \ + a[i] = i * 2; \ + b[i] = i * i - 5; \ + c[i] = 1; \ + d[i] = 0; \ + } \ + res = foo_##vtype (a, b, c, d); \ + for (int i = 0; i != num; i++) \ + if (res [i] != (a[i] > b[i] ? 
c[i] : d[i])) \ + __builtin_abort (); \ + } \ + while (0) + +static void +__attribute__ ((optimize (0))) +TEST (void) +{ + RUNTIME_TEST (v16qi, 16); + RUNTIME_TEST (v32qi, 32); + RUNTIME_TEST (v16hi, 16); + RUNTIME_TEST (v4si, 4); + RUNTIME_TEST (v8si, 8); + RUNTIME_TEST (v4sf, 4); + RUNTIME_TEST (v8sf, 8); + RUNTIME_TEST (v4di, 4); + RUNTIME_TEST (v4df, 4); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovapd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovapd-1.c index 89c3ebefe35..fd59660f932 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vmovapd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovapd-1.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:vmovapd|vblendmpd)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:vmovapd|vblendmpd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovaps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovaps-1.c index 2196ebb55d9..455b1a9dc37 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vmovaps-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovaps-1.c @@ -1,7 +1,7 @@ /* { dg-do 
compile } */ /* { dg-options "-mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:vmovaps|vblendmps)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:vmovaps|vblendmps)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa32-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa32-1.c index e391de5b432..217afbc6904 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa32-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa32-1.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:vmovdqa32|vpblendmd)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:vmovdqa32|vpblendmd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final 
{ scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c index db4d9d14875..9dc794d6a80 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:vmovdqa64|vpblendmq)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:vmovdqa64|vpblendmq)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\\(\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */