Hi: This patch adds a pre-reload splitter that transforms vpcmpeqd with a zero operand into vptestnmd, which saves a vpxor instruction.
i.e. - vpxor %xmm1, %xmm1, %xmm1 - vpcmpd $0, %zmm1, %zmm0, %k0 + vptestnmd %zmm0, %zmm0, %k0 vpmovm2d zmm0, k0 Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. gcc/ChangeLog: PR target/98348 * config/i386/sse.md (*<avx512>_eq<mode>3): New pre-reload splitter. (*<avx512>_ucmp<mode>3): Changed from combine splitter to pre-reload splitter. gcc/testsuite/ChangeLog: PR target/98348 * gcc.target/i386/pr98348.c: New test. -- BR, Hongtao
From 7e59a44e079f6f4c9d39d625aaa3c5023cb545aa Mon Sep 17 00:00:00 2001 From: liuhongt <hongtao....@intel.com> Date: Fri, 16 Apr 2021 15:32:14 +0800 Subject: [PATCH] [i386] Add pre-reload splitter for below transformation. - vpxor %xmm1, %xmm1, %xmm1 - vpcmpd $0, %zmm1, %zmm0, %k0 + vptestnmd %zmm0, %zmm0, %k0 vpmovm2d zmm0, k0 gcc/ChangeLog: PR target/98348 * config/i386/sse.md (*<avx512>_eq<mode>3): New pre-reload splitter. (*<avx512>_ucmp<mode>3): Changed from combine splitter to pre-reload splitter. gcc/testsuite/ChangeLog: PR target/98348 * gcc.target/i386/pr98348.c: New test. --- gcc/config/i386/sse.md | 23 +++++++++- gcc/testsuite/gcc.target/i386/pr98348.c | 57 +++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr98348.c diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 9d3728d1cb0..f2c16db1c1a 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -3273,7 +3273,7 @@ (define_int_attr pcmp_signed_mask ;; For signed comparison, handle EQ 0: NEQ 4, ;; for unsigned comparison extra handle LE:2, NLE:6, equivalent to EQ and NEQ. 
-(define_split +(define_insn_and_split "*<avx512>_ucmp<mode>3" [(set (match_operand:<avx512fmaskmode> 0 "register_operand") (unspec:<avx512fmaskmode> [(us_minus:VI12_AVX512VL @@ -3282,9 +3282,11 @@ (define_split (match_operand:VI12_AVX512VL 3 "const0_operand") (match_operand:SI 4 "const_0_to_7_operand")] UNSPEC_PCMP_ITER))] - "TARGET_AVX512BW + "TARGET_AVX512BW && ix86_pre_reload_split () && ix86_binary_operator_ok (US_MINUS, <MODE>mode, operands) && (INTVAL (operands[4]) & <pcmp_signed_mask>) == 0" + "#" + "&& 1" [(const_int 0)] { /* LE: 2, NLT: 5, NLE: 6, LT: 1 */ @@ -13026,6 +13028,23 @@ (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1" (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) +(define_insn_and_split "*<avx512>_eq<mode>3" + [(set (match_operand:<avx512fmaskmode> 0 "register_operand") + (unspec:<avx512fmaskmode> + [(match_operand:VI_AVX512BW 1 "nonimm_or_0_operand") + (match_operand:VI_AVX512BW 2 "nonimm_or_0_operand") + (const_int 0)] + UNSPEC_PCMP_ITER))] + "TARGET_AVX512F && ix86_pre_reload_split () + && (operands[1] == CONST0_RTX (<MODE>mode) + || operands[2] == CONST0_RTX (<MODE>mode))" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:<avx512fmaskmode> + [(match_dup 1) (match_dup 2)] + UNSPEC_MASKED_EQ))]) + (define_insn "*sse4_1_eqv2di3" [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x") (eq:V2DI diff --git a/gcc/testsuite/gcc.target/i386/pr98348.c b/gcc/testsuite/gcc.target/i386/pr98348.c new file mode 100644 index 00000000000..3c0259f8ca7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr98348.c @@ -0,0 +1,57 @@ +/* PR target/98348 */ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -O2" } */ +/* { dg-final { scan-assembler-times "vptestnm\[bwdq\]" "8" } } */ +/* { dg-final { scan-assembler-not "pcmp" } } */ +/* { dg-final { scan-assembler-not "pxor" } } */ + + +typedef int v16si __attribute__ ((vector_size(64))); +typedef short v32hi __attribute__ ((vector_size(64))); +typedef char v64qi __attribute__ 
((vector_size(64))); +typedef long long v8di __attribute__ ((vector_size(64))); +typedef unsigned int v16usi __attribute__ ((vector_size(64))); +typedef unsigned short v32uhi __attribute__ ((vector_size(64))); +typedef unsigned char v64uqi __attribute__ ((vector_size(64))); +typedef unsigned long long v8udi __attribute__ ((vector_size(64))); + + +v8di +compareq(v8di x) { + return x == 0; +} + +v16si +compared(v16si x) { + return x == 0; +} + +v32hi +comparew(v32hi x) { + return x == 0; +} + +v64qi +compareb(v64qi x) { + return x == 0; +} + +v8udi +compareuq(v8udi x) { + return x == 0; +} + +v16usi +compareud(v16usi x) { + return x == 0; +} + +v32uhi +compareuw(v32uhi x) { + return x == 0; +} + +v64uqi +compareub(v64uqi x) { + return x == 0; +} -- 2.18.1