[PATCH] [i386] Add define_insn_and_split for vpcmp{b, w, d, q} vpcmp{ph, ps, pd}.

2021-12-20 Thread liuhongt via Gcc-patches
The purpose of these define_insn_and_split patterns:
1. Combine vpcmpuw and zero_extend into a single vpcmpuw.
2. Canonicalize the vpcmpuw pattern so CSE can replace a duplicate vpcmpuw with just a kmov.
3. Use DImode as the dest of zero_extend so cprop_hardreg can eliminate the redundant kmov.

This should partially fix the issue in PR target/103750.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.

gcc/ChangeLog:

PR target/103750
* config/i386/sse.md
(*_cmp3_zero_extend):
New define_insn_and_split.
(*_cmp3): Ditto.
(*_cmp3_zero_extenddi): New define_insn.
(*_cmp3_zero_extend):
New define_insn_and_split.
(*_ucmp3_zero_extend):
Ditto.
(*_ucmp3): Ditto.
(*_ucmp3_zero_extenddi): New define_insn.
(*_ucmp3_zero_extend):
New define_insn_and_split.

gcc/testsuite/ChangeLog:

* gcc.target/i386/bitwise_mask_op-3.c: Adjust test.
* g++.target/i386/pr103750-1.C: New test.
---
 gcc/config/i386/sse.md| 267 ++
 gcc/testsuite/g++.target/i386/pr103750-1.C|  50 
 .../gcc.target/i386/bitwise_mask_op-3.c   |   6 +-
 3 files changed, 320 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/pr103750-1.C

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5196149ee32..fb885d58272 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -3702,6 +3702,75 @@ (define_insn 
"_cmp3"
(set_attr "prefix" "evex")
(set_attr "mode" "")])
 
+;; Those splitters are used to canonicalize vpcmpuw pattern, so that CSE can transform
+;; duplicated vpcmpuw to vpcmpuw and kmov.
+;; Choose biggest mode (DImode) as dest, so kmov can be optimized by cprop_hardreg.
+(define_insn_and_split 
"*_cmp3_zero_extend"
+  [(set (match_operand:SWI248x 0 "register_operand" "=k")
+   (zero_extend:SWI248x
+ (unspec:
+   [(match_operand:V48H_AVX512VL 1 "register_operand" "v")
+(match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
+(match_operand:SI 3 "" "n")]
+   UNSPEC_PCMP)))]
+  "TARGET_AVX512BW
+   && (GET_MODE_NUNITS (mode)
+   < GET_MODE_PRECISION (mode))"
+  "vcmp\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  "&& mode != E_DImode"
+  [(set (match_dup 0)
+   (zero_extend:DI
+ (unspec:
+   [(match_dup 1)
+(match_dup 2)
+(match_dup 3)]
+   UNSPEC_PCMP)))]
+  "operands[0] = lowpart_subreg (DImode, operands[0], mode);"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "")])
+
+(define_insn_and_split "*_cmp3"
+  [(set (match_operand: 0 "register_operand" "=k")
+   (unspec:
+ [(match_operand:V48H_AVX512VL 1 "register_operand" "v")
+  (match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
+  (match_operand:SI 3 "" "n")]
+ UNSPEC_PCMP))]
+  "TARGET_AVX512BW
+   && GET_MODE_NUNITS (mode) < 64"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (zero_extend:DI
+ (unspec:
+   [(match_dup 1)
+(match_dup 2)
+(match_dup 3)]
+   UNSPEC_PCMP)))]
+  "operands[0] = lowpart_subreg (DImode, operands[0], mode);"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "")])
+
+(define_insn "*_cmp3_zero_extenddi"
+  [(set (match_operand:DI 0 "register_operand" "=k")
+   (zero_extend:DI
+ (unspec:
+   [(match_operand:V48H_AVX512VL 1 "register_operand" "v")
+(match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
+(match_operand:SI 3 "" "n")]
+   UNSPEC_PCMP)))]
+  "TARGET_AVX512BW
+   && GET_MODE_NUNITS (mode) < 64"
+  "vcmp\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "")])
+
 (define_insn_and_split "*_cmp3"
   [(set (match_operand: 0 "register_operand")
(not:
@@ -3735,6 +3804,72 @@ (define_insn 
"_cmp3"
(set_attr "prefix" "evex")
(set_attr "mode" "")])
 
+(define_insn_and_split 
"*_cmp3_zero_extend"
+  [(set (match_operand:SWI248x 0 "register_operand" "=k")
+   (zero_extend:SWI248x
+ (unspec:
+   [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
+(match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
+(match_operand:SI 3 "" "n")]
+   UNSPEC_PCMP)))]
+  "TARGET_AVX512BW
+   && (GET_MODE_NUNITS (mode)
+   < GET_MODE_PRECISION (mode))"
+  "vpcmp\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  "&& mode != E_DImode"
+  [(set (match_dup 0)
+   (zero_extend:DI
+ (unspec:
+   [(match_dup 1)
+(match_dup 2)
+(match_dup 3)]
+  UNSPEC_PCMP)))]
+  "operands[0] = lowpart_subreg (DImode, operands[0], mode);"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate"

Re: [PATCH] [i386] Add define_insn_and_split for vpcmp{b, w, d, q} vpcmp{ph, ps, pd}.

2021-12-22 Thread Hongtao Liu via Gcc-patches
On Tue, Dec 21, 2021 at 2:27 PM liuhongt  wrote:
>
> The purpose of those define_insn_and_split:
> 1. Combine vpcmpuw and zero_extend into vpcmpuw.
> 2. Canonicalize vpcmpuw pattern so CSE can replace duplicate vpcmpuw to just 
> kmov
> 3. Use DImode as dest of zero_extend so cprop_hardreg can eliminate redundant 
> kmov.
Using DImode as the dest of zero_extend is too aggressive and causes
several regressions.
The new patch adds define_insn_and_split patterns that just combine vpcmpuw and
zero_extend into vpcmpuw.
Here's the patch I'm checking in.
>
> It should partially fix the issue in PR.
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ready to push to trunk.
>
> gcc/ChangeLog:
>
> PR target/103750
> * config/i386/sse.md
> (*_cmp3_zero_extend):
> New define_insn_and_split.
> (*_cmp3): Ditto.
> (*_cmp3_zero_extenddi): New define_insn.
> (*_cmp3_zero_extend):
> New define_insn_and_split.
> (*_ucmp3_zero_extend):
> Ditto.
> (*_ucmp3): Ditto.
> (*_ucmp3_zero_extenddi): New define_insn.
> (*_ucmp3_zero_extend):
> New define_insn_and_split.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/bitwise_mask_op-3.c: Adjust test/
> * g++.target/i386/pr103750-1.C: New test.
> ---
>  gcc/config/i386/sse.md| 267 ++
>  gcc/testsuite/g++.target/i386/pr103750-1.C|  50 
>  .../gcc.target/i386/bitwise_mask_op-3.c   |   6 +-
>  3 files changed, 320 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/g++.target/i386/pr103750-1.C
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 5196149ee32..fb885d58272 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -3702,6 +3702,75 @@ (define_insn 
> "_cmp3"
> (set_attr "prefix" "evex")
> (set_attr "mode" "")])
>
> +;; Those Splitters are used to canonicalize vpcmpuw pattern, so that CSE can 
> transfrom
> +;; duplicated vpcmpuw to vpcmpuw and kmov
> +;; Choose biggest mode(DImode) as dest, so kmov can be optimized by 
> cprop_hardreg.
> +(define_insn_and_split 
> "*_cmp3_zero_extend"
> +  [(set (match_operand:SWI248x 0 "register_operand" "=k")
> +   (zero_extend:SWI248x
> + (unspec:
> +   [(match_operand:V48H_AVX512VL 1 "register_operand" "v")
> +(match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
> +(match_operand:SI 3 "" "n")]
> +   UNSPEC_PCMP)))]
> +  "TARGET_AVX512BW
> +   && (GET_MODE_NUNITS (mode)
> +   < GET_MODE_PRECISION (mode))"
> +  "vcmp\t{%3, %2, %1, %0|%0, %1, %2, %3}"
> +  "&& mode != E_DImode"
> +  [(set (match_dup 0)
> +   (zero_extend:DI
> + (unspec:
> +   [(match_dup 1)
> +(match_dup 2)
> +(match_dup 3)]
> +   UNSPEC_PCMP)))]
> +  "operands[0] = lowpart_subreg (DImode, operands[0], mode);"
> +  [(set_attr "type" "ssecmp")
> +   (set_attr "length_immediate" "1")
> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "")])
> +
> +(define_insn_and_split "*_cmp3"
> +  [(set (match_operand: 0 "register_operand" "=k")
> +   (unspec:
> + [(match_operand:V48H_AVX512VL 1 "register_operand" "v")
> +  (match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
> +  (match_operand:SI 3 "" "n")]
> + UNSPEC_PCMP))]
> +  "TARGET_AVX512BW
> +   && GET_MODE_NUNITS (mode) < 64"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 0)
> +   (zero_extend:DI
> + (unspec:
> +   [(match_dup 1)
> +(match_dup 2)
> +(match_dup 3)]
> +   UNSPEC_PCMP)))]
> +  "operands[0] = lowpart_subreg (DImode, operands[0], 
> mode);"
> +  [(set_attr "type" "ssecmp")
> +   (set_attr "length_immediate" "1")
> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "")])
> +
> +(define_insn "*_cmp3_zero_extenddi"
> +  [(set (match_operand:DI 0 "register_operand" "=k")
> +   (zero_extend:DI
> + (unspec:
> +   [(match_operand:V48H_AVX512VL 1 "register_operand" "v")
> +(match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
> +(match_operand:SI 3 "" "n")]
> +   UNSPEC_PCMP)))]
> +  "TARGET_AVX512BW
> +   && GET_MODE_NUNITS (mode) < 64"
> +  "vcmp\t{%3, %2, %1, %0|%0, %1, %2, %3}"
> +  [(set_attr "type" "ssecmp")
> +   (set_attr "length_immediate" "1")
> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "")])
> +
>  (define_insn_and_split "*_cmp3"
>[(set (match_operand: 0 "register_operand")
> (not:
> @@ -3735,6 +3804,72 @@ (define_insn 
> "_cmp3"
> (set_attr "prefix" "evex")
> (set_attr "mode" "")])
>
> +(define_insn_and_split 
> "*_cmp3_zero_extend"
> +  [(set (match_operand:SWI248x 0 "register_operand" "=k")
> +   (zero_extend:SWI248x
> + (unspec:
> +   [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
> +(match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
> +