> -----Original Message----- > From: Yan Simonaytes <simonaytes....@ispras.ru> > Sent: Wednesday, July 26, 2023 2:11 AM > To: gcc-patches@gcc.gnu.org > Cc: Liu, Hongtao <hongtao....@intel.com>; Uros Bizjak <ubiz...@gmail.com>; > Yan Simonaytes <simonaytes....@ispras.ru> > Subject: [PATCH] Replace invariant ternlog operands > > Sometimes GCC generates ternlog with three operands, but some of them are > invariant. > For example: > > vpternlogq $252, %zmm2, %zmm1, %zmm0 > > In this case zmm1 register isn't used by ternlog. > So we should replace zmm1 with zmm0 or zmm2: > > vpternlogq $252, %zmm0, %zmm1, %zmm0 > > When the third operand of ternlog is memory and both others are invariant > we should add a load instruction from this memory to a register and replace the first > and the second operands with this register. > So instead of > > vpternlogq $85, (%rdi), %zmm1, %zmm0 > > we should emit > > vmovdqa64 (%rdi), %zmm0 > vpternlogq $85, %zmm0, %zmm0, %zmm0 > > gcc/ChangeLog: > > * config/i386/i386.cc (ternlog_invariant_operand_mask): New helper > function for replacing invariant operands. > (reduce_ternlog_operands): Likewise. > * config/i386/i386-protos.h (ternlog_invariant_operand_mask): > Prototype here. > (reduce_ternlog_operands): Likewise. > * config/i386/sse.md: > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/reduce-ternlog-operands-1.c: New test. > * gcc.target/i386/reduce-ternlog-operands-2.c: New test. 
> --- > gcc/config/i386/i386-protos.h | 2 + > gcc/config/i386/i386.cc | 45 +++++++++++++++++++ > gcc/config/i386/sse.md | 43 ++++++++++++++++++ > .../i386/reduce-ternlog-operands-1.c | 20 +++++++++ > .../i386/reduce-ternlog-operands-2.c | 11 +++++ > 5 files changed, 121 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/i386/reduce-ternlog-operands- > 1.c > create mode 100644 gcc/testsuite/gcc.target/i386/reduce-ternlog-operands- > 2.c > > diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h > index 27fe73ca65c..49398ef9936 100644 > --- a/gcc/config/i386/i386-protos.h > +++ b/gcc/config/i386/i386-protos.h > @@ -57,6 +57,8 @@ extern int standard_80387_constant_p (rtx); extern > const char *standard_80387_constant_opcode (rtx); extern rtx > standard_80387_constant_rtx (int); extern int standard_sse_constant_p (rtx, > machine_mode); > +extern int ternlog_invariant_operand_mask (rtx *operands); extern void > +reduce_ternlog_operands (rtx *operands); > extern const char *standard_sse_constant_opcode (rtx_insn *, rtx *); extern > bool ix86_standard_x87sse_constant_load_p (const rtx_insn *, rtx); extern > bool ix86_pre_reload_split (void); diff --git a/gcc/config/i386/i386.cc > b/gcc/config/i386/i386.cc index f0d6167e667..140de478571 100644 > --- a/gcc/config/i386/i386.cc > +++ b/gcc/config/i386/i386.cc > @@ -5070,6 +5070,51 @@ ix86_check_no_addr_space (rtx insn) > } > return true; > } > + > +/* Return mask of invariant operands: > + bit number 0 1 2 > + operand number 1 2 3. */ > + > +int > +ternlog_invariant_operand_mask (rtx *operands) { > + int mask = 0; > + int imm8 = XINT (operands[4], 0); > + > + if (((imm8 >> 4) & 0xF) == (imm8 & 0xF)) > + mask |= 1; > + if (((imm8 >> 2) & 0x33) == (imm8 & 0x33)) > + mask |= (1 << 1); > + if (((imm8 >> 1) & 0x55) == (imm8 & 0x55)) > + mask |= (1 << 2); > + > + return mask; > +} > + > +/* Replace one of the unused operators with the one used. 
*/ > + > +void > +reduce_ternlog_operands (rtx *operands) { > + int mask = ternlog_invariant_operand_mask (operands); > + > + if (mask & 1) /* the first operand is invariant. */ > + operands[1] = operands[2]; > + > + if (mask & 2) /* the second operand is invariant. */ > + operands[2] = operands[1]; > + > + if (mask & 4) /* the third operand is invariant. */ > + operands[3] = operands[1]; > + else if (!MEM_P (operands[3])) > + { > + if (mask & 1) /* the first operand is invariant. */ > + operands[1] = operands[3]; > + if (mask & 2) /* the second operands is invariant. */ > + operands[2] = operands[3]; > + } > +} > + > > > > /* Initialize the table of extra 80387 mathematical constants. */ > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index > a2099373123..f88d82b315c 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -12625,6 +12625,49 @@ > (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL") > (const_string "*")))]) > > +;; If the first and the second operands of ternlog are invariant and ;; > +the third operand is memory ;; then we should add load third operand > +from memory to register and ;; replace first and second operands with > +this register (define_split > + [(set (match_operand:V 0 "register_operand") > + (unspec:V > + [(match_operand:V 1 "register_operand") > + (match_operand:V 2 "register_operand") > + (match_operand:V 3 "memory_operand") > + (match_operand:SI 4 "const_0_to_255_operand")] > + UNSPEC_VTERNLOG))] > + "ternlog_invariant_operand_mask (operands) == 3 && !reload_completed" Maybe better with "!reload_completed && ternlog_invariant_operand_mask (operands) == 3" > + [(set (match_dup 0) > + (match_dup 3)) > + (set (match_dup 0) > + (unspec:V > + [(match_dup 0) > + (match_dup 0) > + (match_dup 0) > + (match_dup 4)] > + UNSPEC_VTERNLOG))]) > + > +;; Replace invariant ternlog operands with used operands ;; (except for > +the case discussed in the previous define_split) (define_split > + [(set 
(match_operand:V 0 "register_operand") > + (unspec:V > + [(match_operand:V 1 "register_operand") > + (match_operand:V 2 "register_operand") > + (match_operand:V 3 "nonimmediate_operand") > + (match_operand:SI 4 "const_0_to_255_operand")] > + UNSPEC_VTERNLOG))] > + "ternlog_invariant_operand_mask (operands) != 0 && !reload_completed" Ditto. > + [(set (match_dup 0) > + (unspec:V > + [(match_dup 1) > + (match_dup 2) > + (match_dup 3) > + (match_dup 4)] > + UNSPEC_VTERNLOG))] > + "reduce_ternlog_operands (operands);") > + Others LGTM. > ;; There must be lots of other combinations like ;; ;; (any_logic:V diff > --git > a/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-1.c > b/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-1.c > new file mode 100644 > index 00000000000..a7063df9dcb > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-1.c > @@ -0,0 +1,20 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx512f -O2" } */ > +/* { dg-final { scan-assembler-times {vmovdqa*} "4" } } */ > + > +#include <immintrin.h> > + > +__m512i f(__m512i* a, __m512i* b, __m512i* c) { > + return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], 119); } > + > +__m512i g(__m512i* a, __m512i* b, __m512i* c) { > + return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], 250); } > + > +__m512i h(__m512i* a, __m512i* b, __m512i* c) { > + return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], 252); } > diff --git a/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-2.c > b/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-2.c > new file mode 100644 > index 00000000000..b44986cc259 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-2.c > @@ -0,0 +1,11 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx512f -O2" } */ > +/* { dg-final { scan-assembler "vpternlog.*0.*0.*0" } } */ > + > +#include <immintrin.h> > + > +__m512i f(__m512i a, __m512i b, __m512i* c) { > + return _mm512_ternarylogic_epi64 (a, b, c[0], 0x55); } > + > -- > 2.34.1
RE: [PATCH] Replace invariant ternlog operands
Liu, Hongtao via Gcc-patches Wed, 26 Jul 2023 20:00:48 -0700
- [PATCH] Replace invariant ternlog operands Yan Simonaytes
- RE: [PATCH] Replace invariant ternlog op... Liu, Hongtao via Gcc-patches
- RE: [PATCH] Replace invariant ternlo... Alexander Monakov
- Re: [PATCH] Replace invariant te... Hongtao Liu via Gcc-patches