> -----Original Message-----
> From: Yan Simonaytes <simonaytes....@ispras.ru>
> Sent: Wednesday, July 26, 2023 2:11 AM
> To: gcc-patches@gcc.gnu.org
> Cc: Liu, Hongtao <hongtao....@intel.com>; Uros Bizjak <ubiz...@gmail.com>;
> Yan Simonaytes <simonaytes....@ispras.ru>
> Subject: [PATCH] Replace invariant ternlog operands
> 
> Sometimes GCC generates ternlog with three operands, but some of them are
> invariant.
> For example:
> 
> vpternlogq    $252, %zmm2, %zmm1, %zmm0
> 
> In this case the zmm2 register isn't used by ternlog.
> So we should replace zmm2 with zmm0 or zmm1:
> 
> vpternlogq    $252, %zmm0, %zmm1, %zmm0
> 
> When the third operand of ternlog is memory and both others are invariant,
> we should add a load instruction from this memory to a register and replace
> the first and the second operands with this register.
> So instead of
> 
> vpternlogq    $85, (%rdi), %zmm1, %zmm0
> 
> Should emit
> 
> vmovdqa64     (%rdi), %zmm0
> vpternlogq    $85, %zmm0, %zmm0, %zmm0
> 
> gcc/ChangeLog:
> 
>         * config/i386/i386.cc (ternlog_invariant_operand_mask): New helper
>       function for replacing invariant operands.
>         (reduce_ternlog_operands): Likewise.
>         * config/i386/i386-protos.h (ternlog_invariant_operand_mask):
> Prototype here.
>         (reduce_ternlog_operands): Likewise.
>         * config/i386/sse.md:
> 
> gcc/testsuite/ChangeLog:
> 
>         * gcc.target/i386/reduce-ternlog-operands-1.c: New test.
>         * gcc.target/i386/reduce-ternlog-operands-2.c: New test.
> ---
>  gcc/config/i386/i386-protos.h                 |  2 +
>  gcc/config/i386/i386.cc                       | 45 +++++++++++++++++++
>  gcc/config/i386/sse.md                        | 43 ++++++++++++++++++
>  .../i386/reduce-ternlog-operands-1.c          | 20 +++++++++
>  .../i386/reduce-ternlog-operands-2.c          | 11 +++++
>  5 files changed, 121 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-
> 1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-
> 2.c
> 
> diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
> index 27fe73ca65c..49398ef9936 100644
> --- a/gcc/config/i386/i386-protos.h
> +++ b/gcc/config/i386/i386-protos.h
> @@ -57,6 +57,8 @@ extern int standard_80387_constant_p (rtx);
>  extern const char *standard_80387_constant_opcode (rtx);
>  extern rtx standard_80387_constant_rtx (int);
>  extern int standard_sse_constant_p (rtx, machine_mode);
> +extern int ternlog_invariant_operand_mask (rtx *operands);
> +extern void reduce_ternlog_operands (rtx *operands);
>  extern const char *standard_sse_constant_opcode (rtx_insn *, rtx *);
>  extern bool ix86_standard_x87sse_constant_load_p (const rtx_insn *, rtx);
>  extern bool ix86_pre_reload_split (void);
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index f0d6167e667..140de478571 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -5070,6 +5070,51 @@ ix86_check_no_addr_space (rtx insn)
>      }
>    return true;
>  }
> +
> +/* Return mask of invariant operands:
> +   bit number     0 1 2
> +   operand number 1 2 3.  */
> +
> +int
> +ternlog_invariant_operand_mask (rtx *operands) {
> +  int mask = 0;
> +  int imm8 = XINT (operands[4], 0);
> +
> +  if (((imm8 >> 4) & 0xF) == (imm8 & 0xF))
> +    mask |= 1;
> +  if (((imm8 >> 2) & 0x33) == (imm8 & 0x33))
> +    mask |= (1 << 1);
> +  if (((imm8 >> 1) & 0x55) == (imm8 & 0x55))
> +    mask |= (1 << 2);
> +
> +  return mask;
> +}
> +
> +/* Replace one of the unused operands with the one used.  */
> +
> +void
> +reduce_ternlog_operands (rtx *operands) {
> +  int mask = ternlog_invariant_operand_mask (operands);
> +
> +  if (mask & 1) /* the first operand is invariant.  */
> +    operands[1] = operands[2];
> +
> +  if (mask & 2) /* the second operand is invariant.  */
> +    operands[2] = operands[1];
> +
> +  if (mask & 4)      /* the third operand is invariant.  */
> +   operands[3] = operands[1];
> +  else if (!MEM_P (operands[3]))
> +    {
> +      if (mask & 1) /* the first operand is invariant.  */
> +     operands[1] = operands[3];
> +      if (mask & 2) /* the second operand is invariant.  */
> +     operands[2] = operands[3];
> +    }
> +}
> +
> 
> 
> 
>  /* Initialize the table of extra 80387 mathematical constants.  */
> 
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index
> a2099373123..f88d82b315c 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -12625,6 +12625,49 @@
>                     (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
>                     (const_string "*")))])
> 
> +;; If the first and the second operands of ternlog are invariant and
> +;; the third operand is memory
> +;; then we should add load third operand from memory to register and
> +;; replace first and second operands with this register
> +(define_split
> +  [(set (match_operand:V 0 "register_operand")
> +     (unspec:V
> +       [(match_operand:V 1 "register_operand")
> +        (match_operand:V 2 "register_operand")
> +        (match_operand:V 3 "memory_operand")
> +        (match_operand:SI 4 "const_0_to_255_operand")]
> +       UNSPEC_VTERNLOG))]
> +  "ternlog_invariant_operand_mask (operands) == 3 && !reload_completed"
Maybe better with "!reload_completed  && ternlog_invariant_operand_mask 
(operands) == 3"
> +  [(set (match_dup 0)
> +     (match_dup 3))
> +   (set (match_dup 0)
> +     (unspec:V
> +       [(match_dup 0)
> +        (match_dup 0)
> +        (match_dup 0)
> +        (match_dup 4)]
> +       UNSPEC_VTERNLOG))])
> +
> +;; Replace invariant ternlog operands with used operands
> +;; (except for the case discussed in the previous define_split)
> +(define_split
> +  [(set (match_operand:V 0 "register_operand")
> +     (unspec:V
> +       [(match_operand:V 1 "register_operand")
> +        (match_operand:V 2 "register_operand")
> +        (match_operand:V 3 "nonimmediate_operand")
> +        (match_operand:SI 4 "const_0_to_255_operand")]
> +       UNSPEC_VTERNLOG))]
> +  "ternlog_invariant_operand_mask (operands) != 0 && !reload_completed"
Ditto.
> +  [(set (match_dup 0)
> +     (unspec:V
> +       [(match_dup 1)
> +        (match_dup 2)
> +        (match_dup 3)
> +        (match_dup 4)]
> +       UNSPEC_VTERNLOG))]
> +  "reduce_ternlog_operands (operands);")
> +
Others LGTM.
>  ;; There must be lots of other combinations like
>  ;;
>  ;; (any_logic:V
> diff --git a/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-1.c b/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-1.c
> new file mode 100644
> index 00000000000..a7063df9dcb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-1.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512f -O2" } */
> +/* { dg-final { scan-assembler-times {vmovdqa*} "4" } } */
> +
> +#include <immintrin.h>
> +
> +__m512i f(__m512i* a, __m512i* b, __m512i* c) {
> +     return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], 119); }
> +
> +__m512i g(__m512i* a, __m512i* b, __m512i* c) {
> +     return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], 250); }
> +
> +__m512i h(__m512i* a, __m512i* b, __m512i* c) {
> +     return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], 252); }
> diff --git a/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-2.c
> b/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-2.c
> new file mode 100644
> index 00000000000..b44986cc259
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-2.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512f -O2" } */
> +/* { dg-final { scan-assembler "vpternlog.*0.*0.*0" } } */
> +
> +#include <immintrin.h>
> +
> +__m512i f(__m512i a, __m512i b, __m512i* c) {
> +     return _mm512_ternarylogic_epi64 (a, b, c[0], 0x55); }
> +
> --
> 2.34.1

Reply via email to