On Tue, 24 Aug 2021 17:53:27 +0800
Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:

> On Tue, Aug 24, 2021 at 9:36 AM liuhongt <hongtao....@intel.com> wrote:
> >
> > Also optimize the 3 forms below to vpternlog, where op1, op2 and op3 are
> > each either a register_operand or a unary_p of the form (not reg)

> > gcc/ChangeLog:
> >
> >         PR target/101989
> >         * config/i386/i386-protos.h
> >         (ix86_strip_reg_or_notreg_operand): New declare.

"New declaration."

> >         * config/i386/i386.c (ix86_rtx_costs): Define cost for
> >         UNSPEC_VTERNLOG.

I do not see VTERNLOG mentioned much in the docs I have here.
Is there a P missing, as in vPternlog?

> >         (ix86_strip_reg_or_notreg_operand): New function.  
> Pushed to trunk with ix86_strip_reg_or_notreg_operand changed to a macro;
> a function call seems too inefficient for simply stripping a unary.
> >         * config/i386/predicates.md (reg_or_notreg_operand): New
> >         predicate.
> >         * config/i386/sse.md (*<avx512>_vternlog<mode>_all): New 
> > define_insn.
> >         (*<avx512>_vternlog<mode>_1): New pre_reload
> >         define_insn_and_split.
> >         (*<avx512>_vternlog<mode>_2): Ditto.
> >         (*<avx512>_vternlog<mode>_3): Ditto.

At least the above 3 insn_and_splits do have a 'p' in the md (they are
named *<avx512>_vpternlog<mode>_N there, not _vternlog as written here).
Thanks,
> >         (any_logic1,any_logic2): New code iterator.
> >         (logic_op): New code attribute.
> >         (ternlogsuffix): Extend to VNxDF and VNxSF.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         PR target/101989
> >         * gcc.target/i386/pr101989-1.c: New test.
> >         * gcc.target/i386/pr101989-2.c: New test.
> >         * gcc.target/i386/avx512bw-shiftqihi-constant-1.c: Adjust testcase.
> > ---
> >  gcc/config/i386/i386-protos.h                 |   1 +
> >  gcc/config/i386/i386.c                        |  13 +
> >  gcc/config/i386/predicates.md                 |   7 +
> >  gcc/config/i386/sse.md                        | 234 ++++++++++++++++++
> >  .../i386/avx512bw-shiftqihi-constant-1.c      |   4 +-
> >  gcc/testsuite/gcc.target/i386/pr101989-1.c    |  51 ++++
> >  gcc/testsuite/gcc.target/i386/pr101989-2.c    | 102 ++++++++
> >  7 files changed, 410 insertions(+), 2 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr101989-1.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr101989-2.c
> >
> > diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
> > index 2fd13074c81..2bdaadcf4f3 100644
> > --- a/gcc/config/i386/i386-protos.h
> > +++ b/gcc/config/i386/i386-protos.h
> > @@ -60,6 +60,7 @@ extern rtx standard_80387_constant_rtx (int);
> >  extern int standard_sse_constant_p (rtx, machine_mode);
> >  extern const char *standard_sse_constant_opcode (rtx_insn *, rtx *);
> >  extern bool ix86_standard_x87sse_constant_load_p (const rtx_insn *, rtx);
> > +extern rtx ix86_strip_reg_or_notreg_operand (rtx);
> >  extern bool ix86_pre_reload_split (void);
> >  extern bool symbolic_reference_mentioned_p (rtx);
> >  extern bool extended_reg_mentioned_p (rtx);
> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > index 46844fab08f..a69225ccc81 100644
> > --- a/gcc/config/i386/i386.c
> > +++ b/gcc/config/i386/i386.c
> > @@ -5236,6 +5236,14 @@ ix86_standard_x87sse_constant_load_p (const rtx_insn 
> > *insn, rtx dst)
> >    return true;
> >  }
> >
> > +/* Return the operand of OP if OP is a unary rtx (e.g. (not reg)),
> > +   otherwise return OP itself unchanged.  */
> > +rtx
> > +ix86_strip_reg_or_notreg_operand (rtx op)
> > +{
> > +  return UNARY_P (op) ? XEXP (op, 0) : op;
> > +}
> > +
> >  /* Predicate for pre-reload splitters with associated instructions,
> >     which can match any time before the split1 pass (usually combine),
> >     then are unconditionally split in that pass and should not be
> > @@ -20544,6 +20552,11 @@ ix86_rtx_costs (rtx x, machine_mode mode, int 
> > outer_code_i, int opno,
> >      case UNSPEC:
> >        if (XINT (x, 1) == UNSPEC_TP)
> >         *total = 0;
> > +      else if (XINT(x, 1) == UNSPEC_VTERNLOG)
> > +       {
> > +         *total = cost->sse_op;
> > +         return true;
> > +       }
> >        return false;
> >
> >      case VEC_SELECT:
> > diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
> > index 9321f332ef9..df5acb425d4 100644
> > --- a/gcc/config/i386/predicates.md
> > +++ b/gcc/config/i386/predicates.md
> > @@ -1044,6 +1044,13 @@ (define_predicate "reg_or_pm1_operand"
> >             (ior (match_test "op == const1_rtx")
> >                  (match_test "op == constm1_rtx")))))
> >
> > +;; True for registers, or (not: registers).  Used to optimize 3-operand
> > +;; bitwise operation.
> > +(define_predicate "reg_or_notreg_operand"
> > +  (ior (match_operand 0 "register_operand")
> > +       (and (match_code "not")
> > +           (match_test "register_operand (XEXP (op, 0), mode)"))))
> > +
> >  ;; True if OP is acceptable as operand of DImode shift expander.
> >  (define_predicate "shiftdi_operand"
> >    (if_then_else (match_test "TARGET_64BIT")
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > index 13889687793..0acd749d21c 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -933,7 +933,9 @@ (define_mode_attr iptr
> >  ;; Mapping of vector modes to VPTERNLOG suffix
> >  (define_mode_attr ternlogsuffix
> >    [(V8DI "q") (V4DI "q") (V2DI "q")
> > +   (V8DF "q") (V4DF "q") (V2DF "q")
> >     (V16SI "d") (V8SI "d") (V4SI "d")
> > +   (V16SF "d") (V8SF "d") (V4SF "d")
> >     (V32HI "d") (V16HI "d") (V8HI "d")
> >     (V64QI "d") (V32QI "d") (V16QI "d")])
> >
> > @@ -10041,6 +10043,238 @@ (define_insn 
> > "<avx512>_vternlog<mode><sd_maskz_name>"
> >     (set_attr "prefix" "evex")
> >     (set_attr "mode" "<sseinsnmode>")])
> >
> > +(define_insn "*<avx512>_vternlog<mode>_all"
> > +  [(set (match_operand:V 0 "register_operand" "=v")
> > +       (unspec:V
> > +         [(match_operand:V 1 "register_operand" "0")
> > +          (match_operand:V 2 "register_operand" "v")
> > +          (match_operand:V 3 "nonimmediate_operand" "vm")
> > +          (match_operand:SI 4 "const_0_to_255_operand")]
> > +         UNSPEC_VTERNLOG))]
> > +  "TARGET_AVX512F"
> > +  "vpternlog<ternlogsuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}"
> > +  [(set_attr "type" "sselog")
> > +   (set_attr "prefix" "evex")
> > +   (set_attr "mode" "<sseinsnmode>")])
> > +
> > +;; There must be lots of other combinations like
> > +;;
> > +;; (any_logic:V
> > +;;   (any_logic:V op1 op2)
> > +;;   (any_logic:V op1 op3))
> > +;;
> > +;; (any_logic:V
> > +;;   (any_logic:V
> > +;;     (any_logic:V op1, op2)
> > +;;     op3)
> > +;;   op1)
> > +;;
> > +;; and so on.
> > +
> > +(define_code_iterator any_logic1 [and ior xor])
> > +(define_code_iterator any_logic2 [and ior xor])
> > +(define_code_attr logic_op [(and "&") (ior "|") (xor "^")])
> > +
> > +(define_insn_and_split "*<avx512>_vpternlog<mode>_1"
> > +  [(set (match_operand:V 0 "register_operand")
> > +       (any_logic:V
> > +         (any_logic1:V
> > +           (match_operand:V 1 "reg_or_notreg_operand")
> > +           (match_operand:V 2 "reg_or_notreg_operand"))
> > +         (any_logic2:V
> > +           (match_operand:V 3 "reg_or_notreg_operand")
> > +           (match_operand:V 4 "reg_or_notreg_operand"))))]
> > +  "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
> > +   && ix86_pre_reload_split ()
> > +   && (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]),
> > +                   ix86_strip_reg_or_notreg_operand (operands[4]))
> > +       || rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[2]),
> > +                      ix86_strip_reg_or_notreg_operand (operands[4]))
> > +       || rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]),
> > +                      ix86_strip_reg_or_notreg_operand (operands[3]))
> > +       || rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[2]),
> > +                      ix86_strip_reg_or_notreg_operand (operands[3])))"
> > +  "#"
> > +  "&& 1"
> > +  [(set (match_dup 0)
> > +       (unspec:V
> > +         [(match_dup 6)
> > +          (match_dup 2)
> > +          (match_dup 1)
> > +          (match_dup 5)]
> > +         UNSPEC_VTERNLOG))]
> > +{
> > +  /* VPTERNLOGD reg6, reg2, reg1, imm8.  */
> > +  int reg6 = 0xF0;
> > +  int reg2 = 0xCC;
> > +  int reg1 = 0xAA;
> > +  int reg3 = 0;
> > +  int reg4 = 0;
> > +  int reg_mask, tmp1, tmp2;
> > +  if (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]),
> > +                  ix86_strip_reg_or_notreg_operand (operands[4])))
> > +    {
> > +      reg4 = reg1;
> > +      reg3 = reg6;
> > +      operands[6] = operands[3];
> > +    }
> > +  else if (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[2]),
> > +                      ix86_strip_reg_or_notreg_operand (operands[4])))
> > +    {
> > +      reg4 = reg2;
> > +      reg3 = reg6;
> > +      operands[6] = operands[3];
> > +    }
> > +  else if (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]),
> > +                       ix86_strip_reg_or_notreg_operand (operands[3])))
> > +    {
> > +      reg4 = reg6;
> > +      reg3 = reg1;
> > +      operands[6] = operands[4];
> > +    }
> > +  else
> > +    {
> > +      reg4 = reg6;
> > +      reg3 = reg2;
> > +      operands[6] = operands[4];
> > +    }
> > +
> > +  reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1;
> > +  reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2;
> > +  reg3 = UNARY_P (operands[3]) ? ~reg3 : reg3;
> > +  reg4 = UNARY_P (operands[4]) ? ~reg4 : reg4;
> > +
> > +  tmp1 = reg1 <any_logic1:logic_op> reg2;
> > +  tmp2 = reg3 <any_logic2:logic_op> reg4;
> > +  reg_mask = tmp1  <any_logic:logic_op> tmp2;
> > +  reg_mask &= 0xFF;
> > +
> > +  operands[1] = ix86_strip_reg_or_notreg_operand (operands[1]);
> > +  operands[2] = ix86_strip_reg_or_notreg_operand (operands[2]);
> > +  operands[6] = ix86_strip_reg_or_notreg_operand (operands[6]);
> > +  operands[5] = GEN_INT (reg_mask);
> > +})
> > +
> > +(define_insn_and_split "*<avx512>_vpternlog<mode>_2"
> > +  [(set (match_operand:V 0 "register_operand")
> > +       (any_logic:V
> > +         (any_logic1:V
> > +           (any_logic2:V
> > +             (match_operand:V 1 "reg_or_notreg_operand")
> > +             (match_operand:V 2 "reg_or_notreg_operand"))
> > +           (match_operand:V 3 "reg_or_notreg_operand"))
> > +         (match_operand:V 4 "reg_or_notreg_operand")))]
> > +  "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
> > +   && ix86_pre_reload_split ()
> > +   && (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]),
> > +                   ix86_strip_reg_or_notreg_operand (operands[4]))
> > +       || rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[2]),
> > +                      ix86_strip_reg_or_notreg_operand (operands[4]))
> > +       || rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]),
> > +                      ix86_strip_reg_or_notreg_operand (operands[3]))
> > +       || rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[2]),
> > +                      ix86_strip_reg_or_notreg_operand (operands[3])))"
> > +  "#"
> > +  "&& 1"
> > +  [(set (match_dup 0)
> > +       (unspec:V
> > +         [(match_dup 6)
> > +          (match_dup 2)
> > +          (match_dup 1)
> > +          (match_dup 5)]
> > +         UNSPEC_VTERNLOG))]
> > +{
> > +  /* VPTERNLOGD reg6, reg2, reg1, imm8.  */
> > +  int reg6 = 0xF0;
> > +  int reg2 = 0xCC;
> > +  int reg1 = 0xAA;
> > +  int reg3 = 0;
> > +  int reg4 = 0;
> > +  int reg_mask, tmp1, tmp2;
> > +  if (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]),
> > +                  ix86_strip_reg_or_notreg_operand (operands[4])))
> > +    {
> > +      reg4 = reg1;
> > +      reg3 = reg6;
> > +      operands[6] = operands[3];
> > +    }
> > +  else if (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[2]),
> > +                      ix86_strip_reg_or_notreg_operand (operands[4])))
> > +    {
> > +      reg4 = reg2;
> > +      reg3 = reg6;
> > +      operands[6] = operands[3];
> > +    }
> > +  else if (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]),
> > +                       ix86_strip_reg_or_notreg_operand (operands[3])))
> > +    {
> > +      reg4 = reg6;
> > +      reg3 = reg1;
> > +      operands[6] = operands[4];
> > +    }
> > +  else
> > +    {
> > +      reg4 = reg6;
> > +      reg3 = reg2;
> > +      operands[6] = operands[4];
> > +    }
> > +
> > +  reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1;
> > +  reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2;
> > +  reg3 = UNARY_P (operands[3]) ? ~reg3 : reg3;
> > +  reg4 = UNARY_P (operands[4]) ? ~reg4 : reg4;
> > +
> > +  tmp1 = reg1 <any_logic2:logic_op> reg2;
> > +  tmp2 = tmp1 <any_logic1:logic_op> reg3;
> > +  reg_mask = tmp2 <any_logic:logic_op> reg4;
> > +  reg_mask &= 0xFF;
> > +
> > +  operands[1] = ix86_strip_reg_or_notreg_operand (operands[1]);
> > +  operands[2] = ix86_strip_reg_or_notreg_operand (operands[2]);
> > +  operands[6] = ix86_strip_reg_or_notreg_operand (operands[6]);
> > +  operands[5] = GEN_INT (reg_mask);
> > +})
> > +
> > +(define_insn_and_split "*<avx512>_vpternlog<mode>_3"
> > +  [(set (match_operand:V 0 "register_operand")
> > +       (any_logic:V
> > +         (any_logic1:V
> > +           (match_operand:V 1 "reg_or_notreg_operand")
> > +           (match_operand:V 2 "reg_or_notreg_operand"))
> > +         (match_operand:V 3 "reg_or_notreg_operand")))]
> > +  "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
> > +   && ix86_pre_reload_split ()"
> > +  "#"
> > +  "&& 1"
> > +  [(set (match_dup 0)
> > +       (unspec:V
> > +         [(match_dup 3)
> > +          (match_dup 2)
> > +          (match_dup 1)
> > +          (match_dup 4)]
> > +         UNSPEC_VTERNLOG))]
> > +{
> > +  /* VPTERNLOGD reg3, reg2, reg1, imm8.  */
> > +  int reg3 = 0xF0;
> > +  int reg2 = 0xCC;
> > +  int reg1 = 0xAA;
> > +  int reg_mask, tmp1;
> > +
> > +  reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1;
> > +  reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2;
> > +  reg3 = UNARY_P (operands[3]) ? ~reg3 : reg3;
> > +
> > +  tmp1 = reg1 <any_logic1:logic_op> reg2;
> > +  reg_mask = tmp1 <any_logic:logic_op> reg3;
> > +  reg_mask &= 0xFF;
> > +
> > +  operands[1] = ix86_strip_reg_or_notreg_operand (operands[1]);
> > +  operands[2] = ix86_strip_reg_or_notreg_operand (operands[2]);
> > +  operands[3] = ix86_strip_reg_or_notreg_operand (operands[3]);
> > +  operands[4] = GEN_INT (reg_mask);
> > +})
> > +
> > +
> >  (define_insn "<avx512>_vternlog<mode>_mask"
> >    [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
> >         (vec_merge:VI48_AVX512VL
> > diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c 
> > b/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c
> > index 78bf5d33689..fbc3de08119 100644
> > --- a/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c
> > +++ b/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c
> > @@ -1,7 +1,8 @@
> >  /* PR target/95524 */
> >  /* { dg-do compile } */
> >  /* { dg-options "-O2 -mavx512bw" } */
> > -/* { dg-final { scan-assembler-times "vpand\[^\n\]*%zmm" 3 } }  */
> > +/* { dg-final { scan-assembler-times "vpand\[^\n\]*%zmm" 2 } }  */
> > +/* { dg-final { scan-assembler-times "vpternlogd\[^\n\]*%zmm" 1 } }  */
> >  typedef char v64qi  __attribute__ ((vector_size (64)));
> >  typedef unsigned char v64uqi  __attribute__ ((vector_size (64)));
> >
> > @@ -11,7 +12,6 @@ foo_ashiftrt_512 (v64qi a)
> >    return a >> 2;
> >  }
> >  /* { dg-final { scan-assembler-times "vpsraw\[^\n\]*%zmm" 1 } } */
> > -/* { dg-final { scan-assembler-times "vpxor\[^\n\]*%zmm" 1 } } */
> >  /* { dg-final { scan-assembler-times "vpsubb\[^\n\]*%zmm" 1 } } */
> >
> >  __attribute__((noipa)) v64qi
> > diff --git a/gcc/testsuite/gcc.target/i386/pr101989-1.c 
> > b/gcc/testsuite/gcc.target/i386/pr101989-1.c
> > new file mode 100644
> > index 00000000000..594093ecdde
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr101989-1.c
> > @@ -0,0 +1,51 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +/* { dg-final { scan-assembler-times "vpternlog" 6 } } */
> > +/* { dg-final { scan-assembler-not "vpxor" } } */
> > +/* { dg-final { scan-assembler-not "vpor" } } */
> > +/* { dg-final { scan-assembler-not "vpand" } } */
> > +
> > +#include<immintrin.h>
> > +__m256d
> > +__attribute__((noipa, target("avx512vl")))
> > +copysign2_pd(__m256d from, __m256d to) {
> > +  __m256i a = _mm256_castpd_si256(from);
> > +  __m256d avx_signbit = 
> > _mm256_castsi256_pd(_mm256_slli_epi64(_mm256_cmpeq_epi64(a, a), 63));
> > +  /* (avx_signbit & from) | (~avx_signbit & to)  */
> > +  return _mm256_or_pd(_mm256_and_pd(avx_signbit, from), 
> > _mm256_andnot_pd(avx_signbit, to));
> > +}
> > +
> > +__m256i
> > +__attribute__((noipa, target("avx512vl")))
> > +foo (__m256i src1, __m256i src2, __m256i src3)
> > +{
> > +  return (src2 & ~src1) | (src3 & src1);
> > +}
> > +
> > +__m256i
> > +__attribute__ ((noipa, target("avx512vl")))
> > +foo1 (__m256i src1, __m256i src2, __m256i src3)
> > +{
> > +  return (src2 & src1) | (src3 & ~src1);
> > +}
> > +
> > +__m256i
> > +__attribute__ ((noipa, target("avx512vl")))
> > +foo2 (__m256i src1, __m256i src2, __m256i src3)
> > +{
> > +  return (src2 & src1) | (~src3 & src1);
> > +}
> > +
> > +__m256i
> > +__attribute__ ((noipa, target("avx512vl")))
> > +foo3 (__m256i src1, __m256i src2, __m256i src3)
> > +{
> > +  return (~src2 & src1) | (src3 & src1);
> > +}
> > +
> > +__m256i
> > +__attribute__ ((noipa, target("avx512vl")))
> > +foo4 (__m256i src1, __m256i src2, __m256i src3)
> > +{
> > +  return src3 & src2 ^ src1;
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr101989-2.c 
> > b/gcc/testsuite/gcc.target/i386/pr101989-2.c
> > new file mode 100644
> > index 00000000000..9d9759a8e1d
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr101989-2.c
> > @@ -0,0 +1,102 @@
> > +/* { dg-do run } */
> > +/* { dg-options "-O2 -mavx2 -mno-avx512f" } */
> > +/* { dg-require-effective-target avx512vl } */
> > +
> > +#define AVX512VL
> > +
> > +#include "avx512f-helper.h"
> > +
> > +#include "pr101989-1.c"
> > +__m256d
> > +avx2_copysign2_pd (__m256d from, __m256d to) {
> > +  __m256i a = _mm256_castpd_si256(from);
> > +  __m256d avx_signbit = 
> > _mm256_castsi256_pd(_mm256_slli_epi64(_mm256_cmpeq_epi64(a, a), 63));
> > +  /* (avx_signbit & from) | (~avx_signbit & to)  */
> > +  return _mm256_or_pd(_mm256_and_pd(avx_signbit, from), 
> > _mm256_andnot_pd(avx_signbit, to));
> > +}
> > +
> > +__m256i
> > +avx2_foo (__m256i src1, __m256i src2, __m256i src3)
> > +{
> > +  return (src2 & ~src1) | (src3 & src1);
> > +}
> > +
> > +__m256i
> > +avx2_foo1 (__m256i src1, __m256i src2, __m256i src3)
> > +{
> > +  return (src2 & src1) | (src3 & ~src1);
> > +}
> > +
> > +__m256i
> > +avx2_foo2 (__m256i src1, __m256i src2, __m256i src3)
> > +{
> > +  return (src2 & src1) | (~src3 & src1);
> > +}
> > +
> > +__m256i
> > +avx2_foo3 (__m256i src1, __m256i src2, __m256i src3)
> > +{
> > +  return (~src2 & src1) | (src3 & src1);
> > +}
> > +
> > +__m256i
> > +avx2_foo4 (__m256i src1, __m256i src2, __m256i src3)
> > +{
> > +  return src3 & src2 ^ src1;
> > +}
> > +
> > +
> > +void
> > +test_256 (void)
> > +{
> > +  union256i_q q1, q2, q3, res2, exp2;
> > +  union256d d1, d2, res1, exp1;
> > +  int i, sign = 1;
> > +
> > +  for (i = 0; i < 4; i++)
> > +    {
> > +      d1.a[i] = 12.34 * (i + 2000) * sign;
> > +      d2.a[i] = 56.78 * (i - 30) * sign;
> > +      q1.a[i] = 12 * (i + 2000) * sign;
> > +      q2.a[i] = 56 * (i - 30) * sign;
> > +      q3.a[i] = 90 * (i + 40) * sign;
> > +      res1.a[i] = DEFAULT_VALUE;
> > +      exp1.a[i] = DEFAULT_VALUE;
> > +      res2.a[i] = exp2.a[i] = -1;
> > +      sign = -sign;
> > +    }
> > +
> > +  exp1.x = avx2_copysign2_pd (d1.x, d2.x);
> > +  res1.x = copysign2_pd (d1.x, d2.x);
> > +  if (UNION_CHECK (256, d) (res1, exp1.a))
> > +    abort ();
> > +
> > +  exp2.x = avx2_foo1 (q1.x, q2.x, q3.x);
> > +  res2.x = foo1 (q1.x, q2.x, q3.x);
> > +  if (UNION_CHECK (256, i_q) (res2, exp2.a))
> > +    abort ();
> > +
> > +  exp2.x = avx2_foo2 (q1.x, q2.x, q3.x);
> > +  res2.x = foo2 (q1.x, q2.x, q3.x);
> > +  if (UNION_CHECK (256, i_q) (res2, exp2.a))
> > +    abort ();
> > +
> > +  exp2.x = avx2_foo3 (q1.x, q2.x, q3.x);
> > +  res2.x = foo3 (q1.x, q2.x, q3.x);
> > +  if (UNION_CHECK (256, i_q) (res2, exp2.a))
> > +    abort ();
> > +
> > +  exp2.x = avx2_foo4 (q1.x, q2.x, q3.x);
> > +  res2.x = foo4 (q1.x, q2.x, q3.x);
> > +  if (UNION_CHECK (256, i_q) (res2, exp2.a))
> > +    abort ();
> > +
> > +  exp2.x = avx2_foo (q1.x, q2.x, q3.x);
> > +  res2.x = foo (q1.x, q2.x, q3.x);
> > +  if (UNION_CHECK (256, i_q) (res2, exp2.a))
> > +    abort ();
> > +}
> > +
> > +static void
> > +test_128 ()
> > +{}
> > --
> > 2.18.1
> >  
> 
> 

Reply via email to