On Tue, Aug 24, 2021 at 9:36 AM liuhongt <hongtao....@intel.com> wrote:
>
> Also optimize the three forms below to vpternlog, where op1/op2/op3/op4
> are register_operand or (not reg):
>
> A: (any_logic (any_logic op1 op2) op3)
> B: (any_logic (any_logic op1 op2) (any_logic op3 op4)), where op3/op4
> should equal op1/op2
> C: (any_logic (any_logic (any_logic op1 op2) op3) op4), where op3/op4
> should equal op1/op2
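To make form A concrete, here is a minimal sketch of my own (mirroring foo4
in the new pr101989-1.c testcase), not part of the patch itself.  Compiled
with -O2 -mavx512vl, the two logic instructions should collapse into a single
vpternlogq; the exact imm8 depends on how combine orders the operands.

/* Form A on 256-bit vectors: (a & b) ^ c.  */
typedef long long v4di __attribute__ ((vector_size (32)));

v4di
form_a (v4di a, v4di b, v4di c)
{
  return (a & b) ^ c;
}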
>
>   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
>
> gcc/ChangeLog:
>
>         PR target/101989
>         * config/i386/i386-protos.h
>         (ix86_strip_reg_or_notreg_operand): Declare.
>         * config/i386/i386.c (ix86_rtx_costs): Define cost for
>         UNSPEC_VTERNLOG.
>         (ix86_strip_reg_or_notreg_operand): New function.
Pushed to trunk with ix86_strip_reg_or_notreg_operand changed to a macro;
a function call seems too inefficient for such a simple unary strip.
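For reference, the macro would look something like this (my sketch, derived
from the function body in the patch; the committed version may differ):

/* Hypothetical macro form of the helper; strips a unary NOT, if any.  */
#define ix86_strip_reg_or_notreg_operand(OP) \
  (UNARY_P (OP) ? XEXP ((OP), 0) : (OP))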
>         * config/i386/predicates.md (reg_or_notreg_operand): New
>         predicate.
>         * config/i386/sse.md (*<avx512>_vternlog<mode>_all): New define_insn.
>         (*<avx512>_vpternlog<mode>_1): New pre-reload
>         define_insn_and_split.
>         (*<avx512>_vpternlog<mode>_2): Ditto.
>         (*<avx512>_vpternlog<mode>_3): Ditto.
>         (any_logic1, any_logic2): New code iterators.
>         (logic_op): New code attribute.
>         (ternlogsuffix): Extend to vector DF and SF modes.
>
> gcc/testsuite/ChangeLog:
>
>         PR target/101989
>         * gcc.target/i386/pr101989-1.c: New test.
>         * gcc.target/i386/pr101989-2.c: New test.
>         * gcc.target/i386/avx512bw-shiftqihi-constant-1.c: Adjust testcase.
> ---
>  gcc/config/i386/i386-protos.h                 |   1 +
>  gcc/config/i386/i386.c                        |  13 +
>  gcc/config/i386/predicates.md                 |   7 +
>  gcc/config/i386/sse.md                        | 234 ++++++++++++++++++
>  .../i386/avx512bw-shiftqihi-constant-1.c      |   4 +-
>  gcc/testsuite/gcc.target/i386/pr101989-1.c    |  51 ++++
>  gcc/testsuite/gcc.target/i386/pr101989-2.c    | 102 ++++++++
>  7 files changed, 410 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr101989-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr101989-2.c
>
> diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
> index 2fd13074c81..2bdaadcf4f3 100644
> --- a/gcc/config/i386/i386-protos.h
> +++ b/gcc/config/i386/i386-protos.h
> @@ -60,6 +60,7 @@ extern rtx standard_80387_constant_rtx (int);
>  extern int standard_sse_constant_p (rtx, machine_mode);
>  extern const char *standard_sse_constant_opcode (rtx_insn *, rtx *);
>  extern bool ix86_standard_x87sse_constant_load_p (const rtx_insn *, rtx);
> +extern rtx ix86_strip_reg_or_notreg_operand (rtx);
>  extern bool ix86_pre_reload_split (void);
>  extern bool symbolic_reference_mentioned_p (rtx);
>  extern bool extended_reg_mentioned_p (rtx);
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 46844fab08f..a69225ccc81 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -5236,6 +5236,14 @@ ix86_standard_x87sse_constant_load_p (const rtx_insn 
> *insn, rtx dst)
>    return true;
>  }
>
> +/* Strip the unary NOT, if any, from OP and return the underlying
> +   operand.  */
> +rtx
> +ix86_strip_reg_or_notreg_operand (rtx op)
> +{
> +  return UNARY_P (op) ? XEXP (op, 0) : op;
> +}
> +
>  /* Predicate for pre-reload splitters with associated instructions,
>     which can match any time before the split1 pass (usually combine),
>     then are unconditionally split in that pass and should not be
> @@ -20544,6 +20552,11 @@ ix86_rtx_costs (rtx x, machine_mode mode, int 
> outer_code_i, int opno,
>      case UNSPEC:
>        if (XINT (x, 1) == UNSPEC_TP)
>         *total = 0;
> +      else if (XINT (x, 1) == UNSPEC_VTERNLOG)
> +       {
> +         *total = cost->sse_op;
> +         return true;
> +       }
>        return false;
>
>      case VEC_SELECT:
> diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
> index 9321f332ef9..df5acb425d4 100644
> --- a/gcc/config/i386/predicates.md
> +++ b/gcc/config/i386/predicates.md
> @@ -1044,6 +1044,13 @@ (define_predicate "reg_or_pm1_operand"
>             (ior (match_test "op == const1_rtx")
>                  (match_test "op == constm1_rtx")))))
>
> +;; True for registers, or (not reg).  Used to optimize 3-operand
> +;; bitwise operations.
> +(define_predicate "reg_or_notreg_operand"
> +  (ior (match_operand 0 "register_operand")
> +       (and (match_code "not")
> +           (match_test "register_operand (XEXP (op, 0), mode)"))))
> +
>  ;; True if OP is acceptable as operand of DImode shift expander.
>  (define_predicate "shiftdi_operand"
>    (if_then_else (match_test "TARGET_64BIT")
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 13889687793..0acd749d21c 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -933,7 +933,9 @@ (define_mode_attr iptr
>  ;; Mapping of vector modes to VPTERNLOG suffix
>  (define_mode_attr ternlogsuffix
>    [(V8DI "q") (V4DI "q") (V2DI "q")
> +   (V8DF "q") (V4DF "q") (V2DF "q")
>     (V16SI "d") (V8SI "d") (V4SI "d")
> +   (V16SF "d") (V8SF "d") (V4SF "d")
>     (V32HI "d") (V16HI "d") (V8HI "d")
>     (V64QI "d") (V32QI "d") (V16QI "d")])
>
> @@ -10041,6 +10043,238 @@ (define_insn 
> "<avx512>_vternlog<mode><sd_maskz_name>"
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
>
> +(define_insn "*<avx512>_vternlog<mode>_all"
> +  [(set (match_operand:V 0 "register_operand" "=v")
> +       (unspec:V
> +         [(match_operand:V 1 "register_operand" "0")
> +          (match_operand:V 2 "register_operand" "v")
> +          (match_operand:V 3 "nonimmediate_operand" "vm")
> +          (match_operand:SI 4 "const_0_to_255_operand")]
> +         UNSPEC_VTERNLOG))]
> +  "TARGET_AVX512F"
> +  "vpternlog<ternlogsuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}"
> +  [(set_attr "type" "sselog")
> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "<sseinsnmode>")])
> +
> +;; There must be lots of other combinations like
> +;;
> +;; (any_logic:V
> +;;   (any_logic:V op1 op2)
> +;;   (any_logic:V op1 op3))
> +;;
> +;; (any_logic:V
> +;;   (any_logic:V
> +;;     (any_logic:V op1 op2)
> +;;     op3)
> +;;   op1)
> +;;
> +;; and so on.
> +
> +(define_code_iterator any_logic1 [and ior xor])
> +(define_code_iterator any_logic2 [and ior xor])
> +(define_code_attr logic_op [(and "&") (ior "|") (xor "^")])
> +
> +(define_insn_and_split "*<avx512>_vpternlog<mode>_1"
> +  [(set (match_operand:V 0 "register_operand")
> +       (any_logic:V
> +         (any_logic1:V
> +           (match_operand:V 1 "reg_or_notreg_operand")
> +           (match_operand:V 2 "reg_or_notreg_operand"))
> +         (any_logic2:V
> +           (match_operand:V 3 "reg_or_notreg_operand")
> +           (match_operand:V 4 "reg_or_notreg_operand"))))]
> +  "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
> +   && ix86_pre_reload_split ()
> +   && (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]),
> +                   ix86_strip_reg_or_notreg_operand (operands[4]))
> +       || rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[2]),
> +                      ix86_strip_reg_or_notreg_operand (operands[4]))
> +       || rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]),
> +                      ix86_strip_reg_or_notreg_operand (operands[3]))
> +       || rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[2]),
> +                      ix86_strip_reg_or_notreg_operand (operands[3])))"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 0)
> +       (unspec:V
> +         [(match_dup 6)
> +          (match_dup 2)
> +          (match_dup 1)
> +          (match_dup 5)]
> +         UNSPEC_VTERNLOG))]
> +{
> +  /* VPTERNLOGD reg6, reg2, reg1, imm8.  */
> +  int reg6 = 0xF0;
> +  int reg2 = 0xCC;
> +  int reg1 = 0xAA;
> +  int reg3 = 0;
> +  int reg4 = 0;
> +  int reg_mask, tmp1, tmp2;
> +  if (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]),
> +                  ix86_strip_reg_or_notreg_operand (operands[4])))
> +    {
> +      reg4 = reg1;
> +      reg3 = reg6;
> +      operands[6] = operands[3];
> +    }
> +  else if (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[2]),
> +                      ix86_strip_reg_or_notreg_operand (operands[4])))
> +    {
> +      reg4 = reg2;
> +      reg3 = reg6;
> +      operands[6] = operands[3];
> +    }
> +  else if (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]),
> +                       ix86_strip_reg_or_notreg_operand (operands[3])))
> +    {
> +      reg4 = reg6;
> +      reg3 = reg1;
> +      operands[6] = operands[4];
> +    }
> +  else
> +    {
> +      reg4 = reg6;
> +      reg3 = reg2;
> +      operands[6] = operands[4];
> +    }
> +
> +  reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1;
> +  reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2;
> +  reg3 = UNARY_P (operands[3]) ? ~reg3 : reg3;
> +  reg4 = UNARY_P (operands[4]) ? ~reg4 : reg4;
> +
> +  tmp1 = reg1 <any_logic1:logic_op> reg2;
> +  tmp2 = reg3 <any_logic2:logic_op> reg4;
> +  reg_mask = tmp1 <any_logic:logic_op> tmp2;
> +  reg_mask &= 0xFF;
> +
> +  operands[1] = ix86_strip_reg_or_notreg_operand (operands[1]);
> +  operands[2] = ix86_strip_reg_or_notreg_operand (operands[2]);
> +  operands[6] = ix86_strip_reg_or_notreg_operand (operands[6]);
> +  operands[5] = GEN_INT (reg_mask);
> +})
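Side note on the imm8 computation above: it is plain truth-table arithmetic
on the canonical source patterns 0xF0/0xCC/0xAA.  A standalone sketch of mine
(not part of the patch) showing the idea:

/* Evaluate the logic expression on the canonical 8-bit truth tables of
   the three vpternlog sources to obtain the immediate.  */
#include <stdio.h>

int
main (void)
{
  int src3 = 0xF0, src2 = 0xCC, src1 = 0xAA;  /* canonical patterns */
  /* Example: the copysign-style blend (src3 & src1) | (src2 & ~src1).  */
  int imm8 = ((src3 & src1) | (src2 & ~src1)) & 0xFF;
  printf ("imm8 = 0x%02X\n", imm8);  /* prints imm8 = 0xE4 */
  return 0;
}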
> +
> +(define_insn_and_split "*<avx512>_vpternlog<mode>_2"
> +  [(set (match_operand:V 0 "register_operand")
> +       (any_logic:V
> +         (any_logic1:V
> +           (any_logic2:V
> +             (match_operand:V 1 "reg_or_notreg_operand")
> +             (match_operand:V 2 "reg_or_notreg_operand"))
> +           (match_operand:V 3 "reg_or_notreg_operand"))
> +         (match_operand:V 4 "reg_or_notreg_operand")))]
> +  "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
> +   && ix86_pre_reload_split ()
> +   && (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]),
> +                   ix86_strip_reg_or_notreg_operand (operands[4]))
> +       || rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[2]),
> +                      ix86_strip_reg_or_notreg_operand (operands[4]))
> +       || rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]),
> +                      ix86_strip_reg_or_notreg_operand (operands[3]))
> +       || rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[2]),
> +                      ix86_strip_reg_or_notreg_operand (operands[3])))"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 0)
> +       (unspec:V
> +         [(match_dup 6)
> +          (match_dup 2)
> +          (match_dup 1)
> +          (match_dup 5)]
> +         UNSPEC_VTERNLOG))]
> +{
> +  /* VPTERNLOGD reg6, reg2, reg1, imm8.  */
> +  int reg6 = 0xF0;
> +  int reg2 = 0xCC;
> +  int reg1 = 0xAA;
> +  int reg3 = 0;
> +  int reg4 = 0;
> +  int reg_mask, tmp1, tmp2;
> +  if (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]),
> +                  ix86_strip_reg_or_notreg_operand (operands[4])))
> +    {
> +      reg4 = reg1;
> +      reg3 = reg6;
> +      operands[6] = operands[3];
> +    }
> +  else if (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[2]),
> +                      ix86_strip_reg_or_notreg_operand (operands[4])))
> +    {
> +      reg4 = reg2;
> +      reg3 = reg6;
> +      operands[6] = operands[3];
> +    }
> +  else if (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]),
> +                       ix86_strip_reg_or_notreg_operand (operands[3])))
> +    {
> +      reg4 = reg6;
> +      reg3 = reg1;
> +      operands[6] = operands[4];
> +    }
> +  else
> +    {
> +      reg4 = reg6;
> +      reg3 = reg2;
> +      operands[6] = operands[4];
> +    }
> +
> +  reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1;
> +  reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2;
> +  reg3 = UNARY_P (operands[3]) ? ~reg3 : reg3;
> +  reg4 = UNARY_P (operands[4]) ? ~reg4 : reg4;
> +
> +  tmp1 = reg1 <any_logic2:logic_op> reg2;
> +  tmp2 = tmp1 <any_logic1:logic_op> reg3;
> +  reg_mask = tmp2 <any_logic:logic_op> reg4;
> +  reg_mask &= 0xFF;
> +
> +  operands[1] = ix86_strip_reg_or_notreg_operand (operands[1]);
> +  operands[2] = ix86_strip_reg_or_notreg_operand (operands[2]);
> +  operands[6] = ix86_strip_reg_or_notreg_operand (operands[6]);
> +  operands[5] = GEN_INT (reg_mask);
> +})
> +
> +(define_insn_and_split "*<avx512>_vpternlog<mode>_3"
> +  [(set (match_operand:V 0 "register_operand")
> +       (any_logic:V
> +         (any_logic1:V
> +           (match_operand:V 1 "reg_or_notreg_operand")
> +           (match_operand:V 2 "reg_or_notreg_operand"))
> +         (match_operand:V 3 "reg_or_notreg_operand")))]
> +  "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
> +   && ix86_pre_reload_split ()"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 0)
> +       (unspec:V
> +         [(match_dup 3)
> +          (match_dup 2)
> +          (match_dup 1)
> +          (match_dup 4)]
> +         UNSPEC_VTERNLOG))]
> +{
> +  /* VPTERNLOGD reg3, reg2, reg1, imm8.  */
> +  int reg3 = 0xF0;
> +  int reg2 = 0xCC;
> +  int reg1 = 0xAA;
> +  int reg_mask, tmp1;
> +
> +  reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1;
> +  reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2;
> +  reg3 = UNARY_P (operands[3]) ? ~reg3 : reg3;
> +
> +  tmp1 = reg1 <any_logic1:logic_op> reg2;
> +  reg_mask = tmp1 <any_logic:logic_op> reg3;
> +  reg_mask &= 0xFF;
> +
> +  operands[1] = ix86_strip_reg_or_notreg_operand (operands[1]);
> +  operands[2] = ix86_strip_reg_or_notreg_operand (operands[2]);
> +  operands[3] = ix86_strip_reg_or_notreg_operand (operands[3]);
> +  operands[4] = GEN_INT (reg_mask);
> +})
> +
> +
>  (define_insn "<avx512>_vternlog<mode>_mask"
>    [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
>         (vec_merge:VI48_AVX512VL
> diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c 
> b/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c
> index 78bf5d33689..fbc3de08119 100644
> --- a/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c
> +++ b/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c
> @@ -1,7 +1,8 @@
>  /* PR target/95524 */
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -mavx512bw" } */
> -/* { dg-final { scan-assembler-times "vpand\[^\n\]*%zmm" 3 } }  */
> +/* { dg-final { scan-assembler-times "vpand\[^\n\]*%zmm" 2 } }  */
> +/* { dg-final { scan-assembler-times "vpternlogd\[^\n\]*%zmm" 1 } }  */
>  typedef char v64qi  __attribute__ ((vector_size (64)));
>  typedef unsigned char v64uqi  __attribute__ ((vector_size (64)));
>
> @@ -11,7 +12,6 @@ foo_ashiftrt_512 (v64qi a)
>    return a >> 2;
>  }
>  /* { dg-final { scan-assembler-times "vpsraw\[^\n\]*%zmm" 1 } } */
> -/* { dg-final { scan-assembler-times "vpxor\[^\n\]*%zmm" 1 } } */
>  /* { dg-final { scan-assembler-times "vpsubb\[^\n\]*%zmm" 1 } } */
>
>  __attribute__((noipa)) v64qi
> diff --git a/gcc/testsuite/gcc.target/i386/pr101989-1.c 
> b/gcc/testsuite/gcc.target/i386/pr101989-1.c
> new file mode 100644
> index 00000000000..594093ecdde
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr101989-1.c
> @@ -0,0 +1,51 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { scan-assembler-times "vpternlog" 6 } } */
> +/* { dg-final { scan-assembler-not "vpxor" } } */
> +/* { dg-final { scan-assembler-not "vpor" } } */
> +/* { dg-final { scan-assembler-not "vpand" } } */
> +
> +#include<immintrin.h>
> +__m256d
> +__attribute__((noipa, target("avx512vl")))
> +copysign2_pd(__m256d from, __m256d to) {
> +  __m256i a = _mm256_castpd_si256(from);
> +  __m256d avx_signbit = 
> _mm256_castsi256_pd(_mm256_slli_epi64(_mm256_cmpeq_epi64(a, a), 63));
> +  /* (avx_signbit & from) | (~avx_signbit & to)  */
> +  return _mm256_or_pd(_mm256_and_pd(avx_signbit, from), 
> _mm256_andnot_pd(avx_signbit, to));
> +}
> +
> +__m256i
> +__attribute__((noipa, target("avx512vl")))
> +foo (__m256i src1, __m256i src2, __m256i src3)
> +{
> +  return (src2 & ~src1) | (src3 & src1);
> +}
> +
> +__m256i
> +__attribute__ ((noipa, target("avx512vl")))
> +foo1 (__m256i src1, __m256i src2, __m256i src3)
> +{
> +  return (src2 & src1) | (src3 & ~src1);
> +}
> +
> +__m256i
> +__attribute__ ((noipa, target("avx512vl")))
> +foo2 (__m256i src1, __m256i src2, __m256i src3)
> +{
> +  return (src2 & src1) | (~src3 & src1);
> +}
> +
> +__m256i
> +__attribute__ ((noipa, target("avx512vl")))
> +foo3 (__m256i src1, __m256i src2, __m256i src3)
> +{
> +  return (~src2 & src1) | (src3 & src1);
> +}
> +
> +__m256i
> +__attribute__ ((noipa, target("avx512vl")))
> +foo4 (__m256i src1, __m256i src2, __m256i src3)
> +{
> +  return src3 & src2 ^ src1;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr101989-2.c 
> b/gcc/testsuite/gcc.target/i386/pr101989-2.c
> new file mode 100644
> index 00000000000..9d9759a8e1d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr101989-2.c
> @@ -0,0 +1,102 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mavx2 -mno-avx512f" } */
> +/* { dg-require-effective-target avx512vl } */
> +
> +#define AVX512VL
> +
> +#include "avx512f-helper.h"
> +
> +#include "pr101989-1.c"
> +__m256d
> +avx2_copysign2_pd (__m256d from, __m256d to) {
> +  __m256i a = _mm256_castpd_si256(from);
> +  __m256d avx_signbit = 
> _mm256_castsi256_pd(_mm256_slli_epi64(_mm256_cmpeq_epi64(a, a), 63));
> +  /* (avx_signbit & from) | (~avx_signbit & to)  */
> +  return _mm256_or_pd(_mm256_and_pd(avx_signbit, from), 
> _mm256_andnot_pd(avx_signbit, to));
> +}
> +
> +__m256i
> +avx2_foo (__m256i src1, __m256i src2, __m256i src3)
> +{
> +  return (src2 & ~src1) | (src3 & src1);
> +}
> +
> +__m256i
> +avx2_foo1 (__m256i src1, __m256i src2, __m256i src3)
> +{
> +  return (src2 & src1) | (src3 & ~src1);
> +}
> +
> +__m256i
> +avx2_foo2 (__m256i src1, __m256i src2, __m256i src3)
> +{
> +  return (src2 & src1) | (~src3 & src1);
> +}
> +
> +__m256i
> +avx2_foo3 (__m256i src1, __m256i src2, __m256i src3)
> +{
> +  return (~src2 & src1) | (src3 & src1);
> +}
> +
> +__m256i
> +avx2_foo4 (__m256i src1, __m256i src2, __m256i src3)
> +{
> +  return src3 & src2 ^ src1;
> +}
> +
> +
> +void
> +test_256 (void)
> +{
> +  union256i_q q1, q2, q3, res2, exp2;
> +  union256d d1, d2, res1, exp1;
> +  int i, sign = 1;
> +
> +  for (i = 0; i < 4; i++)
> +    {
> +      d1.a[i] = 12.34 * (i + 2000) * sign;
> +      d2.a[i] = 56.78 * (i - 30) * sign;
> +      q1.a[i] = 12 * (i + 2000) * sign;
> +      q2.a[i] = 56 * (i - 30) * sign;
> +      q3.a[i] = 90 * (i + 40) * sign;
> +      res1.a[i] = DEFAULT_VALUE;
> +      exp1.a[i] = DEFAULT_VALUE;
> +      res2.a[i] = exp2.a[i] = -1;
> +      sign = -sign;
> +    }
> +
> +  exp1.x = avx2_copysign2_pd (d1.x, d2.x);
> +  res1.x = copysign2_pd (d1.x, d2.x);
> +  if (UNION_CHECK (256, d) (res1, exp1.a))
> +    abort ();
> +
> +  exp2.x = avx2_foo1 (q1.x, q2.x, q3.x);
> +  res2.x = foo1 (q1.x, q2.x, q3.x);
> +  if (UNION_CHECK (256, i_q) (res2, exp2.a))
> +    abort ();
> +
> +  exp2.x = avx2_foo2 (q1.x, q2.x, q3.x);
> +  res2.x = foo2 (q1.x, q2.x, q3.x);
> +  if (UNION_CHECK (256, i_q) (res2, exp2.a))
> +    abort ();
> +
> +  exp2.x = avx2_foo3 (q1.x, q2.x, q3.x);
> +  res2.x = foo3 (q1.x, q2.x, q3.x);
> +  if (UNION_CHECK (256, i_q) (res2, exp2.a))
> +    abort ();
> +
> +  exp2.x = avx2_foo4 (q1.x, q2.x, q3.x);
> +  res2.x = foo4 (q1.x, q2.x, q3.x);
> +  if (UNION_CHECK (256, i_q) (res2, exp2.a))
> +    abort ();
> +
> +  exp2.x = avx2_foo (q1.x, q2.x, q3.x);
> +  res2.x = foo (q1.x, q2.x, q3.x);
> +  if (UNION_CHECK (256, i_q) (res2, exp2.a))
> +    abort ();
> +}
> +
> +static void
> +test_128 ()
> +{}
> --
> 2.18.1
>


-- 
BR,
Hongtao
