On Tue, 24 Aug 2021 17:53:27 +0800 Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
> On Tue, Aug 24, 2021 at 9:36 AM liuhongt <hongtao....@intel.com> wrote: > > > > Also optimize below 3 forms to vpternlog, op1, op2, op3 are > > register_operand or unary_p as (not reg) > > gcc/ChangeLog: > > > > PR target/101989 > > * config/i386/i386-protos.h > > (ix86_strip_reg_or_notreg_operand): New declare. "New declaration." > > * config/i386/i386.c (ix86_rtx_costs): Define cost for > > UNSPEC_VTERNLOG. I do not see many mentions of VTERNLOG in the docs I have here. Is there a 'P' missing, as in vPternlog? > > (ix86_strip_reg_or_notreg_operand): New function. > Push to trunk by changing ix86_strip_reg_or_notreg_operand to macro, > function call seems too inefficient for the simple strip unary. > > * config/i386/predicates.md (reg_or_notreg_operand): New > > predicate. > > * config/i386/sse.md (*<avx512>_vternlog<mode>_all): New > > define_insn. > > (*<avx512>_vternlog<mode>_1): New pre_reload > > define_insn_and_split. > > (*<avx512>_vternlog<mode>_2): Ditto. > > (*<avx512>_vternlog<mode>_3): Ditto. At least the above 3 insn_and_split patterns do have a 'p' in the .md file. Thanks, > > (any_logic1,any_logic2): New code iterator. > > (logic_op): New code attribute. > > (ternlogsuffix): Extend to VNxDF and VNxSF. > > > > gcc/testsuite/ChangeLog: > > > > PR target/101989 > > * gcc.target/i386/pr101989-1.c: New test. > > * gcc.target/i386/pr101989-2.c: New test. > > * gcc.target/i386/avx512bw-shiftqihi-constant-1.c: Adjust testcase. 
> > --- > > gcc/config/i386/i386-protos.h | 1 + > > gcc/config/i386/i386.c | 13 + > > gcc/config/i386/predicates.md | 7 + > > gcc/config/i386/sse.md | 234 ++++++++++++++++++ > > .../i386/avx512bw-shiftqihi-constant-1.c | 4 +- > > gcc/testsuite/gcc.target/i386/pr101989-1.c | 51 ++++ > > gcc/testsuite/gcc.target/i386/pr101989-2.c | 102 ++++++++ > > 7 files changed, 410 insertions(+), 2 deletions(-) > > create mode 100644 gcc/testsuite/gcc.target/i386/pr101989-1.c > > create mode 100644 gcc/testsuite/gcc.target/i386/pr101989-2.c > > > > diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h > > index 2fd13074c81..2bdaadcf4f3 100644 > > --- a/gcc/config/i386/i386-protos.h > > +++ b/gcc/config/i386/i386-protos.h > > @@ -60,6 +60,7 @@ extern rtx standard_80387_constant_rtx (int); > > extern int standard_sse_constant_p (rtx, machine_mode); > > extern const char *standard_sse_constant_opcode (rtx_insn *, rtx *); > > extern bool ix86_standard_x87sse_constant_load_p (const rtx_insn *, rtx); > > +extern rtx ix86_strip_reg_or_notreg_operand (rtx); > > extern bool ix86_pre_reload_split (void); > > extern bool symbolic_reference_mentioned_p (rtx); > > extern bool extended_reg_mentioned_p (rtx); > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > > index 46844fab08f..a69225ccc81 100644 > > --- a/gcc/config/i386/i386.c > > +++ b/gcc/config/i386/i386.c > > @@ -5236,6 +5236,14 @@ ix86_standard_x87sse_constant_load_p (const rtx_insn > > *insn, rtx dst) > > return true; > > } > > > > +/* Returns true if INSN can be transformed from a memory load > > + to a supported FP constant load. */ > > +rtx > > +ix86_strip_reg_or_notreg_operand (rtx op) > > +{ > > + return UNARY_P (op) ? 
XEXP (op, 0) : op; > > +} > > + > > /* Predicate for pre-reload splitters with associated instructions, > > which can match any time before the split1 pass (usually combine), > > then are unconditionally split in that pass and should not be > > @@ -20544,6 +20552,11 @@ ix86_rtx_costs (rtx x, machine_mode mode, int > > outer_code_i, int opno, > > case UNSPEC: > > if (XINT (x, 1) == UNSPEC_TP) > > *total = 0; > > + else if (XINT(x, 1) == UNSPEC_VTERNLOG) > > + { > > + *total = cost->sse_op; > > + return true; > > + } > > return false; > > > > case VEC_SELECT: > > diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md > > index 9321f332ef9..df5acb425d4 100644 > > --- a/gcc/config/i386/predicates.md > > +++ b/gcc/config/i386/predicates.md > > @@ -1044,6 +1044,13 @@ (define_predicate "reg_or_pm1_operand" > > (ior (match_test "op == const1_rtx") > > (match_test "op == constm1_rtx"))))) > > > > +;; True for registers, or (not: registers). Used to optimize 3-operand > > +;; bitwise operation. > > +(define_predicate "reg_or_notreg_operand" > > + (ior (match_operand 0 "register_operand") > > + (and (match_code "not") > > + (match_test "register_operand (XEXP (op, 0), mode)")))) > > + > > ;; True if OP is acceptable as operand of DImode shift expander. 
> > (define_predicate "shiftdi_operand" > > (if_then_else (match_test "TARGET_64BIT") > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > > index 13889687793..0acd749d21c 100644 > > --- a/gcc/config/i386/sse.md > > +++ b/gcc/config/i386/sse.md > > @@ -933,7 +933,9 @@ (define_mode_attr iptr > > ;; Mapping of vector modes to VPTERNLOG suffix > > (define_mode_attr ternlogsuffix > > [(V8DI "q") (V4DI "q") (V2DI "q") > > + (V8DF "q") (V4DF "q") (V2DF "q") > > (V16SI "d") (V8SI "d") (V4SI "d") > > + (V16SF "d") (V8SF "d") (V4SF "d") > > (V32HI "d") (V16HI "d") (V8HI "d") > > (V64QI "d") (V32QI "d") (V16QI "d")]) > > > > @@ -10041,6 +10043,238 @@ (define_insn > > "<avx512>_vternlog<mode><sd_maskz_name>" > > (set_attr "prefix" "evex") > > (set_attr "mode" "<sseinsnmode>")]) > > > > +(define_insn "*<avx512>_vternlog<mode>_all" > > + [(set (match_operand:V 0 "register_operand" "=v") > > + (unspec:V > > + [(match_operand:V 1 "register_operand" "0") > > + (match_operand:V 2 "register_operand" "v") > > + (match_operand:V 3 "nonimmediate_operand" "vm") > > + (match_operand:SI 4 "const_0_to_255_operand")] > > + UNSPEC_VTERNLOG))] > > + "TARGET_AVX512F" > > + "vpternlog<ternlogsuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}" > > + [(set_attr "type" "sselog") > > + (set_attr "prefix" "evex") > > + (set_attr "mode" "<sseinsnmode>")]) > > + > > +;; There must be lots of other combinations like > > +;; > > +;; (any_logic:V > > +;; (any_logic:V op1 op2) > > +;; (any_logic:V op1 op3)) > > +;; > > +;; (any_logic:V > > +;; (any_logic:V > > +;; (any_logic:V op1, op2) > > +;; op3) > > +;; op1) > > +;; > > +;; and so on. 
> > + > > +(define_code_iterator any_logic1 [and ior xor]) > > +(define_code_iterator any_logic2 [and ior xor]) > > +(define_code_attr logic_op [(and "&") (ior "|") (xor "^")]) > > + > > +(define_insn_and_split "*<avx512>_vpternlog<mode>_1" > > + [(set (match_operand:V 0 "register_operand") > > + (any_logic:V > > + (any_logic1:V > > + (match_operand:V 1 "reg_or_notreg_operand") > > + (match_operand:V 2 "reg_or_notreg_operand")) > > + (any_logic2:V > > + (match_operand:V 3 "reg_or_notreg_operand") > > + (match_operand:V 4 "reg_or_notreg_operand"))))] > > + "(<MODE_SIZE> == 64 || TARGET_AVX512VL) > > + && ix86_pre_reload_split () > > + && (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]), > > + ix86_strip_reg_or_notreg_operand (operands[4])) > > + || rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[2]), > > + ix86_strip_reg_or_notreg_operand (operands[4])) > > + || rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]), > > + ix86_strip_reg_or_notreg_operand (operands[3])) > > + || rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[2]), > > + ix86_strip_reg_or_notreg_operand (operands[3])))" > > + "#" > > + "&& 1" > > + [(set (match_dup 0) > > + (unspec:V > > + [(match_dup 6) > > + (match_dup 2) > > + (match_dup 1) > > + (match_dup 5)] > > + UNSPEC_VTERNLOG))] > > +{ > > + /* VPTERNLOGD reg6, reg2, reg1, imm8. 
*/ > > + int reg6 = 0xF0; > > + int reg2 = 0xCC; > > + int reg1 = 0xAA; > > + int reg3 = 0; > > + int reg4 = 0; > > + int reg_mask, tmp1, tmp2; > > + if (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]), > > + ix86_strip_reg_or_notreg_operand (operands[4]))) > > + { > > + reg4 = reg1; > > + reg3 = reg6; > > + operands[6] = operands[3]; > > + } > > + else if (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[2]), > > + ix86_strip_reg_or_notreg_operand (operands[4]))) > > + { > > + reg4 = reg2; > > + reg3 = reg6; > > + operands[6] = operands[3]; > > + } > > + else if (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]), > > + ix86_strip_reg_or_notreg_operand (operands[3]))) > > + { > > + reg4 = reg6; > > + reg3 = reg1; > > + operands[6] = operands[4]; > > + } > > + else > > + { > > + reg4 = reg6; > > + reg3 = reg2; > > + operands[6] = operands[4]; > > + } > > + > > + reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1; > > + reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2; > > + reg3 = UNARY_P (operands[3]) ? ~reg3 : reg3; > > + reg4 = UNARY_P (operands[4]) ? 
~reg4 : reg4; > > + > > + tmp1 = reg1 <any_logic1:logic_op> reg2; > > + tmp2 = reg3 <any_logic2:logic_op> reg4; > > + reg_mask = tmp1 <any_logic:logic_op> tmp2; > > + reg_mask &= 0xFF; > > + > > + operands[1] = ix86_strip_reg_or_notreg_operand (operands[1]); > > + operands[2] = ix86_strip_reg_or_notreg_operand (operands[2]); > > + operands[6] = ix86_strip_reg_or_notreg_operand (operands[6]); > > + operands[5] = GEN_INT (reg_mask); > > +}) > > + > > +(define_insn_and_split "*<avx512>_vpternlog<mode>_2" > > + [(set (match_operand:V 0 "register_operand") > > + (any_logic:V > > + (any_logic1:V > > + (any_logic2:V > > + (match_operand:V 1 "reg_or_notreg_operand") > > + (match_operand:V 2 "reg_or_notreg_operand")) > > + (match_operand:V 3 "reg_or_notreg_operand")) > > + (match_operand:V 4 "reg_or_notreg_operand")))] > > + "(<MODE_SIZE> == 64 || TARGET_AVX512VL) > > + && ix86_pre_reload_split () > > + && (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]), > > + ix86_strip_reg_or_notreg_operand (operands[4])) > > + || rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[2]), > > + ix86_strip_reg_or_notreg_operand (operands[4])) > > + || rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]), > > + ix86_strip_reg_or_notreg_operand (operands[3])) > > + || rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[2]), > > + ix86_strip_reg_or_notreg_operand (operands[3])))" > > + "#" > > + "&& 1" > > + [(set (match_dup 0) > > + (unspec:V > > + [(match_dup 6) > > + (match_dup 2) > > + (match_dup 1) > > + (match_dup 5)] > > + UNSPEC_VTERNLOG))] > > +{ > > + /* VPTERNLOGD reg6, reg2, reg1, imm8. 
*/ > > + int reg6 = 0xF0; > > + int reg2 = 0xCC; > > + int reg1 = 0xAA; > > + int reg3 = 0; > > + int reg4 = 0; > > + int reg_mask, tmp1, tmp2; > > + if (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]), > > + ix86_strip_reg_or_notreg_operand (operands[4]))) > > + { > > + reg4 = reg1; > > + reg3 = reg6; > > + operands[6] = operands[3]; > > + } > > + else if (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[2]), > > + ix86_strip_reg_or_notreg_operand (operands[4]))) > > + { > > + reg4 = reg2; > > + reg3 = reg6; > > + operands[6] = operands[3]; > > + } > > + else if (rtx_equal_p (ix86_strip_reg_or_notreg_operand (operands[1]), > > + ix86_strip_reg_or_notreg_operand (operands[3]))) > > + { > > + reg4 = reg6; > > + reg3 = reg1; > > + operands[6] = operands[4]; > > + } > > + else > > + { > > + reg4 = reg6; > > + reg3 = reg2; > > + operands[6] = operands[4]; > > + } > > + > > + reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1; > > + reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2; > > + reg3 = UNARY_P (operands[3]) ? ~reg3 : reg3; > > + reg4 = UNARY_P (operands[4]) ? 
~reg4 : reg4; > > + > > + tmp1 = reg1 <any_logic2:logic_op> reg2; > > + tmp2 = tmp1 <any_logic1:logic_op> reg3; > > + reg_mask = tmp2 <any_logic:logic_op> reg4; > > + reg_mask &= 0xFF; > > + > > + operands[1] = ix86_strip_reg_or_notreg_operand (operands[1]); > > + operands[2] = ix86_strip_reg_or_notreg_operand (operands[2]); > > + operands[6] = ix86_strip_reg_or_notreg_operand (operands[6]); > > + operands[5] = GEN_INT (reg_mask); > > +}) > > + > > +(define_insn_and_split "*<avx512>_vpternlog<mode>_3" > > + [(set (match_operand:V 0 "register_operand") > > + (any_logic:V > > + (any_logic1:V > > + (match_operand:V 1 "reg_or_notreg_operand") > > + (match_operand:V 2 "reg_or_notreg_operand")) > > + (match_operand:V 3 "reg_or_notreg_operand")))] > > + "(<MODE_SIZE> == 64 || TARGET_AVX512VL) > > + && ix86_pre_reload_split ()" > > + "#" > > + "&& 1" > > + [(set (match_dup 0) > > + (unspec:V > > + [(match_dup 3) > > + (match_dup 2) > > + (match_dup 1) > > + (match_dup 4)] > > + UNSPEC_VTERNLOG))] > > +{ > > + /* VPTERNLOGD reg3, reg2, reg1, imm8. */ > > + int reg3 = 0xF0; > > + int reg2 = 0xCC; > > + int reg1 = 0xAA; > > + int reg_mask, tmp1; > > + > > + reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1; > > + reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2; > > + reg3 = UNARY_P (operands[3]) ? 
~reg3 : reg3; > > + > > + tmp1 = reg1 <any_logic1:logic_op> reg2; > > + reg_mask = tmp1 <any_logic:logic_op> reg3; > > + reg_mask &= 0xFF; > > + > > + operands[1] = ix86_strip_reg_or_notreg_operand (operands[1]); > > + operands[2] = ix86_strip_reg_or_notreg_operand (operands[2]); > > + operands[3] = ix86_strip_reg_or_notreg_operand (operands[3]); > > + operands[4] = GEN_INT (reg_mask); > > +}) > > + > > + > > (define_insn "<avx512>_vternlog<mode>_mask" > > [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") > > (vec_merge:VI48_AVX512VL > > diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c > > b/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c > > index 78bf5d33689..fbc3de08119 100644 > > --- a/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c > > +++ b/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c > > @@ -1,7 +1,8 @@ > > /* PR target/95524 */ > > /* { dg-do compile } */ > > /* { dg-options "-O2 -mavx512bw" } */ > > -/* { dg-final { scan-assembler-times "vpand\[^\n\]*%zmm" 3 } } */ > > +/* { dg-final { scan-assembler-times "vpand\[^\n\]*%zmm" 2 } } */ > > +/* { dg-final { scan-assembler-times "vpternlogd\[^\n\]*%zmm" 1 } } */ > > typedef char v64qi __attribute__ ((vector_size (64))); > > typedef unsigned char v64uqi __attribute__ ((vector_size (64))); > > > > @@ -11,7 +12,6 @@ foo_ashiftrt_512 (v64qi a) > > return a >> 2; > > } > > /* { dg-final { scan-assembler-times "vpsraw\[^\n\]*%zmm" 1 } } */ > > -/* { dg-final { scan-assembler-times "vpxor\[^\n\]*%zmm" 1 } } */ > > /* { dg-final { scan-assembler-times "vpsubb\[^\n\]*%zmm" 1 } } */ > > > > __attribute__((noipa)) v64qi > > diff --git a/gcc/testsuite/gcc.target/i386/pr101989-1.c > > b/gcc/testsuite/gcc.target/i386/pr101989-1.c > > new file mode 100644 > > index 00000000000..594093ecdde > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr101989-1.c > > @@ -0,0 +1,51 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2" } */ > 
> +/* { dg-final { scan-assembler-times "vpternlog" 6 } } */ > > +/* { dg-final { scan-assembler-not "vpxor" } } */ > > +/* { dg-final { scan-assembler-not "vpor" } } */ > > +/* { dg-final { scan-assembler-not "vpand" } } */ > > + > > +#include<immintrin.h> > > +__m256d > > +__attribute__((noipa, target("avx512vl"))) > > +copysign2_pd(__m256d from, __m256d to) { > > + __m256i a = _mm256_castpd_si256(from); > > + __m256d avx_signbit = > > _mm256_castsi256_pd(_mm256_slli_epi64(_mm256_cmpeq_epi64(a, a), 63)); > > + /* (avx_signbit & from) | (~avx_signbit & to) */ > > + return _mm256_or_pd(_mm256_and_pd(avx_signbit, from), > > _mm256_andnot_pd(avx_signbit, to)); > > +} > > + > > +__m256i > > +__attribute__((noipa, target("avx512vl"))) > > +foo (__m256i src1, __m256i src2, __m256i src3) > > +{ > > + return (src2 & ~src1) | (src3 & src1); > > +} > > + > > +__m256i > > +__attribute__ ((noipa, target("avx512vl"))) > > +foo1 (__m256i src1, __m256i src2, __m256i src3) > > +{ > > + return (src2 & src1) | (src3 & ~src1); > > +} > > + > > +__m256i > > +__attribute__ ((noipa, target("avx512vl"))) > > +foo2 (__m256i src1, __m256i src2, __m256i src3) > > +{ > > + return (src2 & src1) | (~src3 & src1); > > +} > > + > > +__m256i > > +__attribute__ ((noipa, target("avx512vl"))) > > +foo3 (__m256i src1, __m256i src2, __m256i src3) > > +{ > > + return (~src2 & src1) | (src3 & src1); > > +} > > + > > +__m256i > > +__attribute__ ((noipa, target("avx512vl"))) > > +foo4 (__m256i src1, __m256i src2, __m256i src3) > > +{ > > + return src3 & src2 ^ src1; > > +} > > diff --git a/gcc/testsuite/gcc.target/i386/pr101989-2.c > > b/gcc/testsuite/gcc.target/i386/pr101989-2.c > > new file mode 100644 > > index 00000000000..9d9759a8e1d > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr101989-2.c > > @@ -0,0 +1,102 @@ > > +/* { dg-do run } */ > > +/* { dg-options "-O2 -mavx2 -mno-avx512f" } */ > > +/* { dg-require-effective-target avx512vl } */ > > + > > +#define AVX512VL > > + > > +#include 
"avx512f-helper.h" > > + > > +#include "pr101989-1.c" > > +__m256d > > +avx2_copysign2_pd (__m256d from, __m256d to) { > > + __m256i a = _mm256_castpd_si256(from); > > + __m256d avx_signbit = > > _mm256_castsi256_pd(_mm256_slli_epi64(_mm256_cmpeq_epi64(a, a), 63)); > > + /* (avx_signbit & from) | (~avx_signbit & to) */ > > + return _mm256_or_pd(_mm256_and_pd(avx_signbit, from), > > _mm256_andnot_pd(avx_signbit, to)); > > +} > > + > > +__m256i > > +avx2_foo (__m256i src1, __m256i src2, __m256i src3) > > +{ > > + return (src2 & ~src1) | (src3 & src1); > > +} > > + > > +__m256i > > +avx2_foo1 (__m256i src1, __m256i src2, __m256i src3) > > +{ > > + return (src2 & src1) | (src3 & ~src1); > > +} > > + > > +__m256i > > +avx2_foo2 (__m256i src1, __m256i src2, __m256i src3) > > +{ > > + return (src2 & src1) | (~src3 & src1); > > +} > > + > > +__m256i > > +avx2_foo3 (__m256i src1, __m256i src2, __m256i src3) > > +{ > > + return (~src2 & src1) | (src3 & src1); > > +} > > + > > +__m256i > > +avx2_foo4 (__m256i src1, __m256i src2, __m256i src3) > > +{ > > + return src3 & src2 ^ src1; > > +} > > + > > + > > +void > > +test_256 (void) > > +{ > > + union256i_q q1, q2, q3, res2, exp2; > > + union256d d1, d2, res1, exp1; > > + int i, sign = 1; > > + > > + for (i = 0; i < 4; i++) > > + { > > + d1.a[i] = 12.34 * (i + 2000) * sign; > > + d2.a[i] = 56.78 * (i - 30) * sign; > > + q1.a[i] = 12 * (i + 2000) * sign; > > + q2.a[i] = 56 * (i - 30) * sign; > > + q3.a[i] = 90 * (i + 40) * sign; > > + res1.a[i] = DEFAULT_VALUE; > > + exp1.a[i] = DEFAULT_VALUE; > > + res2.a[i] = exp2.a[i] = -1; > > + sign = -sign; > > + } > > + > > + exp1.x = avx2_copysign2_pd (d1.x, d2.x); > > + res1.x = copysign2_pd (d1.x, d2.x); > > + if (UNION_CHECK (256, d) (res1, exp1.a)) > > + abort (); > > + > > + exp2.x = avx2_foo1 (q1.x, q2.x, q3.x); > > + res2.x = foo1 (q1.x, q2.x, q3.x); > > + if (UNION_CHECK (256, i_q) (res2, exp2.a)) > > + abort (); > > + > > + exp2.x = avx2_foo2 (q1.x, q2.x, q3.x); > > + res2.x = 
foo2 (q1.x, q2.x, q3.x); > > + if (UNION_CHECK (256, i_q) (res2, exp2.a)) > > + abort (); > > + > > + exp2.x = avx2_foo3 (q1.x, q2.x, q3.x); > > + res2.x = foo3 (q1.x, q2.x, q3.x); > > + if (UNION_CHECK (256, i_q) (res2, exp2.a)) > > + abort (); > > + > > + exp2.x = avx2_foo4 (q1.x, q2.x, q3.x); > > + res2.x = foo4 (q1.x, q2.x, q3.x); > > + if (UNION_CHECK (256, i_q) (res2, exp2.a)) > > + abort (); > > + > > + exp2.x = avx2_foo (q1.x, q2.x, q3.x); > > + res2.x = foo (q1.x, q2.x, q3.x); > > + if (UNION_CHECK (256, i_q) (res2, exp2.a)) > > + abort (); > > +} > > + > > +static void > > +test_128 () > > +{} > > -- > > 2.18.1 > > > >