On Fri, Sep 16, 2011 at 11:16:44AM +0200, Jakub Jelinek wrote: > The avx2_extracti128 pattern looked like wrong RTL, as to extract > a 2 element vector from 4 element vector it used just one constant > in the parallel instead of two. I've changed it into a define_expand.
Actually there were two further issues with avx2_extracti128, one introduced by my change (pasto in switch control expression), one preexisting (no idea why it didn't fail before) - in vextracti128 the source operand has to be register and destination operand has to be register or memory, while the predicates were incorrectly swapped. So here is a version that has been (together with the smin/smax patch) bootstrapped/regtested on x86_64-linux and i686-linux and additionally tested with RUNTESTFLAGS='--target_board=unix\{-m32,-m64\} i386.exp vect.exp' on AVX capable hw. Sorry for the screw-up. 2011-09-16 Jakub Jelinek <ja...@redhat.com> * config/i386/sse.md (vec_extract_hi_<mode>, vec_extract_hi_v16hi, vec_extract_hi_v32qi): Use vextracti128 instead of vextractf128 for -mavx2 and integer vectors. For V4DFmode fix up mode attribute. (VEC_EXTRACT_MODE): For TARGET_AVX add 32-byte vectors. (vec_set_lo_<mode>, vec_set_hi_<mode>): For VI8F_256 modes use V4DF instead of V8SF mode attribute. (avx2_extracti128): Change into define_expand. * config/i386/i386.c (ix86_expand_vector_extract): Handle 32-byte vector modes if TARGET_AVX. * gcc.target/i386/sse2-extract-1.c: New test. * gcc.target/i386/avx-extract-1.c: New test. --- gcc/config/i386/sse.md.jj 2011-09-15 17:36:20.000000000 +0200 +++ gcc/config/i386/sse.md 2011-09-16 10:51:51.000000000 +0200 @@ -3863,13 +3863,23 @@ (define_insn "vec_extract_hi_<mode>" (match_operand:VI8F_256 1 "register_operand" "x,x") (parallel [(const_int 2) (const_int 3)])))] "TARGET_AVX" - "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" +{ + if (get_attr_mode (insn) == MODE_OI) + return "vextracti128\t{$0x1, %1, %0|%0, %1, 0x1}"; + else + return "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"; +} [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "memory" "none,store") (set_attr "prefix" "vex") - (set_attr "mode" "V8SF")]) + (set (attr "mode") + (if_then_else + (and (match_test "TARGET_AVX2") + (eq (const_string "<MODE>mode") (const_string "V4DImode"))) + (const_string "OI") + (const_string "V4DF")))]) (define_insn_and_split "vec_extract_lo_<mode>" [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m") @@ -3898,13 +3908,23 @@ (define_insn "vec_extract_hi_<mode>" (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)])))] "TARGET_AVX" - "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" +{ + if (get_attr_mode (insn) == MODE_OI) + return "vextracti128\t{$0x1, %1, %0|%0, %1, 0x1}"; + else + return "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"; +} [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "memory" "none,store") (set_attr "prefix" "vex") - (set_attr "mode" "V8SF")]) + (set (attr "mode") + (if_then_else + (and (match_test "TARGET_AVX2") + (eq (const_string "<MODE>mode") (const_string "V8SImode"))) + (const_string "OI") + (const_string "V8SF")))]) (define_insn_and_split "vec_extract_lo_v16hi" [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") @@ -3937,13 +3957,21 @@ (define_insn "vec_extract_hi_v16hi" (const_int 12) (const_int 13) (const_int 14) (const_int 15)])))] "TARGET_AVX" - "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" +{ + if (get_attr_mode (insn) == MODE_OI) + return "vextracti128\t{$0x1, %1, %0|%0, %1, 0x1}"; + else + return "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"; +} [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "memory" "none,store") (set_attr "prefix" "vex") - (set_attr "mode" "V8SF")]) + (set (attr "mode") + (if_then_else (match_test "TARGET_AVX2") + (const_string "OI") + (const_string "V8SF")))]) (define_insn_and_split "vec_extract_lo_v32qi" [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") @@ -3984,13 +4012,21 @@ (define_insn "vec_extract_hi_v32qi" (const_int 28) (const_int 29) (const_int 30) (const_int 31)])))] "TARGET_AVX" - "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" +{ + if (get_attr_mode (insn) == MODE_OI) + return "vextracti128\t{$0x1, %1, %0|%0, %1, 0x1}"; + else + return "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"; +} [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "memory" "none,store") (set_attr "prefix" "vex") - (set_attr "mode" "V8SF")]) + (set (attr "mode") + (if_then_else (match_test "TARGET_AVX2") + (const_string "OI") + (const_string "V8SF")))]) (define_insn "*sse4_1_extractps" [(set (match_operand:SF 0 "nonimmediate_operand" "=rm") @@ -4024,7 +4060,10 @@ (define_insn_and_split "*vec_extract_v4s ;; Modes handled by vec_extract patterns. (define_mode_iterator VEC_EXTRACT_MODE - [V16QI V8HI V4SI V2DI + [(V32QI "TARGET_AVX") V16QI + (V16HI "TARGET_AVX") V8HI + (V8SI "TARGET_AVX") V4SI + (V4DI "TARGET_AVX") V2DI (V8SF "TARGET_AVX") V4SF (V4DF "TARGET_AVX") V2DF]) @@ -11952,7 +11991,7 @@ (define_insn "vec_set_lo_<mode>" (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "vex") - (set_attr "mode" "V8SF")]) + (set_attr "mode" "V4DF")]) (define_insn "vec_set_hi_<mode>" [(set (match_operand:VI8F_256 0 "register_operand" "=x") @@ -11967,7 +12006,7 @@ (define_insn "vec_set_hi_<mode>" (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "vex") - (set_attr "mode" "V8SF")]) + (set_attr "mode" "V4DF")]) (define_insn "vec_set_lo_<mode>" [(set (match_operand:VI4F_256 0 "register_operand" "=x") @@ -12158,17 +12197,29 @@ (define_expand "vec_init<mode>" DONE; }) -(define_insn "avx2_extracti128" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (vec_select:V2DI - (match_operand:V4DI 1 "nonimmediate_operand" "xm") - (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))] +(define_expand "avx2_extracti128" + [(match_operand:V2DI 0 "nonimmediate_operand" "") + (match_operand:V4DI 1 "register_operand" "") + (match_operand:SI 2 "const_0_to_1_operand" "")] "TARGET_AVX2" - "vextracti128\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemov") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "OI")]) +{ + rtx (*insn)(rtx, rtx); + + switch (INTVAL (operands[2])) + { + case 0: + insn = gen_vec_extract_lo_v4di; + break; + case 1: + insn = gen_vec_extract_hi_v4di; + break; + default: + gcc_unreachable (); + } + + emit_insn (insn (operands[0], operands[1])); + DONE; +}) (define_expand "avx2_inserti128" [(match_operand:V4DI 0 "register_operand" "") --- gcc/config/i386/i386.c.jj 2011-09-15 19:27:03.000000000 +0200 +++ gcc/config/i386/i386.c 2011-09-16 09:37:43.000000000 +0200 @@ -32592,6 +32592,84 @@ ix86_expand_vector_extract (bool mmx_ok, use_vec_extr = TARGET_SSE4_1; break; + case V8SFmode: + if (TARGET_AVX) + { + tmp = gen_reg_rtx (V4SFmode); + if (elt < 4) + emit_insn (gen_vec_extract_lo_v8sf (tmp, vec)); + else + emit_insn (gen_vec_extract_hi_v8sf (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 3); + return; + } + break; + + case V4DFmode: + if (TARGET_AVX) + { + tmp = gen_reg_rtx (V2DFmode); + if (elt < 2) + emit_insn (gen_vec_extract_lo_v4df (tmp, vec)); + else + emit_insn (gen_vec_extract_hi_v4df (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 1); + return; + } + break; + + case V32QImode: + if (TARGET_AVX) + { + tmp = gen_reg_rtx (V16QImode); + if (elt < 16) + emit_insn (gen_vec_extract_lo_v32qi (tmp, vec)); + else + emit_insn (gen_vec_extract_hi_v32qi (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 15); + return; + } + break; + + case V16HImode: + if (TARGET_AVX) + { + tmp = gen_reg_rtx (V8HImode); + if (elt < 8) + emit_insn (gen_vec_extract_lo_v16hi (tmp, vec)); + else + emit_insn (gen_vec_extract_hi_v16hi (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 7); + return; + } + break; + + case V8SImode: + if (TARGET_AVX) + { + tmp = gen_reg_rtx (V4SImode); + if (elt < 4) + emit_insn (gen_vec_extract_lo_v8si (tmp, vec)); + else + emit_insn (gen_vec_extract_hi_v8si (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 3); + return; + } + break; + + case V4DImode: + if (TARGET_AVX) + { + tmp = gen_reg_rtx (V2DImode); + if (elt < 2) + emit_insn (gen_vec_extract_lo_v4di (tmp, vec)); + else + emit_insn (gen_vec_extract_hi_v4di (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 1); + return; + } + break; + case V8QImode: /* ??? Could extract the appropriate HImode element and shift. */ default: --- gcc/testsuite/gcc.target/i386/sse2-extract-1.c.jj 2011-09-16 10:41:45.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/sse2-extract-1.c 2011-09-16 10:41:55.000000000 +0200 @@ -0,0 +1,102 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -msse2" } */ +/* { dg-require-effective-target sse2_runtime } */ + +extern void abort (void); +typedef unsigned long long uint64_t; + +#define vector(elcount, type) \ +__attribute__((vector_size((elcount)*sizeof(type)))) type + +#define FN(elcount, type, idx) \ +__attribute__((noinline, noclone)) \ +type f##type##elcount##_##idx (vector (elcount, type) x) { return x[idx] + 1; } +#define T2(elcount, type) \ + H (elcount, type) \ + F (elcount, type, 0) \ + F (elcount, type, 1) +#define T4(elcount, type) \ + T2 (elcount, type) \ + F (elcount, type, 2) \ + F (elcount, type, 3) +#define T8(elcount, type) \ + T4 (elcount, type) \ + F (elcount, type, 4) \ + F (elcount, type, 5) \ + F (elcount, type, 6) \ + F (elcount, type, 7) +#define T16(elcount, type) \ + T8 (elcount, type) \ + F (elcount, type, 8) \ + F (elcount, type, 9) \ + F (elcount, type, 10) \ + F (elcount, type, 11) \ + F (elcount, type, 12) \ + F (elcount, type, 13) \ + F (elcount, type, 14) \ + F (elcount, type, 15) +#define T32(elcount, type) \ + T16 (elcount, type) \ + F (elcount, type, 16) \ + F (elcount, type, 17) \ + F (elcount, type, 18) \ + F (elcount, type, 19) \ + F (elcount, type, 20) \ + F (elcount, type, 21) \ + F (elcount, type, 22) \ + F (elcount, type, 23) \ + F (elcount, type, 24) \ + F (elcount, type, 25) \ + F (elcount, type, 26) \ + F (elcount, type, 27) \ + F (elcount, type, 28) \ + F (elcount, type, 29) \ + F (elcount, type, 30) \ + F (elcount, type, 31) +#define TESTS_SSE2 \ +T2 (2, double) E \ +T2 (2, uint64_t) E \ +T4 (4, float) E \ +T4 (4, int) E \ +T8 (8, short) E \ +T16 (16, char) E +#define TESTS_AVX \ +T4 (4, double) E \ +T4 (4, uint64_t) E \ +T8 (8, float) E \ +T8 (8, int) E \ +T16 (16, short) E \ +T32 (32, char) E +#ifdef __AVX__ +#define TESTS TESTS_SSE2 TESTS_AVX +#else +#define TESTS TESTS_SSE2 +#endif + +#define F FN +#define H(elcount, type) +#define E +TESTS + +int +main () +{ +#undef F +#undef H +#undef E +#define H(elcount, type) \ + vector (elcount, type) v##type##elcount = { +#define E }; +#define F(elcount, type, idx) idx + 1, + TESTS +#undef F +#undef H +#undef E +#define H(elcount, type) +#define E +#define F(elcount, type, idx) \ + if (f##type##elcount##_##idx (v##type##elcount) != idx + 2) \ + abort (); + TESTS + return 0; +} --- gcc/testsuite/gcc.target/i386/avx-extract-1.c.jj 2011-09-16 10:44:19.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx-extract-1.c 2011-09-16 10:44:58.000000000 +0200 @@ -0,0 +1,5 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx" } */ +/* { dg-require-effective-target avx_runtime } */ + +#include "sse2-extract-1.c" Jakub