Re: [PATCH] Introduce VEC_UNPACK_FIX_TRUNC_{LO,HI}_EXPR and VEC_PACK_FLOAT_EXPR, use it in x86 vectorization (PR target/85918)
On Tue, 29 May 2018, Jakub Jelinek wrote: > On Tue, May 29, 2018 at 11:15:51AM +0200, Richard Biener wrote: > > Looking at other examples the only thing we have is > > maybe_ne and friends on TYPE_VECTOR_SUBPARTS. But I think the only > > thing missing is > > > > || (maybe_ne (TYPE_VECTOR_SUBPARTS (lhs_type), > >2 * TYPE_VECTOR_SUBPARTS (rhs_type))) > > > > that together with the mode size check should ensure same size > > vectors. > > The other way around. It would then be (and I've added similar tests for > VEC_PACK*): Ah, of course... OK if it tests ok. Thanks, Richard. > 2018-05-29 Jakub Jelinek > > * tree-cfg.c (verify_gimple_assign_unary): Add checking for > VEC_UNPACK_*_EXPR. > (verify_gimple_assign_binary): Check TYPE_VECTOR_SUBPARTS for > VEC_PACK_*_EXPR. > > --- gcc/tree-cfg.c.jj 2018-05-28 19:47:55.180685259 +0200 > +++ gcc/tree-cfg.c2018-05-29 11:27:14.521339290 +0200 > @@ -3678,7 +3678,37 @@ verify_gimple_assign_unary (gassign *stm > case VEC_UNPACK_FLOAT_LO_EXPR: > case VEC_UNPACK_FIX_TRUNC_HI_EXPR: > case VEC_UNPACK_FIX_TRUNC_LO_EXPR: > - /* FIXME. 
*/ > + if (TREE_CODE (rhs1_type) != VECTOR_TYPE > + || TREE_CODE (lhs_type) != VECTOR_TYPE > + || (!INTEGRAL_TYPE_P (TREE_TYPE (lhs_type)) > + && !SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs_type))) > + || (!INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type)) > + && !SCALAR_FLOAT_TYPE_P (TREE_TYPE (rhs1_type))) > + || ((rhs_code == VEC_UNPACK_HI_EXPR > +|| rhs_code == VEC_UNPACK_LO_EXPR) > + && (INTEGRAL_TYPE_P (TREE_TYPE (lhs_type)) > + != INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type > + || ((rhs_code == VEC_UNPACK_FLOAT_HI_EXPR > +|| rhs_code == VEC_UNPACK_FLOAT_LO_EXPR) > + && (INTEGRAL_TYPE_P (TREE_TYPE (lhs_type)) > + || SCALAR_FLOAT_TYPE_P (TREE_TYPE (rhs1_type > + || ((rhs_code == VEC_UNPACK_FIX_TRUNC_HI_EXPR > +|| rhs_code == VEC_UNPACK_FIX_TRUNC_LO_EXPR) > + && (INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type)) > + || SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs_type > + || (maybe_ne (GET_MODE_SIZE (element_mode (lhs_type)), > + 2 * GET_MODE_SIZE (element_mode (rhs1_type))) > + && (!VECTOR_BOOLEAN_TYPE_P (lhs_type) > + || !VECTOR_BOOLEAN_TYPE_P (rhs1_type))) > + || maybe_ne (2 * TYPE_VECTOR_SUBPARTS (lhs_type), > +TYPE_VECTOR_SUBPARTS (rhs1_type))) > + { > + error ("type mismatch in vector unpack expression"); > + debug_generic_expr (lhs_type); > + debug_generic_expr (rhs1_type); > + return true; > +} > + >return false; > > case NEGATE_EXPR: > @@ -3993,7 +4023,9 @@ verify_gimple_assign_binary (gassign *st >== INTEGRAL_TYPE_P (TREE_TYPE (lhs_type > || !types_compatible_p (rhs1_type, rhs2_type) > || maybe_ne (GET_MODE_SIZE (element_mode (rhs1_type)), > - 2 * GET_MODE_SIZE (element_mode (lhs_type > + 2 * GET_MODE_SIZE (element_mode (lhs_type))) > + || maybe_ne (2 * TYPE_VECTOR_SUBPARTS (rhs1_type), > + TYPE_VECTOR_SUBPARTS (lhs_type))) >{ > error ("type mismatch in vector pack expression"); > debug_generic_expr (lhs_type); > @@ -4012,7 +4044,9 @@ verify_gimple_assign_binary (gassign *st > || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs_type)) > || !types_compatible_p (rhs1_type, rhs2_type) > || maybe_ne 
(GET_MODE_SIZE (element_mode (rhs1_type)), > -2 * GET_MODE_SIZE (element_mode (lhs_type > +2 * GET_MODE_SIZE (element_mode (lhs_type))) > + || maybe_ne (2 * TYPE_VECTOR_SUBPARTS (rhs1_type), > +TYPE_VECTOR_SUBPARTS (lhs_type))) > { > error ("type mismatch in vector pack expression"); > debug_generic_expr (lhs_type); > > > Jakub > > -- Richard Biener SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
Re: [PATCH] Introduce VEC_UNPACK_FIX_TRUNC_{LO,HI}_EXPR and VEC_PACK_FLOAT_EXPR, use it in x86 vectorization (PR target/85918)
On Tue, May 29, 2018 at 11:15:51AM +0200, Richard Biener wrote: > Looking at other examples the only thing we have is > maybe_ne and friends on TYPE_VECTOR_SUBPARTS. But I think the only > thing missing is > > || (maybe_ne (TYPE_VECTOR_SUBPARTS (lhs_type), > 2 * TYPE_VECTOR_SUBPARTS (rhs_type))) > > that together with the mode size check should ensure same size > vectors. The other way around. It would then be (and I've added similar tests for VEC_PACK*): 2018-05-29 Jakub Jelinek * tree-cfg.c (verify_gimple_assign_unary): Add checking for VEC_UNPACK_*_EXPR. (verify_gimple_assign_binary): Check TYPE_VECTOR_SUBPARTS for VEC_PACK_*_EXPR. --- gcc/tree-cfg.c.jj 2018-05-28 19:47:55.180685259 +0200 +++ gcc/tree-cfg.c 2018-05-29 11:27:14.521339290 +0200 @@ -3678,7 +3678,37 @@ verify_gimple_assign_unary (gassign *stm case VEC_UNPACK_FLOAT_LO_EXPR: case VEC_UNPACK_FIX_TRUNC_HI_EXPR: case VEC_UNPACK_FIX_TRUNC_LO_EXPR: - /* FIXME. */ + if (TREE_CODE (rhs1_type) != VECTOR_TYPE + || TREE_CODE (lhs_type) != VECTOR_TYPE + || (!INTEGRAL_TYPE_P (TREE_TYPE (lhs_type)) + && !SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs_type))) + || (!INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type)) + && !SCALAR_FLOAT_TYPE_P (TREE_TYPE (rhs1_type))) + || ((rhs_code == VEC_UNPACK_HI_EXPR + || rhs_code == VEC_UNPACK_LO_EXPR) + && (INTEGRAL_TYPE_P (TREE_TYPE (lhs_type)) + != INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type + || ((rhs_code == VEC_UNPACK_FLOAT_HI_EXPR + || rhs_code == VEC_UNPACK_FLOAT_LO_EXPR) + && (INTEGRAL_TYPE_P (TREE_TYPE (lhs_type)) + || SCALAR_FLOAT_TYPE_P (TREE_TYPE (rhs1_type + || ((rhs_code == VEC_UNPACK_FIX_TRUNC_HI_EXPR + || rhs_code == VEC_UNPACK_FIX_TRUNC_LO_EXPR) + && (INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type)) + || SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs_type + || (maybe_ne (GET_MODE_SIZE (element_mode (lhs_type)), + 2 * GET_MODE_SIZE (element_mode (rhs1_type))) + && (!VECTOR_BOOLEAN_TYPE_P (lhs_type) + || !VECTOR_BOOLEAN_TYPE_P (rhs1_type))) + || maybe_ne (2 * TYPE_VECTOR_SUBPARTS (lhs_type), + 
TYPE_VECTOR_SUBPARTS (rhs1_type))) + { + error ("type mismatch in vector unpack expression"); + debug_generic_expr (lhs_type); + debug_generic_expr (rhs1_type); + return true; +} + return false; case NEGATE_EXPR: @@ -3993,7 +4023,9 @@ verify_gimple_assign_binary (gassign *st == INTEGRAL_TYPE_P (TREE_TYPE (lhs_type)))) || !types_compatible_p (rhs1_type, rhs2_type) || maybe_ne (GET_MODE_SIZE (element_mode (rhs1_type)), -2 * GET_MODE_SIZE (element_mode (lhs_type)))) +2 * GET_MODE_SIZE (element_mode (lhs_type))) + || maybe_ne (2 * TYPE_VECTOR_SUBPARTS (rhs1_type), +TYPE_VECTOR_SUBPARTS (lhs_type))) { error ("type mismatch in vector pack expression"); debug_generic_expr (lhs_type); @@ -4012,7 +4044,9 @@ verify_gimple_assign_binary (gassign *st || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs_type)) || !types_compatible_p (rhs1_type, rhs2_type) || maybe_ne (GET_MODE_SIZE (element_mode (rhs1_type)), - 2 * GET_MODE_SIZE (element_mode (lhs_type)))) + 2 * GET_MODE_SIZE (element_mode (lhs_type))) + || maybe_ne (2 * TYPE_VECTOR_SUBPARTS (rhs1_type), + TYPE_VECTOR_SUBPARTS (lhs_type))) { error ("type mismatch in vector pack expression"); debug_generic_expr (lhs_type); Jakub
Re: [PATCH] Introduce VEC_UNPACK_FIX_TRUNC_{LO,HI}_EXPR and VEC_PACK_FLOAT_EXPR, use it in x86 vectorization (PR target/85918)
On Tue, 29 May 2018, Jakub Jelinek wrote: > On Mon, May 28, 2018 at 12:12:18PM +0200, Richard Biener wrote: > > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > > > Apart from > > > > --- gcc/tree-cfg.c.jj 2018-05-26 23:03:55.361873297 +0200 > > +++ gcc/tree-cfg.c 2018-05-27 12:54:55.046197128 +0200 > > @@ -3676,6 +3676,8 @@ verify_gimple_assign_unary (gassign *stm > > case VEC_UNPACK_LO_EXPR: > > case VEC_UNPACK_FLOAT_HI_EXPR: > > case VEC_UNPACK_FLOAT_LO_EXPR: > > +case VEC_UNPACK_FIX_TRUNC_HI_EXPR: > > +case VEC_UNPACK_FIX_TRUNC_LO_EXPR: > >/* FIXME. */ > >return false; > > > > > > the middle-end changes look OK. Can you please add verification > > for the new codes here? > > So like this (incremental patch, as it affects also the other codes)? > > The VECTOR_BOOLEAN_P stuff is there because apparently we use these codes on > vector booleans too where the element size is the same (for > VEC_UNPACK_{HI,LO}_EXPR only). > > Also, not really sure how to verify sizes of the whole vectors or better > nunits in the world of poly-int vector sizes (but VEC_PACK_*EXPR doesn't > verify that either). Looking at other examples the only thing we have is maybe_ne and friends on TYPE_VECTOR_SUBPARTS. But I think the only thing missing is || (maybe_ne (TYPE_VECTOR_SUBPARTS (lhs_type), 2 * TYPE_VECTOR_SUBPARTS (rhs_type))) that together with the mode size check should ensure same size vectors. Ok with this adjustment. Thanks, Richard. > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2018-05-29 Jakub Jelinek > > * tree-cfg.c (verify_gimple_assign_unary): Add checking for > VEC_UNPACK_*_EXPR. > > --- gcc/tree-cfg.c.jj 2018-05-28 19:47:55.180685259 +0200 > +++ gcc/tree-cfg.c2018-05-29 10:05:55.213775216 +0200 > @@ -3678,7 +3678,35 @@ verify_gimple_assign_unary (gassign *stm > case VEC_UNPACK_FLOAT_LO_EXPR: > case VEC_UNPACK_FIX_TRUNC_HI_EXPR: > case VEC_UNPACK_FIX_TRUNC_LO_EXPR: > - /* FIXME. 
*/ > + if (TREE_CODE (rhs1_type) != VECTOR_TYPE > + || TREE_CODE (lhs_type) != VECTOR_TYPE > + || (!INTEGRAL_TYPE_P (TREE_TYPE (lhs_type)) > + && !SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs_type))) > + || (!INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type)) > + && !SCALAR_FLOAT_TYPE_P (TREE_TYPE (rhs1_type))) > + || ((rhs_code == VEC_UNPACK_HI_EXPR > +|| rhs_code == VEC_UNPACK_LO_EXPR) > + && (INTEGRAL_TYPE_P (TREE_TYPE (lhs_type)) > + != INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type > + || ((rhs_code == VEC_UNPACK_FLOAT_HI_EXPR > +|| rhs_code == VEC_UNPACK_FLOAT_LO_EXPR) > + && (INTEGRAL_TYPE_P (TREE_TYPE (lhs_type)) > + || SCALAR_FLOAT_TYPE_P (TREE_TYPE (rhs1_type > + || ((rhs_code == VEC_UNPACK_FIX_TRUNC_HI_EXPR > +|| rhs_code == VEC_UNPACK_FIX_TRUNC_LO_EXPR) > + && (INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type)) > + || SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs_type > + || (maybe_ne (GET_MODE_SIZE (element_mode (lhs_type)), > + 2 * GET_MODE_SIZE (element_mode (rhs1_type))) > + && (!VECTOR_BOOLEAN_TYPE_P (lhs_type) > + || !VECTOR_BOOLEAN_TYPE_P (rhs1_type > + { > + error ("type mismatch in vector unpack expression"); > + debug_generic_expr (lhs_type); > + debug_generic_expr (rhs1_type); > + return true; > +} > + >return false; > > case NEGATE_EXPR: > > > Jakub > > -- Richard Biener SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
Re: [PATCH] Introduce VEC_UNPACK_FIX_TRUNC_{LO,HI}_EXPR and VEC_PACK_FLOAT_EXPR, use it in x86 vectorization (PR target/85918)
On Mon, May 28, 2018 at 12:12:18PM +0200, Richard Biener wrote: > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > Apart from > > --- gcc/tree-cfg.c.jj 2018-05-26 23:03:55.361873297 +0200 > +++ gcc/tree-cfg.c 2018-05-27 12:54:55.046197128 +0200 > @@ -3676,6 +3676,8 @@ verify_gimple_assign_unary (gassign *stm > case VEC_UNPACK_LO_EXPR: > case VEC_UNPACK_FLOAT_HI_EXPR: > case VEC_UNPACK_FLOAT_LO_EXPR: > +case VEC_UNPACK_FIX_TRUNC_HI_EXPR: > +case VEC_UNPACK_FIX_TRUNC_LO_EXPR: >/* FIXME. */ >return false; > > > the middle-end changes look OK. Can you please add verification > for the new codes here? So like this (incremental patch, as it affects also the other codes)? The VECTOR_BOOLEAN_P stuff is there because apparently we use these codes on vector booleans too where the element size is the same (for VEC_UNPACK_{HI,LO}_EXPR only). Also, not really sure how to verify sizes of the whole vectors or better nunits in the world of poly-int vector sizes (but VEC_PACK_*EXPR doesn't verify that either). Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2018-05-29 Jakub Jelinek * tree-cfg.c (verify_gimple_assign_unary): Add checking for VEC_UNPACK_*_EXPR. --- gcc/tree-cfg.c.jj 2018-05-28 19:47:55.180685259 +0200 +++ gcc/tree-cfg.c 2018-05-29 10:05:55.213775216 +0200 @@ -3678,7 +3678,35 @@ verify_gimple_assign_unary (gassign *stm case VEC_UNPACK_FLOAT_LO_EXPR: case VEC_UNPACK_FIX_TRUNC_HI_EXPR: case VEC_UNPACK_FIX_TRUNC_LO_EXPR: - /* FIXME. 
*/ + if (TREE_CODE (rhs1_type) != VECTOR_TYPE + || TREE_CODE (lhs_type) != VECTOR_TYPE + || (!INTEGRAL_TYPE_P (TREE_TYPE (lhs_type)) + && !SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs_type))) + || (!INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type)) + && !SCALAR_FLOAT_TYPE_P (TREE_TYPE (rhs1_type))) + || ((rhs_code == VEC_UNPACK_HI_EXPR + || rhs_code == VEC_UNPACK_LO_EXPR) + && (INTEGRAL_TYPE_P (TREE_TYPE (lhs_type)) + != INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type)))) + || ((rhs_code == VEC_UNPACK_FLOAT_HI_EXPR + || rhs_code == VEC_UNPACK_FLOAT_LO_EXPR) + && (INTEGRAL_TYPE_P (TREE_TYPE (lhs_type)) + || SCALAR_FLOAT_TYPE_P (TREE_TYPE (rhs1_type)))) + || ((rhs_code == VEC_UNPACK_FIX_TRUNC_HI_EXPR + || rhs_code == VEC_UNPACK_FIX_TRUNC_LO_EXPR) + && (INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type)) + || SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs_type)))) + || (maybe_ne (GET_MODE_SIZE (element_mode (lhs_type)), + 2 * GET_MODE_SIZE (element_mode (rhs1_type))) + && (!VECTOR_BOOLEAN_TYPE_P (lhs_type) + || !VECTOR_BOOLEAN_TYPE_P (rhs1_type)))) + { + error ("type mismatch in vector unpack expression"); + debug_generic_expr (lhs_type); + debug_generic_expr (rhs1_type); + return true; +} + return false; case NEGATE_EXPR: Jakub
Re: [PATCH] Introduce VEC_UNPACK_FIX_TRUNC_{LO,HI}_EXPR and VEC_PACK_FLOAT_EXPR, use it in x86 vectorization (PR target/85918)
On Mon, 28 May 2018, Jakub Jelinek wrote: > Hi! > > AVX512DQ and AVX512DQ/AVX512VL has instructions for vector float <-> > {,unsigned} long long conversions. The following patch adds the missing > tree codes, optabs and expanders to make this possible. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? Apart from --- gcc/tree-cfg.c.jj 2018-05-26 23:03:55.361873297 +0200 +++ gcc/tree-cfg.c 2018-05-27 12:54:55.046197128 +0200 @@ -3676,6 +3676,8 @@ verify_gimple_assign_unary (gassign *stm case VEC_UNPACK_LO_EXPR: case VEC_UNPACK_FLOAT_HI_EXPR: case VEC_UNPACK_FLOAT_LO_EXPR: +case VEC_UNPACK_FIX_TRUNC_HI_EXPR: +case VEC_UNPACK_FIX_TRUNC_LO_EXPR: /* FIXME. */ return false; the middle-end changes look OK. Can you please add verification for the new codes here? Thanks, Richard. > 2018-05-28 Jakub Jelinek> > PR target/85918 > * tree.def (VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR, > VEC_PACK_FLOAT_EXPR): New tree codes. > * tree-pretty-print.c (op_code_prio): Handle > VEC_UNPACK_FIX_TRUNC_HI_EXPR and VEC_UNPACK_FIX_TRUNC_LO_EXPR. > (dump_generic_node): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR, > VEC_UNPACK_FIX_TRUNC_LO_EXPR and VEC_PACK_FLOAT_EXPR. > * tree-inline.c (estimate_operator_cost): Likewise. > * gimple-pretty-print.c (dump_binary_rhs): Handle VEC_PACK_FLOAT_EXPR. > * fold-const.c (const_binop): Likewise. > (const_unop): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR and > VEC_UNPACK_FIX_TRUNC_LO_EXPR. > * tree-cfg.c (verify_gimple_assign_unary): Likewise. > (verify_gimple_assign_binary): Handle VEC_PACK_FLOAT_EXPR. > * cfgexpand.c (expand_debug_expr): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR, > VEC_UNPACK_FIX_TRUNC_LO_EXPR and VEC_PACK_FLOAT_EXPR. > * expr.c (expand_expr_real_2): Likewise. > * optabs.def (vec_packs_float_optab, vec_packu_float_optab, > vec_unpack_sfix_trunc_hi_optab, vec_unpack_sfix_trunc_lo_optab, > vec_unpack_ufix_trunc_hi_optab, vec_unpack_ufix_trunc_lo_optab): New > optabs. 
> * optabs.c (expand_widen_pattern_expr): For > VEC_UNPACK_FIX_TRUNC_HI_EXPR and VEC_UNPACK_FIX_TRUNC_LO_EXPR use > sign from result type rather than operand's type. > (expand_binop_directly): For vec_packu_float_optab and > vec_packs_float_optab allow result type to be different from operand's > type. > * optabs-tree.c (optab_for_tree_code): Handle > VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR and > VEC_PACK_FLOAT_EXPR. Formatting fixes. > * tree-vect-generic.c (expand_vector_operations_1): Handle > VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR and > VEC_PACK_FLOAT_EXPR. > * tree-vect-stmts.c (supportable_widening_operation): Handle > FIX_TRUNC_EXPR. > (supportable_narrowing_operation): Handle FLOAT_EXPR. > * config/i386/i386.md (fixprefix, floatprefix): New code attributes. > * config/i386/sse.md (*floatv2div2sf2): Rename to ... > (floatv2div2sf2): ... this. Formatting fix. > (vpckfloat_concat_mode, vpckfloat_temp_mode, vpckfloat_op_mode): New > mode attributes. > (vec_pack_float_): New expander. > (vunpckfixt_mode, vunpckfixt_model, vunpckfixt_extract_mode): New mode > attributes. > (vec_unpack_fix_trunc_lo_, > vec_unpack_fix_trunc_hi_): New expanders. > * doc/md.texi (vec_packs_float_@var{m}, vec_packu_float_@var{m}, > vec_unpack_sfix_trunc_hi_@var{m}, vec_unpack_sfix_trunc_lo_@var{m}, > vec_unpack_ufix_trunc_hi_@var{m}, vec_unpack_ufix_trunc_lo_@var{m}): > Document. > * doc/generic.texi (VEC_UNPACK_FLOAT_HI_EXPR, > VEC_UNPACK_FLOAT_LO_EXPR): Fix pasto in description. > (VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR, > VEC_PACK_FLOAT_EXPR): Document. > > * gcc.target/i386/avx512dq-pr85918.c: Add -mprefer-vector-width=512 > and -fno-vect-cost-model options. Add aligned(64) attribute to the > arrays. Add suffix 1 to all functions and use 4 iterations rather > than N. Add functions with conversions to and from float. 
> Add new set of functions with 8 iterations and another one > with 16 iterations, expect 24 vectorized loops instead of just 4. > * gcc.target/i386/avx512dq-pr85918-2.c: New test. > > --- gcc/tree.def.jj 2018-05-26 23:03:55.321873256 +0200 > +++ gcc/tree.def 2018-05-27 12:54:55.040197121 +0200 > @@ -1371,6 +1371,15 @@ DEFTREECODE (VEC_UNPACK_LO_EXPR, "vec_un > DEFTREECODE (VEC_UNPACK_FLOAT_HI_EXPR, "vec_unpack_float_hi_expr", > tcc_unary, 1) > DEFTREECODE (VEC_UNPACK_FLOAT_LO_EXPR, "vec_unpack_float_lo_expr", > tcc_unary, 1) > > +/* Unpack (extract) the high/low elements of the input vector, convert > + floating point values to integer and widen elements into the output > +
Re: [PATCH] Introduce VEC_UNPACK_FIX_TRUNC_{LO,HI}_EXPR and VEC_PACK_FLOAT_EXPR, use it in x86 vectorization (PR target/85918)
On Mon, May 28, 2018 at 11:58 AM, Jakub Jelinek wrote: > Hi! > > AVX512DQ and AVX512DQ/AVX512VL has instructions for vector float <-> > {,unsigned} long long conversions. The following patch adds the missing > tree codes, optabs and expanders to make this possible. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2018-05-28 Jakub Jelinek > > PR target/85918 > * tree.def (VEC_UNPACK_FIX_TRUNC_HI_EXPR, > VEC_UNPACK_FIX_TRUNC_LO_EXPR, > VEC_PACK_FLOAT_EXPR): New tree codes. > * tree-pretty-print.c (op_code_prio): Handle > VEC_UNPACK_FIX_TRUNC_HI_EXPR and VEC_UNPACK_FIX_TRUNC_LO_EXPR. > (dump_generic_node): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR, > VEC_UNPACK_FIX_TRUNC_LO_EXPR and VEC_PACK_FLOAT_EXPR. > * tree-inline.c (estimate_operator_cost): Likewise. > * gimple-pretty-print.c (dump_binary_rhs): Handle VEC_PACK_FLOAT_EXPR. > * fold-const.c (const_binop): Likewise. > (const_unop): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR and > VEC_UNPACK_FIX_TRUNC_LO_EXPR. > * tree-cfg.c (verify_gimple_assign_unary): Likewise. > (verify_gimple_assign_binary): Handle VEC_PACK_FLOAT_EXPR. > * cfgexpand.c (expand_debug_expr): Handle > VEC_UNPACK_FIX_TRUNC_HI_EXPR, > VEC_UNPACK_FIX_TRUNC_LO_EXPR and VEC_PACK_FLOAT_EXPR. > * expr.c (expand_expr_real_2): Likewise. > * optabs.def (vec_packs_float_optab, vec_packu_float_optab, > vec_unpack_sfix_trunc_hi_optab, vec_unpack_sfix_trunc_lo_optab, > vec_unpack_ufix_trunc_hi_optab, vec_unpack_ufix_trunc_lo_optab): New > optabs. > * optabs.c (expand_widen_pattern_expr): For > VEC_UNPACK_FIX_TRUNC_HI_EXPR and VEC_UNPACK_FIX_TRUNC_LO_EXPR use > sign from result type rather than operand's type. > (expand_binop_directly): For vec_packu_float_optab and > vec_packs_float_optab allow result type to be different from operand's > type. > * optabs-tree.c (optab_for_tree_code): Handle > VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR and > VEC_PACK_FLOAT_EXPR. Formatting fixes. 
> * tree-vect-generic.c (expand_vector_operations_1): Handle > VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR and > VEC_PACK_FLOAT_EXPR. > * tree-vect-stmts.c (supportable_widening_operation): Handle > FIX_TRUNC_EXPR. > (supportable_narrowing_operation): Handle FLOAT_EXPR. > * config/i386/i386.md (fixprefix, floatprefix): New code attributes. > * config/i386/sse.md (*floatv2div2sf2): Rename to ... > (floatv2div2sf2): ... this. Formatting fix. > (vpckfloat_concat_mode, vpckfloat_temp_mode, vpckfloat_op_mode): New > mode attributes. > (vec_pack_float_): New expander. > (vunpckfixt_mode, vunpckfixt_model, vunpckfixt_extract_mode): New mode > attributes. > (vec_unpack_fix_trunc_lo_, > vec_unpack_fix_trunc_hi_): New expanders. > * doc/md.texi (vec_packs_float_@var{m}, vec_packu_float_@var{m}, > vec_unpack_sfix_trunc_hi_@var{m}, vec_unpack_sfix_trunc_lo_@var{m}, > vec_unpack_ufix_trunc_hi_@var{m}, vec_unpack_ufix_trunc_lo_@var{m}): > Document. > * doc/generic.texi (VEC_UNPACK_FLOAT_HI_EXPR, > VEC_UNPACK_FLOAT_LO_EXPR): Fix pasto in description. > (VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR, > VEC_PACK_FLOAT_EXPR): Document. > > * gcc.target/i386/avx512dq-pr85918.c: Add -mprefer-vector-width=512 > and -fno-vect-cost-model options. Add aligned(64) attribute to the > arrays. Add suffix 1 to all functions and use 4 iterations rather > than N. Add functions with conversions to and from float. > Add new set of functions with 8 iterations and another one > with 16 iterations, expect 24 vectorized loops instead of just 4. > * gcc.target/i386/avx512dq-pr85918-2.c: New test. LGTM for the x86 part. Thanks, Uros. 
> --- gcc/tree.def.jj 2018-05-26 23:03:55.321873256 +0200 > +++ gcc/tree.def2018-05-27 12:54:55.040197121 +0200 > @@ -1371,6 +1371,15 @@ DEFTREECODE (VEC_UNPACK_LO_EXPR, "vec_un > DEFTREECODE (VEC_UNPACK_FLOAT_HI_EXPR, "vec_unpack_float_hi_expr", > tcc_unary, 1) > DEFTREECODE (VEC_UNPACK_FLOAT_LO_EXPR, "vec_unpack_float_lo_expr", > tcc_unary, 1) > > +/* Unpack (extract) the high/low elements of the input vector, convert > + floating point values to integer and widen elements into the output > + vector. The input vector has twice as many elements as the output > + vector, that are half the size of the elements of the output vector. */ > +DEFTREECODE (VEC_UNPACK_FIX_TRUNC_HI_EXPR, "vec_unpack_fix_trunc_hi_expr", > +tcc_unary, 1) > +DEFTREECODE (VEC_UNPACK_FIX_TRUNC_LO_EXPR, "vec_unpack_fix_trunc_lo_expr", > +
[PATCH] Introduce VEC_UNPACK_FIX_TRUNC_{LO,HI}_EXPR and VEC_PACK_FLOAT_EXPR, use it in x86 vectorization (PR target/85918)
Hi! AVX512DQ and AVX512DQ/AVX512VL have instructions for vector float <-> {,unsigned} long long conversions. The following patch adds the missing tree codes, optabs and expanders to make this possible. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2018-05-28 Jakub Jelinek PR target/85918 * tree.def (VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR, VEC_PACK_FLOAT_EXPR): New tree codes. * tree-pretty-print.c (op_code_prio): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR and VEC_UNPACK_FIX_TRUNC_LO_EXPR. (dump_generic_node): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR and VEC_PACK_FLOAT_EXPR. * tree-inline.c (estimate_operator_cost): Likewise. * gimple-pretty-print.c (dump_binary_rhs): Handle VEC_PACK_FLOAT_EXPR. * fold-const.c (const_binop): Likewise. (const_unop): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR and VEC_UNPACK_FIX_TRUNC_LO_EXPR. * tree-cfg.c (verify_gimple_assign_unary): Likewise. (verify_gimple_assign_binary): Handle VEC_PACK_FLOAT_EXPR. * cfgexpand.c (expand_debug_expr): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR and VEC_PACK_FLOAT_EXPR. * expr.c (expand_expr_real_2): Likewise. * optabs.def (vec_packs_float_optab, vec_packu_float_optab, vec_unpack_sfix_trunc_hi_optab, vec_unpack_sfix_trunc_lo_optab, vec_unpack_ufix_trunc_hi_optab, vec_unpack_ufix_trunc_lo_optab): New optabs. * optabs.c (expand_widen_pattern_expr): For VEC_UNPACK_FIX_TRUNC_HI_EXPR and VEC_UNPACK_FIX_TRUNC_LO_EXPR use sign from result type rather than operand's type. (expand_binop_directly): For vec_packu_float_optab and vec_packs_float_optab allow result type to be different from operand's type. * optabs-tree.c (optab_for_tree_code): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR and VEC_PACK_FLOAT_EXPR. Formatting fixes. * tree-vect-generic.c (expand_vector_operations_1): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR and VEC_PACK_FLOAT_EXPR. 
* tree-vect-stmts.c (supportable_widening_operation): Handle FIX_TRUNC_EXPR. (supportable_narrowing_operation): Handle FLOAT_EXPR. * config/i386/i386.md (fixprefix, floatprefix): New code attributes. * config/i386/sse.md (*floatv2div2sf2): Rename to ... (floatv2div2sf2): ... this. Formatting fix. (vpckfloat_concat_mode, vpckfloat_temp_mode, vpckfloat_op_mode): New mode attributes. (vec_pack_float_): New expander. (vunpckfixt_mode, vunpckfixt_model, vunpckfixt_extract_mode): New mode attributes. (vec_unpack_fix_trunc_lo_, vec_unpack_fix_trunc_hi_): New expanders. * doc/md.texi (vec_packs_float_@var{m}, vec_packu_float_@var{m}, vec_unpack_sfix_trunc_hi_@var{m}, vec_unpack_sfix_trunc_lo_@var{m}, vec_unpack_ufix_trunc_hi_@var{m}, vec_unpack_ufix_trunc_lo_@var{m}): Document. * doc/generic.texi (VEC_UNPACK_FLOAT_HI_EXPR, VEC_UNPACK_FLOAT_LO_EXPR): Fix pasto in description. (VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR, VEC_PACK_FLOAT_EXPR): Document. * gcc.target/i386/avx512dq-pr85918.c: Add -mprefer-vector-width=512 and -fno-vect-cost-model options. Add aligned(64) attribute to the arrays. Add suffix 1 to all functions and use 4 iterations rather than N. Add functions with conversions to and from float. Add new set of functions with 8 iterations and another one with 16 iterations, expect 24 vectorized loops instead of just 4. * gcc.target/i386/avx512dq-pr85918-2.c: New test. --- gcc/tree.def.jj 2018-05-26 23:03:55.321873256 +0200 +++ gcc/tree.def2018-05-27 12:54:55.040197121 +0200 @@ -1371,6 +1371,15 @@ DEFTREECODE (VEC_UNPACK_LO_EXPR, "vec_un DEFTREECODE (VEC_UNPACK_FLOAT_HI_EXPR, "vec_unpack_float_hi_expr", tcc_unary, 1) DEFTREECODE (VEC_UNPACK_FLOAT_LO_EXPR, "vec_unpack_float_lo_expr", tcc_unary, 1) +/* Unpack (extract) the high/low elements of the input vector, convert + floating point values to integer and widen elements into the output + vector. 
The input vector has twice as many elements as the output + vector, that are half the size of the elements of the output vector. */ +DEFTREECODE (VEC_UNPACK_FIX_TRUNC_HI_EXPR, "vec_unpack_fix_trunc_hi_expr", +tcc_unary, 1) +DEFTREECODE (VEC_UNPACK_FIX_TRUNC_LO_EXPR, "vec_unpack_fix_trunc_lo_expr", +tcc_unary, 1) + /* Pack (demote/narrow and merge) the elements of the two input vectors into the output vector using truncation/saturation. The elements of the input vectors are twice the size of the elements of the @@ -1384,6 +1393,12 @@ DEFTREECODE (VEC_PACK_SAT_EXPR,