Re: Ping: [PATCH 3/6] Vectorize internal functions
On Tue, Nov 17, 2015 at 10:30 AM, Richard Sandiford wrote: > Thanks for all the reviews for this series. I think the patch below > is the only target-independent one that hasn't had any comments. This patch is ok. Thanks, Richard. > Richard > > Richard Sandiford writes: >> This patch tries to vectorize built-in and internal functions as >> internal functions first, falling back on the current built-in >> target hooks otherwise. >> >> >> gcc/ >> * internal-fn.h (direct_internal_fn_info): Add vectorizable flag. >> * internal-fn.c (direct_internal_fn_array): Update accordingly. >> * tree-vectorizer.h (vectorizable_function): Delete. >> * tree-vect-stmts.c: Include internal-fn.h. >> (vectorizable_internal_function): New function. >> (vectorizable_function): Inline into... >> (vectorizable_call): ...here. Explicitly reject calls that read >> from or write to memory. Try using an internal function before >> falling back on the old vectorizable_function behavior. >> >> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c >> index 898c83d..a5bda2f 100644 >> --- a/gcc/internal-fn.c >> +++ b/gcc/internal-fn.c >> @@ -69,13 +69,13 @@ init_internal_fns () >> >> /* Create static initializers for the information returned by >> direct_internal_fn. */ >> -#define not_direct { -2, -2 } >> -#define mask_load_direct { -1, -1 } >> -#define load_lanes_direct { -1, -1 } >> -#define mask_store_direct { 3, 3 } >> -#define store_lanes_direct { 0, 0 } >> -#define unary_direct { 0, 0 } >> -#define binary_direct { 0, 0 } >> +#define not_direct { -2, -2, false } >> +#define mask_load_direct { -1, -1, false } >> +#define load_lanes_direct { -1, -1, false } >> +#define mask_store_direct { 3, 3, false } >> +#define store_lanes_direct { 0, 0, false } >> +#define unary_direct { 0, 0, true } >> +#define binary_direct { 0, 0, true } >> >> const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = { >> #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct, >> diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h >> index 6cb123f..aea6abd 100644 >> --- a/gcc/internal-fn.h >> +++ b/gcc/internal-fn.h >> @@ -134,6 +134,14 @@ struct direct_internal_fn_info >> function isn't directly mapped to an optab. */ >>signed int type0 : 8; >>signed int type1 : 8; >> + /* True if the function is pointwise, so that it can be vectorized by >> + converting the return type and all argument types to vectors of the >> + same number of elements. E.g. we can vectorize an IFN_SQRT on >> + floats as an IFN_SQRT on vectors of N floats. >> + >> + This only needs 1 bit, but occupies the full 16 to ensure a nice >> + layout. */ >> + unsigned int vectorizable : 16; >> }; >> >> extern const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1]; >> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c >> index 75389c4..1142142 100644 >> --- a/gcc/tree-vect-stmts.c >> +++ b/gcc/tree-vect-stmts.c >> @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. If not see >> #include "tree-scalar-evolution.h" >> #include "tree-vectorizer.h" >> #include "builtins.h" >> +#include "internal-fn.h" >> >> /* For lang_hooks.types.type_for_mode. */ >> #include "langhooks.h" >> @@ -1632,27 +1633,32 @@ vect_finish_stmt_generation (gimple *stmt, gimple >> *vec_stmt, >> add_stmt_to_eh_lp (vec_stmt, lp_nr); >> } >> >> -/* Checks if CALL can be vectorized in type VECTYPE. Returns >> - a function declaration if the target has a vectorized version >> - of the function, or NULL_TREE if the function cannot be vectorized. */ >> +/* We want to vectorize a call to combined function CFN with function >> + decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN >> + as the types of all inputs. Check whether this is possible using >> + an internal function, returning its code if so or IFN_LAST if not. */ >> >> -tree >> -vectorizable_function (gcall *call, tree vectype_out, tree vectype_in) >> +static internal_fn >> +vectorizable_internal_function (combined_fn cfn, tree fndecl, >> + tree vectype_out, tree vectype_in) >> { >> - /* We only handle functions that do not read or clobber memory. */ >> - if (gimple_vuse (call)) >> -return NULL_TREE; >> - >> - combined_fn fn = gimple_call_combined_fn (call); >> - if (fn != CFN_LAST) >> -return targetm.vectorize.builtin_vectorized_function >> - (fn, vectype_out, vectype_in); >> - >> - if (gimple_call_builtin_p (call, BUILT_IN_MD)) >> -return targetm.vectorize.builtin_md_vectorized_function >> - (gimple_call_fndecl (call), vectype_out, vectype_in); >> - >> - return NULL_TREE; >> + internal_fn ifn; >> + if (internal_fn_p (cfn)) >> +ifn = as_internal_fn (cfn); >> + else >> +ifn = associated_internal_fn (fndecl); >> + if (ifn != IFN_LAST && direct_internal_fn_p (ifn)) >> +{ >> + const direct_internal_fn_info &info = dir
Ping: [PATCH 3/6] Vectorize internal functions
Thanks for all the reviews for this series. I think the patch below is the only target-independent one that hasn't had any comments. Richard Richard Sandiford writes: > This patch tries to vectorize built-in and internal functions as > internal functions first, falling back on the current built-in > target hooks otherwise. > > > gcc/ > * internal-fn.h (direct_internal_fn_info): Add vectorizable flag. > * internal-fn.c (direct_internal_fn_array): Update accordingly. > * tree-vectorizer.h (vectorizable_function): Delete. > * tree-vect-stmts.c: Include internal-fn.h. > (vectorizable_internal_function): New function. > (vectorizable_function): Inline into... > (vectorizable_call): ...here. Explicitly reject calls that read > from or write to memory. Try using an internal function before > falling back on the old vectorizable_function behavior. > > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c > index 898c83d..a5bda2f 100644 > --- a/gcc/internal-fn.c > +++ b/gcc/internal-fn.c > @@ -69,13 +69,13 @@ init_internal_fns () > > /* Create static initializers for the information returned by > direct_internal_fn. */ > -#define not_direct { -2, -2 } > -#define mask_load_direct { -1, -1 } > -#define load_lanes_direct { -1, -1 } > -#define mask_store_direct { 3, 3 } > -#define store_lanes_direct { 0, 0 } > -#define unary_direct { 0, 0 } > -#define binary_direct { 0, 0 } > +#define not_direct { -2, -2, false } > +#define mask_load_direct { -1, -1, false } > +#define load_lanes_direct { -1, -1, false } > +#define mask_store_direct { 3, 3, false } > +#define store_lanes_direct { 0, 0, false } > +#define unary_direct { 0, 0, true } > +#define binary_direct { 0, 0, true } > > const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = { > #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct, > diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h > index 6cb123f..aea6abd 100644 > --- a/gcc/internal-fn.h > +++ b/gcc/internal-fn.h > @@ -134,6 +134,14 @@ struct direct_internal_fn_info > function isn't directly mapped to an optab. */ >signed int type0 : 8; >signed int type1 : 8; > + /* True if the function is pointwise, so that it can be vectorized by > + converting the return type and all argument types to vectors of the > + same number of elements. E.g. we can vectorize an IFN_SQRT on > + floats as an IFN_SQRT on vectors of N floats. > + > + This only needs 1 bit, but occupies the full 16 to ensure a nice > + layout. */ > + unsigned int vectorizable : 16; > }; > > extern const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1]; > diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c > index 75389c4..1142142 100644 > --- a/gcc/tree-vect-stmts.c > +++ b/gcc/tree-vect-stmts.c > @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. If not see > #include "tree-scalar-evolution.h" > #include "tree-vectorizer.h" > #include "builtins.h" > +#include "internal-fn.h" > > /* For lang_hooks.types.type_for_mode. */ > #include "langhooks.h" > @@ -1632,27 +1633,32 @@ vect_finish_stmt_generation (gimple *stmt, gimple > *vec_stmt, > add_stmt_to_eh_lp (vec_stmt, lp_nr); > } > > -/* Checks if CALL can be vectorized in type VECTYPE. Returns > - a function declaration if the target has a vectorized version > - of the function, or NULL_TREE if the function cannot be vectorized. */ > +/* We want to vectorize a call to combined function CFN with function > + decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN > + as the types of all inputs. Check whether this is possible using > + an internal function, returning its code if so or IFN_LAST if not. */ > > -tree > -vectorizable_function (gcall *call, tree vectype_out, tree vectype_in) > +static internal_fn > +vectorizable_internal_function (combined_fn cfn, tree fndecl, > + tree vectype_out, tree vectype_in) > { > - /* We only handle functions that do not read or clobber memory. */ > - if (gimple_vuse (call)) > -return NULL_TREE; > - > - combined_fn fn = gimple_call_combined_fn (call); > - if (fn != CFN_LAST) > -return targetm.vectorize.builtin_vectorized_function > - (fn, vectype_out, vectype_in); > - > - if (gimple_call_builtin_p (call, BUILT_IN_MD)) > -return targetm.vectorize.builtin_md_vectorized_function > - (gimple_call_fndecl (call), vectype_out, vectype_in); > - > - return NULL_TREE; > + internal_fn ifn; > + if (internal_fn_p (cfn)) > +ifn = as_internal_fn (cfn); > + else > +ifn = associated_internal_fn (fndecl); > + if (ifn != IFN_LAST && direct_internal_fn_p (ifn)) > +{ > + const direct_internal_fn_info &info = direct_internal_fn (ifn); > + if (info.vectorizable) > + { > + tree type0 = (info.type0 < 0 ? vectype_out : vectype_in); > + tree type1 = (info.type1 < 0 ? vectype_out : vectype_in); > +