Re: Ping: [PATCH 3/6] Vectorize internal functions

2015-11-17 Thread Richard Biener
On Tue, Nov 17, 2015 at 10:30 AM, Richard Sandiford wrote:
> Thanks for all the reviews for this series.  I think the patch below
> is the only target-independent one that hasn't had any comments.

This patch is ok.

Thanks,
Richard.

> Richard
>
> Richard Sandiford  writes:
>> This patch tries to vectorize built-in and internal functions as
>> internal functions first, falling back on the current built-in
>> target hooks otherwise.
>>
>>
>> gcc/
>>   * internal-fn.h (direct_internal_fn_info): Add vectorizable flag.
>>   * internal-fn.c (direct_internal_fn_array): Update accordingly.
>>   * tree-vectorizer.h (vectorizable_function): Delete.
>>   * tree-vect-stmts.c: Include internal-fn.h.
>>   (vectorizable_internal_function): New function.
>>   (vectorizable_function): Inline into...
>>   (vectorizable_call): ...here.  Explicitly reject calls that read
>>   from or write to memory.  Try using an internal function before
>>   falling back on the old vectorizable_function behavior.
>>
>> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
>> index 898c83d..a5bda2f 100644
>> --- a/gcc/internal-fn.c
>> +++ b/gcc/internal-fn.c
>> @@ -69,13 +69,13 @@ init_internal_fns ()
>>
>>  /* Create static initializers for the information returned by
>> direct_internal_fn.  */
>> -#define not_direct { -2, -2 }
>> -#define mask_load_direct { -1, -1 }
>> -#define load_lanes_direct { -1, -1 }
>> -#define mask_store_direct { 3, 3 }
>> -#define store_lanes_direct { 0, 0 }
>> -#define unary_direct { 0, 0 }
>> -#define binary_direct { 0, 0 }
>> +#define not_direct { -2, -2, false }
>> +#define mask_load_direct { -1, -1, false }
>> +#define load_lanes_direct { -1, -1, false }
>> +#define mask_store_direct { 3, 3, false }
>> +#define store_lanes_direct { 0, 0, false }
>> +#define unary_direct { 0, 0, true }
>> +#define binary_direct { 0, 0, true }
>>
>>  const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
>>  #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct,
>> diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
>> index 6cb123f..aea6abd 100644
>> --- a/gcc/internal-fn.h
>> +++ b/gcc/internal-fn.h
>> @@ -134,6 +134,14 @@ struct direct_internal_fn_info
>>   function isn't directly mapped to an optab.  */
>>signed int type0 : 8;
>>signed int type1 : 8;
>> +  /* True if the function is pointwise, so that it can be vectorized by
>> + converting the return type and all argument types to vectors of the
>> + same number of elements.  E.g. we can vectorize an IFN_SQRT on
>> + floats as an IFN_SQRT on vectors of N floats.
>> +
>> + This only needs 1 bit, but occupies the full 16 to ensure a nice
>> + layout.  */
>> +  unsigned int vectorizable : 16;
>>  };
>>
>>  extern const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1];
>> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
>> index 75389c4..1142142 100644
>> --- a/gcc/tree-vect-stmts.c
>> +++ b/gcc/tree-vect-stmts.c
>> @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "tree-scalar-evolution.h"
>>  #include "tree-vectorizer.h"
>>  #include "builtins.h"
>> +#include "internal-fn.h"
>>
>>  /* For lang_hooks.types.type_for_mode.  */
>>  #include "langhooks.h"
>> @@ -1632,27 +1633,32 @@ vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
>>  add_stmt_to_eh_lp (vec_stmt, lp_nr);
>>  }
>>
>> -/* Checks if CALL can be vectorized in type VECTYPE.  Returns
>> -   a function declaration if the target has a vectorized version
>> -   of the function, or NULL_TREE if the function cannot be vectorized.  */
>> +/* We want to vectorize a call to combined function CFN with function
>> +   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
>> +   as the types of all inputs.  Check whether this is possible using
>> +   an internal function, returning its code if so or IFN_LAST if not.  */
>>
>> -tree
>> -vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
>> +static internal_fn
>> +vectorizable_internal_function (combined_fn cfn, tree fndecl,
>> + tree vectype_out, tree vectype_in)
>>  {
>> -  /* We only handle functions that do not read or clobber memory.  */
>> -  if (gimple_vuse (call))
>> -return NULL_TREE;
>> -
>> -  combined_fn fn = gimple_call_combined_fn (call);
>> -  if (fn != CFN_LAST)
>> -return targetm.vectorize.builtin_vectorized_function
>> -  (fn, vectype_out, vectype_in);
>> -
>> -  if (gimple_call_builtin_p (call, BUILT_IN_MD))
>> -return targetm.vectorize.builtin_md_vectorized_function
>> -  (gimple_call_fndecl (call), vectype_out, vectype_in);
>> -
>> -  return NULL_TREE;
>> +  internal_fn ifn;
>> +  if (internal_fn_p (cfn))
>> +ifn = as_internal_fn (cfn);
>> +  else
>> +ifn = associated_internal_fn (fndecl);
>> +  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
>> +   

Ping: [PATCH 3/6] Vectorize internal functions

2015-11-17 Thread Richard Sandiford
Thanks for all the reviews for this series.  I think the patch below
is the only target-independent one that hasn't had any comments.

Richard

Richard Sandiford  writes:
> This patch tries to vectorize built-in and internal functions as
> internal functions first, falling back on the current built-in
> target hooks otherwise.
>
>
> gcc/
>   * internal-fn.h (direct_internal_fn_info): Add vectorizable flag.
>   * internal-fn.c (direct_internal_fn_array): Update accordingly.
>   * tree-vectorizer.h (vectorizable_function): Delete.
>   * tree-vect-stmts.c: Include internal-fn.h.
>   (vectorizable_internal_function): New function.
>   (vectorizable_function): Inline into...
>   (vectorizable_call): ...here.  Explicitly reject calls that read
>   from or write to memory.  Try using an internal function before
>   falling back on the old vectorizable_function behavior.
>
> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> index 898c83d..a5bda2f 100644
> --- a/gcc/internal-fn.c
> +++ b/gcc/internal-fn.c
> @@ -69,13 +69,13 @@ init_internal_fns ()
>  
>  /* Create static initializers for the information returned by
> direct_internal_fn.  */
> -#define not_direct { -2, -2 }
> -#define mask_load_direct { -1, -1 }
> -#define load_lanes_direct { -1, -1 }
> -#define mask_store_direct { 3, 3 }
> -#define store_lanes_direct { 0, 0 }
> -#define unary_direct { 0, 0 }
> -#define binary_direct { 0, 0 }
> +#define not_direct { -2, -2, false }
> +#define mask_load_direct { -1, -1, false }
> +#define load_lanes_direct { -1, -1, false }
> +#define mask_store_direct { 3, 3, false }
> +#define store_lanes_direct { 0, 0, false }
> +#define unary_direct { 0, 0, true }
> +#define binary_direct { 0, 0, true }
>  
>  const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
>  #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct,
> diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> index 6cb123f..aea6abd 100644
> --- a/gcc/internal-fn.h
> +++ b/gcc/internal-fn.h
> @@ -134,6 +134,14 @@ struct direct_internal_fn_info
>   function isn't directly mapped to an optab.  */
>signed int type0 : 8;
>signed int type1 : 8;
> +  /* True if the function is pointwise, so that it can be vectorized by
> + converting the return type and all argument types to vectors of the
> + same number of elements.  E.g. we can vectorize an IFN_SQRT on
> + floats as an IFN_SQRT on vectors of N floats.
> +
> + This only needs 1 bit, but occupies the full 16 to ensure a nice
> + layout.  */
> +  unsigned int vectorizable : 16;
>  };
>  
>  extern const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1];
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 75389c4..1142142 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-scalar-evolution.h"
>  #include "tree-vectorizer.h"
>  #include "builtins.h"
> +#include "internal-fn.h"
>  
>  /* For lang_hooks.types.type_for_mode.  */
>  #include "langhooks.h"
> @@ -1632,27 +1633,32 @@ vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
>  add_stmt_to_eh_lp (vec_stmt, lp_nr);
>  }
>  
> -/* Checks if CALL can be vectorized in type VECTYPE.  Returns
> -   a function declaration if the target has a vectorized version
> -   of the function, or NULL_TREE if the function cannot be vectorized.  */
> +/* We want to vectorize a call to combined function CFN with function
> +   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
> +   as the types of all inputs.  Check whether this is possible using
> +   an internal function, returning its code if so or IFN_LAST if not.  */
>  
> -tree
> -vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
> +static internal_fn
> +vectorizable_internal_function (combined_fn cfn, tree fndecl,
> + tree vectype_out, tree vectype_in)
>  {
> -  /* We only handle functions that do not read or clobber memory.  */
> -  if (gimple_vuse (call))
> -return NULL_TREE;
> -
> -  combined_fn fn = gimple_call_combined_fn (call);
> -  if (fn != CFN_LAST)
> -return targetm.vectorize.builtin_vectorized_function
> -  (fn, vectype_out, vectype_in);
> -
> -  if (gimple_call_builtin_p (call, BUILT_IN_MD))
> -return targetm.vectorize.builtin_md_vectorized_function
> -  (gimple_call_fndecl (call), vectype_out, vectype_in);
> -
> -  return NULL_TREE;
> +  internal_fn ifn;
> +  if (internal_fn_p (cfn))
> +ifn = as_internal_fn (cfn);
> +  else
> +ifn = associated_internal_fn (fndecl);
> +  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
> +{
> +  const direct_internal_fn_info &info = direct_internal_fn (ifn);
> +  if (info.vectorizable)
> + {
> +   tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
> +   tree type1 = (info.type1 < 0 ?