Re: [PATCH 1/2] Add support for IVOPT

2019-05-28 Thread Richard Sandiford
Kugan Vivekanandarajah  writes:
> +/* Return the preferred mem scale factor for accessing MEM_MODE
> +   of BASE which is optimized for SPEED.  */

Maybe:

/* Return the preferred index scale factor for accessing memory of mode
   MEM_MODE in the address space of pointer BASE.  Assume that we're
   optimizing for speed if SPEED is true and for size otherwise.  */

> @@ -3479,7 +3481,7 @@ add_iv_candidate_derived_from_uses (struct ivopts_data 
> *data)
>data->iv_common_cands.truncate (0);
>  }
>  
> -/* Adds candidates based on the value of USE's iv.  */
> +  /* Adds candidates based on the value of USE's iv.  */
>  
>  static void
>  add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)

Stray change: the original is correct.

> @@ -3500,6 +3502,26 @@ add_iv_candidate_for_use (struct ivopts_data *data, 
> struct iv_use *use)
>  basetype = sizetype;
>record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
>  
> +  /* Compare the cost of an address with an unscaled index with the cost of
> +an address with a scaled index and add candidate if useful. */

Should be two spaces after "."

OK with those changes, thanks.  OK for part 2 as well.

Richard


Re: [PATCH 1/2] Add support for IVOPT

2019-05-21 Thread Kugan Vivekanandarajah
Hi Richard,


On Fri, 17 May 2019 at 18:47, Richard Sandiford
 wrote:
>
> Kugan Vivekanandarajah  writes:
> > [...]
> >> > +{
> >> > +  struct mem_address parts = {NULL_TREE, integer_one_node,
> >> > +   NULL_TREE, NULL_TREE, NULL_TREE};
> >>
> >> Might be better to use "= {}" and initialise the fields that matter by
> >> assignment.  As it stands this uses integer_one_node as the base, but I
> >> couldn't tell if that was deliberate.
> >
> > I just copied this part from get_address_cost, similar to what is done
> > there.
>
> Ah, sorry :-)
>
> > I have now changed the way you suggested but using the values
> > used in get_address_cost.
>
> Thanks.
>
> > [...]
> > @@ -3479,6 +3481,35 @@ add_iv_candidate_derived_from_uses (struct 
> > ivopts_data *data)
> >data->iv_common_cands.truncate (0);
> >  }
> >
> > +/* Return the preferred mem scale factor for accessing MEM_MODE
> > +   of BASE in LOOP.  */
> > +static unsigned int
> > +preferred_mem_scale_factor (struct loop *loop,
> > + tree base, machine_mode mem_mode)
>
> IMO this should live in tree-ssa-address.c instead.
>
> The only use of "loop" is to test for size vs. speed, but other callers
> might want to make that decision based on individual blocks, so I think
> it would make sense to pass a "speed" bool instead.  Probably also worth
> making it the last parameter, so that the order is consistent with
> address_cost (though probably then inconsistent with something else :-)).
>
> > [...]
> > @@ -3500,6 +3531,28 @@ add_iv_candidate_for_use (struct ivopts_data *data, 
> > struct iv_use *use)
> >  basetype = sizetype;
> >record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
> >
> > +  /* Compare the cost of an address with an unscaled index with the cost of
> > +an address with a scaled index and add candidate if useful. */
> > +  if (use != NULL
> > +  && poly_int_tree_p (iv->step)
> > +  && tree_fits_poly_int64_p (iv->step)
> > +  && address_p (use->type))
> > +{
> > +  poly_int64 new_step;
> > +  poly_int64 poly_step = tree_to_poly_int64 (iv->step);
>
> This should be:
>
>   poly_int64 step;
>   if (use != NULL
>   && poly_int_tree_p (iv->step, &step)
>   && address_p (use->type))
> {
>   poly_int64 new_step;
>
> > +  unsigned int fact
> > + = preferred_mem_scale_factor (data->current_loop,
> > +use->iv->base,
> > +TYPE_MODE (use->mem_type));
> > +
> > +  if ((fact != 1)
> > +   && multiple_p (poly_step, fact, &new_step))
>
> Should be no brackets around "fact != 1".
>
> > [...]
>
> Looks really good to me otherwise, thanks.  Bin, any comments?
Revised patch which handles the above review comments is attached.

Thanks,
Kugan

> Richard
From 6a146662fab39de876de332bacbb1a3300caefb8 Mon Sep 17 00:00:00 2001
From: Kugan Vivekanandarajah 
Date: Wed, 15 May 2019 09:16:43 +1000
Subject: [PATCH 1/2] Add support for IVOPT

gcc/ChangeLog:

2019-05-15  Kugan Vivekanandarajah  

	PR target/88834
	* tree-ssa-loop-ivopts.c (get_mem_type_for_internal_fn): Handle
	IFN_MASK_LOAD_LANES and IFN_MASK_STORE_LANES.
	(get_alias_ptr_type_for_ptr_address): Likewise.
	(add_iv_candidate_for_use): Add scaled index candidate if useful.
	* tree-ssa-address.c (preferred_mem_scale_factor): New.

Change-Id: Ie47b1722dc4fb430f07dadb8a58385759e75df58
---
 gcc/tree-ssa-address.c | 28 
 gcc/tree-ssa-address.h |  3 +++
 gcc/tree-ssa-loop-ivopts.c | 26 +-
 3 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-address.c b/gcc/tree-ssa-address.c
index 1c17e93..fdb6619 100644
--- a/gcc/tree-ssa-address.c
+++ b/gcc/tree-ssa-address.c
@@ -1127,6 +1127,34 @@ maybe_fold_tmr (tree ref)
   return new_ref;
 }
 
+/* Return the preferred mem scale factor for accessing MEM_MODE
+   of BASE which is optimized for SPEED.  */
+unsigned int
+preferred_mem_scale_factor (tree base, machine_mode mem_mode,
+			bool speed)
+{
+  struct mem_address parts = {};
+  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (base));
+  unsigned int fact = GET_MODE_UNIT_SIZE (mem_mode);
+
+  /* Addressing mode "base + index".  */
+  parts.index = integer_one_node;
+  parts.base = integer_one_node;
+  rtx addr = addr_for_mem_ref (&parts, as, false);
+  unsigned cost = address_cost (addr, mem_mode, as, speed);

Re: [PATCH 1/2] Add support for IVOPT

2019-05-17 Thread Richard Sandiford
Kugan Vivekanandarajah  writes:
> [...]
>> > +{
>> > +  struct mem_address parts = {NULL_TREE, integer_one_node,
>> > +   NULL_TREE, NULL_TREE, NULL_TREE};
>>
>> Might be better to use "= {}" and initialise the fields that matter by
>> assignment.  As it stands this uses integer_one_node as the base, but I
>> couldn't tell if that was deliberate.
>
> I just copied this part from get_address_cost, similar to what is done
> there.

Ah, sorry :-)

> I have now changed the way you suggested but using the values
> used in get_address_cost.

Thanks.

> [...]
> @@ -3479,6 +3481,35 @@ add_iv_candidate_derived_from_uses (struct ivopts_data 
> *data)
>data->iv_common_cands.truncate (0);
>  }
>  
> +/* Return the preferred mem scale factor for accessing MEM_MODE
> +   of BASE in LOOP.  */
> +static unsigned int
> +preferred_mem_scale_factor (struct loop *loop,
> + tree base, machine_mode mem_mode)

IMO this should live in tree-ssa-address.c instead.

The only use of "loop" is to test for size vs. speed, but other callers
might want to make that decision based on individual blocks, so I think
it would make sense to pass a "speed" bool instead.  Probably also worth
making it the last parameter, so that the order is consistent with
address_cost (though probably then inconsistent with something else :-)).

> [...]
> @@ -3500,6 +3531,28 @@ add_iv_candidate_for_use (struct ivopts_data *data, 
> struct iv_use *use)
>  basetype = sizetype;
>record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
>  
> +  /* Compare the cost of an address with an unscaled index with the cost of
> +an address with a scaled index and add candidate if useful. */
> +  if (use != NULL
> +  && poly_int_tree_p (iv->step)
> +  && tree_fits_poly_int64_p (iv->step)
> +  && address_p (use->type))
> +{
> +  poly_int64 new_step;
> +  poly_int64 poly_step = tree_to_poly_int64 (iv->step);

This should be:

  poly_int64 step;
  if (use != NULL
  && poly_int_tree_p (iv->step, &step)
  && address_p (use->type))
{
  poly_int64 new_step;

> +  unsigned int fact
> + = preferred_mem_scale_factor (data->current_loop,
> +use->iv->base,
> +TYPE_MODE (use->mem_type));
> +
> +  if ((fact != 1)
> +   && multiple_p (poly_step, fact, &new_step))

Should be no brackets around "fact != 1".

> [...]

Looks really good to me otherwise, thanks.  Bin, any comments?

Richard


Re: [PATCH 1/2] Add support for IVOPT

2019-05-16 Thread Kugan Vivekanandarajah
Hi Richard,

On Thu, 16 May 2019 at 21:14, Richard Biener  wrote:
>
> On Wed, May 15, 2019 at 4:40 AM  wrote:
> >
> > From: Kugan Vivekanandarajah 
> >
> > gcc/ChangeLog:
> >
> > 2019-05-15  Kugan Vivekanandarajah  
> >
> > PR target/88834
> > * tree-ssa-loop-ivopts.c (get_mem_type_for_internal_fn): Handle
> > IFN_MASK_LOAD_LANES and IFN_MASK_STORE_LANES.
> > (find_interesting_uses_stmt): Likewise.
> > (get_alias_ptr_type_for_ptr_address): Likewise.
> > (add_iv_candidate_for_use): Add scaled index candidate if useful.
> >
> > Change-Id: I8e8151fe2dde2845dedf38b090103694da6fc9d1
> > ---
> >  gcc/tree-ssa-loop-ivopts.c | 60 
> > +-
> >  1 file changed, 59 insertions(+), 1 deletion(-)
> >
> > diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
> > index 9864b59..115a70c 100644
> > --- a/gcc/tree-ssa-loop-ivopts.c
> > +++ b/gcc/tree-ssa-loop-ivopts.c
> > @@ -2451,11 +2451,13 @@ get_mem_type_for_internal_fn (gcall *call, tree 
> > *op_p)
> >switch (gimple_call_internal_fn (call))
> >  {
> >  case IFN_MASK_LOAD:
> > +case IFN_MASK_LOAD_LANES:
> >if (op_p == gimple_call_arg_ptr (call, 0))
> > return TREE_TYPE (gimple_call_lhs (call));
> >return NULL_TREE;
> >
> >  case IFN_MASK_STORE:
> > +case IFN_MASK_STORE_LANES:
> >if (op_p == gimple_call_arg_ptr (call, 0))
> > return TREE_TYPE (gimple_call_arg (call, 3));
> >return NULL_TREE;
> > @@ -2545,7 +2547,7 @@ find_interesting_uses_stmt (struct ivopts_data *data, 
> > gimple *stmt)
> >   return;
> > }
> >
> > -  /* TODO -- we should also handle address uses of type
> > +  /* TODO -- we should also handle all address uses of type
> >
> >  memory = call (whatever);
> >
> > @@ -2553,6 +2555,27 @@ find_interesting_uses_stmt (struct ivopts_data 
> > *data, gimple *stmt)
> >
> >  call (memory).  */
> >  }
> > +  else if (is_gimple_call (stmt))
> > +{
> > +  gcall *call = dyn_cast  (stmt);
> > +  if (call
>
> that's testing things twice, just do
>
>else if (gcall *call = dyn_cast <gcall *> (stmt))
>  {
> ...
>
> no other comments besides why do you need _LANES handling here where
> the w/o _LANES handling didn't need anything.
Right,  I have now changed this in the revised patch.

Thanks,
Kugan

>
> > + && gimple_call_internal_p (call)
> > + && (gimple_call_internal_fn (call) == IFN_MASK_LOAD_LANES
> > + || gimple_call_internal_fn (call) == IFN_MASK_STORE_LANES))
> > +   {
> > + tree *arg = gimple_call_arg_ptr (call, 0);
> > + struct iv *civ = get_iv (data, *arg);
> > + tree mem_type = get_mem_type_for_internal_fn (call, arg);
> > + if (civ && mem_type)
> > +   {
> > + civ = alloc_iv (data, civ->base, civ->step);
> > + record_group_use (data, arg, civ, stmt, USE_PTR_ADDRESS,
> > +   mem_type);
> > + return;
> > +   }
> > +   }
> > +}
> > +
> >
> >if (gimple_code (stmt) == GIMPLE_PHI
> >&& gimple_bb (stmt) == data->current_loop->header)
> > @@ -3500,6 +3523,39 @@ add_iv_candidate_for_use (struct ivopts_data *data, 
> > struct iv_use *use)
> >  basetype = sizetype;
> >record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
> >
> > +  /* Compare the cost of an address with an unscaled index with the cost of
> > +an address with a scaled index and add candidate if useful. */
> > +  if (use != NULL && use->type == USE_PTR_ADDRESS)
> > +{
> > +  struct mem_address parts = {NULL_TREE, integer_one_node,
> > + NULL_TREE, NULL_TREE, NULL_TREE};
> > +  poly_uint64 temp;
> > +  poly_int64 fact;
> > +  bool speed = optimize_loop_for_speed_p (data->current_loop);
> > +  poly_int64 poly_step = tree_to_poly_int64 (iv->step);
> > +  machine_mode mem_mode = TYPE_MODE (use->mem_type);
> > +  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
> > +
> > +  fact = GET_MODE_SIZE (GET_MODE_INNER (TYPE_MODE (use->mem_type)));
> > +  parts.index = integer_one_node;
> > +
> > +  if (fact.is_constant ()
> > + && can_div_trunc_p (poly_step, fact, &temp))
> > +   {
> > + /* Addressing mode "base + index".  */
> > + rtx addr = addr_for_mem_ref (&parts, as, false);
> > + unsigned cost = address_cost (addr, mem_mode, as, speed);
> > + tree step = wide_int_to_tree (sizetype,
> > +   exact_div (poly_step, fact));
> > + parts.step = wide_int_to_tree (sizetype, fact);
> > + /* Addressing mode "base + index << scale".  */
> > + addr = addr_for_mem_ref (&parts, as, false);
> > + unsigned new_cost = address_cost (addr, mem_mode, as, speed);
> > + if (new_cost < cost)
> > +   add_candidate 

Re: [PATCH 1/2] Add support for IVOPT

2019-05-16 Thread Kugan Vivekanandarajah
_cost, similar to what is done
there. I have now changed the way you suggested but using the values
used in get_address_cost.
>
> > +  poly_uint64 temp;
> > +  poly_int64 fact;
> > +  bool speed = optimize_loop_for_speed_p (data->current_loop);
> > +  poly_int64 poly_step = tree_to_poly_int64 (iv->step);
>
> The step could be variable, so we should check whether this holds
> using poly_int_tree_p.
OK.

>
> > +  machine_mode mem_mode = TYPE_MODE (use->mem_type);
> > +  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
> > +
> > +  fact = GET_MODE_SIZE (GET_MODE_INNER (TYPE_MODE (use->mem_type)));
>
> This is simpler as:
>
>   GET_MODE_UNIT_SIZE (TYPE_MODE (use->mem_type));
>
OK.

> It's always a compile-time constant, so "fact" can be int/unsigned int
> rather than poly_int64.
OK.

>
> > +  parts.index = integer_one_node;
> > +
> > +  if (fact.is_constant ()
> > +   && can_div_trunc_p (poly_step, fact, &temp))
>
> I think it only makes sense to add the candidate if poly_step is an exact
> multiple of fact, so I think we should use multiple_p here.
OK.
>
> > + {
> > +   /* Addressing mode "base + index".  */
> > +   rtx addr = addr_for_mem_ref (&parts, as, false);
> > +   unsigned cost = address_cost (addr, mem_mode, as, speed);
> > +   tree step = wide_int_to_tree (sizetype,
> > + exact_div (poly_step, fact));
>
> The multiple_p mentioned above would provide this result too.
> We only need to calculate "step" if we decided to add the candidate,
> so I think it should be in the "if" below.
OK.
>
> > +   parts.step = wide_int_to_tree (sizetype, fact);
> > +   /* Addressing mode "base + index << scale".  */
> > +   addr = addr_for_mem_ref (&parts, as, false);
> > +   unsigned new_cost = address_cost (addr, mem_mode, as, speed);
> > +   if (new_cost < cost)
>
> I think it'd be worth splitting the guts of this check out into a helper,
> since it's something that could be reusable.  Maybe:
>
>   unsigned int preferred_mem_scalar_factor (machine_mode);
>
> with the only supported values for now being 1 and GET_MODE_INNER_SIZE.
> (Could be extended later if a target needs it.)
Done in the attached patch. Not tested yet but does this look good if
no regressions?

Thanks,
Kugan

> Thanks,
> Richard
From c41df9740018ea381d03958937d2aee58081 Mon Sep 17 00:00:00 2001
From: Kugan Vivekanandarajah 
Date: Wed, 15 May 2019 09:16:43 +1000
Subject: [PATCH 1/2] Add support for IVOPT

gcc/ChangeLog:

2019-05-15  Kugan Vivekanandarajah  

	PR target/88834
	* tree-ssa-loop-ivopts.c (get_mem_type_for_internal_fn): Handle
	IFN_MASK_LOAD_LANES and IFN_MASK_STORE_LANES.
	(find_interesting_uses_stmt): Likewise.
	(get_alias_ptr_type_for_ptr_address): Likewise.
	(add_iv_candidate_for_use): Add scaled index candidate if useful.

Change-Id: Ib11c53e6566318bc36e28abfe9772a5b802d2f59
---
 gcc/tree-ssa-loop-ivopts.c | 55 ++
 1 file changed, 55 insertions(+)

diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 9864b59..5e29114 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -2451,11 +2451,13 @@ get_mem_type_for_internal_fn (gcall *call, tree *op_p)
   switch (gimple_call_internal_fn (call))
 {
 case IFN_MASK_LOAD:
+case IFN_MASK_LOAD_LANES:
   if (op_p == gimple_call_arg_ptr (call, 0))
 	return TREE_TYPE (gimple_call_lhs (call));
   return NULL_TREE;
 
 case IFN_MASK_STORE:
+case IFN_MASK_STORE_LANES:
   if (op_p == gimple_call_arg_ptr (call, 0))
 	return TREE_TYPE (gimple_call_arg (call, 3));
   return NULL_TREE;
@@ -3479,6 +3481,35 @@ add_iv_candidate_derived_from_uses (struct ivopts_data *data)
   data->iv_common_cands.truncate (0);
 }
 
+/* Return the preferred mem scale factor for accessing MEM_MODE
+   of BASE in LOOP.  */
+static unsigned int
+preferred_mem_scale_factor (struct loop *loop,
+			tree base, machine_mode mem_mode)
+{
+  struct mem_address parts = {};
+  bool speed = optimize_loop_for_speed_p (loop);
+  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (base));
+  unsigned int fact = GET_MODE_UNIT_SIZE (mem_mode);
+
+  /* Addressing mode "base + index".  */
+  parts.index = integer_one_node;
+  parts.base = integer_one_node;
+  rtx addr = addr_for_mem_ref (&parts, as, false);
+  unsigned cost = address_cost (addr, mem_mode, as, speed);
+
+  /* Addressing mode "base + index << scale".  */
+  parts.step = wide_int_to_tree (sizetype, fact);
+  addr = addr_for_mem_ref (&parts, as, false);
+  unsigned new_cost = address_cost (addr, mem_mode, as, speed);

Re: [PATCH 1/2] Add support for IVOPT

2019-05-16 Thread Richard Biener
On Wed, May 15, 2019 at 4:40 AM  wrote:
>
> From: Kugan Vivekanandarajah 
>
> gcc/ChangeLog:
>
> 2019-05-15  Kugan Vivekanandarajah  
>
> PR target/88834
> * tree-ssa-loop-ivopts.c (get_mem_type_for_internal_fn): Handle
> IFN_MASK_LOAD_LANES and IFN_MASK_STORE_LANES.
> (find_interesting_uses_stmt): Likewise.
> (get_alias_ptr_type_for_ptr_address): Likewise.
> (add_iv_candidate_for_use): Add scaled index candidate if useful.
>
> Change-Id: I8e8151fe2dde2845dedf38b090103694da6fc9d1
> ---
>  gcc/tree-ssa-loop-ivopts.c | 60 
> +-
>  1 file changed, 59 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
> index 9864b59..115a70c 100644
> --- a/gcc/tree-ssa-loop-ivopts.c
> +++ b/gcc/tree-ssa-loop-ivopts.c
> @@ -2451,11 +2451,13 @@ get_mem_type_for_internal_fn (gcall *call, tree *op_p)
>switch (gimple_call_internal_fn (call))
>  {
>  case IFN_MASK_LOAD:
> +case IFN_MASK_LOAD_LANES:
>if (op_p == gimple_call_arg_ptr (call, 0))
> return TREE_TYPE (gimple_call_lhs (call));
>return NULL_TREE;
>
>  case IFN_MASK_STORE:
> +case IFN_MASK_STORE_LANES:
>if (op_p == gimple_call_arg_ptr (call, 0))
> return TREE_TYPE (gimple_call_arg (call, 3));
>return NULL_TREE;
> @@ -2545,7 +2547,7 @@ find_interesting_uses_stmt (struct ivopts_data *data, 
> gimple *stmt)
>   return;
> }
>
> -  /* TODO -- we should also handle address uses of type
> +  /* TODO -- we should also handle all address uses of type
>
>  memory = call (whatever);
>
> @@ -2553,6 +2555,27 @@ find_interesting_uses_stmt (struct ivopts_data *data, 
> gimple *stmt)
>
>  call (memory).  */
>  }
> +  else if (is_gimple_call (stmt))
> +{
> +  gcall *call = dyn_cast  (stmt);
> +  if (call

that's testing things twice, just do

   else if (gcall *call = dyn_cast <gcall *> (stmt))
 {
...

no other comments besides why do you need _LANES handling here where
the w/o _LANES handling didn't need anything.

> + && gimple_call_internal_p (call)
> + && (gimple_call_internal_fn (call) == IFN_MASK_LOAD_LANES
> + || gimple_call_internal_fn (call) == IFN_MASK_STORE_LANES))
> +   {
> + tree *arg = gimple_call_arg_ptr (call, 0);
> + struct iv *civ = get_iv (data, *arg);
> + tree mem_type = get_mem_type_for_internal_fn (call, arg);
> + if (civ && mem_type)
> +   {
> + civ = alloc_iv (data, civ->base, civ->step);
> + record_group_use (data, arg, civ, stmt, USE_PTR_ADDRESS,
> +   mem_type);
> + return;
> +   }
> +   }
> +}
> +
>
>if (gimple_code (stmt) == GIMPLE_PHI
>&& gimple_bb (stmt) == data->current_loop->header)
> @@ -3500,6 +3523,39 @@ add_iv_candidate_for_use (struct ivopts_data *data, 
> struct iv_use *use)
>  basetype = sizetype;
>record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
>
> +  /* Compare the cost of an address with an unscaled index with the cost of
> +an address with a scaled index and add candidate if useful. */
> +  if (use != NULL && use->type == USE_PTR_ADDRESS)
> +{
> +  struct mem_address parts = {NULL_TREE, integer_one_node,
> + NULL_TREE, NULL_TREE, NULL_TREE};
> +  poly_uint64 temp;
> +  poly_int64 fact;
> +  bool speed = optimize_loop_for_speed_p (data->current_loop);
> +  poly_int64 poly_step = tree_to_poly_int64 (iv->step);
> +  machine_mode mem_mode = TYPE_MODE (use->mem_type);
> +  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
> +
> +  fact = GET_MODE_SIZE (GET_MODE_INNER (TYPE_MODE (use->mem_type)));
> +  parts.index = integer_one_node;
> +
> +  if (fact.is_constant ()
> + && can_div_trunc_p (poly_step, fact, &temp))
> +   {
> + /* Addressing mode "base + index".  */
> + rtx addr = addr_for_mem_ref (&parts, as, false);
> + unsigned cost = address_cost (addr, mem_mode, as, speed);
> + tree step = wide_int_to_tree (sizetype,
> +   exact_div (poly_step, fact));
> + parts.step = wide_int_to_tree (sizetype, fact);
> + /* Addressing mode "base + index << scale".  */
> + addr = addr_for_mem_ref (&parts, as, false);
> + unsigned new_cost = address_cost (addr, mem_mode, as, speed);
> + if (new_cost < cost)
> +   add_candidate (data, size_int (0), step, true, NULL);
> +   }
> +}
> +
>/* Record common candidate with constant offset stripped in base.
>   Like the use itself, we also add candidate directly for it.  */
>base = strip_offset (iv->base, );
> @@ -7112,6 +7168,8 @@ get_alias_ptr_type_for_ptr_address (iv_use *use)
>  {
>  case IFN_MASK_LOAD:
>  case IFN_MASK_STORE:
> 

Re: [PATCH 1/2] Add support for IVOPT

2019-05-16 Thread Richard Sandiford
Richard Sandiford  writes:
> kugan.vivekanandara...@linaro.org writes:
>> +  parts.step = wide_int_to_tree (sizetype, fact);
>> +  /* Addressing mode "base + index << scale".  */
>> +  addr = addr_for_mem_ref (, as, false);
>> +  unsigned new_cost = address_cost (addr, mem_mode, as, speed);
>> +  if (new_cost < cost)
>
> I think it'd be worth splitting the guts of this check out into a helper,
> since it's something that could be reusable.  Maybe:
>
>   unsigned int preferred_mem_scalar_factor (machine_mode);
>
> with the only supported values for now being 1 and GET_MODE_INNER_SIZE.
> (Could be extended later if a target needs it.)

Er, I did of course mean "scale_factor" :-)


Re: [PATCH 1/2] Add support for IVOPT

2019-05-15 Thread Richard Sandiford
Thanks for doing this.

kugan.vivekanandara...@linaro.org writes:
> From: Kugan Vivekanandarajah 
>
> gcc/ChangeLog:
>
> 2019-05-15  Kugan Vivekanandarajah  
>
>   PR target/88834
>   * tree-ssa-loop-ivopts.c (get_mem_type_for_internal_fn): Handle
>   IFN_MASK_LOAD_LANES and IFN_MASK_STORE_LANES.
>   (find_interesting_uses_stmt): Likewise.
>   (get_alias_ptr_type_for_ptr_address): Likewise.
>   (add_iv_candidate_for_use): Add scaled index candidate if useful.
>
> Change-Id: I8e8151fe2dde2845dedf38b090103694da6fc9d1
> ---
>  gcc/tree-ssa-loop-ivopts.c | 60 
> +-
>  1 file changed, 59 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
> index 9864b59..115a70c 100644
> --- a/gcc/tree-ssa-loop-ivopts.c
> +++ b/gcc/tree-ssa-loop-ivopts.c
> @@ -2451,11 +2451,13 @@ get_mem_type_for_internal_fn (gcall *call, tree *op_p)
>switch (gimple_call_internal_fn (call))
>  {
>  case IFN_MASK_LOAD:
> +case IFN_MASK_LOAD_LANES:
>if (op_p == gimple_call_arg_ptr (call, 0))
>   return TREE_TYPE (gimple_call_lhs (call));
>return NULL_TREE;
>  
>  case IFN_MASK_STORE:
> +case IFN_MASK_STORE_LANES:
>if (op_p == gimple_call_arg_ptr (call, 0))
>   return TREE_TYPE (gimple_call_arg (call, 3));
>return NULL_TREE;
> @@ -2545,7 +2547,7 @@ find_interesting_uses_stmt (struct ivopts_data *data, 
> gimple *stmt)
> return;
>   }
>  
> -  /* TODO -- we should also handle address uses of type
> +  /* TODO -- we should also handle all address uses of type
>  
>memory = call (whatever);
>  
> @@ -2553,6 +2555,27 @@ find_interesting_uses_stmt (struct ivopts_data *data, 
> gimple *stmt)
>  
>call (memory).  */
>  }
> +  else if (is_gimple_call (stmt))
> +{
> +  gcall *call = dyn_cast <gcall *> (stmt);
> +  if (call
> +   && gimple_call_internal_p (call)
> +   && (gimple_call_internal_fn (call) == IFN_MASK_LOAD_LANES
> +   || gimple_call_internal_fn (call) == IFN_MASK_STORE_LANES))
> + {
> +   tree *arg = gimple_call_arg_ptr (call, 0);
> +   struct iv *civ = get_iv (data, *arg);
> +   tree mem_type = get_mem_type_for_internal_fn (call, arg);
> +   if (civ && mem_type)
> + {
> +   civ = alloc_iv (data, civ->base, civ->step);
> +   record_group_use (data, arg, civ, stmt, USE_PTR_ADDRESS,
> + mem_type);
> +   return;
> + }
> + }
> +}
> +

Why do you need to handle this specially?  Does:

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
{
  op = USE_FROM_PTR (use_p);

  if (TREE_CODE (op) != SSA_NAME)
continue;

  iv = get_iv (data, op);
  if (!iv)
continue;

  if (!find_address_like_use (data, stmt, use_p->use, iv))
find_interesting_uses_op (data, op);
}

not do the right thing for the load/store lane case?

> @@ -3500,6 +3523,39 @@ add_iv_candidate_for_use (struct ivopts_data *data, 
> struct iv_use *use)
>  basetype = sizetype;
>record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
>  
> +  /* Compare the cost of an address with an unscaled index with the cost of
> +an address with a scaled index and add candidate if useful. */
> +  if (use != NULL && use->type == USE_PTR_ADDRESS)

I think we want this for all address uses.  E.g. for SVE, masked and
unmasked accesses would both benefit.

> +{
> +  struct mem_address parts = {NULL_TREE, integer_one_node,
> +   NULL_TREE, NULL_TREE, NULL_TREE};

Might be better to use "= {}" and initialise the fields that matter by
assignment.  As it stands this uses integer_one_node as the base, but I
couldn't tell if that was deliberate.

> +  poly_uint64 temp;
> +  poly_int64 fact;
> +  bool speed = optimize_loop_for_speed_p (data->current_loop);
> +  poly_int64 poly_step = tree_to_poly_int64 (iv->step);

The step could be variable, so we should check whether this holds
using poly_int_tree_p.

> +  machine_mode mem_mode = TYPE_MODE (use->mem_type);
> +  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
> +
> +  fact = GET_MODE_SIZE (GET_MODE_INNER (TYPE_MODE (use->mem_type)));

This is simpler as:

  GET_MODE_UNIT_SIZE (TYPE_MODE (use->mem_type));

It's always a compile-time constant, so "fact" can be int/unsigned int
rather than poly_int64.

> +  parts.index = integer_one_node;
> +
> +  if (fact.is_constant ()
> > +   && can_div_trunc_p (poly_step, fact, &temp))

I think it only makes sense to add the candidate if poly_step is an exact
multiple of fact, so I think we should use multiple_p here.

> + {
> +   /* Addressing mode "base + index".  */
> > +   rtx addr = addr_for_mem_ref (&parts, as, false);
> +   unsigned cost = address_cost (addr, mem_mode, as, speed);
> +   tree step = wide_int_to_tree 

[PATCH 1/2] Add support for IVOPT

2019-05-14 Thread kugan . vivekanandarajah
From: Kugan Vivekanandarajah 

gcc/ChangeLog:

2019-05-15  Kugan Vivekanandarajah  

PR target/88834
* tree-ssa-loop-ivopts.c (get_mem_type_for_internal_fn): Handle
IFN_MASK_LOAD_LANES and IFN_MASK_STORE_LANES.
(find_interesting_uses_stmt): Likewise.
(get_alias_ptr_type_for_ptr_address): Likewise.
(add_iv_candidate_for_use): Add scaled index candidate if useful.

Change-Id: I8e8151fe2dde2845dedf38b090103694da6fc9d1
---
 gcc/tree-ssa-loop-ivopts.c | 60 +-
 1 file changed, 59 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 9864b59..115a70c 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -2451,11 +2451,13 @@ get_mem_type_for_internal_fn (gcall *call, tree *op_p)
   switch (gimple_call_internal_fn (call))
 {
 case IFN_MASK_LOAD:
+case IFN_MASK_LOAD_LANES:
   if (op_p == gimple_call_arg_ptr (call, 0))
return TREE_TYPE (gimple_call_lhs (call));
   return NULL_TREE;
 
 case IFN_MASK_STORE:
+case IFN_MASK_STORE_LANES:
   if (op_p == gimple_call_arg_ptr (call, 0))
return TREE_TYPE (gimple_call_arg (call, 3));
   return NULL_TREE;
@@ -2545,7 +2547,7 @@ find_interesting_uses_stmt (struct ivopts_data *data, 
gimple *stmt)
  return;
}
 
-  /* TODO -- we should also handle address uses of type
+  /* TODO -- we should also handle all address uses of type
 
 memory = call (whatever);
 
@@ -2553,6 +2555,27 @@ find_interesting_uses_stmt (struct ivopts_data *data, 
gimple *stmt)
 
 call (memory).  */
 }
+  else if (is_gimple_call (stmt))
+{
+  gcall *call = dyn_cast <gcall *> (stmt);
+  if (call
+ && gimple_call_internal_p (call)
+ && (gimple_call_internal_fn (call) == IFN_MASK_LOAD_LANES
+ || gimple_call_internal_fn (call) == IFN_MASK_STORE_LANES))
+   {
+ tree *arg = gimple_call_arg_ptr (call, 0);
+ struct iv *civ = get_iv (data, *arg);
+ tree mem_type = get_mem_type_for_internal_fn (call, arg);
+ if (civ && mem_type)
+   {
+ civ = alloc_iv (data, civ->base, civ->step);
+ record_group_use (data, arg, civ, stmt, USE_PTR_ADDRESS,
+   mem_type);
+ return;
+   }
+   }
+}
+
 
   if (gimple_code (stmt) == GIMPLE_PHI
   && gimple_bb (stmt) == data->current_loop->header)
@@ -3500,6 +3523,39 @@ add_iv_candidate_for_use (struct ivopts_data *data, 
struct iv_use *use)
 basetype = sizetype;
   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
 
+  /* Compare the cost of an address with an unscaled index with the cost of
+an address with a scaled index and add candidate if useful. */
+  if (use != NULL && use->type == USE_PTR_ADDRESS)
+{
+  struct mem_address parts = {NULL_TREE, integer_one_node,
+ NULL_TREE, NULL_TREE, NULL_TREE};
+  poly_uint64 temp;
+  poly_int64 fact;
+  bool speed = optimize_loop_for_speed_p (data->current_loop);
+  poly_int64 poly_step = tree_to_poly_int64 (iv->step);
+  machine_mode mem_mode = TYPE_MODE (use->mem_type);
+  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
+
+  fact = GET_MODE_SIZE (GET_MODE_INNER (TYPE_MODE (use->mem_type)));
+  parts.index = integer_one_node;
+
+  if (fact.is_constant ()
+ && can_div_trunc_p (poly_step, fact, &temp))
+   {
+ /* Addressing mode "base + index".  */
+ rtx addr = addr_for_mem_ref (&parts, as, false);
+ unsigned cost = address_cost (addr, mem_mode, as, speed);
+ tree step = wide_int_to_tree (sizetype,
+   exact_div (poly_step, fact));
+ parts.step = wide_int_to_tree (sizetype, fact);
+ /* Addressing mode "base + index << scale".  */
+ addr = addr_for_mem_ref (&parts, as, false);
+ unsigned new_cost = address_cost (addr, mem_mode, as, speed);
+ if (new_cost < cost)
+   add_candidate (data, size_int (0), step, true, NULL);
+   }
+}
+
   /* Record common candidate with constant offset stripped in base.
  Like the use itself, we also add candidate directly for it.  */
   base = strip_offset (iv->base, );
@@ -7112,6 +7168,8 @@ get_alias_ptr_type_for_ptr_address (iv_use *use)
 {
 case IFN_MASK_LOAD:
 case IFN_MASK_STORE:
+case IFN_MASK_LOAD_LANES:
+case IFN_MASK_STORE_LANES:
   /* The second argument contains the correct alias type.  */
   gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));
   return TREE_TYPE (gimple_call_arg (call, 1));
-- 
2.7.4