Re: [7/7] Add negative and zero strides to vect_memory_access_type

2016-06-21  Jeff Law

On 06/15/2016 02:53 AM, Richard Sandiford wrote:

> This patch uses the vect_memory_access_type from patch 6 to represent
> the effect of a negative contiguous stride or a zero stride.  The latter
> is valid only for loads.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?
>
> Thanks,
> Richard
>
>
> gcc/
> * tree-vectorizer.h (vect_memory_access_type): Add
> VMAT_INVARIANT, VMAT_CONTIGUOUS_DOWN and VMAT_CONTIGUOUS_REVERSE.
> * tree-vect-stmts.c (compare_step_with_zero): New function.
> (perm_mask_for_reverse): Move further up file.
> (get_group_load_store_type): Stick to VMAT_ELEMENTWISE if the
> step is negative.
> (get_negative_load_store_type): New function.
> (get_load_store_type): Call it.  Add an ncopies argument.
> (vectorizable_mask_load_store): Update call accordingly and
> remove tests for negative steps.
> (vectorizable_store, vectorizable_load): Likewise.  Handle new
> memory_access_types.

OK.
jeff



[7/7] Add negative and zero strides to vect_memory_access_type

2016-06-15  Richard Sandiford

This patch uses the vect_memory_access_type from patch 6 to represent
the effect of a negative contiguous stride or a zero stride.  The latter
is valid only for loads.
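
For concreteness, a sketch of the two cases (my own example loops, not
from the patch): with 4-byte ints, the reads of B below have steps -4
and 0 respectively.

  /* Illustrative only; these functions are not in the patch.  */
  void
  reverse_copy (int *restrict a, int *restrict b, int n)
  {
    for (int i = 0; i < n; ++i)
      a[i] = b[n - 1 - i];   /* step -4: contiguous, downwards */
  }

  void
  splat_first (int *restrict a, int *restrict b, int n)
  {
    for (int i = 0; i < n; ++i)
      a[i] = b[0];           /* step 0: invariant load */
  }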

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Thanks,
Richard


gcc/
* tree-vectorizer.h (vect_memory_access_type): Add
VMAT_INVARIANT, VMAT_CONTIGUOUS_DOWN and VMAT_CONTIGUOUS_REVERSE.
* tree-vect-stmts.c (compare_step_with_zero): New function.
(perm_mask_for_reverse): Move further up file.
(get_group_load_store_type): Stick to VMAT_ELEMENTWISE if the
step is negative.
(get_negative_load_store_type): New function.
(get_load_store_type): Call it.  Add an ncopies argument.
(vectorizable_mask_load_store): Update call accordingly and
remove tests for negative steps.
(vectorizable_store, vectorizable_load): Likewise.  Handle new
memory_access_types.

Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h
+++ gcc/tree-vectorizer.h
@@ -488,14 +488,26 @@ enum slp_vect_type {
 /* Describes how we're going to vectorize an individual load or store,
    or a group of loads or stores.  */
 enum vect_memory_access_type {
+  /* An access to an invariant address.  This is used only for loads.  */
+  VMAT_INVARIANT,
+
   /* A simple contiguous access.  */
   VMAT_CONTIGUOUS,
 
+  /* A contiguous access that goes down in memory rather than up,
+     with no additional permutation.  This is used only for stores
+     of invariants.  */
+  VMAT_CONTIGUOUS_DOWN,
+
   /* A simple contiguous access in which the elements need to be permuted
      after loading or before storing.  Only used for loop vectorization;
      SLP uses separate permutes.  */
   VMAT_CONTIGUOUS_PERMUTE,
 
+  /* A simple contiguous access in which the elements need to be reversed
+     after loading or before storing.  */
+  VMAT_CONTIGUOUS_REVERSE,
+
   /* An access that uses IFN_LOAD_LANES or IFN_STORE_LANES.  */
   VMAT_LOAD_STORE_LANES,
 
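An aside from me rather than from the patch: VMAT_CONTIGUOUS_DOWN can
skip the reverse permute only because every lane of an invariant vector
is identical.  A sketch:

  /* Illustrative only.  Storing the invariant X with a negative step
     writes {x, x, x, x} per vector, and a reversed {x, x, x, x} is
     the same bytes, so no permute is needed.  */
  void
  fill_down (int *a, int x, int n)
  {
    for (int i = n - 1; i >= 0; --i)
      a[i] = x;
  }
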
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c
+++ gcc/tree-vect-stmts.c
@@ -1672,6 +1672,42 @@ vectorizable_internal_function (combined_fn cfn, tree fndecl,
 static tree permute_vec_elements (tree, tree, tree, gimple *,
                                   gimple_stmt_iterator *);
 
+/* STMT is a non-strided load or store, meaning that it accesses
+   elements with a known constant step.  Return -1 if that step
+   is negative, 0 if it is zero, and 1 if it is greater than zero.  */
+
+static int
+compare_step_with_zero (gimple *stmt)
+{
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  tree step;
+  if (loop_vinfo && nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt))
+    step = STMT_VINFO_DR_STEP (stmt_info);
+  else
+    step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
+  return tree_int_cst_compare (step, size_zero_node);
+}
+
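
  A worked mapping from me, not from the patch, assuming 4-byte ints:

      b[i]          DR_STEP ==  4  ->  returns  1
      b[0]          DR_STEP ==  0  ->  returns  0
      b[n - 1 - i]  DR_STEP == -4  ->  returns -1

  i.e. the sign of the step, since tree_int_cst_compare is a three-way
  -1/0/1 comparison.  A standalone analogue of that sign logic:

      /* Illustrative only, not GCC code.  */
      static int
      sign_of_step (long step)
      {
        return (step > 0) - (step < 0);   /* -1, 0 or 1 */
      }
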
+/* If the target supports a permute mask that reverses the elements in
+   a vector of type VECTYPE, return that mask, otherwise return null.  */
+
+static tree
+perm_mask_for_reverse (tree vectype)
+{
+  int i, nunits;
+  unsigned char *sel;
+
+  nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  sel = XALLOCAVEC (unsigned char, nunits);
+
+  for (i = 0; i < nunits; ++i)
+    sel[i] = nunits - 1 - i;
+
+  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
+    return NULL_TREE;
+  return vect_gen_perm_mask_checked (vectype, sel);
+}
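
  Another aside from me, not from the patch: for nunits == 4 the
  selector built above is {3, 2, 1, 0}, and a permute with that mask
  routes input lane sel[i] to output lane i.  The scalar equivalent:

      /* Illustrative only: the effect of the reverse permute mask.  */
      static void
      apply_reverse_mask (int *out, const int *in, int nunits)
      {
        for (int i = 0; i < nunits; ++i)
          out[i] = in[nunits - 1 - i];   /* sel[i] = nunits - 1 - i */
      }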
 
 /* A subroutine of get_load_store_type, with a subset of the same
    arguments.  Handle the case where STMT is part of a grouped load
@@ -1755,7 +1791,8 @@ get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
          would access excess elements in the last iteration.  */
       bool would_overrun_p = (gap != 0);
       if (!STMT_VINFO_STRIDED_P (stmt_info)
-          && (can_overrun_p || !would_overrun_p))
+          && (can_overrun_p || !would_overrun_p)
+          && compare_step_with_zero (stmt) > 0)
         {
           /* First try using LOAD/STORE_LANES.  */
           if (vls_type == VLS_LOAD
@@ -1814,17 +1851,69 @@ get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
   return true;
 }
 
+/* A subroutine of get_load_store_type, with a subset of the same
+   arguments.  Handle the case where STMT is a load or store that
+   accesses consecutive elements with a negative step.  */
+
+static vect_memory_access_type
+get_negative_load_store_type (gimple *stmt, tree vectype,
+                              vec_load_store_type vls_type,
+                              unsigned int ncopies)
+{
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
+  dr_alignment_support alignment_support_scheme;
+
+  if (ncopies > 1)
+    {
+      if (dump_enabled_p ())
+        dump_printf_loc
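
For intuition on the ncopies > 1 rejection, my reading rather than the
patch's own words:

  /* With more than one vector copy per scalar iteration, a negative
     step reverses the order of the copies as well as of the lanes.
     E.g. 8 ints in 4-lane vectors, scalar order a[7]..a[0]:
       copy 0 must produce {a[7], a[6], a[5], a[4]}
       copy 1 must produce {a[3], a[2], a[1], a[0]}
     so the copies run from high addresses to low, and the function
     falls back to an elementwise access instead.  */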