Re: [PATCH] Add generic vector lowering for integer division and modulus (PR tree-optimization/53645)

2012-06-28 Thread Richard Guenther
On Wed, 27 Jun 2012, Jakub Jelinek wrote:

 Hi!
 
 This patch makes veclower2 attempt to emit integer division/modulus of
 vectors by constants using vector multiplication, shifts or masking.
 
 It is somewhat similar to the vect_recog_divmod_pattern, but it needs
 to analyze everything first, see if all divisions or modulos are doable
 using the same sequence of vector insns, and then emit vector insns
 as opposed to the scalar ones the pattern recognizer adds.
 
 Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

I wonder what to do for -O0 though - shouldn't we not call
expand_vector_divmod in that case?  Thus,

+ if (!optimize
+     || !VECTOR_INTEGER_TYPE_P (type) || TREE_CODE (rhs2) != VECTOR_CST)
+   break;

?

Thanks,
Richard.

 The testcase additionally eyeballed even for -mavx2, which unlike -mavx
 has vector << vector shifts.
 
 2012-06-27  Jakub Jelinek  ja...@redhat.com
 
   PR tree-optimization/53645
   * tree-vect-generic.c (add_rshift): New function.
   (expand_vector_divmod): New function.
   (expand_vector_operation): Use it for vector integer
   TRUNC_{DIV,MOD}_EXPR by VECTOR_CST.
   * tree-vect-patterns.c (vect_recog_divmod_pattern): Replace
   unused lguup variable with dummy_int.
 
   * gcc.c-torture/execute/pr53645.c: New test.
 
 --- gcc/tree-vect-generic.c.jj2012-06-26 10:00:42.935832834 +0200
 +++ gcc/tree-vect-generic.c   2012-06-27 10:15:20.534103045 +0200
 @@ -391,6 +391,515 @@ expand_vector_comparison (gimple_stmt_it
return t;
  }
  
 +/* Helper function of expand_vector_divmod.  Gimplify a RSHIFT_EXPR in type
 +   of OP0 with shift counts in SHIFTCNTS array and return the temporary 
 holding
 +   the result if successful, otherwise return NULL_TREE.  */
 +static tree
 +add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
 +{
 +  optab op;
 +  unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
 +  bool scalar_shift = true;
 +
 +  for (i = 1; i < nunits; i++)
 +{
 +  if (shiftcnts[i] != shiftcnts[0])
 + scalar_shift = false;
 +}
 +
 +  if (scalar_shift && shiftcnts[0] == 0)
 +return op0;
 +
 +  if (scalar_shift)
 +{
 +  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
 +  if (op != NULL
 +      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
 + return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
 + build_int_cst (NULL_TREE, shiftcnts[0]));
 +}
 +
 +  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
 +  if (op != NULL
 +      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
 +{
 +  tree *vec = XALLOCAVEC (tree, nunits);
 +  for (i = 0; i < nunits; i++)
 + vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
 +  return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
 +   build_vector (type, vec));
 +}
 +
 +  return NULL_TREE;
 +}
 +
 +/* Try to expand integer vector division by constant using
 +   widening multiply, shifts and additions.  */
 +static tree
 +expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 +   tree op1, enum tree_code code)
 +{
 +  bool use_pow2 = true;
 +  bool has_vector_shift = true;
 +  int mode = -1, this_mode;
 +  int pre_shift = -1, post_shift;
 +  unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
 +  int *shifts = XALLOCAVEC (int, nunits * 4);
 +  int *pre_shifts = shifts + nunits;
 +  int *post_shifts = pre_shifts + nunits;
 +  int *shift_temps = post_shifts + nunits;
 +  unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
 +  int prec = TYPE_PRECISION (TREE_TYPE (type));
 +  int dummy_int;
 +  unsigned int i, unsignedp = TYPE_UNSIGNED (TREE_TYPE (type));
 +  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
 +  optab op;
 +  tree *vec;
 +  unsigned char *sel;
 +  tree cur_op, mhi, mlo, mulcst, perm_mask, wider_type, tem;
 +
 +  if (prec > HOST_BITS_PER_WIDE_INT)
 +return NULL_TREE;
 +
 +  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
 +  if (op == NULL
 +  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
 +has_vector_shift = false;
 +
 +  /* Analysis phase.  Determine if all op1 elements are either power
 + of two and it is possible to expand it using shifts (or for remainder
 + using masking).  Additionally compute the multiplicative constants
 + and pre and post shifts if the division is to be expanded using
 + widening or high part multiplication plus shifts.  */
 +  for (i = 0; i < nunits; i++)
 +{
 +  tree cst = VECTOR_CST_ELT (op1, i);
 +  unsigned HOST_WIDE_INT ml;
 +
 +  if (!host_integerp (cst, unsignedp) || integer_zerop (cst))
 + return NULL_TREE;
 +  pre_shifts[i] = 0;
 +  post_shifts[i] = 0;
 +  mulc[i] = 0;
 +  if (use_pow2
 +      && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
 + 

[PATCH] Add generic vector lowering for integer division and modulus (PR tree-optimization/53645)

2012-06-27 Thread Jakub Jelinek
Hi!

This patch makes veclower2 attempt to emit integer division/modulus of
vectors by constants using vector multiplication, shifts or masking.

It is somewhat similar to the vect_recog_divmod_pattern, but it needs
to analyze everything first, see if all divisions or modulos are doable
using the same sequence of vector insns, and then emit vector insns
as opposed to the scalar ones the pattern recognizer adds.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

The testcase additionally eyeballed even for -mavx2, which unlike -mavx
has vector << vector shifts.

2012-06-27  Jakub Jelinek  ja...@redhat.com

PR tree-optimization/53645
* tree-vect-generic.c (add_rshift): New function.
(expand_vector_divmod): New function.
(expand_vector_operation): Use it for vector integer
TRUNC_{DIV,MOD}_EXPR by VECTOR_CST.
* tree-vect-patterns.c (vect_recog_divmod_pattern): Replace
unused lguup variable with dummy_int.

* gcc.c-torture/execute/pr53645.c: New test.

--- gcc/tree-vect-generic.c.jj  2012-06-26 10:00:42.935832834 +0200
+++ gcc/tree-vect-generic.c 2012-06-27 10:15:20.534103045 +0200
@@ -391,6 +391,515 @@ expand_vector_comparison (gimple_stmt_it
   return t;
 }
 
+/* Helper function of expand_vector_divmod.  Gimplify a RSHIFT_EXPR in type
+   of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
+   the result if successful, otherwise return NULL_TREE.  */
+static tree
+add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
+{
+  optab op;
+  unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
+  bool scalar_shift = true;
+
+  for (i = 1; i < nunits; i++)
+{
+  if (shiftcnts[i] != shiftcnts[0])
+   scalar_shift = false;
+}
+
+  if (scalar_shift && shiftcnts[0] == 0)
+return op0;
+
+  if (scalar_shift)
+{
+  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
+  if (op != NULL
+      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
+   return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
+   build_int_cst (NULL_TREE, shiftcnts[0]));
+}
+
+  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
+  if (op != NULL
+      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
+{
+  tree *vec = XALLOCAVEC (tree, nunits);
+  for (i = 0; i < nunits; i++)
+   vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
+  return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
+ build_vector (type, vec));
+}
+
+  return NULL_TREE;
+}
+
+/* Try to expand integer vector division by constant using
+   widening multiply, shifts and additions.  */
+static tree
+expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
+ tree op1, enum tree_code code)
+{
+  bool use_pow2 = true;
+  bool has_vector_shift = true;
+  int mode = -1, this_mode;
+  int pre_shift = -1, post_shift;
+  unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
+  int *shifts = XALLOCAVEC (int, nunits * 4);
+  int *pre_shifts = shifts + nunits;
+  int *post_shifts = pre_shifts + nunits;
+  int *shift_temps = post_shifts + nunits;
+  unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
+  int prec = TYPE_PRECISION (TREE_TYPE (type));
+  int dummy_int;
+  unsigned int i, unsignedp = TYPE_UNSIGNED (TREE_TYPE (type));
+  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
+  optab op;
+  tree *vec;
+  unsigned char *sel;
+  tree cur_op, mhi, mlo, mulcst, perm_mask, wider_type, tem;
+
+  if (prec > HOST_BITS_PER_WIDE_INT)
+return NULL_TREE;
+
+  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
+  if (op == NULL
+  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
+has_vector_shift = false;
+
+  /* Analysis phase.  Determine if all op1 elements are either power
+ of two and it is possible to expand it using shifts (or for remainder
+ using masking).  Additionally compute the multiplicative constants
+ and pre and post shifts if the division is to be expanded using
+ widening or high part multiplication plus shifts.  */
+  for (i = 0; i < nunits; i++)
+{
+  tree cst = VECTOR_CST_ELT (op1, i);
+  unsigned HOST_WIDE_INT ml;
+
+  if (!host_integerp (cst, unsignedp) || integer_zerop (cst))
+   return NULL_TREE;
+  pre_shifts[i] = 0;
+  post_shifts[i] = 0;
+  mulc[i] = 0;
+  if (use_pow2
+      && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
+   use_pow2 = false;
+  if (use_pow2)
+   {
+ shifts[i] = tree_log2 (cst);
+ if (shifts[i] != shifts[0]
+     && code == TRUNC_DIV_EXPR
+     && !has_vector_shift)
+   use_pow2 = false;
+   }
+  if (mode == -2)
+   continue;
+  if (unsignedp)
+   {
+ unsigned HOST_WIDE_INT mh;
+ unsigned HOST_WIDE_INT d =