On Wed, Oct 23, 2013 at 11:14:54AM +0200, Richard Biener wrote:
> On Tue, 22 Oct 2013, Jakub Jelinek wrote:
> 
> > Hi!
> > 
> > If VRP tells us that oprnd0 is always >= 0 or always < 0, we can generate
> > better code for the divmod vectorization.
> > 
> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> Testcase...?
> 
> Ok with adding one (I suggest a x86 specific one and scanning the
> assembler dump).

Like this?
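
To make the transformation a bit more concrete, here is a rough scalar model
of what the pattern builds for a signed 32-bit division by 15 (the magic
constant and post_shift below are illustrative only, they aren't taken from
the patch, and >> on negative values assumes GCC's arithmetic shift):

  int
  div15 (int x)
  {
    /* t1 = x h* ml;  high 32 bits of the signed product.  */
    int t1 = (int) (((long long) x * (int) 0x88888889) >> 32);
    int t2 = t1 + x;       /* the "add" case, the multiplier is negative  */
    int t3 = t2 >> 3;      /* post_shift  */
    int t4 = x >> 31;      /* sign of x: 0 or -1  */
    return t3 - t4;        /* q = t3 - t4  */
  }

With the patch, when get_range_info shows x is known to be non-negative, t4
is known to be 0, so both the t4 shift and the final subtraction go away
(q = t3); when x is known to be negative, t4 becomes the constant -1 and
only the shift is dropped.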

2013-10-23  Jakub Jelinek  <ja...@redhat.com>

        * tree-vect-patterns.c (vect_recog_divmod_pattern): Optimize
        sequence based on get_range_info returned range.

        * gcc.target/i386/vect-div-1.c: New test.

--- gcc/tree-vect-patterns.c.jj 2013-10-22 18:36:51.947395037 +0200
+++ gcc/tree-vect-patterns.c    2013-10-23 11:28:26.211956658 +0200
@@ -2226,20 +2226,19 @@ vect_recog_divmod_pattern (vec<gimple> *
       if (post_shift >= prec)
        return NULL;
 
-      /* t1 = oprnd1 h* ml;  */
+      /* t1 = oprnd0 h* ml;  */
       t1 = vect_recog_temp_ssa_var (itype, NULL);
       def_stmt
        = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
                                        build_int_cst (itype, ml));
-      append_pattern_def_seq (stmt_vinfo, def_stmt);
 
       if (add)
        {
          /* t2 = t1 + oprnd0;  */
+         append_pattern_def_seq (stmt_vinfo, def_stmt);
          t2 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
-         append_pattern_def_seq (stmt_vinfo, def_stmt);
        }
       else
        t2 = t1;
@@ -2247,27 +2246,57 @@ vect_recog_divmod_pattern (vec<gimple> *
       if (post_shift)
        {
          /* t3 = t2 >> post_shift;  */
+         append_pattern_def_seq (stmt_vinfo, def_stmt);
          t3 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
                                            build_int_cst (itype, post_shift));
-         append_pattern_def_seq (stmt_vinfo, def_stmt);
        }
       else
        t3 = t2;
 
-      /* t4 = oprnd0 >> (prec - 1);  */
-      t4 = vect_recog_temp_ssa_var (itype, NULL);
-      def_stmt
-       = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
-                                       build_int_cst (itype, prec - 1));
-      append_pattern_def_seq (stmt_vinfo, def_stmt);
-
-      /* q = t3 - t4;  or q = t4 - t3;  */
-      q = vect_recog_temp_ssa_var (itype, NULL);
-      pattern_stmt
-       = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
-                                       d < 0 ? t3 : t4);
+      double_int oprnd0_min, oprnd0_max;
+      int msb = 1;
+      if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
+       {
+         if (!oprnd0_min.is_negative ())
+           msb = 0;
+         else if (oprnd0_max.is_negative ())
+           msb = -1;
+       }
+
+      if (msb == 0 && d >= 0)
+       {
+         /* q = t3;  */
+         q = t3;
+         pattern_stmt = def_stmt;
+       }
+      else
+       {
+         /* t4 = oprnd0 >> (prec - 1);
+            or if we know from VRP that oprnd0 >= 0
+            t4 = 0;
+            or if we know from VRP that oprnd0 < 0
+            t4 = -1;  */
+         append_pattern_def_seq (stmt_vinfo, def_stmt);
+         t4 = vect_recog_temp_ssa_var (itype, NULL);
+         if (msb != 1)
+           def_stmt
+             = gimple_build_assign_with_ops (INTEGER_CST,
+                                             t4, build_int_cst (itype, msb),
+                                             NULL_TREE);
+         else
+           def_stmt
+             = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
+                                             build_int_cst (itype, prec - 1));
+         append_pattern_def_seq (stmt_vinfo, def_stmt);
+
+         /* q = t3 - t4;  or q = t4 - t3;  */
+         q = vect_recog_temp_ssa_var (itype, NULL);
+         pattern_stmt
+           = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
+                                           d < 0 ? t3 : t4);
+       }
     }
 
   if (rhs_code == TRUNC_MOD_EXPR)
--- gcc/testsuite/gcc.target/i386/vect-div-1.c.jj       2013-10-23 11:43:49.089265027 +0200
+++ gcc/testsuite/gcc.target/i386/vect-div-1.c  2013-10-23 11:57:06.387187749 +0200
@@ -0,0 +1,43 @@
+/* { dg-do compile { target sse2 } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-common -msse2" } */
+
+unsigned short b[1024] = { 0 };
+int a[1024] = { 0 };
+
+int
+f1 (int x)
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    a[i] = (b[i] + 7) / 15;
+}
+
+int
+f2 (int x)
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    a[i] = (b[i] + 7) % 15;
+}
+
+int
+f3 (int x)
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    a[i] = (b[i] - 66000) / 15;
+}
+
+int
+f4 (int x)
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    a[i] = (b[i] - 66000) % 15;
+}
+
+/* In f1 and f2, VRP can prove the first operand of the division or modulo
+   is always non-negative, so there is no need for the >> 31 shift etc.
+   to compute its sign.  And in f3 and f4, VRP can prove it is always
+   negative.  */
+/* { dg-final { scan-assembler-not "psrad\[^\n\r\]*\\\$31" } } */
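
For the ranges in the testcase: b[] is unsigned short, so in f1 and f2 the
dividend b[i] + 7 is in [7, 65542] and thus never negative, while in f3 and
f4 b[i] - 66000 is in [-66000, -465] and thus always negative; in all four
loops the psrad $31 computing the sign should therefore disappear.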


        Jakub
