Basic-block vectorization "removes" scalar call stmts (well, replaces them
with an assignment from zero) even if the scalar result is still used by
non-vectorized code (yes, that's a cost issue as well; I filed
PR56612 for this).

The following is an easy workaround: although DCE is not run until
quite a while after SLP, it should be able to figure out whether
the scalar calls are dead or not.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

A more elaborate fix has to wait for 4.9 and my SLP re-org patches.

Richard.

2013-03-13  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/56608
        * tree-vect-slp.c (vect_schedule_slp): Do not remove scalar
        calls when vectorizing basic-blocks.

        * gcc.dg/vect/fast-math-bb-slp-call-3.c: New testcase.

Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c (revision 196629)
--- gcc/tree-vect-slp.c (working copy)
*************** vect_schedule_slp (loop_vec_info loop_vi
*** 3181,3187 ****
        unsigned int j;
        gimple_stmt_iterator gsi;
  
!       vect_remove_slp_scalar_calls (root);
  
        for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
                    && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
--- 3191,3205 ----
        unsigned int j;
        gimple_stmt_iterator gsi;
  
!       /* Remove scalar call stmts.  Do not do this for basic-block
!        vectorization as not all uses may be vectorized.
!        ???  Why should this be necessary?  DCE should be able to
!        remove the stmts itself.
!        ???  For BB vectorization we can as well remove scalar
!        stmts starting from the SLP tree root if they have no
!        uses.  */
!       if (loop_vinfo)
!       vect_remove_slp_scalar_calls (root);
  
        for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
                    && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
Index: gcc/testsuite/gcc.dg/vect/fast-math-bb-slp-call-3.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/fast-math-bb-slp-call-3.c (revision 0)
--- gcc/testsuite/gcc.dg/vect/fast-math-bb-slp-call-3.c (working copy)
***************
*** 0 ****
--- 1,68 ----
+ #include <stdlib.h>
+ #include <math.h>
+ 
+ #define MIN(a, b) (((a) < (b)) ? (a) : (b))
+ #define MAX(a, b) (((a) > (b)) ? (a) : (b))
+ 
+ typedef struct {
+     int initialHeight, initialWidth;
+     int rotatedHeight, rotatedWidth;
+     int autoCropHeight, autoCropWidth;
+ } ufraw_data;
+ 
+ void __attribute__((noinline,noclone))
+ ufraw_test(ufraw_data *uf)
+ {
+   int iWidth = uf->initialWidth;
+   int iHeight = uf->initialHeight;
+   double aspectRatio = ((double)iWidth) / iHeight;
+   double midX = iWidth / 2.0 - 0.5;
+   double midY = iHeight / 2.0 - 0.5;
+   double maxX = 0, maxY = 0;
+   double minX = 999999, minY = 999999;
+   double lastX = 0, lastY = 0, area = 0;
+   double scale;
+   int i;
+   for (i = 0; i < iWidth + iHeight - 1; i++)
+     {
+       int x, y;
+       if (i < iWidth) { // Trace the left border of the image
+         x = i;
+         y = 0;
+       } else { // Trace the bottom border of the image
+         x = iWidth - 1;
+         y = i - iWidth + 1;
+       }
+       double srcX = x - midX;
+       double srcY = y - midY;
+       // A digital planimeter:
+       area += srcY * lastX - srcX * lastY;
+       lastX = srcX;
+       lastY = srcY;
+       maxX = MAX(maxX, fabs(srcX));
+       maxY = MAX(maxY, fabs(srcY));
+       if (fabs(srcX / srcY) > aspectRatio)
+       minX = MIN(minX, fabs(srcX));
+       else
+       minY = MIN(minY, fabs(srcY));
+     }
+   scale = sqrt((iWidth - 1) * (iHeight - 1) / area);
+   uf->rotatedWidth = MIN(ceil(2 * maxX + 1.0) * scale, 2 * iWidth);
+   uf->rotatedHeight = MIN(ceil(2 * maxY + 1.0) * scale, 2 * iHeight);
+   uf->autoCropWidth = MIN(floor(2 * minX) * scale, 2 * iWidth);
+   uf->autoCropHeight = MIN(floor(2 * minY) * scale, 2 * iHeight);
+   if (uf->autoCropWidth != 3)
+     abort ();
+ }
+ 
+ int main()
+ {
+   ufraw_data uf_data;
+   ufraw_data *uf = &uf_data;
+   uf->initialWidth = 4;
+   uf->initialHeight = 5;
+   ufraw_test(uf);
+   return 0;
+ }
+ 
+ /* { dg-final { cleanup-tree-dump "slp" } } */

Reply via email to