On Wed, Mar 29, 2017 at 11:05 AM, Richard Biener <rguent...@suse.de> wrote:
>
> After quite some pondering over this and other related bugs I propose
> the following for GCC 7 which tames down PRE a bit (back to levels
> of GCC 6).  Technically it's the wrong place to fix this, we do
> have measures in place during elimination but they are not in effect
> at -O2.  For GCC 8 I'd like to be more aggressive there but that
> would require to enable predictive commoning at -O2 (with some
> limits to its unrolling) to not lose optimization opportunities.
>
> The other option is to ignore this issue and postpone the solution
> to GCC 8.
>
> Bootstrapped / tested on x86_64-unknown-linux-gnu.
>
> Any preference?
+1 enabling predcom at O2 for this kind of transformation, remember I
once suggested that.

Thanks,
bin
>
> Thanks,
> Richard.
>
> 2017-03-29  Richard Biener  <rguent...@suse.de>
>
>         PR tree-optimization/77498
>         * tree-ssa-pre.c (phi_translate_1): Do not allow simplifications
>         to non-constants over backedges.
>
>         * gfortran.dg/pr77498.f: New testcase.
>
> Index: gcc/tree-ssa-pre.c
> ===================================================================
> *** gcc/tree-ssa-pre.c  (revision 246026)
> --- gcc/tree-ssa-pre.c  (working copy)
> *************** phi_translate_1 (pre_expr expr, bitmap_s
> *** 1468,1477 ****
>                    leader for it.  */
>                 if (constant->kind != CONSTANT)
>                   {
> !                   unsigned value_id = get_expr_value_id (constant);
> !                   constant = find_leader_in_sets (value_id, set1, set2);
> !                   if (constant)
> !                     return constant;
>                   }
>                 else
>                   return constant;
> --- 1468,1487 ----
>                    leader for it.  */
>                 if (constant->kind != CONSTANT)
>                   {
> !                   /* Do not allow simplifications to non-constants over
> !                      backedges as this will likely result in a loop PHI node
> !                      to be inserted and increased register pressure.
> !                      See PR77498 - this avoids doing predcoms work in
> !                      a less efficient way.  */
> !                   if (find_edge (pred, phiblock)->flags & EDGE_DFS_BACK)
> !                     ;
> !                   else
> !                     {
> !                       unsigned value_id = get_expr_value_id (constant);
> !                       constant = find_leader_in_sets (value_id, set1, set2);
> !                       if (constant)
> !                         return constant;
> !                     }
>                   }
>                 else
>                   return constant;
> Index: gcc/testsuite/gfortran.dg/pr77498.f
> ===================================================================
> --- gcc/testsuite/gfortran.dg/pr77498.f (nonexistent)
> +++ gcc/testsuite/gfortran.dg/pr77498.f (working copy)
> @@ -0,0 +1,36 @@
> +! { dg-do compile }
> +! { dg-options "-O2 -ffast-math -fdump-tree-pre" }
> +
> +      subroutine foo(U,V,R,N,A)
> +      integer N
> +      real*8 U(N,N,N),V(N,N,N),R(N,N,N),A(0:3)
> +      integer I3, I2, I1
> +C
> +      do I3=2,N-1
> +       do I2=2,N-1
> +        do I1=2,N-1
> +         R(I1,I2,I3)=V(I1,I2,I3)
> +     *      -A(0)*( U(I1,  I2,  I3  ) )
> +     *      -A(1)*( U(I1-1,I2,  I3  ) + U(I1+1,I2,  I3  )
> +     *                 +  U(I1,  I2-1,I3  ) + U(I1,  I2+1,I3  )
> +     *                 +  U(I1,  I2,  I3-1) + U(I1,  I2,  I3+1) )
> +     *      -A(2)*( U(I1-1,I2-1,I3  ) + U(I1+1,I2-1,I3  )
> +     *                 +  U(I1-1,I2+1,I3  ) + U(I1+1,I2+1,I3  )
> +     *                 +  U(I1,  I2-1,I3-1) + U(I1,  I2+1,I3-1)
> +     *                 +  U(I1,  I2-1,I3+1) + U(I1,  I2+1,I3+1)
> +     *                 +  U(I1-1,I2,  I3-1) + U(I1-1,I2,  I3+1)
> +     *                 +  U(I1+1,I2,  I3-1) + U(I1+1,I2,  I3+1) )
> +     *      -A(3)*( U(I1-1,I2-1,I3-1) + U(I1+1,I2-1,I3-1)
> +     *                 +  U(I1-1,I2+1,I3-1) + U(I1+1,I2+1,I3-1)
> +     *                 +  U(I1-1,I2-1,I3+1) + U(I1+1,I2-1,I3+1)
> +     *                 +  U(I1-1,I2+1,I3+1) + U(I1+1,I2+1,I3+1) )
> +        enddo
> +       enddo
> +      enddo
> +      return
> +      end
> +
> +! PRE shouldn't do predictive commonings job here (and in a bad way)
> +! ???  It still does but not as bad as it could.  Less prephitmps
> +! would be better, pcom does it with 6.
> +! { dg-final { scan-tree-dump-times "# prephitmp" 9 "pre" } }

Reply via email to