The following fixes an old missed basic-block vectorization issue
that exposes itself as a regression caused by an x86 cost change lumping a
lot more code into the same BB.

We sort DRs by constant offset, and if there are multiple
refs with the same offset we break the DR group.  This doesn't handle
things like

 a[0] = ..;
 a[1] = ..;
 ...
 a[0] = ..;
 a[1] = ..;

very well (read: not).  The temporary fix for GCC 8 to solve the fma3d
regression (and avoid an STLF issue) is to not break groups at such points
but simply ignore the duplicates we run into during group construction,
so only the first group in a BB with exact duplicates will be identified.

A more elaborate fix isn't suitable at this stage IMHO (I suggested how
to do it in the comment but haven't tried yet to see how complicated it
would end up).

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2017-12-01  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/83232
        * tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Fix
        detection of same access. Instead of breaking the group here
        do not consider the duplicate.  Add comment explaining real fix.

        * gfortran.dg/vect/pr83232.f90: New testcase.

Index: gcc/tree-vect-data-refs.c
===================================================================
--- gcc/tree-vect-data-refs.c   (revision 255300)
+++ gcc/tree-vect-data-refs.c   (working copy)
@@ -2841,10 +2841,6 @@ vect_analyze_data_ref_accesses (vec_info
          if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0)
            break;
 
-         /* Do not place the same access in the interleaving chain twice.  */
-         if (tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) == 0)
-           break;
-
          /* Check the types are compatible.
             ???  We don't distinguish this during sorting.  */
          if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
@@ -2854,7 +2850,25 @@ vect_analyze_data_ref_accesses (vec_info
          /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb).  */
          HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
          HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
-         gcc_assert (init_a <= init_b);
+         HOST_WIDE_INT init_prev
+           = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]));
+         gcc_assert (init_a <= init_b
+                     && init_a <= init_prev
+                     && init_prev <= init_b);
+
+         /* Do not place the same access in the interleaving chain twice.  */
+         if (init_b == init_prev)
+           {
+             gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]))
+                         < gimple_uid (DR_STMT (drb)));
+             /* ???  For now we simply "drop" the later reference which is
+                otherwise the same rather than finishing off this group.
+                In the end we'd want to re-process duplicates forming
+                multiple groups from the refs, likely by just collecting
+                all candidates (including duplicates and split points
+                below) in a vector and then process them together.  */
+             continue;
+           }
 
          /* If init_b == init_a + the size of the type * k, we have an
             interleaving, and DRA is accessed before DRB.  */
@@ -2866,10 +2880,7 @@ vect_analyze_data_ref_accesses (vec_info
          /* If we have a store, the accesses are adjacent.  This splits
             groups into chunks we support (we don't support vectorization
             of stores with gaps).  */
-         if (!DR_IS_READ (dra)
-             && (init_b - (HOST_WIDE_INT) TREE_INT_CST_LOW
-                                            (DR_INIT (datarefs_copy[i-1]))
-                 != type_size_a))
+         if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a)
            break;
 
          /* If the step (if not zero or non-constant) is greater than the
Index: gcc/testsuite/gfortran.dg/vect/pr83232.f90
===================================================================
--- gcc/testsuite/gfortran.dg/vect/pr83232.f90  (nonexistent)
+++ gcc/testsuite/gfortran.dg/vect/pr83232.f90  (working copy)
@@ -0,0 +1,33 @@
+! { dg-do compile }
+! { dg-require-effective-target vect_double }
+! { dg-additional-options "-funroll-loops --param vect-max-peeling-for-alignment=0 -fdump-tree-slp-details" }
+
+      SUBROUTINE MATERIAL_41_INTEGRATION ( STRESS,YLDC,EFPS,                   &
+     &  DTnext,Dxx,Dyy,Dzz,Dxy,Dxz,Dyz,MatID,P1,P3 )
+      REAL(KIND(0D0)), INTENT(INOUT) :: STRESS(6)
+      REAL(KIND(0D0)), INTENT(IN)    :: DTnext
+      REAL(KIND(0D0)), INTENT(IN)    :: Dxx,Dyy,Dzz,Dxy,Dxz,Dyz
+      REAL(KIND(0D0)) :: Einc(6)
+      REAL(KIND(0D0)) :: P1,P3
+
+      Einc(1) = DTnext * Dxx ! (1)
+      Einc(2) = DTnext * Dyy
+      Einc(3) = DTnext * Dzz
+      Einc(4) = DTnext * Dxy
+      Einc(5) = DTnext * Dxz
+      Einc(6) = DTnext * Dyz
+      DO i = 1,6
+        STRESS(i) = STRESS(i) + P3*Einc(i)
+      ENDDO
+      STRESS(1) = STRESS(1) + (DTnext * P1 * (Dxx+Dyy+Dzz)) ! (2)
+      STRESS(2) = STRESS(2) + (DTnext * P1 * (Dxx+Dyy+Dzz))
+      STRESS(3) = 0.0
+      Einc(5) = 0.0  ! (3)
+      Einc(6) = 0.0
+      call foo (Einc)
+      END SUBROUTINE
+
+! We should vectorize (1) and (2)
+! { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "slp1" } }
+! We fail to vectorize at (3), this can be fixed in the future
+! { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "slp1" { xfail *-*-* } } }

Reply via email to