On Mon, Nov 11, 2019 at 7:47 PM Richard Sandiford
<richard.sandif...@arm.com> wrote:
>
> This patch adds a bunch of flags to dr_with_seg_len_pair_t,
> for use by later patches.  The update to tree-loop-distribution.c
> is conservatively correct, but might be tweakable later.

Does this all work with interleaved SLP loads/stores like

  a[i] = b[i];
  tem1 = b[i+1];
  tem2 = b[i+2];
  a[i+2] = tem2;
  a[i+1] = tem1;

where we don't preserve the scalar order but emit code at the
latest stmt of the grouped access?  That is, what does
"preserve scalar oder" actually mean?


> 2019-11-11  Richard Sandiford  <richard.sandif...@arm.com>
>
> gcc/
>         * tree-data-ref.h (DR_ALIAS_RAW, DR_ALIAS_WAR, DR_ALIAS_WAW)
>         (DR_ALIAS_ARBITRARY, DR_ALIAS_SWAPPED, DR_ALIAS_UNSWAPPED): New flags.
>         (dr_with_seg_len_pair_t::sequencing): New enum.
>         (dr_with_seg_len_pair_t::flags): New member variable.
>         (dr_with_seg_len_pair_t::dr_with_seg_len_pair_t): Take a sequencing
>         parameter and initialize the flags member variable.
>         * tree-loop-distribution.c (compute_alias_check_pairs): Update
>         call accordingly.
>         * tree-vect-data-refs.c (vect_prune_runtime_alias_test_list): 
> Likewise.
>         Ensure the two data references in an alias pair are in statement
>         order, if there is a defined order.
>         * tree-data-ref.c (prune_runtime_alias_test_list): Use
>         DR_ALIAS_SWAPPED and DR_ALIAS_UNSWAPPED to record whether we've
>         swapped the references in a dr_with_seg_len_pair_t.  OR together
>         the flags when merging two dr_with_seg_len_pair_ts.  After merging,
>         try to restore the original dr_with_seg_len order, updating the
>         flags if that fails.
>
> Index: gcc/tree-data-ref.h
> ===================================================================
> --- gcc/tree-data-ref.h 2019-07-10 19:41:26.383898124 +0100
> +++ gcc/tree-data-ref.h 2019-11-11 18:30:50.527193443 +0000
> @@ -222,20 +222,107 @@ typedef struct data_reference *data_refe
>    unsigned int align;
>  };
>
> +/* Flags that describe a potential alias between two dr_with_seg_lens.
> +   In general, each pair of dr_with_seg_lens represents a composite of
> +   multiple access pairs P, so testing flags like DR_IS_READ on the DRs
> +   does not give meaningful information.
> +
> +   DR_ALIAS_RAW:
> +       There is a pair in P for which the second reference is a read
> +       and the first is a write.
> +
> +   DR_ALIAS_WAR:
> +       There is a pair in P for which the second reference is a write
> +       and the first is a read.
> +
> +   DR_ALIAS_WAW:
> +       There is a pair in P for which both references are writes.
> +
> +   DR_ALIAS_ARBITRARY:
> +       Either
> +       (a) it isn't possible to classify one pair in P as RAW, WAW or WAR; or
> +       (b) there is a pair in P that breaks the ordering assumption below.
> +
> +       This flag overrides the RAW, WAR and WAW flags above.
> +
> +   DR_ALIAS_UNSWAPPED:
> +   DR_ALIAS_SWAPPED:
> +       Temporary flags that indicate whether there is a pair P whose
> +       DRs have or haven't been swapped around.
> +
> +   The ordering assumption mentioned above is that for every pair
> +   (DR_A, DR_B) in P:
> +
> +   (1) The original code accesses n elements for DR_A and n elements for 
> DR_B,
> +       interleaved as follows:
> +
> +        one access of size DR_A.access_size at DR_A.dr
> +        one access of size DR_B.access_size at DR_B.dr
> +        one access of size DR_A.access_size at DR_A.dr + STEP_A
> +        one access of size DR_B.access_size at DR_B.dr + STEP_B
> +        one access of size DR_A.access_size at DR_A.dr + STEP_A * 2
> +        one access of size DR_B.access_size at DR_B.dr + STEP_B * 2
> +        ...
> +
> +   (2) The new code accesses the same data in exactly two chunks:
> +
> +        one group of accesses spanning |DR_A.seg_len| + DR_A.access_size
> +        one group of accesses spanning |DR_B.seg_len| + DR_B.access_size
> +
> +   A pair might break this assumption if the DR_A and DR_B accesses
> +   in the original or the new code are mingled in some way.  For example,
> +   if DR_A.access_size represents the effect of two individual writes
> +   to nearby locations, the pair breaks the assumption if those writes
> +   occur either side of the access for DR_B.
> +
> +   Note that DR_ALIAS_ARBITRARY describes whether the ordering assumption
> +   fails to hold for any individual pair in P.  If the assumption *does*
> +   hold for every pair in P, it doesn't matter whether it holds for the
> +   composite pair or not.  In other words, P should represent the complete
> +   set of pairs that the composite pair is testing, so only the ordering
> +   of two accesses in the same member of P matters.  */
> +const unsigned int DR_ALIAS_RAW = 1U << 0;
> +const unsigned int DR_ALIAS_WAR = 1U << 1;
> +const unsigned int DR_ALIAS_WAW = 1U << 2;
> +const unsigned int DR_ALIAS_ARBITRARY = 1U << 3;
> +const unsigned int DR_ALIAS_SWAPPED = 1U << 4;
> +const unsigned int DR_ALIAS_UNSWAPPED = 1U << 5;
> +
>  /* This struct contains two dr_with_seg_len objects with aliasing data
>     refs.  Two comparisons are generated from them.  */
>
>  class dr_with_seg_len_pair_t
>  {
>  public:
> -  dr_with_seg_len_pair_t (const dr_with_seg_len& d1,
> -                              const dr_with_seg_len& d2)
> -    : first (d1), second (d2) {}
> +  /* WELL_ORDERED indicates that the ordering assumption described above
> +     DR_ALIAS_ARBITRARY holds.  REORDERED indicates that it doesn't.  */
> +  enum sequencing { WELL_ORDERED, REORDERED };
> +
> +  dr_with_seg_len_pair_t (const dr_with_seg_len &,
> +                         const dr_with_seg_len &, sequencing);
>
>    dr_with_seg_len first;
>    dr_with_seg_len second;
> +  unsigned int flags;
>  };
>
> +inline dr_with_seg_len_pair_t::
> +dr_with_seg_len_pair_t (const dr_with_seg_len &d1, const dr_with_seg_len &d2,
> +                       sequencing seq)
> +  : first (d1), second (d2), flags (0)
> +{
> +  if (DR_IS_READ (d1.dr) && DR_IS_WRITE (d2.dr))
> +    flags |= DR_ALIAS_WAR;
> +  else if (DR_IS_WRITE (d1.dr) && DR_IS_READ (d2.dr))
> +    flags |= DR_ALIAS_RAW;
> +  else if (DR_IS_WRITE (d1.dr) && DR_IS_WRITE (d2.dr))
> +    flags |= DR_ALIAS_WAW;
> +  else
> +    gcc_unreachable ();
> +  if (seq == REORDERED)
> +    flags |= DR_ALIAS_ARBITRARY;
> +}
> +
>  enum data_dependence_direction {
>    dir_positive,
>    dir_negative,
> Index: gcc/tree-loop-distribution.c
> ===================================================================
> --- gcc/tree-loop-distribution.c        2019-11-11 18:30:43.207244530 +0000
> +++ gcc/tree-loop-distribution.c        2019-11-11 18:30:50.527193443 +0000
> @@ -2477,7 +2477,9 @@ compute_alias_check_pairs (class loop *l
>
>        dr_with_seg_len_pair_t dr_with_seg_len_pair
>         (dr_with_seg_len (dr_a, seg_length_a, access_size_a, align_a),
> -        dr_with_seg_len (dr_b, seg_length_b, access_size_b, align_b));
> +        dr_with_seg_len (dr_b, seg_length_b, access_size_b, align_b),
> +        /* ??? Would WELL_ORDERED be safe?  */
> +        dr_with_seg_len_pair_t::REORDERED);
>
>        comp_alias_pairs->safe_push (dr_with_seg_len_pair);
>      }
> Index: gcc/tree-vect-data-refs.c
> ===================================================================
> --- gcc/tree-vect-data-refs.c   2019-11-11 18:30:43.207244530 +0000
> +++ gcc/tree-vect-data-refs.c   2019-11-11 18:30:50.531193415 +0000
> @@ -3509,10 +3509,13 @@ vect_prune_runtime_alias_test_list (loop
>        dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
>        stmt_vec_info stmt_info_b = dr_info_b->stmt;
>
> +      bool preserves_scalar_order_p
> +       = vect_preserves_scalar_order_p (dr_info_a, dr_info_b);
> +
>        /* Skip the pair if inter-iteration dependencies are irrelevant
>          and intra-iteration dependencies are guaranteed to be honored.  */
>        if (ignore_step_p
> -         && (vect_preserves_scalar_order_p (dr_info_a, dr_info_b)
> +         && (preserves_scalar_order_p
>               || vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
>                                                  &lower_bound)))
>         {
> @@ -3630,11 +3633,21 @@ vect_prune_runtime_alias_test_list (loop
>                                            stmt_info_b->stmt);
>         }
>
> +      dr_with_seg_len dr_a (dr_info_a->dr, segment_length_a,
> +                           access_size_a, align_a);
> +      dr_with_seg_len dr_b (dr_info_b->dr, segment_length_b,
> +                           access_size_b, align_b);
> +      /* Canonicalize the order to be the one that's needed for accurate
> +        RAW, WAR and WAW flags, in cases where the data references are
> +        well-ordered.  The order doesn't really matter otherwise,
> +        but we might as well be consistent.  */
> +      if (get_later_stmt (stmt_info_a, stmt_info_b) == stmt_info_a)
> +       std::swap (dr_a, dr_b);
> +
>        dr_with_seg_len_pair_t dr_with_seg_len_pair
> -       (dr_with_seg_len (dr_info_a->dr, segment_length_a,
> -                         access_size_a, align_a),
> -        dr_with_seg_len (dr_info_b->dr, segment_length_b,
> -                         access_size_b, align_b));
> +       (dr_a, dr_b, (preserves_scalar_order_p
> +                     ? dr_with_seg_len_pair_t::WELL_ORDERED
> +                     : dr_with_seg_len_pair_t::REORDERED));
>
>        comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
>      }
> Index: gcc/tree-data-ref.c
> ===================================================================
> --- gcc/tree-data-ref.c 2019-11-11 18:30:47.199216669 +0000
> +++ gcc/tree-data-ref.c 2019-11-11 18:30:50.527193443 +0000
> @@ -1503,7 +1503,12 @@ prune_runtime_alias_test_list (vec<dr_wi
>        if (comp_res == 0)
>         comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
>        if (comp_res > 0)
> -       std::swap (alias_pair->first, alias_pair->second);
> +       {
> +         std::swap (alias_pair->first, alias_pair->second);
> +         alias_pair->flags |= DR_ALIAS_SWAPPED;
> +       }
> +      else
> +       alias_pair->flags |= DR_ALIAS_UNSWAPPED;
>      }
>
>    /* Sort the collected data ref pairs so that we can scan them once to
> @@ -1515,10 +1520,13 @@ prune_runtime_alias_test_list (vec<dr_wi
>    for (i = 1; i < alias_pairs->length (); ++i)
>      {
>        /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
> -      dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
> -                     *dr_b1 = &(*alias_pairs)[i-1].second,
> -                     *dr_a2 = &(*alias_pairs)[i].first,
> -                     *dr_b2 = &(*alias_pairs)[i].second;
> +      dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[i - 1];
> +      dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
> +
> +      dr_with_seg_len *dr_a1 = &alias_pair1->first;
> +      dr_with_seg_len *dr_b1 = &alias_pair1->second;
> +      dr_with_seg_len *dr_a2 = &alias_pair2->first;
> +      dr_with_seg_len *dr_b2 = &alias_pair2->second;
>
>        /* Remove duplicate data ref pairs.  */
>        if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
> @@ -1527,6 +1535,7 @@ prune_runtime_alias_test_list (vec<dr_wi
>             dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
>                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
>                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
> +         alias_pair1->flags |= alias_pair2->flags;
>           alias_pairs->ordered_remove (i--);
>           continue;
>         }
> @@ -1632,10 +1641,26 @@ prune_runtime_alias_test_list (vec<dr_wi
>             dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
>                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
>                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
> +         alias_pair1->flags |= alias_pair2->flags;
>           alias_pairs->ordered_remove (i);
>           i--;
>         }
>      }
> +
> +  /* Try to restore the original dr_with_seg_len order within each
> +     dr_with_seg_len_pair_t.  If we ended up combining swapped and
> +     unswapped pairs into the same check, we have to invalidate any
> +     RAW, WAR and WAW information for it.  */
> +  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
> +    {
> +      unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
> +      unsigned int swapped = (alias_pair->flags & swap_mask);
> +      if (swapped == DR_ALIAS_SWAPPED)
> +       std::swap (alias_pair->first, alias_pair->second);
> +      else if (swapped != DR_ALIAS_UNSWAPPED)
> +       alias_pair->flags |= DR_ALIAS_ARBITRARY;
> +      alias_pair->flags &= ~swap_mask;
> +    }
>  }
>
>  /* Given LOOP's two data references and segment lengths described by DR_A

Reply via email to