On 18 May 2016 at 19:38, Richard Biener <rguent...@suse.de> wrote:
> On Wed, 18 May 2016, Prathamesh Kulkarni wrote:
>
>> On 17 May 2016 at 18:36, Richard Biener <rguent...@suse.de> wrote:
>> > On Wed, 11 May 2016, Prathamesh Kulkarni wrote:
>> >
>> >> On 6 May 2016 at 17:20, Richard Biener <rguent...@suse.de> wrote:
>> >> >
>> >> > You can't simply use
>> >> >
>> >> > +      offset = int_byte_position (field);
>> >> >
>> >> > as it can be placed at variable offset which will make int_byte_position
>> >> > ICE.  Note it also returns a truncated byte position (bit position
>> >> > stripped) which may be undesirable here.  I think you want to use
>> >> > bit_position and if and only if DECL_FIELD_OFFSET and
>> >> > DECL_FIELD_BIT_OFFSET are INTEGER_CST.
>> >> oops, I didn't realize offsets could be variable.
>> >> Will that be the case only for VLA member inside struct ?
>> >
>> > And non-VLA members after such member.
>> >
>> >> > Your observation about the expensiveness of the walk still stands I 
>> >> > guess
>> >> > and eventually you should at least cache the
>> >> > get_vec_alignment_for_record_decl cases.  Please make those workers
>> >> > _type rather than _decl helpers.
>> >> Done
>> >> >
>> >> > You seem to simply get at the maximum vectorized field/array element
>> >> > alignment possible for all arrays - you could restrict that to
>> >> > arrays with at least vector size (as followup).
>> >> Um sorry, I didn't understand this part.
>> >
>> > It doesn't make sense to align
>> >
>> > struct { int a; int b; int c; int d; float b[3]; int e; };
>> >
>> > because we have a float[3] member.  There is no vector size that
>> > would cover the float[3] array.
>> Thanks for the explanation.
>> So we do not want to align the struct if sizeof (array_field) < sizeof
>> (vector_type).
>> This issue is also present without the patch for global arrays, so I modified
>> get_vec_alignment_for_array_type to return 0 if sizeof (array_type) <
>> sizeof (vectype).
>> >
>> >> >
>> >> > +  /* Skip artificial decls like typeinfo decls or if
>> >> > +     record is packed.  */
>> >> > +  if (DECL_ARTIFICIAL (record_decl) || TYPE_PACKED (type))
>> >> > +    return 0;
>> >> >
>> >> > I think we should honor DECL_USER_ALIGN as well and not mess with those
>> >> > decls.
>> >> Done
>> >> >
>> >> > Given the patch now does quite some extra work it might make sense
>> >> > to split the symtab part out of the vect_can_force_dr_alignment_p
>> >> > predicate and call that early.
>> >> In the patch I call symtab_node::can_increase_alignment_p early. I tried
>> >> moving that to it's callers - vect_compute_data_ref_alignment and
>> >> increase_alignment::execute, however that failed some tests in vect, and
>> >> hence I didn't add the following hunk in the patch. Did I miss some
>> >> check ?
>> >
>> > Not sure.
>> >
>> >> diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
>> >> index 7652e21..2c1acee 100644
>> >> --- a/gcc/tree-vect-data-refs.c
>> >> +++ b/gcc/tree-vect-data-refs.c
>> >> @@ -795,7 +795,10 @@ vect_compute_data_ref_alignment (struct 
>> >> data_reference *dr)
>> >>    && TREE_CODE (TREE_OPERAND (base, 0)) == ADDR_EXPR)
>> >>   base = TREE_OPERAND (TREE_OPERAND (base, 0), 0);
>> >>
>> >> -      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
>> >> +      if (!(TREE_CODE (base) == VAR_DECL
>> >> +            && decl_in_symtab_p (base)
>> >> +            && symtab_node::get (base)->can_increase_alignment_p ()
>> >> +            && vect_can_force_dr_alignment_p (base, TYPE_ALIGN 
>> >> (vectype))))
>> >>   {
>> >>    if (dump_enabled_p ())
>> >>      {
>> >
>> > +  for (tree field = first_field (type);
>> > +       field != NULL_TREE;
>> > +       field = DECL_CHAIN (field))
>> > +    {
>> > +      /* Skip if not FIELD_DECL or has variable offset.  */
>> > +      if (TREE_CODE (field) != FIELD_DECL
>> > +         || TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
>> > +         || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST
>> > +         || DECL_USER_ALIGN (field)
>> > +         || DECL_ARTIFICIAL (field))
>> > +       continue;
>> >
>> > you can stop processing the type and return 0 here if the offset
>> > is not an INTEGER_CST.  All following fields will have the same issue.
>> >
>> > +      /* FIXME: is this check necessary since we check for variable
>> > offset above ?  */
>> > +      if (TREE_CODE (offset_tree) != INTEGER_CST)
>> > +       continue;
>> >
>> > the check should not be necessary.
>> >
>> >       offset = tree_to_uhwi (offset_tree);
>> >
>> > but in theory offset_tree might not fit a unsigned HOST_WIDE_INT, so
>> > instead of for INTEGER_CST please check ! tree_fits_uhwi_p (offset_tree).
>> > As above all following fields will also fail the check if this fails so
>> > you can return 0 early.
>> >
>> > +static unsigned
>> > +get_vec_alignment_for_type (tree type)
>> > +{
>> > +  if (type == NULL_TREE)
>> > +    return 0;
>> > +
>> > +  gcc_assert (TYPE_P (type));
>> > +
>> > +  unsigned *slot = type_align_map->get (type);
>> > +  if (slot)
>> > +    return *slot;
>> >
>> > I suggest to apply the caching only for the RECORD_TYPE case to keep
>> > the size of the map low.
>> >
>> > Otherwise the patch looks ok now.
>> I have done the suggested changes in attached patch.
>> Is this version OK to commit after bootstrap+test ?
>> The patch survives cross-testing on arm*-*-* and aarch64*-*-*.
>
> +  tree type_size_tree = TYPE_SIZE (type);
> +  if (!type_size_tree)
> +    return TYPE_ALIGN (vectype);
> +
> +  if (TREE_CODE (type_size_tree) != INTEGER_CST)
> +    return TYPE_ALIGN (vectype);
> +
> +  /* If array has constant size, ensure that it's at least equal to
> +     size of vector type.  */
> +  if (!tree_fits_uhwi_p (type_size_tree))
> +    return TYPE_ALIGN (vectype);
> +  unsigned HOST_WIDE_INT type_size = tree_to_uhwi (type_size_tree);
> +
> +  tree vectype_size_tree = TYPE_SIZE (vectype);
> +  if (!tree_fits_uhwi_p (vectype_size_tree))
> +    return TYPE_ALIGN (vectype);
> +
> +  unsigned HOST_WIDE_INT vectype_size = tree_to_uhwi (vectype_size_tree);
> +  return (type_size > vectype_size) ? TYPE_ALIGN (vectype) : 0;
>
> please change this to
>
>   if (! TYPE_SIZE (type)
>       || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
>       || tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (vectype)))
>     return 0;
>
>   return TYPE_ALIGN (vectype);
>
> consistent with the handling for "VLA" records.
>
> +  unsigned *slot = type_align_map->get (type);
> +  if (slot)
> +    return *slot;
> +
> +  unsigned max_align = 0, alignment;
> +  HOST_WIDE_INT offset;
> +  tree offset_tree;
> +
> +  if (TYPE_PACKED (type))
> +    return 0;
> +
>
> move the type_align_map query after the TYPE_PACKED check.
>
> +      /* We don't need to process the type further if offset is variable,
> +        since the offsets of remaining members will also be variable.  */
> +      if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
> +         || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
> +       return 0;
>
> just break; here (and in the other case returning zero).  If a
> previous record field said we'd want to align we still should align.
> Also we want to store 0 into the type_align_map as well.
>
> Ok with those changes.
Hi,
Is this version OK ?
Cross tested on arm*-*-* and aarch64*-*-*

Thanks,
Prathamesh
>
> Thanks,
> Richard.
diff --git a/gcc/testsuite/gcc.dg/vect/section-anchors-vect-70.c 
b/gcc/testsuite/gcc.dg/vect/section-anchors-vect-70.c
new file mode 100644
index 0000000..7010a52
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/section-anchors-vect-70.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target section_anchors } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 32
+
+/* Increase the alignment of the struct if an array member's offset is a
+   multiple of the alignment of the vector type corresponding to its scalar
+   type.
+   For the below test-case:
+   offsetof(e) == 8 bytes.
+   i) For arm: let x = alignment of vector type corresponding to int,
+   x == 8 bytes.
+   Since offsetof(e) % x == 0, set DECL_ALIGN(a, b, c) to x.
+   ii) For aarch64, ppc: x == 16 bytes.
+   Since offsetof(e) % x != 0, don't increase alignment of a, b, c.
+*/
+
+static struct A {
+  int p1, p2;
+  int e[N];
+} a, b, c;
+
+/* Vectorizable loop over the e[] members; whether a, b and c get their
+   alignment increased is target dependent (see comment above).  */
+int foo(void)
+{
+  for (int i = 0; i < N; i++)
+    a.e[i] = b.e[i] + c.e[i];
+
+   return a.e[0];
+}
+
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0 
"increase_alignment" { target aarch64*-*-* } } } */
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0 
"increase_alignment" { target powerpc64*-*-* } } } */
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 3 
"increase_alignment" { target arm*-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/section-anchors-vect-71.c 
b/gcc/testsuite/gcc.dg/vect/section-anchors-vect-71.c
new file mode 100644
index 0000000..7cbd1dc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/section-anchors-vect-71.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target section_anchors } */
+/* { dg-require-effective-target vect_int } */
+
+/* Should not increase the alignment of the struct because
+   sizeof (A.e) < sizeof (corresponding vector type).  */
+
+#define N 3
+
+static struct A {
+  int p1, p2;
+  int e[N];  /* N == 3: smaller than any int vector type on the tested
+               targets, so no alignment increase is expected.  */
+} a, b, c;
+
+int foo(void)
+{
+  for (int i = 0; i < N; i++)
+    a.e[i] = b.e[i] + c.e[i];
+
+   return a.e[0];
+}
+
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0 
"increase_alignment" { target aarch64*-*-* } } } */
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0 
"increase_alignment" { target powerpc64*-*-* } } } */
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0 
"increase_alignment" { target arm*-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/section-anchors-vect-72.c 
b/gcc/testsuite/gcc.dg/vect/section-anchors-vect-72.c
new file mode 100644
index 0000000..873fabe
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/section-anchors-vect-72.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target section_anchors } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 32
+
+/* Clone of section-anchors-vect-70.c having nested struct.  */
+
+/* The array is wrapped in a nested struct, so the record walk must
+   recurse through struct S to find e[].  */
+struct S
+{
+  int e[N];
+};
+
+static struct A {
+  int p1, p2;
+  struct S s; 
+} a, b, c;
+
+int foo(void)
+{
+  for (int i = 0; i < N; i++)
+    a.s.e[i] = b.s.e[i] + c.s.e[i];
+
+   return a.s.e[0];
+}
+
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0 
"increase_alignment" { target aarch64*-*-* } } } */
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0 
"increase_alignment" { target powerpc64*-*-* } } } */
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 3 
"increase_alignment" { target arm*-*-* } } } */
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 2b25b45..bd04cd7 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -794,38 +793,142 @@ make_pass_slp_vectorize (gcc::context *ctxt)
      This should involve global alignment analysis and in the future also
      array padding.  */
 
+static unsigned get_vec_alignment_for_type (tree);
+static hash_map<tree, unsigned> *type_align_map;
+
+/* Return the alignment of the vector type corresponding to the scalar
+   element type of the ARRAY_TYPE TYPE, or 0 if no such vector type exists,
+   TYPE has variable size, or TYPE is smaller than the vector type.  */
+static unsigned
+get_vec_alignment_for_array_type (tree type) 
+{
+  gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
+
+  tree vectype = get_vectype_for_scalar_type (strip_array_types (type));
+  /* Require a constant-size array at least as large as the vector type;
+     a smaller array can never be covered by a full vector access, so
+     aligning it would not help vectorization.  */
+  if (!vectype
+      || !TYPE_SIZE (type)
+      || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
+      || tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (vectype)))
+    return 0;
+
+  return TYPE_ALIGN (vectype);
+}
+
+/* Return the maximum of the vector-type alignments of TYPE's fields.
+   For now, only fields whose (constant) offset is a multiple of their
+   vector alignment are considered.  Return 0 if no suitable field is
+   found.  Results are cached in type_align_map.  */
+static unsigned
+get_vec_alignment_for_record_type (tree type) 
+{
+  gcc_assert (TREE_CODE (type) == RECORD_TYPE);
+
+  unsigned max_align = 0, alignment;
+  HOST_WIDE_INT offset;
+  tree offset_tree;
+
+  if (TYPE_PACKED (type))
+    return 0;
+
+  unsigned *slot = type_align_map->get (type);
+  if (slot)
+    return *slot;
+
+  for (tree field = first_field (type);
+       field != NULL_TREE;
+       field = DECL_CHAIN (field))
+    {
+      /* Skip if not a FIELD_DECL or if the alignment was set by the user.  */
+      if (TREE_CODE (field) != FIELD_DECL
+         || DECL_USER_ALIGN (field)
+         || DECL_ARTIFICIAL (field))
+       continue;
+
+      /* We don't need to process the type further if the offset is
+        variable, since the offsets of the remaining members will also
+        be variable.  */
+      if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
+         || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
+       break;
+
+      /* Similarly stop processing the type if offset_tree
+        does not fit in unsigned HOST_WIDE_INT.  */
+      offset_tree = bit_position (field);
+      if (!tree_fits_uhwi_p (offset_tree))
+       break;
+
+      offset = tree_to_uhwi (offset_tree); 
+      alignment = get_vec_alignment_for_type (TREE_TYPE (field));
+
+      /* Get the maximum alignment of a vectorized field/array among those
+        members whose offset is a multiple of the vector alignment.  */
+      if (alignment
+         && (offset % alignment == 0)
+         && (alignment > max_align))
+       max_align = alignment;
+    }
+
+  /* Cache the result (including 0) so the field walk is done at most
+     once per record type.  */
+  type_align_map->put (type, max_align);
+  return max_align;
+}
+
+/* Return the alignment of the vector type corresponding to TYPE's scalar
+   element type, or 0 if no such vector type exists or its alignment does
+   not exceed TYPE's current alignment.  */
+static unsigned
+get_vec_alignment_for_type (tree type)
+{
+  if (type == NULL_TREE)
+    return 0;
+
+  gcc_assert (TYPE_P (type));
+
+  /* Plain automatic variable: every switch arm assigns it, and there is
+     no reason for static storage duration here.  */
+  unsigned alignment;
+  switch (TREE_CODE (type))
+    {
+      case ARRAY_TYPE:
+       alignment = get_vec_alignment_for_array_type (type);
+       break;
+      case RECORD_TYPE:
+       alignment = get_vec_alignment_for_record_type (type);
+       break;
+      default:
+       alignment = 0;
+       break;
+    }
+
+  /* Only report an alignment that would actually increase TYPE's.  */
+  return (alignment > TYPE_ALIGN (type)) ? alignment : 0;
+}
+
+/* Entry point to increase_alignment pass.  */
 static unsigned int
 increase_alignment (void)
 {
   varpool_node *vnode;
 
   vect_location = UNKNOWN_LOCATION;
+  /* Cache used by get_vec_alignment_for_record_type; lives only for the
+     duration of this pass invocation.  */
+  type_align_map = new hash_map<tree, unsigned>;
 
   /* Increase the alignment of all global arrays for vectorization.  */
   FOR_EACH_DEFINED_VARIABLE (vnode)
     {
-      tree vectype, decl = vnode->decl;
-      tree t;
+      tree decl = vnode->decl;
       unsigned int alignment;
 
-      t = TREE_TYPE (decl);
-      if (TREE_CODE (t) != ARRAY_TYPE)
-        continue;
-      vectype = get_vectype_for_scalar_type (strip_array_types (t));
-      if (!vectype)
-        continue;
-      alignment = TYPE_ALIGN (vectype);
-      if (DECL_ALIGN (decl) >= alignment)
-        continue;
-
-      if (vect_can_force_dr_alignment_p (decl, alignment))
+      /* Honor user-specified alignment and skip artificial decls as well
+        as decls whose alignment the symbol table does not allow us to
+        increase.  */
+      if ((decl_in_symtab_p (decl)
+         && !symtab_node::get (decl)->can_increase_alignment_p ())
+         || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
+       continue;
+
+      alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
+      if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
         {
-         vnode->increase_alignment (TYPE_ALIGN (vectype));
+         vnode->increase_alignment (alignment);
           dump_printf (MSG_NOTE, "Increasing alignment of decl: ");
           dump_generic_expr (MSG_NOTE, TDF_SLIM, decl);
           dump_printf (MSG_NOTE, "\n");
         }
     }
+
+  /* The cache is only valid within a single execution of the pass.  */
+  delete type_align_map;
   return 0;
 }
 

Attachment: ChangeLog
Description: Binary data

Reply via email to