Hi,

sorry for the somewhat long turnaround...

On Tue, Mar 05 2019, Richard Biener wrote:
> On Tue, 5 Mar 2019, Richard Biener wrote:
>
>> On Tue, 5 Mar 2019, Martin Jambor wrote:
>> > @@ -1165,14 +1165,9 @@ contains_vce_or_bfcref_p (const_tree ref)
>> >        ref = TREE_OPERAND (ref, 0);
>> >      }
>> >  
>> > -  if (TREE_CODE (ref) != MEM_REF
>> > -      || TREE_CODE (TREE_OPERAND (ref, 0)) != ADDR_EXPR)
>> > -    return false;
>> > -
>> > -  tree mem = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
>> > -  if (TYPE_MAIN_VARIANT (TREE_TYPE (ref))
>> > -      != TYPE_MAIN_VARIANT (TREE_TYPE (mem)))
>> > -    return true;
>> > +  if (TREE_CODE (ref) == MEM_REF
>> > +      && TYPE_REF_CAN_ALIAS_ALL (TREE_TYPE (TREE_OPERAND (ref, 1))))
>> > +      return true;
>> 
>> This doesn't make much sense to me - why not simply go back to the
>> old implementation which would mean to just
>> 
>>    return false;
>> 
>> here?
>
> Ah - because the testcase from r255510 would break...

Yes.

>
> The above is still a "bit" very tied to how we fold memcpy and friends,
> thus unreliable.

Well, I thought about it a bit too, but eventually decided that the
unreliability is on the false-positive side.

>
> Isn't the issue for the memcpy thing that we fail to record an
> access for the char[] access (remember it's now char[] MEM_REF,
> no longer struct X * MEM_REF with ref-all!).  So maybe it now
> _does_ work with just return false...
>

No, it does not work; I had tried.  But yesterday I had another look at
the testcase and realized that the reason it does not is that total
scalarization kicks back in, and we ignore data in padding in totally
scalarized aggregates (otherwise we would never have removed the
original accesses to the aggregate, which is the point of total
scalarization).
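
To make that concrete, here is a minimal sketch of my own (it is not
one of the testcases below, and whether the memcpy calls really get
folded into char[]-typed MEM_REFs like this depends on the folding
behavior discussed above):

  /* On LP64 targets there are 6 bytes of padding between the members.  */
  struct S { short s; long l; };

  void f (struct S *p, struct S *q)
  {
    struct S tmp;
    /* Both copies can be folded into accesses through char[16]-typed
       MEM_REFs, i.e. type-changing accesses that also cover the
       padding bytes.  */
    __builtin_memcpy (&tmp, p, sizeof (struct S));
    __builtin_memcpy (q, &tmp, sizeof (struct S));
  }

If tmp is totally scalarized into just tmp$s and tmp$l, the padding
bytes read from *p are never stored to *q, which a later memcmp of the
two structures can observe.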

So, if we can cope with the headache of yet another weird flag in the
SRA access structure, the following patch also fixes the issue, because
it does consider padding important if it is accessed with a
type-changing MEM_REF, even when the aggregate is totally scalarized.

If we wanted to be even less conservative, the test in
contains_vce_or_bfcref_p could be extended along the lines of the code
in comment 6 of PR 85762, but for all three PRs this fixes, the produced
code is the same (or seems to be the same), so perhaps this is good
enough(TM).

So far I have only tested this on x86_64-linux (and found out I need to
un-XFAIL gcc.dg/guality/pr54970.c, which is not so surprising because it
was XFAILed by r255510).  Should I test a bit more and commit this to
trunk (and then to gcc-8-branch)?

Thanks,

Martin



2019-03-06  Martin Jambor  <mjam...@suse.cz>

        PR tree-optimization/85762
        PR tree-optimization/87008
        PR tree-optimization/85459
        * tree-sra.c (struct access): New flag grp_type_changing_ref.
        (dump_access): Dump it.
        (contains_vce_or_bfcref_p): New parameter, set the bool it points
        to if there is a type-changing MEM_REF.  Adjust all callers.
        (build_accesses_from_assign): Set grp_type_changing_ref if
        appropriate.
        (sort_and_splice_var_accesses): Merge grp_type_changing_ref values.

        testsuite/
        * g++.dg/tree-ssa/pr87008.C: New test.
        * gcc.dg/tree-ssa/pr83141.c: Remove dump test.
---
 gcc/testsuite/g++.dg/tree-ssa/pr87008.C | 17 ++++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr83141.c |  3 +-
 gcc/tree-sra.c                          | 58 ++++++++++++++++++-------
 3 files changed, 60 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr87008.C

diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr87008.C b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C
new file mode 100644
index 00000000000..eef521f9ad5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+extern void dontcallthis();
+
+struct A { long a, b; };
+struct B : A {};
+template<class T>void cp(T&a,T const&b){a=b;}
+long f(B x){
+  B y; cp<A>(y,x);
+  B z; cp<A>(z,x);
+  if (y.a - z.a)
+    dontcallthis ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "dontcallthis" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83141.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83141.c
index 73ea45c613c..d1ad3340dbd 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr83141.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83141.c
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O -fdump-tree-esra-details" } */
+/* { dg-options "-O" } */
 
 volatile short vs;
 volatile long vl;
@@ -34,4 +34,3 @@ int main()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-not "Will attempt to totally scalarize" "esra" } } */
diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index eeef31ba496..003a54e99f2 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -224,6 +224,10 @@ struct access
      entirely? */
   unsigned grp_total_scalarization : 1;
 
+  /* Set when this portion of the aggregate is accessed either through a
+     non-register type VIEW_CONVERT_EXPR or a type-changing MEM_REF.  */
+  unsigned grp_type_changing_ref : 1;
+
   /* Other passes of the analysis use this bit to make function
      analyze_access_subtree create scalar replacements for this group if
      possible.  */
@@ -441,6 +445,7 @@ dump_access (FILE *f, struct access *access, bool grp)
     fprintf (f, ", grp_read = %d, grp_write = %d, grp_assignment_read = %d, "
             "grp_assignment_write = %d, grp_scalar_read = %d, "
             "grp_scalar_write = %d, grp_total_scalarization = %d, "
+            "grp_type_chaning_ref = %d, "
             "grp_hint = %d, grp_covered = %d, "
             "grp_unscalarizable_region = %d, grp_unscalarized_data = %d, "
             "grp_partial_lhs = %d, grp_to_be_replaced = %d, "
@@ -449,6 +454,7 @@ dump_access (FILE *f, struct access *access, bool grp)
             access->grp_read, access->grp_write, access->grp_assignment_read,
             access->grp_assignment_write, access->grp_scalar_read,
             access->grp_scalar_write, access->grp_total_scalarization,
+            access->grp_type_changing_ref,
             access->grp_hint, access->grp_covered,
             access->grp_unscalarizable_region, access->grp_unscalarized_data,
             access->grp_partial_lhs, access->grp_to_be_replaced,
@@ -456,9 +462,9 @@ dump_access (FILE *f, struct access *access, bool grp)
             access->grp_not_necessarilly_dereferenced);
   else
     fprintf (f, ", write = %d, grp_total_scalarization = %d, "
-            "grp_partial_lhs = %d\n",
+            "grp_type_chaning_ref = %d, grp_partial_lhs = %d\n",
             access->write, access->grp_total_scalarization,
-            access->grp_partial_lhs);
+            access->grp_type_changing_ref, access->grp_partial_lhs);
 }
 
 /* Dump a subtree rooted in ACCESS to file F, indent by LEVEL.  */
@@ -1150,29 +1156,36 @@ contains_view_convert_expr_p (const_tree ref)
   return false;
 }
 
-/* Return true if REF contains a VIEW_CONVERT_EXPR or a MEM_REF that performs
-   type conversion or a COMPONENT_REF with a bit-field field declaration.  */
+/* Return true if REF contains a VIEW_CONVERT_EXPR or a COMPONENT_REF with a
+   bit-field field declaration.  If TYPE_CHANGING_P is non-NULL, the bool it
+   points to will be set if REF contains any of the above or a MEM_REF
+   expression that effectively performs type conversion.  */
 
 static bool
-contains_vce_or_bfcref_p (const_tree ref)
+contains_vce_or_bfcref_p (const_tree ref, bool *type_changing_p)
 {
   while (handled_component_p (ref))
     {
       if (TREE_CODE (ref) == VIEW_CONVERT_EXPR
          || (TREE_CODE (ref) == COMPONENT_REF
              && DECL_BIT_FIELD (TREE_OPERAND (ref, 1))))
-       return true;
+       {
+         if (type_changing_p)
+           *type_changing_p = true;
+         return true;
+       }
       ref = TREE_OPERAND (ref, 0);
     }
 
-  if (TREE_CODE (ref) != MEM_REF
+  if (!type_changing_p
+      || TREE_CODE (ref) != MEM_REF
       || TREE_CODE (TREE_OPERAND (ref, 0)) != ADDR_EXPR)
     return false;
 
   tree mem = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
   if (TYPE_MAIN_VARIANT (TREE_TYPE (ref))
       != TYPE_MAIN_VARIANT (TREE_TYPE (mem)))
-    return true;
+    *type_changing_p = true;
 
   return false;
 }
@@ -1368,15 +1381,21 @@ build_accesses_from_assign (gimple *stmt)
       lacc->grp_assignment_write = 1;
       if (storage_order_barrier_p (rhs))
        lacc->grp_unscalarizable_region = 1;
+      bool type_changing_p = false;
+      contains_vce_or_bfcref_p (lhs, &type_changing_p);
+      lacc->grp_type_changing_ref |= type_changing_p;
     }
 
   if (racc)
     {
       racc->grp_assignment_read = 1;
-      if (should_scalarize_away_bitmap && !gimple_has_volatile_ops (stmt)
-         && !is_gimple_reg_type (racc->type))
+      bool type_changing_p = false;
+      bool vce_or_bf = contains_vce_or_bfcref_p (rhs, &type_changing_p);
+      racc->grp_type_changing_ref |= type_changing_p;
+      if (should_scalarize_away_bitmap && !is_gimple_reg_type (racc->type))
        {
-         if (contains_vce_or_bfcref_p (rhs))
+
+         if (vce_or_bf || gimple_has_volatile_ops (stmt))
            bitmap_set_bit (cannot_scalarize_away_bitmap,
                            DECL_UID (racc->base));
          else
@@ -2095,6 +2114,7 @@ sort_and_splice_var_accesses (tree var)
       bool grp_assignment_write = access->grp_assignment_write;
       bool multiple_scalar_reads = false;
       bool total_scalarization = access->grp_total_scalarization;
+      bool type_changing_ref = access->grp_type_changing_ref;
       bool grp_partial_lhs = access->grp_partial_lhs;
       bool first_scalar = is_gimple_reg_type (access->type);
       bool unscalarizable_region = access->grp_unscalarizable_region;
@@ -2144,6 +2164,7 @@ sort_and_splice_var_accesses (tree var)
          grp_partial_lhs |= ac2->grp_partial_lhs;
          unscalarizable_region |= ac2->grp_unscalarizable_region;
          total_scalarization |= ac2->grp_total_scalarization;
+         type_changing_ref |= ac2->grp_type_changing_ref;
          relink_to_new_repr (access, ac2);
 
          /* If there are both aggregate-type and scalar-type accesses with
@@ -2182,6 +2203,7 @@ sort_and_splice_var_accesses (tree var)
       access->grp_hint = total_scalarization
        || (multiple_scalar_reads && !constant_decl_p (var));
       access->grp_total_scalarization = total_scalarization;
+      access->grp_type_changing_ref = type_changing_ref;
       access->grp_partial_lhs = grp_partial_lhs;
       access->grp_unscalarizable_region = unscalarizable_region;
 
@@ -2538,7 +2560,11 @@ analyze_access_subtree (struct access *root, struct access *parent,
        root->grp_total_scalarization = 0;
     }
 
-  if (!hole || root->grp_total_scalarization)
+  if (!hole
+      /* We assume that totally scalarized aggregates do not have any
+        meaningful data in holes (padding), but only if they are accessed
+        with exactly the same types as they were declared.  */
+      || (root->grp_total_scalarization && !root->grp_type_changing_ref))
     root->grp_covered = 1;
   else if (root->grp_write || comes_initialized_p (root->base))
     root->grp_unscalarized_data = 1; /* not covered and written to */
@@ -3557,7 +3583,7 @@ sra_modify_assign (gimple *stmt, gimple_stmt_iterator *gsi)
            }
          else if (lacc
                   && AGGREGATE_TYPE_P (TREE_TYPE (rhs))
-                  && !contains_vce_or_bfcref_p (rhs))
+                  && !contains_vce_or_bfcref_p (rhs, NULL))
            rhs = build_ref_for_model (loc, rhs, 0, lacc, gsi, false);
 
          if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
@@ -3578,7 +3604,7 @@ sra_modify_assign (gimple *stmt, gimple_stmt_iterator *gsi)
       if (!useless_type_conversion_p (TREE_TYPE (dlhs), TREE_TYPE (drhs)))
        {
          if (AGGREGATE_TYPE_P (TREE_TYPE (drhs))
-             && !contains_vce_or_bfcref_p (drhs))
+             && !contains_vce_or_bfcref_p (drhs, NULL))
            drhs = build_debug_ref_for_model (loc, drhs, 0, lacc);
          if (drhs
              && !useless_type_conversion_p (TREE_TYPE (dlhs),
@@ -3625,8 +3651,8 @@ sra_modify_assign (gimple *stmt, gimple_stmt_iterator *gsi)
 
   if (modify_this_stmt
       || gimple_has_volatile_ops (stmt)
-      || contains_vce_or_bfcref_p (rhs)
-      || contains_vce_or_bfcref_p (lhs)
+      || contains_vce_or_bfcref_p (rhs, NULL)
+      || contains_vce_or_bfcref_p (lhs, NULL)
       || stmt_ends_bb_p (stmt))
     {
       /* No need to copy into a constant-pool, it comes pre-initialized.  */
-- 
2.21.0
