> > +   case OBJ_TYPE_REF:
> > +     {
> > +       if (!operand_equal_p (OBJ_TYPE_REF_EXPR (arg0),
> > +                             OBJ_TYPE_REF_EXPR (arg1), flags))
> > +         return false;
> > +       if (flag_devirtualize && virtual_method_call_p (arg0))
> > +         {
> > +           if (tree_to_uhwi (OBJ_TYPE_REF_TOKEN (arg0))
> > +               != tree_to_uhwi (OBJ_TYPE_REF_TOKEN (arg1)))
> > +             return false;
> > +           if (!types_same_for_odr (obj_type_ref_class (arg0),
> > +                                    obj_type_ref_class (arg1)))
> > +             return false;
> 
> devirt machinery in operand_equal_p?  please not.  not more places!

OBJ_TYPE_REF explicitly says what is the type of class whose virtual method is
called. It is GIMPLE operand expression like other, so I think we need to handle
it.

Actually I think I can just drop the flag_devirtualize check because with
!flag_devirtualize we should simply avoid having OBJ_TYPE_REF around.  That
would make it looking less devirt specific.  We can also test types for
equivalence of main variant, but that would just introduce false positives when
LTO did not merged two identical ODR types.  It would be correct though.
> 
> > +           /* OBJ_TYPE_REF_OBJECT is used to track the instance of
> > +              object THIS pointer points to.  Checking that both
> > +              addresses equal is safe approximation of the fact
> > +              that dynamic types are equal.
> > +              Do not care about the other flags, because this expression
> > +              does not attribute to actual value of OBJ_TYPE_REF  */
> > +           if (!operand_equal_p (OBJ_TYPE_REF_OBJECT (arg0),
> > +                                 OBJ_TYPE_REF_OBJECT (arg1),
> > +                                 OEP_ADDRESS_OF))
> > +             return false;
> > +         }
> > +
> > +       return true;
> > +     }
> > +
> >     default:
> >       return 0;
> >     }
> > @@ -3097,6 +3152,21 @@ operand_equal_p (const_tree arg0, const_
> >     }
> >  
> >      case tcc_declaration:
> > +      /* At LTO time the FIELD_DECL may exist in multiple copies.
> > +    We only care about offsets and bit offsets for operands.  */
> 
> Err?  Even at LTO time FIELD_DECLs should only appear once.
> So - testcase?

FIELD_DECL has TREE_TYPE and TREE_TYPE may not get merged by LTO.  So if the
two expressions originate from two different units, we may have two
semantically equivalent FIELD_DECLs (of compatible types and same offsets) that
occupy different memory locations because their merging was prevented by
something upstream (like complete wrt incmplete pointer in the type)
> > +      /* In GIMPLE empty constructors are allowed in initializers of
> > +    vector types.  */
> 
> Why this comment about GIMPLE?  This is about comparing GENERIC
> trees which of course can have CONSTRUCTORs of various sorts.
> 
> > +      if (TREE_CODE (arg0) == CONSTRUCTOR)
> > +   {
> > +     unsigned length1 = vec_safe_length (CONSTRUCTOR_ELTS (arg0));
> > +     unsigned length2 = vec_safe_length (CONSTRUCTOR_ELTS (arg1));
> > +
> > +     if (length1 != length2)
> > +       return false;
> > +
> > +     flags &= ~(OEP_CONSTANT_ADDRESS_OF|OEP_ADDRESS_OF);
> > +
> > +     for (unsigned i = 0; i < length1; i++)
> > +       if (!operand_equal_p (CONSTRUCTOR_ELT (arg0, i)->value,
> > +                             CONSTRUCTOR_ELT (arg1, i)->value, flags))
> 
> You need to compare ->index as well here.  I'm not sure constructor
> values are sorted always so you might get very many false negatives.

many false negatives is better than all false negatices :)

You are right, either I should punt on non-empty constructors or compare
the indexes, will do the second.

> > +    case FIELD_DECL:
> > +      /* At LTO time the FIELD_DECL may exist in multiple copies.
> > +    We only care about offsets and bit offsets for operands.  */
> 
> So explain that this is the reason we don't want to hash by
> DECL_UID.  I still think this is bogus.

Will do if we agree on having this.

I know you would like ipa-icf to keep original bodies and use them for inlining
declaring alias sets to be function local.  This is wrong plan.  Consder:

void t(int *ptr)
{
  *ptr=1;
}

int a(int *ptr1, int *ptr2)
{
  int a = *ptr1;
  t(ptr2)
  return a+*ptr1;
}

long b(long *ptr1, int *ptr2)
{
  int a = *ptr1;
  t(ptr2)
  return a+*ptr1;
}

here aliasing leads to the two options to be optimizer differently:
a:
.LFB1:  
        .cfi_startproc
        movl    4(%esp), %edx
        movl    8(%esp), %ecx
        movl    (%edx), %eax
        movl    $1, (%ecx)
        addl    (%edx), %eax
        ret
        .cfi_endproc
b:
.LFB2:  
        .cfi_startproc
        movl    4(%esp), %eax
        movl    8(%esp), %edx
        movl    (%eax), %eax
        movl    $1, (%edx)
        addl    %eax, %eax
        ret
        .cfi_endproc

however with -fno-early-inlining the functions look identical (modulo alias
sets) at ipa-icf time.  If we merged a/b, we could get wrong code for a
even though no inlining of a or b happens.

So either we match the alias sets or we need to verify that the alias sets
permit precisely the same set of optimizations with taking possible inlining
into account.

I also do not believe that TBAA should be function local.  I believe it is
useful to propagate stuff interprocedurally, like ipa-prop could be able to
propagate this:

long *ptr1;
int *ptr2;
t(int *ptr)
{
  return *ptr;
}
wrap(int *ptr)
{
 *ptr1=1;
}
call()
{
  return wrap (*ptr2);
}

and we could have ipa-reference style pass that collect alias sets read/written 
by a function
and uses it during local optimization to figure out if there is a true 
dependence
between function call and memory store.

Honza

Reply via email to