https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68136

            Bug ID: 68136
           Summary: missed tree-level optimization with redundant
                    computations
           Product: gcc
           Version: 6.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: enhancement
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: ktkachov at gcc dot gnu.org
  Target Milestone: ---

Take the testcase gcc.dg/ifcvt-3.c:
typedef long long s64;

/* Return a + c converted to int, written as two redundant branches.
   d is a - b, so the else arm's b + d + c is arithmetically a + c as
   well; both arms therefore compute the same value, which is the
   redundancy this testcase is meant to expose.  */
int
foo (s64 a, s64 b, s64 c)
{
 s64 d = a - b;

  if (d == 0)
    return a + c;
  else
    return b + d + c;
}


On aarch64 this produces the simplest possible code:
foo:
        add     w0, w2, w0
        ret


However, this is only thanks to RTL-level if-conversion.
The final tree dump is considerably more complex:
foo (s64D.2694 aD.2695, s64D.2694 bD.2696, s64D.2694 cD.2697)
{
  s64D.2694 dD.2700;
  intD.7 _1;
  unsigned int _5;
  unsigned int _7;
  unsigned int _8;
  intD.7 _9;
  unsigned int _10;
  unsigned int _11;
  unsigned int _13;
  unsigned int _14;
  intD.7 _15;
  unsigned int _17;

;;   basic block 2, loop depth 0, count 0, freq 10000, maybe hot
;;    prev block 0, next block 3, flags: (NEW, REACHABLE)
;;    pred:       ENTRY [100.0%]  (FALLTHRU,EXECUTABLE)
  d_4 = a_2(D) - b_3(D);
  if (d_4 == 0)
    goto <bb 3>;
  else
    goto <bb 4>;
;;    succ:       3 [39.0%]  (TRUE_VALUE,EXECUTABLE)
;;                4 [61.0%]  (FALSE_VALUE,EXECUTABLE)

;;   basic block 3, loop depth 0, count 0, freq 3900, maybe hot
;;    prev block 2, next block 4, flags: (NEW, REACHABLE)
;;    pred:       2 [39.0%]  (TRUE_VALUE,EXECUTABLE)
  # RANGE [0, 4294967295]
  _5 = (unsigned int) a_2(D);
  # RANGE [0, 4294967295]
  _7 = (unsigned int) c_6(D);
  # RANGE [0, 4294967295]
  _8 = _5 + _7;
  _9 = (intD.7) _8;
  goto <bb 5>;
;;    succ:       5 [100.0%]  (FALLTHRU,EXECUTABLE)

;;   basic block 4, loop depth 0, count 0, freq 6100, maybe hot
;;    prev block 3, next block 5, flags: (NEW, REACHABLE)
;;    pred:       2 [61.0%]  (FALSE_VALUE,EXECUTABLE)
  # RANGE [0, 4294967295]
  _10 = (unsigned int) b_3(D);
  # RANGE [0, 4294967295]
  _11 = (unsigned int) d_4;
  # RANGE [0, 4294967295]
  _13 = (unsigned int) c_6(D);
  # RANGE [0, 4294967295]
  _17 = _10 + _13;
  # RANGE [0, 4294967295]
  _14 = _11 + _17;
  _15 = (intD.7) _14;
;;    succ:       5 [100.0%]  (FALLTHRU,EXECUTABLE)

;;   basic block 5, loop depth 0, count 0, freq 10000, maybe hot
;;    prev block 4, next block 1, flags: (NEW, REACHABLE)
;;    pred:       3 [100.0%]  (FALLTHRU,EXECUTABLE)
;;                4 [100.0%]  (FALLTHRU,EXECUTABLE)
  # _1 = PHI <_9(3), _15(4)>
  # VUSE <.MEM_16(D)>
  return _1;
;;    succ:       EXIT [100.0%] 

}

It's probably a good idea to detect this earlier and produce a "return a + c;"
at the tree level.

Reply via email to