http://gcc.gnu.org/bugzilla/show_bug.cgi?id=57534

Uroš Bizjak <ubizjak at gmail dot com> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |NEW
   Last reconfirmed|                            |2013-06-05
          Component|rtl-optimization            |tree-optimization
   Target Milestone|---                         |4.8.2
            Summary|Performance regression      |[4.8. 4.9 Regression]:
                   |versus 4.7.3, 4.8.1 is ~15% |Performance regression
                   |slower                      |versus 4.7.3, 4.8.1 is ~15%
                   |                            |slower
     Ever confirmed|0                           |1

--- Comment #2 from Uroš Bizjak <ubizjak at gmail dot com> ---
Confirmed. For some reason, the tree optimizers CSE part of the address
computation, resulting in an _.optimized dump that shows:

  index.6_13 = (unsigned int) index_1;
  _14 = index.6_13 * 8;                     <- here
  _16 = x_15(D) + _14;
  _17 = *_16;
  _20 = _14 + 8;
  _21 = x_15(D) + _20;
  _22 = *_21;
  _23 = _17 + _22;
  _26 = _14 + 16;
  _27 = x_15(D) + _26;
  _28 = *_27;
  _29 = _23 + _28;
  _32 = _14 + 24;
  _33 = x_15(D) + _32;
  _34 = *_33;
  _35 = _29 + _34;
  sum_36 = _35 + sum_3;
  _38 = _14 + 32;
  _39 = x_15(D) + _38;
  _40 = *_39;
  _42 = _14 + 40;
  _43 = x_15(D) + _42;
  _44 = *_43;
  _45 = _40 + _44;
  _47 = _14 + 48;
  _48 = x_15(D) + _47;
  _49 = *_48;
  _50 = _45 + _49;
  _52 = _14 + 56;
  _53 = x_15(D) + _52;
  _54 = *_53;
  _55 = _50 + _54;
  sum2_56 = _55 + sum2_4;
  index_57 = index_1 + 8;

Starting from there, the final assembly results in:

.L16:
        leal    0(,%esi,8), %eax        <- this is the CSE'd part: %eax = %esi * 8
        fldl    (%ebx,%esi,8)
        faddl   8(%ebx,%eax)
        faddl   16(%ebx,%eax)
        faddl   24(%ebx,%eax)
        faddp   %st, %st(2)
        fldl    32(%ebx,%eax)
        faddl   40(%ebx,%eax)
        faddl   48(%ebx,%eax)
        faddl   56(%ebx,%eax)
        leal    8(%esi), %eax
        cmpl    %eax, %edi
        faddp   %st, %st(1)
        jg      .L17
        movl    keepgoing, %eax
        testl   %eax, %eax
        je      .L18
        addl    $1, %ebp
        xorl    %eax, %eax
.L17:
        movl    %eax, %esi
        jmp     .L16

Confirmed as a tree-optimizer problem.

Reply via email to