https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103585

--- Comment #7 from Jan Hubicka <hubicka at gcc dot gnu.org> ---
Created attachment 51952
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=51952&action=edit
Patch to teach modref about global memory

This patch extends modref so that we eliminate the dead part of array
descriptors.  This is done by distinguishing between accesses to global memory
and accesses to unknown memory.  The former is less evil, since it cannot
access things that do not escape, and it is possible to arrange for unanalyzed
functions to access only global memory (since everything reaching them escapes).

With the patch I now get
 Performance counter stats for './a.out-badmod':

         27,448.82 msec task-clock                #    0.999 CPUs utilized      
               334      context-switches          #   12.168 /sec               
                10      cpu-migrations            #    0.364 /sec               
                70      page-faults               #    2.550 /sec               
   109,079,862,444      cycles                    #    3.974 GHz                
       130,234,675      stalled-cycles-frontend   #    0.12% frontend cycles idle
    80,559,978,185      stalled-cycles-backend    #   73.85% backend cycles idle
   230,337,342,608      instructions              #    2.11  insn per cycle     
                                                  #    0.35  stalled cycles per insn
     9,808,852,327      branches                  #  357.351 M/sec              
        24,742,926      branch-misses             #    0.25% of all branches    

      27.468971377 seconds time elapsed

      27.445934000 seconds user
       0.003999000 seconds sys


So the 56% slowdown is now only 22%, which is quite a nice improvement.
The call of perdida now looks as follows:
  <bb 70> [local count: 109362591]:
  _818 = (unsigned long) _44;
  _817 = _818 + ivtmp.1050_1737;
  _246 = (real(kind=8) *) _817;
  parm.326.data = _246;
  _811 = (unsigned long) _33;
  _810 = _811 + ivtmp.1050_1737;
  _247 = (real(kind=8) *) _810;
  parm.327.data = _247;
  _712 = (unsigned long) _56;
  _711 = _712 + ivtmp.1050_1737;
  _248 = (real(kind=8) *) _711;
  parm.328.data = _248;
  _612 = (unsigned long) _20;
  _574 = _612 + ivtmp.1050_1737;
  _249 = (real(kind=8) *) _574;
  parm.329.data = _249;
  _573 = (unsigned long) _67;
  _572 = _573 + ivtmp.1050_1737;
  _250 = (real(kind=8) *) _572;
  parm.330.data = _250;
  _251 = (real(kind=8) *) ivtmp.1049_1740;
  _252 = (real(kind=8) *) ivtmp.1048_1753;
  _253 = (real(kind=8) *) ivtmp.1047_1762;
  perdida.constprop (&dt, &lambda, &mu, &yield_stress, &r_infinity, &b,
&x_infinity, &gamma, &eta, &plastic_strain_threshold, &parm.326, &parm.327,
&parm.328, &parm.329, _253, &parm.330, _252, _251, &failure_threshold,
&crack_closure_parameter);
  parm.326 ={v} {CLOBBER};
  parm.327 ={v} {CLOBBER};
  parm.328 ={v} {CLOBBER};
  parm.329 ={v} {CLOBBER};
  parm.330 ={v} {CLOBBER};

and compiles to:
        movq    48(%rsp), %rax
        movq    %r10, 24(%rsp)
        addq    %rbp, %rax
        movq    %rax, 720(%rsp)
        movq    32(%rsp), %rax
        addq    %rbp, %rax
        movq    %rax, 816(%rsp)
        movq    64(%rsp), %rax
        addq    %rbp, %rax
        movq    %rax, 912(%rsp)
        movq    40(%rsp), %rax
        addq    %rbp, %rax
        movq    %rax, 1008(%rsp)
        movq    72(%rsp), %rax
        addq    %rbp, %rax
        addq    $72, %rbp
        movq    %rax, 1104(%rsp)
        leaq    352(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1720
        leaq    384(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1728
        pushq   %r13
        .cfi_def_cfa_offset 1736
        addq    $8, %r13
        pushq   %r12
        .cfi_def_cfa_offset 1744
        addq    $8, %r12
        leaq    1136(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1752
        pushq   %r14
        .cfi_def_cfa_offset 1760
        addq    $8, %r14
        leaq    1056(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1768
        leaq    968(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1776
        leaq    880(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1784
        leaq    792(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1792
        leaq    488(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1800
        leaq    456(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1808
        leaq    480(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1816
        leaq    568(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1824
        leaq    440(%rsp), %r9
        leaq    528(%rsp), %r8
        leaq    584(%rsp), %rcx
        leaq    512(%rsp), %rdx
        leaq    504(%rsp), %rsi
        leaq    472(%rsp), %rdi
        call    __perdida_m_MOD_perdida.constprop.0

Reply via email to