https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97181

            Bug ID: 97181
           Summary: Inlining of leaf case in recursion
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: enhancement
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: tkoenig at gcc dot gnu.org
  Target Milestone: ---

The following two programs are functionally equivalent:

$ cat v1.f90 
program main
  implicit none
  integer, parameter :: ip = selected_int_kind(15)
  integer :: l_max
  integer(kind=ip) :: n_level
  integer :: num
  num = 0
  read (*,*) l_max
  call btsum (0_ip, 0_ip, l_max)
  print *,num
contains
  recursive subroutine btsum (n, s, level)
    integer(kind=ip), value :: n, s
    integer, value :: level
    if (level /= 0) then
       call btsum (3*n,  s  , level-1)
       call btsum (3*n+1,s+1, level-1)
       call btsum (3*n+2,s+2, level-1)
       return
    end if
    if (popcnt (n) == s) then
       call do_something (n, s)
    end if
  end subroutine btsum
  subroutine do_something (n,s)
    integer (kind=ip), value :: n, s
    num = num + 1
  end subroutine do_something
end program main

program main
  implicit none
  integer, parameter :: ip = selected_int_kind(15)
  integer :: l_max
  integer(kind=ip) :: n_level
  integer :: num
  num = 0
  read (*,*) l_max
  call btsum (0_ip,0_ip,l_max)
  print *,num
contains
  recursive subroutine btsum (n, s, level)
    integer(kind=ip), value :: n, s
    integer, value :: level
    if (level > 1) then
       call btsum (3*n,  s  , level-1)
       call btsum (3*n+1,s+1, level-1)
       call btsum (3*n+2,s+2, level-1)
       return
    end if
    if (popcnt (3*n  ) == s  ) call do_something (3*n  , s)
    if (popcnt (3*n+1) == s+1) call do_something (3*n+1, s+1)
    if (popcnt (3*n+2) == s+2) call do_something (3*n+2, s+2)
  end subroutine btsum
  subroutine do_something (n,s)
    integer (kind=ip), value :: n, s
    num = num + 1
  end subroutine do_something
end program main

The only difference is that the last level of recursion in
btsum has been made explicit.

The difference in running time is quite significant:

$ gfortran -O3 -march=native v1.f90 
$ echo 19 | ./a.out
    68611936
$ time echo 19 | ./a.out
    68611936

real    0m2,611s
user    0m2,606s
sys     0m0,005s
$ gfortran -O3 -march=native v2.f90 
$ time echo 19 | ./a.out
    68611936

real    0m1,614s
user    0m1,609s
sys     0m0,005s

Using PGO makes the difference smaller, but it is still there:

$ gfortran -fprofile-arcs -O3 -march=native v1.f90 
$ echo 19 | ./a.out
    68611936
$ gfortran -fprofile-use -O3 -march=native v1.f90 
$ time echo 19 | ./a.out
    68611936

real    0m2,011s
user    0m2,011s
sys     0m0,000s

$ gfortran -fprofile-arcs -O3 -march=native v2.f90 
$ echo 19 | ./a.out
    68611936
$ gfortran -fprofile-use -O3 -march=native v2.f90 
$ time echo 19 | ./a.out
    68611936

real    0m1,386s
user    0m1,382s
sys     0m0,004s

Reply via email to