https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97181
Bug ID: 97181 Summary: Inlining of leaf case in recursion Product: gcc Version: unknown Status: UNCONFIRMED Severity: enhancement Priority: P3 Component: tree-optimization Assignee: unassigned at gcc dot gnu.org Reporter: tkoenig at gcc dot gnu.org Target Milestone: --- The following two programs are functionally equivalent: $ cat v1.f90 program main implicit none integer, parameter :: ip = selected_int_kind(15) integer :: l_max integer(kind=ip) :: n_level integer :: num num = 0 read (*,*) l_max call btsum (0_ip, 0_ip, l_max) print *,num contains recursive subroutine btsum (n, s, level) integer(kind=ip), value :: n, s integer, value :: level if (level /= 0) then call btsum (3*n, s , level-1) call btsum (3*n+1,s+1, level-1) call btsum (3*n+2,s+2, level-1) return end if if (popcnt (n) == s) then call do_something (n, s) end if end subroutine btsum subroutine do_something (n,s) integer (kind=ip), value :: n, s num = num + 1 end subroutine do_something end program main program main implicit none integer, parameter :: ip = selected_int_kind(15) integer :: l_max integer(kind=ip) :: n_level integer :: num num = 0 read (*,*) l_max call btsum (0_ip,0_ip,l_max) print *,num contains recursive subroutine btsum (n, s, level) integer(kind=ip), value :: n, s integer, value :: level if (level > 1) then call btsum (3*n, s , level-1) call btsum (3*n+1,s+1, level-1) call btsum (3*n+2,s+2, level-1) return end if if (popcnt (3*n ) == s ) call do_something (3*n , s) if (popcnt (3*n+1) == s+1) call do_something (3*n+1, s+1) if (popcnt (3*n+2) == s+2) call do_something (3*n+2, s+2) end subroutine btsum subroutine do_something (n,s) integer (kind=ip), value :: n, s num = num + 1 end subroutine do_something end program main The only difference is that the last level of recursion in btsum has been made explicit. The difference in running time is quite significant: $ gfortran -O3 -march=native v1.f90 $ echo 19 | ./a.out 68611936 $ time echo 19 | ./a.out 68611936 real 0m2,611s user 0m2,606s sys 0m0,005s $ gfortran -O3 -march=native v2.f90 $ time echo 19 | ./a.out 68611936 real 0m1,614s user 0m1,609s sys 0m0,005s Using PGO makes the difference smaller, but it is still there: $ gfortran -fprofile-arcs -O3 -march=native v1.f90 $ echo 19 | ./a.out 68611936 $ gfortran -fprofile-use -O3 -march=native v1.f90 $ time echo 19 | ./a.out 68611936 real 0m2,011s user 0m2,011s sys 0m0,000s $ gfortran -fprofile-arcs -O3 -march=native v2.f90 $ echo 19 | ./a.out 68611936 $ gfortran -fprofile-use -O3 -march=native v2.f90 $ time echo 19 | ./a.out 68611936 real 0m1,386s user 0m1,382s sys 0m0,004s