The following code (borrowed from
http://gcc.gnu.org/ml/gcc/2008-05/msg00134.html):

! Benchmark: approximates b = 1/sqrt(a) for a stepped upward from 1.0d0 until
! it equals 2.0d0. The starting guess comes from integer arithmetic on the
! bit pattern of the real(8) value; it is then refined iteratively. Prints the
! iteration count plus the maximum and mean of |a*b*b - 1| in units of
! epsilon(a).

! Constant from which the right-shifted bit pattern is subtracted.
integer(8), parameter :: l = z'5fe6eb3be0000000'
! Number of refinement passes performed by the inner loop.
integer, parameter :: ni = 3
! NOTE(review): j is declared but never referenced below.
integer :: i, j, n
integer(8) :: k
real(8) :: a, b, e, m, s
! k shares storage with b, so k is read/written as the integer view of b.
equivalence (b, k)
a = 1.0d0
! Step size: epsilon of default real (not real(8)) divided by 2**4.
e = epsilon(1.0)/2.0d0**4
! m tracks the largest error seen, s accumulates the sum of errors.
m = 0.0d0
s = 0.0d0
n = 0
do
  n = n + 1
  b = a
  ! Initial guess: shift the bits of a right by one and subtract from l;
  ! through the equivalence this overwrites b.
  k = l - ishft(k, -1_8)
  ! Newton-Raphson steps for 1/sqrt(a): b <- b*(3/2 - (a/2)*b**2).
  do i = 1, ni
    b = b*(1.5-(0.5*a)*b*b)
  end do
  ! One more step, algebraically the same update but written as b plus a
  ! correction term.
  b = b + b*(0.5-(0.5*a)*b*b)
!   b = 1.0d0/sqrt(a)
  ! Error measure: deviation of a*b*b from 1 (computed twice, for max and sum).
  m = max(m, abs(a*b*b - 1.0d0))
  s = s + abs(a*b*b - 1.0d0)
  a = a + e
  ! Exact floating-point comparison: termination relies on the repeated
  ! additions of e landing exactly on 2.0d0.
  if (a == 2.0d0) exit
end do
! Output: iteration count, max error / epsilon(a), mean error / epsilon(a).
print *, n, m/epsilon(a), s/(n*epsilon(a))
end

gives the following timings:

[ibook-dhum] bug/timing% gfc -m64 -O3 rsqrt_8_nr_v1_s.f90
[ibook-dhum] bug/timing% time a.out
   134217728   2.0000000000000000       0.36966567113995552     
2.662u 0.008s 0:02.67 99.6%     0+0k 0+1io 0pf+0w

[ibook-dhum] bug/timing% gfc -m32 -O3 rsqrt_8_nr_v1_s.f90
[ibook-dhum] bug/timing% time a.out
   134217728   2.0000000000000000       0.36966567113995552     
7.401u 0.023s 0:07.42 100.0%    0+0k 0+0io 0pf+0w

For comparison the following code:

! Comparison benchmark: computes b = 1/sqrt(a) with the sqrt intrinsic and a
! division for a stepped upward from 1.0d0 until it equals 2.0d0. Prints the
! iteration count plus the maximum and mean of |a*b*b - 1| in units of
! epsilon(a).
integer :: n
real(8) :: a, b, e, m, s
a = 1.0d0
! Step size: epsilon of default real (not real(8)) divided by 2**4.
e = epsilon(1.0)/2.0d0**4
! s accumulates the sum of errors, m tracks the largest error seen.
s = 0.0d0
m = 0.0d0
n = 0
do
  n = n + 1
  b = 1.0d0/sqrt(a)
  ! Error measure: deviation of a*b*b from 1 (computed twice, for sum and max).
  s = s + abs(a*b*b - 1.0d0)
  m = max(m, abs(a*b*b - 1.0d0))
  a = a + e
  ! Exact floating-point comparison: termination relies on the repeated
  ! additions of e landing exactly on 2.0d0.
  if (a == 2.0d0) exit
end do
! Output: iteration count, max error / epsilon(a), mean error / epsilon(a).
print *, n, m/epsilon(a), s/(n*epsilon(a))
end

gives

[ibook-dhum] bug/timing% gfc -m64 -O3 rsqrt_8_s.f90
[ibook-dhum] bug/timing% time a.out
   134217728  1.00000000000000000       0.49419290572404861     
5.469u 0.002s 0:05.47 99.8%     0+0k 0+0io 0pf+0w
[ibook-dhum] bug/timing% gfc -m32 -O3 rsqrt_8_s.f90
[ibook-dhum] bug/timing% time a.out
   134217728  1.00000000000000000       0.49419290572404861     
5.475u 0.020s 0:05.49 100.0%    0+0k 0+0io 0pf+0w

Note that the latter code is vectorized, while the former is not.


-- 
           Summary: Executable compiled with -m64 almost three times faster
                    than the one compiled with -m32 on Core2Duo
           Product: gcc
           Version: 4.4.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: dominiq at lps dot ens dot fr
 GCC build triplet: i686-apple-darwin9
  GCC host triplet: i686-apple-darwin9
GCC target triplet: i686-apple-darwin9


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=36241

Reply via email to