I've observed a strange results while profiling the following program:

$ cat test.c
#include <sys/time.h>

#define rdtscll(val) asm volatile("rdtsc" : "=A" (val))

struct timeval tv;
unsigned long long t;

void f (int n) 
{
  int x, y, z = 0;

  for (x = 0; x < n; x++)
    for (y = 0; y < n; y++) {
      z += x * y;
      rdtscll (t);
    }
}

void g (int n) 
{
  int x, y, z = 0;

  for (x = 0; x < n; x++)
    for (y = 0; y < n; y++) {
      z += x * y;
      gettimeofday (&tv, 0);
    }
}

int main (int argc, char *argv[])
{ 
  int n;

  for (n = 0; n < 200; n++) {
    if (n % 2)
      f (n);
    else
      g (n);
  }
  return 0;
}

My typical results are:

$ gcc -O2 -g -pg -o test test.c                        
$ /usr/bin/time ./test                               
0.13user 3.67system 0:03.80elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+107minor)pagefaults 0swaps
$ gprof test | head -n 10                            
Flat profile:

Each sample counts as 0.01 seconds.
  %   cumulative   self              self     total           
 time   seconds   seconds    calls  ms/call  ms/call  name    
 95.85      0.40     0.40      100     4.03     4.03  f
  4.79      0.42     0.02      100     0.20     0.20  g

 %         the percentage of the total running time of the
time       program used by this function.

I.e. g() is much faster than f(). But this means that raw CPU insn (rdtsc) is
slower than kernel syscall wrapped by C library function (gettimeofday()), which
is just impossible. Let's split the program above and test f() and g() 
separately:

$ cat test1.c
#define rdtscll(val) asm volatile("rdtsc" : "=A" (val))

unsigned long long t;

void f (int n) 
{
  int x, y, z = 0;

  for (x = 0; x < n; x++)
    for (y = 0; y < n; y++) {
      z += x * y;
      rdtscll (t);
    }
}

int main (int argc, char *argv[])
{ 
  int n;

  for (n = 0; n < 200; n++)
      f (n);
  return 0;
}
$ gcc -O2 -g -pg -o test1 test1.c                      
$ /usr/bin/time ./test1
0.08user 0.00system 0:00.09elapsed 98%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+106minor)pagefaults 0swaps
$ 
$ cat test2.c                                          
#include <sys/time.h>

struct timeval tv;

void g (int n) 
{
  int x, y, z = 0;

  for (x = 0; x < n; x++)
    for (y = 0; y < n; y++) {
      z += x * y;
      gettimeofday (&tv, 0);
    }
}

int main (int argc, char *argv[])
{ 
  int n;

  for (n = 0; n < 200; n++)
      g (n);
  return 0;
}
$ gcc -O2 -g -pg -o test2 test2.c                        
$ /usr/bin/time ./test2                                
0.23user 7.37system 0:07.61elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+106minor)pagefaults 0swaps

These examples obviously shows that gprof results are very strange at least. Can
anyone explain them ?

-- 
           Summary: Strange profiling results
           Product: binutils
           Version: 2.17
            Status: NEW
          Severity: normal
          Priority: P2
         Component: gprof
        AssignedTo: unassigned at sources dot redhat dot com
        ReportedBy: dmantipov at yandex dot ru
                CC: bug-binutils at gnu dot org
 GCC build triplet: i686-pc-linux-gnu
  GCC host triplet: i686-pc-linux-gnu
GCC target triplet: i686-pc-linux-gnu


http://sourceware.org/bugzilla/show_bug.cgi?id=2776

------- You are receiving this mail because: -------
You are on the CC list for the bug, or are watching someone who is.


_______________________________________________
bug-binutils mailing list
bug-binutils@gnu.org
http://lists.gnu.org/mailman/listinfo/bug-binutils

Reply via email to