I've observed a strange results while profiling the following program: $ cat test.c #include <sys/time.h>
#define rdtscll(val) asm volatile("rdtsc" : "=A" (val)) struct timeval tv; unsigned long long t; void f (int n) { int x, y, z = 0; for (x = 0; x < n; x++) for (y = 0; y < n; y++) { z += x * y; rdtscll (t); } } void g (int n) { int x, y, z = 0; for (x = 0; x < n; x++) for (y = 0; y < n; y++) { z += x * y; gettimeofday (&tv, 0); } } int main (int argc, char *argv[]) { int n; for (n = 0; n < 200; n++) { if (n % 2) f (n); else g (n); } return 0; } My typical results are: $ gcc -O2 -g -pg -o test test.c $ /usr/bin/time ./test 0.13user 3.67system 0:03.80elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+107minor)pagefaults 0swaps $ gprof test | head -n 10 Flat profile: Each sample counts as 0.01 seconds. % cumulative self self total time seconds seconds calls ms/call ms/call name 95.85 0.40 0.40 100 4.03 4.03 f 4.79 0.42 0.02 100 0.20 0.20 g % the percentage of the total running time of the time program used by this function. I.e. g() is much faster than f(). But this means that raw CPU insn (rdtsc) is slower than kernel syscall wrapped by C library function (gettimeofday()), which is just impossible. Let's split the program above and test f() and g() separately: $ cat test1.c #define rdtscll(val) asm volatile("rdtsc" : "=A" (val)) unsigned long long t; void f (int n) { int x, y, z = 0; for (x = 0; x < n; x++) for (y = 0; y < n; y++) { z += x * y; rdtscll (t); } } int main (int argc, char *argv[]) { int n; for (n = 0; n < 200; n++) f (n); return 0; } $ gcc -O2 -g -pg -o test1 test1.c $ /usr/bin/time ./test1 0.08user 0.00system 0:00.09elapsed 98%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+106minor)pagefaults 0swaps $ $ cat test2.c #include <sys/time.h> struct timeval tv; void g (int n) { int x, y, z = 0; for (x = 0; x < n; x++) for (y = 0; y < n; y++) { z += x * y; gettimeofday (&tv, 0); } } int main (int argc, char *argv[]) { int n; for (n = 0; n < 200; n++) g (n); return 0; } $ gcc -O2 -g -pg -o test2 test2.c $ /usr/bin/time ./test2 0.23user 7.37system 0:07.61elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+106minor)pagefaults 0swaps These examples obviously shows that gprof results are very strange at least. Can anyone explain them ? -- Summary: Strange profiling results Product: binutils Version: 2.17 Status: NEW Severity: normal Priority: P2 Component: gprof AssignedTo: unassigned at sources dot redhat dot com ReportedBy: dmantipov at yandex dot ru CC: bug-binutils at gnu dot org GCC build triplet: i686-pc-linux-gnu GCC host triplet: i686-pc-linux-gnu GCC target triplet: i686-pc-linux-gnu http://sourceware.org/bugzilla/show_bug.cgi?id=2776 ------- You are receiving this mail because: ------- You are on the CC list for the bug, or are watching someone who is. _______________________________________________ bug-binutils mailing list bug-binutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-binutils