Hi people,

I'm trying to save people the cost of buying extra servers by making
PowerDNS (GPL) ever faster, but I've hit a rather fundamental problem.

Linux 2.6.20-rc4 appears to take 4 microseconds on my P4 3GHz for a
non-blocking UDPv4 recvfrom() call, both on loopback and ethernet.

Linux 2.6.18 on my 64 bit Athlon64 3200+ takes a similar amount of time.

This seems like rather a lot for a 50 byte datagram, but perhaps I'm
overestimating your abilities :-)

The program is unthreaded, and I measure like this:

/*
 * RDTSC(qp): execute the x86 `rdtsc` instruction and store the 64-bit
 * counter value into the lvalue `qp`.
 *
 * The "=a" / "=d" output constraints bind the low 32 bits to EAX and the
 * high 32 bits to EDX; the two halves are then combined as (hi << 32) | lo.
 *
 * Hygiene:
 *  - the temporaries carry an `rdtsc_` prefix and trailing underscore so
 *    that a caller variable named e.g. `lowPart` is not shadowed (with the
 *    old names, RDTSC(lowPart) assigned to the macro's own local);
 *  - `qp` is parenthesized so any lvalue expression can be passed;
 *  - the do { } while (0) wrapper makes the macro act as one statement.
 *
 * NOTE(review): `rdtsc` is, as far as I know, not a serializing
 * instruction, so the CPU may reorder it relative to neighbouring code;
 * confirm against the CPU manual before trusting very short intervals.
 */
#define RDTSC(qp) \
do { \
  unsigned int rdtsc_lo_, rdtsc_hi_; \
  __asm__ __volatile__("rdtsc" : "=a" (rdtsc_lo_), "=d" (rdtsc_hi_)); \
  (qp) = (((unsigned long long) rdtsc_hi_) << 32) | rdtsc_lo_; \
} while (0)

...

uint64_t tsc1, tsc2;
RDTSC(tsc1);      

if((len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen)) 
>= 0) { 
    RDTSC(tsc2);      
    printf("%f\n", (tsc2-tsc1)/3000.0);  // 3GHz P4
}

gdb generates the following dump from the actual program (in the listing,
x=_Z20handleNewUDPQuestioniRN5boost3anyE). I see nothing untoward happening
between the two 'rdtsc' opcodes.

0x08091de0 <x+0>:  push   %ebp
0x08091de1 <x+1>:  mov    %esp,%ebp
0x08091de3 <x+3>:  push   %edi
0x08091de4 <x+4>:  push   %esi
0x08091de5 <x+5>:  push   %ebx
0x08091de6 <x+6>:  sub    $0x78c,%esp
0x08091dec <x+12>: mov    %gs:0x14,%eax
0x08091df2 <x+18>: mov    %eax,0xffffffe4(%ebp)
0x08091df5 <x+21>: xor    %eax,%eax
0x08091df7 <x+23>: movw   $0x2,0xffffffac(%ebp)
0x08091dfd <x+29>: movl   $0x0,0xffffffb0(%ebp)
0x08091e04 <x+36>: movw   $0x0,0xffffffae(%ebp)
0x08091e0a <x+42>: movl   $0x1c,0xfffff8f4(%ebp)
0x08091e14 <x+52>: rdtsc  
0x08091e16 <x+54>: mov    %edx,%ebx
0x08091e18 <x+56>: mov    0x8(%ebp),%edx
0x08091e1b <x+59>: mov    %eax,%esi
0x08091e1d <x+61>: lea    0xfffff8f4(%ebp),%eax
0x08091e23 <x+67>: mov    %eax,0x14(%esp)
0x08091e27 <x+71>: lea    0xffffffac(%ebp),%ecx
0x08091e2a <x+74>: lea    0xfffff950(%ebp),%eax
0x08091e30 <x+80>: mov    %ecx,0x10(%esp)
0x08091e34 <x+84>: movl   $0x0,0xc(%esp)
0x08091e3c <x+92>: movl   $0x5dc,0x8(%esp)
0x08091e44 <x+100>:        mov    %eax,0x4(%esp)
0x08091e48 <x+104>:        mov    %edx,(%esp)
0x08091e4b <x+107>:        call   0x8192110 <recvfrom>
0x08091e50 <x+112>:        test   %eax,%eax
0x08091e52 <x+114>:        mov    %eax,0xfffff8b0(%ebp)
0x08091e58 <x+120>:        js     0x8092168 <x+904>
0x08091e5e <x+126>:        mov    %ebx,%eax
0x08091e60 <x+128>:        xor    %edx,%edx
0x08091e62 <x+130>:        mov    %eax,%edx
0x08091e64 <x+132>:        mov    $0x0,%eax
0x08091e69 <x+137>:        mov    %esi,%ecx
0x08091e6b <x+139>:        mov    %eax,%esi
0x08091e6d <x+141>:        or     %ecx,%esi
0x08091e6f <x+143>:        mov    %edx,%edi
0x08091e71 <x+145>:        rdtsc  
0x08091e73 <x+147>:        mov    %eax,0xfffff8a0(%ebp)
0x08091e79 <x+153>:        mov    0xfffff8a0(%ebp),%eax
0x08091e7f <x+159>:        mov    %edx,%ecx
0x08091e81 <x+161>:        xor    %ebx,%ebx
0x08091e83 <x+163>:        mov    %ecx,%ebx

recvfrom itself is a tad worrisome (in the dump below, x=recvfrom). I didn't
ask for the 'libc_enable_asynccancel' stuff. I'm trying to isolate the actual
syscall, but it is proving hard work for an assembly newbie like me -
socketcall doesn't make things easier.

0xb7d62410 <x+0>:        cmpl   $0x0,%gs:0xc
0xb7d62418 <x+8>:        jne    0xb7d62439 <x+41>
0xb7d6241a <x+10>:       mov    %ebx,%edx
0xb7d6241c <x+12>:       mov    $0x66,%eax
0xb7d62421 <x+17>:       mov    $0xc,%ebx
0xb7d62426 <x+22>:       lea    0x4(%esp),%ecx
0xb7d6242a <x+26>:       call   *%gs:0x10
0xb7d62431 <x+33>:       mov    %edx,%ebx
0xb7d62433 <x+35>:       cmp    $0xffffff83,%eax
0xb7d62436 <x+38>:       jae    0xb7d62469 <x+89>
0xb7d62438 <x+40>:       ret    
0xb7d62439 <x+41>:       push   %esi
0xb7d6243a <x+42>:       call   0xb7d6ddd0 <__libc_enable_asynccancel>
0xb7d6243f <x+47>:       mov    %eax,%esi
0xb7d62441 <x+49>:       mov    %ebx,%edx
0xb7d62443 <x+51>:       mov    $0x66,%eax
0xb7d62448 <x+56>:       mov    $0xc,%ebx
0xb7d6244d <x+61>:       lea    0x8(%esp),%ecx
0xb7d62451 <x+65>:       call   *%gs:0x10
0xb7d62458 <x+72>:       mov    %edx,%ebx
0xb7d6245a <x+74>:       xchg   %eax,%esi
0xb7d6245b <x+75>:       call   0xb7d6dd90 <__libc_disable_asynccancel>
0xb7d62460 <x+80>:       mov    %esi,%eax
0xb7d62462 <x+82>:       pop    %esi
0xb7d62463 <x+83>:       cmp    $0xffffff83,%eax
0xb7d62466 <x+86>:       jae    0xb7d62469 <x+89>
0xb7d62468 <x+88>:       ret    
0xb7d62469 <x+89>:       call   0xb7d998f8 <__i686.get_pc_thunk.cx>
0xb7d6246e <x+94>:       add    $0x61b86,%ecx
0xb7d62474 <x+100>:      mov    0xffffff2c(%ecx),%ecx
0xb7d6247a <x+106>:      xor    %edx,%edx
0xb7d6247c <x+108>:      sub    %eax,%edx
0xb7d6247e <x+110>:      mov    %edx,%gs:(%ecx)
0xb7d62481 <x+113>:      or     $0xffffffff,%eax
0xb7d62484 <x+116>:      jmp    0xb7d62438 <x+40>

Any clues?

-- 
http://www.PowerDNS.com      Open source, database driven DNS Software 
http://netherlabs.nl              Open and Closed source services
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to