hi Kyle

I discuss delayed ACKs and the tcp provider at

http://blogs.oracle.com/amaguire/entry/measuring_rtt_and_handling_tcp

One filtering approach would be to only to measure
RTT for segments which are directly ACKed - i.e.

ACK value received == tcp->tcps_sndnxt value

The script at

http://blogs.oracle.com/amaguire/entry/improved_dtrace_tcp_acknowledgement_rtt

...does this, so give it a try. Hope this helps,

Alan


On 17/05/2011 20:07, Kyle Hailey wrote:

Is there a way to identify a delayed ACK in the TCP layer dtrace probes?

If I do

   time dd if=/dev/zero of=foo bs=8k count=1

where the output file is on an NFS mount and dtrace the TCP send, receives and look at the delta of the times, the last acknowledgement happens after the dd has finished.
I'm imagining the the last ACK is a delayed acknowledgement.
I want to be able to measure the round trip speeds, but these slow ACKs throw off the measurements . I want to be able to filter them out if possible.

Here is an example

 time dd if=/dev/zero of=foo bs=8k count=1
1+0 records in
1+0 records out
8192 bytes (8.2 kB) copied, 0.00193471 s, 4.2 MB/s

real    0m0.014s
user    0m0.001s
sys     0m0.006s


sudo nfslat.d 1 192.86.100.186
starting up ...
sbytes rbytes  delta send   recd   flags
unackd unackd   us   bytes  bytes
   240      0    439      / 68   ACK|PUSH|
   244      0    175  244 \      ACK|PUSH|
   244      0    308      / 312  ACK|PUSH|
   252      0    467  252 \      ACK|PUSH|
   252      0    320      / 76   ACK|PUSH|
   244      0   1717  244 \      ACK|PUSH|
   244      0    268      / 312  ACK|PUSH|
   168      0    208  168 \      ACK|PUSH|
   168      0    250      / 388  ACK|PUSH|
   244      0    199  244 \      ACK|PUSH|
   244      0    260      / 312  ACK|PUSH|
   176      0    149  176 \      ACK|PUSH|
   176      0    250      / 248  ACK|PUSH|
   280      0    159  280 \      ACK|PUSH|
   280      0    606      / 532  ACK|PUSH|
   176      0    173  176 \      ACK|PUSH|
   176      0    245      / 84   ACK|PUSH|
     0     84    380 1448 \      ACK|
     0     84     58 1448 \      ACK|
     0     84     36 1448 \      ACK|
     0     84     34 1448 \      ACK|
     0     84     36 1448 \      ACK|
     0     84     34 1140 \      ACK|PUSH|
  8380      0    360      / 84   ACK|PUSH|
   168      0    144  168 \      ACK|PUSH|
   168      0    471      / 76   ACK|PUSH|
   192      0    116  192 \      ACK|PUSH|
   192      0    275      / 256  ACK|PUSH|
   244      0   5125  244 \      ACK|PUSH|
   244      0    382      / 68   ACK|PUSH|
     0      0  50698    0 \      ACK| 0

So the total dd time was 14ms where as the last ACK alone  was 50ms.


Code

#!/usr/sbin/dtrace -s
#pragma D option quiet
#pragma D option defaultargs

inline int TICKS=$1;
inline string ADDR=$$2;

dtrace:::BEGIN
{
       TIMER = ( TICKS != NULL ) ?  TICKS : 1 ;
       ticks = TIMER;
       TITLE = 10;
       title = 0;
       walltime=timestamp;
       printf("starting up ...\n");
}

tcp:::send, tcp:::receive
/   title == 0 &&
     ( args[2]->ip_daddr == ADDR || ADDR == NULL ) /
{
      printf("%6s %6s %6s %8s   %8s  %8s %8s %8s  %8s %8s %8s  %8s \n",
        "sbytes",
        "rbytes",
        "delta"    ,
        "send" ,
        "recd"  ,
        "ssz"  ,
        "sscal"  ,
        "rsz",
        "rscal",
        "congw",
        "conthr",
        "retran"
      );
}

tcp:::send
/     ( args[2]->ip_daddr == ADDR || ADDR == NULL ) /
{
    nfs[args[1]->cs_cid]=1; /* this is an NFS thread */
    delta= timestamp-walltime;
    walltime=timestamp;
    flags="";
    flags= strjoin((( args[4]->tcp_flags & TH_FIN ) ? "FIN|" : ""),flags);
    flags= strjoin((( args[4]->tcp_flags & TH_SYN ) ? "SYN|" : ""),flags);
    flags= strjoin((( args[4]->tcp_flags & TH_RST ) ? "RST|" : ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_PUSH ) ? "PUSH|" : ""),flags);
   flags= strjoin((( args[4]->tcp_flags & TH_ACK ) ? "ACK|" : ""),flags);
    flags= strjoin((( args[4]->tcp_flags & TH_URG ) ? "URG|" : ""),flags);
    flags= strjoin((( args[4]->tcp_flags & TH_ECE ) ? "ECE|" : ""),flags);
    flags= strjoin((( args[4]->tcp_flags & TH_CWR ) ? "CWR|" : ""),flags);
    flags= strjoin((( args[4]->tcp_flags == 0 ) ? "null " : ""),flags);


printf("%6d %6d %6d %8d \ %-8s %8d %8d %8d %8d %8d %12d %s %d \n",
/*
printf("%6d %6d %6d %8d \ %8s %8d %8d %8d %8d %8d %12d %12d %12d %8d %8d %s %d \n",
        args[4]->tcp_seq %1000000,
        args[4]->tcp_ack %1000000,
*/
        args[3]->tcps_snxt - args[3]->tcps_suna ,
        args[3]->tcps_rnxt - args[3]->tcps_rack,
        delta/1000,
        args[2]->ip_plength - args[4]->tcp_offset,
        "",
        args[3]->tcps_swnd,
        args[3]->tcps_snd_ws,
        args[3]->tcps_rwnd,
        args[3]->tcps_rcv_ws,
        args[3]->tcps_cwnd,
        args[3]->tcps_cwnd_ssthresh,
/*
        args[3]->tcps_sack_fack,
        args[3]->tcps_sack_snxt,
        args[3]->tcps_rto,
        args[3]->tcps_mss,
*/
        flags,
        args[3]->tcps_retransmit
      );
    flags=0;
    title--;
}

tcp:::receive
/ ( args[2]->ip_saddr == ADDR || ADDR == NULL ) && nfs[args[1]->cs_cid] /
{
      delta=timestamp-walltime;
      walltime=timestamp;

      flags="";
flags= strjoin((( args[4]->tcp_flags & TH_FIN ) ? "FIN|" : ""),flags); flags= strjoin((( args[4]->tcp_flags & TH_SYN ) ? "SYN|" : ""),flags); flags= strjoin((( args[4]->tcp_flags & TH_RST ) ? "RST|" : ""),flags); flags= strjoin((( args[4]->tcp_flags & TH_PUSH ) ? "PUSH|" : ""),flags); flags= strjoin((( args[4]->tcp_flags & TH_ACK ) ? "ACK|" : ""),flags); flags= strjoin((( args[4]->tcp_flags & TH_URG ) ? "URG|" : ""),flags); flags= strjoin((( args[4]->tcp_flags & TH_ECE ) ? "ECE|" : ""),flags); flags= strjoin((( args[4]->tcp_flags & TH_CWR ) ? "CWR|" : ""),flags);
      flags= strjoin((( args[4]->tcp_flags == 0 ) ? "null " : ""),flags);

printf("%6d %6d %6d %8s / %-8d %8d %8d %8d %8d %8d %12d %s %d \n",
/*
printf("%6d %6d %6d %8s / %-8d %8d %8d %8d %8d %8d %12d %12d %12d %8d %8d %s %d \n",
        args[4]->tcp_ack %1000000,
        args[4]->tcp_seq %1000000,
*/
        args[3]->tcps_snxt - args[3]->tcps_suna ,
        args[3]->tcps_rnxt - args[3]->tcps_rack,
        delta/1000,
        "",
        args[2]->ip_plength - args[4]->tcp_offset,
        args[3]->tcps_swnd,
        args[3]->tcps_snd_ws,
        args[3]->tcps_rwnd,
        args[3]->tcps_rcv_ws,
        args[3]->tcps_cwnd,
        args[3]->tcps_cwnd_ssthresh,
        flags,
/*
        args[3]->tcps_sack_fack,
        args[3]->tcps_sack_snxt,
        args[3]->tcps_rto,
        args[3]->tcps_mss,
*/
        args[3]->tcps_retransmit
      );
    flags=0;
    title--;
}



--
- Kyle

O: +1.415.341.3430
F: +1.650.494.1676
275 Middlefield Road, Suite 50
Menlo Park, CA 94025
http://www.delphix.com <http://www.delphix.com/>


_______________________________________________
dtrace-discuss mailing list
dtrace-discuss@opensolaris.org

_______________________________________________
dtrace-discuss mailing list
dtrace-discuss@opensolaris.org

Reply via email to