Author: rrs Date: Thu Apr 26 21:41:16 2018 New Revision: 333041 URL: https://svnweb.freebsd.org/changeset/base/333041
Log: This change re-arranges the fields within the tcp-pcb so that they are more in order of cache line use as one passes through the tcp_input/output paths (non-errors most likely path). This helps speed up cache line optimization so that the tcp stack runs a bit more efficently. Sponsored by: Netflix Inc. Differential Revision: https://reviews.freebsd.org/D15136 Modified: head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_var.h ============================================================================== --- head/sys/netinet/tcp_var.h Thu Apr 26 21:40:05 2018 (r333040) +++ head/sys/netinet/tcp_var.h Thu Apr 26 21:41:16 2018 (r333041) @@ -83,125 +83,123 @@ STAILQ_HEAD(tcp_log_stailq, tcp_log_mem); /* * Tcp control block, one per tcp; fields: - * Organized for 16 byte cacheline efficiency. + * Organized for 64 byte cacheline efficiency based + * on common tcp_input/tcp_output processing. */ struct tcpcb { - struct tsegqe_head t_segq; /* segment reassembly queue */ - int t_segqlen; /* segment reassembly queue length */ - int t_dupacks; /* consecutive dup acks recd */ - - struct mbuf *t_in_pkt; /* head of the input packet queue for the tcp_hpts system */ - struct mbuf *t_tail_pkt; /* tail of the input packet queue for the tcp_hpts system */ - struct tcp_timer *t_timers; /* All the TCP timers in one struct */ - + /* Cache line 1 */ struct inpcb *t_inpcb; /* back pointer to internet pcb */ - int t_state; /* state of this connection */ + struct tcp_function_block *t_fb;/* TCP function call block */ + void *t_fb_ptr; /* Pointer to t_fb specific data */ + uint32_t t_maxseg:24, /* maximum segment size */ + t_logstate:8; /* State of "black box" logging */ + uint32_t t_state:4, /* state of this connection */ + bits_spare : 24; u_int t_flags; - - struct vnet *t_vnet; /* back pointer to parent vnet */ - tcp_seq snd_una; /* sent but unacknowledged */ tcp_seq snd_max; /* highest sequence number sent; * used to recognize retransmits */ tcp_seq snd_nxt; /* send next */ tcp_seq snd_up; /* send urgent pointer */ - - tcp_seq snd_wl1; /* window update seg seq number */ - tcp_seq snd_wl2; /* window update seg ack number */ - tcp_seq iss; /* initial send sequence number */ - tcp_seq irs; /* initial receive sequence number */ - + uint32_t snd_wnd; /* send window */ + uint32_t snd_cwnd; /* congestion-controlled window */ + uint32_t cl1_spare; /* Spare to round out CL 1 */ + /* Cache line 2 */ + u_int32_t ts_offset; /* our timestamp offset */ + u_int32_t rfbuf_ts; /* recv buffer autoscaling timestamp */ + int rcv_numsacks; /* # distinct sack blks present */ + u_int t_tsomax; /* TSO total burst length limit in bytes */ + u_int t_tsomaxsegcount; /* TSO maximum segment count */ + u_int t_tsomaxsegsize; /* TSO maximum segment size in bytes */ tcp_seq rcv_nxt; /* receive next */ tcp_seq rcv_adv; /* advertised window */ uint32_t rcv_wnd; /* receive window */ + u_int t_flags2; /* More tcpcb flags storage */ + int t_srtt; /* smoothed round-trip time */ + int t_rttvar; /* variance in round-trip time */ + u_int32_t ts_recent; /* timestamp echo data */ + u_char snd_scale; /* window scaling for send window */ + u_char rcv_scale; /* window scaling for recv window */ + u_char snd_limited; /* segments limited transmitted */ + u_char request_r_scale; /* pending window scaling */ + tcp_seq last_ack_sent; + u_int t_rcvtime; /* inactivity time */ + /* Cache line 3 */ tcp_seq rcv_up; /* receive urgent pointer */ - - uint32_t snd_wnd; /* send window */ - uint32_t snd_cwnd; /* congestion-controlled window */ + int t_segqlen; /* segment reassembly queue length */ + struct tsegqe_head t_segq; /* segment reassembly queue */ + struct mbuf *t_in_pkt; + struct mbuf *t_tail_pkt; + struct tcp_timer *t_timers; /* All the TCP timers in one struct */ + struct vnet *t_vnet; /* back pointer to parent vnet */ uint32_t snd_ssthresh; /* snd_cwnd size threshold for * for slow start exponential to * linear switch */ + tcp_seq snd_wl1; /* window update seg seq number */ + /* Cache line 4 */ + tcp_seq snd_wl2; /* window update seg ack number */ + + tcp_seq irs; /* initial receive sequence number */ + tcp_seq iss; /* initial send sequence number */ + u_int t_acktime; + u_int ts_recent_age; /* when last updated */ tcp_seq snd_recover; /* for use in NewReno Fast Recovery */ + uint16_t cl4_spare; /* Spare to adjust CL 4 */ + char t_oobflags; /* have some */ + char t_iobc; /* input character */ + int t_rxtcur; /* current retransmit value (ticks) */ - u_int t_rcvtime; /* inactivity time */ - u_int t_starttime; /* time connection was established */ + int t_rxtshift; /* log(2) of rexmt exp. backoff */ u_int t_rtttime; /* RTT measurement start time */ + tcp_seq t_rtseq; /* sequence number being timed */ + u_int t_starttime; /* time connection was established */ - int t_rxtcur; /* current retransmit value (ticks) */ - u_int t_maxseg; /* maximum segment size */ u_int t_pmtud_saved_maxseg; /* pre-blackhole MSS */ - int t_srtt; /* smoothed round-trip time */ - int t_rttvar; /* variance in round-trip time */ - - int t_rxtshift; /* log(2) of rexmt exp. backoff */ u_int t_rttmin; /* minimum rtt allowed */ + u_int t_rttbest; /* best rtt we've seen */ - u_long t_rttupdated; /* number of times rtt sampled */ - uint32_t max_sndwnd; /* largest window peer has offered */ int t_softerror; /* possible error not yet reported */ -/* out-of-band data */ - char t_oobflags; /* have some */ - char t_iobc; /* input character */ -/* RFC 1323 variables */ - u_char snd_scale; /* window scaling for send window */ - u_char rcv_scale; /* window scaling for recv window */ - u_char request_r_scale; /* pending window scaling */ - u_int32_t ts_recent; /* timestamp echo data */ - u_int ts_recent_age; /* when last updated */ - u_int32_t ts_offset; /* our timestamp offset */ - - tcp_seq last_ack_sent; -/* experimental */ + uint32_t max_sndwnd; /* largest window peer has offered */ + /* Cache line 5 */ uint32_t snd_cwnd_prev; /* cwnd prior to retransmit */ uint32_t snd_ssthresh_prev; /* ssthresh prior to retransmit */ tcp_seq snd_recover_prev; /* snd_recover prior to retransmit */ int t_sndzerowin; /* zero-window updates sent */ - u_int t_badrxtwin; /* window for retransmit recovery */ - u_char snd_limited; /* segments limited transmitted */ -/* SACK related state */ + u_long t_rttupdated; /* number of times rtt sampled */ int snd_numholes; /* number of holes seen by sender */ + u_int t_badrxtwin; /* window for retransmit recovery */ TAILQ_HEAD(sackhole_head, sackhole) snd_holes; /* SACK scoreboard (sorted) */ tcp_seq snd_fack; /* last seq number(+1) sack'd by rcv'r*/ - int rcv_numsacks; /* # distinct sack blks present */ - struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */ tcp_seq sack_newdata; /* New data xmitted in this recovery episode starts at this seq number */ + struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */ struct sackhint sackhint; /* SACK scoreboard hint */ int t_rttlow; /* smallest observerved RTT */ - u_int32_t rfbuf_ts; /* recv buffer autoscaling timestamp */ int rfbuf_cnt; /* recv buffer autoscaling byte count */ struct toedev *tod; /* toedev handling this connection */ int t_sndrexmitpack; /* retransmit packets sent */ int t_rcvoopack; /* out-of-order packets received */ void *t_toe; /* TOE pcb pointer */ - int t_bytes_acked; /* # bytes acked during current RTT */ struct cc_algo *cc_algo; /* congestion control algorithm */ struct cc_var *ccv; /* congestion control specific vars */ struct osd *osd; /* storage for Khelp module data */ - + int t_bytes_acked; /* # bytes acked during current RTT */ u_int t_keepinit; /* time to establish connection */ u_int t_keepidle; /* time before keepalive probes begin */ u_int t_keepintvl; /* interval between keepalives */ u_int t_keepcnt; /* number of keepalives before close */ - - u_int t_tsomax; /* TSO total burst length limit in bytes */ - u_int t_tsomaxsegcount; /* TSO maximum segment count */ - u_int t_tsomaxsegsize; /* TSO maximum segment size in bytes */ - u_int t_flags2; /* More tcpcb flags storage */ - int t_logstate; /* State of "black box" logging */ - struct tcp_log_stailq t_logs; /* Log buffer */ + int t_dupacks; /* consecutive dup acks recd */ int t_lognum; /* Number of log entries */ - uint32_t t_logsn; /* Log "serial number" */ + struct tcp_log_stailq t_logs; /* Log buffer */ struct tcp_log_id_node *t_lin; struct tcp_log_id_bucket *t_lib; const char *t_output_caller; /* Function that called tcp_output */ - struct tcp_function_block *t_fb;/* TCP function call block */ - void *t_fb_ptr; /* Pointer to t_fb specific data */ + uint32_t t_logsn; /* Log "serial number" */ uint8_t t_tfo_client_cookie_len; /* TCP Fast Open client cookie length */ unsigned int *t_tfo_pending; /* TCP Fast Open server pending counter */ union { _______________________________________________ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"