On Tue, 4 Jul 2023 12:14:47 +0300
Alexander Bluhm <[email protected]> wrote:
> After changing tcp now tick to milliseconds, it will wrap around
> after 49 days of uptime. That may be a problem in some places of
> our stack. Better use a 64 bit counter.
I agree since we sometimes hit a problem from where we could not see
in advance.
> As timestamp option is 32 bit in TCP protocol, we have to use the
> lower 32 bit there. There are casts to 32 bits that should behave
> correctly. More eyes to review would be helpful.
>
> As a bonus, start with random 63 bit offset to avoid uptime leakage.
> I am not aware that we leak anywhere, but more random is always
> good. 2^63 milliseconds gives us 2.9*10^8 years of possible uptime.
>
> ok?
ok yasuoka
> bluhm
>
> Index: netinet/tcp_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
> retrieving revision 1.388
> diff -u -p -r1.388 tcp_input.c
> --- netinet/tcp_input.c 30 May 2023 19:32:57 -0000 1.388
> +++ netinet/tcp_input.c 4 Jul 2023 08:49:47 -0000
> @@ -130,8 +130,8 @@ struct timeval tcp_ackdrop_ppslim_last;
> #define TCP_PAWS_IDLE TCP_TIME(24 * 24 * 60 * 60)
>
> /* for modulo comparisons of timestamps */
> -#define TSTMP_LT(a,b) ((int)((a)-(b)) < 0)
> -#define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0)
> +#define TSTMP_LT(a,b) ((int32_t)((a)-(b)) < 0)
> +#define TSTMP_GEQ(a,b) ((int32_t)((a)-(b)) >= 0)
>
> /* for TCP SACK comparisons */
> #define SEQ_MIN(a,b) (SEQ_LT(a,b) ? (a) : (b))
> @@ -190,7 +190,7 @@ void tcp_newreno_partialack(struct tcpc
>
> void syn_cache_put(struct syn_cache *);
> void syn_cache_rm(struct syn_cache *);
> -int syn_cache_respond(struct syn_cache *, struct mbuf *, uint32_t);
> +int syn_cache_respond(struct syn_cache *, struct mbuf *, uint64_t);
> void syn_cache_timer(void *);
> void syn_cache_reaper(void *);
> void syn_cache_insert(struct syn_cache *, struct tcpcb *);
> @@ -198,10 +198,10 @@ void syn_cache_reset(struct sockaddr *,
> struct tcphdr *, u_int);
> int syn_cache_add(struct sockaddr *, struct sockaddr *, struct tcphdr *,
> unsigned int, struct socket *, struct mbuf *, u_char *, int,
> - struct tcp_opt_info *, tcp_seq *, uint32_t);
> + struct tcp_opt_info *, tcp_seq *, uint64_t);
> struct socket *syn_cache_get(struct sockaddr *, struct sockaddr *,
> struct tcphdr *, unsigned int, unsigned int, struct socket *,
> - struct mbuf *, uint32_t);
> + struct mbuf *, uint64_t);
> struct syn_cache *syn_cache_lookup(struct sockaddr *, struct sockaddr *,
> struct syn_cache_head **, u_int);
>
> @@ -375,7 +375,7 @@ tcp_input(struct mbuf **mp, int *offp, i
> short ostate;
> caddr_t saveti;
> tcp_seq iss, *reuse = NULL;
> - uint32_t now;
> + uint64_t now;
> u_long tiwin;
> struct tcp_opt_info opti;
> struct tcphdr *th;
> @@ -885,7 +885,7 @@ findpcb:
> goto drop;
>
> if (opti.ts_present && opti.ts_ecr) {
> - int rtt_test;
> + int32_t rtt_test;
>
> /* subtract out the tcp timestamp modulator */
> opti.ts_ecr -= tp->ts_modulate;
> @@ -1272,7 +1272,7 @@ trimthenstep6:
> TSTMP_LT(opti.ts_val, tp->ts_recent)) {
>
> /* Check to see if ts_recent is over 24 days old. */
> - if ((int)(now - tp->ts_recent_age) > TCP_PAWS_IDLE) {
> + if (now - tp->ts_recent_age > TCP_PAWS_IDLE) {
> /*
> * Invalidate ts_recent. If this segment updates
> * ts_recent, the age will be reset later and ts_recent
> @@ -2120,7 +2120,7 @@ drop:
> int
> tcp_dooptions(struct tcpcb *tp, u_char *cp, int cnt, struct tcphdr *th,
> struct mbuf *m, int iphlen, struct tcp_opt_info *oi,
> - u_int rtableid, uint32_t now)
> + u_int rtableid, uint64_t now)
> {
> u_int16_t mss = 0;
> int opt, optlen;
> @@ -2686,7 +2686,7 @@ tcp_pulloutofband(struct socket *so, u_i
> * and update averages and current timeout.
> */
> void
> -tcp_xmit_timer(struct tcpcb *tp, int rtt)
> +tcp_xmit_timer(struct tcpcb *tp, int32_t rtt)
> {
> int delta, rttmin;
>
> @@ -3335,7 +3335,7 @@ void
> syn_cache_timer(void *arg)
> {
> struct syn_cache *sc = arg;
> - uint32_t now;
> + uint64_t now;
>
> NET_LOCK();
> if (sc->sc_flags & SCF_DEAD)
> @@ -3469,7 +3469,7 @@ syn_cache_lookup(struct sockaddr *src, s
> */
> struct socket *
> syn_cache_get(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th,
> - u_int hlen, u_int tlen, struct socket *so, struct mbuf *m, uint32_t now)
> + u_int hlen, u_int tlen, struct socket *so, struct mbuf *m, uint64_t now)
> {
> struct syn_cache *sc;
> struct syn_cache_head *scp;
> @@ -3744,7 +3744,7 @@ syn_cache_unreach(struct sockaddr *src,
> int
> syn_cache_add(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th,
> u_int iphlen, struct socket *so, struct mbuf *m, u_char *optp, int
> optlen,
> - struct tcp_opt_info *oi, tcp_seq *issp, uint32_t now)
> + struct tcp_opt_info *oi, tcp_seq *issp, uint64_t now)
> {
> struct tcpcb tb, *tp;
> long win;
> @@ -3911,7 +3911,7 @@ syn_cache_add(struct sockaddr *src, stru
> }
>
> int
> -syn_cache_respond(struct syn_cache *sc, struct mbuf *m, uint32_t now)
> +syn_cache_respond(struct syn_cache *sc, struct mbuf *m, uint64_t now)
> {
> u_int8_t *optp;
> int optlen, error;
> Index: netinet/tcp_output.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_output.c,v
> retrieving revision 1.138
> diff -u -p -r1.138 tcp_output.c
> --- netinet/tcp_output.c 15 May 2023 16:34:56 -0000 1.138
> +++ netinet/tcp_output.c 4 Jul 2023 08:49:47 -0000
> @@ -204,7 +204,7 @@ tcp_output(struct tcpcb *tp)
> int idle, sendalot = 0;
> int i, sack_rxmit = 0;
> struct sackhole *p;
> - uint32_t now;
> + uint64_t now;
> #ifdef TCP_SIGNATURE
> unsigned int sigoff;
> #endif /* TCP_SIGNATURE */
> Index: netinet/tcp_subr.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v
> retrieving revision 1.191
> diff -u -p -r1.191 tcp_subr.c
> --- netinet/tcp_subr.c 10 May 2023 12:07:16 -0000 1.191
> +++ netinet/tcp_subr.c 4 Jul 2023 08:49:47 -0000
> @@ -137,6 +137,7 @@ struct cpumem *tcpcounters; /* tcp stat
> u_char tcp_secret[16]; /* [I] */
> SHA2_CTX tcp_secret_ctx; /* [I] */
> tcp_seq tcp_iss; /* [T] updated by timer and connection
> */
> +uint64_t tcp_starttime; /* [I] random offset for tcp_now() */
>
> /*
> * Tcp initialization
> @@ -145,6 +146,9 @@ void
> tcp_init(void)
> {
> tcp_iss = 1; /* wrong */
> + /* 0 is treated special so add 1, 63 bits to count is enough */
> + arc4random_buf(&tcp_starttime, sizeof(tcp_starttime));
> + tcp_starttime = 1ULL + (tcp_starttime / 2);
> pool_init(&tcpcb_pool, sizeof(struct tcpcb), 0, IPL_SOFTNET, 0,
> "tcpcb", NULL);
> pool_init(&tcpqe_pool, sizeof(struct tcpqent), 0, IPL_SOFTNET, 0,
> @@ -289,7 +293,7 @@ tcp_template(struct tcpcb *tp)
> */
> void
> tcp_respond(struct tcpcb *tp, caddr_t template, struct tcphdr *th0,
> - tcp_seq ack, tcp_seq seq, int flags, u_int rtableid, uint32_t now)
> + tcp_seq ack, tcp_seq seq, int flags, u_int rtableid, uint64_t now)
> {
> int tlen;
> int win = 0;
> Index: netinet/tcp_timer.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.c,v
> retrieving revision 1.72
> diff -u -p -r1.72 tcp_timer.c
> --- netinet/tcp_timer.c 14 Mar 2023 00:24:05 -0000 1.72
> +++ netinet/tcp_timer.c 4 Jul 2023 08:49:47 -0000
> @@ -394,7 +394,7 @@ tcp_timer_persist(void *arg)
> struct tcpcb *otp = NULL, *tp = arg;
> uint32_t rto;
> short ostate;
> - uint32_t now;
> + uint64_t now;
>
> NET_LOCK();
> /* Ignore canceled timeouts or timeouts that have been rescheduled. */
> @@ -463,7 +463,7 @@ tcp_timer_keep(void *arg)
> tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) &&
> tp->t_state <= TCPS_CLOSING) {
> int maxidle;
> - uint32_t now;
> + uint64_t now;
>
> maxidle = READ_ONCE(tcp_maxidle);
> now = tcp_now();
> @@ -506,7 +506,7 @@ tcp_timer_2msl(void *arg)
> struct tcpcb *otp = NULL, *tp = arg;
> short ostate;
> int maxidle;
> - uint32_t now;
> + uint64_t now;
>
> NET_LOCK();
> /* Ignore canceled timeouts or timeouts that have been rescheduled. */
> Index: netinet/tcp_usrreq.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v
> retrieving revision 1.220
> diff -u -p -r1.220 tcp_usrreq.c
> --- netinet/tcp_usrreq.c 2 Jul 2023 19:59:15 -0000 1.220
> +++ netinet/tcp_usrreq.c 4 Jul 2023 08:49:47 -0000
> @@ -211,7 +211,7 @@ tcp_fill_info(struct tcpcb *tp, struct s
> struct proc *p = curproc;
> struct tcp_info *ti;
> u_int t = 1000; /* msec => usec */
> - uint32_t now;
> + uint64_t now;
>
> if (sizeof(*ti) > MLEN) {
> MCLGETL(m, M_WAITOK, sizeof(*ti));
> Index: netinet/tcp_var.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
> retrieving revision 1.168
> diff -u -p -r1.168 tcp_var.h
> --- netinet/tcp_var.h 2 Jul 2023 19:59:15 -0000 1.168
> +++ netinet/tcp_var.h 4 Jul 2023 08:49:47 -0000
> @@ -150,8 +150,8 @@ struct tcpcb {
> */
>
> /* auto-sizing variables */
> + uint64_t rfbuf_ts; /* recv buffer autoscaling time stamp */
> u_int rfbuf_cnt; /* recv buffer autoscaling byte count */
> - u_int32_t rfbuf_ts; /* recv buffer autoscaling time stamp */
>
> u_short t_maxopd; /* mss plus options */
> u_short t_peermss; /* peer's maximum segment size */
> @@ -160,11 +160,11 @@ struct tcpcb {
> * transmit timing stuff. See below for scale of srtt and rttvar.
> * "Variance" is actually smoothed difference.
> */
> - uint32_t t_rcvtime; /* time last segment received */
> - uint32_t t_rcvacktime; /* time last ack received */
> - uint32_t t_sndtime; /* time last segment sent */
> - uint32_t t_sndacktime; /* time last ack sent */
> - uint32_t t_rtttime; /* time we started measuring rtt */
> + uint64_t t_rcvtime; /* time last segment received */
> + uint64_t t_rcvacktime; /* time last ack received */
> + uint64_t t_sndtime; /* time last segment sent */
> + uint64_t t_sndacktime; /* time last ack sent */
> + uint64_t t_rtttime; /* time we started measuring rtt */
> tcp_seq t_rtseq; /* sequence number being timed */
> int t_srtt; /* smoothed round-trip time */
> int t_rttvar; /* variance in round-trip time */
> @@ -183,9 +183,9 @@ struct tcpcb {
> u_char rcv_scale; /* window scaling for recv window */
> u_char request_r_scale; /* pending window scaling */
> u_char requested_s_scale;
> - u_int32_t ts_recent; /* timestamp echo data */
> - u_int32_t ts_modulate; /* modulation on timestamp */
> - u_int32_t ts_recent_age; /* when last updated */
> + uint32_t ts_recent; /* timestamp echo data */
> + uint32_t ts_modulate; /* modulation on timestamp */
> + uint64_t ts_recent_age; /* when last updated */
> tcp_seq last_ack_sent;
>
> /* pointer for syn cache entries*/
> @@ -250,12 +250,9 @@ struct syn_cache {
> long sc_win; /* advertised window */
> struct syn_cache_head *sc_buckethead; /* our bucket index */
> struct syn_cache_set *sc_set; /* our syn cache set */
> + u_int64_t sc_timestamp; /* timestamp from SYN */
> u_int32_t sc_hash;
> - u_int32_t sc_timestamp; /* timestamp from SYN */
> u_int32_t sc_modulate; /* our timestamp modulator */
> -#if 0
> - u_int32_t sc_timebase; /* our local timebase */
> -#endif
> union syn_cache_sa sc_src;
> union syn_cache_sa sc_dst;
> tcp_seq sc_irs;
> @@ -657,10 +654,13 @@ tcpstat_pkt(enum tcpstat_counters pcount
> counters_pkt(tcpcounters, pcounter, bcounter, v);
> }
>
> -static inline uint32_t
> +extern uint64_t tcp_starttime;
> +
> +static inline uint64_t
> tcp_now(void)
> {
> - return (getnsecruntime() / 1000000);
> + /* TCP time ticks in 63 bit milliseconds with 63 bit random offset. */
> + return tcp_starttime + (getnsecruntime() / 1000000ULL);
> }
>
> #define TCP_TIME(_sec) ((_sec) * 1000) /* tcp_now() is in milliseconds
> */
> @@ -712,7 +712,7 @@ struct tcpcb *
> struct tcpcb *
> tcp_drop(struct tcpcb *, int);
> int tcp_dooptions(struct tcpcb *, u_char *, int, struct tcphdr *,
> - struct mbuf *, int, struct tcp_opt_info *, u_int, uint32_t);
> + struct mbuf *, int, struct tcp_opt_info *, u_int, uint64_t);
> void tcp_init(void);
> int tcp_input(struct mbuf **, int *, int, int);
> int tcp_mss(struct tcpcb *, int);
> @@ -735,7 +735,7 @@ void tcp_pulloutofband(struct socket *,
> int tcp_reass(struct tcpcb *, struct tcphdr *, struct mbuf *, int *);
> void tcp_rscale(struct tcpcb *, u_long);
> void tcp_respond(struct tcpcb *, caddr_t, struct tcphdr *, tcp_seq,
> - tcp_seq, int, u_int, uint32_t);
> + tcp_seq, int, u_int, uint64_t);
> void tcp_setpersist(struct tcpcb *);
> void tcp_update_sndspace(struct tcpcb *);
> void tcp_update_rcvspace(struct tcpcb *);
> @@ -767,7 +767,7 @@ int tcp_sense(struct socket *, struct s
> int tcp_rcvoob(struct socket *, struct mbuf *, int);
> int tcp_sendoob(struct socket *, struct mbuf *, struct mbuf *,
> struct mbuf *);
> -void tcp_xmit_timer(struct tcpcb *, int);
> +void tcp_xmit_timer(struct tcpcb *, int32_t);
> void tcpdropoldhalfopen(struct tcpcb *, u_int16_t);
> void tcp_sack_option(struct tcpcb *,struct tcphdr *,u_char *,int);
> void tcp_update_sack_list(struct tcpcb *tp, tcp_seq, tcp_seq);