When a qdisc setup including pacing FQ is dismantled and recreated,
some TCP packets are sent earlier than instructed by the TCP stack.

TCP can be fooled when the ACK comes back, because the following
operation can yield a negative value (which, stored in a u32, wraps
around to a very large delta).

    tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;

Some paths in the TCP stack were not dealing properly with this;
this patch addresses four of them.

Fixes: ab408b6dc744 ("tcp: switch tcp and sch_fq to new earliest departure time model")
Signed-off-by: Eric Dumazet <eduma...@google.com>
---
 net/ipv4/tcp_input.c | 16 ++++++++++------
 net/ipv4/tcp_timer.c | 10 ++++++----
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1e37c13881893c8a73c0fcaa82a9979d3b5e1798..a9d9555a973fed4e3562a57d1a2cdadfef40dae4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -579,10 +579,12 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
                u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
                u32 delta_us;
 
-               if (!delta)
-                       delta = 1;
-               delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
-               tcp_rcv_rtt_update(tp, delta_us, 0);
+               if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+                       if (!delta)
+                               delta = 1;
+                       delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+                       tcp_rcv_rtt_update(tp, delta_us, 0);
+               }
        }
 }
 
@@ -2910,9 +2912,11 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
        if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
            flag & FLAG_ACKED) {
                u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
-               u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
 
-               seq_rtt_us = ca_rtt_us = delta_us;
+               if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+                       seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+                       ca_rtt_us = seq_rtt_us;
+               }
        }
        rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
        if (seq_rtt_us < 0)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 5f8b6d3cd855dc639409e69d84ade5bb2be51626..091c53925e4da6b2b154d166682a0ac0aefd7ecb 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -40,15 +40,17 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
        u32 elapsed, start_ts;
+       s32 remaining;
 
        start_ts = tcp_retransmit_stamp(sk);
        if (!icsk->icsk_user_timeout || !start_ts)
                return icsk->icsk_rto;
        elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
-       if (elapsed >= icsk->icsk_user_timeout)
+       remaining = icsk->icsk_user_timeout - elapsed;
+       if (remaining <= 0)
                return 1; /* user timeout has passed; fire ASAP */
-       else
-               return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(icsk->icsk_user_timeout - elapsed));
+
+       return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining));
 }
 
 /**
@@ -209,7 +211,7 @@ static bool retransmits_timed_out(struct sock *sk,
                                (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
                timeout = jiffies_to_msecs(timeout);
        }
-       return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= timeout;
+       return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
 }
 
 /* A write timeout has occurred. Process the after effects. */
-- 
2.20.0.rc0.387.gc7a69e6b6c-goog

Reply via email to