When TCP implements its own pacing (when no fq packet scheduler is used),
it arms a high resolution timer after a packet is sent.

But in many cases (like TCP_RR kind of workloads), this high resolution
timer expires before the application attempts to write the following
packet. This overhead also happens when the flow is ACK clocked and
cwnd limited instead of being limited by the pacing rate.

This leads to extra overhead (a high number of IRQs).

Now that tcp_wstamp_ns is reserved for the pacing timer only
(after commit "tcp: do not change tcp_wstamp_ns in tcp_mstamp_refresh"),
we can set up the timer only when a packet is about to be sent,
and only if tcp_wstamp_ns is in the future.

This leads to a ~10% performance increase in TCP_RR workloads.

Signed-off-by: Eric Dumazet <eduma...@google.com>
---
 net/ipv4/tcp_output.c | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5474c9854f252e50cdb1136435417873861d7618..d212e4cbc68902e873afb4a12b43b467ccd6069b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -975,16 +975,6 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
        return HRTIMER_NORESTART;
 }
 
-static void tcp_internal_pacing(struct sock *sk)
-{
-       if (!tcp_needs_internal_pacing(sk))
-               return;
-       hrtimer_start(&tcp_sk(sk)->pacing_timer,
-                     ns_to_ktime(tcp_sk(sk)->tcp_wstamp_ns),
-                     HRTIMER_MODE_ABS_PINNED_SOFT);
-       sock_hold(sk);
-}
-
 static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb,
                                      u64 prior_wstamp)
 {
@@ -1005,8 +995,6 @@ static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb,
                        /* take into account OS jitter */
                        len_ns -= min_t(u64, len_ns / 2, credit);
                        tp->tcp_wstamp_ns += len_ns;
-
-                       tcp_internal_pacing(sk);
                }
        }
        list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
@@ -2186,10 +2174,23 @@ static int tcp_mtu_probe(struct sock *sk)
        return -1;
 }
 
-static bool tcp_pacing_check(const struct sock *sk)
+static bool tcp_pacing_check(struct sock *sk)
 {
-       return tcp_needs_internal_pacing(sk) &&
-              hrtimer_is_queued(&tcp_sk(sk)->pacing_timer);
+       struct tcp_sock *tp = tcp_sk(sk);
+
+       if (!tcp_needs_internal_pacing(sk))
+               return false;
+
+       if (tp->tcp_wstamp_ns <= tp->tcp_clock_cache)
+               return false;
+
+       if (!hrtimer_is_queued(&tp->pacing_timer)) {
+               hrtimer_start(&tp->pacing_timer,
+                             ns_to_ktime(tp->tcp_wstamp_ns),
+                             HRTIMER_MODE_ABS_PINNED_SOFT);
+               sock_hold(sk);
+       }
+       return true;
 }
 
 /* TCP Small Queues :
-- 
2.19.0.605.g01d371f741-goog

Reply via email to