From: Alexander Duyck <[email protected]>

Improve the adaptive interrupt throttle (ITR) algorithm in several ways:

 - Lower IXGBE_ITR_ADAPTIVE_MAX_USECS from 126 to 84 us, which raises the
   interrupt-rate floor in bulk mode to roughly 12K interrupts/s and
   prevents Rx starvation under sustained bulk traffic.

 - Add ixgbe_container_is_rx() helper to split the Rx vs Tx logic in
   ixgbe_update_itr(); Rx uses a latency-favouring path for small bursts
   (1 to 23 packets and fewer than 12112 bytes), targeting receipt of 8
   times as much data in the next interrupt. The old zero-packet special
   case is removed: a sample of 0 to 7 packets now selects latency mode
   in the shared small-increment path, while 8 to 47 packets keeps the
   current mode.

 - Limit the ITR decrease in latency mode to at most 2 us
   (IXGBE_ITR_ADAPTIVE_MIN_INC) per update, so ACK-dominated workloads do
   not drive the moderation too low and starve the TCP sender.

 - Add IXGBE_ITR_ADAPTIVE_MASK_USECS (= IXGBE_ITR_ADAPTIVE_LATENCY - 1
   = 0x7F) to mask out the mode flag bit 7 in ixgbe_set_itr(), replacing
   the open-coded ~IXGBE_ITR_ADAPTIVE_LATENCY.

Signed-off-by: Alexander Duyck <[email protected]>
Signed-off-by: Aleksandr Loktionov <[email protected]>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h      |  4 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 78 +++++++++++--------
 2 files changed, 48 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h 
b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 59a1cee4..c704cc6 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -475,9 +475,10 @@ static inline unsigned int ixgbe_rx_pg_order(struct 
ixgbe_ring *ring)
 
 #define IXGBE_ITR_ADAPTIVE_MIN_INC     2
 #define IXGBE_ITR_ADAPTIVE_MIN_USECS   10
-#define IXGBE_ITR_ADAPTIVE_MAX_USECS   126
+#define IXGBE_ITR_ADAPTIVE_MAX_USECS   84
 #define IXGBE_ITR_ADAPTIVE_LATENCY     0x80
 #define IXGBE_ITR_ADAPTIVE_BULK                0x00
+#define IXGBE_ITR_ADAPTIVE_MASK_USECS  (IXGBE_ITR_ADAPTIVE_LATENCY - 1)
 
 struct ixgbe_ring_container {
        struct ixgbe_ring *ring;        /* pointer to linked list of rings */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 0bc806a..1885fe8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2711,6 +2711,12 @@ static void ixgbe_configure_msix(struct ixgbe_adapter 
*adapter)
        IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, mask);
 }
 
+static inline bool ixgbe_container_is_rx(struct ixgbe_q_vector *q_vector,
+                                        struct ixgbe_ring_container *rc)
+{
+       return &q_vector->rx == rc;
+}
+
 /**
  * ixgbe_update_itr - update the dynamic ITR value based on statistics
  * @q_vector: structure containing interrupt and ring information
@@ -2747,35 +2753,24 @@ static void ixgbe_update_itr(struct ixgbe_q_vector 
*q_vector,
                goto clear_counts;
 
        packets = ring_container->total_packets;
-
-       /* We have no packets to actually measure against. This means
-        * either one of the other queues on this vector is active or
-        * we are a Tx queue doing TSO with too high of an interrupt rate.
-        *
-        * When this occurs just tick up our delay by the minimum value
-        * and hope that this extra delay will prevent us from being called
-        * without any work on our queue.
-        */
-       if (!packets) {
-               itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC;
-               if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
-                       itr = IXGBE_ITR_ADAPTIVE_MAX_USECS;
-               itr += ring_container->itr & IXGBE_ITR_ADAPTIVE_LATENCY;
-               goto clear_counts;
-       }
-
        bytes = ring_container->total_bytes;
 
-       /* If packets are less than 4 or bytes are less than 9000 assume
-        * insufficient data to use bulk rate limiting approach. We are
-        * likely latency driven.
-        */
-       if (packets < 4 && bytes < 9000) {
-               itr = IXGBE_ITR_ADAPTIVE_LATENCY;
-               goto adjust_by_size;
+       if (ixgbe_container_is_rx(q_vector, ring_container)) {
+               /* If Rx and there are 1 to 23 packets and bytes are less than
+                * 12112 assume insufficient data to use bulk rate limiting
+                * approach. Instead we will focus on simply trying to target
+                * receiving 8 times as much data in the next interrupt.
+                */
+               if (packets && packets < 24 && bytes < 12112) {
+                       itr = IXGBE_ITR_ADAPTIVE_LATENCY;
+                       avg_wire_size = (bytes + packets * 24) * 2;
+                       avg_wire_size = clamp_t(unsigned int,
+                                               avg_wire_size, 2560, 12800);
+                       goto adjust_for_speed;
+               }
        }
 
-       /* Between 4 and 48 we can assume that our current interrupt delay
+       /* Less than 48 packets we can assume that our current interrupt delay
         * is only slightly too low. As such we should increase it by a small
         * fixed amount.
         */
@@ -2783,6 +2778,20 @@ static void ixgbe_update_itr(struct ixgbe_q_vector 
*q_vector,
                itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC;
                if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
                        itr = IXGBE_ITR_ADAPTIVE_MAX_USECS;
+
+               /* If sample size is 0 - 7 we should probably switch
+                * to latency mode instead of trying to control
+                * things as though we are in bulk.
+                *
+                * Otherwise if the number of packets is less than 48
+                * we should maintain whatever mode we are currently
+                * in. The range between 8 and 48 is the cross-over
+                * point between latency and bulk traffic.
+                */
+               if (packets < 8)
+                       itr += IXGBE_ITR_ADAPTIVE_LATENCY;
+               else
+                       itr += ring_container->itr & IXGBE_ITR_ADAPTIVE_LATENCY;
                goto clear_counts;
        }
 
@@ -2813,7 +2822,6 @@ static void ixgbe_update_itr(struct ixgbe_q_vector 
*q_vector,
         */
        itr = IXGBE_ITR_ADAPTIVE_BULK;
 
-adjust_by_size:
        /* If packet counts are 256 or greater we can assume we have a gross
         * overestimation of what the rate should be. Instead of trying to fine
         * tune it just use the formula below to try and dial in an exact value
@@ -2856,12 +2864,7 @@ static void ixgbe_update_itr(struct ixgbe_q_vector 
*q_vector,
                avg_wire_size = 32256;
        }
 
-       /* If we are in low latency mode half our delay which doubles the rate
-        * to somewhere between 100K to 16K ints/sec
-        */
-       if (itr & IXGBE_ITR_ADAPTIVE_LATENCY)
-               avg_wire_size >>= 1;
-
+adjust_for_speed:
        /* Resultant value is 256 times larger than it needs to be. This
         * gives us room to adjust the value as needed to either increase
         * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
@@ -2888,6 +2891,15 @@ static void ixgbe_update_itr(struct ixgbe_q_vector 
*q_vector,
                break;
        }
 
+       /* In the case of a latency specific workload only allow us to
+        * reduce the ITR by at most 2us. By doing this we should dial
+        * in so that our number of interrupts is no more than 2x the number
+        * of packets for the least busy workload. So for example in the case
+        * of a TCP workload the ACK packets being received would set the
+        * interrupt rate as they are a latency specific workload.
+        */
+       if ((itr & IXGBE_ITR_ADAPTIVE_LATENCY) && itr < ring_container->itr)
+               itr = ring_container->itr - IXGBE_ITR_ADAPTIVE_MIN_INC;
 clear_counts:
        /* write back value */
        ring_container->itr = itr;
@@ -2948,7 +2960,7 @@ static void ixgbe_set_itr(struct ixgbe_q_vector *q_vector)
        new_itr = min(q_vector->rx.itr, q_vector->tx.itr);
 
        /* Clear latency flag if set, shift into correct position */
-       new_itr &= ~IXGBE_ITR_ADAPTIVE_LATENCY;
+       new_itr &= IXGBE_ITR_ADAPTIVE_MASK_USECS;
        new_itr <<= 2;
 
        if (new_itr != q_vector->itr) {
-- 
2.52.0

Reply via email to