From: Alexander Duyck <[email protected]> Improve the adaptive interrupt throttle (ITR) algorithm in several ways:
- Lower IXGBE_ITR_ADAPTIVE_MAX_USECS from 126 to 84 us (12K interrupts/s minimum in bulk mode) to prevent RX starvation in full-blown bulk scenarios. - Add ixgbe_container_is_rx() helper to split the Rx vs Tx logic in ixgbe_update_itr(); Rx uses a latency-favouring path for small bursts (< 24 packets and < 12112 bytes), targeting 8x throughput growth per step. - Limit the ITR decrease in latency mode to at most 2 us per update so ACK workloads do not overdrive the moderation and starve TCP senders. - Add IXGBE_ITR_ADAPTIVE_MASK_USECS (= IXGBE_ITR_ADAPTIVE_LATENCY - 1 = 0x7F) to mask out the mode flag bit 7 in ixgbe_set_itr(), replacing the open-coded ~IXGBE_ITR_ADAPTIVE_LATENCY. Signed-off-by: Alexander Duyck <[email protected]> Signed-off-by: Aleksandr Loktionov <[email protected]> --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 4 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 78 +++++++++++-------- 2 files changed, 48 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 59a1cee4..c704cc6 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -475,9 +475,10 @@ static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring) #define IXGBE_ITR_ADAPTIVE_MIN_INC 2 #define IXGBE_ITR_ADAPTIVE_MIN_USECS 10 -#define IXGBE_ITR_ADAPTIVE_MAX_USECS 126 +#define IXGBE_ITR_ADAPTIVE_MAX_USECS 84 #define IXGBE_ITR_ADAPTIVE_LATENCY 0x80 #define IXGBE_ITR_ADAPTIVE_BULK 0x00 +#define IXGBE_ITR_ADAPTIVE_MASK_USECS (IXGBE_ITR_ADAPTIVE_LATENCY - 1) struct ixgbe_ring_container { struct ixgbe_ring *ring; /* pointer to linked list of rings */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 0bc806a..1885fe8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2711,6 +2711,12 @@ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter) IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, mask); } +static inline bool ixgbe_container_is_rx(struct ixgbe_q_vector *q_vector, + struct ixgbe_ring_container *rc) +{ + return &q_vector->rx == rc; +} + /** * ixgbe_update_itr - update the dynamic ITR value based on statistics * @q_vector: structure containing interrupt and ring information @@ -2747,35 +2753,24 @@ static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector, goto clear_counts; packets = ring_container->total_packets; - - /* We have no packets to actually measure against. This means - * either one of the other queues on this vector is active or - * we are a Tx queue doing TSO with too high of an interrupt rate. - * - * When this occurs just tick up our delay by the minimum value - * and hope that this extra delay will prevent us from being called - * without any work on our queue. - */ - if (!packets) { - itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC; - if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS) - itr = IXGBE_ITR_ADAPTIVE_MAX_USECS; - itr += ring_container->itr & IXGBE_ITR_ADAPTIVE_LATENCY; - goto clear_counts; - } - bytes = ring_container->total_bytes; - /* If packets are less than 4 or bytes are less than 9000 assume - * insufficient data to use bulk rate limiting approach. We are - * likely latency driven. - */ - if (packets < 4 && bytes < 9000) { - itr = IXGBE_ITR_ADAPTIVE_LATENCY; - goto adjust_by_size; + if (ixgbe_container_is_rx(q_vector, ring_container)) { + /* If Rx and there are 1 to 23 packets and bytes are less than + * 12112 assume insufficient data to use bulk rate limiting + * approach. Instead we will focus on simply trying to target + * receiving 8 times as much data in the next interrupt. + */ + if (packets && packets < 24 && bytes < 12112) { + itr = IXGBE_ITR_ADAPTIVE_LATENCY; + avg_wire_size = (bytes + packets * 24) * 2; + avg_wire_size = clamp_t(unsigned int, + avg_wire_size, 2560, 12800); + goto adjust_for_speed; + } } - /* Between 4 and 48 we can assume that our current interrupt delay + /* Less than 48 packets we can assume that our current interrupt delay * is only slightly too low. As such we should increase it by a small * fixed amount. */ @@ -2783,6 +2778,20 @@ static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector, itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC; if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS) itr = IXGBE_ITR_ADAPTIVE_MAX_USECS; + + /* If sample size is 0 - 7 we should probably switch + * to latency mode instead of trying to control + * things as though we are in bulk. + * + * Otherwise if the number of packets is less than 48 + * we should maintain whatever mode we are currently + * in. The range between 8 and 48 is the cross-over + * point between latency and bulk traffic. + */ + if (packets < 8) + itr += IXGBE_ITR_ADAPTIVE_LATENCY; + else + itr += ring_container->itr & IXGBE_ITR_ADAPTIVE_LATENCY; goto clear_counts; } @@ -2813,7 +2822,6 @@ static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector, */ itr = IXGBE_ITR_ADAPTIVE_BULK; -adjust_by_size: /* If packet counts are 256 or greater we can assume we have a gross * overestimation of what the rate should be. Instead of trying to fine * tune it just use the formula below to try and dial in an exact value @@ -2856,12 +2864,7 @@ static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector, avg_wire_size = 32256; } - /* If we are in low latency mode half our delay which doubles the rate - * to somewhere between 100K to 16K ints/sec - */ - if (itr & IXGBE_ITR_ADAPTIVE_LATENCY) - avg_wire_size >>= 1; - +adjust_for_speed: /* Resultant value is 256 times larger than it needs to be. This * gives us room to adjust the value as needed to either increase * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc. @@ -2888,6 +2891,15 @@ static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector, break; } + /* In the case of a latency specific workload only allow us to + * reduce the ITR by at most 2us. By doing this we should dial + * in so that our number of interrupts is no more than 2x the number + * of packets for the least busy workload. So for example in the case + * of a TCP workload the ACK packets being received would set the + * interrupt rate as they are a latency specific workload. + */ + if ((itr & IXGBE_ITR_ADAPTIVE_LATENCY) && itr < ring_container->itr) + itr = ring_container->itr - IXGBE_ITR_ADAPTIVE_MIN_INC; clear_counts: /* write back value */ ring_container->itr = itr; @@ -2948,7 +2960,7 @@ static void ixgbe_set_itr(struct ixgbe_q_vector *q_vector) new_itr = min(q_vector->rx.itr, q_vector->tx.itr); /* Clear latency flag if set, shift into correct position */ - new_itr &= ~IXGBE_ITR_ADAPTIVE_LATENCY; + new_itr &= IXGBE_ITR_ADAPTIVE_MASK_USECS; new_itr <<= 2; if (new_itr != q_vector->itr) { -- 2.52.0
