Hi Neil,

On 10-09-2018 12:43, Neil Armstrong wrote:
> Hi Jose,
>
> On 10/09/2018 11:14, Jose Abreu wrote:
>> This follows David Miller advice and tries to fix coalesce timer in
>> multi-queue scenarios.
>>
>> We are now using per-queue coalesce values and per-queue TX timer.
>>
>> Coalesce timer default values were changed to 1ms and the coalesce frames
>> to 25.
>>
>> Tested in B2B setup between XGMAC2 and GMAC5.
>>
>> Signed-off-by: Jose Abreu <joab...@synopsys.com>
>> Cc: Jerome Brunet <jbru...@baylibre.com>
>> Cc: Martin Blumenstingl <martin.blumensti...@googlemail.com>
>> Cc: David S. Miller <da...@davemloft.net>
>> Cc: Joao Pinto <jpi...@synopsys.com>
>> Cc: Giuseppe Cavallaro <peppe.cavall...@st.com>
>> Cc: Alexandre Torgue <alexandre.tor...@st.com>
>> ---
>> Jerome,
> Jerome is in holidays...
>
>> Can you please test if this one is okay?
>
> I tested this patch on top of 4.18.7 with the previous patch (4ae0169fd1b3) 
> reverted.
>
> The TX or RX stopped a few seconds after iperf3 started :
> (iperf3 is running in server mode on the Amlogic A113D board)
>
> $ iperf3 -c 10.1.4.95
> Connecting to host 10.1.4.95, port 5201
> [  4] local 10.1.2.12 port 39906 connected to 10.1.4.95 port 5201
> [ ID] Interval           Transfer     Bandwidth       Retr  Cwnd
> [  4]   0.00-1.00   sec  56.2 MBytes   472 Mbits/sec    0    660 KBytes
> [  4]   1.00-2.00   sec  56.2 MBytes   472 Mbits/sec    0    660 KBytes
> [  4]   2.00-3.00   sec  38.8 MBytes   325 Mbits/sec    1   1.41 KBytes
> [  4]   3.00-4.00   sec  0.00 Bytes  0.00 bits/sec    1   1.41 KBytes
> [  4]   4.00-5.00   sec  0.00 Bytes  0.00 bits/sec    1   1.41 KBytes
> ^C[  4]   5.00-5.61   sec  0.00 Bytes  0.00 bits/sec    0   1.41 KBytes
> - - - - - - - - - - - - - - - - - - - - - - - - -
> [ ID] Interval           Transfer     Bandwidth       Retr
> [  4]   0.00-5.61   sec   151 MBytes   226 Mbits/sec    3             sender
> [  4]   0.00-5.61   sec  0.00 Bytes  0.00 bits/sec                  receiver
> iperf3: interrupt - the client has terminated
>
> $ iperf3 -c 10.1.4.95 -R
> Connecting to host 10.1.4.95, port 5201
> Reverse mode, remote host 10.1.4.95 is sending
> [  4] local 10.1.2.12 port 39894 connected to 10.1.4.95 port 5201
> [ ID] Interval           Transfer     Bandwidth
> [  4]   0.00-1.00   sec  82.2 MBytes   690 Mbits/sec
> [  4]   1.00-2.00   sec  82.6 MBytes   693 Mbits/sec
> [  4]   2.00-3.00   sec  24.2 MBytes   203 Mbits/sec
> [  4]   3.00-4.00   sec  0.00 Bytes  0.00 bits/sec
> [  4]   4.00-5.00   sec  0.00 Bytes  0.00 bits/sec
> ^C[  4]   5.00-5.53   sec  0.00 Bytes  0.00 bits/sec
> - - - - - - - - - - - - - - - - - - - - - - - - -
> [ ID] Interval           Transfer     Bandwidth
> [  4]   0.00-5.53   sec  0.00 Bytes  0.00 bits/sec                  sender
> [  4]   0.00-5.53   sec   189 MBytes   287 Mbits/sec                  receiver
> iperf3: interrupt - the client has terminated
>
> These work for hours without this patch applied.

Damn... I'm able to replicate the issue if I turn SMP off, but
it's not consistent...

Can you please try the attached follow-up patch? It's been running
for 10 minutes now and I'm getting ~700Mbps on the same GMAC
version (3.71) as you have.

Thanks and Best Regards,
Jose Miguel Abreu

>
> Neil
>
>> Thanks and Best Regards,
>> Jose Miguel Abreu
>> ---
>>  drivers/net/ethernet/stmicro/stmmac/common.h      |   4 +-
>>  drivers/net/ethernet/stmicro/stmmac/stmmac.h      |   6 +-
>>  drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 207 
>> ++++++++++++++--------
>>  3 files changed, 135 insertions(+), 82 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h 
>> b/drivers/net/ethernet/stmicro/stmmac/common.h
>> index 1854f270ad66..b1b305f8f414 100644
>> --- a/drivers/net/ethernet/stmicro/stmmac/common.h
>> +++ b/drivers/net/ethernet/stmicro/stmmac/common.h
>> @@ -258,10 +258,10 @@ struct stmmac_safety_stats {
>>  #define MAX_DMA_RIWT                0xff
>>  #define MIN_DMA_RIWT                0x20
>>  /* Tx coalesce parameters */
>> -#define STMMAC_COAL_TX_TIMER        40000
>> +#define STMMAC_COAL_TX_TIMER        1000
>>  #define STMMAC_MAX_COAL_TX_TICK     100000
>>  #define STMMAC_TX_MAX_FRAMES        256
>> -#define STMMAC_TX_FRAMES    64
>> +#define STMMAC_TX_FRAMES    25
>>  
>>  /* Packets types */
>>  enum packets_types {
>> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h 
>> b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
>> index c0a855b7ab3b..957030cfb833 100644
>> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
>> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
>> @@ -48,6 +48,9 @@ struct stmmac_tx_info {
>>  
>>  /* Frequently used values are kept adjacent for cache effect */
>>  struct stmmac_tx_queue {
>> +    u32 tx_count_frames;
>> +    int tx_timer_active;
>> +    struct timer_list txtimer;
>>      u32 queue_index;
>>      struct stmmac_priv *priv_data;
>>      struct dma_extended_desc *dma_etx ____cacheline_aligned_in_smp;
>> @@ -59,6 +62,7 @@ struct stmmac_tx_queue {
>>      dma_addr_t dma_tx_phy;
>>      u32 tx_tail_addr;
>>      u32 mss;
>> +    struct napi_struct napi ____cacheline_aligned_in_smp;
>>  };
>>  
>>  struct stmmac_rx_queue {
>> @@ -109,14 +113,12 @@ struct stmmac_pps_cfg {
>>  
>>  struct stmmac_priv {
>>      /* Frequently used values are kept adjacent for cache effect */
>> -    u32 tx_count_frames;
>>      u32 tx_coal_frames;
>>      u32 tx_coal_timer;
>>  
>>      int tx_coalesce;
>>      int hwts_tx_en;
>>      bool tx_path_in_lpi_mode;
>> -    struct timer_list txtimer;
>>      bool tso;
>>  
>>      unsigned int dma_buf_sz;
>> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c 
>> b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
>> index 9f458bb16f2a..9809c2b319fe 100644
>> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
>> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
>> @@ -148,6 +148,7 @@ static void stmmac_verify_args(void)
>>  static void stmmac_disable_all_queues(struct stmmac_priv *priv)
>>  {
>>      u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
>> +    u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
>>      u32 queue;
>>  
>>      for (queue = 0; queue < rx_queues_cnt; queue++) {
>> @@ -155,6 +156,12 @@ static void stmmac_disable_all_queues(struct 
>> stmmac_priv *priv)
>>  
>>              napi_disable(&rx_q->napi);
>>      }
>> +
>> +    for (queue = 0; queue < tx_queues_cnt; queue++) {
>> +            struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
>> +
>> +            napi_disable(&tx_q->napi);
>> +    }
>>  }
>>  
>>  /**
>> @@ -164,6 +171,7 @@ static void stmmac_disable_all_queues(struct stmmac_priv 
>> *priv)
>>  static void stmmac_enable_all_queues(struct stmmac_priv *priv)
>>  {
>>      u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
>> +    u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
>>      u32 queue;
>>  
>>      for (queue = 0; queue < rx_queues_cnt; queue++) {
>> @@ -171,6 +179,12 @@ static void stmmac_enable_all_queues(struct stmmac_priv 
>> *priv)
>>  
>>              napi_enable(&rx_q->napi);
>>      }
>> +
>> +    for (queue = 0; queue < tx_queues_cnt; queue++) {
>> +            struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
>> +
>> +            napi_enable(&tx_q->napi);
>> +    }
>>  }
>>  
>>  /**
>> @@ -1843,7 +1857,8 @@ static void stmmac_dma_operation_mode(struct 
>> stmmac_priv *priv)
>>   * @queue: TX queue index
>>   * Description: it reclaims the transmit resources after transmission 
>> completes.
>>   */
>> -static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
>> +static int stmmac_tx_clean(struct stmmac_priv *priv, int limit, u32 queue,
>> +                       bool *more)
>>  {
>>      struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
>>      unsigned int bytes_compl = 0, pkts_compl = 0;
>> @@ -1851,10 +1866,13 @@ static void stmmac_tx_clean(struct stmmac_priv 
>> *priv, u32 queue)
>>  
>>      netif_tx_lock(priv->dev);
>>  
>> +    if (more)
>> +            *more = false;
>> +
>>      priv->xstats.tx_clean++;
>>  
>>      entry = tx_q->dirty_tx;
>> -    while (entry != tx_q->cur_tx) {
>> +    while ((entry != tx_q->cur_tx) && (pkts_compl < limit)) {
>>              struct sk_buff *skb = tx_q->tx_skbuff[entry];
>>              struct dma_desc *p;
>>              int status;
>> @@ -1937,7 +1955,13 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, 
>> u32 queue)
>>              stmmac_enable_eee_mode(priv);
>>              mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer));
>>      }
>> +
>> +    if (more && (tx_q->dirty_tx != tx_q->cur_tx))
>> +            *more = true;
>> +
>>      netif_tx_unlock(priv->dev);
>> +
>> +    return pkts_compl;
>>  }
>>  
>>  /**
>> @@ -2020,6 +2044,34 @@ static bool stmmac_safety_feat_interrupt(struct 
>> stmmac_priv *priv)
>>      return false;
>>  }
>>  
>> +static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan)
>> +{
>> +    int status = stmmac_dma_interrupt_status(priv, priv->ioaddr,
>> +                                             &priv->xstats, chan);
>> +
>> +    if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) {
>> +            struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
>> +
>> +            if (likely(napi_schedule_prep(&rx_q->napi))) {
>> +                    stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
>> +                    __napi_schedule(&rx_q->napi);
>> +            }
>> +    } else {
>> +            status &= ~handle_rx;
>> +    }
>> +
>> +    if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) {
>> +            struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
>> +
>> +            if (likely(napi_schedule_prep(&tx_q->napi)))
>> +                    __napi_schedule(&tx_q->napi);
>> +    } else {
>> +            status &= ~handle_tx;
>> +    }
>> +
>> +    return status;
>> +}
>> +
>>  /**
>>   * stmmac_dma_interrupt - DMA ISR
>>   * @priv: driver private structure
>> @@ -2034,57 +2086,14 @@ static void stmmac_dma_interrupt(struct stmmac_priv 
>> *priv)
>>      u32 channels_to_check = tx_channel_count > rx_channel_count ?
>>                              tx_channel_count : rx_channel_count;
>>      u32 chan;
>> -    bool poll_scheduled = false;
>>      int status[max_t(u32, MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES)];
>>  
>>      /* Make sure we never check beyond our status buffer. */
>>      if (WARN_ON_ONCE(channels_to_check > ARRAY_SIZE(status)))
>>              channels_to_check = ARRAY_SIZE(status);
>>  
>> -    /* Each DMA channel can be used for rx and tx simultaneously, yet
>> -     * napi_struct is embedded in struct stmmac_rx_queue rather than in a
>> -     * stmmac_channel struct.
>> -     * Because of this, stmmac_poll currently checks (and possibly wakes)
>> -     * all tx queues rather than just a single tx queue.
>> -     */
>>      for (chan = 0; chan < channels_to_check; chan++)
>> -            status[chan] = stmmac_dma_interrupt_status(priv, priv->ioaddr,
>> -                            &priv->xstats, chan);
>> -
>> -    for (chan = 0; chan < rx_channel_count; chan++) {
>> -            if (likely(status[chan] & handle_rx)) {
>> -                    struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
>> -
>> -                    if (likely(napi_schedule_prep(&rx_q->napi))) {
>> -                            stmmac_disable_dma_irq(priv, priv->ioaddr, 
>> chan);
>> -                            __napi_schedule(&rx_q->napi);
>> -                            poll_scheduled = true;
>> -                    }
>> -            }
>> -    }
>> -
>> -    /* If we scheduled poll, we already know that tx queues will be checked.
>> -     * If we didn't schedule poll, see if any DMA channel (used by tx) has a
>> -     * completed transmission, if so, call stmmac_poll (once).
>> -     */
>> -    if (!poll_scheduled) {
>> -            for (chan = 0; chan < tx_channel_count; chan++) {
>> -                    if (status[chan] & handle_tx) {
>> -                            /* It doesn't matter what rx queue we choose
>> -                             * here. We use 0 since it always exists.
>> -                             */
>> -                            struct stmmac_rx_queue *rx_q =
>> -                                    &priv->rx_queue[0];
>> -
>> -                            if (likely(napi_schedule_prep(&rx_q->napi))) {
>> -                                    stmmac_disable_dma_irq(priv,
>> -                                                    priv->ioaddr, chan);
>> -                                    __napi_schedule(&rx_q->napi);
>> -                            }
>> -                            break;
>> -                    }
>> -            }
>> -    }
>> +            status[chan] = stmmac_napi_check(priv, chan);
>>  
>>      for (chan = 0; chan < tx_channel_count; chan++) {
>>              if (unlikely(status[chan] & tx_hard_error_bump_tc)) {
>> @@ -2241,13 +2250,11 @@ static int stmmac_init_dma_engine(struct stmmac_priv 
>> *priv)
>>   */
>>  static void stmmac_tx_timer(struct timer_list *t)
>>  {
>> -    struct stmmac_priv *priv = from_timer(priv, t, txtimer);
>> -    u32 tx_queues_count = priv->plat->tx_queues_to_use;
>> -    u32 queue;
>> +    struct stmmac_tx_queue *tx_q = from_timer(tx_q, t, txtimer);
>>  
>> -    /* let's scan all the tx queues */
>> -    for (queue = 0; queue < tx_queues_count; queue++)
>> -            stmmac_tx_clean(priv, queue);
>> +    if (likely(napi_schedule_prep(&tx_q->napi)))
>> +            __napi_schedule(&tx_q->napi);
>> +    tx_q->tx_timer_active = 0;
>>  }
>>  
>>  /**
>> @@ -2260,11 +2267,17 @@ static void stmmac_tx_timer(struct timer_list *t)
>>   */
>>  static void stmmac_init_tx_coalesce(struct stmmac_priv *priv)
>>  {
>> +    u32 tx_channel_count = priv->plat->tx_queues_to_use;
>> +    u32 chan;
>> +
>>      priv->tx_coal_frames = STMMAC_TX_FRAMES;
>>      priv->tx_coal_timer = STMMAC_COAL_TX_TIMER;
>> -    timer_setup(&priv->txtimer, stmmac_tx_timer, 0);
>> -    priv->txtimer.expires = STMMAC_COAL_TIMER(priv->tx_coal_timer);
>> -    add_timer(&priv->txtimer);
>> +
>> +    for (chan = 0; chan < tx_channel_count; chan++) {
>> +            struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
>> +
>> +            timer_setup(&tx_q->txtimer, stmmac_tx_timer, 0);
>> +    }
>>  }
>>  
>>  static void stmmac_set_rings_length(struct stmmac_priv *priv)
>> @@ -2592,6 +2605,7 @@ static void stmmac_hw_teardown(struct net_device *dev)
>>  static int stmmac_open(struct net_device *dev)
>>  {
>>      struct stmmac_priv *priv = netdev_priv(dev);
>> +    u32 chan;
>>      int ret;
>>  
>>      stmmac_check_ether_addr(priv);
>> @@ -2688,7 +2702,9 @@ static int stmmac_open(struct net_device *dev)
>>      if (dev->phydev)
>>              phy_stop(dev->phydev);
>>  
>> -    del_timer_sync(&priv->txtimer);
>> +    for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++)
>> +            del_timer_sync(&priv->tx_queue[chan].txtimer);
>> +
>>      stmmac_hw_teardown(dev);
>>  init_error:
>>      free_dma_desc_resources(priv);
>> @@ -2708,6 +2724,7 @@ static int stmmac_open(struct net_device *dev)
>>  static int stmmac_release(struct net_device *dev)
>>  {
>>      struct stmmac_priv *priv = netdev_priv(dev);
>> +    u32 chan;
>>  
>>      if (priv->eee_enabled)
>>              del_timer_sync(&priv->eee_ctrl_timer);
>> @@ -2722,7 +2739,8 @@ static int stmmac_release(struct net_device *dev)
>>  
>>      stmmac_disable_all_queues(priv);
>>  
>> -    del_timer_sync(&priv->txtimer);
>> +    for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++)
>> +            del_timer_sync(&priv->tx_queue[chan].txtimer);
>>  
>>      /* Free the IRQ lines */
>>      free_irq(dev->irq, dev);
>> @@ -2936,14 +2954,11 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff 
>> *skb, struct net_device *dev)
>>      priv->xstats.tx_tso_nfrags += nfrags;
>>  
>>      /* Manage tx mitigation */
>> -    priv->tx_count_frames += nfrags + 1;
>> -    if (likely(priv->tx_coal_frames > priv->tx_count_frames)) {
>> -            mod_timer(&priv->txtimer,
>> -                      STMMAC_COAL_TIMER(priv->tx_coal_timer));
>> -    } else {
>> -            priv->tx_count_frames = 0;
>> +    tx_q->tx_count_frames += nfrags + 1;
>> +    if (priv->tx_coal_frames <= tx_q->tx_count_frames) {
>>              stmmac_set_tx_ic(priv, desc);
>>              priv->xstats.tx_set_ic_bit++;
>> +            tx_q->tx_count_frames = 0;
>>      }
>>  
>>      skb_tx_timestamp(skb);
>> @@ -2994,6 +3009,12 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff 
>> *skb, struct net_device *dev)
>>  
>>      stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
>>  
>> +    if (priv->tx_coal_timer && !tx_q->tx_timer_active) {
>> +            tx_q->tx_timer_active = 1;
>> +            mod_timer(&tx_q->txtimer,
>> +                            STMMAC_COAL_TIMER(priv->tx_coal_timer));
>> +    }
>> +
>>      return NETDEV_TX_OK;
>>  
>>  dma_map_err:
>> @@ -3146,14 +3167,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, 
>> struct net_device *dev)
>>       * This approach takes care about the fragments: desc is the first
>>       * element in case of no SG.
>>       */
>> -    priv->tx_count_frames += nfrags + 1;
>> -    if (likely(priv->tx_coal_frames > priv->tx_count_frames)) {
>> -            mod_timer(&priv->txtimer,
>> -                      STMMAC_COAL_TIMER(priv->tx_coal_timer));
>> -    } else {
>> -            priv->tx_count_frames = 0;
>> +    tx_q->tx_count_frames += nfrags + 1;
>> +    if (priv->tx_coal_frames <= tx_q->tx_count_frames) {
>>              stmmac_set_tx_ic(priv, desc);
>>              priv->xstats.tx_set_ic_bit++;
>> +            tx_q->tx_count_frames = 0;
>>      }
>>  
>>      skb_tx_timestamp(skb);
>> @@ -3199,8 +3217,15 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, 
>> struct net_device *dev)
>>      netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
>>  
>>      stmmac_enable_dma_transmission(priv, priv->ioaddr);
>> +
>>      stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
>>  
>> +    if (priv->tx_coal_timer && !tx_q->tx_timer_active) {
>> +            tx_q->tx_timer_active = 1;
>> +            mod_timer(&tx_q->txtimer,
>> +                            STMMAC_COAL_TIMER(priv->tx_coal_timer));
>> +    }
>> +
>>      return NETDEV_TX_OK;
>>  
>>  dma_map_err:
>> @@ -3514,27 +3539,41 @@ static int stmmac_rx(struct stmmac_priv *priv, int 
>> limit, u32 queue)
>>   *  Description :
>>   *  To look at the incoming frames and clear the tx resources.
>>   */
>> -static int stmmac_poll(struct napi_struct *napi, int budget)
>> +static int stmmac_rx_poll(struct napi_struct *napi, int budget)
>>  {
>>      struct stmmac_rx_queue *rx_q =
>>              container_of(napi, struct stmmac_rx_queue, napi);
>>      struct stmmac_priv *priv = rx_q->priv_data;
>> -    u32 tx_count = priv->plat->tx_queues_to_use;
>>      u32 chan = rx_q->queue_index;
>>      int work_done = 0;
>> -    u32 queue;
>>  
>>      priv->xstats.napi_poll++;
>>  
>> -    /* check all the queues */
>> -    for (queue = 0; queue < tx_count; queue++)
>> -            stmmac_tx_clean(priv, queue);
>> -
>>      work_done = stmmac_rx(priv, budget, rx_q->queue_index);
>> +    if (work_done < budget && napi_complete_done(napi, work_done))
>> +            stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
>> +
>> +    return work_done;
>> +}
>> +
>> +static int stmmac_tx_poll(struct napi_struct *napi, int budget)
>> +{
>> +    struct stmmac_tx_queue *tx_q =
>> +            container_of(napi, struct stmmac_tx_queue, napi);
>> +    struct stmmac_priv *priv = tx_q->priv_data;
>> +    u32 chan = tx_q->queue_index;
>> +    int work_done = 0;
>> +    bool more;
>> +
>> +    priv->xstats.napi_poll++;
>> +
>> +    work_done = stmmac_tx_clean(priv, budget, chan, &more);
>>      if (work_done < budget) {
>>              napi_complete_done(napi, work_done);
>> -            stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
>> +            if (more)
>> +                    napi_reschedule(napi);
>>      }
>> +
>>      return work_done;
>>  }
>>  
>> @@ -4325,10 +4364,17 @@ int stmmac_dvr_probe(struct device *device,
>>      for (queue = 0; queue < priv->plat->rx_queues_to_use; queue++) {
>>              struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
>>  
>> -            netif_napi_add(ndev, &rx_q->napi, stmmac_poll,
>> +            netif_napi_add(ndev, &rx_q->napi, stmmac_rx_poll,
>>                             (8 * priv->plat->rx_queues_to_use));
>>      }
>>  
>> +    for (queue = 0; queue < priv->plat->tx_queues_to_use; queue++) {
>> +            struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
>> +
>> +            netif_napi_add(ndev, &tx_q->napi, stmmac_tx_poll,
>> +                           (8 * priv->plat->tx_queues_to_use));
>> +    }
>> +
>>      mutex_init(&priv->lock);
>>  
>>      /* If a specific clk_csr value is passed from the platform
>> @@ -4377,6 +4423,11 @@ int stmmac_dvr_probe(struct device *device,
>>  
>>              netif_napi_del(&rx_q->napi);
>>      }
>> +    for (queue = 0; queue < priv->plat->tx_queues_to_use; queue++) {
>> +            struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
>> +
>> +            netif_napi_del(&tx_q->napi);
>> +    }
>>  error_hw_init:
>>      destroy_workqueue(priv->wq);
>>  error_wq:
>>

>From 35a5e33f4edcc663511d615e61a1ea119dfc77ee Mon Sep 17 00:00:00 2001
Message-Id: <35a5e33f4edcc663511d615e61a1ea119dfc77ee.1536583587.git.joab...@synopsys.com>
From: Jose Abreu <joab...@synopsys.com>
Date: Mon, 10 Sep 2018 14:46:09 +0200
Subject: [PATCH] fixup_coalesce

Signed-off-by: Jose Abreu <joab...@synopsys.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 39 +++++++++++++----------
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 97268769186e..7875e81966fb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1872,7 +1872,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int limit, u32 queue,
 	priv->xstats.tx_clean++;
 
 	entry = tx_q->dirty_tx;
-	while ((entry != tx_q->cur_tx) && (pkts_compl < limit)) {
+	while (entry != tx_q->cur_tx) {
 		struct sk_buff *skb = tx_q->tx_skbuff[entry];
 		struct dma_desc *p;
 		int status;
@@ -2241,6 +2241,17 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
 	return ret;
 }
 
+static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue)
+{
+	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+	if (tx_q->tx_timer_active)
+		return;
+
+	mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(priv->tx_coal_timer));
+	tx_q->tx_timer_active = true;
+}
+
 /**
  * stmmac_tx_timer - mitigation sw timer for tx.
  * @data: data pointer
@@ -2250,10 +2261,10 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
 static void stmmac_tx_timer(struct timer_list *t)
 {
 	struct stmmac_tx_queue *tx_q = from_timer(tx_q, t, txtimer);
+	struct stmmac_priv *priv = tx_q->priv_data;
 
-	if (likely(napi_schedule_prep(&tx_q->napi)))
-		__napi_schedule(&tx_q->napi);
-	tx_q->tx_timer_active = 0;
+	stmmac_tx_clean(priv, ~0, tx_q->queue_index, NULL);
+	tx_q->tx_timer_active = false;
 }
 
 /**
@@ -2852,6 +2863,9 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Compute header lengths */
 	proto_hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
 
+	/* Start coalesce timer earlier in case TX Queue is stopped */
+	stmmac_tx_timer_arm(priv, queue);
+
 	/* Desc availability based on threshold should be enough safe */
 	if (unlikely(stmmac_tx_avail(priv, queue) <
 		(((skb->len - proto_hdr_len) / TSO_MAX_BUFF_SIZE + 1)))) {
@@ -3009,12 +3023,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * sizeof(*desc));
 	stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
 
-	if (priv->tx_coal_timer && !tx_q->tx_timer_active) {
-		tx_q->tx_timer_active = 1;
-		mod_timer(&tx_q->txtimer,
-				STMMAC_COAL_TIMER(priv->tx_coal_timer));
-	}
-
 	return NETDEV_TX_OK;
 
 dma_map_err:
@@ -3054,6 +3062,9 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 			return stmmac_tso_xmit(skb, dev);
 	}
 
+	/* Start coalesce timer earlier in case TX Queue is stopped */
+	stmmac_tx_timer_arm(priv, queue);
+
 	if (unlikely(stmmac_tx_avail(priv, queue) < nfrags + 1)) {
 		if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, queue))) {
 			netif_tx_stop_queue(netdev_get_tx_queue(priv->dev,
@@ -3221,12 +3232,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * sizeof(*desc));
 	stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
 
-	if (priv->tx_coal_timer && !tx_q->tx_timer_active) {
-		tx_q->tx_timer_active = 1;
-		mod_timer(&tx_q->txtimer,
-				STMMAC_COAL_TIMER(priv->tx_coal_timer));
-	}
-
 	return NETDEV_TX_OK;
 
 dma_map_err:
@@ -3575,7 +3580,7 @@ static int stmmac_tx_poll(struct napi_struct *napi, int budget)
 			napi_reschedule(napi);
 	}
 
-	return work_done;
+	return min(work_done, budget);
 }
 
 /**
-- 
2.7.4

Reply via email to