On 3/27/2018 12:36 AM, Florian Fainelli wrote:
On 03/26/2018 02:22 PM, Tal Gilboa wrote:
On 3/23/2018 4:19 AM, Florian Fainelli wrote:
Implement support for adaptive RX and TX interrupt coalescing using
net_dim. We have each of our TX ring and our single RX ring implement a
bcm_sysport_net_dim structure which holds an interrupt counter, number
of packets, bytes, and a container for a net_dim instance.

Signed-off-by: Florian Fainelli <f.faine...@gmail.com>
---
   drivers/net/ethernet/broadcom/bcmsysport.c | 141
++++++++++++++++++++++++++---
   drivers/net/ethernet/broadcom/bcmsysport.h |  14 +++
   2 files changed, 140 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c
b/drivers/net/ethernet/broadcom/bcmsysport.c
index f15a8fc6dfc9..5a5a726bafa4 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -15,6 +15,7 @@
   #include <linux/module.h>
   #include <linux/kernel.h>
   #include <linux/netdevice.h>
+#include <linux/net_dim.h>

I don't think you need this include. You already include net_dim in
bcmsysport.h and include the bcmsysport.h here.

Indeed.


   #include <linux/etherdevice.h>
   #include <linux/platform_device.h>
   #include <linux/of.h>
@@ -574,21 +575,55 @@ static int bcm_sysport_set_wol(struct net_device
*dev,
       return 0;
   }
   +static void bcm_sysport_set_rx_coalesce(struct bcm_sysport_priv *priv)
+{
+    u32 reg;
+
+    reg = rdma_readl(priv, RDMA_MBDONE_INTR);
+    reg &= ~(RDMA_INTR_THRESH_MASK |
+         RDMA_TIMEOUT_MASK << RDMA_TIMEOUT_SHIFT);
+    reg |= priv->dim.coal_pkts;
+    reg |= DIV_ROUND_UP(priv->dim.coal_usecs * 1000, 8192) <<
+                RDMA_TIMEOUT_SHIFT;
+    rdma_writel(priv, reg, RDMA_MBDONE_INTR);
+}
+
+static void bcm_sysport_set_tx_coalesce(struct bcm_sysport_tx_ring
*ring)
+{
+    struct bcm_sysport_priv *priv = ring->priv;
+    u32 reg;
+
+    reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(ring->index));
+    reg &= ~(RING_INTR_THRESH_MASK |
+         RING_TIMEOUT_MASK << RING_TIMEOUT_SHIFT);
+    reg |= ring->dim.coal_pkts;
+    reg |= DIV_ROUND_UP(ring->dim.coal_usecs * 1000, 8192) <<
+                RING_TIMEOUT_SHIFT;
+    tdma_writel(priv, reg, TDMA_DESC_RING_INTR_CONTROL(ring->index));
+}
+

I wouldn't couple these functions with dim. This implies dim is always
used. IMO, would be more clear to use a generic method which takes usecs
and packets as an argument.

I did not want to create an additional structure for storing coalescing
parameters, but if you prefer I make this function take two parameters,
that sounds entirely reasonable.


   static int bcm_sysport_get_coalesce(struct net_device *dev,
                       struct ethtool_coalesce *ec)
   {
       struct bcm_sysport_priv *priv = netdev_priv(dev);
+    struct bcm_sysport_tx_ring *ring;
+    unsigned int i;
       u32 reg;
         reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(0));
         ec->tx_coalesce_usecs = (reg >> RING_TIMEOUT_SHIFT) * 8192 /
1000;
       ec->tx_max_coalesced_frames = reg & RING_INTR_THRESH_MASK;
+    for (i = 0; i < dev->num_tx_queues; i++) {
+        ring = &priv->tx_rings[i];
+        ec->use_adaptive_tx_coalesce |= ring->dim.use_dim;
+    }
         reg = rdma_readl(priv, RDMA_MBDONE_INTR);
         ec->rx_coalesce_usecs = (reg >> RDMA_TIMEOUT_SHIFT) * 8192 /
1000;
       ec->rx_max_coalesced_frames = reg & RDMA_INTR_THRESH_MASK;
+    ec->use_adaptive_rx_coalesce = priv->dim.use_dim;
         return 0;
   }
@@ -597,8 +632,8 @@ static int bcm_sysport_set_coalesce(struct
net_device *dev,
                       struct ethtool_coalesce *ec)
   {
       struct bcm_sysport_priv *priv = netdev_priv(dev);
+    struct bcm_sysport_tx_ring *ring;
       unsigned int i;
-    u32 reg;
         /* Base system clock is 125Mhz, DMA timeout is this reference
clock
        * divided by 1024, which yield roughly 8.192 us, our maximum
value has
@@ -615,22 +650,26 @@ static int bcm_sysport_set_coalesce(struct
net_device *dev,
           return -EINVAL;
         for (i = 0; i < dev->num_tx_queues; i++) {
-        reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(i));
-        reg &= ~(RING_INTR_THRESH_MASK |
-             RING_TIMEOUT_MASK << RING_TIMEOUT_SHIFT);
-        reg |= ec->tx_max_coalesced_frames;
-        reg |= DIV_ROUND_UP(ec->tx_coalesce_usecs * 1000, 8192) <<
-             RING_TIMEOUT_SHIFT;
-        tdma_writel(priv, reg, TDMA_DESC_RING_INTR_CONTROL(i));
+        ring = &priv->tx_rings[i];
+        ring->dim.coal_pkts = ec->tx_max_coalesced_frames;
+        ring->dim.coal_usecs = ec->tx_coalesce_usecs;
+        if (!ec->use_adaptive_tx_coalesce && ring->dim.use_dim) {
+            ring->dim.coal_pkts = 1;
+            ring->dim.coal_usecs = 0;
+        }
+        ring->dim.use_dim = ec->use_adaptive_tx_coalesce;
+        bcm_sysport_set_tx_coalesce(ring);
       }

If I understand correctly, if I disable dim, moderation is set to
{usecs,packets}={0,1} regardless of the input from ethtool right?

Correct, these are the default coalescing parameters that the driver
sets. As mentioned before, since I am not storing any coalescing
parameters other than these two, there is no copy of what an user might
have previously provided, falling back to the defaults seemed reasonable.

Consider this example: ethtool -C <int> adaptive-tx on; ethtool -C <intf> adaptive-tx off tx-usecs 8 tx-frames 32; In this case the actual moderation would be {0,1} instead of the requested {8,32}. Setting default values is ok unless requested otherwise. I would also use macros for default values.


Doesn't this break the wanted behavior? As mentioned above, I would
decouple dim from the set_tx/rx_coalesce() function. Also, when dim is
enabled, why change dim.coal_pkts/usecs? They would just be overwritten
in the next iteration of net_dim.

Indeed, that is not necessary.


   -    reg = rdma_readl(priv, RDMA_MBDONE_INTR);
-    reg &= ~(RDMA_INTR_THRESH_MASK |
-         RDMA_TIMEOUT_MASK << RDMA_TIMEOUT_SHIFT);
-    reg |= ec->rx_max_coalesced_frames;
-    reg |= DIV_ROUND_UP(ec->rx_coalesce_usecs * 1000, 8192) <<
-                RDMA_TIMEOUT_SHIFT;
-    rdma_writel(priv, reg, RDMA_MBDONE_INTR);
+    priv->dim.coal_usecs = ec->rx_coalesce_usecs;
+    priv->dim.coal_pkts = ec->rx_max_coalesced_frames;
+
+    if (!ec->use_adaptive_rx_coalesce && priv->dim.use_dim) {
+        priv->dim.coal_pkts = 1;
+        priv->dim.coal_usecs = 0;
+    }
+    priv->dim.use_dim = ec->use_adaptive_rx_coalesce;
+    bcm_sysport_set_rx_coalesce(priv);

Same comment as above.

         return 0;
   }
@@ -709,6 +748,7 @@ static unsigned int bcm_sysport_desc_rx(struct
bcm_sysport_priv *priv,
       struct bcm_sysport_stats64 *stats64 = &priv->stats64;
       struct net_device *ndev = priv->netdev;
       unsigned int processed = 0, to_process;
+    unsigned int processed_bytes = 0;
       struct bcm_sysport_cb *cb;
       struct sk_buff *skb;
       unsigned int p_index;
@@ -800,6 +840,7 @@ static unsigned int bcm_sysport_desc_rx(struct
bcm_sysport_priv *priv,
            */
           skb_pull(skb, sizeof(*rsb) + 2);
           len -= (sizeof(*rsb) + 2);
+        processed_bytes += len;
             /* UniMAC may forward CRC */
           if (priv->crc_fwd) {
@@ -824,6 +865,9 @@ static unsigned int bcm_sysport_desc_rx(struct
bcm_sysport_priv *priv,
               priv->rx_read_ptr = 0;
       }
   +    priv->dim.packets = processed;
+    priv->dim.bytes = processed_bytes;
+
       return processed;
   }
   @@ -900,6 +944,8 @@ static unsigned int
__bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv,
       ring->packets += pkts_compl;
       ring->bytes += bytes_compl;
       u64_stats_update_end(&priv->syncp);
+    ring->dim.packets = pkts_compl;
+    ring->dim.bytes = bytes_compl;
         ring->c_index = c_index;
   @@ -945,6 +991,7 @@ static int bcm_sysport_tx_poll(struct
napi_struct *napi, int budget)
   {
       struct bcm_sysport_tx_ring *ring =
           container_of(napi, struct bcm_sysport_tx_ring, napi);
+    struct net_dim_sample dim_sample;
       unsigned int work_done = 0;
         work_done = bcm_sysport_tx_reclaim(ring->priv, ring);
@@ -961,6 +1008,12 @@ static int bcm_sysport_tx_poll(struct
napi_struct *napi, int budget)
           return 0;
       }
   +    if (ring->dim.use_dim) {
+        net_dim_sample(ring->dim.event_ctr, ring->dim.packets,
+                   ring->dim.bytes, &dim_sample);
+        net_dim(&ring->dim.dim, dim_sample);
+    }
+
       return budget;
   }
   @@ -976,6 +1029,7 @@ static int bcm_sysport_poll(struct napi_struct
*napi, int budget)
   {
       struct bcm_sysport_priv *priv =
           container_of(napi, struct bcm_sysport_priv, napi);
+    struct net_dim_sample dim_sample;
       unsigned int work_done = 0;
         work_done = bcm_sysport_desc_rx(priv, budget);
@@ -998,6 +1052,12 @@ static int bcm_sysport_poll(struct napi_struct
*napi, int budget)
           intrl2_0_mask_clear(priv, INTRL2_0_RDMA_MBDONE);
       }
   +    if (priv->dim.use_dim) {
+        net_dim_sample(priv->dim.event_ctr, priv->dim.packets,
+                   priv->dim.bytes, &dim_sample);
+        net_dim(&priv->dim.dim, dim_sample);
+    }
+
       return work_done;
   }
   @@ -1016,6 +1076,40 @@ static void
bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv)
       netif_dbg(priv, wol, priv->netdev, "resumed from WOL\n");
   }
   +static void bcm_sysport_dim_work(struct work_struct *work)
+{
+    struct net_dim *dim = container_of(work, struct net_dim, work);
+    struct bcm_sysport_net_dim *ndim =
+            container_of(dim, struct bcm_sysport_net_dim, dim);
+    struct bcm_sysport_priv *priv =
+            container_of(ndim, struct bcm_sysport_priv, dim);
+    struct net_dim_cq_moder cur_profile =
+                net_dim_get_profile(dim->mode, dim->profile_ix);
+
+    priv->dim.coal_usecs = cur_profile.usec;
+    priv->dim.coal_pkts = cur_profile.pkts;
+
+    bcm_sysport_set_rx_coalesce(priv);
+    dim->state = NET_DIM_START_MEASURE;
+}
+
+static void bcm_sysport_dim_tx_work(struct work_struct *work)
+{
+    struct net_dim *dim = container_of(work, struct net_dim, work);
+    struct bcm_sysport_net_dim *ndim =
+            container_of(dim, struct bcm_sysport_net_dim, dim);
+    struct bcm_sysport_tx_ring *ring =
+            container_of(ndim, struct bcm_sysport_tx_ring, dim);
+    struct net_dim_cq_moder cur_profile =
+                net_dim_get_profile(dim->mode, dim->profile_ix);
+
+    ring->dim.coal_usecs = cur_profile.usec;
+    ring->dim.coal_pkts = cur_profile.pkts;
+
+    bcm_sysport_set_tx_coalesce(ring);
+    dim->state = NET_DIM_START_MEASURE;
+}
+
   /* RX and misc interrupt routine */
   static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id)
   {
@@ -1034,6 +1128,7 @@ static irqreturn_t bcm_sysport_rx_isr(int irq,
void *dev_id)
       }
         if (priv->irq0_stat & INTRL2_0_RDMA_MBDONE) {
+        priv->dim.event_ctr++;
           if (likely(napi_schedule_prep(&priv->napi))) {
               /* disable RX interrupts */
               intrl2_0_mask_set(priv, INTRL2_0_RDMA_MBDONE);
@@ -1061,6 +1156,7 @@ static irqreturn_t bcm_sysport_rx_isr(int irq,
void *dev_id)
               continue;
             txr = &priv->tx_rings[ring];
+        txr->dim.event_ctr++;
             if (likely(napi_schedule_prep(&txr->napi))) {
               intrl2_0_mask_set(priv, ring_bit);
@@ -1093,6 +1189,7 @@ static irqreturn_t bcm_sysport_tx_isr(int irq,
void *dev_id)
               continue;
             txr = &priv->tx_rings[ring];
+        txr->dim.event_ctr++;
             if (likely(napi_schedule_prep(&txr->napi))) {
               intrl2_1_mask_set(priv, BIT(ring));
@@ -1358,6 +1455,16 @@ static void bcm_sysport_adj_link(struct
net_device *dev)
           phy_print_status(phydev);
   }
   +static void bcm_sysport_init_dim(struct bcm_sysport_net_dim *dim,
+                 void (*cb)(struct work_struct *work))
+{
+    INIT_WORK(&dim->dim.work, cb);
+    dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+    dim->event_ctr = 0;
+    dim->packets = 0;
+    dim->bytes = 0;
+}

What about default values for coal_usecs/pkts? dim supports it through
net_dim_get_def_profile(mode) function.

OK, thanks I did not know that.


I'll add it to the documentation.

Reply via email to