Grant Likely wrote:
> On Thu, Jul 9, 2009 at 2:33 PM, Wolfgang Grandegger<w...@grandegger.com> 
> wrote:
>> Hello,
>>
>> I'm currently trying to implement NAPI for the FEC on the MPC5200 to
>> solve the well known problem, that network packet storms can cause
>> interrupt flooding, which may totally block the system.
> 
> Good to hear it!  Thanks for this work.
> 
>> The NAPI
>> implementation, in principle, is straight forward and works
>> well under normal and moderate network load. It just calls disable_irq()
>> in the receive interrupt handler to defer packet processing to the NAPI
>> poll callback, which calls enable_irq() when it has processed all
>> packets. Unfortunately, under heavy network load (packet storm),
>> problems show up:
>>
>> - With DENX 2.4.25, the Bestcomm RX task gets and remains stopped after
>>  a while under additional system load. I have no idea how and when
>>  Bestcom tasks are stopped. In the auto-start mode, the firmware should
>>  poll forever for the next free descriptor block.
>>
>> - With 2.6.31-rc2, the RFIFO error occurs quickly which does reset the
>>  FEC and Bestcomm (unfortunately, this does trigger an oops because
>>  it's called from the interrupt context, but that's another issue).
>>
>> I'm realized that working with Bestcomm is a pain :-( but so far I have
>> little knowledge of the Bestcomm limitations and quirks. Any idea what
>> might go wrong or how to implement NAPI for that FEC properly.
> 
> Yes, I have a few ideas.  First, I suspect that the FEC rx queue isn't
> big enough and I wouldn't be surprised if the RFIFO error is occurring
> because Bestcomm gets overrun.  This scenario needs to be handled more
> gracefully.

First some words concerning NAPI. NAPI is mainly used to improve network
performance by processing network packets in the process context while
reducing interrupt load at the same time. Thereby it also solves the
problem of interrupt flooding, which may totally block the system. Most
(maybe all?) Gigabit Ethernet drivers use NAPI, e.g. ucc_geth. Below I
have attached my preliminary (and not yet complete or even correct)
patch, which should demonstrate how NAPI is supposed to work. The old NAPI
implementation of 2.4 is documented here:

http://lxr.linux.no/linux-old+v2.4.31/Documentation/networking/NAPI_HOWTO.txt.

As the NAPI polling competes with other task/processes, it's clear that
a bigger queue only helps partially.

Wolfgang.


---
 drivers/net/Kconfig       |    7 ++++
 drivers/net/fec_mpc52xx.c |   76 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+)

Index: linux-2.6-denx/drivers/net/Kconfig
===================================================================
--- linux-2.6-denx.orig/drivers/net/Kconfig
+++ linux-2.6-denx/drivers/net/Kconfig
@@ -1896,6 +1896,13 @@ config FEC_MPC52xx
          Fast Ethernet Controller
          If compiled as module, it will be called fec_mpc52xx.
 
+config FEC_MPC52xx_NAPI
+       bool "Use NAPI for MPC52xx FEC driver"
+       depends on FEC_MPC52xx
+       ---help---
+         This option enables NAPI support for the MPC5200's on-chip
+         Fast Ethernet Controller driver.
+
 config FEC_MPC52xx_MDIO
        bool "MPC52xx FEC MDIO bus driver"
        depends on FEC_MPC52xx
Index: linux-2.6-denx/drivers/net/fec_mpc52xx.c
===================================================================
--- linux-2.6-denx.orig/drivers/net/fec_mpc52xx.c
+++ linux-2.6-denx/drivers/net/fec_mpc52xx.c
@@ -44,6 +44,8 @@
 
 #define DRIVER_NAME "mpc52xx-fec"
 
+#define FEC_MPC52xx_NAPI_WEIGHT 64
+
 /* Private driver data structure */
 struct mpc52xx_fec_priv {
        struct net_device *ndev;
@@ -63,6 +65,9 @@ struct mpc52xx_fec_priv {
        struct phy_device *phydev;
        enum phy_state link;
        int seven_wire_mode;
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+       struct napi_struct napi;
+#endif
 };
 
 
@@ -226,6 +231,10 @@ static int mpc52xx_fec_open(struct net_d
                phy_start(priv->phydev);
        }
 
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+       napi_enable(&priv->napi);
+#endif
+
        if (request_irq(dev->irq, &mpc52xx_fec_interrupt, IRQF_SHARED,
                        DRIVER_NAME "_ctrl", dev)) {
                dev_err(&dev->dev, "ctrl interrupt request failed\n");
@@ -273,6 +282,9 @@ static int mpc52xx_fec_open(struct net_d
                priv->phydev = NULL;
        }
 
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+       napi_disable(&priv->napi);
+#endif
        return err;
 }
 
@@ -280,6 +292,10 @@ static int mpc52xx_fec_close(struct net_
 {
        struct mpc52xx_fec_priv *priv = netdev_priv(dev);
 
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+       napi_disable(&priv->napi);
+#endif
+
        netif_stop_queue(dev);
 
        mpc52xx_fec_stop(dev);
@@ -379,17 +395,48 @@ static irqreturn_t mpc52xx_fec_tx_interr
        return IRQ_HANDLED;
 }
 
+#ifdef CONFIG_FEC_MPC52xx_NAPI
 static irqreturn_t mpc52xx_fec_rx_interrupt(int irq, void *dev_id)
 {
        struct net_device *dev = dev_id;
        struct mpc52xx_fec_priv *priv = netdev_priv(dev);
 
+       /* Disable the RX interrupt */
+       if (napi_schedule_prep(&priv->napi)) {
+               disable_irq_nosync(irq);
+               __napi_schedule(&priv->napi);
+       } else {
+               dev_err(dev->dev.parent, "FEC BUG: interrupt while in poll\n");
+       }
+       return IRQ_HANDLED;
+}
+#endif
+
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+static int mpc52xx_fec_rx_poll(struct napi_struct *napi, int budget)
+#else
+static irqreturn_t mpc52xx_fec_rx_interrupt(int irq, void *dev_id)
+#endif
+{
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+       struct mpc52xx_fec_priv *priv =
+               container_of(napi, struct mpc52xx_fec_priv, napi);
+       struct net_device *dev = napi->dev;
+       int pkt_received = 0;
+#else
+       struct net_device *dev = dev_id;
+       struct mpc52xx_fec_priv *priv = netdev_priv(dev);
+#endif
+
        while (bcom_buffer_done(priv->rx_dmatsk)) {
                struct sk_buff *skb;
                struct sk_buff *rskb;
                struct bcom_fec_bd *bd;
                u32 status;
 
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+               pkt_received++;
+#endif
                rskb = bcom_retrieve_buffer(priv->rx_dmatsk, &status,
                                (struct bcom_bd **)&bd);
                dma_unmap_single(dev->dev.parent, bd->skb_pa, rskb->len,
@@ -410,6 +457,10 @@ static irqreturn_t mpc52xx_fec_rx_interr
 
                        dev->stats.rx_dropped++;
 
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+                       if (pkt_received >= budget)
+                               break;
+#endif
                        continue;
                }
 
@@ -425,7 +476,11 @@ static irqreturn_t mpc52xx_fec_rx_interr
                        rskb->dev = dev;
                        rskb->protocol = eth_type_trans(rskb, dev);
 
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+                       netif_receive_skb(rskb);
+#else
                        netif_rx(rskb);
+#endif
                } else {
                        /* Can't get a new one : reuse the same & drop pkt */
                        dev_notice(&dev->dev, "Memory squeeze, dropping 
packet.\n");
@@ -442,9 +497,23 @@ static irqreturn_t mpc52xx_fec_rx_interr
                                FEC_RX_BUFFER_SIZE, DMA_FROM_DEVICE);
 
                bcom_submit_next_buffer(priv->rx_dmatsk, skb);
+
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+               if (pkt_received >= budget)
+                       break;
+#endif
+       }
+
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+       if (pkt_received < budget) {
+               napi_complete(napi);
+               enable_irq(priv->r_irq);
        }
 
+       return pkt_received;
+#else
        return IRQ_HANDLED;
+#endif
 }
 
 static irqreturn_t mpc52xx_fec_interrupt(int irq, void *dev_id)
@@ -950,6 +1019,13 @@ mpc52xx_fec_probe(struct of_device *op, 
        priv->duplex = DUPLEX_HALF;
        priv->mdio_speed = ((mpc5xxx_get_bus_frequency(op->node) >> 20) / 5) << 
1;
 
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+       netif_napi_add(ndev, &priv->napi, mpc52xx_fec_rx_poll,
+                      FEC_MPC52xx_NAPI_WEIGHT);
+       dev_info(&op->dev, "using NAPI with weigth %d\n",
+                FEC_MPC52xx_NAPI_WEIGHT);
+#endif
+
        /* The current speed preconfigures the speed of the MII link */
        prop = of_get_property(op->node, "current-speed", &prop_size);
        if (prop && (prop_size >= sizeof(u32) * 2)) {
---
 drivers/net/Kconfig       |    7 ++++
 drivers/net/fec_mpc52xx.c |   76 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+)

Index: linux-2.6-denx/drivers/net/Kconfig
===================================================================
--- linux-2.6-denx.orig/drivers/net/Kconfig
+++ linux-2.6-denx/drivers/net/Kconfig
@@ -1896,6 +1896,13 @@ config FEC_MPC52xx
 	  Fast Ethernet Controller
 	  If compiled as module, it will be called fec_mpc52xx.
 
+config FEC_MPC52xx_NAPI
+	bool "Use NAPI for MPC52xx FEC driver"
+	depends on FEC_MPC52xx
+	---help---
+	  This option enables NAPI support for the MPC5200's on-chip
+	  Fast Ethernet Controller driver.
+
 config FEC_MPC52xx_MDIO
 	bool "MPC52xx FEC MDIO bus driver"
 	depends on FEC_MPC52xx
Index: linux-2.6-denx/drivers/net/fec_mpc52xx.c
===================================================================
--- linux-2.6-denx.orig/drivers/net/fec_mpc52xx.c
+++ linux-2.6-denx/drivers/net/fec_mpc52xx.c
@@ -44,6 +44,8 @@
 
 #define DRIVER_NAME "mpc52xx-fec"
 
+#define FEC_MPC52xx_NAPI_WEIGHT 64
+
 /* Private driver data structure */
 struct mpc52xx_fec_priv {
 	struct net_device *ndev;
@@ -63,6 +65,9 @@ struct mpc52xx_fec_priv {
 	struct phy_device *phydev;
 	enum phy_state link;
 	int seven_wire_mode;
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+	struct napi_struct napi;
+#endif
 };
 
 
@@ -226,6 +231,10 @@ static int mpc52xx_fec_open(struct net_d
 		phy_start(priv->phydev);
 	}
 
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+	napi_enable(&priv->napi);
+#endif
+
 	if (request_irq(dev->irq, &mpc52xx_fec_interrupt, IRQF_SHARED,
 	                DRIVER_NAME "_ctrl", dev)) {
 		dev_err(&dev->dev, "ctrl interrupt request failed\n");
@@ -273,6 +282,9 @@ static int mpc52xx_fec_open(struct net_d
 		priv->phydev = NULL;
 	}
 
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+	napi_disable(&priv->napi);
+#endif
 	return err;
 }
 
@@ -280,6 +292,10 @@ static int mpc52xx_fec_close(struct net_
 {
 	struct mpc52xx_fec_priv *priv = netdev_priv(dev);
 
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+	napi_disable(&priv->napi);
+#endif
+
 	netif_stop_queue(dev);
 
 	mpc52xx_fec_stop(dev);
@@ -379,17 +395,48 @@ static irqreturn_t mpc52xx_fec_tx_interr
 	return IRQ_HANDLED;
 }
 
+#ifdef CONFIG_FEC_MPC52xx_NAPI
 static irqreturn_t mpc52xx_fec_rx_interrupt(int irq, void *dev_id)
 {
 	struct net_device *dev = dev_id;
 	struct mpc52xx_fec_priv *priv = netdev_priv(dev);
 
+	/* Disable the RX interrupt */
+	if (napi_schedule_prep(&priv->napi)) {
+		disable_irq_nosync(irq);
+		__napi_schedule(&priv->napi);
+	} else {
+		dev_err(dev->dev.parent, "FEC BUG: interrupt while in poll\n");
+	}
+	return IRQ_HANDLED;
+}
+#endif
+
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+static int mpc52xx_fec_rx_poll(struct napi_struct *napi, int budget)
+#else
+static irqreturn_t mpc52xx_fec_rx_interrupt(int irq, void *dev_id)
+#endif
+{
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+	struct mpc52xx_fec_priv *priv =
+		container_of(napi, struct mpc52xx_fec_priv, napi);
+	struct net_device *dev = napi->dev;
+	int pkt_received = 0;
+#else
+	struct net_device *dev = dev_id;
+	struct mpc52xx_fec_priv *priv = netdev_priv(dev);
+#endif
+
 	while (bcom_buffer_done(priv->rx_dmatsk)) {
 		struct sk_buff *skb;
 		struct sk_buff *rskb;
 		struct bcom_fec_bd *bd;
 		u32 status;
 
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+		pkt_received++;
+#endif
 		rskb = bcom_retrieve_buffer(priv->rx_dmatsk, &status,
 				(struct bcom_bd **)&bd);
 		dma_unmap_single(dev->dev.parent, bd->skb_pa, rskb->len,
@@ -410,6 +457,10 @@ static irqreturn_t mpc52xx_fec_rx_interr
 
 			dev->stats.rx_dropped++;
 
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+			if (pkt_received >= budget)
+				break;
+#endif
 			continue;
 		}
 
@@ -425,7 +476,11 @@ static irqreturn_t mpc52xx_fec_rx_interr
 			rskb->dev = dev;
 			rskb->protocol = eth_type_trans(rskb, dev);
 
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+			netif_receive_skb(rskb);
+#else
 			netif_rx(rskb);
+#endif
 		} else {
 			/* Can't get a new one : reuse the same & drop pkt */
 			dev_notice(&dev->dev, "Memory squeeze, dropping packet.\n");
@@ -442,9 +497,23 @@ static irqreturn_t mpc52xx_fec_rx_interr
 				FEC_RX_BUFFER_SIZE, DMA_FROM_DEVICE);
 
 		bcom_submit_next_buffer(priv->rx_dmatsk, skb);
+
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+		if (pkt_received >= budget)
+			break;
+#endif
+	}
+
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+	if (pkt_received < budget) {
+		napi_complete(napi);
+		enable_irq(priv->r_irq);
 	}
 
+	return pkt_received;
+#else
 	return IRQ_HANDLED;
+#endif
 }
 
 static irqreturn_t mpc52xx_fec_interrupt(int irq, void *dev_id)
@@ -950,6 +1019,13 @@ mpc52xx_fec_probe(struct of_device *op, 
 	priv->duplex = DUPLEX_HALF;
 	priv->mdio_speed = ((mpc5xxx_get_bus_frequency(op->node) >> 20) / 5) << 1;
 
+#ifdef CONFIG_FEC_MPC52xx_NAPI
+	netif_napi_add(ndev, &priv->napi, mpc52xx_fec_rx_poll,
+		       FEC_MPC52xx_NAPI_WEIGHT);
+	dev_info(&op->dev, "using NAPI with weigth %d\n",
+		 FEC_MPC52xx_NAPI_WEIGHT);
+#endif
+
 	/* The current speed preconfigures the speed of the MII link */
 	prop = of_get_property(op->node, "current-speed", &prop_size);
 	if (prop && (prop_size >= sizeof(u32) * 2)) {
_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to