current -git broke my main testbox. No TCP/IP networking to/from the box 
and e1000 would time out in xmit:

 NETDEV WATCHDOG: eth0: transmit timed out
 e1000: eth0: e1000_clean_tx_irq: Detected Tx Unit Hang
   Tx Queue             <0>
   TDH                  <95>
   TDT                  <95>
   next_to_use          <95>
   next_to_clean        <ea>
 buffer_info[next_to_clean]
   time_stamp           <ffff8f43>
   next_to_watch        <ea>
   jiffies              <ffffb5cc>
   next_to_watch.status <1>

After a bisection session the bad commit turned out to be:

 29578624e354f56143d92510fff33a8b2aaa2c03 is first bad commit
 commit 29578624e354f56143d92510fff33a8b2aaa2c03
 Author: Olaf Kirch <[EMAIL PROTECTED]>
 Date:   Wed Jul 11 19:32:02 2007 -0700

     [NET]: Fix races in net_rx_action vs netpoll.

     Keep netpoll/poll_napi from messing with the poll_list.
     Only net_rx_action is allowed to manipulate the list.

     Signed-off-by: Olaf Kirch <[EMAIL PROTECTED]>
     Signed-off-by: David S. Miller <[EMAIL PROTECTED]>

and indeed the testbox uses netconsole (it's a laptop so this is the 
only viable remote debugging option). Applying the revert patch below 
makes it work again.

        Ingo

------------------>
Subject: [patch] revert: [NET]: Fix races in net_rx_action vs netpoll
From: Ingo Molnar <[EMAIL PROTECTED]>

commit 29578624 causes netconsole failures:

  NETDEV WATCHDOG: eth0: transmit timed out
  e1000: eth0: e1000_clean_tx_irq: Detected Tx Unit Hang
    Tx Queue             <0>
    TDH                  <95>
    TDT                  <95>
    next_to_use          <95>
    next_to_clean        <ea>
  buffer_info[next_to_clean]
    time_stamp           <ffff8f43>
    next_to_watch        <ea>
    jiffies              <ffffb5cc>
    next_to_watch.status <1>

revert it for now, to make my testsystem work.

Signed-off-by: Ingo Molnar <[EMAIL PROTECTED]>
---
 include/linux/netdevice.h |   10 ----------
 net/core/netpoll.c        |    8 --------
 2 files changed, 18 deletions(-)

Index: linux/include/linux/netdevice.h
===================================================================
--- linux.orig/include/linux/netdevice.h
+++ linux/include/linux/netdevice.h
@@ -261,8 +261,6 @@ enum netdev_state_t
        __LINK_STATE_LINKWATCH_PENDING,
        __LINK_STATE_DORMANT,
        __LINK_STATE_QDISC_RUNNING,
-       /* Set by the netpoll NAPI code */
-       __LINK_STATE_POLL_LIST_FROZEN,
 };
 
 
@@ -1016,14 +1014,6 @@ static inline void netif_rx_complete(str
 {
        unsigned long flags;
 
-#ifdef CONFIG_NETPOLL
-       /* Prevent race with netpoll - yes, this is a kludge.
-        * But at least it doesn't penalize the non-netpoll
-        * code path. */
-       if (test_bit(__LINK_STATE_POLL_LIST_FROZEN, &dev->state))
-               return;
-#endif
-
        local_irq_save(flags);
        __netif_rx_complete(dev);
        local_irq_restore(flags);
Index: linux/net/core/netpoll.c
===================================================================
--- linux.orig/net/core/netpoll.c
+++ linux/net/core/netpoll.c
@@ -124,13 +124,6 @@ static void poll_napi(struct netpoll *np
        if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) &&
            npinfo->poll_owner != smp_processor_id() &&
            spin_trylock(&npinfo->poll_lock)) {
-               /* When calling dev->poll from poll_napi, we may end up in
-                * netif_rx_complete. However, only the CPU to which the
-                * device was queued is allowed to remove it from poll_list.
-                * Setting POLL_LIST_FROZEN tells netif_rx_complete
-                * to leave the NAPI state alone.
-                */
-               set_bit(__LINK_STATE_POLL_LIST_FROZEN, &np->dev->state);
                npinfo->rx_flags |= NETPOLL_RX_DROP;
                atomic_inc(&trapped);
 
@@ -138,7 +131,6 @@ static void poll_napi(struct netpoll *np
 
                atomic_dec(&trapped);
                npinfo->rx_flags &= ~NETPOLL_RX_DROP;
-               clear_bit(__LINK_STATE_POLL_LIST_FROZEN, &np->dev->state);
                spin_unlock(&npinfo->poll_lock);
        }
 }
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to