A rare [1] race condition is observed between the igb_watchdog_task and shutdown on a dual-core i.MX6 based system with two I210 controllers.
Using printk, the igb_watchdog_task is hung in igb_read_phy_reg because __igb_shutdown has already called __igb_close. Fix this by locking in igb_watchdog_task (in the same way as is done in igb_reset_task). reboot kworker __igb_shutdown rtnl_lock __igb_close : igb_watchdog_task : : : igb_read_phy_reg (hung) rtnl_unlock [1] Note that this is easier to reproduce with 'initcall_debug' logging and additional and printk logging in igb_main. Signed-off-by: Ian Ray <ian....@gehealthcare.com> --- drivers/net/ethernet/intel/igb/igb_main.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index c646c71915f0..a4910e565a3f 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5544,6 +5544,8 @@ static void igb_watchdog_task(struct work_struct *work) u32 connsw; u16 phy_data, retry_count = 20; + rtnl_lock(); + link = igb_has_link(adapter); if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) { @@ -5680,7 +5682,7 @@ static void igb_watchdog_task(struct work_struct *work) if (adapter->flags & IGB_FLAG_MEDIA_RESET) { schedule_work(&adapter->reset_task); /* return immediately */ - return; + goto unlock; } } pm_schedule_suspend(netdev->dev.parent, @@ -5693,7 +5695,7 @@ static void igb_watchdog_task(struct work_struct *work) if (adapter->flags & IGB_FLAG_MEDIA_RESET) { schedule_work(&adapter->reset_task); /* return immediately */ - return; + goto unlock; } } } @@ -5714,7 +5716,7 @@ static void igb_watchdog_task(struct work_struct *work) adapter->tx_timeout_count++; schedule_work(&adapter->reset_task); /* return immediately since reset is imminent */ - return; + goto unlock; } } @@ -5751,6 +5753,9 @@ static void igb_watchdog_task(struct work_struct *work) mod_timer(&adapter->watchdog_timer, round_jiffies(jiffies + 2 * HZ)); } + +unlock: + rtnl_unlock(); } enum latency_range { -- 2.49.0