On 27/09/18 17:31, Eric Dumazet wrote:
> As diagnosed by Song Liu, ndo_poll_controller() can
> be very dangerous on loaded hosts, since the cpu
> calling ndo_poll_controller() might steal all NAPI
> contexts (for all RX/TX queues of the NIC). This capture
> can last for an unlimited amount of time, since one
> cpu is generally not able to drain all the queues under load.
>
> sfc uses NAPI for TX completions, so we had better let the core
> networking stack call napi->poll() to avoid the capture.
>
> Signed-off-by: Eric Dumazet
> Cc: Edward Cree
> Cc: Bert Kenward
> Cc: Solarflare linux maintainers
Acked-by: Bert Kenward
> ---
> drivers/net/ethernet/sfc/efx.c | 26 --
> 1 file changed, 26 deletions(-)
>
> diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
> index 330233286e785254f5f29c87f9557a305974f606..3d0dd39c289e05b8a7a6778363461ef5698dc62b 100644
> --- a/drivers/net/ethernet/sfc/efx.c
> +++ b/drivers/net/ethernet/sfc/efx.c
> @@ -2206,29 +2206,6 @@ static void efx_fini_napi(struct efx_nic *efx)
> efx_fini_napi_channel(channel);
> }
>
> -/**
> - *
> - * Kernel netpoll interface
> - *
> - */
> -
> -#ifdef CONFIG_NET_POLL_CONTROLLER
> -
> -/* Although in the common case interrupts will be disabled, this is not
> - * guaranteed. However, all our work happens inside the NAPI callback,
> - * so no locking is required.
> - */
> -static void efx_netpoll(struct net_device *net_dev)
> -{
> - struct efx_nic *efx = netdev_priv(net_dev);
> - struct efx_channel *channel;
> -
> - efx_for_each_channel(channel, efx)
> - efx_schedule_channel(channel);
> -}
> -
> -#endif
> -
> /**
> *
> * Kernel net device interface
> @@ -2509,9 +2486,6 @@ static const struct net_device_ops efx_netdev_ops = {
> #endif
> .ndo_get_phys_port_id = efx_get_phys_port_id,
> .ndo_get_phys_port_name = efx_get_phys_port_name,
> -#ifdef CONFIG_NET_POLL_CONTROLLER
> - .ndo_poll_controller = efx_netpoll,
> -#endif
> .ndo_setup_tc = efx_setup_tc,
> #ifdef CONFIG_RFS_ACCEL
> .ndo_rx_flow_steer = efx_filter_rfs,
>