> Subject: [PATCH v4] net/iavf: fix duplicate VF reset during PF reset recovery
> 
> During PF-initiated reset recovery, iavf_dev_close() sends
> an extra VIRTCHNL_OP_RESET_VF while recovery is already in progress.
> That second reset can leave PF/VF virtchnl state inconsistent and
> cause VIRTCHNL_OP_CONFIG_VSI_QUEUES to fail with ERR_PARAM after
> ToR link flap/power-cycle, leaving the VF unable to recover.
> This results in connection loss.
> 
> Skip the close-time VF reset and related close-time virtchnl
> operations while a PF-triggered reset recovery is in progress.
> This avoids a duplicate VF reset while keeping the normal
> behavior for an application-driven close.
> 
> Fixes: 675a104e2e94 ("net/iavf: fix abnormal disable HW interrupt")
> Fixes: b34fe66ea893 ("net/iavf: delay VF reset command")
> Fixes: 5e03e316c753 ("net/iavf: handle virtchnl event message without
> interrupt")
> Cc: [email protected]
> 
> Signed-off-by: Anurag Mandal <[email protected]>
> ---
> V4: Addressed Ciara Loftus comments
>   - split VF reset from other code changes
> V3: Addressed latest ai-code-review comments
> V2: Addressed ai-code-review comments
> 
>  doc/guides/rel_notes/release_26_07.rst |  3 +++
>  drivers/net/intel/iavf/iavf_ethdev.c   | 37 +++++++++++++++-----------
>  drivers/net/intel/iavf/iavf_vchnl.c    | 18 ++++++++++---
>  3 files changed, 39 insertions(+), 19 deletions(-)
> 
> diff --git a/doc/guides/rel_notes/release_26_07.rst
> b/doc/guides/rel_notes/release_26_07.rst
> index d2563ac503..f6899a78c3 100644
> --- a/doc/guides/rel_notes/release_26_07.rst
> +++ b/doc/guides/rel_notes/release_26_07.rst
> @@ -95,6 +95,9 @@ New Features
> 
>    * Added support for transmitting LLDP packets based on mbuf packet type.
>    * Implemented AVX2 context descriptor transmit paths.
> +  * Prevented duplicate 'VIRTCHNL_OP_RESET_VF' during a PF-initiated
> +    reset recovery, which earlier caused virtchnl state corruption
> +    and connection loss after a top-of-rack (ToR) link flap/power-cycle.
> 
>  * **Updated PCAP ethernet driver.**
> 
> diff --git a/drivers/net/intel/iavf/iavf_ethdev.c
> b/drivers/net/intel/iavf/iavf_ethdev.c
> index a8031e23a5..99457ae510 100644
> --- a/drivers/net/intel/iavf/iavf_ethdev.c
> +++ b/drivers/net/intel/iavf/iavf_ethdev.c
> @@ -3166,24 +3166,27 @@ iavf_dev_close(struct rte_eth_dev *dev)
> 
>       ret = iavf_dev_stop(dev);
> 
> -     /*
> -      * Release redundant queue resource when close the dev
> -      * so that other vfs can re-use the queues.
> -      */
> -     if (vf->lv_enabled) {
> -             ret = iavf_request_queues(dev,
> IAVF_MAX_NUM_QUEUES_DFLT);
> -             if (ret)
> -                     PMD_DRV_LOG(ERR, "Reset the num of queues
> failed");
> +     /* Skip RESET_VF on a PF-initiated reset */
> +     if (!adapter->closed && !vf->in_reset_recovery) {

adapter->closed will always be false here so no need to check it.

vf->in_reset_recovery is also set for a VF-initiated reset, which does
require sending VIRTCHNL_OP_RESET_VF because that kicks off the reset.
(rte_pmd_iavf_reinit -> handle_hw_reset is the VF-initiated reset path.)

We need some way to know whether we are currently handling a PF-initiated
reset here: either another adapter flag, or make in_reset_recovery
tri-state, e.g. 0 (no reset), 1 (PF-initiated), 2 (VF-initiated).
Then skip the OP_RESET_VF if the reset is PF-initiated.

> +             /*
> +              * Release redundant queue resource when close the dev
> +              * so that other vfs can re-use the queues.
> +              */
> +             if (vf->lv_enabled) {
> +                     ret = iavf_request_queues(dev,
> IAVF_MAX_NUM_QUEUES_DFLT);
> +                     if (ret)
> +                             PMD_DRV_LOG(ERR, "Reset the num of
> queues failed");
> +                     vf->max_rss_qregion =
> IAVF_MAX_NUM_QUEUES_DFLT;
> +             }
> 
> -             vf->max_rss_qregion = IAVF_MAX_NUM_QUEUES_DFLT;
> +             /*
> +              * Disable promiscuous mode before resetting the VF. This is to
> avoid
> +              * potential issues when the PF is bound to the kernel driver.
> +              */
> +             if (vf->promisc_unicast_enabled || vf-
> >promisc_multicast_enabled)
> +                     iavf_config_promisc(adapter, false, false);
>       }
> 
> -     /* Disable promiscuous mode before resetting the VF. This is to avoid
> -      * potential issues when the PF is bound to the kernel driver.
> -      */
> -     if (vf->promisc_unicast_enabled || vf->promisc_multicast_enabled)
> -             iavf_config_promisc(adapter, false, false);
> -
>       adapter->closed = true;
> 
>       /* free iAVF security device context all related resources */
> @@ -3195,7 +3198,9 @@ iavf_dev_close(struct rte_eth_dev *dev)
>       iavf_flow_flush(dev, NULL);
>       iavf_flow_uninit(adapter);
> 
> -     iavf_vf_reset(hw);
> +     /* Skip RESET_VF on a PF-initiated reset */
> +     if (!vf->in_reset_recovery)
> +             iavf_vf_reset(hw);
>       vf->aq_intr_enabled = false;
>       iavf_shutdown_adminq(hw);
>       if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_WB_ON_ITR)
> {
> diff --git a/drivers/net/intel/iavf/iavf_vchnl.c
> b/drivers/net/intel/iavf/iavf_vchnl.c
> index 94ccfb5d6e..cf3513ef94 100644
> --- a/drivers/net/intel/iavf/iavf_vchnl.c
> +++ b/drivers/net/intel/iavf/iavf_vchnl.c
> @@ -283,9 +283,21 @@ iavf_read_msg_from_pf(struct iavf_adapter
> *adapter, uint16_t buf_len,
>                                       vf->link_up ? "up" : "down");
>                       break;
>               case VIRTCHNL_EVENT_RESET_IMPENDING:
> -                     vf->vf_reset = true;
> -                     iavf_set_no_poll(adapter, false);
> -                     PMD_DRV_LOG(INFO, "VF is resetting");
> +                     /*
> +                      * Force link down on impending reset to drop
> +                      * the cached link-up state; a fresh LSC up
> +                      * event will be re-issued by the PF once the
> +                      * VF is reinitialised.
> +                      */
> +                     vf->link_up = false;
> +                     if (!vf->vf_reset) {
> +                             vf->vf_reset = true;
> +                             iavf_set_no_poll(adapter, false);
> +                             iavf_dev_event_post(vf->eth_dev,
> +                                     RTE_ETH_EVENT_INTR_RESET,
> +                                     NULL, 0);
> +                     }
> +                     PMD_DRV_LOG(DEBUG, "VF is resetting");
>                       break;
>               case VIRTCHNL_EVENT_PF_DRIVER_CLOSE:
>                       vf->dev_closed = true;
> --
> 2.34.1

Reply via email to