Hi David,

Thanks for working on this patch!

It seems possible to change this patch later when the other TSO series
gets merged to disable TSO only on the affected port.
Mike, any thoughts?

I made a couple comments below.

On Fri, Sep 9, 2022 at 10:57 AM David Marchand <david.march...@redhat.com>
wrote:

> At some point in OVS history, some virtio features were announced as
> supported (ECN and UFO virtio features).
>
> The userspace TSO code, which has been added later, does not support
> those features and tries to disable them.
>
> This breaks OVS upgrades: if an existing VM already negotiated such
> features, their lack on reconnection to an upgraded OVS triggers a
> vhost socket disconnection by Qemu.
> This results in an endless loop because Qemu then retries with the same
> set of virtio features.
>
> This patch proposes to try and detect those vhost socket disconnection
> and fallback restoring the old virtio features (and disabling TSO for this
> vhost port).
>
> Signed-off-by: David Marchand <david.march...@redhat.com>
> ---
>  lib/netdev-dpdk.c | 52 +++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 50 insertions(+), 2 deletions(-)
>
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 0dd655507b..13d7ed3d62 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -465,6 +465,15 @@ struct netdev_dpdk {
>          /* True if vHost device is 'up' and has been reconfigured at
> least once */
>          bool vhost_reconfigured;
>
> +        /* Set on driver start (which means after a vHost connection is
> +         * accepted), and cleared when the vHost device gets configured.
> */
> +        bool vhost_initial_config;
> +
> +        /* Set on disconnection if an initial configuration did not
> finish.
> +         * This triggers a workaround for Virtio features negotiation,
> that
> +         * makes TSO unavailable. */
> +        bool vhost_workaround_disable_tso;
> +
>          atomic_uint8_t vhost_tx_retries_max;
>          /* 2 pad bytes here. */
>      );
> @@ -1293,6 +1302,7 @@ common_construct(struct netdev *netdev, dpdk_port_t
> port_no,
>      dev->requested_lsc_interrupt_mode = 0;
>      ovsrcu_index_init(&dev->vid, -1);
>      dev->vhost_reconfigured = false;
> +    dev->vhost_initial_config = false;
>      dev->attached = false;
>      dev->started = false;
>      dev->reset_needed = false;
> @@ -3986,6 +3996,7 @@ new_device(int vid)
>              } else {
>                  /* Reconfiguration not required. */
>                  dev->vhost_reconfigured = true;
> +                dev->vhost_initial_config = false;
>              }
>
>              ovsrcu_index_set(&dev->vid, vid);
> @@ -4154,6 +4165,16 @@ destroy_connection(int vid)
>                  dev->requested_n_txq = qp_num;
>                  netdev_request_reconfigure(&dev->up);
>              }
> +
> +            if (dev->vhost_initial_config) {
> +                VLOG_ERR("Connection on socket '%s' seems prematurately "
> +                         "closed.", dev->vhost_id);
>

Perhaps use a message more specific like below?
"Connection on socket '%s' closed during initialization."
Or change to below if that makes sense:
"Connection on socket '%s' closed during feature negotiation."



> +                dev->vhost_workaround_disable_tso = true;
> +                netdev_request_reconfigure(&dev->up);
> +            } else {
> +                dev->vhost_workaround_disable_tso = false;
> +            }
> +
>              ovs_mutex_unlock(&dev->mutex);
>              exists = true;
>              break;
> @@ -5058,6 +5079,7 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk
> *dev)
>
>          if (dev->vhost_reconfigured == false) {
>              dev->vhost_reconfigured = true;
> +            dev->vhost_initial_config = false;
>              /* Carrier status may need updating. */
>              netdev_change_seq_changed(&dev->up);
>          }
> @@ -5086,6 +5108,31 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev
> *netdev)
>      int err;
>      uint64_t vhost_flags = 0;
>      uint64_t vhost_unsup_flags;
> +    bool tso_enabled = userspace_tso_enabled();
> +
> +    if (tso_enabled) {
> +        bool unregister;
> +        char *vhost_id;
> +
> +        ovs_mutex_lock(&dev->mutex);
> +        unregister = dev->vhost_id != NULL;
> +        unregister &= dev->vhost_workaround_disable_tso;
> +        if (unregister) {
> +            /* There was an issue with a previous connection that did not
> +             * finish initialising, one common reason is virtio features
> +             * negotiation. Disable TSO as a workaround. */
> +            tso_enabled = false;
> +            dev->vhost_driver_flags &= ~RTE_VHOST_USER_CLIENT;
> +            vhost_id = dev->vhost_id;
> +            VLOG_WARN("Disabling TSO for %s as a workaround because of "
> +                      "previous connection drop.", dev->up.name);
>

Should we update Documentation/topics/userspace-tso.rst with
this new condition since it is external and visible to the users?

Thanks,
fbl



> +        }
> +        ovs_mutex_unlock(&dev->mutex);
> +
> +        if (unregister) {
> +            dpdk_vhost_driver_unregister(dev, vhost_id);
> +        }
> +    }
>
>      ovs_mutex_lock(&dev->mutex);
>
> @@ -5112,7 +5159,7 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev
> *netdev)
>          }
>
>          /* Enable External Buffers if TCP Segmentation Offload is
> enabled. */
> -        if (userspace_tso_enabled()) {
> +        if (tso_enabled) {
>              vhost_flags |= RTE_VHOST_USER_EXTBUF_SUPPORT;
>          }
>
> @@ -5137,7 +5184,7 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev
> *netdev)
>              goto unlock;
>          }
>
> -        if (userspace_tso_enabled()) {
> +        if (tso_enabled) {
>              netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
>              netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
>              netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM;
> @@ -5160,6 +5207,7 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev
> *netdev)
>              goto unlock;
>          }
>
> +        dev->vhost_initial_config = true;
>          err = rte_vhost_driver_start(dev->vhost_id);
>          if (err) {
>              VLOG_ERR("rte_vhost_driver_start failed for vhost user "
> --
> 2.37.2
>
>
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to