At some point in OVS history, some virtio features were announced as supported (ECN and UFO virtio features).
The userspace TSO code, which has been added later, does not support those features and tries to disable them. This breaks OVS upgrades: if an existing VM already negotiated such features, their lack on reconnection to an upgraded OVS triggers a vhost socket disconnection by Qemu. This results in an endless loop because Qemu then retries with the same set of virtio features. This patch proposes to try and detect those vhost socket disconnections and fall back to restoring the old virtio features (and disabling TSO for this vhost port). Acked-by: Mike Pattrick <m...@redhat.com> Acked-by: Simon Horman <simon.hor...@corigine.com> Signed-off-by: David Marchand <david.march...@redhat.com> --- Changelog since v5: - fixed coding style, Changelog since v4: - I kept acks as the logic behind the state machine did not change much, - fixed indent of enumeration in documentation, - used status:<field> in documentation instead of grep -o, - renamed "disabled_tso" as "userspace-tso", - switched to a state machine with flags, - removed note on byte padding in netdev_dpdk struct, Changelog since v3: - updated documentation now that the interface offloads status is reported in ovsdb, - fixed one coding style issue, Changelog since v2: - reported workaround presence in the ovsdb port status field and updated documentation accordingly, - tried to use "better" names, to distinguish ECN virtio feature from TSO OVS netdev feature, Changelog since v1: - added a note in the documentation, - fixed vhost unregister trigger (so that both disabling and re-enabling TSO is handled), - cleared netdev features when disabling TSO, - changed level and ratelimited log message on vhost socket disconnect, --- Documentation/topics/userspace-tso.rst | 26 ++++++- lib/netdev-dpdk.c | 100 ++++++++++++++++++++++++- 2 files changed, 120 insertions(+), 6 deletions(-) diff --git a/Documentation/topics/userspace-tso.rst b/Documentation/topics/userspace-tso.rst index 5a43c2e86b..c4b15f2604 100644 --- 
a/Documentation/topics/userspace-tso.rst +++ b/Documentation/topics/userspace-tso.rst @@ -68,7 +68,7 @@ as follows. connection is established, `TSO` is thus advertised to the guest as an available feature: -QEMU Command Line Parameter:: +1. QEMU Command Line Parameter:: $ sudo $QEMU_DIR/x86_64-softmmu/qemu-system-x86_64 \ ... @@ -77,12 +77,34 @@ QEMU Command Line Parameter:: ... 2. Ethtool. Assuming that the guest's OS also supports `TSO`, ethtool can be -used to enable same:: + used to enable same:: $ ethtool -K eth0 sg on # scatter-gather is a prerequisite for TSO $ ethtool -K eth0 tso on $ ethtool -k eth0 +**Note:** Enabling this feature impacts the virtio features exposed by the DPDK +vHost User backend to a guest. If a guest was already connected to OvS before +enabling TSO and restarting OvS, these guest ports won't have TSO available:: + + $ ovs-vsctl get interface vhost0 status:tx_tcp_seg_offload + "false" + +To help diagnose the issue, those ports have some additional information in +their status field in ovsdb:: + + $ ovs-vsctl get interface vhost0 status:userspace-tso + disabled + +To restore TSO for these guest ports, this guest's QEMU process must be stopped, +then started again. OvS will then report:: + + $ ovs-vsctl get interface vhost0 status:tx_tcp_seg_offload + "true" + + $ ovs-vsctl get interface vhost0 status:userspace-tso + ovs-vsctl: no key "userspace-tso" in Interface record "vhost0" column status + ~~~~~~~~~~~ Limitations ~~~~~~~~~~~ diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 63dac689e3..4415443924 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -418,6 +418,18 @@ enum dpdk_hw_ol_features { NETDEV_TX_TSO_OFFLOAD = 1 << 7, }; +/* Flags for the netdev_dpdk virtio_features_state field. + * This is used for the virtio features recovery mechanism linked to TSO + * support. 
*/ +#define OVS_VIRTIO_F_CLEAN (UINT8_C(1) << 0) +#define OVS_VIRTIO_F_WORKAROUND (UINT8_C(1) << 1) +#define OVS_VIRTIO_F_NEGOTIATED (UINT8_C(1) << 2) +#define OVS_VIRTIO_F_RECONF_PENDING (UINT8_C(1) << 3) +#define OVS_VIRTIO_F_CLEAN_NEGOTIATED \ + (OVS_VIRTIO_F_CLEAN | OVS_VIRTIO_F_NEGOTIATED) +#define OVS_VIRTIO_F_WORKAROUND_NEGOTIATED \ + (OVS_VIRTIO_F_WORKAROUND | OVS_VIRTIO_F_NEGOTIATED) + /* * In order to avoid confusion in variables names, following naming convention * should be used, if possible: @@ -474,7 +486,11 @@ struct netdev_dpdk { bool vhost_reconfigured; atomic_uint8_t vhost_tx_retries_max; - /* 2 pad bytes here. */ + + /* Flags for virtio features recovery mechanism. */ + uint8_t virtio_features_state; + + /* 1 pad byte here. */ ); PADDED_MEMBERS(CACHE_LINE_SIZE, @@ -1359,6 +1375,7 @@ common_construct(struct netdev *netdev, dpdk_port_t port_no, dev->requested_lsc_interrupt_mode = 0; ovsrcu_index_init(&dev->vid, -1); dev->vhost_reconfigured = false; + dev->virtio_features_state = OVS_VIRTIO_F_CLEAN; dev->attached = false; dev->started = false; dev->reset_needed = false; @@ -3883,6 +3900,12 @@ netdev_dpdk_vhost_user_get_status(const struct netdev *netdev, xasprintf("%d", vring.size)); } + if (userspace_tso_enabled() + && dev->virtio_features_state & OVS_VIRTIO_F_WORKAROUND) { + + smap_add_format(args, "userspace-tso", "disabled"); + } + ovs_mutex_unlock(&dev->mutex); return 0; } @@ -4245,6 +4268,8 @@ new_device(int vid) newnode = dev->socket_id; } + dev->virtio_features_state |= OVS_VIRTIO_F_NEGOTIATED; + if (dev->requested_n_txq < qp_num || dev->requested_n_rxq < qp_num || dev->requested_socket_id != newnode @@ -4268,7 +4293,9 @@ new_device(int vid) dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD; } - if (userspace_tso_enabled()) { + if (userspace_tso_enabled() + && dev->virtio_features_state & OVS_VIRTIO_F_CLEAN) { + if (features & (1ULL << VIRTIO_NET_F_GUEST_TSO4) && features & (1ULL << VIRTIO_NET_F_GUEST_TSO6)) { @@ -4524,6 +4551,45 @@ 
destroy_connection(int vid) dev->requested_n_txq = qp_num; netdev_request_reconfigure(&dev->up); } + + if (!(dev->virtio_features_state & OVS_VIRTIO_F_NEGOTIATED)) { + /* The socket disconnected before reaching new_device. It + * likely means that the guest did not agree with the virtio + * features. */ + VLOG_WARN_RL(&rl, "Connection on socket '%s' closed during " + "initialization.", dev->vhost_id); + } + if (!(dev->virtio_features_state & OVS_VIRTIO_F_RECONF_PENDING)) { + switch (dev->virtio_features_state) { + case OVS_VIRTIO_F_CLEAN: + dev->virtio_features_state = OVS_VIRTIO_F_WORKAROUND; + break; + + case OVS_VIRTIO_F_WORKAROUND: + dev->virtio_features_state = OVS_VIRTIO_F_CLEAN; + break; + + case OVS_VIRTIO_F_CLEAN_NEGOTIATED: + /* The virtio features were clean and got accepted by the + * guest. We expect it will be the case in the future and + * change nothing. */ + break; + + case OVS_VIRTIO_F_WORKAROUND_NEGOTIATED: + /* Let's try to go with clean virtio features on a next + * connection. */ + dev->virtio_features_state = OVS_VIRTIO_F_CLEAN; + break; + + default: + OVS_NOT_REACHED(); + } + if (!(dev->virtio_features_state & OVS_VIRTIO_F_NEGOTIATED)) { + dev->virtio_features_state |= OVS_VIRTIO_F_RECONF_PENDING; + netdev_request_reconfigure(&dev->up); + } + } + ovs_mutex_unlock(&dev->mutex); exists = true; break; @@ -5454,10 +5520,31 @@ static int netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) { struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); + bool unregister = false; + char *vhost_id; int err; ovs_mutex_lock(&dev->mutex); + if (dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT && dev->vhost_id + && dev->virtio_features_state & OVS_VIRTIO_F_RECONF_PENDING) { + + /* This vhost-user port was registered to the vhost library already, + * but a socket disconnection happened and configuration must be + * re-evaluated wrt dev->virtio_features_state. 
*/ + dev->vhost_driver_flags &= ~RTE_VHOST_USER_CLIENT; + vhost_id = dev->vhost_id; + unregister = true; + } + + ovs_mutex_unlock(&dev->mutex); + + if (unregister) { + dpdk_vhost_driver_unregister(dev, vhost_id); + } + + ovs_mutex_lock(&dev->mutex); + /* Configure vHost client mode if requested and if the following criteria * are met: * 1. Device hasn't been registered yet. @@ -5466,6 +5553,11 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT) && dev->vhost_id) { uint64_t virtio_unsup_features = 0; uint64_t vhost_flags = 0; + bool enable_tso; + + enable_tso = userspace_tso_enabled() + && dev->virtio_features_state & OVS_VIRTIO_F_CLEAN; + dev->virtio_features_state &= ~OVS_VIRTIO_F_RECONF_PENDING; /* Register client-mode device. */ vhost_flags |= RTE_VHOST_USER_CLIENT; @@ -5487,7 +5579,7 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) } /* Enable External Buffers if TCP Segmentation Offload is enabled. */ - if (userspace_tso_enabled()) { + if (enable_tso) { vhost_flags |= RTE_VHOST_USER_EXTBUF_SUPPORT; } @@ -5512,7 +5604,7 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) goto unlock; } - if (userspace_tso_enabled()) { + if (enable_tso) { virtio_unsup_features = 1ULL << VIRTIO_NET_F_HOST_ECN | 1ULL << VIRTIO_NET_F_HOST_UFO; VLOG_DBG("%s: TSO enabled on vhost port", -- 2.41.0 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev