The next commit will introduce live migration (with fd-passing) for the TAP net backend, so now we prepare virtio-net for it. Add a virtio-net option, local-migration, which is true by default but false for older machine types, which don't support the feature.
We introduce an interface for live-migrating backends: 1. ->is_wait_incoming() handler, so that virtio-net knows that the backend is not fully initialized, as it is waiting for the incoming migration stream. 2. MIG_PRI_BACKEND priority: backends should migrate with higher priority than virtio-net, so that we can do final preparations here in post-load handlers and be sure that the backends are already prepared. Signed-off-by: Vladimir Sementsov-Ogievskiy <[email protected]> --- hw/net/virtio-net.c | 89 +++++++++++++++++++++++++++++++++- include/hw/virtio/virtio-net.h | 1 + include/migration/vmstate.h | 2 + include/net/net.h | 2 + 4 files changed, 93 insertions(+), 1 deletion(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 2a5d642a647..e23ae6052a2 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -40,6 +40,7 @@ #include "migration/misc.h" #include "standard-headers/linux/ethtool.h" #include "system/system.h" +#include "system/runstate.h" #include "system/replay.h" #include "trace.h" #include "monitor/qdev.h" @@ -3060,7 +3061,17 @@ static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) n->multiqueue = multiqueue; virtio_net_change_num_queues(n, max * 2 + 1); - virtio_net_set_queue_pairs(n); + /* + * virtio_net_set_multiqueue() called from set_features(0) on early + * reset, when peer may wait for incoming (and is not initialized + * yet). + * Don't worry about it: virtio_net_set_queue_pairs() will be called + * later from virtio_net_post_load_device(), and anyway will be + * no-op for local incoming migration with live backend passing. 
+ */ + if (!n->peers_wait_incoming) { + virtio_net_set_queue_pairs(n); + } } static int virtio_net_pre_load_queues(VirtIODevice *vdev, uint32_t n) @@ -3089,6 +3100,17 @@ static void virtio_net_get_features(VirtIODevice *vdev, uint64_t *features, virtio_add_feature_ex(features, VIRTIO_NET_F_MAC); + if (n->peers_wait_incoming) { + /* + * Excessive feature set is OK for early initialization when + * we wait for local incoming migration: actual guest-negotiated + * features will come with migration stream anyway. And we are sure + * that we support same host-features as source, because the backend + * is the same (the same TAP device, for example). + */ + return; + } + if (!peer_has_vnet_hdr(n)) { virtio_clear_feature_ex(features, VIRTIO_NET_F_CSUM); virtio_clear_feature_ex(features, VIRTIO_NET_F_HOST_TSO4); @@ -3185,6 +3207,7 @@ static int virtio_net_post_load_device(void *opaque, int version_id) VirtIODevice *vdev = VIRTIO_DEVICE(n); int i, link_down; bool has_tunnel_hdr = virtio_has_tunnel_hdr(vdev->guest_features_ex); + Error *local_err = NULL; trace_virtio_net_post_load_device(); virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs, @@ -3242,6 +3265,20 @@ static int virtio_net_post_load_device(void *opaque, int version_id) } virtio_net_commit_rss_config(n); + + /* + * If live-migration is enabled for some backend, than backend + * has already been migrated at higher priority (MIG_PRI_BACKEND) + * and virtio_net_vnet_post_load() has already called + * peer_test_vnet_hdr(). Recompute host_features so that virtio-net + * reflects the capabilities of the restored backend. 
+ */ + virtio_net_get_features(vdev, &vdev->host_features, &local_err); + if (local_err) { + error_report_err(local_err); + return -EINVAL; + } + return 0; } @@ -3392,6 +3429,14 @@ static int virtio_net_vnet_post_load(void *opaque, int version_id) { struct VirtIONetMigTmp *tmp = opaque; + /* + * If live-migration is enabled for some backend, than backend + * has already been migrated at higher priority (MIG_PRI_BACKEND), + * so n->has_vnet_hdr can be refreshed from the live backend right + * here. + */ + peer_test_vnet_hdr(tmp->parent); + if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) { error_report("virtio-net: saved image requires vnet_hdr=on"); return -EINVAL; @@ -3864,6 +3909,42 @@ static bool failover_hide_primary_device(DeviceListener *listener, return qatomic_read(&n->failover_primary_hidden); } +static bool virtio_net_check_peers_wait_incoming(VirtIONet *n, bool *waiting, + Error **errp) +{ + bool has_waiting = false; + bool has_not_waiting = false; + + for (int i = 0; i < n->max_queue_pairs; i++) { + NetClientState *peer = n->nic->ncs[i].peer; + if (!peer) { + continue; + } + + if (peer->info->is_wait_incoming && + peer->info->is_wait_incoming(peer)) { + has_waiting = true; + } else { + has_not_waiting = true; + } + + if (has_waiting && has_not_waiting) { + error_setg(errp, "Mixed peer states: some peers wait for incoming " + "migration while others don't"); + return false; + } + } + + if (has_waiting && !runstate_check(RUN_STATE_INMIGRATE)) { + error_setg(errp, "Peers wait for incoming, but it's not an incoming " + "migration."); + return false; + } + + *waiting = has_waiting; + return true; +} + static void virtio_net_device_realize(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); @@ -4001,6 +4082,12 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) n->nic->ncs[i].do_not_pad = true; } + if (!virtio_net_check_peers_wait_incoming(n, &n->peers_wait_incoming, + errp)) { + virtio_cleanup(vdev); + 
return; + } + peer_test_vnet_hdr(n); if (peer_has_vnet_hdr(n)) { n->host_hdr_len = sizeof(struct virtio_net_hdr); diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h index 371e3764282..8c967760c2a 100644 --- a/include/hw/virtio/virtio-net.h +++ b/include/hw/virtio/virtio-net.h @@ -230,6 +230,7 @@ struct VirtIONet { struct EBPFRSSContext ebpf_rss; uint32_t nr_ebpf_rss_fds; char **ebpf_rss_fds; + bool peers_wait_incoming; }; size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 0a8a2e85a63..c92c41397ae 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -178,6 +178,8 @@ typedef enum { MIG_PRI_LOW, /* Must happen after default */ MIG_PRI_DEFAULT, + MIG_PRI_BACKEND, /* Must happen before emulated devices, */ + /* e.g. virtio-net */ MIG_PRI_IOMMU, /* Must happen before PCI devices */ MIG_PRI_PCI_BUS, /* Must happen before IOMMU */ MIG_PRI_VIRTIO_MEM, /* Must happen before IOMMU */ diff --git a/include/net/net.h b/include/net/net.h index bdf16bb3e64..658b0d0aaf1 100644 --- a/include/net/net.h +++ b/include/net/net.h @@ -81,6 +81,7 @@ typedef void (SocketReadStateFinalize)(SocketReadState *rs); typedef void (NetAnnounce)(NetClientState *); typedef bool (SetSteeringEBPF)(NetClientState *, int); typedef bool (NetCheckPeerType)(NetClientState *, ObjectClass *, Error **); +typedef bool (IsWaitIncoming)(NetClientState *); typedef struct vhost_net *(GetVHostNet)(NetClientState *nc); typedef struct NetClientInfo { @@ -109,6 +110,7 @@ typedef struct NetClientInfo { NetAnnounce *announce; SetSteeringEBPF *set_steering_ebpf; NetCheckPeerType *check_peer_type; + IsWaitIncoming *is_wait_incoming; GetVHostNet *get_vhost_net; } NetClientInfo; -- 2.52.0
