Add a cpr=on option to the tap netdev that preserves the TAP and vhost file descriptors across cpr-transfer, so that the management layer does not need to create a new device for the target.
Save all tap fd's in order with the tap device fds saved first, and the vhostfd saved after. Example: -netdev tap,id=hostnet2,cpr=on Signed-off-by: Steve Sistare <[email protected]> Signed-off-by: Ben Chaney <[email protected]> --- hw/vfio/device.c | 2 +- include/migration/cpr.h | 4 +-- migration/cpr.c | 19 +++++++------ net/tap.c | 74 +++++++++++++++++++++++++++++++++++++++---------- qapi/net.json | 6 +++- 5 files changed, 77 insertions(+), 28 deletions(-) diff --git a/hw/vfio/device.c b/hw/vfio/device.c index 086f20f676..cbc8db6a67 100644 --- a/hw/vfio/device.c +++ b/hw/vfio/device.c @@ -363,7 +363,7 @@ void vfio_device_free_name(VFIODevice *vbasedev) void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) { - vbasedev->fd = cpr_get_fd_param(vbasedev->dev->id, str, 0, errp); + vbasedev->fd = get_fd_param(vbasedev->dev->id, str, 0, true, errp); } static VFIODeviceIOOps vfio_device_io_ops_ioctl; diff --git a/include/migration/cpr.h b/include/migration/cpr.h index d585fadc5b..ded6ceff7c 100644 --- a/include/migration/cpr.h +++ b/include/migration/cpr.h @@ -48,8 +48,8 @@ void cpr_state_close(void); struct QIOChannel *cpr_state_ioc(void); bool cpr_incoming_needed(void *opaque); -int cpr_get_fd_param(const char *name, const char *fdname, int index, - Error **errp); +int get_fd_param(const char *cpr_name, const char *fdname, int index, bool cpr, + Error **errp); QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp); QEMUFile *cpr_transfer_input(MigrationChannel *channel, Error **errp); diff --git a/migration/cpr.c b/migration/cpr.c index c0bf93a7ba..f2c40eeba5 100644 --- a/migration/cpr.c +++ b/migration/cpr.c @@ -311,11 +311,12 @@ bool cpr_incoming_needed(void *opaque) } /* - * cpr_get_fd_param: find a descriptor and return its value. + * get_fd_param: find a descriptor and return its value. 
* - * @name: CPR name for the descriptor + * @cpr_name: CPR name for the descriptor * @fdname: An integer-valued string, or a name passed to a getfd command * @index: CPR index of the descriptor + * @cpr: cpr is enabled on the associated device * @errp: returned error message * * If CPR is not being performed, then use @fdname to find the fd. @@ -324,23 +325,23 @@ bool cpr_incoming_needed(void *opaque) * * On success returns the fd value, else returns -1. */ -int cpr_get_fd_param(const char *name, const char *fdname, int index, - Error **errp) +int get_fd_param(const char *cpr_name, const char *fdname, int index, + bool cpr, Error **errp) { ERRP_GUARD(); int fd; - if (cpr_is_incoming()) { - fd = cpr_find_fd(name, index); + if (cpr && cpr_is_incoming()) { + fd = cpr_find_fd(cpr_name, index); if (fd < 0) { error_setg(errp, "cannot find saved value for fd %s", fdname); } } else { fd = monitor_fd_param(monitor_cur(), fdname, errp); - if (fd >= 0) { - cpr_save_fd(name, index, fd); - } else { + if (fd < 0) { error_prepend(errp, "Could not parse object fd %s:", fdname); + } else if (cpr) { + cpr_save_fd(cpr_name, index, fd); } } return fd; diff --git a/net/tap.c b/net/tap.c index 1847167e4f..8875498434 100644 --- a/net/tap.c +++ b/net/tap.c @@ -35,6 +35,7 @@ #include "net/eth.h" #include "net/net.h" #include "clients.h" +#include "migration/cpr.h" #include "monitor/monitor.h" #include "system/system.h" #include "qapi/error.h" @@ -80,6 +81,7 @@ typedef struct TAPState { bool has_uso; bool has_tunnel; bool enabled; + bool cpr; VHostNetState *vhost_net; unsigned host_vnet_hdr_len; Notifier exit; @@ -323,6 +325,9 @@ static void tap_cleanup(NetClientState *nc) { TAPState *s = DO_UPCAST(TAPState, nc, nc); + if (s->cpr) { + cpr_delete_fd_all(nc->name); + } if (s->vhost_net) { vhost_net_cleanup(s->vhost_net); g_free(s->vhost_net); @@ -690,18 +695,24 @@ static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr, return fd; } +/* CPR fd's for each queue are saved at these 
indices */ +#define TAP_FD_INDEX(queue) ((queue)) +#define TAP_VHOSTFD_INDEX(queue, total_fds) ((queue) + (total_fds)) + #define MAX_TAP_QUEUES 1024 static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, const char *model, const char *name, const char *ifname, const char *script, const char *downscript, const char *vhostfdname, - int vnet_hdr, int fd, Error **errp) + int vnet_hdr, int fd, int index, Error **errp) { Error *err = NULL; TAPState *s = net_tap_fd_init(peer, model, name, fd, vnet_hdr); + bool cpr = tap->has_cpr ? tap->cpr : false; int vhostfd; + s->cpr = cpr; tap_set_sndbuf(s->fd, tap, &err); if (err) { error_propagate(errp, err); @@ -736,7 +747,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, } if (vhostfdname) { - vhostfd = monitor_fd_param(monitor_cur(), vhostfdname, &err); + vhostfd = get_fd_param(name, vhostfdname, index, cpr, &err); if (vhostfd == -1) { error_propagate(errp, err); goto failed; @@ -745,12 +756,21 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, goto failed; } } else { - vhostfd = open("/dev/vhost-net", O_RDWR); + vhostfd = cpr ? cpr_find_fd(name, index) : -1; + if (vhostfd < 0) { + vhostfd = open("/dev/vhost-net", O_RDWR); + if (cpr && vhostfd >= 0) { + cpr_save_fd(name, index, vhostfd); + } + } if (vhostfd < 0) { error_setg_file_open(errp, errno, "/dev/vhost-net"); goto failed; } if (!qemu_set_blocking(vhostfd, false, errp)) { + if (!cpr) { + close(vhostfd); + } goto failed; } } @@ -776,6 +796,9 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, return; failed: + if (cpr) { + cpr_delete_fd_all(name); + } qemu_del_net_client(&s->nc); } @@ -808,7 +831,8 @@ static int get_fds(char *str, char *fds[], int max) int net_init_tap(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp) { - const NetdevTapOptions *tap; + const NetdevTapOptions *tap = &netdev->u.tap; + bool cpr = tap->has_cpr ? 
tap->cpr : false; int fd, vnet_hdr = 0, i = 0, queues; /* for the no-fd, no-helper case */ const char *script; @@ -844,7 +868,7 @@ int net_init_tap(const Netdev *netdev, const char *name, goto out; } - fd = monitor_fd_param(monitor_cur(), tap->fd, errp); + fd = get_fd_param(name, tap->fd, TAP_FD_INDEX(0), cpr, errp); if (fd == -1) { ret = -1; goto out; @@ -865,13 +889,15 @@ int net_init_tap(const Netdev *netdev, const char *name, net_init_tap_one(tap, peer, "tap", name, NULL, script, downscript, - vhostfdname, vnet_hdr, fd, &err); + vhostfdname, vnet_hdr, fd, + TAP_VHOSTFD_INDEX(0, 1), &err); if (err) { error_propagate(errp, err); close(fd); ret = -1; goto out; } + } else if (tap->fds) { char **fds; char **vhost_fds; @@ -902,7 +928,7 @@ int net_init_tap(const Netdev *netdev, const char *name, } for (i = 0; i < nfds; i++) { - fd = monitor_fd_param(monitor_cur(), fds[i], errp); + fd = get_fd_param(name, fds[i], TAP_FD_INDEX(i), cpr, errp); if (fd == -1) { ret = -1; goto free_fail; @@ -929,7 +955,7 @@ int net_init_tap(const Netdev *netdev, const char *name, net_init_tap_one(tap, peer, "tap", name, ifname, script, downscript, tap->vhostfds ? vhost_fds[i] : NULL, - vnet_hdr, fd, &err); + vnet_hdr, fd, TAP_VHOSTFD_INDEX(i, nfds), &err); if (err) { error_propagate(errp, err); ret = -1; @@ -957,9 +983,15 @@ free_fail: goto out; } - fd = net_bridge_run_helper(tap->helper, - tap->br ?: DEFAULT_BRIDGE_INTERFACE, - errp); + fd = cpr ? 
cpr_find_fd(name, TAP_FD_INDEX(0)) : -1; + if (fd < 0) { + fd = net_bridge_run_helper(tap->helper, + tap->br ?: DEFAULT_BRIDGE_INTERFACE, + errp); + if (cpr && fd >= 0) { + cpr_save_fd(name, TAP_FD_INDEX(0), fd); + } + } if (fd == -1) { ret = -1; goto out; @@ -979,13 +1011,14 @@ free_fail: net_init_tap_one(tap, peer, "bridge", name, ifname, script, downscript, vhostfdname, - vnet_hdr, fd, &err); + vnet_hdr, fd, TAP_VHOSTFD_INDEX(0, 1), &err); if (err) { error_propagate(errp, err); close(fd); ret = -1; goto out; } + } else { g_autofree char *default_script = NULL; g_autofree char *default_downscript = NULL; @@ -1010,8 +1043,14 @@ free_fail: } for (i = 0; i < queues; i++) { - fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script, - ifname, sizeof ifname, queues > 1, errp); + fd = cpr ? cpr_find_fd(name, TAP_FD_INDEX(i)) : -1; + if (fd < 0) { + fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script, + ifname, sizeof ifname, queues > 1, errp); + if (cpr && fd >= 0) { + cpr_save_fd(name, TAP_FD_INDEX(i), fd); + } + } if (fd == -1) { ret = -1; goto out; @@ -1029,7 +1068,9 @@ free_fail: net_init_tap_one(tap, peer, "tap", name, ifname, i >= 1 ? "no" : script, i >= 1 ? 
"no" : downscript, - vhostfdname, vnet_hdr, fd, &err); + vhostfdname, vnet_hdr, + fd, TAP_VHOSTFD_INDEX(i, queues), + &err); if (err) { error_propagate(errp, err); close(fd); @@ -1040,6 +1081,9 @@ free_fail: } out: + if (ret && cpr) { + cpr_delete_fd_all(name); + } return ret; } diff --git a/qapi/net.json b/qapi/net.json index 118bd34965..4b12fca94b 100644 --- a/qapi/net.json +++ b/qapi/net.json @@ -355,6 +355,9 @@ # @poll-us: maximum number of microseconds that could be spent on busy # polling for tap (since 2.7) # +# @cpr: preserve the state of this device and its associated file +# descriptors during cpr-transfer for reduced migration downtime +# # Since: 1.2 ## { 'struct': 'NetdevTapOptions', @@ -373,7 +376,8 @@ '*vhostfds': 'str', '*vhostforce': 'bool', '*queues': 'uint32', - '*poll-us': 'uint32'} } + '*poll-us': 'uint32', + '*cpr': 'bool'} } ## # @NetdevSocketOptions: -- 2.34.1
