Support transferring of TAP state (including open fd) through migration stream as part of viritio-net "local-migration".
Add new option, incoming-fds, which should be set to true to trigger new logic. For new option require explicitly unset script and downscript, to keep possibility of implementing support for them in future. Note disabling read polling on source stop for TAP migration: otherwise, source process may steal packages from TAP fd even after source vm STOP. Signed-off-by: Vladimir Sementsov-Ogievskiy <[email protected]> --- net/tap.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++--- qapi/net.json | 7 ++- 2 files changed, 147 insertions(+), 7 deletions(-) diff --git a/net/tap.c b/net/tap.c index 9d6213fc3e5..2156b6cbb73 100644 --- a/net/tap.c +++ b/net/tap.c @@ -36,6 +36,7 @@ #include "net/net.h" #include "clients.h" #include "monitor/monitor.h" +#include "system/runstate.h" #include "system/system.h" #include "qapi/error.h" #include "qemu/cutils.h" @@ -86,6 +87,9 @@ typedef struct TAPState { VHostNetState *vhost_net; unsigned host_vnet_hdr_len; Notifier exit; + + bool read_poll_detached; + VMChangeStateEntry *vmstate; } TAPState; static void launch_script(const char *setup_script, const char *ifname, @@ -94,19 +98,25 @@ static void launch_script(const char *setup_script, const char *ifname, static void tap_send(void *opaque); static void tap_writable(void *opaque); +static bool tap_is_explicit_no_scirpt(const char *script_arg) +{ + return script_arg && + (script_arg[0] == '\0' || strcmp(script_arg, "no") == 0); +} + static char *tap_parse_script(const char *script_arg, const char *default_path) { g_autofree char *res = g_strdup(script_arg); - if (!res) { - res = get_relocated_path(default_path); + if (tap_is_explicit_no_scirpt(script_arg)) { + return NULL; } - if (res[0] == '\0' || strcmp(res, "no") == 0) { - return NULL; + if (!script_arg) { + return get_relocated_path(default_path); } - return g_steal_pointer(&res); + return g_strdup(script_arg); } static void tap_update_fd_handler(TAPState *s) @@ -123,6 +133,23 @@ static void tap_read_poll(TAPState *s, bool enable) tap_update_fd_handler(s); } +static void tap_vm_state_change(void *opaque, bool running, RunState state) +{ + TAPState *s = opaque; + + if (running) { + if (s->read_poll_detached) { + tap_read_poll(s, true); + s->read_poll_detached = false; + } + } else if (state == RUN_STATE_FINISH_MIGRATE) { + if (s->read_poll) { + s->read_poll_detached = true; + tap_read_poll(s, false); + } + } +} + static void tap_write_poll(TAPState *s, bool enable) { s->write_poll = enable; @@ -353,6 +380,11 @@ static void tap_cleanup(NetClientState *nc) s->exit.notify = NULL; } + if (s->vmstate) { + qemu_del_vm_change_state_handler(s->vmstate); + s->vmstate = NULL; + } + tap_read_poll(s, false); tap_write_poll(s, false); close(s->fd); @@ -393,6 +425,65 @@ static VHostNetState *tap_get_vhost_net(NetClientState *nc) return s->vhost_net; } +static bool tap_is_wait_incoming(NetClientState *nc) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + assert(nc->info->type == NET_CLIENT_DRIVER_TAP); + return s->fd == -1; +} + +static int tap_pre_load(void *opaque) +{ + TAPState *s = opaque; + + if (s->fd != -1) { + error_report( + "TAP is already initialized and cannot receive incoming fd"); + return -EINVAL; + } + + return 0; +} + +static bool tap_setup_vhost(TAPState *s, Error **errp); + +static int tap_post_load(void *opaque, int version_id) +{ + TAPState *s = opaque; + Error *local_err = NULL; + + tap_read_poll(s, true); + + if (s->fd < 0) { + return -1; + } + + if (!tap_setup_vhost(s, &local_err)) { + error_prepend(&local_err, + "Failed to setup vhost during TAP post-load: "); + error_report_err(local_err); + return -1; + } + + return 0; +} + +static const VMStateDescription vmstate_tap = { + .name = "net-tap", + .pre_load = tap_pre_load, + .post_load = tap_post_load, + .fields = (const VMStateField[]) { + VMSTATE_FD(fd, TAPState), + VMSTATE_BOOL(using_vnet_hdr, TAPState), + VMSTATE_BOOL(has_ufo, TAPState), + VMSTATE_BOOL(has_uso, TAPState), + VMSTATE_BOOL(has_tunnel, TAPState), + VMSTATE_BOOL(enabled, TAPState), + VMSTATE_UINT32(host_vnet_hdr_len, TAPState), + VMSTATE_END_OF_LIST() + } +}; + /* fd support */ static NetClientInfo net_tap_info = { @@ -412,7 +503,9 @@ static NetClientInfo net_tap_info = { .set_vnet_le = tap_set_vnet_le, .set_vnet_be = tap_set_vnet_be, .set_steering_ebpf = tap_set_steering_ebpf, + .is_wait_incoming = tap_is_wait_incoming, .get_vhost_net = tap_get_vhost_net, + .backend_vmsd = &vmstate_tap, }; static TAPState *net_tap_fd_init(NetClientState *peer, @@ -748,6 +841,9 @@ static bool net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, int sndbuf = (tap->has_sndbuf && tap->sndbuf) ? MIN(tap->sndbuf, INT_MAX) : INT_MAX; + s->read_poll_detached = false; + s->vmstate = qemu_add_vm_change_state_handler(tap_vm_state_change, s); + if (!tap_set_sndbuf(fd, sndbuf, sndbuf_required ? errp : NULL) && sndbuf_required) { goto failed; @@ -779,6 +875,8 @@ static bool net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, return true; failed: + qemu_del_vm_change_state_handler(s->vmstate); + s->vmstate = NULL; qemu_del_net_client(&s->nc); return false; } @@ -910,6 +1008,26 @@ int net_init_tap(const Netdev *netdev, const char *name, return -1; } + if (tap->incoming_fds && + (tap->fd || tap->fds || tap->helper || tap->br || tap->ifname || + tap->has_sndbuf || tap->has_vnet_hdr)) { + error_setg(errp, "incoming-fds is incompatible with " + "fd=, fds=, helper=, br=, ifname=, sndbuf= and vnet_hdr="); + return -1; + } + + if (tap->incoming_fds && + !(tap_is_explicit_no_scirpt(tap->script) && + tap_is_explicit_no_scirpt(tap->downscript))) { + /* + * script="" and downscript="" are silently supported to be consistent + * with cases without incoming_fds, but do not care to put this into + * error message. + */ + error_setg(errp, "incoming-fds requires script=no and downscript=no"); + return -1; + } + queues = tap_parse_fds_and_queues(tap, &fds, errp); if (queues < 0) { return -1; @@ -928,7 +1046,24 @@ int net_init_tap(const Netdev *netdev, const char *name, goto fail; } - if (fds) { + if (tap->incoming_fds) { + for (i = 0; i < queues; i++) { + NetClientState *nc; + TAPState *s; + + nc = qemu_new_net_client(&net_tap_info, peer, "tap", name); + qemu_set_info_str(nc, "incoming"); + + s = DO_UPCAST(TAPState, nc, nc); + s->fd = -1; + if (vhost_fds) { + s->vhostfd = vhost_fds[i]; + s->vhost_busyloop_timeout = tap->has_poll_us ? tap->poll_us : 0; + } else { + s->vhostfd = -1; + } + } + } else if (fds) { for (i = 0; i < queues; i++) { if (i == 0) { vnet_hdr = tap_probe_vnet_hdr(fds[i], errp); diff --git a/qapi/net.json b/qapi/net.json index 118bd349651..2240de7dbf6 100644 --- a/qapi/net.json +++ b/qapi/net.json @@ -355,6 +355,10 @@ # @poll-us: maximum number of microseconds that could be spent on busy # polling for tap (since 2.7) # +# @incoming-fds: do not open or create any TAP devices. Prepare for +# getting opened TAP file descriptors from incoming migration +# stream. (Since 11.0) +# # Since: 1.2 ## { 'struct': 'NetdevTapOptions', @@ -373,7 +377,8 @@ '*vhostfds': 'str', '*vhostforce': 'bool', '*queues': 'uint32', - '*poll-us': 'uint32'} } + '*poll-us': 'uint32', + '*incoming-fds': 'bool' } } ## # @NetdevSocketOptions: -- 2.52.0
