On 28/08/2025 04:59, Peter Xu wrote: > Now after threadified dest VM load during precopy, we will always in a > thread context rather than within a coroutine. We can remove this path > now. > > With that, migration_started_on_destination can go away too. > > Signed-off-by: Peter Xu <[email protected]>
Reviewed-by: Li Zhijian <[email protected]> Thanks Zhijian > --- > migration/rdma.c | 102 +++++++++++++++++++---------------------------- > 1 file changed, 41 insertions(+), 61 deletions(-) > > diff --git a/migration/rdma.c b/migration/rdma.c > index 2b995513aa..7751262460 100644 > --- a/migration/rdma.c > +++ b/migration/rdma.c > @@ -29,7 +29,6 @@ > #include "qemu/rcu.h" > #include "qemu/sockets.h" > #include "qemu/bitmap.h" > -#include "qemu/coroutine.h" > #include "system/memory.h" > #include <sys/socket.h> > #include <netdb.h> > @@ -357,13 +356,6 @@ typedef struct RDMAContext { > /* Index of the next RAMBlock received during block registration */ > unsigned int next_src_index; > > - /* > - * Migration on *destination* started. > - * Then use coroutine yield function. > - * Source runs in a thread, so we don't care. > - */ > - int migration_started_on_destination; > - > int total_registrations; > int total_writes; > > @@ -1353,66 +1345,55 @@ static int qemu_rdma_wait_comp_channel(RDMAContext > *rdma, > struct rdma_cm_event *cm_event; > > /* > - * Coroutine doesn't start until migration_fd_process_incoming() > - * so don't yield unless we know we're running inside of a coroutine. > + * This is the source or dest side, either during precopy or > + * postcopy. We're always in a separate thread when reaching here. > + * Poll the fd. We need to be able to handle 'cancel' or an error > + * without hanging forever. > */ > - if (rdma->migration_started_on_destination && > - migration_incoming_get_current()->state == MIGRATION_STATUS_ACTIVE && > - qemu_in_coroutine()) { > - yield_until_fd_readable(comp_channel->fd); > - } else { > - /* This is the source side, we're in a separate thread > - * or destination prior to migration_fd_process_incoming() > - * after postcopy, the destination also in a separate thread. > - * we can't yield; so we have to poll the fd. > - * But we need to be able to handle 'cancel' or an error > - * without hanging forever. > - */ > - while (!rdma->errored && !rdma->received_error) { > - GPollFD pfds[2]; > - pfds[0].fd = comp_channel->fd; > - pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; > - pfds[0].revents = 0; > - > - pfds[1].fd = rdma->channel->fd; > - pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR; > - pfds[1].revents = 0; > - > - /* 0.1s timeout, should be fine for a 'cancel' */ > - switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) { > - case 2: > - case 1: /* fd active */ > - if (pfds[0].revents) { > - return 0; > - } > + while (!rdma->errored && !rdma->received_error) { > + GPollFD pfds[2]; > + pfds[0].fd = comp_channel->fd; > + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; > + pfds[0].revents = 0; > + > + pfds[1].fd = rdma->channel->fd; > + pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR; > + pfds[1].revents = 0; > + > + /* 0.1s timeout, should be fine for a 'cancel' */ > + switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) { > + case 2: > + case 1: /* fd active */ > + if (pfds[0].revents) { > + return 0; > + } > > - if (pfds[1].revents) { > - if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) { > - return -1; > - } > + if (pfds[1].revents) { > + if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) { > + return -1; > + } > > - if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED || > - cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) { > - rdma_ack_cm_event(cm_event); > - return -1; > - } > + if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED || > + cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) { > rdma_ack_cm_event(cm_event); > + return -1; > } > - break; > + rdma_ack_cm_event(cm_event); > + } > + break; > > - case 0: /* Timeout, go around again */ > - break; > + case 0: /* Timeout, go around again */ > + break; > > - default: /* Error of some type - > - * I don't trust errno from qemu_poll_ns > - */ > - return -1; > - } > + default: /* Error of some type - > + * I don't trust errno from qemu_poll_ns > + */ > + return -1; > + } > > - if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) > { > - /* Bail out and let the cancellation happen */ > - return -1; > - } > + if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) { > + /* Bail out and let the cancellation happen */ > + return -1; > } > } > > @@ -3817,7 +3798,6 @@ static void rdma_accept_incoming_migration(void *opaque) > return; > } > > - rdma->migration_started_on_destination = 1; > migration_fd_process_incoming(f); > } >
