On Wed, 27 Nov 2024 at 19:42, Fabiano Rosas <[email protected]> wrote:
> From e9110360eb0efddf6945f37c518e3cc38d12b600 Mon Sep 17 00:00:00 2001
> From: Fabiano Rosas <[email protected]>
> Date: Wed, 27 Nov 2024 11:03:04 -0300
> Subject: [PATCH] migration: Rationalize multifd flushes from ram code
>
> We currently have a mess of conditionals to achieve the correct
> combination of multifd local flushes, where we sync the local
> (send/recv) multifd threads between themselves, and multifd remote
> flushes, where we put a flag on the stream to inform the recv side to
> perform a local flush.
>
> On top of that we also have the multifd_flush_after_each_section
> property, which is there to provide backward compatibility from when
> we used to flush after every vmstate section.
>
> And lastly, there's the mapped-ram feature which always wants the
> non-backward compatible behavior and also does not support extraneous
> flags on the stream (such as the MULTIFD_FLUSH flag).
>
> Move the conditionals into a separate function that encapsulates and
> explains the whole situation.
>
> Signed-off-by: Fabiano Rosas <[email protected]>
> ---
>  migration/ram.c | 198 ++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 157 insertions(+), 41 deletions(-)
>
> diff --git a/migration/ram.c b/migration/ram.c
> index 05ff9eb328..caaaae6fdc 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -1277,6 +1277,149 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss)
>      return pages;
>  }
>
> +enum RamMultifdFlushSpots {
> +    FLUSH_SAVE_SETUP,
> +    FLUSH_SAVE_ITER,
> +    FLUSH_DIRTY_BLOCK,
> +    FLUSH_SAVE_COMPLETE,
> +
> +    FLUSH_LOAD_POSTCOPY_EOS,
> +    FLUSH_LOAD_POSTCOPY_FLUSH,
> +    FLUSH_LOAD_PRECOPY_EOS,
> +    FLUSH_LOAD_PRECOPY_FLUSH,
> +};
> +
> +static int ram_multifd_flush(QEMUFile *f, enum RamMultifdFlushSpots spot)
> +{
> +    int ret;
> +    bool always_flush, do_local_flush, do_remote_flush;
> +    bool mapped_ram = migrate_mapped_ram();
> +
> +    if (!migrate_multifd()) {
> +        return 0;
> +    }
> +
> +    /*
> +     * For backward compatibility, whether to flush multifd after each
> +     * section is sent. This is mutually exclusive with a
> +     * RAM_SAVE_FLAG_MULTIFD_FLUSH on the stream
> +     */
> +    always_flush = migrate_multifd_flush_after_each_section();
> +
> +    /*
> +     * Save side flushes
> +     */
> +
> +    do_local_flush = false;
> +
> +    switch (spot) {
> +    case FLUSH_SAVE_SETUP:
> +        assert(!bql_locked());
> +        do_local_flush = true;
> +        break;
> +
> +    case FLUSH_SAVE_ITER:
> +        /*
> +         * This flush is not necessary, only do for backward
> +         * compatibility. Mapped-ram assumes the new scheme.
> +         */
> +        do_local_flush = always_flush && !mapped_ram;
> +        break;
> +
> +    case FLUSH_DIRTY_BLOCK:
> +        /*
> +         * This is the flush that's actually required, always do it
> +         * unless we're emulating the old behavior.
> +         */
> +        do_local_flush = !always_flush || mapped_ram;
> +        break;
> +
> +    case FLUSH_SAVE_COMPLETE:
> +        do_local_flush = true;
> +        break;
> +
> +    default:
> +        break;
> +    }
> +
> +    if (do_local_flush) {
> +        ret = multifd_ram_flush_and_sync();
> +        if (ret < 0) {
> +            return ret;
> +        }
> +    }
> +
> +    /*
> +     * There's never a remote flush with mapped-ram because any flags
> +     * put on the stream (aside from RAM_SAVE_FLAG_MEM_SIZE and
> +     * RAM_SAVE_FLAG_EOS) break mapped-ram's assumption that ram pages
> +     * can be read contiguously from the stream.
> +     *
> +     * On the recv side, there's no local flush, even at EOS because
> +     * mapped-ram does its own flush after loading the ramblock.
> +     */
> +    if (mapped_ram) {
> +        return 0;
> +    }
> +
> +    do_remote_flush = false;
> +
> +    /* Save side remote flush */
> +    switch (spot) {
> +    case FLUSH_SAVE_SETUP:
> +        do_remote_flush = !always_flush;
> +        break;
> +
> +    case FLUSH_SAVE_ITER:
> +        do_remote_flush = false;
> +        break;
> +
> +    case FLUSH_DIRTY_BLOCK:
> +        do_remote_flush = do_local_flush;
> +        break;
> +
> +    case FLUSH_SAVE_COMPLETE:
> +        do_remote_flush = false;
> +        break;
> +
> +    default:
> +        break;
> +    }
> +
> +    /* Put a flag on the stream to trigger a remote flush */
> +    if (do_remote_flush) {
> +        qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
> +        qemu_fflush(f);
> +    }
> +
> +    /*
> +     * Load side flushes.
> +     */
> +
> +    do_local_flush = false;
> +
> +    switch (spot) {
> +    case FLUSH_LOAD_PRECOPY_EOS:
> +    case FLUSH_LOAD_POSTCOPY_EOS:
> +        do_local_flush = always_flush;
> +        break;
> +
> +    case FLUSH_LOAD_PRECOPY_FLUSH:
> +    case FLUSH_LOAD_POSTCOPY_FLUSH:
> +        do_local_flush = true;
> +        break;
> +
> +    default:
> +        break;
> +    }
> +
> +    if (do_local_flush) {
> +        multifd_recv_sync_main();
> +    }
> +
> +    return 0;
> +}
> +
>  static int ram_save_multifd_page(RAMBlock *block, ram_addr_t offset)
>  {
>      if (!multifd_queue_page(block, offset)) {
> @@ -1323,19 +1466,10 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss)
>          pss->page = 0;
>          pss->block = QLIST_NEXT_RCU(pss->block, next);
>          if (!pss->block) {
> -            if (migrate_multifd() &&
> -                (!migrate_multifd_flush_after_each_section() ||
> -                 migrate_mapped_ram())) {
> -                QEMUFile *f = rs->pss[RAM_CHANNEL_PRECOPY].pss_channel;
> -                int ret = multifd_ram_flush_and_sync();
> -                if (ret < 0) {
> -                    return ret;
> -                }
> -
> -                if (!migrate_mapped_ram()) {
> -                    qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
> -                    qemu_fflush(f);
> -                }
> +            int ret = ram_multifd_flush(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel,
> +                                        FLUSH_DIRTY_BLOCK);
> +            if (ret < 0) {
> +                return ret;
>              }
>
>              /* Hit the end of the list */
> @@ -3065,18 +3199,13 @@ static int ram_save_setup(QEMUFile *f, void *opaque, Error **errp)
>      }
>
>      bql_unlock();
> -    ret = multifd_ram_flush_and_sync();
> +    ret = ram_multifd_flush(f, FLUSH_SAVE_SETUP);
>      bql_lock();
>      if (ret < 0) {
>          error_setg(errp, "%s: multifd synchronization failed", __func__);
>          return ret;
>      }
>
> -    if (migrate_multifd() && !migrate_multifd_flush_after_each_section() &&
> -        !migrate_mapped_ram()) {
> -        qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
> -    }
> -
>      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>      ret = qemu_fflush(f);
>      if (ret < 0) {
> @@ -3209,12 +3338,10 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
>
>  out:
>      if (ret >= 0 && migration_is_running()) {
> -        if (migrate_multifd() && migrate_multifd_flush_after_each_section() &&
> -            !migrate_mapped_ram()) {
> -            ret = multifd_ram_flush_and_sync();
> -            if (ret < 0) {
> -                return ret;
> -            }
> +
> +        ret = ram_multifd_flush(f, FLUSH_SAVE_ITER);
> +        if (ret < 0) {
> +            return ret;
>          }
>
>          qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
> @@ -3283,7 +3410,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
>          }
>      }
>
> -    ret = multifd_ram_flush_and_sync();
> +    ret = ram_multifd_flush(f, FLUSH_SAVE_COMPLETE);
>      if (ret < 0) {
>          return ret;
>      }
> @@ -3797,14 +3924,11 @@ int ram_load_postcopy(QEMUFile *f, int channel)
>              }
>              break;
>          case RAM_SAVE_FLAG_MULTIFD_FLUSH:
> -            multifd_recv_sync_main();
> +            ram_multifd_flush(f, FLUSH_LOAD_POSTCOPY_FLUSH);
>              break;
>          case RAM_SAVE_FLAG_EOS:
>              /* normal exit */
> -            if (migrate_multifd() &&
> -                migrate_multifd_flush_after_each_section()) {
> -                multifd_recv_sync_main();
> -            }
> +            ram_multifd_flush(f, FLUSH_LOAD_POSTCOPY_EOS);
>              break;
>          default:
>              error_report("Unknown combination of migration flags: 0x%x"
> @@ -4237,19 +4361,11 @@ static int ram_load_precopy(QEMUFile *f)
>              }
>              break;
>          case RAM_SAVE_FLAG_MULTIFD_FLUSH:
> -            multifd_recv_sync_main();
> +            ram_multifd_flush(f, FLUSH_LOAD_PRECOPY_FLUSH);
>              break;
>          case RAM_SAVE_FLAG_EOS:
>              /* normal exit */
> -            if (migrate_multifd() &&
> -                migrate_multifd_flush_after_each_section() &&
> -                /*
> -                 * Mapped-ram migration flushes once and for all after
> -                 * parsing ramblocks. Always ignore EOS for it.
> -                 */
> -                !migrate_mapped_ram()) {
> -                multifd_recv_sync_main();
> -            }
> +            ram_multifd_flush(f, FLUSH_LOAD_PRECOPY_EOS);
>              break;
>          case RAM_SAVE_FLAG_HOOK:
>              ret = rdma_registration_handle(f);
> --
> 2.35.3
>

* This does not seem to reduce the complexity. When reading or
  following the code, it is easier to look at 3-4 conditions and work
  through them to decide whether the full expression is 'true' or
  'false'; that is not doable here, where the decision is spread across
  several switch statements.

* As I understand it, a flush just pushes buffered content into (or
  out of) the stream, so why do we have to tie it to a specific spot?
  At any point it does the same thing: flush the available data to (or
  out of) the stream.

* Could we separate the send-side flush from the receive-side flush
  operations? Writing a flag into the stream on the send side to
  trigger a flush on the receive side seems odd; the data stream need
  not say when to flush. Let the sender and receiver each decide when
  they want to flush and sync their data, no? Maybe that would help to
  reduce the complexity of the long conditionals, i.e. if we were able
  to flush and sync without any condition. A rough sketch of the kind
  of split I have in mind follows below.
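
For illustration only: an untested sketch, reusing the helpers already
quoted in the patch above (migrate_multifd(), migrate_mapped_ram(),
multifd_ram_flush_and_sync(), multifd_recv_sync_main()). The helper
names ram_multifd_flush_send() and ram_multifd_flush_recv() are made
up, and this deliberately ignores the backward-compatibility property
and the question of matching sync points between the two sides.

/*
 * Untested sketch, not a patch: hypothetical helpers showing a
 * send/recv split with no flag written to the stream.
 */

/* Save side: sync the multifd send threads whenever the sender
 * decides it needs to (end of a dirty-bitmap pass, completion). */
static int ram_multifd_flush_send(void)
{
    if (!migrate_multifd()) {
        return 0;
    }

    /* No RAM_SAVE_FLAG_MULTIFD_FLUSH is put on the stream here. */
    return multifd_ram_flush_and_sync();
}

/* Load side: sync the multifd recv threads whenever the receiver
 * decides it needs to, e.g. on RAM_SAVE_FLAG_EOS, rather than when
 * the stream tells it to. */
static void ram_multifd_flush_recv(void)
{
    if (!migrate_multifd() || migrate_mapped_ram()) {
        return;
    }

    multifd_recv_sync_main();
}

That still leaves the question of how the two sides agree on sync
points, but at least each side's flush decision would live next to its
own callers.

Thank you.
---
  - Prasad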
