Re: [PATCH 19/20] migration: Postcopy recover with preempt enabled

2022-02-23 Thread Dr. David Alan Gilbert
* Peter Xu (pet...@redhat.com) wrote:
> [...]
> 
> Sure, I'll further squash below comment update into the same patch.  I
> reworded some places but mostly it should be telling the same thing:
> 
> [...]

Acked-by: Dr. David Alan Gilbert 

Re: [PATCH 19/20] migration: Postcopy recover with preempt enabled

2022-02-23 Thread Peter Xu
On Wed, Feb 23, 2022 at 09:52:08AM +, Dr. David Alan Gilbert wrote:
> [...]
> 
> Yes, that feels like it needs to go in the code somewhere.

Sure, I'll further squash below comment update into the same patch.  I
reworded some places but mostly it should be telling the same thing:

---8<---
diff --git a/migration/migration.h b/migration/migration.h
index 945088064a..91f845e9e4 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -118,7 +118,17 @@ struct MigrationIncomingState {
 /* Postcopy priority thread is used to receive postcopy requested pages */
 QemuThread postcopy_prio_thread;
 bool postcopy_prio_thread_created;
-/* Used to sync with the prio thread */
+/*
+ * Used to sync between the ram load main thread and the fast ram load
+ * thread.  It protects postcopy_qemufile_dst, which is the postcopy
+ * fast channel.
+ *
+ * The ram fast load thread will take it mostly for the whole lifecycle
+ * because it needs to continuously read data from the channel, and
+ * it'll only release this mutex if postcopy is interrupted, so that
+ * the ram load main thread will take this mutex over and properly
+ * release the broken channel.
+ */
 QemuMutex postcopy_prio_thread_mutex;
 /*
  * An array of temp host huge pages to be used, one for each postcopy
@@ -149,6 +159,12 @@ struct MigrationIncomingState {
 /* notify PAUSED postcopy incoming migrations to try to continue */
 QemuSemaphore postcopy_pause_sem_dst;
 QemuSemaphore postcopy_pause_sem_fault;
+/*
+ * This semaphore is used to allow the ram fast load thread (only when
+ * postcopy preempt is enabled) fall into sleep when there's network
+ * interruption detected.  When the recovery is done, the main load
+ * thread will kick the fast ram load thread using this semaphore.
+ */
 QemuSemaphore postcopy_pause_sem_fast_load;
 
 /* List of listening socket addresses  */
---8<---

> 
> > [...]
> > 
> > > > @@ -3466,6 +3468,17 @@ static MigThrError postcopy_pause(MigrationState 
> > > > *s)
> > > >  qemu_file_shutdown(file);
> > > >  qemu_fclose(file);
> > > >  
> > > > +/*
> > > > + * Do the same to postcopy fast path socket too if there is.  
> > > > No
> > > > + * locking needed because no racer as long as we do this 
> > > > before setting
> > > > + * status to paused.
> > > > + */
> > > > +if (s->postcopy_qemufile_src) {
> > > > +
> > > > migration_ioc_unregister_yank_from_file(s->postcopy_qemufile_src);
> > > 
> > > Shouldn't this do 

Re: [PATCH 19/20] migration: Postcopy recover with preempt enabled

2022-02-23 Thread Dr. David Alan Gilbert
* Peter Xu (pet...@redhat.com) wrote:
> On Tue, Feb 22, 2022 at 11:32:10AM +, Dr. David Alan Gilbert wrote:
> > [...]
> 
> Ah right, I don't think I handled all the cases.  Sorry.
> 
> We always check the main channel, but if the postcopy channel got faulted,
> we may not fall into paused mode as expected.
> 
> I'll fix that up.

Thanks.

> > 
> > Can you add some docs to walk through those and explain the locking ?
> 
> Sure.
> 
> [...]  Then the main load thread
> can recycle the postcopy channel using qemu_fclose() safely.

Yes, that feels like it needs to go in the code somewhere.

> [...]

Re: [PATCH 19/20] migration: Postcopy recover with preempt enabled

2022-02-22 Thread Peter Xu
On Tue, Feb 22, 2022 at 11:32:10AM +, Dr. David Alan Gilbert wrote:
> * Peter Xu (pet...@redhat.com) wrote:
> > [...]
> 
> I *think* this is mostly OK; but I worry I don't understand all the
> cases; e.g.
>   a) If the postcopy channel errors first
>   b) If the main channel errors first

Ah right, I don't think I handled all the cases.  Sorry.

We always check the main channel, but if the postcopy channel got faulted,
we may not fall into paused mode as expected.

I'll fix that up.

> 
> Can you add some docs to walk through those and explain the locking ?

Sure.

The sem is mentioned in the last sentence of paragraph 1, where it's purely
used for a way to yield the fast ram load thread so that when something
wrong happens it can sleep on that semaphore.  Then when we recover we'll
post to the semaphore to kick it up.  We used it like that in many places,
e.g. postcopy_pause_sem_dst to yield the main load thread.

The 2nd paragraph above was for explaining why we need the mutex; it's
basically the same as rp_mutex protecting to_src_file, so that we won't
accidentally close() the qemufile during some other thread using it.  So
the fast ram load thread needs to take that new mutex for mostly the whole
lifecycle of itself (because it's loading from that qemufile), meanwhile
only drop the mutex when it prepares to sleep.  Then the main load thread
can recycle the postcopy channel using qemu_fclose() safely.
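
To make the race concrete, this is the interleaving the mutex rules out (a
sketch; qemu_get_byte() stands in for whatever read the fast thread happens
to be doing):

    /*
     *   fast ram load thread                  main load thread
     *   --------------------                  ----------------
     *   qemu_get_byte(postcopy_qemufile_dst)
     *                                         qemu_fclose(postcopy_qemufile_dst)
     *   ...use-after-free...
     *
     * With postcopy_prio_thread_mutex held by the reader, the close can only
     * happen while the fast thread is parked on the pause semaphore and no
     * longer touching the file.
     */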

[...]

> > @@ -3466,6 +3468,17 @@ static MigThrError postcopy_pause(MigrationState *s)
> >  qemu_file_shutdown(file);
> >  qemu_fclose(file);
> >  
> > +/*
> > + * Do the same to postcopy fast path socket too if there is.  No
> > + * locking needed because no racer as long as we do this before setting
> > + * status to paused.
> > + */
> > +if (s->postcopy_qemufile_src) {
> > +migration_ioc_unregister_yank_from_file(s->postcopy_qemufile_src);
> 
> Shouldn't this do a qemu_file_shutdown on here first?

Yes I probably should.

With all above, I plan to squash below changes into this patch:

---8<---
diff --git a/migration/migration.c b/migration/migration.c
index c68a281406..69778cab23 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -3475,6 +3475,7 @@ static MigThrError postcopy_pause(MigrationState *s)
  */
 if (s->postcopy_qemufile_src) {
 migration_ioc_unregister_yank_from_file(s->postcopy_qemufile_src);
+qemu_file_shutdown(s->postcopy_qemufile_src);
 qemu_fclose(s->postcopy_qemufile_src);
 s->postcopy_qemufile_src = NULL;
 }
@@ -3534,8 +3535,13 @@ static MigThrError migration_detect_error(MigrationState *s)
 return MIG_THR_ERR_FATAL;
 }

-/* Try to detect any file errors */
-ret = qemu_file_get_error_obj(s->to_dst_file, &local_error);
+/*
+ * Try to detect any file errors.  Note that postcopy_qemufile_src will
+ * be NULL when postcopy preempt is not enabled.
+ */
+ret = qemu_file_get_error_obj_any(s->to_dst_file,
+  s->postcopy_qemufile_src,
+  &local_error);
 if (!ret) {
 /* Everything is fine */
 assert(!local_error);
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 1479cddad9..397652f0ba 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -139,6 +139,33 @@ int qemu_file_get_error_obj(QEMUFile *f, Error **errp)
 return f->last_error;
 }

+/*
+ * Get last error for either stream f1 or f2 with optional Error*.
+ * The error returned (non-zero) can be either from f1 or f2.
+ *
+ * If any of the qemufile* is NULL, then skip the check on that file.
+ *
+ * When there is no error on both qemufile, zero is returned.
+ */
+int qemu_file_get_error_obj_any(QEMUFile *f1, QEMUFile *f2, Error **errp)
+{
+int ret = 0;
+
+if (f1) {
+ret = qemu_file_get_error_obj(f1, errp);
+/* If there's already error detected, return */
+if (ret) {
+return ret;
+}
+}
+
+if (f2) {
+ret = qemu_file_get_error_obj(f2, errp);
+}
+
+return ret;
+}
+
 /*
  * Set the last error for stream f with optional Error*
  */
diff --git a/migration/qemu-file.h b/migrati
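
For context on the qemu_file_shutdown() added above: a plain close does not
reliably wake a thread that is blocked reading the same channel, which is
exactly what the fast ram load thread will be doing when the network breaks,
whereas a shutdown forces the blocked read to fail first.  A minimal
plain-socket sketch of the idea (not QEMU code):

    #include <sys/socket.h>
    #include <unistd.h>

    static void teardown_channel(int sockfd)
    {
        /* Force any recv()/send() blocked on sockfd to return an error. */
        shutdown(sockfd, SHUT_RDWR);
        /* The blocked reader bails out; only then is releasing the fd safe. */
        close(sockfd);
    }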

Re: [PATCH 19/20] migration: Postcopy recover with preempt enabled

2022-02-22 Thread Dr. David Alan Gilbert
* Peter Xu (pet...@redhat.com) wrote:
> [...]

I *think* this is mostly OK; but I worry I don't understand all the
cases; e.g.
  a) If the postcopy channel errors first
  b) If the main channel errors first

Can you add some docs to walk through those and explain the locking ?

Dave

> [...]
> @@ -3466,6 +3468,17 @@ static MigThrError postcopy_pause(MigrationState *s)
>  qemu_file_shutdown(file);
>  qemu_fclose(file);
>  
> +/*
> + * Do the same to postcopy fast path socket too if there is.  No
> + * locking needed because no racer as long as we do this before setting
> + * status to paused.
> + */
> +if (s->postcopy_qemufile_src) {
> +migration_ioc_unregister_yank_from_file(s->postcopy_qemufile_src);

Shouldn't this do a qemu_file_shutdown on here first?

> +qemu_fclose(s->postcopy_qemufile_src);
> +s->postcopy_qemufile_src = NULL;
> +}
> [...]

[PATCH 19/20] migration: Postcopy recover with preempt enabled

2022-02-16 Thread Peter Xu
To allow postcopy recovery, the ram fast load (preempt-only) dest QEMU thread
needs similar fault-tolerance handling.  When ram_load_postcopy() fails,
instead of quitting, the thread halts on a semaphore, ready to be kicked
again when recovery is detected.

A mutex is introduced to make sure there's no concurrent operation upon the
socket.  To keep it simple, the fast ram load thread takes the mutex for its
whole lifetime, and only releases it while paused.  When there are network
failures during postcopy, the main loading thread can then take that mutex
and safely release the fast-path socket.

Signed-off-by: Peter Xu 
---
 migration/migration.c| 17 +++--
 migration/migration.h|  3 +++
 migration/postcopy-ram.c | 24 ++--
 migration/savevm.c   | 17 +
 migration/trace-events   |  2 ++
 5 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index d20db04097..c68a281406 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -215,9 +215,11 @@ void migration_object_init(void)
 current_incoming->postcopy_remote_fds =
 g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD));
 qemu_mutex_init(&current_incoming->rp_mutex);
+qemu_mutex_init(&current_incoming->postcopy_prio_thread_mutex);
 qemu_event_init(&current_incoming->main_thread_load_event, false);
 qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
 qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
+qemu_sem_init(&current_incoming->postcopy_pause_sem_fast_load, 0);
 qemu_mutex_init(&current_incoming->page_request_mutex);
 current_incoming->page_requested = g_tree_new(page_request_addr_cmp);
 
@@ -697,9 +699,9 @@ static bool postcopy_try_recover(void)
 
 /*
  * Here, we only wake up the main loading thread (while the
- * fault thread will still be waiting), so that we can receive
+ * rest threads will still be waiting), so that we can receive
  * commands from source now, and answer it if needed. The
- * fault thread will be woken up afterwards until we are sure
+ * rest threads will be woken up afterwards until we are sure
  * that source is ready to reply to page requests.
  */
 qemu_sem_post(&mis->postcopy_pause_sem_dst);
@@ -3466,6 +3468,17 @@ static MigThrError postcopy_pause(MigrationState *s)
 qemu_file_shutdown(file);
 qemu_fclose(file);
 
+/*
+ * Do the same to postcopy fast path socket too if there is.  No
+ * locking needed because no racer as long as we do this before setting
+ * status to paused.
+ */
+if (s->postcopy_qemufile_src) {
+migration_ioc_unregister_yank_from_file(s->postcopy_qemufile_src);
+qemu_fclose(s->postcopy_qemufile_src);
+s->postcopy_qemufile_src = NULL;
+}
+
 migrate_set_state(&s->state, s->state,
   MIGRATION_STATUS_POSTCOPY_PAUSED);
 
diff --git a/migration/migration.h b/migration/migration.h
index b8aacfe3af..945088064a 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -118,6 +118,8 @@ struct MigrationIncomingState {
 /* Postcopy priority thread is used to receive postcopy requested pages */
 QemuThread postcopy_prio_thread;
 bool postcopy_prio_thread_created;
+/* Used to sync with the prio thread */
+QemuMutex postcopy_prio_thread_mutex;
 /*
  * An array of temp host huge pages to be used, one for each postcopy
  * channel.
@@ -147,6 +149,7 @@ struct MigrationIncomingState {
 /* notify PAUSED postcopy incoming migrations to try to continue */
 QemuSemaphore postcopy_pause_sem_dst;
 QemuSemaphore postcopy_pause_sem_fault;
+QemuSemaphore postcopy_pause_sem_fast_load;
 
 /* List of listening socket addresses  */
 SocketAddressList *socket_address_list;
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 30eddaacd1..b3d23804bc 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -1575,6 +1575,15 @@ int postcopy_preempt_setup(MigrationState *s, Error **errp)
 return 0;
 }
 
+static void postcopy_pause_ram_fast_load(MigrationIncomingState *mis)
+{
+trace_postcopy_pause_fast_load();
+qemu_mutex_unlock(&mis->postcopy_prio_thread_mutex);
+qemu_sem_wait(&mis->postcopy_pause_sem_fast_load);
+qemu_mutex_lock(&mis->postcopy_prio_thread_mutex);
+trace_postcopy_pause_fast_load_continued();
+}
+
 void *postcopy_preempt_thread(void *opaque)
 {
 MigrationIncomingState *mis = opaque;
@@ -1587,11 +1596,22 @@ void *postcopy_preempt_thread(void *opaque)
 qemu_sem_post(&mis->thread_sync_sem);
 
 /* Sending RAM_SAVE_FLAG_EOS to terminate this thread */
-ret = ram_load_postcopy(mis->postcopy_qemufile_dst, RAM_CHANNEL_POSTCOPY);
+qemu_mutex_lock(&mis->postcopy_prio_thread_mutex);
+while (1) {
+re
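
The hunk is cut off above; a minimal sketch of the retry loop it introduces,
assuming it follows the pause/kick pattern described in the commit message (a
reconstruction, not the verbatim patch):

    qemu_mutex_lock(&mis->postcopy_prio_thread_mutex);
    while (1) {
        ret = ram_load_postcopy(mis->postcopy_qemufile_dst,
                                RAM_CHANNEL_POSTCOPY);
        if (ret == 0) {
            /* Got RAM_SAVE_FLAG_EOS: clean exit of the thread. */
            break;
        }
        /* Channel failure: park until recovery kicks us, then retry. */
        postcopy_pause_ram_fast_load(mis);
    }
    qemu_mutex_unlock(&mis->postcopy_prio_thread_mutex);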