Re: [PATCH v2 2/5] migration: Make from_dst_file accesses thread-safe

2021-07-22 Thread Dr. David Alan Gilbert
* Peter Xu (pet...@redhat.com) wrote:
> Accessing from_dst_file is potentially racy in current code base like below:
> 
>   if (s->from_dst_file)
> do_something(s->from_dst_file);
> 
> Because from_dst_file can be reset right after the check in another
> thread (rp_thread).  One example is migrate_fd_cancel().
> 
> Use the same qemu_file_lock to protect it too, just like to_dst_file.
> 
> When it's safe to access without lock, comment it.
> 
> There's one special reference in migration_thread() that can be replaced by
> the newly introduced rp_thread_created flag.
> 
> Reported-by: Dr. David Alan Gilbert 
> Signed-off-by: Peter Xu 

Yep, with Eric's comments

Reviewed-by: Dr. David Alan Gilbert 

> ---
>  migration/migration.c | 32 +---
>  migration/migration.h |  8 +---
>  migration/ram.c   |  1 +
>  3 files changed, 31 insertions(+), 10 deletions(-)
> 
> diff --git a/migration/migration.c b/migration/migration.c
> index 21b94f75a3..fa70400f98 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -1879,10 +1879,12 @@ static void migrate_fd_cancel(MigrationState *s)
>  QEMUFile *f = migrate_get_current()->to_dst_file;
>  trace_migrate_fd_cancel();
>  
> +qemu_mutex_lock(>qemu_file_lock);
>  if (s->rp_state.from_dst_file) {
>  /* shutdown the rp socket, so causing the rp thread to shutdown */
>  qemu_file_shutdown(s->rp_state.from_dst_file);
>  }
> +qemu_mutex_unlock(>qemu_file_lock);
>  
>  do {
>  old_state = s->state;
> @@ -2686,6 +2688,22 @@ static int migrate_handle_rp_resume_ack(MigrationState 
> *s, uint32_t value)
>  return 0;
>  }
>  
> +/* Release ms->rp_state.from_dst_file in a safe way */
> +static void migration_release_from_dst_file(MigrationState *ms)
> +{
> +QEMUFile *file = ms->rp_state.from_dst_file;
> +
> +qemu_mutex_lock(>qemu_file_lock);
> +/*
> + * Reset the from_dst_file pointer first before releasing it, as we can't
> + * block within lock section
> + */
> +ms->rp_state.from_dst_file = NULL;
> +qemu_mutex_unlock(>qemu_file_lock);
> +
> +qemu_fclose(file);
> +}
> +
>  /*
>   * Handles messages sent on the return path towards the source VM
>   *
> @@ -2827,11 +2845,13 @@ out:
>   * Maybe there is something we can do: it looks like a
>   * network down issue, and we pause for a recovery.
>   */
> -qemu_fclose(rp);
> -ms->rp_state.from_dst_file = NULL;
> +migration_release_from_dst_file(ms);
>  rp = NULL;
>  if (postcopy_pause_return_path_thread(ms)) {
> -/* Reload rp, reset the rest */
> +/*
> + * Reload rp, reset the rest.  Referencing it is save since
> + * it's reset only by us above, or when migration completes
> + */
>  rp = ms->rp_state.from_dst_file;
>  ms->rp_state.error = false;
>  goto retry;
> @@ -2843,8 +2863,7 @@ out:
>  }
>  
>  trace_source_return_path_thread_end();
> -ms->rp_state.from_dst_file = NULL;
> -qemu_fclose(rp);
> +migration_release_from_dst_file(ms);
>  rcu_unregister_thread();
>  return NULL;
>  }
> @@ -2852,7 +2871,6 @@ out:
>  static int open_return_path_on_source(MigrationState *ms,
>bool create_thread)
>  {
> -
>  ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
>  if (!ms->rp_state.from_dst_file) {
>  return -1;
> @@ -3746,7 +3764,7 @@ static void *migration_thread(void *opaque)
>   * If we opened the return path, we need to make sure dst has it
>   * opened as well.
>   */
> -if (s->rp_state.from_dst_file) {
> +if (s->rp_state.rp_thread_created) {
>  /* Now tell the dest that it should open its end so it can reply */
>  qemu_savevm_send_open_return_path(s->to_dst_file);
>  
> diff --git a/migration/migration.h b/migration/migration.h
> index c302879fad..7a5aa8c2fd 100644
> --- a/migration/migration.h
> +++ b/migration/migration.h
> @@ -154,12 +154,13 @@ struct MigrationState {
>  QemuThread thread;
>  QEMUBH *vm_start_bh;
>  QEMUBH *cleanup_bh;
> +/* Protected by qemu_file_lock */
>  QEMUFile *to_dst_file;
>  QIOChannelBuffer *bioc;
>  /*
> - * Protects to_dst_file pointer.  We need to make sure we won't
> - * yield or hang during the critical section, since this lock will
> - * be used in OOB command handler.
> + * Protects to_dst_file/from_dst_file pointers.  We need to make sure we
> + * won't yield or hang during the critical section, since this lock will 
> be
> + * used in OOB command handler.
>   */
>  QemuMutex qemu_file_lock;
>  
> @@ -192,6 +193,7 @@ struct MigrationState {
>  
>  /* State related to return path */
>  struct {
> +/* Protected by qemu_file_lock 

Re: [PATCH v2 2/5] migration: Make from_dst_file accesses thread-safe

2021-07-22 Thread Peter Xu
On Wed, Jul 21, 2021 at 04:15:27PM -0500, Eric Blake wrote:
> On Wed, Jul 21, 2021 at 03:34:06PM -0400, Peter Xu wrote:
> > Accessing from_dst_file is potentially racy in current code base like below:
> > 
> >   if (s->from_dst_file)
> > do_something(s->from_dst_file);
> > 
> > Because from_dst_file can be reset right after the check in another
> > thread (rp_thread).  One example is migrate_fd_cancel().
> > 
> > Use the same qemu_file_lock to protect it too, just like to_dst_file.
> > 
> > When it's safe to access without lock, comment it.
> > 
> > There's one special reference in migration_thread() that can be replaced by
> > the newly introduced rp_thread_created flag.
> > 
> > Reported-by: Dr. David Alan Gilbert 
> > Signed-off-by: Peter Xu 
> > ---
> >  migration/migration.c | 32 +---
> >  migration/migration.h |  8 +---
> >  migration/ram.c   |  1 +
> >  3 files changed, 31 insertions(+), 10 deletions(-)
> > 
> > diff --git a/migration/migration.c b/migration/migration.c
> > index 21b94f75a3..fa70400f98 100644
> > --- a/migration/migration.c
> > +++ b/migration/migration.c
> > @@ -1879,10 +1879,12 @@ static void migrate_fd_cancel(MigrationState *s)
> >  QEMUFile *f = migrate_get_current()->to_dst_file;
> >  trace_migrate_fd_cancel();
> >  
> > +qemu_mutex_lock(>qemu_file_lock);
> >  if (s->rp_state.from_dst_file) {
> >  /* shutdown the rp socket, so causing the rp thread to shutdown */
> >  qemu_file_shutdown(s->rp_state.from_dst_file);
> >  }
> > +qemu_mutex_unlock(>qemu_file_lock);
> 
> Worth using WITH_QEMU_LOCK_GUARD?

Sure.

> 
> > @@ -2827,11 +2845,13 @@ out:
> >   * Maybe there is something we can do: it looks like a
> >   * network down issue, and we pause for a recovery.
> >   */
> > -qemu_fclose(rp);
> > -ms->rp_state.from_dst_file = NULL;
> > +migration_release_from_dst_file(ms);
> >  rp = NULL;
> >  if (postcopy_pause_return_path_thread(ms)) {
> > -/* Reload rp, reset the rest */
> > +/*
> > + * Reload rp, reset the rest.  Referencing it is save since
> 
> s/save/safe/

Will fix.

I'll wait for some more comments before I repost.

Thanks,

-- 
Peter Xu




Re: [PATCH v2 2/5] migration: Make from_dst_file accesses thread-safe

2021-07-21 Thread Eric Blake
On Wed, Jul 21, 2021 at 03:34:06PM -0400, Peter Xu wrote:
> Accessing from_dst_file is potentially racy in current code base like below:
> 
>   if (s->from_dst_file)
> do_something(s->from_dst_file);
> 
> Because from_dst_file can be reset right after the check in another
> thread (rp_thread).  One example is migrate_fd_cancel().
> 
> Use the same qemu_file_lock to protect it too, just like to_dst_file.
> 
> When it's safe to access without lock, comment it.
> 
> There's one special reference in migration_thread() that can be replaced by
> the newly introduced rp_thread_created flag.
> 
> Reported-by: Dr. David Alan Gilbert 
> Signed-off-by: Peter Xu 
> ---
>  migration/migration.c | 32 +---
>  migration/migration.h |  8 +---
>  migration/ram.c   |  1 +
>  3 files changed, 31 insertions(+), 10 deletions(-)
> 
> diff --git a/migration/migration.c b/migration/migration.c
> index 21b94f75a3..fa70400f98 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -1879,10 +1879,12 @@ static void migrate_fd_cancel(MigrationState *s)
>  QEMUFile *f = migrate_get_current()->to_dst_file;
>  trace_migrate_fd_cancel();
>  
> +qemu_mutex_lock(>qemu_file_lock);
>  if (s->rp_state.from_dst_file) {
>  /* shutdown the rp socket, so causing the rp thread to shutdown */
>  qemu_file_shutdown(s->rp_state.from_dst_file);
>  }
> +qemu_mutex_unlock(>qemu_file_lock);

Worth using WITH_QEMU_LOCK_GUARD?

> @@ -2827,11 +2845,13 @@ out:
>   * Maybe there is something we can do: it looks like a
>   * network down issue, and we pause for a recovery.
>   */
> -qemu_fclose(rp);
> -ms->rp_state.from_dst_file = NULL;
> +migration_release_from_dst_file(ms);
>  rp = NULL;
>  if (postcopy_pause_return_path_thread(ms)) {
> -/* Reload rp, reset the rest */
> +/*
> + * Reload rp, reset the rest.  Referencing it is save since

s/save/safe/

> + * it's reset only by us above, or when migration completes
> + */
>  rp = ms->rp_state.from_dst_file;
>  ms->rp_state.error = false;
>  goto retry;

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org




[PATCH v2 2/5] migration: Make from_dst_file accesses thread-safe

2021-07-21 Thread Peter Xu
Accessing from_dst_file is potentially racy in current code base like below:

  if (s->from_dst_file)
do_something(s->from_dst_file);

Because from_dst_file can be reset right after the check in another
thread (rp_thread).  One example is migrate_fd_cancel().

Use the same qemu_file_lock to protect it too, just like to_dst_file.

When it's safe to access without lock, comment it.

There's one special reference in migration_thread() that can be replaced by
the newly introduced rp_thread_created flag.

Reported-by: Dr. David Alan Gilbert 
Signed-off-by: Peter Xu 
---
 migration/migration.c | 32 +---
 migration/migration.h |  8 +---
 migration/ram.c   |  1 +
 3 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 21b94f75a3..fa70400f98 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1879,10 +1879,12 @@ static void migrate_fd_cancel(MigrationState *s)
 QEMUFile *f = migrate_get_current()->to_dst_file;
 trace_migrate_fd_cancel();
 
+qemu_mutex_lock(>qemu_file_lock);
 if (s->rp_state.from_dst_file) {
 /* shutdown the rp socket, so causing the rp thread to shutdown */
 qemu_file_shutdown(s->rp_state.from_dst_file);
 }
+qemu_mutex_unlock(>qemu_file_lock);
 
 do {
 old_state = s->state;
@@ -2686,6 +2688,22 @@ static int migrate_handle_rp_resume_ack(MigrationState 
*s, uint32_t value)
 return 0;
 }
 
+/* Release ms->rp_state.from_dst_file in a safe way */
+static void migration_release_from_dst_file(MigrationState *ms)
+{
+QEMUFile *file = ms->rp_state.from_dst_file;
+
+qemu_mutex_lock(>qemu_file_lock);
+/*
+ * Reset the from_dst_file pointer first before releasing it, as we can't
+ * block within lock section
+ */
+ms->rp_state.from_dst_file = NULL;
+qemu_mutex_unlock(>qemu_file_lock);
+
+qemu_fclose(file);
+}
+
 /*
  * Handles messages sent on the return path towards the source VM
  *
@@ -2827,11 +2845,13 @@ out:
  * Maybe there is something we can do: it looks like a
  * network down issue, and we pause for a recovery.
  */
-qemu_fclose(rp);
-ms->rp_state.from_dst_file = NULL;
+migration_release_from_dst_file(ms);
 rp = NULL;
 if (postcopy_pause_return_path_thread(ms)) {
-/* Reload rp, reset the rest */
+/*
+ * Reload rp, reset the rest.  Referencing it is save since
+ * it's reset only by us above, or when migration completes
+ */
 rp = ms->rp_state.from_dst_file;
 ms->rp_state.error = false;
 goto retry;
@@ -2843,8 +2863,7 @@ out:
 }
 
 trace_source_return_path_thread_end();
-ms->rp_state.from_dst_file = NULL;
-qemu_fclose(rp);
+migration_release_from_dst_file(ms);
 rcu_unregister_thread();
 return NULL;
 }
@@ -2852,7 +2871,6 @@ out:
 static int open_return_path_on_source(MigrationState *ms,
   bool create_thread)
 {
-
 ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
 if (!ms->rp_state.from_dst_file) {
 return -1;
@@ -3746,7 +3764,7 @@ static void *migration_thread(void *opaque)
  * If we opened the return path, we need to make sure dst has it
  * opened as well.
  */
-if (s->rp_state.from_dst_file) {
+if (s->rp_state.rp_thread_created) {
 /* Now tell the dest that it should open its end so it can reply */
 qemu_savevm_send_open_return_path(s->to_dst_file);
 
diff --git a/migration/migration.h b/migration/migration.h
index c302879fad..7a5aa8c2fd 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -154,12 +154,13 @@ struct MigrationState {
 QemuThread thread;
 QEMUBH *vm_start_bh;
 QEMUBH *cleanup_bh;
+/* Protected by qemu_file_lock */
 QEMUFile *to_dst_file;
 QIOChannelBuffer *bioc;
 /*
- * Protects to_dst_file pointer.  We need to make sure we won't
- * yield or hang during the critical section, since this lock will
- * be used in OOB command handler.
+ * Protects to_dst_file/from_dst_file pointers.  We need to make sure we
+ * won't yield or hang during the critical section, since this lock will be
+ * used in OOB command handler.
  */
 QemuMutex qemu_file_lock;
 
@@ -192,6 +193,7 @@ struct MigrationState {
 
 /* State related to return path */
 struct {
+/* Protected by qemu_file_lock */
 QEMUFile *from_dst_file;
 QemuThreadrp_thread;
 bool  error;
diff --git a/migration/ram.c b/migration/ram.c
index b5fc454b2f..f728f5072f 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -4012,6 +4012,7 @@ static void ram_dirty_bitmap_reload_notify(MigrationState 
*s)
 int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock