Re: [PATCH v8 16/20] jobs: protect job.aio_context with BQL and job_mutex

2022-07-05 Thread Stefan Hajnoczi
On Wed, Jun 29, 2022 at 10:15:34AM -0400, Emanuele Giuseppe Esposito wrote:
> In order to make it thread safe, implement a "fake rwlock",
> where we allow reads under BQL *or* job_mutex held, but
> writes only under BQL *and* job_mutex.
> 
> The only write we have is in child_job_set_aio_ctx, which always
> happens under drain (so the job is paused).
> For this reason, introduce job_set_aio_context and make sure that
> the context is set under BQL, job_mutex and drain.
> Also make sure all other places where the aiocontext is read
> are protected.
> 
> Note: at this stage, job_{lock/unlock} and job lock guard macros
> are *nop*.
> 
> Suggested-by: Paolo Bonzini 
> Signed-off-by: Emanuele Giuseppe Esposito 
> ---
>  block/replication.c |  6 --
>  blockjob.c  |  3 ++-
>  include/qemu/job.h  | 19 ++-
>  job.c   | 12 
>  4 files changed, 36 insertions(+), 4 deletions(-)

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature


[PATCH v8 16/20] jobs: protect job.aio_context with BQL and job_mutex

2022-06-29 Thread Emanuele Giuseppe Esposito
In order to make it thread safe, implement a "fake rwlock",
where we allow reads under BQL *or* job_mutex held, but
writes only under BQL *and* job_mutex.

The only write we have is in child_job_set_aio_ctx, which always
happens under drain (so the job is paused).
For this reason, introduce job_set_aio_context and make sure that
the context is set under BQL, job_mutex and drain.
Also make sure all other places where the aiocontext is read
are protected.

Note: at this stage, job_{lock/unlock} and job lock guard macros
are *nop*.

Suggested-by: Paolo Bonzini 
Signed-off-by: Emanuele Giuseppe Esposito 
---
 block/replication.c |  6 --
 blockjob.c  |  3 ++-
 include/qemu/job.h  | 19 ++-
 job.c   | 12 
 4 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/block/replication.c b/block/replication.c
index 55c8f894aa..2189863df1 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -148,8 +148,10 @@ static void replication_close(BlockDriverState *bs)
 }
 if (s->stage == BLOCK_REPLICATION_FAILOVER) {
 commit_job = &s->commit_job->job;
-assert(commit_job->aio_context == qemu_get_current_aio_context());
-job_cancel_sync(commit_job, false);
+WITH_JOB_LOCK_GUARD() {
+assert(commit_job->aio_context == qemu_get_current_aio_context());
+job_cancel_sync_locked(commit_job, false);
+}
 }
 
 if (s->mode == REPLICATION_MODE_SECONDARY) {
diff --git a/blockjob.c b/blockjob.c
index 1075def475..2293a00b4a 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -166,12 +166,13 @@ static void child_job_set_aio_ctx(BdrvChild *c, 
AioContext *ctx,
 bdrv_set_aio_context_ignore(sibling->bs, ctx, ignore);
 }
 
-job->job.aio_context = ctx;
+job_set_aio_context(&job->job, ctx);
 }
 
 static AioContext *child_job_get_parent_aio_context(BdrvChild *c)
 {
 BlockJob *job = c->opaque;
+assert(qemu_in_main_thread());
 
 return job->job.aio_context;
 }
diff --git a/include/qemu/job.h b/include/qemu/job.h
index e887f88cb2..8f13c3de61 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -77,7 +77,12 @@ typedef struct Job {
 
 /** Protected by AioContext lock */
 
-/** AioContext to run the job coroutine in */
+/**
+ * AioContext to run the job coroutine in.
+ * This field can be read when holding either the BQL (so we are in
+ * the main loop) or the job_mutex.
+ * It can be only written when we hold *both* BQL and job_mutex.
+ */
 AioContext *aio_context;
 
 /** Reference count of the block job */
@@ -728,4 +733,16 @@ int job_finish_sync(Job *job, void (*finish)(Job *, Error 
**errp),
 int job_finish_sync_locked(Job *job, void (*finish)(Job *, Error **errp),
Error **errp);
 
+/**
+ * Sets the @job->aio_context.
+ * Called with job_mutex *not* held.
+ *
+ * This function must run in the main thread to protect against
+ * concurrent read in job_finish_sync_locked(),
+ * takes the job_mutex lock to protect against the read in
+ * job_do_yield_locked(), and must be called when the coroutine
+ * is quiescent.
+ */
+void job_set_aio_context(Job *job, AioContext *ctx);
+
 #endif
diff --git a/job.c b/job.c
index 8db80b8086..ad2badd107 100644
--- a/job.c
+++ b/job.c
@@ -394,6 +394,17 @@ Job *job_get(const char *id)
 return job_get_locked(id);
 }
 
+void job_set_aio_context(Job *job, AioContext *ctx)
+{
+/* protect against read in job_finish_sync_locked and job_start */
+assert(qemu_in_main_thread());
+/* protect against read in job_do_yield_locked */
+JOB_LOCK_GUARD();
+/* ensure the coroutine is quiescent while the AioContext is changed */
+assert(job->pause_count > 0);
+job->aio_context = ctx;
+}
+
 /* Called with job_mutex *not* held. */
 static void job_sleep_timer_cb(void *opaque)
 {
@@ -1379,6 +1390,7 @@ int job_finish_sync_locked(Job *job,
 {
 Error *local_err = NULL;
 int ret;
+assert(qemu_in_main_thread());
 
 job_ref_locked(job);
 
-- 
2.31.1