On 03/23/2017 01:39 PM, Paolo Bonzini wrote: > This splits the part that touches job states from the part that invokes > callbacks. It will be a bit simpler to understand once job states will > be protected by a different mutex than the AioContext lock. > > Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> > --- > blockjob.c | 165 > ++++++++++++++++++++++++++++++++----------------------------- > 1 file changed, 88 insertions(+), 77 deletions(-) > > diff --git a/blockjob.c b/blockjob.c > index 093962b..3fa2885 100644 > --- a/blockjob.c > +++ b/blockjob.c > @@ -76,6 +76,39 @@ BlockJob *block_job_get(const char *id) > return NULL; > } > > +BlockJobTxn *block_job_txn_new(void) > +{ > + BlockJobTxn *txn = g_new0(BlockJobTxn, 1); > + QLIST_INIT(&txn->jobs); > + txn->refcnt = 1; > + return txn; > +} > + > +static void block_job_txn_ref(BlockJobTxn *txn) > +{ > + txn->refcnt++; > +} > + > +void block_job_txn_unref(BlockJobTxn *txn) > +{ > + if (txn && --txn->refcnt == 0) { > + g_free(txn); > + } > +} > + > +void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job) > +{ > + if (!txn) { > + return; > + } > + > + assert(!job->txn); > + job->txn = txn; > + > + QLIST_INSERT_HEAD(&txn->jobs, job, txn_list); > + block_job_txn_ref(txn); > +} > +
Pure movement; split it please? > static void block_job_pause(BlockJob *job) > { > job->pause_count++; > @@ -336,6 +369,8 @@ void block_job_start(BlockJob *job) > > static void block_job_completed_single(BlockJob *job) > { > + assert(job->completed); > + > if (!job->ret) { > if (job->driver->commit) { > job->driver->commit(job); > @@ -376,14 +411,49 @@ static void block_job_completed_single(BlockJob *job) > static void block_job_cancel_async(BlockJob *job) > { > job->cancelled = true; > - block_job_iostatus_reset(job); > + if (!job->completed) { > + block_job_iostatus_reset(job); > + } > +} > + > +static int block_job_finish_sync(BlockJob *job, > + void (*finish)(BlockJob *, Error **errp), > + Error **errp) > +{ > + Error *local_err = NULL; > + int ret; > + > + assert(blk_bs(job->blk)->job == job); > + > + block_job_ref(job); > + > + if (finish) { > + finish(job, &local_err); > + } > + if (local_err) { > + error_propagate(errp, local_err); > + block_job_unref(job); > + return -EBUSY; > + } > + /* block_job_drain calls block_job_enter, and it should be enough to > + * induce progress until the job completes or moves to the main thread. > + */ > + while (!job->deferred_to_main_loop && !job->completed) { > + block_job_drain(job); > + } > + while (!job->completed) { > + aio_poll(qemu_get_aio_context(), true); > + } > + ret = (job->cancelled && job->ret == 0) ? -ECANCELED : job->ret; > + block_job_unref(job); > + return ret; > } block_job_finish_sync is almost pure movement except for the if (finish) that gets added around the call to finish(job, &local_err). I guess this is for the new call where we invoke this with the callback set as NULL, to avoid calling block_job_cancel_async twice. 
> > static void block_job_completed_txn_abort(BlockJob *job) > { > AioContext *ctx; > BlockJobTxn *txn = job->txn; > - BlockJob *other_job, *next; > + BlockJob *other_job; > > if (txn->aborting) { > /* > @@ -392,29 +462,34 @@ static void block_job_completed_txn_abort(BlockJob *job) > return; > } > txn->aborting = true; > + block_job_txn_ref(txn); > + > /* We are the first failed job. Cancel other jobs. */ > QLIST_FOREACH(other_job, &txn->jobs, txn_list) { > ctx = blk_get_aio_context(other_job->blk); > aio_context_acquire(ctx); > } > + > + /* Other jobs are "effectively" cancelled by us, set the status for > + * them; this job, however, may or may not be cancelled, depending > + * on the caller, so leave it. */ > QLIST_FOREACH(other_job, &txn->jobs, txn_list) { > - if (other_job == job || other_job->completed) { > - /* Other jobs are "effectively" cancelled by us, set the status > for > - * them; this job, however, may or may not be cancelled, > depending > - * on the caller, so leave it. */ > - if (other_job != job) { > - block_job_cancel_async(other_job); > - } > - continue; > + if (other_job != job) { > + block_job_cancel_async(other_job); > } > - block_job_cancel_sync(other_job); > - assert(other_job->completed); > } > - QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) { > + while (!QLIST_EMPTY(&txn->jobs)) { > + other_job = QLIST_FIRST(&txn->jobs); > ctx = blk_get_aio_context(other_job->blk); > + if (!other_job->completed) { > + assert(other_job->cancelled); > + block_job_finish_sync(other_job, NULL, NULL); > + } > block_job_completed_single(other_job); > aio_context_release(ctx); > } > + > + block_job_txn_unref(txn); > } > OK, so in a nutshell, here's what used to happen: -Don't do anything to our own job. -Other jobs that are completed get block_job_cancel_async. -Other jobs that are not completed get block_job_cancel_sync. -All jobs then get block_job_completed_single. 
And here's what happens now: - All other jobs get block_job_cancel_async (completed or not.) - If the job isn't completed, assert it is canceled, then call block_job_finish_sync. - All jobs get block_job_completed_single. Now, cancel_sync eventually does call block_job_cancel_async, so in practice we were already calling block_job_cancel_async on all other jobs anyway. The only difference now is that some jobs may be in a canceled state but still running, so you handle that with the block_job_finish_sync call for any job that is still running. So, it's basically the same between the two, it just takes a hot second to see. One thing that I wonder about a little is the push-down of whether or not to reset iostatus falling to block_job_cancel_async; it seemed to me as if txn_abort really had the best knowledge as to whether or not we wanted to reset iostatus, but as it stands it doesn't really make a difference. ACK for now, because it's still not perfectly obvious to me how this will wind up helping, though I do believe you :) > static void block_job_completed_txn_success(BlockJob *job) > @@ -502,37 +577,6 @@ void block_job_cancel(BlockJob *job) > } > } > > -static int block_job_finish_sync(BlockJob *job, > - void (*finish)(BlockJob *, Error **errp), > - Error **errp) > -{ > - Error *local_err = NULL; > - int ret; > - > - assert(blk_bs(job->blk)->job == job); > - > - block_job_ref(job); > - > - finish(job, &local_err); > - if (local_err) { > - error_propagate(errp, local_err); > - block_job_unref(job); > - return -EBUSY; > - } > - /* block_job_drain calls block_job_enter, and it should be enough to > - * induce progress until the job completes or moves to the main thread. > - */ > - while (!job->deferred_to_main_loop && !job->completed) { > - block_job_drain(job); > - } > - while (!job->completed) { > - aio_poll(qemu_get_aio_context(), true); > - } > - ret = (job->cancelled && job->ret == 0) ?
-ECANCELED : job->ret; > - block_job_unref(job); > - return ret; > -} > - > /* A wrapper around block_job_cancel() taking an Error ** parameter so it > may be > * used with block_job_finish_sync() without the need for (rather nasty) > * function pointer casts there. */ > @@ -856,36 +900,3 @@ void block_job_defer_to_main_loop(BlockJob *job, > aio_bh_schedule_oneshot(qemu_get_aio_context(), > block_job_defer_to_main_loop_bh, data); > } And everything following is pure movement. > - > -BlockJobTxn *block_job_txn_new(void) > -{ > - BlockJobTxn *txn = g_new0(BlockJobTxn, 1); > - QLIST_INIT(&txn->jobs); > - txn->refcnt = 1; > - return txn; > -} > - > -static void block_job_txn_ref(BlockJobTxn *txn) > -{ > - txn->refcnt++; > -} > - > -void block_job_txn_unref(BlockJobTxn *txn) > -{ > - if (txn && --txn->refcnt == 0) { > - g_free(txn); > - } > -} > - > -void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job) > -{ > - if (!txn) { > - return; > - } > - > - assert(!job->txn); > - job->txn = txn; > - > - QLIST_INSERT_HEAD(&txn->jobs, job, txn_list); > - block_job_txn_ref(txn); > -} >