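After failover, the secondary side of replication should no longer change state. Add checks to replication_start(), replication_do_checkpoint() and replication_stop() so that requests arriving while the stage is BLOCK_REPLICATION_DONE or BLOCK_REPLICATION_FAILOVER are silently ignored instead of being processed or rejected with an error. In addition, replication_get_error() now reports "Block replication is not running" only when the stage is BLOCK_REPLICATION_NONE, and replication_start() no longer requires top_bs to be a root node.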
Signed-off-by: Lukas Straub <lukasstra...@web.de> --- block/replication.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/block/replication.c b/block/replication.c index 3d4dedddfc..466d463963 100644 --- a/block/replication.c +++ b/block/replication.c @@ -454,6 +454,14 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, aio_context_acquire(aio_context); s = bs->opaque; + if (s->stage == BLOCK_REPLICATION_DONE || s->stage == BLOCK_REPLICATION_FAILOVER) { + /* This case happens when a secondary is promoted to primary. + * Ignore the request because the secondary side of replication + * doesn't have to do anything anymore. */ + aio_context_release(aio_context); + return; + } + if (s->stage != BLOCK_REPLICATION_NONE) { error_setg(errp, "Block replication is running or done"); aio_context_release(aio_context); @@ -529,8 +537,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, "Block device is in use by internal backup job"); top_bs = bdrv_lookup_bs(s->top_id, s->top_id, NULL); - if (!top_bs || !bdrv_is_root_node(top_bs) || - !check_top_bs(top_bs, bs)) { + if (!top_bs || !check_top_bs(top_bs, bs)) { error_setg(errp, "No top_bs or it is invalid"); reopen_backing_file(bs, false, NULL); aio_context_release(aio_context); @@ -577,6 +584,14 @@ static void replication_do_checkpoint(ReplicationState *rs, Error **errp) aio_context_acquire(aio_context); s = bs->opaque; + if (s->stage == BLOCK_REPLICATION_DONE || s->stage == BLOCK_REPLICATION_FAILOVER) { + /* This case happens when a secondary was promoted to primary. + * Ignore the request because the secondary side of replication + * doesn't have to do anything anymore. 
*/ + aio_context_release(aio_context); + return; + } + if (s->mode == REPLICATION_MODE_SECONDARY) { secondary_do_checkpoint(s, errp); } @@ -592,8 +607,8 @@ static void replication_get_error(ReplicationState *rs, Error **errp) aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); s = bs->opaque; - - if (s->stage != BLOCK_REPLICATION_RUNNING) { + + if (s->stage == BLOCK_REPLICATION_NONE) { error_setg(errp, "Block replication is not running"); aio_context_release(aio_context); return; @@ -635,6 +650,14 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) aio_context_acquire(aio_context); s = bs->opaque; + if (s->stage == BLOCK_REPLICATION_DONE || s->stage == BLOCK_REPLICATION_FAILOVER) { + /* This case happens when a secondary was promoted to primary. + * Ignore the request because the secondary side of replication + * doesn't have to do anything anymore. */ + aio_context_release(aio_context); + return; + } + if (s->stage != BLOCK_REPLICATION_RUNNING) { error_setg(errp, "Block replication is not running"); aio_context_release(aio_context); -- 2.20.1