Re: [PATCH v2 09/22] migration/block-dirty-bitmap: relax error handling in incoming part
19.02.2020 18:34, Vladimir Sementsov-Ogievskiy wrote: 18.02.2020 21:54, Andrey Shinkevich wrote: On 17/02/2020 18:02, Vladimir Sementsov-Ogievskiy wrote: Bitmaps data is not critical, and we should not fail the migration (or use postcopy recovering) because of dirty-bitmaps migration failure. Instead we should just lose unfinished bitmaps. Still we have to report io stream violation errors, as they affect the whole migration stream. Signed-off-by: Vladimir Sementsov-Ogievskiy --- migration/block-dirty-bitmap.c | 148 + 1 file changed, 113 insertions(+), 35 deletions(-) diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c index 1329db8d7d..aea5326804 100644 --- a/migration/block-dirty-bitmap.c +++ b/migration/block-dirty-bitmap.c @@ -145,6 +145,15 @@ typedef struct DBMLoadState { bool before_vm_start_handled; /* set in dirty_bitmap_mig_before_vm_start */ + /* + * cancelled + * Incoming migration is cancelled for some reason. That means that we + * still should read our chunks from migration stream, to not affect other + * migration objects (like RAM), but just ignore them and do not touch any + * bitmaps or nodes. + */ + bool cancelled; + GSList *bitmaps; QemuMutex lock; /* protect bitmaps */ } DBMLoadState; @@ -545,13 +554,47 @@ void dirty_bitmap_mig_before_vm_start(void) qemu_mutex_unlock(>lock); } +static void cancel_incoming_locked(DBMLoadState *s) +{ + GSList *item; + + if (s->cancelled) { + return; + } + + s->cancelled = true; + s->bs = NULL; + s->bitmap = NULL; + + /* Drop all unfinished bitmaps */ + for (item = s->bitmaps; item; item = g_slist_next(item)) { + LoadBitmapState *b = item->data; + + /* + * Bitmap must be unfinished, as finished bitmaps should already be + * removed from the list. + */ + assert(!s->before_vm_start_handled || !b->migrated); + if (bdrv_dirty_bitmap_has_successor(b->bitmap)) { + bdrv_reclaim_dirty_bitmap(b->bitmap, _abort); + } + bdrv_release_dirty_bitmap(b->bitmap); + } + + g_slist_free_full(s->bitmaps, g_free); + s->bitmaps = NULL; +} + static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s) { GSList *item; trace_dirty_bitmap_load_complete(); - bdrv_dirty_bitmap_deserialize_finish(s->bitmap); - qemu_mutex_lock(>lock); Why is it safe to remove the critical section? It's not removed, it becomes wider in this patch. + if (s->cancelled) { + return; + } + + bdrv_dirty_bitmap_deserialize_finish(s->bitmap); if (bdrv_dirty_bitmap_has_successor(s->bitmap)) { bdrv_reclaim_dirty_bitmap(s->bitmap, _abort); @@ -569,8 +612,6 @@ static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s) break; } } - - qemu_mutex_unlock(>lock); } static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s) @@ -582,15 +623,32 @@ static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s) if (s->flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) { trace_dirty_bitmap_load_bits_zeroes(); - bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_byte, nr_bytes, - false); + if (!s->cancelled) { + bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_byte, + nr_bytes, false); + } } else { size_t ret; uint8_t *buf; uint64_t buf_size = qemu_get_be64(f); - uint64_t needed_size = - bdrv_dirty_bitmap_serialization_size(s->bitmap, - first_byte, nr_bytes); + uint64_t needed_size; + + buf = g_malloc(buf_size); + ret = qemu_get_buffer(f, buf, buf_size); + if (ret != buf_size) { + error_report("Failed to read bitmap bits"); + g_free(buf); + return -EIO; + } + + if (s->cancelled) { + g_free(buf); + return 0; + } + + needed_size = bdrv_dirty_bitmap_serialization_size(s->bitmap, + first_byte, + nr_bytes); if (needed_size > buf_size || buf_size > QEMU_ALIGN_UP(needed_size, 4 * sizeof(long)) @@ -599,15 +657,8 @@ static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s) error_report("Migrated bitmap granularity doesn't " "match the destination bitmap '%s' granularity", bdrv_dirty_bitmap_name(s->bitmap)); - return -EINVAL; - } - - buf = g_malloc(buf_size); - ret = qemu_get_buffer(f, buf, buf_size); - if (ret != buf_size) { - error_report("Failed to read bitmap
Re: [PATCH v2 09/22] migration/block-dirty-bitmap: relax error handling in incoming part
18.02.2020 21:54, Andrey Shinkevich wrote: On 17/02/2020 18:02, Vladimir Sementsov-Ogievskiy wrote: Bitmaps data is not critical, and we should not fail the migration (or use postcopy recovering) because of dirty-bitmaps migration failure. Instead we should just lose unfinished bitmaps. Still we have to report io stream violation errors, as they affect the whole migration stream. Signed-off-by: Vladimir Sementsov-Ogievskiy --- migration/block-dirty-bitmap.c | 148 + 1 file changed, 113 insertions(+), 35 deletions(-) diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c index 1329db8d7d..aea5326804 100644 --- a/migration/block-dirty-bitmap.c +++ b/migration/block-dirty-bitmap.c @@ -145,6 +145,15 @@ typedef struct DBMLoadState { bool before_vm_start_handled; /* set in dirty_bitmap_mig_before_vm_start */ + /* + * cancelled + * Incoming migration is cancelled for some reason. That means that we + * still should read our chunks from migration stream, to not affect other + * migration objects (like RAM), but just ignore them and do not touch any + * bitmaps or nodes. + */ + bool cancelled; + GSList *bitmaps; QemuMutex lock; /* protect bitmaps */ } DBMLoadState; @@ -545,13 +554,47 @@ void dirty_bitmap_mig_before_vm_start(void) qemu_mutex_unlock(>lock); } +static void cancel_incoming_locked(DBMLoadState *s) +{ + GSList *item; + + if (s->cancelled) { + return; + } + + s->cancelled = true; + s->bs = NULL; + s->bitmap = NULL; + + /* Drop all unfinished bitmaps */ + for (item = s->bitmaps; item; item = g_slist_next(item)) { + LoadBitmapState *b = item->data; + + /* + * Bitmap must be unfinished, as finished bitmaps should already be + * removed from the list. + */ + assert(!s->before_vm_start_handled || !b->migrated); + if (bdrv_dirty_bitmap_has_successor(b->bitmap)) { + bdrv_reclaim_dirty_bitmap(b->bitmap, _abort); + } + bdrv_release_dirty_bitmap(b->bitmap); + } + + g_slist_free_full(s->bitmaps, g_free); + s->bitmaps = NULL; +} + static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s) { GSList *item; trace_dirty_bitmap_load_complete(); - bdrv_dirty_bitmap_deserialize_finish(s->bitmap); - qemu_mutex_lock(>lock); Why is it safe to remove the critical section? It's not removed, it becomes wider in this patch. + if (s->cancelled) { + return; + } + + bdrv_dirty_bitmap_deserialize_finish(s->bitmap); if (bdrv_dirty_bitmap_has_successor(s->bitmap)) { bdrv_reclaim_dirty_bitmap(s->bitmap, _abort); @@ -569,8 +612,6 @@ static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s) break; } } - - qemu_mutex_unlock(>lock); } static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s) @@ -582,15 +623,32 @@ static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s) if (s->flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) { trace_dirty_bitmap_load_bits_zeroes(); - bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_byte, nr_bytes, - false); + if (!s->cancelled) { + bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_byte, + nr_bytes, false); + } } else { size_t ret; uint8_t *buf; uint64_t buf_size = qemu_get_be64(f); - uint64_t needed_size = - bdrv_dirty_bitmap_serialization_size(s->bitmap, - first_byte, nr_bytes); + uint64_t needed_size; + + buf = g_malloc(buf_size); + ret = qemu_get_buffer(f, buf, buf_size); + if (ret != buf_size) { + error_report("Failed to read bitmap bits"); + g_free(buf); + return -EIO; + } + + if (s->cancelled) { + g_free(buf); + return 0; + } + + needed_size = bdrv_dirty_bitmap_serialization_size(s->bitmap, + first_byte, + nr_bytes); if (needed_size > buf_size || buf_size > QEMU_ALIGN_UP(needed_size, 4 * sizeof(long)) @@ -599,15 +657,8 @@ static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s) error_report("Migrated bitmap granularity doesn't " "match the destination bitmap '%s' granularity", bdrv_dirty_bitmap_name(s->bitmap)); - return -EINVAL; - } - - buf = g_malloc(buf_size); - ret = qemu_get_buffer(f, buf, buf_size); - if (ret != buf_size) { - error_report("Failed to read bitmap bits"); - g_free(buf); - return -EIO; +
Re: [PATCH v2 09/22] migration/block-dirty-bitmap: relax error handling in incoming part
On 17/02/2020 18:02, Vladimir Sementsov-Ogievskiy wrote: Bitmaps data is not critical, and we should not fail the migration (or use postcopy recovering) because of dirty-bitmaps migration failure. Instead we should just lose unfinished bitmaps. Still we have to report io stream violation errors, as they affect the whole migration stream. Signed-off-by: Vladimir Sementsov-Ogievskiy --- migration/block-dirty-bitmap.c | 148 + 1 file changed, 113 insertions(+), 35 deletions(-) diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c index 1329db8d7d..aea5326804 100644 --- a/migration/block-dirty-bitmap.c +++ b/migration/block-dirty-bitmap.c @@ -145,6 +145,15 @@ typedef struct DBMLoadState { bool before_vm_start_handled; /* set in dirty_bitmap_mig_before_vm_start */ +/* + * cancelled + * Incoming migration is cancelled for some reason. That means that we + * still should read our chunks from migration stream, to not affect other + * migration objects (like RAM), but just ignore them and do not touch any + * bitmaps or nodes. + */ +bool cancelled; + GSList *bitmaps; QemuMutex lock; /* protect bitmaps */ } DBMLoadState; @@ -545,13 +554,47 @@ void dirty_bitmap_mig_before_vm_start(void) qemu_mutex_unlock(>lock); } +static void cancel_incoming_locked(DBMLoadState *s) +{ +GSList *item; + +if (s->cancelled) { +return; +} + +s->cancelled = true; +s->bs = NULL; +s->bitmap = NULL; + +/* Drop all unfinished bitmaps */ +for (item = s->bitmaps; item; item = g_slist_next(item)) { +LoadBitmapState *b = item->data; + +/* + * Bitmap must be unfinished, as finished bitmaps should already be + * removed from the list. + */ +assert(!s->before_vm_start_handled || !b->migrated); +if (bdrv_dirty_bitmap_has_successor(b->bitmap)) { +bdrv_reclaim_dirty_bitmap(b->bitmap, _abort); +} +bdrv_release_dirty_bitmap(b->bitmap); +} + +g_slist_free_full(s->bitmaps, g_free); +s->bitmaps = NULL; +} + static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s) { GSList *item; trace_dirty_bitmap_load_complete(); -bdrv_dirty_bitmap_deserialize_finish(s->bitmap); -qemu_mutex_lock(>lock); Why is it safe to remove the critical section? +if (s->cancelled) { +return; +} + +bdrv_dirty_bitmap_deserialize_finish(s->bitmap); if (bdrv_dirty_bitmap_has_successor(s->bitmap)) { bdrv_reclaim_dirty_bitmap(s->bitmap, _abort); @@ -569,8 +612,6 @@ static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s) break; } } - -qemu_mutex_unlock(>lock); } static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s) @@ -582,15 +623,32 @@ static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s) if (s->flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) { trace_dirty_bitmap_load_bits_zeroes(); -bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_byte, nr_bytes, - false); +if (!s->cancelled) { +bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_byte, + nr_bytes, false); +} } else { size_t ret; uint8_t *buf; uint64_t buf_size = qemu_get_be64(f); -uint64_t needed_size = -bdrv_dirty_bitmap_serialization_size(s->bitmap, - first_byte, nr_bytes); +uint64_t needed_size; + +buf = g_malloc(buf_size); +ret = qemu_get_buffer(f, buf, buf_size); +if (ret != buf_size) { +error_report("Failed to read bitmap bits"); +g_free(buf); +return -EIO; +} + +if (s->cancelled) { +g_free(buf); +return 0; +} + +needed_size = bdrv_dirty_bitmap_serialization_size(s->bitmap, + first_byte, + nr_bytes); if (needed_size > buf_size || buf_size > QEMU_ALIGN_UP(needed_size, 4 * sizeof(long)) @@ -599,15 +657,8 @@ static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s) error_report("Migrated bitmap granularity doesn't " "match the destination bitmap '%s' granularity", bdrv_dirty_bitmap_name(s->bitmap)); -return -EINVAL; -} - -buf = g_malloc(buf_size); -ret = qemu_get_buffer(f, buf, buf_size); -if (ret != buf_size) { -error_report("Failed to read bitmap bits"); -g_free(buf); -return -EIO; +cancel_incoming_locked(s); /* Continue
[PATCH v2 09/22] migration/block-dirty-bitmap: relax error handling in incoming part
Bitmaps data is not critical, and we should not fail the migration (or use postcopy recovering) because of dirty-bitmaps migration failure. Instead we should just lose unfinished bitmaps. Still we have to report io stream violation errors, as they affect the whole migration stream. Signed-off-by: Vladimir Sementsov-Ogievskiy --- migration/block-dirty-bitmap.c | 148 + 1 file changed, 113 insertions(+), 35 deletions(-) diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c index 1329db8d7d..aea5326804 100644 --- a/migration/block-dirty-bitmap.c +++ b/migration/block-dirty-bitmap.c @@ -145,6 +145,15 @@ typedef struct DBMLoadState { bool before_vm_start_handled; /* set in dirty_bitmap_mig_before_vm_start */ +/* + * cancelled + * Incoming migration is cancelled for some reason. That means that we + * still should read our chunks from migration stream, to not affect other + * migration objects (like RAM), but just ignore them and do not touch any + * bitmaps or nodes. + */ +bool cancelled; + GSList *bitmaps; QemuMutex lock; /* protect bitmaps */ } DBMLoadState; @@ -545,13 +554,47 @@ void dirty_bitmap_mig_before_vm_start(void) qemu_mutex_unlock(>lock); } +static void cancel_incoming_locked(DBMLoadState *s) +{ +GSList *item; + +if (s->cancelled) { +return; +} + +s->cancelled = true; +s->bs = NULL; +s->bitmap = NULL; + +/* Drop all unfinished bitmaps */ +for (item = s->bitmaps; item; item = g_slist_next(item)) { +LoadBitmapState *b = item->data; + +/* + * Bitmap must be unfinished, as finished bitmaps should already be + * removed from the list. + */ +assert(!s->before_vm_start_handled || !b->migrated); +if (bdrv_dirty_bitmap_has_successor(b->bitmap)) { +bdrv_reclaim_dirty_bitmap(b->bitmap, _abort); +} +bdrv_release_dirty_bitmap(b->bitmap); +} + +g_slist_free_full(s->bitmaps, g_free); +s->bitmaps = NULL; +} + static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s) { GSList *item; trace_dirty_bitmap_load_complete(); -bdrv_dirty_bitmap_deserialize_finish(s->bitmap); -qemu_mutex_lock(>lock); +if (s->cancelled) { +return; +} + +bdrv_dirty_bitmap_deserialize_finish(s->bitmap); if (bdrv_dirty_bitmap_has_successor(s->bitmap)) { bdrv_reclaim_dirty_bitmap(s->bitmap, _abort); @@ -569,8 +612,6 @@ static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s) break; } } - -qemu_mutex_unlock(>lock); } static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s) @@ -582,15 +623,32 @@ static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s) if (s->flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) { trace_dirty_bitmap_load_bits_zeroes(); -bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_byte, nr_bytes, - false); +if (!s->cancelled) { +bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_byte, + nr_bytes, false); +} } else { size_t ret; uint8_t *buf; uint64_t buf_size = qemu_get_be64(f); -uint64_t needed_size = -bdrv_dirty_bitmap_serialization_size(s->bitmap, - first_byte, nr_bytes); +uint64_t needed_size; + +buf = g_malloc(buf_size); +ret = qemu_get_buffer(f, buf, buf_size); +if (ret != buf_size) { +error_report("Failed to read bitmap bits"); +g_free(buf); +return -EIO; +} + +if (s->cancelled) { +g_free(buf); +return 0; +} + +needed_size = bdrv_dirty_bitmap_serialization_size(s->bitmap, + first_byte, + nr_bytes); if (needed_size > buf_size || buf_size > QEMU_ALIGN_UP(needed_size, 4 * sizeof(long)) @@ -599,15 +657,8 @@ static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s) error_report("Migrated bitmap granularity doesn't " "match the destination bitmap '%s' granularity", bdrv_dirty_bitmap_name(s->bitmap)); -return -EINVAL; -} - -buf = g_malloc(buf_size); -ret = qemu_get_buffer(f, buf, buf_size); -if (ret != buf_size) { -error_report("Failed to read bitmap bits"); -g_free(buf); -return -EIO; +cancel_incoming_locked(s); +return 0; } bdrv_dirty_bitmap_deserialize_part(s->bitmap, buf, first_byte, nr_bytes, @@ -632,14 +683,16 @@ static int dirty_bitmap_load_header(QEMUFile