On 11/12/2015 04:33 PM, Wen Congyang wrote:
>Imagine that migration_dirty_pages is slightly too small and we enter
ram_save_iterate;
>ram_save_iterate now sends *all* its pages, it decrements
migration_dirty_pages for
>every page sent. At the end of ram_save_iterate, migration_dirty_pages would
be negative.
>But migration_dirty_pages is *u*int64_t; so we exit ram_save_iterate,
>go around the main migration_thread loop again and call
qemu_savevm_state_pending, and
>it returns a very large number (because it's actually a negative number), so
we keep
>going around the loop, because it never gets smaller.
I don't know how to trigger the problem. I think storing migration_dirty_pages in
BitmapRcu
can fix this problem.
hi, David
It seems that it's not easy to reproduce this problem in my environment.
and the following 2 patches are to fix this issue; could you help review and
test.
thx
Li
>From 81f27571b3ec2be2afb76576b47f42fb96a06411 Mon Sep 17 00:00:00 2001
From: Wen Congyang <we...@cn.fujitsu.com>
Date: Fri, 13 Nov 2015 13:16:25 +0800
Subject: [PATCH 2/2] migration: use rcu to protect migration_dirty_pages
Signed-off-by: Wen Congyang <we...@cn.fujitsu.com>
---
migration/ram.c | 30 ++++++++++++++++++++----------
1 file changed, 20 insertions(+), 10 deletions(-)
diff --git a/migration/ram.c b/migration/ram.c
index 7f32696..ef259f9 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -221,7 +221,6 @@ static RAMBlock *last_seen_block;
static RAMBlock *last_sent_block;
static ram_addr_t last_offset;
static QemuMutex migration_bitmap_mutex;
-static uint64_t migration_dirty_pages;
static uint32_t last_version;
static bool ram_bulk_stage;
@@ -246,6 +245,7 @@ static struct BitmapRcu {
* of the postcopy phase
*/
unsigned long *unsentmap;
+ uint64_t dirty_pages;
} *migration_bitmap_rcu;
struct CompressParam {
@@ -580,7 +580,7 @@ static inline bool migration_bitmap_clear_dirty(ram_addr_t addr)
ret = test_and_clear_bit(nr, bitmap);
if (ret) {
- migration_dirty_pages--;
+ atomic_rcu_read(&migration_bitmap_rcu)->dirty_pages--;
}
return ret;
}
@@ -589,7 +589,7 @@ static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
{
unsigned long *bitmap;
bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
- migration_dirty_pages +=
+ atomic_rcu_read(&migration_bitmap_rcu)->dirty_pages +=
cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length);
}
@@ -613,7 +613,7 @@ static void migration_bitmap_sync_init(void)
static void migration_bitmap_sync(void)
{
RAMBlock *block;
- uint64_t num_dirty_pages_init = migration_dirty_pages;
+ uint64_t num_dirty_pages_init, num_dirty_pages_new;
MigrationState *s = migrate_get_current();
int64_t end_time;
int64_t bytes_xfer_now;
@@ -633,15 +633,17 @@ static void migration_bitmap_sync(void)
qemu_mutex_lock(&migration_bitmap_mutex);
rcu_read_lock();
+ num_dirty_pages_init = atomic_rcu_read(&migration_bitmap_rcu)->dirty_pages;
QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
migration_bitmap_sync_range(block->offset, block->used_length);
}
+ num_dirty_pages_new = atomic_rcu_read(&migration_bitmap_rcu)->dirty_pages;
rcu_read_unlock();
qemu_mutex_unlock(&migration_bitmap_mutex);
- trace_migration_bitmap_sync_end(migration_dirty_pages
+ trace_migration_bitmap_sync_end(num_dirty_pages_new
- num_dirty_pages_init);
- num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
+ num_dirty_pages_period += num_dirty_pages_new - num_dirty_pages_init;
end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
/* more than 1 second = 1000 millisecons */
@@ -1364,7 +1366,13 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero)
static ram_addr_t ram_save_remaining(void)
{
- return migration_dirty_pages;
+ ram_addr_t dirty_pages;
+
+ rcu_read_lock();
+ dirty_pages = atomic_rcu_read(&migration_bitmap_rcu)->dirty_pages;
+ rcu_read_unlock();
+
+ return dirty_pages;
}
uint64_t ram_bytes_remaining(void)
@@ -1454,7 +1462,9 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
*/
qemu_mutex_lock(&migration_bitmap_mutex);
bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
+ bitmap->dirty_pages = bitmap_weight(bitmap->bmap, old);
bitmap_set(bitmap->bmap, old, new - old);
+ bitmap->dirty_pages += new - old;
/* We don't have a way to safely extend the sentmap
* with RCU; so mark it as missing, entry to postcopy
@@ -1464,7 +1474,6 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
atomic_rcu_set(&migration_bitmap_rcu, bitmap);
qemu_mutex_unlock(&migration_bitmap_mutex);
- migration_dirty_pages += new - old;
call_rcu(old_bitmap, migration_bitmap_free, rcu);
}
}
@@ -1692,7 +1701,8 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
* Remark them as dirty, updating the count for any pages
* that weren't previously dirty.
*/
- migration_dirty_pages += !test_and_set_bit(page, bitmap);
+ atomic_rcu_read(&migration_bitmap_rcu)->dirty_pages +=
+ !test_and_set_bit(page, bitmap);
}
}
@@ -1924,7 +1934,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
* Count the total number of pages used by ram blocks not including any
* gaps due to alignment or unplugs.
*/
- migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
+ migration_bitmap_rcu->dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
memory_global_dirty_log_start();
migration_bitmap_sync();
--
2.1.4
>From 7a2cffb8802e955ede85fb907e94a7172b4a0fa6 Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhij...@cn.fujitsu.com>
Date: Fri, 13 Nov 2015 15:51:48 +0800
Subject: [PATCH 1/2] bitmap: add bitmap_weight() api
Count the number of bits set in the bitmap
Signed-off-by: Li Zhijian <lizhij...@cn.fujitsu.com>
---
include/qemu/bitmap.h | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h
index 86dd9cd..6e48429 100644
--- a/include/qemu/bitmap.h
+++ b/include/qemu/bitmap.h
@@ -43,6 +43,7 @@
* bitmap_clear(dst, pos, nbits) Clear specified bit area
* bitmap_test_and_clear_atomic(dst, pos, nbits) Test and clear area
* bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area
+ * bitmap_weight(src, nbits) Hamming Weight: number of set bits
*/
/*
@@ -227,6 +228,23 @@ static inline int bitmap_intersects(const unsigned long *src1,
}
}
+static inline int bitmap_weight(const unsigned long *src, long nbits)
+{
+ int i, count = 0, nlong = nbits / BITS_PER_LONG;
+
+ if (small_nbits(nbits)) {
+ return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits));
+ }
+ for (i = 0; i < nlong; i++) {
+ count += hweight_long(src[i]);
+ }
+ if (nbits % BITS_PER_LONG) {
+ count += hweight_long(src[i] & BITMAP_LAST_WORD_MASK(nbits));
+ }
+
+ return count;
+}
+
void bitmap_set(unsigned long *map, long i, long len);
void bitmap_set_atomic(unsigned long *map, long i, long len);
void bitmap_clear(unsigned long *map, long start, long nr);
--
2.1.4