On 11/12/2015 04:33 PM, Wen Congyang wrote:

>Imagine that migration_dirty_pages is slightly too small and we enter 
ram_save_iterate;
>ram_save_iterate now sends*all*  it's pages, it decrements 
migration_dirty_pages for
>every page sent.  At the end of ram_save_iterate, migration_dirty_pages would 
be negative.
>But migration_dirty_pages is *u*int64_t; so we exit ram_save_iterate,
>go around the main migration_thread loop again and call 
qemu_savevm_state_pending, and
>it returns a very large number (because it's actually a negative number), so 
we keep
>going around the loop, because it never gets smaller.
I don't know how to trigger the problem. I think storing migration_dirty_pages in 
BitmapRcu
can fix this problem.


Hi, David

It seems that it is not easy to reproduce this problem in my environment.
The following 2 patches are intended to fix this issue; could you help review and 
test them?

thx

Li

>From 81f27571b3ec2be2afb76576b47f42fb96a06411 Mon Sep 17 00:00:00 2001
From: Wen Congyang <we...@cn.fujitsu.com>
Date: Fri, 13 Nov 2015 13:16:25 +0800
Subject: [PATCH 2/2] migration: use rcu to protect migration_dirty_pages

Signed-off-by: Wen Congyang <we...@cn.fujitsu.com>
---
 migration/ram.c | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index 7f32696..ef259f9 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -221,7 +221,6 @@ static RAMBlock *last_seen_block;
 static RAMBlock *last_sent_block;
 static ram_addr_t last_offset;
 static QemuMutex migration_bitmap_mutex;
-static uint64_t migration_dirty_pages;
 static uint32_t last_version;
 static bool ram_bulk_stage;
 
@@ -246,6 +245,7 @@ static struct BitmapRcu {
      * of the postcopy phase
      */
     unsigned long *unsentmap;
+    uint64_t dirty_pages;
 } *migration_bitmap_rcu;
 
 struct CompressParam {
@@ -580,7 +580,7 @@ static inline bool migration_bitmap_clear_dirty(ram_addr_t addr)
     ret = test_and_clear_bit(nr, bitmap);
 
     if (ret) {
-        migration_dirty_pages--;
+        atomic_rcu_read(&migration_bitmap_rcu)->dirty_pages--;
     }
     return ret;
 }
@@ -589,7 +589,7 @@ static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
 {
     unsigned long *bitmap;
     bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
-    migration_dirty_pages +=
+    atomic_rcu_read(&migration_bitmap_rcu)->dirty_pages +=
         cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length);
 }
 
@@ -613,7 +613,7 @@ static void migration_bitmap_sync_init(void)
 static void migration_bitmap_sync(void)
 {
     RAMBlock *block;
-    uint64_t num_dirty_pages_init = migration_dirty_pages;
+    uint64_t num_dirty_pages_init, num_dirty_pages_new;
     MigrationState *s = migrate_get_current();
     int64_t end_time;
     int64_t bytes_xfer_now;
@@ -633,15 +633,17 @@ static void migration_bitmap_sync(void)
 
     qemu_mutex_lock(&migration_bitmap_mutex);
     rcu_read_lock();
+    num_dirty_pages_init = atomic_rcu_read(&migration_bitmap_rcu)->dirty_pages;
     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
         migration_bitmap_sync_range(block->offset, block->used_length);
     }
+    num_dirty_pages_new = atomic_rcu_read(&migration_bitmap_rcu)->dirty_pages;
     rcu_read_unlock();
     qemu_mutex_unlock(&migration_bitmap_mutex);
 
-    trace_migration_bitmap_sync_end(migration_dirty_pages
+    trace_migration_bitmap_sync_end(num_dirty_pages_new
                                     - num_dirty_pages_init);
-    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
+    num_dirty_pages_period += num_dirty_pages_new - num_dirty_pages_init;
     end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 
     /* more than 1 second = 1000 millisecons */
@@ -1364,7 +1366,13 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero)
 
 static ram_addr_t ram_save_remaining(void)
 {
-    return migration_dirty_pages;
+    ram_addr_t dirty_pages;
+
+    rcu_read_lock();
+    dirty_pages = atomic_rcu_read(&migration_bitmap_rcu)->dirty_pages;
+    rcu_read_unlock();
+
+    return dirty_pages;
 }
 
 uint64_t ram_bytes_remaining(void)
@@ -1454,7 +1462,9 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
          */
         qemu_mutex_lock(&migration_bitmap_mutex);
         bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
+        bitmap->dirty_pages = bitmap_weight(bitmap->bmap, old);
         bitmap_set(bitmap->bmap, old, new - old);
+        bitmap->dirty_pages += new - old;
 
         /* We don't have a way to safely extend the sentmap
          * with RCU; so mark it as missing, entry to postcopy
@@ -1464,7 +1474,6 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
 
         atomic_rcu_set(&migration_bitmap_rcu, bitmap);
         qemu_mutex_unlock(&migration_bitmap_mutex);
-        migration_dirty_pages += new - old;
         call_rcu(old_bitmap, migration_bitmap_free, rcu);
     }
 }
@@ -1692,7 +1701,8 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
                  * Remark them as dirty, updating the count for any pages
                  * that weren't previously dirty.
                  */
-                migration_dirty_pages += !test_and_set_bit(page, bitmap);
+                atomic_rcu_read(&migration_bitmap_rcu)->dirty_pages +=
+                    !test_and_set_bit(page, bitmap);
             }
         }
 
@@ -1924,7 +1934,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
      * Count the total number of pages used by ram blocks not including any
      * gaps due to alignment or unplugs.
      */
-    migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
+    migration_bitmap_rcu->dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
 
     memory_global_dirty_log_start();
     migration_bitmap_sync();
-- 
2.1.4

>From 7a2cffb8802e955ede85fb907e94a7172b4a0fa6 Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhij...@cn.fujitsu.com>
Date: Fri, 13 Nov 2015 15:51:48 +0800
Subject: [PATCH 1/2] bitmap: add bitmap_weight() api

Count the number of bits set in a bitmap.

Signed-off-by: Li Zhijian <lizhij...@cn.fujitsu.com>
---
 include/qemu/bitmap.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h
index 86dd9cd..6e48429 100644
--- a/include/qemu/bitmap.h
+++ b/include/qemu/bitmap.h
@@ -43,6 +43,7 @@
  * bitmap_clear(dst, pos, nbits)		Clear specified bit area
  * bitmap_test_and_clear_atomic(dst, pos, nbits)    Test and clear area
  * bitmap_find_next_zero_area(buf, len, pos, n, mask)	Find bit free area
+ * bitmap_weight(src, nbits)                    Hamming Weight: number of set bits
  */
 
 /*
@@ -227,6 +228,23 @@ static inline int bitmap_intersects(const unsigned long *src1,
     }
 }
 
+static inline int bitmap_weight(const unsigned long *src, long nbits)
+{
+    int i, count = 0, nlong = nbits / BITS_PER_LONG;
+
+    if (small_nbits(nbits)) {
+        return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits));
+    }
+    for (i = 0; i < nlong; i++) {
+        count += hweight_long(src[i]);
+    }
+    if (nbits % BITS_PER_LONG) {
+        count += hweight_long(src[i] & BITMAP_LAST_WORD_MASK(nbits));
+    }
+
+    return count;
+}
+
 void bitmap_set(unsigned long *map, long i, long len);
 void bitmap_set_atomic(unsigned long *map, long i, long len);
 void bitmap_clear(unsigned long *map, long start, long nr);
-- 
2.1.4

Reply via email to