Re: [Qemu-devel] [PATCH v6 07/11] dump: Add API to write dump_bitmap

2014-01-07 Thread Laszlo Ersek
comments below

On 01/05/14 08:27, Qiao Nuohan wrote:
 functions are used to write 1st and 2nd dump_bitmap of kdump-compressed 
 format,
 which is used to indicate whether the corresponded page is existed in vmcore.
 1st and 2nd dump_bitmap are same, because dump level is specified to 1 here.
 
 Signed-off-by: Qiao Nuohan qiaonuo...@cn.fujitsu.com
 ---
  dump.c|  116 
 +
  include/sysemu/dump.h |7 +++
  2 files changed, 123 insertions(+), 0 deletions(-)
 
 diff --git a/dump.c b/dump.c
 index e3623b9..1fae152 100644
 --- a/dump.c
 +++ b/dump.c
 @@ -80,12 +80,14 @@ typedef struct DumpState {
  bool flag_flatten;
  uint32_t nr_cpus;
  size_t page_size;
 +uint32_t page_shift;
  uint64_t max_mapnr;
  size_t len_dump_bitmap;
  void *note_buf;
  size_t note_buf_offset;
  off_t offset_dump_bitmap;
  off_t offset_page;
 +size_t num_dumpable;
  uint32_t flag_compress;
  } DumpState;
  
 @@ -972,6 +974,120 @@ static int write_dump_header(DumpState *s)
  }
  }
  
 +/* set dump_bitmap sequencely. bitmap is not allowed to be rewritten. */
 +static int set_dump_bitmap(int64_t last_pfn, int64_t pfn, uint32_t value,
 +   void *buf, DumpState *s)
 +{

I'd recommend
- uint8_t *buf, and
- bool value, and
- maybe an assert() that neither of pfn and last_pfn is negative.

 +off_t old_offset, new_offset;
 +off_t offset_bitmap1, offset_bitmap2;
 +uint32_t byte, bit;
 +
 +/* should not set the previous place */
 +if (last_pfn  pfn) {
 +return -1;
 +}
 +
 +/*
 + * if the bit needed to be set is not cached in buf, flush the data in 
 buf
 + * to vmcore firstly.
 + * making new_offset be bigger than old_offset can also sync remained 
 data
 + * into vmcore.
 + */
 +old_offset = BUFSIZE_BITMAP * (last_pfn / PFN_BUFBITMAP);
 +new_offset = BUFSIZE_BITMAP * (pfn / PFN_BUFBITMAP);
 +
 +while (old_offset  new_offset) {
 +/* calculate the offset and write dump_bitmap */
 +offset_bitmap1 = s-offset_dump_bitmap + old_offset;
 +if (write_buffer(s-fd, s-flag_flatten, offset_bitmap1, buf,
 + BUFSIZE_BITMAP)  0) {
 +return -1;
 +}
 +
 +/* dump level 1 is chosen, so 1st and 2nd bitmap are same */
 +offset_bitmap2 = s-offset_dump_bitmap + s-len_dump_bitmap +
 + old_offset;
 +if (write_buffer(s-fd, s-flag_flatten, offset_bitmap2, buf,
 + BUFSIZE_BITMAP)  0) {
 +return -1;
 +}
 +
 +memset(buf, 0, BUFSIZE_BITMAP);
 +old_offset += BUFSIZE_BITMAP;
 +}

Seems sane to me.

 +
 +/* get the exact place of the bit in the buf, and set it */
 +byte = (pfn % PFN_BUFBITMAP)  3;

(dividing by CHAR_BIT would be more consistent with the connection
between PFN_BUFBITMAP and BUFSIZE_BITMAP)

 +bit = (pfn % PFN_BUFBITMAP)  7;
 +if (value) {
 +((char *)buf)[byte] |= 1bit;
 +} else {
 +((char *)buf)[byte] = ~(1bit);
 +}

Shudder. I would much prefer (with uint8_t *buf included):

if (value) {
buf[byte] |=   1u  bit;
} else {
buf[byte] = ~(1u  bit);
}

When you interpret the expressions in question against the C standard,
my proposal is simpler to understand, and relies on fewer
platform-specifics. The current code looks simple, but it's actually
quite complex.

In any case, I think it will work in practice.


 +
 +return 0;
 +}
 +
 +static int write_dump_bitmap(DumpState *s)
 +{
 +int ret = 0;
 +int64_t pfn_start, pfn_end, pfn;
 +int64_t last_pfn;
 +void *dump_bitmap_buf;
 +size_t num_dumpable;
 +MemoryMapping *memory_mapping;
 +
 +/* dump_bitmap_buf is used to store dump_bitmap temporarily */
 +dump_bitmap_buf = g_malloc0(BUFSIZE_BITMAP);
 +
 +num_dumpable = 0;
 +last_pfn = -1;

This would trip up the assert() that I proposed above. It also exploits
that (last_pfn == -1) will result in (old_offset = 0) in
set_dump_bitmap(), due to the division. I think it's fairly ugly, but
should work in practice.

 +
 +/*
 + * exam memory page by page, and set the bit in dump_bitmap corresponded
 + * to the existing page
 + */
 +QTAILQ_FOREACH(memory_mapping, s-list.head, next) {
 +pfn_start = paddr_to_pfn(memory_mapping-phys_addr, s-page_shift);
 +pfn_end = paddr_to_pfn(memory_mapping-phys_addr +
 +   memory_mapping-length, s-page_shift);

OK, the intent seems to be to make pfn_end exclusive (see the loop
below). That however depends on memory_mapping-length being an
integral multiple of the target page size.

I propose an assert() here for that reason. Looking at patch v6 10/11,
for a compressed dump both paging and filtering are rejected. This
implies that in dump_init(),
- qemu_get_guest_simple_memory_mapping() is called 

Re: [Qemu-devel] [PATCH v6 07/11] dump: Add API to write dump_bitmap

2014-01-07 Thread Laszlo Ersek
On 01/07/14 15:49, Laszlo Ersek wrote:
 
 On 01/05/14 08:27, Qiao Nuohan wrote:

 diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
 index 9e47b4c..b5eaf8d 100644
 --- a/include/sysemu/dump.h
 +++ b/include/sysemu/dump.h
 @@ -27,11 +27,18 @@
  #define DUMP_DH_COMPRESSED_LZO  (0x2)
  #define DUMP_DH_COMPRESSED_SNAPPY   (0x4)
  
 +#define PAGE_SIZE   (4096)
  #define KDUMP_SIGNATURE KDUMP   
  #define SIG_LEN (sizeof(KDUMP_SIGNATURE) - 1)
  #define PHYS_BASE   (0)
  #define DUMP_LEVEL  (1)
  #define DISKDUMP_HEADER_BLOCKS  (1)
 +#define BUFSIZE_BITMAP  (PAGE_SIZE)
 +#define PFN_BUFBITMAP   (CHAR_BIT * BUFSIZE_BITMAP)
 +#define ARCH_PFN_OFFSET (0)
 +
 +#define paddr_to_pfn(X, page_shift) \
 +(((unsigned long long)(X)  (page_shift)) - ARCH_PFN_OFFSET)
  
  typedef struct ArchDumpInfo {
  int d_machine;  /* Architecture */

 
 I think these magic constants are somewhat tied to x86, and therefore
 should be in an arch-specific file rather than a common file, but
 whoever wants to extend this to another architecture can do that.

Stressing the argument a bit more for PAGE_SIZE specifically:
- we already have TARGET_PAGE_SIZE, maybe that would be a better choice,
- PAGE_SIZE is defined *as* TARGET_PAGE_SIZE in kvm-all.c. There's no
actual conflict, but the mental conflict is bad enough.

Anyway my R-b stands.

Laszlo



[Qemu-devel] [PATCH v6 07/11] dump: Add API to write dump_bitmap

2014-01-05 Thread Qiao Nuohan
functions are used to write 1st and 2nd dump_bitmap of kdump-compressed format,
which is used to indicate whether the corresponded page is existed in vmcore.
1st and 2nd dump_bitmap are same, because dump level is specified to 1 here.

Signed-off-by: Qiao Nuohan qiaonuo...@cn.fujitsu.com
---
 dump.c|  116 +
 include/sysemu/dump.h |7 +++
 2 files changed, 123 insertions(+), 0 deletions(-)

diff --git a/dump.c b/dump.c
index e3623b9..1fae152 100644
--- a/dump.c
+++ b/dump.c
@@ -80,12 +80,14 @@ typedef struct DumpState {
 bool flag_flatten;
 uint32_t nr_cpus;
 size_t page_size;
+uint32_t page_shift;
 uint64_t max_mapnr;
 size_t len_dump_bitmap;
 void *note_buf;
 size_t note_buf_offset;
 off_t offset_dump_bitmap;
 off_t offset_page;
+size_t num_dumpable;
 uint32_t flag_compress;
 } DumpState;
 
@@ -972,6 +974,120 @@ static int write_dump_header(DumpState *s)
 }
 }
 
+/* set dump_bitmap sequencely. bitmap is not allowed to be rewritten. */
+static int set_dump_bitmap(int64_t last_pfn, int64_t pfn, uint32_t value,
+   void *buf, DumpState *s)
+{
+off_t old_offset, new_offset;
+off_t offset_bitmap1, offset_bitmap2;
+uint32_t byte, bit;
+
+/* should not set the previous place */
+if (last_pfn  pfn) {
+return -1;
+}
+
+/*
+ * if the bit needed to be set is not cached in buf, flush the data in buf
+ * to vmcore firstly.
+ * making new_offset be bigger than old_offset can also sync remained data
+ * into vmcore.
+ */
+old_offset = BUFSIZE_BITMAP * (last_pfn / PFN_BUFBITMAP);
+new_offset = BUFSIZE_BITMAP * (pfn / PFN_BUFBITMAP);
+
+while (old_offset  new_offset) {
+/* calculate the offset and write dump_bitmap */
+offset_bitmap1 = s-offset_dump_bitmap + old_offset;
+if (write_buffer(s-fd, s-flag_flatten, offset_bitmap1, buf,
+ BUFSIZE_BITMAP)  0) {
+return -1;
+}
+
+/* dump level 1 is chosen, so 1st and 2nd bitmap are same */
+offset_bitmap2 = s-offset_dump_bitmap + s-len_dump_bitmap +
+ old_offset;
+if (write_buffer(s-fd, s-flag_flatten, offset_bitmap2, buf,
+ BUFSIZE_BITMAP)  0) {
+return -1;
+}
+
+memset(buf, 0, BUFSIZE_BITMAP);
+old_offset += BUFSIZE_BITMAP;
+}
+
+/* get the exact place of the bit in the buf, and set it */
+byte = (pfn % PFN_BUFBITMAP)  3;
+bit = (pfn % PFN_BUFBITMAP)  7;
+if (value) {
+((char *)buf)[byte] |= 1bit;
+} else {
+((char *)buf)[byte] = ~(1bit);
+}
+
+return 0;
+}
+
+static int write_dump_bitmap(DumpState *s)
+{
+int ret = 0;
+int64_t pfn_start, pfn_end, pfn;
+int64_t last_pfn;
+void *dump_bitmap_buf;
+size_t num_dumpable;
+MemoryMapping *memory_mapping;
+
+/* dump_bitmap_buf is used to store dump_bitmap temporarily */
+dump_bitmap_buf = g_malloc0(BUFSIZE_BITMAP);
+
+num_dumpable = 0;
+last_pfn = -1;
+
+/*
+ * exam memory page by page, and set the bit in dump_bitmap corresponded
+ * to the existing page
+ */
+QTAILQ_FOREACH(memory_mapping, s-list.head, next) {
+pfn_start = paddr_to_pfn(memory_mapping-phys_addr, s-page_shift);
+pfn_end = paddr_to_pfn(memory_mapping-phys_addr +
+   memory_mapping-length, s-page_shift);
+
+for (pfn = pfn_start; pfn  pfn_end; pfn++) {
+ret = set_dump_bitmap(last_pfn, pfn, 1, dump_bitmap_buf, s);
+if (ret  0) {
+dump_error(s, dump: failed to set dump_bitmap.\n);
+ret = -1;
+goto out;
+}
+
+last_pfn = pfn;
+num_dumpable++;
+}
+}
+
+/*
+ * last_pfn  -1 means bitmap is set, then remained data in buf should be
+ * synchronized into vmcore
+ */
+if (last_pfn  -1) {
+ret = set_dump_bitmap(last_pfn, last_pfn + PFN_BUFBITMAP, 0,
+  dump_bitmap_buf, s);
+if (ret  0) {
+dump_error(s, dump: failed to sync dump_bitmap.\n);
+ret = -1;
+goto out;
+}
+}
+
+/* number of dumpable pages that will be dumped later */
+s-num_dumpable = num_dumpable;
+
+out:
+g_free(dump_bitmap_buf);
+
+return ret;
+}
+
 static ram_addr_t get_start_block(DumpState *s)
 {
 GuestPhysBlock *block;
diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
index 9e47b4c..b5eaf8d 100644
--- a/include/sysemu/dump.h
+++ b/include/sysemu/dump.h
@@ -27,11 +27,18 @@
 #define DUMP_DH_COMPRESSED_LZO  (0x2)
 #define DUMP_DH_COMPRESSED_SNAPPY   (0x4)
 
+#define PAGE_SIZE   (4096)
 #define KDUMP_SIGNATURE KDUMP   
 #define SIG_LEN (sizeof(KDUMP_SIGNATURE) - 1)
 #define