[Qemu-devel] [RFC PATCH] tests: rtl8139: test timers and interrupt

2015-01-07 Thread Frediano Ziglio
Test behaviour of timers and interrupts related to timeouts.

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 tests/Makefile   |   2 +-
 tests/rtl8139-test.c | 164 +++
 2 files changed, 165 insertions(+), 1 deletion(-)

This patch was derived from a test I did while implementing timer in
rtl8139 code. Now that there is support for integrated testing I converted
it. The test was tested on a real NIC.

As if it's the first test I wrote I don't know if syntax and details are
fine. For instance should I remove nop test? Should I split my test?

diff --git a/tests/Makefile b/tests/Makefile
index e4ddb6a..8858407 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -320,7 +320,7 @@ tests/tmp105-test$(EXESUF): tests/tmp105-test.o 
$(libqos-omap-obj-y)
 tests/i440fx-test$(EXESUF): tests/i440fx-test.o $(libqos-pc-obj-y)
 tests/fw_cfg-test$(EXESUF): tests/fw_cfg-test.o $(libqos-pc-obj-y)
 tests/e1000-test$(EXESUF): tests/e1000-test.o
-tests/rtl8139-test$(EXESUF): tests/rtl8139-test.o
+tests/rtl8139-test$(EXESUF): tests/rtl8139-test.o $(libqos-pc-obj-y)
 tests/pcnet-test$(EXESUF): tests/pcnet-test.o
 tests/eepro100-test$(EXESUF): tests/eepro100-test.o
 tests/vmxnet3-test$(EXESUF): tests/vmxnet3-test.o
diff --git a/tests/rtl8139-test.c b/tests/rtl8139-test.c
index f6a1be3..87f1792 100644
--- a/tests/rtl8139-test.c
+++ b/tests/rtl8139-test.c
@@ -10,19 +10,183 @@
 #include glib.h
 #include string.h
 #include libqtest.h
+#include libqos/pci-pc.h
 #include qemu/osdep.h
+#include qemu-common.h
 
 /* Tests only initialization so far. TODO: Replace with functional tests */
 static void nop(void)
 {
 }
 
+#define CLK 3300
+#define NS_PER_SEC 10ULL
+
+static QPCIBus *pcibus;
+static QPCIDevice *dev = NULL;
+static void *dev_base = NULL;
+
+static void save_fn(QPCIDevice *dev, int devfn, void *data)
+{
+QPCIDevice **pdev = (QPCIDevice **) data;
+
+*pdev = dev;
+}
+
+static QPCIDevice *get_device(void)
+{
+QPCIDevice *dev;
+
+pcibus = qpci_init_pc();
+qpci_device_foreach(pcibus, 0x10ec, 0x8139, save_fn, dev);
+g_assert(dev != NULL);
+
+return dev;
+}
+
+#define PORT(name,len,val) \
+static unsigned __attribute__((unused)) in_##name(void) { \
+unsigned res = qpci_io_read##len(dev,dev_base+(val)); \
+g_test_message(*%s - %x\n, #name, res); \
+return res; \
+} \
+static void out_##name(unsigned v) { \
+g_test_message(%x - *%s\n, v, #name); \
+qpci_io_write##len(dev,dev_base+(val),v); \
+}
+
+PORT(Timer,l,0x48)
+PORT(IntrMask,w,0x3c)
+PORT(IntrStatus,w,0x3E)
+PORT(TimerInt,l,0x54)
+
+#define fatal(...) do { g_test_message(__VA_ARGS__); g_assert(0); } while(0)
+
+static void test_timer(void)
+{
+const unsigned from = 0.95 * CLK;
+const unsigned to = 1.6 * CLK;
+
+out_IntrMask(0);
+
+in_IntrStatus();
+in_Timer();
+in_Timer();
+
+// Test 1. test counter continue and continue
+out_TimerInt(0); // disable timer
+out_IntrStatus(0x4000);
+out_Timer(12345); // reset timer to 0
+unsigned curr = in_Timer();
+unsigned cnt;
+if (curr  0.1 * CLK)
+fatal(time too big %u\n, curr);
+for (cnt=0;;) {
+clock_step(1 * NS_PER_SEC);
+unsigned prev = curr;
+curr = in_Timer();
+
+// test skip is in a specific range
+unsigned diff = (curr-prev)  0xu;
+if (diff  from || diff  to)
+fatal(Invalid diff %u (%u-%u)\n, diff, from,to);
+if (curr  prev  ++cnt == 3)
+break;
+}
+
+// Test 2. Check we didn't get an interrupt with TimerInt == 0
+if (in_IntrStatus()  0x4000)
+fatal(got an interrupt\n);
+
+// Test 3. Setting TimerInt to 1 and Timer to 0 get interrupt
+out_TimerInt(1);
+out_Timer(0);
+clock_step(40);
+if ((in_IntrStatus()  0x4000) == 0)
+fatal(we should have an interrupt here!\n);
+
+// Test 3. Check acknowledge
+out_IntrStatus(0x4000);
+if (in_IntrStatus()  0x4000)
+fatal(got an interrupt\n);
+
+// Test. Status set after Timer reset
+out_Timer(0);
+out_TimerInt(0);
+out_IntrStatus(0x4000);
+curr = in_Timer();
+out_TimerInt(curr + 0.5 * CLK);
+clock_step(1 * NS_PER_SEC);
+out_Timer(0);
+if ((in_IntrStatus()  0x4000) == 0)
+fatal(we should have an interrupt here!\n);
+
+// Test. Status set after TimerInt reset
+out_Timer(0);
+out_TimerInt(0);
+out_IntrStatus(0x4000);
+curr = in_Timer();
+out_TimerInt(curr + 0.5 * CLK);
+clock_step(1 * NS_PER_SEC);
+out_TimerInt(0);
+if ((in_IntrStatus()  0x4000) == 0)
+fatal(we should have an interrupt here!\n);
+
+// Test 4. Increment TimerInt we should see an interrupt
+curr = in_Timer();
+unsigned next = curr + 5.0 * CLK;
+out_TimerInt(next);
+for (cnt=0;;) {
+clock_step(1 * NS_PER_SEC);
+unsigned prev = curr;
+curr = in_Timer();
+unsigned diff

Re: [Qemu-devel] [Xen-devel] Project idea: make QEMU more flexible

2014-01-06 Thread Frediano Ziglio
On Mon, 2014-01-06 at 12:54 +, Wei Liu wrote:
 Hi all
 
 This idea is to modify QEMU's Makefiles, plus implementing some stubs to
 make it possible to tailor QEMU to a smaller binary.
 
 The current setup for Xen on X86 is to build i386-softmmu, and uses this
 single binary for two purposes:
 1. serves as device emulator for HVM guest.
 2. serves as PV driver backend for PV guest.
 

Perhaps even KVM would benefit too.
Are you sure we don't emulate any instruction for other purposes?

 Either case CPU emulation is never used because Xen handles that
 already. So we are in fact having a load of unused code in QEMU build.
 
 What I have in mind is to build a QEMU binary which:
 1. does not include CPU emulation code at all.
 2. only includes components that's useful (what's useful is TBD).
 
 And the rationales behind this idea are:
 
 1. Reduce memory footprint. One usecase would be running Xen on embedded
platform (X86 or ARM). We would expect the system has very limited
resources. The smaller the binary, the better.
 
 2. It doesn't make sense to have i386 emulation on ARM platform.
Arguably nobody can prevent user from running i386 emulator on ARM
platform, but it doesn't make sense in Xen's setup where QEMU is
only used as PV device backend on ARM.
 
 3. Security concern. It's much easier to audit small code base.
 
 Please note that I'm not proposing to invalidate all the other usecases.
 I'm only speaking with my Xen developer's hat on, aiming to make QEMU
 more flexible.
 
 Down to implementation level I only need to (hopefully) add a few stubs
 and create some new CONFIG_* options and move a few things around. It
 might not be as intrusive as one thinks.
 
 In fact I've already hacked a prototype during Christmas. What's I've
 done so far:
 
 1. create target-null which only has some stubs to CPU emulation
framework.
 
 2. add a few lines to configure / Makefiles*, create
default-configs/null-softmmu
 
 Finally I got a qemu-system-null. And the effect is immediately visible
 -- the size of QEMU binary shrinked from 13MB to 7.6MB. I haven't really
 looked at what device emulation code can be removed so the size can even
 be made smaller.
 

Well, I would prefer something like a i386-nocpu. The system qemu
emulate is still specific to a given architecture. You can't emulate ARM
with the same executable. So we'd add default-configs/arm-nocpu.mak and
default-config/x86_64-nocpu.mak (i386 don't make much sense anymore).

 What do you think about this idea?
 
 Thanks
 Wei.
 

Frediano





Re: [Qemu-devel] [RFC PATCH] vga: Start supporting resolution not multiple of 16 correctly.

2013-07-28 Thread Frediano Ziglio
2013/7/23 Gerd Hoffmann kra...@redhat.com

   Hi,

  Tested-by: Fabio Fantoni fabio.fant...@m2r.biz
 
  I tested it for a long time with spice on xen (because qxl will be fully
  working only after adding SSE support on hvm domUs). It works, I think
  it is good to add this and the respective vgabios patch on upstream.

   case VBE_DISPI_INDEX_XRES:
  -if ((val = VBE_DISPI_MAX_XRES)  ((val  7) == 0)) {
  +if ((val = VBE_DISPI_MAX_XRES)  ((val  1) == 0)) {
   s-vbe_regs[s-vbe_index] = val;
   }
   break;

 It's not that simple.  With 32bit depths common today it will work fine,
 but for lower depths (especially those lower than 8bit) this will give
 you broken scanline alignment.

 cheers,
   Gerd



In the card I tested the scanline is keep aligned but for this reason is
not directly computed by maxx * bits but is something bigger.

Frediano


Re: [Qemu-devel] [RFC PATCH] vga: Start supporting resolution not multiple of 16 correctly.

2013-06-18 Thread Frediano Ziglio
Modern notebook support 1366x768 resolution. The resolution width is
not multiple of 16 causing some problems.

QEMU VGA emulation requires width resolution to be multiple of 8.

VNC implementation requires width resolution to be multiple of 16.

Signed-off-by: Frediano Ziglio frediano.zig...@citrix.com
---
 hw/display/vga.c |2 +-
 ui/vnc.c |   34 +++---
 2 files changed, 20 insertions(+), 16 deletions(-)

Updates:
- rebased
- fixed style problems
- fixed typos in comment

Attached patch for last vgabios in order to get some new resolutions.
Still had no time to test deeply.

diff --git a/hw/display/vga.c b/hw/display/vga.c
index 21a108d..0053b0f 100644
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -648,7 +648,7 @@ void vbe_ioport_write_data(void *opaque, uint32_t addr,
uint32_t val)
 }
 break;
 case VBE_DISPI_INDEX_XRES:
-if ((val = VBE_DISPI_MAX_XRES)  ((val  7) == 0)) {
+if ((val = VBE_DISPI_MAX_XRES)  ((val  1) == 0)) {
 s-vbe_regs[s-vbe_index] = val;
 }
 break;
diff --git a/ui/vnc.c b/ui/vnc.c
index dfc7459..2a2bb90 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -911,26 +911,30 @@ static int vnc_update_client(VncState *vs, int
has_dirty)
 for (y = 0; y  height; y++) {
 int x;
 int last_x = -1;
-for (x = 0; x  width / 16; x++) {
-if (test_and_clear_bit(x, vs-dirty[y])) {
+for (x = 0; x  width; x += 16) {
+if (test_and_clear_bit(x/16, vs-dirty[y])) {
 if (last_x == -1) {
 last_x = x;
 }
 } else {
 if (last_x != -1) {
-int h = find_and_clear_dirty_height(vs, y, last_x,
x,
-height);
+int h = find_and_clear_dirty_height(vs, y,
last_x/16,
+x/16, height);

-n += vnc_job_add_rect(job, last_x * 16, y,
-  (x - last_x) * 16, h);
+n += vnc_job_add_rect(job, last_x, y,
+  (x - last_x), h);
 }
 last_x = -1;
 }
 }
 if (last_x != -1) {
-int h = find_and_clear_dirty_height(vs, y, last_x, x,
height);
-n += vnc_job_add_rect(job, last_x * 16, y,
-  (x - last_x) * 16, h);
+int h = find_and_clear_dirty_height(vs, y, last_x/16, x/16,
+height);
+if (x  width) {
+x = width;
+}
+n += vnc_job_add_rect(job, last_x, y,
+  (x - last_x), h);
 }
 }

@@ -1861,7 +1865,7 @@ static void framebuffer_update_request(VncState *vs,
int incremental,
int w, int h)
 {
 int i;
-const size_t width = surface_width(vs-vd-ds) / 16;
+const size_t width = (surface_width(vs-vd-ds)+15) / 16;
 const size_t height = surface_height(vs-vd-ds);

 if (y_position  height) {
@@ -2686,10 +2690,6 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
  * Check and copy modified bits from guest to server surface.
  * Update server dirty map.
  */
-cmp_bytes = 64;
-if (cmp_bytes  vnc_server_fb_stride(vd)) {
-cmp_bytes = vnc_server_fb_stride(vd);
-}
 if (vd-guest.format != VNC_SERVER_FB_FORMAT) {
 int width = pixman_image_get_width(vd-server);
 tmpbuf = qemu_pixman_linebuf_create(VNC_SERVER_FB_FORMAT, width);
@@ -2710,8 +2710,12 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
 }
 server_ptr = server_row;

-for (x = 0; x + 15  width;
+cmp_bytes = 64;
+for (x = 0; x  width;
 x += 16, guest_ptr += cmp_bytes, server_ptr +=
cmp_bytes) {
+if (width - x  16) {
+cmp_bytes = 4 * (width - x);
+}
 if (!test_and_clear_bit((x / 16), vd-guest.dirty[y]))
 continue;
 if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0)
-- 
1.7.10.4



2013/5/10 Andreas Färber afaer...@suse.de

 Am 15.03.2013 19:14, schrieb Frediano Ziglio:
  Modern notebook support 136x768 resolution. The resolution width is

 1366?

  not multiple of 16 causing some problems.

 a multiple? (me not a native English speaker)

 
  Qemu VGA emulation require width resolution to be multiple of 8.

 QEMU

 
  VNC implementation require width resolution to be multiple of 16.

 requires or implementations

 
  This patch remove these limits. Was tested with a Windows machine

[Qemu-devel] [RFC PATCH] vga: Start supporting resolution not multiple of 16 correctly.

2013-03-15 Thread Frediano Ziglio
Modern notebook support 136x768 resolution. The resolution width is
not multiple of 16 causing some problems.

Qemu VGA emulation require width resolution to be multiple of 8.

VNC implementation require width resolution to be multiple of 16.

This patch remove these limits. Was tested with a Windows machine with
standard vga and 1366x768 as resolution. I had to update vgabios as
version in qemu (pc-bios/vgabios-stdvga.bin) is quite old. I also had
to add some patches on top of VGABIOS 0.7a to add some new
resolutions.

I have some doubt about this patch
- are other UI (sdl, cocoa, qxl) happy if resolution is not multiple of 16 ?
- scanline is computed exactly without any alignment (so 1366 8 bit is
1366 bytes) while getting vesa information from a laptop it seems to
use some kind of alignment (if became 0x580 which is 1408 bytes).
Perhaps should I change either VGABIOS and Qemu to make this
alignment?

Signed-off-by: Frediano Ziglio frediano.zig...@citrix.com

---
 hw/vga.c |2 +-
 ui/vnc.c |   27 +--
 2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/hw/vga.c b/hw/vga.c
index 1caf23d..d229f06 100644
--- a/hw/vga.c
+++ b/hw/vga.c
@@ -651,7 +651,7 @@ void vbe_ioport_write_data(void *opaque, uint32_t
addr, uint32_t val)
 }
 break;
 case VBE_DISPI_INDEX_XRES:
-if ((val = VBE_DISPI_MAX_XRES)  ((val  7) == 0)) {
+if ((val = VBE_DISPI_MAX_XRES)  ((val  1) == 0)) {
 s-vbe_regs[s-vbe_index] = val;
 }
 break;
diff --git a/ui/vnc.c b/ui/vnc.c
index ff4e2ae..328d14d 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -907,26 +907,27 @@ static int vnc_update_client(VncState *vs, int has_dirty)
 for (y = 0; y  height; y++) {
 int x;
 int last_x = -1;
-for (x = 0; x  width / 16; x++) {
-if (test_and_clear_bit(x, vs-dirty[y])) {
+for (x = 0; x  width; x += 16) {
+if (test_and_clear_bit(x/16, vs-dirty[y])) {
 if (last_x == -1) {
 last_x = x;
 }
 } else {
 if (last_x != -1) {
-int h = find_and_clear_dirty_height(vs, y, last_x, x,
+int h = find_and_clear_dirty_height(vs, y,
last_x/16, x/16,
 height);

-n += vnc_job_add_rect(job, last_x * 16, y,
-  (x - last_x) * 16, h);
+n += vnc_job_add_rect(job, last_x, y,
+  (x - last_x), h);
 }
 last_x = -1;
 }
 }
 if (last_x != -1) {
-int h = find_and_clear_dirty_height(vs, y, last_x, x, height);
-n += vnc_job_add_rect(job, last_x * 16, y,
-  (x - last_x) * 16, h);
+int h = find_and_clear_dirty_height(vs, y, last_x/16,
x/16, height);
+if (x  width) x = width;
+n += vnc_job_add_rect(job, last_x, y,
+  (x - last_x), h);
 }
 }

@@ -1771,7 +1772,7 @@ static void framebuffer_update_request(VncState
*vs, int incremental,
int w, int h)
 {
 int i;
-const size_t width = ds_get_width(vs-ds) / 16;
+const size_t width = (ds_get_width(vs-ds)+15) / 16;

 if (y_position  ds_get_height(vs-ds))
 y_position = ds_get_height(vs-ds);
@@ -2595,10 +2596,6 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
  * Check and copy modified bits from guest to server surface.
  * Update server dirty map.
  */
-cmp_bytes = 64;
-if (cmp_bytes  vnc_server_fb_stride(vd)) {
-cmp_bytes = vnc_server_fb_stride(vd);
-}
 if (vd-guest.format != VNC_SERVER_FB_FORMAT) {
 int width = pixman_image_get_width(vd-server);
 tmpbuf = qemu_pixman_linebuf_create(VNC_SERVER_FB_FORMAT, width);
@@ -2619,8 +2616,10 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
 }
 server_ptr = server_row;

-for (x = 0; x + 15  width;
+cmp_bytes = 64;
+for (x = 0; x  width;
 x += 16, guest_ptr += cmp_bytes, server_ptr += cmp_bytes) {
+if (width - x  16) cmp_bytes = 4 * (width - x);
 if (!test_and_clear_bit((x / 16), vd-guest.dirty[y]))
 continue;
 if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0)
-- 
1.7.10.4



[Qemu-devel] Fix invalidate if memory requested was not bucket aligned

2012-08-07 Thread Frediano Ziglio

When memory is mapped in qemu_map_cache with lock != 0 a reverse mapping
is created pointing to the virtual address of location requested.
The cached mapped entry is saved in last_address_vaddr with the memory
location of the base virtual address (without bucket offset).
However when this entry is invalidated the virtual address saved in the
reverse mapping is used. This cause that the mapping is freed but the
last_address_vaddr is not reset.

Signed-off-by: Frediano Ziglio frediano.zig...@citrix.com
---
 xen-mapcache.c |9 +
 1 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/xen-mapcache.c b/xen-mapcache.c
index 59ba085..9cd6db3 100644
--- a/xen-mapcache.c
+++ b/xen-mapcache.c
@@ -320,10 +320,6 @@ void xen_invalidate_map_cache_entry(uint8_t *buffer)
 target_phys_addr_t size;
 int found = 0;
 
-if (mapcache-last_address_vaddr == buffer) {
-mapcache-last_address_index = -1;
-}
-
 QTAILQ_FOREACH(reventry, mapcache-locked_entries, next) {
 if (reventry-vaddr_req == buffer) {
 paddr_index = reventry-paddr_index;
@@ -342,6 +338,11 @@ void xen_invalidate_map_cache_entry(uint8_t *buffer)
 QTAILQ_REMOVE(mapcache-locked_entries, reventry, next);
 g_free(reventry);
 
+if (mapcache-last_address_index == paddr_index) {
+mapcache-last_address_index = -1;
+mapcache-last_address_vaddr = NULL;
+}
+
 entry = mapcache-entry[paddr_index % mapcache-nr_buckets];
 while (entry  (entry-paddr_index != paddr_index || entry-size != 
size)) {
 pentry = entry;
-- 
1.7.5.4



Re: [Qemu-devel] Qemu crashed with lsi booting

2012-07-27 Thread Frediano Ziglio
2012/7/27 Paolo Bonzini pbonz...@redhat.com:
 Il 26/07/2012 10:31, Frediano Ziglio ha scritto:
 sudo ./x86_64-softmmu/qemu-system-x86_64 -m 1024 -hda test.qcow
 -device lsi -drive file=/dev/sdb,if=none,id=XXX -device
 scsi-block,drive=XXX -enable-kvm -bios ~/seabios/out/bios.bin -serial
 file:out.txt
 lsi_scsi: error: Multiple IO pending for request 0x7fd1075bf100
 qemu-system-x86_64: /home/fredianoz/qemu/hw/lsi53c895a.c:774:
 lsi_do_command: Assertion `s-current == ((void *)0)' failed.

 (sometimes I don't get the Multiple IO ending line).

 I'm using a recent SeaBIOS which support booting from LSI SCSI.

 Qemu version

 $ git branch -v
 * master 61dc008 Revert audio: Make PC speaker audio card available by 
 default

 I'm using SeaBIOS commit 9d6bac1d32b72cdf7c0ad009c1371a2e69084de3
 (some minor changes in order to support 4k sectors).

 Can you share them?  4k sectors are not supported by BIOS at all
 AFAIK...  Does virtio-scsi work with those changes?

 Adding some debugging to SeaBIOS lsi code seems that drivers send
 initial INQUIRY request but after that all requests have some problems
 and lead to a lot of reset command.

 Can you gather tracing output for the following events:

 scsi_req_data
 scsi_req_dequeue
 scsi_req_parsed
 scsi_req_parse_bad
 scsi_req_build_sense
 scsi_inquiry
 scsi_test_unit_ready

 (Perhaps you can also instrument scsi_req_cancel for tracing).

 The timing doesn't matter, so you can use the stderr backend:

 --enable-trace-backend=stderr

 Place the above list in a file (one tracepoint per line) and then start
 QEMU with -trace events=/path/to/file.txt.

 You could also try github.com/bonzini/qemu.git, branch scsi-next (it
 shouldn't fix anything, but I added a couple more assertions).

 Paolo

It's currently a quite complicated setup. I used a virtual SCSI disk.
However Linux is happy with it and does not crash.

Also I got an assert even using scsi-hd directly (I had to disable
multipathd). It seems that the LSI Qemu driver assert when code try to
send two commands (second without waiting first completion).

Attached error with trace enabled (err.txt) and serial output with
SeaBIOS debug level set to 8.

To support these 4kb disk in SeaBIOS I just changed the line in src/blockcmd.c

   if (drive-blksize != DISK_SECTOR_SIZE) {

to

   if (drive-blksize != DISK_SECTOR_SIZE  drive-blksize != 4096) {

Frediano


serial.txt.gz
Description: GNU Zip compressed data


err.txt.gz
Description: GNU Zip compressed data


[Qemu-devel] Qemu crashed with lsi booting

2012-07-26 Thread Frediano Ziglio
I always get an assert launching this command

sudo ./x86_64-softmmu/qemu-system-x86_64 -m 1024 -hda test.qcow
-device lsi -drive file=/dev/sdb,if=none,id=XXX -device
scsi-block,drive=XXX -enable-kvm -bios ~/seabios/out/bios.bin -serial
file:out.txt
lsi_scsi: error: Multiple IO pending for request 0x7fd1075bf100
qemu-system-x86_64: /home/fredianoz/qemu/hw/lsi53c895a.c:774:
lsi_do_command: Assertion `s-current == ((void *)0)' failed.

(sometimes I don't get the Multiple IO ending line).

I'm using a recent SeaBIOS which support booting from LSI SCSI.

Qemu version

$ git branch -v
* master 61dc008 Revert audio: Make PC speaker audio card available by default

I'm using SeaBIOS commit 9d6bac1d32b72cdf7c0ad009c1371a2e69084de3
(some minor changes in order to support 4k sectors).

Adding some debugging to SeaBIOS lsi code seems that drivers send
initial INQUIRY request but after that all requests have some problems
and lead to a lot of reset command.

Frediano



[Qemu-devel] automatic exit on memory errors

2012-04-04 Thread Frediano Ziglio
Hi,
  I just realized that on any out of memory conditions Qemu exit with an error.

I found this thread on g_malloc

http://comments.gmane.org/gmane.comp.emulators.qemu/128863

but nothing related to the exit conditions.

I know that out of memory conditions is quite hard to handle but if
somebody rely to a virtual machine and at least to a clean close I
think this won't happen. Personally I think that managing this
situation in a clean way where error is tried to be reported and
cleans are tried.

If some function try to allocate a large buffer the function should
just return an error and program can report error and let use clean or
try to recover (like powering off the machine) but calling g_malloc
everywhere is not that fine.

It would at least be fine if block drivers register some atexit
function to flush every buffer they possible have.

Anybody considered this problem?

Frediano



[Qemu-devel] [PATCH] make qemu_event_handle static

2012-03-31 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 main-loop.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/main-loop.c b/main-loop.c
index db23de0..fc738d1 100644
--- a/main-loop.c
+++ b/main-loop.c
@@ -164,7 +164,7 @@ static int qemu_signal_init(void)
 
 #else /* _WIN32 */
 
-HANDLE qemu_event_handle = NULL;
+static HANDLE qemu_event_handle = NULL;
 
 static void dummy_event_handler(void *opaque)
 {
-- 
1.7.4.1




[Qemu-devel] Adding size to snapshot

2012-03-27 Thread Frediano Ziglio
Hi,
  did you add disk size to snapshot in order to support resizing of snapshot?

I remember we discussed about some months ago.

Regards
  Frediano



[Qemu-devel] [PATCH] qcow2: fix 028 iotest

2011-09-23 Thread Frediano Ziglio
This fix bound check bug accessing last cluster if image size is not
cluster aligned caused by Unlock during COW patch.

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2-cluster.c |8 ++--
 block/qcow2.c |2 +-
 block/qcow2.h |2 ++
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index c9a1b1c..78e1b49 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -295,16 +295,20 @@ static int copy_sectors(BlockDriverState *bs, uint64_t 
start_sect,
 BDRVQcowState *s = bs-opaque;
 int n, ret;
 void *buf;
+QEMUIOVector qiov;
+struct iovec iov;
 
 n = n_end - n_start;
 if (n = 0) {
 return 0;
 }
 
-buf = qemu_blockalign(bs, n * BDRV_SECTOR_SIZE);
+iov.iov_base = buf = qemu_blockalign(bs, n * BDRV_SECTOR_SIZE);
+iov.iov_len = n * BDRV_SECTOR_SIZE;
+qemu_iovec_init_external(qiov, iov, 1);
 
 BLKDBG_EVENT(bs-file, BLKDBG_COW_READ);
-ret = bdrv_read(bs, start_sect + n_start, buf, n);
+ret = qcow2_co_readv(bs, start_sect + n_start, n, qiov);
 if (ret  0) {
 goto out;
 }
diff --git a/block/qcow2.c b/block/qcow2.c
index 510ff68..fd90881 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -376,7 +376,7 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector 
*qiov,
 return n1;
 }
 
-static int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
+int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
   int remaining_sectors, QEMUIOVector *qiov)
 {
 BDRVQcowState *s = bs-opaque;
diff --git a/block/qcow2.h b/block/qcow2.h
index 531af39..7f5ff42 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -176,6 +176,8 @@ static inline int64_t align_offset(int64_t offset, int n)
 /* qcow2.c functions */
 int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
   int64_t sector_num, int nb_sectors);
+int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
+int remaining_sectors, QEMUIOVector *qiov);
 
 /* qcow2-refcount.c functions */
 int qcow2_refcount_init(BlockDriverState *bs);
-- 
1.7.1




[Qemu-devel] [PATCH] core: remove qemu_service_io

2011-09-22 Thread Frediano Ziglio
qemu_service_io was mainly an alias to qemu_notify_event,
currently used only by PPC for timer hack, so call
qemu_notify_event directly.

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 arch_init.c  |5 -
 qemu-common.h|3 ---
 qemu-tool.c  |4 
 target-ppc/kvm_ppc.c |2 +-
 4 files changed, 1 insertions(+), 13 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index 9a5a0e3..a6c69c7 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -459,11 +459,6 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
 return 0;
 }
 
-void qemu_service_io(void)
-{
-qemu_notify_event();
-}
-
 #ifdef HAS_AUDIO
 struct soundhw {
 const char *name;
diff --git a/qemu-common.h b/qemu-common.h
index 404c421..5e87bdf 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -276,9 +276,6 @@ void cpu_exec_init_all(void);
 void cpu_save(QEMUFile *f, void *opaque);
 int cpu_load(QEMUFile *f, void *opaque, int version_id);
 
-/* Force QEMU to stop what it's doing and service IO */
-void qemu_service_io(void);
-
 /* Force QEMU to process pending events */
 void qemu_notify_event(void);
 
diff --git a/qemu-tool.c b/qemu-tool.c
index eb89fe0..e9f7fe1 100644
--- a/qemu-tool.c
+++ b/qemu-tool.c
@@ -29,10 +29,6 @@ struct QEMUBH
 void *opaque;
 };
 
-void qemu_service_io(void)
-{
-}
-
 Monitor *cur_mon;
 
 int monitor_cur_is_qmp(void)
diff --git a/target-ppc/kvm_ppc.c b/target-ppc/kvm_ppc.c
index 867dc1d..c031fcb 100644
--- a/target-ppc/kvm_ppc.c
+++ b/target-ppc/kvm_ppc.c
@@ -88,7 +88,7 @@ void kvmppc_fdt_update(void *fdt)
 
 static void kvmppc_timer_hack(void *opaque)
 {
-qemu_service_io();
+qemu_notify_event();
 qemu_mod_timer(kvmppc_timer, qemu_get_clock_ns(vm_clock) + 
kvmppc_timer_rate);
 }
 
-- 
1.7.1




Re: [Qemu-devel] [PATCH] qcow2: Unlock during COW

2011-09-22 Thread Frediano Ziglio
2011/9/19 Kevin Wolf kw...@redhat.com:
 Unlocking during COW allows for more parallelism. One change it requires is
 that buffers are dynamically allocated instead of just using a per-image
 buffer.

 While touching the code, drop the synchronous qcow2_read() function and 
 replace
 it by a bdrv_read() call.

 Signed-off-by: Kevin Wolf kw...@redhat.com
 ---
  block/qcow2-cluster.c |   95 +---
  1 files changed, 26 insertions(+), 69 deletions(-)

 diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
 index c79323a..61b8e62 100644
 --- a/block/qcow2-cluster.c
 +++ b/block/qcow2-cluster.c
 @@ -289,89 +289,42 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t 
 sector_num,
     }
  }

 -
 -static int qcow2_read(BlockDriverState *bs, int64_t sector_num,
 -                      uint8_t *buf, int nb_sectors)
 -{
 -    BDRVQcowState *s = bs-opaque;
 -    int ret, index_in_cluster, n, n1;
 -    uint64_t cluster_offset;
 -    struct iovec iov;
 -    QEMUIOVector qiov;
 -
 -    while (nb_sectors  0) {
 -        n = nb_sectors;
 -
 -        ret = qcow2_get_cluster_offset(bs, sector_num  9, n,
 -            cluster_offset);
 -        if (ret  0) {
 -            return ret;
 -        }
 -
 -        index_in_cluster = sector_num  (s-cluster_sectors - 1);
 -        if (!cluster_offset) {
 -            if (bs-backing_hd) {
 -                /* read from the base image */
 -                iov.iov_base = buf;
 -                iov.iov_len = n * 512;
 -                qemu_iovec_init_external(qiov, iov, 1);
 -
 -                n1 = qcow2_backing_read1(bs-backing_hd, qiov, sector_num, 
 n);
 -                if (n1  0) {
 -                    BLKDBG_EVENT(bs-file, BLKDBG_READ_BACKING);
 -                    ret = bdrv_read(bs-backing_hd, sector_num, buf, n1);
 -                    if (ret  0)
 -                        return -1;
 -                }
 -            } else {
 -                memset(buf, 0, 512 * n);
 -            }
 -        } else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
 -            if (qcow2_decompress_cluster(bs, cluster_offset)  0)
 -                return -1;
 -            memcpy(buf, s-cluster_cache + index_in_cluster * 512, 512 * n);
 -        } else {
 -            BLKDBG_EVENT(bs-file, BLKDBG_READ);
 -            ret = bdrv_pread(bs-file, cluster_offset + index_in_cluster * 
 512, buf, n * 512);
 -            if (ret != n * 512)
 -                return -1;
 -            if (s-crypt_method) {
 -                qcow2_encrypt_sectors(s, sector_num, buf, buf, n, 0,
 -                                s-aes_decrypt_key);
 -            }
 -        }
 -        nb_sectors -= n;
 -        sector_num += n;
 -        buf += n * 512;
 -    }
 -    return 0;
 -}
 -
  static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
                         uint64_t cluster_offset, int n_start, int n_end)
  {
     BDRVQcowState *s = bs-opaque;
     int n, ret;
 +    void *buf;

     n = n_end - n_start;
 -    if (n = 0)
 +    if (n = 0) {
         return 0;
 +    }
 +
 +    buf = qemu_blockalign(bs, n * BDRV_SECTOR_SIZE);
 +
     BLKDBG_EVENT(bs-file, BLKDBG_COW_READ);
 -    ret = qcow2_read(bs, start_sect + n_start, s-cluster_data, n);
 -    if (ret  0)
 -        return ret;
 +    ret = bdrv_read(bs, start_sect + n_start, buf, n);
 +    if (ret  0) {
 +        goto out;
 +    }
 +
     if (s-crypt_method) {
         qcow2_encrypt_sectors(s, start_sect + n_start,
 -                        s-cluster_data,
 -                        s-cluster_data, n, 1,
 +                        buf, buf, n, 1,
                         s-aes_encrypt_key);
     }
 +
     BLKDBG_EVENT(bs-file, BLKDBG_COW_WRITE);
 -    ret = bdrv_write(bs-file, (cluster_offset  9) + n_start,
 -        s-cluster_data, n);
 -    if (ret  0)
 -        return ret;
 -    return 0;
 +    ret = bdrv_write(bs-file, (cluster_offset  9) + n_start, buf, n);
 +    if (ret  0) {
 +        goto out;
 +    }
 +
 +    ret = 0;
 +out:
 +    qemu_vfree(buf);
 +    return ret;
  }


 @@ -620,7 +573,9 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, 
 QCowL2Meta *m)
     start_sect = (m-offset  ~(s-cluster_size - 1))  9;
     if (m-n_start) {
         cow = true;
 +        qemu_co_mutex_unlock(s-lock);
         ret = copy_sectors(bs, start_sect, cluster_offset, 0, m-n_start);
 +        qemu_co_mutex_lock(s-lock);
         if (ret  0)
             goto err;
     }
 @@ -628,8 +583,10 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, 
 QCowL2Meta *m)
     if (m-nb_available  (s-cluster_sectors - 1)) {
         uint64_t end = m-nb_available  ~(uint64_t)(s-cluster_sectors - 1);
         cow = true;
 +        qemu_co_mutex_unlock(s-lock);
         ret = copy_sectors(bs, start_sect + end, cluster_offset + (end  9),
                 m-nb_available - end, s-cluster_sectors);
 +        qemu_co_mutex_lock(s-lock);
         if (ret  0)
             goto err;
     }
 --
 1.7.6.2



Seems good, I like the idea 

[Qemu-devel] [PATCH 0/2][RFC?] Remove SIGUSR2 from posix-aio

2011-09-19 Thread Frediano Ziglio
Now that iothread is always compiled sending a signal seems only an
additional step. This patch also avoid writing to two pipe (one from signal
and one in qemu_service_io).

Tested and works correctly with KVM enabled. Performances are only sligthly
better (as I expected). strace output is more readable.

Doubts:
- any sense having two patches and not only last one?
- comment is perhaps wrong as is also affect main core
- is ok if KVM is disabled? more testing required

Frediano Ziglio (2):
  block: avoid storing a constant in qemu_paiocb structure
  block: avoid SIGUSR2

 cpus.c |5 -
 posix-aio-compat.c |   17 -
 2 files changed, 4 insertions(+), 18 deletions(-)




[Qemu-devel] [PATCH 1/2] block: avoid storing a constant in qemu_paiocb structure

2011-09-19 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 posix-aio-compat.c |5 +
 1 files changed, 1 insertions(+), 4 deletions(-)

diff --git a/posix-aio-compat.c b/posix-aio-compat.c
index 3193dbf..7ea63a1 100644
--- a/posix-aio-compat.c
+++ b/posix-aio-compat.c
@@ -42,7 +42,6 @@ struct qemu_paiocb {
 int aio_niov;
 size_t aio_nbytes;
 #define aio_ioctl_cmd   aio_nbytes /* for QEMU_AIO_IOCTL */
-int ev_signo;
 off_t aio_offset;
 
 QTAILQ_ENTRY(qemu_paiocb) node;
@@ -381,7 +380,7 @@ static void *aio_thread(void *unused)
 aiocb-ret = ret;
 mutex_unlock(lock);
 
-if (kill(pid, aiocb-ev_signo)) die(kill failed);
+if (kill(pid, SIGUSR2)) die(kill failed);
 }
 
 cur_threads--;
@@ -623,7 +622,6 @@ BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
 return NULL;
 acb-aio_type = type;
 acb-aio_fildes = fd;
-acb-ev_signo = SIGUSR2;
 
 if (qiov) {
 acb-aio_iov = qiov-iov;
@@ -651,7 +649,6 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
 return NULL;
 acb-aio_type = QEMU_AIO_IOCTL;
 acb-aio_fildes = fd;
-acb-ev_signo = SIGUSR2;
 acb-aio_offset = 0;
 acb-aio_ioctl_buf = buf;
 acb-aio_ioctl_cmd = req;
-- 
1.7.1




[Qemu-devel] [PATCH 2/2] block: avoid SIGUSR2

2011-09-19 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 cpus.c |5 -
 posix-aio-compat.c |   14 --
 2 files changed, 4 insertions(+), 15 deletions(-)

diff --git a/cpus.c b/cpus.c
index 54c188c..d0cfe91 100644
--- a/cpus.c
+++ b/cpus.c
@@ -380,11 +380,6 @@ static int qemu_signal_init(void)
 int sigfd;
 sigset_t set;
 
-/* SIGUSR2 used by posix-aio-compat.c */
-sigemptyset(set);
-sigaddset(set, SIGUSR2);
-pthread_sigmask(SIG_UNBLOCK, set, NULL);
-
 /*
  * SIG_IPI must be blocked in the main thread and must not be caught
  * by sigwait() in the signal thread. Otherwise, the cpu thread will
diff --git a/posix-aio-compat.c b/posix-aio-compat.c
index 7ea63a1..72c6dbb 100644
--- a/posix-aio-compat.c
+++ b/posix-aio-compat.c
@@ -308,6 +308,8 @@ static ssize_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
 return nbytes;
 }
 
+static void posix_aio_notify_event(void);
+
 static void *aio_thread(void *unused)
 {
 pid_t pid;
@@ -380,7 +382,7 @@ static void *aio_thread(void *unused)
 aiocb-ret = ret;
 mutex_unlock(lock);
 
-if (kill(pid, SIGUSR2)) die(kill failed);
+posix_aio_notify_event();
 }
 
 cur_threads--;
@@ -547,7 +549,7 @@ static int posix_aio_flush(void *opaque)
 
 static PosixAioState *posix_aio_state;
 
-static void aio_signal_handler(int signum)
+static void posix_aio_notify_event(void)
 {
 if (posix_aio_state) {
 char byte = 0;
@@ -557,8 +559,6 @@ static void aio_signal_handler(int signum)
 if (ret  0  errno != EAGAIN)
 die(write());
 }
-
-qemu_service_io();
 }
 
 static void paio_remove(struct qemu_paiocb *acb)
@@ -662,7 +662,6 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
 
 int paio_init(void)
 {
-struct sigaction act;
 PosixAioState *s;
 int fds[2];
 int ret;
@@ -672,11 +671,6 @@ int paio_init(void)
 
 s = g_malloc(sizeof(PosixAioState));
 
-sigfillset(act.sa_mask);
-act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
-act.sa_handler = aio_signal_handler;
-sigaction(SIGUSR2, act, NULL);
-
 s-first_aio = NULL;
 if (qemu_pipe(fds) == -1) {
 fprintf(stderr, failed to create pipe\n);
-- 
1.7.1




[Qemu-devel] [PATCH v2] block: avoid SIGUSR2

2011-09-19 Thread Frediano Ziglio
Now that iothread is always compiled sending a signal seems only an
additional step. This patch also avoid writing to two pipe (one from signal
and one in qemu_service_io).

Work with kvm enabled or disabled. strace output is more readable (less 
syscalls).

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 cpus.c |5 -
 posix-aio-compat.c |   29 +
 2 files changed, 9 insertions(+), 25 deletions(-)

diff --git a/cpus.c b/cpus.c
index 54c188c..d0cfe91 100644
--- a/cpus.c
+++ b/cpus.c
@@ -380,11 +380,6 @@ static int qemu_signal_init(void)
 int sigfd;
 sigset_t set;
 
-/* SIGUSR2 used by posix-aio-compat.c */
-sigemptyset(set);
-sigaddset(set, SIGUSR2);
-pthread_sigmask(SIG_UNBLOCK, set, NULL);
-
 /*
  * SIG_IPI must be blocked in the main thread and must not be caught
  * by sigwait() in the signal thread. Otherwise, the cpu thread will
diff --git a/posix-aio-compat.c b/posix-aio-compat.c
index 3193dbf..185d5b2 100644
--- a/posix-aio-compat.c
+++ b/posix-aio-compat.c
@@ -42,7 +42,6 @@ struct qemu_paiocb {
 int aio_niov;
 size_t aio_nbytes;
 #define aio_ioctl_cmd   aio_nbytes /* for QEMU_AIO_IOCTL */
-int ev_signo;
 off_t aio_offset;
 
 QTAILQ_ENTRY(qemu_paiocb) node;
@@ -309,6 +308,8 @@ static ssize_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
 return nbytes;
 }
 
+static void posix_aio_notify_event(void);
+
 static void *aio_thread(void *unused)
 {
 pid_t pid;
@@ -381,7 +382,7 @@ static void *aio_thread(void *unused)
 aiocb-ret = ret;
 mutex_unlock(lock);
 
-if (kill(pid, aiocb-ev_signo)) die(kill failed);
+posix_aio_notify_event();
 }
 
 cur_threads--;
@@ -548,18 +549,14 @@ static int posix_aio_flush(void *opaque)
 
 static PosixAioState *posix_aio_state;
 
-static void aio_signal_handler(int signum)
+static void posix_aio_notify_event(void)
 {
-if (posix_aio_state) {
-char byte = 0;
-ssize_t ret;
-
-ret = write(posix_aio_state-wfd, byte, sizeof(byte));
-if (ret  0  errno != EAGAIN)
-die(write());
-}
+char byte = 0;
+ssize_t ret;
 
-qemu_service_io();
+ret = write(posix_aio_state-wfd, byte, sizeof(byte));
+if (ret  0  errno != EAGAIN)
+die(write());
 }
 
 static void paio_remove(struct qemu_paiocb *acb)
@@ -623,7 +620,6 @@ BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
 return NULL;
 acb-aio_type = type;
 acb-aio_fildes = fd;
-acb-ev_signo = SIGUSR2;
 
 if (qiov) {
 acb-aio_iov = qiov-iov;
@@ -651,7 +647,6 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
 return NULL;
 acb-aio_type = QEMU_AIO_IOCTL;
 acb-aio_fildes = fd;
-acb-ev_signo = SIGUSR2;
 acb-aio_offset = 0;
 acb-aio_ioctl_buf = buf;
 acb-aio_ioctl_cmd = req;
@@ -665,7 +660,6 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
 
 int paio_init(void)
 {
-struct sigaction act;
 PosixAioState *s;
 int fds[2];
 int ret;
@@ -675,11 +669,6 @@ int paio_init(void)
 
 s = g_malloc(sizeof(PosixAioState));
 
-sigfillset(act.sa_mask);
-act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
-act.sa_handler = aio_signal_handler;
-sigaction(SIGUSR2, act, NULL);
-
 s-first_aio = NULL;
 if (qemu_pipe(fds) == -1) {
 fprintf(stderr, failed to create pipe\n);
-- 
1.7.1




[Qemu-devel] [PATCH] removed usused offset variable

2011-09-16 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 posix-aio-compat.c |5 ++---
 1 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/posix-aio-compat.c b/posix-aio-compat.c
index 3193dbf..63a8fae 100644
--- a/posix-aio-compat.c
+++ b/posix-aio-compat.c
@@ -181,7 +181,6 @@ qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, 
off_t offset)
 
 static ssize_t handle_aiocb_rw_vector(struct qemu_paiocb *aiocb)
 {
-size_t offset = 0;
 ssize_t len;
 
 do {
@@ -189,12 +188,12 @@ static ssize_t handle_aiocb_rw_vector(struct qemu_paiocb 
*aiocb)
 len = qemu_pwritev(aiocb-aio_fildes,
aiocb-aio_iov,
aiocb-aio_niov,
-   aiocb-aio_offset + offset);
+   aiocb-aio_offset);
  else
 len = qemu_preadv(aiocb-aio_fildes,
   aiocb-aio_iov,
   aiocb-aio_niov,
-  aiocb-aio_offset + offset);
+  aiocb-aio_offset);
 } while (len == -1  errno == EINTR);
 
 if (len == -1)
-- 
1.7.1




Re: [Qemu-devel] [PATCH][RFC][0/2] REF+/REF- optimization

2011-09-15 Thread Frediano Ziglio
2011/9/14 Kevin Wolf kw...@redhat.com:
...

 But let's measure the effects first, I suspect that for cluster
 allocation it doesn't help much because every REF- comes with a REF+.


 That's 50% of effort if REF- clusters are far from REF+ :)

 I would expect that the next REF+ allocates exactly the REF- cluster.
 But you still have a point, we save the write on REF- and combine it
 with the REF+ write.


 This is still a TODO for REF+ patch.

 Actually, I was talking about the qcow2_cache_entry_mark_dirty_wb() case
 without any other change. You get it automatically then.


No, I forgot I already thought about this. REF- cluster get not reused
on normal operation, but reach 0 only during snapshot delete. This
cause if it reach 0 it means that it was 1 but if it was 1 copied flag
would be 1 and so there is no reason to decrement counter (reductio ab
asburdum).

Frediano



Re: [Qemu-devel] [PATCH][RFC][0/2] REF+/REF- optimization

2011-09-14 Thread Frediano Ziglio
2011/9/14 Kevin Wolf kw...@redhat.com:
 Am 13.09.2011 15:36, schrieb Frediano Ziglio:
 2011/9/13 Kevin Wolf kw...@redhat.com:
 Am 13.09.2011 09:53, schrieb Frediano Ziglio:
 These patches try to trade-off between leaks and speed for clusters
 refcounts.

 Refcount increments (REF+ or refp) are handled in a different way from
 decrements (REF- or refm). The reason it that posting or not flushing
 a REF- cause just a leak while posting a REF+ cause a corruption.

 To optimize REF- I just used an array to store offsets then when a
 flush is requested or array reach a limit (currently 1022) the array
 is sorted and written to disk. I use an array with offset instead of
 ranges to support compression (an offset could appear multiple times
 in the array).
 I consider this patch quite ready.

 Ok, first of all let's clarify what this optimises. I don't think it
 changes anything at all for the writeback cache modes, because these
 already do most operations in memory only. So this must be about
 optimising some operations with cache=writethrough. REF- isn't about
 normal cluster allocation, it is about COW with internal snapshots or
 bdrv_discard. Do you have benchmarks for any of them?

 I strongly disagree with your approach for REF-. We already have a
 cache, and introducing a second one sounds like a bad idea. I think we
 could get a very similar effect if we introduced a
 qcow2_cache_entry_mark_dirty_wb() that marks a given refcount block as
 dirty, but at the same time tells the cache that even in write-through
 mode it can still treat this block as write-back. This should require
 much less code changes.


 Yes, mainly optimize for writethrough. I did not test with writeback
 but should improve even this (I think here you have some flush to keep
 consistency).
 I'll try to write a qcow2_cache_entry_mark_dirty_wb patch and test it.

 Great, thanks!


Don't expect however the patch too soon, I'm quite busy in these days.

 But let's measure the effects first, I suspect that for cluster
 allocation it doesn't help much because every REF- comes with a REF+.


 That's 50% of effort if REF- clusters are far from REF+ :)

 I would expect that the next REF+ allocates exactly the REF- cluster.
 But you still have a point, we save the write on REF- and combine it
 with the REF+ write.


This is still a TODO for REF+ patch.

Oh... time ago looking at refcount code I realize that a single
deallocation could be reused in some cases only after Qemu restart.
For instance
- got a single cluster REF- which take refcount to 0
- free_cluster_index get decreased to this index
- we get a new cluster request for 2 clusters
- free_cluster_index get increased
we skip freed deallocation and if we don't get a new deallocation for
a cluster with index minor to our freed cluster this cluster is not
reused.
(I didn't test this behavior, no leak, no corruption, just image could
be larger then expected)

 To optimize REF+ I mark a range as allocated and use this range to
 get new ones (avoiding writing refcount to disk). When a flush is
 requested or in some situations (like snapshot) this cache is disabled
 and flushed (written as REF-).
 I do not consider this patch ready, it works and pass all io-tests
 but for instance I would avoid allocating new clusters for refcount
 during preallocation.

 The only question here is if improving cache=writethrough cluster
 allocation performance is worth the additional complexity in the already
 complex refcounting code.


 I didn't see this optimization as a second level cache, but yes, for
 REF- is a second cache.

 The alternative that was discussed before is the dirty bit approach that
 is used in QED and would allow us to use writeback for all refcount
 blocks, regardless of REF- or REF+. It would be an easier approach
 requiring less code changes, but it comes with the cost of requiring an
 fsck after a qemu crash.


 I was thinking about changing the header magic first time we change
 refcount in order to mark image as dirty so newer Qemu recognize the
 flag while former one does not recognize image. Obviously reverting
 magic on image close.

 We've discussed this idea before and I think it wasn't considered a
 great idea to automagically change the header in an incompatible way.
 But we can always say that for improved performance you need to upgrade
 your image to qcow2 v3.


I don't understand why there is not a wiki page for detailed qcow3
changes. I saw your post on May. I follow this ML since August so I
think I missed a lot of discussion on qcow improves.

 End speed up is quite visible allocating clusters (more then 20%).

 What benchmark do you use for testing this?

 Kevin


 Currently I'm using bonnie++ but I noted similar improves with iozone.
 The test script format an image then launch a Linux machine which run
 a script and save result to a file.
 The test image is seems by this virtual machine as a separate disk.
 The file on hist reside in a separate LV.
 I got quite

Re: [Qemu-devel] [PATCH][RFC][0/2] REF+/REF- optimization

2011-09-14 Thread Frediano Ziglio
2011/9/14 Kevin Wolf kw...@redhat.com:
... omissis...

 To optimize REF+ I mark a range as allocated and use this range to
 get new ones (avoiding writing refcount to disk). When a flush is
 requested or in some situations (like snapshot) this cache is disabled
 and flushed (written as REF-).
 I do not consider this patch ready, it works and pass all io-tests
 but for instance I would avoid allocating new clusters for refcount
 during preallocation.

 The only question here is if improving cache=writethrough cluster
 allocation performance is worth the additional complexity in the already
 complex refcounting code.


 I didn't see this optimization as a second level cache, but yes, for
 REF- is a second cache.

 The alternative that was discussed before is the dirty bit approach that
 is used in QED and would allow us to use writeback for all refcount
 blocks, regardless of REF- or REF+. It would be an easier approach
 requiring less code changes, but it comes with the cost of requiring an
 fsck after a qemu crash.


 I was thinking about changing the header magic first time we change
 refcount in order to mark image as dirty so newer Qemu recognize the
 flag while former one does not recognize image. Obviously reverting
 magic on image close.

 We've discussed this idea before and I think it wasn't considered a
 great idea to automagically change the header in an incompatible way.
 But we can always say that for improved performance you need to upgrade
 your image to qcow2 v3.


 I don't understand why there is not a wiki page for detailed qcow3
 changes. I saw your post on May. I follow this ML since August so I
 think I missed a lot of discussion on qcow improves.

 Unfortunately there have been almost no comments, so you can consider
 RFC v2 as the current proposal.


:(

 End speed up is quite visible allocating clusters (more then 20%).

 What benchmark do you use for testing this?

 Kevin


 Currently I'm using bonnie++ but I noted similar improves with iozone.
 The test script format an image then launch a Linux machine which run
 a script and save result to a file.
 The test image is seems by this virtual machine as a separate disk.
 The file on hist reside in a separate LV.
 I got quite consistent results (of course not working on the machine
 while testing, is not actually dedicated to this job).

 Actually I'm running the test (added a test working in a snapshot image).

 Okay. Let me guess the remaining variables: The image is on an ext4 host
 filesystem, you use cache=writethrough and virtio-blk. You don't use
 backing files, compression and encryption. For your tests with internal
 snapshots you have exactly one internal snapshot that is taken
 immediately before the benchmark. Oh, and not to forget, KVM is enabled.

 Are these assumptions correct?


 change ext4 and put xfs and assumptions are ok. Yes I use internal
 snapshots (REF- are useful only in this case). To produce qcow2s I
 use these commands

 $QEMU_IMG create -f qcow2 -o preallocation=metadata $FILE 15g
 $QEMU_IMG snapshot -c test $FILE

 Ah, using preallocation for this is a nice trick. :-)

 Anyway, thanks, I think I understand now what you're measuring.


Yes, and real performance are even worst cause preallocating only
metadata you get many less physical read from disk.

 I run again tests (yesterdays test was not that consistence, today I
 stopped all unneeded services and avoid the X session tunneled by
 ssh).

 without patched
 run   raw    qcow2   qcow2s
 1     22758  4206    1054
 2     28401  21745 17997

 with patches (ref-,ref+)
 run   raw    qcow2   qcow2s
 1     22563  4965    4382
 2     23823  21043 20904

 beside I still don't understand difference in second runs for raw
 (about 20%!!!) this confirms the huge improves with snapshot.

 Measuring with cache=writethrough means that you use the host page
 cache, so I think that could be where you get the difference for raw.
 Numbers should be more consistent for cache=none, but that's not what we
 want to test...

 Did you also test how each patch performs if applied on its own, without
 the other one? It would be interesting to see how much REF- optimisation
 contributes and how much REF+.

 Kevin


Here you are

only ref-
run   rawqcow2   qcow2s
1 22579  40724053
2 30322  19667   16830

so mostly REF- for snapshot.

Frediano



[Qemu-devel] [PATCH][RFC][0/2] REF+/REF- optimization

2011-09-13 Thread Frediano Ziglio
These patches try to trade-off between leaks and speed for clusters
refcounts.

Refcount increments (REF+ or refp) are handled in a different way from
decrements (REF- or refm). The reason it that posting or not flushing
a REF- cause just a leak while posting a REF+ cause a corruption.

To optimize REF- I just used an array to store offsets then when a
flush is requested or array reach a limit (currently 1022) the array
is sorted and written to disk. I use an array with offset instead of
ranges to support compression (an offset could appear multiple times
in the array).
I consider this patch quite ready.

To optimize REF+ I mark a range as allocated and use this range to
get new ones (avoiding writing refcount to disk). When a flush is
requested or in some situations (like snapshot) this cache is disabled
and flushed (written as REF-).
I do not consider this patch ready, it works and pass all io-tests
but for instance I would avoid allocating new clusters for refcount
during preallocation.

End speed up is quite visible allocating clusters (more then 20%).


Frediano Ziglio (2):
  qcow2: optimize refminus updates
  qcow2: ref+ optimization

 block/qcow2-refcount.c |  270 +---
 block/qcow2.c  |2 +
 block/qcow2.h  |   16 +++
 3 files changed, 275 insertions(+), 13 deletions(-)




[Qemu-devel] [PATCH][RFC][1/2] qcow2: optimize refminus updates

2011-09-13 Thread Frediano Ziglio
Cache refcount decrement in an array to trade-off between
leaks and speed.

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2-refcount.c |  142 ++--
 block/qcow2.c  |1 +
 block/qcow2.h  |   14 +
 3 files changed, 153 insertions(+), 4 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 9605367..7d59b68 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -40,6 +40,13 @@ int qcow2_refcount_init(BlockDriverState *bs)
 BDRVQcowState *s = bs-opaque;
 int ret, refcount_table_size2, i;
 
+s-refm_cache_index = 0;
+s-refm_cache_len = 1024;
+s-refm_cache = g_malloc(s-refm_cache_len * sizeof(uint64));
+if (!s-refm_cache) {
+goto fail;
+}
+
 refcount_table_size2 = s-refcount_table_size * sizeof(uint64_t);
 s-refcount_table = g_malloc(refcount_table_size2);
 if (s-refcount_table_size  0) {
@@ -53,12 +60,14 @@ int qcow2_refcount_init(BlockDriverState *bs)
 }
 return 0;
  fail:
+g_free(s-refm_cache);
 return -ENOMEM;
 }
 
 void qcow2_refcount_close(BlockDriverState *bs)
 {
 BDRVQcowState *s = bs-opaque;
+g_free(s-refm_cache);
 g_free(s-refcount_table);
 }
 
@@ -634,13 +643,21 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
 void qcow2_free_clusters(BlockDriverState *bs,
   int64_t offset, int64_t size)
 {
+BDRVQcowState *s = bs-opaque;
 int ret;
+int64_t start, last;
 
 BLKDBG_EVENT(bs-file, BLKDBG_CLUSTER_FREE);
-ret = update_refcount(bs, offset, size, -1);
-if (ret  0) {
-fprintf(stderr, qcow2_free_clusters failed: %s\n, strerror(-ret));
-/* TODO Remember the clusters to free them later and avoid leaking */
+start = offset  ~(s-cluster_size - 1);
+last = (offset + size - 1)  ~(s-cluster_size - 1);
+for (; start = last; start += s-cluster_size) {
+ret = qcow2_refm_add(bs, start);
+if (ret  0) {
+fprintf(stderr, qcow2_free_clusters failed: %s\n, 
strerror(-ret));
+/* TODO Remember the clusters to free them later
+ * and avoid leaking */
+break;
+}
 }
 }
 
@@ -1165,3 +1182,120 @@ fail:
 return ret;
 }
 
+int qcow2_refm_add_any(BlockDriverState *bs, int64_t offset)
+{
+BDRVQcowState *s = bs-opaque;
+
+offset = ~QCOW_OFLAG_COPIED;
+if (s-refm_cache_index + 2  s-refm_cache_len) {
+int ret = qcow2_refm_flush(bs);
+if (ret  0) {
+return ret;
+}
+}
+
+if ((offset  QCOW_OFLAG_COMPRESSED)) {
+int nb_csectors = ((offset  s-csize_shift)  s-csize_mask) + 1;
+int64_t last;
+
+offset = (offset  s-cluster_offset_mask)  ~511;
+last  = offset + nb_csectors * 512 - 1;
+if (!in_same_refcount_block(s, offset, last)) {
+s-refm_cache[s-refm_cache_index++] = last;
+}
+}
+s-refm_cache[s-refm_cache_index++] = offset;
+return 0;
+}
+
+static int uint64_cmp(const void *a, const void *b)
+{
+#define A (*((const uint64_t *)a))
+#define B (*((const uint64_t *)b))
+if (A == B) {
+return 0;
+}
+return A  B ? 1 : -1;
+#undef A
+#undef B
+}
+
+int qcow2_refm_flush(BlockDriverState *bs)
+{
+BDRVQcowState *s = bs-opaque;
+uint16_t *refcount_block = NULL;
+int64_t old_table_index = -1;
+int ret, i, saved_index = 0;
+int len = s-refm_cache_index;
+
+/* sort cache */
+qsort(s-refm_cache, len, sizeof(uint64_t), uint64_cmp);
+
+/* save */
+for (i = 0; i  len; ++i) {
+uint64_t cluster_offset = s-refm_cache[i];
+int block_index, refcount;
+int64_t cluster_index = cluster_offset  s-cluster_bits;
+int64_t table_index =
+cluster_index  (s-cluster_bits - REFCOUNT_SHIFT);
+
+/* Load the refcount block and allocate it if needed */
+if (table_index != old_table_index) {
+if (refcount_block) {
+ret = qcow2_cache_put(bs, s-refcount_block_cache,
+(void **) refcount_block);
+if (ret  0) {
+goto fail;
+}
+saved_index = i;
+refcount_block = NULL;
+}
+
+ret = alloc_refcount_block(bs, cluster_index, refcount_block);
+if (ret  0) {
+goto fail;
+}
+}
+old_table_index = table_index;
+
+qcow2_cache_entry_mark_dirty(s-refcount_block_cache, refcount_block);
+
+/* we can update the count and save it */
+block_index = cluster_index 
+((1  (s-cluster_bits - REFCOUNT_SHIFT)) - 1);
+
+refcount = be16_to_cpu(refcount_block[block_index]);
+refcount--;
+if (refcount  0) {
+ret = -EINVAL;
+goto fail;
+}
+if (refcount == 0  cluster_index  s-free_cluster_index) {
+s

[Qemu-devel] [PATCH][RFC][2/2] qcow2: ref+ optimization

2011-09-13 Thread Frediano Ziglio
preallocate multiple refcount increment in order to collapse
allocation writes. This cause leaks in case of Qemu crash but
no corruptions.

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2-refcount.c |  128 ---
 block/qcow2.c  |1 +
 block/qcow2.h  |2 +
 3 files changed, 122 insertions(+), 9 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 7d59b68..3792cda 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -30,6 +30,7 @@ static int64_t alloc_clusters_noref(BlockDriverState *bs, 
int64_t size);
 static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
 int64_t offset, int64_t length,
 int addend);
+static void qcow2_refp_enable(BlockDriverState *bs);
 
 
 /*/
@@ -117,6 +118,12 @@ static int get_refcount(BlockDriverState *bs, int64_t 
cluster_index)
 ((1  (s-cluster_bits - REFCOUNT_SHIFT)) - 1);
 refcount = be16_to_cpu(refcount_block[block_index]);
 
+/* ignore preallocation */
+if (cluster_index = s-refp_prealloc_begin
+ cluster_index  s-refp_prealloc_end) {
+--refcount;
+}
+
 ret = qcow2_cache_put(bs, s-refcount_block_cache,
 (void**) refcount_block);
 if (ret  0) {
@@ -207,6 +214,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
  *   refcount block into the cache
  */
 
+uint64_t old_free_cluster_index = s-free_cluster_index;
+qcow2_refp_flush(bs);
+s-free_cluster_index = old_free_cluster_index;
+
 *refcount_block = NULL;
 
 /* We write to the refcount table, so we might depend on L2 tables */
@@ -215,6 +226,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
 /* Allocate the refcount block itself and mark it as used */
 int64_t new_block = alloc_clusters_noref(bs, s-cluster_size);
 if (new_block  0) {
+qcow2_refp_enable(bs);
 return new_block;
 }
 
@@ -279,6 +291,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
 }
 
 s-refcount_table[refcount_table_index] = new_block;
+qcow2_refp_enable(bs);
 return 0;
 }
 
@@ -400,10 +413,11 @@ static int alloc_refcount_block(BlockDriverState *bs,
 s-refcount_table_offset = table_offset;
 
 /* Free old table. Remember, we must not change free_cluster_index */
-uint64_t old_free_cluster_index = s-free_cluster_index;
+old_free_cluster_index = s-free_cluster_index;
 qcow2_free_clusters(bs, old_table_offset, old_table_size * 
sizeof(uint64_t));
 s-free_cluster_index = old_free_cluster_index;
 
+qcow2_refp_enable(bs);
 ret = load_refcount_block(bs, new_block, (void**) refcount_block);
 if (ret  0) {
 return ret;
@@ -417,6 +431,7 @@ fail_block:
 if (*refcount_block != NULL) {
 qcow2_cache_put(bs, s-refcount_block_cache, (void**) refcount_block);
 }
+qcow2_refp_enable(bs);
 return ret;
 }
 
@@ -529,9 +544,23 @@ static int update_cluster_refcount(BlockDriverState *bs,
 BDRVQcowState *s = bs-opaque;
 int ret;
 
-ret = update_refcount(bs, cluster_index  s-cluster_bits, 1, addend);
-if (ret  0) {
-return ret;
+/* handle preallocation */
+if (cluster_index = s-refp_prealloc_begin
+ cluster_index  s-refp_prealloc_end) {
+
+/* free previous (should never happen) */
+int64_t index = s-refp_prealloc_begin;
+for (; index  cluster_index; ++index) {
+qcow2_refm_add(bs, index  s-cluster_bits);
+}
+addend--;
+s-refp_prealloc_begin = cluster_index + 1;
+}
+if (addend) {
+ret = update_refcount(bs, cluster_index  s-cluster_bits, 1, addend);
+if (ret  0) {
+return ret;
+}
 }
 
 bdrv_flush(bs-file);
@@ -572,20 +601,94 @@ retry:
 return (s-free_cluster_index - nb_clusters)  s-cluster_bits;
 }
 
+static void qcow2_refp_enable(BlockDriverState *bs)
+{
+BDRVQcowState *s = bs-opaque;
+
+if (s-refp_prealloc_end  0) {
+/* enable again ? */
+if (++s-refp_prealloc_end == 0) {
+s-refp_prealloc_end =
+s-refp_prealloc_begin;
+}
+}
+}
+
+int qcow2_refp_flush(BlockDriverState *bs)
+{
+BDRVQcowState *s = bs-opaque;
+int64_t index, end = s-refp_prealloc_end;
+
+if (end  0) {
+s-refp_prealloc_end = end - 1;
+return 0;
+}
+
+index = s-refp_prealloc_begin;
+/* this disable next allocations */
+s-refp_prealloc_end = -1;
+for (; index  end; ++index) {
+qcow2_refm_add(bs, index  s-cluster_bits);
+}
+qcow2_refm_flush(bs);
+return 0;
+}
+
 int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
 {
-int64_t offset;
-int ret;
+BDRVQcowState *s = bs-opaque;
+int64_t offset, cluster_index;
+int ret, nb_clusters

Re: [Qemu-devel] [PATCH] qcow2: fix range check

2011-09-13 Thread Frediano Ziglio
2011/9/12 Kevin Wolf kw...@redhat.com:
 Am 10.09.2011 10:23, schrieb Frediano Ziglio:
 QCowL2Meta::offset is not cluster aligned but only sector aligned
 however nb_clusters count cluster from cluster start.
 This fix range check. Note that old code have no corruption issues
 related to this check cause it only cause intersection to occur
 when shouldn't.

 Are you sure? See below. (I think it doesn't corrupt the image, but for
 a different reason)


 Signed-off-by: Frediano Ziglio fredd...@gmail.com
 ---
  block/qcow2-cluster.c |   14 +++---
  1 files changed, 7 insertions(+), 7 deletions(-)

 diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
 index 428b5ad..2f76311 100644
 --- a/block/qcow2-cluster.c
 +++ b/block/qcow2-cluster.c
 @@ -776,17 +776,17 @@ again:
       */
      QLIST_FOREACH(old_alloc, s-cluster_allocs, next_in_flight) {

 -        uint64_t end_offset = offset + nb_clusters * s-cluster_size;
 -        uint64_t old_offset = old_alloc-offset;
 -        uint64_t old_end_offset = old_alloc-offset +
 -            old_alloc-nb_clusters * s-cluster_size;
 +        uint64_t start = offset  s-cluster_bits;
 +        uint64_t end = start + nb_clusters;
 +        uint64_t old_start = old_alloc-offset  s-cluster_bits;
 +        uint64_t old_end = old_start + old_alloc-nb_clusters;

 -        if (end_offset  old_offset || offset  old_end_offset) {
 +        if (end  old_start || start  old_end) {
              /* No intersection */

 Consider request A from 0x0 + 0x1000 bytes and request B from 0x2000 +
 0x1000 bytes. Both touch the same cluster and therefore should be
 serialised, but 0x2000  0x1000, so we decided here that there is no
 intersection and we don't have to care.

 Note that this doesn't corrupt the image, qcow2 can handle parallel
 requests allocating the same cluster. In qcow2_alloc_cluster_link_l2()
 we get an additional COW operation, so performance will be hurt, but
 correctness is maintained.


I tested this adding some printf and also with strace and I can
confirm that current code serialize allocation.
Using ranges A (0-0x1000) and B (0x2000-0x3000) and assuming 0x1
(64k) as cluster size you get
A:
   offset 0
   nb_clusters 1
B:
  offset 0x2000
  nb_clusters 1

So without the patch you get two ranges
A: 0-0x1
B: 0x2000-0x12000
which intersects.

          } else {
 -            if (offset  old_offset) {
 +            if (start  old_start) {
                  /* Stop at the start of a running allocation */
 -                nb_clusters = (old_offset - offset)  s-cluster_bits;
 +                nb_clusters = old_start - start;
              } else {
                  nb_clusters = 0;
              }

 Anyway, the patch looks good. Applied to the block branch.

 Kevin


Oh... I realize that ranges are [start, end) (end not inclusive) so
intersection test should be

   if (end = old_start || start = old_end) {

intead of

if (end  old_start || start  old_end) {

However I don't understand why I got some small speed penalty with
this change (only done some small tests).

Frediano



[Qemu-devel] qcow2: snapshot and resize possible?

2011-09-13 Thread Frediano Ziglio
Looking at block TODOs I saw qcow2 resize with snapshot. However I
would ask if this is technical possible with current format. The
reason is that snapshots have no size (only l1_size, QCowHeader have a
size field) however I think that size if part of machine state and is
not possible to compute exact size from l1_size.
Should this resize be posted to qcow3 update?

Frediano



Re: [Qemu-devel] [PATCH][RFC][0/2] REF+/REF- optimization

2011-09-13 Thread Frediano Ziglio
2011/9/13 Kevin Wolf kw...@redhat.com:
 Am 13.09.2011 09:53, schrieb Frediano Ziglio:
 These patches try to trade-off between leaks and speed for clusters
 refcounts.

 Refcount increments (REF+ or refp) are handled in a different way from
 decrements (REF- or refm). The reason it that posting or not flushing
 a REF- cause just a leak while posting a REF+ cause a corruption.

 To optimize REF- I just used an array to store offsets then when a
 flush is requested or array reach a limit (currently 1022) the array
 is sorted and written to disk. I use an array with offset instead of
 ranges to support compression (an offset could appear multiple times
 in the array).
 I consider this patch quite ready.

 Ok, first of all let's clarify what this optimises. I don't think it
 changes anything at all for the writeback cache modes, because these
 already do most operations in memory only. So this must be about
 optimising some operations with cache=writethrough. REF- isn't about
 normal cluster allocation, it is about COW with internal snapshots or
 bdrv_discard. Do you have benchmarks for any of them?

 I strongly disagree with your approach for REF-. We already have a
 cache, and introducing a second one sounds like a bad idea. I think we
 could get a very similar effect if we introduced a
 qcow2_cache_entry_mark_dirty_wb() that marks a given refcount block as
 dirty, but at the same time tells the cache that even in write-through
 mode it can still treat this block as write-back. This should require
 much less code changes.


Yes, mainly optimize for writethrough. I did not test with writeback
but should improve even this (I think here you have some flush to keep
consistency).
I'll try to write a qcow2_cache_entry_mark_dirty_wb patch and test it.

 But let's measure the effects first, I suspect that for cluster
 allocation it doesn't help much because every REF- comes with a REF+.


That's 50% of effort if REF- clusters are far from REF+ :)

 To optimize REF+ I mark a range as allocated and use this range to
 get new ones (avoiding writing refcount to disk). When a flush is
 requested or in some situations (like snapshot) this cache is disabled
 and flushed (written as REF-).
 I do not consider this patch ready, it works and pass all io-tests
 but for instance I would avoid allocating new clusters for refcount
 during preallocation.

 The only question here is if improving cache=writethrough cluster
 allocation performance is worth the additional complexity in the already
 complex refcounting code.


I didn't see this optimization as a second level cache, but yes, for
REF- is a second cache.

 The alternative that was discussed before is the dirty bit approach that
 is used in QED and would allow us to use writeback for all refcount
 blocks, regardless of REF- or REF+. It would be an easier approach
 requiring less code changes, but it comes with the cost of requiring an
 fsck after a qemu crash.


I was thinking about changing the header magic first time we change
refcount in order to mark image as dirty so newer Qemu recognize the
flag while former one does not recognize image. Obviously reverting
magic on image close.

 End speed up is quite visible allocating clusters (more then 20%).

 What benchmark do you use for testing this?

 Kevin


Currently I'm using bonnie++ but I noted similar improves with iozone.
The test script format an image then launch a Linux machine which run
a script and save result to a file.
The test image is seems by this virtual machine as a separate disk.
The file on hist reside in a separate LV.
I got quite consistent results (of course not working on the machine
while testing, is not actually dedicated to this job).

Actually I'm running the test (added a test working in a snapshot image).

Frediano



Re: [Qemu-devel] [PATCH][RFC][0/2] REF+/REF- optimization

2011-09-13 Thread Frediano Ziglio
2011/9/13 Kevin Wolf kw...@redhat.com:
 Am 13.09.2011 09:53, schrieb Frediano Ziglio:
 These patches try to trade-off between leaks and speed for clusters
 refcounts.

 Refcount increments (REF+ or refp) are handled in a different way from
 decrements (REF- or refm). The reason it that posting or not flushing
 a REF- cause just a leak while posting a REF+ cause a corruption.

 To optimize REF- I just used an array to store offsets then when a
 flush is requested or array reach a limit (currently 1022) the array
 is sorted and written to disk. I use an array with offset instead of
 ranges to support compression (an offset could appear multiple times
 in the array).
 I consider this patch quite ready.

 Ok, first of all let's clarify what this optimises. I don't think it
 changes anything at all for the writeback cache modes, because these
 already do most operations in memory only. So this must be about
 optimising some operations with cache=writethrough. REF- isn't about
 normal cluster allocation, it is about COW with internal snapshots or
 bdrv_discard. Do you have benchmarks for any of them?

 I strongly disagree with your approach for REF-. We already have a
 cache, and introducing a second one sounds like a bad idea. I think we
 could get a very similar effect if we introduced a
 qcow2_cache_entry_mark_dirty_wb() that marks a given refcount block as
 dirty, but at the same time tells the cache that even in write-through
 mode it can still treat this block as write-back. This should require
 much less code changes.

 But let's measure the effects first, I suspect that for cluster
 allocation it doesn't help much because every REF- comes with a REF+.

 To optimize REF+ I mark a range as allocated and use this range to
 get new ones (avoiding writing refcount to disk). When a flush is
 requested or in some situations (like snapshot) this cache is disabled
 and flushed (written as REF-).
 I do not consider this patch ready, it works and pass all io-tests
 but for instance I would avoid allocating new clusters for refcount
 during preallocation.

 The only question here is if improving cache=writethrough cluster
 allocation performance is worth the additional complexity in the already
 complex refcounting code.

 The alternative that was discussed before is the dirty bit approach that
 is used in QED and would allow us to use writeback for all refcount
 blocks, regardless of REF- or REF+. It would be an easier approach
 requiring less code changes, but it comes with the cost of requiring an
 fsck after a qemu crash.

 End speed up is quite visible allocating clusters (more then 20%).

 What benchmark do you use for testing this?

 Kevin


Here you are some results (kb/s)

with patch (ref-, ref+), qcow2s is qcow2 with a snapshot

run   rawqcow2  qcow2s
1 22748  4878   4792
2 29557  15839  23144

without
run   rawqcow2  qcow2s
1 21979  4308   1021
2 26249  13776  24182

Frediano



[Qemu-devel] [PATCH] qcow2: fix range check

2011-09-10 Thread Frediano Ziglio
QCowL2Meta::offset is not cluster aligned but only sector aligned
however nb_clusters count cluster from cluster start.
This fix range check. Note that old code have no corruption issues
related to this check cause it only cause intersection to occur
when shouldn't.

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2-cluster.c |   14 +++---
 1 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 428b5ad..2f76311 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -776,17 +776,17 @@ again:
  */
 QLIST_FOREACH(old_alloc, s-cluster_allocs, next_in_flight) {
 
-uint64_t end_offset = offset + nb_clusters * s-cluster_size;
-uint64_t old_offset = old_alloc-offset;
-uint64_t old_end_offset = old_alloc-offset +
-old_alloc-nb_clusters * s-cluster_size;
+uint64_t start = offset  s-cluster_bits;
+uint64_t end = start + nb_clusters;
+uint64_t old_start = old_alloc-offset  s-cluster_bits;
+uint64_t old_end = old_start + old_alloc-nb_clusters;
 
-if (end_offset  old_offset || offset  old_end_offset) {
+if (end  old_start || start  old_end) {
 /* No intersection */
 } else {
-if (offset  old_offset) {
+if (start  old_start) {
 /* Stop at the start of a running allocation */
-nb_clusters = (old_offset - offset)  s-cluster_bits;
+nb_clusters = old_start - start;
 } else {
 nb_clusters = 0;
 }
-- 
1.7.1




[Qemu-devel] [PATCH] qcow2: align cluster_data to block to improve performance using O_DIRECT

2011-09-10 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |   14 +++---
 1 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 8aed310..510ff68 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -237,7 +237,7 @@ static int qcow2_open(BlockDriverState *bs, int flags)
 
 s-cluster_cache = g_malloc(s-cluster_size);
 /* one more sector for decompressed data alignment */
-s-cluster_data = g_malloc(QCOW_MAX_CRYPT_CLUSTERS * s-cluster_size
+s-cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * 
s-cluster_size
   + 512);
 s-cluster_cache_offset = -1;
 
@@ -296,7 +296,7 @@ static int qcow2_open(BlockDriverState *bs, int flags)
 qcow2_cache_destroy(bs, s-l2_table_cache);
 }
 g_free(s-cluster_cache);
-g_free(s-cluster_data);
+qemu_vfree(s-cluster_data);
 return ret;
 }
 
@@ -456,7 +456,7 @@ static int qcow2_co_readv(BlockDriverState *bs, int64_t 
sector_num,
  */
 if (!cluster_data) {
 cluster_data =
-g_malloc0(QCOW_MAX_CRYPT_CLUSTERS * s-cluster_size);
+qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * 
s-cluster_size);
 }
 
 assert(cur_nr_sectors =
@@ -496,7 +496,7 @@ fail:
 qemu_co_mutex_unlock(s-lock);
 
 qemu_iovec_destroy(hd_qiov);
-g_free(cluster_data);
+qemu_vfree(cluster_data);
 
 return ret;
 }
@@ -566,7 +566,7 @@ static int qcow2_co_writev(BlockDriverState *bs,
 
 if (s-crypt_method) {
 if (!cluster_data) {
-cluster_data = g_malloc0(QCOW_MAX_CRYPT_CLUSTERS *
+cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS *
  s-cluster_size);
 }
 
@@ -611,7 +611,7 @@ fail:
 qemu_co_mutex_unlock(s-lock);
 
 qemu_iovec_destroy(hd_qiov);
-g_free(cluster_data);
+qemu_vfree(cluster_data);
 
 return ret;
 }
@@ -628,7 +628,7 @@ static void qcow2_close(BlockDriverState *bs)
 qcow2_cache_destroy(bs, s-refcount_block_cache);
 
 g_free(s-cluster_cache);
-g_free(s-cluster_data);
+qemu_vfree(s-cluster_data);
 qcow2_refcount_close(bs);
 }
 
-- 
1.7.1




[Qemu-devel] [PATCH] qcow2: initialize metadata before inserting in cluster_allocs

2011-09-08 Thread Frediano Ziglio
QCow2Meta structure was inserted into list before many fields are
initialized. Currently is not a problem cause all occur in a lock
but if qcow2_alloc_clusters would in a future unlock this lock
some issues could arise.
Initializing fields before inserting fix the problem.

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2-cluster.c |   10 +-
 1 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 113db8b..428b5ad 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -806,6 +806,11 @@ again:
 abort();
 }
 
+/* save info needed for meta data update */
+m-offset = offset;
+m-n_start = n_start;
+m-nb_clusters = nb_clusters;
+
 QLIST_INSERT_HEAD(s-cluster_allocs, m, next_in_flight);
 
 /* allocate a new cluster */
@@ -816,11 +821,6 @@ again:
 goto fail;
 }
 
-/* save info needed for meta data update */
-m-offset = offset;
-m-n_start = n_start;
-m-nb_clusters = nb_clusters;
-
 out:
 ret = qcow2_cache_put(bs, s-l2_table_cache, (void**) l2_table);
 if (ret  0) {
-- 
1.7.1




[Qemu-devel] [PATCH] qcow2: removed unused depends_on field

2011-09-07 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2-cluster.c |3 +--
 block/qcow2.h |1 -
 2 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index e06be64..113db8b 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -694,7 +694,7 @@ err:
  * If the offset is not found, allocate a new cluster.
  *
  * If the cluster was already allocated, m-nb_clusters is set to 0,
- * m-depends_on is set to NULL and the other fields in m are meaningless.
+ * other fields in m are meaningless.
  *
  * If the cluster is newly allocated, m-nb_clusters is set to the number of
  * contiguous clusters that have been allocated. In this case, the other
@@ -736,7 +736,6 @@ again:
 
 cluster_offset = ~QCOW_OFLAG_COPIED;
 m-nb_clusters = 0;
-m-depends_on = NULL;
 
 goto out;
 }
diff --git a/block/qcow2.h b/block/qcow2.h
index c8ca3bc..531af39 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -148,7 +148,6 @@ typedef struct QCowL2Meta
 int n_start;
 int nb_available;
 int nb_clusters;
-struct QCowL2Meta *depends_on;
 CoQueue dependent_requests;
 
 QLIST_ENTRY(QCowL2Meta) next_in_flight;
-- 
1.7.1




[Qemu-devel] Suspicious code in qcow2.

2011-09-07 Thread Frediano Ziglio
Actually it does not cause problems but this code order seems a bit
wrong to me (block/qcow2-cluster.c)


QLIST_INSERT_HEAD(s-cluster_allocs, m, next_in_flight);

/* allocate a new cluster */

cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s-cluster_size);
if (cluster_offset  0) {
ret = cluster_offset;
goto fail;
}

/* save info needed for meta data update */
m-offset = offset;
m-n_start = n_start;
m-nb_clusters = nb_clusters;


current metadata (m) get inserted in cluster allocation list with
nb_clusters set to 0. Loop on cluster_allocs ignore (wait for this
allocation or just skip it depending on dirty data in offset field)
this metadata. Currently all occur in a CoMutex so this does not cause
problems but in case qcow2_alloc_clusters unlock the mutex it can
occur to insert two overlapping updates into cluster_allocs. Perhaps a
better order would be


/* save info needed for meta data update */
m-offset = offset;
m-n_start = n_start;
m-nb_clusters = nb_clusters;

QLIST_INSERT_HEAD(s-cluster_allocs, m, next_in_flight);

/* allocate a new cluster */

cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s-cluster_size);
if (cluster_offset  0) {
ret = cluster_offset;
goto fail;
}


(tested successfully with iotests suite)

Frediano



[Qemu-devel] [PATCH] rename qemu_malloc and related to glib names for coherence

2011-08-31 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 trace-events |   10 +-
 vl.c |6 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/trace-events b/trace-events
index dc300a2..37da2e0 100644
--- a/trace-events
+++ b/trace-events
@@ -14,7 +14,7 @@
 #
 # [disable] name(type1 arg1[, type2 arg2] ...) format-string
 #
-# Example: qemu_malloc(size_t size) size %zu
+# Example: g_malloc(size_t size) size %zu
 #
 # The disable keyword will build without the trace event.
 # In case of 'simple' trace backend, it will allow the trace event to be
@@ -28,10 +28,10 @@
 #
 # The format-string should be a sprintf()-compatible format string.
 
-# qemu-malloc.c
-disable qemu_malloc(size_t size, void *ptr) size %zu ptr %p
-disable qemu_realloc(void *ptr, size_t size, void *newptr) ptr %p size %zu 
newptr %p
-disable qemu_free(void *ptr) ptr %p
+# vl.c
+disable g_malloc(size_t size, void *ptr) size %zu ptr %p
+disable g_realloc(void *ptr, size_t size, void *newptr) ptr %p size %zu 
newptr %p
+disable g_free(void *ptr) ptr %p
 
 # osdep.c
 disable qemu_memalign(size_t alignment, size_t size, void *ptr) alignment %zu 
size %zu ptr %p
diff --git a/vl.c b/vl.c
index 9cd67a3..f71221b 100644
--- a/vl.c
+++ b/vl.c
@@ -2088,20 +2088,20 @@ static const QEMUOption *lookup_opt(int argc, char 
**argv,
 static gpointer malloc_and_trace(gsize n_bytes)
 {
 void *ptr = malloc(n_bytes);
-trace_qemu_malloc(n_bytes, ptr);
+trace_g_malloc(n_bytes, ptr);
 return ptr;
 }
 
 static gpointer realloc_and_trace(gpointer mem, gsize n_bytes)
 {
 void *ptr = realloc(mem, n_bytes);
-trace_qemu_realloc(mem, n_bytes, ptr);
+trace_g_realloc(mem, n_bytes, ptr);
 return ptr;
 }
 
 static void free_and_trace(gpointer mem)
 {
-trace_qemu_free(mem);
+trace_g_free(mem);
 free(mem);
 }
 
-- 
1.7.1




[Qemu-devel] [PATCH] linux aio: some comments

2011-08-30 Thread Frediano Ziglio
Add some notes about Linux AIO explaining why we don't use AIO in
some situations.

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/raw-posix.c |4 
 linux-aio.c   |1 +
 2 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/block/raw-posix.c b/block/raw-posix.c
index c5c9944..bcf50b2 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -236,6 +236,10 @@ static int raw_open_common(BlockDriverState *bs, const 
char *filename,
 }
 
 #ifdef CONFIG_LINUX_AIO
+/*
+ * Currently Linux do AIO only for files opened with O_DIRECT
+ * specified so check NOCACHE flag too
+ */
 if ((bdrv_flags  (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
   (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
 
diff --git a/linux-aio.c b/linux-aio.c
index 5fd3932..5265a02 100644
--- a/linux-aio.c
+++ b/linux-aio.c
@@ -181,6 +181,7 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void 
*aio_ctx, int fd,
 case QEMU_AIO_READ:
 io_prep_preadv(iocbs, fd, qiov-iov, qiov-niov, offset);
break;
+/* Currently Linux kernel does not support other operations */
 default:
 fprintf(stderr, %s: invalid AIO request type 0x%x.\n,
 __func__, type);
-- 
1.7.1




[Qemu-devel] [PATCH] disasm: update comment

2011-08-25 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 disas.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/disas.c b/disas.c
index 1334b8e..611b30b 100644
--- a/disas.c
+++ b/disas.c
@@ -137,7 +137,7 @@ print_insn_thumb1(bfd_vma pc, disassemble_info *info)
 
 /* Disassemble this for me please... (debugging). 'flags' has the following
values:
-i386 - nonzero means 16 bit code
+i386 - 1 means 16 bit code, 2 means 64 bit code
 arm  - nonzero means thumb code
 ppc  - nonzero means little endian
 other targets - unused
-- 
1.7.1




[Qemu-devel] [PATCH] qcow2: use always stderr for debugging

2011-08-25 Thread Frediano Ziglio
let all DEBUG_ALLOC2 printf goes to stderr

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2-cluster.c  |2 +-
 block/qcow2-refcount.c |4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 9269dda..9034840 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -53,7 +53,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, 
bool exact_size)
 }
 
 #ifdef DEBUG_ALLOC2
-printf(grow l1_table from %d to %d\n, s-l1_size, new_l1_size);
+fprintf(stderr, grow l1_table from %d to %d\n, s-l1_size, new_l1_size);
 #endif
 
 new_l1_size2 = sizeof(uint64_t) * new_l1_size;
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 2a915be..fbf28da 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -422,7 +422,7 @@ static int QEMU_WARN_UNUSED_RESULT 
update_refcount(BlockDriverState *bs,
 int ret;
 
 #ifdef DEBUG_ALLOC2
-printf(update_refcount: offset=% PRId64  size=% PRId64  addend=%d\n,
+fprintf(stderr, update_refcount: offset=% PRId64  size=% PRId64  
addend=%d\n,
offset, length, addend);
 #endif
 if (length  0) {
@@ -556,7 +556,7 @@ retry:
 }
 }
 #ifdef DEBUG_ALLOC2
-printf(alloc_clusters: size=% PRId64  - % PRId64 \n,
+fprintf(stderr, alloc_clusters: size=% PRId64  - % PRId64 \n,
 size,
 (s-free_cluster_index - nb_clusters)  s-cluster_bits);
 #endif
-- 
1.7.1




[Qemu-devel] [PATCH] qcow2: remove unused qcow2_create_refcount_update function

2011-08-25 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2-refcount.c |   18 --
 block/qcow2.h  |2 --
 2 files changed, 0 insertions(+), 20 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 2a915be..62946bb 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -680,24 +680,6 @@ void qcow2_free_any_clusters(BlockDriverState *bs,
 
 
 
-void qcow2_create_refcount_update(QCowCreateState *s, int64_t offset,
-int64_t size)
-{
-int refcount;
-int64_t start, last, cluster_offset;
-uint16_t *p;
-
-start = offset  ~(s-cluster_size - 1);
-last = (offset + size - 1)   ~(s-cluster_size - 1);
-for(cluster_offset = start; cluster_offset = last;
-cluster_offset += s-cluster_size) {
-p = s-refcount_block[cluster_offset  s-cluster_bits];
-refcount = be16_to_cpu(*p);
-refcount++;
-*p = cpu_to_be16(refcount);
-}
-}
-
 /* update the refcounts of snapshots and the copied flag */
 int qcow2_update_snapshot_refcount(BlockDriverState *bs,
 int64_t l1_table_offset, int l1_size, int addend)
diff --git a/block/qcow2.h b/block/qcow2.h
index de23abe..c8ca3bc 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -189,8 +189,6 @@ void qcow2_free_clusters(BlockDriverState *bs,
 void qcow2_free_any_clusters(BlockDriverState *bs,
 uint64_t cluster_offset, int nb_clusters);
 
-void qcow2_create_refcount_update(QCowCreateState *s, int64_t offset,
-int64_t size);
 int qcow2_update_snapshot_refcount(BlockDriverState *bs,
 int64_t l1_table_offset, int l1_size, int addend);
 
-- 
1.7.1




Re: [Qemu-devel] [PATCH 2/2] disasm: update comment

2011-08-25 Thread Frediano Ziglio
2011/8/25 Peter Maydell peter.mayd...@linaro.org:
 On 25 August 2011 09:44, Stefan Hajnoczi stefa...@linux.vnet.ibm.com wrote:
  /* Disassemble this for me please... (debugging). 'flags' has the following
    values:
 -    i386 - nonzero means 16 bit code
 +    i386 - 1 means 16 bit code, 2 means 64 bit code

 ...presumably 0 means 32 bit code, by elimination ?

 -- PMM


Currently you can pass even 10 :)
Yes, 0 (default) means 32 bit.

Frediano



[Qemu-devel] [PATCH v3 00/15] qcow/qcow2 cleanups

2011-08-23 Thread Frediano Ziglio
These patches mostly cleanup some AIO code using coroutines.
Mostly they use stack instead of allocated AIO structure.
Feel free to collapse it too short.

Frediano Ziglio (15):
  qcow: allocate QCowAIOCB structure using stack
  qcow: QCowAIOCB field cleanup
  qcow: move some blocks of code to avoid useless variable
initialization
  qcow: embed qcow_aio_read_cb into qcow_co_readv and qcow_aio_write_cb
into qcow_co_writev
  qcow: remove old #undefined code
  qcow2: removed unused fields
  qcow2: removed cur_nr_sectors field in QCowAIOCB
  qcow2: remove l2meta from QCowAIOCB
  qcow2: remove cluster_offset from QCowAIOCB
  qcow2: remove common from QCowAIOCB
  qcow2: reindent and use while before the big jump
  qcow2: removed QCowAIOCB entirely
  qcow2: remove memory leak
  qcow2: small math optimization
  qcow2: small optimization

 block/qcow.c   |  378 ++--
 block/qcow2-refcount.c |   16 +--
 block/qcow2.c  |  412 +++
 3 files changed, 294 insertions(+), 512 deletions(-)




[Qemu-devel] [PATCH v3 01/15] qcow: allocate QCowAIOCB structure using stack

2011-08-23 Thread Frediano Ziglio
instead of calling qemi_aio_get use stack

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow.c  |   52 
 block/qcow2.c |   38 +++---
 2 files changed, 27 insertions(+), 63 deletions(-)

diff --git a/block/qcow.c b/block/qcow.c
index e155d3c..b4506b4 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -503,28 +503,12 @@ typedef struct QCowAIOCB {
 BlockDriverAIOCB *hd_aiocb;
 } QCowAIOCB;
 
-static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
-{
-QCowAIOCB *acb = container_of(blockacb, QCowAIOCB, common);
-if (acb-hd_aiocb)
-bdrv_aio_cancel(acb-hd_aiocb);
-qemu_aio_release(acb);
-}
-
-static AIOPool qcow_aio_pool = {
-.aiocb_size = sizeof(QCowAIOCB),
-.cancel = qcow_aio_cancel,
-};
-
 static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-int is_write)
+int is_write, QCowAIOCB *acb)
 {
-QCowAIOCB *acb;
-
-acb = qemu_aio_get(qcow_aio_pool, bs, NULL, NULL);
-if (!acb)
-return NULL;
+memset(acb, 0, sizeof(*acb));
+acb-common.bs = bs;
 acb-hd_aiocb = NULL;
 acb-sector_num = sector_num;
 acb-qiov = qiov;
@@ -543,9 +527,8 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
 return acb;
 }
 
-static int qcow_aio_read_cb(void *opaque)
+static int qcow_aio_read_cb(QCowAIOCB *acb)
 {
-QCowAIOCB *acb = opaque;
 BlockDriverState *bs = acb-common.bs;
 BDRVQcowState *s = bs-opaque;
 int index_in_cluster;
@@ -634,29 +617,27 @@ static int qcow_co_readv(BlockDriverState *bs, int64_t 
sector_num,
  int nb_sectors, QEMUIOVector *qiov)
 {
 BDRVQcowState *s = bs-opaque;
-QCowAIOCB *acb;
+QCowAIOCB acb;
 int ret;
 
-acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, 0);
+qcow_aio_setup(bs, sector_num, qiov, nb_sectors, 0, acb);
 
 qemu_co_mutex_lock(s-lock);
 do {
-ret = qcow_aio_read_cb(acb);
+ret = qcow_aio_read_cb(acb);
 } while (ret  0);
 qemu_co_mutex_unlock(s-lock);
 
-if (acb-qiov-niov  1) {
-qemu_iovec_from_buffer(acb-qiov, acb-orig_buf, acb-qiov-size);
-qemu_vfree(acb-orig_buf);
+if (acb.qiov-niov  1) {
+qemu_iovec_from_buffer(acb.qiov, acb.orig_buf, acb.qiov-size);
+qemu_vfree(acb.orig_buf);
 }
-qemu_aio_release(acb);
 
 return ret;
 }
 
-static int qcow_aio_write_cb(void *opaque)
+static int qcow_aio_write_cb(QCowAIOCB *acb)
 {
-QCowAIOCB *acb = opaque;
 BlockDriverState *bs = acb-common.bs;
 BDRVQcowState *s = bs-opaque;
 int index_in_cluster;
@@ -714,23 +695,22 @@ static int qcow_co_writev(BlockDriverState *bs, int64_t 
sector_num,
   int nb_sectors, QEMUIOVector *qiov)
 {
 BDRVQcowState *s = bs-opaque;
-QCowAIOCB *acb;
+QCowAIOCB acb;
 int ret;
 
 s-cluster_cache_offset = -1; /* disable compressed cache */
 
-acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, 1);
+qcow_aio_setup(bs, sector_num, qiov, nb_sectors, 1, acb);
 
 qemu_co_mutex_lock(s-lock);
 do {
-ret = qcow_aio_write_cb(acb);
+ret = qcow_aio_write_cb(acb);
 } while (ret  0);
 qemu_co_mutex_unlock(s-lock);
 
-if (acb-qiov-niov  1) {
-qemu_vfree(acb-orig_buf);
+if (acb.qiov-niov  1) {
+qemu_vfree(acb.orig_buf);
 }
-qemu_aio_release(acb);
 
 return ret;
 }
diff --git a/block/qcow2.c b/block/qcow2.c
index bfff6cd..bb6c75e 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -388,17 +388,6 @@ typedef struct QCowAIOCB {
 QLIST_ENTRY(QCowAIOCB) next_depend;
 } QCowAIOCB;
 
-static void qcow2_aio_cancel(BlockDriverAIOCB *blockacb)
-{
-QCowAIOCB *acb = container_of(blockacb, QCowAIOCB, common);
-qemu_aio_release(acb);
-}
-
-static AIOPool qcow2_aio_pool = {
-.aiocb_size = sizeof(QCowAIOCB),
-.cancel = qcow2_aio_cancel,
-};
-
 /*
  * Returns 0 when the request is completed successfully, 1 when there is still
  * a part left to do and -errno in error cases.
@@ -528,13 +517,10 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, int64_t sector_num,
   QEMUIOVector *qiov, int nb_sectors,
   BlockDriverCompletionFunc *cb,
-  void *opaque, int is_write)
+  void *opaque, int is_write, QCowAIOCB *acb)
 {
-QCowAIOCB *acb;
-
-acb = qemu_aio_get(qcow2_aio_pool, bs, cb, opaque);
-if (!acb)
-return NULL;
+memset(acb, 0, sizeof(*acb));
+acb-common.bs = bs;
 acb-sector_num = sector_num;
 acb-qiov = qiov;
 acb-is_write = is_write;
@@ -554,19 +540,18 @@ static int qcow2_co_readv(BlockDriverState *bs, int64_t 
sector_num,
   int

[Qemu-devel] [PATCH v3 02/15] qcow: QCowAIOCB field cleanup

2011-08-23 Thread Frediano Ziglio
remove unused field from this structure and put some of them in 
qcow_aio_read_cb and qcow_aio_write_cb

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow.c |  137 +++--
 1 files changed, 65 insertions(+), 72 deletions(-)

diff --git a/block/qcow.c b/block/qcow.c
index b4506b4..9754ca9 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -487,72 +487,61 @@ static int qcow_read(BlockDriverState *bs, int64_t 
sector_num,
 #endif
 
 typedef struct QCowAIOCB {
-BlockDriverAIOCB common;
+BlockDriverState *bs;
 int64_t sector_num;
 QEMUIOVector *qiov;
 uint8_t *buf;
 void *orig_buf;
 int nb_sectors;
-int n;
-uint64_t cluster_offset;
-uint8_t *cluster_data;
-struct iovec hd_iov;
-bool is_write;
-QEMUBH *bh;
-QEMUIOVector hd_qiov;
-BlockDriverAIOCB *hd_aiocb;
 } QCowAIOCB;
 
 static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
 int is_write, QCowAIOCB *acb)
 {
-memset(acb, 0, sizeof(*acb));
-acb-common.bs = bs;
-acb-hd_aiocb = NULL;
+acb-bs = bs;
 acb-sector_num = sector_num;
 acb-qiov = qiov;
-acb-is_write = is_write;
 
 if (qiov-niov  1) {
 acb-buf = acb-orig_buf = qemu_blockalign(bs, qiov-size);
 if (is_write)
 qemu_iovec_to_buffer(qiov, acb-buf);
 } else {
+acb-orig_buf = NULL;
 acb-buf = (uint8_t *)qiov-iov-iov_base;
 }
 acb-nb_sectors = nb_sectors;
-acb-n = 0;
-acb-cluster_offset = 0;
 return acb;
 }
 
 static int qcow_aio_read_cb(QCowAIOCB *acb)
 {
-BlockDriverState *bs = acb-common.bs;
+BlockDriverState *bs = acb-bs;
 BDRVQcowState *s = bs-opaque;
 int index_in_cluster;
-int ret;
-
-acb-hd_aiocb = NULL;
+int ret, n = 0;
+uint64_t cluster_offset = 0;
+struct iovec hd_iov;
+QEMUIOVector hd_qiov;
 
  redo:
 /* post process the read buffer */
-if (!acb-cluster_offset) {
+if (!cluster_offset) {
 /* nothing to do */
-} else if (acb-cluster_offset  QCOW_OFLAG_COMPRESSED) {
+} else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
 /* nothing to do */
 } else {
 if (s-crypt_method) {
 encrypt_sectors(s, acb-sector_num, acb-buf, acb-buf,
-acb-n, 0,
+n, 0,
 s-aes_decrypt_key);
 }
 }
 
-acb-nb_sectors -= acb-n;
-acb-sector_num += acb-n;
-acb-buf += acb-n * 512;
+acb-nb_sectors -= n;
+acb-sector_num += n;
+acb-buf += n * 512;
 
 if (acb-nb_sectors == 0) {
 /* request completed */
@@ -560,57 +549,58 @@ static int qcow_aio_read_cb(QCowAIOCB *acb)
 }
 
 /* prepare next AIO request */
-acb-cluster_offset = get_cluster_offset(bs, acb-sector_num  9,
+cluster_offset = get_cluster_offset(bs, acb-sector_num  9,
  0, 0, 0, 0);
 index_in_cluster = acb-sector_num  (s-cluster_sectors - 1);
-acb-n = s-cluster_sectors - index_in_cluster;
-if (acb-n  acb-nb_sectors)
-acb-n = acb-nb_sectors;
+n = s-cluster_sectors - index_in_cluster;
+if (n  acb-nb_sectors) {
+n = acb-nb_sectors;
+}
 
-if (!acb-cluster_offset) {
+if (!cluster_offset) {
 if (bs-backing_hd) {
 /* read from the base image */
-acb-hd_iov.iov_base = (void *)acb-buf;
-acb-hd_iov.iov_len = acb-n * 512;
-qemu_iovec_init_external(acb-hd_qiov, acb-hd_iov, 1);
+hd_iov.iov_base = (void *)acb-buf;
+hd_iov.iov_len = n * 512;
+qemu_iovec_init_external(hd_qiov, hd_iov, 1);
 qemu_co_mutex_unlock(s-lock);
 ret = bdrv_co_readv(bs-backing_hd, acb-sector_num,
-acb-n, acb-hd_qiov);
+n, hd_qiov);
 qemu_co_mutex_lock(s-lock);
 if (ret  0) {
 return -EIO;
 }
 } else {
 /* Note: in this case, no need to wait */
-memset(acb-buf, 0, 512 * acb-n);
+memset(acb-buf, 0, 512 * n);
 goto redo;
 }
-} else if (acb-cluster_offset  QCOW_OFLAG_COMPRESSED) {
+} else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
 /* add AIO support for compressed blocks ? */
-if (decompress_cluster(bs, acb-cluster_offset)  0) {
+if (decompress_cluster(bs, cluster_offset)  0) {
 return -EIO;
 }
 memcpy(acb-buf,
-   s-cluster_cache + index_in_cluster * 512, 512 * acb-n);
+   s-cluster_cache + index_in_cluster * 512, 512 * n);
 goto redo;
 } else {
-if ((acb-cluster_offset  511) != 0) {
+if ((cluster_offset  511) != 0) {
 return -EIO;
 }
-acb-hd_iov.iov_base = (void *)acb-buf

[Qemu-devel] [PATCH v3 03/15] qcow: move some blocks of code to avoid useless variable initialization

2011-08-23 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow.c |   53 ++---
 1 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/block/qcow.c b/block/qcow.c
index 9754ca9..4ede7f3 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -520,35 +520,18 @@ static int qcow_aio_read_cb(QCowAIOCB *acb)
 BlockDriverState *bs = acb-bs;
 BDRVQcowState *s = bs-opaque;
 int index_in_cluster;
-int ret, n = 0;
-uint64_t cluster_offset = 0;
+int ret, n;
+uint64_t cluster_offset;
 struct iovec hd_iov;
 QEMUIOVector hd_qiov;
 
  redo:
-/* post process the read buffer */
-if (!cluster_offset) {
-/* nothing to do */
-} else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
-/* nothing to do */
-} else {
-if (s-crypt_method) {
-encrypt_sectors(s, acb-sector_num, acb-buf, acb-buf,
-n, 0,
-s-aes_decrypt_key);
-}
-}
-
-acb-nb_sectors -= n;
-acb-sector_num += n;
-acb-buf += n * 512;
-
 if (acb-nb_sectors == 0) {
 /* request completed */
 return 0;
 }
 
-/* prepare next AIO request */
+/* prepare next request */
 cluster_offset = get_cluster_offset(bs, acb-sector_num  9,
  0, 0, 0, 0);
 index_in_cluster = acb-sector_num  (s-cluster_sectors - 1);
@@ -573,7 +556,6 @@ static int qcow_aio_read_cb(QCowAIOCB *acb)
 } else {
 /* Note: in this case, no need to wait */
 memset(acb-buf, 0, 512 * n);
-goto redo;
 }
 } else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
 /* add AIO support for compressed blocks ? */
@@ -582,7 +564,6 @@ static int qcow_aio_read_cb(QCowAIOCB *acb)
 }
 memcpy(acb-buf,
s-cluster_cache + index_in_cluster * 512, 512 * n);
-goto redo;
 } else {
 if ((cluster_offset  511) != 0) {
 return -EIO;
@@ -600,6 +581,23 @@ static int qcow_aio_read_cb(QCowAIOCB *acb)
 }
 }
 
+/* post process the read buffer */
+if (!cluster_offset) {
+/* nothing to do */
+} else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
+/* nothing to do */
+} else {
+if (s-crypt_method) {
+encrypt_sectors(s, acb-sector_num, acb-buf, acb-buf,
+n, 0,
+s-aes_decrypt_key);
+}
+}
+
+acb-nb_sectors -= n;
+acb-sector_num += n;
+acb-buf += n * 512;
+
 goto redo;
 }
 
@@ -631,16 +629,12 @@ static int qcow_aio_write_cb(QCowAIOCB *acb)
 int index_in_cluster;
 uint64_t cluster_offset;
 const uint8_t *src_buf;
-int ret, n = 0;
+int ret, n;
 uint8_t *cluster_data = NULL;
 struct iovec hd_iov;
 QEMUIOVector hd_qiov;
 
 redo:
-acb-nb_sectors -= n;
-acb-sector_num += n;
-acb-buf += n * 512;
-
 if (acb-nb_sectors == 0) {
 /* request completed */
 return 0;
@@ -683,6 +677,11 @@ redo:
 if (ret  0) {
 return ret;
 }
+
+acb-nb_sectors -= n;
+acb-sector_num += n;
+acb-buf += n * 512;
+
 goto redo;
 }
 
-- 
1.7.1




[Qemu-devel] [PATCH v3 05/15] qcow: remove old #undefined code

2011-08-23 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow.c |   63 --
 1 files changed, 0 insertions(+), 63 deletions(-)

diff --git a/block/qcow.c b/block/qcow.c
index f28c821..8cbabdd 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -190,24 +190,6 @@ static int qcow_set_key(BlockDriverState *bs, const char 
*key)
 return -1;
 if (AES_set_decrypt_key(keybuf, 128, s-aes_decrypt_key) != 0)
 return -1;
-#if 0
-/* test */
-{
-uint8_t in[16];
-uint8_t out[16];
-uint8_t tmp[16];
-for(i=0;i16;i++)
-in[i] = i;
-AES_encrypt(in, tmp, s-aes_encrypt_key);
-AES_decrypt(tmp, out, s-aes_decrypt_key);
-for(i = 0; i  16; i++)
-printf( %02x, tmp[i]);
-printf(\n);
-for(i = 0; i  16; i++)
-printf( %02x, out[i]);
-printf(\n);
-}
-#endif
 return 0;
 }
 
@@ -441,51 +423,6 @@ static int decompress_cluster(BlockDriverState *bs, 
uint64_t cluster_offset)
 return 0;
 }
 
-#if 0
-
-static int qcow_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
-BDRVQcowState *s = bs-opaque;
-int ret, index_in_cluster, n;
-uint64_t cluster_offset;
-
-while (nb_sectors  0) {
-cluster_offset = get_cluster_offset(bs, sector_num  9, 0, 0, 0, 0);
-index_in_cluster = sector_num  (s-cluster_sectors - 1);
-n = s-cluster_sectors - index_in_cluster;
-if (n  nb_sectors)
-n = nb_sectors;
-if (!cluster_offset) {
-if (bs-backing_hd) {
-/* read from the base image */
-ret = bdrv_read(bs-backing_hd, sector_num, buf, n);
-if (ret  0)
-return -1;
-} else {
-memset(buf, 0, 512 * n);
-}
-} else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
-if (decompress_cluster(bs, cluster_offset)  0)
-return -1;
-memcpy(buf, s-cluster_cache + index_in_cluster * 512, 512 * n);
-} else {
-ret = bdrv_pread(bs-file, cluster_offset + index_in_cluster * 
512, buf, n * 512);
-if (ret != n * 512)
-return -1;
-if (s-crypt_method) {
-encrypt_sectors(s, sector_num, buf, buf, n, 0,
-s-aes_decrypt_key);
-}
-}
-nb_sectors -= n;
-sector_num += n;
-buf += n * 512;
-}
-return 0;
-}
-#endif
-
 static int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
  int nb_sectors, QEMUIOVector *qiov)
 {
-- 
1.7.1




[Qemu-devel] [PATCH v3 12/15] qcow2: removed QCowAIOCB entirely

2011-08-23 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |  207 ++---
 1 files changed, 80 insertions(+), 127 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 287dd99..d34bd72 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -372,83 +372,77 @@ int qcow2_backing_read1(BlockDriverState *bs, 
QEMUIOVector *qiov,
 return n1;
 }
 
-typedef struct QCowAIOCB {
-BlockDriverState *bs;
-int64_t sector_num;
-QEMUIOVector *qiov;
-int remaining_sectors;
-uint64_t bytes_done;
-uint8_t *cluster_data;
-QEMUIOVector hd_qiov;
-} QCowAIOCB;
-
-/*
- * Returns 0 when the request is completed successfully, 1 when there is still
- * a part left to do and -errno in error cases.
- */
-static int qcow2_aio_read_cb(QCowAIOCB *acb)
+static int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
+  int remaining_sectors, QEMUIOVector *qiov)
 {
-BlockDriverState *bs = acb-bs;
 BDRVQcowState *s = bs-opaque;
 int index_in_cluster, n1;
 int ret;
 int cur_nr_sectors; /* number of sectors in current iteration */
 uint64_t cluster_offset = 0;
+uint64_t bytes_done = 0;
+QEMUIOVector hd_qiov;
+uint8_t *cluster_data = NULL;
 
-while (acb-remaining_sectors != 0) {
+qemu_iovec_init(hd_qiov, qiov-niov);
+
+qemu_co_mutex_lock(s-lock);
+
+while (remaining_sectors != 0) {
 
 /* prepare next request */
-cur_nr_sectors = acb-remaining_sectors;
+cur_nr_sectors = remaining_sectors;
 if (s-crypt_method) {
 cur_nr_sectors = MIN(cur_nr_sectors,
 QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors);
 }
 
-ret = qcow2_get_cluster_offset(bs, acb-sector_num  9,
+ret = qcow2_get_cluster_offset(bs, sector_num  9,
 cur_nr_sectors, cluster_offset);
 if (ret  0) {
-return ret;
+goto fail;
 }
 
-index_in_cluster = acb-sector_num  (s-cluster_sectors - 1);
+index_in_cluster = sector_num  (s-cluster_sectors - 1);
 
-qemu_iovec_reset(acb-hd_qiov);
-qemu_iovec_copy(acb-hd_qiov, acb-qiov, acb-bytes_done,
+qemu_iovec_reset(hd_qiov);
+qemu_iovec_copy(hd_qiov, qiov, bytes_done,
 cur_nr_sectors * 512);
 
 if (!cluster_offset) {
 
 if (bs-backing_hd) {
 /* read from the base image */
-n1 = qcow2_backing_read1(bs-backing_hd, acb-hd_qiov,
-acb-sector_num, cur_nr_sectors);
+n1 = qcow2_backing_read1(bs-backing_hd, hd_qiov,
+sector_num, cur_nr_sectors);
 if (n1  0) {
 BLKDBG_EVENT(bs-file, BLKDBG_READ_BACKING_AIO);
 qemu_co_mutex_unlock(s-lock);
-ret = bdrv_co_readv(bs-backing_hd, acb-sector_num,
-n1, acb-hd_qiov);
+ret = bdrv_co_readv(bs-backing_hd, sector_num,
+n1, hd_qiov);
 qemu_co_mutex_lock(s-lock);
 if (ret  0) {
-return ret;
+goto fail;
 }
 }
 } else {
 /* Note: in this case, no need to wait */
-qemu_iovec_memset(acb-hd_qiov, 0, 512 * cur_nr_sectors);
+qemu_iovec_memset(hd_qiov, 0, 512 * cur_nr_sectors);
 }
 } else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
 /* add AIO support for compressed blocks ? */
 ret = qcow2_decompress_cluster(bs, cluster_offset);
 if (ret  0) {
-return ret;
+goto fail;
 }
 
-qemu_iovec_from_buffer(acb-hd_qiov,
+qemu_iovec_from_buffer(hd_qiov,
 s-cluster_cache + index_in_cluster * 512,
 512 * cur_nr_sectors);
 } else {
 if ((cluster_offset  511) != 0) {
-return -EIO;
+ret = -EIO;
+goto fail;
 }
 
 if (s-crypt_method) {
@@ -456,15 +450,15 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
  * For encrypted images, read everything into a temporary
  * contiguous buffer on which the AES functions can work.
  */
-if (!acb-cluster_data) {
-acb-cluster_data =
+if (!cluster_data) {
+cluster_data =
 g_malloc0(QCOW_MAX_CRYPT_CLUSTERS * s-cluster_size);
 }
 
 assert(cur_nr_sectors =
 QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors);
-qemu_iovec_reset(acb-hd_qiov);
-qemu_iovec_add(acb-hd_qiov, acb-cluster_data,
+qemu_iovec_reset(hd_qiov

[Qemu-devel] [PATCH v3 13/15] qcow2: remove memory leak

2011-08-23 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index d34bd72..07529b1 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -492,6 +492,7 @@ fail:
 qemu_co_mutex_unlock(s-lock);
 
 qemu_iovec_destroy(hd_qiov);
+g_free(cluster_data);
 
 return ret;
 }
@@ -604,6 +605,7 @@ fail:
 qemu_co_mutex_unlock(s-lock);
 
 qemu_iovec_destroy(hd_qiov);
+g_free(cluster_data);
 
 return ret;
 }
-- 
1.7.1




[Qemu-devel] [PATCH v3 14/15] qcow2: small math optimization

2011-08-23 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2-refcount.c |5 +
 1 files changed, 1 insertions(+), 4 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 2a915be..0f9a64a 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -140,10 +140,7 @@ static unsigned int next_refcount_table_size(BDRVQcowState 
*s,
 static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a,
 uint64_t offset_b)
 {
-uint64_t block_a = offset_a  (2 * s-cluster_bits - REFCOUNT_SHIFT);
-uint64_t block_b = offset_b  (2 * s-cluster_bits - REFCOUNT_SHIFT);
-
-return (block_a == block_b);
+return ((offset_a ^ offset_b)  (2 * s-cluster_bits - REFCOUNT_SHIFT)) 
== 0;
 }
 
 /*
-- 
1.7.1




[Qemu-devel] [PATCH v3 15/15] qcow2: small optimization

2011-08-23 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2-refcount.c |   11 +--
 1 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 0f9a64a..243c24b 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -681,14 +681,13 @@ void qcow2_create_refcount_update(QCowCreateState *s, 
int64_t offset,
 int64_t size)
 {
 int refcount;
-int64_t start, last, cluster_offset;
+int64_t start, last, cluster;
 uint16_t *p;
 
-start = offset  ~(s-cluster_size - 1);
-last = (offset + size - 1)   ~(s-cluster_size - 1);
-for(cluster_offset = start; cluster_offset = last;
-cluster_offset += s-cluster_size) {
-p = s-refcount_block[cluster_offset  s-cluster_bits];
+start = offset  s-cluster_bits;
+last = (offset + size - 1)   s-cluster_bits;
+for(cluster = start; cluster = last; ++cluster) {
+p = s-refcount_block[cluster];
 refcount = be16_to_cpu(*p);
 refcount++;
 *p = cpu_to_be16(refcount);
-- 
1.7.1




[Qemu-devel] [PATCH v3 04/15] qcow: embed qcow_aio_read_cb into qcow_co_readv and qcow_aio_write_cb into qcow_co_writev

2011-08-23 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow.c |  291 -
 1 files changed, 123 insertions(+), 168 deletions(-)

diff --git a/block/qcow.c b/block/qcow.c
index 4ede7f3..f28c821 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -486,223 +486,178 @@ static int qcow_read(BlockDriverState *bs, int64_t 
sector_num,
 }
 #endif
 
-typedef struct QCowAIOCB {
-BlockDriverState *bs;
-int64_t sector_num;
-QEMUIOVector *qiov;
-uint8_t *buf;
-void *orig_buf;
-int nb_sectors;
-} QCowAIOCB;
-
-static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
-int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-int is_write, QCowAIOCB *acb)
-{
-acb-bs = bs;
-acb-sector_num = sector_num;
-acb-qiov = qiov;
-
-if (qiov-niov  1) {
-acb-buf = acb-orig_buf = qemu_blockalign(bs, qiov-size);
-if (is_write)
-qemu_iovec_to_buffer(qiov, acb-buf);
-} else {
-acb-orig_buf = NULL;
-acb-buf = (uint8_t *)qiov-iov-iov_base;
-}
-acb-nb_sectors = nb_sectors;
-return acb;
-}
-
-static int qcow_aio_read_cb(QCowAIOCB *acb)
+static int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
 {
-BlockDriverState *bs = acb-bs;
 BDRVQcowState *s = bs-opaque;
 int index_in_cluster;
-int ret, n;
+int ret = 0, n;
 uint64_t cluster_offset;
 struct iovec hd_iov;
 QEMUIOVector hd_qiov;
+uint8_t *buf;
+void *orig_buf;
 
- redo:
-if (acb-nb_sectors == 0) {
-/* request completed */
-return 0;
+if (qiov-niov  1) {
+buf = orig_buf = qemu_blockalign(bs, qiov-size);
+} else {
+orig_buf = NULL;
+buf = (uint8_t *)qiov-iov-iov_base;
 }
 
-/* prepare next request */
-cluster_offset = get_cluster_offset(bs, acb-sector_num  9,
- 0, 0, 0, 0);
-index_in_cluster = acb-sector_num  (s-cluster_sectors - 1);
-n = s-cluster_sectors - index_in_cluster;
-if (n  acb-nb_sectors) {
-n = acb-nb_sectors;
-}
+qemu_co_mutex_lock(s-lock);
+
+while (nb_sectors != 0) {
+/* prepare next request */
+cluster_offset = get_cluster_offset(bs, sector_num  9,
+ 0, 0, 0, 0);
+index_in_cluster = sector_num  (s-cluster_sectors - 1);
+n = s-cluster_sectors - index_in_cluster;
+if (n  nb_sectors) {
+n = nb_sectors;
+}
 
-if (!cluster_offset) {
-if (bs-backing_hd) {
-/* read from the base image */
-hd_iov.iov_base = (void *)acb-buf;
+if (!cluster_offset) {
+if (bs-backing_hd) {
+/* read from the base image */
+hd_iov.iov_base = (void *)buf;
+hd_iov.iov_len = n * 512;
+qemu_iovec_init_external(hd_qiov, hd_iov, 1);
+qemu_co_mutex_unlock(s-lock);
+ret = bdrv_co_readv(bs-backing_hd, sector_num,
+n, hd_qiov);
+qemu_co_mutex_lock(s-lock);
+if (ret  0) {
+goto fail;
+}
+} else {
+/* Note: in this case, no need to wait */
+memset(buf, 0, 512 * n);
+}
+} else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
+/* add AIO support for compressed blocks ? */
+if (decompress_cluster(bs, cluster_offset)  0) {
+goto fail;
+}
+memcpy(buf,
+   s-cluster_cache + index_in_cluster * 512, 512 * n);
+} else {
+if ((cluster_offset  511) != 0) {
+goto fail;
+}
+hd_iov.iov_base = (void *)buf;
 hd_iov.iov_len = n * 512;
 qemu_iovec_init_external(hd_qiov, hd_iov, 1);
 qemu_co_mutex_unlock(s-lock);
-ret = bdrv_co_readv(bs-backing_hd, acb-sector_num,
+ret = bdrv_co_readv(bs-file,
+(cluster_offset  9) + index_in_cluster,
 n, hd_qiov);
 qemu_co_mutex_lock(s-lock);
 if (ret  0) {
-return -EIO;
+break;
+}
+if (s-crypt_method) {
+encrypt_sectors(s, sector_num, buf, buf,
+n, 0,
+s-aes_decrypt_key);
 }
-} else {
-/* Note: in this case, no need to wait */
-memset(acb-buf, 0, 512 * n);
-}
-} else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
-/* add AIO support for compressed blocks ? */
-if (decompress_cluster(bs, cluster_offset)  0) {
-return -EIO;
-}
-memcpy(acb-buf,
-   s

[Qemu-devel] [PATCH v3 09/15] qcow2: remove cluster_offset from QCowAIOCB

2011-08-23 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |   22 +++---
 1 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 4b9ec29..7e13283 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -378,7 +378,6 @@ typedef struct QCowAIOCB {
 QEMUIOVector *qiov;
 int remaining_sectors;
 uint64_t bytes_done;
-uint64_t cluster_offset;
 uint8_t *cluster_data;
 QEMUIOVector hd_qiov;
 } QCowAIOCB;
@@ -394,6 +393,7 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 int index_in_cluster, n1;
 int ret;
 int cur_nr_sectors; /* number of sectors in current iteration */
+uint64_t cluster_offset = 0;
 
 if (acb-remaining_sectors == 0) {
 /* request completed */
@@ -408,7 +408,7 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 }
 
 ret = qcow2_get_cluster_offset(bs, acb-sector_num  9,
-cur_nr_sectors, acb-cluster_offset);
+cur_nr_sectors, cluster_offset);
 if (ret  0) {
 return ret;
 }
@@ -419,7 +419,7 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 qemu_iovec_copy(acb-hd_qiov, acb-qiov, acb-bytes_done,
 cur_nr_sectors * 512);
 
-if (!acb-cluster_offset) {
+if (!cluster_offset) {
 
 if (bs-backing_hd) {
 /* read from the base image */
@@ -439,9 +439,9 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 /* Note: in this case, no need to wait */
 qemu_iovec_memset(acb-hd_qiov, 0, 512 * cur_nr_sectors);
 }
-} else if (acb-cluster_offset  QCOW_OFLAG_COMPRESSED) {
+} else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
 /* add AIO support for compressed blocks ? */
-ret = qcow2_decompress_cluster(bs, acb-cluster_offset);
+ret = qcow2_decompress_cluster(bs, cluster_offset);
 if (ret  0) {
 return ret;
 }
@@ -450,7 +450,7 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 s-cluster_cache + index_in_cluster * 512,
 512 * cur_nr_sectors);
 } else {
-if ((acb-cluster_offset  511) != 0) {
+if ((cluster_offset  511) != 0) {
 return -EIO;
 }
 
@@ -474,7 +474,7 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 BLKDBG_EVENT(bs-file, BLKDBG_READ_AIO);
 qemu_co_mutex_unlock(s-lock);
 ret = bdrv_co_readv(bs-file,
-(acb-cluster_offset  9) + index_in_cluster,
+(cluster_offset  9) + index_in_cluster,
 cur_nr_sectors, acb-hd_qiov);
 qemu_co_mutex_lock(s-lock);
 if (ret  0) {
@@ -512,7 +512,6 @@ static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, 
int64_t sector_num,
 
 acb-bytes_done = 0;
 acb-remaining_sectors = nb_sectors;
-acb-cluster_offset = 0;
 return acb;
 }
 
@@ -564,6 +563,7 @@ static int qcow2_aio_write_cb(QCowAIOCB *acb)
 int ret;
 int cur_nr_sectors; /* number of sectors in current iteration */
 QCowL2Meta l2meta;
+uint64_t cluster_offset;
 
 l2meta.nb_clusters = 0;
 qemu_co_queue_init(l2meta.dependent_requests);
@@ -585,8 +585,8 @@ static int qcow2_aio_write_cb(QCowAIOCB *acb)
 return ret;
 }
 
-acb-cluster_offset = l2meta.cluster_offset;
-assert((acb-cluster_offset  511) == 0);
+cluster_offset = l2meta.cluster_offset;
+assert((cluster_offset  511) == 0);
 
 qemu_iovec_reset(acb-hd_qiov);
 qemu_iovec_copy(acb-hd_qiov, acb-qiov, acb-bytes_done,
@@ -612,7 +612,7 @@ static int qcow2_aio_write_cb(QCowAIOCB *acb)
 BLKDBG_EVENT(bs-file, BLKDBG_WRITE_AIO);
 qemu_co_mutex_unlock(s-lock);
 ret = bdrv_co_writev(bs-file,
- (acb-cluster_offset  9) + index_in_cluster,
+ (cluster_offset  9) + index_in_cluster,
  cur_nr_sectors, acb-hd_qiov);
 qemu_co_mutex_lock(s-lock);
 if (ret  0) {
-- 
1.7.1




[Qemu-devel] [PATCH v3 10/15] qcow2: remove common from QCowAIOCB

2011-08-23 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |8 
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 7e13283..519fc8b 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -373,7 +373,7 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector 
*qiov,
 }
 
 typedef struct QCowAIOCB {
-BlockDriverAIOCB common;
+BlockDriverState *bs;
 int64_t sector_num;
 QEMUIOVector *qiov;
 int remaining_sectors;
@@ -388,7 +388,7 @@ typedef struct QCowAIOCB {
  */
 static int qcow2_aio_read_cb(QCowAIOCB *acb)
 {
-BlockDriverState *bs = acb-common.bs;
+BlockDriverState *bs = acb-bs;
 BDRVQcowState *s = bs-opaque;
 int index_in_cluster, n1;
 int ret;
@@ -504,7 +504,7 @@ static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, 
int64_t sector_num,
   void *opaque, QCowAIOCB *acb)
 {
 memset(acb, 0, sizeof(*acb));
-acb-common.bs = bs;
+acb-bs = bs;
 acb-sector_num = sector_num;
 acb-qiov = qiov;
 
@@ -556,7 +556,7 @@ static void run_dependent_requests(BDRVQcowState *s, 
QCowL2Meta *m)
  */
 static int qcow2_aio_write_cb(QCowAIOCB *acb)
 {
-BlockDriverState *bs = acb-common.bs;
+BlockDriverState *bs = acb-bs;
 BDRVQcowState *s = bs-opaque;
 int index_in_cluster;
 int n_end;
-- 
1.7.1




Re: [Qemu-devel] [PATCH v2 00/15] qcow/qcow2 cleanups

2011-08-23 Thread Frediano Ziglio
2011/8/22 Kevin Wolf kw...@redhat.com:
 Am 09.08.2011 09:46, schrieb Frediano Ziglio:
 These patches mostly cleanup some AIO code using coroutines.
 Mostly they use stack instead of allocated AIO structure.
 Feel free to collapse it too short.

 Frediano Ziglio (15):
   qcow: allocate QCowAIOCB structure using stack
   qcow: QCowAIOCB field cleanup
   qcow: move some blocks of code to avoid useless variable
     initialization
   qcow: embed qcow_aio_read_cb into qcow_co_readv and qcow_aio_write_cb
     into qcow_co_writev
   qcow: remove old #undefined code
   qcow2: removed unused fields
   qcow2: removed cur_nr_sectors field in QCowAIOCB
   qcow2: remove l2meta from QCowAIOCB
   qcow2: remove cluster_offset from QCowAIOCB
   qcow2: remove common from QCowAIOCB
   qcow2: reindent and use while before the big jump
   qcow2: removed QCowAIOCB entirely
   qcow2: remove memory leak
   qcow2: small math optimization
   qcow2: small optimization

  block/qcow.c           |  378 ++--
  block/qcow2-refcount.c |   16 +--
  block/qcow2.c          |  412 
 +++
  3 files changed, 294 insertions(+), 512 deletions(-)

 Can you please rebase this series to current master? I expect that most
 conflicts are related to the qemu_malloc - g_malloc change, so they
 should be easy to fix.


Done, yes glib rename was the problem. I rebased all patches and
tested latest with iotests.

 One thing I noticed when reading the first two patches is that there is
 some state in ACBs that previously was shared across multiple requests
 (e.g. the bounce buffer for encryption). It is no longer shared after
 you move the ACB to the stack, so each request allocates a new buffer.
 Have you checked if this makes a noticeable difference in performance
 for encrypted images?

 Kevin


I didn't test encrypt performance. Looking at master code the only
cached state that get reused is cluster_data so only encryption is
affected.

Frediano



[Qemu-devel] [PATCH v3 06/15] qcow2: removed unused fields

2011-08-23 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |   10 +++---
 1 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index bb6c75e..4171b47 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -381,11 +381,8 @@ typedef struct QCowAIOCB {
 uint64_t bytes_done;
 uint64_t cluster_offset;
 uint8_t *cluster_data;
-bool is_write;
 QEMUIOVector hd_qiov;
-QEMUBH *bh;
 QCowL2Meta l2meta;
-QLIST_ENTRY(QCowAIOCB) next_depend;
 } QCowAIOCB;
 
 /*
@@ -517,13 +514,12 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, int64_t sector_num,
   QEMUIOVector *qiov, int nb_sectors,
   BlockDriverCompletionFunc *cb,
-  void *opaque, int is_write, QCowAIOCB *acb)
+  void *opaque, QCowAIOCB *acb)
 {
 memset(acb, 0, sizeof(*acb));
 acb-common.bs = bs;
 acb-sector_num = sector_num;
 acb-qiov = qiov;
-acb-is_write = is_write;
 
 qemu_iovec_init(acb-hd_qiov, qiov-niov);
 
@@ -543,7 +539,7 @@ static int qcow2_co_readv(BlockDriverState *bs, int64_t 
sector_num,
 QCowAIOCB acb;
 int ret;
 
-qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, NULL, NULL, 0, acb);
+qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, NULL, NULL, acb);
 
 qemu_co_mutex_lock(s-lock);
 do {
@@ -658,7 +654,7 @@ static int qcow2_co_writev(BlockDriverState *bs,
 QCowAIOCB acb;
 int ret;
 
-qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, NULL, NULL, 1, acb);
+qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, NULL, NULL, acb);
 s-cluster_cache_offset = -1; /* disable compressed cache */
 
 qemu_co_mutex_lock(s-lock);
-- 
1.7.1




[Qemu-devel] [PATCH v3 07/15] qcow2: removed cur_nr_sectors field in QCowAIOCB

2011-08-23 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |   98 +
 1 files changed, 43 insertions(+), 55 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 4171b47..f920dbe 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -377,7 +377,6 @@ typedef struct QCowAIOCB {
 int64_t sector_num;
 QEMUIOVector *qiov;
 int remaining_sectors;
-int cur_nr_sectors;/* number of sectors in current iteration */
 uint64_t bytes_done;
 uint64_t cluster_offset;
 uint8_t *cluster_data;
@@ -395,42 +394,22 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 BDRVQcowState *s = bs-opaque;
 int index_in_cluster, n1;
 int ret;
-
-/* post process the read buffer */
-if (!acb-cluster_offset) {
-/* nothing to do */
-} else if (acb-cluster_offset  QCOW_OFLAG_COMPRESSED) {
-/* nothing to do */
-} else {
-if (s-crypt_method) {
-qcow2_encrypt_sectors(s, acb-sector_num,  acb-cluster_data,
-acb-cluster_data, acb-cur_nr_sectors, 0, 
s-aes_decrypt_key);
-qemu_iovec_reset(acb-hd_qiov);
-qemu_iovec_copy(acb-hd_qiov, acb-qiov, acb-bytes_done,
-acb-cur_nr_sectors * 512);
-qemu_iovec_from_buffer(acb-hd_qiov, acb-cluster_data,
-512 * acb-cur_nr_sectors);
-}
-}
-
-acb-remaining_sectors -= acb-cur_nr_sectors;
-acb-sector_num += acb-cur_nr_sectors;
-acb-bytes_done += acb-cur_nr_sectors * 512;
+int cur_nr_sectors; /* number of sectors in current iteration */
 
 if (acb-remaining_sectors == 0) {
 /* request completed */
 return 0;
 }
 
-/* prepare next AIO request */
-acb-cur_nr_sectors = acb-remaining_sectors;
+/* prepare next request */
+cur_nr_sectors = acb-remaining_sectors;
 if (s-crypt_method) {
-acb-cur_nr_sectors = MIN(acb-cur_nr_sectors,
+cur_nr_sectors = MIN(cur_nr_sectors,
 QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors);
 }
 
 ret = qcow2_get_cluster_offset(bs, acb-sector_num  9,
-acb-cur_nr_sectors, acb-cluster_offset);
+cur_nr_sectors, acb-cluster_offset);
 if (ret  0) {
 return ret;
 }
@@ -439,14 +418,14 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 
 qemu_iovec_reset(acb-hd_qiov);
 qemu_iovec_copy(acb-hd_qiov, acb-qiov, acb-bytes_done,
-acb-cur_nr_sectors * 512);
+cur_nr_sectors * 512);
 
 if (!acb-cluster_offset) {
 
 if (bs-backing_hd) {
 /* read from the base image */
 n1 = qcow2_backing_read1(bs-backing_hd, acb-hd_qiov,
-acb-sector_num, acb-cur_nr_sectors);
+acb-sector_num, cur_nr_sectors);
 if (n1  0) {
 BLKDBG_EVENT(bs-file, BLKDBG_READ_BACKING_AIO);
 qemu_co_mutex_unlock(s-lock);
@@ -457,11 +436,9 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 return ret;
 }
 }
-return 1;
 } else {
 /* Note: in this case, no need to wait */
-qemu_iovec_memset(acb-hd_qiov, 0, 512 * acb-cur_nr_sectors);
-return 1;
+qemu_iovec_memset(acb-hd_qiov, 0, 512 * cur_nr_sectors);
 }
 } else if (acb-cluster_offset  QCOW_OFLAG_COMPRESSED) {
 /* add AIO support for compressed blocks ? */
@@ -472,9 +449,7 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 
 qemu_iovec_from_buffer(acb-hd_qiov,
 s-cluster_cache + index_in_cluster * 512,
-512 * acb-cur_nr_sectors);
-
-return 1;
+512 * cur_nr_sectors);
 } else {
 if ((acb-cluster_offset  511) != 0) {
 return -EIO;
@@ -490,24 +465,37 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 g_malloc0(QCOW_MAX_CRYPT_CLUSTERS * s-cluster_size);
 }
 
-assert(acb-cur_nr_sectors =
+assert(cur_nr_sectors =
 QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors);
 qemu_iovec_reset(acb-hd_qiov);
 qemu_iovec_add(acb-hd_qiov, acb-cluster_data,
-512 * acb-cur_nr_sectors);
+512 * cur_nr_sectors);
 }
 
 BLKDBG_EVENT(bs-file, BLKDBG_READ_AIO);
 qemu_co_mutex_unlock(s-lock);
 ret = bdrv_co_readv(bs-file,
 (acb-cluster_offset  9) + index_in_cluster,
-acb-cur_nr_sectors, acb-hd_qiov);
+cur_nr_sectors, acb-hd_qiov);
 qemu_co_mutex_lock(s-lock);
 if (ret  0) {
 return ret;
 }
+if (s-crypt_method) {
+qcow2_encrypt_sectors(s, acb-sector_num,  acb-cluster_data,
+acb-cluster_data, cur_nr_sectors, 0, s-aes_decrypt_key);
+qemu_iovec_reset(acb-hd_qiov);
+qemu_iovec_copy(acb-hd_qiov, acb-qiov

[Qemu-devel] [PATCH v3 11/15] qcow2: reindent and use while before the big jump

2011-08-23 Thread Frediano Ziglio
prepare to remove read/write callbacks

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |  272 -
 1 files changed, 135 insertions(+), 137 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 519fc8b..287dd99 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -395,107 +395,105 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 int cur_nr_sectors; /* number of sectors in current iteration */
 uint64_t cluster_offset = 0;
 
-if (acb-remaining_sectors == 0) {
-/* request completed */
-return 0;
-}
-
-/* prepare next request */
-cur_nr_sectors = acb-remaining_sectors;
-if (s-crypt_method) {
-cur_nr_sectors = MIN(cur_nr_sectors,
-QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors);
-}
+while (acb-remaining_sectors != 0) {
 
-ret = qcow2_get_cluster_offset(bs, acb-sector_num  9,
-cur_nr_sectors, cluster_offset);
-if (ret  0) {
-return ret;
-}
-
-index_in_cluster = acb-sector_num  (s-cluster_sectors - 1);
-
-qemu_iovec_reset(acb-hd_qiov);
-qemu_iovec_copy(acb-hd_qiov, acb-qiov, acb-bytes_done,
-cur_nr_sectors * 512);
-
-if (!cluster_offset) {
-
-if (bs-backing_hd) {
-/* read from the base image */
-n1 = qcow2_backing_read1(bs-backing_hd, acb-hd_qiov,
-acb-sector_num, cur_nr_sectors);
-if (n1  0) {
-BLKDBG_EVENT(bs-file, BLKDBG_READ_BACKING_AIO);
-qemu_co_mutex_unlock(s-lock);
-ret = bdrv_co_readv(bs-backing_hd, acb-sector_num,
-n1, acb-hd_qiov);
-qemu_co_mutex_lock(s-lock);
-if (ret  0) {
-return ret;
-}
-}
-} else {
-/* Note: in this case, no need to wait */
-qemu_iovec_memset(acb-hd_qiov, 0, 512 * cur_nr_sectors);
+/* prepare next request */
+cur_nr_sectors = acb-remaining_sectors;
+if (s-crypt_method) {
+cur_nr_sectors = MIN(cur_nr_sectors,
+QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors);
 }
-} else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
-/* add AIO support for compressed blocks ? */
-ret = qcow2_decompress_cluster(bs, cluster_offset);
+
+ret = qcow2_get_cluster_offset(bs, acb-sector_num  9,
+cur_nr_sectors, cluster_offset);
 if (ret  0) {
 return ret;
 }
 
-qemu_iovec_from_buffer(acb-hd_qiov,
-s-cluster_cache + index_in_cluster * 512,
-512 * cur_nr_sectors);
-} else {
-if ((cluster_offset  511) != 0) {
-return -EIO;
-}
+index_in_cluster = acb-sector_num  (s-cluster_sectors - 1);
 
-if (s-crypt_method) {
-/*
- * For encrypted images, read everything into a temporary
- * contiguous buffer on which the AES functions can work.
- */
-if (!acb-cluster_data) {
-acb-cluster_data =
-g_malloc0(QCOW_MAX_CRYPT_CLUSTERS * s-cluster_size);
+qemu_iovec_reset(acb-hd_qiov);
+qemu_iovec_copy(acb-hd_qiov, acb-qiov, acb-bytes_done,
+cur_nr_sectors * 512);
+
+if (!cluster_offset) {
+
+if (bs-backing_hd) {
+/* read from the base image */
+n1 = qcow2_backing_read1(bs-backing_hd, acb-hd_qiov,
+acb-sector_num, cur_nr_sectors);
+if (n1  0) {
+BLKDBG_EVENT(bs-file, BLKDBG_READ_BACKING_AIO);
+qemu_co_mutex_unlock(s-lock);
+ret = bdrv_co_readv(bs-backing_hd, acb-sector_num,
+n1, acb-hd_qiov);
+qemu_co_mutex_lock(s-lock);
+if (ret  0) {
+return ret;
+}
+}
+} else {
+/* Note: in this case, no need to wait */
+qemu_iovec_memset(acb-hd_qiov, 0, 512 * cur_nr_sectors);
+}
+} else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
+/* add AIO support for compressed blocks ? */
+ret = qcow2_decompress_cluster(bs, cluster_offset);
+if (ret  0) {
+return ret;
 }
 
-assert(cur_nr_sectors =
-QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors);
-qemu_iovec_reset(acb-hd_qiov);
-qemu_iovec_add(acb-hd_qiov, acb-cluster_data,
+qemu_iovec_from_buffer(acb-hd_qiov,
+s-cluster_cache + index_in_cluster * 512,
 512 * cur_nr_sectors);
-}
+} else {
+if ((cluster_offset  511) != 0) {
+return -EIO;
+}
 
-BLKDBG_EVENT(bs-file

[Qemu-devel] [PATCH v3 08/15] qcow2: remove l2meta from QCowAIOCB

2011-08-23 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |   15 ---
 1 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index f920dbe..4b9ec29 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -381,7 +381,6 @@ typedef struct QCowAIOCB {
 uint64_t cluster_offset;
 uint8_t *cluster_data;
 QEMUIOVector hd_qiov;
-QCowL2Meta l2meta;
 } QCowAIOCB;
 
 /*
@@ -514,8 +513,6 @@ static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, 
int64_t sector_num,
 acb-bytes_done = 0;
 acb-remaining_sectors = nb_sectors;
 acb-cluster_offset = 0;
-acb-l2meta.nb_clusters = 0;
-qemu_co_queue_init(acb-l2meta.dependent_requests);
 return acb;
 }
 
@@ -566,6 +563,10 @@ static int qcow2_aio_write_cb(QCowAIOCB *acb)
 int n_end;
 int ret;
 int cur_nr_sectors; /* number of sectors in current iteration */
+QCowL2Meta l2meta;
+
+l2meta.nb_clusters = 0;
+qemu_co_queue_init(l2meta.dependent_requests);
 
 if (acb-remaining_sectors == 0) {
 /* request completed */
@@ -579,12 +580,12 @@ static int qcow2_aio_write_cb(QCowAIOCB *acb)
 n_end = QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors;
 
 ret = qcow2_alloc_cluster_offset(bs, acb-sector_num  9,
-index_in_cluster, n_end, cur_nr_sectors, acb-l2meta);
+index_in_cluster, n_end, cur_nr_sectors, l2meta);
 if (ret  0) {
 return ret;
 }
 
-acb-cluster_offset = acb-l2meta.cluster_offset;
+acb-cluster_offset = l2meta.cluster_offset;
 assert((acb-cluster_offset  511) == 0);
 
 qemu_iovec_reset(acb-hd_qiov);
@@ -618,9 +619,9 @@ static int qcow2_aio_write_cb(QCowAIOCB *acb)
 return ret;
 }
 
-ret = qcow2_alloc_cluster_link_l2(bs, acb-l2meta);
+ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
 
-run_dependent_requests(s, acb-l2meta);
+run_dependent_requests(s, l2meta);
 
 if (ret  0) {
 return ret;
-- 
1.7.1




[Qemu-devel] full allocation and file fragmentation.

2011-08-13 Thread Frediano Ziglio
Looking at you patch for full image preallocation I noted you used
bdrv_write to fill space with zeroes. Nothing wrong but I would add a
call to fallocate or posix_fallocate in order to get less
fragmentation on file system. Perhaps there should be a
bdrv_preallocate call ??

Regards
  Frediano



[Qemu-devel] [PATCH v2 00/15] qcow/qcow2 cleanups

2011-08-09 Thread Frediano Ziglio
These patches mostly cleanup some AIO code using coroutines.
Mostly they use stack instead of allocated AIO structure.
Feel free to collapse it too short.

Frediano Ziglio (15):
  qcow: allocate QCowAIOCB structure using stack
  qcow: QCowAIOCB field cleanup
  qcow: move some blocks of code to avoid useless variable
initialization
  qcow: embed qcow_aio_read_cb into qcow_co_readv and qcow_aio_write_cb
into qcow_co_writev
  qcow: remove old #undefined code
  qcow2: removed unused fields
  qcow2: removed cur_nr_sectors field in QCowAIOCB
  qcow2: remove l2meta from QCowAIOCB
  qcow2: remove cluster_offset from QCowAIOCB
  qcow2: remove common from QCowAIOCB
  qcow2: reindent and use while before the big jump
  qcow2: removed QCowAIOCB entirely
  qcow2: remove memory leak
  qcow2: small math optimization
  qcow2: small optimization

 block/qcow.c   |  378 ++--
 block/qcow2-refcount.c |   16 +--
 block/qcow2.c  |  412 +++
 3 files changed, 294 insertions(+), 512 deletions(-)




[Qemu-devel] [PATCH v2 02/15] qcow: QCowAIOCB field cleanup

2011-08-09 Thread Frediano Ziglio
remove unused field from this structure and put some of them in 
qcow_aio_read_cb and qcow_aio_write_cb

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow.c |  137 +++--
 1 files changed, 65 insertions(+), 72 deletions(-)

diff --git a/block/qcow.c b/block/qcow.c
index d19ef04..f0b1599 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -487,72 +487,61 @@ static int qcow_read(BlockDriverState *bs, int64_t 
sector_num,
 #endif
 
 typedef struct QCowAIOCB {
-BlockDriverAIOCB common;
+BlockDriverState *bs;
 int64_t sector_num;
 QEMUIOVector *qiov;
 uint8_t *buf;
 void *orig_buf;
 int nb_sectors;
-int n;
-uint64_t cluster_offset;
-uint8_t *cluster_data;
-struct iovec hd_iov;
-bool is_write;
-QEMUBH *bh;
-QEMUIOVector hd_qiov;
-BlockDriverAIOCB *hd_aiocb;
 } QCowAIOCB;
 
 static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
 int is_write, QCowAIOCB *acb)
 {
-memset(acb, 0, sizeof(*acb));
-acb-common.bs = bs;
-acb-hd_aiocb = NULL;
+acb-bs = bs;
 acb-sector_num = sector_num;
 acb-qiov = qiov;
-acb-is_write = is_write;
 
 if (qiov-niov  1) {
 acb-buf = acb-orig_buf = qemu_blockalign(bs, qiov-size);
 if (is_write)
 qemu_iovec_to_buffer(qiov, acb-buf);
 } else {
+acb-orig_buf = NULL;
 acb-buf = (uint8_t *)qiov-iov-iov_base;
 }
 acb-nb_sectors = nb_sectors;
-acb-n = 0;
-acb-cluster_offset = 0;
 return acb;
 }
 
 static int qcow_aio_read_cb(QCowAIOCB *acb)
 {
-BlockDriverState *bs = acb-common.bs;
+BlockDriverState *bs = acb-bs;
 BDRVQcowState *s = bs-opaque;
 int index_in_cluster;
-int ret;
-
-acb-hd_aiocb = NULL;
+int ret, n = 0;
+uint64_t cluster_offset = 0;
+struct iovec hd_iov;
+QEMUIOVector hd_qiov;
 
  redo:
 /* post process the read buffer */
-if (!acb-cluster_offset) {
+if (!cluster_offset) {
 /* nothing to do */
-} else if (acb-cluster_offset  QCOW_OFLAG_COMPRESSED) {
+} else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
 /* nothing to do */
 } else {
 if (s-crypt_method) {
 encrypt_sectors(s, acb-sector_num, acb-buf, acb-buf,
-acb-n, 0,
+n, 0,
 s-aes_decrypt_key);
 }
 }
 
-acb-nb_sectors -= acb-n;
-acb-sector_num += acb-n;
-acb-buf += acb-n * 512;
+acb-nb_sectors -= n;
+acb-sector_num += n;
+acb-buf += n * 512;
 
 if (acb-nb_sectors == 0) {
 /* request completed */
@@ -560,57 +549,58 @@ static int qcow_aio_read_cb(QCowAIOCB *acb)
 }
 
 /* prepare next AIO request */
-acb-cluster_offset = get_cluster_offset(bs, acb-sector_num  9,
+cluster_offset = get_cluster_offset(bs, acb-sector_num  9,
  0, 0, 0, 0);
 index_in_cluster = acb-sector_num  (s-cluster_sectors - 1);
-acb-n = s-cluster_sectors - index_in_cluster;
-if (acb-n  acb-nb_sectors)
-acb-n = acb-nb_sectors;
+n = s-cluster_sectors - index_in_cluster;
+if (n  acb-nb_sectors) {
+n = acb-nb_sectors;
+}
 
-if (!acb-cluster_offset) {
+if (!cluster_offset) {
 if (bs-backing_hd) {
 /* read from the base image */
-acb-hd_iov.iov_base = (void *)acb-buf;
-acb-hd_iov.iov_len = acb-n * 512;
-qemu_iovec_init_external(acb-hd_qiov, acb-hd_iov, 1);
+hd_iov.iov_base = (void *)acb-buf;
+hd_iov.iov_len = n * 512;
+qemu_iovec_init_external(hd_qiov, hd_iov, 1);
 qemu_co_mutex_unlock(s-lock);
 ret = bdrv_co_readv(bs-backing_hd, acb-sector_num,
-acb-n, acb-hd_qiov);
+n, hd_qiov);
 qemu_co_mutex_lock(s-lock);
 if (ret  0) {
 return -EIO;
 }
 } else {
 /* Note: in this case, no need to wait */
-memset(acb-buf, 0, 512 * acb-n);
+memset(acb-buf, 0, 512 * n);
 goto redo;
 }
-} else if (acb-cluster_offset  QCOW_OFLAG_COMPRESSED) {
+} else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
 /* add AIO support for compressed blocks ? */
-if (decompress_cluster(bs, acb-cluster_offset)  0) {
+if (decompress_cluster(bs, cluster_offset)  0) {
 return -EIO;
 }
 memcpy(acb-buf,
-   s-cluster_cache + index_in_cluster * 512, 512 * acb-n);
+   s-cluster_cache + index_in_cluster * 512, 512 * n);
 goto redo;
 } else {
-if ((acb-cluster_offset  511) != 0) {
+if ((cluster_offset  511) != 0) {
 return -EIO;
 }
-acb-hd_iov.iov_base = (void *)acb-buf

[Qemu-devel] [PATCH v2 03/15] qcow: move some blocks of code to avoid useless variable initialization

2011-08-09 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow.c |   53 ++---
 1 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/block/qcow.c b/block/qcow.c
index f0b1599..e17f9c5 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -520,35 +520,18 @@ static int qcow_aio_read_cb(QCowAIOCB *acb)
 BlockDriverState *bs = acb-bs;
 BDRVQcowState *s = bs-opaque;
 int index_in_cluster;
-int ret, n = 0;
-uint64_t cluster_offset = 0;
+int ret, n;
+uint64_t cluster_offset;
 struct iovec hd_iov;
 QEMUIOVector hd_qiov;
 
  redo:
-/* post process the read buffer */
-if (!cluster_offset) {
-/* nothing to do */
-} else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
-/* nothing to do */
-} else {
-if (s-crypt_method) {
-encrypt_sectors(s, acb-sector_num, acb-buf, acb-buf,
-n, 0,
-s-aes_decrypt_key);
-}
-}
-
-acb-nb_sectors -= n;
-acb-sector_num += n;
-acb-buf += n * 512;
-
 if (acb-nb_sectors == 0) {
 /* request completed */
 return 0;
 }
 
-/* prepare next AIO request */
+/* prepare next request */
 cluster_offset = get_cluster_offset(bs, acb-sector_num  9,
  0, 0, 0, 0);
 index_in_cluster = acb-sector_num  (s-cluster_sectors - 1);
@@ -573,7 +556,6 @@ static int qcow_aio_read_cb(QCowAIOCB *acb)
 } else {
 /* Note: in this case, no need to wait */
 memset(acb-buf, 0, 512 * n);
-goto redo;
 }
 } else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
 /* add AIO support for compressed blocks ? */
@@ -582,7 +564,6 @@ static int qcow_aio_read_cb(QCowAIOCB *acb)
 }
 memcpy(acb-buf,
s-cluster_cache + index_in_cluster * 512, 512 * n);
-goto redo;
 } else {
 if ((cluster_offset  511) != 0) {
 return -EIO;
@@ -600,6 +581,23 @@ static int qcow_aio_read_cb(QCowAIOCB *acb)
 }
 }
 
+/* post process the read buffer */
+if (!cluster_offset) {
+/* nothing to do */
+} else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
+/* nothing to do */
+} else {
+if (s-crypt_method) {
+encrypt_sectors(s, acb-sector_num, acb-buf, acb-buf,
+n, 0,
+s-aes_decrypt_key);
+}
+}
+
+acb-nb_sectors -= n;
+acb-sector_num += n;
+acb-buf += n * 512;
+
 goto redo;
 }
 
@@ -631,16 +629,12 @@ static int qcow_aio_write_cb(QCowAIOCB *acb)
 int index_in_cluster;
 uint64_t cluster_offset;
 const uint8_t *src_buf;
-int ret, n = 0;
+int ret, n;
 uint8_t *cluster_data = NULL;
 struct iovec hd_iov;
 QEMUIOVector hd_qiov;
 
 redo:
-acb-nb_sectors -= n;
-acb-sector_num += n;
-acb-buf += n * 512;
-
 if (acb-nb_sectors == 0) {
 /* request completed */
 return 0;
@@ -683,6 +677,11 @@ redo:
 if (ret  0) {
 return ret;
 }
+
+acb-nb_sectors -= n;
+acb-sector_num += n;
+acb-buf += n * 512;
+
 goto redo;
 }
 
-- 
1.7.1




[Qemu-devel] [PATCH v2 01/15] qcow: allocate QCowAIOCB structure using stack

2011-08-09 Thread Frediano Ziglio
instead of calling qemi_aio_get use stack

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow.c  |   52 
 block/qcow2.c |   38 +++---
 2 files changed, 27 insertions(+), 63 deletions(-)

diff --git a/block/qcow.c b/block/qcow.c
index 6447c2a..d19ef04 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -503,28 +503,12 @@ typedef struct QCowAIOCB {
 BlockDriverAIOCB *hd_aiocb;
 } QCowAIOCB;
 
-static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
-{
-QCowAIOCB *acb = container_of(blockacb, QCowAIOCB, common);
-if (acb-hd_aiocb)
-bdrv_aio_cancel(acb-hd_aiocb);
-qemu_aio_release(acb);
-}
-
-static AIOPool qcow_aio_pool = {
-.aiocb_size = sizeof(QCowAIOCB),
-.cancel = qcow_aio_cancel,
-};
-
 static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-int is_write)
+int is_write, QCowAIOCB *acb)
 {
-QCowAIOCB *acb;
-
-acb = qemu_aio_get(qcow_aio_pool, bs, NULL, NULL);
-if (!acb)
-return NULL;
+memset(acb, 0, sizeof(*acb));
+acb-common.bs = bs;
 acb-hd_aiocb = NULL;
 acb-sector_num = sector_num;
 acb-qiov = qiov;
@@ -543,9 +527,8 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
 return acb;
 }
 
-static int qcow_aio_read_cb(void *opaque)
+static int qcow_aio_read_cb(QCowAIOCB *acb)
 {
-QCowAIOCB *acb = opaque;
 BlockDriverState *bs = acb-common.bs;
 BDRVQcowState *s = bs-opaque;
 int index_in_cluster;
@@ -634,29 +617,27 @@ static int qcow_co_readv(BlockDriverState *bs, int64_t 
sector_num,
  int nb_sectors, QEMUIOVector *qiov)
 {
 BDRVQcowState *s = bs-opaque;
-QCowAIOCB *acb;
+QCowAIOCB acb;
 int ret;
 
-acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, 0);
+qcow_aio_setup(bs, sector_num, qiov, nb_sectors, 0, acb);
 
 qemu_co_mutex_lock(s-lock);
 do {
-ret = qcow_aio_read_cb(acb);
+ret = qcow_aio_read_cb(acb);
 } while (ret  0);
 qemu_co_mutex_unlock(s-lock);
 
-if (acb-qiov-niov  1) {
-qemu_iovec_from_buffer(acb-qiov, acb-orig_buf, acb-qiov-size);
-qemu_vfree(acb-orig_buf);
+if (acb.qiov-niov  1) {
+qemu_iovec_from_buffer(acb.qiov, acb.orig_buf, acb.qiov-size);
+qemu_vfree(acb.orig_buf);
 }
-qemu_aio_release(acb);
 
 return ret;
 }
 
-static int qcow_aio_write_cb(void *opaque)
+static int qcow_aio_write_cb(QCowAIOCB *acb)
 {
-QCowAIOCB *acb = opaque;
 BlockDriverState *bs = acb-common.bs;
 BDRVQcowState *s = bs-opaque;
 int index_in_cluster;
@@ -714,23 +695,22 @@ static int qcow_co_writev(BlockDriverState *bs, int64_t 
sector_num,
   int nb_sectors, QEMUIOVector *qiov)
 {
 BDRVQcowState *s = bs-opaque;
-QCowAIOCB *acb;
+QCowAIOCB acb;
 int ret;
 
 s-cluster_cache_offset = -1; /* disable compressed cache */
 
-acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, 1);
+qcow_aio_setup(bs, sector_num, qiov, nb_sectors, 1, acb);
 
 qemu_co_mutex_lock(s-lock);
 do {
-ret = qcow_aio_write_cb(acb);
+ret = qcow_aio_write_cb(acb);
 } while (ret  0);
 qemu_co_mutex_unlock(s-lock);
 
-if (acb-qiov-niov  1) {
-qemu_vfree(acb-orig_buf);
+if (acb.qiov-niov  1) {
+qemu_vfree(acb.orig_buf);
 }
-qemu_aio_release(acb);
 
 return ret;
 }
diff --git a/block/qcow2.c b/block/qcow2.c
index f07d550..edc068e 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -388,17 +388,6 @@ typedef struct QCowAIOCB {
 QLIST_ENTRY(QCowAIOCB) next_depend;
 } QCowAIOCB;
 
-static void qcow2_aio_cancel(BlockDriverAIOCB *blockacb)
-{
-QCowAIOCB *acb = container_of(blockacb, QCowAIOCB, common);
-qemu_aio_release(acb);
-}
-
-static AIOPool qcow2_aio_pool = {
-.aiocb_size = sizeof(QCowAIOCB),
-.cancel = qcow2_aio_cancel,
-};
-
 /*
  * Returns 0 when the request is completed successfully, 1 when there is still
  * a part left to do and -errno in error cases.
@@ -528,13 +517,10 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, int64_t sector_num,
   QEMUIOVector *qiov, int nb_sectors,
   BlockDriverCompletionFunc *cb,
-  void *opaque, int is_write)
+  void *opaque, int is_write, QCowAIOCB *acb)
 {
-QCowAIOCB *acb;
-
-acb = qemu_aio_get(qcow2_aio_pool, bs, cb, opaque);
-if (!acb)
-return NULL;
+memset(acb, 0, sizeof(*acb));
+acb-common.bs = bs;
 acb-sector_num = sector_num;
 acb-qiov = qiov;
 acb-is_write = is_write;
@@ -554,19 +540,18 @@ static int qcow2_co_readv(BlockDriverState *bs, int64_t 
sector_num,
   int

[Qemu-devel] [PATCH v2 15/15] qcow2: small optimization

2011-08-09 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2-refcount.c |   11 +--
 1 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 0e31868..318d66d 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -681,14 +681,13 @@ void qcow2_create_refcount_update(QCowCreateState *s, 
int64_t offset,
 int64_t size)
 {
 int refcount;
-int64_t start, last, cluster_offset;
+int64_t start, last, cluster;
 uint16_t *p;
 
-start = offset  ~(s-cluster_size - 1);
-last = (offset + size - 1)   ~(s-cluster_size - 1);
-for(cluster_offset = start; cluster_offset = last;
-cluster_offset += s-cluster_size) {
-p = s-refcount_block[cluster_offset  s-cluster_bits];
+start = offset  s-cluster_bits;
+last = (offset + size - 1)   s-cluster_bits;
+for (cluster = start; cluster = last; ++cluster) {
+p = s-refcount_block[cluster];
 refcount = be16_to_cpu(*p);
 refcount++;
 *p = cpu_to_be16(refcount);
-- 
1.7.1




[Qemu-devel] [PATCH v2 13/15] qcow2: remove memory leak

2011-08-09 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 6073568..aed8da7 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -492,6 +492,7 @@ fail:
 qemu_co_mutex_unlock(s-lock);
 
 qemu_iovec_destroy(hd_qiov);
+qemu_free(cluster_data);
 
 return ret;
 }
@@ -604,6 +605,7 @@ fail:
 qemu_co_mutex_unlock(s-lock);
 
 qemu_iovec_destroy(hd_qiov);
+qemu_free(cluster_data);
 
 return ret;
 }
-- 
1.7.1




[Qemu-devel] [PATCH v2 14/15] qcow2: small math optimization

2011-08-09 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2-refcount.c |5 +
 1 files changed, 1 insertions(+), 4 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 14b2f67..0e31868 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -140,10 +140,7 @@ static unsigned int next_refcount_table_size(BDRVQcowState 
*s,
 static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a,
 uint64_t offset_b)
 {
-uint64_t block_a = offset_a  (2 * s-cluster_bits - REFCOUNT_SHIFT);
-uint64_t block_b = offset_b  (2 * s-cluster_bits - REFCOUNT_SHIFT);
-
-return (block_a == block_b);
+return !((offset_a ^ offset_b)  (2 * s-cluster_bits - REFCOUNT_SHIFT));
 }
 
 /*
-- 
1.7.1




[Qemu-devel] [PATCH v2 11/15] qcow2: reindent and use while before the big jump

2011-08-09 Thread Frediano Ziglio
prepare to remove read/write callbacks

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |  272 -
 1 files changed, 135 insertions(+), 137 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index c7445cc..dfb969e 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -395,107 +395,105 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 int cur_nr_sectors; /* number of sectors in current iteration */
 uint64_t cluster_offset = 0;
 
-if (acb-remaining_sectors == 0) {
-/* request completed */
-return 0;
-}
-
-/* prepare next request */
-cur_nr_sectors = acb-remaining_sectors;
-if (s-crypt_method) {
-cur_nr_sectors = MIN(cur_nr_sectors,
-QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors);
-}
+while (acb-remaining_sectors != 0) {
 
-ret = qcow2_get_cluster_offset(bs, acb-sector_num  9,
-cur_nr_sectors, cluster_offset);
-if (ret  0) {
-return ret;
-}
-
-index_in_cluster = acb-sector_num  (s-cluster_sectors - 1);
-
-qemu_iovec_reset(acb-hd_qiov);
-qemu_iovec_copy(acb-hd_qiov, acb-qiov, acb-bytes_done,
-cur_nr_sectors * 512);
-
-if (!cluster_offset) {
-
-if (bs-backing_hd) {
-/* read from the base image */
-n1 = qcow2_backing_read1(bs-backing_hd, acb-hd_qiov,
-acb-sector_num, cur_nr_sectors);
-if (n1  0) {
-BLKDBG_EVENT(bs-file, BLKDBG_READ_BACKING_AIO);
-qemu_co_mutex_unlock(s-lock);
-ret = bdrv_co_readv(bs-backing_hd, acb-sector_num,
-n1, acb-hd_qiov);
-qemu_co_mutex_lock(s-lock);
-if (ret  0) {
-return ret;
-}
-}
-} else {
-/* Note: in this case, no need to wait */
-qemu_iovec_memset(acb-hd_qiov, 0, 512 * cur_nr_sectors);
+/* prepare next request */
+cur_nr_sectors = acb-remaining_sectors;
+if (s-crypt_method) {
+cur_nr_sectors = MIN(cur_nr_sectors,
+QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors);
 }
-} else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
-/* add AIO support for compressed blocks ? */
-ret = qcow2_decompress_cluster(bs, cluster_offset);
+
+ret = qcow2_get_cluster_offset(bs, acb-sector_num  9,
+cur_nr_sectors, cluster_offset);
 if (ret  0) {
 return ret;
 }
 
-qemu_iovec_from_buffer(acb-hd_qiov,
-s-cluster_cache + index_in_cluster * 512,
-512 * cur_nr_sectors);
-} else {
-if ((cluster_offset  511) != 0) {
-return -EIO;
-}
+index_in_cluster = acb-sector_num  (s-cluster_sectors - 1);
 
-if (s-crypt_method) {
-/*
- * For encrypted images, read everything into a temporary
- * contiguous buffer on which the AES functions can work.
- */
-if (!acb-cluster_data) {
-acb-cluster_data =
-qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS * s-cluster_size);
+qemu_iovec_reset(acb-hd_qiov);
+qemu_iovec_copy(acb-hd_qiov, acb-qiov, acb-bytes_done,
+cur_nr_sectors * 512);
+
+if (!cluster_offset) {
+
+if (bs-backing_hd) {
+/* read from the base image */
+n1 = qcow2_backing_read1(bs-backing_hd, acb-hd_qiov,
+acb-sector_num, cur_nr_sectors);
+if (n1  0) {
+BLKDBG_EVENT(bs-file, BLKDBG_READ_BACKING_AIO);
+qemu_co_mutex_unlock(s-lock);
+ret = bdrv_co_readv(bs-backing_hd, acb-sector_num,
+n1, acb-hd_qiov);
+qemu_co_mutex_lock(s-lock);
+if (ret  0) {
+return ret;
+}
+}
+} else {
+/* Note: in this case, no need to wait */
+qemu_iovec_memset(acb-hd_qiov, 0, 512 * cur_nr_sectors);
+}
+} else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
+/* add AIO support for compressed blocks ? */
+ret = qcow2_decompress_cluster(bs, cluster_offset);
+if (ret  0) {
+return ret;
 }
 
-assert(cur_nr_sectors =
-QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors);
-qemu_iovec_reset(acb-hd_qiov);
-qemu_iovec_add(acb-hd_qiov, acb-cluster_data,
+qemu_iovec_from_buffer(acb-hd_qiov,
+s-cluster_cache + index_in_cluster * 512,
 512 * cur_nr_sectors);
-}
+} else {
+if ((cluster_offset  511) != 0) {
+return -EIO;
+}
 
-BLKDBG_EVENT(bs-file

[Qemu-devel] [PATCH v2 10/15] qcow2: remove common from QCowAIOCB

2011-08-09 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |8 
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 446946e..c7445cc 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -373,7 +373,7 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector 
*qiov,
 }
 
 typedef struct QCowAIOCB {
-BlockDriverAIOCB common;
+BlockDriverState *bs;
 int64_t sector_num;
 QEMUIOVector *qiov;
 int remaining_sectors;
@@ -388,7 +388,7 @@ typedef struct QCowAIOCB {
  */
 static int qcow2_aio_read_cb(QCowAIOCB *acb)
 {
-BlockDriverState *bs = acb-common.bs;
+BlockDriverState *bs = acb-bs;
 BDRVQcowState *s = bs-opaque;
 int index_in_cluster, n1;
 int ret;
@@ -504,7 +504,7 @@ static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, 
int64_t sector_num,
   void *opaque, QCowAIOCB *acb)
 {
 memset(acb, 0, sizeof(*acb));
-acb-common.bs = bs;
+acb-bs = bs;
 acb-sector_num = sector_num;
 acb-qiov = qiov;
 
@@ -556,7 +556,7 @@ static void run_dependent_requests(BDRVQcowState *s, 
QCowL2Meta *m)
  */
 static int qcow2_aio_write_cb(QCowAIOCB *acb)
 {
-BlockDriverState *bs = acb-common.bs;
+BlockDriverState *bs = acb-bs;
 BDRVQcowState *s = bs-opaque;
 int index_in_cluster;
 int n_end;
-- 
1.7.1




[Qemu-devel] [PATCH v2 06/15] qcow2: removed unused fields

2011-08-09 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |   10 +++---
 1 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index edc068e..5c454bb 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -381,11 +381,8 @@ typedef struct QCowAIOCB {
 uint64_t bytes_done;
 uint64_t cluster_offset;
 uint8_t *cluster_data;
-bool is_write;
 QEMUIOVector hd_qiov;
-QEMUBH *bh;
 QCowL2Meta l2meta;
-QLIST_ENTRY(QCowAIOCB) next_depend;
 } QCowAIOCB;
 
 /*
@@ -517,13 +514,12 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, int64_t sector_num,
   QEMUIOVector *qiov, int nb_sectors,
   BlockDriverCompletionFunc *cb,
-  void *opaque, int is_write, QCowAIOCB *acb)
+  void *opaque, QCowAIOCB *acb)
 {
 memset(acb, 0, sizeof(*acb));
 acb-common.bs = bs;
 acb-sector_num = sector_num;
 acb-qiov = qiov;
-acb-is_write = is_write;
 
 qemu_iovec_init(acb-hd_qiov, qiov-niov);
 
@@ -543,7 +539,7 @@ static int qcow2_co_readv(BlockDriverState *bs, int64_t 
sector_num,
 QCowAIOCB acb;
 int ret;
 
-qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, NULL, NULL, 0, acb);
+qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, NULL, NULL, acb);
 
 qemu_co_mutex_lock(s-lock);
 do {
@@ -658,7 +654,7 @@ static int qcow2_co_writev(BlockDriverState *bs,
 QCowAIOCB acb;
 int ret;
 
-qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, NULL, NULL, 1, acb);
+qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, NULL, NULL, acb);
 s-cluster_cache_offset = -1; /* disable compressed cache */
 
 qemu_co_mutex_lock(s-lock);
-- 
1.7.1




[Qemu-devel] [PATCH v2 07/15] qcow2: removed cur_nr_sectors field in QCowAIOCB

2011-08-09 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |   98 +
 1 files changed, 43 insertions(+), 55 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 5c454bb..0cf4465 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -377,7 +377,6 @@ typedef struct QCowAIOCB {
 int64_t sector_num;
 QEMUIOVector *qiov;
 int remaining_sectors;
-int cur_nr_sectors;/* number of sectors in current iteration */
 uint64_t bytes_done;
 uint64_t cluster_offset;
 uint8_t *cluster_data;
@@ -395,42 +394,22 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 BDRVQcowState *s = bs-opaque;
 int index_in_cluster, n1;
 int ret;
-
-/* post process the read buffer */
-if (!acb-cluster_offset) {
-/* nothing to do */
-} else if (acb-cluster_offset  QCOW_OFLAG_COMPRESSED) {
-/* nothing to do */
-} else {
-if (s-crypt_method) {
-qcow2_encrypt_sectors(s, acb-sector_num,  acb-cluster_data,
-acb-cluster_data, acb-cur_nr_sectors, 0, 
s-aes_decrypt_key);
-qemu_iovec_reset(acb-hd_qiov);
-qemu_iovec_copy(acb-hd_qiov, acb-qiov, acb-bytes_done,
-acb-cur_nr_sectors * 512);
-qemu_iovec_from_buffer(acb-hd_qiov, acb-cluster_data,
-512 * acb-cur_nr_sectors);
-}
-}
-
-acb-remaining_sectors -= acb-cur_nr_sectors;
-acb-sector_num += acb-cur_nr_sectors;
-acb-bytes_done += acb-cur_nr_sectors * 512;
+int cur_nr_sectors; /* number of sectors in current iteration */
 
 if (acb-remaining_sectors == 0) {
 /* request completed */
 return 0;
 }
 
-/* prepare next AIO request */
-acb-cur_nr_sectors = acb-remaining_sectors;
+/* prepare next request */
+cur_nr_sectors = acb-remaining_sectors;
 if (s-crypt_method) {
-acb-cur_nr_sectors = MIN(acb-cur_nr_sectors,
+cur_nr_sectors = MIN(cur_nr_sectors,
 QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors);
 }
 
 ret = qcow2_get_cluster_offset(bs, acb-sector_num  9,
-acb-cur_nr_sectors, acb-cluster_offset);
+cur_nr_sectors, acb-cluster_offset);
 if (ret  0) {
 return ret;
 }
@@ -439,14 +418,14 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 
 qemu_iovec_reset(acb-hd_qiov);
 qemu_iovec_copy(acb-hd_qiov, acb-qiov, acb-bytes_done,
-acb-cur_nr_sectors * 512);
+cur_nr_sectors * 512);
 
 if (!acb-cluster_offset) {
 
 if (bs-backing_hd) {
 /* read from the base image */
 n1 = qcow2_backing_read1(bs-backing_hd, acb-hd_qiov,
-acb-sector_num, acb-cur_nr_sectors);
+acb-sector_num, cur_nr_sectors);
 if (n1  0) {
 BLKDBG_EVENT(bs-file, BLKDBG_READ_BACKING_AIO);
 qemu_co_mutex_unlock(s-lock);
@@ -457,11 +436,9 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 return ret;
 }
 }
-return 1;
 } else {
 /* Note: in this case, no need to wait */
-qemu_iovec_memset(acb-hd_qiov, 0, 512 * acb-cur_nr_sectors);
-return 1;
+qemu_iovec_memset(acb-hd_qiov, 0, 512 * cur_nr_sectors);
 }
 } else if (acb-cluster_offset  QCOW_OFLAG_COMPRESSED) {
 /* add AIO support for compressed blocks ? */
@@ -472,9 +449,7 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 
 qemu_iovec_from_buffer(acb-hd_qiov,
 s-cluster_cache + index_in_cluster * 512,
-512 * acb-cur_nr_sectors);
-
-return 1;
+512 * cur_nr_sectors);
 } else {
 if ((acb-cluster_offset  511) != 0) {
 return -EIO;
@@ -490,24 +465,37 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS * s-cluster_size);
 }
 
-assert(acb-cur_nr_sectors =
+assert(cur_nr_sectors =
 QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors);
 qemu_iovec_reset(acb-hd_qiov);
 qemu_iovec_add(acb-hd_qiov, acb-cluster_data,
-512 * acb-cur_nr_sectors);
+512 * cur_nr_sectors);
 }
 
 BLKDBG_EVENT(bs-file, BLKDBG_READ_AIO);
 qemu_co_mutex_unlock(s-lock);
 ret = bdrv_co_readv(bs-file,
 (acb-cluster_offset  9) + index_in_cluster,
-acb-cur_nr_sectors, acb-hd_qiov);
+cur_nr_sectors, acb-hd_qiov);
 qemu_co_mutex_lock(s-lock);
 if (ret  0) {
 return ret;
 }
+if (s-crypt_method) {
+qcow2_encrypt_sectors(s, acb-sector_num,  acb-cluster_data,
+acb-cluster_data, cur_nr_sectors, 0, s-aes_decrypt_key);
+qemu_iovec_reset(acb-hd_qiov);
+qemu_iovec_copy(acb-hd_qiov, acb

[Qemu-devel] [PATCH v2 08/15] qcow2: remove l2meta from QCowAIOCB

2011-08-09 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |   15 ---
 1 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 0cf4465..adf31ce 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -381,7 +381,6 @@ typedef struct QCowAIOCB {
 uint64_t cluster_offset;
 uint8_t *cluster_data;
 QEMUIOVector hd_qiov;
-QCowL2Meta l2meta;
 } QCowAIOCB;
 
 /*
@@ -514,8 +513,6 @@ static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, 
int64_t sector_num,
 acb-bytes_done = 0;
 acb-remaining_sectors = nb_sectors;
 acb-cluster_offset = 0;
-acb-l2meta.nb_clusters = 0;
-qemu_co_queue_init(acb-l2meta.dependent_requests);
 return acb;
 }
 
@@ -566,6 +563,10 @@ static int qcow2_aio_write_cb(QCowAIOCB *acb)
 int n_end;
 int ret;
 int cur_nr_sectors; /* number of sectors in current iteration */
+QCowL2Meta l2meta;
+
+l2meta.nb_clusters = 0;
+qemu_co_queue_init(l2meta.dependent_requests);
 
 if (acb-remaining_sectors == 0) {
 /* request completed */
@@ -579,12 +580,12 @@ static int qcow2_aio_write_cb(QCowAIOCB *acb)
 n_end = QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors;
 
 ret = qcow2_alloc_cluster_offset(bs, acb-sector_num  9,
-index_in_cluster, n_end, cur_nr_sectors, acb-l2meta);
+index_in_cluster, n_end, cur_nr_sectors, l2meta);
 if (ret  0) {
 return ret;
 }
 
-acb-cluster_offset = acb-l2meta.cluster_offset;
+acb-cluster_offset = l2meta.cluster_offset;
 assert((acb-cluster_offset  511) == 0);
 
 qemu_iovec_reset(acb-hd_qiov);
@@ -618,9 +619,9 @@ static int qcow2_aio_write_cb(QCowAIOCB *acb)
 return ret;
 }
 
-ret = qcow2_alloc_cluster_link_l2(bs, acb-l2meta);
+ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
 
-run_dependent_requests(s, acb-l2meta);
+run_dependent_requests(s, l2meta);
 
 if (ret  0) {
 return ret;
-- 
1.7.1




[Qemu-devel] [PATCH v2 09/15] qcow2: remove cluster_offset from QCowAIOCB

2011-08-09 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |   22 +++---
 1 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index adf31ce..446946e 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -378,7 +378,6 @@ typedef struct QCowAIOCB {
 QEMUIOVector *qiov;
 int remaining_sectors;
 uint64_t bytes_done;
-uint64_t cluster_offset;
 uint8_t *cluster_data;
 QEMUIOVector hd_qiov;
 } QCowAIOCB;
@@ -394,6 +393,7 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 int index_in_cluster, n1;
 int ret;
 int cur_nr_sectors; /* number of sectors in current iteration */
+uint64_t cluster_offset = 0;
 
 if (acb-remaining_sectors == 0) {
 /* request completed */
@@ -408,7 +408,7 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 }
 
 ret = qcow2_get_cluster_offset(bs, acb-sector_num  9,
-cur_nr_sectors, acb-cluster_offset);
+cur_nr_sectors, cluster_offset);
 if (ret  0) {
 return ret;
 }
@@ -419,7 +419,7 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 qemu_iovec_copy(acb-hd_qiov, acb-qiov, acb-bytes_done,
 cur_nr_sectors * 512);
 
-if (!acb-cluster_offset) {
+if (!cluster_offset) {
 
 if (bs-backing_hd) {
 /* read from the base image */
@@ -439,9 +439,9 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 /* Note: in this case, no need to wait */
 qemu_iovec_memset(acb-hd_qiov, 0, 512 * cur_nr_sectors);
 }
-} else if (acb-cluster_offset  QCOW_OFLAG_COMPRESSED) {
+} else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
 /* add AIO support for compressed blocks ? */
-ret = qcow2_decompress_cluster(bs, acb-cluster_offset);
+ret = qcow2_decompress_cluster(bs, cluster_offset);
 if (ret  0) {
 return ret;
 }
@@ -450,7 +450,7 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 s-cluster_cache + index_in_cluster * 512,
 512 * cur_nr_sectors);
 } else {
-if ((acb-cluster_offset  511) != 0) {
+if ((cluster_offset  511) != 0) {
 return -EIO;
 }
 
@@ -474,7 +474,7 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 BLKDBG_EVENT(bs-file, BLKDBG_READ_AIO);
 qemu_co_mutex_unlock(s-lock);
 ret = bdrv_co_readv(bs-file,
-(acb-cluster_offset  9) + index_in_cluster,
+(cluster_offset  9) + index_in_cluster,
 cur_nr_sectors, acb-hd_qiov);
 qemu_co_mutex_lock(s-lock);
 if (ret  0) {
@@ -512,7 +512,6 @@ static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, 
int64_t sector_num,
 
 acb-bytes_done = 0;
 acb-remaining_sectors = nb_sectors;
-acb-cluster_offset = 0;
 return acb;
 }
 
@@ -564,6 +563,7 @@ static int qcow2_aio_write_cb(QCowAIOCB *acb)
 int ret;
 int cur_nr_sectors; /* number of sectors in current iteration */
 QCowL2Meta l2meta;
+uint64_t cluster_offset;
 
 l2meta.nb_clusters = 0;
 qemu_co_queue_init(l2meta.dependent_requests);
@@ -585,8 +585,8 @@ static int qcow2_aio_write_cb(QCowAIOCB *acb)
 return ret;
 }
 
-acb-cluster_offset = l2meta.cluster_offset;
-assert((acb-cluster_offset  511) == 0);
+cluster_offset = l2meta.cluster_offset;
+assert((cluster_offset  511) == 0);
 
 qemu_iovec_reset(acb-hd_qiov);
 qemu_iovec_copy(acb-hd_qiov, acb-qiov, acb-bytes_done,
@@ -612,7 +612,7 @@ static int qcow2_aio_write_cb(QCowAIOCB *acb)
 BLKDBG_EVENT(bs-file, BLKDBG_WRITE_AIO);
 qemu_co_mutex_unlock(s-lock);
 ret = bdrv_co_writev(bs-file,
- (acb-cluster_offset  9) + index_in_cluster,
+ (cluster_offset  9) + index_in_cluster,
  cur_nr_sectors, acb-hd_qiov);
 qemu_co_mutex_lock(s-lock);
 if (ret  0) {
-- 
1.7.1




[Qemu-devel] [PATCH v2 05/15] qcow: remove old #undefined code

2011-08-09 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow.c |   63 --
 1 files changed, 0 insertions(+), 63 deletions(-)

diff --git a/block/qcow.c b/block/qcow.c
index 00f339f..0989799 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -190,24 +190,6 @@ static int qcow_set_key(BlockDriverState *bs, const char 
*key)
 return -1;
 if (AES_set_decrypt_key(keybuf, 128, s-aes_decrypt_key) != 0)
 return -1;
-#if 0
-/* test */
-{
-uint8_t in[16];
-uint8_t out[16];
-uint8_t tmp[16];
-for(i=0;i16;i++)
-in[i] = i;
-AES_encrypt(in, tmp, s-aes_encrypt_key);
-AES_decrypt(tmp, out, s-aes_decrypt_key);
-for(i = 0; i  16; i++)
-printf( %02x, tmp[i]);
-printf(\n);
-for(i = 0; i  16; i++)
-printf( %02x, out[i]);
-printf(\n);
-}
-#endif
 return 0;
 }
 
@@ -441,51 +423,6 @@ static int decompress_cluster(BlockDriverState *bs, 
uint64_t cluster_offset)
 return 0;
 }
 
-#if 0
-
-static int qcow_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
-BDRVQcowState *s = bs-opaque;
-int ret, index_in_cluster, n;
-uint64_t cluster_offset;
-
-while (nb_sectors  0) {
-cluster_offset = get_cluster_offset(bs, sector_num  9, 0, 0, 0, 0);
-index_in_cluster = sector_num  (s-cluster_sectors - 1);
-n = s-cluster_sectors - index_in_cluster;
-if (n  nb_sectors)
-n = nb_sectors;
-if (!cluster_offset) {
-if (bs-backing_hd) {
-/* read from the base image */
-ret = bdrv_read(bs-backing_hd, sector_num, buf, n);
-if (ret  0)
-return -1;
-} else {
-memset(buf, 0, 512 * n);
-}
-} else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
-if (decompress_cluster(bs, cluster_offset)  0)
-return -1;
-memcpy(buf, s-cluster_cache + index_in_cluster * 512, 512 * n);
-} else {
-ret = bdrv_pread(bs-file, cluster_offset + index_in_cluster * 
512, buf, n * 512);
-if (ret != n * 512)
-return -1;
-if (s-crypt_method) {
-encrypt_sectors(s, sector_num, buf, buf, n, 0,
-s-aes_decrypt_key);
-}
-}
-nb_sectors -= n;
-sector_num += n;
-buf += n * 512;
-}
-return 0;
-}
-#endif
-
 static int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
  int nb_sectors, QEMUIOVector *qiov)
 {
-- 
1.7.1




[Qemu-devel] [PATCH v2 12/15] qcow2: removed QCowAIOCB entirely

2011-08-09 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |  207 ++---
 1 files changed, 80 insertions(+), 127 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index dfb969e..6073568 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -372,83 +372,77 @@ int qcow2_backing_read1(BlockDriverState *bs, 
QEMUIOVector *qiov,
 return n1;
 }
 
-typedef struct QCowAIOCB {
-BlockDriverState *bs;
-int64_t sector_num;
-QEMUIOVector *qiov;
-int remaining_sectors;
-uint64_t bytes_done;
-uint8_t *cluster_data;
-QEMUIOVector hd_qiov;
-} QCowAIOCB;
-
-/*
- * Returns 0 when the request is completed successfully, 1 when there is still
- * a part left to do and -errno in error cases.
- */
-static int qcow2_aio_read_cb(QCowAIOCB *acb)
+static int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
+  int remaining_sectors, QEMUIOVector *qiov)
 {
-BlockDriverState *bs = acb-bs;
 BDRVQcowState *s = bs-opaque;
 int index_in_cluster, n1;
 int ret;
 int cur_nr_sectors; /* number of sectors in current iteration */
 uint64_t cluster_offset = 0;
+uint64_t bytes_done = 0;
+QEMUIOVector hd_qiov;
+uint8_t *cluster_data = NULL;
 
-while (acb-remaining_sectors != 0) {
+qemu_iovec_init(hd_qiov, qiov-niov);
+
+qemu_co_mutex_lock(s-lock);
+
+while (remaining_sectors != 0) {
 
 /* prepare next request */
-cur_nr_sectors = acb-remaining_sectors;
+cur_nr_sectors = remaining_sectors;
 if (s-crypt_method) {
 cur_nr_sectors = MIN(cur_nr_sectors,
 QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors);
 }
 
-ret = qcow2_get_cluster_offset(bs, acb-sector_num  9,
+ret = qcow2_get_cluster_offset(bs, sector_num  9,
 cur_nr_sectors, cluster_offset);
 if (ret  0) {
-return ret;
+goto fail;
 }
 
-index_in_cluster = acb-sector_num  (s-cluster_sectors - 1);
+index_in_cluster = sector_num  (s-cluster_sectors - 1);
 
-qemu_iovec_reset(acb-hd_qiov);
-qemu_iovec_copy(acb-hd_qiov, acb-qiov, acb-bytes_done,
+qemu_iovec_reset(hd_qiov);
+qemu_iovec_copy(hd_qiov, qiov, bytes_done,
 cur_nr_sectors * 512);
 
 if (!cluster_offset) {
 
 if (bs-backing_hd) {
 /* read from the base image */
-n1 = qcow2_backing_read1(bs-backing_hd, acb-hd_qiov,
-acb-sector_num, cur_nr_sectors);
+n1 = qcow2_backing_read1(bs-backing_hd, hd_qiov,
+sector_num, cur_nr_sectors);
 if (n1  0) {
 BLKDBG_EVENT(bs-file, BLKDBG_READ_BACKING_AIO);
 qemu_co_mutex_unlock(s-lock);
-ret = bdrv_co_readv(bs-backing_hd, acb-sector_num,
-n1, acb-hd_qiov);
+ret = bdrv_co_readv(bs-backing_hd, sector_num,
+n1, hd_qiov);
 qemu_co_mutex_lock(s-lock);
 if (ret  0) {
-return ret;
+goto fail;
 }
 }
 } else {
 /* Note: in this case, no need to wait */
-qemu_iovec_memset(acb-hd_qiov, 0, 512 * cur_nr_sectors);
+qemu_iovec_memset(hd_qiov, 0, 512 * cur_nr_sectors);
 }
 } else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
 /* add AIO support for compressed blocks ? */
 ret = qcow2_decompress_cluster(bs, cluster_offset);
 if (ret  0) {
-return ret;
+goto fail;
 }
 
-qemu_iovec_from_buffer(acb-hd_qiov,
+qemu_iovec_from_buffer(hd_qiov,
 s-cluster_cache + index_in_cluster * 512,
 512 * cur_nr_sectors);
 } else {
 if ((cluster_offset  511) != 0) {
-return -EIO;
+ret = -EIO;
+goto fail;
 }
 
 if (s-crypt_method) {
@@ -456,15 +450,15 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
  * For encrypted images, read everything into a temporary
  * contiguous buffer on which the AES functions can work.
  */
-if (!acb-cluster_data) {
-acb-cluster_data =
+if (!cluster_data) {
+cluster_data =
 qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS * 
s-cluster_size);
 }
 
 assert(cur_nr_sectors =
 QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors);
-qemu_iovec_reset(acb-hd_qiov);
-qemu_iovec_add(acb-hd_qiov, acb-cluster_data,
+qemu_iovec_reset(hd_qiov

[Qemu-devel] [PATCH v2 04/15] qcow: embed qcow_aio_read_cb into qcow_co_readv and qcow_aio_write_cb into qcow_co_writev

2011-08-09 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow.c |  291 -
 1 files changed, 123 insertions(+), 168 deletions(-)

diff --git a/block/qcow.c b/block/qcow.c
index e17f9c5..00f339f 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -486,223 +486,178 @@ static int qcow_read(BlockDriverState *bs, int64_t 
sector_num,
 }
 #endif
 
-typedef struct QCowAIOCB {
-BlockDriverState *bs;
-int64_t sector_num;
-QEMUIOVector *qiov;
-uint8_t *buf;
-void *orig_buf;
-int nb_sectors;
-} QCowAIOCB;
-
-static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
-int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-int is_write, QCowAIOCB *acb)
-{
-acb-bs = bs;
-acb-sector_num = sector_num;
-acb-qiov = qiov;
-
-if (qiov-niov  1) {
-acb-buf = acb-orig_buf = qemu_blockalign(bs, qiov-size);
-if (is_write)
-qemu_iovec_to_buffer(qiov, acb-buf);
-} else {
-acb-orig_buf = NULL;
-acb-buf = (uint8_t *)qiov-iov-iov_base;
-}
-acb-nb_sectors = nb_sectors;
-return acb;
-}
-
-static int qcow_aio_read_cb(QCowAIOCB *acb)
+static int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
 {
-BlockDriverState *bs = acb-bs;
 BDRVQcowState *s = bs-opaque;
 int index_in_cluster;
-int ret, n;
+int ret = 0, n;
 uint64_t cluster_offset;
 struct iovec hd_iov;
 QEMUIOVector hd_qiov;
+uint8_t *buf;
+void *orig_buf;
 
- redo:
-if (acb-nb_sectors == 0) {
-/* request completed */
-return 0;
+if (qiov-niov  1) {
+buf = orig_buf = qemu_blockalign(bs, qiov-size);
+} else {
+orig_buf = NULL;
+buf = (uint8_t *)qiov-iov-iov_base;
 }
 
-/* prepare next request */
-cluster_offset = get_cluster_offset(bs, acb-sector_num  9,
- 0, 0, 0, 0);
-index_in_cluster = acb-sector_num  (s-cluster_sectors - 1);
-n = s-cluster_sectors - index_in_cluster;
-if (n  acb-nb_sectors) {
-n = acb-nb_sectors;
-}
+qemu_co_mutex_lock(s-lock);
+
+while (nb_sectors != 0) {
+/* prepare next request */
+cluster_offset = get_cluster_offset(bs, sector_num  9,
+ 0, 0, 0, 0);
+index_in_cluster = sector_num  (s-cluster_sectors - 1);
+n = s-cluster_sectors - index_in_cluster;
+if (n  nb_sectors) {
+n = nb_sectors;
+}
 
-if (!cluster_offset) {
-if (bs-backing_hd) {
-/* read from the base image */
-hd_iov.iov_base = (void *)acb-buf;
+if (!cluster_offset) {
+if (bs-backing_hd) {
+/* read from the base image */
+hd_iov.iov_base = (void *)buf;
+hd_iov.iov_len = n * 512;
+qemu_iovec_init_external(hd_qiov, hd_iov, 1);
+qemu_co_mutex_unlock(s-lock);
+ret = bdrv_co_readv(bs-backing_hd, sector_num,
+n, hd_qiov);
+qemu_co_mutex_lock(s-lock);
+if (ret  0) {
+goto fail;
+}
+} else {
+/* Note: in this case, no need to wait */
+memset(buf, 0, 512 * n);
+}
+} else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
+/* add AIO support for compressed blocks ? */
+if (decompress_cluster(bs, cluster_offset)  0) {
+goto fail;
+}
+memcpy(buf,
+   s-cluster_cache + index_in_cluster * 512, 512 * n);
+} else {
+if ((cluster_offset  511) != 0) {
+goto fail;
+}
+hd_iov.iov_base = (void *)buf;
 hd_iov.iov_len = n * 512;
 qemu_iovec_init_external(hd_qiov, hd_iov, 1);
 qemu_co_mutex_unlock(s-lock);
-ret = bdrv_co_readv(bs-backing_hd, acb-sector_num,
+ret = bdrv_co_readv(bs-file,
+(cluster_offset  9) + index_in_cluster,
 n, hd_qiov);
 qemu_co_mutex_lock(s-lock);
 if (ret  0) {
-return -EIO;
+break;
+}
+if (s-crypt_method) {
+encrypt_sectors(s, sector_num, buf, buf,
+n, 0,
+s-aes_decrypt_key);
 }
-} else {
-/* Note: in this case, no need to wait */
-memset(acb-buf, 0, 512 * n);
-}
-} else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
-/* add AIO support for compressed blocks ? */
-if (decompress_cluster(bs, cluster_offset)  0) {
-return -EIO;
-}
-memcpy(acb-buf,
-   s

Re: [Qemu-devel] [PATCH] posix-aio-compat: fix latency issues

2011-08-08 Thread Frediano Ziglio
2011/8/8 Avi Kivity a...@redhat.com:
 In certain circumstances, posix-aio-compat can incur a lot of latency:
  - threads are created by vcpu threads, so if vcpu affinity is set,
   aio threads inherit vcpu affinity.  This can cause many aio threads
   to compete for one cpu.
  - we can create up to max_threads (64) aio threads in one go; since a
   pthread_create can take around 30μs, we have up to 2ms of cpu time
   under a global lock.

 Fix by:
  - moving thread creation to the main thread, so we inherit the main
   thread's affinity instead of the vcpu thread's affinity.
  - if a thread is currently being created, and we need to create yet
   another thread, let thread being born create the new thread, reducing
   the amount of time we spend under the main thread.
  - drop the local lock while creating a thread (we may still hold the
   global mutex, though)

 Note this doesn't eliminate latency completely; scheduler artifacts or
 lack of host cpu resources can still cause it.  We may want pre-allocated
 threads when this cannot be tolerated.

 Thanks to Uli Obergfell of Red Hat for his excellent analysis and suggestions.

 Signed-off-by: Avi Kivity a...@redhat.com

Why not calling pthread_attr_setaffinity_np (where available) before
thread creation or shed_setaffinity at thread start instead of telling
another thread to create a thread for us just to get affinity cleared?

Regards
  Frediano

 ---
  posix-aio-compat.c |   48 ++--
  1 files changed, 46 insertions(+), 2 deletions(-)

 diff --git a/posix-aio-compat.c b/posix-aio-compat.c
 index 8dc00cb..aa30673 100644
 --- a/posix-aio-compat.c
 +++ b/posix-aio-compat.c
 @@ -30,6 +30,7 @@

  #include block/raw-posix-aio.h

 +static void do_spawn_thread(void);

  struct qemu_paiocb {
     BlockDriverAIOCB common;
 @@ -64,6 +65,9 @@ static pthread_attr_t attr;
  static int max_threads = 64;
  static int cur_threads = 0;
  static int idle_threads = 0;
 +static int new_threads = 0;     /* backlog of threads we need to create */
 +static int pending_threads = 0; /* threads created but not running yet */
 +static QEMUBH *new_thread_bh;
  static QTAILQ_HEAD(, qemu_paiocb) request_list;

  #ifdef CONFIG_PREADV
 @@ -311,6 +315,13 @@ static void *aio_thread(void *unused)

     pid = getpid();

 +    mutex_lock(lock);
 +    if (new_threads) {
 +        do_spawn_thread();
 +    }
 +    pending_threads--;
 +    mutex_unlock(lock);
 +
     while (1) {
         struct qemu_paiocb *aiocb;
         ssize_t ret = 0;
 @@ -381,11 +392,18 @@ static void *aio_thread(void *unused)
     return NULL;
  }

 -static void spawn_thread(void)
 +static void do_spawn_thread(void)
  {
     sigset_t set, oldset;

 -    cur_threads++;
 +    if (!new_threads) {
 +        return;
 +    }
 +
 +    new_threads--;
 +    pending_threads++;
 +
 +    mutex_unlock(lock);

     /* block all signals */
     if (sigfillset(set)) die(sigfillset);
 @@ -394,6 +412,31 @@ static void spawn_thread(void)
     thread_create(thread_id, attr, aio_thread, NULL);

     if (sigprocmask(SIG_SETMASK, oldset, NULL)) die(sigprocmask restore);
 +
 +    mutex_lock(lock);
 +}
 +
 +static void spawn_thread_bh_fn(void *opaque)
 +{
 +    mutex_lock(lock);
 +    do_spawn_thread();
 +    mutex_unlock(lock);
 +}
 +
 +static void spawn_thread(void)
 +{
 +    cur_threads++;
 +    new_threads++;
 +    /* If there are threads being created, they will spawn new workers, so
 +     * we don't spend time creating many threads in a loop holding a mutex or
 +     * starving the current vcpu.
 +     *
 +     * If there are no idle threads, ask the main thread to create one, so we
 +     * inherit the correct affinity instead of the vcpu affinity.
 +     */
 +    if (!pending_threads) {
 +        qemu_bh_schedule(new_thread_bh);
 +    }
  }

  static void qemu_paio_submit(struct qemu_paiocb *aiocb)
 @@ -665,6 +708,7 @@ int paio_init(void)
         die2(ret, pthread_attr_setdetachstate);

     QTAILQ_INIT(request_list);
 +    new_thread_bh = qemu_bh_new(spawn_thread_bh_fn, NULL);

     posix_aio_state = s;
     return 0;
 --
 1.7.5.3






Re: [Qemu-devel] [PATCH] posix-aio-compat: fix latency issues

2011-08-08 Thread Frediano Ziglio
2011/8/8 Avi Kivity a...@redhat.com:
 On 08/08/2011 03:49 PM, Frediano Ziglio wrote:

 2011/8/8 Avi Kivitya...@redhat.com:
   In certain circumstances, posix-aio-compat can incur a lot of latency:
     - threads are created by vcpu threads, so if vcpu affinity is set,
      aio threads inherit vcpu affinity.  This can cause many aio threads
      to compete for one cpu.
     - we can create up to max_threads (64) aio threads in one go; since a
      pthread_create can take around 30μs, we have up to 2ms of cpu time
      under a global lock.
 
   Fix by:
     - moving thread creation to the main thread, so we inherit the main
      thread's affinity instead of the vcpu thread's affinity.
     - if a thread is currently being created, and we need to create yet
      another thread, let thread being born create the new thread,
  reducing
      the amount of time we spend under the main thread.
     - drop the local lock while creating a thread (we may still hold the
      global mutex, though)
 
   Note this doesn't eliminate latency completely; scheduler artifacts or
   lack of host cpu resources can still cause it.  We may want
  pre-allocated
   threads when this cannot be tolerated.
 
   Thanks to Uli Obergfell of Red Hat for his excellent analysis and
  suggestions.
 
   Signed-off-by: Avi Kivitya...@redhat.com

 Why not calling pthread_attr_setaffinity_np (where available) before
 thread creation or shed_setaffinity at thread start instead of telling
 another thread to create a thread for us just to get affinity cleared?


 The entire qemu process may be affined to a subset of the host cpus; we
 don't want to break that.

 For example:

   taskset 0xf0 qemu 
   (qemu) info cpus
 pin individual vcpu threads to host cpus



Just call sched_getaffinity at program start, save to a global
variable and then set this affinity for io threads.
I didn't use affinity that much but from manual it seems that if you
own process you can set affinity as you like.
IMHO this patch introduce a delay in io thread creation due to posting
thread creation to another thread just to set different affinity.

Frediano



Re: [Qemu-devel] [PATCH v2 0.15.0] qcow2: Fix L1 table size after bdrv_snapshot_goto

2011-08-05 Thread Frediano Ziglio
2011/8/4 Kevin Wolf kw...@redhat.com:
 When loading an internal snapshot whose L1 table is smaller than the current 
 L1
 table, the size of the current L1 would be shrunk to the snapshot's L1 size in
 memory, but not on disk. This lead to incorrect refcount updates and 
 eventuelly
 to image corruption.

 Instead of writing the new L1 size to disk, this simply retains the bigger L1
 size that is currently in use and makes sure that the unused part is zeroed.

 Signed-off-by: Kevin Wolf kw...@redhat.com
 ---

 And the moment you send it out, you notice that it's wrong... *sigh*

 v2:
 - Check for s-l1_size  sn-l1_size in order to avoid disasters...

 Philipp, I think this should fix your corruption. Please give it a try.

 Anthony, this must go into 0.15. Given the short time until -rc2, do you 
 prefer
 to pick it up directly or should I send a pull request tomorrow? The patch
 looks obvious, is tested with the given testcase and survives a basic
 qemu-iotests run (though qemu-iotests doesn't exercise snapshots a lot)

 Stefan, please review :-)

  block/qcow2-snapshot.c |    5 -
  1 files changed, 4 insertions(+), 1 deletions(-)

 diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
 index 74823a5..6972e66 100644
 --- a/block/qcow2-snapshot.c
 +++ b/block/qcow2-snapshot.c
 @@ -330,8 +330,11 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char 
 *snapshot_id)
     if (qcow2_grow_l1_table(bs, sn-l1_size, true)  0)
         goto fail;

 -    s-l1_size = sn-l1_size;
 +    if (s-l1_size  sn-l1_size) {
 +        memset(s-l1_table + sn-l1_size, 0, s-l1_size - sn-l1_size);
 +    }
     l1_size2 = s-l1_size * sizeof(uint64_t);
 +
     /* copy the snapshot l1 table to the current l1 table */
     if (bdrv_pread(bs-file, sn-l1_table_offset,
                    s-l1_table, l1_size2) != l1_size2)
 --
 1.7.6


This patch looked odd at first sight. First a qcow2_grow_l1_table is
called to shrink L1 so perhaps should be qcow2_resize_l1_table.
Perhaps also it would be better to clean entries in
qcow2_grow_l1_table instead of  qcow2_snapshot_goto to avoid same
problem in different calls to qcow2_grow_l1_table. The other oddity
(still to understand) is: why does some code use l1_table above
l1_size ??

Frediano



[Qemu-devel] Possible corruption on qcow2_snapshot_goto

2011-08-05 Thread Frediano Ziglio
This function seems to be not that safe.

The first problem is that the first thing it does if free refcount
calling qcow2_update_snapshot_refcount(bs, s-l1_table_offset,
s-l1_size, -1). Now if something goes wrong (crash, disk errors) this
could lead to refcount == 0 (depending on L1 size, cache, open flags).

If table grow and qcow2_grow_l1_table needs to resize on disk it
probably will found some reference counter set to 0 (cause we already
decremented it), fail than qcow2_snapshot_goto return -EIO leaving
refcounts deallocated.

Frediano



Re: [Qemu-devel] [PATCH v2 0.15.0] qcow2: Fix L1 table size after bdrv_snapshot_goto

2011-08-05 Thread Frediano Ziglio
2011/8/5 Kevin Wolf kw...@redhat.com:
 Am 05.08.2011 08:35, schrieb Frediano Ziglio:
 2011/8/4 Kevin Wolf kw...@redhat.com:
 When loading an internal snapshot whose L1 table is smaller than the 
 current L1
 table, the size of the current L1 would be shrunk to the snapshot's L1 size 
 in
 memory, but not on disk. This lead to incorrect refcount updates and 
 eventuelly
 to image corruption.

 Instead of writing the new L1 size to disk, this simply retains the bigger 
 L1
 size that is currently in use and makes sure that the unused part is zeroed.

 Signed-off-by: Kevin Wolf kw...@redhat.com
 ---

 And the moment you send it out, you notice that it's wrong... *sigh*

 v2:
 - Check for s-l1_size  sn-l1_size in order to avoid disasters...

 Philipp, I think this should fix your corruption. Please give it a try.

 Anthony, this must go into 0.15. Given the short time until -rc2, do you 
 prefer
 to pick it up directly or should I send a pull request tomorrow? The patch
 looks obvious, is tested with the given testcase and survives a basic
 qemu-iotests run (though qemu-iotests doesn't exercise snapshots a lot)

 Stefan, please review :-)

  block/qcow2-snapshot.c |    5 -
  1 files changed, 4 insertions(+), 1 deletions(-)

 diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
 index 74823a5..6972e66 100644
 --- a/block/qcow2-snapshot.c
 +++ b/block/qcow2-snapshot.c
 @@ -330,8 +330,11 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const 
 char *snapshot_id)
     if (qcow2_grow_l1_table(bs, sn-l1_size, true)  0)
         goto fail;

 -    s-l1_size = sn-l1_size;
 +    if (s-l1_size  sn-l1_size) {
 +        memset(s-l1_table + sn-l1_size, 0, s-l1_size - sn-l1_size);
 +    }
     l1_size2 = s-l1_size * sizeof(uint64_t);
 +
     /* copy the snapshot l1 table to the current l1 table */
     if (bdrv_pread(bs-file, sn-l1_table_offset,
                    s-l1_table, l1_size2) != l1_size2)
 --
 1.7.6


 This patch looked odd at first sight. First a qcow2_grow_l1_table is
 called to shrink L1 so perhaps should be qcow2_resize_l1_table.

 No, it doesn't shrink the table:

    if (min_size = s-l1_size)
        return 0;


Yes, but perhaps returning success and not clipping anything is not
that correct, perhaps qcow2_snapshot_goto should not call
qcow2_grow_l1_table with a shorter value.

 Perhaps also it would be better to clean entries in
 qcow2_grow_l1_table instead of  qcow2_snapshot_goto to avoid same
 problem in different calls to qcow2_grow_l1_table. The other oddity
 (still to understand) is: why does some code use l1_table above
 l1_size ??

 Which code do you mean specifically?

 Kevin


I think this is the issue

# 1204 - 128 cluster per L2 entry - 128k per L2 entry
# 128 cluster per L1 entry - 16M per L1 entry
qemu-img create -f qcow2 /tmp/sn.qcow2 16m -o cluster_size=1024
qemu-img snapshot -c foo /tmp/sn.qcow2
# extend L1
qemu-io -c 'write -b 0 4M' /tmp/sn.qcow2
# shrink
qemu-img snapshot -a foo /tmp/sn.qcow2
qemu-img check /tmp/sn.qcow2

Well... I was trying to get a leak and got a core instead. I removed
your patch and got leaks.

also, should not be

memset(s-l1_table + sn-l1_size, 0, (s-l1_size - sn-l1_size) *
sizeof(uint64_t));

instead of

memset(s-l1_table + sn-l1_size, 0, s-l1_size - sn-l1_size);

Frediano



Re: [Qemu-devel] [PATCH v2 0.15.0] qcow2: Fix L1 table size after bdrv_snapshot_goto

2011-08-05 Thread Frediano Ziglio
2011/8/5 Frediano Ziglio fredd...@gmail.com:
 2011/8/5 Kevin Wolf kw...@redhat.com:
 Am 05.08.2011 08:35, schrieb Frediano Ziglio:
 2011/8/4 Kevin Wolf kw...@redhat.com:
 When loading an internal snapshot whose L1 table is smaller than the 
 current L1
 table, the size of the current L1 would be shrunk to the snapshot's L1 
 size in
 memory, but not on disk. This lead to incorrect refcount updates and 
 eventuelly
 to image corruption.

 Instead of writing the new L1 size to disk, this simply retains the bigger 
 L1
 size that is currently in use and makes sure that the unused part is 
 zeroed.

 Signed-off-by: Kevin Wolf kw...@redhat.com
 ---

 And the moment you send it out, you notice that it's wrong... *sigh*

 v2:
 - Check for s-l1_size  sn-l1_size in order to avoid disasters...

 Philipp, I think this should fix your corruption. Please give it a try.

 Anthony, this must go into 0.15. Given the short time until -rc2, do you 
 prefer
 to pick it up directly or should I send a pull request tomorrow? The patch
 looks obvious, is tested with the given testcase and survives a basic
 qemu-iotests run (though qemu-iotests doesn't exercise snapshots a lot)

 Stefan, please review :-)

  block/qcow2-snapshot.c |    5 -
  1 files changed, 4 insertions(+), 1 deletions(-)

 diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
 index 74823a5..6972e66 100644
 --- a/block/qcow2-snapshot.c
 +++ b/block/qcow2-snapshot.c
 @@ -330,8 +330,11 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const 
 char *snapshot_id)
     if (qcow2_grow_l1_table(bs, sn-l1_size, true)  0)
         goto fail;

 -    s-l1_size = sn-l1_size;
 +    if (s-l1_size  sn-l1_size) {
 +        memset(s-l1_table + sn-l1_size, 0, s-l1_size - sn-l1_size);
 +    }
     l1_size2 = s-l1_size * sizeof(uint64_t);
 +
     /* copy the snapshot l1 table to the current l1 table */
     if (bdrv_pread(bs-file, sn-l1_table_offset,
                    s-l1_table, l1_size2) != l1_size2)
 --
 1.7.6


 This patch looked odd at first sight. First a qcow2_grow_l1_table is
 called to shrink L1 so perhaps should be qcow2_resize_l1_table.

 No, it doesn't shrink the table:

    if (min_size = s-l1_size)
        return 0;


 Yes, but perhaps returning success and not clipping anything is not
 that correct, perhaps qcow2_snapshot_goto should not call
 qcow2_grow_l1_table with a shorter value.

 Perhaps also it would be better to clean entries in
 qcow2_grow_l1_table instead of  qcow2_snapshot_goto to avoid same
 problem in different calls to qcow2_grow_l1_table. The other oddity
 (still to understand) is: why does some code use l1_table above
 l1_size ??

 Which code do you mean specifically?

 Kevin


 I think this is the issue

 # 1204 - 128 cluster per L2 entry - 128k per L2 entry
 # 128 cluster per L1 entry - 16M per L1 entry
 qemu-img create -f qcow2 /tmp/sn.qcow2 16m -o cluster_size=1024
 qemu-img snapshot -c foo /tmp/sn.qcow2
 # extend L1
 qemu-io -c 'write -b 0 4M' /tmp/sn.qcow2
 # shrink
 qemu-img snapshot -a foo /tmp/sn.qcow2
 qemu-img check /tmp/sn.qcow2

 Well... I was trying to get a leak and got a core instead. I removed
 your patch and got leaks.

 also, should not be

    memset(s-l1_table + sn-l1_size, 0, (s-l1_size - sn-l1_size) *
 sizeof(uint64_t));

 instead of

    memset(s-l1_table + sn-l1_size, 0, s-l1_size - sn-l1_size);

 Frediano


This patch fix Philipp and my problem

diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 74823a5..0e5fc13 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -330,14 +330,16 @@ int qcow2_snapshot_goto(BlockDriverState *bs,
const char *snapshot_id)
 if (qcow2_grow_l1_table(bs, sn-l1_size, true)  0)
 goto fail;

-s-l1_size = sn-l1_size;
-l1_size2 = s-l1_size * sizeof(uint64_t);
+if (s-l1_size  sn-l1_size) {
+memset(s-l1_table + sn-l1_size, 0, (s-l1_size -
sn-l1_size) * sizeof(uint64_t));
+}
+l1_size2 = sn-l1_size * sizeof(uint64_t);
 /* copy the snapshot l1 table to the current l1 table */
 if (bdrv_pread(bs-file, sn-l1_table_offset,
s-l1_table, l1_size2) != l1_size2)
 goto fail;
 if (bdrv_pwrite_sync(bs-file, s-l1_table_offset,
-s-l1_table, l1_size2)  0)
+s-l1_table, s-l1_size * sizeof(uint64_t))  0)
 goto fail;
 for(i = 0;i  s-l1_size; i++) {
 be64_to_cpus(s-l1_table[i]);


Frediano



Re: [Qemu-devel] qcow2x

2011-08-03 Thread Frediano Ziglio
2011/8/2 Kevin Wolf kw...@redhat.com:
 Am 02.08.2011 17:29, schrieb Frediano Ziglio:
 - L2 allocation can be done with relative data (this is not easy to do
 with current code)

 What do you mean by that?


 Let's take an example. By allocation I mean give a position to
 data/l2/refcount_table. Usually you cannot update/write L2 before data
 is allocated and written if you don't want to get a L2 entry pointing
 to garbage or unwritten data (if physically you write to a sector you
 get new data or old one on fail, not data changing to anything else).
 The exception to this is when even L2 table is not allocated. In this
 case you can write L2 table with data cause in case of failure this
 new L2 table is just not attached to anything (cause L1 still point to
 old L2 or is not allocated). My patch can collapse these two cluster
 writes in a single one. The key point of the patch is mainly
 collapsing all writes as you can not blocking other writes if not
 needed.

 Ok, I see.

 Not sure if it makes any measurable difference. With 64k clusters, an L2
 allocation happens only every 512 MB. But if it's not much code to
 support it, it's probably a good thing.


Let's see :)

I published a repo at gitorious. https://gitorious.org/qemu (yes, qemu
was free for me :) )

Currently there are 2 branches, qcow2x and my_cleanup. I think gitk
interface is much better. From my_cleanup branch from qcow: allocate
QCowAIOCB structure using stack to qcow: embed qcow_aio_read_cb into
qcow_co_readv and qcow_aio_write_cb into qcow_co_writev there are
cleanups for qcow while from qcow2: removed unused fields to qcow2:
remove memory leak there are cleanups for qcow2.

... omissis ...


 Works for me. Just anything that I can pull from.

 Are you in the #qemu IRC channel? I think we should coordinate our qcow2
 work a bit in order to avoid conflicting or duplicate work.

 No, I don't use irc that much (time shift problems and also connection
 too). When are you online?

 Usually until something like 18:00 CEST (16:00 UTC).

 Kevin


:( me from 18:00 CEST

Frediano



[Qemu-devel] qcow2x

2011-08-02 Thread Frediano Ziglio
Hi,
  I spent some time trying to find a way to speed up qcow2 performance
on allocation and snapshot so I branch from kevin/coroutine-block
branch a qcow2x branch. Currently it just write using different
algorithm (that is is fully compatible with qcow2, there is not
ICountAsZero(TM) method :) ). Mainly is a complete rewrite of
qcow2_co_writev. Some problems I encountered in the way current qcow2
code works

- reference decrement are not optimized (well, this is easy to fix on
current code)
- any L2 update blocks all other L2 updates, this is a problem if
guest is writing large file sequentially cause most write needs to be
serialized
- L2 allocation can be done with relative data (this is not easy to do
with current code)
- data/l2 are allocated sequentially (if there are not hole freed) but
written in another order. This cause excessive file fragmentation with
default cache mode, for instance on xfs file is allocated quite
sequentially on every write so any no-sequential write create a
different fragment.

Currently I'm getting these times with iotests (my_cleanup branch is
another branch more conservative with a patch to collapse reference
decrement, note that 011 and 026 are missing, still not working)

XCB
001 637
002 334
003 333
004 010
005 000
007 35   32   36
008 343
009 100
010 000
012 002
013 125  err  158
014 189  err  203
015 48   70   610
017 444
018 555
019 444
020 444
021 000
022 74   103  103
023 75   err  95
024 333
025 336
027 110
028 111

X qcow2x
C my_cleanup
B kevin/coroutine-block

Currently code is quite spaghetti code (needs a lot of cleanup,
checks, better error handling and so on). Taking into account that
code require additional optimizations and is full of internal
debugging time times are quite good.

Main questions are:
- are somebody interesting ?
- how can I send such a patch for review considering that is quite big
(I know, I have to clean a bit too) ?

Regards,
  Frediano



Re: [Qemu-devel] [PATCH 00/10] block: Coroutine support

2011-08-02 Thread Frediano Ziglio
2011/8/2 Kevin Wolf kw...@redhat.com:
 Am 02.08.2011 16:23, schrieb Avi Kivity:
 On 07/26/2011 02:48 PM, Kevin Wolf wrote:
 Depends on Stefan's latest coroutine patches. This series makes qcow and 
 qcow2
 take advantage of the new coroutine infrastructure. Both formats used
 synchronous operations for accessing their metadata and blocked the guest 
 CPU
 during that time. With coroutines, the I/O will happen asynchronously in the
 background and the CPU won't be blocked any more.


 Do you plan to convert qcow2 to a fully synchronous design?

 IMO that will make it more maintainable.  Cancellation will need some
 thought, though.

 After this patch series, all interesting paths are free of callbacks (I
 assume this is what you mean by synchronous?). The only thing I can see
 that is left is qcow2_aio_flush. What is required are some cleanups that
 eliminate things that still look like AIO code, and yes, that's
 something that I want to have.

 Frediano has posted some patches which I haven't fully reviewed yet, but
 the qcow1 RFC he posted was definitely a step in the right direction.


Did I send patches for qcow2?
I just rebased them with your last updates, I'll send them again.

 Regarding cancellation, I don't know any driver that really does what
 it's supposed to do. There are basically two ways of implementing it in
 current code: Either by completing the request instead of cancelling, or
 it's broken. I'd suggest that we implement waiting for completion as a
 generic function in the block layer and be done with it (actually this
 is what happens with bdrv_aio_co_cancel_em, it just could be a bit finer
 grained).

 Kevin



Frediano



Re: [Qemu-devel] qcow2x

2011-08-02 Thread Frediano Ziglio
2011/8/2 Kevin Wolf kw...@redhat.com:
 Am 02.08.2011 16:30, schrieb Frediano Ziglio:
 Hi,
   I spent some time trying to find a way to speed up qcow2 performance
 on allocation and snapshot so I branch from kevin/coroutine-block
 branch a qcow2x branch. Currently it just write using different
 algorithm (that is is fully compatible with qcow2, there is not
 ICountAsZero(TM) method :) ). Mainly is a complete rewrite of
 qcow2_co_writev. Some problems I encountered in the way current qcow2
 code works

 Do you have a public git repo of your code?

 - reference decrement are not optimized (well, this is easy to fix on
 current code)
 - any L2 update blocks all other L2 updates, this is a problem if
 guest is writing large file sequentially cause most write needs to be
 serialized

 Yes, I am well aware of that. In my old coroutine-devel branch (it's
 still online) I started pushing the locks down and parallelising things
 this way. The code that is there is broken because the locking isn't
 completely right (L2 allocation vs. cache users of that L2, and somthing
 with refcounts). The changes to the cache should be about right, though.

 - L2 allocation can be done with relative data (this is not easy to do
 with current code)

 What do you mean by that?


Let's take an example. By allocation I mean give a position to
data/l2/refcount_table. Usually you cannot update/write L2 before data
is allocated and written if you don't want to get a L2 entry pointing
to garbage or unwritten data (if physically you write to a sector you
get new data or old one on fail, not data changing to anything else).
The exception to this is when even L2 table is not allocated. In this
case you can write L2 table with data cause in case of failure this
new L2 table is just not attached to anything (cause L1 still point to
old L2 or is not allocated). My patch can collapse these two cluster
writes in a single one. The key point of the patch is mainly
collapsing all writes as you can not blocking other writes if not
needed.

 - data/l2 are allocated sequentially (if there are not hole freed) but
 written in another order. This cause excessive file fragmentation with
 default cache mode, for instance on xfs file is allocated quite
 sequentially on every write so any no-sequential write create a
 different fragment.

 Currently I'm getting these times with iotests (my_cleanup branch is
 another branch more conservative with a patch to collapse reference
 decrement, note that 011 and 026 are missing, still not working)

 Note that qemu-iotests is often a good indicator, but the tools often
 show different behaviour from real guests, so you should also run
 benchmarks in a VM.


I know, one reason is that guest usually do a lot of small write/read
(probably this is how hardware work but I don't know this side that
much, usually I didn't see request longer than 128 sectors).

     X    C    B
 001 6    3    7
 002 3    3    4
 003 3    3    3
 004 0    1    0
 005 0    0    0
 007 35   32   36
 008 3    4    3
 009 1    0    0
 010 0    0    0
 012 0    0    2
 013 125  err  158
 014 189  err  203
 015 48   70   610
 017 4    4    4
 018 5    5    5
 019 4    4    4
 020 4    4    4
 021 0    0    0
 022 74   103  103
 023 75   err  95
 024 3    3    3
 025 3    3    6
 027 1    1    0
 028 1    1    1

 X qcow2x
 C my_cleanup
 B kevin/coroutine-block

 Currently code is quite spaghetti code (needs a lot of cleanup,
 checks, better error handling and so on). Taking into account that
 code require additional optimizations and is full of internal
 debugging time times are quite good.

 Main questions are:
 - are somebody interesting ?
 - how can I send such a patch for review considering that is quite big
 (I know, I have to clean a bit too) ?

 You'll need to split it up into reviewable pieces. But let me have a
 look at your git tree first.


I have an account on gitorious but still my repo is only local. Do you
suggest a different provider or gitorious is ok for you?

 Are you in the #qemu IRC channel? I think we should coordinate our qcow2
 work a bit in order to avoid conflicting or duplicate work.

 Kevin


No, I don't use irc that much (time shift problems and also connection
too). When are you online?

Frediano



Re: [Qemu-devel] Volume key in qcow3?

2011-07-29 Thread Frediano Ziglio
2011/7/28 Kevin Wolf kw...@redhat.com:
 Am 28.07.2011 10:05, schrieb Frediano Ziglio:
 Hi,
   I noted that AES encryption using qcow2 just use the password given
 as as key (and also truncating it to 16 bytes == 128 bits).
 This is prone to brute force attacks and is not also easy to change
 password (you have to decrypt and encrypt again the entire image).
 LUKS and EncFS use another way. They generate a random key (the
 volume key) then use the password you give to encrypt N times (where
 N is decided by security level or automatically based on time to
 decrypt the volume key. To change the password just give the old one,
 get the volume key and encrypt again using the new one. LUKS support
 also multiple slots to allow multiple password and even using an
 external key file.
 Obviously this require an additional extension to qcow2 so I think it
 require a new qcow3 format.

 Yes, once we have qcow3, adding things like this should be easy enough.
 I think the idea makes sense.

 Another thing to consider with encryption is that we don't encrypt
 metadata currently. I'm not entirely sure if this is a good or a bad
 thing. Metadata is relatively predictable and I think that might hurt
 the encryption? Though I'm really not an expert in this area.

 Kevin


I don't think this is a big issue, usually sensitive data is not in
knowing which clusters are allocated or in which sequence (perhaps
looking at allocation strategy you can detect operating system and
filesystem type), snapshot ids and descriptions are IMO more sensible.

Do anybody though that qcow2 can support data-deduplication using
refcounts? Well, in this case this is not possible with encrypted
data.

Frediano



[Qemu-devel] [PATCH 0/8] qcow2 cleanup for coroutine-block branch

2011-07-28 Thread Frediano Ziglio
This is a collection of patches that remove QCowAIOCB, useless with
coroutines.
It apply to Kevin coroutine-block branch.
I leave all step I did to remove the structure, feel free to collapse
some of them.
I tested with iotests and got no changes (026 fails like coroutine-block
branch).

Frediano Ziglio (8):
  qcow2: removed unused fields
  qcow2: removed cur_nr_sectors field in QCowAIOCB
  qcow2: remove l2meta from QCowAIOCB
  qcow2: remove cluster_offset from QCowAIOCB
  qcow2: remove common from QCowAIOCB
  qcow2: reindent and use while before the big jump
  qcow2: removed QCowAIOCB entirely
  qcow2: remove memory leak

 block/qcow2.c |  396 -
 1 files changed, 167 insertions(+), 229 deletions(-)




[Qemu-devel] [PATCH 1/8] qcow2: removed unused fields

2011-07-28 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |   10 +++---
 1 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index edc068e..5c454bb 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -381,11 +381,8 @@ typedef struct QCowAIOCB {
 uint64_t bytes_done;
 uint64_t cluster_offset;
 uint8_t *cluster_data;
-bool is_write;
 QEMUIOVector hd_qiov;
-QEMUBH *bh;
 QCowL2Meta l2meta;
-QLIST_ENTRY(QCowAIOCB) next_depend;
 } QCowAIOCB;
 
 /*
@@ -517,13 +514,12 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, int64_t sector_num,
   QEMUIOVector *qiov, int nb_sectors,
   BlockDriverCompletionFunc *cb,
-  void *opaque, int is_write, QCowAIOCB *acb)
+  void *opaque, QCowAIOCB *acb)
 {
 memset(acb, 0, sizeof(*acb));
 acb-common.bs = bs;
 acb-sector_num = sector_num;
 acb-qiov = qiov;
-acb-is_write = is_write;
 
 qemu_iovec_init(acb-hd_qiov, qiov-niov);
 
@@ -543,7 +539,7 @@ static int qcow2_co_readv(BlockDriverState *bs, int64_t 
sector_num,
 QCowAIOCB acb;
 int ret;
 
-qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, NULL, NULL, 0, acb);
+qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, NULL, NULL, acb);
 
 qemu_co_mutex_lock(s-lock);
 do {
@@ -658,7 +654,7 @@ static int qcow2_co_writev(BlockDriverState *bs,
 QCowAIOCB acb;
 int ret;
 
-qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, NULL, NULL, 1, acb);
+qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, NULL, NULL, acb);
 s-cluster_cache_offset = -1; /* disable compressed cache */
 
 qemu_co_mutex_lock(s-lock);
-- 
1.7.1




[Qemu-devel] [PATCH 2/8] qcow2: removed cur_nr_sectors field in QCowAIOCB

2011-07-28 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |   98 +
 1 files changed, 43 insertions(+), 55 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 5c454bb..0cf4465 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -377,7 +377,6 @@ typedef struct QCowAIOCB {
 int64_t sector_num;
 QEMUIOVector *qiov;
 int remaining_sectors;
-int cur_nr_sectors;/* number of sectors in current iteration */
 uint64_t bytes_done;
 uint64_t cluster_offset;
 uint8_t *cluster_data;
@@ -395,42 +394,22 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 BDRVQcowState *s = bs-opaque;
 int index_in_cluster, n1;
 int ret;
-
-/* post process the read buffer */
-if (!acb-cluster_offset) {
-/* nothing to do */
-} else if (acb-cluster_offset  QCOW_OFLAG_COMPRESSED) {
-/* nothing to do */
-} else {
-if (s-crypt_method) {
-qcow2_encrypt_sectors(s, acb-sector_num,  acb-cluster_data,
-acb-cluster_data, acb-cur_nr_sectors, 0, 
s-aes_decrypt_key);
-qemu_iovec_reset(acb-hd_qiov);
-qemu_iovec_copy(acb-hd_qiov, acb-qiov, acb-bytes_done,
-acb-cur_nr_sectors * 512);
-qemu_iovec_from_buffer(acb-hd_qiov, acb-cluster_data,
-512 * acb-cur_nr_sectors);
-}
-}
-
-acb-remaining_sectors -= acb-cur_nr_sectors;
-acb-sector_num += acb-cur_nr_sectors;
-acb-bytes_done += acb-cur_nr_sectors * 512;
+int cur_nr_sectors; /* number of sectors in current iteration */
 
 if (acb-remaining_sectors == 0) {
 /* request completed */
 return 0;
 }
 
-/* prepare next AIO request */
-acb-cur_nr_sectors = acb-remaining_sectors;
+/* prepare next request */
+cur_nr_sectors = acb-remaining_sectors;
 if (s-crypt_method) {
-acb-cur_nr_sectors = MIN(acb-cur_nr_sectors,
+cur_nr_sectors = MIN(cur_nr_sectors,
 QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors);
 }
 
 ret = qcow2_get_cluster_offset(bs, acb-sector_num  9,
-acb-cur_nr_sectors, acb-cluster_offset);
+cur_nr_sectors, acb-cluster_offset);
 if (ret  0) {
 return ret;
 }
@@ -439,14 +418,14 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 
 qemu_iovec_reset(acb-hd_qiov);
 qemu_iovec_copy(acb-hd_qiov, acb-qiov, acb-bytes_done,
-acb-cur_nr_sectors * 512);
+cur_nr_sectors * 512);
 
 if (!acb-cluster_offset) {
 
 if (bs-backing_hd) {
 /* read from the base image */
 n1 = qcow2_backing_read1(bs-backing_hd, acb-hd_qiov,
-acb-sector_num, acb-cur_nr_sectors);
+acb-sector_num, cur_nr_sectors);
 if (n1  0) {
 BLKDBG_EVENT(bs-file, BLKDBG_READ_BACKING_AIO);
 qemu_co_mutex_unlock(s-lock);
@@ -457,11 +436,9 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 return ret;
 }
 }
-return 1;
 } else {
 /* Note: in this case, no need to wait */
-qemu_iovec_memset(acb-hd_qiov, 0, 512 * acb-cur_nr_sectors);
-return 1;
+qemu_iovec_memset(acb-hd_qiov, 0, 512 * cur_nr_sectors);
 }
 } else if (acb-cluster_offset  QCOW_OFLAG_COMPRESSED) {
 /* add AIO support for compressed blocks ? */
@@ -472,9 +449,7 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 
 qemu_iovec_from_buffer(acb-hd_qiov,
 s-cluster_cache + index_in_cluster * 512,
-512 * acb-cur_nr_sectors);
-
-return 1;
+512 * cur_nr_sectors);
 } else {
 if ((acb-cluster_offset  511) != 0) {
 return -EIO;
@@ -490,24 +465,37 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS * s-cluster_size);
 }
 
-assert(acb-cur_nr_sectors =
+assert(cur_nr_sectors =
 QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors);
 qemu_iovec_reset(acb-hd_qiov);
 qemu_iovec_add(acb-hd_qiov, acb-cluster_data,
-512 * acb-cur_nr_sectors);
+512 * cur_nr_sectors);
 }
 
 BLKDBG_EVENT(bs-file, BLKDBG_READ_AIO);
 qemu_co_mutex_unlock(s-lock);
 ret = bdrv_co_readv(bs-file,
 (acb-cluster_offset  9) + index_in_cluster,
-acb-cur_nr_sectors, acb-hd_qiov);
+cur_nr_sectors, acb-hd_qiov);
 qemu_co_mutex_lock(s-lock);
 if (ret  0) {
 return ret;
 }
+if (s-crypt_method) {
+qcow2_encrypt_sectors(s, acb-sector_num,  acb-cluster_data,
+acb-cluster_data, cur_nr_sectors, 0, s-aes_decrypt_key);
+qemu_iovec_reset(acb-hd_qiov);
+qemu_iovec_copy(acb-hd_qiov, acb

[Qemu-devel] [PATCH 3/8] qcow2: remove l2meta from QCowAIOCB

2011-07-28 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |   15 ---
 1 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 0cf4465..adf31ce 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -381,7 +381,6 @@ typedef struct QCowAIOCB {
 uint64_t cluster_offset;
 uint8_t *cluster_data;
 QEMUIOVector hd_qiov;
-QCowL2Meta l2meta;
 } QCowAIOCB;
 
 /*
@@ -514,8 +513,6 @@ static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, 
int64_t sector_num,
 acb-bytes_done = 0;
 acb-remaining_sectors = nb_sectors;
 acb-cluster_offset = 0;
-acb-l2meta.nb_clusters = 0;
-qemu_co_queue_init(acb-l2meta.dependent_requests);
 return acb;
 }
 
@@ -566,6 +563,10 @@ static int qcow2_aio_write_cb(QCowAIOCB *acb)
 int n_end;
 int ret;
 int cur_nr_sectors; /* number of sectors in current iteration */
+QCowL2Meta l2meta;
+
+l2meta.nb_clusters = 0;
+qemu_co_queue_init(l2meta.dependent_requests);
 
 if (acb-remaining_sectors == 0) {
 /* request completed */
@@ -579,12 +580,12 @@ static int qcow2_aio_write_cb(QCowAIOCB *acb)
 n_end = QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors;
 
 ret = qcow2_alloc_cluster_offset(bs, acb-sector_num  9,
-index_in_cluster, n_end, cur_nr_sectors, acb-l2meta);
+index_in_cluster, n_end, cur_nr_sectors, l2meta);
 if (ret  0) {
 return ret;
 }
 
-acb-cluster_offset = acb-l2meta.cluster_offset;
+acb-cluster_offset = l2meta.cluster_offset;
 assert((acb-cluster_offset  511) == 0);
 
 qemu_iovec_reset(acb-hd_qiov);
@@ -618,9 +619,9 @@ static int qcow2_aio_write_cb(QCowAIOCB *acb)
 return ret;
 }
 
-ret = qcow2_alloc_cluster_link_l2(bs, acb-l2meta);
+ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
 
-run_dependent_requests(s, acb-l2meta);
+run_dependent_requests(s, l2meta);
 
 if (ret  0) {
 return ret;
-- 
1.7.1




[Qemu-devel] [PATCH 8/8] qcow2: remove memory leak

2011-07-28 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 6073568..aed8da7 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -492,6 +492,7 @@ fail:
 qemu_co_mutex_unlock(s-lock);
 
 qemu_iovec_destroy(hd_qiov);
+qemu_free(cluster_data);
 
 return ret;
 }
@@ -604,6 +605,7 @@ fail:
 qemu_co_mutex_unlock(s-lock);
 
 qemu_iovec_destroy(hd_qiov);
+qemu_free(cluster_data);
 
 return ret;
 }
-- 
1.7.1




[Qemu-devel] [PATCH 7/8] qcow2: removed QCowAIOCB entirely

2011-07-28 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |  207 ++---
 1 files changed, 80 insertions(+), 127 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index dfb969e..6073568 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -372,83 +372,77 @@ int qcow2_backing_read1(BlockDriverState *bs, 
QEMUIOVector *qiov,
 return n1;
 }
 
-typedef struct QCowAIOCB {
-BlockDriverState *bs;
-int64_t sector_num;
-QEMUIOVector *qiov;
-int remaining_sectors;
-uint64_t bytes_done;
-uint8_t *cluster_data;
-QEMUIOVector hd_qiov;
-} QCowAIOCB;
-
-/*
- * Returns 0 when the request is completed successfully, 1 when there is still
- * a part left to do and -errno in error cases.
- */
-static int qcow2_aio_read_cb(QCowAIOCB *acb)
+static int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
+  int remaining_sectors, QEMUIOVector *qiov)
 {
-BlockDriverState *bs = acb-bs;
 BDRVQcowState *s = bs-opaque;
 int index_in_cluster, n1;
 int ret;
 int cur_nr_sectors; /* number of sectors in current iteration */
 uint64_t cluster_offset = 0;
+uint64_t bytes_done = 0;
+QEMUIOVector hd_qiov;
+uint8_t *cluster_data = NULL;
 
-while (acb-remaining_sectors != 0) {
+qemu_iovec_init(hd_qiov, qiov-niov);
+
+qemu_co_mutex_lock(s-lock);
+
+while (remaining_sectors != 0) {
 
 /* prepare next request */
-cur_nr_sectors = acb-remaining_sectors;
+cur_nr_sectors = remaining_sectors;
 if (s-crypt_method) {
 cur_nr_sectors = MIN(cur_nr_sectors,
 QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors);
 }
 
-ret = qcow2_get_cluster_offset(bs, acb-sector_num  9,
+ret = qcow2_get_cluster_offset(bs, sector_num  9,
 cur_nr_sectors, cluster_offset);
 if (ret  0) {
-return ret;
+goto fail;
 }
 
-index_in_cluster = acb-sector_num  (s-cluster_sectors - 1);
+index_in_cluster = sector_num  (s-cluster_sectors - 1);
 
-qemu_iovec_reset(acb-hd_qiov);
-qemu_iovec_copy(acb-hd_qiov, acb-qiov, acb-bytes_done,
+qemu_iovec_reset(hd_qiov);
+qemu_iovec_copy(hd_qiov, qiov, bytes_done,
 cur_nr_sectors * 512);
 
 if (!cluster_offset) {
 
 if (bs-backing_hd) {
 /* read from the base image */
-n1 = qcow2_backing_read1(bs-backing_hd, acb-hd_qiov,
-acb-sector_num, cur_nr_sectors);
+n1 = qcow2_backing_read1(bs-backing_hd, hd_qiov,
+sector_num, cur_nr_sectors);
 if (n1  0) {
 BLKDBG_EVENT(bs-file, BLKDBG_READ_BACKING_AIO);
 qemu_co_mutex_unlock(s-lock);
-ret = bdrv_co_readv(bs-backing_hd, acb-sector_num,
-n1, acb-hd_qiov);
+ret = bdrv_co_readv(bs-backing_hd, sector_num,
+n1, hd_qiov);
 qemu_co_mutex_lock(s-lock);
 if (ret  0) {
-return ret;
+goto fail;
 }
 }
 } else {
 /* Note: in this case, no need to wait */
-qemu_iovec_memset(acb-hd_qiov, 0, 512 * cur_nr_sectors);
+qemu_iovec_memset(hd_qiov, 0, 512 * cur_nr_sectors);
 }
 } else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
 /* add AIO support for compressed blocks ? */
 ret = qcow2_decompress_cluster(bs, cluster_offset);
 if (ret  0) {
-return ret;
+goto fail;
 }
 
-qemu_iovec_from_buffer(acb-hd_qiov,
+qemu_iovec_from_buffer(hd_qiov,
 s-cluster_cache + index_in_cluster * 512,
 512 * cur_nr_sectors);
 } else {
 if ((cluster_offset  511) != 0) {
-return -EIO;
+ret = -EIO;
+goto fail;
 }
 
 if (s-crypt_method) {
@@ -456,15 +450,15 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
  * For encrypted images, read everything into a temporary
  * contiguous buffer on which the AES functions can work.
  */
-if (!acb-cluster_data) {
-acb-cluster_data =
+if (!cluster_data) {
+cluster_data =
 qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS * 
s-cluster_size);
 }
 
 assert(cur_nr_sectors =
 QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors);
-qemu_iovec_reset(acb-hd_qiov);
-qemu_iovec_add(acb-hd_qiov, acb-cluster_data,
+qemu_iovec_reset(hd_qiov

[Qemu-devel] [PATCH 4/8] qcow2: remove cluster_offset from QCowAIOCB

2011-07-28 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |   22 +++---
 1 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index adf31ce..446946e 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -378,7 +378,6 @@ typedef struct QCowAIOCB {
 QEMUIOVector *qiov;
 int remaining_sectors;
 uint64_t bytes_done;
-uint64_t cluster_offset;
 uint8_t *cluster_data;
 QEMUIOVector hd_qiov;
 } QCowAIOCB;
@@ -394,6 +393,7 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 int index_in_cluster, n1;
 int ret;
 int cur_nr_sectors; /* number of sectors in current iteration */
+uint64_t cluster_offset = 0;
 
 if (acb-remaining_sectors == 0) {
 /* request completed */
@@ -408,7 +408,7 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 }
 
 ret = qcow2_get_cluster_offset(bs, acb-sector_num  9,
-cur_nr_sectors, acb-cluster_offset);
+cur_nr_sectors, cluster_offset);
 if (ret  0) {
 return ret;
 }
@@ -419,7 +419,7 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 qemu_iovec_copy(acb-hd_qiov, acb-qiov, acb-bytes_done,
 cur_nr_sectors * 512);
 
-if (!acb-cluster_offset) {
+if (!cluster_offset) {
 
 if (bs-backing_hd) {
 /* read from the base image */
@@ -439,9 +439,9 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 /* Note: in this case, no need to wait */
 qemu_iovec_memset(acb-hd_qiov, 0, 512 * cur_nr_sectors);
 }
-} else if (acb-cluster_offset  QCOW_OFLAG_COMPRESSED) {
+} else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
 /* add AIO support for compressed blocks ? */
-ret = qcow2_decompress_cluster(bs, acb-cluster_offset);
+ret = qcow2_decompress_cluster(bs, cluster_offset);
 if (ret  0) {
 return ret;
 }
@@ -450,7 +450,7 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 s-cluster_cache + index_in_cluster * 512,
 512 * cur_nr_sectors);
 } else {
-if ((acb-cluster_offset  511) != 0) {
+if ((cluster_offset  511) != 0) {
 return -EIO;
 }
 
@@ -474,7 +474,7 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 BLKDBG_EVENT(bs-file, BLKDBG_READ_AIO);
 qemu_co_mutex_unlock(s-lock);
 ret = bdrv_co_readv(bs-file,
-(acb-cluster_offset  9) + index_in_cluster,
+(cluster_offset  9) + index_in_cluster,
 cur_nr_sectors, acb-hd_qiov);
 qemu_co_mutex_lock(s-lock);
 if (ret  0) {
@@ -512,7 +512,6 @@ static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, 
int64_t sector_num,
 
 acb-bytes_done = 0;
 acb-remaining_sectors = nb_sectors;
-acb-cluster_offset = 0;
 return acb;
 }
 
@@ -564,6 +563,7 @@ static int qcow2_aio_write_cb(QCowAIOCB *acb)
 int ret;
 int cur_nr_sectors; /* number of sectors in current iteration */
 QCowL2Meta l2meta;
+uint64_t cluster_offset;
 
 l2meta.nb_clusters = 0;
 qemu_co_queue_init(l2meta.dependent_requests);
@@ -585,8 +585,8 @@ static int qcow2_aio_write_cb(QCowAIOCB *acb)
 return ret;
 }
 
-acb-cluster_offset = l2meta.cluster_offset;
-assert((acb-cluster_offset  511) == 0);
+cluster_offset = l2meta.cluster_offset;
+assert((cluster_offset  511) == 0);
 
 qemu_iovec_reset(acb-hd_qiov);
 qemu_iovec_copy(acb-hd_qiov, acb-qiov, acb-bytes_done,
@@ -612,7 +612,7 @@ static int qcow2_aio_write_cb(QCowAIOCB *acb)
 BLKDBG_EVENT(bs-file, BLKDBG_WRITE_AIO);
 qemu_co_mutex_unlock(s-lock);
 ret = bdrv_co_writev(bs-file,
- (acb-cluster_offset  9) + index_in_cluster,
+ (cluster_offset  9) + index_in_cluster,
  cur_nr_sectors, acb-hd_qiov);
 qemu_co_mutex_lock(s-lock);
 if (ret  0) {
-- 
1.7.1




[Qemu-devel] Volume key in qcow3?

2011-07-28 Thread Frediano Ziglio
Hi,
  I noted that AES encryption using qcow2 just use the password given
as as key (and also truncating it to 16 bytes == 128 bits).
This is prone to brute force attacks and is not also easy to change
password (you have to decrypt and encrypt again the entire image).
LUKS and EncFS use another way. They generate a random key (the
volume key) then use the password you give to encrypt N times (where
N is decided by security level or automatically based on time to
decrypt the volume key. To change the password just give the old one,
get the volume key and encrypt again using the new one. LUKS support
also multiple slots to allow multiple password and even using an
external key file.
Obviously this require an additional extension to qcow2 so I think it
require a new qcow3 format.

Frediano



[Qemu-devel] [PATCH 5/8] qcow2: remove common from QCowAIOCB

2011-07-28 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |8 
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 446946e..c7445cc 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -373,7 +373,7 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector 
*qiov,
 }
 
 typedef struct QCowAIOCB {
-BlockDriverAIOCB common;
+BlockDriverState *bs;
 int64_t sector_num;
 QEMUIOVector *qiov;
 int remaining_sectors;
@@ -388,7 +388,7 @@ typedef struct QCowAIOCB {
  */
 static int qcow2_aio_read_cb(QCowAIOCB *acb)
 {
-BlockDriverState *bs = acb-common.bs;
+BlockDriverState *bs = acb-bs;
 BDRVQcowState *s = bs-opaque;
 int index_in_cluster, n1;
 int ret;
@@ -504,7 +504,7 @@ static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, 
int64_t sector_num,
   void *opaque, QCowAIOCB *acb)
 {
 memset(acb, 0, sizeof(*acb));
-acb-common.bs = bs;
+acb-bs = bs;
 acb-sector_num = sector_num;
 acb-qiov = qiov;
 
@@ -556,7 +556,7 @@ static void run_dependent_requests(BDRVQcowState *s, 
QCowL2Meta *m)
  */
 static int qcow2_aio_write_cb(QCowAIOCB *acb)
 {
-BlockDriverState *bs = acb-common.bs;
+BlockDriverState *bs = acb-bs;
 BDRVQcowState *s = bs-opaque;
 int index_in_cluster;
 int n_end;
-- 
1.7.1




[Qemu-devel] [PATCH 6/8] qcow2: reindent and use while before the big jump

2011-07-28 Thread Frediano Ziglio
prepare to remove read/write callbacks

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2.c |  272 -
 1 files changed, 135 insertions(+), 137 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index c7445cc..dfb969e 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -395,107 +395,105 @@ static int qcow2_aio_read_cb(QCowAIOCB *acb)
 int cur_nr_sectors; /* number of sectors in current iteration */
 uint64_t cluster_offset = 0;
 
-if (acb-remaining_sectors == 0) {
-/* request completed */
-return 0;
-}
-
-/* prepare next request */
-cur_nr_sectors = acb-remaining_sectors;
-if (s-crypt_method) {
-cur_nr_sectors = MIN(cur_nr_sectors,
-QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors);
-}
+while (acb-remaining_sectors != 0) {
 
-ret = qcow2_get_cluster_offset(bs, acb-sector_num  9,
-cur_nr_sectors, cluster_offset);
-if (ret  0) {
-return ret;
-}
-
-index_in_cluster = acb-sector_num  (s-cluster_sectors - 1);
-
-qemu_iovec_reset(acb-hd_qiov);
-qemu_iovec_copy(acb-hd_qiov, acb-qiov, acb-bytes_done,
-cur_nr_sectors * 512);
-
-if (!cluster_offset) {
-
-if (bs-backing_hd) {
-/* read from the base image */
-n1 = qcow2_backing_read1(bs-backing_hd, acb-hd_qiov,
-acb-sector_num, cur_nr_sectors);
-if (n1  0) {
-BLKDBG_EVENT(bs-file, BLKDBG_READ_BACKING_AIO);
-qemu_co_mutex_unlock(s-lock);
-ret = bdrv_co_readv(bs-backing_hd, acb-sector_num,
-n1, acb-hd_qiov);
-qemu_co_mutex_lock(s-lock);
-if (ret  0) {
-return ret;
-}
-}
-} else {
-/* Note: in this case, no need to wait */
-qemu_iovec_memset(acb-hd_qiov, 0, 512 * cur_nr_sectors);
+/* prepare next request */
+cur_nr_sectors = acb-remaining_sectors;
+if (s-crypt_method) {
+cur_nr_sectors = MIN(cur_nr_sectors,
+QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors);
 }
-} else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
-/* add AIO support for compressed blocks ? */
-ret = qcow2_decompress_cluster(bs, cluster_offset);
+
+ret = qcow2_get_cluster_offset(bs, acb-sector_num  9,
+cur_nr_sectors, cluster_offset);
 if (ret  0) {
 return ret;
 }
 
-qemu_iovec_from_buffer(acb-hd_qiov,
-s-cluster_cache + index_in_cluster * 512,
-512 * cur_nr_sectors);
-} else {
-if ((cluster_offset  511) != 0) {
-return -EIO;
-}
+index_in_cluster = acb-sector_num  (s-cluster_sectors - 1);
 
-if (s-crypt_method) {
-/*
- * For encrypted images, read everything into a temporary
- * contiguous buffer on which the AES functions can work.
- */
-if (!acb-cluster_data) {
-acb-cluster_data =
-qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS * s-cluster_size);
+qemu_iovec_reset(acb-hd_qiov);
+qemu_iovec_copy(acb-hd_qiov, acb-qiov, acb-bytes_done,
+cur_nr_sectors * 512);
+
+if (!cluster_offset) {
+
+if (bs-backing_hd) {
+/* read from the base image */
+n1 = qcow2_backing_read1(bs-backing_hd, acb-hd_qiov,
+acb-sector_num, cur_nr_sectors);
+if (n1  0) {
+BLKDBG_EVENT(bs-file, BLKDBG_READ_BACKING_AIO);
+qemu_co_mutex_unlock(s-lock);
+ret = bdrv_co_readv(bs-backing_hd, acb-sector_num,
+n1, acb-hd_qiov);
+qemu_co_mutex_lock(s-lock);
+if (ret  0) {
+return ret;
+}
+}
+} else {
+/* Note: in this case, no need to wait */
+qemu_iovec_memset(acb-hd_qiov, 0, 512 * cur_nr_sectors);
+}
+} else if (cluster_offset  QCOW_OFLAG_COMPRESSED) {
+/* add AIO support for compressed blocks ? */
+ret = qcow2_decompress_cluster(bs, cluster_offset);
+if (ret  0) {
+return ret;
 }
 
-assert(cur_nr_sectors =
-QCOW_MAX_CRYPT_CLUSTERS * s-cluster_sectors);
-qemu_iovec_reset(acb-hd_qiov);
-qemu_iovec_add(acb-hd_qiov, acb-cluster_data,
+qemu_iovec_from_buffer(acb-hd_qiov,
+s-cluster_cache + index_in_cluster * 512,
 512 * cur_nr_sectors);
-}
+} else {
+if ((cluster_offset  511) != 0) {
+return -EIO;
+}
 
-BLKDBG_EVENT(bs-file

Re: [Qemu-devel] Block layer roadmap

2011-07-28 Thread Frediano Ziglio
2011/7/28 Christoph Hellwig h...@lst.de:
 On Wed, Jul 27, 2011 at 01:37:31PM +0100, Stefan Hajnoczi wrote:
 Coroutines in the block layer [Kevin]
  * Programming model to simplify block drivers without blocking QEMU threads

 Can anyone explain what the whole point of this is?  It really just is
 a bit of syntactic sugar for the current async state machines.  What does
 it buy us over going for real threading?


This has nothing (or few) to do with threads. Instead of splitting
functions in callbacks at every synchronous function it allow to write
more readable code. For instance after changing qcow code from current
to coroutine you remove about 400 lines (about 30%). This will help
maintaining code and develop more complicated optimizations.

About threading you can do threading using AIO and using coroutines.

Frediano



[Qemu-devel] [PATCH] [RFC] qcow2: group refcount updates during cow

2011-07-28 Thread Frediano Ziglio
Well, I think this is the first real improve patch.
Is more a RFC than a patch. Yes, some lines are terrible!
It collapses refcount decrement during cow.
From a first check time executing 015 test passed from about 600 seconds
to 70.
This at least prove that refcount updates counts!
Some doubt:
1- place the code in qcow2-refcount.c as it update only refcount and not
  cluster?
2- allow some sort of begin transaction / commit / rollback like 
  databases instead?
3- allow changing tables from different coroutines?

1) If you have a sequence like (1, 2, 4) probably these clusters are all in
the same l2 table but with this code you get two write instead of one.
I'm thinking about a function in qcow2-refcount.c that accept an array of 
cluster
instead of a start + len.

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/qcow2-cluster.c |   36 ++--
 1 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 81cf77d..da17365 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -675,10 +675,42 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, 
QCowL2Meta *m)
  * Also flush bs-file to get the right order for L2 and refcount update.
  */
 if (j != 0) {
+int64_t old_start = 0, old_end = -2;
+int count = 0;
 for (i = 0; i  j; i++) {
-qcow2_free_any_clusters(bs,
-be64_to_cpu(old_cluster[i])  ~QCOW_OFLAG_COPIED, 1);
+old_cluster[i] = be64_to_cpu(old_cluster[i])  ~QCOW_OFLAG_COPIED;
 }
+// XXX sort old_cluster
+for (i = 0; i  j; i++) {
+int64_t cluster = old_cluster[i];
+
+/* group if contiguos */
+if (old_end + 1 == (cluster  s-cluster_bits)) {
+++old_end;
+continue;
+}
+
+/* handle */
+if (old_end  0) {
+qcow2_free_any_clusters(bs, old_start  s-cluster_bits, 
old_end - old_start + 1);
+count += old_end - old_start + 1;
+}
+old_end = -2;
+
+/* handle compressed separately */
+if ((cluster  QCOW_OFLAG_COMPRESSED)) {
+qcow2_free_any_clusters(bs, cluster, 1);
+continue;
+}
+
+/* start a new group */
+old_start = old_end = cluster  s-cluster_bits;
+}
+if (old_end  0) {
+qcow2_free_any_clusters(bs, old_start  s-cluster_bits, old_end 
- old_start + 1);
+count += old_end - old_start + 1;
+}
+assert(count == j);
 }
 
 ret = 0;
-- 
1.7.1




[Qemu-devel] [PATCH 0/3] small trivial patches

2011-07-27 Thread Frediano Ziglio
Minor fixes and improves

Frediano Ziglio (3):
  block: removed unused function
  block: typo fix
  aio: always check paio_init result

 block.c   |   13 -
 block.h   |2 --
 block/raw-posix.c |   13 ++---
 3 files changed, 6 insertions(+), 22 deletions(-)




[Qemu-devel] [PATCH 1/3] block: removed unused function

2011-07-27 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block.c |   13 -
 block.h |2 --
 2 files changed, 0 insertions(+), 15 deletions(-)

diff --git a/block.c b/block.c
index 9549b9e..b2d3983 100644
--- a/block.c
+++ b/block.c
@@ -1108,19 +1108,6 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t 
offset,
 return 0;
 }
 
-/*
- * Writes to the file and ensures that no writes are reordered across this
- * request (acts as a barrier)
- *
- * Returns 0 on success, -errno in error cases.
- */
-int bdrv_write_sync(BlockDriverState *bs, int64_t sector_num,
-const uint8_t *buf, int nb_sectors)
-{
-return bdrv_pwrite_sync(bs, BDRV_SECTOR_SIZE * sector_num,
-buf, BDRV_SECTOR_SIZE * nb_sectors);
-}
-
 /**
  * Truncate file to 'offset' bytes (needed only for file protocols)
  */
diff --git a/block.h b/block.h
index 59cc410..e672bc6 100644
--- a/block.h
+++ b/block.h
@@ -85,8 +85,6 @@ int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
 const void *buf, int count);
 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
 const void *buf, int count);
-int bdrv_write_sync(BlockDriverState *bs, int64_t sector_num,
-const uint8_t *buf, int nb_sectors);
 int bdrv_truncate(BlockDriverState *bs, int64_t offset);
 int64_t bdrv_getlength(BlockDriverState *bs);
 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
-- 
1.7.1




[Qemu-devel] [PATCH 2/3] block: typo fix

2011-07-27 Thread Frediano Ziglio

Signed-off-by: Frediano Ziglio fredd...@gmail.com
---
 block/raw-posix.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/block/raw-posix.c b/block/raw-posix.c
index cd89c83..275b41e 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -587,7 +587,7 @@ static BlockDriverAIOCB *raw_aio_submit(BlockDriverState 
*bs,
 
 /*
  * If O_DIRECT is used the buffer needs to be aligned on a sector
- * boundary.  Check if this is the case or telll the low-level
+ * boundary.  Check if this is the case or tell the low-level
  * driver that it needs to copy the buffer.
  */
 if (s-aligned_buf) {
-- 
1.7.1




<    1   2   3   >