Re: [Qemu-block] [Qemu-devel] [PATCH 1/5] ide/atapi: make PIO read requests async

2015-10-06 Thread Peter Lieven

Am 05.10.2015 um 23:15 schrieb John Snow:


On 09/21/2015 08:25 AM, Peter Lieven wrote:

PIO read requests on the ATAPI interface used to be sync blk requests.
This has to siginificant drawbacks. First the main loop hangs util an
I/O request is completed and secondly if the I/O request does not
complete (e.g. due to an unresponsive storage) Qemu hangs completely.

Signed-off-by: Peter Lieven 
---
  hw/ide/atapi.c | 69 --
  1 file changed, 43 insertions(+), 26 deletions(-)

diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index 747f466..9257e1c 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
  memset(buf, 0, 288);
  }
  
-static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size)

+static void cd_read_sector_cb(void *opaque, int ret)
  {
-int ret;
+IDEState *s = opaque;
  
-switch(sector_size) {

-case 2048:
-block_acct_start(blk_get_stats(s->blk), &s->acct,
- 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
-ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
-block_acct_done(blk_get_stats(s->blk), &s->acct);
-break;
-case 2352:
-block_acct_start(blk_get_stats(s->blk), &s->acct,
- 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
-ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
-block_acct_done(blk_get_stats(s->blk), &s->acct);
-if (ret < 0)
-return ret;
-cd_data_to_raw(buf, lba);
-break;
-default:
-ret = -EIO;
-break;
+block_acct_done(blk_get_stats(s->blk), &s->acct);
+
+if (ret < 0) {
+ide_atapi_io_error(s, ret);
+return;
+}
+
+if (s->cd_sector_size == 2352) {
+cd_data_to_raw(s->io_buffer, s->lba);
  }
-return ret;
+
+s->lba++;
+s->io_buffer_index = 0;
+s->status &= ~BUSY_STAT;
+
+ide_atapi_cmd_reply_end(s);
+}
+
+static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size)
+{
+if (sector_size != 2048 && sector_size != 2352) {
+return -EINVAL;
+}
+
+s->iov.iov_base = buf;
+if (sector_size == 2352) {
+buf += 4;
+}
+
+s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
+qemu_iovec_init_external(&s->qiov, &s->iov, 1);
+
+if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
+  cd_read_sector_cb, s) == NULL) {
+return -EIO;
+}
+
+block_acct_start(blk_get_stats(s->blk), &s->acct,
+ 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
+s->status |= BUSY_STAT;
+return 0;
  }
  

We discussed this off-list a bit, but for upstream synchronization:

Unfortunately, I believe making cd_read_sector here non-blocking makes
ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
s->end_transfer_func() nonblocking, which functions like ide_data_readw
are not prepared to cope with.

My suggestion is to buffer an entire DRQ block of data at once
(byte_count_limit) to avoid the problem.


Hi John,

first of all thank you for the detailed analysis.

Is the following what you have in mind? For PIO reads > 1 sector
it is a great improvement for the NFS backend:

diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index ab45495..ec2ba89 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -117,37 +117,40 @@ static void cd_read_sector_cb(void *opaque, int ret)
 }

 if (s->cd_sector_size == 2352) {
-cd_data_to_raw(s->io_buffer, s->lba);
+int nb_sectors = s->packet_transfer_size / 2352;
+while (nb_sectors--) {
+memmove(s->io_buffer + nb_sectors * 2352 + 4,
+s->io_buffer + nb_sectors * 2048, 2048);
+cd_data_to_raw(s->io_buffer + nb_sectors * 2352,
+   s->lba + nb_sectors);
+}
 }

-s->lba++;
+s->lba = -1;
 s->io_buffer_index = 0;
 s->status &= ~BUSY_STAT;

 ide_atapi_cmd_reply_end(s);
 }

-static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size)
+static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size, 
int nb_sectors)
 {
 if (sector_size != 2048 && sector_size != 2352) {
 return -EINVAL;
 }

 s->iov.iov_base = buf;
-if (sector_size == 2352) {
-buf += 4;
-}
-
-s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
+s->iov.iov_len = nb_sectors * 2048;
 qemu_iovec_init_external(&s->qiov, &s->iov, 1);

-if (ide_readv_cancelable(s, (int64_t)lba << 2, &s->qiov, 4,
-  cd_read_sector_cb, s) == NULL) {
+if (ide_readv_cancelable(s, (int64_t)lba << 2,
+ &s->qiov, nb_sectors * 4,
+ cd_read_sector_cb, s) == NULL) {
 return -EIO;
 }

 block_acct_start(blk_get_stats(s->blk), &s->acct,
- 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
+ 
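
(Why the conversion loop in the hunk above walks backwards: the packed
2048-byte payloads and the expanded 2352-byte frames share io_buffer, so
the highest-numbered sector has to be expanded first. A worked two-sector
example, using the 16-byte data offset from the original synchronous code:)

    /* after the single backend read:
     *   payload 0 at io_buffer + 0,  payload 1 at io_buffer + 2048
     * wanted raw frames:
     *   frame 0   at io_buffer + 0,  frame 1   at io_buffer + 2352
     *
     * expanding sector 1 first (memmove 2048 -> 2352 + 16) leaves
     * payload 0 untouched; expanding sector 0 first would write
     * bytes 16..2063 and clobber the start of payload 1 at 2048.
     */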

Re: [Qemu-block] [Qemu-devel] [PATCH 1/5] ide/atapi: make PIO read requests async

2015-10-06 Thread Kevin Wolf
Am 05.10.2015 um 23:15 hat John Snow geschrieben:
> 
> 
> On 09/21/2015 08:25 AM, Peter Lieven wrote:
> > PIO read requests on the ATAPI interface used to be sync blk requests.
> > This has to siginificant drawbacks. First the main loop hangs util an
> > I/O request is completed and secondly if the I/O request does not
> > complete (e.g. due to an unresponsive storage) Qemu hangs completely.
> > 
> > Signed-off-by: Peter Lieven 
> > ---
> >  hw/ide/atapi.c | 69 
> > --
> >  1 file changed, 43 insertions(+), 26 deletions(-)
> > 
> > diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
> > index 747f466..9257e1c 100644
> > --- a/hw/ide/atapi.c
> > +++ b/hw/ide/atapi.c
> > @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
> >  memset(buf, 0, 288);
> >  }
> >  
> > -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int 
> > sector_size)
> > +static void cd_read_sector_cb(void *opaque, int ret)
> >  {
> > -int ret;
> > +IDEState *s = opaque;
> >  
> > -switch(sector_size) {
> > -case 2048:
> > -block_acct_start(blk_get_stats(s->blk), &s->acct,
> > - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> > -ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
> > -block_acct_done(blk_get_stats(s->blk), &s->acct);
> > -break;
> > -case 2352:
> > -block_acct_start(blk_get_stats(s->blk), &s->acct,
> > - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> > -ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
> > -block_acct_done(blk_get_stats(s->blk), &s->acct);
> > -if (ret < 0)
> > -return ret;
> > -cd_data_to_raw(buf, lba);
> > -break;
> > -default:
> > -ret = -EIO;
> > -break;
> > +block_acct_done(blk_get_stats(s->blk), &s->acct);
> > +
> > +if (ret < 0) {
> > +ide_atapi_io_error(s, ret);
> > +return;
> > +}
> > +
> > +if (s->cd_sector_size == 2352) {
> > +cd_data_to_raw(s->io_buffer, s->lba);
> >  }
> > -return ret;
> > +
> > +s->lba++;
> > +s->io_buffer_index = 0;
> > +s->status &= ~BUSY_STAT;
> > +
> > +ide_atapi_cmd_reply_end(s);
> > +}
> > +
> > +static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size)
> > +{
> > +if (sector_size != 2048 && sector_size != 2352) {
> > +return -EINVAL;
> > +}
> > +
> > +s->iov.iov_base = buf;
> > +if (sector_size == 2352) {
> > +buf += 4;
> > +}

This doesn't look quite right, buf is never read after this.

Also, why +=4 when it was originally buf + 16?

> > +
> > +s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
> > +qemu_iovec_init_external(&s->qiov, &s->iov, 1);
> > +
> > +if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
> > +  cd_read_sector_cb, s) == NULL) {
> > +return -EIO;
> > +}
> > +
> > +block_acct_start(blk_get_stats(s->blk), &s->acct,
> > + 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> > +s->status |= BUSY_STAT;
> > +return 0;
> >  }
> >  
> 
> We discussed this off-list a bit, but for upstream synchronization:
> 
> Unfortunately, I believe making cd_read_sector here non-blocking makes
> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
> s->end_transfer_func() nonblocking, which functions like ide_data_readw
> are not prepared to cope with.

I don't think that's a problem as long as BSY is set while the
asynchronous command is running and DRQ is cleared. The latter will
protect ide_data_readw(). ide_sector_read() does essentially the same
thing.
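
(To illustrate the protection referred to here: with DRQ clear, the PIO
data-port handlers bail out before touching io_buffer, so a guest read of
the data register while the AIO request is still in flight just returns a
dummy value. A minimal sketch of that kind of guard -- not the verbatim
function, the exact check in hw/ide/core.c may differ:)

    uint32_t ide_data_readw(void *opaque, uint32_t addr)
    {
        IDEBus *bus = opaque;
        IDEState *s = idebus_active_if(bus);
        uint32_t ret;

        /* while the AIO read is pending, BSY is set and DRQ is clear */
        if (!(s->status & DRQ_STAT)) {
            return 0xffff;
        }

        ret = cpu_to_le16(*(uint16_t *)s->data_ptr);
        s->data_ptr += 2;
        if (s->data_ptr >= s->data_end) {
            s->end_transfer_func(s);
        }
        return ret;
    }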

Or maybe I'm just missing what you're trying to say.

> My suggestion is to buffer an entire DRQ block of data at once
> (byte_count_limit) to avoid the problem.

No matter whether there is a problem or not, buffering more data at once
(and therefore doing less requests) is better for performance anyway.

Kevin

> >  void ide_atapi_cmd_ok(IDEState *s)
> > @@ -190,10 +210,8 @@ void ide_atapi_cmd_reply_end(IDEState *s)
> >  ret = cd_read_sector(s, s->lba, s->io_buffer, 
> > s->cd_sector_size);
> >  if (ret < 0) {
> >  ide_atapi_io_error(s, ret);
> > -return;
> >  }
> > -s->lba++;
> > -s->io_buffer_index = 0;
> > +return;
> >  }
> >  if (s->elementary_transfer_size > 0) {
> >  /* there are some data left to transmit in this elementary
> > @@ -275,7 +293,6 @@ static void ide_atapi_cmd_read_pio(IDEState *s, int 
> > lba, int nb_sectors,
> >  s->io_buffer_index = sector_size;
> >  s->cd_sector_size = sector_size;
> >  
> > -s->status = READY_STAT | SEEK_STAT;
> >  ide_atapi_cmd_reply_end(s);
> >  }
> >  
> > 



Re: [Qemu-block] [Qemu-devel] [PATCH 1/5] ide/atapi: make PIO read requests async

2015-10-06 Thread Peter Lieven

Am 06.10.2015 um 10:57 schrieb Kevin Wolf:

Am 05.10.2015 um 23:15 hat John Snow geschrieben:


On 09/21/2015 08:25 AM, Peter Lieven wrote:

PIO read requests on the ATAPI interface used to be sync blk requests.
This has to siginificant drawbacks. First the main loop hangs util an
I/O request is completed and secondly if the I/O request does not
complete (e.g. due to an unresponsive storage) Qemu hangs completely.

Signed-off-by: Peter Lieven 
---
  hw/ide/atapi.c | 69 --
  1 file changed, 43 insertions(+), 26 deletions(-)

diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index 747f466..9257e1c 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
  memset(buf, 0, 288);
  }
  
-static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size)

+static void cd_read_sector_cb(void *opaque, int ret)
  {
-int ret;
+IDEState *s = opaque;
  
-switch(sector_size) {

-case 2048:
-block_acct_start(blk_get_stats(s->blk), &s->acct,
- 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
-ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
-block_acct_done(blk_get_stats(s->blk), &s->acct);
-break;
-case 2352:
-block_acct_start(blk_get_stats(s->blk), &s->acct,
- 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
-ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
-block_acct_done(blk_get_stats(s->blk), &s->acct);
-if (ret < 0)
-return ret;
-cd_data_to_raw(buf, lba);
-break;
-default:
-ret = -EIO;
-break;
+block_acct_done(blk_get_stats(s->blk), &s->acct);
+
+if (ret < 0) {
+ide_atapi_io_error(s, ret);
+return;
+}
+
+if (s->cd_sector_size == 2352) {
+cd_data_to_raw(s->io_buffer, s->lba);
  }
-return ret;
+
+s->lba++;
+s->io_buffer_index = 0;
+s->status &= ~BUSY_STAT;
+
+ide_atapi_cmd_reply_end(s);
+}
+
+static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size)
+{
+if (sector_size != 2048 && sector_size != 2352) {
+return -EINVAL;
+}
+
+s->iov.iov_base = buf;
+if (sector_size == 2352) {
+buf += 4;
+}

This doesn't look quite right, buf is never read after this.

Also, why +=4 when it was originally buf + 16?


You are right. I mixed that up.
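
(For context, the 16 in the original code comes from the raw CD frame
layout that cd_data_to_raw() completes around the payload; byte map for a
mode 1 sector, as assumed here:)

    /*   [   0 ..   11]  sync pattern                (  12 bytes)
     *   [  12 ..   15]  MSF address + mode byte     (   4 bytes)
     *   [  16 .. 2063]  user data from the backend  (2048 bytes)
     *   [2064 .. 2351]  EDC/ECC area, zero-filled   ( 288 bytes)
     *
     * so the 2048-byte payload has to land at buf + 16, not buf + 4.
     */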




+
+s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
+qemu_iovec_init_external(&s->qiov, &s->iov, 1);
+
+if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
+  cd_read_sector_cb, s) == NULL) {
+return -EIO;
+}
+
+block_acct_start(blk_get_stats(s->blk), &s->acct,
+ 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
+s->status |= BUSY_STAT;
+return 0;
  }
  

We discussed this off-list a bit, but for upstream synchronization:

Unfortunately, I believe making cd_read_sector here non-blocking makes
ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
s->end_transfer_func() nonblocking, which functions like ide_data_readw
are not prepared to cope with.

I don't think that's a problem as long as BSY is set while the
asynchronous command is running and DRQ is cleared. The latter will
protect ide_data_readw(). ide_sector_read() does essentially the same
thing.


I was thinking the same. Without the BSY it's not working at all.



Or maybe I'm just missing what you're trying to say.


My suggestion is to buffer an entire DRQ block of data at once
(byte_count_limit) to avoid the problem.

No matter whether there is a problem or not, buffering more data at once
(and therefore doing less requests) is better for performance anyway.


It's possible to do only one read in the backend and read the whole
request into the IO buffer. I'll send a follow-up.

Do you maybe have a pointer to the test tool that John mentioned?

Peter




Re: [Qemu-block] [Qemu-devel] [PATCH 1/5] ide/atapi: make PIO read requests async

2015-10-06 Thread Peter Lieven

Am 06.10.2015 um 10:46 schrieb Peter Lieven:

Am 05.10.2015 um 23:15 schrieb John Snow:


On 09/21/2015 08:25 AM, Peter Lieven wrote:

PIO read requests on the ATAPI interface used to be sync blk requests.
This has to siginificant drawbacks. First the main loop hangs util an
I/O request is completed and secondly if the I/O request does not
complete (e.g. due to an unresponsive storage) Qemu hangs completely.

Signed-off-by: Peter Lieven 
---
  hw/ide/atapi.c | 69 --
  1 file changed, 43 insertions(+), 26 deletions(-)

diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index 747f466..9257e1c 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
  memset(buf, 0, 288);
  }
  -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int 
sector_size)
+static void cd_read_sector_cb(void *opaque, int ret)
  {
-int ret;
+IDEState *s = opaque;
  -switch(sector_size) {
-case 2048:
-block_acct_start(blk_get_stats(s->blk), &s->acct,
- 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
-ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
-block_acct_done(blk_get_stats(s->blk), &s->acct);
-break;
-case 2352:
-block_acct_start(blk_get_stats(s->blk), &s->acct,
- 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
-ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
-block_acct_done(blk_get_stats(s->blk), &s->acct);
-if (ret < 0)
-return ret;
-cd_data_to_raw(buf, lba);
-break;
-default:
-ret = -EIO;
-break;
+block_acct_done(blk_get_stats(s->blk), &s->acct);
+
+if (ret < 0) {
+ide_atapi_io_error(s, ret);
+return;
+}
+
+if (s->cd_sector_size == 2352) {
+cd_data_to_raw(s->io_buffer, s->lba);
  }
-return ret;
+
+s->lba++;
+s->io_buffer_index = 0;
+s->status &= ~BUSY_STAT;
+
+ide_atapi_cmd_reply_end(s);
+}
+
+static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size)
+{
+if (sector_size != 2048 && sector_size != 2352) {
+return -EINVAL;
+}
+
+s->iov.iov_base = buf;
+if (sector_size == 2352) {
+buf += 4;
+}
+
+s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
+qemu_iovec_init_external(&s->qiov, &s->iov, 1);
+
+if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
+  cd_read_sector_cb, s) == NULL) {
+return -EIO;
+}
+
+block_acct_start(blk_get_stats(s->blk), &s->acct,
+ 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
+s->status |= BUSY_STAT;
+return 0;
  }

We discussed this off-list a bit, but for upstream synchronization:

Unfortunately, I believe making cd_read_sector here non-blocking makes
ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
s->end_transfer_func() nonblocking, which functions like ide_data_readw
are not prepared to cope with.

My suggestion is to buffer an entire DRQ block of data at once
(byte_count_limit) to avoid the problem.


Hi John,

first of all thank you for the detailed analysis.

Is the following what you have in mind? For PIO reads > 1 sector
it is a great improvement for the NFS backend:

diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index ab45495..ec2ba89 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -117,37 +117,40 @@ static void cd_read_sector_cb(void *opaque, int ret)
 }

 if (s->cd_sector_size == 2352) {
-cd_data_to_raw(s->io_buffer, s->lba);
+int nb_sectors = s->packet_transfer_size / 2352;
+while (nb_sectors--) {
+memmove(s->io_buffer + nb_sectors * 2352 + 4,
+s->io_buffer + nb_sectors * 2048, 2048);
+cd_data_to_raw(s->io_buffer + nb_sectors * 2352,
+   s->lba + nb_sectors);
+}
 }

-s->lba++;
+s->lba = -1;
 s->io_buffer_index = 0;
 s->status &= ~BUSY_STAT;

 ide_atapi_cmd_reply_end(s);
 }

-static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size)
+static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size, 
int nb_sectors)
 {
 if (sector_size != 2048 && sector_size != 2352) {
 return -EINVAL;
 }

 s->iov.iov_base = buf;
-if (sector_size == 2352) {
-buf += 4;
-}
-
-s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
+s->iov.iov_len = nb_sectors * 2048;
 qemu_iovec_init_external(&s->qiov, &s->iov, 1);

-if (ide_readv_cancelable(s, (int64_t)lba << 2, &s->qiov, 4,
-  cd_read_sector_cb, s) == NULL) {
+if (ide_readv_cancelable(s, (int64_t)lba << 2,
+ &s->qiov, nb_sectors * 4,
+ cd_read_sector_cb, s) == NULL) {
 return -EIO;
 }

 block_acct_start(blk_get_stats(s->blk), &s->acct,
- 4 * BDRV_SECTOR_SIZE, 

Re: [Qemu-block] [Qemu-devel] [PATCH 1/5] ide/atapi: make PIO read requests async

2015-10-06 Thread Laszlo Ersek
On 09/21/15 14:25, Peter Lieven wrote:
> PIO read requests on the ATAPI interface used to be sync blk requests.
> This has to siginificant drawbacks. First the main loop hangs util an

Trivial comments:
- s/to/two/
- s/siginificant/significant/

Thanks
Laszlo

> I/O request is completed and secondly if the I/O request does not
> complete (e.g. due to an unresponsive storage) Qemu hangs completely.
> 
> Signed-off-by: Peter Lieven 
> ---
>  hw/ide/atapi.c | 69 
> --
>  1 file changed, 43 insertions(+), 26 deletions(-)
> 
> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
> index 747f466..9257e1c 100644
> --- a/hw/ide/atapi.c
> +++ b/hw/ide/atapi.c
> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
>  memset(buf, 0, 288);
>  }
>  
> -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int 
> sector_size)
> +static void cd_read_sector_cb(void *opaque, int ret)
>  {
> -int ret;
> +IDEState *s = opaque;
>  
> -switch(sector_size) {
> -case 2048:
> -block_acct_start(blk_get_stats(s->blk), &s->acct,
> - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> -ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
> -block_acct_done(blk_get_stats(s->blk), &s->acct);
> -break;
> -case 2352:
> -block_acct_start(blk_get_stats(s->blk), &s->acct,
> - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> -ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
> -block_acct_done(blk_get_stats(s->blk), &s->acct);
> -if (ret < 0)
> -return ret;
> -cd_data_to_raw(buf, lba);
> -break;
> -default:
> -ret = -EIO;
> -break;
> +block_acct_done(blk_get_stats(s->blk), &s->acct);
> +
> +if (ret < 0) {
> +ide_atapi_io_error(s, ret);
> +return;
> +}
> +
> +if (s->cd_sector_size == 2352) {
> +cd_data_to_raw(s->io_buffer, s->lba);
>  }
> -return ret;
> +
> +s->lba++;
> +s->io_buffer_index = 0;
> +s->status &= ~BUSY_STAT;
> +
> +ide_atapi_cmd_reply_end(s);
> +}
> +
> +static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size)
> +{
> +if (sector_size != 2048 && sector_size != 2352) {
> +return -EINVAL;
> +}
> +
> +s->iov.iov_base = buf;
> +if (sector_size == 2352) {
> +buf += 4;
> +}
> +
> +s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
> +qemu_iovec_init_external(&s->qiov, &s->iov, 1);
> +
> +if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
> +  cd_read_sector_cb, s) == NULL) {
> +return -EIO;
> +}
> +
> +block_acct_start(blk_get_stats(s->blk), &s->acct,
> + 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> +s->status |= BUSY_STAT;
> +return 0;
>  }
>  
>  void ide_atapi_cmd_ok(IDEState *s)
> @@ -190,10 +210,8 @@ void ide_atapi_cmd_reply_end(IDEState *s)
>  ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size);
>  if (ret < 0) {
>  ide_atapi_io_error(s, ret);
> -return;
>  }
> -s->lba++;
> -s->io_buffer_index = 0;
> +return;
>  }
>  if (s->elementary_transfer_size > 0) {
>  /* there are some data left to transmit in this elementary
> @@ -275,7 +293,6 @@ static void ide_atapi_cmd_read_pio(IDEState *s, int lba, 
> int nb_sectors,
>  s->io_buffer_index = sector_size;
>  s->cd_sector_size = sector_size;
>  
> -s->status = READY_STAT | SEEK_STAT;
>  ide_atapi_cmd_reply_end(s);
>  }
>  
> 




Re: [Qemu-block] [PATCH v2 14/16] blockjob: Store device name at job creation

2015-10-06 Thread Alberto Garcia
On Thu 01 Oct 2015 03:13:32 PM CEST, Kevin Wolf wrote:
> Some block jobs change the block device graph on completion. This means
> that the device that owns the job and originally was addressed with its
> device name may no longer be what the corresponding BlockBackend points
> to.
>
> Previously, the effects of bdrv_swap() ensured that the job was (at
> least partially) transferred to the target image. Events that contain
> the device name could still use bdrv_get_device_name(job->bs) and get
> the same result.
>
> After removing bdrv_swap(), this won't work any more. Instead, save the
> device name at job creation and use that copy for QMP events and
> anything else identifying the job.
>
> Signed-off-by: Kevin Wolf 

Reviewed-by: Alberto Garcia 

Berto
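
(The gist of the change, sketched: capture the name once at creation time
and use the stored copy afterwards. The field name 'id' is an assumption
for illustration, not taken from the quoted text:)

    /* in block_job_create(), while bs is still the device the user named */
    job->id = g_strdup(bdrv_get_device_name(bs));

    /* later, e.g. when emitting QMP events after the graph has changed,
     * use job->id instead of bdrv_get_device_name(job->bs)              */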



Re: [Qemu-block] [PATCH v2] iotests: disable core dumps in test 061

2015-10-06 Thread Kevin Wolf
Am 28.09.2015 um 17:13 hat Max Reitz geschrieben:
> On 28.09.2015 16:23, Alberto Garcia wrote:
> > Commit 934659c460 disabled the supression of segmentation faults in
> > bash tests. The new output of test 061, however, assumes that a core
> > dump will be produced if a program aborts. This is not necessarily the
> > case because core dumps can be disabled using ulimit.
> > 
> > Since we cannot guarantee that abort() will produce a core dump, we
> > should use SIGKILL instead (that does not produce any) and update the
> > test output accordingly.
> > 
> > Signed-off-by: Alberto Garcia 
> > ---
> >  tests/qemu-iotests/061 | 8 
> >  tests/qemu-iotests/061.out | 4 ++--
> >  2 files changed, 6 insertions(+), 6 deletions(-)
> 
> Reviewed-by: Max Reitz 

Thanks, applied to the block branch.

Kevin




Re: [Qemu-block] [PATCH] throttle: test that snapshots move the throttling configuration

2015-10-06 Thread Alberto Garcia
Ping

On Thu 17 Sep 2015 04:33:06 PM CEST, Alberto Garcia wrote:
> If a snapshot is performed on a device that has I/O limits they should
> be moved to the target image (the new active layer).
>
> Signed-off-by: Alberto Garcia 
> ---
>  tests/qemu-iotests/096 | 69 
> ++
>  tests/qemu-iotests/096.out |  5 
>  tests/qemu-iotests/group   |  1 +
>  3 files changed, 75 insertions(+)
>  create mode 100644 tests/qemu-iotests/096
>  create mode 100644 tests/qemu-iotests/096.out
>
> diff --git a/tests/qemu-iotests/096 b/tests/qemu-iotests/096
> new file mode 100644
> index 000..e34204b
> --- /dev/null
> +++ b/tests/qemu-iotests/096
> @@ -0,0 +1,69 @@
> +#!/usr/bin/env python
> +#
> +# Test that snapshots move the throttling configuration to the active
> +# layer
> +#
> +# Copyright (C) 2015 Igalia, S.L.
> +#
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program.  If not, see .
> +#
> +
> +import iotests
> +import os
> +
> +class TestLiveSnapshot(iotests.QMPTestCase):
> +base_img = os.path.join(iotests.test_dir, 'base.img')
> +target_img = os.path.join(iotests.test_dir, 'target.img')
> +group = 'mygroup'
> +iops = 6000
> +iops_size = 1024
> +
> +def setUp(self):
> +opts = []
> +opts.append('node-name=base')
> +opts.append('throttling.group=%s' % self.group)
> +opts.append('throttling.iops-total=%d' % self.iops)
> +opts.append('throttling.iops-size=%d' % self.iops_size)
> +iotests.qemu_img('create', '-f', iotests.imgfmt, self.base_img, 
> '100M')
> +self.vm = iotests.VM().add_drive(self.base_img, ','.join(opts))
> +self.vm.launch()
> +
> +def tearDown(self):
> +self.vm.shutdown()
> +os.remove(self.base_img)
> +os.remove(self.target_img)
> +
> +def checkConfig(self, active_layer):
> +result = self.vm.qmp('query-named-block-nodes')
> +for r in result['return']:
> +if r['node-name'] == active_layer:
> +self.assertEqual(r['group'], self.group)
> +self.assertEqual(r['iops'], self.iops)
> +self.assertEqual(r['iops_size'], self.iops_size)
> +else:
> +self.assertFalse(r.has_key('group'))
> +self.assertEqual(r['iops'], 0)
> +self.assertFalse(r.has_key('iops_size'))
> +
> +def testSnapshot(self):
> +self.checkConfig('base')
> +self.vm.qmp('blockdev-snapshot-sync',
> +node_name = 'base',
> +snapshot_node_name = 'target',
> +snapshot_file = self.target_img,
> +format = iotests.imgfmt)
> +self.checkConfig('target')
> +
> +if __name__ == '__main__':
> +iotests.main(supported_fmts=['qcow2'])
> diff --git a/tests/qemu-iotests/096.out b/tests/qemu-iotests/096.out
> new file mode 100644
> index 000..ae1213e
> --- /dev/null
> +++ b/tests/qemu-iotests/096.out
> @@ -0,0 +1,5 @@
> +.
> +--
> +Ran 1 tests
> +
> +OK
> diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
> index 439b1d2..30c784e 100644
> --- a/tests/qemu-iotests/group
> +++ b/tests/qemu-iotests/group
> @@ -102,6 +102,7 @@
>  093 auto
>  094 rw auto quick
>  095 rw auto quick
> +096 rw auto quick
>  097 rw auto backing
>  098 rw auto backing quick
>  099 rw auto quick
> -- 
> 2.5.1



Re: [Qemu-block] [Qemu-devel] [PATCH] scsi: switch from g_slice allocator to malloc

2015-10-06 Thread Stefan Hajnoczi
On Thu, Oct 01, 2015 at 01:04:40PM +0200, Paolo Bonzini wrote:
> Simplify memory allocation by sticking with a single API.  GSlice
> is not that fast anyway (tcmalloc/jemalloc are better).
> 
> Signed-off-by: Paolo Bonzini 
> ---
>  hw/scsi/scsi-bus.c  |  4 ++--
>  hw/scsi/virtio-scsi-dataplane.c | 10 +-
>  hw/scsi/virtio-scsi.c   | 12 +---
>  3 files changed, 12 insertions(+), 14 deletions(-)

Reviewed-by: Stefan Hajnoczi 
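
(The mechanical shape of such a conversion, with a made-up type name --
the real types and call sites are in the patch, which is not quoted here:)

    /* before: GLib's per-type slice allocator */
    Foo *foo = g_slice_new(Foo);
    /* ... use foo ... */
    g_slice_free(Foo, foo);

    /* after: plain allocation through GLib's malloc wrappers, so whatever
     * allocator is linked in (tcmalloc, jemalloc, ...) backs it */
    Foo *bar = g_new(Foo, 1);
    /* ... use bar ... */
    g_free(bar);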



Re: [Qemu-block] [PATCH] gluster: allocate GlusterAIOCBs on the stack

2015-10-06 Thread Stefan Hajnoczi
On Thu, Oct 01, 2015 at 01:04:38PM +0200, Paolo Bonzini wrote:
> This is simpler now that the driver has been converted to coroutines.
> 
> Signed-off-by: Paolo Bonzini 
> ---
>  block/gluster.c | 86 
> ++---
>  1 file changed, 33 insertions(+), 53 deletions(-)

CCing Jeff on Gluster patches.

> diff --git a/block/gluster.c b/block/gluster.c
> index 1eb3a8c..0857c14 100644
> --- a/block/gluster.c
> +++ b/block/gluster.c
> @@ -429,28 +429,23 @@ static coroutine_fn int 
> qemu_gluster_co_write_zeroes(BlockDriverState *bs,
>  int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
>  {
>  int ret;
> -GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
> +GlusterAIOCB acb;
>  BDRVGlusterState *s = bs->opaque;
>  off_t size = nb_sectors * BDRV_SECTOR_SIZE;
>  off_t offset = sector_num * BDRV_SECTOR_SIZE;
>  
> -acb->size = size;
> -acb->ret = 0;
> -acb->coroutine = qemu_coroutine_self();
> -acb->aio_context = bdrv_get_aio_context(bs);
> +acb.size = size;
> +acb.ret = 0;
> +acb.coroutine = qemu_coroutine_self();
> +acb.aio_context = bdrv_get_aio_context(bs);
>  
> -ret = glfs_zerofill_async(s->fd, offset, size, &gluster_finish_aiocb, 
> acb);
> +ret = glfs_zerofill_async(s->fd, offset, size, gluster_finish_aiocb, 
> &acb);
>  if (ret < 0) {
> -ret = -errno;
> -goto out;
> +return -errno;
>  }
>  
>  qemu_coroutine_yield();
> -ret = acb->ret;
> -
> -out:
> -g_slice_free(GlusterAIOCB, acb);
> -return ret;
> +return acb.ret;
>  }
>  
>  static inline bool gluster_supports_zerofill(void)
> @@ -541,35 +536,30 @@ static coroutine_fn int 
> qemu_gluster_co_rw(BlockDriverState *bs,
>  int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write)
>  {
>  int ret;
> -GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
> +GlusterAIOCB acb;
>  BDRVGlusterState *s = bs->opaque;
>  size_t size = nb_sectors * BDRV_SECTOR_SIZE;
>  off_t offset = sector_num * BDRV_SECTOR_SIZE;
>  
> -acb->size = size;
> -acb->ret = 0;
> -acb->coroutine = qemu_coroutine_self();
> -acb->aio_context = bdrv_get_aio_context(bs);
> +acb.size = size;
> +acb.ret = 0;
> +acb.coroutine = qemu_coroutine_self();
> +acb.aio_context = bdrv_get_aio_context(bs);
>  
>  if (write) {
>  ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
> -&gluster_finish_aiocb, acb);
> +gluster_finish_aiocb, &acb);
>  } else {
>  ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
> -&gluster_finish_aiocb, acb);
> +gluster_finish_aiocb, &acb);
>  }
>  
>  if (ret < 0) {
> -ret = -errno;
> -goto out;
> +return -errno;
>  }
>  
>  qemu_coroutine_yield();
> -ret = acb->ret;
> -
> -out:
> -g_slice_free(GlusterAIOCB, acb);
> -return ret;
> +return acb.ret;
>  }
>  
>  static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
> @@ -600,26 +590,21 @@ static coroutine_fn int 
> qemu_gluster_co_writev(BlockDriverState *bs,
>  static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
>  {
>  int ret;
> -GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
> +GlusterAIOCB acb;
>  BDRVGlusterState *s = bs->opaque;
>  
> -acb->size = 0;
> -acb->ret = 0;
> -acb->coroutine = qemu_coroutine_self();
> -acb->aio_context = bdrv_get_aio_context(bs);
> +acb.size = 0;
> +acb.ret = 0;
> +acb.coroutine = qemu_coroutine_self();
> +acb.aio_context = bdrv_get_aio_context(bs);
>  
> -ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb);
> +ret = glfs_fsync_async(s->fd, gluster_finish_aiocb, &acb);
>  if (ret < 0) {
> -ret = -errno;
> -goto out;
> +return -errno;
>  }
>  
>  qemu_coroutine_yield();
> -ret = acb->ret;
> -
> -out:
> -g_slice_free(GlusterAIOCB, acb);
> -return ret;
> +return acb.ret;
>  }
>  
>  #ifdef CONFIG_GLUSTERFS_DISCARD
> @@ -627,28 +612,23 @@ static coroutine_fn int 
> qemu_gluster_co_discard(BlockDriverState *bs,
>  int64_t sector_num, int nb_sectors)
>  {
>  int ret;
> -GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
> +GlusterAIOCB acb;
>  BDRVGlusterState *s = bs->opaque;
>  size_t size = nb_sectors * BDRV_SECTOR_SIZE;
>  off_t offset = sector_num * BDRV_SECTOR_SIZE;
>  
> -acb->size = 0;
> -acb->ret = 0;
> -acb->coroutine = qemu_coroutine_self();
> -acb->aio_context = bdrv_get_aio_context(bs);
> +acb.size = 0;
> +acb.ret = 0;
> +acb.coroutine = qemu_coroutine_self();
> +acb.aio_context = bdrv_get_aio_context(bs);
>  
> -ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, 
> acb);
> +ret = glfs_discard_async(s->fd, offset, size, gluster_finish_aiocb, 
> &acb);
>  if (r
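
(The pattern behind the conversion, condensed: each request runs in a
coroutine that yields until gluster_finish_aiocb() re-enters it, so the
ACB can safely live on that coroutine's stack. A sketch of the shape the
converted functions share, shown here for the flush case:)

    GlusterAIOCB acb = {
        .size        = 0,
        .ret         = 0,
        .coroutine   = qemu_coroutine_self(),
        .aio_context = bdrv_get_aio_context(bs),
    };

    if (glfs_fsync_async(s->fd, gluster_finish_aiocb, &acb) < 0) {
        return -errno;          /* callback never ran, &acb never escaped */
    }

    qemu_coroutine_yield();     /* resumed only by gluster_finish_aiocb() */
    return acb.ret;             /* the stack frame is still alive here    */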

Re: [Qemu-block] [PATCH] nbd: switch from g_slice allocator to malloc

2015-10-06 Thread Stefan Hajnoczi
On Thu, Oct 01, 2015 at 01:04:41PM +0200, Paolo Bonzini wrote:
> Simplify memory allocation by sticking with a single API.  GSlice
> is not that fast anyway (tcmalloc/jemalloc are better).
> 
> Signed-off-by: Paolo Bonzini 
> ---
>  nbd.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)

Reviewed-by: Stefan Hajnoczi 



Re: [Qemu-block] [PATCH] throttle: test that snapshots move the throttling configuration

2015-10-06 Thread Kevin Wolf
Am 18.09.2015 um 17:54 hat Max Reitz geschrieben:
> On 17.09.2015 16:33, Alberto Garcia wrote:
> > If a snapshot is performed on a device that has I/O limits they should
> > be moved to the target image (the new active layer).
> > 
> > Signed-off-by: Alberto Garcia 
> > ---
> >  tests/qemu-iotests/096 | 69 
> > ++
> >  tests/qemu-iotests/096.out |  5 
> >  tests/qemu-iotests/group   |  1 +
> >  3 files changed, 75 insertions(+)
> >  create mode 100644 tests/qemu-iotests/096
> >  create mode 100644 tests/qemu-iotests/096.out
> 
> Looks good, I'd just like to throw in that 096 is in use by my
> looks-dead-but-actually-is-not and
> only-waiting-for-the-blockbackend-and-media-series-to-get-merged series
> "block: Rework bdrv_close_all()":
> http://lists.nongnu.org/archive/html/qemu-block/2015-03/msg00048.html
> 
> But then again, once the prerequisites are met, I'll have to send a new
> version of that series anyway...

It's generally a good idea to check the mailing list for test numbers
taken by posted, but not yet merged patches. If there are any gaps in
groups, chances are high that some patch is using it, so don't fill gaps
without checking that first.

> So, since 096 is not a magic number I'm extremely keen on keeping in my
> greedy claws:
> 
> Reviewed-by: Max Reitz 

Thanks, applied to the block branch.

Kevin




Re: [Qemu-block] [PATCH v6 2/4] block: support passing 'backing': '' to 'blockdev-add'

2015-10-06 Thread Kevin Wolf
Am 22.09.2015 um 15:28 hat Alberto Garcia geschrieben:
> Passing an empty string allows opening an image but not its backing
> file. This was already described in the API documentation, only the
> implementation was missing.
> 
> This is useful for creating snapshots using images opened with
> blockdev-add, since they are not supposed to have a backing image
> before the operation.
> 
> Signed-off-by: Alberto Garcia 
> Reviewed-by: Max Reitz 
> Reviewed-by: Eric Blake 

Reviewed-by: Kevin Wolf 



Re: [Qemu-block] [PATCH v6 3/4] block: add a 'blockdev-snapshot' QMP command

2015-10-06 Thread Kevin Wolf
Am 22.09.2015 um 15:28 hat Alberto Garcia geschrieben:
> One of the limitations of the 'blockdev-snapshot-sync' command is that
> it does not allow passing BlockdevOptions to the newly created
> snapshots, so they are always opened using the default values.
> 
> Extending the command to allow passing options is not a practical
> solution because there is overlap between those options and some of
> the existing parameters of the command.
> 
> This patch introduces a new 'blockdev-snapshot' command with a simpler
> interface: it just takes two references to existing block devices that
> will be used as the source and target for the snapshot.
> 
> Since the main difference between the two commands is that one of them
> creates and opens the target image, while the other uses an already
> opened one, the bulk of the implementation is shared.
> 
> Signed-off-by: Alberto Garcia 
> Cc: Eric Blake 
> Cc: Max Reitz 
> ---
>  blockdev.c   | 163 
> ---
>  qapi-schema.json |   2 +
>  qapi/block-core.json |  28 +
>  qmp-commands.hx  |  38 
>  4 files changed, 171 insertions(+), 60 deletions(-)
> 
> diff --git a/blockdev.c b/blockdev.c
> index 1a5b889..daf72f3 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -1183,6 +1183,18 @@ void qmp_blockdev_snapshot_sync(bool has_device, const 
> char *device,
> &snapshot, errp);
>  }
>  
> +void qmp_blockdev_snapshot(const char *node, const char *overlay,
> +   Error **errp)
> +{
> +BlockdevSnapshot snapshot_data = {
> +.node = (char *) node,
> +.overlay = (char *) overlay
> +};
> +
> +blockdev_do_action(TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT,
> +   &snapshot_data, errp);
> +}
> +
>  void qmp_blockdev_snapshot_internal_sync(const char *device,
>   const char *name,
>   Error **errp)
> @@ -1521,57 +1533,48 @@ typedef struct ExternalSnapshotState {
>  static void external_snapshot_prepare(BlkTransactionState *common,
>Error **errp)
>  {
> -int flags, ret;
> -QDict *options;
> +int flags = 0, ret;
> +QDict *options = NULL;
>  Error *local_err = NULL;
> -bool has_device = false;
> +/* Device and node name of the image to generate the snapshot from */
>  const char *device;
> -bool has_node_name = false;
>  const char *node_name;
> -bool has_snapshot_node_name = false;
> -const char *snapshot_node_name;
> +/* Reference to the new image (for 'blockdev-snapshot') */
> +const char *snapshot_ref;
> +/* File name of the new image (for 'blockdev-snapshot-sync') */
>  const char *new_image_file;
> -const char *format = "qcow2";
> -enum NewImageMode mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
>  ExternalSnapshotState *state =
>   DO_UPCAST(ExternalSnapshotState, common, 
> common);
>  TransactionAction *action = common->action;
>  
> -/* get parameters */
> -g_assert(action->kind == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC);
> -
> -has_device = action->blockdev_snapshot_sync->has_device;
> -device = action->blockdev_snapshot_sync->device;
> -has_node_name = action->blockdev_snapshot_sync->has_node_name;
> -node_name = action->blockdev_snapshot_sync->node_name;
> -has_snapshot_node_name =
> -action->blockdev_snapshot_sync->has_snapshot_node_name;
> -snapshot_node_name = action->blockdev_snapshot_sync->snapshot_node_name;
> -
> -new_image_file = action->blockdev_snapshot_sync->snapshot_file;
> -if (action->blockdev_snapshot_sync->has_format) {
> -format = action->blockdev_snapshot_sync->format;
> -}
> -if (action->blockdev_snapshot_sync->has_mode) {
> -mode = action->blockdev_snapshot_sync->mode;
> +/* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar
> + * purpose but a different set of parameters */
> +switch (action->kind) {
> +case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT:
> +{
> +BlockdevSnapshot *s = action->blockdev_snapshot;
> +device = s->node;
> +node_name = s->node;
> +new_image_file = NULL;
> +snapshot_ref = s->overlay;
> +}
> +break;
> +case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC:
> +{
> +BlockdevSnapshotSync *s = action->blockdev_snapshot_sync;
> +device = s->has_device ? s->device : NULL;
> +node_name = s->has_node_name ? s->node_name : NULL;
> +new_image_file = s->snapshot_file;
> +snapshot_ref = NULL;
> +}
> +break;
> +default:
> +g_assert_not_reached();
>  }
>  
>  /* start processing */
> -state->old_bs = bdrv_lookup_bs(has_device ? device : NULL,
> -   

Re: [Qemu-block] [PATCH v6 1/4] block: rename BlockdevSnapshot to BlockdevSnapshotSync

2015-10-06 Thread Kevin Wolf
Am 22.09.2015 um 15:28 hat Alberto Garcia geschrieben:
> We will introduce the 'blockdev-snapshot' command that will require
> its own struct for the parameters, so we need to rename this one in
> order to avoid name clashes.
> 
> Signed-off-by: Alberto Garcia 
> Reviewed-by: Eric Blake 
> Reviewed-by: Max Reitz 

Reviewed-by: Kevin Wolf 



Re: [Qemu-block] [PATCH v6 3/4] block: add a 'blockdev-snapshot' QMP command

2015-10-06 Thread Alberto Garcia
On Tue 06 Oct 2015 05:30:07 PM CEST, Kevin Wolf wrote:
>> -options = qdict_new();
>> -if (has_snapshot_node_name) {
>> -qdict_put(options, "node-name",
>> -  qstring_from_str(snapshot_node_name));
>> +if (snapshot_node_name && bdrv_find_node(snapshot_node_name)) {
>> +error_setg(errp, "New snapshot node name already exists");
>> +return;
>> +}
>
> Preexisting, but shouldn't we use bdrv_lookup_bs() here (because devices
> and node names share a namespace)?

I think you're right, good catch!
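
(A sketch of the stricter check being suggested -- the error text and the
exact call are illustrative, not from a posted patch:)

    /* bdrv_find_node() only knows node names; bdrv_lookup_bs() searches
     * device names and node names, which share a single namespace.     */
    if (snapshot_node_name &&
        bdrv_lookup_bs(snapshot_node_name, snapshot_node_name, NULL)) {
        error_setg(errp, "New snapshot node name already in use");
        return;
    }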

>> +if (state->new_bs->blk != NULL) {
>> +error_setg(errp, "The snapshot is already in use by %s",
>> +   blk_name(state->new_bs->blk));
>> +return;
>> +}
>
> Is it even possible yet to create a root BDS without a BB?

It is possible with Max's series, on which mine depends.

   http://patchwork.ozlabs.org/patch/519375/

>> +if (bdrv_op_is_blocked(state->new_bs, BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,
>> +   errp)) {
>> +return;
>> +}
>> +
>> +if (state->new_bs->backing_hd != NULL) {
>> +error_setg(errp, "The snapshot already has a backing image");
>>  }
>
> The error cases after bdrv_open() should probably bdrv_unref() the
> node.

I don't think it's necessary, external_snapshot_abort() already takes
care of that.

Thanks for reviewing!

Berto



Re: [Qemu-block] [Qemu-devel] [PATCH 1/5] ide/atapi: make PIO read requests async

2015-10-06 Thread John Snow


On 10/06/2015 04:57 AM, Kevin Wolf wrote:
> Am 05.10.2015 um 23:15 hat John Snow geschrieben:
>>
>>
>> On 09/21/2015 08:25 AM, Peter Lieven wrote:
>>> PIO read requests on the ATAPI interface used to be sync blk requests.
>>> This has to siginificant drawbacks. First the main loop hangs util an
>>> I/O request is completed and secondly if the I/O request does not
>>> complete (e.g. due to an unresponsive storage) Qemu hangs completely.
>>>
>>> Signed-off-by: Peter Lieven 
>>> ---
>>>  hw/ide/atapi.c | 69 
>>> --
>>>  1 file changed, 43 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
>>> index 747f466..9257e1c 100644
>>> --- a/hw/ide/atapi.c
>>> +++ b/hw/ide/atapi.c
>>> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
>>>  memset(buf, 0, 288);
>>>  }
>>>  
>>> -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int 
>>> sector_size)
>>> +static void cd_read_sector_cb(void *opaque, int ret)
>>>  {
>>> -int ret;
>>> +IDEState *s = opaque;
>>>  
>>> -switch(sector_size) {
>>> -case 2048:
>>> -block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> -ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
>>> -block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> -break;
>>> -case 2352:
>>> -block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> -ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
>>> -block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> -if (ret < 0)
>>> -return ret;
>>> -cd_data_to_raw(buf, lba);
>>> -break;
>>> -default:
>>> -ret = -EIO;
>>> -break;
>>> +block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> +
>>> +if (ret < 0) {
>>> +ide_atapi_io_error(s, ret);
>>> +return;
>>> +}
>>> +
>>> +if (s->cd_sector_size == 2352) {
>>> +cd_data_to_raw(s->io_buffer, s->lba);
>>>  }
>>> -return ret;
>>> +
>>> +s->lba++;
>>> +s->io_buffer_index = 0;
>>> +s->status &= ~BUSY_STAT;
>>> +
>>> +ide_atapi_cmd_reply_end(s);
>>> +}
>>> +
>>> +static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size)
>>> +{
>>> +if (sector_size != 2048 && sector_size != 2352) {
>>> +return -EINVAL;
>>> +}
>>> +
>>> +s->iov.iov_base = buf;
>>> +if (sector_size == 2352) {
>>> +buf += 4;
>>> +}
> 
> This doesn't look quite right, buf is never read after this.
> 
> Also, why +=4 when it was originally buf + 16?
> 
>>> +
>>> +s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
>>> +qemu_iovec_init_external(&s->qiov, &s->iov, 1);
>>> +
>>> +if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
>>> +  cd_read_sector_cb, s) == NULL) {
>>> +return -EIO;
>>> +}
>>> +
>>> +block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> + 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> +s->status |= BUSY_STAT;
>>> +return 0;
>>>  }
>>>  
>>
>> We discussed this off-list a bit, but for upstream synchronization:
>>
>> Unfortunately, I believe making cd_read_sector here non-blocking makes
>> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
>> s->end_transfer_func() nonblocking, which functions like ide_data_readw
>> are not prepared to cope with.
> 
> I don't think that's a problem as long as BSY is set while the
> asynchronous command is running and DRQ is cleared. The latter will
> protect ide_data_readw(). ide_sector_read() does essentially the same
> thing.
> 
> Or maybe I'm just missing what you're trying to say.
> 

It will be correct from a code standpoint, but I don't think the guest
*expects* DRQ to become re-set before byte_count_limit is exhausted.

In the synchronous version of the code, DRQ flickers while we rebuffer
s->io_buffer, but since it's synchronous, the guest *never sees this*.

The guest does not necessarily have any reason or motivation to check if
DRQ is still set after 2048 bytes -- is that recommended in the spec?

("Warning! The drive may decide to rebuffer arbitrarily while you read,
so check if DRQ is still set before each read to the data register!" ??)

>> My suggestion is to buffer an entire DRQ block of data at once
>> (byte_count_limit) to avoid the problem.
> 
> No matter whether there is a problem or not, buffering more data at once
> (and therefore doing less requests) is better for performance anyway.
> 
> Kevin
> 
>>>  void ide_atapi_cmd_ok(IDEState *s)
>>> @@ -190,10 +210,8 @@ void ide_atapi_cmd_reply_end(IDEState *s)
>>>  ret = cd_read_sector(s, s->lba, s->io_buffer, 
>>> s->cd_sector_size);
>>>  if (ret < 0) {
>>>  ide_atapi_io_error(s, ret);
>>> -return;
>>>   

Re: [Qemu-block] [Qemu-devel] [PATCH 1/5] ide/atapi: make PIO read requests async

2015-10-06 Thread John Snow


On 10/06/2015 05:20 AM, Peter Lieven wrote:
> Am 06.10.2015 um 10:57 schrieb Kevin Wolf:
>> Am 05.10.2015 um 23:15 hat John Snow geschrieben:
>>>
>>> On 09/21/2015 08:25 AM, Peter Lieven wrote:
 PIO read requests on the ATAPI interface used to be sync blk requests.
 This has to siginificant drawbacks. First the main loop hangs util an
 I/O request is completed and secondly if the I/O request does not
 complete (e.g. due to an unresponsive storage) Qemu hangs completely.

 Signed-off-by: Peter Lieven 
 ---
   hw/ide/atapi.c | 69
 --
   1 file changed, 43 insertions(+), 26 deletions(-)

 diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
 index 747f466..9257e1c 100644
 --- a/hw/ide/atapi.c
 +++ b/hw/ide/atapi.c
 @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
   memset(buf, 0, 288);
   }
   -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int
 sector_size)
 +static void cd_read_sector_cb(void *opaque, int ret)
   {
 -int ret;
 +IDEState *s = opaque;
   -switch(sector_size) {
 -case 2048:
 -block_acct_start(blk_get_stats(s->blk), &s->acct,
 - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
 -ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
 -block_acct_done(blk_get_stats(s->blk), &s->acct);
 -break;
 -case 2352:
 -block_acct_start(blk_get_stats(s->blk), &s->acct,
 - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
 -ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
 -block_acct_done(blk_get_stats(s->blk), &s->acct);
 -if (ret < 0)
 -return ret;
 -cd_data_to_raw(buf, lba);
 -break;
 -default:
 -ret = -EIO;
 -break;
 +block_acct_done(blk_get_stats(s->blk), &s->acct);
 +
 +if (ret < 0) {
 +ide_atapi_io_error(s, ret);
 +return;
 +}
 +
 +if (s->cd_sector_size == 2352) {
 +cd_data_to_raw(s->io_buffer, s->lba);
   }
 -return ret;
 +
 +s->lba++;
 +s->io_buffer_index = 0;
 +s->status &= ~BUSY_STAT;
 +
 +ide_atapi_cmd_reply_end(s);
 +}
 +
 +static int cd_read_sector(IDEState *s, int lba, void *buf, int
 sector_size)
 +{
 +if (sector_size != 2048 && sector_size != 2352) {
 +return -EINVAL;
 +}
 +
 +s->iov.iov_base = buf;
 +if (sector_size == 2352) {
 +buf += 4;
 +}
>> This doesn't look quite right, buf is never read after this.
>>
>> Also, why +=4 when it was originally buf + 16?
> 
> You are right. I mixed that up.
> 
>>
 +
 +s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
 +qemu_iovec_init_external(&s->qiov, &s->iov, 1);
 +
 +if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
 +  cd_read_sector_cb, s) == NULL) {
 +return -EIO;
 +}
 +
 +block_acct_start(blk_get_stats(s->blk), &s->acct,
 + 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
 +s->status |= BUSY_STAT;
 +return 0;
   }
   
>>> We discussed this off-list a bit, but for upstream synchronization:
>>>
>>> Unfortunately, I believe making cd_read_sector here non-blocking makes
>>> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
>>> s->end_transfer_func() nonblocking, which functions like ide_data_readw
>>> are not prepared to cope with.
>> I don't think that's a problem as long as BSY is set while the
>> asynchronous command is running and DRQ is cleared. The latter will
>> protect ide_data_readw(). ide_sector_read() does essentially the same
>> thing.
> 
>  I was thinking the same. Without the BSY it's not working at all.
> 
>>
>> Or maybe I'm just missing what you're trying to say.
>>
>>> My suggestion is to buffer an entire DRQ block of data at once
>>> (byte_count_limit) to avoid the problem.
>> No matter whether there is a problem or not, buffering more data at once
>> (and therefore doing less requests) is better for performance anyway.
> 
> It's possible to do only one read in the backend and read the whole
> request into the IO buffer. I'll send a follow-up.
> 

Be cautious: we only have 128K (+4 bytes) to play with in the io_buffer
and the READ10 cdb can request up to 128MiB! For performance, it might
be nice to always buffer something like:

MIN(128K, nb_sectors * sector_size)

and then, as the guest drains each DRQ block of size byte_count_limit,
which can be at most 0xFFFE (so at least two of these fit per io_buffer
refill), we can just shift the data_ptr and data_end pointers to utilize
io_buffer like a ring buffer.

Because the guest can at most fetch 0xfffe
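
(The numbers behind that, spelled out; sizes are the ones quoted above,
with io_buffer being the usual 128 KiB DMA buffer plus 4 bytes:)

    READ(10) transfer length : 16-bit field -> up to 65535 * 2048 B, ~128 MiB
    io_buffer                : 131072 + 4 B available for buffering
    byte_count_limit         : at most 0xFFFE (65534) B per DRQ block

    => 131072 / 65534 >= 2, so one io_buffer refill always holds at least
       two full-sized DRQ blocks, and MIN(128K, nb_sectors * sector_size)
       caps how much is read ahead per backend request.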

Re: [Qemu-block] [Qemu-devel] [PATCH 1/5] ide/atapi: make PIO read requests async

2015-10-06 Thread Peter Lieven

> Am 06.10.2015 um 19:07 schrieb John Snow :
> 
> 
> 
>> On 10/06/2015 05:20 AM, Peter Lieven wrote:
>>> Am 06.10.2015 um 10:57 schrieb Kevin Wolf:
>>> Am 05.10.2015 um 23:15 hat John Snow geschrieben:
 
 On 09/21/2015 08:25 AM, Peter Lieven wrote:
> PIO read requests on the ATAPI interface used to be sync blk requests.
> This has to siginificant drawbacks. First the main loop hangs util an
> I/O request is completed and secondly if the I/O request does not
> complete (e.g. due to an unresponsive storage) Qemu hangs completely.
> 
> Signed-off-by: Peter Lieven 
> ---
>  hw/ide/atapi.c | 69
> --
>  1 file changed, 43 insertions(+), 26 deletions(-)
> 
> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
> index 747f466..9257e1c 100644
> --- a/hw/ide/atapi.c
> +++ b/hw/ide/atapi.c
> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
>  memset(buf, 0, 288);
>  }
>  -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int
> sector_size)
> +static void cd_read_sector_cb(void *opaque, int ret)
>  {
> -int ret;
> +IDEState *s = opaque;
>  -switch(sector_size) {
> -case 2048:
> -block_acct_start(blk_get_stats(s->blk), &s->acct,
> - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> -ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
> -block_acct_done(blk_get_stats(s->blk), &s->acct);
> -break;
> -case 2352:
> -block_acct_start(blk_get_stats(s->blk), &s->acct,
> - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> -ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
> -block_acct_done(blk_get_stats(s->blk), &s->acct);
> -if (ret < 0)
> -return ret;
> -cd_data_to_raw(buf, lba);
> -break;
> -default:
> -ret = -EIO;
> -break;
> +block_acct_done(blk_get_stats(s->blk), &s->acct);
> +
> +if (ret < 0) {
> +ide_atapi_io_error(s, ret);
> +return;
> +}
> +
> +if (s->cd_sector_size == 2352) {
> +cd_data_to_raw(s->io_buffer, s->lba);
>  }
> -return ret;
> +
> +s->lba++;
> +s->io_buffer_index = 0;
> +s->status &= ~BUSY_STAT;
> +
> +ide_atapi_cmd_reply_end(s);
> +}
> +
> +static int cd_read_sector(IDEState *s, int lba, void *buf, int
> sector_size)
> +{
> +if (sector_size != 2048 && sector_size != 2352) {
> +return -EINVAL;
> +}
> +
> +s->iov.iov_base = buf;
> +if (sector_size == 2352) {
> +buf += 4;
> +}
>>> This doesn't look quite right, buf is never read after this.
>>> 
>>> Also, why +=4 when it was originally buf + 16?
>> 
>> You are right. I mixed that up.
>> 
>>> 
> +
> +s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
> +qemu_iovec_init_external(&s->qiov, &s->iov, 1);
> +
> +if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
> +  cd_read_sector_cb, s) == NULL) {
> +return -EIO;
> +}
> +
> +block_acct_start(blk_get_stats(s->blk), &s->acct,
> + 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> +s->status |= BUSY_STAT;
> +return 0;
>  }
 We discussed this off-list a bit, but for upstream synchronization:
 
 Unfortunately, I believe making cd_read_sector here non-blocking makes
 ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
 s->end_transfer_func() nonblocking, which functions like ide_data_readw
 are not prepared to cope with.
>>> I don't think that's a problem as long as BSY is set while the
>>> asynchronous command is running and DRQ is cleared. The latter will
>>> protect ide_data_readw(). ide_sector_read() does essentially the same
>>> thing.
>> 
>> I was thinking the same. Without the BSY it's not working at all.
>> 
>>> 
>>> Or maybe I'm just missing what you're trying to say.
>>> 
 My suggestion is to buffer an entire DRQ block of data at once
 (byte_count_limit) to avoid the problem.
>>> No matter whether there is a problem or not, buffering more data at once
>>> (and therefore doing less requests) is better for performance anyway.
>> 
>> It's possible to do only one read in the backend and read the whole
>> request into the IO buffer. I'll send a follow-up.
> 
> Be cautious: we only have 128K (+4 bytes) to play with in the io_buffer
> and the READ10 cdb can request up to 128MiB! For performance, it might
> be nice to always buffer something like:
> 
> MIN(128K, nb_sectors * sector_size)

isn't nb_sectors limited to CD_MAX_SECTORS (32)?

Peter


> 
> and then as the guest drains the DRQ block of size byte_count

Re: [Qemu-block] [Qemu-devel] [PATCH 1/5] ide/atapi: make PIO read requests async

2015-10-06 Thread John Snow


On 10/06/2015 01:12 PM, Peter Lieven wrote:
> 
>> Am 06.10.2015 um 19:07 schrieb John Snow :
>>
>>
>>
>>> On 10/06/2015 05:20 AM, Peter Lieven wrote:
 Am 06.10.2015 um 10:57 schrieb Kevin Wolf:
 Am 05.10.2015 um 23:15 hat John Snow geschrieben:
>
> On 09/21/2015 08:25 AM, Peter Lieven wrote:
>> PIO read requests on the ATAPI interface used to be sync blk requests.
>> This has to siginificant drawbacks. First the main loop hangs util an
>> I/O request is completed and secondly if the I/O request does not
>> complete (e.g. due to an unresponsive storage) Qemu hangs completely.
>>
>> Signed-off-by: Peter Lieven 
>> ---
>>  hw/ide/atapi.c | 69
>> --
>>  1 file changed, 43 insertions(+), 26 deletions(-)
>>
>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
>> index 747f466..9257e1c 100644
>> --- a/hw/ide/atapi.c
>> +++ b/hw/ide/atapi.c
>> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
>>  memset(buf, 0, 288);
>>  }
>>  -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int
>> sector_size)
>> +static void cd_read_sector_cb(void *opaque, int ret)
>>  {
>> -int ret;
>> +IDEState *s = opaque;
>>  -switch(sector_size) {
>> -case 2048:
>> -block_acct_start(blk_get_stats(s->blk), &s->acct,
>> - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>> -ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
>> -block_acct_done(blk_get_stats(s->blk), &s->acct);
>> -break;
>> -case 2352:
>> -block_acct_start(blk_get_stats(s->blk), &s->acct,
>> - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>> -ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
>> -block_acct_done(blk_get_stats(s->blk), &s->acct);
>> -if (ret < 0)
>> -return ret;
>> -cd_data_to_raw(buf, lba);
>> -break;
>> -default:
>> -ret = -EIO;
>> -break;
>> +block_acct_done(blk_get_stats(s->blk), &s->acct);
>> +
>> +if (ret < 0) {
>> +ide_atapi_io_error(s, ret);
>> +return;
>> +}
>> +
>> +if (s->cd_sector_size == 2352) {
>> +cd_data_to_raw(s->io_buffer, s->lba);
>>  }
>> -return ret;
>> +
>> +s->lba++;
>> +s->io_buffer_index = 0;
>> +s->status &= ~BUSY_STAT;
>> +
>> +ide_atapi_cmd_reply_end(s);
>> +}
>> +
>> +static int cd_read_sector(IDEState *s, int lba, void *buf, int
>> sector_size)
>> +{
>> +if (sector_size != 2048 && sector_size != 2352) {
>> +return -EINVAL;
>> +}
>> +
>> +s->iov.iov_base = buf;
>> +if (sector_size == 2352) {
>> +buf += 4;
>> +}
 This doesn't look quite right, buf is never read after this.

 Also, why +=4 when it was originally buf + 16?
>>>
>>> You are right. I mixed that up.
>>>

>> +
>> +s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
>> +qemu_iovec_init_external(&s->qiov, &s->iov, 1);
>> +
>> +if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
>> +  cd_read_sector_cb, s) == NULL) {
>> +return -EIO;
>> +}
>> +
>> +block_acct_start(blk_get_stats(s->blk), &s->acct,
>> + 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>> +s->status |= BUSY_STAT;
>> +return 0;
>>  }
> We discussed this off-list a bit, but for upstream synchronization:
>
> Unfortunately, I believe making cd_read_sector here non-blocking makes
> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
> s->end_transfer_func() nonblocking, which functions like ide_data_readw
> are not prepared to cope with.
 I don't think that's a problem as long as BSY is set while the
 asynchronous command is running and DRQ is cleared. The latter will
 protect ide_data_readw(). ide_sector_read() does essentially the same
 thing.
>>>
>>> I was thinking the same. Without the BSY it's not working at all.
>>>

 Or maybe I'm just missing what you're trying to say.

> My suggestion is to buffer an entire DRQ block of data at once
> (byte_count_limit) to avoid the problem.
 No matter whether there is a problem or not, buffering more data at once
 (and therefore doing less requests) is better for performance anyway.
>>>
>>> It's possible to do only one read in the backend and read the whole
>>> request into the IO buffer. I'll send a follow-up.
>>
>> Be cautious: we only have 128K (+4 bytes) to play with in the io_buffer
>> and the READ10 cdb can request up to 128MiB! For performance, it might
>> be nice to always buffer something like:
>>
>
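
One way to respect both limits is to size each backend read as the remaining
transfer, capped at the io_buffer capacity, and then let the PIO code hand the
buffered data to the guest one byte_count_limit-sized DRQ block at a time. A
minimal sketch for the plain 2048-byte sector case (not code from this series;
the buffer-size macro and parameter name are assumptions):

/* Assumed io_buffer capacity; illustrative only. */
#define ATAPI_IO_BUF_SIZE (128 * 1024)

/* How many bytes to fetch from the backend for the next read:
 * the whole remaining transfer, capped at what io_buffer can hold,
 * rounded down to whole 2048-byte CD sectors. */
static int next_read_bytes(int remaining_bytes)
{
    int chunk = remaining_bytes;

    if (chunk > ATAPI_IO_BUF_SIZE) {
        chunk = ATAPI_IO_BUF_SIZE;   /* a READ(10) can ask for far more */
    }
    return chunk & ~2047;
}

The idea is that when the buffered chunk is exhausted, the next blk_aio_readv()
covers the following chunk, so even a 128 MiB request never needs more than the
existing 128K buffer.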

Re: [Qemu-block] [Qemu-devel] [PATCH 1/5] ide/atapi: make PIO read requests async

2015-10-06 Thread John Snow


On 10/06/2015 02:31 PM, Peter Lieven wrote:
> On 06.10.2015 at 19:56, John Snow wrote:
>>
>> On 10/06/2015 01:12 PM, Peter Lieven wrote:
 On 06.10.2015 at 19:07, John Snow wrote:



> On 10/06/2015 05:20 AM, Peter Lieven wrote:
>> On 06.10.2015 at 10:57, Kevin Wolf wrote:
>> On 05.10.2015 at 23:15, John Snow wrote:
>>> On 09/21/2015 08:25 AM, Peter Lieven wrote:
 PIO read requests on the ATAPI interface used to be sync blk requests.
 This has two significant drawbacks. First, the main loop hangs until an
 I/O request is completed, and secondly, if the I/O request does not
 complete (e.g. due to unresponsive storage) QEMU hangs completely.
Maybe you can have a look at the other patches of this series as well?
Then I can respin the whole series.



 Signed-off-by: Peter Lieven 
 ---
  hw/ide/atapi.c | 69
 --
  1 file changed, 43 insertions(+), 26 deletions(-)

 diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
 index 747f466..9257e1c 100644
 --- a/hw/ide/atapi.c
 +++ b/hw/ide/atapi.c
 @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
  memset(buf, 0, 288);
  }
  -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int
 sector_size)
 +static void cd_read_sector_cb(void *opaque, int ret)
  {
 -int ret;
 +IDEState *s = opaque;
  -switch(sector_size) {
 -case 2048:
 -block_acct_start(blk_get_stats(s->blk), &s->acct,
 - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
 -ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
 -block_acct_done(blk_get_stats(s->blk), &s->acct);
 -break;
 -case 2352:
 -block_acct_start(blk_get_stats(s->blk), &s->acct,
 - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
 -ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
 -block_acct_done(blk_get_stats(s->blk), &s->acct);
 -if (ret < 0)
 -return ret;
 -cd_data_to_raw(buf, lba);
 -break;
 -default:
 -ret = -EIO;
 -break;
 +block_acct_done(blk_get_stats(s->blk), &s->acct);
 +
 +if (ret < 0) {
 +ide_atapi_io_error(s, ret);
 +return;
 +}
 +
 +if (s->cd_sector_size == 2352) {
 +cd_data_to_raw(s->io_buffer, s->lba);
  }
 -return ret;
 +
 +s->lba++;
 +s->io_buffer_index = 0;
 +s->status &= ~BUSY_STAT;
 +
 +ide_atapi_cmd_reply_end(s);
 +}
 +
 +static int cd_read_sector(IDEState *s, int lba, void *buf, int
 sector_size)
 +{
 +if (sector_size != 2048 && sector_size != 2352) {
 +return -EINVAL;
 +}
 +
 +s->iov.iov_base = buf;
 +if (sector_size == 2352) {
 +buf += 4;
 +}
>> This doesn't look quite right, buf is never read after this.
>>
>> Also, why +=4 when it was originally buf + 16?
> You are right. I mixed that up.
>
 +
 +s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
 +qemu_iovec_init_external(&s->qiov, &s->iov, 1);
 +
 +if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
 +  cd_read_sector_cb, s) == NULL) {
 +return -EIO;
 +}
 +
 +block_acct_start(blk_get_stats(s->blk), &s->acct,
 + 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
 +s->status |= BUSY_STAT;
 +return 0;
  }
>>> We discussed this off-list a bit, but for upstream synchronization:
>>>
>>> Unfortunately, I believe making cd_read_sector here non-blocking makes
>>> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
>>> s->end_transfer_func() nonblocking, which functions like ide_data_readw
>>> are not prepared to cope with.
>> I don't think that's a problem as long as BSY is set while the
>> asynchronous command is running and DRQ is cleared. The latter will
>> protect ide_data_readw(). ide_sector_read() does essentially the same
>> thing.
> I was thinking the same. Without the BSY bit, it's not working at all.
>
>> Or maybe I'm just missing what you're trying to say.
>>
>>> My suggestion is to buffer an entire DRQ block of data at once
>>> (byte_count_limit) to avoid the problem.
>> No matter whether there is a problem or not, buffering more data 

Re: [Qemu-block] [Qemu-devel] [PATCH 1/5] ide/atapi: make PIO read requests async

2015-10-06 Thread Peter Lieven
On 06.10.2015 at 19:56, John Snow wrote:
>
> On 10/06/2015 01:12 PM, Peter Lieven wrote:
>>> On 06.10.2015 at 19:07, John Snow wrote:
>>>
>>>
>>>
 On 10/06/2015 05:20 AM, Peter Lieven wrote:
> On 06.10.2015 at 10:57, Kevin Wolf wrote:
> On 05.10.2015 at 23:15, John Snow wrote:
>> On 09/21/2015 08:25 AM, Peter Lieven wrote:
>>> PIO read requests on the ATAPI interface used to be sync blk requests.
>>> This has two significant drawbacks. First, the main loop hangs until an
>>> I/O request is completed, and secondly, if the I/O request does not
>>> complete (e.g. due to unresponsive storage) QEMU hangs completely.
>>>
>>> Signed-off-by: Peter Lieven 
>>> ---
>>>  hw/ide/atapi.c | 69
>>> --
>>>  1 file changed, 43 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
>>> index 747f466..9257e1c 100644
>>> --- a/hw/ide/atapi.c
>>> +++ b/hw/ide/atapi.c
>>> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
>>>  memset(buf, 0, 288);
>>>  }
>>>  -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int
>>> sector_size)
>>> +static void cd_read_sector_cb(void *opaque, int ret)
>>>  {
>>> -int ret;
>>> +IDEState *s = opaque;
>>>  -switch(sector_size) {
>>> -case 2048:
>>> -block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> -ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
>>> -block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> -break;
>>> -case 2352:
>>> -block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> -ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
>>> -block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> -if (ret < 0)
>>> -return ret;
>>> -cd_data_to_raw(buf, lba);
>>> -break;
>>> -default:
>>> -ret = -EIO;
>>> -break;
>>> +block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> +
>>> +if (ret < 0) {
>>> +ide_atapi_io_error(s, ret);
>>> +return;
>>> +}
>>> +
>>> +if (s->cd_sector_size == 2352) {
>>> +cd_data_to_raw(s->io_buffer, s->lba);
>>>  }
>>> -return ret;
>>> +
>>> +s->lba++;
>>> +s->io_buffer_index = 0;
>>> +s->status &= ~BUSY_STAT;
>>> +
>>> +ide_atapi_cmd_reply_end(s);
>>> +}
>>> +
>>> +static int cd_read_sector(IDEState *s, int lba, void *buf, int
>>> sector_size)
>>> +{
>>> +if (sector_size != 2048 && sector_size != 2352) {
>>> +return -EINVAL;
>>> +}
>>> +
>>> +s->iov.iov_base = buf;
>>> +if (sector_size == 2352) {
>>> +buf += 4;
>>> +}
> This doesn't look quite right, buf is never read after this.
>
> Also, why +=4 when it was originally buf + 16?
 You are right. I mixed that up.

>>> +
>>> +s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
>>> +qemu_iovec_init_external(&s->qiov, &s->iov, 1);
>>> +
>>> +if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
>>> +  cd_read_sector_cb, s) == NULL) {
>>> +return -EIO;
>>> +}
>>> +
>>> +block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> + 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> +s->status |= BUSY_STAT;
>>> +return 0;
>>>  }
>> We discussed this off-list a bit, but for upstream synchronization:
>>
>> Unfortunately, I believe making cd_read_sector here non-blocking makes
>> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
>> s->end_transfer_func() nonblocking, which functions like ide_data_readw
>> are not prepared to cope with.
> I don't think that's a problem as long as BSY is set while the
> asynchronous command is running and DRQ is cleared. The latter will
> protect ide_data_readw(). ide_sector_read() does essentially the same
> thing.
 I was thinking the same. Without the BSY bit, it's not working at all.

> Or maybe I'm just missing what you're trying to say.
>
>> My suggestion is to buffer an entire DRQ block of data at once
>> (byte_count_limit) to avoid the problem.
> No matter whether there is a problem or not, buffering more data at once
> (and therefore doing fewer requests) is better for performance anyway.
 It's possible to do only one read in the backend and read the whole
 request into the IO buffer. I'll send a follow-up.
>>> Be cautious: we only have 128K (+4 bytes) to play with in the i

Re: [Qemu-block] [Qemu-devel] [PATCH] gluster: allocate GlusterAIOCBs on the stack

2015-10-06 Thread Jeff Cody
On Thu, Oct 01, 2015 at 01:04:38PM +0200, Paolo Bonzini wrote:
> This is simpler now that the driver has been converted to coroutines.
> 
> Signed-off-by: Paolo Bonzini 
> ---
>  block/gluster.c | 86 
> ++---
>  1 file changed, 33 insertions(+), 53 deletions(-)
>

Thanks,

Applied to my block branch:

git git://github.com/codyprime/qemu-kvm-jtc.git block

-Jeff
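
The simplification here is the usual one for coroutine conversions: the
per-request state can live on the coroutine's stack and be filled in by the
completion callback, instead of being malloc'ed as an AIOCB and freed in the
callback. A generic sketch of that shape (made-up names, not the actual
block/gluster.c code):

#include <stdbool.h>

typedef struct ReqState {
    int ret;     /* filled in by the completion callback */
    bool done;
} ReqState;

/* Completion callback: write the result straight into the caller's
 * stack-allocated state. */
static void req_complete(void *opaque, int ret)
{
    ReqState *st = opaque;
    st->ret = ret;
    st->done = true;   /* in QEMU, this is where the coroutine is re-entered */
}

/* Coroutine-style request: no heap allocation, no explicit free. */
static int submit_and_wait(void (*submit)(void (*cb)(void *, int), void *opaque))
{
    ReqState st = { .ret = 0, .done = false };

    submit(req_complete, &st);
    while (!st.done) {
        /* in QEMU, qemu_coroutine_yield() here until req_complete() runs */
    }
    return st.ret;
}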




Re: [Qemu-block] [PATCH v5 0/4] qapi: child add/delete support

2015-10-06 Thread Wen Congyang
Ping...

On 09/22/2015 03:44 PM, Wen Congyang wrote:
> If a quorum child is broken, we can use a mirror job to replace it.
> But sometimes the user only needs to remove the broken child and
> add it back later when the problem is fixed.
> 
> It is based on the following patch:
> http://lists.nongnu.org/archive/html/qemu-devel/2015-09/msg04579.html
> 
> ChangLog:
> v5:
> 1. Address Eric Blake's comments
> v4:
> 1. drop nbd driver's implementation. We can use human-monitor-command
>to do it.
> 2. Rename the command name.
> v3:
> 1. Don't open BDS in bdrv_add_child(). Use the existing BDS which is
>created by the QMP command blockdev-add.
> 2. The driver NBD can support filename, path, host:port now.
> v2:
> 1. Use bdrv_get_device_or_node_name() instead of new function
>bdrv_get_id_or_node_name()
> 2. Update the error message
> 3. Update the documents in block-core.json
> 
> Wen Congyang (4):
>   Add new block driver interface to add/delete a BDS's child
>   quorum: implement bdrv_add_child() and bdrv_del_child()
>   qmp: add monitor command to add/remove a child
>   hmp: add monitor command to add/remove a child
> 
>  block.c   | 56 ++--
>  block/quorum.c| 72 
> +--
>  blockdev.c| 48 +++
>  hmp-commands.hx   | 28 ++
>  hmp.c | 20 +
>  hmp.h |  2 ++
>  include/block/block.h |  8 ++
>  include/block/block_int.h |  5 
>  qapi/block-core.json  | 34 ++
>  qmp-commands.hx   | 61 +++
>  10 files changed, 329 insertions(+), 5 deletions(-)
> 




Re: [Qemu-block] [PATCH v10 00/10] Block replication for continuous checkpoints

2015-10-06 Thread Wen Congyang
Ping...

On 09/25/2015 02:17 PM, Wen Congyang wrote:
> Block replication is a very important feature which is used for
> continuous checkpoints (for example, COLO).
> 
> You can find the detailed information about block replication here:
> http://wiki.qemu.org/Features/BlockReplication
> 
> Usage:
> Please refer to docs/block-replication.txt
> 
> This patch series is based on the following patch series:
> 1. http://lists.nongnu.org/archive/html/qemu-devel/2015-09/msg05514.html
> 2. http://lists.nongnu.org/archive/html/qemu-devel/2015-09/msg04900.html
> 
> You can get the patch here:
> https://github.com/coloft/qemu/tree/wency/block-replication-v10
> 
> You can get the patch with framework here:
> https://github.com/coloft/qemu/tree/wency/colo_framework_v9.5
> 
> TODO:
> 1. Continuous block replication. It will be started after basic functions
>are accepted.
> 
> Changs Log:
> V10:
> 1. Use blockdev-remove-medium and blockdev-insert-medium to replace backing
>reference.
> 2. Address the comments from Eric Blake
> V9:
> 1. Update the error messages
> 2. Rebase to the newest qemu
> 3. Split child add/delete support. These patches are sent in another patchset.
> V8:
> 1. Address Alberto Garcia's comments
> V7:
> 1. Implement adding/removing quorum child. Remove the option non-connect.
> 2. Simplify the backing reference option according to Stefan Hajnoczi's 
> suggestion
> V6:
> 1. Rebase to the newest qemu.
> V5:
> 1. Address the comments from Gong Lei
> 2. Speed up the failover. The secondary VM can take over very quickly even
>if there are too many I/O requests.
> V4:
> 1. Introduce a new driver, replication, to avoid touching nbd and qcow2.
> V3:
> 1. use error_setg() instead of error_set()
> 2. Add a new block job API
> 3. Active disk, hidden disk and nbd target uses the same AioContext
> 4. Add a testcase to test new hbitmap API
> V2:
> 1. Redesign the secondary qemu(use image-fleecing)
> 2. Use Error objects to return error message
> 3. Address the comments from Max Reitz and Eric Blake
> 
> Wen Congyang (10):
>   allow writing to the backing file
>   Backup: clear all bitmap when doing block checkpoint
>   Allow creating backup jobs when opening BDS
>   block: make bdrv_put_ref_bh_schedule() as a public API
>   docs: block replication's description
>   Add new block driver interfaces to control block replication
>   quorum: implement block driver interfaces for block replication
>   Implement new driver for block replication
>   support replication driver in blockdev-add
>   Add a new API to start/stop replication, do checkpoint to all BDSes
> 
>  block.c| 192 +-
>  block/Makefile.objs|   3 +-
>  block/backup.c |  14 ++
>  block/quorum.c |  77 
>  block/replication.c| 471 
> +
>  blockdev.c |  37 +---
>  blockjob.c |  11 ++
>  docs/block-replication.txt | 259 +
>  include/block/block.h  |  10 +
>  include/block/block_int.h  |  14 ++
>  include/block/blockjob.h   |  12 ++
>  qapi/block-core.json   |  34 +++-
>  12 files changed, 1098 insertions(+), 36 deletions(-)
>  create mode 100644 block/replication.c
>  create mode 100644 docs/block-replication.txt
>