On 10/06/2015 04:46 AM, Peter Lieven wrote: > Am 05.10.2015 um 23:15 schrieb John Snow: >> >> On 09/21/2015 08:25 AM, Peter Lieven wrote: >>> PIO read requests on the ATAPI interface used to be sync blk requests. >>> This has to siginificant drawbacks. First the main loop hangs util an >>> I/O request is completed and secondly if the I/O request does not >>> complete (e.g. due to an unresponsive storage) Qemu hangs completely. >>> >>> Signed-off-by: Peter Lieven <p...@kamp.de> >>> --- >>> hw/ide/atapi.c | 69 >>> ++++++++++++++++++++++++++++++++++++---------------------- >>> 1 file changed, 43 insertions(+), 26 deletions(-) >>> >>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c >>> index 747f466..9257e1c 100644 >>> --- a/hw/ide/atapi.c >>> +++ b/hw/ide/atapi.c >>> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba) >>> memset(buf, 0, 288); >>> } >>> -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int >>> sector_size) >>> +static void cd_read_sector_cb(void *opaque, int ret) >>> { >>> - int ret; >>> + IDEState *s = opaque; >>> - switch(sector_size) { >>> - case 2048: >>> - block_acct_start(blk_get_stats(s->blk), &s->acct, >>> - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); >>> - ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4); >>> - block_acct_done(blk_get_stats(s->blk), &s->acct); >>> - break; >>> - case 2352: >>> - block_acct_start(blk_get_stats(s->blk), &s->acct, >>> - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); >>> - ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4); >>> - block_acct_done(blk_get_stats(s->blk), &s->acct); >>> - if (ret < 0) >>> - return ret; >>> - cd_data_to_raw(buf, lba); >>> - break; >>> - default: >>> - ret = -EIO; >>> - break; >>> + block_acct_done(blk_get_stats(s->blk), &s->acct); >>> + >>> + if (ret < 0) { >>> + ide_atapi_io_error(s, ret); >>> + return; >>> + } >>> + >>> + if (s->cd_sector_size == 2352) { >>> + cd_data_to_raw(s->io_buffer, s->lba); >>> } >>> - return ret; >>> + >>> + s->lba++; >>> + s->io_buffer_index = 0; >>> + s->status &= ~BUSY_STAT; >>> + >>> + ide_atapi_cmd_reply_end(s); >>> +} >>> + >>> +static int cd_read_sector(IDEState *s, int lba, void *buf, int >>> sector_size) >>> +{ >>> + if (sector_size != 2048 && sector_size != 2352) { >>> + return -EINVAL; >>> + } >>> + >>> + s->iov.iov_base = buf; >>> + if (sector_size == 2352) { >>> + buf += 4; >>> + } >>> + >>> + s->iov.iov_len = 4 * BDRV_SECTOR_SIZE; >>> + qemu_iovec_init_external(&s->qiov, &s->iov, 1); >>> + >>> + if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4, >>> + cd_read_sector_cb, s) == NULL) { >>> + return -EIO; >>> + } >>> + >>> + block_acct_start(blk_get_stats(s->blk), &s->acct, >>> + 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); >>> + s->status |= BUSY_STAT; >>> + return 0; >>> } >>> >> We discussed this off-list a bit, but for upstream synchronization: >> >> Unfortunately, I believe making cd_read_sector here non-blocking makes >> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to >> s->end_transfer_func() nonblocking, which functions like ide_data_readw >> are not prepared to cope with. >> >> My suggestion is to buffer an entire DRQ block of data at once >> (byte_count_limit) to avoid the problem. > > Hi John, > > first of all thank you for the detailed analysis. > > Is the following what you have i mind. For PIO reads > 1 sector > it is a great improvement for the NFS backend: > > diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c > index ab45495..ec2ba89 100644 > --- a/hw/ide/atapi.c > +++ b/hw/ide/atapi.c > @@ -117,37 +117,40 @@ static void cd_read_sector_cb(void *opaque, int ret) > } > > if (s->cd_sector_size == 2352) { > - cd_data_to_raw(s->io_buffer, s->lba); > + int nb_sectors = s->packet_transfer_size / 2352; > + while (nb_sectors--) { > + memmove(s->io_buffer + nb_sectors * 2352 + 4, > + s->io_buffer + nb_sectors * 2048, 2048); > + cd_data_to_raw(s->io_buffer + nb_sectors * 2352, > + s->lba + nb_sectors); > + } > }
Is this going to be correct in cases where the number of sectors we are copying is less than the total request size? We might need to bookmark how many sectors/bytes we're copying this go-around. Perhaps by looking at lcyl/hcyl. > > - s->lba++; > + s->lba = -1; > s->io_buffer_index = 0; > s->status &= ~BUSY_STAT; > > ide_atapi_cmd_reply_end(s); > } > Well, I might not name it cd_read_sector and cd_read_sector_cb anymore. Perhaps cd_read_sectors[_cb]. > -static int cd_read_sector(IDEState *s, int lba, void *buf, int > sector_size) > +static int cd_read_sector(IDEState *s, int lba, void *buf, int > sector_size, int nb_sectors) > { > if (sector_size != 2048 && sector_size != 2352) { > return -EINVAL; > } > > s->iov.iov_base = buf; > - if (sector_size == 2352) { > - buf += 4; > - } > - > - s->iov.iov_len = 4 * BDRV_SECTOR_SIZE; > + s->iov.iov_len = nb_sectors * 2048; > qemu_iovec_init_external(&s->qiov, &s->iov, 1); > > - if (ide_readv_cancelable(s, (int64_t)lba << 2, &s->qiov, 4, > - cd_read_sector_cb, s) == NULL) { > + if (ide_readv_cancelable(s, (int64_t)lba << 2, > + &s->qiov, nb_sectors * 4, > + cd_read_sector_cb, s) == NULL) { > return -EIO; > } > > block_acct_start(blk_get_stats(s->blk), &s->acct, > - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); > + nb_sectors * 2048, BLOCK_ACCT_READ); > s->status |= BUSY_STAT; > return 0; > } > @@ -190,7 +193,7 @@ void ide_atapi_io_error(IDEState *s, int ret) > /* The whole ATAPI transfer logic is handled in this function */ > void ide_atapi_cmd_reply_end(IDEState *s) > { > - int byte_count_limit, size, ret; > + int byte_count_limit, size; > #ifdef DEBUG_IDE_ATAPI > printf("reply: tx_size=%d elem_tx_size=%d index=%d\n", > s->packet_transfer_size, > @@ -205,14 +208,6 @@ void ide_atapi_cmd_reply_end(IDEState *s) > printf("status=0x%x\n", s->status); > #endif > } else { > - /* see if a new sector must be read */ > - if (s->lba != -1 && s->io_buffer_index >= s->cd_sector_size) { > - ret = cd_read_sector(s, s->lba, s->io_buffer, > s->cd_sector_size); > - if (ret < 0) { > - ide_atapi_io_error(s, ret); > - } > - return; > - } > if (s->elementary_transfer_size > 0) { > /* there are some data left to transmit in this elementary > transfer */ > @@ -287,13 +282,18 @@ static void ide_atapi_cmd_reply(IDEState *s, int > size, int max_size) > static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors, > int sector_size) > { > + int ret; > s->lba = lba; > s->packet_transfer_size = nb_sectors * sector_size; > + assert(s->packet_transfer_size <= > + IDE_DMA_BUF_SECTORS * BDRV_SECTOR_SIZE + 4); > s->elementary_transfer_size = 0; > - s->io_buffer_index = sector_size; > s->cd_sector_size = sector_size; > - > - ide_atapi_cmd_reply_end(s); > + ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size, > + nb_sectors); > + if (ret < 0) { > + ide_atapi_io_error(s, ret); > + } > } > > static void ide_atapi_cmd_check_status(IDEState *s) > > > Did you also have a look at the other patches? > > Thanks, > Peter On my queue; hopefully Stefan can take a peek too, but I'll try to review the IDE-specific bits. I imagine you want to wait to respin until we've looked at all the patches, that's fine -- I'll try not to keep you waiting for much longer. --js