On Fri, Jun 19, 2015 at 2:14 PM, Peter Lieven <p...@kamp.de> wrote: > Am 18.06.2015 um 11:36 schrieb Stefan Hajnoczi: >> On Thu, Jun 18, 2015 at 10:29 AM, Peter Lieven <p...@kamp.de> wrote: >>> Am 18.06.2015 um 10:42 schrieb Kevin Wolf: >>>> Am 18.06.2015 um 10:30 hat Peter Lieven geschrieben: >>>>> Am 18.06.2015 um 09:45 schrieb Kevin Wolf: >>>>>> Am 18.06.2015 um 09:12 hat Peter Lieven geschrieben: >>>>>>> Thread 2 (Thread 0x7ffff5550700 (LWP 2636)): >>>>>>> #0 0x00007ffff5d87aa3 in ppoll () from /lib/x86_64-linux-gnu/libc.so.6 >>>>>>> No symbol table info available. >>>>>>> #1 0x0000555555955d91 in qemu_poll_ns (fds=0x5555563889c0, nfds=3, >>>>>>> timeout=4999424576) at qemu-timer.c:326 >>>>>>> ts = {tv_sec = 4, tv_nsec = 999424576} >>>>>>> tvsec = 4 >>>>>>> #2 0x0000555555956feb in aio_poll (ctx=0x5555563528e0, blocking=true) >>>>>>> at aio-posix.c:231 >>>>>>> node = 0x0 >>>>>>> was_dispatching = false >>>>>>> ret = 1 >>>>>>> progress = false >>>>>>> #3 0x000055555594aeed in bdrv_prwv_co (bs=0x55555637eae0, >>>>>>> offset=4292007936, >>>>>>> qiov=0x7ffff554f760, is_write=false, flags=0) at block.c:2699 >>>>>>> aio_context = 0x5555563528e0 >>>>>>> co = 0x5555563888a0 >>>>>>> rwco = {bs = 0x55555637eae0, offset = 4292007936, >>>>>>> qiov = 0x7ffff554f760, is_write = false, ret = 2147483647, >>>>>>> flags = 0} >>>>>>> #4 0x000055555594afa9 in bdrv_rw_co (bs=0x55555637eae0, >>>>>>> sector_num=8382828, >>>>>>> buf=0x7ffff44cc800 "(", nb_sectors=4, is_write=false, flags=0) >>>>>>> at block.c:2722 >>>>>>> qiov = {iov = 0x7ffff554f780, niov = 1, nalloc = -1, size = >>>>>>> 2048} >>>>>>> iov = {iov_base = 0x7ffff44cc800, iov_len = 2048} >>>>>>> #5 0x000055555594b008 in bdrv_read (bs=0x55555637eae0, >>>>>>> sector_num=8382828, >>>>>>> buf=0x7ffff44cc800 "(", nb_sectors=4) at block.c:2730 >>>>>>> No locals. 
>>>>>>> #6 0x000055555599acef in blk_read (blk=0x555556376820, >>>>>>> sector_num=8382828, >>>>>>> buf=0x7ffff44cc800 "(", nb_sectors=4) at block/block-backend.c:404 >>>>>>> No locals. >>>>>>> #7 0x0000555555833ed2 in cd_read_sector (s=0x555556408f88, >>>>>>> lba=2095707, >>>>>>> buf=0x7ffff44cc800 "(", sector_size=2048) at hw/ide/atapi.c:116 >>>>>>> ret = 32767 >>>>>> Here is the problem: The ATAPI emulation uses synchronous blk_read() >>>>>> instead of the AIO or coroutine interfaces. This means that it keeps >>>>>> polling for request completion while it holds the BQL until the request >>>>>> is completed. >>>>> I will look at this. >>> >>> I need some further help. My way to "emulate" a hung NFS Server is to >>> block it in the Firewall. Currently I face the problem that I cannot mount >>> a CD Iso via libnfs (nfs://) without hanging Qemu (i previously tried with >>> a kernel NFS mount). It reads a few sectors and then stalls (maybe another >>> bug): >>> >>> (gdb) thread apply all bt full >>> >>> Thread 3 (Thread 0x7ffff0c21700 (LWP 29710)): >>> #0 qemu_cond_broadcast (cond=cond@entry=0x555556259940) at >>> util/qemu-thread-posix.c:120 >>> err = <optimized out> >>> __func__ = "qemu_cond_broadcast" >>> #1 0x0000555555911164 in rfifolock_unlock (r=r@entry=0x555556259910) at >>> util/rfifolock.c:75 >>> __PRETTY_FUNCTION__ = "rfifolock_unlock" >>> #2 0x0000555555875921 in aio_context_release (ctx=ctx@entry=0x5555562598b0) >>> at async.c:329 >>> No locals. 
>>> #3 0x000055555588434c in aio_poll (ctx=ctx@entry=0x5555562598b0, >>> blocking=blocking@entry=true) at aio-posix.c:272 >>> node = <optimized out> >>> was_dispatching = false >>> i = <optimized out> >>> ret = <optimized out> >>> progress = false >>> timeout = 611734526 >>> __PRETTY_FUNCTION__ = "aio_poll" >>> #4 0x00005555558bc43d in bdrv_prwv_co (bs=bs@entry=0x55555627c0f0, >>> offset=offset@entry=7038976, qiov=qiov@entry=0x7ffff0c208f0, >>> is_write=is_write@entry=false, flags=flags@entry=(unknown: 0)) at >>> block/io.c:552 >>> aio_context = 0x5555562598b0 >>> co = <optimized out> >>> rwco = {bs = 0x55555627c0f0, offset = 7038976, qiov = >>> 0x7ffff0c208f0, is_write = false, ret = 2147483647, flags = (unknown: 0)} >>> #5 0x00005555558bc533 in bdrv_rw_co (bs=0x55555627c0f0, >>> sector_num=sector_num@entry=13748, buf=buf@entry=0x555557874800 "(", >>> nb_sectors=nb_sectors@entry=4, is_write=is_write@entry=false, >>> flags=flags@entry=(unknown: 0)) at block/io.c:575 >>> qiov = {iov = 0x7ffff0c208e0, niov = 1, nalloc = -1, size = 2048} >>> iov = {iov_base = 0x555557874800, iov_len = 2048} >>> #6 0x00005555558bc593 in bdrv_read (bs=<optimized out>, >>> sector_num=sector_num@entry=13748, buf=buf@entry=0x555557874800 "(", >>> nb_sectors=nb_sectors@entry=4) at block/io.c:583 >>> No locals. >>> #7 0x00005555558af75d in blk_read (blk=<optimized out>, >>> sector_num=sector_num@entry=13748, buf=buf@entry=0x555557874800 "(", >>> nb_sectors=nb_sectors@entry=4) at block/block-backend.c:493 >>> ret = <optimized out> >>> #8 0x00005555557abb88 in cd_read_sector (sector_size=<optimized out>, >>> buf=0x555557874800 "(", lba=3437, s=0x55555760db70) at hw/ide/atapi.c:116 >>> ret = <optimized out> >>> #9 ide_atapi_cmd_reply_end (s=0x55555760db70) at hw/ide/atapi.c:190 >>> byte_count_limit = <optimized out> >>> size = <optimized out> >>> ret = 2 >> This is still the same scenario Kevin explained. >> >> The ATAPI CD-ROM emulation code is using synchronous blk_read(). 
This >> function holds the QEMU global mutex while waiting for the I/O request >> to complete. This blocks other vcpu threads and the main loop thread. >> >> The solution is to convert the CD-ROM emulation code to use >> blk_aio_readv() instead of blk_read(). > > I tried a little, but I am stuck with my approach. It reads one sector > and then doesn't continue. Maybe someone with more knowledge > of ATAPI/IDE could help?
Converting synchronous code to asynchronous requires an understanding of the device's state transitions. Asynchronous code has to put the device registers into a busy state until the request completes. It also needs to handle hardware register accesses that occur while the request is still pending. I don't know ATAPI/IDE code well enough to suggest a fix. > diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c > index 950e311..cdcbd49 100644 > --- a/hw/ide/atapi.c > +++ b/hw/ide/atapi.c > @@ -27,6 +27,8 @@ > #include "hw/scsi/scsi.h" > #include "sysemu/block-backend.h" > > +#define DEBUG_IDE_ATAPI 1 > + > static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret); > > static void padstr8(uint8_t *buf, int buf_size, const char *src) > @@ -105,31 +107,55 @@ static void cd_data_to_raw(uint8_t *buf, int lba) > memset(buf, 0, 288); > } > > -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int > sector_size) > +static void cd_read_sector_cb(void *opaque, int ret) { > + IDEState *s = opaque; > + > + block_acct_done(blk_get_stats(s->blk), &s->acct); > + > + printf("cd_read_sector_cb lba %d ret = %d\n", s->lba, ret); > + > + if (ret < 0) { > + ide_atapi_io_error(s, ret); > + return; > + } > + > + if (s->cd_sector_size == 2352) { > + cd_data_to_raw(s->io_buffer, s->lba); > + } > + > + s->lba++; > + s->io_buffer_index = 0; > + > + ide_atapi_cmd_reply_end(s); > +} > + > +static BlockAIOCB *cd_read_sector(IDEState *s, int lba, void *buf, int > sector_size) > { > - int ret; > + BlockAIOCB *aiocb = NULL; > > - switch(sector_size) { > - case 2048: > - block_acct_start(blk_get_stats(s->blk), &s->acct, > - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); > - ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4); > - block_acct_done(blk_get_stats(s->blk), &s->acct); > - break; > - case 2352: > + if (sector_size != 2048 && sector_size != 2352) { > + return NULL; > + } > + > + s->iov.iov_base = buf; > + if (sector_size == 2352) { > + buf += 4; > + } > + > + s->iov.iov_len = 4 * 
BDRV_SECTOR_SIZE; > + qemu_iovec_init_external(&s->qiov, &s->iov, 1); > + > + printf("cd_read_sector lba %d\n", lba); > + > + aiocb = blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4, > + cd_read_sector_cb, s); > + > + if (aiocb != NULL) { > block_acct_start(blk_get_stats(s->blk), &s->acct, > 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); > - ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4); > - block_acct_done(blk_get_stats(s->blk), &s->acct); > - if (ret < 0) > - return ret; > - cd_data_to_raw(buf, lba); > - break; > - default: > - ret = -EIO; > - break; > } > - return ret; > + > + return aiocb; > } > > void ide_atapi_cmd_ok(IDEState *s) > @@ -170,7 +196,7 @@ void ide_atapi_io_error(IDEState *s, int ret) > /* The whole ATAPI transfer logic is handled in this function */ > void ide_atapi_cmd_reply_end(IDEState *s) > { > - int byte_count_limit, size, ret; > + int byte_count_limit, size; > #ifdef DEBUG_IDE_ATAPI > printf("reply: tx_size=%d elem_tx_size=%d index=%d\n", > s->packet_transfer_size, > @@ -187,13 +213,10 @@ void ide_atapi_cmd_reply_end(IDEState *s) > } else { > /* see if a new sector must be read */ > if (s->lba != -1 && s->io_buffer_index >= s->cd_sector_size) { > - ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size); > - if (ret < 0) { > - ide_atapi_io_error(s, ret); > - return; > + if (cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size) > == NULL) { > + ide_atapi_io_error(s, -EIO); > } > - s->lba++; > - s->io_buffer_index = 0; > + return; > } > if (s->elementary_transfer_size > 0) { > /* there are some data left to transmit in this elementary > > Thanks, > Peter