On 11/11/2016 22:03, Alex Williamson wrote:
> On Fri, 11 Nov 2016 21:24:33 +0100
> Paolo Bonzini <pbonz...@redhat.com> wrote:
>> If you can post a backtrace of all threads at the time of the hang, from
>> origin/master (so without vhost, and not at ad07cd6) that could help.
>
> Yes, it occurs with all of the vfio devices removed using VNC/Cirrus.
I cannot reproduce it anyway. :( As you said on IRC it's a pretty standard "event loop doing nothing" backtrace, so it seems that an eventfd write was lost. Since I was lucky with the vhost patch, perhaps this can help: diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index f2ea29d..22d6cd5 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -202,13 +202,15 @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev) aio_context_acquire(s->ctx); virtio_scsi_clear_aio(s); - aio_context_release(s->ctx); - - blk_drain_all(); /* ensure there are no in-flight requests */ for (i = 0; i < vs->conf.num_queues + 2; i++) { + VirtQueue *vq = virtio_get_queue(vdev, i); virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); + virtio_queue_host_notifier_aio_read(virtio_queue_get_guest_notifier(vq)); } + aio_context_release(s->ctx); + + blk_drain_all(); /* ensure there are no in-flight requests */ /* Clean up guest notifier (irq) */ k->set_guest_notifiers(qbus->parent, vs->conf.num_queues + 2, false); diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 89b0b80..9c894d7 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2018,7 +2018,7 @@ EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq) return &vq->guest_notifier; } -static void virtio_queue_host_notifier_aio_read(EventNotifier *n) +void virtio_queue_host_notifier_aio_read(EventNotifier *n) { VirtQueue *vq = container_of(n, VirtQueue, host_notifier); if (event_notifier_test_and_clear(n)) { diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 35ede30..d3dfc69 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -274,6 +274,7 @@ int virtio_device_grab_ioeventfd(VirtIODevice *vdev); void virtio_device_release_ioeventfd(VirtIODevice *vdev); bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev); EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq); +void 
virtio_queue_host_notifier_aio_read(EventNotifier *n); void virtio_queue_host_notifier_read(EventNotifier *n); void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx, void (*fn)(VirtIODevice *, And if it doesn't work here is some printf debugging. It's pretty verbose but the interesting part starts pretty much where you issue the virsh shutdown or system_powerdown command: diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index f2ea29d..ec0f750 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -108,11 +108,13 @@ static void virtio_scsi_clear_aio(VirtIOSCSI *s) VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); int i; + printf("before clear\n"); virtio_queue_aio_set_host_notifier_handler(vs->ctrl_vq, s->ctx, NULL); virtio_queue_aio_set_host_notifier_handler(vs->event_vq, s->ctx, NULL); for (i = 0; i < vs->conf.num_queues; i++) { virtio_queue_aio_set_host_notifier_handler(vs->cmd_vqs[i], s->ctx, NULL); } + printf("after clear\n"); } /* Context: QEMU global mutex held */ @@ -202,15 +204,20 @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev) aio_context_acquire(s->ctx); virtio_scsi_clear_aio(s); - aio_context_release(s->ctx); - - blk_drain_all(); /* ensure there are no in-flight requests */ for (i = 0; i < vs->conf.num_queues + 2; i++) { + VirtQueue *vq = virtio_get_queue(vdev, i); virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); + virtio_queue_host_notifier_aio_read(virtio_queue_get_guest_notifier(vq)); } + aio_context_release(s->ctx); + + printf("before drain\n"); + blk_drain_all(); /* ensure there are no in-flight requests */ + printf("after drain\n"); /* Clean up guest notifier (irq) */ + printf("end of virtio_scsi_dataplane_stop\n"); k->set_guest_notifiers(qbus->parent, vs->conf.num_queues + 2, false); s->dataplane_stopping = false; s->dataplane_started = false; diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 3e5ae6a..e8b83d4 100644 --- 
a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -75,6 +75,7 @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req) } if (req->sreq) { + printf("finish %x\n", req->sreq->tag); req->sreq->hba_private = NULL; scsi_req_unref(req->sreq); } @@ -549,6 +549,7 @@ static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req) return -ENOENT; } virtio_scsi_ctx_check(s, d); + printf("prepare %lx %x\n", req->req.cmd.tag, req->req.cmd.cdb[0]); req->sreq = scsi_req_new(d, req->req.cmd.tag, virtio_scsi_get_lun(req->req.cmd.lun), req->req.cmd.cdb, req); diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 62001b4..c75dec3 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -336,11 +336,13 @@ static int virtio_pci_ioeventfd_assign(DeviceState *d, EventNotifier *notifier, static void virtio_pci_start_ioeventfd(VirtIOPCIProxy *proxy) { + printf("start ioeventfd %s\n", object_class_get_name(object_get_class(OBJECT(proxy)))); virtio_bus_start_ioeventfd(&proxy->bus); } static void virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy) { + printf("stop ioeventfd %s\n", object_class_get_name(object_get_class(OBJECT(proxy)))); virtio_bus_stop_ioeventfd(&proxy->bus); } @@ -376,6 +378,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) } break; case VIRTIO_PCI_STATUS: + printf("set status %s %x\n", object_class_get_name(object_get_class(OBJECT(proxy))), val & 0xFF); if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) { virtio_pci_stop_ioeventfd(proxy); } @@ -1274,6 +1277,7 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, vdev->config_vector = val; break; case VIRTIO_PCI_COMMON_STATUS: + printf("set status %s %x\n", object_class_get_name(object_get_class(OBJECT(proxy))), (uint8_t)val); if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) { virtio_pci_stop_ioeventfd(proxy); } > Backtrace on all threads running 6bbcb76: > > Thread 7 (Thread 0x7f5734dff700 (LWP 13136)): > #0 0x00007f5748adcbd0 in 
pthread_cond_wait@@GLIBC_2.3.2 () at > ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185 > #1 0x0000565105c1b5a1 in qemu_cond_wait (cond=0x565108c8f120, > mutex=0x565108c8f150) at util/qemu-thread-posix.c:137 > #2 0x0000565105b2219b in vnc_worker_thread_loop (queue=0x565108c8f120) at > ui/vnc-jobs.c:228 > #3 0x0000565105b226d1 in vnc_worker_thread (arg=0x565108c8f120) at > ui/vnc-jobs.c:335 > #4 0x00007f5748ad75ca in start_thread (arg=0x7f5734dff700) at > pthread_create.c:333 > #5 0x00007f57488110ed in clone () at > ../sysdeps/unix/sysv/linux/x86_64/clone.S:109 > > Thread 6 (Thread 0x7f5736bfe700 (LWP 13133)): > #0 0x00007f5748806ce7 in ioctl () at ../sysdeps/unix/syscall-template.S:84 > #1 0x000056510579734e in kvm_vcpu_ioctl (cpu=0x565107b21b70, type=44672) at > /net/gimli/home/alwillia/Work/qemu.git/kvm-all.c:2079 > #2 0x0000565105796cd5 in kvm_cpu_exec (cpu=0x565107b21b70) at > /net/gimli/home/alwillia/Work/qemu.git/kvm-all.c:1929 > #3 0x000056510577dc58 in qemu_kvm_cpu_thread_fn (arg=0x565107b21b70) at > /net/gimli/home/alwillia/Work/qemu.git/cpus.c:998 > #4 0x00007f5748ad75ca in start_thread (arg=0x7f5736bfe700) at > pthread_create.c:333 > #5 0x00007f57488110ed in clone () at > ../sysdeps/unix/sysv/linux/x86_64/clone.S:109 > > Thread 5 (Thread 0x7f57373ff700 (LWP 13132)): > #0 0x00007f5748806ce7 in ioctl () at ../sysdeps/unix/syscall-template.S:84 > #1 0x000056510579734e in kvm_vcpu_ioctl (cpu=0x565107b02350, type=44672) at > /net/gimli/home/alwillia/Work/qemu.git/kvm-all.c:2079 > #2 0x0000565105796cd5 in kvm_cpu_exec (cpu=0x565107b02350) at > /net/gimli/home/alwillia/Work/qemu.git/kvm-all.c:1929 > #3 0x000056510577dc58 in qemu_kvm_cpu_thread_fn (arg=0x565107b02350) at > /net/gimli/home/alwillia/Work/qemu.git/cpus.c:998 > #4 0x00007f5748ad75ca in start_thread (arg=0x7f57373ff700) at > pthread_create.c:333 > #5 0x00007f57488110ed in clone () at > ../sysdeps/unix/sysv/linux/x86_64/clone.S:109 > > Thread 4 (Thread 0x7f5737c00700 (LWP 13131)): > #0 
0x00007f5748806ce7 in ioctl () at ../sysdeps/unix/syscall-template.S:84 > #1 0x000056510579734e in kvm_vcpu_ioctl (cpu=0x565107ae2b20, type=44672) at > /net/gimli/home/alwillia/Work/qemu.git/kvm-all.c:2079 > #2 0x0000565105796cd5 in kvm_cpu_exec (cpu=0x565107ae2b20) at > /net/gimli/home/alwillia/Work/qemu.git/kvm-all.c:1929 > #3 0x000056510577dc58 in qemu_kvm_cpu_thread_fn (arg=0x565107ae2b20) at > /net/gimli/home/alwillia/Work/qemu.git/cpus.c:998 > #4 0x00007f5748ad75ca in start_thread (arg=0x7f5737c00700) at > pthread_create.c:333 > #5 0x00007f57488110ed in clone () at > ../sysdeps/unix/sysv/linux/x86_64/clone.S:109 > > Thread 3 (Thread 0x7f5738401700 (LWP 13130)): > #0 0x00007f5748806ce7 in ioctl () at ../sysdeps/unix/syscall-template.S:84 > #1 0x000056510579734e in kvm_vcpu_ioctl (cpu=0x565107a85270, type=44672) at > /net/gimli/home/alwillia/Work/qemu.git/kvm-all.c:2079 > #2 0x0000565105796cd5 in kvm_cpu_exec (cpu=0x565107a85270) at > /net/gimli/home/alwillia/Work/qemu.git/kvm-all.c:1929 > #3 0x000056510577dc58 in qemu_kvm_cpu_thread_fn (arg=0x565107a85270) at > /net/gimli/home/alwillia/Work/qemu.git/cpus.c:998 > #4 0x00007f5748ad75ca in start_thread (arg=0x7f5738401700) at > pthread_create.c:333 > #5 0x00007f57488110ed in clone () at > ../sysdeps/unix/sysv/linux/x86_64/clone.S:109 > > Thread 2 (Thread 0x7f573b221700 (LWP 13122)): > #0 0x00007f574880b239 in syscall () at > ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38 > #1 0x0000565105c1b92f in futex_wait (ev=0x5651066c5624 > <rcu_call_ready_event>, val=4294967295) at util/qemu-thread-posix.c:306 > #2 0x0000565105c1ba32 in qemu_event_wait (ev=0x5651066c5624 > <rcu_call_ready_event>) at util/qemu-thread-posix.c:422 > #3 0x0000565105c31f30 in call_rcu_thread (opaque=0x0) at util/rcu.c:249 > #4 0x00007f5748ad75ca in start_thread (arg=0x7f573b221700) at > pthread_create.c:333 > #5 0x00007f57488110ed in clone () at > ../sysdeps/unix/sysv/linux/x86_64/clone.S:109 > > Thread 1 (Thread 0x7f57638c4f80 (LWP 13114)): > 
#0 0x00007f5748805631 in __GI_ppoll (fds=0x565108f6e6b0, nfds=13, > timeout=<optimized out>, sigmask=0x0) at ../sysdeps/unix/sysv/linux/ppoll.c:50 > #1 0x0000565105b3b20b in qemu_poll_ns (fds=0x565108f6e6b0, nfds=13, > timeout=2999772164) at qemu-timer.c:325 > #2 0x0000565105b3a630 in os_host_main_loop_wait (timeout=2999772164) at > main-loop.c:254 > #3 0x0000565105b3a6f3 in main_loop_wait (nonblocking=0) at main-loop.c:508 > #4 0x00005651058c9d80 in main_loop () at vl.c:1966 > #5 0x00005651058d169e in main (argc=64, argv=0x7fff56193428, > envp=0x7fff56193630) at vl.c:4684 > >