On Tue, Mar 15, 2016 at 7:34 PM, Markus Armbruster <arm...@redhat.com> wrote: > This kills off the funny state described in the previous commit. > > Simplify ivshmem_io_read() accordingly, and update documentation. > > Signed-off-by: Markus Armbruster <arm...@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lur...@redhat.com> > --- > docs/specs/ivshmem-spec.txt | 20 +++---- > hw/misc/ivshmem.c | 129 > ++++++++++++++++++++++++++++---------------- > qemu-doc.texi | 9 +--- > 3 files changed, 95 insertions(+), 63 deletions(-) > > diff --git a/docs/specs/ivshmem-spec.txt b/docs/specs/ivshmem-spec.txt > index 0cd63ad..4c33973 100644 > --- a/docs/specs/ivshmem-spec.txt > +++ b/docs/specs/ivshmem-spec.txt > @@ -62,11 +62,11 @@ There are two ways to use this device: > likely want to write a kernel driver to handle interrupts. Requires > the device to be configured for interrupts, obviously. > > -If the device is configured for interrupts, BAR2 is initially invalid. > -It becomes safely accessible only after the ivshmem server provided > -the shared memory. Guest software should wait for the IVPosition > -register (described below) to become non-negative before accessing > -BAR2. > +Before QEMU 2.6.0, BAR2 can initially be invalid if the device is > +configured for interrupts. It becomes safely accessible only after > +the ivshmem server provided the shared memory. Guest software should > +wait for the IVPosition register (described below) to become > +non-negative before accessing BAR2. > > The device is not capable to tell guest software whether it is > configured for interrupts. > @@ -82,7 +82,7 @@ BAR 0 contains the following registers: > 4 4 read/write 0 Interrupt Status > bit 0: peer interrupt > bit 1..31: reserved > - 8 4 read-only 0 or -1 IVPosition > + 8 4 read-only 0 or ID IVPosition > 12 4 write-only N/A Doorbell > bit 0..15: vector > bit 16..31: peer ID > @@ -100,12 +100,14 @@ when an interrupt request from a peer is received. > Reading the > register clears it. > > IVPosition Register: if the device is not configured for interrupts, > -this is zero. Else, it's -1 for a short while after reset, then > -changes to the device's ID (between 0 and 65535). > +this is zero. Else, it is the device's ID (between 0 and 65535). > + > +Before QEMU 2.6.0, the register may read -1 for a short while after > +reset. > > There is no good way for software to find out whether the device is > configured for interrupts. A positive IVPosition means interrupts, > -but zero could be either. The initial -1 cannot be reliably observed. > +but zero could be either. > > Doorbell Register: writing this register requests to interrupt a peer. > The written value's high 16 bits are the ID of the peer to interrupt, > diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c > index ad16828..7f439c3 100644 > --- a/hw/misc/ivshmem.c > +++ b/hw/misc/ivshmem.c > @@ -234,12 +234,7 @@ static uint64_t ivshmem_io_read(void *opaque, hwaddr > addr, > break; > > case IVPOSITION: > - /* return my VM ID if the memory is mapped */ > - if (memory_region_is_mapped(&s->ivshmem)) { > - ret = s->vm_id; > - } else { > - ret = -1; > - } > + ret = s->vm_id; > break; > > default: > @@ -511,7 +506,8 @@ static bool fifo_update_and_get_i64(IVShmemState *s, > return false; > } > > -static int ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector) > +static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector, > + Error **errp) > { > PCIDevice *pdev = PCI_DEVICE(s); > MSIMessage msg = msix_get_message(pdev, vector); > @@ -522,22 +518,21 @@ static int ivshmem_add_kvm_msi_virq(IVShmemState *s, > int vector) > > ret = kvm_irqchip_add_msi_route(kvm_state, msg, pdev); > if (ret < 0) { > - error_report("ivshmem: kvm_irqchip_add_msi_route failed"); > - return -1; > + error_setg(errp, "kvm_irqchip_add_msi_route failed"); > + return; > } > > s->msi_vectors[vector].virq = ret; > s->msi_vectors[vector].pdev = pdev; > - > - return 0; > } > > -static void setup_interrupt(IVShmemState *s, int vector) > +static void setup_interrupt(IVShmemState *s, int vector, Error **errp) > { > EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; > bool with_irqfd = kvm_msi_via_irqfd_enabled() && > ivshmem_has_feature(s, IVSHMEM_MSI); > PCIDevice *pdev = PCI_DEVICE(s); > + Error *err = NULL; > > IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector); > > @@ -546,13 +541,16 @@ static void setup_interrupt(IVShmemState *s, int vector) > watch_vector_notifier(s, n, vector); > } else if (msix_enabled(pdev)) { > IVSHMEM_DPRINTF("with irqfd\n"); > - if (ivshmem_add_kvm_msi_virq(s, vector) < 0) { > + ivshmem_add_kvm_msi_virq(s, vector, &err); > + if (err) { > + error_propagate(errp, err); > return; > } > > if (!msix_is_masked(pdev, vector)) { > kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, > s->msi_vectors[vector].virq); > + /* TODO handle error */ > } > } else { > /* it will be delayed until msix is enabled, in write_config */ > @@ -560,19 +558,19 @@ static void setup_interrupt(IVShmemState *s, int vector) > } > } > > -static void process_msg_shmem(IVShmemState *s, int fd) > +static void process_msg_shmem(IVShmemState *s, int fd, Error **errp) > { > Error *err = NULL; > void *ptr; > > if (memory_region_is_mapped(&s->ivshmem)) { > - error_report("shm already initialized"); > + error_setg(errp, "server sent unexpected shared memory message"); > close(fd); > return; > } > > if (check_shm_size(s, fd, &err) == -1) { > - error_report_err(err); > + error_propagate(errp, err); > close(fd); > return; > } > @@ -580,7 +578,7 @@ static void process_msg_shmem(IVShmemState *s, int fd) > /* mmap the region and map into the BAR2 */ > ptr = mmap(0, s->ivshmem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, > 0); > if (ptr == MAP_FAILED) { > - error_report("Failed to mmap shared memory %s", strerror(errno)); > + error_setg_errno(errp, errno, "Failed to mmap shared memory"); > close(fd); > return; > } > @@ -591,17 +589,19 @@ static void process_msg_shmem(IVShmemState *s, int fd) > memory_region_add_subregion(&s->bar, 0, &s->ivshmem); > } > > -static void process_msg_disconnect(IVShmemState *s, uint16_t posn) > +static void process_msg_disconnect(IVShmemState *s, uint16_t posn, > + Error **errp) > { > IVSHMEM_DPRINTF("posn %d has gone away\n", posn); > if (posn >= s->nb_peers || posn == s->vm_id) { > - error_report("invalid peer %d", posn); > + error_setg(errp, "invalid peer %d", posn); > return; > } > close_peer_eventfds(s, posn); > } > > -static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd) > +static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd, > + Error **errp) > { > Peer *peer = &s->peers[posn]; > int vector; > @@ -611,8 +611,8 @@ static void process_msg_connect(IVShmemState *s, uint16_t > posn, int fd) > * descriptor for vector N-1. Count messages to find the vector. > */ > if (peer->nb_eventfds >= s->vectors) { > - error_report("Too many eventfd received, device has %d vectors", > - s->vectors); > + error_setg(errp, "Too many eventfd received, device has %d vectors", > + s->vectors); > close(fd); > return; > } > @@ -623,7 +623,8 @@ static void process_msg_connect(IVShmemState *s, uint16_t > posn, int fd) > fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd poll non block */ > > if (posn == s->vm_id) { > - setup_interrupt(s, vector); > + setup_interrupt(s, vector, errp); > + /* TODO do we need to handle the error? */ > } > > if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { > @@ -631,18 +632,18 @@ static void process_msg_connect(IVShmemState *s, > uint16_t posn, int fd) > } > } > > -static void process_msg(IVShmemState *s, int64_t msg, int fd) > +static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp) > { > IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd); > > if (msg < -1 || msg > IVSHMEM_MAX_PEERS) { > - error_report("server sent invalid message %" PRId64, msg); > + error_setg(errp, "server sent invalid message %" PRId64, msg); > close(fd); > return; > } > > if (msg == -1) { > - process_msg_shmem(s, fd); > + process_msg_shmem(s, fd, errp); > return; > } > > @@ -651,17 +652,18 @@ static void process_msg(IVShmemState *s, int64_t msg, > int fd) > } > > if (fd >= 0) { > - process_msg_connect(s, msg, fd); > + process_msg_connect(s, msg, fd, errp); > } else if (s->vm_id == -1) { > s->vm_id = msg; > } else { > - process_msg_disconnect(s, msg); > + process_msg_disconnect(s, msg, errp); > } > } > > static void ivshmem_read(void *opaque, const uint8_t *buf, int size) > { > IVShmemState *s = opaque; > + Error *err = NULL; > int fd; > int64_t msg; > > @@ -672,10 +674,13 @@ static void ivshmem_read(void *opaque, const uint8_t > *buf, int size) > fd = qemu_chr_fe_get_msgfd(s->server_chr); > IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd); > > - process_msg(s, msg, fd); > + process_msg(s, msg, fd, &err); > + if (err) { > + error_report_err(err); > + } > } > > -static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd) > +static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp) > { > int64_t msg; > int n, ret; > @@ -685,7 +690,7 @@ static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd) > ret = qemu_chr_fe_read_all(s->server_chr, (uint8_t *)&msg + n, > sizeof(msg) - n); > if (ret < 0 && ret != -EINTR) { > - /* TODO error handling */ > + error_setg_errno(errp, -ret, "read from server failed"); > return INT64_MIN; > } > n += ret; > @@ -695,15 +700,24 @@ static int64_t ivshmem_recv_msg(IVShmemState *s, int > *pfd) > return msg; > } > > -static void ivshmem_recv_setup(IVShmemState *s) > +static void ivshmem_recv_setup(IVShmemState *s, Error **errp) > { > + Error *err = NULL; > int64_t msg; > int fd; > > - msg = ivshmem_recv_msg(s, &fd); > - if (fd != -1 || msg != IVSHMEM_PROTOCOL_VERSION) { > - fprintf(stderr, "incompatible version, you are connecting to a > ivshmem-" > - "server using a different protocol please check your > setup\n"); > + msg = ivshmem_recv_msg(s, &fd, &err); > + if (err) { > + error_propagate(errp, err); > + return; > + } > + if (msg != IVSHMEM_PROTOCOL_VERSION) { > + error_setg(errp, "server sent version %" PRId64 ", expecting %d", > + msg, IVSHMEM_PROTOCOL_VERSION); > + return; > + } > + if (fd != -1) { > + error_setg(errp, "server sent invalid version message"); > return; > } > > @@ -711,9 +725,25 @@ static void ivshmem_recv_setup(IVShmemState *s) > * Receive more messages until we got shared memory. > */ > do { > - msg = ivshmem_recv_msg(s, &fd); > - process_msg(s, msg, fd); > + msg = ivshmem_recv_msg(s, &fd, &err); > + if (err) { > + error_propagate(errp, err); > + return; > + } > + process_msg(s, msg, fd, &err); > + if (err) { > + error_propagate(errp, err); > + return; > + } > } while (msg != -1); > + > + /* > + * This function must either map the shared memory or fail. The > + * loop above ensures that: it terminates normally only after it > + * successfully processed the server's shared memory message. > + * Assert that actually mapped the shared memory: > + */ > + assert(memory_region_is_mapped(&s->ivshmem)); > } > > /* Select the MSI-X vectors used by device. > @@ -763,7 +793,13 @@ static void ivshmem_enable_irqfd(IVShmemState *s) > int i; > > for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { > - ivshmem_add_kvm_msi_virq(s, i); > + Error *err = NULL; > + > + ivshmem_add_kvm_msi_virq(s, i, &err); > + if (err) { > + error_report_err(err); > + /* TODO do we need to handle the error? */ > + } > } > > if (msix_set_vector_notifiers(pdev, > @@ -809,7 +845,7 @@ static void ivshmem_write_config(PCIDevice *pdev, > uint32_t address, > pci_default_write_config(pdev, address, val, len); > is_enabled = msix_enabled(pdev); > > - if (kvm_msi_via_irqfd_enabled() && s->vm_id != -1) { > + if (kvm_msi_via_irqfd_enabled()) { > if (!was_enabled && is_enabled) { > ivshmem_enable_irqfd(s); > } else if (was_enabled && !is_enabled) { > @@ -928,15 +964,16 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error > **errp) > * Receive setup messages from server synchronously. > * Older versions did it asynchronously, but that creates a > * number of entertaining race conditions. > - * TODO Propagate errors! Without that, we still have races > - * on errors. > */ > - ivshmem_recv_setup(s); > - if (memory_region_is_mapped(&s->ivshmem)) { > - qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, > - ivshmem_read, NULL, s); > + ivshmem_recv_setup(s, &err); > + if (err) { > + error_propagate(errp, err); > + return; > } > > + qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, > + ivshmem_read, NULL, s); > + > if (ivshmem_setup_interrupts(s) < 0) { > error_setg(errp, "failed to initialize interrupts"); > return; > diff --git a/qemu-doc.texi b/qemu-doc.texi > index 65f3b29..8afbbcd 100644 > --- a/qemu-doc.texi > +++ b/qemu-doc.texi > @@ -1289,14 +1289,7 @@ qemu-system-i386 -device > ivshmem,size=@var{shm-size},vectors=@var{vectors},chard > > When using the server, the guest will be assigned a VM ID (>=0) that allows > guests > using the same server to communicate via interrupts. Guests can read their > -VM ID from a device register (see example code). Since receiving the shared > -memory region from the server is asynchronous, there is a (small) chance the > -guest may boot before the shared memory is attached. To allow an application > -to ensure shared memory is attached, the VM ID register will return -1 (an > -invalid VM ID) until the memory is attached. Once the shared memory is > -attached, the VM ID will return the guest's valid VM ID. With these > semantics, > -the guest application can check to ensure the shared memory is attached to > the > -guest before proceeding. > +VM ID from a device register (see ivshmem-spec.txt). > > The @option{role} argument can be set to either master or peer and will > affect > how the shared memory is migrated. With @option{role=master}, the guest will > -- > 2.4.3 > > -- Marc-André Lureau