Re: [PATCH] virtio/s390: use dev_to_virtio
On Wed, 30 Dec 2015 22:05:25 +0800 Geliang Tangwrote: > Use dev_to_virtio() instead of open-coding it. > > Signed-off-by: Geliang Tang > --- > drivers/s390/virtio/virtio_ccw.c | 3 +-- > 1 file changed, 1 insertion(+), 2 deletions(-) Thanks, added to my queue. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/1] virtio/s390: one fix
On Thu, 3 Dec 2015 17:23:59 +0100 Cornelia Huck <cornelia.h...@de.ibm.com> wrote: > Michael, > > here's one fix for the virtio-ccw driver. > > Patch is against master, as your git branches on mst/vhost.git seem > to be quite old. Is there some branch I can base my own branch on, > or do you prefer to take patches directly anyway? > > Cornelia Huck (1): > virtio/s390: handle error values in irb > > drivers/s390/virtio/virtio_ccw.c | 62 > > 1 file changed, 37 insertions(+), 25 deletions(-) > Ping? -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/1] virtio/s390: one fix
Michael, here's one fix for the virtio-ccw driver. Patch is against master, as your git branches on mst/vhost.git seem to be quite old. Is there some branch I can base my own branch on, or do you prefer to take patches directly anyway? Cornelia Huck (1): virtio/s390: handle error values in irb drivers/s390/virtio/virtio_ccw.c | 62 1 file changed, 37 insertions(+), 25 deletions(-) -- 2.3.9 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/1] virtio/s390: handle error values in irb
The common I/O layer may pass an error value as the irb in the device's interrupt handler (for classic channel I/O). This won't happen in current virtio-ccw implementations, but it's better to be safe than sorry. Let's just return the error conveyed by the irb and clear any possible pending I/O indications. Signed-off-by: Cornelia Huck <cornelia.h...@de.ibm.com> Reviewed-by: Guenther Hutzl <hu...@linux.vnet.ibm.com> Reviewed-by: Pierre Morel <pmo...@linux.vnet.ibm.com> --- drivers/s390/virtio/virtio_ccw.c | 62 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index b2a1a81..1b83159 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -984,6 +984,36 @@ static struct virtqueue *virtio_ccw_vq_by_ind(struct virtio_ccw_device *vcdev, return vq; } +static void virtio_ccw_check_activity(struct virtio_ccw_device *vcdev, + __u32 activity) +{ + if (vcdev->curr_io & activity) { + switch (activity) { + case VIRTIO_CCW_DOING_READ_FEAT: + case VIRTIO_CCW_DOING_WRITE_FEAT: + case VIRTIO_CCW_DOING_READ_CONFIG: + case VIRTIO_CCW_DOING_WRITE_CONFIG: + case VIRTIO_CCW_DOING_WRITE_STATUS: + case VIRTIO_CCW_DOING_SET_VQ: + case VIRTIO_CCW_DOING_SET_IND: + case VIRTIO_CCW_DOING_SET_CONF_IND: + case VIRTIO_CCW_DOING_RESET: + case VIRTIO_CCW_DOING_READ_VQ_CONF: + case VIRTIO_CCW_DOING_SET_IND_ADAPTER: + case VIRTIO_CCW_DOING_SET_VIRTIO_REV: + vcdev->curr_io &= ~activity; + wake_up(>wait_q); + break; + default: + /* don't know what to do... */ + dev_warn(>cdev->dev, +"Suspicious activity '%08x'\n", activity); + WARN_ON(1); + break; + } + } +} + static void virtio_ccw_int_handler(struct ccw_device *cdev, unsigned long intparm, struct irb *irb) @@ -995,6 +1025,12 @@ static void virtio_ccw_int_handler(struct ccw_device *cdev, if (!vcdev) return; + if (IS_ERR(irb)) { + vcdev->err = PTR_ERR(irb); + virtio_ccw_check_activity(vcdev, activity); + /* Don't poke around indicators, something's wrong. */ + return; + } /* Check if it's a notification from the host. */ if ((intparm == 0) && (scsw_stctl(>scsw) == @@ -1010,31 +1046,7 @@ static void virtio_ccw_int_handler(struct ccw_device *cdev, /* Map everything else to -EIO. */ vcdev->err = -EIO; } - if (vcdev->curr_io & activity) { - switch (activity) { - case VIRTIO_CCW_DOING_READ_FEAT: - case VIRTIO_CCW_DOING_WRITE_FEAT: - case VIRTIO_CCW_DOING_READ_CONFIG: - case VIRTIO_CCW_DOING_WRITE_CONFIG: - case VIRTIO_CCW_DOING_WRITE_STATUS: - case VIRTIO_CCW_DOING_SET_VQ: - case VIRTIO_CCW_DOING_SET_IND: - case VIRTIO_CCW_DOING_SET_CONF_IND: - case VIRTIO_CCW_DOING_RESET: - case VIRTIO_CCW_DOING_READ_VQ_CONF: - case VIRTIO_CCW_DOING_SET_IND_ADAPTER: - case VIRTIO_CCW_DOING_SET_VIRTIO_REV: - vcdev->curr_io &= ~activity; - wake_up(>wait_q); - break; - default: - /* don't know what to do... */ - dev_warn(>dev, "Suspicious activity '%08x'\n", -activity); - WARN_ON(1); - break; - } - } + virtio_ccw_check_activity(vcdev, activity); for_each_set_bit(i, >indicators, sizeof(vcdev->indicators) * BITS_PER_BYTE) { /* The bit clear must happen before the vring kick. */ -- 2.3.9 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v5 2/2] KVM: Make KVM_CAP_IRQFD dependent on KVM_CAP_IRQCHIP
On Mon, 30 Nov 2015 14:56:38 +0300 Pavel Fedinwrote: > Hello! > > > > case KVM_CAP_INTERNAL_ERROR_DATA: > > > #ifdef CONFIG_HAVE_KVM_MSI > > > case KVM_CAP_SIGNAL_MSI: > > > + /* Fallthrough */ > > > #endif > > > + case KVM_CAP_CHECK_EXTENSION_VM: > > > + return 1; > > > #ifdef CONFIG_HAVE_KVM_IRQFD > > > case KVM_CAP_IRQFD: > > > case KVM_CAP_IRQFD_RESAMPLE: > > > + return kvm_vm_ioctl_check_extension(kvm, KVM_CAP_IRQCHIP); > > > > This won't work for s390, as it doesn't have KVM_CAP_IRQCHIP but > > KVM_CAP_S390_IRQCHIP (which needs to be enabled). > > Thank you for the note, i didn't know about irqchip-specific capability > codes. There's the same issue with PowerPC, now i > understand why there's no KVM_CAP_IRQCHIP for them. Because they have > KVM_CAP_IRQ_MPIC and KVM_CAP_IRQ_XICS, similar to S390. > But isn't it just weird? I understand that perhaps we have some real need to > distinguish between different irqchip types, but > shouldn't the kernel also publish KVM_CAP_IRQCHIP, which stands just for "we > support some irqchip virtualization"? > May be we should just add this for PowerPC and S390, to make things less > ambiguous? Note that we explicitly need to _enable_ the s390 cap (for compatibility). I'd need to recall the exact details but I came to the conclusion back than that I could not simply enable KVM_CAP_IRQCHIP for s390 (and current qemu would fail to enable the s390 cap if we started advertising KVM_CAP_IRQCHIP now). -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v5 2/2] KVM: Make KVM_CAP_IRQFD dependent on KVM_CAP_IRQCHIP
On Mon, 30 Nov 2015 12:40:45 +0300 Pavel Fedinwrote: > Now at least ARM is able to determine whether the machine has > virtualization support for irqchip or not at runtime. Obviously, > irqfd requires irqchip. > > Signed-off-by: Pavel Fedin > --- > virt/kvm/kvm_main.c | 6 -- > 1 file changed, 4 insertions(+), 2 deletions(-) > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 7873d6d..a057d5e 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -2716,13 +2716,15 @@ static long > kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) > case KVM_CAP_INTERNAL_ERROR_DATA: > #ifdef CONFIG_HAVE_KVM_MSI > case KVM_CAP_SIGNAL_MSI: > + /* Fallthrough */ > #endif > + case KVM_CAP_CHECK_EXTENSION_VM: > + return 1; > #ifdef CONFIG_HAVE_KVM_IRQFD > case KVM_CAP_IRQFD: > case KVM_CAP_IRQFD_RESAMPLE: > + return kvm_vm_ioctl_check_extension(kvm, KVM_CAP_IRQCHIP); This won't work for s390, as it doesn't have KVM_CAP_IRQCHIP but KVM_CAP_S390_IRQCHIP (which needs to be enabled). > #endif > - case KVM_CAP_CHECK_EXTENSION_VM: > - return 1; > #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING > case KVM_CAP_IRQ_ROUTING: > return KVM_MAX_IRQ_ROUTES; -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v5 2/2] KVM: Make KVM_CAP_IRQFD dependent on KVM_CAP_IRQCHIP
On Mon, 30 Nov 2015 15:41:20 +0300 Pavel Fedinwrote: > Hello! > > > > Thank you for the note, i didn't know about irqchip-specific capability > > > codes. There's the > > > same issue with PowerPC, now i > > > understand why there's no KVM_CAP_IRQCHIP for them. Because they have > > > KVM_CAP_IRQ_MPIC and > > > KVM_CAP_IRQ_XICS, similar to S390. > > > But isn't it just weird? I understand that perhaps we have some real > > > need to distinguish > > > between different irqchip types, but > > > shouldn't the kernel also publish KVM_CAP_IRQCHIP, which stands just for > > > "we support some > > > irqchip virtualization"? > > > May be we should just add this for PowerPC and S390, to make things less > > > ambiguous? > > > > Note that we explicitly need to _enable_ the s390 cap (for > > compatibility). I'd need to recall the exact details but I came to the > > conclusion back than that I could not simply enable KVM_CAP_IRQCHIP for > > s390 (and current qemu would fail to enable the s390 cap if we started > > advertising KVM_CAP_IRQCHIP now). > > OMG... I've looked at the code, what a mess... > If i was implementing this, i'd simply introduce kvm_vm_enable_cap(s, > KVM_CAP_IRQCHIP, 0), > which would be allowed to fail with -ENOSYS, so that backwards compatibility > is kept and an existing API is reused... But, well, > it's already impossible to unscramble an egg... :) > Ok, i think in current situation we could choose one of these ways (both are > based on the fact that it's obvious that irqfd require > IRQCHIP). > a) I look for an alternate way to report KVM_CAP_IRQFD dynamically, and > maybe PowerPC and S390 follow this way. The thing is: _when_ can you report KVM_CAP_IRQFD? It obviously requires an irqchip; but if you need some configuration/enablement beforehand, you'll get different values depending on when you retrieve the cap. So does KVM_CAP_IRQFD mean "irqfds are available in principle" or "everything has been setup for usage of irqfds"? I'd assume the former. > b) I simply drop it as it is, because current qemu knows about the > dependency and does not try to use irqfd without irqchip, > because there's simply no use for them. But, well, perhaps there would be an > exception in vhost, i don't remember testing it. Wouldn't an irqfd emulation cover vhost? > So what shall we do? -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] vhost: move is_le setup to the backend
On Fri, 30 Oct 2015 12:42:35 +0100 Greg Kurz <gk...@linux.vnet.ibm.com> wrote: > The vq->is_le field is used to fix endianness when accessing the vring via > the cpu_to_vhost16() and vhost16_to_cpu() helpers in the following cases: > > 1) host is big endian and device is modern virtio > > 2) host has cross-endian support and device is legacy virtio with a different >endianness than the host > > Both cases rely on the VHOST_SET_FEATURES ioctl, but 2) also needs the > VHOST_SET_VRING_ENDIAN ioctl to be called by userspace. Since vq->is_le > is only needed when the backend is active, it was decided to set it at > backend start. > > This is currently done in vhost_init_used()->vhost_init_is_le() but it > obfuscates the core vhost code. This patch moves the is_le setup to a > dedicated function that is called from the backend code. > > Note vhost_net is the only backend that can pass vq->private_data == NULL to > vhost_init_used(), hence the "if (sock)" branch. > > No behaviour change. > > Signed-off-by: Greg Kurz <gk...@linux.vnet.ibm.com> > --- > drivers/vhost/net.c |6 ++ > drivers/vhost/scsi.c |3 +++ > drivers/vhost/test.c |2 ++ > drivers/vhost/vhost.c | 12 +++----- > drivers/vhost/vhost.h |1 + > 5 files changed, 19 insertions(+), 5 deletions(-) Makes sense. Reviewed-by: Cornelia Huck <cornelia.h...@de.ibm.com> -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH/RFC 0/4] dma ops and virtio
On Thu, 5 Nov 2015 09:11:06 +0100 Cornelia Huck <cornelia.h...@de.ibm.com> wrote: > On Wed, 4 Nov 2015 10:14:11 -0800 > Andy Lutomirski <l...@amacapital.net> wrote: > > > On Wed, Nov 4, 2015 at 9:52 AM, Cornelia Huck <cornelia.h...@de.ibm.com> > > wrote: > > > On Wed, 4 Nov 2015 15:29:23 +0100 > > > Cornelia Huck <cornelia.h...@de.ibm.com> wrote: > > > > > >> I'm currently suspecting some endianness issues, probably with the ecw > > >> accesses, which is why I'd be interested in your trace information (as > > >> I currently don't have a LE test setup at hand.) > > > > > > I think I've got it. We have sense_data as a byte array, which > > > implicitly makes it BE already. When we copy to the ecws while building > > > the irb, the data ends up in 32 bit values. The conversion from host > > > endianness to BE now treats them as LE on your system... > > > > > > Could you please give the following qemu patch a try? > > > > Tested-by: Andy Lutomirski <l...@kernel.org> > > > > Now my test script panics for the right reason (init isn't actually an > > s390 binary). Thanks! > > Cool, thanks for testing! I'll get this into qemu as a proper patch. FYI: The patch has been included into the qemu git tree. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [GIT PULL v4 3/3] s390/dma: Allow per device dma ops
On Thu, 5 Nov 2015 21:08:56 +0100 Christian Borntraeger <borntrae...@de.ibm.com> wrote: > As virtio-ccw will have dma ops, we can no longer default to the > zPCI ones. Make use of dev_archdata to keep the dma_ops per device. > The pci devices now use that to override the default, and the > default is changed to use the noop ops for everything that does not > specify a device specific one. > To compile without PCI support we will enable HAS_DMA all the time, > via the default config in lib/Kconfig. > > Signed-off-by: Christian Borntraeger <borntrae...@de.ibm.com> > Reviewed-by: Joerg Roedel <jroe...@suse.de> > Acked-by: Sebastian Ott <seb...@linux.vnet.ibm.com> > --- > arch/s390/Kconfig | 7 ++- > arch/s390/include/asm/device.h | 6 +- > arch/s390/include/asm/dma-mapping.h | 6 -- > arch/s390/pci/pci.c | 1 + > arch/s390/pci/pci_dma.c | 4 ++-- > 5 files changed, 14 insertions(+), 10 deletions(-) Reviewed-by: Cornelia Huck <cornelia.h...@de.ibm.com> -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH/RFC 0/4] dma ops and virtio
On Wed, 4 Nov 2015 10:14:11 -0800 Andy Lutomirski <l...@amacapital.net> wrote: > On Wed, Nov 4, 2015 at 9:52 AM, Cornelia Huck <cornelia.h...@de.ibm.com> > wrote: > > On Wed, 4 Nov 2015 15:29:23 +0100 > > Cornelia Huck <cornelia.h...@de.ibm.com> wrote: > > > >> I'm currently suspecting some endianness issues, probably with the ecw > >> accesses, which is why I'd be interested in your trace information (as > >> I currently don't have a LE test setup at hand.) > > > > I think I've got it. We have sense_data as a byte array, which > > implicitly makes it BE already. When we copy to the ecws while building > > the irb, the data ends up in 32 bit values. The conversion from host > > endianness to BE now treats them as LE on your system... > > > > Could you please give the following qemu patch a try? > > Tested-by: Andy Lutomirski <l...@kernel.org> > > Now my test script panics for the right reason (init isn't actually an > s390 binary). Thanks! Cool, thanks for testing! I'll get this into qemu as a proper patch. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH/RFC 0/4] dma ops and virtio
On Tue, 3 Nov 2015 10:45:12 -0800 Andy Lutomirski <l...@amacapital.net> wrote: > On Tue, Nov 3, 2015 at 9:59 AM, Cornelia Huck <cornelia.h...@de.ibm.com> > wrote: > > It's just failing very early in the setup phase. As it works for me > > with a kvm setup, I'm suspecting some error in qemu's emulation code, > > which is unfortunately not my turf. > > > > That should be easy to rule out. Can you try with -machine accel=tcg? > I can't test with kvm for obvious reasons. Well, s390-on-s390 works (at least with https://patchwork.ozlabs.org/patch/538882/ applied). I'm currently suspecting some endianness issues, probably with the ecw accesses, which is why I'd be interested in your trace information (as I currently don't have a LE test setup at hand.) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH/RFC 0/4] dma ops and virtio
On Wed, 4 Nov 2015 15:29:23 +0100 Cornelia Huck <cornelia.h...@de.ibm.com> wrote: > I'm currently suspecting some endianness issues, probably with the ecw > accesses, which is why I'd be interested in your trace information (as > I currently don't have a LE test setup at hand.) I think I've got it. We have sense_data as a byte array, which implicitly makes it BE already. When we copy to the ecws while building the irb, the data ends up in 32 bit values. The conversion from host endianness to BE now treats them as LE on your system... Could you please give the following qemu patch a try? diff --git a/hw/s390x/css.c b/hw/s390x/css.c index c033612..a13494e 100644 --- a/hw/s390x/css.c +++ b/hw/s390x/css.c @@ -892,8 +892,16 @@ int css_do_tsch_get_irb(SubchDev *sch, IRB *target_irb, int *irb_len) /* If a unit check is pending, copy sense data. */ if ((s->dstat & SCSW_DSTAT_UNIT_CHECK) && (p->chars & PMCW_CHARS_MASK_CSENSE)) { +uint32_t ecw[8]; +int i; + irb.scsw.flags |= SCSW_FLAGS_MASK_ESWF | SCSW_FLAGS_MASK_ECTL; -memcpy(irb.ecw, sch->sense_data, sizeof(sch->sense_data)); +/* Attention: sense_data is already BE! */ +memset(ecw, 0, sizeof(ecw)); +memcpy(ecw, sch->sense_data, sizeof(sch->sense_data)); +for (i = 0; i < ARRAY_SIZE(ecw); i++) { +irb.ecw[i] = be32_to_cpu(ecw[i]); +} irb.esw[1] = 0x0100 | (sizeof(sch->sense_data) << 8); } } -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/3] s390/dma: Allow per device dma ops
On Tue, 3 Nov 2015 12:54:39 +0100 Christian Borntraegerwrote: > As virtio-ccw now has dma ops, we can no longer default to the PCI ones. > Make use of dev_archdata to keep the dma_ops per device. The pci devices > now use that to override the default, and the default is changed to use > the noop ops for everything that is not PCI. To compile without PCI > support we also have to enable the DMA api with virtio. Not only with virtio, but generally, right? > Signed-off-by: Christian Borntraeger > Reviewed-by: Joerg Roedel > Acked-by: Sebastian Ott > --- > arch/s390/Kconfig | 3 ++- > arch/s390/include/asm/device.h | 6 +- > arch/s390/include/asm/dma-mapping.h | 6 -- > arch/s390/pci/pci.c | 1 + > arch/s390/pci/pci_dma.c | 4 ++-- > 5 files changed, 14 insertions(+), 6 deletions(-) > > diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig > index 1d57000..04f0e02 100644 > --- a/arch/s390/Kconfig > +++ b/arch/s390/Kconfig > @@ -113,6 +113,7 @@ config S390 > select GENERIC_FIND_FIRST_BIT > select GENERIC_SMP_IDLE_THREAD > select GENERIC_TIME_VSYSCALL > + select HAS_DMA > select HAVE_ALIGNED_STRUCT_PAGE if SLUB > select HAVE_ARCH_AUDITSYSCALL > select HAVE_ARCH_EARLY_PFN_TO_NID > @@ -124,6 +125,7 @@ config S390 > select HAVE_CMPXCHG_DOUBLE > select HAVE_CMPXCHG_LOCAL > select HAVE_DEBUG_KMEMLEAK > + select HAVE_DMA_ATTRS > select HAVE_DYNAMIC_FTRACE > select HAVE_DYNAMIC_FTRACE_WITH_REGS > select HAVE_FTRACE_MCOUNT_RECORD > @@ -580,7 +582,6 @@ config QDIO > > menuconfig PCI > bool "PCI support" > - select HAVE_DMA_ATTRS > select PCI_MSI > help > Enable PCI support. Hm. Further down in this file, there's config HAS_DMA def_bool PCI select HAVE_DMA_API_DEBUG Should we maybe select HAVE_DMA_API_DEBUG above, drop the HAS_DMA config option and rely on not defining NO_DMA instead? Otherwise, the patch looks good to me. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH/RFC 0/4] dma ops and virtio
On Mon, 2 Nov 2015 12:23:25 -0800 Andy Lutomirskiwrote: > No change. I'm stumped :( Here's what I see: (...) > CH DBG: css_interpret_ccw: cmd_code=e4 > CH DBG: css_interpret_ccw: ret=0 sense id -> works (...) > CH DBG: css_interpret_ccw: cmd_code=3 > CH DBG: css_interpret_ccw: ret=0 nop (path verification) -> works > CH DBG: css_interpret_ccw: cmd_code=83 > CH DBG: css_interpret_ccw: ret=-38 set revision -> -ENOSYS This is fine; the virtio device is in legacy mode and the kernel will try revision 1; qemu will reject this. The code should end up generating a unit check with command reject, however... > qeth: register layer 2 discipline > qeth: register layer 3 discipline > oprofile: using timer interrupt. > NET: Registered protocol family 10 > virtio_ccw 0.0.: Failed to set online: -5 ...this shows the kernel driver somehow did not end up with that command reject (it would have triggered a retry with revision 0, and the return code shows that no unit check/command reject was detected, but some other error.) > The lack of much interesting output makes me think that maybe I > misconfigured something. It's just failing very early in the setup phase. As it works for me with a kvm setup, I'm suspecting some error in qemu's emulation code, which is unfortunately not my turf. Some more poke-around-in-the-dark ideas: - Do you get more debug out put when you switch back to s390-ccw-virtio (virtio-1), i.e. does cmd 83 work and is it followed by further commands? - Can you try with the following qemu logging patch -8<--8<- diff --git a/hw/s390x/css.c b/hw/s390x/css.c index c033612..80853a6 100644 --- a/hw/s390x/css.c +++ b/hw/s390x/css.c @@ -868,6 +868,7 @@ int css_do_tsch_get_irb(SubchDev *sch, IRB *target_irb, int *irb_len) PMCW *p = >curr_status.pmcw; uint16_t stctl; IRB irb; +int i; if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) { return 3; @@ -898,6 +899,14 @@ int css_do_tsch_get_irb(SubchDev *sch, IRB *target_irb, int *irb_len) } } /* Store the irb to the guest. */ +fprintf(stderr, "CH DBG: %s: flags=%04x ctrl=%04x cpa=%08x\n", +__func__, irb.scsw.flags, irb.scsw.ctrl, irb.scsw.cpa); +fprintf(stderr, "CH DBG: %s: dstat=%02x cstat=%02x count=%04x\n", +__func__, irb.scsw.dstat, irb.scsw.cstat, irb.scsw.count); +for (i = 0; i < ARRAY_SIZE(irb.ecw); i++) { +fprintf(stderr, "CH DBG: %s: ecw[%d]=%08x\n", __func__, +i, irb.ecw[i]); +} copy_irb_to_guest(target_irb, , p, irb_len); return ((stctl & SCSW_STCTL_STATUS_PEND) == 0); -8<--8<- and the following kernel patch -8<--8<- diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 83da53c..ea4db09 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -540,6 +540,9 @@ callback: create_fake_irb(>private->irb, cdev->private->flags.fake_irb); cdev->private->flags.fake_irb = 0; + CIO_TRACE_EVENT(0, "fake_irb"); + CIO_HEX_EVENT(0, >private->irb, + sizeof(struct irb)); if (cdev->handler) cdev->handler(cdev, cdev->private->intparm, >private->irb); diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index 6acd0b5..e9bf357 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -446,6 +446,8 @@ ccw_device_call_handler(struct ccw_device *cdev) /* * Now we are ready to call the device driver interrupt handler. */ + CIO_TRACE_EVENT(0, "irb"); + CIO_HEX_EVENT(0, >private->irb, sizeof(struct irb)); if (cdev->handler) cdev->handler(cdev, cdev->private->intparm, >private->irb); -8<--8<- Just to verify that qemu will produce and the kernel end up with the irb I'd expect. I'd rather prefer us getting the dma stuff right instead of chasing qemu issues :/ -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH/RFC 0/4] dma ops and virtio
On Fri, 30 Oct 2015 13:33:07 -0700 Andy Lutomirski <l...@amacapital.net> wrote: > On Fri, Oct 30, 2015 at 1:25 AM, Cornelia Huck <cornelia.h...@de.ibm.com> > wrote: > > On Thu, 29 Oct 2015 15:50:38 -0700 > > Andy Lutomirski <l...@amacapital.net> wrote: > > > >> Progress! After getting that sort-of-working, I figured out what was > >> wrong with my earlier command, and I got that working, too. Now I > >> get: > >> > >> qemu-system-s390x -fsdev > >> local,id=virtfs1,path=/,security_model=none,readonly -device > >> virtio-9p-ccw,fsdev=virtfs1,mount_tag=/dev/root -M s390-ccw-virtio > >> -nodefaults -device sclpconsole,chardev=console -parallel none -net > >> none -echr 1 -serial none -chardev stdio,id=console,signal=off,mux=on > >> -serial chardev:console -mon chardev=console -vga none -display none > >> -kernel arch/s390/boot/bzImage -append > >> 'init=/home/luto/devel/virtme/virtme/guest/virtme-init > >> psmouse.proto=exps "virtme_stty_con=rows 24 cols 150 iutf8" > >> TERM=xterm-256color rootfstype=9p > >> rootflags=ro,version=9p2000.L,trans=virtio,access=any > >> raid=noautodetect debug' > > > > The commandline looks sane AFAICS. > > > > (...) > > > >> vrfy: device 0.0.: rc=0 pgroup=0 mpath=0 vpm=80 > >> virtio_ccw 0.0.: Failed to set online: -5 > >> > >> ^^^ bad news! > > > > I'd like to see where in the onlining process this fails. Could you set > > up qemu tracing for css_* and virtio_ccw_* (instructions in > > qemu/docs/tracing.txt)? > > I have a file called events that contains: > > css_* > virtio_ccw_* > > pointing -trace events= at it results in a trace- file that's 549 > bytes long and contains nothing. Are wildcards not as well-supported > as the docs suggest? Just tried it, seemed to work for me as expected. And as your messages indicate, at least some of the css tracepoints are guaranteed to be hit. Odd. Can you try the following sophisticated printf debug patch? diff --git a/hw/s390x/css.c b/hw/s390x/css.c index c033612..6a87bd6 100644 --- a/hw/s390x/css.c +++ b/hw/s390x/css.c @@ -308,6 +308,8 @@ static int css_interpret_ccw(SubchDev *sch, hwaddr ccw_addr) sch->ccw_no_data_cnt++; } +fprintf(stderr, "CH DBG: %s: cmd_code=%x\n", __func__, ccw.cmd_code); + /* Look at the command. */ switch (ccw.cmd_code) { case CCW_CMD_NOOP: @@ -375,6 +377,7 @@ static int css_interpret_ccw(SubchDev *sch, hwaddr ccw_addr) } break; } +fprintf(stderr, "CH DBG: %s: ret=%d\n", __func__, ret); sch->last_cmd = ccw; sch->last_cmd_valid = true; if (ret == 0) { > > Which qemu version is this, btw.? > > > > git from yesterday. Hm. Might be worth trying the s390-ccw-virtio-2.4 machine instead. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH/RFC 0/4] dma ops and virtio
On Thu, 29 Oct 2015 15:50:38 -0700 Andy Lutomirskiwrote: > Progress! After getting that sort-of-working, I figured out what was > wrong with my earlier command, and I got that working, too. Now I > get: > > qemu-system-s390x -fsdev > local,id=virtfs1,path=/,security_model=none,readonly -device > virtio-9p-ccw,fsdev=virtfs1,mount_tag=/dev/root -M s390-ccw-virtio > -nodefaults -device sclpconsole,chardev=console -parallel none -net > none -echr 1 -serial none -chardev stdio,id=console,signal=off,mux=on > -serial chardev:console -mon chardev=console -vga none -display none > -kernel arch/s390/boot/bzImage -append > 'init=/home/luto/devel/virtme/virtme/guest/virtme-init > psmouse.proto=exps "virtme_stty_con=rows 24 cols 150 iutf8" > TERM=xterm-256color rootfstype=9p > rootflags=ro,version=9p2000.L,trans=virtio,access=any > raid=noautodetect debug' The commandline looks sane AFAICS. (...) > vrfy: device 0.0.: rc=0 pgroup=0 mpath=0 vpm=80 > virtio_ccw 0.0.: Failed to set online: -5 > > ^^^ bad news! I'd like to see where in the onlining process this fails. Could you set up qemu tracing for css_* and virtio_ccw_* (instructions in qemu/docs/tracing.txt)? Which qemu version is this, btw.? -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v4 2/6] virtio_ring: Support DMA APIs
On Thu, 29 Oct 2015 18:09:47 -0700 Andy Lutomirskiwrote: > virtio_ring currently sends the device (usually a hypervisor) > physical addresses of its I/O buffers. This is okay when DMA > addresses and physical addresses are the same thing, but this isn't > always the case. For example, this never works on Xen guests, and > it is likely to fail if a physical "virtio" device ever ends up > behind an IOMMU or swiotlb. > > The immediate use case for me is to enable virtio on Xen guests. > For that to work, we need vring to support DMA address translation > as well as a corresponding change to virtio_pci or to another > driver. > > With this patch, if enabled, virtfs survives kmemleak and > CONFIG_DMA_API_DEBUG. > > Signed-off-by: Andy Lutomirski > --- > drivers/virtio/Kconfig | 2 +- > drivers/virtio/virtio_ring.c | 190 > +++ > tools/virtio/linux/dma-mapping.h | 17 > 3 files changed, 172 insertions(+), 37 deletions(-) > create mode 100644 tools/virtio/linux/dma-mapping.h > static void detach_buf(struct vring_virtqueue *vq, unsigned int head) > { > - unsigned int i; > + unsigned int i, j; > + u16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); > > /* Clear data ptr. */ > - vq->data[head] = NULL; > + vq->desc_state[head].data = NULL; > > - /* Put back on free list: find end */ > + /* Put back on free list: unmap first-level descriptors and find end */ > i = head; > > - /* Free the indirect table */ > - if (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, > VRING_DESC_F_INDIRECT)) > - kfree(phys_to_virt(virtio64_to_cpu(vq->vq.vdev, > vq->vring.desc[i].addr))); > - > - while (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, > VRING_DESC_F_NEXT)) { > + while (vq->vring.desc[i].flags & nextflag) { > + vring_unmap_one(vq, >vring.desc[i]); > i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next); > vq->vq.num_free++; > } > > + vring_unmap_one(vq, >vring.desc[i]); > vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head); > vq->free_head = head; > + > /* Plus final descriptor */ > vq->vq.num_free++; > + > + /* Free the indirect table, if any, now that it's unmapped. */ > + if (vq->desc_state[head].indir_desc) { > + struct vring_desc *indir_desc = vq->desc_state[head].indir_desc; > + u32 len = vq->vring.desc[head].len; This one needs to be virtio32_to_cpu(...) as well. > + > + BUG_ON(!(vq->vring.desc[head].flags & > + cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))); > + BUG_ON(len == 0 || len % sizeof(struct vring_desc)); > + > + for (j = 0; j < len / sizeof(struct vring_desc); j++) > + vring_unmap_one(vq, _desc[j]); > + > + kfree(vq->desc_state[head].indir_desc); > + vq->desc_state[head].indir_desc = NULL; > + } > } With that change on top of your current branch, I can boot (root on virtio-blk, either virtio-1 or legacy virtio) on current qemu master with kvm enabled on s390. Haven't tried anything further. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/4] s390/virtio: use noop dma ops
On Fri, 30 Oct 2015 13:26:09 +0100 Christian Borntraegerwrote: > I am currently reworking this to > > static inline struct dma_map_ops *get_dma_ops(struct device *dev) > { > if (dev && dev->archdata.dma_ops) > return dev->archdata.dma_ops; > return _noop_ops; > } > > > Which uses the dma_noop_ops for everything unless the device overrides (PCI > does) Yes, opt-in seems less error-prone here. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/4] s390/virtio: use noop dma ops
On Tue, 27 Oct 2015 23:48:51 +0100 Christian Borntraegerwrote: > With all infrastructure in place, lets provide dma_ops for virtio > devices on s390. > > Signed-off-by: Christian Borntraeger > --- > drivers/s390/virtio/kvm_virtio.c | 2 ++ > drivers/s390/virtio/virtio_ccw.c | 2 ++ > 2 files changed, 4 insertions(+) > > diff --git a/drivers/s390/virtio/kvm_virtio.c > b/drivers/s390/virtio/kvm_virtio.c > index 53fb975..05adaa9 100644 > --- a/drivers/s390/virtio/kvm_virtio.c > +++ b/drivers/s390/virtio/kvm_virtio.c > @@ -13,6 +13,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -318,6 +319,7 @@ static void add_kvm_device(struct kvm_device_desc *d, > unsigned int offset) > return; > } > > + kdev->vdev.dev.archdata.dma_ops = _noop_ops; This provides dma_ops for the vdev, while Andy's virtio code looks for dma_ops in the vdev's parent (in the ccw and pci cases, the proxy device; in this case, it would be our root device). With diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c index 05adaa9..5f79c52 100644 --- a/drivers/s390/virtio/kvm_virtio.c +++ b/drivers/s390/virtio/kvm_virtio.c @@ -319,7 +319,6 @@ static void add_kvm_device(struct kvm_device_desc *d, unsigned int offset) return; } - kdev->vdev.dev.archdata.dma_ops = _noop_ops; kdev->vdev.dev.parent = kvm_root; kdev->vdev.id.device = d->type; kdev->vdev.config = _vq_configspace_ops; @@ -473,6 +472,7 @@ static int __init kvm_devices_init(void) vmem_remove_mapping(total_memory_size, PAGE_SIZE); return rc; } + kvm_root->archdata.dma_ops = _noop_ops; INIT_WORK(_work, hotplug_devices); applied (and the endianness fix in the virtio code), I can boot a s390-virtio guest as well. > kdev->vdev.dev.parent = kvm_root; > kdev->vdev.id.device = d->type; > kdev->vdev.config = _vq_configspace_ops; -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/4] s390/virtio: use noop dma ops
On Wed, 28 Oct 2015 09:43:34 +0900 Joerg Roedelwrote: > On Tue, Oct 27, 2015 at 11:48:51PM +0100, Christian Borntraeger wrote: > > @@ -1093,6 +1094,7 @@ static void virtio_ccw_auto_online(void *data, > > async_cookie_t cookie) > > struct ccw_device *cdev = data; > > int ret; > > > > + cdev->dev.archdata.dma_ops = _noop_ops; > > ret = ccw_device_set_online(cdev); > > if (ret) > > dev_warn(>dev, "Failed to set online: %d\n", ret); > > Hmm, drivers usually don't deal with setting the dma_ops for their > devices, as they depend on the platform and not so much on the device > itself. > > Can you do this special-case handling from device independent platform > code, where you also setup dma_ops for other devices? Hm, maybe at the bus level? pci devices get s390_dma_ops, ccw devices get the noop dma ops (just a bit of dead weight for non-virtio ccw devices, I guess). The old style s390-virtio devices are the odd ones around, but I'd like to invest the least time possible there to keep them going. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Fixes: 805de8f43c20 (atomic: Replace atomic_{set,clear}_mask() usage)
On Wed, 16 Sep 2015 09:13:50 -0400 "Jason J. Herne" <jjhe...@linux.vnet.ibm.com> wrote: > The offending commit accidentally replaces an atomic_clear with an > atomic_or instead of an atomic_andnot in kvm_s390_vcpu_request_handled. > The symptom is that kvm guests on s390 hang on startup. > This patch simply replaces the incorrect atomic_or with atomic_andnot > > Signed-off-by: Jason J. Herne <jjhe...@linux.vnet.ibm.com> > --- > arch/s390/kvm/kvm-s390.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c > index c91eb94..49e76be 100644 > --- a/arch/s390/kvm/kvm-s390.c > +++ b/arch/s390/kvm/kvm-s390.c > @@ -1574,7 +1574,7 @@ static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) > > static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) > { > - atomic_or(PROG_REQUEST, >arch.sie_block->prog20); > + atomic_andnot(PROG_REQUEST, >arch.sie_block->prog20); > } > > /* Acked-by: Cornelia Huck <cornelia.h...@de.ibm.com> -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V6 1/6] kvm: don't try to register to KVM_FAST_MMIO_BUS for non mmio eventfd
On Tue, 15 Sep 2015 14:41:54 +0800 Jason Wang <jasow...@redhat.com> wrote: > We only want zero length mmio eventfd to be registered on > KVM_FAST_MMIO_BUS. So check this explicitly when arg->len is zero to > make sure this. > > Cc: sta...@vger.kernel.org > Cc: Gleb Natapov <g...@kernel.org> > Cc: Paolo Bonzini <pbonz...@redhat.com> > Signed-off-by: Jason Wang <jasow...@redhat.com> > --- > virt/kvm/eventfd.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) Reviewed-by: Cornelia Huck <cornelia.h...@de.ibm.com> -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V6 3/6] kvm: fix double free for fast mmio eventfd
On Tue, 15 Sep 2015 14:41:56 +0800 Jason Wang <jasow...@redhat.com> wrote: > We register wildcard mmio eventfd on two buses, once for KVM_MMIO_BUS > and once on KVM_FAST_MMIO_BUS but with a single iodev > instance. This will lead to an issue: kvm_io_bus_destroy() knows > nothing about the devices on two buses pointing to a single dev. Which > will lead to double free[1] during exit. Fix this by allocating two > instances of iodevs then registering one on KVM_MMIO_BUS and another > on KVM_FAST_MMIO_BUS. > > CPU: 1 PID: 2894 Comm: qemu-system-x86 Not tainted 3.19.0-26-generic > #28-Ubuntu > Hardware name: LENOVO 2356BG6/2356BG6, BIOS G7ET96WW (2.56 ) 09/12/2013 > task: 88009ae0c4b0 ti: 88020e7f task.ti: 88020e7f > RIP: 0010:[] [] > ioeventfd_release+0x28/0x60 [kvm] > RSP: 0018:88020e7f3bc8 EFLAGS: 00010292 > RAX: dead00200200 RBX: 8801ec19c900 RCX: 00018200016d > RDX: 8801ec19cf80 RSI: ea0008bf1d40 RDI: 8801ec19c900 > RBP: 88020e7f3bd8 R08: 2fc75a01 R09: 00018200016d > R10: c07df6ae R11: 88022fc75a98 R12: 88021e7cc000 > R13: 88021e7cca48 R14: 88021e7cca50 R15: 8801ec19c880 > FS: 7fc1ee3e6700() GS:88023e24() knlGS: > CS: 0010 DS: ES: CR0: 80050033 > CR2: 7f8f389d8000 CR3: 00023dc13000 CR4: 001427e0 > Stack: > 88021e7cc000 88020e7f3be8 c07e2622 > 88020e7f3c38 c07df69a 880232524160 88020e792d80 > 880219b78c00 0008 8802321686a8 > Call Trace: > [] ioeventfd_destructor+0x12/0x20 [kvm] > [] kvm_put_kvm+0xca/0x210 [kvm] > [] kvm_vcpu_release+0x18/0x20 [kvm] > [] __fput+0xe7/0x250 > [] fput+0xe/0x10 > [] task_work_run+0xd4/0xf0 > [] do_exit+0x368/0xa50 > [] ? recalc_sigpending+0x1f/0x60 > [] do_group_exit+0x45/0xb0 > [] get_signal+0x291/0x750 > [] do_signal+0x28/0xab0 > [] ? do_futex+0xdb/0x5d0 > [] ? __wake_up_locked_key+0x18/0x20 > [] ? SyS_futex+0x76/0x170 > [] do_notify_resume+0x69/0xb0 > [] int_signal+0x12/0x17 > Code: 5d c3 90 0f 1f 44 00 00 55 48 89 e5 53 48 89 fb 48 83 ec 08 48 8b 7f 20 > e8 06 d6 a5 c0 48 8b 43 08 48 8b 13 48 89 df 48 89 42 08 <48> 89 10 48 b8 00 > 01 10 00 00 > RIP [] ioeventfd_release+0x28/0x60 [kvm] > RSP > > Cc: sta...@vger.kernel.org > Cc: Gleb Natapov <g...@kernel.org> > Cc: Paolo Bonzini <pbonz...@redhat.com> > Signed-off-by: Jason Wang <jasow...@redhat.com> > --- > virt/kvm/eventfd.c | 43 +-- > 1 file changed, 25 insertions(+), 18 deletions(-) Reviewed-by: Cornelia Huck <cornelia.h...@de.ibm.com> -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V6 2/6] kvm: factor out core eventfd assign/deassign logic
On Tue, 15 Sep 2015 14:41:55 +0800 Jason Wang <jasow...@redhat.com> wrote: > This patch factors out core eventfd assign/deassign logic and leaves > the argument checking and bus index selection to callers. > > Cc: sta...@vger.kernel.org > Cc: Gleb Natapov <g...@kernel.org> > Cc: Paolo Bonzini <pbonz...@redhat.com> > Signed-off-by: Jason Wang <jasow...@redhat.com> > --- > virt/kvm/eventfd.c | 85 > -- > 1 file changed, 50 insertions(+), 35 deletions(-) Reviewed-by: Cornelia Huck <cornelia.h...@de.ibm.com> -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V6 4/6] kvm: fix zero length mmio searching
On Tue, 15 Sep 2015 14:41:57 +0800 Jason Wang <jasow...@redhat.com> wrote: > Currently, if we had a zero length mmio eventfd assigned on > KVM_MMIO_BUS. It will never be found by kvm_io_bus_cmp() since it > always compares the kvm_io_range() with the length that guest > wrote. This will cause e.g for vhost, kick will be trapped by qemu > userspace instead of vhost. Fixing this by using zero length if an > iodevice is zero length. > > Cc: sta...@vger.kernel.org > Cc: Gleb Natapov <g...@kernel.org> > Cc: Paolo Bonzini <pbonz...@redhat.com> > Signed-off-by: Jason Wang <jasow...@redhat.com> > --- > virt/kvm/kvm_main.c | 19 +-- > 1 file changed, 17 insertions(+), 2 deletions(-) Reviewed-by: Cornelia Huck <cornelia.h...@de.ibm.com> -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V6 6/6] kvm: add fast mmio capabilitiy
On Tue, 15 Sep 2015 17:07:55 +0200 Paolo Bonziniwrote: > On 15/09/2015 08:41, Jason Wang wrote: > > +With KVM_CAP_FAST_MMIO, a zero length mmio eventfd is allowed for > > +kernel to ignore the length of guest write and get a possible faster > > +response. Note the speedup may only work on some specific > > +architectures and setups. Otherwise, it's as fast as wildcard mmio > > +eventfd. > > I don't really like tying the capability to MMIO, especially since > zero length ioeventfd is already accepted for virtio-ccw. Actually, zero length ioeventfd does not make sense for virtio-ccw; we just don't check it (although we probably should). > > What about the following? > > diff --git a/Documentation/virtual/kvm/api.txt > b/Documentation/virtual/kvm/api.txt > index 7a3cb48a644d..247944071cc8 100644 > --- a/Documentation/virtual/kvm/api.txt > +++ b/Documentation/virtual/kvm/api.txt > @@ -1627,11 +1627,10 @@ to the registered address is equal to datamatch in > struct kvm_ioeventfd. > For virtio-ccw devices, addr contains the subchannel id and datamatch the > virtqueue index. > > -With KVM_CAP_FAST_MMIO, a zero length mmio eventfd is allowed for > -kernel to ignore the length of guest write and get a possible faster > -response. Note the speedup may only work on some specific > -architectures and setups. Otherwise, it's as fast as wildcard mmio > -eventfd. > +With KVM_CAP_IOEVENTFD_ANY_LENGTH, a zero length ioeventfd is allowed, and > +the kernel will ignore the length of guest write and get a faster vmexit. s/get/may get/ ? > +The speedup may only apply to specific architectures, but the ioeventfd will > +work anyway. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V6 6/6] kvm: add fast mmio capabilitiy
On Tue, 15 Sep 2015 18:29:49 +0200 Paolo Bonzini <pbonz...@redhat.com> wrote: > On 15/09/2015 18:13, Cornelia Huck wrote: > > On Tue, 15 Sep 2015 17:07:55 +0200 > > Paolo Bonzini <pbonz...@redhat.com> wrote: > > > >> On 15/09/2015 08:41, Jason Wang wrote: > >>> +With KVM_CAP_FAST_MMIO, a zero length mmio eventfd is allowed for > >>> +kernel to ignore the length of guest write and get a possible faster > >>> +response. Note the speedup may only work on some specific > >>> +architectures and setups. Otherwise, it's as fast as wildcard mmio > >>> +eventfd. > >> > >> I don't really like tying the capability to MMIO, especially since > >> zero length ioeventfd is already accepted for virtio-ccw. > > > > Actually, zero length ioeventfd does not make sense for virtio-ccw; > > Can you explain why? If there is any non-zero valid length, "wildcard > length" (represented by zero) would also make sense. What is a wildcard match supposed to mean in this case? The datamatch field contains the queue index for the device specified in the address field. The hypercall interface associated with the eventfd always has device + queue index in its parameters; there is no interface for "notify device with all its queues". But maybe I'm just lacking imagination :) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V4 3/4] kvm: fix zero length mmio searching
On Fri, 11 Sep 2015 10:26:41 +0200 Paolo Bonziniwrote: > On 11/09/2015 05:17, Jason Wang wrote: > > + int len = r2->len ? r1->len : 0; > > + > > if (r1->addr < r2->addr) > > return -1; > > - if (r1->addr + r1->len > r2->addr + r2->len) > > + if (r1->addr + len > r2->addr + r2->len) > > return 1; > > Perhaps better: > > gpa_t addr1 = r1->addr; > gpa_t addr2 = r2->addr; > > if (addr1 < addr2) > return -1; > > /* If r2->len == 0, match the exact address. If r2->len != 0, >* accept any overlapping write. Any order is acceptable for >* overlapping ranges, because kvm_io_bus_get_first_dev ensures >* we process all of them. >*/ > if (r2->len) { > addr1 += r1->len; > addr2 += r2->len; > } > > if (addr1 > addr2) > return 1; > > return 0; > +1 to documenting what the semantics are :) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V4 1/4] kvm: factor out core eventfd assign/deassign logic
On Fri, 11 Sep 2015 11:17:34 +0800 Jason Wangwrote: > This patch factors out core eventfd assign/deassign logic and leave > the argument checking and bus index selection to callers. > > Cc: Gleb Natapov > Cc: Paolo Bonzini > Signed-off-by: Jason Wang > --- > virt/kvm/eventfd.c | 83 > -- > 1 file changed, 49 insertions(+), 34 deletions(-) > > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c > index 9ff4193..163258d 100644 > --- a/virt/kvm/eventfd.c > +++ b/virt/kvm/eventfd.c > @@ -771,40 +771,14 @@ static enum kvm_bus ioeventfd_bus_from_flags(__u32 > flags) > return KVM_MMIO_BUS; > } > > -static int > -kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) > +static int kvm_assign_ioeventfd_idx(struct kvm *kvm, > + enum kvm_bus bus_idx, > + struct kvm_ioeventfd *args) > { > - enum kvm_bus bus_idx; > - struct _ioeventfd*p; > - struct eventfd_ctx *eventfd; > - int ret; > - > - bus_idx = ioeventfd_bus_from_flags(args->flags); > - /* must be natural-word sized, or 0 to ignore length */ > - switch (args->len) { > - case 0: > - case 1: > - case 2: > - case 4: > - case 8: > - break; > - default: > - return -EINVAL; > - } > > - /* check for range overflow */ > - if (args->addr + args->len < args->addr) > - return -EINVAL; > - > - /* check for extra flags that we don't understand */ > - if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) > - return -EINVAL; > - > - /* ioeventfd with no length can't be combined with DATAMATCH */ > - if (!args->len && > - args->flags & (KVM_IOEVENTFD_FLAG_PIO | > -KVM_IOEVENTFD_FLAG_DATAMATCH)) > - return -EINVAL; > + struct eventfd_ctx *eventfd; > + struct _ioeventfd *p; > + int ret; > > eventfd = eventfd_ctx_fdget(args->fd); > if (IS_ERR(eventfd)) > @@ -873,14 +847,48 @@ fail: > } > > static int > -kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) > +kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) You'll move this function to below the deassign function in patch 2. Maybe do it already here? > { > enum kvm_bus bus_idx; > + > + bus_idx = ioeventfd_bus_from_flags(args->flags); > + /* must be natural-word sized, or 0 to ignore length */ > + switch (args->len) { > + case 0: > + case 1: > + case 2: > + case 4: > + case 8: > + break; > + default: > + return -EINVAL; > + } > + > + /* check for range overflow */ > + if (args->addr + args->len < args->addr) > + return -EINVAL; > + > + /* check for extra flags that we don't understand */ > + if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) > + return -EINVAL; > + > + /* ioeventfd with no length can't be combined with DATAMATCH */ > + if (!args->len && > + args->flags & (KVM_IOEVENTFD_FLAG_PIO | > +KVM_IOEVENTFD_FLAG_DATAMATCH)) > + return -EINVAL; > + > + return kvm_assign_ioeventfd_idx(kvm, bus_idx, args); > +} > + > +static int > +kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, > +struct kvm_ioeventfd *args) While this file uses newline before function name quite often, putting it on the same line seems more common - don't know which one the maintainers prefer. > +{ > struct _ioeventfd*p, *tmp; > struct eventfd_ctx *eventfd; > int ret = -ENOENT; > > - bus_idx = ioeventfd_bus_from_flags(args->flags); > eventfd = eventfd_ctx_fdget(args->fd); > if (IS_ERR(eventfd)) > return PTR_ERR(eventfd); > @@ -918,6 +926,13 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct > kvm_ioeventfd *args) > return ret; > } > > +static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd > *args) > +{ > + enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags); > + > + return kvm_deassign_ioeventfd_idx(kvm, bus_idx, args); > +} > + > int > kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) > { -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V4 2/4] kvm: fix double free for fast mmio eventfd
On Fri, 11 Sep 2015 11:17:35 +0800 Jason Wangwrote: > We register wildcard mmio eventfd on two buses, one for KVM_MMIO_BUS > and another is KVM_FAST_MMIO_BUS but with a single iodev > instance. This will lead an issue: kvm_io_bus_destroy() knows nothing > about the devices on two buses points to a single dev. Which will lead s/points/pointing/ > double free[1] during exit. Fixing this by using allocate two s/using allocate/allocating/ > instances of iodevs then register one on KVM_MMIO_BUS and another on > KVM_FAST_MMIO_BUS. > (...) > @@ -929,8 +878,66 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus > bus_idx, > static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd > *args) > { > enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags); > + int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args); > + > + if (!args->len) > + kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args); I think it would be good to explicitly check for bus_idx == KVM_MMIO_BUS here. > + > + return ret; > +} > > - return kvm_deassign_ioeventfd_idx(kvm, bus_idx, args); > +static int > +kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) > +{ > + enum kvm_bus bus_idx; > + int ret; > + > + bus_idx = ioeventfd_bus_from_flags(args->flags); > + /* must be natural-word sized, or 0 to ignore length */ > + switch (args->len) { > + case 0: > + case 1: > + case 2: > + case 4: > + case 8: > + break; > + default: > + return -EINVAL; > + } > + > + /* check for range overflow */ > + if (args->addr + args->len < args->addr) > + return -EINVAL; > + > + /* check for extra flags that we don't understand */ > + if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) > + return -EINVAL; > + > + /* ioeventfd with no length can't be combined with DATAMATCH */ > + if (!args->len && > + args->flags & (KVM_IOEVENTFD_FLAG_PIO | > +KVM_IOEVENTFD_FLAG_DATAMATCH)) > + return -EINVAL; > + > + ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args); > + if (ret) > + goto fail; > + > + /* When length is ignored, MMIO is also put on a separate bus, for > + * faster lookups. > + */ > + if (!args->len && !(args->flags & KVM_IOEVENTFD_FLAG_PIO)) { Dito on a positive check for bus_idx == KVM_MMIO_BUS. > + ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args); > + if (ret < 0) > + goto fast_fail; > + } > + > + return 0; > + > +fast_fail: > + kvm_deassign_ioeventfd(kvm, args); Shouldn't you use kvm_deassign_ioeventfd(kvm, bus_idx, args) here? > +fail: > + return ret; > } > > int -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V4 2/4] kvm: fix double free for fast mmio eventfd
On Fri, 11 Sep 2015 17:25:45 +0800 Jason Wang <jasow...@redhat.com> wrote: > On 09/11/2015 03:46 PM, Cornelia Huck wrote: > > On Fri, 11 Sep 2015 11:17:35 +0800 > > Jason Wang <jasow...@redhat.com> wrote: > >> + > >> + /* When length is ignored, MMIO is also put on a separate bus, for > >> + * faster lookups. > >> + */ > >> + if (!args->len && !(args->flags & KVM_IOEVENTFD_FLAG_PIO)) { > > Dito on a positive check for bus_idx == KVM_MMIO_BUS. > > I was thinking maybe this should be done in a separate patch on top. > What's your opinion? The check is an independent issue, an extra patch is fine (current usage does not trigger any problems). > > >> + ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args); > >> + if (ret < 0) > >> + goto fast_fail; > >> + } -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/1] virtio/s390: one bugfix
Michael, here's a bugfix for the virtio-ccw driver. Question: How would you like to get patches? This one is formatted against your current linux-next branch; I can prepare a branch for you to pull from as well. Pierre Morel (1): virtio/s390: handle failures of READ_VQ_CONF ccw drivers/s390/virtio/virtio_ccw.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) -- 2.3.8 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/1] virtio/s390: handle failures of READ_VQ_CONF ccw
From: Pierre Morel <pmo...@linux.vnet.ibm.com> In virtio_ccw_read_vq_conf() the return value of ccw_io_helper() was not checked. If the configuration could not be read properly, we'd wrongly assume a queue size of 0. Let's propagate any I/O error to virtio_ccw_setup_vq() so it may properly fail. Signed-off-by: Pierre Morel <pmo...@linux.vnet.ibm.com> Signed-off-by: Cornelia Huck <cornelia.h...@de.ibm.com> --- drivers/s390/virtio/virtio_ccw.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index f8d8fdb..e9fae30 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -400,12 +400,16 @@ static bool virtio_ccw_kvm_notify(struct virtqueue *vq) static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev, struct ccw1 *ccw, int index) { + int ret; + vcdev->config_block->index = index; ccw->cmd_code = CCW_CMD_READ_VQ_CONF; ccw->flags = 0; ccw->count = sizeof(struct vq_config_block); ccw->cda = (__u32)(unsigned long)(vcdev->config_block); - ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_VQ_CONF); + ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_VQ_CONF); + if (ret) + return ret; return vcdev->config_block->num; } @@ -503,6 +507,10 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev, goto out_err; } info->num = virtio_ccw_read_vq_conf(vcdev, ccw, i); + if (info->num < 0) { + err = info->num; + goto out_err; + } size = PAGE_ALIGN(vring_size(info->num, KVM_VIRTIO_CCW_RING_ALIGN)); info->queue = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); if (info->queue == NULL) { -- 2.3.8 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V2 2/3] kvm: don't register wildcard MMIO EVENTFD on two buses
On Tue, 25 Aug 2015 15:47:14 +0800 Jason Wang jasow...@redhat.com wrote: diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 9ff4193..95f2901 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -762,13 +762,15 @@ ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p) return false; } -static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags) +static enum kvm_bus ioeventfd_bus_from_flags(struct kvm_ioeventfd *args) ioeventfd_bus_from_args()? But _from_flags() is not wrong either :) { - if (flags KVM_IOEVENTFD_FLAG_PIO) + if (args-flags KVM_IOEVENTFD_FLAG_PIO) return KVM_PIO_BUS; - if (flags KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY) + if (args-flags KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY) return KVM_VIRTIO_CCW_NOTIFY_BUS; - return KVM_MMIO_BUS; + if (args-len) + return KVM_MMIO_BUS; + return KVM_FAST_MMIO_BUS; Hm... /* When length is ignored, MMIO is put on a separate bus, for * faster lookups. */ return args-len ? KVM_MMIO_BUS : KVM_FAST_MMIO_BUS; } static int This version of the patch looks nice and compact. Regardless whether you want to follow my (minor) style suggestions, consider this patch Acked-by: Cornelia Huck cornelia.h...@de.ibm.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V3 2/3] kvm: don't register wildcard MMIO EVENTFD on two buses
On Tue, 25 Aug 2015 17:05:47 +0800 Jason Wang jasow...@redhat.com wrote: We register wildcard mmio eventfd on two buses, one for KVM_MMIO_BUS and another is KVM_FAST_MMIO_BUS. This leads to issue: - kvm_io_bus_destroy() knows nothing about the devices on two buses points to a single dev. Which will lead double free [1] during exit. - wildcard eventfd ignores data len, so it was registered as a kvm_io_range with zero length. This will fail the binary search in kvm_io_bus_get_first_dev() when we try to emulate through KVM_MMIO_BUS. This will cause userspace io emulation request instead of a eventfd notification (virtqueue kick will be trapped by qemu instead of vhost in this case). Fixing this by don't register wildcard mmio eventfd on two buses. Instead, only register it in KVM_FAST_MMIO_BUS. This fixes the double free issue of kvm_io_bus_destroy(). For the arch/setups that does not utilize KVM_FAST_MMIO_BUS, before searching KVM_MMIO_BUS, try KVM_FAST_MMIO_BUS first to see it it has a match. [1] Panic caused by double free: CPU: 1 PID: 2894 Comm: qemu-system-x86 Not tainted 3.19.0-26-generic #28-Ubuntu Hardware name: LENOVO 2356BG6/2356BG6, BIOS G7ET96WW (2.56 ) 09/12/2013 task: 88009ae0c4b0 ti: 88020e7f task.ti: 88020e7f RIP: 0010:[c07e25d8] [c07e25d8] ioeventfd_release+0x28/0x60 [kvm] RSP: 0018:88020e7f3bc8 EFLAGS: 00010292 RAX: dead00200200 RBX: 8801ec19c900 RCX: 00018200016d RDX: 8801ec19cf80 RSI: ea0008bf1d40 RDI: 8801ec19c900 RBP: 88020e7f3bd8 R08: 2fc75a01 R09: 00018200016d R10: c07df6ae R11: 88022fc75a98 R12: 88021e7cc000 R13: 88021e7cca48 R14: 88021e7cca50 R15: 8801ec19c880 FS: 7fc1ee3e6700() GS:88023e24() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 7f8f389d8000 CR3: 00023dc13000 CR4: 001427e0 Stack: 88021e7cc000 88020e7f3be8 c07e2622 88020e7f3c38 c07df69a 880232524160 88020e792d80 880219b78c00 0008 8802321686a8 Call Trace: [c07e2622] ioeventfd_destructor+0x12/0x20 [kvm] [c07df69a] kvm_put_kvm+0xca/0x210 [kvm] [c07df818] kvm_vcpu_release+0x18/0x20 [kvm] [811f69f7] __fput+0xe7/0x250 [811f6bae] fput+0xe/0x10 [81093f04] task_work_run+0xd4/0xf0 [81079358] do_exit+0x368/0xa50 [81082c8f] ? recalc_sigpending+0x1f/0x60 [81079ad5] do_group_exit+0x45/0xb0 [81085c71] get_signal+0x291/0x750 [810144d8] do_signal+0x28/0xab0 [810f3a3b] ? do_futex+0xdb/0x5d0 [810b7028] ? __wake_up_locked_key+0x18/0x20 [810f3fa6] ? SyS_futex+0x76/0x170 [81014fc9] do_notify_resume+0x69/0xb0 [817cb9af] int_signal+0x12/0x17 Code: 5d c3 90 0f 1f 44 00 00 55 48 89 e5 53 48 89 fb 48 83 ec 08 48 8b 7f 20 e8 06 d6 a5 c0 48 8b 43 08 48 8b 13 48 89 df 48 89 42 08 48 89 10 48 b8 00 01 10 00 00 RIP [c07e25d8] ioeventfd_release+0x28/0x60 [kvm] RSP 88020e7f3bc8 Cc: Gleb Natapov g...@kernel.org Cc: Paolo Bonzini pbonz...@redhat.com Cc: Michael S. Tsirkin m...@redhat.com Signed-off-by: Jason Wang jasow...@redhat.com --- Changes from V2: - Tweak styles and comment suggested by Cornelia. Changes from v1: - change ioeventfd_bus_from_flags() to return KVM_FAST_MMIO_BUS when needed to save lots of unnecessary changes. --- virt/kvm/eventfd.c | 31 +-- virt/kvm/kvm_main.c | 16 ++-- 2 files changed, 23 insertions(+), 24 deletions(-) Acked-by: Cornelia Huck cornelia.h...@de.ibm.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/3] kvm: don't register wildcard MMIO EVENTFD on two buses
On Mon, 24 Aug 2015 11:29:29 +0800 Jason Wang jasow...@redhat.com wrote: On 08/21/2015 05:29 PM, Cornelia Huck wrote: On Fri, 21 Aug 2015 16:03:52 +0800 Jason Wang jasow...@redhat.com wrote: @@ -850,9 +845,15 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) Unfortunately snipped by diff, but the check here is on !len !PIO, which only does the desired thing as VIRTIO_CCW always uses len == 8. Should the check be for !len MMIO instead? I think the answer depends on whether len == 0 is valid for ccw. If not we can fail the assign earlier. Since even without this patch, if userspace tries to register a dev with len equals to zero, it will also be registered to KVM_FAST_MMIO_BUS. If yes, we need check as you suggested here. I don't think len != 8 makes much sense for the way ioeventfd is defined for ccw (we handle hypercalls with a payload specifying the device), but we currently don't actively fence it. But regardless, I'd prefer to decide directly upon whether userspace actually tried to register for the mmio bus. ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS, p-addr, 0, p-dev); if (ret 0) - goto register_fail; + goto unlock_fail; + } else { + ret = kvm_io_bus_register_dev(kvm, bus_idx, p-addr, p-length, +p-dev); + if (ret 0) + goto unlock_fail; } Hm... maybe the following would be more obvious: my_bus = (p-length == 0) (bus_idx == KVM_MMIO_BUS) ? KVM_FAST_MMIO_BUS : bus_idx; ret = kvm_io_bus_register_dev(kvm, my_bus, p-addr, p-length, pdev-dev); + kvm-buses[bus_idx]-ioeventfd_count++; list_add_tail(p-list, kvm-ioeventfds); (...) @@ -900,10 +899,11 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) if (!p-wildcard p-datamatch != args-datamatch) continue; - kvm_io_bus_unregister_dev(kvm, bus_idx, p-dev); if (!p-length) { kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS, p-dev); + } else { + kvm_io_bus_unregister_dev(kvm, bus_idx, p-dev); } Similar comments here... do you want to check for bus_idx == KVM_MMIO_BUS as well? Good catch. I think keep the original code as is will be also ok to solve this. (with changing the bus_idx to KVM_FAST_MMIO_BUS during registering if it was an wildcard mmio). Do you need to handle the ioeventfd_count changes on the fast mmio bus as well? kvm-buses[bus_idx]-ioeventfd_count--; ioeventfd_release(p); -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/3] kvm: don't register wildcard MMIO EVENTFD on two buses
On Fri, 21 Aug 2015 16:03:52 +0800 Jason Wang jasow...@redhat.com wrote: diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 9ff4193..834a409 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -838,11 +838,6 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) kvm_iodevice_init(p-dev, ioeventfd_ops); - ret = kvm_io_bus_register_dev(kvm, bus_idx, p-addr, p-length, - p-dev); - if (ret 0) - goto unlock_fail; - /* When length is ignored, MMIO is also put on a separate bus, for * faster lookups. You probably want to change this comment as well? */ @@ -850,9 +845,15 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) Unfortunately snipped by diff, but the check here is on !len !PIO, which only does the desired thing as VIRTIO_CCW always uses len == 8. Should the check be for !len MMIO instead? ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS, p-addr, 0, p-dev); if (ret 0) - goto register_fail; + goto unlock_fail; + } else { + ret = kvm_io_bus_register_dev(kvm, bus_idx, p-addr, p-length, + p-dev); + if (ret 0) + goto unlock_fail; } Hm... maybe the following would be more obvious: my_bus = (p-length == 0) (bus_idx == KVM_MMIO_BUS) ? KVM_FAST_MMIO_BUS : bus_idx; ret = kvm_io_bus_register_dev(kvm, my_bus, p-addr, p-length, pdev-dev); + kvm-buses[bus_idx]-ioeventfd_count++; list_add_tail(p-list, kvm-ioeventfds); (...) @@ -900,10 +899,11 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) if (!p-wildcard p-datamatch != args-datamatch) continue; - kvm_io_bus_unregister_dev(kvm, bus_idx, p-dev); if (!p-length) { kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS, p-dev); + } else { + kvm_io_bus_unregister_dev(kvm, bus_idx, p-dev); } Similar comments here... do you want to check for bus_idx == KVM_MMIO_BUS as well? kvm-buses[bus_idx]-ioeventfd_count--; ioeventfd_release(p); -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: Add Kconfig option to signal cross-endian guests
On Thu, 9 Jul 2015 09:49:05 +0200 Thomas Huth th...@redhat.com wrote: The option for supporting cross-endianness legacy guests in s/cross-endianness/cross-endian/ ? the vhost and tun code should only be available on systems that support cross-endian guests. Signed-off-by: Thomas Huth th...@redhat.com --- arch/arm/kvm/Kconfig | 1 + arch/arm64/kvm/Kconfig | 1 + arch/powerpc/kvm/Kconfig | 1 + drivers/net/Kconfig | 1 + drivers/vhost/Kconfig| 1 + virt/kvm/Kconfig | 3 +++ 6 files changed, 8 insertions(+) Acked-by: Cornelia Huck cornelia.h...@de.ibm.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: Add Kconfig option to signal cross-endian guests
On Thu, 9 Jul 2015 09:49:05 +0200 Thomas Huth th...@redhat.com wrote: The option for supporting cross-endianness legacy guests in s/cross-endianness/cross-endian/ ? the vhost and tun code should only be available on systems that support cross-endian guests. Signed-off-by: Thomas Huth th...@redhat.com --- arch/arm/kvm/Kconfig | 1 + arch/arm64/kvm/Kconfig | 1 + arch/powerpc/kvm/Kconfig | 1 + drivers/net/Kconfig | 1 + drivers/vhost/Kconfig| 1 + virt/kvm/Kconfig | 3 +++ 6 files changed, 8 insertions(+) Acked-by: Cornelia Huck cornelia.h...@de.ibm.com -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] virtio/s390: rename drivers/s390/kvm - drivers/s390/virtio
This more accurately reflects what these drivers actually do. Suggested-by: Paolo Bonzini pbonz...@redhat.com Acked-by: Christian Borntraeger borntrae...@de.ibm.com Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com --- MAINTAINERS | 2 +- drivers/s390/Makefile | 2 +- drivers/s390/{kvm = virtio}/Makefile | 0 drivers/s390/{kvm = virtio}/kvm_virtio.c | 0 drivers/s390/{kvm = virtio}/virtio_ccw.c | 0 5 files changed, 2 insertions(+), 2 deletions(-) rename drivers/s390/{kvm = virtio}/Makefile (100%) rename drivers/s390/{kvm = virtio}/kvm_virtio.c (100%) rename drivers/s390/{kvm = virtio}/virtio_ccw.c (100%) diff --git a/MAINTAINERS b/MAINTAINERS index 280a568..fbef7d0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10577,7 +10577,7 @@ L: linux-s...@vger.kernel.org L: virtualizat...@lists.linux-foundation.org L: kvm@vger.kernel.org S: Supported -F: drivers/s390/kvm/ +F: drivers/s390/virtio/ VIRTIO HOST (VHOST) M: Michael S. Tsirkin m...@redhat.com diff --git a/drivers/s390/Makefile b/drivers/s390/Makefile index 95bccfd..e5225ad 100644 --- a/drivers/s390/Makefile +++ b/drivers/s390/Makefile @@ -2,7 +2,7 @@ # Makefile for the S/390 specific device drivers # -obj-y += cio/ block/ char/ crypto/ net/ scsi/ kvm/ +obj-y += cio/ block/ char/ crypto/ net/ scsi/ virtio/ drivers-y += drivers/s390/built-in.o diff --git a/drivers/s390/kvm/Makefile b/drivers/s390/virtio/Makefile similarity index 100% rename from drivers/s390/kvm/Makefile rename to drivers/s390/virtio/Makefile diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c similarity index 100% rename from drivers/s390/kvm/kvm_virtio.c rename to drivers/s390/virtio/kvm_virtio.c diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c similarity index 100% rename from drivers/s390/kvm/virtio_ccw.c rename to drivers/s390/virtio/virtio_ccw.c -- 2.3.8 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] MAINTAINERS: separate section for s390 virtio drivers
On Wed, 1 Jul 2015 17:17:41 +0200 Paolo Bonzini pbonz...@redhat.com wrote: On 01/07/2015 17:15, Cornelia Huck wrote: The s390-specific virtio drivers have probably more to do with virtio than with kvm today; let's move them out into a separate section to reflect this and to be able to add relevant mailing lists. CC: Christian Borntraeger borntrae...@de.ibm.com Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com --- MAINTAINERS | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 246d9d8..fca5c00 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5766,7 +5766,6 @@ S:Supported F: Documentation/s390/kvm.txt F: arch/s390/include/asm/kvm* F: arch/s390/kvm/ -F: drivers/s390/kvm/ KERNEL VIRTUAL MACHINE (KVM) FOR ARM M: Christoffer Dall christoffer.d...@linaro.org @@ -10671,6 +10670,15 @@ F: drivers/block/virtio_blk.c F: include/linux/virtio_*.h F: include/uapi/linux/virtio_*.h +VIRTIO DRIVERS FOR S390 +M: Christian Borntraeger borntrae...@de.ibm.com +M: Cornelia Huck cornelia.h...@de.ibm.com +L: linux-s...@vger.kernel.org +L: virtualizat...@lists.linux-foundation.org +L: kvm@vger.kernel.org Keeping the KVM mailing list is probably a good idea. +S: Supported +F: drivers/s390/kvm/ Since we are at it, do we want to rename the directory to drivers/s390/virtio? Makes sense, just sent a patch. Anyway: Acked-by: Paolo Bonzini pbonz...@redhat.com Paolo VIRTIO HOST (VHOST) M: Michael S. Tsirkin m...@redhat.com L: kvm@vger.kernel.org OK, how does this go upstream (I guess through the virtio tree)? Michael, will you pick the patches or should I prepare a branch? -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] MAINTAINERS: separate section for s390 virtio drivers
The s390-specific virtio drivers have probably more to do with virtio than with kvm today; let's move them out into a separate section to reflect this and to be able to add relevant mailing lists. CC: Christian Borntraeger borntrae...@de.ibm.com Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com --- MAINTAINERS | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 246d9d8..fca5c00 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5766,7 +5766,6 @@ S:Supported F: Documentation/s390/kvm.txt F: arch/s390/include/asm/kvm* F: arch/s390/kvm/ -F: drivers/s390/kvm/ KERNEL VIRTUAL MACHINE (KVM) FOR ARM M: Christoffer Dall christoffer.d...@linaro.org @@ -10671,6 +10670,15 @@ F: drivers/block/virtio_blk.c F: include/linux/virtio_*.h F: include/uapi/linux/virtio_*.h +VIRTIO DRIVERS FOR S390 +M: Christian Borntraeger borntrae...@de.ibm.com +M: Cornelia Huck cornelia.h...@de.ibm.com +L: linux-s...@vger.kernel.org +L: virtualizat...@lists.linux-foundation.org +L: kvm@vger.kernel.org +S: Supported +F: drivers/s390/kvm/ + VIRTIO HOST (VHOST) M: Michael S. Tsirkin m...@redhat.com L: kvm@vger.kernel.org -- 2.3.8 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v6 4/8] vringh: introduce vringh_is_little_endian() helper
On Fri, 24 Apr 2015 14:24:58 +0200 Greg Kurz gk...@linux.vnet.ibm.com wrote: Signed-off-by: Greg Kurz gk...@linux.vnet.ibm.com --- include/linux/vringh.h | 17 +++-- 1 file changed, 11 insertions(+), 6 deletions(-) Acked-by: Cornelia Huck cornelia.h...@de.ibm.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v6 3/8] macvtap: introduce macvtap_is_little_endian() helper
On Fri, 24 Apr 2015 14:24:48 +0200 Greg Kurz gk...@linux.vnet.ibm.com wrote: Signed-off-by: Greg Kurz gk...@linux.vnet.ibm.com --- drivers/net/macvtap.c |9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) Acked-by: Cornelia Huck cornelia.h...@de.ibm.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v6 7/8] vhost: cross-endian support for legacy devices
On Fri, 24 Apr 2015 14:27:24 +0200 Greg Kurz gk...@linux.vnet.ibm.com wrote: This patch brings cross-endian support to vhost when used to implement legacy virtio devices. Since it is a relatively rare situation, the feature availability is controlled by a kernel config option (not set by default). The vq-is_le boolean field is added to cache the endianness to be used for ring accesses. It defaults to native endian, as expected by legacy virtio devices. When the ring gets active, we force little endian if the device is modern. When the ring is deactivated, we revert to the native endian default. If cross-endian was compiled in, a vq-user_be boolean field is added so that userspace may request a specific endianness. This field is used to override the default when activating the ring of a legacy device. It has no effect on modern devices. Signed-off-by: Greg Kurz gk...@linux.vnet.ibm.com --- Changes since v5: - fixed description in Kconfig - fixed error description in uapi header - dropped useless semi-colon in the vhost_vq_reset_user_be() stub drivers/vhost/Kconfig | 15 drivers/vhost/vhost.c | 85 +++- drivers/vhost/vhost.h | 11 +- include/uapi/linux/vhost.h | 14 +++ 4 files changed, 122 insertions(+), 3 deletions(-) Reviewed-by: Cornelia Huck cornelia.h...@de.ibm.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v6 6/8] virtio: add explicit big-endian support to memory accessors
On Fri, 24 Apr 2015 14:26:24 +0200 Greg Kurz gk...@linux.vnet.ibm.com wrote: The current memory accessors logic is: - little endian if little_endian - native endian (i.e. no byteswap) if !little_endian If we want to fully support cross-endian vhost, we also need to be able to convert to big endian. Instead of changing the little_endian argument to some 3-value enum, this patch changes the logic to: - little endian if little_endian - big endian if !little_endian The native endian case is handled by all users with a trivial helper. This patch doesn't change any functionality, nor it does add overhead. Signed-off-by: Greg Kurz gk...@linux.vnet.ibm.com --- Changes since v5: - changed endian checking helpers as suggested by Thomas (use || and line breaker) drivers/net/macvtap.c|3 ++- drivers/net/tun.c|3 ++- drivers/vhost/vhost.h|3 ++- include/linux/virtio_byteorder.h | 24 ++-- include/linux/virtio_config.h|3 ++- include/linux/vringh.h |3 ++- 6 files changed, 24 insertions(+), 15 deletions(-) Reviewed-by: Cornelia Huck cornelia.h...@de.ibm.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v6 5/8] vhost: introduce vhost_is_little_endian() helper
On Fri, 24 Apr 2015 14:25:12 +0200 Greg Kurz gk...@linux.vnet.ibm.com wrote: Signed-off-by: Greg Kurz gk...@linux.vnet.ibm.com --- drivers/vhost/vhost.h | 17 +++-- 1 file changed, 11 insertions(+), 6 deletions(-) Acked-by: Cornelia Huck cornelia.h...@de.ibm.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v6 1/8] virtio: introduce virtio_is_little_endian() helper
On Fri, 24 Apr 2015 14:24:27 +0200 Greg Kurz gk...@linux.vnet.ibm.com wrote: Signed-off-by: Greg Kurz gk...@linux.vnet.ibm.com --- include/linux/virtio_config.h | 17 +++-- 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index ca3ed78..bd1a582 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -205,35 +205,40 @@ int virtqueue_set_affinity(struct virtqueue *vq, int cpu) return 0; } +static inline bool virtio_is_little_endian(struct virtio_device *vdev) +{ + return virtio_has_feature(vdev, VIRTIO_F_VERSION_1); +} + While this reads a bit funny in this patch, it will end up correctly at the end of the series, so Acked-by: Cornelia Huck cornelia.h...@de.ibm.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v6 2/8] tun: add tun_is_little_endian() helper
On Fri, 24 Apr 2015 14:24:38 +0200 Greg Kurz gk...@linux.vnet.ibm.com wrote: Signed-off-by: Greg Kurz gk...@linux.vnet.ibm.com --- drivers/net/tun.c |9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) Acked-by: Cornelia Huck cornelia.h...@de.ibm.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] [PATCH v5 11/17] target-s390x: Add KVM VM attribute interface for S390 CPU models
On Mon, 27 Apr 2015 13:07:58 +0200 Michael Mueller m...@linux.vnet.ibm.com wrote: On Mon, 27 Apr 2015 12:52:54 +0200 Christian Borntraeger borntrae...@de.ibm.com wrote: Am 27.04.2015 um 11:43 schrieb Michael Mueller: On Mon, 27 Apr 2015 10:15:47 +0200 Christian Borntraeger borntrae...@de.ibm.com wrote: Am 13.04.2015 um 15:56 schrieb Michael Mueller: [...] +static int cpu_model_get(KVMState *s, uint64_t attr, uint64_t addr) +{ +int rc = -ENOSYS; +struct kvm_device_attr dev_attr = { +.group = KVM_S390_VM_CPU_MODEL, +.attr = attr, +.addr = addr, Would it make sense to do the cast here cpu_model_get/set() is used to handle both attributes, KVM_S390_VM_CPU_MACHINE and KVM_S390_VM_CPU_PROCESSOR. Both require a different type in the signature, (S390ProcessorProps*) and (S390MachineProps*). Adding both as parameters seems to be odd and would require additionally logic in the function. Thus I think doing the cast outside is just the right thing to do. So what about a void pointer then as parameter? I prefer a pointer for qemu process memory over uint64_t as part of the function interface. This makes it somewhat clearer that this is an address within QEMU. Both ways will certainly work, though. The interface calls are: int kvm_s390_get_machine_props(KVMState *s, S390MachineProps *prop) int kvm_s390_get_processor_props(S390ProcessorProps *prop) cpu_model_get/set() are just static helpers. So this makes them internal calls... Conny, I guess you will pick up the patches. Any preference? ...and I'd prefer using a void pointer for them. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v5 6/8] virtio: add explicit big-endian support to memory accessors
On Thu, 23 Apr 2015 21:27:19 +0200 Thomas Huth th...@redhat.com wrote: On Thu, 23 Apr 2015 17:29:06 +0200 Greg Kurz gk...@linux.vnet.ibm.com wrote: The current memory accessors logic is: - little endian if little_endian - native endian (i.e. no byteswap) if !little_endian If we want to fully support cross-endian vhost, we also need to be able to convert to big endian. Instead of changing the little_endian argument to some 3-value enum, this patch changes the logic to: - little endian if little_endian - big endian if !little_endian The native endian case is handled by all users with a trivial helper. This patch doesn't change any functionality, nor it does add overhead. Signed-off-by: Greg Kurz gk...@linux.vnet.ibm.com --- Changes since v4: - style fixes (I have chosen if ... else in most places to stay below 80 columns, with the notable exception of the vhost helper which gets shorten in a later patch) drivers/net/macvtap.c|5 - drivers/net/tun.c|5 - drivers/vhost/vhost.h|2 +- include/linux/virtio_byteorder.h | 24 ++-- include/linux/virtio_config.h|5 - include/linux/vringh.h |2 +- 6 files changed, 28 insertions(+), 15 deletions(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index a2f2958..6cf6b3e 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -51,7 +51,10 @@ struct macvtap_queue { static inline bool macvtap_is_little_endian(struct macvtap_queue *q) { - return q-flags MACVTAP_VNET_LE; + if (q-flags MACVTAP_VNET_LE) + return true; + else + return virtio_legacy_is_little_endian(); simply: return (q-flags MACVTAP_VNET_LE) || virtio_legacy_is_little_endian(); ? ISTR that MST preferred the current notation, but I like either your way or ?: (with linebreak) even better. } (...) diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 6a49960..954c657 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -175,7 +175,7 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit) static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq) { - return vhost_has_feature(vq, VIRTIO_F_VERSION_1); + return vhost_has_feature(vq, VIRTIO_F_VERSION_1) ? true : virtio_legacy_is_little_endian(); } That line is way longer than 80 characters ... may I suggest to switch at least here to: return vhost_has_feature(vq, VIRTIO_F_VERSION_1) || virtio_legacy_is_little_endian(); I think the line will collapse in a further patch anyway. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v5 7/8] vhost: cross-endian support for legacy devices
On Fri, 24 Apr 2015 10:06:19 +0200 Greg Kurz gk...@linux.vnet.ibm.com wrote: On Fri, 24 Apr 2015 09:19:26 +0200 Cornelia Huck cornelia.h...@de.ibm.com wrote: On Thu, 23 Apr 2015 17:29:42 +0200 Greg Kurz gk...@linux.vnet.ibm.com wrote: diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index bb6a5b4..b980b53 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -103,6 +103,18 @@ struct vhost_memory { /* Get accessor: reads index, writes value in num */ #define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state) +/* Set the vring byte order in num. Valid values are VHOST_VRING_LITTLE_ENDIAN + * or VHOST_VRING_BIG_ENDIAN (other values return EINVAL). -EINVAL? Oops, yes. :) Should you also mention when you return -EBUSY? Indeed... what about: The byte order cannot be changed when the device is active: trying to do so returns -EBUSY. s/when/while/ ? But sounds good. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v5 7/8] vhost: cross-endian support for legacy devices
On Thu, 23 Apr 2015 17:29:42 +0200 Greg Kurz gk...@linux.vnet.ibm.com wrote: This patch brings cross-endian support to vhost when used to implement legacy virtio devices. Since it is a relatively rare situation, the feature availability is controlled by a kernel config option (not set by default). The vq-is_le boolean field is added to cache the endianness to be used for ring accesses. It defaults to native endian, as expected by legacy virtio devices. When the ring gets active, we force little endian if the device is modern. When the ring is deactivated, we revert to the native endian default. If cross-endian was compiled in, a vq-user_be boolean field is added so that userspace may request a specific endianness. This field is used to override the default when activating the ring of a legacy device. It has no effect on modern devices. Signed-off-by: Greg Kurz gk...@linux.vnet.ibm.com --- Changes since v4: - rewrote patch title to mention cross-endian - renamed config to VHOST_CROSS_ENDIAN_LEGACY - rewrote config description and help - moved ifdefery to top of vhost.c - added a detailed comment about the lifecycle of vq-user_be in vhost_init_is_le() - renamed ioctls to VHOST_[GS]ET_VRING_ENDIAN - added LE/BE defines to the ioctl API - rewrote ioctl sanity check with the LE/BE defines - updated comment in uapi/linux/vhost.h to mention that the availibility of both SET and GET ioctls depends on the kernel config drivers/vhost/Kconfig | 15 drivers/vhost/vhost.c | 86 +++- drivers/vhost/vhost.h | 10 + include/uapi/linux/vhost.h | 12 ++ 4 files changed, 121 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 017a1e8..74d7380 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -32,3 +32,18 @@ config VHOST ---help--- This option is selected by any driver which needs to access the core of vhost. + +config VHOST_CROSS_ENDIAN_LEGACY + bool Cross-endian support for vhost + default n + ---help--- + This option allows vhost to support guests with a different byte + ordering from host. ...while using legacy virtio. Might help to explain the LEGACY in the config option ;) + + Userspace programs can control the feature using the + VHOST_SET_VRING_ENDIAN and VHOST_GET_VRING_ENDIAN ioctls. + + This is only useful on a few platforms (ppc64 and arm64). Since it + adds some overhead, it is disabled default. s/default/by default/ + + If unsure, say N. diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 2ee2826..8c4390d 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -36,6 +36,78 @@ enum { #define vhost_used_event(vq) ((__virtio16 __user *)vq-avail-ring[vq-num]) #define vhost_avail_event(vq) ((__virtio16 __user *)vq-used-ring[vq-num]) +#ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY +static void vhost_vq_reset_user_be(struct vhost_virtqueue *vq) +{ + vq-user_be = !virtio_legacy_is_little_endian(); +} + +static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) +{ + struct vhost_vring_state s; + + if (vq-private_data) + return -EBUSY; + + if (copy_from_user(s, argp, sizeof(s))) + return -EFAULT; + + if (s.num != VHOST_VRING_LITTLE_ENDIAN + s.num != VHOST_VRING_BIG_ENDIAN) + return -EINVAL; + + vq-user_be = s.num; + + return 0; +} + +static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx, +int __user *argp) +{ + struct vhost_vring_state s = { + .index = idx, + .num = vq-user_be + }; + + if (copy_to_user(argp, s, sizeof(s))) + return -EFAULT; + + return 0; +} + +static void vhost_init_is_le(struct vhost_virtqueue *vq) +{ + /* Note for legacy virtio: user_be is initialized at reset time + * according to the host endianness. If userspace does not set an + * explicit endianness, the default behavior is native endian, as + * expected by legacy virtio. + */ + vq-is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq-user_be; +} +#else +static void vhost_vq_reset_user_be(struct vhost_virtqueue *vq) +{ + ; Just leave the function body empty? +} + +static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) +{ + return -ENOIOCTLCMD; +} + +static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx, +int __user *argp) +{ + return -ENOIOCTLCMD; +} + +static void vhost_init_is_le(struct vhost_virtqueue *vq) +{ + if (vhost_has_feature(vq, VIRTIO_F_VERSION_1)) + vq-is_le = true; +} +#endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */ +
Re: [PATCH v4 7/8] vhost: feature to set the vring endianness
On Tue, 14 Apr 2015 17:13:52 +0200 Greg Kurz gk...@linux.vnet.ibm.com wrote: On Tue, 14 Apr 2015 16:20:23 +0200 Cornelia Huck cornelia.h...@de.ibm.com wrote: On Fri, 10 Apr 2015 12:19:16 +0200 Greg Kurz gk...@linux.vnet.ibm.com wrote: +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY +static void vhost_init_is_le(struct vhost_virtqueue *vq) +{ + vq-is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq-user_be; So if the endianness is not explicitly set to BE, it will be forced to LE (instead of native endian)? Won't that break userspace that does not yet use the new interface when CONFIG_VHOST_SET_ENDIAN_LEGACY is set? If userspace doesn't use the new interface, then vq-user_be will retain its default value that was set in vhost_vq_reset(), i.e. : vq-user_be = !virtio_legacy_is_little_endian(); This means default is native endian. What about adding this comment ? static void vhost_init_is_le(struct vhost_virtqueue *vq) { /* Note for legacy virtio: user_be is initialized in vhost_vq_reset() * according to the host endianness. If userspace does not set an * explicit endianness, the default behavior is native endian, as * expected by legacy virtio. */ Good idea, as this is easy to miss. vq-is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq-user_be; } +} +#else +static void vhost_init_is_le(struct vhost_virtqueue *vq) +{ + if (vhost_has_feature(vq, VIRTIO_F_VERSION_1)) + vq-is_le = true; +} +#endif + int vhost_init_used(struct vhost_virtqueue *vq) { __virtio16 last_used_idx; int r; - if (!vq-private_data) + if (!vq-private_data) { + vq-is_le = virtio_legacy_is_little_endian(); return 0; + } + + vhost_init_is_le(vq); r = vhost_update_used_flags(vq); if (r) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v4 7/8] vhost: feature to set the vring endianness
On Fri, 10 Apr 2015 12:19:16 +0200 Greg Kurz gk...@linux.vnet.ibm.com wrote: This patch brings cross-endian support to vhost when used to implement legacy virtio devices. Since it is a relatively rare situation, the feature availability is controlled by a kernel config option (not set by default). The vq-is_le boolean field is added to cache the endianness to be used for ring accesses. It defaults to native endian, as expected by legacy virtio devices. When the ring gets active, we force little endian if the device is modern. When the ring is deactivated, we revert to the native endian default. If cross-endian was compiled in, a vq-user_be boolean field is added so that userspace may request a specific endianness. This field is used to override the default when activating the ring of a legacy device. It has no effect on modern devices. Signed-off-by: Greg Kurz gk...@linux.vnet.ibm.com --- drivers/vhost/Kconfig | 10 ++ drivers/vhost/vhost.c | 76 +++- drivers/vhost/vhost.h | 12 +-- include/uapi/linux/vhost.h |9 + 4 files changed, 103 insertions(+), 4 deletions(-) +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY +static long vhost_set_vring_big_endian(struct vhost_virtqueue *vq, +int __user *argp) +{ + struct vhost_vring_state s; + + if (vq-private_data) + return -EBUSY; + + if (copy_from_user(s, argp, sizeof(s))) + return -EFAULT; + + if (s.num s.num != 1) Checking for s.num 1 might be more obvious at a glance? + return -EINVAL; + + vq-user_be = s.num; + + return 0; +} + (...) +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY +static void vhost_init_is_le(struct vhost_virtqueue *vq) +{ + vq-is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq-user_be; So if the endianness is not explicitly set to BE, it will be forced to LE (instead of native endian)? Won't that break userspace that does not yet use the new interface when CONFIG_VHOST_SET_ENDIAN_LEGACY is set? +} +#else +static void vhost_init_is_le(struct vhost_virtqueue *vq) +{ + if (vhost_has_feature(vq, VIRTIO_F_VERSION_1)) + vq-is_le = true; +} +#endif + int vhost_init_used(struct vhost_virtqueue *vq) { __virtio16 last_used_idx; int r; - if (!vq-private_data) + if (!vq-private_data) { + vq-is_le = virtio_legacy_is_little_endian(); return 0; + } + + vhost_init_is_le(vq); r = vhost_update_used_flags(vq); if (r) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 7/7] vhost: feature to set the vring endianness
On Tue, 07 Apr 2015 14:19:31 +0200 Greg Kurz gk...@linux.vnet.ibm.com wrote: This patch brings cross-endian support to vhost when used to implement legacy virtio devices. Since it is a relatively rare situation, the feature availability is controlled by a kernel config option (not set by default). The ioctls introduced by this patch are for legacy only: virtio 1.0 devices are returned EPERM. Signed-off-by: Greg Kurz gk...@linux.vnet.ibm.com --- drivers/vhost/Kconfig | 10 drivers/vhost/vhost.c | 55 drivers/vhost/vhost.h | 17 +- include/uapi/linux/vhost.h |5 4 files changed, 86 insertions(+), 1 deletion(-) +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY +static long vhost_set_vring_endian_legacy(struct vhost_virtqueue *vq, + void __user *argp) +{ + struct vhost_vring_state s; + + if (vhost_has_feature(vq, VIRTIO_F_VERSION_1)) + return -EPERM; + + if (copy_from_user(s, argp, sizeof(s))) + return -EFAULT; + + vq-legacy_is_little_endian = !!s.num; + return 0; +} + +static long vhost_get_vring_endian_legacy(struct vhost_virtqueue *vq, + u32 idx, + void __user *argp) +{ + struct vhost_vring_state s = { + .index = idx, + .num = vq-legacy_is_little_endian + }; + + if (vhost_has_feature(vq, VIRTIO_F_VERSION_1)) + return -EPERM; + + if (copy_to_user(argp, s, sizeof(s))) + return -EFAULT; + + return 0; +} +#else +static long vhost_set_vring_endian_legacy(struct vhost_virtqueue *vq, + void __user *argp) +{ + return 0; I'm wondering whether this handler should return an error if the feature is not configured for the kernel? How can the userspace caller find out whether it has successfully prompted the kernel to handle the endianness correctly? +} + +static long vhost_get_vring_endian_legacy(struct vhost_virtqueue *vq, + u32 idx, + void __user *argp) +{ + return 0; +} +#endif /* CONFIG_VHOST_SET_ENDIAN_LEGACY */ -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH/RFC 6/9] KVM: s390: Guest's memory access functions get access registers
On Mon, 16 Mar 2015 09:51:42 +0100 Christian Borntraeger borntrae...@de.ibm.com wrote: From: Alexander Yarygin yary...@linux.vnet.ibm.com In access register mode, the write_guest() read_guest() and other functions will invoke the access register translation, which requires an ar, designated by one of the instruction fields. Signed-off-by: Alexander Yarygin yary...@linux.vnet.ibm.com Reviewed-by: Thomas Huth th...@linux.vnet.ibm.com Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com --- arch/s390/kvm/diag.c | 4 +-- arch/s390/kvm/gaccess.c | 8 ++--- arch/s390/kvm/gaccess.h | 16 +- arch/s390/kvm/intercept.c | 4 +-- arch/s390/kvm/kvm-s390.c | 10 +++--- arch/s390/kvm/kvm-s390.h | 26 +--- arch/s390/kvm/priv.c | 79 --- arch/s390/kvm/sigp.c | 4 +-- 8 files changed, 93 insertions(+), 58 deletions(-) Acked-by: Cornelia Huck cornelia.h...@de.ibm.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH/RFC 7/9] KVM: s390: Optimize paths where get_vcpu_asce() is invoked
On Mon, 16 Mar 2015 09:51:43 +0100 Christian Borntraeger borntrae...@de.ibm.com wrote: From: Alexander Yarygin yary...@linux.vnet.ibm.com During dynamic address translation the get_vcpu_asce() function can be invoked several times. It's ok for usual modes, but will be slow if CPUs are in AR mode. Let's call the get_vcpu_asce() once and pass the result to the called functions. Signed-off-by: Alexander Yarygin yary...@linux.vnet.ibm.com Reviewed-by: Thomas Huth th...@linux.vnet.ibm.com Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com --- arch/s390/kvm/gaccess.c | 25 - 1 file changed, 12 insertions(+), 13 deletions(-) Acked-by: Cornelia Huck cornelia.h...@de.ibm.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH/RFC 4/9] KVM: s390: Add MEMOP ioctls for reading/writing guest memory
On Mon, 16 Mar 2015 09:51:40 +0100 Christian Borntraeger borntrae...@de.ibm.com wrote: From: Thomas Huth th...@linux.vnet.ibm.com On s390, we've got to make sure to hold the IPTE lock while accessing logical memory. So let's add an ioctl for reading and writing logical memory to provide this feature for userspace, too. The maximum transfer size of this call is limited to 64kB to prevent that the guest can trigger huge copy_from/to_user transfers. QEMU currently only requests up to one or two pages so far, so 16*4kB seems to be a reasonable limit here. Signed-off-by: Thomas Huth th...@linux.vnet.ibm.com Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com --- Documentation/virtual/kvm/api.txt | 46 arch/s390/kvm/gaccess.c | 22 arch/s390/kvm/gaccess.h | 2 ++ arch/s390/kvm/kvm-s390.c | 74 +++ include/uapi/linux/kvm.h | 21 +++ 5 files changed, 165 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index ee47998e..f03178d 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2716,6 +2716,52 @@ The fields in each entry are defined as follows: eax, ebx, ecx, edx: the values returned by the cpuid instruction for this function/index combination +4.89 KVM_S390_MEM_OP + +Capability: KVM_CAP_S390_MEM_OP +Architectures: s390 +Type: vcpu ioctl +Parameters: struct kvm_s390_mem_op (in) +Returns: = 0 on success, + 0 on generic error (e.g. -EFAULT or -ENOMEM), + 0 if an exception occurred while walking the page tables + +Read or write data from/to the logical (virtual) memory of a VPCU. + +Parameters are specified via the following structure: + +struct kvm_s390_mem_op { + __u64 gaddr;/* the guest address */ + __u64 flags;/* arch specific flags */ Drop the arch? This is a s390-only ioctl :) + __u32 size; /* amount of bytes */ + __u32 op; /* type of operation */ + __u64 buf; /* buffer in userspace */ + __u8 ar;/* the access register number */ This makes me wonder whether the patches should be reordered to introduce access register mode first: It seems strange that you can specify a parameter that is ignored. Same for the STSI patch. + __u8 reserved[31]; /* should be set to 0 */ +}; + +The type of operation is specified in the op field. It is either +KVM_S390_MEMOP_LOGICAL_READ for reading from logical memory space or +KVM_S390_MEMOP_LOGICAL_WRITE for writing to logical memory space. The +KVM_S390_MEMOP_F_CHECK_ONLY flag can be set in the flags field to check +whether the corresponding memory access would create an access exception +(without touching the data in the memory at the destination). In case an +access exception occurred while walking the MMU tables of the guest, the +ioctl returns a positive error number to indicate the type of exception. +This exception is also raised directly at the corresponding VCPU if the +flag KVM_S390_MEMOP_F_INJECT_EXCEPTION is set in the flags field. + +The start address of the memory region has to be specified in the gaddr +field, and the length of the region in the size field. buf is the buffer +supplied by the userspace application where the read data should be written +to for KVM_S390_MEMOP_LOGICAL_READ, or where the data that should be written +is stored for a KVM_S390_MEMOP_LOGICAL_WRITE. buf is unused and can be NULL +when KVM_S390_MEMOP_F_CHECK_ONLY is specified. ar designates the access +register number to be used. + +The reserved field is meant for future extensions. It is not used by +KVM with the currently defined set of flags. + 5. The kvm_run structure -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH/RFC 2/9] KVM: s390: cleanup jump lables in kvm_arch_init_vm
On Mon, 16 Mar 2015 09:51:38 +0100 Christian Borntraeger borntrae...@de.ibm.com wrote: From: Dominik Dingel din...@linux.vnet.ibm.com As all cleanup functions can handle their respective NULL case there is no need to have more than one error jump label. Signed-off-by: Dominik Dingel din...@linux.vnet.ibm.com Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com --- arch/s390/kvm/kvm-s390.c | 14 +- 1 file changed, 5 insertions(+), 9 deletions(-) Acked-by: Cornelia Huck cornelia.h...@de.ibm.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH/RFC 9/9] KVM: s390: Create ioctl for Getting/Setting guest storage keys
On Mon, 16 Mar 2015 09:51:45 +0100 Christian Borntraeger borntrae...@de.ibm.com wrote: From: Jason J. Herne jjhe...@linux.vnet.ibm.com Provide the KVM_S390_GET_SKEYS and KVM_S390_SET_SKEYS ioctl which can be used to get/set guest storage keys. This functionality is needed for live migration of s390 guests that use storage keys. Signed-off-by: Jason J. Herne jjhe...@linux.vnet.ibm.com Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com --- Documentation/virtual/kvm/api.txt | 58 ++ arch/s390/kvm/kvm-s390.c | 122 ++ include/uapi/linux/kvm.h | 14 + 3 files changed, 194 insertions(+) @@ -791,6 +801,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_S390_VECTOR_REGISTERS 107 #define KVM_CAP_S390_MEM_OP 108 #define KVM_CAP_S390_USER_STSI 109 +#define KVM_CAP_S390_SKEYS 110 Hm... did you forget to actually advertise this cap? #ifdef KVM_CAP_IRQ_ROUTING -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH/RFC 3/9] KVM: s390: Fix low-address protection for real addresses
On Mon, 16 Mar 2015 09:51:39 +0100 Christian Borntraeger borntrae...@de.ibm.com wrote: From: Alexander Yarygin yary...@linux.vnet.ibm.com The kvm_s390_check_low_addr_protection() function is used only with real addresses. According to the POP (the Low-Address Protection paragraph in chapter 3), if the effective address is real or absolute, the low-address protection procedure should rise PROTECTION exception s/rise/raise a/ only when the low-address protection is enabled in the control register 0 and the address is low. This patch removes ASCE checks from the function and renames it to better reflect its behavior. Cc: Thomas Huth th...@linux.vnet.ibm.com Signed-off-by: Alexander Yarygin yary...@linux.vnet.ibm.com Reviewed-by: Thomas Huth th...@linux.vnet.ibm.com Reviewed-by: David Hildenbrand d...@linux.vnet.ibm.com Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com --- arch/s390/kvm/gaccess.c | 11 ++- arch/s390/kvm/gaccess.h | 2 +- arch/s390/kvm/priv.c| 4 ++-- 3 files changed, 9 insertions(+), 8 deletions(-) Acked-by: Cornelia Huck cornelia.h...@de.ibm.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: virtio fixes pull for 4.0?
On Mon, 09 Mar 2015 17:43:19 +1030 Rusty Russell ru...@rustcorp.com.au wrote: Michael S. Tsirkin m...@redhat.com writes: virtio-balloon: do not call blocking ops when !TASK_RUNNING Rusty, it seems you prefer a different fix for this issue, while Cornelia prefers mine. Maybe both me and Cornelia misunderstand the issue? I know you dealt with a ton of similar issues recently so you are more likely to be right, but I'd like to understand what's going on better all the same. Could you help please? In the longer run, we should handle failures from these callbacks. But we don't need to do that right now. So we want the minimal fix. And an annotation is the minimal fix. The bug has been there for ages; it's just the warning that is new (if we *always* slept, we would spin, but in practice we'll rarely sleep). I don't think doing_io() in virtio-ccw will sleep often, although it might happen under loads that delay posting the interrupt. I understand why you'd like to do the minimal fix, and it's not likely that new problems start popping up now. So I'm OK with doing the annotation for 4.0 and more involved changes for 4.1. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: virtio balloon: do not call blocking ops when !TASK_RUNNING
On Mon, 2 Mar 2015 21:44:10 +0100 Michael S. Tsirkin m...@redhat.com wrote: Normally, hotunplug requires guest cooperation. IOW unplug request should send guest interrupt, then block until guest confirms it's not using the device anymore. virtio pci already handles that fine, can't ccw do something similar? Hotunplug for channel devices does not require guest feedback. (In fact, I was surprised to hear that there is somthing like guest cooperation on other platforms.) Consider a storage device. If you don't flush out caches before removing the disk, you might lose a bunch of data. Yes, that is a problem. But hotunplug is indistinguishable from a hw failure on s390, so there's not really much we can do here. Basically, the guest is simply presented with the fact that the device is gone and has to deal with it. It does not matter whether the device was removed by operator request or due to a hardware failure. (We do have support in the s390 channel device core to be able to deal with devices going away and coming back gracefully. ccw devices can be put into a special state where they retain their configuration so that they can be reactivated if they become available again. For example, dasd (disk) devices survive being detached and reattached just fine, even under I/O load. See the -notify() callback of the ccw driver for details.) How does guest distinguish between this and intentional permanent removal? It can't. It will get the same kind of notifications (and channel I/O failures) for both. Only the admin has a chance of knowing, and they may kill off a device in that state permanently (which, of course, triggers the flush problems etc. which have just been delayed from the initial detach). Given that this is what the architecture gives us on all hypervisors (LPAR and z/VM) and is for all I know decades old, it is what we have to implement in qemu/kvm as well. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: virtio balloon: do not call blocking ops when !TASK_RUNNING
On Wed, 4 Mar 2015 11:25:56 +0100 Michael S. Tsirkin m...@redhat.com wrote: On Wed, Mar 04, 2015 at 04:44:54PM +1030, Rusty Russell wrote: Michael S. Tsirkin m...@redhat.com writes: On Mon, Mar 02, 2015 at 10:37:26AM +1030, Rusty Russell wrote: Thomas Huth th...@linux.vnet.ibm.com writes: On Thu, 26 Feb 2015 11:50:42 +1030 Rusty Russell ru...@rustcorp.com.au wrote: Thomas Huth th...@linux.vnet.ibm.com writes: Hi all, with the recent kernel 3.19, I get a kernel warning when I start my KVM guest on s390 with virtio balloon enabled: The deeper problem is that virtio_ccw_get_config just silently fails on OOM. Neither get_config nor set_config are expected to fail. AFAIK this is currently not a problem. According to http://lwn.net/Articles/627419/ these kmalloc calls never fail because they allocate less than a page. I strongly suggest you unlearn that fact. The fix for this is in two parts: 1) Annotate using sched_annotate_sleep() and add a comment: we may spin a few times in low memory situations, but this isn't a high performance path. 2) Handle get_config (and other) failure in some more elegant way. Cheers, Rusty. I agree, but I'd like to point out that even without kmalloc, on s390 get_config is blocking - it's waiting for a hardware interrupt. And it makes sense: config is not data path, I don't think we should spin there. So I think besides these two parts, we still need my two patches: virtio-balloon: do not call blocking ops when !TASK_RUNNING I prefer to annotate, over trying to fix this. Because it's not important. We might spin a few times, but it's very unlikely, and it's certainly not performance critical. Thanks, Rusty. Subject: virtio_balloon: annotate possible sleep waiting for event. CCW (s390) does this. Reported-by: Thomas Huth th...@linux.vnet.ibm.com Signed-off-by: Rusty Russell ru...@rustcorp.com.au diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 0413157f3b49..3f4d5acdbde0 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -340,6 +340,15 @@ static int balloon(void *_vballoon) s64 diff; try_to_freeze(); + + /* +* Reading the config on the ccw backend involves an +* allocation, so we may actually sleep and have an +* extra iteration. It's extremely unlikely, Hmm, this part of the comment seems wrong to me. Reading the config on the ccw backend always sleeps because it's interrupt driven. (...) So I suspect http://mid.gmane.org/1424874878-17155-1-git-send-email-...@redhat.com is better. What do you think? I'd prefer to fix this as well. While the I/O request completes instantly on current qemu (the ssch backend handles the start function immediately, not asynchronously as on real hardware), this (a) is an implementation detail that may change and (b) doesn't account for the need to deliver the interrupt to the guest - which might take non-zero time. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: virtio balloon: do not call blocking ops when !TASK_RUNNING
On Mon, 2 Mar 2015 12:46:57 +0100 Michael S. Tsirkin m...@redhat.com wrote: On Mon, Mar 02, 2015 at 12:31:06PM +0100, Cornelia Huck wrote: On Mon, 2 Mar 2015 12:13:58 +0100 Michael S. Tsirkin m...@redhat.com wrote: On Mon, Mar 02, 2015 at 10:37:26AM +1030, Rusty Russell wrote: Thomas Huth th...@linux.vnet.ibm.com writes: On Thu, 26 Feb 2015 11:50:42 +1030 Rusty Russell ru...@rustcorp.com.au wrote: Thomas Huth th...@linux.vnet.ibm.com writes: Hi all, with the recent kernel 3.19, I get a kernel warning when I start my KVM guest on s390 with virtio balloon enabled: The deeper problem is that virtio_ccw_get_config just silently fails on OOM. Neither get_config nor set_config are expected to fail. AFAIK this is currently not a problem. According to http://lwn.net/Articles/627419/ these kmalloc calls never fail because they allocate less than a page. I strongly suggest you unlearn that fact. The fix for this is in two parts: 1) Annotate using sched_annotate_sleep() and add a comment: we may spin a few times in low memory situations, but this isn't a high performance path. 2) Handle get_config (and other) failure in some more elegant way. Do you mean we need to enable the caller to deal with get_config failures (and the transport to relay those failures)? I agree with that. We can certainly tweak code to bypass need to kmalloc on get_config. Why is it doing these allocs? What's wrong with using vcdev-config directly? We'd need to make sure that vcdev-config is allocated with GFP_DMA, as we need it to be under 2G. And we need to be more careful wrt serialization, especially if we want to reuse the ccw structure as well, for example. Nothing complicated, I'd just need some free time to do it :) The more likely reason for get_config to fail is a device hotunplug, however. We'll get a seperate notification about that (via machine check + channel report), but it would be nice if we could stop poking the device immediately, as there's no use trying to do something with it anymore. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Qemu and virtio 1.0
On Mon, 2 Mar 2015 12:43:43 +0100 Michael S. Tsirkin m...@redhat.com wrote: On Wed, Feb 25, 2015 at 02:50:22PM +1030, Rusty Russell wrote: OK, I am trying to experiment with virtio 1.0 support using the latest kernel and MST's qemu tree: https://git.kernel.org/cgit/virt/kvm/mst/qemu.git/?h=virtio-1.0 The first issue is that the device config endian was wrong (see attached patch). I'm now setting up a BE guest on my x86 laptop, and a BE and LE guest on a BE powerpc machine, to check that all combinations work correctly. If others test too, that would be appreciated! Cheers, Rusty. Thanks a lot for finding this! The issue is certainly there, though I think looking at guest features is not the right thing to do: drivers can access config before acking features. Ah right. I'm just wondering what the device-specific accessors (in net and so on) will do? -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: virtio balloon: do not call blocking ops when !TASK_RUNNING
On Mon, 2 Mar 2015 13:19:43 +0100 Michael S. Tsirkin m...@redhat.com wrote: On Mon, Mar 02, 2015 at 01:11:02PM +0100, Cornelia Huck wrote: On Mon, 2 Mar 2015 12:46:57 +0100 Michael S. Tsirkin m...@redhat.com wrote: On Mon, Mar 02, 2015 at 12:31:06PM +0100, Cornelia Huck wrote: On Mon, 2 Mar 2015 12:13:58 +0100 Michael S. Tsirkin m...@redhat.com wrote: On Mon, Mar 02, 2015 at 10:37:26AM +1030, Rusty Russell wrote: Thomas Huth th...@linux.vnet.ibm.com writes: On Thu, 26 Feb 2015 11:50:42 +1030 Rusty Russell ru...@rustcorp.com.au wrote: Thomas Huth th...@linux.vnet.ibm.com writes: Hi all, with the recent kernel 3.19, I get a kernel warning when I start my KVM guest on s390 with virtio balloon enabled: The deeper problem is that virtio_ccw_get_config just silently fails on OOM. Neither get_config nor set_config are expected to fail. AFAIK this is currently not a problem. According to http://lwn.net/Articles/627419/ these kmalloc calls never fail because they allocate less than a page. I strongly suggest you unlearn that fact. The fix for this is in two parts: 1) Annotate using sched_annotate_sleep() and add a comment: we may spin a few times in low memory situations, but this isn't a high performance path. 2) Handle get_config (and other) failure in some more elegant way. Do you mean we need to enable the caller to deal with get_config failures (and the transport to relay those failures)? I agree with that. We can certainly tweak code to bypass need to kmalloc on get_config. Why is it doing these allocs? What's wrong with using vcdev-config directly? We'd need to make sure that vcdev-config is allocated with GFP_DMA, as we need it to be under 2G. And we need to be more careful wrt serialization, especially if we want to reuse the ccw structure as well, for example. Nothing complicated, I'd just need some free time to do it :) The more likely reason for get_config to fail is a device hotunplug, however. We'll get a seperate notification about that (via machine check + channel report), but it would be nice if we could stop poking the device immediately, as there's no use trying to do something with it anymore. Normally, hotunplug requires guest cooperation. IOW unplug request should send guest interrupt, then block until guest confirms it's not using the device anymore. virtio pci already handles that fine, can't ccw do something similar? Hotunplug for channel devices does not require guest feedback. (In fact, I was surprised to hear that there is somthing like guest cooperation on other platforms.) Basically, the guest is simply presented with the fact that the device is gone and has to deal with it. It does not matter whether the device was removed by operator request or due to a hardware failure. (We do have support in the s390 channel device core to be able to deal with devices going away and coming back gracefully. ccw devices can be put into a special state where they retain their configuration so that they can be reactivated if they become available again. For example, dasd (disk) devices survive being detached and reattached just fine, even under I/O load. See the -notify() callback of the ccw driver for details.) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: virtio balloon: do not call blocking ops when !TASK_RUNNING
On Mon, 2 Mar 2015 12:13:58 +0100 Michael S. Tsirkin m...@redhat.com wrote: On Mon, Mar 02, 2015 at 10:37:26AM +1030, Rusty Russell wrote: Thomas Huth th...@linux.vnet.ibm.com writes: On Thu, 26 Feb 2015 11:50:42 +1030 Rusty Russell ru...@rustcorp.com.au wrote: Thomas Huth th...@linux.vnet.ibm.com writes: Hi all, with the recent kernel 3.19, I get a kernel warning when I start my KVM guest on s390 with virtio balloon enabled: The deeper problem is that virtio_ccw_get_config just silently fails on OOM. Neither get_config nor set_config are expected to fail. AFAIK this is currently not a problem. According to http://lwn.net/Articles/627419/ these kmalloc calls never fail because they allocate less than a page. I strongly suggest you unlearn that fact. The fix for this is in two parts: 1) Annotate using sched_annotate_sleep() and add a comment: we may spin a few times in low memory situations, but this isn't a high performance path. 2) Handle get_config (and other) failure in some more elegant way. Do you mean we need to enable the caller to deal with get_config failures (and the transport to relay those failures)? I agree with that. Cheers, Rusty. I agree, but I'd like to point out that even without kmalloc, on s390 get_config is blocking - it's waiting for a hardware interrupt. And it makes sense: config is not data path, I don't think we should spin there. So I think besides these two parts, we still need my two patches: virtio-balloon: do not call blocking ops when !TASK_RUNNING virtio_console: avoid config access from irq in 4.0. agree? I agree that we need those fixes as well. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: virtio balloon: do not call blocking ops when !TASK_RUNNING
On Thu, 26 Feb 2015 11:50:42 +1030 Rusty Russell ru...@rustcorp.com.au wrote: Thomas Huth th...@linux.vnet.ibm.com writes: Hi all, with the recent kernel 3.19, I get a kernel warning when I start my KVM guest on s390 with virtio balloon enabled: The deeper problem is that virtio_ccw_get_config just silently fails on OOM. Neither get_config nor set_config are expected to fail. It is a problem that we cannot relay failures back to the caller: not only for the memory allocations. We need to do channel I/O, and any channel I/O can fail. For our virtio case, we don't have to deal with the failures that may happen on real hardware (like path failures), but what can happen is a hotunplug, which means we cannot talk to the device anymore from one moment to the other. Cornelia, I think ccw and config_area should be allocated inside vcdev. You could either use pointers, or simply allocate vcdev with GDP_DMA. This would avoid the kmalloc inside these calls. I can certainly look into that, but I'm not sure it's worth it. We still have to deal with possible failures from doing channel I/O. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: virtio balloon: do not call blocking ops when !TASK_RUNNING
On Thu, 26 Feb 2015 09:45:29 +0100 Michael S. Tsirkin m...@redhat.com wrote: On Thu, Feb 26, 2015 at 11:50:42AM +1030, Rusty Russell wrote: Thomas Huth th...@linux.vnet.ibm.com writes: Hi all, with the recent kernel 3.19, I get a kernel warning when I start my KVM guest on s390 with virtio balloon enabled: The deeper problem is that virtio_ccw_get_config just silently fails on OOM. Same problem with virtio_ccw_reset. But avoiding kmalloc calls in virtio_ccw_get_config isn't enough I think, it might still sleep. It is probably a problem with all calls into the transport that assume an implementation that cannot fail: If we have a channel I/O backing, we need to be able to handle things not working. The only case we need to care about for virtio is probably a -ENODEV triggered by a hotunplug, though. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] virtio-balloon: do not call blocking ops when !TASK_RUNNING
On Wed, 25 Feb 2015 15:14:36 +0100 Michael S. Tsirkin m...@redhat.com wrote: virtio balloon has this code: wait_event_interruptible(vb-config_change, (diff = towards_target(vb)) != 0 || vb-need_stats_update || kthread_should_stop() || freezing(current)); Which is a problem because towards_target() call might block after wait_event_interruptible sets task state to TAST_INTERRUPTIBLE, causing the task_struct::state collision typical of nesting of sleeping primitives See also http://lwn.net/Articles/628628/ or Thomas's bug report http://article.gmane.org/gmane.linux.kernel.virtualization/24846 for a fuller explanation. To fix, rewrite using wait_woken. Cc: sta...@vger.kernel.org Reported-by: Thomas Huth th...@linux.vnet.ibm.com Signed-off-by: Michael S. Tsirkin m...@redhat.com --- drivers/virtio/virtio_balloon.c | 14 ++ 1 file changed, 14 insertions(+) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 0413157..2f19f65 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -29,6 +29,7 @@ #include linux/module.h #include linux/balloon_compaction.h #include linux/oom.h +#include linux/wait.h /* * Balloon device works in 4K page units. So each page is pointed to by @@ -334,12 +335,25 @@ static int virtballoon_oom_notify(struct notifier_block *self, static int balloon(void *_vballoon) { struct virtio_balloon *vb = _vballoon; + DEFINE_WAIT_FUNC(wait, woken_wake_function); set_freezable(); while (!kthread_should_stop()) { s64 diff; try_to_freeze(); + + add_wait_queue(vb-config_change, wait); + for (;;) { + if ((diff = towards_target(vb)) != 0 || + vb-need_stats_update || + kthread_should_stop() || + freezing(current)) + break; + wait_woken(wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); + } + remove_wait_queue(vb-config_change, wait); + wait_event_interruptible(vb-config_change, (diff = towards_target(vb)) != 0 || vb-need_stats_update Forgot to remove the wait_event_interruptible()? -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] virtio-balloon: do not call blocking ops when !TASK_RUNNING
On Wed, 25 Feb 2015 15:36:02 +0100 Michael S. Tsirkin m...@redhat.com wrote: virtio balloon has this code: wait_event_interruptible(vb-config_change, (diff = towards_target(vb)) != 0 || vb-need_stats_update || kthread_should_stop() || freezing(current)); Which is a problem because towards_target() call might block after wait_event_interruptible sets task state to TAST_INTERRUPTIBLE, causing the task_struct::state collision typical of nesting of sleeping primitives See also http://lwn.net/Articles/628628/ or Thomas's bug report http://article.gmane.org/gmane.linux.kernel.virtualization/24846 for a fuller explanation. To fix, rewrite using wait_woken. Cc: sta...@vger.kernel.org Reported-by: Thomas Huth th...@linux.vnet.ibm.com Signed-off-by: Michael S. Tsirkin m...@redhat.com --- changes from v1: remove wait_event_interruptible noticed by Cornelia Huck cornelia.h...@de.ibm.com drivers/virtio/virtio_balloon.c | 19 ++- 1 file changed, 14 insertions(+), 5 deletions(-) I was able to reproduce Thomas' original problem and can confirm that it is gone with this patch. Reviewed-by: Cornelia Huck cornelia.h...@de.ibm.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: virtio balloon: do not call blocking ops when !TASK_RUNNING
On Wed, 25 Feb 2015 11:13:18 +0100 Thomas Huth th...@linux.vnet.ibm.com wrote: Hi all, with the recent kernel 3.19, I get a kernel warning when I start my KVM guest on s390 with virtio balloon enabled: [0.839687] do not call blocking ops when !TASK_RUNNING; state=1 set at [00174a1e] prepare_to_wait_event+0x7e/0x108 [0.839694] [ cut here ] [0.839697] WARNING: at kernel/sched/core.c:7326 [0.839698] Modules linked in: [0.839702] CPU: 0 PID: 46 Comm: vballoon Not tainted 3.19.0 #233 [0.839705] task: 021d ti: 021d8000 task.ti: 021d8000 [0.839707] Krnl PSW : 0704c0018000 0015bf8e (__might_sleep+0x8e/0x98) [0.839713]R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 EA:3 Krnl GPRS: 000d 021d 0071 0001 [0.839718]00675ace 01998c50 [0.839720]00982134 0058f824 00a008a8 [0.839722]04d9 007ea992 0015bf8a 021dbc28 [0.839731] Krnl Code: 0015bf7e: c0200033e838 larl %r2,7d8fee 0015bf84: c0e50028cd62 brasl %r14,675a48 #0015bf8a: a7f40001 brc 15,15bf8c 0015bf8e: 9201a000 mvi 0(%r10),1 0015bf92: a7f4ffe2 brc 15,15bf56 0015bf96: 0707 bcr 0,%r7 0015bf98: ebdff0800024 stmg%r13,%r15,128(%r15) 0015bf9e: a7f13fe0 tmll%r15,16352 [0.839749] Call Trace: [0.839751] ([0015bf8a] __might_sleep+0x8a/0x98) [0.839756] [0028a562] __kmalloc+0x272/0x350 [0.839759] [0058f824] virtio_ccw_get_config+0x3c/0x100 [0.839762] [0049fcb0] balloon+0x1b8/0x330 [0.839765] [001529c8] kthread+0x120/0x138 [0.839767] [00683c22] kernel_thread_starter+0x6/0xc [0.839770] [00683c1c] kernel_thread_starter+0x0/0xc [0.839772] no locks held by vballoon/46. [0.839773] Last Breaking-Event-Address: [0.839776] [0015bf8a] __might_sleep+0x8a/0x98 [0.839778] ---[ end trace d27fcdfa27273d7c ]--- The problem seems to be this code in balloon() in drivers/virtio/virtio_balloon.c: wait_event_interruptible(vb-config_change, (diff = towards_target(vb)) != 0 || vb-need_stats_update || kthread_should_stop() || freezing(current)); wait_event_interruptible() sets the state of the current task to TASK_INTERRUPTIBLE, then checks the condition. The condition contains towards_target() which reads the virtio config space via virtio_cread(). On s390, this then triggers virtio_ccw_get_config() - and this function calls some other functions again that might sleep (e.g. kzalloc or wait_event in ccw_io_helper) ... and this causes the new kernel warning message with kernel 3.19. I think it would be quite difficult or at least ugly to rewrite virtio_ccw_get_config() so that it does not call sleepable functions anymore. Yes: The config-space interacting functions for virtio-ccw trigger channel I/O, which is by nature asynchronous. No way to get this non-sleeping without really ugly hacks. So would it be feasible to rewrite the balloon() function that it does not call the towards_target() in its wait_event condition anymore? I am unfortunately not that familiar with the balloon code semantics, so any help is very appreciated here! It might be possible to use nested wait event functions like wake_woken(), but I haven't looked into that deeply. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Qemu and virtio 1.0
On Wed, 25 Feb 2015 14:50:22 +1030 Rusty Russell ru...@rustcorp.com.au wrote: OK, I am trying to experiment with virtio 1.0 support using the latest kernel and MST's qemu tree: https://git.kernel.org/cgit/virt/kvm/mst/qemu.git/?h=virtio-1.0 The first issue is that the device config endian was wrong (see attached patch). I'm now setting up a BE guest on my x86 laptop, and a BE and LE guest on a BE powerpc machine, to check that all combinations work correctly. If others test too, that would be appreciated! My virtio-1 work had been taking the back seat recently, but I plan to look into it again soon. Cheers, Rusty. From 95ac91554ed602f856a2a5fcc25eaffcad1b1c8d Mon Sep 17 00:00:00 2001 From: Rusty Russell ru...@rustcorp.com.au Date: Tue, 24 Feb 2015 14:47:44 +1030 Subject: [PATCH] virtio_config_write*/virtio_config_read*: Don't endian swap for virtio 1.0. Ah, virtio-ccw doesn't use these config space accessors, that's why I did not look at them. Signed-off-by: Rusty Russell ru...@rustcorp.com.au diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 079944c..882a31b 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -662,7 +662,12 @@ uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr) k-get_config(vdev, vdev-config); -val = lduw_p(vdev-config + addr); +/* Virtio 1.0 is always LE */ +if (virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { +val = lduw_le_p(vdev-config + addr); +} else { +val = lduw_p(vdev-config + addr); +} Can't you use the virtio_* helpers for that? return val; } looks at the callers It seems virtio-pci does some conditional swapping based on virtio-endianness already, which should account for virtio-1. virtio-mmio does not seem to do so. But: Device code accessing config space already does use the virtio accessors - or virtio-ccw would not work as it simply copies the whole config space. So it seems this change simply undos the endian swap in virtio-pci (while probably breaking virtio-mmio). Can we simply get rid of the endian swap in virtio-pci instead, or have I been throuroughly confused? -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: s390: Add MEMOP ioctls for reading/writing guest memory
On Mon, 16 Feb 2015 13:16:41 +0100 Thomas Huth th...@linux.vnet.ibm.com wrote: On s390, we've got to make sure to hold the IPTE lock while accessing logical memory. So let's add an ioctl for reading and writing logical memory to provide this feature for userspace, too. Signed-off-by: Thomas Huth th...@linux.vnet.ibm.com --- Documentation/virtual/kvm/api.txt | 45 +++ arch/s390/kvm/gaccess.c | 22 +++ arch/s390/kvm/gaccess.h |2 + arch/s390/kvm/kvm-s390.c | 70 + include/uapi/linux/kvm.h | 20 ++ 5 files changed, 159 insertions(+), 0 deletions(-) Looks good, except for one minor thing: +static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, + struct kvm_s390_mem_op *mop) +{ + void __user *uaddr = (void __user *)mop-buf; + void *tmpbuf = NULL; + int r, srcu_idx; + const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION + | KVM_S390_MEMOP_F_CHECK_ONLY; + + if (mop-flags ~supported_flags) + return -EINVAL; + + if (mop-size 16 * PAGE_SIZE) + return -E2BIG; Do you want to document this limit? Or make it discoverable by having the capability check return the number of pages? -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH RFC 1/1] KVM: s390: Add MEMOP ioctl for reading/writing guest memory
On Wed, 04 Feb 2015 09:26:11 +0100 Christian Borntraeger borntrae...@de.ibm.com wrote: the classic channel I/O does set the storage key change/reference and also triggers errors in the storage key protection value mismatches. Just a bit of background for the benefit of innocent bystanders: When classic channel I/O is initiated, the caller provides a so-called operation request block (orb) which references the actual channel program it wants to start. In this same orb, the caller also provides the storage key to be used for all memory fetches (either of the individual channel commands or of the memory they reference). qemu interpretation of channel commands currently ignores all of this. Conny: I am asking myself, if we should explicitly add a comment in the virtio-ccw spec, that all accesses are assumed to be with key 0 and thus never cause key protection. The change/reference bit is set by the underlying I/O or memory copy anyway. We can then add a ccw later on to set a different key if we ever need that. I don't think we need to clarify anything for the normal channel commands used by virtio: They are treated as any other channel command wrt key protection; and the lack of key checking is a feature of qemu, not of the spec. For the memory used for the virtqueues, I agree that we should clarify that we assume key 0. I'm not sure whether there's a case for extending this in the future, but who knows. I'll probably have to open an issue against the virtio spec for this. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] [PATCH RFC v5 05/19] virtio: support more feature bits
On Tue, 2 Dec 2014 14:00:13 +0100 Cornelia Huck cornelia.h...@de.ibm.com wrote: diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h index 070006c..23d713b 100644 --- a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h @@ -51,6 +51,17 @@ extern PropertyInfo qdev_prop_arraylen; .defval= (bool)_defval, \ } +#define DEFINE_PROP_BIT64(_name, _state, _field, _bit, _defval) { \ +.name = (_name),\ +.info = (qdev_prop_bit), \ +.bitnr= (_bit), \ +.offset= offsetof(_state, _field)\ ++ type_check(uint64_t,typeof_field(_state, _field)), \ +.qtype = QTYPE_QBOOL,\ +.defval= (bool)_defval, \ +} + + #define DEFINE_PROP_BOOL(_name, _state, _field, _defval) { \ .name = (_name),\ .info = (qdev_prop_bool), \ This one is of course broken. I'll send an updated patch tomorrow. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] [PATCH RFC v5 18/19] virtio: support revision-specific features
On Tue, 2 Dec 2014 14:00:26 +0100 Cornelia Huck cornelia.h...@de.ibm.com wrote: Devices may support different sets of feature bits depending on which revision they're operating at. Let's give the transport a way to re-query the device about its features when the revision has been changed. Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com --- hw/s390x/virtio-ccw.c | 12 ++-- hw/virtio/virtio-bus.c | 14 -- include/hw/virtio/virtio-bus.h |3 +++ include/hw/virtio/virtio.h |3 +++ 4 files changed, 28 insertions(+), 4 deletions(-) There seems to be something wrong with this patch - I noticed when I fixed prop_bit64. Needs debugging. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH RFC v5 14/19] s390x/virtio-ccw: enable virtio 1.0
On Tue, 9 Dec 2014 15:46:46 +0200 Michael S. Tsirkin m...@redhat.com wrote: On Tue, Dec 02, 2014 at 02:00:22PM +0100, Cornelia Huck wrote: virtio-ccw should now have everything in place to operate virtio 1.0 devices, so let's enable revision 1. Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com Looks like this will allow revision 1 for all devices, we only want this for virtio 1 devices. The following should fix it I think: Signed-off-by: Michael S. Tsirkin m...@redhat.com @@ -104,6 +101,12 @@ struct VirtioCcwDevice { uint64_t ind_bit; }; +/* The maximum virtio revision we support. */ +static int virtio_ccw_rev_max(VirtioCcwDevice *dev) Make this static inline int and I'm fine with this :) +{ +return dev-host_features (1ULL VIRTIO_F_VERSION_1) ? 1 : 0; +} + /* virtual css bus type */ typedef struct VirtualCssBus { BusState parent_obj; -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH RFC v5 10/19] s390x/virtio-ccw: add virtio set-revision call
On Thu, 4 Dec 2014 18:20:05 +0200 Michael S. Tsirkin m...@redhat.com wrote: On Tue, Dec 02, 2014 at 02:00:18PM +0100, Cornelia Huck wrote: From: Thomas Huth th...@linux.vnet.ibm.com Handle the virtio-ccw revision according to what the guest sets. When revision 1 is selected, we have a virtio-1 standard device with byteswapping for the virtio rings. When a channel gets disabled, we have to revert to the legacy behavior in case the next user of the device does not negotiate the revision 1 anymore (e.g. the boot firmware uses revision 1, but the operating system only uses the legacy mode). Note that revisions 0 are still disabled. Signed-off-by: Thomas Huth th...@linux.vnet.ibm.com Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com --- hw/s390x/virtio-ccw.c | 52 + hw/s390x/virtio-ccw.h |5 + 2 files changed, 57 insertions(+) @@ -747,6 +797,8 @@ static int virtio_ccw_device_init(VirtioCcwDevice *dev, VirtIODevice *vdev) sch-id.cu_type = VIRTIO_CCW_CU_TYPE; sch-id.cu_model = vdev-device_id; +dev-revision = -1; + /* Set default feature bits that are offered by the host. */ dev-host_features = 0; virtio_add_feature(dev-host_features, VIRTIO_F_NOTIFY_ON_EMPTY); You should also clear it on device reset. Nope, revision survives reset and can only be cleared by disabling and reenabling the device. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH RFC v5 07/19] virtio: allow virtio-1 queue layout
On Tue, 2 Dec 2014 21:03:45 +0200 Michael S. Tsirkin m...@redhat.com wrote: On Tue, Dec 02, 2014 at 04:41:36PM +0100, Cornelia Huck wrote: void virtio_queue_set_num(VirtIODevice *vdev, int n, int num) { +/* + * For virtio-1 devices, the number of buffers may only be + * updated if the ring addresses have not yet been set up. Where does it say that? Hmpf, may have imagined that. This means we either need to track whether used/avail have been specified or calculated or move responsibility for re-calculation of used/avail for the old layout into the callers. + */ +if (virtio_has_feature(vdev, VIRTIO_F_VERSION_1) +vdev-vq[n].vring.desc) { +error_report(tried to modify buffer num for virtio-1 device); +return; +} /* Don't allow guest to flip queue between existent and * nonexistent states, or to set it to an invalid size. */ -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH RFC v5 07/19] virtio: allow virtio-1 queue layout
On Wed, 3 Dec 2014 10:27:36 +0100 Cornelia Huck cornelia.h...@de.ibm.com wrote: On Tue, 2 Dec 2014 21:03:45 +0200 Michael S. Tsirkin m...@redhat.com wrote: On Tue, Dec 02, 2014 at 04:41:36PM +0100, Cornelia Huck wrote: void virtio_queue_set_num(VirtIODevice *vdev, int n, int num) { +/* + * For virtio-1 devices, the number of buffers may only be + * updated if the ring addresses have not yet been set up. Where does it say that? Hmpf, may have imagined that. This means we either need to track whether used/avail have been specified or calculated or move responsibility for re-calculation of used/avail for the old layout into the callers. What about this one instead? diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c index 43b7e02..1e2a720 100644 --- a/hw/virtio/virtio-mmio.c +++ b/hw/virtio/virtio-mmio.c @@ -244,9 +244,13 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, case VIRTIO_MMIO_QUEUENUM: DPRINTF(mmio_queue write %d max %d\n, (int)value, VIRTQUEUE_MAX_SIZE); virtio_queue_set_num(vdev, vdev-queue_sel, value); +/* Note: only call this function for legacy devices */ +virtio_queue_update_rings(vdev, vdev-queue_sel); break; case VIRTIO_MMIO_QUEUEALIGN: +/* Note: this is only valid for legacy devices */ virtio_queue_set_align(vdev, vdev-queue_sel, value); +virtio_queue_update_rings(vdev, vdev-queue_sel); break; case VIRTIO_MMIO_QUEUEPFN: if (value == 0) { diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 8f69ffa..b2d553e 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -69,7 +69,6 @@ typedef struct VRing struct VirtQueue { VRing vring; -hwaddr pa; uint16_t last_avail_idx; /* Last used index value we have signalled on */ uint16_t signalled_used; @@ -92,15 +91,18 @@ struct VirtQueue }; /* virt queue functions */ -static void virtqueue_init(VirtQueue *vq) +void virtio_queue_update_rings(VirtIODevice *vdev, int n) { -hwaddr pa = vq-pa; +VRing *vring = vdev-vq[n].vring; -vq-vring.desc = pa; -vq-vring.avail = pa + vq-vring.num * sizeof(VRingDesc); -vq-vring.used = vring_align(vq-vring.avail + - offsetof(VRingAvail, ring[vq-vring.num]), - vq-vring.align); +if (!vring-desc) { +/* not yet setup - nothing to do */ +return; +} +vring-avail = vring-desc + vring-num * sizeof(VRingDesc); +vring-used = vring_align(vring-avail + + offsetof(VRingAvail, ring[vring-num]), + vring-align); } static inline uint64_t vring_desc_addr(VirtIODevice *vdev, hwaddr desc_pa, @@ -605,7 +607,6 @@ void virtio_reset(void *opaque) vdev-vq[i].vring.avail = 0; vdev-vq[i].vring.used = 0; vdev-vq[i].last_avail_idx = 0; -vdev-vq[i].pa = 0; vdev-vq[i].vector = VIRTIO_NO_VECTOR; vdev-vq[i].signalled_used = 0; vdev-vq[i].signalled_used_valid = false; @@ -708,13 +709,21 @@ void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data) void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr) { -vdev-vq[n].pa = addr; -virtqueue_init(vdev-vq[n]); +vdev-vq[n].vring.desc = addr; +virtio_queue_update_rings(vdev, n); } hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n) { -return vdev-vq[n].pa; +return vdev-vq[n].vring.desc; +} + +void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc, +hwaddr avail, hwaddr used) +{ +vdev-vq[n].vring.desc = desc; +vdev-vq[n].vring.avail = avail; +vdev-vq[n].vring.used = used; } void virtio_queue_set_num(VirtIODevice *vdev, int n, int num) @@ -728,7 +737,6 @@ void virtio_queue_set_num(VirtIODevice *vdev, int n, int num) return; } vdev-vq[n].vring.num = num; -virtqueue_init(vdev-vq[n]); } int virtio_queue_get_num(VirtIODevice *vdev, int n) @@ -748,6 +756,11 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); +/* virtio-1 compliant devices cannot change the aligment */ +if (virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { +error_report(tried to modify queue alignment for virtio-1 device); +return; +} /* Check that the transport told us it was going to do this * (so a buggy transport will immediately assert rather than * silently failing to migrate this state) @@ -755,7 +768,6 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) assert(k-has_variable_vring_alignment); vdev-vq[n].vring.align = align; -virtqueue_init(vdev-vq[n]); } void virtio_queue_notify_vq(VirtQueue *vq) @@ -949,7 +961,8 @@ void
Re: [PATCH RFC v5 07/19] virtio: allow virtio-1 queue layout
On Wed, 3 Dec 2014 12:52:51 +0200 Michael S. Tsirkin m...@redhat.com wrote: On Wed, Dec 03, 2014 at 10:50:04AM +0100, Cornelia Huck wrote: diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c index 43b7e02..1e2a720 100644 --- a/hw/virtio/virtio-mmio.c +++ b/hw/virtio/virtio-mmio.c @@ -244,9 +244,13 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, case VIRTIO_MMIO_QUEUENUM: DPRINTF(mmio_queue write %d max %d\n, (int)value, VIRTQUEUE_MAX_SIZE); virtio_queue_set_num(vdev, vdev-queue_sel, value); +/* Note: only call this function for legacy devices */ +virtio_queue_update_rings(vdev, vdev-queue_sel); break; case VIRTIO_MMIO_QUEUEALIGN: +/* Note: this is only valid for legacy devices */ virtio_queue_set_align(vdev, vdev-queue_sel, value); +virtio_queue_update_rings(vdev, vdev-queue_sel); break; case VIRTIO_MMIO_QUEUEPFN: if (value == 0) { Let's just call virtio_queue_update_rings from virtio_queue_set_align? You're right, set_align is legacy only so we can always call update_rings. @@ -748,6 +756,11 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); +/* virtio-1 compliant devices cannot change the aligment */ +if (virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { +error_report(tried to modify queue alignment for virtio-1 device); +return; +} /* Check that the transport told us it was going to do this * (so a buggy transport will immediately assert rather than * silently failing to migrate this state) Do we have to touch this now? It's only used by MMIO, right? I don't think it hurts to put a guard in here. @@ -755,7 +768,6 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) assert(k-has_variable_vring_alignment); vdev-vq[n].vring.align = align; -virtqueue_init(vdev-vq[n]); Don't we need to update rings? See above, I'll call update_rings in there. } void virtio_queue_notify_vq(VirtQueue *vq) @@ -949,7 +961,8 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f) if (k-has_variable_vring_alignment) { qemu_put_be32(f, vdev-vq[i].vring.align); } -qemu_put_be64(f, vdev-vq[i].pa); +/* XXX virtio-1 devices */ +qemu_put_be64(f, vdev-vq[i].vring.desc); qemu_put_be16s(f, vdev-vq[i].last_avail_idx); if (k-save_queue) { k-save_queue(qbus-parent, i, f); @@ -1044,13 +1057,14 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) if (k-has_variable_vring_alignment) { vdev-vq[i].vring.align = qemu_get_be32(f); } -vdev-vq[i].pa = qemu_get_be64(f); +vdev-vq[i].vring.desc = qemu_get_be64(f); qemu_get_be16s(f, vdev-vq[i].last_avail_idx); vdev-vq[i].signalled_used_valid = false; vdev-vq[i].notification = true; -if (vdev-vq[i].pa) { -virtqueue_init(vdev-vq[i]); +if (vdev-vq[i].vring.desc) { +/* XXX virtio-1 devices */ What does XXX mean here? That I have not cared about migration of virtio-1 devices yet :) +virtio_queue_update_rings(vdev, i); } else if (vdev-vq[i].last_avail_idx) { error_report(VQ %d address 0x0 inconsistent with Host index 0x%x, -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH RFC v5 07/19] virtio: allow virtio-1 queue layout
On Wed, 3 Dec 2014 13:19:17 +0200 Michael S. Tsirkin m...@redhat.com wrote: On Wed, Dec 03, 2014 at 12:14:10PM +0100, Cornelia Huck wrote: On Wed, 3 Dec 2014 12:52:51 +0200 Michael S. Tsirkin m...@redhat.com wrote: On Wed, Dec 03, 2014 at 10:50:04AM +0100, Cornelia Huck wrote: @@ -748,6 +756,11 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); +/* virtio-1 compliant devices cannot change the aligment */ +if (virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { +error_report(tried to modify queue alignment for virtio-1 device); +return; +} /* Check that the transport told us it was going to do this * (so a buggy transport will immediately assert rather than * silently failing to migrate this state) Do we have to touch this now? It's only used by MMIO, right? I don't think it hurts to put a guard in here. I'd say let's not touch mmio ATM. This is not mmio but common code :) I don't really see how this can possibly hurt us; when mmio is converted to virtio-1, their queue setup code needs to be changed anyway. @@ -949,7 +961,8 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f) if (k-has_variable_vring_alignment) { qemu_put_be32(f, vdev-vq[i].vring.align); } -qemu_put_be64(f, vdev-vq[i].pa); +/* XXX virtio-1 devices */ +qemu_put_be64(f, vdev-vq[i].vring.desc); qemu_put_be16s(f, vdev-vq[i].last_avail_idx); if (k-save_queue) { k-save_queue(qbus-parent, i, f); @@ -1044,13 +1057,14 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) if (k-has_variable_vring_alignment) { vdev-vq[i].vring.align = qemu_get_be32(f); } -vdev-vq[i].pa = qemu_get_be64(f); +vdev-vq[i].vring.desc = qemu_get_be64(f); qemu_get_be16s(f, vdev-vq[i].last_avail_idx); vdev-vq[i].signalled_used_valid = false; vdev-vq[i].notification = true; -if (vdev-vq[i].pa) { -virtqueue_init(vdev-vq[i]); +if (vdev-vq[i].vring.desc) { +/* XXX virtio-1 devices */ What does XXX mean here? That I have not cared about migration of virtio-1 devices yet :) OK sure, but why put comment here not at start of function? I find it easier to annotate the places I notice. YMMV. +virtio_queue_update_rings(vdev, i); } else if (vdev-vq[i].last_avail_idx) { error_report(VQ %d address 0x0 inconsistent with Host index 0x%x, -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC v5 00/19] qemu: towards virtio-1 host support
Another iteration of virtio-1 patches for qemu, as always available on git://github.com/cohuck/qemu virtio-1 This one seems to work together with the current vhost-next patches (well, I can ping :) Changes from v4: - add helpers for feature bit manipulation and checking - use 64 bit feature bits instead of 32 bit arrays - infrastructure to allow devices to offer different sets of feature bits for legacy and standard devices - several fixes (mainly regarding, you guessed it, feature bits) Cornelia Huck (16): virtio: cull virtio_bus_set_vdev_features virtio: feature bit manipulation helpers virtio: add feature checking helpers virtio: support more feature bits virtio: endianness checks for virtio 1.0 devices virtio: allow virtio-1 queue layout dataplane: allow virtio-1 devices s390x/virtio-ccw: support virtio-1 set_vq format virtio: disallow late feature changes for virtio-1 virtio: allow to fail setting status s390x/virtio-ccw: enable virtio 1.0 virtio-net: no writeable mac for virtio-1 virtio-net: support longer header virtio-net: enable virtio 1.0 virtio: support revision-specific features virtio-blk: revision specific feature bits Thomas Huth (3): linux-headers/virtio_config: Update with VIRTIO_F_VERSION_1 s390x/css: Add a callback for when subchannel gets disabled s390x/virtio-ccw: add virtio set-revision call hw/9pfs/virtio-9p-device.c|4 +- hw/block/dataplane/virtio-blk.c |4 +- hw/block/virtio-blk.c | 44 +++-- hw/char/virtio-serial-bus.c |6 +- hw/net/virtio-net.c | 100 ++- hw/s390x/css.c| 12 ++ hw/s390x/css.h|1 + hw/s390x/s390-virtio-bus.c|3 +- hw/s390x/s390-virtio-bus.h|2 +- hw/s390x/virtio-ccw.c | 235 ++--- hw/s390x/virtio-ccw.h |8 +- hw/scsi/vhost-scsi.c |3 +- hw/scsi/virtio-scsi-dataplane.c |2 +- hw/scsi/virtio-scsi.c | 12 +- hw/virtio/Makefile.objs |2 +- hw/virtio/dataplane/Makefile.objs |2 +- hw/virtio/dataplane/vring.c | 96 +- hw/virtio/virtio-balloon.c|4 +- hw/virtio/virtio-bus.c| 24 ++- hw/virtio/virtio-mmio.c |6 +- hw/virtio/virtio-pci.c|7 +- hw/virtio/virtio-pci.h|2 +- hw/virtio/virtio-rng.c|2 +- hw/virtio/virtio.c| 83 +++-- include/hw/qdev-properties.h | 11 ++ include/hw/virtio/dataplane/vring-accessors.h | 75 include/hw/virtio/dataplane/vring.h | 14 +- include/hw/virtio/virtio-access.h |4 + include/hw/virtio/virtio-bus.h| 14 +- include/hw/virtio/virtio-net.h| 46 ++--- include/hw/virtio/virtio-scsi.h |6 +- include/hw/virtio/virtio.h| 61 +-- linux-headers/linux/virtio_config.h |3 + 33 files changed, 625 insertions(+), 273 deletions(-) create mode 100644 include/hw/virtio/dataplane/vring-accessors.h -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC v5 16/19] virtio-net: support longer header
virtio-1 devices always use num_buffers in the header, even if mergeable rx buffers have not been negotiated. Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com --- hw/net/virtio-net.c | 21 +++-- 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index ebbea60..7ee2bd6 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -373,15 +373,21 @@ static int peer_has_ufo(VirtIONet *n) return n-has_ufo; } -static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs) +static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, + int version_1) { int i; NetClientState *nc; n-mergeable_rx_bufs = mergeable_rx_bufs; -n-guest_hdr_len = n-mergeable_rx_bufs ? -sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr); +if (version_1) { +n-guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); +} else { +n-guest_hdr_len = n-mergeable_rx_bufs ? +sizeof(struct virtio_net_hdr_mrg_rxbuf) : +sizeof(struct virtio_net_hdr); +} for (i = 0; i n-max_queues; i++) { nc = qemu_get_subqueue(n-nic, i); @@ -525,7 +531,9 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) virtio_net_set_mrg_rx_bufs(n, __virtio_has_feature(features, -VIRTIO_NET_F_MRG_RXBUF)); +VIRTIO_NET_F_MRG_RXBUF), + __virtio_has_feature(features, +VIRTIO_F_VERSION_1)); if (n-has_vnet_hdr) { n-curr_guest_offloads = @@ -1407,7 +1415,8 @@ static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f, qemu_get_buffer(f, n-mac, ETH_ALEN); n-vqs[0].tx_waiting = qemu_get_be32(f); -virtio_net_set_mrg_rx_bufs(n, qemu_get_be32(f)); +virtio_net_set_mrg_rx_bufs(n, qemu_get_be32(f), + virtio_has_feature(vdev, VIRTIO_F_VERSION_1)); if (version_id = 3) n-status = qemu_get_be16(f); @@ -1653,7 +1662,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) n-vqs[0].tx_waiting = 0; n-tx_burst = n-net_conf.txburst; -virtio_net_set_mrg_rx_bufs(n, 0); +virtio_net_set_mrg_rx_bufs(n, 0, 0); n-promisc = 1; /* for compatibility */ n-mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC v5 17/19] virtio-net: enable virtio 1.0
virtio-net (non-vhost) now should have everything in place to support virtio 1.0: let's enable the feature bit for it. Note that VIRTIO_F_VERSION_1 is technically a transport feature; once every device is ready for virtio 1.0, we can move setting this feature bit out of the individual devices. Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com --- hw/net/virtio-net.c |1 + 1 file changed, 1 insertion(+) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 7ee2bd6..b5dd356 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -473,6 +473,7 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features) } if (!get_vhost_net(nc-peer)) { +virtio_add_feature(features, VIRTIO_F_VERSION_1); return features; } return vhost_net_get_features(get_vhost_net(nc-peer), features); -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC v5 19/19] virtio-blk: revision specific feature bits
Wire up virtio-blk to provide different feature bit sets depending on whether legacy or v1.0 has been requested. Note that VERSION_1 is still disabled due to missing ANY_LAYOUT support. Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com --- hw/block/virtio-blk.c | 19 +++ 1 file changed, 19 insertions(+) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 9cfae66..fdc236a 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -587,6 +587,24 @@ static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features) return features; } +static uint64_t virtio_blk_get_features_rev(VirtIODevice *vdev, +uint64_t features, +unsigned int revision) +{ +if (revision == 0) { +/* legacy */ +virtio_clear_feature(features, VIRTIO_F_VERSION_1); +return virtio_blk_get_features(vdev, features); +} +/* virtio 1.0 or later */ +virtio_clear_feature(features, VIRTIO_BLK_F_SCSI); +virtio_clear_feature(features, VIRTIO_BLK_F_CONFIG_WCE); +virtio_clear_feature(features, VIRTIO_BLK_F_WCE); +/* we're still missing ANY_LAYOUT */ +/* virtio_add_feature(features, VIRTIO_F_VERSION_1); */ +return features; +} + static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) { VirtIOBlock *s = VIRTIO_BLK(vdev); @@ -821,6 +839,7 @@ static void virtio_blk_class_init(ObjectClass *klass, void *data) vdc-get_config = virtio_blk_update_config; vdc-set_config = virtio_blk_set_config; vdc-get_features = virtio_blk_get_features; +vdc-get_features_rev = virtio_blk_get_features_rev; vdc-set_status = virtio_blk_set_status; vdc-reset = virtio_blk_reset; vdc-save = virtio_blk_save_device; -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC v5 18/19] virtio: support revision-specific features
Devices may support different sets of feature bits depending on which revision they're operating at. Let's give the transport a way to re-query the device about its features when the revision has been changed. Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com --- hw/s390x/virtio-ccw.c | 12 ++-- hw/virtio/virtio-bus.c | 14 -- include/hw/virtio/virtio-bus.h |3 +++ include/hw/virtio/virtio.h |3 +++ 4 files changed, 28 insertions(+), 4 deletions(-) diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index ec492b8..3826074 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -699,6 +699,10 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw) } ret = 0; dev-revision = revinfo.revision; +/* Re-evaluate which features the device wants to offer. */ +dev-host_features = +virtio_bus_get_vdev_features_rev(dev-bus, dev-host_features, + dev-revision = 1 ? 1 : 0); break; default: ret = -ENOSYS; @@ -712,6 +716,9 @@ static void virtio_sch_disable_cb(SubchDev *sch) VirtioCcwDevice *dev = sch-driver_data; dev-revision = -1; +/* Reset the device's features to legacy. */ +dev-host_features = +virtio_bus_get_vdev_features_rev(dev-bus, dev-host_features, 0); } static int virtio_ccw_device_init(VirtioCcwDevice *dev, VirtIODevice *vdev) @@ -854,8 +861,9 @@ static int virtio_ccw_device_init(VirtioCcwDevice *dev, VirtIODevice *vdev) virtio_add_feature(dev-host_features, VIRTIO_F_NOTIFY_ON_EMPTY); virtio_add_feature(dev-host_features, VIRTIO_F_BAD_FEATURE); -dev-host_features = virtio_bus_get_vdev_features(dev-bus, - dev-host_features); +/* All devices start in legacy mode. */ +dev-host_features = +virtio_bus_get_vdev_features_rev(dev-bus, dev-host_features, 0); css_generate_sch_crws(sch-cssid, sch-ssid, sch-schid, parent-hotplugged, 1); diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c index 32e3fab..a30826c 100644 --- a/hw/virtio/virtio-bus.c +++ b/hw/virtio/virtio-bus.c @@ -97,18 +97,28 @@ size_t virtio_bus_get_vdev_config_len(VirtioBusState *bus) } /* Get the features of the plugged device. */ -uint64_t virtio_bus_get_vdev_features(VirtioBusState *bus, - uint64_t requested_features) +uint64_t virtio_bus_get_vdev_features_rev(VirtioBusState *bus, + uint64_t requested_features, + unsigned int revision) { VirtIODevice *vdev = virtio_bus_get_device(bus); VirtioDeviceClass *k; assert(vdev != NULL); k = VIRTIO_DEVICE_GET_CLASS(vdev); +if (revision 0 k-get_features_rev) { +return k-get_features_rev(vdev, requested_features, revision); +} assert(k-get_features != NULL); return k-get_features(vdev, requested_features); } +uint64_t virtio_bus_get_vdev_features(VirtioBusState *bus, + uint64_t requested_features) +{ +return virtio_bus_get_vdev_features_rev(bus, requested_features, 0); +} + /* Get bad features of the plugged device. */ uint64_t virtio_bus_get_vdev_bad_features(VirtioBusState *bus) { diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h index 0a4dde1..f0916ef 100644 --- a/include/hw/virtio/virtio-bus.h +++ b/include/hw/virtio/virtio-bus.h @@ -84,6 +84,9 @@ size_t virtio_bus_get_vdev_config_len(VirtioBusState *bus); /* Get the features of the plugged device. */ uint64_t virtio_bus_get_vdev_features(VirtioBusState *bus, uint64_t requested_features); +uint64_t virtio_bus_get_vdev_features_rev(VirtioBusState *bus, + uint64_t requested_features, + unsigned int revision); /* Get bad features of the plugged device. */ uint64_t virtio_bus_get_vdev_bad_features(VirtioBusState *bus); /* Get config of the plugged device. */ diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index e7bedd1..f31e3df 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -147,6 +147,9 @@ typedef struct VirtioDeviceClass { DeviceRealize realize; DeviceUnrealize unrealize; uint64_t (*get_features)(VirtIODevice *vdev, uint64_t requested_features); +uint64_t (*get_features_rev)(VirtIODevice *vdev, + uint64_t requested_features, + unsigned int revision); uint64_t (*bad_features)(VirtIODevice *vdev); void (*set_features)(VirtIODevice *vdev, uint64_t val); int (*validate_features)(VirtIODevice *vdev); -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message
[PATCH RFC v5 06/19] virtio: endianness checks for virtio 1.0 devices
Add code that checks for the VERSION_1 feature bit in order to make decisions about the device's endianness. This allows us to support transitional devices. Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com --- hw/virtio/virtio.c|6 +- include/hw/virtio/virtio-access.h |4 include/hw/virtio/virtio.h|8 ++-- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 7f74ae5..8f69ffa 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -881,7 +881,11 @@ static bool virtio_device_endian_needed(void *opaque) VirtIODevice *vdev = opaque; assert(vdev-device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN); -return vdev-device_endian != virtio_default_endian(); +if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { +return vdev-device_endian != virtio_default_endian(); +} +/* Devices conforming to VIRTIO 1.0 or later are always LE. */ +return vdev-device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE; } static const VMStateDescription vmstate_virtio_device_endian = { diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h index 46456fd..ee28c21 100644 --- a/include/hw/virtio/virtio-access.h +++ b/include/hw/virtio/virtio-access.h @@ -19,6 +19,10 @@ static inline bool virtio_access_is_big_endian(VirtIODevice *vdev) { +if (virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { +/* Devices conforming to VIRTIO 1.0 or later are always LE. */ +return false; +} #if defined(TARGET_IS_BIENDIAN) return virtio_is_big_endian(vdev); #elif defined(TARGET_WORDS_BIGENDIAN) diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 08141c7..68c40db 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -297,7 +297,11 @@ static inline bool virtio_has_feature(VirtIODevice *vdev, unsigned int fbit) static inline bool virtio_is_big_endian(VirtIODevice *vdev) { -assert(vdev-device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN); -return vdev-device_endian == VIRTIO_DEVICE_ENDIAN_BIG; +if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { +assert(vdev-device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN); +return vdev-device_endian == VIRTIO_DEVICE_ENDIAN_BIG; +} +/* Devices conforming to VIRTIO 1.0 or later are always LE. */ +return false; } #endif -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC v5 07/19] virtio: allow virtio-1 queue layout
For virtio-1 devices, we allow a more complex queue layout that doesn't require descriptor table and rings on a physically-contigous memory area: add virtio_queue_set_rings() to allow transports to set this up. Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com --- hw/virtio/virtio.c | 16 include/hw/virtio/virtio.h |2 ++ 2 files changed, 18 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 8f69ffa..508dccf 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -96,6 +96,13 @@ static void virtqueue_init(VirtQueue *vq) { hwaddr pa = vq-pa; +if (pa == -1ULL) { +/* + * This is a virtio-1 style vq that has already been setup + * in virtio_queue_set. + */ +return; +} vq-vring.desc = pa; vq-vring.avail = pa + vq-vring.num * sizeof(VRingDesc); vq-vring.used = vring_align(vq-vring.avail + @@ -717,6 +724,15 @@ hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n) return vdev-vq[n].pa; } +void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc, +hwaddr avail, hwaddr used) +{ +vdev-vq[n].pa = -1ULL; +vdev-vq[n].vring.desc = desc; +vdev-vq[n].vring.avail = avail; +vdev-vq[n].vring.used = used; +} + void virtio_queue_set_num(VirtIODevice *vdev, int n, int num) { /* Don't allow guest to flip queue between existent and diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 68c40db..80ee313 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -224,6 +224,8 @@ void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr); hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n); void virtio_queue_set_num(VirtIODevice *vdev, int n, int num); int virtio_queue_get_num(VirtIODevice *vdev, int n); +void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc, +hwaddr avail, hwaddr used); void virtio_queue_set_align(VirtIODevice *vdev, int n, int align); void virtio_queue_notify(VirtIODevice *vdev, int n); uint16_t virtio_queue_vector(VirtIODevice *vdev, int n); -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC v5 12/19] virtio: disallow late feature changes for virtio-1
For virtio-1 devices, the driver must not attempt to set feature bits after it set FEATURES_OK in the device status. Simply reject it in that case. Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com --- hw/virtio/virtio.c | 16 ++-- include/hw/virtio/virtio.h |2 ++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 508dccf..4f2dc48 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -980,7 +980,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f) vmstate_save_state(f, vmstate_virtio, vdev); } -int virtio_set_features(VirtIODevice *vdev, uint64_t val) +static int __virtio_set_features(VirtIODevice *vdev, uint64_t val) { BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); VirtioBusClass *vbusk = VIRTIO_BUS_GET_CLASS(qbus); @@ -996,6 +996,18 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val) return bad ? -1 : 0; } +int virtio_set_features(VirtIODevice *vdev, uint64_t val) +{ + /* + * The driver must not attempt to set features after feature negotiation + * has finished. + */ +if (vdev-status VIRTIO_CONFIG_S_FEATURES_OK) { +return -EINVAL; +} +return __virtio_set_features(vdev, val); +} + int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) { int i, ret; @@ -1028,7 +1040,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) qemu_get_be32s(f, features); /* XXX features = 32 */ -if (virtio_set_features(vdev, features) 0) { +if (__virtio_set_features(vdev, features) 0) { supported_features = k-get_features(qbus-parent); error_report(Features 0x%x unsupported. Allowed features: 0x%lx, features, supported_features); diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 80ee313..9a984c2 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -32,6 +32,8 @@ #define VIRTIO_CONFIG_S_DRIVER 2 /* Driver has used its parts of the config, and is happy */ #define VIRTIO_CONFIG_S_DRIVER_OK 4 +/* Driver has finished configuring features */ +#define VIRTIO_CONFIG_S_FEATURES_OK 8 /* We've given up on this device. */ #define VIRTIO_CONFIG_S_FAILED 0x80 -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC v5 09/19] s390x/css: Add a callback for when subchannel gets disabled
From: Thomas Huth th...@linux.vnet.ibm.com We need a possibility to run code when a subchannel gets disabled. This patch adds the necessary infrastructure. Signed-off-by: Thomas Huth th...@linux.vnet.ibm.com Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com --- hw/s390x/css.c | 12 hw/s390x/css.h |1 + 2 files changed, 13 insertions(+) diff --git a/hw/s390x/css.c b/hw/s390x/css.c index b67c039..735ec55 100644 --- a/hw/s390x/css.c +++ b/hw/s390x/css.c @@ -588,6 +588,7 @@ int css_do_msch(SubchDev *sch, SCHIB *orig_schib) { SCSW *s = sch-curr_status.scsw; PMCW *p = sch-curr_status.pmcw; +uint16_t oldflags; int ret; SCHIB schib; @@ -610,6 +611,7 @@ int css_do_msch(SubchDev *sch, SCHIB *orig_schib) copy_schib_from_guest(schib, orig_schib); /* Only update the program-modifiable fields. */ p-intparm = schib.pmcw.intparm; +oldflags = p-flags; p-flags = ~(PMCW_FLAGS_MASK_ISC | PMCW_FLAGS_MASK_ENA | PMCW_FLAGS_MASK_LM | PMCW_FLAGS_MASK_MME | PMCW_FLAGS_MASK_MP); @@ -625,6 +627,12 @@ int css_do_msch(SubchDev *sch, SCHIB *orig_schib) (PMCW_CHARS_MASK_MBFC | PMCW_CHARS_MASK_CSENSE); sch-curr_status.mba = schib.mba; +/* Has the channel been disabled? */ +if (sch-disable_cb (oldflags PMCW_FLAGS_MASK_ENA) != 0 + (p-flags PMCW_FLAGS_MASK_ENA) == 0) { +sch-disable_cb(sch); +} + ret = 0; out: @@ -1443,6 +1451,10 @@ void css_reset_sch(SubchDev *sch) { PMCW *p = sch-curr_status.pmcw; +if ((p-flags PMCW_FLAGS_MASK_ENA) != 0 sch-disable_cb) { +sch-disable_cb(sch); +} + p-intparm = 0; p-flags = ~(PMCW_FLAGS_MASK_ISC | PMCW_FLAGS_MASK_ENA | PMCW_FLAGS_MASK_LM | PMCW_FLAGS_MASK_MME | diff --git a/hw/s390x/css.h b/hw/s390x/css.h index 33104ac..7fa807b 100644 --- a/hw/s390x/css.h +++ b/hw/s390x/css.h @@ -81,6 +81,7 @@ struct SubchDev { uint8_t ccw_no_data_cnt; /* transport-provided data: */ int (*ccw_cb) (SubchDev *, CCW1); +void (*disable_cb)(SubchDev *); SenseId id; void *driver_data; }; -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC v5 13/19] virtio: allow to fail setting status
virtio-1 allow setting of the FEATURES_OK status bit to fail if the negotiated feature bits are inconsistent: let's fail virtio_set_status() in that case and update virtio-ccw to post an error to the guest. Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com --- hw/s390x/virtio-ccw.c | 20 hw/virtio/virtio.c | 24 +++- include/hw/virtio/virtio.h |3 ++- 3 files changed, 37 insertions(+), 10 deletions(-) diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index 75c9ff9..ec492b8 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -555,15 +555,19 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw) if (!(status VIRTIO_CONFIG_S_DRIVER_OK)) { virtio_ccw_stop_ioeventfd(dev); } -virtio_set_status(vdev, status); -if (vdev-status == 0) { -virtio_reset(vdev); -} -if (status VIRTIO_CONFIG_S_DRIVER_OK) { -virtio_ccw_start_ioeventfd(dev); +if (virtio_set_status(vdev, status) == 0) { +if (vdev-status == 0) { +virtio_reset(vdev); +} +if (status VIRTIO_CONFIG_S_DRIVER_OK) { +virtio_ccw_start_ioeventfd(dev); +} +sch-curr_status.scsw.count = ccw.count - sizeof(status); +ret = 0; +} else { +/* Trigger a command reject. */ +ret = -ENOSYS; } -sch-curr_status.scsw.count = ccw.count - sizeof(status); -ret = 0; } break; case CCW_CMD_SET_IND: diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 4f2dc48..be128f7 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -548,15 +548,37 @@ void virtio_update_irq(VirtIODevice *vdev) virtio_notify_vector(vdev, VIRTIO_NO_VECTOR); } -void virtio_set_status(VirtIODevice *vdev, uint8_t val) +static int virtio_validate_features(VirtIODevice *vdev) +{ +VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); + +if (k-validate_features) { +return k-validate_features(vdev); +} else { +return 0; +} +} + +int virtio_set_status(VirtIODevice *vdev, uint8_t val) { VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); trace_virtio_set_status(vdev, val); +if (virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { +if (!(vdev-status VIRTIO_CONFIG_S_FEATURES_OK) +val VIRTIO_CONFIG_S_FEATURES_OK) { +int ret = virtio_validate_features(vdev); + +if (ret) { +return ret; +} +} +} if (k-set_status) { k-set_status(vdev, val); } vdev-status = val; +return 0; } bool target_words_bigendian(void); diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 9a984c2..e7bedd1 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -149,6 +149,7 @@ typedef struct VirtioDeviceClass { uint64_t (*get_features)(VirtIODevice *vdev, uint64_t requested_features); uint64_t (*bad_features)(VirtIODevice *vdev); void (*set_features)(VirtIODevice *vdev, uint64_t val); +int (*validate_features)(VirtIODevice *vdev); void (*get_config)(VirtIODevice *vdev, uint8_t *config); void (*set_config)(VirtIODevice *vdev, const uint8_t *config); void (*reset)(VirtIODevice *vdev); @@ -232,7 +233,7 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align); void virtio_queue_notify(VirtIODevice *vdev, int n); uint16_t virtio_queue_vector(VirtIODevice *vdev, int n); void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector); -void virtio_set_status(VirtIODevice *vdev, uint8_t val); +int virtio_set_status(VirtIODevice *vdev, uint8_t val); void virtio_reset(void *opaque); void virtio_update_irq(VirtIODevice *vdev); int virtio_set_features(VirtIODevice *vdev, uint64_t val); -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC v5 01/19] linux-headers/virtio_config: Update with VIRTIO_F_VERSION_1
From: Thomas Huth th...@linux.vnet.ibm.com Add the new VIRTIO_F_VERSION_1 definition to the virtio_config.h linux header. Signed-off-by: Thomas Huth th...@linux.vnet.ibm.com Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com --- linux-headers/linux/virtio_config.h |3 +++ 1 file changed, 3 insertions(+) diff --git a/linux-headers/linux/virtio_config.h b/linux-headers/linux/virtio_config.h index 75dc20b..16aa289 100644 --- a/linux-headers/linux/virtio_config.h +++ b/linux-headers/linux/virtio_config.h @@ -54,4 +54,7 @@ /* Can the device handle any descriptor layout? */ #define VIRTIO_F_ANY_LAYOUT27 +/* v1.0 compliant. */ +#define VIRTIO_F_VERSION_1 32 + #endif /* _LINUX_VIRTIO_CONFIG_H */ -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC v5 15/19] virtio-net: no writeable mac for virtio-1
Devices operating as virtio 1.0 may not allow writes to the mac address in config space. Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com --- hw/net/virtio-net.c |1 + 1 file changed, 1 insertion(+) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index d6d1b98..ebbea60 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -87,6 +87,7 @@ static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config) memcpy(netcfg, config, n-config_size); if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) +!virtio_has_feature(vdev, VIRTIO_F_VERSION_1) memcmp(netcfg.mac, n-mac, ETH_ALEN)) { memcpy(n-mac, netcfg.mac, ETH_ALEN); qemu_format_nic_info_str(qemu_get_queue(n-nic), n-mac); -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC v5 10/19] s390x/virtio-ccw: add virtio set-revision call
From: Thomas Huth th...@linux.vnet.ibm.com Handle the virtio-ccw revision according to what the guest sets. When revision 1 is selected, we have a virtio-1 standard device with byteswapping for the virtio rings. When a channel gets disabled, we have to revert to the legacy behavior in case the next user of the device does not negotiate the revision 1 anymore (e.g. the boot firmware uses revision 1, but the operating system only uses the legacy mode). Note that revisions 0 are still disabled. Signed-off-by: Thomas Huth th...@linux.vnet.ibm.com Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com --- hw/s390x/virtio-ccw.c | 52 + hw/s390x/virtio-ccw.h |5 + 2 files changed, 57 insertions(+) diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index e434718..5311d9f 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -20,9 +20,11 @@ #include hw/virtio/virtio-net.h #include hw/sysbus.h #include qemu/bitops.h +#include hw/virtio/virtio-access.h #include hw/virtio/virtio-bus.h #include hw/s390x/adapter.h #include hw/s390x/s390_flic.h +#include linux/virtio_config.h #include ioinst.h #include css.h @@ -260,6 +262,12 @@ typedef struct VirtioThinintInfo { uint8_t isc; } QEMU_PACKED VirtioThinintInfo; +typedef struct VirtioRevInfo { +uint16_t revision; +uint16_t length; +uint8_t data[0]; +} QEMU_PACKED VirtioRevInfo; + /* Specify where the virtqueues for the subchannel are in guest memory. */ static int virtio_ccw_set_vqs(SubchDev *sch, uint64_t addr, uint32_t align, uint16_t index, uint16_t num) @@ -299,6 +307,7 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw) { int ret; VqInfoBlock info; +VirtioRevInfo revinfo; uint8_t status; VirtioFeatDesc features; void *config; @@ -375,6 +384,13 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw) features.features = (uint32_t)dev-host_features; } else if (features.index == 1) { features.features = (uint32_t)(dev-host_features 32); +/* + * Don't offer version 1 to the guest if it did not + * negotiate at least revision 1. + */ +if (dev-revision = 0) { +features.features = ~(1 (VIRTIO_F_VERSION_1 - 32)); +} } else { /* Return zeroes if the guest supports more feature bits. */ features.features = 0; @@ -406,6 +422,13 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw) (vdev-guest_features 0x) | features.features); } else if (features.index == 1) { +/* + * The guest should not set version 1 if it didn't + * negotiate a revision = 1. + */ +if (dev-revision = 0) { +features.features = ~(1 (VIRTIO_F_VERSION_1 - 32)); +} virtio_set_features(vdev, (vdev-guest_features 0x) | ((uint64_t)features.features 32)); @@ -608,6 +631,25 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw) } } break; +case CCW_CMD_SET_VIRTIO_REV: +len = sizeof(revinfo); +if (ccw.count len || (check_len ccw.count len)) { +ret = -EINVAL; +break; +} +if (!ccw.cda) { +ret = -EFAULT; +break; +} +cpu_physical_memory_read(ccw.cda, revinfo, len); +if (dev-revision = 0 || +revinfo.revision VIRTIO_CCW_REV_MAX) { +ret = -ENOSYS; +break; +} +ret = 0; +dev-revision = revinfo.revision; +break; default: ret = -ENOSYS; break; @@ -615,6 +657,13 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw) return ret; } +static void virtio_sch_disable_cb(SubchDev *sch) +{ +VirtioCcwDevice *dev = sch-driver_data; + +dev-revision = -1; +} + static int virtio_ccw_device_init(VirtioCcwDevice *dev, VirtIODevice *vdev) { unsigned int cssid = 0; @@ -740,6 +789,7 @@ static int virtio_ccw_device_init(VirtioCcwDevice *dev, VirtIODevice *vdev) css_sch_build_virtual_schib(sch, 0, VIRTIO_CCW_CHPID_TYPE); sch-ccw_cb = virtio_ccw_cb; +sch-disable_cb = virtio_sch_disable_cb; /* Build senseid data. */ memset(sch-id, 0, sizeof(SenseId)); @@ -747,6 +797,8 @@ static int virtio_ccw_device_init(VirtioCcwDevice *dev, VirtIODevice *vdev) sch-id.cu_type = VIRTIO_CCW_CU_TYPE; sch-id.cu_model = vdev-device_id; +dev-revision = -1; + /* Set default feature bits that are offered by the host. */ dev-host_features = 0; virtio_add_feature(dev
[PATCH RFC v5 11/19] s390x/virtio-ccw: support virtio-1 set_vq format
Support the new CCW_CMD_SET_VQ format for virtio-1 devices. While we're at it, refactor the code a bit and enforce big endian fields (which had always been required, even for legacy). Reviewed-by: Thomas Huth th...@linux.vnet.ibm.com Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com --- hw/s390x/virtio-ccw.c | 114 ++--- 1 file changed, 80 insertions(+), 34 deletions(-) diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index 5311d9f..75c9ff9 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -238,11 +238,20 @@ VirtualCssBus *virtual_css_bus_init(void) } /* Communication blocks used by several channel commands. */ -typedef struct VqInfoBlock { +typedef struct VqInfoBlockLegacy { uint64_t queue; uint32_t align; uint16_t index; uint16_t num; +} QEMU_PACKED VqInfoBlockLegacy; + +typedef struct VqInfoBlock { +uint64_t desc; +uint32_t res0; +uint16_t index; +uint16_t num; +uint64_t avail; +uint64_t used; } QEMU_PACKED VqInfoBlock; typedef struct VqConfigBlock { @@ -269,17 +278,20 @@ typedef struct VirtioRevInfo { } QEMU_PACKED VirtioRevInfo; /* Specify where the virtqueues for the subchannel are in guest memory. */ -static int virtio_ccw_set_vqs(SubchDev *sch, uint64_t addr, uint32_t align, - uint16_t index, uint16_t num) +static int virtio_ccw_set_vqs(SubchDev *sch, VqInfoBlock *info, + VqInfoBlockLegacy *linfo) { VirtIODevice *vdev = virtio_ccw_get_vdev(sch); +uint16_t index = info ? info-index : linfo-index; +uint16_t num = info ? info-num : linfo-num; +uint64_t desc = info ? info-desc : linfo-queue; if (index VIRTIO_PCI_QUEUE_MAX) { return -EINVAL; } /* Current code in virtio.c relies on 4K alignment. */ -if (addr (align != 4096)) { +if (linfo desc (linfo-align != 4096)) { return -EINVAL; } @@ -287,8 +299,12 @@ static int virtio_ccw_set_vqs(SubchDev *sch, uint64_t addr, uint32_t align, return -EINVAL; } -virtio_queue_set_addr(vdev, index, addr); -if (!addr) { +if (info) { +virtio_queue_set_rings(vdev, index, desc, info-avail, info-used); +} else { +virtio_queue_set_addr(vdev, index, desc); +} +if (!desc) { virtio_queue_set_vector(vdev, index, 0); } else { /* Fail if we don't have a big enough queue. */ @@ -303,10 +319,66 @@ static int virtio_ccw_set_vqs(SubchDev *sch, uint64_t addr, uint32_t align, return 0; } -static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw) +static int virtio_ccw_handle_set_vq(SubchDev *sch, CCW1 ccw, bool check_len, +bool is_legacy) { int ret; VqInfoBlock info; +VqInfoBlockLegacy linfo; +size_t info_len = is_legacy ? sizeof(linfo) : sizeof(info); + +if (check_len) { +if (ccw.count != info_len) { +return -EINVAL; +} +} else if (ccw.count info_len) { +/* Can't execute command. */ +return -EINVAL; +} +if (!ccw.cda) { +return -EFAULT; +} +if (is_legacy) { +linfo.queue = ldq_be_phys(address_space_memory, ccw.cda); +linfo.align = ldl_be_phys(address_space_memory, + ccw.cda + sizeof(linfo.queue)); +linfo.index = lduw_be_phys(address_space_memory, + ccw.cda + sizeof(linfo.queue) + + sizeof(linfo.align)); +linfo.num = lduw_be_phys(address_space_memory, + ccw.cda + sizeof(linfo.queue) + + sizeof(linfo.align) + + sizeof(linfo.index)); +ret = virtio_ccw_set_vqs(sch, NULL, linfo); +} else { +info.desc = ldq_be_phys(address_space_memory, ccw.cda); +info.index = lduw_be_phys(address_space_memory, + ccw.cda + sizeof(info.desc) + + sizeof(info.res0)); +info.num = lduw_be_phys(address_space_memory, +ccw.cda + sizeof(info.desc) + + sizeof(info.res0) + + sizeof(info.index)); +info.avail = ldq_be_phys(address_space_memory, + ccw.cda + sizeof(info.desc) + + sizeof(info.res0) + + sizeof(info.index) + + sizeof(info.num)); +info.used = ldq_be_phys(address_space_memory, +ccw.cda + sizeof(info.desc) ++ sizeof(info.res0) ++ sizeof(info.index) ++ sizeof(info.num) ++ sizeof(info.avail)); +ret = virtio_ccw_set_vqs(sch