Re: [PATCH] s390: protvirt: virtio: Refuse device without IOMMU
On 2020/6/10 下午9:11, Pierre Morel wrote: Protected Virtualisation protects the memory of the guest and do not allow a the host to access all of its memory. Let's refuse a VIRTIO device which does not use IOMMU protected access. Signed-off-by: Pierre Morel --- drivers/s390/virtio/virtio_ccw.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 5730572b52cd..06ffbc96587a 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -986,6 +986,11 @@ static void virtio_ccw_set_status(struct virtio_device *vdev, u8 status) if (!ccw) return; + /* Protected Virtualisation guest needs IOMMU */ + if (is_prot_virt_guest() && + !__virtio_test_bit(vdev, VIRTIO_F_IOMMU_PLATFORM)) + status &= ~VIRTIO_CONFIG_S_FEATURES_OK; + /* Write the status to the host. */ vcdev->dma_area->status = status; ccw->cmd_code = CCW_CMD_WRITE_STATUS; I wonder whether we need move it to virtio core instead of ccw. I think the other memory protection technologies may suffer from this as well. Thanks ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH RFC v6 02/11] vhost: use batched get_vq_desc version
On 2020/6/10 下午7:05, Michael S. Tsirkin wrote: +EXPORT_SYMBOL_GPL(vhost_get_vq_desc); /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */ void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n) { + unfetch_descs(vq); vq->last_avail_idx -= n; So unfetch_descs() has decreased last_avail_idx. Can we fix this by letting unfetch_descs() return the number and then we can do: int d = unfetch_descs(vq); vq->last_avail_idx -= (n > d) ? n - d: 0; Thanks That's intentional I think - we need both. Yes, but: Unfetch_descs drops the descriptors in the cache that were *not returned to caller* through get_vq_desc. vhost_discard_vq_desc drops the ones that were returned through get_vq_desc. Did I miss anything? We could count some descriptors twice, consider the case e.g we only cache on descriptor: fetch_descs() fetch_buf() last_avail_idx++; Then we want do discard it: vhost_discard_avail_buf(1) unfetch_descs() last_avail_idx--; last_avail_idx -= 1; Thanks ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH V3] vdpa: introduce virtio pci driver
On 2020/6/10 下午4:51, Michael S. Tsirkin wrote: On Wed, Jun 10, 2020 at 04:25:06PM +0800, Jason Wang wrote: + +#define VP_VDPA_FEATURES \ + ((1ULL << VIRTIO_F_ANY_LAYOUT)| \ This is presumably for transitional devices only. In fact looking at code it seems that only net in legacy mode accepts VIRTIO_F_ANY_LAYOUT. Spec violation I guess ... but what should we do? Relax the spec or fix drivers? I don't get how it violates the spec. Spec says transitional drivers must ack VIRTIO_F_ANY_LAYOUT I don't see this in the spec, maybe you can point it to me? But grep for VIRTIO_F_ANY_LAYOUT and you will see they don't. Only legacy virtio net does. Maybe it should have been transitional drivers when using the lgacy interface, but there you are. +(1ULL << VIRTIO_F_VERSION_1) | \ +(1ULL << VIRTIO_F_ORDER_PLATFORM)| \ +(1ULL << VIRTIO_F_IOMMU_PLATFORM)) + +struct vp_vring { + void __iomem *notify; + char msix_name[256]; + resource_size_t notify_pa; + struct vdpa_callback cb; + int irq; +}; + +struct vp_vdpa { + struct vdpa_device vdpa; + struct pci_dev *pdev; + + struct virtio_device_id id; + + struct vp_vring vring[VP_VDPA_MAX_QUEUE]; + + /* The IO mapping for the PCI config space */ + void __iomem * const *base; + struct virtio_pci_common_cfg __iomem *common; + void __iomem *device; + /* Base of vq notifications */ + void __iomem *notify; + + /* Multiplier for queue_notify_off. */ + u32 notify_off_multiplier; + + int modern_bars; + int vectors; +}; + +static struct vp_vdpa *vdpa_to_vp(struct vdpa_device *vdpa) +{ + return container_of(vdpa, struct vp_vdpa, vdpa); +} + +/* + * Type-safe wrappers for io accesses. + * Use these to enforce at compile time the following spec requirement: + * + * The driver MUST access each field using the “natural” access + * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses + * for 16-bit fields and 8-bit accesses for 8-bit fields. + */ +static inline u8 vp_ioread8(u8 __iomem *addr) +{ + return ioread8(addr); +} +static inline u16 vp_ioread16(__le16 __iomem *addr) +{ + return ioread16(addr); +} + +static inline u32 vp_ioread32(__le32 __iomem *addr) +{ + return ioread32(addr); +} + +static inline void vp_iowrite8(u8 value, u8 __iomem *addr) +{ + iowrite8(value, addr); +} + +static inline void vp_iowrite16(u16 value, __le16 __iomem *addr) +{ + iowrite16(value, addr); +} + +static inline void vp_iowrite32(u32 value, __le32 __iomem *addr) +{ + iowrite32(value, addr); +} + +static void vp_iowrite64_twopart(u64 val, +__le32 __iomem *lo, __le32 __iomem *hi) +{ + vp_iowrite32((u32)val, lo); + vp_iowrite32(val >> 32, hi); +} + +static int find_capability(struct pci_dev *dev, u8 cfg_type, + u32 ioresource_types, int *bars) +{ + int pos; + + for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR); +pos > 0; +pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) { + u8 type, bar; + + pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap, +cfg_type), +); + pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap, +bar), +); + + /* Ignore structures with reserved BAR values */ + if (bar > 0x5) + continue; + + if (type == cfg_type) { + if (pci_resource_len(dev, bar) && + pci_resource_flags(dev, bar) & ioresource_types) { + *bars |= (1 << bar); + return pos; + } + } + } + return 0; +} + +static void __iomem *map_capability(struct vp_vdpa *vp_vdpa, int off, + resource_size_t *pa) +{ + struct pci_dev *pdev = vp_vdpa->pdev; + u32 offset; + u8 bar; + + pci_read_config_byte(pdev, +off + offsetof(struct virtio_pci_cap, bar), +); + pci_read_config_dword(pdev, + off + offsetof(struct virtio_pci_cap, offset), + ); + + if (pa) + *pa = pci_resource_start(pdev, bar) + offset; + + return vp_vdpa->base[bar] + offset; +} + +static u64 vp_vdpa_get_features(struct vdpa_device *vdpa) +{ + struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); + u64 features; + + vp_iowrite32(0, _vdpa->common->device_feature_select); + features = vp_ioread32(_vdpa->common->device_feature); +
Re: [GIT PULL] virtio: features, fixes
The pull request you sent on Wed, 10 Jun 2020 00:44:55 -0400: > https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git tags/for_linus has been merged into torvalds/linux.git: https://git.kernel.org/torvalds/c/09102704c67457c6cdea6c0394c34843484a852c Thank you! -- Deet-doot-dot, I am a bot. https://korg.wiki.kernel.org/userdoc/prtracker ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [GIT PULL] virtio: features, fixes
On Tue, Jun 9, 2020 at 9:45 PM Michael S. Tsirkin wrote: > > I also upgraded the machine I used to sign > the tag (didn't change the key) - hope the signature is still ok. If not > pls let me know! All looks normal as far as I can tell, Linus ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH] s390: protvirt: virtio: Refuse device without IOMMU
On Wed, 10 Jun 2020 15:11:51 +0200 Pierre Morel wrote: > Protected Virtualisation protects the memory of the guest and > do not allow a the host to access all of its memory. > > Let's refuse a VIRTIO device which does not use IOMMU > protected access. > Should we ever get a virtio-ccw device implemented in hardware, we could in theory have a trusted device, i.e. one that is not affected by the memory protection. IMHO this restriction applies to paravitualized devices, that is devices realized by the hypervisor. In that sense this is not about ccw, but could in the future affect PCI as well. Thus the transport code may not be the best place to fence this, but frankly looking at how the QEMU discussion is going (the one where I try to prevent this condition) I would be glad to have something like this as a safety net. I would however like the commit message to reflect what is written above. Do we need backports (and cc-stable) for this? Connie what do you think? > Signed-off-by: Pierre Morel > --- > drivers/s390/virtio/virtio_ccw.c | 5 + > 1 file changed, 5 insertions(+) > > diff --git a/drivers/s390/virtio/virtio_ccw.c > b/drivers/s390/virtio/virtio_ccw.c > index 5730572b52cd..06ffbc96587a 100644 > --- a/drivers/s390/virtio/virtio_ccw.c > +++ b/drivers/s390/virtio/virtio_ccw.c > @@ -986,6 +986,11 @@ static void virtio_ccw_set_status(struct virtio_device > *vdev, u8 status) > if (!ccw) > return; > > + /* Protected Virtualisation guest needs IOMMU */ > + if (is_prot_virt_guest() && > + !__virtio_test_bit(vdev, VIRTIO_F_IOMMU_PLATFORM)) If you were to add && !__virtio_test_bit(vdev, VIRTIO_F_ORDER_PLATFORM) we could confine this check and the bail out to paravirtualized devices, because a device realized in HW is expected to give us both F_ACCESS_PLATFORM and F_ORDER_PLATFORM. I would not bet it will ever matter for virtio-ccw though. Connie, do you have an opinion on this? Regards, Halil > + status &= ~VIRTIO_CONFIG_S_FEATURES_OK; > + > /* Write the status to the host. */ > vcdev->dma_area->status = status; > ccw->cmd_code = CCW_CMD_WRITE_STATUS; ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH] s390: protvirt: virtio: Refuse device without IOMMU
On 2020-06-10 16:53, Cornelia Huck wrote: On Wed, 10 Jun 2020 16:37:55 +0200 Pierre Morel wrote: On 2020-06-10 15:24, Cornelia Huck wrote: On Wed, 10 Jun 2020 15:11:51 +0200 Pierre Morel wrote: Protected Virtualisation protects the memory of the guest and do not allow a the host to access all of its memory. Let's refuse a VIRTIO device which does not use IOMMU protected access. Signed-off-by: Pierre Morel --- drivers/s390/virtio/virtio_ccw.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 5730572b52cd..06ffbc96587a 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -986,6 +986,11 @@ static void virtio_ccw_set_status(struct virtio_device *vdev, u8 status) if (!ccw) return; + /* Protected Virtualisation guest needs IOMMU */ + if (is_prot_virt_guest() && + !__virtio_test_bit(vdev, VIRTIO_F_IOMMU_PLATFORM)) + status &= ~VIRTIO_CONFIG_S_FEATURES_OK; + set_status seems like an odd place to look at features; shouldn't that rather be done in finalize_features? Right, looks better to me too. What about: diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 06ffbc96587a..227676297ea0 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -833,6 +833,11 @@ static int virtio_ccw_finalize_features(struct virtio_device *vdev) ret = -ENOMEM; goto out_free; } + + if (is_prot_virt_guest() && + !__virtio_test_bit(vdev, VIRTIO_F_IOMMU_PLATFORM)) Add a comment, and (maybe) a message? Otherwise, I think this is fine, as it should fail the probe, which is what we want. yes right a message is needed. and I extend a little the comment I had before. thanks Regards, Pierre -- Pierre Morel IBM Lab Boeblingen ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH RFC v7 03/14] vhost: use batched get_vq_desc version
On Wed, Jun 10, 2020 at 02:37:50PM +0200, Eugenio Perez Martin wrote: > > +/* This function returns a value > 0 if a descriptor was found, or 0 if > > none were found. > > + * A negative code is returned on error. */ > > +static int fetch_descs(struct vhost_virtqueue *vq) > > +{ > > + int ret; > > + > > + if (unlikely(vq->first_desc >= vq->ndescs)) { > > + vq->first_desc = 0; > > + vq->ndescs = 0; > > + } > > + > > + if (vq->ndescs) > > + return 1; > > + > > + for (ret = 1; > > +ret > 0 && vq->ndescs <= vhost_vq_num_batch_descs(vq); > > +ret = fetch_buf(vq)) > > + ; > > (Expanding comment in V6): > > We get an infinite loop this way: > * vq->ndescs == 0, so we call fetch_buf() here > * fetch_buf gets less than vhost_vq_num_batch_descs(vq); descriptors. ret = 1 > * This loop calls again fetch_buf, but vq->ndescs > 0 (and avail_vq == > last_avail_vq), so it just return 1 That's what [PATCH RFC v7 08/14] fixup! vhost: use batched get_vq_desc version is supposed to fix. -- MST ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH RFC v7 03/14] vhost: use batched get_vq_desc version
On Wed, Jun 10, 2020 at 04:29:29PM +0200, Eugenio Pérez wrote: > On Wed, 2020-06-10 at 07:36 -0400, Michael S. Tsirkin wrote: > > As testing shows no performance change, switch to that now. > > > > Signed-off-by: Michael S. Tsirkin > > Signed-off-by: Eugenio Pérez > > Link: https://lore.kernel.org/r/20200401183118.8334-3-epere...@redhat.com > > Signed-off-by: Michael S. Tsirkin > > --- > > drivers/vhost/test.c | 2 +- > > drivers/vhost/vhost.c | 318 -- > > drivers/vhost/vhost.h | 7 +- > > 3 files changed, 65 insertions(+), 262 deletions(-) > > > > diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c > > index 0466921f4772..7d69778aaa26 100644 > > --- a/drivers/vhost/test.c > > +++ b/drivers/vhost/test.c > > @@ -119,7 +119,7 @@ static int vhost_test_open(struct inode *inode, struct > > file *f) > > dev = >dev; > > vqs[VHOST_TEST_VQ] = >vqs[VHOST_TEST_VQ]; > > n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick; > > - vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV, > > + vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV + 64, > >VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT, true, NULL); > > > > f->private_data = n; > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > > index 11433d709651..28f324fd77df 100644 > > --- a/drivers/vhost/vhost.c > > +++ b/drivers/vhost/vhost.c > > @@ -304,6 +304,7 @@ static void vhost_vq_reset(struct vhost_dev *dev, > > { > > vq->num = 1; > > vq->ndescs = 0; > > + vq->first_desc = 0; > > vq->desc = NULL; > > vq->avail = NULL; > > vq->used = NULL; > > @@ -372,6 +373,11 @@ static int vhost_worker(void *data) > > return 0; > > } > > > > +static int vhost_vq_num_batch_descs(struct vhost_virtqueue *vq) > > +{ > > + return vq->max_descs - UIO_MAXIOV; > > +} > > + > > static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) > > { > > kfree(vq->descs); > > @@ -394,6 +400,9 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev > > *dev) > > for (i = 0; i < dev->nvqs; ++i) { > > vq = dev->vqs[i]; > > vq->max_descs = dev->iov_limit; > > + if (vhost_vq_num_batch_descs(vq) < 0) { > > + return -EINVAL; > > + } > > vq->descs = kmalloc_array(vq->max_descs, > > sizeof(*vq->descs), > > GFP_KERNEL); > > @@ -1610,6 +1619,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned > > int ioctl, void __user *arg > > vq->last_avail_idx = s.num; > > /* Forget the cached index value. */ > > vq->avail_idx = vq->last_avail_idx; > > + vq->ndescs = vq->first_desc = 0; > > This is not needed if it is done in vhost_vq_set_backend, as far as I can > tell. > > Actually, maybe it is even better to move `vq->avail_idx = > vq->last_avail_idx;` line to vhost_vq_set_backend, it is part > of the backend "set up" procedure, isn't it? > > I tested with virtio_test + batch tests sent in > https://lkml.kernel.org/lkml/20200418102217.32327-1-epere...@redhat.com/T/. Ow did I forget to merge them for rc1? Should I have? Maybe Linus won't yell to hard at me if I merge them after rc1. > I append here what I'm proposing in case it is clearer this way. > > Thanks! > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > index 4d198994e7be..809ad2cd2879 100644 > --- a/drivers/vhost/vhost.c > +++ b/drivers/vhost/vhost.c > @@ -1617,9 +1617,6 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned > int ioctl, void __user *arg > break; > } > vq->last_avail_idx = s.num; > - /* Forget the cached index value. */ > - vq->avail_idx = vq->last_avail_idx; > - vq->ndescs = vq->first_desc = 0; > break; > case VHOST_GET_VRING_BASE: > s.index = idx; > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h > index fed36af5c444..f4902dc808e4 100644 > --- a/drivers/vhost/vhost.h > +++ b/drivers/vhost/vhost.h > @@ -258,6 +258,7 @@ static inline void vhost_vq_set_backend(struct > vhost_virtqueue *vq, > void *private_data) > { > vq->private_data = private_data; > + vq->avail_idx = vq->last_avail_idx; > vq->ndescs = 0; > vq->first_desc = 0; > } > Seems like a nice cleanup, though it's harmless right? -- MST ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH] s390: protvirt: virtio: Refuse device without IOMMU
On Wed, 10 Jun 2020 16:37:55 +0200 Pierre Morel wrote: > On 2020-06-10 15:24, Cornelia Huck wrote: > > On Wed, 10 Jun 2020 15:11:51 +0200 > > Pierre Morel wrote: > > > >> Protected Virtualisation protects the memory of the guest and > >> do not allow a the host to access all of its memory. > >> > >> Let's refuse a VIRTIO device which does not use IOMMU > >> protected access. > >> > >> Signed-off-by: Pierre Morel > >> --- > >> drivers/s390/virtio/virtio_ccw.c | 5 + > >> 1 file changed, 5 insertions(+) > >> > >> diff --git a/drivers/s390/virtio/virtio_ccw.c > >> b/drivers/s390/virtio/virtio_ccw.c > >> index 5730572b52cd..06ffbc96587a 100644 > >> --- a/drivers/s390/virtio/virtio_ccw.c > >> +++ b/drivers/s390/virtio/virtio_ccw.c > >> @@ -986,6 +986,11 @@ static void virtio_ccw_set_status(struct > >> virtio_device *vdev, u8 status) > >>if (!ccw) > >>return; > >> > >> + /* Protected Virtualisation guest needs IOMMU */ > >> + if (is_prot_virt_guest() && > >> + !__virtio_test_bit(vdev, VIRTIO_F_IOMMU_PLATFORM)) > >> + status &= ~VIRTIO_CONFIG_S_FEATURES_OK; > >> + > > > > set_status seems like an odd place to look at features; shouldn't that > > rather be done in finalize_features? > > Right, looks better to me too. > What about: > > > > diff --git a/drivers/s390/virtio/virtio_ccw.c > b/drivers/s390/virtio/virtio_ccw.c > index 06ffbc96587a..227676297ea0 100644 > --- a/drivers/s390/virtio/virtio_ccw.c > +++ b/drivers/s390/virtio/virtio_ccw.c > @@ -833,6 +833,11 @@ static int virtio_ccw_finalize_features(struct > virtio_device *vdev) > ret = -ENOMEM; > goto out_free; > } > + > + if (is_prot_virt_guest() && > + !__virtio_test_bit(vdev, VIRTIO_F_IOMMU_PLATFORM)) Add a comment, and (maybe) a message? Otherwise, I think this is fine, as it should fail the probe, which is what we want. > + return -EIO; > + > /* Give virtio_ring a chance to accept features. */ > vring_transport_features(vdev); > > > > Thanks, > > Regards, > Pierre > ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH] s390: protvirt: virtio: Refuse device without IOMMU
On 2020-06-10 15:24, Cornelia Huck wrote: On Wed, 10 Jun 2020 15:11:51 +0200 Pierre Morel wrote: Protected Virtualisation protects the memory of the guest and do not allow a the host to access all of its memory. Let's refuse a VIRTIO device which does not use IOMMU protected access. Signed-off-by: Pierre Morel --- drivers/s390/virtio/virtio_ccw.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 5730572b52cd..06ffbc96587a 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -986,6 +986,11 @@ static void virtio_ccw_set_status(struct virtio_device *vdev, u8 status) if (!ccw) return; + /* Protected Virtualisation guest needs IOMMU */ + if (is_prot_virt_guest() && + !__virtio_test_bit(vdev, VIRTIO_F_IOMMU_PLATFORM)) + status &= ~VIRTIO_CONFIG_S_FEATURES_OK; + set_status seems like an odd place to look at features; shouldn't that rather be done in finalize_features? Right, looks better to me too. What about: diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 06ffbc96587a..227676297ea0 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -833,6 +833,11 @@ static int virtio_ccw_finalize_features(struct virtio_device *vdev) ret = -ENOMEM; goto out_free; } + + if (is_prot_virt_guest() && + !__virtio_test_bit(vdev, VIRTIO_F_IOMMU_PLATFORM)) + return -EIO; + /* Give virtio_ring a chance to accept features. */ vring_transport_features(vdev); Thanks, Regards, Pierre -- Pierre Morel IBM Lab Boeblingen ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH] s390: protvirt: virtio: Refuse device without IOMMU
On Wed, 10 Jun 2020 15:11:51 +0200 Pierre Morel wrote: > Protected Virtualisation protects the memory of the guest and > do not allow a the host to access all of its memory. > > Let's refuse a VIRTIO device which does not use IOMMU > protected access. > > Signed-off-by: Pierre Morel > --- > drivers/s390/virtio/virtio_ccw.c | 5 + > 1 file changed, 5 insertions(+) > > diff --git a/drivers/s390/virtio/virtio_ccw.c > b/drivers/s390/virtio/virtio_ccw.c > index 5730572b52cd..06ffbc96587a 100644 > --- a/drivers/s390/virtio/virtio_ccw.c > +++ b/drivers/s390/virtio/virtio_ccw.c > @@ -986,6 +986,11 @@ static void virtio_ccw_set_status(struct virtio_device > *vdev, u8 status) > if (!ccw) > return; > > + /* Protected Virtualisation guest needs IOMMU */ > + if (is_prot_virt_guest() && > + !__virtio_test_bit(vdev, VIRTIO_F_IOMMU_PLATFORM)) > + status &= ~VIRTIO_CONFIG_S_FEATURES_OK; > + set_status seems like an odd place to look at features; shouldn't that rather be done in finalize_features? > /* Write the status to the host. */ > vcdev->dma_area->status = status; > ccw->cmd_code = CCW_CMD_WRITE_STATUS; ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH] s390: protvirt: virtio: Refuse device without IOMMU
Protected Virtualisation protects the memory of the guest and do not allow a the host to access all of its memory. Let's refuse a VIRTIO device which does not use IOMMU protected access. Signed-off-by: Pierre Morel --- drivers/s390/virtio/virtio_ccw.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 5730572b52cd..06ffbc96587a 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -986,6 +986,11 @@ static void virtio_ccw_set_status(struct virtio_device *vdev, u8 status) if (!ccw) return; + /* Protected Virtualisation guest needs IOMMU */ + if (is_prot_virt_guest() && + !__virtio_test_bit(vdev, VIRTIO_F_IOMMU_PLATFORM)) + status &= ~VIRTIO_CONFIG_S_FEATURES_OK; + /* Write the status to the host. */ vcdev->dma_area->status = status; ccw->cmd_code = CCW_CMD_WRITE_STATUS; -- 2.25.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH RFC v7 02/14] fixup! vhost: option to fetch descriptors through an independent struct
--- drivers/vhost/vhost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 180b7b58c76b..11433d709651 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2614,7 +2614,7 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq, err_fetch: vq->ndescs = 0; - return ret; + return ret ? ret : vq->num; } EXPORT_SYMBOL_GPL(vhost_get_vq_desc_batch); -- MST ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH RFC v7 12/14] vhost/scsi: switch to buf APIs
Switch to buf APIs. Doing this exposes a spec violation in vhost scsi: all used bufs are marked with length 0. Fix that is left for another day. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 73 ++-- 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 0cbaa0b3893d..a5cdd4c01a3a 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -71,8 +71,8 @@ struct vhost_scsi_inflight { }; struct vhost_scsi_cmd { - /* Descriptor from vhost_get_vq_desc() for virt_queue segment */ - int tvc_vq_desc; + /* Descriptor from vhost_get_avail_buf() for virt_queue segment */ + struct vhost_buf tvc_vq_desc; /* virtio-scsi initiator task attribute */ int tvc_task_attr; /* virtio-scsi response incoming iovecs */ @@ -213,7 +213,7 @@ struct vhost_scsi { * Context for processing request and control queue operations. */ struct vhost_scsi_ctx { - int head; + struct vhost_buf buf; unsigned int out, in; size_t req_size, rsp_size; size_t out_size, in_size; @@ -443,6 +443,20 @@ static int vhost_scsi_check_stop_free(struct se_cmd *se_cmd) return target_put_sess_cmd(se_cmd); } +/* Signal to guest that request finished with no input buffer. */ +/* TODO calling this when writing into buffer and most likely a bug */ +static void vhost_scsi_signal_noinput(struct vhost_dev *vdev, + struct vhost_virtqueue *vq, + struct vhost_buf *bufp) +{ + struct vhost_buf buf = *bufp; + + buf.in_len = 0; + vhost_put_used_buf(vq, ); + vhost_signal(vdev, vq); +} + + static void vhost_scsi_do_evt_work(struct vhost_scsi *vs, struct vhost_scsi_evt *evt) { @@ -450,7 +464,8 @@ vhost_scsi_do_evt_work(struct vhost_scsi *vs, struct vhost_scsi_evt *evt) struct virtio_scsi_event *event = >event; struct virtio_scsi_event __user *eventp; unsigned out, in; - int head, ret; + struct vhost_buf buf; + int ret; if (!vhost_vq_get_backend(vq)) { vs->vs_events_missed = true; @@ -459,14 +474,14 @@ vhost_scsi_do_evt_work(struct vhost_scsi *vs, struct vhost_scsi_evt *evt) again: vhost_disable_notify(>dev, vq); - head = vhost_get_vq_desc(vq, vq->iov, - ARRAY_SIZE(vq->iov), , , - NULL, NULL); - if (head < 0) { + ret = vhost_get_avail_buf(vq, , + vq->iov, ARRAY_SIZE(vq->iov), , , + NULL, NULL); + if (ret < 0) { vs->vs_events_missed = true; return; } - if (head == vq->num) { + if (!ret) { if (vhost_enable_notify(>dev, vq)) goto again; vs->vs_events_missed = true; @@ -488,7 +503,7 @@ vhost_scsi_do_evt_work(struct vhost_scsi *vs, struct vhost_scsi_evt *evt) eventp = vq->iov[out].iov_base; ret = __copy_to_user(eventp, event, sizeof(*event)); if (!ret) - vhost_add_used_and_signal(>dev, vq, head, 0); + vhost_scsi_signal_noinput(>dev, vq, ); else vq_err(vq, "Faulted on vhost_scsi_send_event\n"); } @@ -549,7 +564,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) ret = copy_to_iter(_rsp, sizeof(v_rsp), _iter); if (likely(ret == sizeof(v_rsp))) { struct vhost_scsi_virtqueue *q; - vhost_add_used(cmd->tvc_vq, cmd->tvc_vq_desc, 0); + vhost_put_used_buf(cmd->tvc_vq, >tvc_vq_desc); q = container_of(cmd->tvc_vq, struct vhost_scsi_virtqueue, vq); vq = q - vs->vqs; __set_bit(vq, signal); @@ -793,7 +808,7 @@ static void vhost_scsi_submission_work(struct work_struct *work) static void vhost_scsi_send_bad_target(struct vhost_scsi *vs, struct vhost_virtqueue *vq, - int head, unsigned out) + struct vhost_buf *buf, unsigned out) { struct virtio_scsi_cmd_resp __user *resp; struct virtio_scsi_cmd_resp rsp; @@ -804,7 +819,7 @@ vhost_scsi_send_bad_target(struct vhost_scsi *vs, resp = vq->iov[out].iov_base; ret = __copy_to_user(resp, , sizeof(rsp)); if (!ret) - vhost_add_used_and_signal(>dev, vq, head, 0); + vhost_scsi_signal_noinput(>dev, vq, buf); else pr_err("Faulted on virtio_scsi_cmd_resp\n"); } @@ -813,21 +828,21 @@ static int vhost_scsi_get_desc(struct vhost_scsi *vs, struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc) { - int ret = -ENXIO; + int r, ret = -ENXIO; - vc->head =
[PATCH RFC v7 09/14] vhost/net: convert to new API: heads->bufs
Convert vhost net to use the new format-agnostic API. In particular, don't poke at vq internals such as the heads array. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 154 +++- 1 file changed, 82 insertions(+), 72 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index ff594eec8ae3..830fe84912a5 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -59,13 +59,13 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;" * status internally; used for zerocopy tx only. */ /* Lower device DMA failed */ -#define VHOST_DMA_FAILED_LEN ((__force __virtio32)3) +#define VHOST_DMA_FAILED_LEN (3) /* Lower device DMA done */ -#define VHOST_DMA_DONE_LEN ((__force __virtio32)2) +#define VHOST_DMA_DONE_LEN (2) /* Lower device DMA in progress */ -#define VHOST_DMA_IN_PROGRESS ((__force __virtio32)1) +#define VHOST_DMA_IN_PROGRESS (1) /* Buffer unused */ -#define VHOST_DMA_CLEAR_LEN((__force __virtio32)0) +#define VHOST_DMA_CLEAR_LEN(0) #define VHOST_DMA_IS_DONE(len) ((__force u32)(len) >= (__force u32)VHOST_DMA_DONE_LEN) @@ -112,9 +112,12 @@ struct vhost_net_virtqueue { /* last used idx for outstanding DMA zerocopy buffers */ int upend_idx; /* For TX, first used idx for DMA done zerocopy buffers -* For RX, number of batched heads +* For RX, number of batched bufs */ int done_idx; + /* Outstanding user bufs. UIO_MAXIOV in length. */ + /* TODO: we can make this smaller for sure. */ + struct vhost_buf *bufs; /* Number of XDP frames batched */ int batched_xdp; /* an array of userspace buffers info */ @@ -271,6 +274,8 @@ static void vhost_net_clear_ubuf_info(struct vhost_net *n) int i; for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { + kfree(n->vqs[i].bufs); + n->vqs[i].bufs = NULL; kfree(n->vqs[i].ubuf_info); n->vqs[i].ubuf_info = NULL; } @@ -282,6 +287,12 @@ static int vhost_net_set_ubuf_info(struct vhost_net *n) int i; for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { + n->vqs[i].bufs = kmalloc_array(UIO_MAXIOV, + sizeof(*n->vqs[i].bufs), + GFP_KERNEL); + if (!n->vqs[i].bufs) + goto err; + zcopy = vhost_net_zcopy_mask & (0x1 << i); if (!zcopy) continue; @@ -364,18 +375,18 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net, int j = 0; for (i = nvq->done_idx; i != nvq->upend_idx; i = (i + 1) % UIO_MAXIOV) { - if (vq->heads[i].len == VHOST_DMA_FAILED_LEN) + if (nvq->bufs[i].in_len == VHOST_DMA_FAILED_LEN) vhost_net_tx_err(net); - if (VHOST_DMA_IS_DONE(vq->heads[i].len)) { - vq->heads[i].len = VHOST_DMA_CLEAR_LEN; + if (VHOST_DMA_IS_DONE(nvq->bufs[i].in_len)) { + nvq->bufs[i].in_len = VHOST_DMA_CLEAR_LEN; ++j; } else break; } while (j) { add = min(UIO_MAXIOV - nvq->done_idx, j); - vhost_add_used_and_signal_n(vq->dev, vq, - >heads[nvq->done_idx], add); + vhost_put_used_n_bufs(vq, >bufs[nvq->done_idx], add); + vhost_signal(vq->dev, vq); nvq->done_idx = (nvq->done_idx + add) % UIO_MAXIOV; j -= add; } @@ -390,7 +401,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) rcu_read_lock_bh(); /* set len to mark this desc buffers done DMA */ - nvq->vq.heads[ubuf->desc].in_len = success ? + nvq->bufs[ubuf->desc].in_len = success ? VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN; cnt = vhost_net_ubuf_put(ubufs); @@ -452,7 +463,8 @@ static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq) if (!nvq->done_idx) return; - vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx); + vhost_put_used_n_bufs(vq, nvq->bufs, nvq->done_idx); + vhost_signal(dev, vq); nvq->done_idx = 0; } @@ -558,6 +570,7 @@ static void vhost_net_busy_poll(struct vhost_net *net, static int vhost_net_tx_get_vq_desc(struct vhost_net *net, struct vhost_net_virtqueue *tnvq, + struct vhost_buf *buf, unsigned int *out_num, unsigned int *in_num, struct msghdr *msghdr, bool *busyloop_intr) { @@ -565,10 +578,10 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net, struct vhost_virtqueue *rvq = >vq;
[PATCH RFC v7 14/14] vhost: drop head based APIs
Everyone's using buf APIs, no need for head based ones anymore. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 64 ++- drivers/vhost/vhost.h | 12 2 files changed, 8 insertions(+), 68 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 03e6bca02288..9096bd291c91 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2425,39 +2425,11 @@ EXPORT_SYMBOL_GPL(vhost_get_avail_buf); void vhost_discard_avail_bufs(struct vhost_virtqueue *vq, struct vhost_buf *buf, unsigned count) { - vhost_discard_vq_desc(vq, count); + unfetch_descs(vq); + vq->last_avail_idx -= count; } EXPORT_SYMBOL_GPL(vhost_discard_avail_bufs); -/* This function returns the descriptor number found, or vq->num (which is - * never a valid descriptor number) if none was found. A negative code is - * returned on error. */ -int vhost_get_vq_desc(struct vhost_virtqueue *vq, - struct iovec iov[], unsigned int iov_size, - unsigned int *out_num, unsigned int *in_num, - struct vhost_log *log, unsigned int *log_num) -{ - struct vhost_buf buf; - int ret = vhost_get_avail_buf(vq, , - iov, iov_size, out_num, in_num, - log, log_num); - - if (likely(ret > 0)) - return buf->id; - if (likely(!ret)) - return vq->num; - return ret; -} -EXPORT_SYMBOL_GPL(vhost_get_vq_desc); - -/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */ -void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n) -{ - unfetch_descs(vq); - vq->last_avail_idx -= n; -} -EXPORT_SYMBOL_GPL(vhost_discard_vq_desc); - static int __vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, unsigned count) @@ -2490,8 +2462,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, return 0; } -/* After we've used one of their buffers, we tell them about it. We'll then - * want to notify the guest, using eventfd. */ +static int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, unsigned count) { @@ -2525,10 +2496,8 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, } return r; } -EXPORT_SYMBOL_GPL(vhost_add_used_n); -/* After we've used one of their buffers, we tell them about it. We'll then - * want to notify the guest, using eventfd. */ +static int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) { struct vring_used_elem heads = { @@ -2538,14 +2507,17 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) return vhost_add_used_n(vq, , 1); } -EXPORT_SYMBOL_GPL(vhost_add_used); +/* After we've used one of their buffers, we tell them about it. We'll then + * want to notify the guest, using vhost_signal. */ int vhost_put_used_buf(struct vhost_virtqueue *vq, struct vhost_buf *buf) { return vhost_add_used(vq, buf->id, buf->in_len); } EXPORT_SYMBOL_GPL(vhost_put_used_buf); +/* After we've used one of their buffers, we tell them about it. We'll then + * want to notify the guest, using vhost_signal. */ int vhost_put_used_n_bufs(struct vhost_virtqueue *vq, struct vhost_buf *bufs, unsigned count) { @@ -2606,26 +2578,6 @@ void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) } EXPORT_SYMBOL_GPL(vhost_signal); -/* And here's the combo meal deal. Supersize me! */ -void vhost_add_used_and_signal(struct vhost_dev *dev, - struct vhost_virtqueue *vq, - unsigned int head, int len) -{ - vhost_add_used(vq, head, len); - vhost_signal(dev, vq); -} -EXPORT_SYMBOL_GPL(vhost_add_used_and_signal); - -/* multi-buffer version of vhost_add_used_and_signal */ -void vhost_add_used_and_signal_n(struct vhost_dev *dev, -struct vhost_virtqueue *vq, -struct vring_used_elem *heads, unsigned count) -{ - vhost_add_used_n(vq, heads, count); - vhost_signal(dev, vq); -} -EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n); - /* return true if we're sure that avaiable ring is empty */ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq) { diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 28eea0155efb..264a2a2fae97 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -197,11 +197,6 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg bool vhost_vq_access_ok(struct vhost_virtqueue *vq); bool vhost_log_access_ok(struct vhost_dev *); -int vhost_get_vq_desc(struct vhost_virtqueue *, - struct iovec
[PATCH RFC v7 03/14] vhost: use batched get_vq_desc version
As testing shows no performance change, switch to that now. Signed-off-by: Michael S. Tsirkin Signed-off-by: Eugenio Pérez Link: https://lore.kernel.org/r/20200401183118.8334-3-epere...@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vhost/test.c | 2 +- drivers/vhost/vhost.c | 318 -- drivers/vhost/vhost.h | 7 +- 3 files changed, 65 insertions(+), 262 deletions(-) diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 0466921f4772..7d69778aaa26 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -119,7 +119,7 @@ static int vhost_test_open(struct inode *inode, struct file *f) dev = >dev; vqs[VHOST_TEST_VQ] = >vqs[VHOST_TEST_VQ]; n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick; - vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV, + vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV + 64, VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT, true, NULL); f->private_data = n; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 11433d709651..28f324fd77df 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -304,6 +304,7 @@ static void vhost_vq_reset(struct vhost_dev *dev, { vq->num = 1; vq->ndescs = 0; + vq->first_desc = 0; vq->desc = NULL; vq->avail = NULL; vq->used = NULL; @@ -372,6 +373,11 @@ static int vhost_worker(void *data) return 0; } +static int vhost_vq_num_batch_descs(struct vhost_virtqueue *vq) +{ + return vq->max_descs - UIO_MAXIOV; +} + static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) { kfree(vq->descs); @@ -394,6 +400,9 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) for (i = 0; i < dev->nvqs; ++i) { vq = dev->vqs[i]; vq->max_descs = dev->iov_limit; + if (vhost_vq_num_batch_descs(vq) < 0) { + return -EINVAL; + } vq->descs = kmalloc_array(vq->max_descs, sizeof(*vq->descs), GFP_KERNEL); @@ -1610,6 +1619,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg vq->last_avail_idx = s.num; /* Forget the cached index value. */ vq->avail_idx = vq->last_avail_idx; + vq->ndescs = vq->first_desc = 0; break; case VHOST_GET_VRING_BASE: s.index = idx; @@ -2078,253 +2088,6 @@ static unsigned next_desc(struct vhost_virtqueue *vq, struct vring_desc *desc) return next; } -static int get_indirect(struct vhost_virtqueue *vq, - struct iovec iov[], unsigned int iov_size, - unsigned int *out_num, unsigned int *in_num, - struct vhost_log *log, unsigned int *log_num, - struct vring_desc *indirect) -{ - struct vring_desc desc; - unsigned int i = 0, count, found = 0; - u32 len = vhost32_to_cpu(vq, indirect->len); - struct iov_iter from; - int ret, access; - - /* Sanity check */ - if (unlikely(len % sizeof desc)) { - vq_err(vq, "Invalid length in indirect descriptor: " - "len 0x%llx not multiple of 0x%zx\n", - (unsigned long long)len, - sizeof desc); - return -EINVAL; - } - - ret = translate_desc(vq, vhost64_to_cpu(vq, indirect->addr), len, vq->indirect, -UIO_MAXIOV, VHOST_ACCESS_RO); - if (unlikely(ret < 0)) { - if (ret != -EAGAIN) - vq_err(vq, "Translation failure %d in indirect.\n", ret); - return ret; - } - iov_iter_init(, READ, vq->indirect, ret, len); - - /* We will use the result as an address to read from, so most -* architectures only need a compiler barrier here. */ - read_barrier_depends(); - - count = len / sizeof desc; - /* Buffers are chained via a 16 bit next field, so -* we can have at most 2^16 of these. */ - if (unlikely(count > USHRT_MAX + 1)) { - vq_err(vq, "Indirect buffer length too big: %d\n", - indirect->len); - return -E2BIG; - } - - do { - unsigned iov_count = *in_num + *out_num; - if (unlikely(++found > count)) { - vq_err(vq, "Loop detected: last one at %u " - "indirect size %u\n", - i, count); - return -EINVAL; - } - if (unlikely(!copy_from_iter_full(, sizeof(desc), ))) { - vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n", - i, (size_t)vhost64_to_cpu(vq,
[PATCH RFC v7 00/14] vhost: ring format independence
This intentionally leaves "fixup" changes separate - hopefully that is enough to fix vhost-net crashes reported here, but it helps me keep track of what changed. I will naturally squash them later when we are done. This adds infrastructure required for supporting multiple ring formats. The idea is as follows: we convert descriptors to an independent format first, and process that converting to iov later. Used ring is similar: we fetch into an independent struct first, convert that to IOV later. The point is that we have a tight loop that fetches descriptors, which is good for cache utilization. This will also allow all kind of batching tricks - e.g. it seems possible to keep SMAP disabled while we are fetching multiple descriptors. For used descriptors, this allows keeping track of the buffer length without need to rescan IOV. This seems to perform exactly the same as the original code based on a microbenchmark. Lightly tested. More testing would be very much appreciated. changes from v6: - fixes some bugs introduced in v6 and v5 changes from v5: - addressed comments by Jason: squashed API changes, fixed up discard changes from v4: - added used descriptor format independence - addressed comments by jason - fixed a crash detected by the lkp robot. changes from v3: - fixed error handling in case of indirect descriptors - add BUG_ON to detect buffer overflow in case of bugs in response to comment by Jason Wang - minor code tweaks Changes from v2: - fixed indirect descriptor batching reported by Jason Wang Changes from v1: - typo fixes Michael S. Tsirkin (14): vhost: option to fetch descriptors through an independent struct fixup! vhost: option to fetch descriptors through an independent struct vhost: use batched get_vq_desc version vhost/net: pass net specific struct pointer vhost: reorder functions vhost: format-independent API for used buffers fixup! vhost: format-independent API for used buffers fixup! vhost: use batched get_vq_desc version vhost/net: convert to new API: heads->bufs vhost/net: avoid iov length math vhost/test: convert to the buf API vhost/scsi: switch to buf APIs vhost/vsock: switch to the buf API vhost: drop head based APIs drivers/vhost/net.c | 174 +-- drivers/vhost/scsi.c | 73 drivers/vhost/test.c | 22 +-- drivers/vhost/vhost.c | 378 +++--- drivers/vhost/vhost.h | 44 +++-- drivers/vhost/vsock.c | 30 ++-- 6 files changed, 439 insertions(+), 282 deletions(-) -- MST ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH RFC v7 13/14] vhost/vsock: switch to the buf API
A straight-forward conversion. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vsock.c | 30 ++ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index a483cec31d5c..61c6d3dd2ae3 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -103,7 +103,8 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, unsigned out, in; size_t nbytes; size_t iov_len, payload_len; - int head; + struct vhost_buf buf; + int ret; spin_lock_bh(>send_pkt_list_lock); if (list_empty(>send_pkt_list)) { @@ -117,16 +118,17 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, list_del_init(>list); spin_unlock_bh(>send_pkt_list_lock); - head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), -, , NULL, NULL); - if (head < 0) { + ret = vhost_get_avail_buf(vq, , + vq->iov, ARRAY_SIZE(vq->iov), + , , NULL, NULL); + if (ret < 0) { spin_lock_bh(>send_pkt_list_lock); list_add(>list, >send_pkt_list); spin_unlock_bh(>send_pkt_list_lock); break; } - if (head == vq->num) { + if (!ret) { spin_lock_bh(>send_pkt_list_lock); list_add(>list, >send_pkt_list); spin_unlock_bh(>send_pkt_list_lock); @@ -186,7 +188,8 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, */ virtio_transport_deliver_tap_pkt(pkt); - vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len); + buf.in_len = sizeof(pkt->hdr) + payload_len; + vhost_put_used_buf(vq, ); added = true; pkt->off += payload_len; @@ -440,7 +443,8 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, dev); struct virtio_vsock_pkt *pkt; - int head, pkts = 0, total_len = 0; + int ret, pkts = 0, total_len = 0; + struct vhost_buf buf; unsigned int out, in; bool added = false; @@ -461,12 +465,13 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) goto no_more_replies; } - head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), -, , NULL, NULL); - if (head < 0) + ret = vhost_get_avail_buf(vq, , + vq->iov, ARRAY_SIZE(vq->iov), + , , NULL, NULL); + if (ret < 0) break; - if (head == vq->num) { + if (!ret) { if (unlikely(vhost_enable_notify(>dev, vq))) { vhost_disable_notify(>dev, vq); continue; @@ -494,7 +499,8 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) virtio_transport_free_pkt(pkt); len += sizeof(pkt->hdr); - vhost_add_used(vq, head, len); + buf.in_len = len; + vhost_put_used_buf(vq, ); total_len += len; added = true; } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len))); -- MST ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH RFC v7 04/14] vhost/net: pass net specific struct pointer
In preparation for further cleanup, pass net specific pointer to ubuf callbacks so we can move net specific fields out to net structures. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index bf5e1d81ae25..ff594eec8ae3 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -94,7 +94,7 @@ struct vhost_net_ubuf_ref { */ atomic_t refcount; wait_queue_head_t wait; - struct vhost_virtqueue *vq; + struct vhost_net_virtqueue *nvq; }; #define VHOST_NET_BATCH 64 @@ -231,7 +231,7 @@ static void vhost_net_enable_zcopy(int vq) } static struct vhost_net_ubuf_ref * -vhost_net_ubuf_alloc(struct vhost_virtqueue *vq, bool zcopy) +vhost_net_ubuf_alloc(struct vhost_net_virtqueue *nvq, bool zcopy) { struct vhost_net_ubuf_ref *ubufs; /* No zero copy backend? Nothing to count. */ @@ -242,7 +242,7 @@ vhost_net_ubuf_alloc(struct vhost_virtqueue *vq, bool zcopy) return ERR_PTR(-ENOMEM); atomic_set(>refcount, 1); init_waitqueue_head(>wait); - ubufs->vq = vq; + ubufs->nvq = nvq; return ubufs; } @@ -384,13 +384,13 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net, static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) { struct vhost_net_ubuf_ref *ubufs = ubuf->ctx; - struct vhost_virtqueue *vq = ubufs->vq; + struct vhost_net_virtqueue *nvq = ubufs->nvq; int cnt; rcu_read_lock_bh(); /* set len to mark this desc buffers done DMA */ - vq->heads[ubuf->desc].len = success ? + nvq->vq.heads[ubuf->desc].in_len = success ? VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN; cnt = vhost_net_ubuf_put(ubufs); @@ -402,7 +402,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) * less than 10% of times). */ if (cnt <= 1 || !(cnt % 16)) - vhost_poll_queue(>poll); + vhost_poll_queue(>vq.poll); rcu_read_unlock_bh(); } @@ -1525,7 +1525,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) /* start polling new socket */ oldsock = vhost_vq_get_backend(vq); if (sock != oldsock) { - ubufs = vhost_net_ubuf_alloc(vq, + ubufs = vhost_net_ubuf_alloc(nvq, sock && vhost_sock_zcopy(sock)); if (IS_ERR(ubufs)) { r = PTR_ERR(ubufs); -- MST ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH RFC v7 10/14] vhost/net: avoid iov length math
Now that API exposes buffer length, we no longer need to scan IOVs to figure it out. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 8 +++- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 830fe84912a5..0b509be8d7b1 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -607,11 +607,9 @@ static bool vhost_exceeds_maxpend(struct vhost_net *net) } static size_t init_iov_iter(struct vhost_virtqueue *vq, struct iov_iter *iter, - size_t hdr_size, int out) + size_t len, size_t hdr_size, int out) { /* Skip header. TODO: support TSO. */ - size_t len = iov_length(vq->iov, out); - iov_iter_init(iter, WRITE, vq->iov, out, len); iov_iter_advance(iter, hdr_size); @@ -640,7 +638,7 @@ static int get_tx_bufs(struct vhost_net *net, } /* Sanity check */ - *len = init_iov_iter(vq, >msg_iter, nvq->vhost_hlen, *out); + *len = init_iov_iter(vq, >msg_iter, buf->out_len, nvq->vhost_hlen, *out); if (*len == 0) { vq_err(vq, "Unexpected header len for TX: %zd expected %zd\n", *len, nvq->vhost_hlen); @@ -1080,7 +1078,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, nlogs += *log_num; log += *log_num; } - len = iov_length(vq->iov + seg, in); + len = bufs[bufcount].in_len; datalen -= len; ++bufcount; seg += in; -- MST ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH RFC v7 06/14] vhost: format-independent API for used buffers
Add a new API that doesn't assume used ring, heads, etc. For now, we keep the old APIs around to make it easier to convert drivers. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 52 ++- drivers/vhost/vhost.h | 17 +- 2 files changed, 58 insertions(+), 11 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 506208b63126..e5763d81bf0f 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2339,13 +2339,12 @@ static void unfetch_descs(struct vhost_virtqueue *vq) * number of output then some number of input descriptors, it's actually two * iovecs, but we pack them into one and note how many of each there were. * - * This function returns the descriptor number found, or vq->num (which is - * never a valid descriptor number) if none was found. A negative code is - * returned on error. */ -int vhost_get_vq_desc(struct vhost_virtqueue *vq, - struct iovec iov[], unsigned int iov_size, - unsigned int *out_num, unsigned int *in_num, - struct vhost_log *log, unsigned int *log_num) + * This function returns a value > 0 if a descriptor was found, or 0 if none were found. + * A negative code is returned on error. */ +int vhost_get_avail_buf(struct vhost_virtqueue *vq, struct vhost_buf *buf, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num) { int ret = fetch_descs(vq); int i; @@ -2358,6 +2357,8 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, *out_num = *in_num = 0; if (unlikely(log)) *log_num = 0; + buf->in_len = buf->out_len = 0; + buf->descs = 0; for (i = vq->first_desc; i < vq->ndescs; ++i) { unsigned iov_count = *in_num + *out_num; @@ -2387,6 +2388,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, /* If this is an input descriptor, * increment that count. */ *in_num += ret; + buf->in_len += desc->len; if (unlikely(log && ret)) { log[*log_num].addr = desc->addr; log[*log_num].len = desc->len; @@ -2402,9 +2404,11 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, goto err; } *out_num += ret; + buf->out_len += desc->len; } - ret = desc->id; + buf->id = desc->id; + ++buf->descs; if (!(desc->flags & VRING_DESC_F_NEXT)) break; @@ -2412,14 +2416,22 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, vq->first_desc = i + 1; - return ret; + return 1; err: unfetch_descs(vq); return ret ? ret : vq->num; } -EXPORT_SYMBOL_GPL(vhost_get_vq_desc); +EXPORT_SYMBOL_GPL(vhost_get_avail_buf); + +/* Reverse the effect of vhost_get_avail_buf. Useful for error handling. */ +void vhost_discard_avail_bufs(struct vhost_virtqueue *vq, + struct vhost_buf *buf, unsigned count) +{ + vhost_discard_vq_desc(vq, count); +} +EXPORT_SYMBOL_GPL(vhost_discard_avail_bufs); /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */ void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n) @@ -2511,6 +2523,26 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) } EXPORT_SYMBOL_GPL(vhost_add_used); +int vhost_put_used_buf(struct vhost_virtqueue *vq, struct vhost_buf *buf) +{ + return vhost_add_used(vq, buf->id, buf->in_len); +} +EXPORT_SYMBOL_GPL(vhost_put_used_buf); + +int vhost_put_used_n_bufs(struct vhost_virtqueue *vq, + struct vhost_buf *bufs, unsigned count) +{ + unsigned i; + + for (i = 0; i < count; ++i) { + vq->heads[i].id = cpu_to_vhost32(vq, bufs[i].id); + vq->heads[i].len = cpu_to_vhost32(vq, bufs[i].in_len); + } + + return vhost_add_used_n(vq, vq->heads, count); +} +EXPORT_SYMBOL_GPL(vhost_put_used_n_bufs); + static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { __u16 old, new; diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index fed36af5c444..28eea0155efb 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -67,6 +67,13 @@ struct vhost_desc { u16 id; }; +struct vhost_buf { + u32 out_len; + u32 in_len; + u16 descs; + u16 id; +}; + /* The virtqueue structure describes a queue attached to a device. */ struct vhost_virtqueue { struct vhost_dev *dev; @@ -195,7 +202,12 @@ int vhost_get_vq_desc(struct vhost_virtqueue *,
[PATCH RFC v7 07/14] fixup! vhost: format-independent API for used buffers
--- drivers/vhost/vhost.c | 23 ++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index e5763d81bf0f..7a587b13095c 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2421,7 +2421,7 @@ int vhost_get_avail_buf(struct vhost_virtqueue *vq, struct vhost_buf *buf, err: unfetch_descs(vq); - return ret ? ret : vq->num; + return ret; } EXPORT_SYMBOL_GPL(vhost_get_avail_buf); @@ -2433,6 +2433,27 @@ void vhost_discard_avail_bufs(struct vhost_virtqueue *vq, } EXPORT_SYMBOL_GPL(vhost_discard_avail_bufs); +/* This function returns the descriptor number found, or vq->num (which is + * never a valid descriptor number) if none was found. A negative code is + * returned on error. */ +int vhost_get_vq_desc(struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num) +{ + struct vhost_buf buf; + int ret = vhost_get_avail_buf(vq, , + iov, iov_size, out_num, in_num, + log, log_num); + + if (likely(ret > 0)) + return buf->id; + if (likely(!ret)) + return vq->num; + return ret; +} +EXPORT_SYMBOL_GPL(vhost_get_vq_desc); + /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */ void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n) { -- MST ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH RFC v7 11/14] vhost/test: convert to the buf API
Signed-off-by: Michael S. Tsirkin --- drivers/vhost/test.c | 20 +++- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 7d69778aaa26..12304eb8da15 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -44,9 +44,10 @@ static void handle_vq(struct vhost_test *n) { struct vhost_virtqueue *vq = >vqs[VHOST_TEST_VQ]; unsigned out, in; - int head; + int ret; size_t len, total_len = 0; void *private; + struct vhost_buf buf; mutex_lock(>mutex); private = vhost_vq_get_backend(vq); @@ -58,15 +59,15 @@ static void handle_vq(struct vhost_test *n) vhost_disable_notify(>dev, vq); for (;;) { - head = vhost_get_vq_desc(vq, vq->iov, -ARRAY_SIZE(vq->iov), -, , -NULL, NULL); + ret = vhost_get_avail_buf(vq, , vq->iov, + ARRAY_SIZE(vq->iov), + , , + NULL, NULL); /* On error, stop handling until the next kick. */ - if (unlikely(head < 0)) + if (unlikely(ret < 0)) break; /* Nothing new? Wait for eventfd to tell us they refilled. */ - if (head == vq->num) { + if (!ret) { if (unlikely(vhost_enable_notify(>dev, vq))) { vhost_disable_notify(>dev, vq); continue; @@ -78,13 +79,14 @@ static void handle_vq(struct vhost_test *n) "out %d, int %d\n", out, in); break; } - len = iov_length(vq->iov, out); + len = buf.out_len; /* Sanity check */ if (!len) { vq_err(vq, "Unexpected 0 len for TX\n"); break; } - vhost_add_used_and_signal(>dev, vq, head, 0); + vhost_put_used_buf(vq, ); + vhost_signal(>dev, vq); total_len += len; if (unlikely(vhost_exceeds_weight(vq, 0, total_len))) break; -- MST ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH RFC v7 05/14] vhost: reorder functions
Reorder functions in the file to not rely on forward declarations, in preparation to making them static down the road. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 26 +- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 28f324fd77df..506208b63126 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2429,19 +2429,6 @@ void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n) } EXPORT_SYMBOL_GPL(vhost_discard_vq_desc); -/* After we've used one of their buffers, we tell them about it. We'll then - * want to notify the guest, using eventfd. */ -int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) -{ - struct vring_used_elem heads = { - cpu_to_vhost32(vq, head), - cpu_to_vhost32(vq, len) - }; - - return vhost_add_used_n(vq, , 1); -} -EXPORT_SYMBOL_GPL(vhost_add_used); - static int __vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, unsigned count) @@ -2511,6 +2498,19 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, } EXPORT_SYMBOL_GPL(vhost_add_used_n); +/* After we've used one of their buffers, we tell them about it. We'll then + * want to notify the guest, using eventfd. */ +int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) +{ + struct vring_used_elem heads = { + cpu_to_vhost32(vq, head), + cpu_to_vhost32(vq, len) + }; + + return vhost_add_used_n(vq, , 1); +} +EXPORT_SYMBOL_GPL(vhost_add_used); + static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { __u16 old, new; -- MST ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH RFC v6 02/11] vhost: use batched get_vq_desc version
On Wed, Jun 10, 2020 at 11:14:49AM +0800, Jason Wang wrote: > > On 2020/6/8 下午8:52, Michael S. Tsirkin wrote: > > As testing shows no performance change, switch to that now. > > > > Signed-off-by: Michael S. Tsirkin > > Signed-off-by: Eugenio Pérez > > Link: https://lore.kernel.org/r/20200401183118.8334-3-epere...@redhat.com > > Signed-off-by: Michael S. Tsirkin > > --- > > drivers/vhost/test.c | 2 +- > > drivers/vhost/vhost.c | 318 -- > > drivers/vhost/vhost.h | 7 +- > > 3 files changed, 65 insertions(+), 262 deletions(-) > > > > diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c > > index 0466921f4772..7d69778aaa26 100644 > > --- a/drivers/vhost/test.c > > +++ b/drivers/vhost/test.c > > @@ -119,7 +119,7 @@ static int vhost_test_open(struct inode *inode, struct > > file *f) > > dev = >dev; > > vqs[VHOST_TEST_VQ] = >vqs[VHOST_TEST_VQ]; > > n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick; > > - vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV, > > + vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV + 64, > >VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT, true, NULL); > > f->private_data = n; > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > > index 180b7b58c76b..41d6b132c234 100644 > > --- a/drivers/vhost/vhost.c > > +++ b/drivers/vhost/vhost.c > > @@ -304,6 +304,7 @@ static void vhost_vq_reset(struct vhost_dev *dev, > > { > > vq->num = 1; > > vq->ndescs = 0; > > + vq->first_desc = 0; > > vq->desc = NULL; > > vq->avail = NULL; > > vq->used = NULL; > > @@ -372,6 +373,11 @@ static int vhost_worker(void *data) > > return 0; > > } > > +static int vhost_vq_num_batch_descs(struct vhost_virtqueue *vq) > > +{ > > + return vq->max_descs - UIO_MAXIOV; > > +} > > + > > static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) > > { > > kfree(vq->descs); > > @@ -394,6 +400,9 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev > > *dev) > > for (i = 0; i < dev->nvqs; ++i) { > > vq = dev->vqs[i]; > > vq->max_descs = dev->iov_limit; > > + if (vhost_vq_num_batch_descs(vq) < 0) { > > + return -EINVAL; > > + } > > vq->descs = kmalloc_array(vq->max_descs, > > sizeof(*vq->descs), > > GFP_KERNEL); > > @@ -1610,6 +1619,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned > > int ioctl, void __user *arg > > vq->last_avail_idx = s.num; > > /* Forget the cached index value. */ > > vq->avail_idx = vq->last_avail_idx; > > + vq->ndescs = vq->first_desc = 0; > > break; > > case VHOST_GET_VRING_BASE: > > s.index = idx; > > @@ -2078,253 +2088,6 @@ static unsigned next_desc(struct vhost_virtqueue > > *vq, struct vring_desc *desc) > > return next; > > } > > -static int get_indirect(struct vhost_virtqueue *vq, > > - struct iovec iov[], unsigned int iov_size, > > - unsigned int *out_num, unsigned int *in_num, > > - struct vhost_log *log, unsigned int *log_num, > > - struct vring_desc *indirect) > > -{ > > - struct vring_desc desc; > > - unsigned int i = 0, count, found = 0; > > - u32 len = vhost32_to_cpu(vq, indirect->len); > > - struct iov_iter from; > > - int ret, access; > > - > > - /* Sanity check */ > > - if (unlikely(len % sizeof desc)) { > > - vq_err(vq, "Invalid length in indirect descriptor: " > > - "len 0x%llx not multiple of 0x%zx\n", > > - (unsigned long long)len, > > - sizeof desc); > > - return -EINVAL; > > - } > > - > > - ret = translate_desc(vq, vhost64_to_cpu(vq, indirect->addr), len, > > vq->indirect, > > -UIO_MAXIOV, VHOST_ACCESS_RO); > > - if (unlikely(ret < 0)) { > > - if (ret != -EAGAIN) > > - vq_err(vq, "Translation failure %d in indirect.\n", > > ret); > > - return ret; > > - } > > - iov_iter_init(, READ, vq->indirect, ret, len); > > - > > - /* We will use the result as an address to read from, so most > > -* architectures only need a compiler barrier here. */ > > - read_barrier_depends(); > > - > > - count = len / sizeof desc; > > - /* Buffers are chained via a 16 bit next field, so > > -* we can have at most 2^16 of these. */ > > - if (unlikely(count > USHRT_MAX + 1)) { > > - vq_err(vq, "Indirect buffer length too big: %d\n", > > - indirect->len); > > - return -E2BIG; > > - } > > - > > - do { > > - unsigned iov_count = *in_num + *out_num; > > - if (unlikely(++found > count)) { > > - vq_err(vq, "Loop detected: last one at %u " > > - "indirect size %u\n", > > -
Re: [PATCH] virtio-mem: silence a static checker warning
> Am 10.06.2020 um 10:59 schrieb Dan Carpenter : > > Smatch complains that "rc" can be uninitialized if we hit the "break;" > statement on the first iteration through the loop. I suspect that this > can't happen in real life, but returning a zero literal is cleaner and > silence the static checker warning. > Right, it‘s impossible in real life. Thanks! Acked-by: David Hildenbrand > Signed-off-by: Dan Carpenter > --- > drivers/virtio/virtio_mem.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c > index f658fe9149beb..893ef18060a02 100644 > --- a/drivers/virtio/virtio_mem.c > +++ b/drivers/virtio/virtio_mem.c > @@ -1192,7 +1192,7 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem > *vm, unsigned long mb_id, >VIRTIO_MEM_MB_STATE_OFFLINE); >} > > -return rc; > +return 0; > } > > /* > -- > 2.26.2 > ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH] vhost_vdpa: Fix potential underflow in vhost_vdpa_mmap()
On Wed, Jun 10, 2020 at 11:58:52AM +0300, Dan Carpenter wrote: > The "vma->vm_pgoff" variable is an unsigned long so if it's larger than > INT_MAX then "index" can be negative leading to an underflow. Fix this > by changing the type of "index" to "unsigned long". > > Fixes: ddd89d0a059d ("vhost_vdpa: support doorbell mapping via mmap") > Signed-off-by: Dan Carpenter Applied, thanks! > --- > drivers/vhost/vdpa.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c > index 7580e34f76c10..a54b60d6623f0 100644 > --- a/drivers/vhost/vdpa.c > +++ b/drivers/vhost/vdpa.c > @@ -818,7 +818,7 @@ static int vhost_vdpa_mmap(struct file *file, struct > vm_area_struct *vma) > struct vdpa_device *vdpa = v->vdpa; > const struct vdpa_config_ops *ops = vdpa->config; > struct vdpa_notification_area notify; > - int index = vma->vm_pgoff; > + unsigned long index = vma->vm_pgoff; > > if (vma->vm_end - vma->vm_start != PAGE_SIZE) > return -EINVAL; > -- > 2.26.2 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH] virtio-mem: silence a static checker warning
On Wed, Jun 10, 2020 at 11:59:11AM +0300, Dan Carpenter wrote: > Smatch complains that "rc" can be uninitialized if we hit the "break;" > statement on the first iteration through the loop. I suspect that this > can't happen in real life, but returning a zero literal is cleaner and > silence the static checker warning. > > Signed-off-by: Dan Carpenter > --- > drivers/virtio/virtio_mem.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) Applied, thanks. > diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c > index f658fe9149beb..893ef18060a02 100644 > --- a/drivers/virtio/virtio_mem.c > +++ b/drivers/virtio/virtio_mem.c > @@ -1192,7 +1192,7 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem > *vm, unsigned long mb_id, > VIRTIO_MEM_MB_STATE_OFFLINE); > } > > - return rc; > + return 0; > } > > /* > -- > 2.26.2 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH] vhost_vdpa: Fix potential underflow in vhost_vdpa_mmap()
The "vma->vm_pgoff" variable is an unsigned long so if it's larger than INT_MAX then "index" can be negative leading to an underflow. Fix this by changing the type of "index" to "unsigned long". Fixes: ddd89d0a059d ("vhost_vdpa: support doorbell mapping via mmap") Signed-off-by: Dan Carpenter --- drivers/vhost/vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 7580e34f76c10..a54b60d6623f0 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -818,7 +818,7 @@ static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma) struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; struct vdpa_notification_area notify; - int index = vma->vm_pgoff; + unsigned long index = vma->vm_pgoff; if (vma->vm_end - vma->vm_start != PAGE_SIZE) return -EINVAL; -- 2.26.2 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v2] mm/balloon_compaction: Fix trivial spelling
Hello! On 09.06.2020 17:34, Kieran Bingham wrote: The word 'descriptor' is misspelled throughout the tree. Fix it up accordingly: decriptors -> descriptors decriptor -> descriptor really. ;-) Signed-off-by: Kieran Bingham Reviewed-by: David Hildenbrand --- mm/balloon_compaction.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index 26de020aae7b..907fefde2572 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -58,7 +58,7 @@ EXPORT_SYMBOL_GPL(balloon_page_list_enqueue); /** * balloon_page_list_dequeue() - removes pages from balloon's page list and * returns a list of the pages. - * @b_dev_info: balloon device decriptor where we will grab a page from. + * @b_dev_info: balloon device descriptor where we will grab a page from. * @pages: pointer to the list of pages that would be returned to the caller. * @n_req_pages: number of requested pages. * @@ -157,7 +157,7 @@ EXPORT_SYMBOL_GPL(balloon_page_enqueue); /* * balloon_page_dequeue - removes a page from balloon's page list and returns * its address to allow the driver to release the page. - * @b_dev_info: balloon device decriptor where we will grab a page from. + * @b_dev_info: balloon device descriptor where we will grab a page from. * * Driver must call this function to properly dequeue a previously enqueued page * before definitively releasing it back to the guest system. MBR, Sergei ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH] virtio-mem: silence a static checker warning
Smatch complains that "rc" can be uninitialized if we hit the "break;" statement on the first iteration through the loop. I suspect that this can't happen in real life, but returning a zero literal is cleaner and silence the static checker warning. Signed-off-by: Dan Carpenter --- drivers/virtio/virtio_mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index f658fe9149beb..893ef18060a02 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -1192,7 +1192,7 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id, VIRTIO_MEM_MB_STATE_OFFLINE); } - return rc; + return 0; } /* -- 2.26.2 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH V3] vdpa: introduce virtio pci driver
On Wed, Jun 10, 2020 at 04:25:06PM +0800, Jason Wang wrote: > > > + > > > +#define VP_VDPA_FEATURES \ > > > + ((1ULL << VIRTIO_F_ANY_LAYOUT) | \ > > > > This is presumably for transitional devices only. In fact looking at > > code it seems that only net in legacy mode accepts VIRTIO_F_ANY_LAYOUT. > > Spec violation I guess ... but what should we do? Relax the spec > > or fix drivers? > > > I don't get how it violates the spec. Spec says transitional drivers must ack VIRTIO_F_ANY_LAYOUT But grep for VIRTIO_F_ANY_LAYOUT and you will see they don't. Only legacy virtio net does. Maybe it should have been transitional drivers when using the lgacy interface, but there you are. > > > > > > > > + (1ULL << VIRTIO_F_VERSION_1) | \ > > > + (1ULL << VIRTIO_F_ORDER_PLATFORM) | \ > > > + (1ULL << VIRTIO_F_IOMMU_PLATFORM)) > > > + > > > +struct vp_vring { > > > + void __iomem *notify; > > > + char msix_name[256]; > > > + resource_size_t notify_pa; > > > + struct vdpa_callback cb; > > > + int irq; > > > +}; > > > + > > > +struct vp_vdpa { > > > + struct vdpa_device vdpa; > > > + struct pci_dev *pdev; > > > + > > > + struct virtio_device_id id; > > > + > > > + struct vp_vring vring[VP_VDPA_MAX_QUEUE]; > > > + > > > + /* The IO mapping for the PCI config space */ > > > + void __iomem * const *base; > > > + struct virtio_pci_common_cfg __iomem *common; > > > + void __iomem *device; > > > + /* Base of vq notifications */ > > > + void __iomem *notify; > > > + > > > + /* Multiplier for queue_notify_off. */ > > > + u32 notify_off_multiplier; > > > + > > > + int modern_bars; > > > + int vectors; > > > +}; > > > + > > > +static struct vp_vdpa *vdpa_to_vp(struct vdpa_device *vdpa) > > > +{ > > > + return container_of(vdpa, struct vp_vdpa, vdpa); > > > +} > > > + > > > +/* > > > + * Type-safe wrappers for io accesses. > > > + * Use these to enforce at compile time the following spec requirement: > > > + * > > > + * The driver MUST access each field using the “natural” access > > > + * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses > > > + * for 16-bit fields and 8-bit accesses for 8-bit fields. > > > + */ > > > +static inline u8 vp_ioread8(u8 __iomem *addr) > > > +{ > > > + return ioread8(addr); > > > +} > > > +static inline u16 vp_ioread16(__le16 __iomem *addr) > > > +{ > > > + return ioread16(addr); > > > +} > > > + > > > +static inline u32 vp_ioread32(__le32 __iomem *addr) > > > +{ > > > + return ioread32(addr); > > > +} > > > + > > > +static inline void vp_iowrite8(u8 value, u8 __iomem *addr) > > > +{ > > > + iowrite8(value, addr); > > > +} > > > + > > > +static inline void vp_iowrite16(u16 value, __le16 __iomem *addr) > > > +{ > > > + iowrite16(value, addr); > > > +} > > > + > > > +static inline void vp_iowrite32(u32 value, __le32 __iomem *addr) > > > +{ > > > + iowrite32(value, addr); > > > +} > > > + > > > +static void vp_iowrite64_twopart(u64 val, > > > + __le32 __iomem *lo, __le32 __iomem *hi) > > > +{ > > > + vp_iowrite32((u32)val, lo); > > > + vp_iowrite32(val >> 32, hi); > > > +} > > > + > > > +static int find_capability(struct pci_dev *dev, u8 cfg_type, > > > +u32 ioresource_types, int *bars) > > > +{ > > > + int pos; > > > + > > > + for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR); > > > + pos > 0; > > > + pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) { > > > + u8 type, bar; > > > + > > > + pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap, > > > + cfg_type), > > > + ); > > > + pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap, > > > + bar), > > > + ); > > > + > > > + /* Ignore structures with reserved BAR values */ > > > + if (bar > 0x5) > > > + continue; > > > + > > > + if (type == cfg_type) { > > > + if (pci_resource_len(dev, bar) && > > > + pci_resource_flags(dev, bar) & ioresource_types) { > > > + *bars |= (1 << bar); > > > + return pos; > > > + } > > > + } > > > + } > > > + return 0; > > > +} > > > + > > > +static void __iomem *map_capability(struct vp_vdpa *vp_vdpa, int off, > > > + resource_size_t *pa) > > > +{ > > > + struct pci_dev *pdev = vp_vdpa->pdev; > > > + u32 offset; > > > + u8 bar; > > > + > > > + pci_read_config_byte(pdev, > > > + off + offsetof(struct virtio_pci_cap, bar), > > > + ); > > > + pci_read_config_dword(pdev, > > > + off + offsetof(struct virtio_pci_cap, offset), > > > + ); > > > + > > > + if (pa) > > > + *pa = pci_resource_start(pdev, bar) + offset; > > > + > > > + return
Re: [PATCH V3] vdpa: introduce virtio pci driver
On 2020/6/10 下午3:08, Michael S. Tsirkin wrote: On Wed, Jun 10, 2020 at 02:52:17PM +0800, Jason Wang wrote: This patch introduce a vDPA driver for virtio-pci device. It bridges the virtio-pci control command to the vDPA bus. This will be used for developing new features for both software vDPA framework and hardware vDPA feature. Compared to vdpa_sim, it has several advantages: - it's a real device driver which allow us to play with real hardware features - type independent instead of networking specific Note that since virtio specification does not support get/restore virtqueue state. So we can not use this driver for VM. This can be addressed by extending the virtio specification. Consider the driver is mainly for testing and development for vDPA features, it can only be bound via dynamic ids to make sure it's not conflict with the drivers like virtio-pci or IFCVF. Signed-off-by: Jason Wang --- Changes from V2: - rebase on vhost.git vhost branch --- drivers/vdpa/Kconfig | 8 + drivers/vdpa/Makefile | 1 + drivers/vdpa/vp_vdpa/Makefile | 2 + drivers/vdpa/vp_vdpa/vp_vdpa.c | 601 + 4 files changed, 612 insertions(+) create mode 100644 drivers/vdpa/vp_vdpa/Makefile create mode 100644 drivers/vdpa/vp_vdpa/vp_vdpa.c diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index 3e1ceb8e9f2b..deb85e43a4c2 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -28,4 +28,12 @@ config IFCVF To compile this driver as a module, choose M here: the module will be called ifcvf. +config VP_VDPA + tristate "Virtio PCI bridge vDPA driver" + depends on PCI_MSI + help + This kernel module that bridges virtio PCI device to vDPA + bus. It allows us to test and develop vDPA subsystem inside + an VM with the emulated virtio-pci device + endif # VDPA diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile index 8bbb686ca7a2..37d00f49b3bf 100644 --- a/drivers/vdpa/Makefile +++ b/drivers/vdpa/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_VDPA) += vdpa.o obj-$(CONFIG_VDPA_SIM) += vdpa_sim/ obj-$(CONFIG_IFCVF)+= ifcvf/ +obj-$(CONFIG_VP_VDPA)+= vp_vdpa/ diff --git a/drivers/vdpa/vp_vdpa/Makefile b/drivers/vdpa/vp_vdpa/Makefile new file mode 100644 index ..231088d3af7d --- /dev/null +++ b/drivers/vdpa/vp_vdpa/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_VP_VDPA) += vp_vdpa.o diff --git a/drivers/vdpa/vp_vdpa/vp_vdpa.c b/drivers/vdpa/vp_vdpa/vp_vdpa.c new file mode 100644 index ..2070298ab9fc --- /dev/null +++ b/drivers/vdpa/vp_vdpa/vp_vdpa.c @@ -0,0 +1,601 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vDPA bridge driver for modern virtio-pci device And judging by the code, transitional too? Or maybe we should drop transitional device support here. Yes, I will simply drop the transitional device support. + * + * Copyright (c) 2020, Red Hat Inc. All rights reserved. + * Author: Jason Wang + * + * Based on virtio_pci_modern.c. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* TBD: read from config space */ +#define VP_VDPA_MAX_QUEUE 2 We need to fix that right? Otherwise lots of devices break ... Yes, will fix. +#define VP_VDPA_DRIVER_NAME "vp_vdpa" not sure why you need this macro ... Used only once, so I will remove this. + +#define VP_VDPA_FEATURES \ + ((1ULL << VIRTIO_F_ANY_LAYOUT)| \ This is presumably for transitional devices only. In fact looking at code it seems that only net in legacy mode accepts VIRTIO_F_ANY_LAYOUT. Spec violation I guess ... but what should we do? Relax the spec or fix drivers? I don't get how it violates the spec. +(1ULL << VIRTIO_F_VERSION_1) | \ +(1ULL << VIRTIO_F_ORDER_PLATFORM)| \ +(1ULL << VIRTIO_F_IOMMU_PLATFORM)) + +struct vp_vring { + void __iomem *notify; + char msix_name[256]; + resource_size_t notify_pa; + struct vdpa_callback cb; + int irq; +}; + +struct vp_vdpa { + struct vdpa_device vdpa; + struct pci_dev *pdev; + + struct virtio_device_id id; + + struct vp_vring vring[VP_VDPA_MAX_QUEUE]; + + /* The IO mapping for the PCI config space */ + void __iomem * const *base; + struct virtio_pci_common_cfg __iomem *common; + void __iomem *device; + /* Base of vq notifications */ + void __iomem *notify; + + /* Multiplier for queue_notify_off. */ + u32 notify_off_multiplier; + + int modern_bars; + int vectors; +}; + +static struct vp_vdpa *vdpa_to_vp(struct vdpa_device *vdpa) +{ + return container_of(vdpa, struct vp_vdpa, vdpa); +} + +/* + * Type-safe wrappers for io accesses. + * Use these to enforce at compile time the following spec requirement: + * + * The driver MUST access each
Re: [PATCH V3] vdpa: introduce virtio pci driver
On Wed, Jun 10, 2020 at 02:52:17PM +0800, Jason Wang wrote: > This patch introduce a vDPA driver for virtio-pci device. It bridges > the virtio-pci control command to the vDPA bus. This will be used for > developing new features for both software vDPA framework and hardware > vDPA feature. > > Compared to vdpa_sim, it has several advantages: > > - it's a real device driver which allow us to play with real hardware > features > - type independent instead of networking specific > > Note that since virtio specification does not support get/restore > virtqueue state. So we can not use this driver for VM. This can be > addressed by extending the virtio specification. > > Consider the driver is mainly for testing and development for vDPA > features, it can only be bound via dynamic ids to make sure it's not > conflict with the drivers like virtio-pci or IFCVF. > > Signed-off-by: Jason Wang > --- > Changes from V2: > - rebase on vhost.git vhost branch > --- > drivers/vdpa/Kconfig | 8 + > drivers/vdpa/Makefile | 1 + > drivers/vdpa/vp_vdpa/Makefile | 2 + > drivers/vdpa/vp_vdpa/vp_vdpa.c | 601 + > 4 files changed, 612 insertions(+) > create mode 100644 drivers/vdpa/vp_vdpa/Makefile > create mode 100644 drivers/vdpa/vp_vdpa/vp_vdpa.c > > diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig > index 3e1ceb8e9f2b..deb85e43a4c2 100644 > --- a/drivers/vdpa/Kconfig > +++ b/drivers/vdpa/Kconfig > @@ -28,4 +28,12 @@ config IFCVF > To compile this driver as a module, choose M here: the module will > be called ifcvf. > > +config VP_VDPA > + tristate "Virtio PCI bridge vDPA driver" > + depends on PCI_MSI > + help > + This kernel module that bridges virtio PCI device to vDPA > + bus. It allows us to test and develop vDPA subsystem inside > + an VM with the emulated virtio-pci device > + > endif # VDPA > diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile > index 8bbb686ca7a2..37d00f49b3bf 100644 > --- a/drivers/vdpa/Makefile > +++ b/drivers/vdpa/Makefile > @@ -2,3 +2,4 @@ > obj-$(CONFIG_VDPA) += vdpa.o > obj-$(CONFIG_VDPA_SIM) += vdpa_sim/ > obj-$(CONFIG_IFCVF)+= ifcvf/ > +obj-$(CONFIG_VP_VDPA)+= vp_vdpa/ > diff --git a/drivers/vdpa/vp_vdpa/Makefile b/drivers/vdpa/vp_vdpa/Makefile > new file mode 100644 > index ..231088d3af7d > --- /dev/null > +++ b/drivers/vdpa/vp_vdpa/Makefile > @@ -0,0 +1,2 @@ > +# SPDX-License-Identifier: GPL-2.0 > +obj-$(CONFIG_VP_VDPA) += vp_vdpa.o > diff --git a/drivers/vdpa/vp_vdpa/vp_vdpa.c b/drivers/vdpa/vp_vdpa/vp_vdpa.c > new file mode 100644 > index ..2070298ab9fc > --- /dev/null > +++ b/drivers/vdpa/vp_vdpa/vp_vdpa.c > @@ -0,0 +1,601 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* > + * vDPA bridge driver for modern virtio-pci device And judging by the code, transitional too? Or maybe we should drop transitional device support here. > + * > + * Copyright (c) 2020, Red Hat Inc. All rights reserved. > + * Author: Jason Wang > + * > + * Based on virtio_pci_modern.c. > + */ > + > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > + > +/* TBD: read from config space */ > +#define VP_VDPA_MAX_QUEUE 2 We need to fix that right? Otherwise lots of devices break ... > +#define VP_VDPA_DRIVER_NAME "vp_vdpa" not sure why you need this macro ... > + > +#define VP_VDPA_FEATURES \ > + ((1ULL << VIRTIO_F_ANY_LAYOUT) | \ This is presumably for transitional devices only. In fact looking at code it seems that only net in legacy mode accepts VIRTIO_F_ANY_LAYOUT. Spec violation I guess ... but what should we do? Relax the spec or fix drivers? > + (1ULL << VIRTIO_F_VERSION_1) | \ > + (1ULL << VIRTIO_F_ORDER_PLATFORM) | \ > + (1ULL << VIRTIO_F_IOMMU_PLATFORM)) > + > +struct vp_vring { > + void __iomem *notify; > + char msix_name[256]; > + resource_size_t notify_pa; > + struct vdpa_callback cb; > + int irq; > +}; > + > +struct vp_vdpa { > + struct vdpa_device vdpa; > + struct pci_dev *pdev; > + > + struct virtio_device_id id; > + > + struct vp_vring vring[VP_VDPA_MAX_QUEUE]; > + > + /* The IO mapping for the PCI config space */ > + void __iomem * const *base; > + struct virtio_pci_common_cfg __iomem *common; > + void __iomem *device; > + /* Base of vq notifications */ > + void __iomem *notify; > + > + /* Multiplier for queue_notify_off. */ > + u32 notify_off_multiplier; > + > + int modern_bars; > + int vectors; > +}; > + > +static struct vp_vdpa *vdpa_to_vp(struct vdpa_device *vdpa) > +{ > + return container_of(vdpa, struct vp_vdpa, vdpa); > +} > + > +/* > + * Type-safe wrappers for io accesses. > + * Use these to enforce at compile time the following spec requirement: > + * > + * The driver MUST access each field using the “natural” access >
[PATCH V3] vdpa: introduce virtio pci driver
This patch introduce a vDPA driver for virtio-pci device. It bridges the virtio-pci control command to the vDPA bus. This will be used for developing new features for both software vDPA framework and hardware vDPA feature. Compared to vdpa_sim, it has several advantages: - it's a real device driver which allow us to play with real hardware features - type independent instead of networking specific Note that since virtio specification does not support get/restore virtqueue state. So we can not use this driver for VM. This can be addressed by extending the virtio specification. Consider the driver is mainly for testing and development for vDPA features, it can only be bound via dynamic ids to make sure it's not conflict with the drivers like virtio-pci or IFCVF. Signed-off-by: Jason Wang --- Changes from V2: - rebase on vhost.git vhost branch --- drivers/vdpa/Kconfig | 8 + drivers/vdpa/Makefile | 1 + drivers/vdpa/vp_vdpa/Makefile | 2 + drivers/vdpa/vp_vdpa/vp_vdpa.c | 601 + 4 files changed, 612 insertions(+) create mode 100644 drivers/vdpa/vp_vdpa/Makefile create mode 100644 drivers/vdpa/vp_vdpa/vp_vdpa.c diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index 3e1ceb8e9f2b..deb85e43a4c2 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -28,4 +28,12 @@ config IFCVF To compile this driver as a module, choose M here: the module will be called ifcvf. +config VP_VDPA + tristate "Virtio PCI bridge vDPA driver" + depends on PCI_MSI + help + This kernel module that bridges virtio PCI device to vDPA + bus. It allows us to test and develop vDPA subsystem inside + an VM with the emulated virtio-pci device + endif # VDPA diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile index 8bbb686ca7a2..37d00f49b3bf 100644 --- a/drivers/vdpa/Makefile +++ b/drivers/vdpa/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_VDPA) += vdpa.o obj-$(CONFIG_VDPA_SIM) += vdpa_sim/ obj-$(CONFIG_IFCVF)+= ifcvf/ +obj-$(CONFIG_VP_VDPA)+= vp_vdpa/ diff --git a/drivers/vdpa/vp_vdpa/Makefile b/drivers/vdpa/vp_vdpa/Makefile new file mode 100644 index ..231088d3af7d --- /dev/null +++ b/drivers/vdpa/vp_vdpa/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_VP_VDPA) += vp_vdpa.o diff --git a/drivers/vdpa/vp_vdpa/vp_vdpa.c b/drivers/vdpa/vp_vdpa/vp_vdpa.c new file mode 100644 index ..2070298ab9fc --- /dev/null +++ b/drivers/vdpa/vp_vdpa/vp_vdpa.c @@ -0,0 +1,601 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vDPA bridge driver for modern virtio-pci device + * + * Copyright (c) 2020, Red Hat Inc. All rights reserved. + * Author: Jason Wang + * + * Based on virtio_pci_modern.c. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* TBD: read from config space */ +#define VP_VDPA_MAX_QUEUE 2 +#define VP_VDPA_DRIVER_NAME "vp_vdpa" + +#define VP_VDPA_FEATURES \ + ((1ULL << VIRTIO_F_ANY_LAYOUT) | \ +(1ULL << VIRTIO_F_VERSION_1) | \ +(1ULL << VIRTIO_F_ORDER_PLATFORM) | \ +(1ULL << VIRTIO_F_IOMMU_PLATFORM)) + +struct vp_vring { + void __iomem *notify; + char msix_name[256]; + resource_size_t notify_pa; + struct vdpa_callback cb; + int irq; +}; + +struct vp_vdpa { + struct vdpa_device vdpa; + struct pci_dev *pdev; + + struct virtio_device_id id; + + struct vp_vring vring[VP_VDPA_MAX_QUEUE]; + + /* The IO mapping for the PCI config space */ + void __iomem * const *base; + struct virtio_pci_common_cfg __iomem *common; + void __iomem *device; + /* Base of vq notifications */ + void __iomem *notify; + + /* Multiplier for queue_notify_off. */ + u32 notify_off_multiplier; + + int modern_bars; + int vectors; +}; + +static struct vp_vdpa *vdpa_to_vp(struct vdpa_device *vdpa) +{ + return container_of(vdpa, struct vp_vdpa, vdpa); +} + +/* + * Type-safe wrappers for io accesses. + * Use these to enforce at compile time the following spec requirement: + * + * The driver MUST access each field using the “natural” access + * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses + * for 16-bit fields and 8-bit accesses for 8-bit fields. + */ +static inline u8 vp_ioread8(u8 __iomem *addr) +{ + return ioread8(addr); +} +static inline u16 vp_ioread16(__le16 __iomem *addr) +{ + return ioread16(addr); +} + +static inline u32 vp_ioread32(__le32 __iomem *addr) +{ + return ioread32(addr); +} + +static inline void vp_iowrite8(u8 value, u8 __iomem *addr) +{ + iowrite8(value, addr); +} + +static inline void vp_iowrite16(u16 value, __le16 __iomem *addr) +{ + iowrite16(value, addr); +} + +static inline void vp_iowrite32(u32 value, __le32 __iomem *addr) +{ + iowrite32(value, addr); +} +
Re: [PATCH RESEND V2] vdpa: introduce virtio pci driver
On 2020/6/10 下午2:21, Michael S. Tsirkin wrote: On Wed, Jun 10, 2020 at 02:16:26PM +0800, Jason Wang wrote: On 2020/6/10 下午2:07, Michael S. Tsirkin wrote: On Wed, Jun 10, 2020 at 01:49:51PM +0800, Jason Wang wrote: This patch introduce a vDPA driver for virtio-pci device. It bridges the virtio-pci control command to the vDPA bus. This will be used for developing new features for both software vDPA framework and hardware vDPA feature. Compared to vdpa_sim, it has several advantages: - it's a real device driver which allow us to play with real hardware features - type independent instead of networking specific Note that since virtio specification does not support get/restore virtqueue state. So we can not use this driver for VM. This can be addressed by extending the virtio specification. Consider the driver is mainly for testing and development for vDPA features, it can only be bound via dynamic ids to make sure it's not conflict with the drivers like virtio-pci or IFCVF. Signed-off-by: Jason Wang error: sha1 information is lacking or useless (drivers/vdpa/Kconfig). which tree is this on top of? Your vhost.git vhost branch, HEAD is bbea3bcfd1d6 vdpa: fix typos in the comments for __vdpa_alloc_device() Do I need to use other branch? Thanks No it's ok, I am just wondering why doesn't it apply then. I found the reason, I generate the patch on another branch whose base does not existed in the vhost branch. Will repost. Sorry. ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH RESEND V2] vdpa: introduce virtio pci driver
On Wed, Jun 10, 2020 at 02:16:26PM +0800, Jason Wang wrote: > > On 2020/6/10 下午2:07, Michael S. Tsirkin wrote: > > On Wed, Jun 10, 2020 at 01:49:51PM +0800, Jason Wang wrote: > > > This patch introduce a vDPA driver for virtio-pci device. It bridges > > > the virtio-pci control command to the vDPA bus. This will be used for > > > developing new features for both software vDPA framework and hardware > > > vDPA feature. > > > > > > Compared to vdpa_sim, it has several advantages: > > > > > > - it's a real device driver which allow us to play with real hardware > > >features > > > - type independent instead of networking specific > > > > > > Note that since virtio specification does not support get/restore > > > virtqueue state. So we can not use this driver for VM. This can be > > > addressed by extending the virtio specification. > > > > > > Consider the driver is mainly for testing and development for vDPA > > > features, it can only be bound via dynamic ids to make sure it's not > > > conflict with the drivers like virtio-pci or IFCVF. > > > > > > Signed-off-by: Jason Wang > > error: sha1 information is lacking or useless (drivers/vdpa/Kconfig). > > > > which tree is this on top of? > > > Your vhost.git vhost branch, HEAD is bbea3bcfd1d6 vdpa: fix typos in the > comments for __vdpa_alloc_device() > > Do I need to use other branch? > > Thanks No it's ok, I am just wondering why doesn't it apply then. > > > ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH RESEND V2] vdpa: introduce virtio pci driver
On 2020/6/10 下午2:07, Michael S. Tsirkin wrote: On Wed, Jun 10, 2020 at 01:49:51PM +0800, Jason Wang wrote: This patch introduce a vDPA driver for virtio-pci device. It bridges the virtio-pci control command to the vDPA bus. This will be used for developing new features for both software vDPA framework and hardware vDPA feature. Compared to vdpa_sim, it has several advantages: - it's a real device driver which allow us to play with real hardware features - type independent instead of networking specific Note that since virtio specification does not support get/restore virtqueue state. So we can not use this driver for VM. This can be addressed by extending the virtio specification. Consider the driver is mainly for testing and development for vDPA features, it can only be bound via dynamic ids to make sure it's not conflict with the drivers like virtio-pci or IFCVF. Signed-off-by: Jason Wang error: sha1 information is lacking or useless (drivers/vdpa/Kconfig). which tree is this on top of? Your vhost.git vhost branch, HEAD is bbea3bcfd1d6 vdpa: fix typos in the comments for __vdpa_alloc_device() Do I need to use other branch? Thanks ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH RESEND V2] vdpa: introduce virtio pci driver
On Wed, Jun 10, 2020 at 01:49:51PM +0800, Jason Wang wrote: > This patch introduce a vDPA driver for virtio-pci device. It bridges > the virtio-pci control command to the vDPA bus. This will be used for > developing new features for both software vDPA framework and hardware > vDPA feature. > > Compared to vdpa_sim, it has several advantages: > > - it's a real device driver which allow us to play with real hardware > features > - type independent instead of networking specific > > Note that since virtio specification does not support get/restore > virtqueue state. So we can not use this driver for VM. This can be > addressed by extending the virtio specification. > > Consider the driver is mainly for testing and development for vDPA > features, it can only be bound via dynamic ids to make sure it's not > conflict with the drivers like virtio-pci or IFCVF. > > Signed-off-by: Jason Wang error: sha1 information is lacking or useless (drivers/vdpa/Kconfig). which tree is this on top of? > --- > Changes since V1: > - use NULL id_table to allow dynamic ids only > - squash the doorbell reporting > --- > drivers/vdpa/Kconfig | 8 + > drivers/vdpa/Makefile | 1 + > drivers/vdpa/vp_vdpa/Makefile | 2 + > drivers/vdpa/vp_vdpa/vp_vdpa.c | 601 + > 4 files changed, 612 insertions(+) > create mode 100644 drivers/vdpa/vp_vdpa/Makefile > create mode 100644 drivers/vdpa/vp_vdpa/vp_vdpa.c > > diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig > index e8140065c8a5..5cef3a4872e3 100644 > --- a/drivers/vdpa/Kconfig > +++ b/drivers/vdpa/Kconfig > @@ -28,4 +28,12 @@ config IFCVF > To compile this driver as a module, choose M here: the module will > be called ifcvf. > > +config VP_VDPA > + tristate "Virtio PCI bridge vDPA driver" > + depends on PCI_MSI > + help > + This kernel module that bridges virtio PCI device to vDPA > + bus. It allows us to test and develop vDPA subsystem inside > + an VM with the emulated virtio-pci device > + > endif # VDPA > diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile > index 8bbb686ca7a2..37d00f49b3bf 100644 > --- a/drivers/vdpa/Makefile > +++ b/drivers/vdpa/Makefile > @@ -2,3 +2,4 @@ > obj-$(CONFIG_VDPA) += vdpa.o > obj-$(CONFIG_VDPA_SIM) += vdpa_sim/ > obj-$(CONFIG_IFCVF)+= ifcvf/ > +obj-$(CONFIG_VP_VDPA)+= vp_vdpa/ > diff --git a/drivers/vdpa/vp_vdpa/Makefile b/drivers/vdpa/vp_vdpa/Makefile > new file mode 100644 > index ..231088d3af7d > --- /dev/null > +++ b/drivers/vdpa/vp_vdpa/Makefile > @@ -0,0 +1,2 @@ > +# SPDX-License-Identifier: GPL-2.0 > +obj-$(CONFIG_VP_VDPA) += vp_vdpa.o > diff --git a/drivers/vdpa/vp_vdpa/vp_vdpa.c b/drivers/vdpa/vp_vdpa/vp_vdpa.c > new file mode 100644 > index ..2070298ab9fc > --- /dev/null > +++ b/drivers/vdpa/vp_vdpa/vp_vdpa.c > @@ -0,0 +1,601 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* > + * vDPA bridge driver for modern virtio-pci device > + * > + * Copyright (c) 2020, Red Hat Inc. All rights reserved. > + * Author: Jason Wang > + * > + * Based on virtio_pci_modern.c. > + */ > + > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > + > +/* TBD: read from config space */ > +#define VP_VDPA_MAX_QUEUE 2 > +#define VP_VDPA_DRIVER_NAME "vp_vdpa" > + > +#define VP_VDPA_FEATURES \ > + ((1ULL << VIRTIO_F_ANY_LAYOUT) | \ > + (1ULL << VIRTIO_F_VERSION_1) | \ > + (1ULL << VIRTIO_F_ORDER_PLATFORM) | \ > + (1ULL << VIRTIO_F_IOMMU_PLATFORM)) > + > +struct vp_vring { > + void __iomem *notify; > + char msix_name[256]; > + resource_size_t notify_pa; > + struct vdpa_callback cb; > + int irq; > +}; > + > +struct vp_vdpa { > + struct vdpa_device vdpa; > + struct pci_dev *pdev; > + > + struct virtio_device_id id; > + > + struct vp_vring vring[VP_VDPA_MAX_QUEUE]; > + > + /* The IO mapping for the PCI config space */ > + void __iomem * const *base; > + struct virtio_pci_common_cfg __iomem *common; > + void __iomem *device; > + /* Base of vq notifications */ > + void __iomem *notify; > + > + /* Multiplier for queue_notify_off. */ > + u32 notify_off_multiplier; > + > + int modern_bars; > + int vectors; > +}; > + > +static struct vp_vdpa *vdpa_to_vp(struct vdpa_device *vdpa) > +{ > + return container_of(vdpa, struct vp_vdpa, vdpa); > +} > + > +/* > + * Type-safe wrappers for io accesses. > + * Use these to enforce at compile time the following spec requirement: > + * > + * The driver MUST access each field using the “natural” access > + * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses > + * for 16-bit fields and 8-bit accesses for 8-bit fields. > + */ > +static inline u8 vp_ioread8(u8 __iomem *addr) > +{ > + return ioread8(addr); > +} > +static inline u16 vp_ioread16(__le16 __iomem *addr) > +{