Re: [PATCH] s390: protvirt: virtio: Refuse device without IOMMU

2020-06-10 Thread Jason Wang


On 2020/6/10 下午9:11, Pierre Morel wrote:

Protected Virtualisation protects the memory of the guest and
does not allow the host to access all of its memory.

Let's refuse a VIRTIO device which does not use IOMMU
protected access.

Signed-off-by: Pierre Morel 
---
  drivers/s390/virtio/virtio_ccw.c | 5 +
  1 file changed, 5 insertions(+)

diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 5730572b52cd..06ffbc96587a 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -986,6 +986,11 @@ static void virtio_ccw_set_status(struct virtio_device 
*vdev, u8 status)
if (!ccw)
return;
  
+	/* Protected Virtualisation guest needs IOMMU */

+   if (is_prot_virt_guest() &&
+   !__virtio_test_bit(vdev, VIRTIO_F_IOMMU_PLATFORM))
+   status &= ~VIRTIO_CONFIG_S_FEATURES_OK;
+
/* Write the status to the host. */
vcdev->dma_area->status = status;
ccw->cmd_code = CCW_CMD_WRITE_STATUS;



I wonder whether we need move it to virtio core instead of ccw.

I think the other memory protection technologies may suffer from this as 
well.


Thanks

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH RFC v6 02/11] vhost: use batched get_vq_desc version

2020-06-10 Thread Jason Wang


On 2020/6/10 下午7:05, Michael S. Tsirkin wrote:

+EXPORT_SYMBOL_GPL(vhost_get_vq_desc);
   /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
   void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
   {
+   unfetch_descs(vq);
vq->last_avail_idx -= n;

So unfetch_descs() has decreased last_avail_idx.
Can we fix this by letting unfetch_descs() return the number and then we can
do:

int d = unfetch_descs(vq);
vq->last_avail_idx -= (n > d) ? n - d: 0;

Thanks

That's intentional I think - we need both.



Yes, but:




Unfetch_descs drops the descriptors in the cache that were
*not returned to caller*  through get_vq_desc.

vhost_discard_vq_desc drops the ones that were returned through get_vq_desc.

Did I miss anything?


We could count some descriptors twice, consider the case e.g. we only
cache one descriptor:


fetch_descs()
    fetch_buf()
        last_avail_idx++;

Then we want to discard it:
vhost_discard_avail_buf(1)
    unfetch_descs()
        last_avail_idx--;
    last_avail_idx -= 1;

Thanks

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH V3] vdpa: introduce virtio pci driver

2020-06-10 Thread Jason Wang


On 2020/6/10 下午4:51, Michael S. Tsirkin wrote:

On Wed, Jun 10, 2020 at 04:25:06PM +0800, Jason Wang wrote:

+
+#define VP_VDPA_FEATURES \
+   ((1ULL << VIRTIO_F_ANY_LAYOUT)| \

This is presumably for transitional devices only.  In fact looking at
code it seems that only net in legacy mode accepts VIRTIO_F_ANY_LAYOUT.
Spec violation I guess ... but what should we do? Relax the spec
or fix drivers?


I don't get how it violates the spec.

Spec says transitional drivers must ack VIRTIO_F_ANY_LAYOUT



I don't see this in the spec, maybe you can point it to me?




But grep for VIRTIO_F_ANY_LAYOUT and you will see they don't.
Only legacy virtio net does.

Maybe it should have been
transitional drivers when using the legacy interface, but there
you are.




+(1ULL << VIRTIO_F_VERSION_1) | \
+(1ULL << VIRTIO_F_ORDER_PLATFORM)| \
+(1ULL << VIRTIO_F_IOMMU_PLATFORM))
+
+struct vp_vring {
+   void __iomem *notify;
+   char msix_name[256];
+   resource_size_t notify_pa;
+   struct vdpa_callback cb;
+   int irq;
+};
+
+struct vp_vdpa {
+   struct vdpa_device vdpa;
+   struct pci_dev *pdev;
+
+   struct virtio_device_id id;
+
+   struct vp_vring vring[VP_VDPA_MAX_QUEUE];
+
+   /* The IO mapping for the PCI config space */
+   void __iomem * const *base;
+   struct virtio_pci_common_cfg __iomem *common;
+   void __iomem *device;
+   /* Base of vq notifications */
+   void __iomem *notify;
+
+   /* Multiplier for queue_notify_off. */
+   u32 notify_off_multiplier;
+
+   int modern_bars;
+   int vectors;
+};
+
+static struct vp_vdpa *vdpa_to_vp(struct vdpa_device *vdpa)
+{
+   return container_of(vdpa, struct vp_vdpa, vdpa);
+}
+
+/*
+ * Type-safe wrappers for io accesses.
+ * Use these to enforce at compile time the following spec requirement:
+ *
+ * The driver MUST access each field using the “natural” access
+ * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses
+ * for 16-bit fields and 8-bit accesses for 8-bit fields.
+ */
+static inline u8 vp_ioread8(u8 __iomem *addr)
+{
+   return ioread8(addr);
+}
+static inline u16 vp_ioread16(__le16 __iomem *addr)
+{
+   return ioread16(addr);
+}
+
+static inline u32 vp_ioread32(__le32 __iomem *addr)
+{
+   return ioread32(addr);
+}
+
+static inline void vp_iowrite8(u8 value, u8 __iomem *addr)
+{
+   iowrite8(value, addr);
+}
+
+static inline void vp_iowrite16(u16 value, __le16 __iomem *addr)
+{
+   iowrite16(value, addr);
+}
+
+static inline void vp_iowrite32(u32 value, __le32 __iomem *addr)
+{
+   iowrite32(value, addr);
+}
+
+static void vp_iowrite64_twopart(u64 val,
+__le32 __iomem *lo, __le32 __iomem *hi)
+{
+   vp_iowrite32((u32)val, lo);
+   vp_iowrite32(val >> 32, hi);
+}
+
+static int find_capability(struct pci_dev *dev, u8 cfg_type,
+  u32 ioresource_types, int *bars)
+{
+   int pos;
+
+   for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR);
+pos > 0;
+pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) {
+   u8 type, bar;
+
+   pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
+cfg_type),
+);
+   pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
+bar),
+);
+
+   /* Ignore structures with reserved BAR values */
+   if (bar > 0x5)
+   continue;
+
+   if (type == cfg_type) {
+   if (pci_resource_len(dev, bar) &&
+   pci_resource_flags(dev, bar) & ioresource_types) {
+   *bars |= (1 << bar);
+   return pos;
+   }
+   }
+   }
+   return 0;
+}
+
+static void __iomem *map_capability(struct vp_vdpa *vp_vdpa, int off,
+   resource_size_t *pa)
+{
+   struct pci_dev *pdev = vp_vdpa->pdev;
+   u32 offset;
+   u8 bar;
+
+   pci_read_config_byte(pdev,
+off + offsetof(struct virtio_pci_cap, bar),
+);
+   pci_read_config_dword(pdev,
+ off + offsetof(struct virtio_pci_cap, offset),
+ );
+
+   if (pa)
+   *pa = pci_resource_start(pdev, bar) + offset;
+
+   return vp_vdpa->base[bar] + offset;
+}
+
+static u64 vp_vdpa_get_features(struct vdpa_device *vdpa)
+{
+   struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
+   u64 features;
+
+   vp_iowrite32(0, _vdpa->common->device_feature_select);
+   features = vp_ioread32(_vdpa->common->device_feature);
+   

Re: [GIT PULL] virtio: features, fixes

2020-06-10 Thread pr-tracker-bot
The pull request you sent on Wed, 10 Jun 2020 00:44:55 -0400:

> https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git tags/for_linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/09102704c67457c6cdea6c0394c34843484a852c

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [GIT PULL] virtio: features, fixes

2020-06-10 Thread Linus Torvalds
On Tue, Jun 9, 2020 at 9:45 PM Michael S. Tsirkin  wrote:
>
>   I also upgraded the machine I used to sign
> the tag (didn't change the key) - hope the signature is still ok. If not
> pls let me know!

All looks normal as far as I can tell,

Linus
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] s390: protvirt: virtio: Refuse device without IOMMU

2020-06-10 Thread Halil Pasic
On Wed, 10 Jun 2020 15:11:51 +0200
Pierre Morel  wrote:

> Protected Virtualisation protects the memory of the guest and
> does not allow the host to access all of its memory.
> 
> Let's refuse a VIRTIO device which does not use IOMMU
> protected access.
> 

Should we ever get a virtio-ccw device implemented in hardware, we could
in theory have a trusted device, i.e. one that is not affected by the
memory protection.

IMHO this restriction applies to paravirtualized devices, that is devices
realized by the hypervisor. In that sense this is not about ccw, but
could in the future affect PCI as well. Thus the transport code may not
be the best place to fence this, but frankly looking at how the QEMU
discussion is going (the one where I try to prevent this condition) I
would be glad to have something like this as a safety net.

I would however like the commit message to reflect what is written above.

Do we need backports (and cc-stable) for this? Connie what do you think?

> Signed-off-by: Pierre Morel 
> ---
>  drivers/s390/virtio/virtio_ccw.c | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/drivers/s390/virtio/virtio_ccw.c 
> b/drivers/s390/virtio/virtio_ccw.c
> index 5730572b52cd..06ffbc96587a 100644
> --- a/drivers/s390/virtio/virtio_ccw.c
> +++ b/drivers/s390/virtio/virtio_ccw.c
> @@ -986,6 +986,11 @@ static void virtio_ccw_set_status(struct virtio_device 
> *vdev, u8 status)
>   if (!ccw)
>   return;
>  
> + /* Protected Virtualisation guest needs IOMMU */
> + if (is_prot_virt_guest() &&
> + !__virtio_test_bit(vdev, VIRTIO_F_IOMMU_PLATFORM))

If you were to add && !__virtio_test_bit(vdev, VIRTIO_F_ORDER_PLATFORM)
we could confine this check and the bail out to paravirtualized devices,
because a device realized in HW is expected to give us both
F_ACCESS_PLATFORM and F_ORDER_PLATFORM. I would not bet it will
ever matter for virtio-ccw though.

Connie, do you have an opinion on this?

Regards,
Halil

> + status &= ~VIRTIO_CONFIG_S_FEATURES_OK;
> +
>   /* Write the status to the host. */
>   vcdev->dma_area->status = status;
>   ccw->cmd_code = CCW_CMD_WRITE_STATUS;

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] s390: protvirt: virtio: Refuse device without IOMMU

2020-06-10 Thread Pierre Morel




On 2020-06-10 16:53, Cornelia Huck wrote:

On Wed, 10 Jun 2020 16:37:55 +0200
Pierre Morel  wrote:


On 2020-06-10 15:24, Cornelia Huck wrote:

On Wed, 10 Jun 2020 15:11:51 +0200
Pierre Morel  wrote:
   

Protected Virtualisation protects the memory of the guest and
does not allow the host to access all of its memory.

Let's refuse a VIRTIO device which does not use IOMMU
protected access.

Signed-off-by: Pierre Morel 
---
   drivers/s390/virtio/virtio_ccw.c | 5 +
   1 file changed, 5 insertions(+)

diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 5730572b52cd..06ffbc96587a 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -986,6 +986,11 @@ static void virtio_ccw_set_status(struct virtio_device 
*vdev, u8 status)
if (!ccw)
return;
   
+	/* Protected Virtualisation guest needs IOMMU */

+   if (is_prot_virt_guest() &&
+   !__virtio_test_bit(vdev, VIRTIO_F_IOMMU_PLATFORM))
+   status &= ~VIRTIO_CONFIG_S_FEATURES_OK;
+


set_status seems like an odd place to look at features; shouldn't that
rather be done in finalize_features?


Right, looks better to me too.
What about:



diff --git a/drivers/s390/virtio/virtio_ccw.c
b/drivers/s390/virtio/virtio_ccw.c
index 06ffbc96587a..227676297ea0 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -833,6 +833,11 @@ static int virtio_ccw_finalize_features(struct
virtio_device *vdev)
  ret = -ENOMEM;
  goto out_free;
  }
+
+   if (is_prot_virt_guest() &&
+   !__virtio_test_bit(vdev, VIRTIO_F_IOMMU_PLATFORM))


Add a comment, and (maybe) a message?

Otherwise, I think this is fine, as it should fail the probe, which is
what we want.


yes right a message is needed.
and I extend a little the comment I had before.
thanks

Regards,
Pierre

--
Pierre Morel
IBM Lab Boeblingen
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH RFC v7 03/14] vhost: use batched get_vq_desc version

2020-06-10 Thread Michael S. Tsirkin
On Wed, Jun 10, 2020 at 02:37:50PM +0200, Eugenio Perez Martin wrote:
> > +/* This function returns a value > 0 if a descriptor was found, or 0 if 
> > none were found.
> > + * A negative code is returned on error. */
> > +static int fetch_descs(struct vhost_virtqueue *vq)
> > +{
> > +   int ret;
> > +
> > +   if (unlikely(vq->first_desc >= vq->ndescs)) {
> > +   vq->first_desc = 0;
> > +   vq->ndescs = 0;
> > +   }
> > +
> > +   if (vq->ndescs)
> > +   return 1;
> > +
> > +   for (ret = 1;
> > +ret > 0 && vq->ndescs <= vhost_vq_num_batch_descs(vq);
> > +ret = fetch_buf(vq))
> > +   ;
> 
> (Expanding comment in V6):
> 
> We get an infinite loop this way:
> * vq->ndescs == 0, so we call fetch_buf() here
> * fetch_buf gets less than vhost_vq_num_batch_descs(vq); descriptors. ret = 1
> * This loop calls again fetch_buf, but vq->ndescs > 0 (and avail_vq ==
> last_avail_vq), so it just return 1

That's what
 [PATCH RFC v7 08/14] fixup! vhost: use batched get_vq_desc version
is supposed to fix.

-- 
MST

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH RFC v7 03/14] vhost: use batched get_vq_desc version

2020-06-10 Thread Michael S. Tsirkin
On Wed, Jun 10, 2020 at 04:29:29PM +0200, Eugenio Pérez wrote:
> On Wed, 2020-06-10 at 07:36 -0400, Michael S. Tsirkin wrote:
> > As testing shows no performance change, switch to that now.
> > 
> > Signed-off-by: Michael S. Tsirkin 
> > Signed-off-by: Eugenio Pérez 
> > Link: https://lore.kernel.org/r/20200401183118.8334-3-epere...@redhat.com
> > Signed-off-by: Michael S. Tsirkin 
> > ---
> >  drivers/vhost/test.c  |   2 +-
> >  drivers/vhost/vhost.c | 318 --
> >  drivers/vhost/vhost.h |   7 +-
> >  3 files changed, 65 insertions(+), 262 deletions(-)
> > 
> > diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
> > index 0466921f4772..7d69778aaa26 100644
> > --- a/drivers/vhost/test.c
> > +++ b/drivers/vhost/test.c
> > @@ -119,7 +119,7 @@ static int vhost_test_open(struct inode *inode, struct 
> > file *f)
> > dev = >dev;
> > vqs[VHOST_TEST_VQ] = >vqs[VHOST_TEST_VQ];
> > n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick;
> > -   vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV,
> > +   vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV + 64,
> >VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT, true, NULL);
> >  
> > f->private_data = n;
> > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > index 11433d709651..28f324fd77df 100644
> > --- a/drivers/vhost/vhost.c
> > +++ b/drivers/vhost/vhost.c
> > @@ -304,6 +304,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> >  {
> > vq->num = 1;
> > vq->ndescs = 0;
> > +   vq->first_desc = 0;
> > vq->desc = NULL;
> > vq->avail = NULL;
> > vq->used = NULL;
> > @@ -372,6 +373,11 @@ static int vhost_worker(void *data)
> > return 0;
> >  }
> >  
> > +static int vhost_vq_num_batch_descs(struct vhost_virtqueue *vq)
> > +{
> > +   return vq->max_descs - UIO_MAXIOV;
> > +}
> > +
> >  static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
> >  {
> > kfree(vq->descs);
> > @@ -394,6 +400,9 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev 
> > *dev)
> > for (i = 0; i < dev->nvqs; ++i) {
> > vq = dev->vqs[i];
> > vq->max_descs = dev->iov_limit;
> > +   if (vhost_vq_num_batch_descs(vq) < 0) {
> > +   return -EINVAL;
> > +   }
> > vq->descs = kmalloc_array(vq->max_descs,
> >   sizeof(*vq->descs),
> >   GFP_KERNEL);
> > @@ -1610,6 +1619,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned 
> > int ioctl, void __user *arg
> > vq->last_avail_idx = s.num;
> > /* Forget the cached index value. */
> > vq->avail_idx = vq->last_avail_idx;
> > +   vq->ndescs = vq->first_desc = 0;
> 
> This is not needed if it is done in vhost_vq_set_backend, as far as I can 
> tell.
> 
> Actually, maybe it is even better to move `vq->avail_idx = 
> vq->last_avail_idx;` line to vhost_vq_set_backend, it is part
> of the backend "set up" procedure, isn't it?
> 
> I tested with virtio_test + batch tests sent in 
> https://lkml.kernel.org/lkml/20200418102217.32327-1-epere...@redhat.com/T/.

Ow did I forget to merge them for rc1?  Should I have? Maybe Linus won't
yell to hard at me if I merge them after rc1.


> I append here what I'm proposing in case it is clearer this way.
> 
> Thanks!
> 
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 4d198994e7be..809ad2cd2879 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -1617,9 +1617,6 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned 
> int ioctl, void __user *arg
>   break;
>   }
>   vq->last_avail_idx = s.num;
> - /* Forget the cached index value. */
> - vq->avail_idx = vq->last_avail_idx;
> - vq->ndescs = vq->first_desc = 0;
>   break;
>   case VHOST_GET_VRING_BASE:
>   s.index = idx;
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index fed36af5c444..f4902dc808e4 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -258,6 +258,7 @@ static inline void vhost_vq_set_backend(struct 
> vhost_virtqueue *vq,
>   void *private_data)
>  {
>   vq->private_data = private_data;
> + vq->avail_idx = vq->last_avail_idx;
>   vq->ndescs = 0;
>   vq->first_desc = 0;
>  }
> 

Seems like a nice cleanup, though it's harmless right?


-- 
MST

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] s390: protvirt: virtio: Refuse device without IOMMU

2020-06-10 Thread Cornelia Huck
On Wed, 10 Jun 2020 16:37:55 +0200
Pierre Morel  wrote:

> On 2020-06-10 15:24, Cornelia Huck wrote:
> > On Wed, 10 Jun 2020 15:11:51 +0200
> > Pierre Morel  wrote:
> >   
> >> Protected Virtualisation protects the memory of the guest and
> >> does not allow the host to access all of its memory.
> >>
> >> Let's refuse a VIRTIO device which does not use IOMMU
> >> protected access.
> >>
> >> Signed-off-by: Pierre Morel 
> >> ---
> >>   drivers/s390/virtio/virtio_ccw.c | 5 +
> >>   1 file changed, 5 insertions(+)
> >>
> >> diff --git a/drivers/s390/virtio/virtio_ccw.c 
> >> b/drivers/s390/virtio/virtio_ccw.c
> >> index 5730572b52cd..06ffbc96587a 100644
> >> --- a/drivers/s390/virtio/virtio_ccw.c
> >> +++ b/drivers/s390/virtio/virtio_ccw.c
> >> @@ -986,6 +986,11 @@ static void virtio_ccw_set_status(struct 
> >> virtio_device *vdev, u8 status)
> >>if (!ccw)
> >>return;
> >>   
> >> +  /* Protected Virtualisation guest needs IOMMU */
> >> +  if (is_prot_virt_guest() &&
> >> +  !__virtio_test_bit(vdev, VIRTIO_F_IOMMU_PLATFORM))
> >> +  status &= ~VIRTIO_CONFIG_S_FEATURES_OK;
> >> +  
> > 
> > set_status seems like an odd place to look at features; shouldn't that
> > rather be done in finalize_features?  
> 
> Right, looks better to me too.
> What about:
> 
> 
> 
> diff --git a/drivers/s390/virtio/virtio_ccw.c 
> b/drivers/s390/virtio/virtio_ccw.c
> index 06ffbc96587a..227676297ea0 100644
> --- a/drivers/s390/virtio/virtio_ccw.c
> +++ b/drivers/s390/virtio/virtio_ccw.c
> @@ -833,6 +833,11 @@ static int virtio_ccw_finalize_features(struct 
> virtio_device *vdev)
>  ret = -ENOMEM;
>  goto out_free;
>  }
> +
> +   if (is_prot_virt_guest() &&
> +   !__virtio_test_bit(vdev, VIRTIO_F_IOMMU_PLATFORM))

Add a comment, and (maybe) a message?

Otherwise, I think this is fine, as it should fail the probe, which is
what we want.

> +   return -EIO;
> +
>  /* Give virtio_ring a chance to accept features. */
>  vring_transport_features(vdev);
> 
> 
> 
> Thanks,
> 
> Regards,
> Pierre
> 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] s390: protvirt: virtio: Refuse device without IOMMU

2020-06-10 Thread Pierre Morel




On 2020-06-10 15:24, Cornelia Huck wrote:

On Wed, 10 Jun 2020 15:11:51 +0200
Pierre Morel  wrote:


Protected Virtualisation protects the memory of the guest and
does not allow the host to access all of its memory.

Let's refuse a VIRTIO device which does not use IOMMU
protected access.

Signed-off-by: Pierre Morel 
---
  drivers/s390/virtio/virtio_ccw.c | 5 +
  1 file changed, 5 insertions(+)

diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 5730572b52cd..06ffbc96587a 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -986,6 +986,11 @@ static void virtio_ccw_set_status(struct virtio_device 
*vdev, u8 status)
if (!ccw)
return;
  
+	/* Protected Virtualisation guest needs IOMMU */

+   if (is_prot_virt_guest() &&
+   !__virtio_test_bit(vdev, VIRTIO_F_IOMMU_PLATFORM))
+   status &= ~VIRTIO_CONFIG_S_FEATURES_OK;
+


set_status seems like an odd place to look at features; shouldn't that
rather be done in finalize_features?


Right, looks better to me too.
What about:



diff --git a/drivers/s390/virtio/virtio_ccw.c 
b/drivers/s390/virtio/virtio_ccw.c

index 06ffbc96587a..227676297ea0 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -833,6 +833,11 @@ static int virtio_ccw_finalize_features(struct 
virtio_device *vdev)

ret = -ENOMEM;
goto out_free;
}
+
+   if (is_prot_virt_guest() &&
+   !__virtio_test_bit(vdev, VIRTIO_F_IOMMU_PLATFORM))
+   return -EIO;
+
/* Give virtio_ring a chance to accept features. */
vring_transport_features(vdev);



Thanks,

Regards,
Pierre

--
Pierre Morel
IBM Lab Boeblingen
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] s390: protvirt: virtio: Refuse device without IOMMU

2020-06-10 Thread Cornelia Huck
On Wed, 10 Jun 2020 15:11:51 +0200
Pierre Morel  wrote:

> Protected Virtualisation protects the memory of the guest and
> does not allow the host to access all of its memory.
> 
> Let's refuse a VIRTIO device which does not use IOMMU
> protected access.
> 
> Signed-off-by: Pierre Morel 
> ---
>  drivers/s390/virtio/virtio_ccw.c | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/drivers/s390/virtio/virtio_ccw.c 
> b/drivers/s390/virtio/virtio_ccw.c
> index 5730572b52cd..06ffbc96587a 100644
> --- a/drivers/s390/virtio/virtio_ccw.c
> +++ b/drivers/s390/virtio/virtio_ccw.c
> @@ -986,6 +986,11 @@ static void virtio_ccw_set_status(struct virtio_device 
> *vdev, u8 status)
>   if (!ccw)
>   return;
>  
> + /* Protected Virtualisation guest needs IOMMU */
> + if (is_prot_virt_guest() &&
> + !__virtio_test_bit(vdev, VIRTIO_F_IOMMU_PLATFORM))
> + status &= ~VIRTIO_CONFIG_S_FEATURES_OK;
> +

set_status seems like an odd place to look at features; shouldn't that
rather be done in finalize_features?

>   /* Write the status to the host. */
>   vcdev->dma_area->status = status;
>   ccw->cmd_code = CCW_CMD_WRITE_STATUS;

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH] s390: protvirt: virtio: Refuse device without IOMMU

2020-06-10 Thread Pierre Morel
Protected Virtualisation protects the memory of the guest and
does not allow the host to access all of its memory.

Let's refuse a VIRTIO device which does not use IOMMU
protected access.

Signed-off-by: Pierre Morel 
---
 drivers/s390/virtio/virtio_ccw.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 5730572b52cd..06ffbc96587a 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -986,6 +986,11 @@ static void virtio_ccw_set_status(struct virtio_device 
*vdev, u8 status)
if (!ccw)
return;
 
+   /* Protected Virtualisation guest needs IOMMU */
+   if (is_prot_virt_guest() &&
+   !__virtio_test_bit(vdev, VIRTIO_F_IOMMU_PLATFORM))
+   status &= ~VIRTIO_CONFIG_S_FEATURES_OK;
+
/* Write the status to the host. */
vcdev->dma_area->status = status;
ccw->cmd_code = CCW_CMD_WRITE_STATUS;
-- 
2.25.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH RFC v7 02/14] fixup! vhost: option to fetch descriptors through an independent struct

2020-06-10 Thread Michael S. Tsirkin
---
 drivers/vhost/vhost.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 180b7b58c76b..11433d709651 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2614,7 +2614,7 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
 err_fetch:
vq->ndescs = 0;
 
-   return ret;
+   return ret ? ret : vq->num;
 }
 EXPORT_SYMBOL_GPL(vhost_get_vq_desc_batch);
 
-- 
MST

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH RFC v7 12/14] vhost/scsi: switch to buf APIs

2020-06-10 Thread Michael S. Tsirkin
Switch to buf APIs. Doing this exposes a spec violation in vhost scsi:
all used bufs are marked with length 0.
Fix that is left for another day.

Signed-off-by: Michael S. Tsirkin 
---
 drivers/vhost/scsi.c | 73 ++--
 1 file changed, 44 insertions(+), 29 deletions(-)

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 0cbaa0b3893d..a5cdd4c01a3a 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -71,8 +71,8 @@ struct vhost_scsi_inflight {
 };
 
 struct vhost_scsi_cmd {
-   /* Descriptor from vhost_get_vq_desc() for virt_queue segment */
-   int tvc_vq_desc;
+   /* Descriptor from vhost_get_avail_buf() for virt_queue segment */
+   struct vhost_buf tvc_vq_desc;
/* virtio-scsi initiator task attribute */
int tvc_task_attr;
/* virtio-scsi response incoming iovecs */
@@ -213,7 +213,7 @@ struct vhost_scsi {
  * Context for processing request and control queue operations.
  */
 struct vhost_scsi_ctx {
-   int head;
+   struct vhost_buf buf;
unsigned int out, in;
size_t req_size, rsp_size;
size_t out_size, in_size;
@@ -443,6 +443,20 @@ static int vhost_scsi_check_stop_free(struct se_cmd 
*se_cmd)
return target_put_sess_cmd(se_cmd);
 }
 
+/* Signal to guest that request finished with no input buffer. */
+/* TODO calling this when writing into buffer and most likely a bug */
+static void vhost_scsi_signal_noinput(struct vhost_dev *vdev,
+ struct vhost_virtqueue *vq,
+ struct vhost_buf *bufp)
+{
+   struct vhost_buf buf = *bufp;
+
+   buf.in_len = 0;
+   vhost_put_used_buf(vq, );
+   vhost_signal(vdev, vq);
+}
+
+
 static void
 vhost_scsi_do_evt_work(struct vhost_scsi *vs, struct vhost_scsi_evt *evt)
 {
@@ -450,7 +464,8 @@ vhost_scsi_do_evt_work(struct vhost_scsi *vs, struct 
vhost_scsi_evt *evt)
struct virtio_scsi_event *event = >event;
struct virtio_scsi_event __user *eventp;
unsigned out, in;
-   int head, ret;
+   struct vhost_buf buf;
+   int ret;
 
if (!vhost_vq_get_backend(vq)) {
vs->vs_events_missed = true;
@@ -459,14 +474,14 @@ vhost_scsi_do_evt_work(struct vhost_scsi *vs, struct 
vhost_scsi_evt *evt)
 
 again:
vhost_disable_notify(>dev, vq);
-   head = vhost_get_vq_desc(vq, vq->iov,
-   ARRAY_SIZE(vq->iov), , ,
-   NULL, NULL);
-   if (head < 0) {
+   ret = vhost_get_avail_buf(vq, ,
+ vq->iov, ARRAY_SIZE(vq->iov), , ,
+ NULL, NULL);
+   if (ret < 0) {
vs->vs_events_missed = true;
return;
}
-   if (head == vq->num) {
+   if (!ret) {
if (vhost_enable_notify(>dev, vq))
goto again;
vs->vs_events_missed = true;
@@ -488,7 +503,7 @@ vhost_scsi_do_evt_work(struct vhost_scsi *vs, struct 
vhost_scsi_evt *evt)
eventp = vq->iov[out].iov_base;
ret = __copy_to_user(eventp, event, sizeof(*event));
if (!ret)
-   vhost_add_used_and_signal(>dev, vq, head, 0);
+   vhost_scsi_signal_noinput(>dev, vq, );
else
vq_err(vq, "Faulted on vhost_scsi_send_event\n");
 }
@@ -549,7 +564,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work 
*work)
ret = copy_to_iter(_rsp, sizeof(v_rsp), _iter);
if (likely(ret == sizeof(v_rsp))) {
struct vhost_scsi_virtqueue *q;
-   vhost_add_used(cmd->tvc_vq, cmd->tvc_vq_desc, 0);
+   vhost_put_used_buf(cmd->tvc_vq, >tvc_vq_desc);
q = container_of(cmd->tvc_vq, struct 
vhost_scsi_virtqueue, vq);
vq = q - vs->vqs;
__set_bit(vq, signal);
@@ -793,7 +808,7 @@ static void vhost_scsi_submission_work(struct work_struct 
*work)
 static void
 vhost_scsi_send_bad_target(struct vhost_scsi *vs,
   struct vhost_virtqueue *vq,
-  int head, unsigned out)
+  struct vhost_buf *buf, unsigned out)
 {
struct virtio_scsi_cmd_resp __user *resp;
struct virtio_scsi_cmd_resp rsp;
@@ -804,7 +819,7 @@ vhost_scsi_send_bad_target(struct vhost_scsi *vs,
resp = vq->iov[out].iov_base;
ret = __copy_to_user(resp, , sizeof(rsp));
if (!ret)
-   vhost_add_used_and_signal(>dev, vq, head, 0);
+   vhost_scsi_signal_noinput(>dev, vq, buf);
else
pr_err("Faulted on virtio_scsi_cmd_resp\n");
 }
@@ -813,21 +828,21 @@ static int
 vhost_scsi_get_desc(struct vhost_scsi *vs, struct vhost_virtqueue *vq,
struct vhost_scsi_ctx *vc)
 {
-   int ret = -ENXIO;
+   int r, ret = -ENXIO;
 
-   vc->head = 

[PATCH RFC v7 09/14] vhost/net: convert to new API: heads->bufs

2020-06-10 Thread Michael S. Tsirkin
Convert vhost net to use the new format-agnostic API.
In particular, don't poke at vq internals such as the
heads array.

Signed-off-by: Michael S. Tsirkin 
---
 drivers/vhost/net.c | 154 +++-
 1 file changed, 82 insertions(+), 72 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index ff594eec8ae3..830fe84912a5 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -59,13 +59,13 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy 
TX;"
  * status internally; used for zerocopy tx only.
  */
 /* Lower device DMA failed */
-#define VHOST_DMA_FAILED_LEN   ((__force __virtio32)3)
+#define VHOST_DMA_FAILED_LEN   (3)
 /* Lower device DMA done */
-#define VHOST_DMA_DONE_LEN ((__force __virtio32)2)
+#define VHOST_DMA_DONE_LEN (2)
 /* Lower device DMA in progress */
-#define VHOST_DMA_IN_PROGRESS  ((__force __virtio32)1)
+#define VHOST_DMA_IN_PROGRESS  (1)
 /* Buffer unused */
-#define VHOST_DMA_CLEAR_LEN((__force __virtio32)0)
+#define VHOST_DMA_CLEAR_LEN(0)
 
 #define VHOST_DMA_IS_DONE(len) ((__force u32)(len) >= (__force 
u32)VHOST_DMA_DONE_LEN)
 
@@ -112,9 +112,12 @@ struct vhost_net_virtqueue {
/* last used idx for outstanding DMA zerocopy buffers */
int upend_idx;
/* For TX, first used idx for DMA done zerocopy buffers
-* For RX, number of batched heads
+* For RX, number of batched bufs
 */
int done_idx;
+   /* Outstanding user bufs. UIO_MAXIOV in length. */
+   /* TODO: we can make this smaller for sure. */
+   struct vhost_buf *bufs;
/* Number of XDP frames batched */
int batched_xdp;
/* an array of userspace buffers info */
@@ -271,6 +274,8 @@ static void vhost_net_clear_ubuf_info(struct vhost_net *n)
int i;
 
for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
+   kfree(n->vqs[i].bufs);
+   n->vqs[i].bufs = NULL;
kfree(n->vqs[i].ubuf_info);
n->vqs[i].ubuf_info = NULL;
}
@@ -282,6 +287,12 @@ static int vhost_net_set_ubuf_info(struct vhost_net *n)
int i;
 
for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
+   n->vqs[i].bufs = kmalloc_array(UIO_MAXIOV,
+  sizeof(*n->vqs[i].bufs),
+  GFP_KERNEL);
+   if (!n->vqs[i].bufs)
+   goto err;
+
zcopy = vhost_net_zcopy_mask & (0x1 << i);
if (!zcopy)
continue;
@@ -364,18 +375,18 @@ static void vhost_zerocopy_signal_used(struct vhost_net 
*net,
int j = 0;
 
for (i = nvq->done_idx; i != nvq->upend_idx; i = (i + 1) % UIO_MAXIOV) {
-   if (vq->heads[i].len == VHOST_DMA_FAILED_LEN)
+   if (nvq->bufs[i].in_len == VHOST_DMA_FAILED_LEN)
vhost_net_tx_err(net);
-   if (VHOST_DMA_IS_DONE(vq->heads[i].len)) {
-   vq->heads[i].len = VHOST_DMA_CLEAR_LEN;
+   if (VHOST_DMA_IS_DONE(nvq->bufs[i].in_len)) {
+   nvq->bufs[i].in_len = VHOST_DMA_CLEAR_LEN;
++j;
} else
break;
}
while (j) {
add = min(UIO_MAXIOV - nvq->done_idx, j);
-   vhost_add_used_and_signal_n(vq->dev, vq,
-   >heads[nvq->done_idx], add);
+   vhost_put_used_n_bufs(vq, >bufs[nvq->done_idx], add);
+   vhost_signal(vq->dev, vq);
nvq->done_idx = (nvq->done_idx + add) % UIO_MAXIOV;
j -= add;
}
@@ -390,7 +401,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, 
bool success)
rcu_read_lock_bh();
 
/* set len to mark this desc buffers done DMA */
-   nvq->vq.heads[ubuf->desc].in_len = success ?
+   nvq->bufs[ubuf->desc].in_len = success ?
VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN;
cnt = vhost_net_ubuf_put(ubufs);
 
@@ -452,7 +463,8 @@ static void vhost_net_signal_used(struct 
vhost_net_virtqueue *nvq)
if (!nvq->done_idx)
return;
 
-   vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx);
+   vhost_put_used_n_bufs(vq, nvq->bufs, nvq->done_idx);
+   vhost_signal(dev, vq);
nvq->done_idx = 0;
 }
 
@@ -558,6 +570,7 @@ static void vhost_net_busy_poll(struct vhost_net *net,
 
 static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
struct vhost_net_virtqueue *tnvq,
+   struct vhost_buf *buf,
unsigned int *out_num, unsigned int *in_num,
struct msghdr *msghdr, bool *busyloop_intr)
 {
@@ -565,10 +578,10 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
struct vhost_virtqueue *rvq = >vq;

[PATCH RFC v7 14/14] vhost: drop head based APIs

2020-06-10 Thread Michael S. Tsirkin
Everyone's using buf APIs, no need for head based ones anymore.

Signed-off-by: Michael S. Tsirkin 
---
 drivers/vhost/vhost.c | 64 ++-
 drivers/vhost/vhost.h | 12 
 2 files changed, 8 insertions(+), 68 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 03e6bca02288..9096bd291c91 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2425,39 +2425,11 @@ EXPORT_SYMBOL_GPL(vhost_get_avail_buf);
 void vhost_discard_avail_bufs(struct vhost_virtqueue *vq,
  struct vhost_buf *buf, unsigned count)
 {
-   vhost_discard_vq_desc(vq, count);
+   unfetch_descs(vq);
+   vq->last_avail_idx -= count;
 }
 EXPORT_SYMBOL_GPL(vhost_discard_avail_bufs);
 
-/* This function returns the descriptor number found, or vq->num (which is
- * never a valid descriptor number) if none was found.  A negative code is
- * returned on error. */
-int vhost_get_vq_desc(struct vhost_virtqueue *vq,
- struct iovec iov[], unsigned int iov_size,
- unsigned int *out_num, unsigned int *in_num,
- struct vhost_log *log, unsigned int *log_num)
-{
-   struct vhost_buf buf;
-   int ret = vhost_get_avail_buf(vq, ,
- iov, iov_size, out_num, in_num,
- log, log_num);
-
-   if (likely(ret > 0))
-   return buf->id;
-   if (likely(!ret))
-   return vq->num;
-   return ret;
-}
-EXPORT_SYMBOL_GPL(vhost_get_vq_desc);
-
-/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
-void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
-{
-   unfetch_descs(vq);
-   vq->last_avail_idx -= n;
-}
-EXPORT_SYMBOL_GPL(vhost_discard_vq_desc);
-
 static int __vhost_add_used_n(struct vhost_virtqueue *vq,
struct vring_used_elem *heads,
unsigned count)
@@ -2490,8 +2462,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
return 0;
 }
 
-/* After we've used one of their buffers, we tell them about it.  We'll then
- * want to notify the guest, using eventfd. */
+static
 int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
 unsigned count)
 {
@@ -2525,10 +2496,8 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct 
vring_used_elem *heads,
}
return r;
 }
-EXPORT_SYMBOL_GPL(vhost_add_used_n);
 
-/* After we've used one of their buffers, we tell them about it.  We'll then
- * want to notify the guest, using eventfd. */
+static
 int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
 {
struct vring_used_elem heads = {
@@ -2538,14 +2507,17 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned 
int head, int len)
 
return vhost_add_used_n(vq, , 1);
 }
-EXPORT_SYMBOL_GPL(vhost_add_used);
 
+/* After we've used one of their buffers, we tell them about it.  We'll then
+ * want to notify the guest, using vhost_signal. */
 int vhost_put_used_buf(struct vhost_virtqueue *vq, struct vhost_buf *buf)
 {
return vhost_add_used(vq, buf->id, buf->in_len);
 }
 EXPORT_SYMBOL_GPL(vhost_put_used_buf);
 
+/* After we've used one of their buffers, we tell them about it.  We'll then
+ * want to notify the guest, using vhost_signal. */
 int vhost_put_used_n_bufs(struct vhost_virtqueue *vq,
  struct vhost_buf *bufs, unsigned count)
 {
@@ -2606,26 +2578,6 @@ void vhost_signal(struct vhost_dev *dev, struct 
vhost_virtqueue *vq)
 }
 EXPORT_SYMBOL_GPL(vhost_signal);
 
-/* And here's the combo meal deal.  Supersize me! */
-void vhost_add_used_and_signal(struct vhost_dev *dev,
-  struct vhost_virtqueue *vq,
-  unsigned int head, int len)
-{
-   vhost_add_used(vq, head, len);
-   vhost_signal(dev, vq);
-}
-EXPORT_SYMBOL_GPL(vhost_add_used_and_signal);
-
-/* multi-buffer version of vhost_add_used_and_signal */
-void vhost_add_used_and_signal_n(struct vhost_dev *dev,
-struct vhost_virtqueue *vq,
-struct vring_used_elem *heads, unsigned count)
-{
-   vhost_add_used_n(vq, heads, count);
-   vhost_signal(dev, vq);
-}
-EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n);
-
 /* return true if we're sure that avaiable ring is empty */
 bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 {
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 28eea0155efb..264a2a2fae97 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -197,11 +197,6 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int 
ioctl, void __user *arg
 bool vhost_vq_access_ok(struct vhost_virtqueue *vq);
 bool vhost_log_access_ok(struct vhost_dev *);
 
-int vhost_get_vq_desc(struct vhost_virtqueue *,
- struct iovec 

[PATCH RFC v7 03/14] vhost: use batched get_vq_desc version

2020-06-10 Thread Michael S. Tsirkin
As testing shows no performance change, switch to that now.

Signed-off-by: Michael S. Tsirkin 
Signed-off-by: Eugenio Pérez 
Link: https://lore.kernel.org/r/20200401183118.8334-3-epere...@redhat.com
Signed-off-by: Michael S. Tsirkin 
---
 drivers/vhost/test.c  |   2 +-
 drivers/vhost/vhost.c | 318 --
 drivers/vhost/vhost.h |   7 +-
 3 files changed, 65 insertions(+), 262 deletions(-)

diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index 0466921f4772..7d69778aaa26 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -119,7 +119,7 @@ static int vhost_test_open(struct inode *inode, struct file 
*f)
dev = >dev;
vqs[VHOST_TEST_VQ] = >vqs[VHOST_TEST_VQ];
n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick;
-   vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV,
+   vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV + 64,
   VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT, true, NULL);
 
f->private_data = n;
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 11433d709651..28f324fd77df 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -304,6 +304,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 {
vq->num = 1;
vq->ndescs = 0;
+   vq->first_desc = 0;
vq->desc = NULL;
vq->avail = NULL;
vq->used = NULL;
@@ -372,6 +373,11 @@ static int vhost_worker(void *data)
return 0;
 }
 
+static int vhost_vq_num_batch_descs(struct vhost_virtqueue *vq)
+{
+   return vq->max_descs - UIO_MAXIOV;
+}
+
 static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
 {
kfree(vq->descs);
@@ -394,6 +400,9 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
for (i = 0; i < dev->nvqs; ++i) {
vq = dev->vqs[i];
vq->max_descs = dev->iov_limit;
+   if (vhost_vq_num_batch_descs(vq) < 0) {
+   return -EINVAL;
+   }
vq->descs = kmalloc_array(vq->max_descs,
  sizeof(*vq->descs),
  GFP_KERNEL);
@@ -1610,6 +1619,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int 
ioctl, void __user *arg
vq->last_avail_idx = s.num;
/* Forget the cached index value. */
vq->avail_idx = vq->last_avail_idx;
+   vq->ndescs = vq->first_desc = 0;
break;
case VHOST_GET_VRING_BASE:
s.index = idx;
@@ -2078,253 +2088,6 @@ static unsigned next_desc(struct vhost_virtqueue *vq, 
struct vring_desc *desc)
return next;
 }
 
-static int get_indirect(struct vhost_virtqueue *vq,
-   struct iovec iov[], unsigned int iov_size,
-   unsigned int *out_num, unsigned int *in_num,
-   struct vhost_log *log, unsigned int *log_num,
-   struct vring_desc *indirect)
-{
-   struct vring_desc desc;
-   unsigned int i = 0, count, found = 0;
-   u32 len = vhost32_to_cpu(vq, indirect->len);
-   struct iov_iter from;
-   int ret, access;
-
-   /* Sanity check */
-   if (unlikely(len % sizeof desc)) {
-   vq_err(vq, "Invalid length in indirect descriptor: "
-  "len 0x%llx not multiple of 0x%zx\n",
-  (unsigned long long)len,
-  sizeof desc);
-   return -EINVAL;
-   }
-
-   ret = translate_desc(vq, vhost64_to_cpu(vq, indirect->addr), len, 
vq->indirect,
-UIO_MAXIOV, VHOST_ACCESS_RO);
-   if (unlikely(ret < 0)) {
-   if (ret != -EAGAIN)
-   vq_err(vq, "Translation failure %d in indirect.\n", 
ret);
-   return ret;
-   }
-   iov_iter_init(, READ, vq->indirect, ret, len);
-
-   /* We will use the result as an address to read from, so most
-* architectures only need a compiler barrier here. */
-   read_barrier_depends();
-
-   count = len / sizeof desc;
-   /* Buffers are chained via a 16 bit next field, so
-* we can have at most 2^16 of these. */
-   if (unlikely(count > USHRT_MAX + 1)) {
-   vq_err(vq, "Indirect buffer length too big: %d\n",
-  indirect->len);
-   return -E2BIG;
-   }
-
-   do {
-   unsigned iov_count = *in_num + *out_num;
-   if (unlikely(++found > count)) {
-   vq_err(vq, "Loop detected: last one at %u "
-  "indirect size %u\n",
-  i, count);
-   return -EINVAL;
-   }
-   if (unlikely(!copy_from_iter_full(, sizeof(desc), ))) 
{
-   vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n",
-  i, (size_t)vhost64_to_cpu(vq, 

[PATCH RFC v7 00/14] vhost: ring format independence

2020-06-10 Thread Michael S. Tsirkin
This intentionally leaves "fixup" changes separate - hopefully
that is enough to fix vhost-net crashes reported here,
but it helps me keep track of what changed.
I will naturally squash them later when we are done.


This adds infrastructure required for supporting
multiple ring formats.

The idea is as follows: we convert descriptors to an
independent format first, and process that converting to
iov later.

Used ring is similar: we fetch into an independent struct first,
convert that to IOV later.

The point is that we have a tight loop that fetches
descriptors, which is good for cache utilization.
This will also allow all kind of batching tricks -
e.g. it seems possible to keep SMAP disabled while
we are fetching multiple descriptors.

For used descriptors, this allows keeping track of the buffer length
without need to rescan IOV.

This seems to perform exactly the same as the original
code based on a microbenchmark.
Lightly tested.
More testing would be very much appreciated.

changes from v6:
- fixes some bugs introduced in v6 and v5

changes from v5:
- addressed comments by Jason: squashed API changes, fixed up discard

changes from v4:
- added used descriptor format independence
- addressed comments by Jason
- fixed a crash detected by the lkp robot.

changes from v3:
- fixed error handling in case of indirect descriptors
- add BUG_ON to detect buffer overflow in case of bugs
in response to comment by Jason Wang
- minor code tweaks

Changes from v2:
- fixed indirect descriptor batching
reported by Jason Wang

Changes from v1:
- typo fixes


Michael S. Tsirkin (14):
  vhost: option to fetch descriptors through an independent struct
  fixup! vhost: option to fetch descriptors through an independent
struct
  vhost: use batched get_vq_desc version
  vhost/net: pass net specific struct pointer
  vhost: reorder functions
  vhost: format-independent API for used buffers
  fixup! vhost: format-independent API for used buffers
  fixup! vhost: use batched get_vq_desc version
  vhost/net: convert to new API: heads->bufs
  vhost/net: avoid iov length math
  vhost/test: convert to the buf API
  vhost/scsi: switch to buf APIs
  vhost/vsock: switch to the buf API
  vhost: drop head based APIs

 drivers/vhost/net.c   | 174 +--
 drivers/vhost/scsi.c  |  73 
 drivers/vhost/test.c  |  22 +--
 drivers/vhost/vhost.c | 378 +++---
 drivers/vhost/vhost.h |  44 +++--
 drivers/vhost/vsock.c |  30 ++--
 6 files changed, 439 insertions(+), 282 deletions(-)

-- 
MST

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH RFC v7 13/14] vhost/vsock: switch to the buf API

2020-06-10 Thread Michael S. Tsirkin
A straightforward conversion.

Signed-off-by: Michael S. Tsirkin 
---
 drivers/vhost/vsock.c | 30 ++
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index a483cec31d5c..61c6d3dd2ae3 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -103,7 +103,8 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
unsigned out, in;
size_t nbytes;
size_t iov_len, payload_len;
-   int head;
+   struct vhost_buf buf;
+   int ret;
 
spin_lock_bh(>send_pkt_list_lock);
if (list_empty(>send_pkt_list)) {
@@ -117,16 +118,17 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
list_del_init(>list);
spin_unlock_bh(>send_pkt_list_lock);
 
-   head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
-, , NULL, NULL);
-   if (head < 0) {
+   ret = vhost_get_avail_buf(vq, ,
+ vq->iov, ARRAY_SIZE(vq->iov),
+ , , NULL, NULL);
+   if (ret < 0) {
spin_lock_bh(>send_pkt_list_lock);
list_add(>list, >send_pkt_list);
spin_unlock_bh(>send_pkt_list_lock);
break;
}
 
-   if (head == vq->num) {
+   if (!ret) {
spin_lock_bh(>send_pkt_list_lock);
list_add(>list, >send_pkt_list);
spin_unlock_bh(>send_pkt_list_lock);
@@ -186,7 +188,8 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
 */
virtio_transport_deliver_tap_pkt(pkt);
 
-   vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len);
+   buf.in_len = sizeof(pkt->hdr) + payload_len;
+   vhost_put_used_buf(vq, );
added = true;
 
pkt->off += payload_len;
@@ -440,7 +443,8 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work 
*work)
struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
 dev);
struct virtio_vsock_pkt *pkt;
-   int head, pkts = 0, total_len = 0;
+   int ret, pkts = 0, total_len = 0;
+   struct vhost_buf buf;
unsigned int out, in;
bool added = false;
 
@@ -461,12 +465,13 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work 
*work)
goto no_more_replies;
}
 
-   head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
-, , NULL, NULL);
-   if (head < 0)
+   ret = vhost_get_avail_buf(vq, ,
+ vq->iov, ARRAY_SIZE(vq->iov),
+ , , NULL, NULL);
+   if (ret < 0)
break;
 
-   if (head == vq->num) {
+   if (!ret) {
if (unlikely(vhost_enable_notify(>dev, vq))) {
vhost_disable_notify(>dev, vq);
continue;
@@ -494,7 +499,8 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work 
*work)
virtio_transport_free_pkt(pkt);
 
len += sizeof(pkt->hdr);
-   vhost_add_used(vq, head, len);
+   buf.in_len = len;
+   vhost_put_used_buf(vq, );
total_len += len;
added = true;
} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
-- 
MST

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH RFC v7 04/14] vhost/net: pass net specific struct pointer

2020-06-10 Thread Michael S. Tsirkin
In preparation for further cleanup, pass a net-specific pointer
to ubuf callbacks so we can move net-specific fields
out to net structures.

Signed-off-by: Michael S. Tsirkin 
---
 drivers/vhost/net.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index bf5e1d81ae25..ff594eec8ae3 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -94,7 +94,7 @@ struct vhost_net_ubuf_ref {
 */
atomic_t refcount;
wait_queue_head_t wait;
-   struct vhost_virtqueue *vq;
+   struct vhost_net_virtqueue *nvq;
 };
 
 #define VHOST_NET_BATCH 64
@@ -231,7 +231,7 @@ static void vhost_net_enable_zcopy(int vq)
 }
 
 static struct vhost_net_ubuf_ref *
-vhost_net_ubuf_alloc(struct vhost_virtqueue *vq, bool zcopy)
+vhost_net_ubuf_alloc(struct vhost_net_virtqueue *nvq, bool zcopy)
 {
struct vhost_net_ubuf_ref *ubufs;
/* No zero copy backend? Nothing to count. */
@@ -242,7 +242,7 @@ vhost_net_ubuf_alloc(struct vhost_virtqueue *vq, bool zcopy)
return ERR_PTR(-ENOMEM);
atomic_set(>refcount, 1);
init_waitqueue_head(>wait);
-   ubufs->vq = vq;
+   ubufs->nvq = nvq;
return ubufs;
 }
 
@@ -384,13 +384,13 @@ static void vhost_zerocopy_signal_used(struct vhost_net 
*net,
 static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
 {
struct vhost_net_ubuf_ref *ubufs = ubuf->ctx;
-   struct vhost_virtqueue *vq = ubufs->vq;
+   struct vhost_net_virtqueue *nvq = ubufs->nvq;
int cnt;
 
rcu_read_lock_bh();
 
/* set len to mark this desc buffers done DMA */
-   vq->heads[ubuf->desc].len = success ?
+   nvq->vq.heads[ubuf->desc].in_len = success ?
VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN;
cnt = vhost_net_ubuf_put(ubufs);
 
@@ -402,7 +402,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, 
bool success)
 * less than 10% of times).
 */
if (cnt <= 1 || !(cnt % 16))
-   vhost_poll_queue(>poll);
+   vhost_poll_queue(>vq.poll);
 
rcu_read_unlock_bh();
 }
@@ -1525,7 +1525,7 @@ static long vhost_net_set_backend(struct vhost_net *n, 
unsigned index, int fd)
/* start polling new socket */
oldsock = vhost_vq_get_backend(vq);
if (sock != oldsock) {
-   ubufs = vhost_net_ubuf_alloc(vq,
+   ubufs = vhost_net_ubuf_alloc(nvq,
 sock && vhost_sock_zcopy(sock));
if (IS_ERR(ubufs)) {
r = PTR_ERR(ubufs);
-- 
MST

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH RFC v7 10/14] vhost/net: avoid iov length math

2020-06-10 Thread Michael S. Tsirkin
Now that the API exposes the buffer length, we no longer need to
scan IOVs to figure it out.

Signed-off-by: Michael S. Tsirkin 
---
 drivers/vhost/net.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 830fe84912a5..0b509be8d7b1 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -607,11 +607,9 @@ static bool vhost_exceeds_maxpend(struct vhost_net *net)
 }
 
 static size_t init_iov_iter(struct vhost_virtqueue *vq, struct iov_iter *iter,
-   size_t hdr_size, int out)
+   size_t len, size_t hdr_size, int out)
 {
/* Skip header. TODO: support TSO. */
-   size_t len = iov_length(vq->iov, out);
-
iov_iter_init(iter, WRITE, vq->iov, out, len);
iov_iter_advance(iter, hdr_size);
 
@@ -640,7 +638,7 @@ static int get_tx_bufs(struct vhost_net *net,
}
 
/* Sanity check */
-   *len = init_iov_iter(vq, >msg_iter, nvq->vhost_hlen, *out);
+   *len = init_iov_iter(vq, >msg_iter, buf->out_len, nvq->vhost_hlen, 
*out);
if (*len == 0) {
vq_err(vq, "Unexpected header len for TX: %zd expected %zd\n",
*len, nvq->vhost_hlen);
@@ -1080,7 +1078,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
nlogs += *log_num;
log += *log_num;
}
-   len = iov_length(vq->iov + seg, in);
+   len = bufs[bufcount].in_len;
datalen -= len;
++bufcount;
seg += in;
-- 
MST

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH RFC v7 06/14] vhost: format-independent API for used buffers

2020-06-10 Thread Michael S. Tsirkin
Add a new API that doesn't assume a used ring, heads, etc.
For now, we keep the old APIs around to make it easier
to convert drivers.

Signed-off-by: Michael S. Tsirkin 
---
 drivers/vhost/vhost.c | 52 ++-
 drivers/vhost/vhost.h | 17 +-
 2 files changed, 58 insertions(+), 11 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 506208b63126..e5763d81bf0f 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2339,13 +2339,12 @@ static void unfetch_descs(struct vhost_virtqueue *vq)
  * number of output then some number of input descriptors, it's actually two
  * iovecs, but we pack them into one and note how many of each there were.
  *
- * This function returns the descriptor number found, or vq->num (which is
- * never a valid descriptor number) if none was found.  A negative code is
- * returned on error. */
-int vhost_get_vq_desc(struct vhost_virtqueue *vq,
- struct iovec iov[], unsigned int iov_size,
- unsigned int *out_num, unsigned int *in_num,
- struct vhost_log *log, unsigned int *log_num)
+ * This function returns a value > 0 if a descriptor was found, or 0 if none 
were found.
+ * A negative code is returned on error. */
+int vhost_get_avail_buf(struct vhost_virtqueue *vq, struct vhost_buf *buf,
+   struct iovec iov[], unsigned int iov_size,
+   unsigned int *out_num, unsigned int *in_num,
+   struct vhost_log *log, unsigned int *log_num)
 {
int ret = fetch_descs(vq);
int i;
@@ -2358,6 +2357,8 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
*out_num = *in_num = 0;
if (unlikely(log))
*log_num = 0;
+   buf->in_len = buf->out_len = 0;
+   buf->descs = 0;
 
for (i = vq->first_desc; i < vq->ndescs; ++i) {
unsigned iov_count = *in_num + *out_num;
@@ -2387,6 +2388,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
/* If this is an input descriptor,
 * increment that count. */
*in_num += ret;
+   buf->in_len += desc->len;
if (unlikely(log && ret)) {
log[*log_num].addr = desc->addr;
log[*log_num].len = desc->len;
@@ -2402,9 +2404,11 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
goto err;
}
*out_num += ret;
+   buf->out_len += desc->len;
}
 
-   ret = desc->id;
+   buf->id = desc->id;
+   ++buf->descs;
 
if (!(desc->flags & VRING_DESC_F_NEXT))
break;
@@ -2412,14 +2416,22 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 
vq->first_desc = i + 1;
 
-   return ret;
+   return 1;
 
 err:
unfetch_descs(vq);
 
return ret ? ret : vq->num;
 }
-EXPORT_SYMBOL_GPL(vhost_get_vq_desc);
+EXPORT_SYMBOL_GPL(vhost_get_avail_buf);
+
+/* Reverse the effect of vhost_get_avail_buf. Useful for error handling. */
+void vhost_discard_avail_bufs(struct vhost_virtqueue *vq,
+ struct vhost_buf *buf, unsigned count)
+{
+   vhost_discard_vq_desc(vq, count);
+}
+EXPORT_SYMBOL_GPL(vhost_discard_avail_bufs);
 
 /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
 void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
@@ -2511,6 +2523,26 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned 
int head, int len)
 }
 EXPORT_SYMBOL_GPL(vhost_add_used);
 
+int vhost_put_used_buf(struct vhost_virtqueue *vq, struct vhost_buf *buf)
+{
+   return vhost_add_used(vq, buf->id, buf->in_len);
+}
+EXPORT_SYMBOL_GPL(vhost_put_used_buf);
+
+int vhost_put_used_n_bufs(struct vhost_virtqueue *vq,
+ struct vhost_buf *bufs, unsigned count)
+{
+   unsigned i;
+
+   for (i = 0; i < count; ++i) {
+   vq->heads[i].id = cpu_to_vhost32(vq, bufs[i].id);
+   vq->heads[i].len = cpu_to_vhost32(vq, bufs[i].in_len);
+   }
+
+   return vhost_add_used_n(vq, vq->heads, count);
+}
+EXPORT_SYMBOL_GPL(vhost_put_used_n_bufs);
+
 static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 {
__u16 old, new;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index fed36af5c444..28eea0155efb 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -67,6 +67,13 @@ struct vhost_desc {
u16 id;
 };
 
+struct vhost_buf {
+   u32 out_len;
+   u32 in_len;
+   u16 descs;
+   u16 id;
+};
+
 /* The virtqueue structure describes a queue attached to a device. */
 struct vhost_virtqueue {
struct vhost_dev *dev;
@@ -195,7 +202,12 @@ int vhost_get_vq_desc(struct vhost_virtqueue *,
   

[PATCH RFC v7 07/14] fixup! vhost: format-independent API for used buffers

2020-06-10 Thread Michael S. Tsirkin
---
 drivers/vhost/vhost.c | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index e5763d81bf0f..7a587b13095c 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2421,7 +2421,7 @@ int vhost_get_avail_buf(struct vhost_virtqueue *vq, 
struct vhost_buf *buf,
 err:
unfetch_descs(vq);
 
-   return ret ? ret : vq->num;
+   return ret;
 }
 EXPORT_SYMBOL_GPL(vhost_get_avail_buf);
 
@@ -2433,6 +2433,27 @@ void vhost_discard_avail_bufs(struct vhost_virtqueue *vq,
 }
 EXPORT_SYMBOL_GPL(vhost_discard_avail_bufs);
 
+/* This function returns the descriptor number found, or vq->num (which is
+ * never a valid descriptor number) if none was found.  A negative code is
+ * returned on error. */
+int vhost_get_vq_desc(struct vhost_virtqueue *vq,
+ struct iovec iov[], unsigned int iov_size,
+ unsigned int *out_num, unsigned int *in_num,
+ struct vhost_log *log, unsigned int *log_num)
+{
+   struct vhost_buf buf;
+   int ret = vhost_get_avail_buf(vq, ,
+ iov, iov_size, out_num, in_num,
+ log, log_num);
+
+   if (likely(ret > 0))
+   return buf->id;
+   if (likely(!ret))
+   return vq->num;
+   return ret;
+}
+EXPORT_SYMBOL_GPL(vhost_get_vq_desc);
+
 /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
 void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
 {
-- 
MST

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH RFC v7 11/14] vhost/test: convert to the buf API

2020-06-10 Thread Michael S. Tsirkin
Signed-off-by: Michael S. Tsirkin 
---
 drivers/vhost/test.c | 20 +++-
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index 7d69778aaa26..12304eb8da15 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -44,9 +44,10 @@ static void handle_vq(struct vhost_test *n)
 {
struct vhost_virtqueue *vq = >vqs[VHOST_TEST_VQ];
unsigned out, in;
-   int head;
+   int ret;
size_t len, total_len = 0;
void *private;
+   struct vhost_buf buf;
 
mutex_lock(>mutex);
private = vhost_vq_get_backend(vq);
@@ -58,15 +59,15 @@ static void handle_vq(struct vhost_test *n)
vhost_disable_notify(>dev, vq);
 
for (;;) {
-   head = vhost_get_vq_desc(vq, vq->iov,
-ARRAY_SIZE(vq->iov),
-, ,
-NULL, NULL);
+   ret = vhost_get_avail_buf(vq, , vq->iov,
+ ARRAY_SIZE(vq->iov),
+ , ,
+ NULL, NULL);
/* On error, stop handling until the next kick. */
-   if (unlikely(head < 0))
+   if (unlikely(ret < 0))
break;
/* Nothing new?  Wait for eventfd to tell us they refilled. */
-   if (head == vq->num) {
+   if (!ret) {
if (unlikely(vhost_enable_notify(>dev, vq))) {
vhost_disable_notify(>dev, vq);
continue;
@@ -78,13 +79,14 @@ static void handle_vq(struct vhost_test *n)
   "out %d, int %d\n", out, in);
break;
}
-   len = iov_length(vq->iov, out);
+   len = buf.out_len;
/* Sanity check */
if (!len) {
vq_err(vq, "Unexpected 0 len for TX\n");
break;
}
-   vhost_add_used_and_signal(>dev, vq, head, 0);
+   vhost_put_used_buf(vq, );
+   vhost_signal(>dev, vq);
total_len += len;
if (unlikely(vhost_exceeds_weight(vq, 0, total_len)))
break;
-- 
MST

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH RFC v7 05/14] vhost: reorder functions

2020-06-10 Thread Michael S. Tsirkin
Reorder functions in the file to not rely on forward
declarations, in preparation for making them static
down the road.

Signed-off-by: Michael S. Tsirkin 
---
 drivers/vhost/vhost.c | 26 +-
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 28f324fd77df..506208b63126 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2429,19 +2429,6 @@ void vhost_discard_vq_desc(struct vhost_virtqueue *vq, 
int n)
 }
 EXPORT_SYMBOL_GPL(vhost_discard_vq_desc);
 
-/* After we've used one of their buffers, we tell them about it.  We'll then
- * want to notify the guest, using eventfd. */
-int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
-{
-   struct vring_used_elem heads = {
-   cpu_to_vhost32(vq, head),
-   cpu_to_vhost32(vq, len)
-   };
-
-   return vhost_add_used_n(vq, , 1);
-}
-EXPORT_SYMBOL_GPL(vhost_add_used);
-
 static int __vhost_add_used_n(struct vhost_virtqueue *vq,
struct vring_used_elem *heads,
unsigned count)
@@ -2511,6 +2498,19 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct 
vring_used_elem *heads,
 }
 EXPORT_SYMBOL_GPL(vhost_add_used_n);
 
+/* After we've used one of their buffers, we tell them about it.  We'll then
+ * want to notify the guest, using eventfd. */
+int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
+{
+   struct vring_used_elem heads = {
+   cpu_to_vhost32(vq, head),
+   cpu_to_vhost32(vq, len)
+   };
+
+   return vhost_add_used_n(vq, , 1);
+}
+EXPORT_SYMBOL_GPL(vhost_add_used);
+
 static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 {
__u16 old, new;
-- 
MST

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH RFC v6 02/11] vhost: use batched get_vq_desc version

2020-06-10 Thread Michael S. Tsirkin
On Wed, Jun 10, 2020 at 11:14:49AM +0800, Jason Wang wrote:
> 
> On 2020/6/8 下午8:52, Michael S. Tsirkin wrote:
> > As testing shows no performance change, switch to that now.
> > 
> > Signed-off-by: Michael S. Tsirkin 
> > Signed-off-by: Eugenio Pérez 
> > Link: https://lore.kernel.org/r/20200401183118.8334-3-epere...@redhat.com
> > Signed-off-by: Michael S. Tsirkin 
> > ---
> >   drivers/vhost/test.c  |   2 +-
> >   drivers/vhost/vhost.c | 318 --
> >   drivers/vhost/vhost.h |   7 +-
> >   3 files changed, 65 insertions(+), 262 deletions(-)
> > 
> > diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
> > index 0466921f4772..7d69778aaa26 100644
> > --- a/drivers/vhost/test.c
> > +++ b/drivers/vhost/test.c
> > @@ -119,7 +119,7 @@ static int vhost_test_open(struct inode *inode, struct 
> > file *f)
> > dev = >dev;
> > vqs[VHOST_TEST_VQ] = >vqs[VHOST_TEST_VQ];
> > n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick;
> > -   vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV,
> > +   vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV + 64,
> >VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT, true, NULL);
> > f->private_data = n;
> > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > index 180b7b58c76b..41d6b132c234 100644
> > --- a/drivers/vhost/vhost.c
> > +++ b/drivers/vhost/vhost.c
> > @@ -304,6 +304,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> >   {
> > vq->num = 1;
> > vq->ndescs = 0;
> > +   vq->first_desc = 0;
> > vq->desc = NULL;
> > vq->avail = NULL;
> > vq->used = NULL;
> > @@ -372,6 +373,11 @@ static int vhost_worker(void *data)
> > return 0;
> >   }
> > +static int vhost_vq_num_batch_descs(struct vhost_virtqueue *vq)
> > +{
> > +   return vq->max_descs - UIO_MAXIOV;
> > +}
> > +
> >   static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
> >   {
> > kfree(vq->descs);
> > @@ -394,6 +400,9 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev 
> > *dev)
> > for (i = 0; i < dev->nvqs; ++i) {
> > vq = dev->vqs[i];
> > vq->max_descs = dev->iov_limit;
> > +   if (vhost_vq_num_batch_descs(vq) < 0) {
> > +   return -EINVAL;
> > +   }
> > vq->descs = kmalloc_array(vq->max_descs,
> >   sizeof(*vq->descs),
> >   GFP_KERNEL);
> > @@ -1610,6 +1619,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned 
> > int ioctl, void __user *arg
> > vq->last_avail_idx = s.num;
> > /* Forget the cached index value. */
> > vq->avail_idx = vq->last_avail_idx;
> > +   vq->ndescs = vq->first_desc = 0;
> > break;
> > case VHOST_GET_VRING_BASE:
> > s.index = idx;
> > @@ -2078,253 +2088,6 @@ static unsigned next_desc(struct vhost_virtqueue 
> > *vq, struct vring_desc *desc)
> > return next;
> >   }
> > -static int get_indirect(struct vhost_virtqueue *vq,
> > -   struct iovec iov[], unsigned int iov_size,
> > -   unsigned int *out_num, unsigned int *in_num,
> > -   struct vhost_log *log, unsigned int *log_num,
> > -   struct vring_desc *indirect)
> > -{
> > -   struct vring_desc desc;
> > -   unsigned int i = 0, count, found = 0;
> > -   u32 len = vhost32_to_cpu(vq, indirect->len);
> > -   struct iov_iter from;
> > -   int ret, access;
> > -
> > -   /* Sanity check */
> > -   if (unlikely(len % sizeof desc)) {
> > -   vq_err(vq, "Invalid length in indirect descriptor: "
> > -  "len 0x%llx not multiple of 0x%zx\n",
> > -  (unsigned long long)len,
> > -  sizeof desc);
> > -   return -EINVAL;
> > -   }
> > -
> > -   ret = translate_desc(vq, vhost64_to_cpu(vq, indirect->addr), len, 
> > vq->indirect,
> > -UIO_MAXIOV, VHOST_ACCESS_RO);
> > -   if (unlikely(ret < 0)) {
> > -   if (ret != -EAGAIN)
> > -   vq_err(vq, "Translation failure %d in indirect.\n", 
> > ret);
> > -   return ret;
> > -   }
> > -   iov_iter_init(, READ, vq->indirect, ret, len);
> > -
> > -   /* We will use the result as an address to read from, so most
> > -* architectures only need a compiler barrier here. */
> > -   read_barrier_depends();
> > -
> > -   count = len / sizeof desc;
> > -   /* Buffers are chained via a 16 bit next field, so
> > -* we can have at most 2^16 of these. */
> > -   if (unlikely(count > USHRT_MAX + 1)) {
> > -   vq_err(vq, "Indirect buffer length too big: %d\n",
> > -  indirect->len);
> > -   return -E2BIG;
> > -   }
> > -
> > -   do {
> > -   unsigned iov_count = *in_num + *out_num;
> > -   if (unlikely(++found > count)) {
> > -   vq_err(vq, "Loop detected: last one at %u "
> > -  "indirect size %u\n",
> > -  

Re: [PATCH] virtio-mem: silence a static checker warning

2020-06-10 Thread David Hildenbrand


> Am 10.06.2020 um 10:59 schrieb Dan Carpenter :
> 
> Smatch complains that "rc" can be uninitialized if we hit the "break;"
> statement on the first iteration through the loop.  I suspect that this
> can't happen in real life, but returning a zero literal is cleaner and
> silences the static checker warning.
> 

Right, it's impossible in real life. Thanks!

Acked-by: David Hildenbrand 

> Signed-off-by: Dan Carpenter 
> ---
> drivers/virtio/virtio_mem.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
> index f658fe9149beb..893ef18060a02 100644
> --- a/drivers/virtio/virtio_mem.c
> +++ b/drivers/virtio/virtio_mem.c
> @@ -1192,7 +1192,7 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem 
> *vm, unsigned long mb_id,
>VIRTIO_MEM_MB_STATE_OFFLINE);
>}
> 
> -return rc;
> +return 0;
> }
> 
> /*
> -- 
> 2.26.2
> 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH] vhost_vdpa: Fix potential underflow in vhost_vdpa_mmap()

2020-06-10 Thread Michael S. Tsirkin
On Wed, Jun 10, 2020 at 11:58:52AM +0300, Dan Carpenter wrote:
> The "vma->vm_pgoff" variable is an unsigned long so if it's larger than
> INT_MAX then "index" can be negative leading to an underflow.  Fix this
> by changing the type of "index" to "unsigned long".
> 
> Fixes: ddd89d0a059d ("vhost_vdpa: support doorbell mapping via mmap")
> Signed-off-by: Dan Carpenter 

Applied, thanks!

> ---
>  drivers/vhost/vdpa.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index 7580e34f76c10..a54b60d6623f0 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -818,7 +818,7 @@ static int vhost_vdpa_mmap(struct file *file, struct 
> vm_area_struct *vma)
>   struct vdpa_device *vdpa = v->vdpa;
>   const struct vdpa_config_ops *ops = vdpa->config;
>   struct vdpa_notification_area notify;
> - int index = vma->vm_pgoff;
> + unsigned long index = vma->vm_pgoff;
>  
>   if (vma->vm_end - vma->vm_start != PAGE_SIZE)
>   return -EINVAL;
> -- 
> 2.26.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] virtio-mem: silence a static checker warning

2020-06-10 Thread Michael S. Tsirkin
On Wed, Jun 10, 2020 at 11:59:11AM +0300, Dan Carpenter wrote:
> Smatch complains that "rc" can be uninitialized if we hit the "break;"
> statement on the first iteration through the loop.  I suspect that this
> can't happen in real life, but returning a zero literal is cleaner and
> silences the static checker warning.
> 
> Signed-off-by: Dan Carpenter 
> ---
>  drivers/virtio/virtio_mem.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Applied, thanks.

> diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
> index f658fe9149beb..893ef18060a02 100644
> --- a/drivers/virtio/virtio_mem.c
> +++ b/drivers/virtio/virtio_mem.c
> @@ -1192,7 +1192,7 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem 
> *vm, unsigned long mb_id,
>   VIRTIO_MEM_MB_STATE_OFFLINE);
>   }
>  
> - return rc;
> + return 0;
>  }
>  
>  /*
> -- 
> 2.26.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH] vhost_vdpa: Fix potential underflow in vhost_vdpa_mmap()

2020-06-10 Thread Dan Carpenter
The "vma->vm_pgoff" variable is an unsigned long so if it's larger than
INT_MAX then "index" can be negative leading to an underflow.  Fix this
by changing the type of "index" to "unsigned long".

Fixes: ddd89d0a059d ("vhost_vdpa: support doorbell mapping via mmap")
Signed-off-by: Dan Carpenter 
---
 drivers/vhost/vdpa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 7580e34f76c10..a54b60d6623f0 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -818,7 +818,7 @@ static int vhost_vdpa_mmap(struct file *file, struct 
vm_area_struct *vma)
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
struct vdpa_notification_area notify;
-   int index = vma->vm_pgoff;
+   unsigned long index = vma->vm_pgoff;
 
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
-- 
2.26.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v2] mm/balloon_compaction: Fix trivial spelling

2020-06-10 Thread Sergei Shtylyov

Hello!

On 09.06.2020 17:34, Kieran Bingham wrote:


The word 'descriptor' is misspelled throughout the tree.

Fix it up accordingly:
 decriptors -> descriptors


  decriptor -> descriptor really. ;-)


Signed-off-by: Kieran Bingham 
Reviewed-by: David Hildenbrand 
---
  mm/balloon_compaction.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c
index 26de020aae7b..907fefde2572 100644
--- a/mm/balloon_compaction.c
+++ b/mm/balloon_compaction.c
@@ -58,7 +58,7 @@ EXPORT_SYMBOL_GPL(balloon_page_list_enqueue);
  /**
   * balloon_page_list_dequeue() - removes pages from balloon's page list and
   * returns a list of the pages.
- * @b_dev_info: balloon device decriptor where we will grab a page from.
+ * @b_dev_info: balloon device descriptor where we will grab a page from.
   * @pages: pointer to the list of pages that would be returned to the caller.
   * @n_req_pages: number of requested pages.
   *
@@ -157,7 +157,7 @@ EXPORT_SYMBOL_GPL(balloon_page_enqueue);
  /*
   * balloon_page_dequeue - removes a page from balloon's page list and returns
   *  its address to allow the driver to release the page.
- * @b_dev_info: balloon device decriptor where we will grab a page from.
+ * @b_dev_info: balloon device descriptor where we will grab a page from.
   *
   * Driver must call this function to properly dequeue a previously enqueued 
page
   * before definitively releasing it back to the guest system.


MBR, Sergei
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH] virtio-mem: silence a static checker warning

2020-06-10 Thread Dan Carpenter
Smatch complains that "rc" can be uninitialized if we hit the "break;"
statement on the first iteration through the loop.  I suspect that this
can't happen in real life, but returning a zero literal is cleaner and
silences the static checker warning.

Signed-off-by: Dan Carpenter 
---
 drivers/virtio/virtio_mem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index f658fe9149beb..893ef18060a02 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -1192,7 +1192,7 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem 
*vm, unsigned long mb_id,
VIRTIO_MEM_MB_STATE_OFFLINE);
}
 
-   return rc;
+   return 0;
 }
 
 /*
-- 
2.26.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH V3] vdpa: introduce virtio pci driver

2020-06-10 Thread Michael S. Tsirkin
On Wed, Jun 10, 2020 at 04:25:06PM +0800, Jason Wang wrote:
> > > +
> > > +#define VP_VDPA_FEATURES \
> > > + ((1ULL << VIRTIO_F_ANY_LAYOUT)  | \
> > 
> > This is presumably for transitional devices only.  In fact looking at
> > code it seems that only net in legacy mode accepts VIRTIO_F_ANY_LAYOUT.
> > Spec violation I guess ... but what should we do? Relax the spec
> > or fix drivers?
> 
> 
> I don't get how it violates the spec.

Spec says transitional drivers must ack VIRTIO_F_ANY_LAYOUT

But grep for VIRTIO_F_ANY_LAYOUT and you will see they don't.
Only legacy virtio net does.

Maybe it should have been
transitional drivers when using the lgacy interface, but there
you are.

> 
> > 
> > 
> > > +  (1ULL << VIRTIO_F_VERSION_1)   | \
> > > +  (1ULL << VIRTIO_F_ORDER_PLATFORM)  | \
> > > +  (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> > > +
> > > +struct vp_vring {
> > > + void __iomem *notify;
> > > + char msix_name[256];
> > > + resource_size_t notify_pa;
> > > + struct vdpa_callback cb;
> > > + int irq;
> > > +};
> > > +
> > > +struct vp_vdpa {
> > > + struct vdpa_device vdpa;
> > > + struct pci_dev *pdev;
> > > +
> > > + struct virtio_device_id id;
> > > +
> > > + struct vp_vring vring[VP_VDPA_MAX_QUEUE];
> > > +
> > > + /* The IO mapping for the PCI config space */
> > > + void __iomem * const *base;
> > > + struct virtio_pci_common_cfg __iomem *common;
> > > + void __iomem *device;
> > > + /* Base of vq notifications */
> > > + void __iomem *notify;
> > > +
> > > + /* Multiplier for queue_notify_off. */
> > > + u32 notify_off_multiplier;
> > > +
> > > + int modern_bars;
> > > + int vectors;
> > > +};
> > > +
> > > +static struct vp_vdpa *vdpa_to_vp(struct vdpa_device *vdpa)
> > > +{
> > > + return container_of(vdpa, struct vp_vdpa, vdpa);
> > > +}
> > > +
> > > +/*
> > > + * Type-safe wrappers for io accesses.
> > > + * Use these to enforce at compile time the following spec requirement:
> > > + *
> > > + * The driver MUST access each field using the “natural” access
> > > + * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses
> > > + * for 16-bit fields and 8-bit accesses for 8-bit fields.
> > > + */
> > > +static inline u8 vp_ioread8(u8 __iomem *addr)
> > > +{
> > > + return ioread8(addr);
> > > +}
> > > +static inline u16 vp_ioread16(__le16 __iomem *addr)
> > > +{
> > > + return ioread16(addr);
> > > +}
> > > +
> > > +static inline u32 vp_ioread32(__le32 __iomem *addr)
> > > +{
> > > + return ioread32(addr);
> > > +}
> > > +
> > > +static inline void vp_iowrite8(u8 value, u8 __iomem *addr)
> > > +{
> > > + iowrite8(value, addr);
> > > +}
> > > +
> > > +static inline void vp_iowrite16(u16 value, __le16 __iomem *addr)
> > > +{
> > > + iowrite16(value, addr);
> > > +}
> > > +
> > > +static inline void vp_iowrite32(u32 value, __le32 __iomem *addr)
> > > +{
> > > + iowrite32(value, addr);
> > > +}
> > > +
> > > +static void vp_iowrite64_twopart(u64 val,
> > > +  __le32 __iomem *lo, __le32 __iomem *hi)
> > > +{
> > > + vp_iowrite32((u32)val, lo);
> > > + vp_iowrite32(val >> 32, hi);
> > > +}
> > > +
> > > +static int find_capability(struct pci_dev *dev, u8 cfg_type,
> > > +u32 ioresource_types, int *bars)
> > > +{
> > > + int pos;
> > > +
> > > + for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR);
> > > +  pos > 0;
> > > +  pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) {
> > > + u8 type, bar;
> > > +
> > > + pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
> > > +  cfg_type),
> > > +  );
> > > + pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
> > > +  bar),
> > > +  );
> > > +
> > > + /* Ignore structures with reserved BAR values */
> > > + if (bar > 0x5)
> > > + continue;
> > > +
> > > + if (type == cfg_type) {
> > > + if (pci_resource_len(dev, bar) &&
> > > + pci_resource_flags(dev, bar) & ioresource_types) {
> > > + *bars |= (1 << bar);
> > > + return pos;
> > > + }
> > > + }
> > > + }
> > > + return 0;
> > > +}
> > > +
> > > +static void __iomem *map_capability(struct vp_vdpa *vp_vdpa, int off,
> > > + resource_size_t *pa)
> > > +{
> > > + struct pci_dev *pdev = vp_vdpa->pdev;
> > > + u32 offset;
> > > + u8 bar;
> > > +
> > > + pci_read_config_byte(pdev,
> > > +  off + offsetof(struct virtio_pci_cap, bar),
> > > +  );
> > > + pci_read_config_dword(pdev,
> > > +   off + offsetof(struct virtio_pci_cap, offset),
> > > +   );
> > > +
> > > + if (pa)
> > > + *pa = pci_resource_start(pdev, bar) + offset;
> > > +
> > > + return 

Re: [PATCH V3] vdpa: introduce virtio pci driver

2020-06-10 Thread Jason Wang


On 2020/6/10 下午3:08, Michael S. Tsirkin wrote:

On Wed, Jun 10, 2020 at 02:52:17PM +0800, Jason Wang wrote:

This patch introduce a vDPA driver for virtio-pci device. It bridges
the virtio-pci control command to the vDPA bus. This will be used for
developing new features for both software vDPA framework and hardware
vDPA feature.

Compared to vdpa_sim, it has several advantages:

- it's a real device driver which allow us to play with real hardware
   features
- type independent instead of networking specific

Note that since virtio specification does not support get/restore
virtqueue state. So we can not use this driver for VM. This can be
addressed by extending the virtio specification.

Consider the driver is mainly for testing and development for vDPA
features, it can only be bound via dynamic ids to make sure it does not
conflict with drivers like virtio-pci or IFCVF.

Signed-off-by: Jason Wang 
---
Changes from V2:
- rebase on vhost.git vhost branch
---
  drivers/vdpa/Kconfig   |   8 +
  drivers/vdpa/Makefile  |   1 +
  drivers/vdpa/vp_vdpa/Makefile  |   2 +
  drivers/vdpa/vp_vdpa/vp_vdpa.c | 601 +
  4 files changed, 612 insertions(+)
  create mode 100644 drivers/vdpa/vp_vdpa/Makefile
  create mode 100644 drivers/vdpa/vp_vdpa/vp_vdpa.c

diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig
index 3e1ceb8e9f2b..deb85e43a4c2 100644
--- a/drivers/vdpa/Kconfig
+++ b/drivers/vdpa/Kconfig
@@ -28,4 +28,12 @@ config IFCVF
  To compile this driver as a module, choose M here: the module will
  be called ifcvf.
  
+config VP_VDPA

+   tristate "Virtio PCI bridge vDPA driver"
+   depends on PCI_MSI
+   help
+ This kernel module that bridges virtio PCI device to vDPA
+ bus. It allows us to test and develop vDPA subsystem inside
+ an VM with the emulated virtio-pci device
+
  endif # VDPA
diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile
index 8bbb686ca7a2..37d00f49b3bf 100644
--- a/drivers/vdpa/Makefile
+++ b/drivers/vdpa/Makefile
@@ -2,3 +2,4 @@
  obj-$(CONFIG_VDPA) += vdpa.o
  obj-$(CONFIG_VDPA_SIM) += vdpa_sim/
  obj-$(CONFIG_IFCVF)+= ifcvf/
+obj-$(CONFIG_VP_VDPA)+= vp_vdpa/
diff --git a/drivers/vdpa/vp_vdpa/Makefile b/drivers/vdpa/vp_vdpa/Makefile
new file mode 100644
index ..231088d3af7d
--- /dev/null
+++ b/drivers/vdpa/vp_vdpa/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_VP_VDPA) += vp_vdpa.o
diff --git a/drivers/vdpa/vp_vdpa/vp_vdpa.c b/drivers/vdpa/vp_vdpa/vp_vdpa.c
new file mode 100644
index ..2070298ab9fc
--- /dev/null
+++ b/drivers/vdpa/vp_vdpa/vp_vdpa.c
@@ -0,0 +1,601 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vDPA bridge driver for modern virtio-pci device

And judging by the code, transitional too?
Or maybe we should drop transitional device support here.



Yes, I will simply drop the transitional device support.





+ *
+ * Copyright (c) 2020, Red Hat Inc. All rights reserved.
+ * Author: Jason Wang 
+ *
+ * Based on virtio_pci_modern.c.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* TBD: read from config space */
+#define VP_VDPA_MAX_QUEUE 2

We need to fix that right? Otherwise lots of devices break ...



Yes, will fix.





+#define VP_VDPA_DRIVER_NAME "vp_vdpa"

not sure why you need this macro ...



Used only once, so I will remove this.





+
+#define VP_VDPA_FEATURES \
+   ((1ULL << VIRTIO_F_ANY_LAYOUT)| \


This is presumably for transitional devices only.  In fact looking at
code it seems that only net in legacy mode accepts VIRTIO_F_ANY_LAYOUT.
Spec violation I guess ... but what should we do? Relax the spec
or fix drivers?



I don't get how it violates the spec.






+(1ULL << VIRTIO_F_VERSION_1) | \
+(1ULL << VIRTIO_F_ORDER_PLATFORM)| \
+(1ULL << VIRTIO_F_IOMMU_PLATFORM))
+
+struct vp_vring {
+   void __iomem *notify;
+   char msix_name[256];
+   resource_size_t notify_pa;
+   struct vdpa_callback cb;
+   int irq;
+};
+
+struct vp_vdpa {
+   struct vdpa_device vdpa;
+   struct pci_dev *pdev;
+
+   struct virtio_device_id id;
+
+   struct vp_vring vring[VP_VDPA_MAX_QUEUE];
+
+   /* The IO mapping for the PCI config space */
+   void __iomem * const *base;
+   struct virtio_pci_common_cfg __iomem *common;
+   void __iomem *device;
+   /* Base of vq notifications */
+   void __iomem *notify;
+
+   /* Multiplier for queue_notify_off. */
+   u32 notify_off_multiplier;
+
+   int modern_bars;
+   int vectors;
+};
+
+static struct vp_vdpa *vdpa_to_vp(struct vdpa_device *vdpa)
+{
+   return container_of(vdpa, struct vp_vdpa, vdpa);
+}
+
+/*
+ * Type-safe wrappers for io accesses.
+ * Use these to enforce at compile time the following spec requirement:
+ *
+ * The driver MUST access each 

Re: [PATCH V3] vdpa: introduce virtio pci driver

2020-06-10 Thread Michael S. Tsirkin
On Wed, Jun 10, 2020 at 02:52:17PM +0800, Jason Wang wrote:
> This patch introduce a vDPA driver for virtio-pci device. It bridges
> the virtio-pci control command to the vDPA bus. This will be used for
> developing new features for both software vDPA framework and hardware
> vDPA feature.
> 
> Compared to vdpa_sim, it has several advantages:
> 
> - it's a real device driver which allow us to play with real hardware
>   features
> - type independent instead of networking specific
> 
> Note that since virtio specification does not support get/restore
> virtqueue state. So we can not use this driver for VM. This can be
> addressed by extending the virtio specification.
> 
> Consider the driver is mainly for testing and development for vDPA
> features, it can only be bound via dynamic ids to make sure it does not
> conflict with drivers like virtio-pci or IFCVF.
> 
> Signed-off-by: Jason Wang 
> ---
> Changes from V2:
> - rebase on vhost.git vhost branch
> ---
>  drivers/vdpa/Kconfig   |   8 +
>  drivers/vdpa/Makefile  |   1 +
>  drivers/vdpa/vp_vdpa/Makefile  |   2 +
>  drivers/vdpa/vp_vdpa/vp_vdpa.c | 601 +
>  4 files changed, 612 insertions(+)
>  create mode 100644 drivers/vdpa/vp_vdpa/Makefile
>  create mode 100644 drivers/vdpa/vp_vdpa/vp_vdpa.c
> 
> diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig
> index 3e1ceb8e9f2b..deb85e43a4c2 100644
> --- a/drivers/vdpa/Kconfig
> +++ b/drivers/vdpa/Kconfig
> @@ -28,4 +28,12 @@ config IFCVF
> To compile this driver as a module, choose M here: the module will
> be called ifcvf.
>  
> +config VP_VDPA
> + tristate "Virtio PCI bridge vDPA driver"
> + depends on PCI_MSI
> + help
> +   This kernel module that bridges virtio PCI device to vDPA
> +   bus. It allows us to test and develop vDPA subsystem inside
> +   an VM with the emulated virtio-pci device
> +
>  endif # VDPA
> diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile
> index 8bbb686ca7a2..37d00f49b3bf 100644
> --- a/drivers/vdpa/Makefile
> +++ b/drivers/vdpa/Makefile
> @@ -2,3 +2,4 @@
>  obj-$(CONFIG_VDPA) += vdpa.o
>  obj-$(CONFIG_VDPA_SIM) += vdpa_sim/
>  obj-$(CONFIG_IFCVF)+= ifcvf/
> +obj-$(CONFIG_VP_VDPA)+= vp_vdpa/
> diff --git a/drivers/vdpa/vp_vdpa/Makefile b/drivers/vdpa/vp_vdpa/Makefile
> new file mode 100644
> index ..231088d3af7d
> --- /dev/null
> +++ b/drivers/vdpa/vp_vdpa/Makefile
> @@ -0,0 +1,2 @@
> +# SPDX-License-Identifier: GPL-2.0
> +obj-$(CONFIG_VP_VDPA) += vp_vdpa.o
> diff --git a/drivers/vdpa/vp_vdpa/vp_vdpa.c b/drivers/vdpa/vp_vdpa/vp_vdpa.c
> new file mode 100644
> index ..2070298ab9fc
> --- /dev/null
> +++ b/drivers/vdpa/vp_vdpa/vp_vdpa.c
> @@ -0,0 +1,601 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * vDPA bridge driver for modern virtio-pci device

And judging by the code, transitional too?
Or maybe we should drop transitional device support here.

> + *
> + * Copyright (c) 2020, Red Hat Inc. All rights reserved.
> + * Author: Jason Wang 
> + *
> + * Based on virtio_pci_modern.c.
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +/* TBD: read from config space */
> +#define VP_VDPA_MAX_QUEUE 2

We need to fix that right? Otherwise lots of devices break ...

> +#define VP_VDPA_DRIVER_NAME "vp_vdpa"

not sure why you need this macro ...

> +
> +#define VP_VDPA_FEATURES \
> + ((1ULL << VIRTIO_F_ANY_LAYOUT)  | \


This is presumably for transitional devices only.  In fact looking at
code it seems that only net in legacy mode accepts VIRTIO_F_ANY_LAYOUT.
Spec violation I guess ... but what should we do? Relax the spec
or fix drivers?


> +  (1ULL << VIRTIO_F_VERSION_1)   | \
> +  (1ULL << VIRTIO_F_ORDER_PLATFORM)  | \
> +  (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +
> +struct vp_vring {
> + void __iomem *notify;
> + char msix_name[256];
> + resource_size_t notify_pa;
> + struct vdpa_callback cb;
> + int irq;
> +};
> +
> +struct vp_vdpa {
> + struct vdpa_device vdpa;
> + struct pci_dev *pdev;
> +
> + struct virtio_device_id id;
> +
> + struct vp_vring vring[VP_VDPA_MAX_QUEUE];
> +
> + /* The IO mapping for the PCI config space */
> + void __iomem * const *base;
> + struct virtio_pci_common_cfg __iomem *common;
> + void __iomem *device;
> + /* Base of vq notifications */
> + void __iomem *notify;
> +
> + /* Multiplier for queue_notify_off. */
> + u32 notify_off_multiplier;
> +
> + int modern_bars;
> + int vectors;
> +};
> +
> +static struct vp_vdpa *vdpa_to_vp(struct vdpa_device *vdpa)
> +{
> + return container_of(vdpa, struct vp_vdpa, vdpa);
> +}
> +
> +/*
> + * Type-safe wrappers for io accesses.
> + * Use these to enforce at compile time the following spec requirement:
> + *
> + * The driver MUST access each field using the “natural” access
> 

[PATCH V3] vdpa: introduce virtio pci driver

2020-06-10 Thread Jason Wang
This patch introduce a vDPA driver for virtio-pci device. It bridges
the virtio-pci control command to the vDPA bus. This will be used for
developing new features for both software vDPA framework and hardware
vDPA feature.

Compared to vdpa_sim, it has several advantages:

- it's a real device driver which allow us to play with real hardware
  features
- type independent instead of networking specific

Note that since virtio specification does not support get/restore
virtqueue state. So we can not use this driver for VM. This can be
addressed by extending the virtio specification.

Consider the driver is mainly for testing and development for vDPA
features, it can only be bound via dynamic ids to make sure it does not
conflict with drivers like virtio-pci or IFCVF.

Signed-off-by: Jason Wang 
---
Changes from V2:
- rebase on vhost.git vhost branch
---
 drivers/vdpa/Kconfig   |   8 +
 drivers/vdpa/Makefile  |   1 +
 drivers/vdpa/vp_vdpa/Makefile  |   2 +
 drivers/vdpa/vp_vdpa/vp_vdpa.c | 601 +
 4 files changed, 612 insertions(+)
 create mode 100644 drivers/vdpa/vp_vdpa/Makefile
 create mode 100644 drivers/vdpa/vp_vdpa/vp_vdpa.c

diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig
index 3e1ceb8e9f2b..deb85e43a4c2 100644
--- a/drivers/vdpa/Kconfig
+++ b/drivers/vdpa/Kconfig
@@ -28,4 +28,12 @@ config IFCVF
  To compile this driver as a module, choose M here: the module will
  be called ifcvf.
 
+config VP_VDPA
+   tristate "Virtio PCI bridge vDPA driver"
+   depends on PCI_MSI
+   help
+ This kernel module that bridges virtio PCI device to vDPA
+ bus. It allows us to test and develop vDPA subsystem inside
+ an VM with the emulated virtio-pci device
+
 endif # VDPA
diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile
index 8bbb686ca7a2..37d00f49b3bf 100644
--- a/drivers/vdpa/Makefile
+++ b/drivers/vdpa/Makefile
@@ -2,3 +2,4 @@
 obj-$(CONFIG_VDPA) += vdpa.o
 obj-$(CONFIG_VDPA_SIM) += vdpa_sim/
 obj-$(CONFIG_IFCVF)+= ifcvf/
+obj-$(CONFIG_VP_VDPA)+= vp_vdpa/
diff --git a/drivers/vdpa/vp_vdpa/Makefile b/drivers/vdpa/vp_vdpa/Makefile
new file mode 100644
index ..231088d3af7d
--- /dev/null
+++ b/drivers/vdpa/vp_vdpa/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_VP_VDPA) += vp_vdpa.o
diff --git a/drivers/vdpa/vp_vdpa/vp_vdpa.c b/drivers/vdpa/vp_vdpa/vp_vdpa.c
new file mode 100644
index ..2070298ab9fc
--- /dev/null
+++ b/drivers/vdpa/vp_vdpa/vp_vdpa.c
@@ -0,0 +1,601 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vDPA bridge driver for modern virtio-pci device
+ *
+ * Copyright (c) 2020, Red Hat Inc. All rights reserved.
+ * Author: Jason Wang 
+ *
+ * Based on virtio_pci_modern.c.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* TBD: read from config space */
+#define VP_VDPA_MAX_QUEUE 2
+#define VP_VDPA_DRIVER_NAME "vp_vdpa"
+
+#define VP_VDPA_FEATURES \
+   ((1ULL << VIRTIO_F_ANY_LAYOUT)  | \
+(1ULL << VIRTIO_F_VERSION_1)   | \
+(1ULL << VIRTIO_F_ORDER_PLATFORM)  | \
+(1ULL << VIRTIO_F_IOMMU_PLATFORM))
+
+struct vp_vring {
+   void __iomem *notify;
+   char msix_name[256];
+   resource_size_t notify_pa;
+   struct vdpa_callback cb;
+   int irq;
+};
+
+struct vp_vdpa {
+   struct vdpa_device vdpa;
+   struct pci_dev *pdev;
+
+   struct virtio_device_id id;
+
+   struct vp_vring vring[VP_VDPA_MAX_QUEUE];
+
+   /* The IO mapping for the PCI config space */
+   void __iomem * const *base;
+   struct virtio_pci_common_cfg __iomem *common;
+   void __iomem *device;
+   /* Base of vq notifications */
+   void __iomem *notify;
+
+   /* Multiplier for queue_notify_off. */
+   u32 notify_off_multiplier;
+
+   int modern_bars;
+   int vectors;
+};
+
+static struct vp_vdpa *vdpa_to_vp(struct vdpa_device *vdpa)
+{
+   return container_of(vdpa, struct vp_vdpa, vdpa);
+}
+
+/*
+ * Type-safe wrappers for io accesses.
+ * Use these to enforce at compile time the following spec requirement:
+ *
+ * The driver MUST access each field using the “natural” access
+ * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses
+ * for 16-bit fields and 8-bit accesses for 8-bit fields.
+ */
+static inline u8 vp_ioread8(u8 __iomem *addr)
+{
+   return ioread8(addr);
+}
+static inline u16 vp_ioread16(__le16 __iomem *addr)
+{
+   return ioread16(addr);
+}
+
+static inline u32 vp_ioread32(__le32 __iomem *addr)
+{
+   return ioread32(addr);
+}
+
+static inline void vp_iowrite8(u8 value, u8 __iomem *addr)
+{
+   iowrite8(value, addr);
+}
+
+static inline void vp_iowrite16(u16 value, __le16 __iomem *addr)
+{
+   iowrite16(value, addr);
+}
+
+static inline void vp_iowrite32(u32 value, __le32 __iomem *addr)
+{
+   iowrite32(value, addr);
+}
+

Re: [PATCH RESEND V2] vdpa: introduce virtio pci driver

2020-06-10 Thread Jason Wang


On 2020/6/10 下午2:21, Michael S. Tsirkin wrote:

On Wed, Jun 10, 2020 at 02:16:26PM +0800, Jason Wang wrote:

On 2020/6/10 下午2:07, Michael S. Tsirkin wrote:

On Wed, Jun 10, 2020 at 01:49:51PM +0800, Jason Wang wrote:

This patch introduce a vDPA driver for virtio-pci device. It bridges
the virtio-pci control command to the vDPA bus. This will be used for
developing new features for both software vDPA framework and hardware
vDPA feature.

Compared to vdpa_sim, it has several advantages:

- it's a real device driver which allow us to play with real hardware
features
- type independent instead of networking specific

Note that since virtio specification does not support get/restore
virtqueue state. So we can not use this driver for VM. This can be
addressed by extending the virtio specification.

Consider the driver is mainly for testing and development for vDPA
features, it can only be bound via dynamic ids to make sure it's not
conflict with the drivers like virtio-pci or IFCVF.

Signed-off-by: Jason Wang

error: sha1 information is lacking or useless (drivers/vdpa/Kconfig).

which tree is this on top of?


Your vhost.git vhost branch, HEAD is bbea3bcfd1d6 vdpa: fix typos in the
comments for __vdpa_alloc_device()

Do I need to use other branch?

Thanks

No it's ok, I am just wondering why doesn't it apply then.



I found the reason, I generate the patch on another branch whose base 
does not existed in the vhost branch. Will repost.


Sorry.






___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH RESEND V2] vdpa: introduce virtio pci driver

2020-06-10 Thread Michael S. Tsirkin
On Wed, Jun 10, 2020 at 02:16:26PM +0800, Jason Wang wrote:
> 
> On 2020/6/10 下午2:07, Michael S. Tsirkin wrote:
> > On Wed, Jun 10, 2020 at 01:49:51PM +0800, Jason Wang wrote:
> > > This patch introduce a vDPA driver for virtio-pci device. It bridges
> > > the virtio-pci control command to the vDPA bus. This will be used for
> > > developing new features for both software vDPA framework and hardware
> > > vDPA feature.
> > > 
> > > Compared to vdpa_sim, it has several advantages:
> > > 
> > > - it's a real device driver which allow us to play with real hardware
> > >features
> > > - type independent instead of networking specific
> > > 
> > > Note that since virtio specification does not support get/restore
> > > virtqueue state. So we can not use this driver for VM. This can be
> > > addressed by extending the virtio specification.
> > > 
> > > Consider the driver is mainly for testing and development for vDPA
> > > features, it can only be bound via dynamic ids to make sure it's not
> > > conflict with the drivers like virtio-pci or IFCVF.
> > > 
> > > Signed-off-by: Jason Wang
> > error: sha1 information is lacking or useless (drivers/vdpa/Kconfig).
> > 
> > which tree is this on top of?
> 
> 
> Your vhost.git vhost branch, HEAD is bbea3bcfd1d6 vdpa: fix typos in the
> comments for __vdpa_alloc_device()
> 
> Do I need to use other branch?
> 
> Thanks

No it's ok, I am just wondering why doesn't it apply then.

> 
> > 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH RESEND V2] vdpa: introduce virtio pci driver

2020-06-10 Thread Jason Wang


On 2020/6/10 下午2:07, Michael S. Tsirkin wrote:

On Wed, Jun 10, 2020 at 01:49:51PM +0800, Jason Wang wrote:

This patch introduce a vDPA driver for virtio-pci device. It bridges
the virtio-pci control command to the vDPA bus. This will be used for
developing new features for both software vDPA framework and hardware
vDPA feature.

Compared to vdpa_sim, it has several advantages:

- it's a real device driver which allow us to play with real hardware
   features
- type independent instead of networking specific

Note that since virtio specification does not support get/restore
virtqueue state. So we can not use this driver for VM. This can be
addressed by extending the virtio specification.

Consider the driver is mainly for testing and development for vDPA
features, it can only be bound via dynamic ids to make sure it's not
conflict with the drivers like virtio-pci or IFCVF.

Signed-off-by: Jason Wang

error: sha1 information is lacking or useless (drivers/vdpa/Kconfig).

which tree is this on top of?



Your vhost.git vhost branch, HEAD is bbea3bcfd1d6 vdpa: fix typos in the 
comments for __vdpa_alloc_device()


Do I need to use other branch?

Thanks






___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH RESEND V2] vdpa: introduce virtio pci driver

2020-06-10 Thread Michael S. Tsirkin
On Wed, Jun 10, 2020 at 01:49:51PM +0800, Jason Wang wrote:
> This patch introduce a vDPA driver for virtio-pci device. It bridges
> the virtio-pci control command to the vDPA bus. This will be used for
> developing new features for both software vDPA framework and hardware
> vDPA feature.
> 
> Compared to vdpa_sim, it has several advantages:
> 
> - it's a real device driver which allow us to play with real hardware
>   features
> - type independent instead of networking specific
> 
> Note that since virtio specification does not support get/restore
> virtqueue state. So we can not use this driver for VM. This can be
> addressed by extending the virtio specification.
> 
> Consider the driver is mainly for testing and development for vDPA
> features, it can only be bound via dynamic ids to make sure it's not
> conflict with the drivers like virtio-pci or IFCVF.
> 
> Signed-off-by: Jason Wang 

error: sha1 information is lacking or useless (drivers/vdpa/Kconfig).

which tree is this on top of?

> ---
> Changes since V1:
> - use NULL id_table to allow dynamic ids only
> - squash the doorbell reporting
> ---
>  drivers/vdpa/Kconfig   |   8 +
>  drivers/vdpa/Makefile  |   1 +
>  drivers/vdpa/vp_vdpa/Makefile  |   2 +
>  drivers/vdpa/vp_vdpa/vp_vdpa.c | 601 +
>  4 files changed, 612 insertions(+)
>  create mode 100644 drivers/vdpa/vp_vdpa/Makefile
>  create mode 100644 drivers/vdpa/vp_vdpa/vp_vdpa.c
> 
> diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig
> index e8140065c8a5..5cef3a4872e3 100644
> --- a/drivers/vdpa/Kconfig
> +++ b/drivers/vdpa/Kconfig
> @@ -28,4 +28,12 @@ config IFCVF
> To compile this driver as a module, choose M here: the module will
> be called ifcvf.
>  
> +config VP_VDPA
> + tristate "Virtio PCI bridge vDPA driver"
> + depends on PCI_MSI
> + help
> +   This kernel module that bridges virtio PCI device to vDPA
> +   bus. It allows us to test and develop vDPA subsystem inside
> +   an VM with the emulated virtio-pci device
> +
>  endif # VDPA
> diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile
> index 8bbb686ca7a2..37d00f49b3bf 100644
> --- a/drivers/vdpa/Makefile
> +++ b/drivers/vdpa/Makefile
> @@ -2,3 +2,4 @@
>  obj-$(CONFIG_VDPA) += vdpa.o
>  obj-$(CONFIG_VDPA_SIM) += vdpa_sim/
>  obj-$(CONFIG_IFCVF)+= ifcvf/
> +obj-$(CONFIG_VP_VDPA)+= vp_vdpa/
> diff --git a/drivers/vdpa/vp_vdpa/Makefile b/drivers/vdpa/vp_vdpa/Makefile
> new file mode 100644
> index ..231088d3af7d
> --- /dev/null
> +++ b/drivers/vdpa/vp_vdpa/Makefile
> @@ -0,0 +1,2 @@
> +# SPDX-License-Identifier: GPL-2.0
> +obj-$(CONFIG_VP_VDPA) += vp_vdpa.o
> diff --git a/drivers/vdpa/vp_vdpa/vp_vdpa.c b/drivers/vdpa/vp_vdpa/vp_vdpa.c
> new file mode 100644
> index ..2070298ab9fc
> --- /dev/null
> +++ b/drivers/vdpa/vp_vdpa/vp_vdpa.c
> @@ -0,0 +1,601 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * vDPA bridge driver for modern virtio-pci device
> + *
> + * Copyright (c) 2020, Red Hat Inc. All rights reserved.
> + * Author: Jason Wang 
> + *
> + * Based on virtio_pci_modern.c.
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +/* TBD: read from config space */
> +#define VP_VDPA_MAX_QUEUE 2
> +#define VP_VDPA_DRIVER_NAME "vp_vdpa"
> +
> +#define VP_VDPA_FEATURES \
> + ((1ULL << VIRTIO_F_ANY_LAYOUT)  | \
> +  (1ULL << VIRTIO_F_VERSION_1)   | \
> +  (1ULL << VIRTIO_F_ORDER_PLATFORM)  | \
> +  (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +
> +struct vp_vring {
> + void __iomem *notify;
> + char msix_name[256];
> + resource_size_t notify_pa;
> + struct vdpa_callback cb;
> + int irq;
> +};
> +
> +struct vp_vdpa {
> + struct vdpa_device vdpa;
> + struct pci_dev *pdev;
> +
> + struct virtio_device_id id;
> +
> + struct vp_vring vring[VP_VDPA_MAX_QUEUE];
> +
> + /* The IO mapping for the PCI config space */
> + void __iomem * const *base;
> + struct virtio_pci_common_cfg __iomem *common;
> + void __iomem *device;
> + /* Base of vq notifications */
> + void __iomem *notify;
> +
> + /* Multiplier for queue_notify_off. */
> + u32 notify_off_multiplier;
> +
> + int modern_bars;
> + int vectors;
> +};
> +
> +static struct vp_vdpa *vdpa_to_vp(struct vdpa_device *vdpa)
> +{
> + return container_of(vdpa, struct vp_vdpa, vdpa);
> +}
> +
> +/*
> + * Type-safe wrappers for io accesses.
> + * Use these to enforce at compile time the following spec requirement:
> + *
> + * The driver MUST access each field using the “natural” access
> + * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses
> + * for 16-bit fields and 8-bit accesses for 8-bit fields.
> + */
> +static inline u8 vp_ioread8(u8 __iomem *addr)
> +{
> + return ioread8(addr);
> +}
> +static inline u16 vp_ioread16(__le16 __iomem *addr)
> +{