Re: [Qemu-devel] [PATCH v7 2/6] virtio-pmem: Add virtio pmem driver

2019-05-19 Thread Jakub Staroń via Virtualization
On 5/8/19 4:12 AM, Pankaj Gupta wrote:
> 
>>
>> On 4/25/19 10:00 PM, Pankaj Gupta wrote:
>>
>>> +void host_ack(struct virtqueue *vq)
>>> +{
>>> +   unsigned int len;
>>> +   unsigned long flags;
>>> +   struct virtio_pmem_request *req, *req_buf;
>>> +   struct virtio_pmem *vpmem = vq->vdev->priv;
>>> +
>>> +   spin_lock_irqsave(&vpmem->pmem_lock, flags);
>>> +   while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
>>> +   req->done = true;
>>> +   wake_up(&req->host_acked);
>>> +
>>> +   if (!list_empty(&vpmem->req_list)) {
>>> +   req_buf = list_first_entry(&vpmem->req_list,
>>> +   struct virtio_pmem_request, list);
>>> +   list_del(&vpmem->req_list);
>>
>> Shouldn't it be rather `list_del(vpmem->req_list.next)`? We are trying to unlink
>> the first element of the list and `vpmem->req_list` is just the list head.
> 
> This looks correct. We are not deleting the head but the first entry in
> 'req_list', which is the device's list of pending requests.
> 
> Please see below:
> 
> /**
>  * Retrieve the first list entry for the given list pointer.
>  *
>  * Example:
>  * struct foo *first;
>  * first = list_first_entry(&bar->list_of_foos, struct foo, list_of_foos);
>  *
>  * @param ptr The list head
>  * @param type Data type of the list element to retrieve
>  * @param member Member name of the struct list_head field in the list element.
>  * @return A pointer to the first list element.
>  */
> #define list_first_entry(ptr, type, member) \
> list_entry((ptr)->next, type, member)

Please look at this StackOverflow question:
https://stackoverflow.com/questions/19675419/deleting-first-element-of-a-list-h-list

The author asks about deleting the first element of a list. In our case
(and also in that author's case), `vpmem->req_list` is not embedded in any
request struct and is not an element of the list itself. It's just a list head
storing `next` and `prev` pointers, which point to the first and last elements
of the list respectively. We want to unlink the first element of the list, so
we need to pass a pointer to that first element to the `list_del` function -
that is, `vpmem->req_list.next`.
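
To illustrate with a small sketch (only the list-related fields from the patch
are shown, and the pop_pending_request() helper is mine, not something in the
patch):

#include <linux/list.h>

struct virtio_pmem_request {
        struct list_head list;          /* node linked into vpmem->req_list */
        /* ... other fields from the patch ... */
};

struct virtio_pmem {
        struct list_head req_list;      /* list head; lives in the device, not in any request */
        /* ... other fields from the patch ... */
};

/*
 * Unlink and return the first pending request, or NULL if there is none.
 * The caller is expected to hold vpmem->pmem_lock, just as host_ack does.
 */
static struct virtio_pmem_request *pop_pending_request(struct virtio_pmem *vpmem)
{
        struct virtio_pmem_request *req_buf;

        if (list_empty(&vpmem->req_list))
                return NULL;

        req_buf = list_first_entry(&vpmem->req_list,
                                   struct virtio_pmem_request, list);
        /* &req_buf->list is exactly the node vpmem->req_list.next points to. */
        list_del(&req_buf->list);
        return req_buf;
}

Calling list_del on &vpmem->req_list itself unlinks (and poisons) the head,
leaving the queued requests unreachable; `list_del(vpmem->req_list.next)` and
`list_del(&req_buf->list)` both unlink the correct node.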

Thank you,
Jakub Staron


Re: [Qemu-devel] [PATCH v7 2/6] virtio-pmem: Add virtio pmem driver

2019-05-19 Thread Jakub Staroń via Virtualization
On 4/25/19 10:00 PM, Pankaj Gupta wrote:

> +void host_ack(struct virtqueue *vq)
> +{
> + unsigned int len;
> + unsigned long flags;
> + struct virtio_pmem_request *req, *req_buf;
> + struct virtio_pmem *vpmem = vq->vdev->priv;
> +
> + spin_lock_irqsave(&vpmem->pmem_lock, flags);
> + while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
> + req->done = true;
> + wake_up(&req->host_acked);
> +
> + if (!list_empty(&vpmem->req_list)) {
> + req_buf = list_first_entry(&vpmem->req_list,
> + struct virtio_pmem_request, list);
> + list_del(&vpmem->req_list);

Shouldn't it be rather `list_del(vpmem->req_list.next)`? We are trying to unlink
the first element of the list and `vpmem->req_list` is just the list head.

> +int virtio_pmem_flush(struct nd_region *nd_region)
> +{
> + int err;
> + unsigned long flags;
> + struct scatterlist *sgs[2], sg, ret;
> + struct virtio_device *vdev = nd_region->provider_data;
> + struct virtio_pmem *vpmem = vdev->priv;
> + struct virtio_pmem_request *req;
> +
> + might_sleep();
> + req = kmalloc(sizeof(*req), GFP_KERNEL);
> + if (!req)
> + return -ENOMEM;
> +
> + req->done = req->wq_buf_avail = false;
> + strcpy(req->name, "FLUSH");
> + init_waitqueue_head(&req->host_acked);
> + init_waitqueue_head(&req->wq_buf);
> + sg_init_one(&sg, req->name, strlen(req->name));
> + sgs[0] = &sg;
> + sg_init_one(&ret, &req->ret, sizeof(req->ret));
> + sgs[1] = &ret;
> +
> + spin_lock_irqsave(&vpmem->pmem_lock, flags);
> + err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, GFP_ATOMIC);
> + if (err) {
> + dev_err(&vdev->dev, "failed to send command to virtio pmem device\n");
> +
> + list_add_tail(&vpmem->req_list, &req->list);
> + spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> +
> + /* When host has read buffer, this completes via host_ack */
> + wait_event(req->wq_buf, req->wq_buf_avail);
> + spin_lock_irqsave(&vpmem->pmem_lock, flags);
> + }

Aren't the arguments in `list_add_tail` swapped? The element we are adding should
be first, the list should be second. Also, shouldn't we resubmit the request after
waking up from `wait_event(req->wq_buf, req->wq_buf_avail)`?

I propose rewriting it like that:

diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
index 66b582f751a3..ff0556b04e86 100644
--- a/drivers/nvdimm/virtio_pmem.c
+++ b/drivers/nvdimm/virtio_pmem.c
@@ -25,7 +25,7 @@ void host_ack(struct virtqueue *vq)
if (!list_empty(&vpmem->req_list)) {
req_buf = list_first_entry(&vpmem->req_list,
struct virtio_pmem_request, list);
-   list_del(&vpmem->req_list);
+   list_del(vpmem->req_list.next);
req_buf->wq_buf_avail = true;
wake_up(&req_buf->wq_buf);
}
@@ -59,17 +59,33 @@ int virtio_pmem_flush(struct nd_region *nd_region)
sgs[1] = &ret;
 
spin_lock_irqsave(&vpmem->pmem_lock, flags);
-   err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, GFP_ATOMIC);
-   if (err) {
-   dev_err(&vdev->dev, "failed to send command to virtio pmem device\n");
+   /*
+* If virtqueue_add_sgs returns -ENOSPC then req_vq virtual queue does not
+* have free descriptor slots. We add the request to req_list and wait
+* for host_ack to wake us up when free slots are available.
+*/
+   while ((err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, GFP_ATOMIC)) == -ENOSPC) {
+   dev_err(&vdev->dev, "failed to send command to virtio pmem device, no free slots in the virtqueue, postponing request\n");
+   req->wq_buf_avail = false;

-   list_add_tail(&vpmem->req_list, &req->list);
+   list_add_tail(&req->list, &vpmem->req_list);
spin_unlock_irqrestore(&vpmem->pmem_lock, flags);

/* When host has read buffer, this completes via host_ack */
wait_event(req->wq_buf, req->wq_buf_avail);
spin_lock_irqsave(&vpmem->pmem_lock, flags);
}
+
+   /*
+* virtqueue_add_sgs failed with error different than -ENOSPC, we can't
+* do anything about that.
+*/
+   if (err) {
+   dev_info(&vdev->dev, "failed to send command to virtio pmem device, error code %d\n", err);
+   spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
+   err = -EIO;
+   goto ret;
+   }
err = virtqueue_kick(vpmem->req_vq);
spin_unlock_irqrestore(&vpmem->pmem_lock, flags);


Let me know if it looks reasonable to you.

Thank you,
Jakub Staron



Re: [Qemu-devel] [PATCH v7 2/6] virtio-pmem: Add virtio pmem driver

2019-05-10 Thread Pankaj Gupta


> >
> > Hi Dan,
> >
> > Thank you for the review. Please see my reply inline.
> >
> > >
> > > Hi Pankaj,
> > >
> > > Some minor file placement comments below.
> >
> > Sure.
> >
> > >
> > > On Thu, Apr 25, 2019 at 10:02 PM Pankaj Gupta  wrote:
> > > >
> > > > This patch adds virtio-pmem driver for KVM guest.
> > > >
> > > > Guest reads the persistent memory range information from
> > > > Qemu over VIRTIO and registers it on nvdimm_bus. It also
> > > > creates a nd_region object with the persistent memory
> > > > range information so that existing 'nvdimm/pmem' driver
> > > > can reserve this into system memory map. This way
> > > > 'virtio-pmem' driver uses existing functionality of pmem
> > > > driver to register persistent memory compatible for DAX
> > > > capable filesystems.
> > > >
> > > > This also provides function to perform guest flush over
> > > > VIRTIO from 'pmem' driver when userspace performs flush
> > > > on DAX memory range.
> > > >
> > > > Signed-off-by: Pankaj Gupta 
> > > > ---
> > > >  drivers/nvdimm/virtio_pmem.c | 114 +
> > > >  drivers/virtio/Kconfig   |  10 +++
> > > >  drivers/virtio/Makefile  |   1 +
> > > >  drivers/virtio/pmem.c| 118 +++
> > > >  include/linux/virtio_pmem.h  |  60 
> > > >  include/uapi/linux/virtio_ids.h  |   1 +
> > > >  include/uapi/linux/virtio_pmem.h |  10 +++
> > > >  7 files changed, 314 insertions(+)
> > > >  create mode 100644 drivers/nvdimm/virtio_pmem.c
> > > >  create mode 100644 drivers/virtio/pmem.c
> > > >  create mode 100644 include/linux/virtio_pmem.h
> > > >  create mode 100644 include/uapi/linux/virtio_pmem.h
> > > >
> > > > diff --git a/drivers/nvdimm/virtio_pmem.c
> > > > b/drivers/nvdimm/virtio_pmem.c
> > > > new file mode 100644
> > > > index ..66b582f751a3
> > > > --- /dev/null
> > > > +++ b/drivers/nvdimm/virtio_pmem.c
> > > > @@ -0,0 +1,114 @@
> > > > +// SPDX-License-Identifier: GPL-2.0
> > > > +/*
> > > > + * virtio_pmem.c: Virtio pmem Driver
> > > > + *
> > > > + * Discovers persistent memory range information
> > > > + * from host and provides a virtio based flushing
> > > > + * interface.
> > > > + */
> > > > +#include 
> > > > +#include "nd.h"
> > > > +
> > > > + /* The interrupt handler */
> > > > +void host_ack(struct virtqueue *vq)
> > > > +{
> > > > +   unsigned int len;
> > > > +   unsigned long flags;
> > > > +   struct virtio_pmem_request *req, *req_buf;
> > > > +   struct virtio_pmem *vpmem = vq->vdev->priv;
> > > > +
> > > > +   spin_lock_irqsave(&vpmem->pmem_lock, flags);
> > > > +   while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
> > > > +   req->done = true;
> > > > +   wake_up(&req->host_acked);
> > > > +
> > > > +   if (!list_empty(&vpmem->req_list)) {
> > > > +   req_buf = list_first_entry(&vpmem->req_list,
> > > > +   struct virtio_pmem_request, list);
> > > > +   list_del(&vpmem->req_list);
> > > > +   req_buf->wq_buf_avail = true;
> > > > +   wake_up(&req_buf->wq_buf);
> > > > +   }
> > > > +   }
> > > > +   spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(host_ack);
> > > > +
> > > > + /* The request submission function */
> > > > +int virtio_pmem_flush(struct nd_region *nd_region)
> > > > +{
> > > > +   int err;
> > > > +   unsigned long flags;
> > > > +   struct scatterlist *sgs[2], sg, ret;
> > > > +   struct virtio_device *vdev = nd_region->provider_data;
> > > > +   struct virtio_pmem *vpmem = vdev->priv;
> > > > +   struct virtio_pmem_request *req;
> > > > +
> > > > +   might_sleep();
> > > > +   req = kmalloc(sizeof(*req), GFP_KERNEL);
> > > > +   if (!req)
> > > > +   return -ENOMEM;
> > > > +
> > > > +   req->done = req->wq_buf_avail = false;
> > > > +   strcpy(req->name, "FLUSH");
> > > > +   init_waitqueue_head(&req->host_acked);
> > > > +   init_waitqueue_head(&req->wq_buf);
> > > > +   sg_init_one(&sg, req->name, strlen(req->name));
> > > > +   sgs[0] = &sg;
> > > > +   sg_init_one(&ret, &req->ret, sizeof(req->ret));
> > > > +   sgs[1] = &ret;
> > > > +
> > > > +   spin_lock_irqsave(&vpmem->pmem_lock, flags);
> > > > +   err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, GFP_ATOMIC);
> > > > +   if (err) {
> > > > +   dev_err(&vdev->dev, "failed to send command to virtio pmem device\n");
> > > > +
> > > > +   list_add_tail(&vpmem->req_list, &req->list);
> > > > +   spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> > > > +
> > > > +   /* When host has read buffer, this completes via host_ack */
> > > > +   wait_event(req->wq_buf, req->wq_buf_avail);
> > > > +   spin_lock_irqsave(&vpmem->pmem_lock, flags);
> > > > +   }
> > > > +   err = 

Re: [Qemu-devel] [PATCH v7 2/6] virtio-pmem: Add virtio pmem driver

2019-05-10 Thread Dan Williams
On Wed, May 8, 2019 at 4:19 AM Pankaj Gupta  wrote:
>
>
> Hi Dan,
>
> Thank you for the review. Please see my reply inline.
>
> >
> > Hi Pankaj,
> >
> > Some minor file placement comments below.
>
> Sure.
>
> >
> > On Thu, Apr 25, 2019 at 10:02 PM Pankaj Gupta  wrote:
> > >
> > > This patch adds virtio-pmem driver for KVM guest.
> > >
> > > Guest reads the persistent memory range information from
> > > Qemu over VIRTIO and registers it on nvdimm_bus. It also
> > > creates a nd_region object with the persistent memory
> > > range information so that existing 'nvdimm/pmem' driver
> > > can reserve this into system memory map. This way
> > > 'virtio-pmem' driver uses existing functionality of pmem
> > > driver to register persistent memory compatible for DAX
> > > capable filesystems.
> > >
> > > This also provides function to perform guest flush over
> > > VIRTIO from 'pmem' driver when userspace performs flush
> > > on DAX memory range.
> > >
> > > Signed-off-by: Pankaj Gupta 
> > > ---
> > >  drivers/nvdimm/virtio_pmem.c | 114 +
> > >  drivers/virtio/Kconfig   |  10 +++
> > >  drivers/virtio/Makefile  |   1 +
> > >  drivers/virtio/pmem.c| 118 +++
> > >  include/linux/virtio_pmem.h  |  60 
> > >  include/uapi/linux/virtio_ids.h  |   1 +
> > >  include/uapi/linux/virtio_pmem.h |  10 +++
> > >  7 files changed, 314 insertions(+)
> > >  create mode 100644 drivers/nvdimm/virtio_pmem.c
> > >  create mode 100644 drivers/virtio/pmem.c
> > >  create mode 100644 include/linux/virtio_pmem.h
> > >  create mode 100644 include/uapi/linux/virtio_pmem.h
> > >
> > > diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
> > > new file mode 100644
> > > index ..66b582f751a3
> > > --- /dev/null
> > > +++ b/drivers/nvdimm/virtio_pmem.c
> > > @@ -0,0 +1,114 @@
> > > +// SPDX-License-Identifier: GPL-2.0
> > > +/*
> > > + * virtio_pmem.c: Virtio pmem Driver
> > > + *
> > > + * Discovers persistent memory range information
> > > + * from host and provides a virtio based flushing
> > > + * interface.
> > > + */
> > > +#include 
> > > +#include "nd.h"
> > > +
> > > + /* The interrupt handler */
> > > +void host_ack(struct virtqueue *vq)
> > > +{
> > > +   unsigned int len;
> > > +   unsigned long flags;
> > > +   struct virtio_pmem_request *req, *req_buf;
> > > +   struct virtio_pmem *vpmem = vq->vdev->priv;
> > > +
> > > +   spin_lock_irqsave(&vpmem->pmem_lock, flags);
> > > +   while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
> > > +   req->done = true;
> > > +   wake_up(&req->host_acked);
> > > +
> > > +   if (!list_empty(&vpmem->req_list)) {
> > > +   req_buf = list_first_entry(&vpmem->req_list,
> > > +   struct virtio_pmem_request, list);
> > > +   list_del(&vpmem->req_list);
> > > +   req_buf->wq_buf_avail = true;
> > > +   wake_up(&req_buf->wq_buf);
> > > +   }
> > > +   }
> > > +   spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> > > +}
> > > +EXPORT_SYMBOL_GPL(host_ack);
> > > +
> > > + /* The request submission function */
> > > +int virtio_pmem_flush(struct nd_region *nd_region)
> > > +{
> > > +   int err;
> > > +   unsigned long flags;
> > > +   struct scatterlist *sgs[2], sg, ret;
> > > +   struct virtio_device *vdev = nd_region->provider_data;
> > > +   struct virtio_pmem *vpmem = vdev->priv;
> > > +   struct virtio_pmem_request *req;
> > > +
> > > +   might_sleep();
> > > +   req = kmalloc(sizeof(*req), GFP_KERNEL);
> > > +   if (!req)
> > > +   return -ENOMEM;
> > > +
> > > +   req->done = req->wq_buf_avail = false;
> > > +   strcpy(req->name, "FLUSH");
> > > +   init_waitqueue_head(&req->host_acked);
> > > +   init_waitqueue_head(&req->wq_buf);
> > > +   sg_init_one(&sg, req->name, strlen(req->name));
> > > +   sgs[0] = &sg;
> > > +   sg_init_one(&ret, &req->ret, sizeof(req->ret));
> > > +   sgs[1] = &ret;
> > > +
> > > +   spin_lock_irqsave(&vpmem->pmem_lock, flags);
> > > +   err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, GFP_ATOMIC);
> > > +   if (err) {
> > > +   dev_err(&vdev->dev, "failed to send command to virtio pmem device\n");
> > > +
> > > +   list_add_tail(&vpmem->req_list, &req->list);
> > > +   spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> > > +
> > > +   /* When host has read buffer, this completes via host_ack */
> > > +   wait_event(req->wq_buf, req->wq_buf_avail);
> > > +   spin_lock_irqsave(&vpmem->pmem_lock, flags);
> > > +   }
> > > +   err = virtqueue_kick(vpmem->req_vq);
> > > +   spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> > > +
> > > +   if (!err) {
> > > +   err = -EIO;
> > > +   goto ret;
> > > +   }
> > > +   /* When host has 

Re: [Qemu-devel] [PATCH v7 2/6] virtio-pmem: Add virtio pmem driver

2019-05-08 Thread Pankaj Gupta


> > 
> > > +int virtio_pmem_flush(struct nd_region *nd_region)
> > > +{
> > > +int err;
> > > +unsigned long flags;
> > > +struct scatterlist *sgs[2], sg, ret;
> > > +struct virtio_device *vdev = nd_region->provider_data;
> > > +struct virtio_pmem *vpmem = vdev->priv;
> > > +struct virtio_pmem_request *req;
> > > +
> > > +might_sleep();
> > > +req = kmalloc(sizeof(*req), GFP_KERNEL);
> > > +if (!req)
> > > +return -ENOMEM;
> > > +
> > > +req->done = req->wq_buf_avail = false;
> > > +strcpy(req->name, "FLUSH");
> > > +init_waitqueue_head(&req->host_acked);
> > > +init_waitqueue_head(&req->wq_buf);
> > > +sg_init_one(&sg, req->name, strlen(req->name));
> > > +sgs[0] = &sg;
> > > +sg_init_one(&ret, &req->ret, sizeof(req->ret));
> > > +sgs[1] = &ret;
> > > +
> > > +spin_lock_irqsave(&vpmem->pmem_lock, flags);
> > > +err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, GFP_ATOMIC);
> > > +if (err) {
> > > +dev_err(&vdev->dev, "failed to send command to virtio pmem device\n");
> > > +
> > > +list_add_tail(&vpmem->req_list, &req->list);
> > > +spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> > > +
> > > +/* When host has read buffer, this completes via host_ack */
> > > +wait_event(req->wq_buf, req->wq_buf_avail);
> > > +spin_lock_irqsave(&vpmem->pmem_lock, flags);
> > > +}
> > 
> > Aren't the arguments in `list_add_tail` swapped? The element we are adding
> 

Yes, arguments for 'list_add_tail' should be swapped.

list_add_tail(&req->list, &vpmem->req_list);
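
For reference, the prototype takes the new entry first and the list head second;
as far as I remember, include/linux/list.h has roughly:

static inline void list_add_tail(struct list_head *new, struct list_head *head)
{
        __list_add(new, head->prev, head);
}

so the request's own 'list' node is the entry and the device's 'req_list' is the
head.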


Thank you,
Pankaj


Re: [Qemu-devel] [PATCH v7 2/6] virtio-pmem: Add virtio pmem driver

2019-05-08 Thread Pankaj Gupta


> 
> On 4/25/19 10:00 PM, Pankaj Gupta wrote:
> 
> > +void host_ack(struct virtqueue *vq)
> > +{
> > +   unsigned int len;
> > +   unsigned long flags;
> > +   struct virtio_pmem_request *req, *req_buf;
> > +   struct virtio_pmem *vpmem = vq->vdev->priv;
> > +
> > +   spin_lock_irqsave(&vpmem->pmem_lock, flags);
> > +   while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
> > +   req->done = true;
> > +   wake_up(&req->host_acked);
> > +
> > +   if (!list_empty(&vpmem->req_list)) {
> > +   req_buf = list_first_entry(&vpmem->req_list,
> > +   struct virtio_pmem_request, list);
> > +   list_del(&vpmem->req_list);
> 
> Shouldn't it be rather `list_del(vpmem->req_list.next)`? We are trying to unlink
> the first element of the list and `vpmem->req_list` is just the list head.

This looks correct. We are not deleting the head but the first entry in
'req_list', which is the device's list of pending requests.

Please see below:

/**
 * Retrieve the first list entry for the given list pointer.
 *
 * Example:
 * struct foo *first;
 * first = list_first_entry(&bar->list_of_foos, struct foo, list_of_foos);
 *
 * @param ptr The list head
 * @param type Data type of the list element to retrieve
 * @param member Member name of the struct list_head field in the list element.
 * @return A pointer to the first list element.
 */
#define list_first_entry(ptr, type, member) \
list_entry((ptr)->next, type, member)

> 
> > +int virtio_pmem_flush(struct nd_region *nd_region)
> > +{
> > +   int err;
> > +   unsigned long flags;
> > +   struct scatterlist *sgs[2], sg, ret;
> > +   struct virtio_device *vdev = nd_region->provider_data;
> > +   struct virtio_pmem *vpmem = vdev->priv;
> > +   struct virtio_pmem_request *req;
> > +
> > +   might_sleep();
> > +   req = kmalloc(sizeof(*req), GFP_KERNEL);
> > +   if (!req)
> > +   return -ENOMEM;
> > +
> > +   req->done = req->wq_buf_avail = false;
> > +   strcpy(req->name, "FLUSH");
> > +   init_waitqueue_head(&req->host_acked);
> > +   init_waitqueue_head(&req->wq_buf);
> > +   sg_init_one(&sg, req->name, strlen(req->name));
> > +   sgs[0] = &sg;
> > +   sg_init_one(&ret, &req->ret, sizeof(req->ret));
> > +   sgs[1] = &ret;
> > +
> > +   spin_lock_irqsave(&vpmem->pmem_lock, flags);
> > +   err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, GFP_ATOMIC);
> > +   if (err) {
> > +   dev_err(&vdev->dev, "failed to send command to virtio pmem device\n");
> > +
> > +   list_add_tail(&vpmem->req_list, &req->list);
> > +   spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> > +
> > +   /* When host has read buffer, this completes via host_ack */
> > +   wait_event(req->wq_buf, req->wq_buf_avail);
> > +   spin_lock_irqsave(&vpmem->pmem_lock, flags);
> > +   }
> 
> Aren't the arguments in `list_add_tail` swapped? The element we are adding
> should be first, the list should be second.

No, this is intentional. 'vpmem->req_list' maintains a list of pending requests
for the entire pmem device. 'req->list' is the per-request list entry and tracks
a pending request when adding it to the virtqueue fails. I think we don't need
this list.

> Also, shouldn't we resubmit the request after waking up from
> `wait_event(req->wq_buf, req->wq_buf_avail)`?

Yes, we should. Good point.
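
To make the intended relationship explicit (a rough sketch of just the relevant
fields, inferred from how the patch uses them; not the actual header):

struct virtio_pmem {
        /* ... */
        spinlock_t pmem_lock;           /* protects req_list and the virtqueue */
        struct list_head req_list;      /* head of the device-wide queue of pending requests */
};

struct virtio_pmem_request {
        /* ... */
        struct list_head list;          /* node linking this request into vpmem->req_list */
};

So the flush path would enqueue with list_add_tail(&req->list, &vpmem->req_list)
(entry first, head second), and host_ack would later unlink that same node.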

> 
> I propose rewriting it like that:
> 
> diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
> index 66b582f751a3..ff0556b04e86 100644
> --- a/drivers/nvdimm/virtio_pmem.c
> +++ b/drivers/nvdimm/virtio_pmem.c
> @@ -25,7 +25,7 @@ void host_ack(struct virtqueue *vq)
>   if (!list_empty(&vpmem->req_list)) {
>   req_buf = list_first_entry(&vpmem->req_list,
>   struct virtio_pmem_request, list);
> - list_del(&vpmem->req_list);
> + list_del(vpmem->req_list.next);

I don't think it's correct.

>   req_buf->wq_buf_avail = true;
>   wake_up(&req_buf->wq_buf);
>   }
> @@ -59,17 +59,33 @@ int virtio_pmem_flush(struct nd_region *nd_region)
>   sgs[1] = &ret;
>  
>   spin_lock_irqsave(&vpmem->pmem_lock, flags);
> - err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, GFP_ATOMIC);
> - if (err) {
> - dev_err(&vdev->dev, "failed to send command to virtio pmem device\n");
> + /*
> +  * If virtqueue_add_sgs returns -ENOSPC then req_vq virtual queue does not
> +  * have free descriptor slots. We add the request to req_list and wait
> +  * for host_ack to wake us up when free slots are available.
> +  */
> + while ((err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, GFP_ATOMIC)) == -ENOSPC) {
> + dev_err(&vdev->dev, "failed to send command to virtio pmem device, no free slots in the virtqueue, postponing request\n");
> + req->wq_buf_avail = false;
>  
> - list_add_tail(&vpmem->req_list, &req->list);
> + list_add_tail(&req->list, &vpmem->req_list);
>   spin_unlock_irqrestore(&vpmem->pmem_lock, 

Re: [Qemu-devel] [PATCH v7 2/6] virtio-pmem: Add virtio pmem driver

2019-05-08 Thread Pankaj Gupta


Hi Dan,

Thank you for the review. Please see my reply inline.

> 
> Hi Pankaj,
> 
> Some minor file placement comments below.

Sure.

> 
> On Thu, Apr 25, 2019 at 10:02 PM Pankaj Gupta  wrote:
> >
> > This patch adds virtio-pmem driver for KVM guest.
> >
> > Guest reads the persistent memory range information from
> > Qemu over VIRTIO and registers it on nvdimm_bus. It also
> > creates a nd_region object with the persistent memory
> > range information so that existing 'nvdimm/pmem' driver
> > can reserve this into system memory map. This way
> > 'virtio-pmem' driver uses existing functionality of pmem
> > driver to register persistent memory compatible for DAX
> > capable filesystems.
> >
> > This also provides function to perform guest flush over
> > VIRTIO from 'pmem' driver when userspace performs flush
> > on DAX memory range.
> >
> > Signed-off-by: Pankaj Gupta 
> > ---
> >  drivers/nvdimm/virtio_pmem.c | 114 +
> >  drivers/virtio/Kconfig   |  10 +++
> >  drivers/virtio/Makefile  |   1 +
> >  drivers/virtio/pmem.c| 118 +++
> >  include/linux/virtio_pmem.h  |  60 
> >  include/uapi/linux/virtio_ids.h  |   1 +
> >  include/uapi/linux/virtio_pmem.h |  10 +++
> >  7 files changed, 314 insertions(+)
> >  create mode 100644 drivers/nvdimm/virtio_pmem.c
> >  create mode 100644 drivers/virtio/pmem.c
> >  create mode 100644 include/linux/virtio_pmem.h
> >  create mode 100644 include/uapi/linux/virtio_pmem.h
> >
> > diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
> > new file mode 100644
> > index ..66b582f751a3
> > --- /dev/null
> > +++ b/drivers/nvdimm/virtio_pmem.c
> > @@ -0,0 +1,114 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * virtio_pmem.c: Virtio pmem Driver
> > + *
> > + * Discovers persistent memory range information
> > + * from host and provides a virtio based flushing
> > + * interface.
> > + */
> > +#include 
> > +#include "nd.h"
> > +
> > + /* The interrupt handler */
> > +void host_ack(struct virtqueue *vq)
> > +{
> > +   unsigned int len;
> > +   unsigned long flags;
> > +   struct virtio_pmem_request *req, *req_buf;
> > +   struct virtio_pmem *vpmem = vq->vdev->priv;
> > +
> > +   spin_lock_irqsave(&vpmem->pmem_lock, flags);
> > +   while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
> > +   req->done = true;
> > +   wake_up(&req->host_acked);
> > +
> > +   if (!list_empty(&vpmem->req_list)) {
> > +   req_buf = list_first_entry(&vpmem->req_list,
> > +   struct virtio_pmem_request, list);
> > +   list_del(&vpmem->req_list);
> > +   req_buf->wq_buf_avail = true;
> > +   wake_up(&req_buf->wq_buf);
> > +   }
> > +   }
> > +   spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> > +}
> > +EXPORT_SYMBOL_GPL(host_ack);
> > +
> > + /* The request submission function */
> > +int virtio_pmem_flush(struct nd_region *nd_region)
> > +{
> > +   int err;
> > +   unsigned long flags;
> > +   struct scatterlist *sgs[2], sg, ret;
> > +   struct virtio_device *vdev = nd_region->provider_data;
> > +   struct virtio_pmem *vpmem = vdev->priv;
> > +   struct virtio_pmem_request *req;
> > +
> > +   might_sleep();
> > +   req = kmalloc(sizeof(*req), GFP_KERNEL);
> > +   if (!req)
> > +   return -ENOMEM;
> > +
> > +   req->done = req->wq_buf_avail = false;
> > +   strcpy(req->name, "FLUSH");
> > +   init_waitqueue_head(&req->host_acked);
> > +   init_waitqueue_head(&req->wq_buf);
> > +   sg_init_one(&sg, req->name, strlen(req->name));
> > +   sgs[0] = &sg;
> > +   sg_init_one(&ret, &req->ret, sizeof(req->ret));
> > +   sgs[1] = &ret;
> > +
> > +   spin_lock_irqsave(&vpmem->pmem_lock, flags);
> > +   err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, GFP_ATOMIC);
> > +   if (err) {
> > +   dev_err(&vdev->dev, "failed to send command to virtio pmem device\n");
> > +
> > +   list_add_tail(&vpmem->req_list, &req->list);
> > +   spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> > +
> > +   /* When host has read buffer, this completes via host_ack */
> > +   wait_event(req->wq_buf, req->wq_buf_avail);
> > +   spin_lock_irqsave(&vpmem->pmem_lock, flags);
> > +   }
> > +   err = virtqueue_kick(vpmem->req_vq);
> > +   spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> > +
> > +   if (!err) {
> > +   err = -EIO;
> > +   goto ret;
> > +   }
> > +   /* When host has read buffer, this completes via host_ack */
> > +   wait_event(req->host_acked, req->done);
> > +   err = req->ret;
> > +ret:
> > +   kfree(req);
> > +   return err;
> > +};
> > +
> > + /* The asynchronous flush callback function */
> > +int async_pmem_flush(struct nd_region *nd_region, struct bio 

Re: [Qemu-devel] [PATCH v7 2/6] virtio-pmem: Add virtio pmem driver

2019-04-30 Thread Pankaj Gupta


> 
> On Fri, Apr 26, 2019 at 10:30:35AM +0530, Pankaj Gupta wrote:
> > This patch adds virtio-pmem driver for KVM guest.
> > 
> > Guest reads the persistent memory range information from
> > Qemu over VIRTIO and registers it on nvdimm_bus. It also
> > creates a nd_region object with the persistent memory
> > range information so that existing 'nvdimm/pmem' driver
> > can reserve this into system memory map. This way
> > 'virtio-pmem' driver uses existing functionality of pmem
> > driver to register persistent memory compatible for DAX
> > capable filesystems.
> > 
> > This also provides function to perform guest flush over
> > VIRTIO from 'pmem' driver when userspace performs flush
> > on DAX memory range.
> > 
> > Signed-off-by: Pankaj Gupta 
> > ---
> >  drivers/nvdimm/virtio_pmem.c | 114 +
> >  drivers/virtio/Kconfig   |  10 +++
> >  drivers/virtio/Makefile  |   1 +
> >  drivers/virtio/pmem.c| 118 +++
> >  include/linux/virtio_pmem.h  |  60 
> >  include/uapi/linux/virtio_ids.h  |   1 +
> >  include/uapi/linux/virtio_pmem.h |  10 +++
> >  7 files changed, 314 insertions(+)
> >  create mode 100644 drivers/nvdimm/virtio_pmem.c
> >  create mode 100644 drivers/virtio/pmem.c
> >  create mode 100644 include/linux/virtio_pmem.h
> >  create mode 100644 include/uapi/linux/virtio_pmem.h
> > 
> > diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
> > new file mode 100644
> > index ..66b582f751a3
> > --- /dev/null
> > +++ b/drivers/nvdimm/virtio_pmem.c
> > @@ -0,0 +1,114 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * virtio_pmem.c: Virtio pmem Driver
> > + *
> > + * Discovers persistent memory range information
> > + * from host and provides a virtio based flushing
> > + * interface.
> > + */
> > +#include 
> > +#include "nd.h"
> > +
> > + /* The interrupt handler */
> > +void host_ack(struct virtqueue *vq)
> > +{
> > +   unsigned int len;
> > +   unsigned long flags;
> > +   struct virtio_pmem_request *req, *req_buf;
> > +   struct virtio_pmem *vpmem = vq->vdev->priv;
> > +
> > +   spin_lock_irqsave(&vpmem->pmem_lock, flags);
> > +   while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
> > +   req->done = true;
> > +   wake_up(&req->host_acked);
> > +
> > +   if (!list_empty(&vpmem->req_list)) {
> > +   req_buf = list_first_entry(&vpmem->req_list,
> > +   struct virtio_pmem_request, list);
> > +   list_del(&vpmem->req_list);
> > +   req_buf->wq_buf_avail = true;
> > +   wake_up(&req_buf->wq_buf);
> > +   }
> > +   }
> > +   spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> > +}
> > +EXPORT_SYMBOL_GPL(host_ack);
> > +
> > + /* The request submission function */
> > +int virtio_pmem_flush(struct nd_region *nd_region)
> > +{
> > +   int err;
> > +   unsigned long flags;
> > +   struct scatterlist *sgs[2], sg, ret;
> > +   struct virtio_device *vdev = nd_region->provider_data;
> > +   struct virtio_pmem *vpmem = vdev->priv;
> > +   struct virtio_pmem_request *req;
> > +
> > +   might_sleep();
> > +   req = kmalloc(sizeof(*req), GFP_KERNEL);
> > +   if (!req)
> > +   return -ENOMEM;
> > +
> > +   req->done = req->wq_buf_avail = false;
> > +   strcpy(req->name, "FLUSH");
> > +   init_waitqueue_head(&req->host_acked);
> > +   init_waitqueue_head(&req->wq_buf);
> > +   sg_init_one(&sg, req->name, strlen(req->name));
> > +   sgs[0] = &sg;
> > +   sg_init_one(&ret, &req->ret, sizeof(req->ret));
> > +   sgs[1] = &ret;
> > +
> > +   spin_lock_irqsave(&vpmem->pmem_lock, flags);
> > +   err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, GFP_ATOMIC);
> > +   if (err) {
> > +   dev_err(&vdev->dev, "failed to send command to virtio pmem device\n");
> > +
> > +   list_add_tail(&vpmem->req_list, &req->list);
> > +   spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> > +
> > +   /* When host has read buffer, this completes via host_ack */
> > +   wait_event(req->wq_buf, req->wq_buf_avail);
> > +   spin_lock_irqsave(&vpmem->pmem_lock, flags);
> > +   }
> > +   err = virtqueue_kick(vpmem->req_vq);
> > +   spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> > +
> > +   if (!err) {
> > +   err = -EIO;
> > +   goto ret;
> > +   }
> > +   /* When host has read buffer, this completes via host_ack */
> > +   wait_event(req->host_acked, req->done);
> > +   err = req->ret;
> > +ret:
> > +   kfree(req);
> > +   return err;
> > +};
> > +
> > + /* The asynchronous flush callback function */
> > +int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
> > +{
> > +   int rc = 0;
> > +
> > +   /* Create child bio for asynchronous flush and chain with
> > +* parent bio. Otherwise directly call nd_region flush.
> > +*/
> > +   if (bio && bio->bi_iter.bi_sector != -1) {
> > +   struct bio *child = bio_alloc(GFP_ATOMIC, 0);
> > +
> > +   if (!child)
> > +   

Re: [Qemu-devel] [PATCH v7 2/6] virtio-pmem: Add virtio pmem driver

2019-04-29 Thread Yuval Shaia
On Fri, Apr 26, 2019 at 10:30:35AM +0530, Pankaj Gupta wrote:
> This patch adds virtio-pmem driver for KVM guest.
> 
> Guest reads the persistent memory range information from
> Qemu over VIRTIO and registers it on nvdimm_bus. It also
> creates a nd_region object with the persistent memory
> range information so that existing 'nvdimm/pmem' driver
> can reserve this into system memory map. This way
> 'virtio-pmem' driver uses existing functionality of pmem
> driver to register persistent memory compatible for DAX
> capable filesystems.
> 
> This also provides function to perform guest flush over
> VIRTIO from 'pmem' driver when userspace performs flush
> on DAX memory range.
> 
> Signed-off-by: Pankaj Gupta 
> ---
>  drivers/nvdimm/virtio_pmem.c | 114 +
>  drivers/virtio/Kconfig   |  10 +++
>  drivers/virtio/Makefile  |   1 +
>  drivers/virtio/pmem.c| 118 +++
>  include/linux/virtio_pmem.h  |  60 
>  include/uapi/linux/virtio_ids.h  |   1 +
>  include/uapi/linux/virtio_pmem.h |  10 +++
>  7 files changed, 314 insertions(+)
>  create mode 100644 drivers/nvdimm/virtio_pmem.c
>  create mode 100644 drivers/virtio/pmem.c
>  create mode 100644 include/linux/virtio_pmem.h
>  create mode 100644 include/uapi/linux/virtio_pmem.h
> 
> diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
> new file mode 100644
> index ..66b582f751a3
> --- /dev/null
> +++ b/drivers/nvdimm/virtio_pmem.c
> @@ -0,0 +1,114 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * virtio_pmem.c: Virtio pmem Driver
> + *
> + * Discovers persistent memory range information
> + * from host and provides a virtio based flushing
> + * interface.
> + */
> +#include 
> +#include "nd.h"
> +
> + /* The interrupt handler */
> +void host_ack(struct virtqueue *vq)
> +{
> + unsigned int len;
> + unsigned long flags;
> + struct virtio_pmem_request *req, *req_buf;
> + struct virtio_pmem *vpmem = vq->vdev->priv;
> +
> + spin_lock_irqsave(&vpmem->pmem_lock, flags);
> + while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
> + req->done = true;
> + wake_up(&req->host_acked);
> +
> + if (!list_empty(&vpmem->req_list)) {
> + req_buf = list_first_entry(&vpmem->req_list,
> + struct virtio_pmem_request, list);
> + list_del(&vpmem->req_list);
> + req_buf->wq_buf_avail = true;
> + wake_up(&req_buf->wq_buf);
> + }
> + }
> + spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> +}
> +EXPORT_SYMBOL_GPL(host_ack);
> +
> + /* The request submission function */
> +int virtio_pmem_flush(struct nd_region *nd_region)
> +{
> + int err;
> + unsigned long flags;
> + struct scatterlist *sgs[2], sg, ret;
> + struct virtio_device *vdev = nd_region->provider_data;
> + struct virtio_pmem *vpmem = vdev->priv;
> + struct virtio_pmem_request *req;
> +
> + might_sleep();
> + req = kmalloc(sizeof(*req), GFP_KERNEL);
> + if (!req)
> + return -ENOMEM;
> +
> + req->done = req->wq_buf_avail = false;
> + strcpy(req->name, "FLUSH");
> + init_waitqueue_head(&req->host_acked);
> + init_waitqueue_head(&req->wq_buf);
> + sg_init_one(&sg, req->name, strlen(req->name));
> + sgs[0] = &sg;
> + sg_init_one(&ret, &req->ret, sizeof(req->ret));
> + sgs[1] = &ret;
> +
> + spin_lock_irqsave(&vpmem->pmem_lock, flags);
> + err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, GFP_ATOMIC);
> + if (err) {
> + dev_err(&vdev->dev, "failed to send command to virtio pmem device\n");
> +
> + list_add_tail(&vpmem->req_list, &req->list);
> + spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> +
> + /* When host has read buffer, this completes via host_ack */
> + wait_event(req->wq_buf, req->wq_buf_avail);
> + spin_lock_irqsave(&vpmem->pmem_lock, flags);
> + }
> + err = virtqueue_kick(vpmem->req_vq);
> + spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> +
> + if (!err) {
> + err = -EIO;
> + goto ret;
> + }
> + /* When host has read buffer, this completes via host_ack */
> + wait_event(req->host_acked, req->done);
> + err = req->ret;
> +ret:
> + kfree(req);
> + return err;
> +};
> +
> + /* The asynchronous flush callback function */
> +int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
> +{
> + int rc = 0;
> +
> + /* Create child bio for asynchronous flush and chain with
> +  * parent bio. Otherwise directly call nd_region flush.
> +  */
> + if (bio && bio->bi_iter.bi_sector != -1) {
> + struct bio *child = bio_alloc(GFP_ATOMIC, 0);
> +
> + if (!child)
> + return -ENOMEM;
> + bio_copy_dev(child, bio);
> + child->bi_opf = REQ_PREFLUSH;
> + child->bi_iter.bi_sector = -1;
> +