Re: [virtio-dev] Re: [PATCH v14 5/5] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
On 08/19/2017 02:10 AM, Michael S. Tsirkin wrote: On Fri, Aug 18, 2017 at 04:36:06PM +0800, Wei Wang wrote: On 08/18/2017 10:28 AM, Michael S. Tsirkin wrote: On Thu, Aug 17, 2017 at 11:26:56AM +0800, Wei Wang wrote: Add a new vq to report hints of guest free pages to the host. Signed-off-by: Wei WangSigned-off-by: Liang Li --- drivers/virtio/virtio_balloon.c | 167 +++- include/uapi/linux/virtio_balloon.h | 1 + 2 files changed, 147 insertions(+), 21 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 72041b4..e6755bc 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -54,11 +54,12 @@ static struct vfsmount *balloon_mnt; struct virtio_balloon { struct virtio_device *vdev; - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; /* The balloon servicing is delegated to a freezable workqueue. */ struct work_struct update_balloon_stats_work; struct work_struct update_balloon_size_work; + struct work_struct report_free_page_work; /* Prevent updating balloon when it is being canceled. */ spinlock_t stop_update_lock; @@ -90,6 +91,13 @@ struct virtio_balloon { /* Memory statistics */ struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; + /* +* Used by the device and driver to signal each other. +* device->driver: start the free page report. +* driver->device: end the free page report. +*/ + __virtio32 report_free_page_signal; + /* To register callback in oom notifier call chain */ struct notifier_block nb; }; @@ -174,6 +182,17 @@ static void send_balloon_page_sg(struct virtio_balloon *vb, } while (unlikely(ret == -ENOSPC)); } +static void send_free_page_sg(struct virtqueue *vq, void *addr, uint32_t size) +{ + unsigned int len; + + add_one_sg(vq, addr, size); + virtqueue_kick(vq); + /* Release entries if there are */ + while (virtqueue_get_buf(vq, )) + ; +} + /* * Send balloon pages in sgs to host. 
The balloon pages are recorded in the * page xbitmap. Each bit in the bitmap corresponds to a page of PAGE_SIZE. @@ -511,42 +530,143 @@ static void update_balloon_size_func(struct work_struct *work) queue_work(system_freezable_wq, work); } +static void virtio_balloon_send_free_pages(void *opaque, unsigned long pfn, + unsigned long nr_pages) +{ + struct virtio_balloon *vb = (struct virtio_balloon *)opaque; + void *addr = (void *)pfn_to_kaddr(pfn); + uint32_t len = nr_pages << PAGE_SHIFT; + + send_free_page_sg(vb->free_page_vq, addr, len); +} + +static void report_free_page_completion(struct virtio_balloon *vb) +{ + struct virtqueue *vq = vb->free_page_vq; + struct scatterlist sg; + unsigned int len; + int ret; + + sg_init_one(&sg, &vb->report_free_page_signal, sizeof(__virtio32)); +retry: + ret = virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); + virtqueue_kick(vq); + if (unlikely(ret == -ENOSPC)) { + wait_event(vb->acked, virtqueue_get_buf(vq, &len)); + goto retry; + } +} So the annoying thing here is that once this starts going, it will keep sending free pages from the list even if host is no longer interested. There should be a way for host to tell guest "stop" or "start from the beginning". This can be achieved via two output signal buf here: signal_buf_start: filled with VIRTIO_BALLOON_F_FREE_PAGE_REPORT_START signal_buf_end: filled with VIRTIO_BALLOON_F_FREE_PAGE_REPORT_END The device holds both, and can put one of them to the vq and notify. Do you mean device writes start and end in the buf? then it's an inbuf not an outbuf. Not really. I meant that the driver fills two signal buffers, _START and _STOP, and sends them as outbuf to the device. Then the device holds two read-only signal buffers: When request to start: add the _START elem to the vq When request to stop: add the _STOP elem to the vq It's the result of using same vq for guest to host and host to guest communication, and I think it's not a great idea. I'd reuse stats vq for host to guest requests maybe. 
As we discussed before, we can't have a vq interleave the report of stats and free pages. The vq will be locked when one command is in use. So, when live migration starts, the periodically reported stats will be delayed. Would this be OK? Or would you like to have one host to guest vq, and multiple guest to host vqs? That is, - host to guest: CMD_VQ - guest to host: STATS_REPORT_VQ FREE_PAGE_VQ Best, Wei Point is stats report vq is also host to guest. So I think it can be combined with CMD VQ. If it's too hard a separate
Re: [virtio-dev] Re: [PATCH v14 5/5] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
On 08/19/2017 02:10 AM, Michael S. Tsirkin wrote: On Fri, Aug 18, 2017 at 04:36:06PM +0800, Wei Wang wrote: On 08/18/2017 10:28 AM, Michael S. Tsirkin wrote: On Thu, Aug 17, 2017 at 11:26:56AM +0800, Wei Wang wrote: Add a new vq to report hints of guest free pages to the host. Signed-off-by: Wei Wang Signed-off-by: Liang Li --- drivers/virtio/virtio_balloon.c | 167 +++- include/uapi/linux/virtio_balloon.h | 1 + 2 files changed, 147 insertions(+), 21 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 72041b4..e6755bc 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -54,11 +54,12 @@ static struct vfsmount *balloon_mnt; struct virtio_balloon { struct virtio_device *vdev; - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; /* The balloon servicing is delegated to a freezable workqueue. */ struct work_struct update_balloon_stats_work; struct work_struct update_balloon_size_work; + struct work_struct report_free_page_work; /* Prevent updating balloon when it is being canceled. */ spinlock_t stop_update_lock; @@ -90,6 +91,13 @@ struct virtio_balloon { /* Memory statistics */ struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; + /* +* Used by the device and driver to signal each other. +* device->driver: start the free page report. +* driver->device: end the free page report. +*/ + __virtio32 report_free_page_signal; + /* To register callback in oom notifier call chain */ struct notifier_block nb; }; @@ -174,6 +182,17 @@ static void send_balloon_page_sg(struct virtio_balloon *vb, } while (unlikely(ret == -ENOSPC)); } +static void send_free_page_sg(struct virtqueue *vq, void *addr, uint32_t size) +{ + unsigned int len; + + add_one_sg(vq, addr, size); + virtqueue_kick(vq); + /* Release entries if there are */ + while (virtqueue_get_buf(vq, )) + ; +} + /* * Send balloon pages in sgs to host. 
The balloon pages are recorded in the * page xbitmap. Each bit in the bitmap corresponds to a page of PAGE_SIZE. @@ -511,42 +530,143 @@ static void update_balloon_size_func(struct work_struct *work) queue_work(system_freezable_wq, work); } +static void virtio_balloon_send_free_pages(void *opaque, unsigned long pfn, + unsigned long nr_pages) +{ + struct virtio_balloon *vb = (struct virtio_balloon *)opaque; + void *addr = (void *)pfn_to_kaddr(pfn); + uint32_t len = nr_pages << PAGE_SHIFT; + + send_free_page_sg(vb->free_page_vq, addr, len); +} + +static void report_free_page_completion(struct virtio_balloon *vb) +{ + struct virtqueue *vq = vb->free_page_vq; + struct scatterlist sg; + unsigned int len; + int ret; + + sg_init_one(&sg, &vb->report_free_page_signal, sizeof(__virtio32)); +retry: + ret = virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); + virtqueue_kick(vq); + if (unlikely(ret == -ENOSPC)) { + wait_event(vb->acked, virtqueue_get_buf(vq, &len)); + goto retry; + } +} So the annoying thing here is that once this starts going, it will keep sending free pages from the list even if host is no longer interested. There should be a way for host to tell guest "stop" or "start from the beginning". This can be achieved via two output signal buf here: signal_buf_start: filled with VIRTIO_BALLOON_F_FREE_PAGE_REPORT_START signal_buf_end: filled with VIRTIO_BALLOON_F_FREE_PAGE_REPORT_END The device holds both, and can put one of them to the vq and notify. Do you mean device writes start and end in the buf? then it's an inbuf not an outbuf. Not really. I meant that the driver fills two signal buffers, _START and _STOP, and sends them as outbuf to the device. Then the device holds two read-only signal buffers: When request to start: add the _START elem to the vq When request to stop: add the _STOP elem to the vq It's the result of using same vq for guest to host and host to guest communication, and I think it's not a great idea. I'd reuse stats vq for host to guest requests maybe. 
As we discussed before, we can't have a vq interleave the report of stats and free pages. The vq will be locked when one command is in use. So, when live migration starts, the periodically reported stats will be delayed. Would this be OK? Or would you like to have one host to guest vq, and multiple guest to host vqs? That is, - host to guest: CMD_VQ - guest to host: STATS_REPORT_VQ FREE_PAGE_VQ Best, Wei Point is stats report vq is also host to guest. So I think it can be combined with CMD VQ. If it's too hard a separate vq isn't too bad though. IMHO, this kind
Re: [PATCH v14 5/5] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
On 08/19/2017 02:26 AM, Michael S. Tsirkin wrote: On Fri, Aug 18, 2017 at 04:41:41PM +0800, Wei Wang wrote: On 08/18/2017 10:13 AM, Michael S. Tsirkin wrote: On Thu, Aug 17, 2017 at 11:26:56AM +0800, Wei Wang wrote: Add a new vq to report hints of guest free pages to the host. Please add some text here explaining the report_free_page_signal thing. I also really think we need some kind of ID in the buffer to do a handshake. whenever id changes you add another outbuf. Please let me introduce the current design first: 1) device put the signal buf to the vq and notify the driver (we need a buffer because currently the device can't notify when the vq is empty); 2) the driver starts the report of free page blocks via inbuf; 3) the driver adds the signal buf via outbuf to tell the device all are reported. Could you please elaborate more on the usage of ID? While driver is free to maintain at most one buffer in flight the design must work with pipelined requests as that is important for performance. How would the pipeline be designed? Currently, once the report starts, - the driver work: add_inbuf(free_pages) & kick; - the device work: record the pages into a free page bitmap; virtqueue_push(elem); virtio_notify(); For the driver, as long as the vq has available entries, it keeps doing its work; For the device, as long as there are free pages in the vq, it also keeps doing its work. So host might be able to request the reporting twice. How does it know what is the report in response to? The request to start is sent when live migration starts, where would be the second chance to send the request to start? If we put an id in request and in response, then that fixes it. So there's a vq used for requesting free page reports. driver does add_inbuf(&device->id). Then when it starts reporting it does add_outbuf(&device->id) followed by pages. Also if device->id changes it knows it should restart reporting from beginning. 
+retry: + ret = virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); + virtqueue_kick(vq); + if (unlikely(ret == -ENOSPC)) { what if there's another error? Another error is -EIO, how about disabling the free page report feature? (I also saw it isn't handled in many other virtio devices e.g. virtio-net) + wait_event(vb->acked, virtqueue_get_buf(vq, &len)); + goto retry; + } what is this trickery doing? needs more comments or a simplification. Just this: if the vq is full, blocking wait till an entry gets released, then retry. This is the final one, which puts the signal buf to the vq to signify the end of the report and the mm lock is not held here, so it is fine to block. But why do you kick here on failure? I would understand it if you did not kick when adding pages, as it is I don't understand. Also pls rewrite this with a for or while loop for clarity. OK, I will rewrite this part. Best, Wei
Re: [PATCH v14 5/5] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
On 08/19/2017 02:26 AM, Michael S. Tsirkin wrote: On Fri, Aug 18, 2017 at 04:41:41PM +0800, Wei Wang wrote: On 08/18/2017 10:13 AM, Michael S. Tsirkin wrote: On Thu, Aug 17, 2017 at 11:26:56AM +0800, Wei Wang wrote: Add a new vq to report hints of guest free pages to the host. Please add some text here explaining the report_free_page_signal thing. I also really think we need some kind of ID in the buffer to do a handshake. whenever id changes you add another outbuf. Please let me introduce the current design first: 1) device put the signal buf to the vq and notify the driver (we need a buffer because currently the device can't notify when the vq is empty); 2) the driver starts the report of free page blocks via inbuf; 3) the driver adds the signal buf via outbuf to tell the device all are reported. Could you please elaborate more on the usage of ID? While driver is free to maintain at most one buffer in flight the design must work with pipelined requests as that is important for performance. How would the pipeline be designed? Currently, once the report starts, - the driver work: add_inbuf(free_pages) & kick; - the device work: record the pages into a free page bitmap; virtqueue_push(elem); virtio_notify(); For the driver, as long as the vq has available entries, it keeps doing its work; For the device, as long as there are free pages in the vq, it also keeps doing its work. So host might be able to request the reporting twice. How does it know what is the report in response to? The request to start is sent when live migration starts, where would be the second chance to send the request to start? If we put an id in request and in response, then that fixes it. So there's a vq used for requesting free page reports. driver does add_inbuf(&device->id). Then when it starts reporting it does add_outbuf(&device->id) followed by pages. Also if device->id changes it knows it should restart reporting from beginning. 
+retry: + ret = virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); + virtqueue_kick(vq); + if (unlikely(ret == -ENOSPC)) { what if there's another error? Another error is -EIO, how about disabling the free page report feature? (I also saw it isn't handled in many other virtio devices e.g. virtio-net) + wait_event(vb->acked, virtqueue_get_buf(vq, &len)); + goto retry; + } what is this trickery doing? needs more comments or a simplification. Just this: if the vq is full, blocking wait till an entry gets released, then retry. This is the final one, which puts the signal buf to the vq to signify the end of the report and the mm lock is not held here, so it is fine to block. But why do you kick here on failure? I would understand it if you did not kick when adding pages, as it is I don't understand. Also pls rewrite this with a for or while loop for clarity. OK, I will rewrite this part. Best, Wei
Re: [PATCH v14 5/5] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
On Fri, Aug 18, 2017 at 04:41:41PM +0800, Wei Wang wrote: > On 08/18/2017 10:13 AM, Michael S. Tsirkin wrote: > > On Thu, Aug 17, 2017 at 11:26:56AM +0800, Wei Wang wrote: > > > Add a new vq to report hints of guest free pages to the host. > > Please add some text here explaining the report_free_page_signal > > thing. > > > > > > I also really think we need some kind of ID in the > > buffer to do a handshake. whenever id changes you > > add another outbuf. > > Please let me introduce the current design first: > 1) device put the signal buf to the vq and notify the driver (we need > a buffer because currently the device can't notify when the vq is empty); > > 2) the driver starts the report of free page blocks via inbuf; > > 3) the driver adds an the signal buf via outbuf to tell the device all are > reported. > > > Could you please elaborate more on the usage of ID? While driver is free to maintain at most one buffer in flight the design must work with pipelined requests as that is important for performance. So host might be able to request the reporting twice. How does it know what is the report in response to? If we put an id in request and in response, then that fixes it. So there's a vq used for requesting free page reports. driver does add_inbuf( >id). Then when it starts reporting it does add_outbuf(>id) followed by pages. Also if device->id changes it knows it should restart reporting from beginning. > > > +retry: > > > + ret = virtqueue_add_outbuf(vq, , 1, vb, GFP_KERNEL); > > > + virtqueue_kick(vq); > > > + if (unlikely(ret == -ENOSPC)) { > > what if there's another error? > > Another error is -EIO, how about disabling the free page report feature? > (I also saw it isn't handled in many other virtio devices e.g. virtio-net) > > > > + wait_event(vb->acked, virtqueue_get_buf(vq, )); > > > + goto retry; > > > + } > > what is this trickery doing? needs more comments or > > a simplification. 
> > Just this: > if the vq is full, blocking wait till an entry gets released, then retry. > This is the > final one, which puts the signal buf to the vq to signify the end of the > report and > the mm lock is not held here, so it is fine to block. > But why do you kick here on failure? I would understand it if you did not kick when adding pages, as it is I don't understand. Also pls rewrite this with a for or while loop for clarity. > > > > > > > +} > > > + > > > +static void report_free_page(struct work_struct *work) > > > +{ > > > + struct virtio_balloon *vb; > > > + > > > + vb = container_of(work, struct virtio_balloon, report_free_page_work); > > > + walk_free_mem_block(vb, 0, &virtio_balloon_send_free_pages); > > That's a lot of work here. And system_wq documentation says: > > * > > * system_wq is the one used by schedule[_delayed]_work[_on](). > > * Multi-CPU multi-threaded. There are users which expect relatively > > * short queue flush time. Don't queue works which can run for too > > * long. > > > > You might want to create your own wq, maybe even with WQ_CPU_INTENSIVE. > > Thanks for the reminder. If not creating a new wq, how about > system_unbound_wq? I don't think that one's freezable. > The first round of live migration needs the free pages, in that way we can > have the > pages reported to the hypervisor quicker. The reason people call it *live* migration is because tasks keep running. If you pin VCPUs with maintenance tasks it becomes pointless. Maybe we need to set a special wq which will create idle class threads. Does not seem to be supported but not hard to do. > > > > > + report_free_page_completion(vb); > > So first you get list of pages, then an outbuf telling you > > what they are in end of. I think it's backwards. > > Add an outbuf first followed by inbufs that tell you > > what they are. 
> > > If we have the signal filled with those flags like > VIRTIO_BALLOON_F_FREE_PAGE_REPORT_START, > Probably not necessary to have an inbuf followed by an outbuf, right? > > > Best, > Wei You really should document the messages in the commit log and in the header. -- MST
Re: [PATCH v14 5/5] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
On Fri, Aug 18, 2017 at 04:41:41PM +0800, Wei Wang wrote: > On 08/18/2017 10:13 AM, Michael S. Tsirkin wrote: > > On Thu, Aug 17, 2017 at 11:26:56AM +0800, Wei Wang wrote: > > > Add a new vq to report hints of guest free pages to the host. > > Please add some text here explaining the report_free_page_signal > > thing. > > > > > > I also really think we need some kind of ID in the > > buffer to do a handshake. whenever id changes you > > add another outbuf. > > Please let me introduce the current design first: > 1) device put the signal buf to the vq and notify the driver (we need > a buffer because currently the device can't notify when the vq is empty); > > 2) the driver starts the report of free page blocks via inbuf; > > 3) the driver adds an the signal buf via outbuf to tell the device all are > reported. > > > Could you please elaborate more on the usage of ID? While driver is free to maintain at most one buffer in flight the design must work with pipelined requests as that is important for performance. So host might be able to request the reporting twice. How does it know what is the report in response to? If we put an id in request and in response, then that fixes it. So there's a vq used for requesting free page reports. driver does add_inbuf( >id). Then when it starts reporting it does add_outbuf(>id) followed by pages. Also if device->id changes it knows it should restart reporting from beginning. > > > +retry: > > > + ret = virtqueue_add_outbuf(vq, , 1, vb, GFP_KERNEL); > > > + virtqueue_kick(vq); > > > + if (unlikely(ret == -ENOSPC)) { > > what if there's another error? > > Another error is -EIO, how about disabling the free page report feature? > (I also saw it isn't handled in many other virtio devices e.g. virtio-net) > > > > + wait_event(vb->acked, virtqueue_get_buf(vq, )); > > > + goto retry; > > > + } > > what is this trickery doing? needs more comments or > > a simplification. 
> > Just this: > if the vq is full, blocking wait till an entry gets released, then retry. > This is the > final one, which puts the signal buf to the vq to signify the end of the > report and > the mm lock is not held here, so it is fine to block. > But why do you kick here on failure? I would understand it if you did not kick when adding pages, as it is I don't understand. Also pls rewrite this with a for or while loop for clarity. > > > > > > > +} > > > + > > > +static void report_free_page(struct work_struct *work) > > > +{ > > > + struct virtio_balloon *vb; > > > + > > > + vb = container_of(work, struct virtio_balloon, report_free_page_work); > > > + walk_free_mem_block(vb, 0, &virtio_balloon_send_free_pages); > > That's a lot of work here. And system_wq documentation says: > > * > > * system_wq is the one used by schedule[_delayed]_work[_on](). > > * Multi-CPU multi-threaded. There are users which expect relatively > > * short queue flush time. Don't queue works which can run for too > > * long. > > > > You might want to create your own wq, maybe even with WQ_CPU_INTENSIVE. > > Thanks for the reminder. If not creating a new wq, how about > system_unbound_wq? I don't think that one's freezable. > The first round of live migration needs the free pages, in that way we can > have the > pages reported to the hypervisor quicker. The reason people call it *live* migration is because tasks keep running. If you pin VCPUs with maintenance tasks it becomes pointless. Maybe we need to set a special wq which will create idle class threads. Does not seem to be supported but not hard to do. > > > > > + report_free_page_completion(vb); > > So first you get list of pages, then an outbuf telling you > > what they are in end of. I think it's backwards. > > Add an outbuf first followed by inbufs that tell you > > what they are. 
> > > If we have the signal filled with those flags like > VIRTIO_BALLOON_F_FREE_PAGE_REPORT_START, > Probably not necessary to have an inbuf followed by an outbuf, right? > > > Best, > Wei You really should document the messages in the commit log and in the header. -- MST
Re: [PATCH v14 5/5] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
On Fri, Aug 18, 2017 at 04:36:06PM +0800, Wei Wang wrote: > On 08/18/2017 10:28 AM, Michael S. Tsirkin wrote: > > On Thu, Aug 17, 2017 at 11:26:56AM +0800, Wei Wang wrote: > > > Add a new vq to report hints of guest free pages to the host. > > > > > > Signed-off-by: Wei Wang> > > Signed-off-by: Liang Li > > > --- > > > drivers/virtio/virtio_balloon.c | 167 > > > +++- > > > include/uapi/linux/virtio_balloon.h | 1 + > > > 2 files changed, 147 insertions(+), 21 deletions(-) > > > > > > diff --git a/drivers/virtio/virtio_balloon.c > > > b/drivers/virtio/virtio_balloon.c > > > index 72041b4..e6755bc 100644 > > > --- a/drivers/virtio/virtio_balloon.c > > > +++ b/drivers/virtio/virtio_balloon.c > > > @@ -54,11 +54,12 @@ static struct vfsmount *balloon_mnt; > > > struct virtio_balloon { > > > struct virtio_device *vdev; > > > - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; > > > + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; > > > /* The balloon servicing is delegated to a freezable workqueue. > > > */ > > > struct work_struct update_balloon_stats_work; > > > struct work_struct update_balloon_size_work; > > > + struct work_struct report_free_page_work; > > > /* Prevent updating balloon when it is being canceled. */ > > > spinlock_t stop_update_lock; > > > @@ -90,6 +91,13 @@ struct virtio_balloon { > > > /* Memory statistics */ > > > struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; > > > + /* > > > + * Used by the device and driver to signal each other. > > > + * device->driver: start the free page report. > > > + * driver->device: end the free page report. 
> > > + */ > > > + __virtio32 report_free_page_signal; > > > + > > > /* To register callback in oom notifier call chain */ > > > struct notifier_block nb; > > > }; > > > @@ -174,6 +182,17 @@ static void send_balloon_page_sg(struct > > > virtio_balloon *vb, > > > } while (unlikely(ret == -ENOSPC)); > > > } > > > +static void send_free_page_sg(struct virtqueue *vq, void *addr, uint32_t > > > size) > > > +{ > > > + unsigned int len; > > > + > > > + add_one_sg(vq, addr, size); > > > + virtqueue_kick(vq); > > > + /* Release entries if there are */ > > > + while (virtqueue_get_buf(vq, )) > > > + ; > > > +} > > > + > > > /* > > >* Send balloon pages in sgs to host. The balloon pages are recorded in > > > the > > >* page xbitmap. Each bit in the bitmap corresponds to a page of > > > PAGE_SIZE. > > > @@ -511,42 +530,143 @@ static void update_balloon_size_func(struct > > > work_struct *work) > > > queue_work(system_freezable_wq, work); > > > } > > > +static void virtio_balloon_send_free_pages(void *opaque, unsigned long > > > pfn, > > > +unsigned long nr_pages) > > > +{ > > > + struct virtio_balloon *vb = (struct virtio_balloon *)opaque; > > > + void *addr = (void *)pfn_to_kaddr(pfn); > > > + uint32_t len = nr_pages << PAGE_SHIFT; > > > + > > > + send_free_page_sg(vb->free_page_vq, addr, len); > > > +} > > > + > > > +static void report_free_page_completion(struct virtio_balloon *vb) > > > +{ > > > + struct virtqueue *vq = vb->free_page_vq; > > > + struct scatterlist sg; > > > + unsigned int len; > > > + int ret; > > > + > > > + sg_init_one(, >report_free_page_signal, sizeof(__virtio32)); > > > +retry: > > > + ret = virtqueue_add_outbuf(vq, , 1, vb, GFP_KERNEL); > > > + virtqueue_kick(vq); > > > + if (unlikely(ret == -ENOSPC)) { > > > + wait_event(vb->acked, virtqueue_get_buf(vq, )); > > > + goto retry; > > > + } > > > +} > > So the annoying thing here is that once this starts going, > > it will keep sending free pages from the list even if > > host is no longer interested. 
There should be a way > > for host to tell guest "stop" or "start from the beginning". > > This can be achieved via two output signal buf here: > signal_buf_start: filled with VIRTIO_BALLOON_F_FREE_PAGE_REPORT_START > signal_buf_end: filled with VIRTIO_BALLOON_F_FREE_PAGE_REPORT_END > > The device holds both, and can put one of them to the vq and notify. Do you mean device writes start and end in the buf? then it's an inbuf not an outbuf. > > > > > > It's the result of using same vq for guest to host and > > host to guest communication, and I think it's not a great idea. > > I'd reuse stats vq for host to guest requests maybe. > > > > > As we discussed before, we can't have a vq interleave the report of stats > and free pages. > The vq will be locked when one command is in use. So, when live migration > starts, the > periodically reported stats will be delayed. > Would this be OK? Or would you > like to have > one host to guest vq, and multiple host to guest vqs? That is, > > - host to guest: > CMD_VQ > > - guest to host: > STATS_REPORT_VQ >
Re: [PATCH v14 5/5] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
On Fri, Aug 18, 2017 at 04:36:06PM +0800, Wei Wang wrote: > On 08/18/2017 10:28 AM, Michael S. Tsirkin wrote: > > On Thu, Aug 17, 2017 at 11:26:56AM +0800, Wei Wang wrote: > > > Add a new vq to report hints of guest free pages to the host. > > > > > > Signed-off-by: Wei Wang > > > Signed-off-by: Liang Li > > > --- > > > drivers/virtio/virtio_balloon.c | 167 > > > +++- > > > include/uapi/linux/virtio_balloon.h | 1 + > > > 2 files changed, 147 insertions(+), 21 deletions(-) > > > > > > diff --git a/drivers/virtio/virtio_balloon.c > > > b/drivers/virtio/virtio_balloon.c > > > index 72041b4..e6755bc 100644 > > > --- a/drivers/virtio/virtio_balloon.c > > > +++ b/drivers/virtio/virtio_balloon.c > > > @@ -54,11 +54,12 @@ static struct vfsmount *balloon_mnt; > > > struct virtio_balloon { > > > struct virtio_device *vdev; > > > - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; > > > + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; > > > /* The balloon servicing is delegated to a freezable workqueue. > > > */ > > > struct work_struct update_balloon_stats_work; > > > struct work_struct update_balloon_size_work; > > > + struct work_struct report_free_page_work; > > > /* Prevent updating balloon when it is being canceled. */ > > > spinlock_t stop_update_lock; > > > @@ -90,6 +91,13 @@ struct virtio_balloon { > > > /* Memory statistics */ > > > struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; > > > + /* > > > + * Used by the device and driver to signal each other. > > > + * device->driver: start the free page report. > > > + * driver->device: end the free page report. 
> > > + */ > > > + __virtio32 report_free_page_signal; > > > + > > > /* To register callback in oom notifier call chain */ > > > struct notifier_block nb; > > > }; > > > @@ -174,6 +182,17 @@ static void send_balloon_page_sg(struct > > > virtio_balloon *vb, > > > } while (unlikely(ret == -ENOSPC)); > > > } > > > +static void send_free_page_sg(struct virtqueue *vq, void *addr, uint32_t > > > size) > > > +{ > > > + unsigned int len; > > > + > > > + add_one_sg(vq, addr, size); > > > + virtqueue_kick(vq); > > > + /* Release entries if there are */ > > > + while (virtqueue_get_buf(vq, )) > > > + ; > > > +} > > > + > > > /* > > >* Send balloon pages in sgs to host. The balloon pages are recorded in > > > the > > >* page xbitmap. Each bit in the bitmap corresponds to a page of > > > PAGE_SIZE. > > > @@ -511,42 +530,143 @@ static void update_balloon_size_func(struct > > > work_struct *work) > > > queue_work(system_freezable_wq, work); > > > } > > > +static void virtio_balloon_send_free_pages(void *opaque, unsigned long > > > pfn, > > > +unsigned long nr_pages) > > > +{ > > > + struct virtio_balloon *vb = (struct virtio_balloon *)opaque; > > > + void *addr = (void *)pfn_to_kaddr(pfn); > > > + uint32_t len = nr_pages << PAGE_SHIFT; > > > + > > > + send_free_page_sg(vb->free_page_vq, addr, len); > > > +} > > > + > > > +static void report_free_page_completion(struct virtio_balloon *vb) > > > +{ > > > + struct virtqueue *vq = vb->free_page_vq; > > > + struct scatterlist sg; > > > + unsigned int len; > > > + int ret; > > > + > > > + sg_init_one(, >report_free_page_signal, sizeof(__virtio32)); > > > +retry: > > > + ret = virtqueue_add_outbuf(vq, , 1, vb, GFP_KERNEL); > > > + virtqueue_kick(vq); > > > + if (unlikely(ret == -ENOSPC)) { > > > + wait_event(vb->acked, virtqueue_get_buf(vq, )); > > > + goto retry; > > > + } > > > +} > > So the annoying thing here is that once this starts going, > > it will keep sending free pages from the list even if > > host is no longer interested. 
There should be a way > > for host to tell guest "stop" or "start from the beginning". > > This can be achieved via two output signal buf here: > signal_buf_start: filled with VIRTIO_BALLOON_F_FREE_PAGE_REPORT_START > signal_buf_end: filled with VIRTIO_BALLOON_F_FREE_PAGE_REPORT_END > > The device holds both, and can put one of them to the vq and notify. Do you mean device writes start and end in the buf? then it's an inbuf not an outbuf. > > > > > > It's the result of using same vq for guest to host and > > host to guest communication, and I think it's not a great idea. > > I'd reuse stats vq for host to guest requests maybe. > > > > > As we discussed before, we can't have a vq interleave the report of stats > and free pages. > The vq will be locked when one command is in use. So, when live migration > starts, the > periodically reported stats will be delayed. > Would this be OK? Or would you > like to have > one host to guest vq, and multiple host to guest vqs? That is, > > - host to guest: > CMD_VQ > > - guest to host: > STATS_REPORT_VQ > FREE_PAGE_VQ > > > Best, > Wei > Point
Re: [PATCH v14 5/5] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
On 08/18/2017 10:13 AM, Michael S. Tsirkin wrote: On Thu, Aug 17, 2017 at 11:26:56AM +0800, Wei Wang wrote: Add a new vq to report hints of guest free pages to the host. Please add some text here explaining the report_free_page_signal thing. I also really think we need some kind of ID in the buffer to do a handshake. whenever id changes you add another outbuf. Please let me introduce the current design first: 1) the device puts the signal buf to the vq and notifies the driver (we need a buffer because currently the device can't notify when the vq is empty); 2) the driver starts the report of free page blocks via inbuf; 3) the driver adds the signal buf via outbuf to tell the device all are reported. Could you please elaborate more on the usage of ID? +retry: + ret = virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); + virtqueue_kick(vq); + if (unlikely(ret == -ENOSPC)) { what if there's another error? Another error is -EIO, how about disabling the free page report feature? (I also saw it isn't handled in many other virtio devices e.g. virtio-net) + wait_event(vb->acked, virtqueue_get_buf(vq, &len)); + goto retry; + } what is this trickery doing? needs more comments or a simplification. Just this: if the vq is full, blocking wait till an entry gets released, then retry. This is the final one, which puts the signal buf to the vq to signify the end of the report and the mm lock is not held here, so it is fine to block. +} + +static void report_free_page(struct work_struct *work) +{ + struct virtio_balloon *vb; + + vb = container_of(work, struct virtio_balloon, report_free_page_work); + walk_free_mem_block(vb, 0, &virtio_balloon_send_free_pages); That's a lot of work here. And system_wq documentation says: * * system_wq is the one used by schedule[_delayed]_work[_on](). * Multi-CPU multi-threaded. There are users which expect relatively * short queue flush time. Don't queue works which can run for too * long. You might want to create your own wq, maybe even with WQ_CPU_INTENSIVE. 
Thanks for the reminder. If not creating a new wq, how about system_unbound_wq? The first round of live migration needs the free pages; that way we can have the pages reported to the hypervisor quicker. + report_free_page_completion(vb); So first you get list of pages, then an outbuf telling you what they are in end of. I think it's backwards. Add an outbuf first followed by inbufs that tell you what they are. If we have the signal filled with those flags like VIRTIO_BALLOON_F_FREE_PAGE_REPORT_START, it is probably not necessary to have an inbuf followed by an outbuf, right? Best, Wei
Re: [PATCH v14 5/5] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
On 08/18/2017 10:13 AM, Michael S. Tsirkin wrote: On Thu, Aug 17, 2017 at 11:26:56AM +0800, Wei Wang wrote: Add a new vq to report hints of guest free pages to the host. Please add some text here explaining the report_free_page_signal thing. I also really think we need some kind of ID in the buffer to do a handshake. whenever id changes you add another outbuf. Please let me introduce the current design first: 1) the device puts the signal buf to the vq and notifies the driver (we need a buffer because currently the device can't notify when the vq is empty); 2) the driver starts the report of free page blocks via inbuf; 3) the driver adds the signal buf via outbuf to tell the device all are reported. Could you please elaborate more on the usage of ID? +retry: + ret = virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); + virtqueue_kick(vq); + if (unlikely(ret == -ENOSPC)) { what if there's another error? Another error is -EIO, how about disabling the free page report feature? (I also saw it isn't handled in many other virtio devices e.g. virtio-net) + wait_event(vb->acked, virtqueue_get_buf(vq, &len)); + goto retry; + } what is this trickery doing? needs more comments or a simplification. Just this: if the vq is full, blocking wait till an entry gets released, then retry. This is the final one, which puts the signal buf to the vq to signify the end of the report and the mm lock is not held here, so it is fine to block. +} + +static void report_free_page(struct work_struct *work) +{ + struct virtio_balloon *vb; + + vb = container_of(work, struct virtio_balloon, report_free_page_work); + walk_free_mem_block(vb, 0, &virtio_balloon_send_free_pages); That's a lot of work here. And system_wq documentation says: * * system_wq is the one used by schedule[_delayed]_work[_on](). * Multi-CPU multi-threaded. There are users which expect relatively * short queue flush time. Don't queue works which can run for too * long. You might want to create your own wq, maybe even with WQ_CPU_INTENSIVE. 
Thanks for the reminder. If not creating a new wq, how about system_unbound_wq? The first round of live migration needs the free pages; that way we can have the pages reported to the hypervisor quicker. + report_free_page_completion(vb); So first you get list of pages, then an outbuf telling you what they are in end of. I think it's backwards. Add an outbuf first followed by inbufs that tell you what they are. If we have the signal filled with those flags like VIRTIO_BALLOON_F_FREE_PAGE_REPORT_START, it is probably not necessary to have an inbuf followed by an outbuf, right? Best, Wei
Re: [PATCH v14 5/5] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
On 08/18/2017 10:28 AM, Michael S. Tsirkin wrote: On Thu, Aug 17, 2017 at 11:26:56AM +0800, Wei Wang wrote: Add a new vq to report hints of guest free pages to the host. Signed-off-by: Wei WangSigned-off-by: Liang Li --- drivers/virtio/virtio_balloon.c | 167 +++- include/uapi/linux/virtio_balloon.h | 1 + 2 files changed, 147 insertions(+), 21 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 72041b4..e6755bc 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -54,11 +54,12 @@ static struct vfsmount *balloon_mnt; struct virtio_balloon { struct virtio_device *vdev; - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; /* The balloon servicing is delegated to a freezable workqueue. */ struct work_struct update_balloon_stats_work; struct work_struct update_balloon_size_work; + struct work_struct report_free_page_work; /* Prevent updating balloon when it is being canceled. */ spinlock_t stop_update_lock; @@ -90,6 +91,13 @@ struct virtio_balloon { /* Memory statistics */ struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; + /* +* Used by the device and driver to signal each other. +* device->driver: start the free page report. +* driver->device: end the free page report. +*/ + __virtio32 report_free_page_signal; + /* To register callback in oom notifier call chain */ struct notifier_block nb; }; @@ -174,6 +182,17 @@ static void send_balloon_page_sg(struct virtio_balloon *vb, } while (unlikely(ret == -ENOSPC)); } +static void send_free_page_sg(struct virtqueue *vq, void *addr, uint32_t size) +{ + unsigned int len; + + add_one_sg(vq, addr, size); + virtqueue_kick(vq); + /* Release entries if there are */ + while (virtqueue_get_buf(vq, )) + ; +} + /* * Send balloon pages in sgs to host. The balloon pages are recorded in the * page xbitmap. Each bit in the bitmap corresponds to a page of PAGE_SIZE. 
@@ -511,42 +530,143 @@ static void update_balloon_size_func(struct work_struct *work) queue_work(system_freezable_wq, work); } +static void virtio_balloon_send_free_pages(void *opaque, unsigned long pfn, + unsigned long nr_pages) +{ + struct virtio_balloon *vb = (struct virtio_balloon *)opaque; + void *addr = (void *)pfn_to_kaddr(pfn); + uint32_t len = nr_pages << PAGE_SHIFT; + + send_free_page_sg(vb->free_page_vq, addr, len); +} + +static void report_free_page_completion(struct virtio_balloon *vb) +{ + struct virtqueue *vq = vb->free_page_vq; + struct scatterlist sg; + unsigned int len; + int ret; + + sg_init_one(&sg, &vb->report_free_page_signal, sizeof(__virtio32)); +retry: + ret = virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); + virtqueue_kick(vq); + if (unlikely(ret == -ENOSPC)) { + wait_event(vb->acked, virtqueue_get_buf(vq, &len)); + goto retry; + } +} So the annoying thing here is that once this starts going, it will keep sending free pages from the list even if host is no longer interested. There should be a way for host to tell guest "stop" or "start from the beginning". This can be achieved via two output signal bufs here: signal_buf_start: filled with VIRTIO_BALLOON_F_FREE_PAGE_REPORT_START signal_buf_end: filled with VIRTIO_BALLOON_F_FREE_PAGE_REPORT_END The device holds both, and can put one of them to the vq and notify. It's the result of using same vq for guest to host and host to guest communication, and I think it's not a great idea. I'd reuse stats vq for host to guest requests maybe. As we discussed before, we can't have a vq interleave the report of stats and free pages. The vq will be locked when one command is in use. So, when live migration starts, the periodically reported stats will be delayed. Would this be OK? Or would you like to have one host to guest vq, and multiple guest to host vqs? That is, - host to guest: CMD_VQ - guest to host: STATS_REPORT_VQ FREE_PAGE_VQ Best, Wei
Re: [PATCH v14 5/5] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
On 08/18/2017 10:28 AM, Michael S. Tsirkin wrote: On Thu, Aug 17, 2017 at 11:26:56AM +0800, Wei Wang wrote: Add a new vq to report hints of guest free pages to the host. Signed-off-by: Wei Wang Signed-off-by: Liang Li --- drivers/virtio/virtio_balloon.c | 167 +++- include/uapi/linux/virtio_balloon.h | 1 + 2 files changed, 147 insertions(+), 21 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 72041b4..e6755bc 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -54,11 +54,12 @@ static struct vfsmount *balloon_mnt; struct virtio_balloon { struct virtio_device *vdev; - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; /* The balloon servicing is delegated to a freezable workqueue. */ struct work_struct update_balloon_stats_work; struct work_struct update_balloon_size_work; + struct work_struct report_free_page_work; /* Prevent updating balloon when it is being canceled. */ spinlock_t stop_update_lock; @@ -90,6 +91,13 @@ struct virtio_balloon { /* Memory statistics */ struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; + /* +* Used by the device and driver to signal each other. +* device->driver: start the free page report. +* driver->device: end the free page report. +*/ + __virtio32 report_free_page_signal; + /* To register callback in oom notifier call chain */ struct notifier_block nb; }; @@ -174,6 +182,17 @@ static void send_balloon_page_sg(struct virtio_balloon *vb, } while (unlikely(ret == -ENOSPC)); } +static void send_free_page_sg(struct virtqueue *vq, void *addr, uint32_t size) +{ + unsigned int len; + + add_one_sg(vq, addr, size); + virtqueue_kick(vq); + /* Release entries if there are */ + while (virtqueue_get_buf(vq, )) + ; +} + /* * Send balloon pages in sgs to host. The balloon pages are recorded in the * page xbitmap. Each bit in the bitmap corresponds to a page of PAGE_SIZE. 
@@ -511,42 +530,143 @@ static void update_balloon_size_func(struct work_struct *work) queue_work(system_freezable_wq, work); } +static void virtio_balloon_send_free_pages(void *opaque, unsigned long pfn, + unsigned long nr_pages) +{ + struct virtio_balloon *vb = (struct virtio_balloon *)opaque; + void *addr = (void *)pfn_to_kaddr(pfn); + uint32_t len = nr_pages << PAGE_SHIFT; + + send_free_page_sg(vb->free_page_vq, addr, len); +} + +static void report_free_page_completion(struct virtio_balloon *vb) +{ + struct virtqueue *vq = vb->free_page_vq; + struct scatterlist sg; + unsigned int len; + int ret; + + sg_init_one(&sg, &vb->report_free_page_signal, sizeof(__virtio32)); +retry: + ret = virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); + virtqueue_kick(vq); + if (unlikely(ret == -ENOSPC)) { + wait_event(vb->acked, virtqueue_get_buf(vq, &len)); + goto retry; + } +} So the annoying thing here is that once this starts going, it will keep sending free pages from the list even if host is no longer interested. There should be a way for host to tell guest "stop" or "start from the beginning". This can be achieved via two output signal bufs here: signal_buf_start: filled with VIRTIO_BALLOON_F_FREE_PAGE_REPORT_START signal_buf_end: filled with VIRTIO_BALLOON_F_FREE_PAGE_REPORT_END The device holds both, and can put one of them to the vq and notify. It's the result of using same vq for guest to host and host to guest communication, and I think it's not a great idea. I'd reuse stats vq for host to guest requests maybe. As we discussed before, we can't have a vq interleave the report of stats and free pages. The vq will be locked when one command is in use. So, when live migration starts, the periodically reported stats will be delayed. Would this be OK? Or would you like to have one host to guest vq, and multiple guest to host vqs? That is, - host to guest: CMD_VQ - guest to host: STATS_REPORT_VQ FREE_PAGE_VQ Best, Wei
Re: [PATCH v14 5/5] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
On Thu, Aug 17, 2017 at 11:26:56AM +0800, Wei Wang wrote: > Add a new vq to report hints of guest free pages to the host. > > Signed-off-by: Wei Wang> Signed-off-by: Liang Li > --- > drivers/virtio/virtio_balloon.c | 167 > +++- > include/uapi/linux/virtio_balloon.h | 1 + > 2 files changed, 147 insertions(+), 21 deletions(-) > > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c > index 72041b4..e6755bc 100644 > --- a/drivers/virtio/virtio_balloon.c > +++ b/drivers/virtio/virtio_balloon.c > @@ -54,11 +54,12 @@ static struct vfsmount *balloon_mnt; > > struct virtio_balloon { > struct virtio_device *vdev; > - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; > + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; > > /* The balloon servicing is delegated to a freezable workqueue. */ > struct work_struct update_balloon_stats_work; > struct work_struct update_balloon_size_work; > + struct work_struct report_free_page_work; > > /* Prevent updating balloon when it is being canceled. */ > spinlock_t stop_update_lock; > @@ -90,6 +91,13 @@ struct virtio_balloon { > /* Memory statistics */ > struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; > > + /* > + * Used by the device and driver to signal each other. > + * device->driver: start the free page report. > + * driver->device: end the free page report. > + */ > + __virtio32 report_free_page_signal; > + > /* To register callback in oom notifier call chain */ > struct notifier_block nb; > }; > @@ -174,6 +182,17 @@ static void send_balloon_page_sg(struct virtio_balloon > *vb, > } while (unlikely(ret == -ENOSPC)); > } > > +static void send_free_page_sg(struct virtqueue *vq, void *addr, uint32_t > size) > +{ > + unsigned int len; > + > + add_one_sg(vq, addr, size); > + virtqueue_kick(vq); > + /* Release entries if there are */ > + while (virtqueue_get_buf(vq, )) > + ; > +} > + > /* > * Send balloon pages in sgs to host. The balloon pages are recorded in the > * page xbitmap. 
Each bit in the bitmap corresponds to a page of PAGE_SIZE. > @@ -511,42 +530,143 @@ static void update_balloon_size_func(struct > work_struct *work) > queue_work(system_freezable_wq, work); > } > > +static void virtio_balloon_send_free_pages(void *opaque, unsigned long pfn, > +unsigned long nr_pages) > +{ > + struct virtio_balloon *vb = (struct virtio_balloon *)opaque; > + void *addr = (void *)pfn_to_kaddr(pfn); > + uint32_t len = nr_pages << PAGE_SHIFT; > + > + send_free_page_sg(vb->free_page_vq, addr, len); > +} > + > +static void report_free_page_completion(struct virtio_balloon *vb) > +{ > + struct virtqueue *vq = vb->free_page_vq; > + struct scatterlist sg; > + unsigned int len; > + int ret; > + > + sg_init_one(, >report_free_page_signal, sizeof(__virtio32)); > +retry: > + ret = virtqueue_add_outbuf(vq, , 1, vb, GFP_KERNEL); > + virtqueue_kick(vq); > + if (unlikely(ret == -ENOSPC)) { > + wait_event(vb->acked, virtqueue_get_buf(vq, )); > + goto retry; > + } > +} So the annoying thing here is that once this starts going, it will keep sending free pages from the list even if host is no longer interested. There should be a way for host to tell guest "stop" or "start from the beginning". It's the result of using same vq for guest to host and host to guest communication, and I think it's not a great idea. I'd reuse stats vq for host to guest requests maybe. 
> + > +static void report_free_page(struct work_struct *work) > +{ > + struct virtio_balloon *vb; > + > + vb = container_of(work, struct virtio_balloon, report_free_page_work); > + walk_free_mem_block(vb, 0, &virtio_balloon_send_free_pages); > + report_free_page_completion(vb); > +} > + > +static void free_page_request(struct virtqueue *vq) > +{ > + struct virtio_balloon *vb = vq->vdev->priv; > + > + queue_work(system_freezable_wq, &vb->report_free_page_work); > +} > + > static int init_vqs(struct virtio_balloon *vb) > { > - struct virtqueue *vqs[3]; > - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request > }; > - static const char * const names[] = { "inflate", "deflate", "stats" }; > - int err, nvqs; > + struct virtqueue **vqs; > + vq_callback_t **callbacks; > + const char **names; > + struct scatterlist sg; > + int i, nvqs, err = -ENOMEM; > + > + /* Inflateq and deflateq are used unconditionally */ > + nvqs = 2; > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) > + nvqs++; > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) > + nvqs++; > + > + /* Allocate space for
Re: [PATCH v14 5/5] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
On Thu, Aug 17, 2017 at 11:26:56AM +0800, Wei Wang wrote: > Add a new vq to report hints of guest free pages to the host. > > Signed-off-by: Wei Wang > Signed-off-by: Liang Li > --- > drivers/virtio/virtio_balloon.c | 167 > +++- > include/uapi/linux/virtio_balloon.h | 1 + > 2 files changed, 147 insertions(+), 21 deletions(-) > > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c > index 72041b4..e6755bc 100644 > --- a/drivers/virtio/virtio_balloon.c > +++ b/drivers/virtio/virtio_balloon.c > @@ -54,11 +54,12 @@ static struct vfsmount *balloon_mnt; > > struct virtio_balloon { > struct virtio_device *vdev; > - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; > + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; > > /* The balloon servicing is delegated to a freezable workqueue. */ > struct work_struct update_balloon_stats_work; > struct work_struct update_balloon_size_work; > + struct work_struct report_free_page_work; > > /* Prevent updating balloon when it is being canceled. */ > spinlock_t stop_update_lock; > @@ -90,6 +91,13 @@ struct virtio_balloon { > /* Memory statistics */ > struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; > > + /* > + * Used by the device and driver to signal each other. > + * device->driver: start the free page report. > + * driver->device: end the free page report. > + */ > + __virtio32 report_free_page_signal; > + > /* To register callback in oom notifier call chain */ > struct notifier_block nb; > }; > @@ -174,6 +182,17 @@ static void send_balloon_page_sg(struct virtio_balloon > *vb, > } while (unlikely(ret == -ENOSPC)); > } > > +static void send_free_page_sg(struct virtqueue *vq, void *addr, uint32_t > size) > +{ > + unsigned int len; > + > + add_one_sg(vq, addr, size); > + virtqueue_kick(vq); > + /* Release entries if there are */ > + while (virtqueue_get_buf(vq, )) > + ; > +} > + > /* > * Send balloon pages in sgs to host. The balloon pages are recorded in the > * page xbitmap. 
Each bit in the bitmap corresponds to a page of PAGE_SIZE. > @@ -511,42 +530,143 @@ static void update_balloon_size_func(struct > work_struct *work) > queue_work(system_freezable_wq, work); > } > > +static void virtio_balloon_send_free_pages(void *opaque, unsigned long pfn, > +unsigned long nr_pages) > +{ > + struct virtio_balloon *vb = (struct virtio_balloon *)opaque; > + void *addr = (void *)pfn_to_kaddr(pfn); > + uint32_t len = nr_pages << PAGE_SHIFT; > + > + send_free_page_sg(vb->free_page_vq, addr, len); > +} > + > +static void report_free_page_completion(struct virtio_balloon *vb) > +{ > + struct virtqueue *vq = vb->free_page_vq; > + struct scatterlist sg; > + unsigned int len; > + int ret; > + > + sg_init_one(, >report_free_page_signal, sizeof(__virtio32)); > +retry: > + ret = virtqueue_add_outbuf(vq, , 1, vb, GFP_KERNEL); > + virtqueue_kick(vq); > + if (unlikely(ret == -ENOSPC)) { > + wait_event(vb->acked, virtqueue_get_buf(vq, )); > + goto retry; > + } > +} So the annoying thing here is that once this starts going, it will keep sending free pages from the list even if host is no longer interested. There should be a way for host to tell guest "stop" or "start from the beginning". It's the result of using same vq for guest to host and host to guest communication, and I think it's not a great idea. I'd reuse stats vq for host to guest requests maybe. 
> + > +static void report_free_page(struct work_struct *work) > +{ > + struct virtio_balloon *vb; > + > + vb = container_of(work, struct virtio_balloon, report_free_page_work); > + walk_free_mem_block(vb, 0, &virtio_balloon_send_free_pages); > + report_free_page_completion(vb); > +} > + > +static void free_page_request(struct virtqueue *vq) > +{ > + struct virtio_balloon *vb = vq->vdev->priv; > + > + queue_work(system_freezable_wq, &vb->report_free_page_work); > +} > + > static int init_vqs(struct virtio_balloon *vb) > { > - struct virtqueue *vqs[3]; > - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request > }; > - static const char * const names[] = { "inflate", "deflate", "stats" }; > - int err, nvqs; > + struct virtqueue **vqs; > + vq_callback_t **callbacks; > + const char **names; > + struct scatterlist sg; > + int i, nvqs, err = -ENOMEM; > + > + /* Inflateq and deflateq are used unconditionally */ > + nvqs = 2; > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) > + nvqs++; > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) > + nvqs++; > + > + /* Allocate space for find_vqs parameters */ > + vqs =
Re: [PATCH v14 5/5] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
On Thu, Aug 17, 2017 at 11:26:56AM +0800, Wei Wang wrote: > Add a new vq to report hints of guest free pages to the host. Please add some text here explaining the report_free_page_signal thing. I also really think we need some kind of ID in the buffer to do a handshake. whenever id changes you add another outbuf. > > Signed-off-by: Wei Wang> Signed-off-by: Liang Li > --- > drivers/virtio/virtio_balloon.c | 167 > +++- > include/uapi/linux/virtio_balloon.h | 1 + > 2 files changed, 147 insertions(+), 21 deletions(-) > > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c > index 72041b4..e6755bc 100644 > --- a/drivers/virtio/virtio_balloon.c > +++ b/drivers/virtio/virtio_balloon.c > @@ -54,11 +54,12 @@ static struct vfsmount *balloon_mnt; > > struct virtio_balloon { > struct virtio_device *vdev; > - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; > + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; > > /* The balloon servicing is delegated to a freezable workqueue. */ > struct work_struct update_balloon_stats_work; > struct work_struct update_balloon_size_work; > + struct work_struct report_free_page_work; > > /* Prevent updating balloon when it is being canceled. */ > spinlock_t stop_update_lock; > @@ -90,6 +91,13 @@ struct virtio_balloon { > /* Memory statistics */ > struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; > > + /* > + * Used by the device and driver to signal each other. > + * device->driver: start the free page report. > + * driver->device: end the free page report. > + */ > + __virtio32 report_free_page_signal; Weird - all I can see is driver writing 0 there, then adding it as out buf. 
> + > /* To register callback in oom notifier call chain */ > struct notifier_block nb; > }; > @@ -174,6 +182,17 @@ static void send_balloon_page_sg(struct virtio_balloon > *vb, > } while (unlikely(ret == -ENOSPC)); > } > > +static void send_free_page_sg(struct virtqueue *vq, void *addr, uint32_t > size) > +{ > + unsigned int len; > + > + add_one_sg(vq, addr, size); > + virtqueue_kick(vq); > + /* Release entries if there are */ > + while (virtqueue_get_buf(vq, )) > + ; > +} > + > /* > * Send balloon pages in sgs to host. The balloon pages are recorded in the > * page xbitmap. Each bit in the bitmap corresponds to a page of PAGE_SIZE. > @@ -511,42 +530,143 @@ static void update_balloon_size_func(struct > work_struct *work) > queue_work(system_freezable_wq, work); > } > > +static void virtio_balloon_send_free_pages(void *opaque, unsigned long pfn, > +unsigned long nr_pages) > +{ > + struct virtio_balloon *vb = (struct virtio_balloon *)opaque; > + void *addr = (void *)pfn_to_kaddr(pfn); > + uint32_t len = nr_pages << PAGE_SHIFT; > + > + send_free_page_sg(vb->free_page_vq, addr, len); > +} > + > +static void report_free_page_completion(struct virtio_balloon *vb) > +{ > + struct virtqueue *vq = vb->free_page_vq; > + struct scatterlist sg; > + unsigned int len; > + int ret; > + > + sg_init_one(, >report_free_page_signal, sizeof(__virtio32)); sizeof vb->report_free_page_signal is better. > +retry: > + ret = virtqueue_add_outbuf(vq, , 1, vb, GFP_KERNEL); > + virtqueue_kick(vq); > + if (unlikely(ret == -ENOSPC)) { what if there's another error? > + wait_event(vb->acked, virtqueue_get_buf(vq, )); > + goto retry; > + } what is this trickery doing? needs more comments or a simplification. > +} > + > +static void report_free_page(struct work_struct *work) > +{ > + struct virtio_balloon *vb; > + > + vb = container_of(work, struct virtio_balloon, report_free_page_work); > + walk_free_mem_block(vb, 0, _balloon_send_free_pages); That's a lot of work here. 
And system_wq documentation says: * * system_wq is the one used by schedule[_delayed]_work[_on](). * Multi-CPU multi-threaded. There are users which expect relatively * short queue flush time. Don't queue works which can run for too * long. You might want to create your own wq, maybe even with WQ_CPU_INTENSIVE. > + report_free_page_completion(vb); So first you get list of pages, then an outbuf telling you what they are in end of. I think it's backwards. Add an outbuf first followed by inbufs that tell you what they are. > +} > + > +static void free_page_request(struct virtqueue *vq) > +{ > + struct virtio_balloon *vb = vq->vdev->priv; > + > + queue_work(system_freezable_wq, >report_free_page_work); > +} > + > static int init_vqs(struct virtio_balloon *vb) > { > - struct virtqueue *vqs[3]; > - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request > }; > - static
Re: [PATCH v14 5/5] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
On Thu, Aug 17, 2017 at 11:26:56AM +0800, Wei Wang wrote: > Add a new vq to report hints of guest free pages to the host. Please add some text here explaining the report_free_page_signal thing. I also really think we need some kind of ID in the buffer to do a handshake. whenever id changes you add another outbuf. > > Signed-off-by: Wei Wang > Signed-off-by: Liang Li > --- > drivers/virtio/virtio_balloon.c | 167 > +++- > include/uapi/linux/virtio_balloon.h | 1 + > 2 files changed, 147 insertions(+), 21 deletions(-) > > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c > index 72041b4..e6755bc 100644 > --- a/drivers/virtio/virtio_balloon.c > +++ b/drivers/virtio/virtio_balloon.c > @@ -54,11 +54,12 @@ static struct vfsmount *balloon_mnt; > > struct virtio_balloon { > struct virtio_device *vdev; > - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; > + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; > > /* The balloon servicing is delegated to a freezable workqueue. */ > struct work_struct update_balloon_stats_work; > struct work_struct update_balloon_size_work; > + struct work_struct report_free_page_work; > > /* Prevent updating balloon when it is being canceled. */ > spinlock_t stop_update_lock; > @@ -90,6 +91,13 @@ struct virtio_balloon { > /* Memory statistics */ > struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; > > + /* > + * Used by the device and driver to signal each other. > + * device->driver: start the free page report. > + * driver->device: end the free page report. > + */ > + __virtio32 report_free_page_signal; Weird - all I can see is driver writing 0 there, then adding it as out buf. 
> + > /* To register callback in oom notifier call chain */ > struct notifier_block nb; > }; > @@ -174,6 +182,17 @@ static void send_balloon_page_sg(struct virtio_balloon > *vb, > } while (unlikely(ret == -ENOSPC)); > } > > +static void send_free_page_sg(struct virtqueue *vq, void *addr, uint32_t > size) > +{ > + unsigned int len; > + > + add_one_sg(vq, addr, size); > + virtqueue_kick(vq); > + /* Release entries if there are */ > + while (virtqueue_get_buf(vq, )) > + ; > +} > + > /* > * Send balloon pages in sgs to host. The balloon pages are recorded in the > * page xbitmap. Each bit in the bitmap corresponds to a page of PAGE_SIZE. > @@ -511,42 +530,143 @@ static void update_balloon_size_func(struct > work_struct *work) > queue_work(system_freezable_wq, work); > } > > +static void virtio_balloon_send_free_pages(void *opaque, unsigned long pfn, > +unsigned long nr_pages) > +{ > + struct virtio_balloon *vb = (struct virtio_balloon *)opaque; > + void *addr = (void *)pfn_to_kaddr(pfn); > + uint32_t len = nr_pages << PAGE_SHIFT; > + > + send_free_page_sg(vb->free_page_vq, addr, len); > +} > + > +static void report_free_page_completion(struct virtio_balloon *vb) > +{ > + struct virtqueue *vq = vb->free_page_vq; > + struct scatterlist sg; > + unsigned int len; > + int ret; > + > + sg_init_one(, >report_free_page_signal, sizeof(__virtio32)); sizeof vb->report_free_page_signal is better. > +retry: > + ret = virtqueue_add_outbuf(vq, , 1, vb, GFP_KERNEL); > + virtqueue_kick(vq); > + if (unlikely(ret == -ENOSPC)) { what if there's another error? > + wait_event(vb->acked, virtqueue_get_buf(vq, )); > + goto retry; > + } what is this trickery doing? needs more comments or a simplification. > +} > + > +static void report_free_page(struct work_struct *work) > +{ > + struct virtio_balloon *vb; > + > + vb = container_of(work, struct virtio_balloon, report_free_page_work); > + walk_free_mem_block(vb, 0, _balloon_send_free_pages); That's a lot of work here. 
And system_wq documentation says: * * system_wq is the one used by schedule[_delayed]_work[_on](). * Multi-CPU multi-threaded. There are users which expect relatively * short queue flush time. Don't queue works which can run for too * long. You might want to create your own wq, maybe even with WQ_CPU_INTENSIVE. > + report_free_page_completion(vb); So first you get list of pages, then an outbuf telling you what they are in end of. I think it's backwards. Add an outbuf first followed by inbufs that tell you what they are. > +} > + > +static void free_page_request(struct virtqueue *vq) > +{ > + struct virtio_balloon *vb = vq->vdev->priv; > + > + queue_work(system_freezable_wq, >report_free_page_work); > +} > + > static int init_vqs(struct virtio_balloon *vb) > { > - struct virtqueue *vqs[3]; > - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request > }; > - static const char * const names[] = { "inflate",
[PATCH v14 5/5] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
Add a new vq to report hints of guest free pages to the host. Signed-off-by: Wei WangSigned-off-by: Liang Li --- drivers/virtio/virtio_balloon.c | 167 +++- include/uapi/linux/virtio_balloon.h | 1 + 2 files changed, 147 insertions(+), 21 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 72041b4..e6755bc 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -54,11 +54,12 @@ static struct vfsmount *balloon_mnt; struct virtio_balloon { struct virtio_device *vdev; - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; /* The balloon servicing is delegated to a freezable workqueue. */ struct work_struct update_balloon_stats_work; struct work_struct update_balloon_size_work; + struct work_struct report_free_page_work; /* Prevent updating balloon when it is being canceled. */ spinlock_t stop_update_lock; @@ -90,6 +91,13 @@ struct virtio_balloon { /* Memory statistics */ struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; + /* +* Used by the device and driver to signal each other. +* device->driver: start the free page report. +* driver->device: end the free page report. +*/ + __virtio32 report_free_page_signal; + /* To register callback in oom notifier call chain */ struct notifier_block nb; }; @@ -174,6 +182,17 @@ static void send_balloon_page_sg(struct virtio_balloon *vb, } while (unlikely(ret == -ENOSPC)); } +static void send_free_page_sg(struct virtqueue *vq, void *addr, uint32_t size) +{ + unsigned int len; + + add_one_sg(vq, addr, size); + virtqueue_kick(vq); + /* Release entries if there are */ + while (virtqueue_get_buf(vq, )) + ; +} + /* * Send balloon pages in sgs to host. The balloon pages are recorded in the * page xbitmap. Each bit in the bitmap corresponds to a page of PAGE_SIZE. 
@@ -511,42 +530,143 @@ static void update_balloon_size_func(struct work_struct *work) queue_work(system_freezable_wq, work); } +static void virtio_balloon_send_free_pages(void *opaque, unsigned long pfn, + unsigned long nr_pages) +{ + struct virtio_balloon *vb = (struct virtio_balloon *)opaque; + void *addr = (void *)pfn_to_kaddr(pfn); + uint32_t len = nr_pages << PAGE_SHIFT; + + send_free_page_sg(vb->free_page_vq, addr, len); +} + +static void report_free_page_completion(struct virtio_balloon *vb) +{ + struct virtqueue *vq = vb->free_page_vq; + struct scatterlist sg; + unsigned int len; + int ret; + + sg_init_one(&sg, &vb->report_free_page_signal, sizeof(__virtio32)); +retry: + ret = virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); + virtqueue_kick(vq); + if (unlikely(ret == -ENOSPC)) { + wait_event(vb->acked, virtqueue_get_buf(vq, &len)); + goto retry; + } +} + +static void report_free_page(struct work_struct *work) +{ + struct virtio_balloon *vb; + + vb = container_of(work, struct virtio_balloon, report_free_page_work); + walk_free_mem_block(vb, 0, &virtio_balloon_send_free_pages); + report_free_page_completion(vb); +} + +static void free_page_request(struct virtqueue *vq) +{ + struct virtio_balloon *vb = vq->vdev->priv; + + queue_work(system_freezable_wq, &vb->report_free_page_work); +} + static int init_vqs(struct virtio_balloon *vb) { - struct virtqueue *vqs[3]; - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; - static const char * const names[] = { "inflate", "deflate", "stats" }; - int err, nvqs; + struct virtqueue **vqs; + vq_callback_t **callbacks; + const char **names; + struct scatterlist sg; + int i, nvqs, err = -ENOMEM; + + /* Inflateq and deflateq are used unconditionally */ + nvqs = 2; + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) + nvqs++; + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) + nvqs++; + + /* Allocate space for find_vqs parameters */ + vqs = kcalloc(nvqs, sizeof(*vqs), GFP_KERNEL); + if (!vqs) + 
goto err_vq; + callbacks = kmalloc_array(nvqs, sizeof(*callbacks), GFP_KERNEL); + if (!callbacks) + goto err_callback; + names = kmalloc_array(nvqs, sizeof(*names), GFP_KERNEL); + if (!names) + goto err_names; + + callbacks[0] = balloon_ack; + names[0] = "inflate"; + callbacks[1] = balloon_ack; + names[1] = "deflate"; + + i = 2; + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { +
[PATCH v14 5/5] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
Add a new vq to report hints of guest free pages to the host. Signed-off-by: Wei Wang Signed-off-by: Liang Li --- drivers/virtio/virtio_balloon.c | 167 +++- include/uapi/linux/virtio_balloon.h | 1 + 2 files changed, 147 insertions(+), 21 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 72041b4..e6755bc 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -54,11 +54,12 @@ static struct vfsmount *balloon_mnt; struct virtio_balloon { struct virtio_device *vdev; - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; /* The balloon servicing is delegated to a freezable workqueue. */ struct work_struct update_balloon_stats_work; struct work_struct update_balloon_size_work; + struct work_struct report_free_page_work; /* Prevent updating balloon when it is being canceled. */ spinlock_t stop_update_lock; @@ -90,6 +91,13 @@ struct virtio_balloon { /* Memory statistics */ struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; + /* +* Used by the device and driver to signal each other. +* device->driver: start the free page report. +* driver->device: end the free page report. +*/ + __virtio32 report_free_page_signal; + /* To register callback in oom notifier call chain */ struct notifier_block nb; }; @@ -174,6 +182,17 @@ static void send_balloon_page_sg(struct virtio_balloon *vb, } while (unlikely(ret == -ENOSPC)); } +static void send_free_page_sg(struct virtqueue *vq, void *addr, uint32_t size) +{ + unsigned int len; + + add_one_sg(vq, addr, size); + virtqueue_kick(vq); + /* Release entries if there are */ + while (virtqueue_get_buf(vq, &len)) + ; +} + /* * Send balloon pages in sgs to host. The balloon pages are recorded in the * page xbitmap. Each bit in the bitmap corresponds to a page of PAGE_SIZE. 
@@ -511,42 +530,143 @@ static void update_balloon_size_func(struct work_struct *work) queue_work(system_freezable_wq, work); } +static void virtio_balloon_send_free_pages(void *opaque, unsigned long pfn, + unsigned long nr_pages) +{ + struct virtio_balloon *vb = (struct virtio_balloon *)opaque; + void *addr = (void *)pfn_to_kaddr(pfn); + uint32_t len = nr_pages << PAGE_SHIFT; + + send_free_page_sg(vb->free_page_vq, addr, len); +} + +static void report_free_page_completion(struct virtio_balloon *vb) +{ + struct virtqueue *vq = vb->free_page_vq; + struct scatterlist sg; + unsigned int len; + int ret; + + sg_init_one(&sg, &vb->report_free_page_signal, sizeof(__virtio32)); +retry: + ret = virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); + virtqueue_kick(vq); + if (unlikely(ret == -ENOSPC)) { + wait_event(vb->acked, virtqueue_get_buf(vq, &len)); + goto retry; + } +} + +static void report_free_page(struct work_struct *work) +{ + struct virtio_balloon *vb; + + vb = container_of(work, struct virtio_balloon, report_free_page_work); + walk_free_mem_block(vb, 0, &virtio_balloon_send_free_pages); + report_free_page_completion(vb); +} + +static void free_page_request(struct virtqueue *vq) +{ + struct virtio_balloon *vb = vq->vdev->priv; + + queue_work(system_freezable_wq, &vb->report_free_page_work); +} + static int init_vqs(struct virtio_balloon *vb) { - struct virtqueue *vqs[3]; - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; - static const char * const names[] = { "inflate", "deflate", "stats" }; - int err, nvqs; + struct virtqueue **vqs; + vq_callback_t **callbacks; + const char **names; + struct scatterlist sg; + int i, nvqs, err = -ENOMEM; + + /* Inflateq and deflateq are used unconditionally */ + nvqs = 2; + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) + nvqs++; + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) + nvqs++; + + /* Allocate space for find_vqs parameters */ + vqs = kcalloc(nvqs, sizeof(*vqs), GFP_KERNEL); + if (!vqs) + 
goto err_vq; + callbacks = kmalloc_array(nvqs, sizeof(*callbacks), GFP_KERNEL); + if (!callbacks) + goto err_callback; + names = kmalloc_array(nvqs, sizeof(*names), GFP_KERNEL); + if (!names) + goto err_names; + + callbacks[0] = balloon_ack; + names[0] = "inflate"; + callbacks[1] = balloon_ack; + names[1] = "deflate"; + + i = 2; + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { + callbacks[i] = stats_request;