You should Cc Nitesh, who is working on a related feature.

On Mon, Nov 13, 2017 at 06:34:48PM +0800, Wei Wang wrote:
> Ping for comments, thanks.
> 
> On 11/03/2017 04:13 PM, Wei Wang wrote:
> > Negotiation of the VIRTIO_BALLOON_F_FREE_PAGE_VQ feature indicates the
> > support of reporting hints of guest free pages to the host via
> > virtio-balloon. The host requests the guest to report the free pages by
> > sending commands via the virtio-balloon configuration registers.
> > 
> > When the guest starts to report, the first element added to the free page
> > vq is a sequence id of the start reporting command. The id is given by
> > the host, and it indicates whether the following free pages correspond
> > to the command. For example, the host may stop the report and start again
> > with a new command id. The obsolete pages for the previous start command
> > can be detected by the id mismatching on the host. The id is added to the
> > vq using an output buffer, and the free pages are added to the vq using
> > input buffer.
> > 
> > Here are some explanations about the added configuration registers:
> > - host2guest_cmd: a register used by the host to send commands to the
> > guest.
> > - guest2host_cmd: written by the guest to ACK to the host about the
> > commands that have been received. The host will clear the corresponding
> > bits on the host2guest_cmd register. The guest also uses this register
> > to send commands to the host (e.g. when finish free page reporting).

I am not sure what the role of guest2host_cmd is. Reporting
the correct cmd id seems a sufficient indication that the guest
received the start command. Not getting any more seems sufficient
to detect stop.


> > - free_page_cmd_id: the sequence id of the free page report command
> > given by the host.
> > 
> > Signed-off-by: Wei Wang <wei.w.w...@intel.com>
> > Signed-off-by: Liang Li <liang.z...@intel.com>
> > Cc: Michael S. Tsirkin <m...@redhat.com>
> > Cc: Michal Hocko <mho...@kernel.org>
> > ---
> >   drivers/virtio/virtio_balloon.c     | 234 
> > ++++++++++++++++++++++++++++++++----
> >   include/uapi/linux/virtio_balloon.h |  11 ++
> >   2 files changed, 223 insertions(+), 22 deletions(-)
> > 
> > diff --git a/drivers/virtio/virtio_balloon.c 
> > b/drivers/virtio/virtio_balloon.c
> > index b31fc25..4087f04 100644
> > --- a/drivers/virtio/virtio_balloon.c
> > +++ b/drivers/virtio/virtio_balloon.c
> > @@ -55,7 +55,12 @@ static struct vfsmount *balloon_mnt;
> >   struct virtio_balloon {
> >     struct virtio_device *vdev;
> > -   struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
> > +   struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq;
> > +
> > +   /* Balloon's own wq for cpu-intensive work items */
> > +   struct workqueue_struct *balloon_wq;
> > +   /* The free page reporting work item submitted to the balloon wq */
> > +   struct work_struct report_free_page_work;
> >     /* The balloon servicing is delegated to a freezable workqueue. */
> >     struct work_struct update_balloon_stats_work;
> > @@ -65,6 +70,10 @@ struct virtio_balloon {
> >     spinlock_t stop_update_lock;
> >     bool stop_update;
> > +   /* Stop reporting free pages */
> > +   bool report_free_page_stop;

I would invert the logic here: bool report_free_page;

> > +   uint32_t free_page_cmd_id;
> > +
> >     /* Waiting for host to ack the pages we released. */
> >     wait_queue_head_t acked;
> > @@ -191,6 +200,30 @@ static void send_balloon_page_sg(struct virtio_balloon 
> > *vb,
> >             kick_and_wait(vq, vb->acked);
> >   }
> > +static void send_free_page_sg(struct virtqueue *vq, void *addr, uint32_t 
> > size)
> > +{
> > +   int err = 0;
> > +   unsigned int len;
> > +
> > +   /* Detach all the used buffers from the vq */
> > +   while (virtqueue_get_buf(vq, &len))
> > +           ;
> > +
> > +   /*
> > +    * Since this is an optimization feature, losing a couple of free
> > +    * pages to report isn't important. We simply resturn without adding
> > +    * the page if the vq is full.
> > +    */
> > +   if (vq->num_free) {
> > +           err = add_one_sg(vq, addr, size);
> > +           BUG_ON(err);
> > +   }
> > +
> > +   /* Batch till the vq is full */
> > +   if (!vq->num_free)
> > +           virtqueue_kick(vq);
> > +}
> > +
> >   /*
> >    * Send balloon pages in sgs to host. The balloon pages are recorded in 
> > the
> >    * page xbitmap. Each bit in the bitmap corresponds to a page of 
> > PAGE_SIZE.
> > @@ -495,9 +528,8 @@ static void stats_handle_request(struct virtio_balloon 
> > *vb)
> >     virtqueue_kick(vq);
> >   }
> > -static void virtballoon_changed(struct virtio_device *vdev)
> > +static void virtballoon_cmd_balloon_memory(struct virtio_balloon *vb)
> >   {
> > -   struct virtio_balloon *vb = vdev->priv;
> >     unsigned long flags;
> >     spin_lock_irqsave(&vb->stop_update_lock, flags);
> > @@ -506,6 +538,50 @@ static void virtballoon_changed(struct virtio_device 
> > *vdev)
> >     spin_unlock_irqrestore(&vb->stop_update_lock, flags);
> >   }
> > +static void virtballoon_cmd_report_free_page_start(struct virtio_balloon 
> > *vb)
> > +{
> > +   unsigned long flags;
> > +
> > +   vb->report_free_page_stop = false;

This flag is used a lot outside any locks. Why is this safe?
Please add some comments explaining access to this flag.

> > +   spin_lock_irqsave(&vb->stop_update_lock, flags);
> > +   if (!vb->stop_update)
> > +           queue_work(vb->balloon_wq, &vb->report_free_page_work);
> > +   spin_unlock_irqrestore(&vb->stop_update_lock, flags);
> > +}
> > +
> > +static void virtballoon_changed(struct virtio_device *vdev)
> > +{
> > +   struct virtio_balloon *vb = vdev->priv;
> > +   u32 host2guest_cmd, guest2host_cmd = 0;
> > +
> > +   if (!virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) {
> > +           virtballoon_cmd_balloon_memory(vb);
> > +           return;

This might be a handy feature: one can disable balloon without
hot-unplug. But I would use a separate feature flag to
control it.

> > +   }
> > +
> > +   virtio_cread(vb->vdev, struct virtio_balloon_config, host2guest_cmd,
> > +                &host2guest_cmd);
> > +
> > +   if (host2guest_cmd & VIRTIO_BALLOON_CMD_BALLOON_MEMORY) {
> > +           virtballoon_cmd_balloon_memory(vb);
> > +           guest2host_cmd |= VIRTIO_BALLOON_CMD_BALLOON_MEMORY;
> > +   }
> > +
> > +   if (host2guest_cmd & VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_START) {
> > +           virtballoon_cmd_report_free_page_start(vb);
> > +           guest2host_cmd |= VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_START;
> > +   }
> > +
> > +   if (host2guest_cmd & VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_STOP) {
> > +           vb->report_free_page_stop = true;
> > +           guest2host_cmd |= VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_STOP;
> > +   }

I am not sure why free page has start+stop but e.g. the balloon has a single
bit. In fact, I would really just use the command id. When it changes, we
know a new report is needed.

> > +
> > +   /* Ack to the host about the commands that have been received */
> > +   virtio_cwrite(vb->vdev, struct virtio_balloon_config, guest2host_cmd,
> > +                 &guest2host_cmd);

So the same register is used to ack the stop command and to signal
the end of the report. This seems buggy.

> > +}
> > +
> >   static inline s64 towards_target(struct virtio_balloon *vb)
> >   {
> >     s64 target;
> > @@ -597,42 +673,147 @@ static void update_balloon_size_func(struct 
> > work_struct *work)
> >             queue_work(system_freezable_wq, work);
> >   }
> > -static int init_vqs(struct virtio_balloon *vb)
> > +static bool virtio_balloon_send_free_pages(void *opaque, unsigned long pfn,
> > +                                      unsigned long nr_pages)
> >   {
> > -   struct virtqueue *vqs[3];
> > -   vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request 
> > };
> > -   static const char * const names[] = { "inflate", "deflate", "stats" };
> > -   int err, nvqs;
> > +   struct virtio_balloon *vb = (struct virtio_balloon *)opaque;
> > +   void *addr = (void *)pfn_to_kaddr(pfn);

How do we know all free pages have a kaddr?

> > +   uint32_t len = nr_pages << PAGE_SHIFT;
> > +
> > +   if (vb->report_free_page_stop)
> > +           return false;
> > +
> > +   send_free_page_sg(vb->free_page_vq, addr, len);
> > +   return true;
> > +}
> > +
> > +static void report_free_page_end(struct virtio_balloon *vb)
> > +{
> > +   u32 cmd = VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_STOP;
> >     /*
> > -    * We expect two virtqueues: inflate and deflate, and
> > -    * optionally stat.
> > +    * The host may have already requested to stop the reporting before we
> > +    * finish, so no need to notify the host in this case.
> >      */
> > -   nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
> > -   err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL);
> > +   if (vb->report_free_page_stop)
> > +           return;
> > +   vb->report_free_page_stop = true;
> > +
> > +   virtio_cwrite(vb->vdev, struct virtio_balloon_config, guest2host_cmd,
> > +                 &cmd);

Wouldn't it be easier to add a buffer in the queue?

> > +}
> > +
> > +static void report_free_page_cmd_id(struct virtio_balloon *vb)
> > +{
> > +   struct scatterlist sg;
> > +   int err;
> > +
> > +   virtio_cread(vb->vdev, struct virtio_balloon_config, free_page_cmd_id,
> > +                &vb->free_page_cmd_id);
> > +   sg_init_one(&sg, &vb->free_page_cmd_id, sizeof(uint32_t));
> > +   err = virtqueue_add_outbuf(vb->free_page_vq, &sg, 1,
> > +                              &vb->free_page_cmd_id, GFP_KERNEL);
> > +   BUG_ON(err);
> > +}
> > +
> > +static void report_free_page(struct work_struct *work)
> > +{
> > +   struct virtio_balloon *vb;
> > +
> > +   vb = container_of(work, struct virtio_balloon, report_free_page_work);
> > +   report_free_page_cmd_id(vb);
> > +   walk_free_mem_block(vb, 0, &virtio_balloon_send_free_pages);
> > +   /*
> > +    * The last few free page blocks that were added may not reach the
> > +    * batch size, but need a kick to notify the device to handle them.
> > +    */
> > +   virtqueue_kick(vb->free_page_vq);
> > +   report_free_page_end(vb);
> > +}
> > +
> > +static int init_vqs(struct virtio_balloon *vb)
> > +{
> > +   struct virtqueue **vqs;
> > +   vq_callback_t **callbacks;
> > +   const char **names;
> > +   struct scatterlist sg;
> > +   int i, nvqs, err = -ENOMEM;
> > +
> > +   /* Inflateq and deflateq are used unconditionally */
> > +   nvqs = 2;
> > +   if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ))
> > +           nvqs++;
> > +   if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ))
> > +           nvqs++;
> > +
> > +   /* Allocate space for find_vqs parameters */
> > +   vqs = kcalloc(nvqs, sizeof(*vqs), GFP_KERNEL);
> > +   if (!vqs)
> > +           goto err_vq;
> > +   callbacks = kmalloc_array(nvqs, sizeof(*callbacks), GFP_KERNEL);
> > +   if (!callbacks)
> > +           goto err_callback;
> > +   names = kmalloc_array(nvqs, sizeof(*names), GFP_KERNEL);
> > +   if (!names)
> > +           goto err_names;
> > +
> > +   callbacks[0] = balloon_ack;
> > +   names[0] = "inflate";
> > +   callbacks[1] = balloon_ack;
> > +   names[1] = "deflate";
> > +
> > +   i = 2;
> > +   if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
> > +           callbacks[i] = stats_request;
> > +           names[i] = "stats";
> > +           i++;
> > +   }
> > +
> > +   if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) {
> > +           callbacks[i] = NULL;
> > +           names[i] = "free_page_vq";
> > +   }
> > +
> > +   err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names,
> > +                                    NULL, NULL);
> >     if (err)
> > -           return err;
> > +           goto err_find;
> >     vb->inflate_vq = vqs[0];
> >     vb->deflate_vq = vqs[1];
> > +   i = 2;
> >     if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
> > -           struct scatterlist sg;
> > -           unsigned int num_stats;
> > -           vb->stats_vq = vqs[2];
> > -
> > +           vb->stats_vq = vqs[i++];
> >             /*
> >              * Prime this virtqueue with one buffer so the hypervisor can
> >              * use it to signal us later (it can't be broken yet!).
> >              */
> > -           num_stats = update_balloon_stats(vb);
> > -
> > -           sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
> > +           sg_init_one(&sg, vb->stats, sizeof(vb->stats));
> >             if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL)
> > -               < 0)
> > -                   BUG();
> > +               < 0) {
> > +                   dev_warn(&vb->vdev->dev, "%s: add stat_vq failed\n",
> > +                            __func__);
> > +                   goto err_find;
> > +           }
> >             virtqueue_kick(vb->stats_vq);
> >     }
> > +
> > +   if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ))
> > +           vb->free_page_vq = vqs[i];
> > +
> > +   kfree(names);
> > +   kfree(callbacks);
> > +   kfree(vqs);
> >     return 0;
> > +
> > +err_find:
> > +   kfree(names);
> > +err_names:
> > +   kfree(callbacks);
> > +err_callback:
> > +   kfree(vqs);
> > +err_vq:
> > +   return err;
> >   }
> >   #ifdef CONFIG_BALLOON_COMPACTION
> > @@ -761,6 +942,13 @@ static int virtballoon_probe(struct virtio_device 
> > *vdev)
> >     if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_SG))
> >             xb_init(&vb->page_xb);
> > +   if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) {
> > +           vb->balloon_wq = alloc_workqueue("balloon-wq",
> > +                                   WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0);
> > +           INIT_WORK(&vb->report_free_page_work, report_free_page);
> > +           vb->report_free_page_stop = true;
> > +   }
> > +
> >     vb->nb.notifier_call = virtballoon_oom_notify;
> >     vb->nb.priority = VIRTBALLOON_OOM_NOTIFY_PRIORITY;
> >     err = register_oom_notifier(&vb->nb);
> > @@ -825,6 +1013,7 @@ static void virtballoon_remove(struct virtio_device 
> > *vdev)
> >     spin_unlock_irq(&vb->stop_update_lock);
> >     cancel_work_sync(&vb->update_balloon_size_work);
> >     cancel_work_sync(&vb->update_balloon_stats_work);
> > +   cancel_work_sync(&vb->report_free_page_work);
> >     remove_common(vb);
> >   #ifdef CONFIG_BALLOON_COMPACTION
> > @@ -878,6 +1067,7 @@ static unsigned int features[] = {
> >     VIRTIO_BALLOON_F_STATS_VQ,
> >     VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
> >     VIRTIO_BALLOON_F_SG,
> > +   VIRTIO_BALLOON_F_FREE_PAGE_VQ,
> >   };
> >   static struct virtio_driver virtio_balloon_driver = {
> > diff --git a/include/uapi/linux/virtio_balloon.h 
> > b/include/uapi/linux/virtio_balloon.h
> > index 37780a7..b758484 100644
> > --- a/include/uapi/linux/virtio_balloon.h
> > +++ b/include/uapi/linux/virtio_balloon.h
> > @@ -35,15 +35,26 @@
> >   #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
> >   #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM   2 /* Deflate balloon on OOM */
> >   #define VIRTIO_BALLOON_F_SG               3 /* Use sg instead of PFN 
> > lists */
> > +#define VIRTIO_BALLOON_F_FREE_PAGE_VQ      4 /* VQ to report free pages */
> >   /* Size of a PFN in the balloon interface. */
> >   #define VIRTIO_BALLOON_PFN_SHIFT 12
> > +#define    VIRTIO_BALLOON_CMD_BALLOON_MEMORY               (1 << 0)
> > +#define    VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_START       (1 << 1)
> > +#define    VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_STOP        (1 << 2)
> > +
> >   struct virtio_balloon_config {
> >     /* Number of pages host wants Guest to give up. */
> >     __u32 num_pages;
> >     /* Number of pages we've actually got in balloon. */
> >     __u32 actual;
> > +   /* Host-to-guest command, readonly by guest */
> > +   __u32 host2guest_cmd;
> > +   /* Sequence id of the free_page report command, readonly by guest */
> > +   __u32 free_page_cmd_id;
> > +   /* Guest-to-host command */
> > +   __u32 guest2host_cmd;
> >   };
> >   #define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */

Reply via email to