Re: [PATCH v19 QEMU 3/4] virtio-balloon: Provide an interface for free page reporting

2020-04-15 Thread Alexander Duyck
On Wed, Apr 15, 2020 at 1:17 AM David Hildenbrand  wrote:
>
> On 10.04.20 05:41, Alexander Duyck wrote:
> > From: Alexander Duyck 
> >
> > Add support for free page reporting. The idea is to function very similar
> > to how the balloon works in that we basically end up madvising the page as
> > not being used. However we don't really need to bother with any deflate
> > type logic since the page will be faulted back into the guest when it is
> > read or written to.
> >
> > This provides a new way of letting the guest proactively report free
> > pages to the hypervisor, so the hypervisor can reuse them. In contrast to
> > inflate/deflate that is triggered via the hypervisor explicitly.
>
> Much better, thanks!
>
> >
> > Signed-off-by: Alexander Duyck 
> > ---
> >  hw/virtio/virtio-balloon.c |   63 
> > +++-
> >  include/hw/virtio/virtio-balloon.h |2 +
> >  2 files changed, 62 insertions(+), 3 deletions(-)
> >
> > diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
> > index 1c6d36a29a04..86d8b48a8e3a 100644
> > --- a/hw/virtio/virtio-balloon.c
> > +++ b/hw/virtio/virtio-balloon.c
> > @@ -321,6 +321,57 @@ static void balloon_stats_set_poll_interval(Object 
> > *obj, Visitor *v,
> >  balloon_stats_change_timer(s, 0);
> >  }
> >
> > +static void virtio_balloon_handle_report(VirtIODevice *vdev, VirtQueue *vq)
> > +{
> > +VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
> > +VirtQueueElement *elem;
> > +
> > +while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement {
> > +unsigned int i;
> > +
> > +for (i = 0; i < elem->in_num; i++) {
> > +void *addr = elem->in_sg[i].iov_base;
> > +size_t size = elem->in_sg[i].iov_len;
> > +ram_addr_t ram_offset;
> > +size_t rb_page_size;
> > +RAMBlock *rb;
> > +
> > +if (qemu_balloon_is_inhibited() || dev->poison_val) {
> > +continue;
>
> actually, you want to do that in the outer loop, no?

I'll move that. Odds are compiler was doing that anyway.

> > +}
> > +
> > +/*
> > + * There is no need to check the memory section to see if
> > + * it is ram/readonly/romd like there is for handle_output
> > + * below. If the region is not meant to be written to then
> > + * address_space_map will have allocated a bounce buffer
> > + * and it will be freed in address_space_unmap and trigger
> > + * and unassigned_mem_write before failing to copy over the
> > + * buffer. If more than one bad descriptor is provided it
> > + * will return NULL after the first bounce buffer and fail
> > + * to map any resources.
> > + */
> > +rb = qemu_ram_block_from_host(addr, false, &ram_offset);
> > +if (!rb) {
> > +trace_virtio_balloon_bad_addr(elem->in_addr[i]);
> > +continue;
> > +}
> > +
> > +/* For now we will simply ignore unaligned memory regions */
> > +rb_page_size = qemu_ram_pagesize(rb);
> > +if (!QEMU_IS_ALIGNED(ram_offset | size, rb_page_size)) {
>
> /me thinks you can drop rb_page_size

You mean just fold it into the statement? I guess I can do that.

> I *think* there is still one remaining case to handle: Crossing RAM blocks.
>
> Most probably you should check
>
> /* For now, ignore crossing RAM blocks. */
> if (ram_offset + size >= qemu_ram_get_used_length()) {
> continue;
> }
>
> otherwise ram_block_discard_range() will report an error.

Makes sense. I can add that into the QEMU_IS_ALIGNED check.



Re: [PATCH v19 QEMU 3/4] virtio-balloon: Provide an interface for free page reporting

2020-04-15 Thread David Hildenbrand
On 15.04.20 10:17, David Hildenbrand wrote:
> On 10.04.20 05:41, Alexander Duyck wrote:
>> From: Alexander Duyck 
>>
>> Add support for free page reporting. The idea is to function very similar
>> to how the balloon works in that we basically end up madvising the page as
>> not being used. However we don't really need to bother with any deflate
>> type logic since the page will be faulted back into the guest when it is
>> read or written to.
>>
>> This provides a new way of letting the guest proactively report free
>> pages to the hypervisor, so the hypervisor can reuse them. In contrast to
>> inflate/deflate that is triggered via the hypervisor explicitly.
> 
> Much better, thanks!
> 
>>
>> Signed-off-by: Alexander Duyck 
>> ---
>>  hw/virtio/virtio-balloon.c |   63 
>> +++-
>>  include/hw/virtio/virtio-balloon.h |2 +
>>  2 files changed, 62 insertions(+), 3 deletions(-)
>>
>> diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
>> index 1c6d36a29a04..86d8b48a8e3a 100644
>> --- a/hw/virtio/virtio-balloon.c
>> +++ b/hw/virtio/virtio-balloon.c
>> @@ -321,6 +321,57 @@ static void balloon_stats_set_poll_interval(Object 
>> *obj, Visitor *v,
>>  balloon_stats_change_timer(s, 0);
>>  }
>>  
>> +static void virtio_balloon_handle_report(VirtIODevice *vdev, VirtQueue *vq)
>> +{
>> +VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
>> +VirtQueueElement *elem;
>> +
>> +while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement {
>> +unsigned int i;
>> +
>> +for (i = 0; i < elem->in_num; i++) {
>> +void *addr = elem->in_sg[i].iov_base;
>> +size_t size = elem->in_sg[i].iov_len;
>> +ram_addr_t ram_offset;
>> +size_t rb_page_size;
>> +RAMBlock *rb;
>> +
>> +if (qemu_balloon_is_inhibited() || dev->poison_val) {
>> +continue;
> 
> actually, you want to do that in the outer loop, no?
> 
>> +}
>> +
>> +/*
>> + * There is no need to check the memory section to see if
>> + * it is ram/readonly/romd like there is for handle_output
>> + * below. If the region is not meant to be written to then
>> + * address_space_map will have allocated a bounce buffer
>> + * and it will be freed in address_space_unmap and trigger
>> + * and unassigned_mem_write before failing to copy over the
>> + * buffer. If more than one bad descriptor is provided it
>> + * will return NULL after the first bounce buffer and fail
>> + * to map any resources.
>> + */
>> +rb = qemu_ram_block_from_host(addr, false, &ram_offset);
>> +if (!rb) {
>> +trace_virtio_balloon_bad_addr(elem->in_addr[i]);
>> +continue;
>> +}
>> +
>> +/* For now we will simply ignore unaligned memory regions */
>> +rb_page_size = qemu_ram_pagesize(rb);
>> +if (!QEMU_IS_ALIGNED(ram_offset | size, rb_page_size)) {
> 
> /me thinks you can drop rb_page_size
> 
> I *think* there is still one remaining case to handle: Crossing RAM blocks.
> 
> Most probably you should check
> 
> /* For now, ignore crossing RAM blocks. */
> if (ram_offset + size >= qemu_ram_get_used_length()) {
>   continue;

(should be an > I guess)


-- 
Thanks,

David / dhildenb




Re: [PATCH v19 QEMU 3/4] virtio-balloon: Provide an interface for free page reporting

2020-04-15 Thread David Hildenbrand
On 10.04.20 05:41, Alexander Duyck wrote:
> From: Alexander Duyck 
> 
> Add support for free page reporting. The idea is to function very similar
> to how the balloon works in that we basically end up madvising the page as
> not being used. However we don't really need to bother with any deflate
> type logic since the page will be faulted back into the guest when it is
> read or written to.
> 
> This provides a new way of letting the guest proactively report free
> pages to the hypervisor, so the hypervisor can reuse them. In contrast to
> inflate/deflate that is triggered via the hypervisor explicitly.

Much better, thanks!

> 
> Signed-off-by: Alexander Duyck 
> ---
>  hw/virtio/virtio-balloon.c |   63 
> +++-
>  include/hw/virtio/virtio-balloon.h |2 +
>  2 files changed, 62 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
> index 1c6d36a29a04..86d8b48a8e3a 100644
> --- a/hw/virtio/virtio-balloon.c
> +++ b/hw/virtio/virtio-balloon.c
> @@ -321,6 +321,57 @@ static void balloon_stats_set_poll_interval(Object *obj, 
> Visitor *v,
>  balloon_stats_change_timer(s, 0);
>  }
>  
> +static void virtio_balloon_handle_report(VirtIODevice *vdev, VirtQueue *vq)
> +{
> +VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
> +VirtQueueElement *elem;
> +
> +while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement {
> +unsigned int i;
> +
> +for (i = 0; i < elem->in_num; i++) {
> +void *addr = elem->in_sg[i].iov_base;
> +size_t size = elem->in_sg[i].iov_len;
> +ram_addr_t ram_offset;
> +size_t rb_page_size;
> +RAMBlock *rb;
> +
> +if (qemu_balloon_is_inhibited() || dev->poison_val) {
> +continue;

actually, you want to do that in the outer loop, no?

> +}
> +
> +/*
> + * There is no need to check the memory section to see if
> + * it is ram/readonly/romd like there is for handle_output
> + * below. If the region is not meant to be written to then
> + * address_space_map will have allocated a bounce buffer
> + * and it will be freed in address_space_unmap and trigger
> + * and unassigned_mem_write before failing to copy over the
> + * buffer. If more than one bad descriptor is provided it
> + * will return NULL after the first bounce buffer and fail
> + * to map any resources.
> + */
> +rb = qemu_ram_block_from_host(addr, false, &ram_offset);
> +if (!rb) {
> +trace_virtio_balloon_bad_addr(elem->in_addr[i]);
> +continue;
> +}
> +
> +/* For now we will simply ignore unaligned memory regions */
> +rb_page_size = qemu_ram_pagesize(rb);
> +if (!QEMU_IS_ALIGNED(ram_offset | size, rb_page_size)) {

/me thinks you can drop rb_page_size

I *think* there is still one remaining case to handle: Crossing RAM blocks.

Most probably you should check

/* For now, ignore crossing RAM blocks. */
if (ram_offset + size >= qemu_ram_get_used_length()) {
continue;
}

otherwise ram_block_discard_range() will report an error.

-- 
Thanks,

David / dhildenb




[PATCH v19 QEMU 3/4] virtio-balloon: Provide an interface for free page reporting

2020-04-09 Thread Alexander Duyck
From: Alexander Duyck 

Add support for free page reporting. The idea is to function very similar
to how the balloon works in that we basically end up madvising the page as
not being used. However we don't really need to bother with any deflate
type logic since the page will be faulted back into the guest when it is
read or written to.

This provides a new way of letting the guest proactively report free
pages to the hypervisor, so the hypervisor can reuse them. In contrast to
inflate/deflate that is triggered via the hypervisor explicitly.

Signed-off-by: Alexander Duyck 
---
 hw/virtio/virtio-balloon.c |   63 +++-
 include/hw/virtio/virtio-balloon.h |2 +
 2 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 1c6d36a29a04..86d8b48a8e3a 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -321,6 +321,57 @@ static void balloon_stats_set_poll_interval(Object *obj, 
Visitor *v,
 balloon_stats_change_timer(s, 0);
 }
 
+static void virtio_balloon_handle_report(VirtIODevice *vdev, VirtQueue *vq)
+{
+VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
+VirtQueueElement *elem;
+
+while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement {
+unsigned int i;
+
+for (i = 0; i < elem->in_num; i++) {
+void *addr = elem->in_sg[i].iov_base;
+size_t size = elem->in_sg[i].iov_len;
+ram_addr_t ram_offset;
+size_t rb_page_size;
+RAMBlock *rb;
+
+if (qemu_balloon_is_inhibited() || dev->poison_val) {
+continue;
+}
+
+/*
+ * There is no need to check the memory section to see if
+ * it is ram/readonly/romd like there is for handle_output
+ * below. If the region is not meant to be written to then
+ * address_space_map will have allocated a bounce buffer
+ * and it will be freed in address_space_unmap and trigger
+ * and unassigned_mem_write before failing to copy over the
+ * buffer. If more than one bad descriptor is provided it
+ * will return NULL after the first bounce buffer and fail
+ * to map any resources.
+ */
+rb = qemu_ram_block_from_host(addr, false, &ram_offset);
+if (!rb) {
+trace_virtio_balloon_bad_addr(elem->in_addr[i]);
+continue;
+}
+
+/* For now we will simply ignore unaligned memory regions */
+rb_page_size = qemu_ram_pagesize(rb);
+if (!QEMU_IS_ALIGNED(ram_offset | size, rb_page_size)) {
+continue;
+}
+
+ram_block_discard_range(rb, ram_offset, size);
+}
+
+virtqueue_push(vq, elem, 0);
+virtio_notify(vdev, vq);
+g_free(elem);
+}
+}
+
 static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 {
 VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
@@ -628,7 +679,8 @@ static size_t virtio_balloon_config_size(VirtIOBalloon *s)
 return sizeof(struct virtio_balloon_config);
 }
 if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON) ||
-virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT) ||
+virtio_has_feature(features, VIRTIO_BALLOON_F_REPORTING)) {
 return sizeof(struct virtio_balloon_config);
 }
 return offsetof(struct virtio_balloon_config, free_page_report_cmd_id);
@@ -717,7 +769,8 @@ static uint64_t virtio_balloon_get_features(VirtIODevice 
*vdev, uint64_t f,
 VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
 f |= dev->host_features;
 virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ);
-if (virtio_has_feature(f, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+if (virtio_has_feature(f, VIRTIO_BALLOON_F_FREE_PAGE_HINT) ||
+virtio_has_feature(f, VIRTIO_BALLOON_F_REPORTING)) {
 virtio_add_feature(&f, VIRTIO_BALLOON_F_PAGE_POISON);
 }
 
@@ -807,6 +860,10 @@ static void virtio_balloon_device_realize(DeviceState 
*dev, Error **errp)
 s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
 s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);
 
+if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_REPORTING)) {
+s->rvq = virtio_add_queue(vdev, 32, virtio_balloon_handle_report);
+}
+
 if (virtio_has_feature(s->host_features,
VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
 s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE,
@@ -940,6 +997,8 @@ static Property virtio_balloon_properties[] = {
  */
 DEFINE_PROP_BOOL("qemu-4-0-config-size", VirtIOBalloon,
  qemu_4_0_config_size, false),
+DEFINE_PROP_BIT("free-page-reporting", VirtIOBalloon, host_features,
+