Re: [RFC PATCH V2 1/2] vhost: convert pre sorted vhost memory array to interval tree

2016-04-28 Thread Jason Wang


On 04/27/2016 07:30 PM, Michael S. Tsirkin wrote:
> On Fri, Mar 25, 2016 at 10:34:33AM +0800, Jason Wang wrote:
>> > Current pre-sorted memory region array has some limitations for future
>> > device IOTLB conversion:
>> > 
>> > 1) need extra work for adding and removing a single region, and it's
>> >expected to be slow because of sorting or memory re-allocation.
>> > 2) need extra work of removing a large range which may intersect
>> >several regions with different size.
>> > 3) need trick for a replacement policy like LRU
>> > 
>> > To overcome the above shortcomings, this patch convert it to interval
>> > tree which can easily address the above issue with almost no extra
>> > work.
>> > 
>> > The patch could be used for:
>> > 
>> > - Extend the current API and only let the userspace to send diffs of
>> >   memory table.
>> > - Simplify Device IOTLB implementation.
> Does this affect performance at all?
>

In pktgen test, no difference.

Thanks
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [RFC PATCH V2 1/2] vhost: convert pre sorted vhost memory array to interval tree

2016-04-27 Thread Michael S. Tsirkin
On Fri, Mar 25, 2016 at 10:34:33AM +0800, Jason Wang wrote:
> Current pre-sorted memory region array has some limitations for future
> device IOTLB conversion:
> 
> 1) need extra work for adding and removing a single region, and it's
>expected to be slow because of sorting or memory re-allocation.
> 2) need extra work of removing a large range which may intersect
>several regions with different size.
> 3) need trick for a replacement policy like LRU
> 
> To overcome the above shortcomings, this patch convert it to interval
> tree which can easily address the above issue with almost no extra
> work.
> 
> The patch could be used for:
> 
> - Extend the current API and only let the userspace to send diffs of
>   memory table.
> - Simplify Device IOTLB implementation.

Does this affect performance at all?

> 
> Signed-off-by: Jason Wang 
> ---
>  drivers/vhost/net.c   |   8 +--
>  drivers/vhost/vhost.c | 182 
> --
>  drivers/vhost/vhost.h |  27 ++--
>  3 files changed, 128 insertions(+), 89 deletions(-)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 9eda69e..481db96 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -968,20 +968,20 @@ static long vhost_net_reset_owner(struct vhost_net *n)
>   struct socket *tx_sock = NULL;
>   struct socket *rx_sock = NULL;
>   long err;
> - struct vhost_memory *memory;
> + struct vhost_umem *umem;
>  
>   mutex_lock(>dev.mutex);
>   err = vhost_dev_check_owner(>dev);
>   if (err)
>   goto done;
> - memory = vhost_dev_reset_owner_prepare();
> - if (!memory) {
> + umem = vhost_dev_reset_owner_prepare();
> + if (!umem) {
>   err = -ENOMEM;
>   goto done;
>   }
>   vhost_net_stop(n, _sock, _sock);
>   vhost_net_flush(n);
> - vhost_dev_reset_owner(>dev, memory);
> + vhost_dev_reset_owner(>dev, umem);
>   vhost_net_vq_reset(n);
>  done:
>   mutex_unlock(>dev.mutex);
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index ad2146a..32c35a9 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -27,6 +27,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include "vhost.h"
>  
> @@ -42,6 +43,10 @@ enum {
>  #define vhost_used_event(vq) ((__virtio16 __user *)>avail->ring[vq->num])
>  #define vhost_avail_event(vq) ((__virtio16 __user *)>used->ring[vq->num])
>  
> +INTERVAL_TREE_DEFINE(struct vhost_umem_node,
> +  rb, __u64, __subtree_last,
> +  START, LAST, , vhost_umem_interval_tree);
> +
>  #ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY
>  static void vhost_vq_reset_user_be(struct vhost_virtqueue *vq)
>  {
> @@ -275,7 +280,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>   vq->call_ctx = NULL;
>   vq->call = NULL;
>   vq->log_ctx = NULL;
> - vq->memory = NULL;
> + vq->umem = NULL;
>   vq->is_le = virtio_legacy_is_little_endian();
>   vhost_vq_reset_user_be(vq);
>  }
> @@ -381,7 +386,7 @@ void vhost_dev_init(struct vhost_dev *dev,
>   mutex_init(>mutex);
>   dev->log_ctx = NULL;
>   dev->log_file = NULL;
> - dev->memory = NULL;
> + dev->umem = NULL;
>   dev->mm = NULL;
>   spin_lock_init(>work_lock);
>   INIT_LIST_HEAD(>work_list);
> @@ -486,27 +491,36 @@ err_mm:
>  }
>  EXPORT_SYMBOL_GPL(vhost_dev_set_owner);
>  
> -struct vhost_memory *vhost_dev_reset_owner_prepare(void)
> +static void *vhost_kvzalloc(unsigned long size)
> +{
> + void *n = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
> +
> + if (!n)
> + n = vzalloc(size);
> + return n;
> +}
> +
> +struct vhost_umem *vhost_dev_reset_owner_prepare(void)
>  {
> - return kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL);
> + return vhost_kvzalloc(sizeof(struct vhost_umem));
>  }
>  EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare);
>  
>  /* Caller should have device mutex */
> -void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_memory 
> *memory)
> +void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_umem *umem)
>  {
>   int i;
>  
>   vhost_dev_cleanup(dev, true);
>  
>   /* Restore memory to default empty mapping. */
> - memory->nregions = 0;
> - dev->memory = memory;
> + INIT_LIST_HEAD(>umem_list);
> + dev->umem = umem;
>   /* We don't need VQ locks below since vhost_dev_cleanup makes sure
>* VQs aren't running.
>*/
>   for (i = 0; i < dev->nvqs; ++i)
> - dev->vqs[i]->memory = memory;
> + dev->vqs[i]->umem = umem;
>  }
>  EXPORT_SYMBOL_GPL(vhost_dev_reset_owner);
>  
> @@ -523,6 +537,21 @@ void vhost_dev_stop(struct vhost_dev *dev)
>  }
>  EXPORT_SYMBOL_GPL(vhost_dev_stop);
>  
> +static void vhost_umem_clean(struct vhost_umem *umem)
> +{
> + struct vhost_umem_node *node, *tmp;
> +
> + if (!umem)
> +