On Fri, Mar 25, 2016 at 10:34:33AM +0800, Jason Wang wrote:
> The current pre-sorted memory region array has some limitations for the
> future device IOTLB conversion:
>
> 1) need extra work for adding and removing a single region, and it's
> expected to be slow because of sorting or memory re-allocation.
> 2) need extra work for removing a large range which may intersect
> several regions of different sizes.
> 3) need tricks for a replacement policy like LRU
>
> To overcome the above shortcomings, this patch converts it to an interval
> tree, which can easily address the above issues with almost no extra
> work.
>
> The patch could be used for:
>
> - Extend the current API and let userspace send only diffs of the
> memory table.
> - Simplify Device IOTLB implementation.
Does this affect performance at all?
>
> Signed-off-by: Jason Wang
> ---
> drivers/vhost/net.c | 8 +--
> drivers/vhost/vhost.c | 182
> --
> drivers/vhost/vhost.h | 27 ++--
> 3 files changed, 128 insertions(+), 89 deletions(-)
>
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 9eda69e..481db96 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -968,20 +968,20 @@ static long vhost_net_reset_owner(struct vhost_net *n)
> struct socket *tx_sock = NULL;
> struct socket *rx_sock = NULL;
> long err;
> - struct vhost_memory *memory;
> + struct vhost_umem *umem;
>
> mutex_lock(&n->dev.mutex);
> err = vhost_dev_check_owner(&n->dev);
> if (err)
> goto done;
> - memory = vhost_dev_reset_owner_prepare();
> - if (!memory) {
> + umem = vhost_dev_reset_owner_prepare();
> + if (!umem) {
> err = -ENOMEM;
> goto done;
> }
> vhost_net_stop(n, &tx_sock, &rx_sock);
> vhost_net_flush(n);
> - vhost_dev_reset_owner(&n->dev, memory);
> + vhost_dev_reset_owner(&n->dev, umem);
> vhost_net_vq_reset(n);
> done:
> mutex_unlock(&n->dev.mutex);
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index ad2146a..32c35a9 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -27,6 +27,7 @@
> #include <linux/cgroup.h>
> #include <linux/module.h>
> #include <linux/sort.h>
> +#include <linux/interval_tree_generic.h>
>
> #include "vhost.h"
>
> @@ -42,6 +43,10 @@ enum {
> #define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num])
> #define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num])
>
> +INTERVAL_TREE_DEFINE(struct vhost_umem_node,
> + rb, __u64, __subtree_last,
> + START, LAST, , vhost_umem_interval_tree);
> +
> #ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY
> static void vhost_vq_reset_user_be(struct vhost_virtqueue *vq)
> {
> @@ -275,7 +280,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> vq->call_ctx = NULL;
> vq->call = NULL;
> vq->log_ctx = NULL;
> - vq->memory = NULL;
> + vq->umem = NULL;
> vq->is_le = virtio_legacy_is_little_endian();
> vhost_vq_reset_user_be(vq);
> }
> @@ -381,7 +386,7 @@ void vhost_dev_init(struct vhost_dev *dev,
> mutex_init(&dev->mutex);
> dev->log_ctx = NULL;
> dev->log_file = NULL;
> - dev->memory = NULL;
> + dev->umem = NULL;
> dev->mm = NULL;
> spin_lock_init(&dev->work_lock);
> INIT_LIST_HEAD(&dev->work_list);
> @@ -486,27 +491,36 @@ err_mm:
> }
> EXPORT_SYMBOL_GPL(vhost_dev_set_owner);
>
> -struct vhost_memory *vhost_dev_reset_owner_prepare(void)
> +static void *vhost_kvzalloc(unsigned long size)
> +{
> + void *n = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
> +
> + if (!n)
> + n = vzalloc(size);
> + return n;
> +}
> +
> +struct vhost_umem *vhost_dev_reset_owner_prepare(void)
> {
> - return kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL);
> + return vhost_kvzalloc(sizeof(struct vhost_umem));
> }
> EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare);
>
> /* Caller should have device mutex */
> -void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_memory *memory)
> +void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_umem *umem)
> {
> int i;
>
> vhost_dev_cleanup(dev, true);
>
> /* Restore memory to default empty mapping. */
> - memory->nregions = 0;
> - dev->memory = memory;
> + INIT_LIST_HEAD(&umem->umem_list);
> + dev->umem = umem;
> /* We don't need VQ locks below since vhost_dev_cleanup makes sure
> * VQs aren't running.
> */
> for (i = 0; i < dev->nvqs; ++i)
> - dev->vqs[i]->memory = memory;
> + dev->vqs[i]->umem = umem;
> }
> EXPORT_SYMBOL_GPL(vhost_dev_reset_owner);
>
> @@ -523,6 +537,21 @@ void vhost_dev_stop(struct vhost_dev *dev)
> }
> EXPORT_SYMBOL_GPL(vhost_dev_stop);
>
> +static void vhost_umem_clean(struct vhost_umem *umem)
> +{
> + struct vhost_umem_node *node, *tmp;
> +
> + if (!umem)
> +