Re: [PATCH RFC] vhost: convert pre sorted vhost memory array to interval tree
On 01/19/2016 11:24 PM, Igor Mammedov wrote: > On Mon, 18 Jan 2016 10:42:29 +0800 > Jason Wang wrote: > >> > Current pre-sorted memory region array has some limitations for future >> > device IOTLB conversion: >> > >> > 1) need extra work for adding and removing a single region, and it's >> >expected to be slow because of sorting or memory re-allocation. >> > 2) need extra work of removing a large range which may intersect >> >several regions with different size. >> > 3) need trick for a replacement policy like LRU >> > >> > To overcome the above shortcomings, this patch convert it to interval >> > tree which can easily address the above issue with almost no extra >> > work. >> > >> > The patch could be used for: >> > >> > - Extend the current API and only let the userspace to send diffs of >> > memory table. >> > - Simplify Device IOTLB implementation. > I'm curios how performance changes on translate_desc() hot-path in > default case and in the case of 256 memory regions? > Haven't measured this. But consider both methods are O(logN), should be no noticeable difference. Will measure it in the future. The main user is for the device IOTLB API[1], which: - have thousands of userspace memory region sections - may dynamically adding or removing one or some of the regions - need a replacement algorithm like LRU [1] https://lkml.org/lkml/2015/12/31/16 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 3/6] Input: Update vmmouse.c to use the common VMW_PORT macros
Updated the VMMOUSE macro to use the new VMW_PORT macro. Doing this instead of replacing all existing instances of VMWMOUSE to minimize code change. Signed-off-by: Sinclair Yeh Reviewed-by: Thomas Hellstrom Reviewed-by: Alok N Kataria Cc: pv-driv...@vmware.com Cc: linux-graphics-maintai...@vmware.com Cc: Dmitry Torokhov Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: linux-ker...@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Cc: linux-in...@vger.kernel.org --- v2: Instead of replacing existing VMMOUSE defines, only modify enough to use the new VMW_PORT define. v3: Use updated VMWARE_PORT() which requires hypervisor magic as an added parameter v4: Swapped parameters 1 and 2 when calling VMW_PORT because the macro has been updated v5: Updated VMW_PORT() usage since the macro has been updated --- drivers/input/mouse/vmmouse.c | 22 +++--- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/drivers/input/mouse/vmmouse.c b/drivers/input/mouse/vmmouse.c index e272f06..d06daf6 100644 --- a/drivers/input/mouse/vmmouse.c +++ b/drivers/input/mouse/vmmouse.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "psmouse.h" #include "vmmouse.h" @@ -84,21 +85,12 @@ struct vmmouse_data { * implementing the vmmouse protocol. Should never execute on * bare metal hardware. */ -#define VMMOUSE_CMD(cmd, in1, out1, out2, out3, out4) \ -({ \ - unsigned long __dummy1, __dummy2; \ - __asm__ __volatile__ ("inl %%dx" : \ - "=a"(out1), \ - "=b"(out2), \ - "=c"(out3), \ - "=d"(out4), \ - "=S"(__dummy1), \ - "=D"(__dummy2) :\ - "a"(VMMOUSE_PROTO_MAGIC), \ - "b"(in1), \ - "c"(VMMOUSE_PROTO_CMD_##cmd), \ - "d"(VMMOUSE_PROTO_PORT) : \ - "memory"); \ +#define VMMOUSE_CMD(cmd, in1, out1, out2, out3, out4) \ +({ \ + unsigned long __dummy1, __dummy2; \ + VMW_PORT(VMMOUSE_PROTO_CMD_##cmd, in1, 0, 0,\ +VMMOUSE_PROTO_PORT, VMMOUSE_PROTO_MAGIC, \ +out1, out2, out3, out4, __dummy1, __dummy2); \ }) /** -- 1.9.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 4/6] Input: Remove vmmouse port reservation
This port is used by quite a few guest-to-host communication capabilities, e.g. getting configuration, logging, etc. Currently multiple kernel modules, and one or more priviledged guest user mode app, e.g. open-vm-tools, use this port without reservation. It was determined that no reservation is required when accessing the port in this manner. Signed-off-by: Sinclair Yeh Reviewed-by: Thomas Hellstrom Cc: pv-driv...@vmware.com Cc: linux-graphics-maintai...@vmware.com Cc: virtualization@lists.linux-foundation.org Cc: linux-ker...@vger.kernel.org Cc: Greg Kroah-Hartman --- drivers/input/mouse/vmmouse.c | 19 +-- 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/drivers/input/mouse/vmmouse.c b/drivers/input/mouse/vmmouse.c index d06daf6..85fb3d47 100644 --- a/drivers/input/mouse/vmmouse.c +++ b/drivers/input/mouse/vmmouse.c @@ -347,16 +347,10 @@ int vmmouse_detect(struct psmouse *psmouse, bool set_properties) return -ENXIO; } - if (!request_region(VMMOUSE_PROTO_PORT, 4, "vmmouse")) { - psmouse_dbg(psmouse, "VMMouse port in use.\n"); - return -EBUSY; - } - /* Check if the device is present */ response = ~VMMOUSE_PROTO_MAGIC; VMMOUSE_CMD(GETVERSION, 0, version, response, dummy1, dummy2); if (response != VMMOUSE_PROTO_MAGIC || version == 0xU) { - release_region(VMMOUSE_PROTO_PORT, 4); return -ENXIO; } @@ -366,8 +360,6 @@ int vmmouse_detect(struct psmouse *psmouse, bool set_properties) psmouse->model = version; } - release_region(VMMOUSE_PROTO_PORT, 4); - return 0; } @@ -386,7 +378,6 @@ static void vmmouse_disconnect(struct psmouse *psmouse) psmouse_reset(psmouse); input_unregister_device(priv->abs_dev); kfree(priv); - release_region(VMMOUSE_PROTO_PORT, 4); } /** @@ -430,15 +421,10 @@ int vmmouse_init(struct psmouse *psmouse) struct input_dev *rel_dev = psmouse->dev, *abs_dev; int error; - if (!request_region(VMMOUSE_PROTO_PORT, 4, "vmmouse")) { - psmouse_dbg(psmouse, "VMMouse port in use.\n"); - return -EBUSY; - } - psmouse_reset(psmouse); error = vmmouse_enable(psmouse); if (error) - goto release_region; + return error; priv = kzalloc(sizeof(*priv), GFP_KERNEL); abs_dev = input_allocate_device(); @@ -493,8 +479,5 @@ init_fail: kfree(priv); psmouse->private = NULL; -release_region: - release_region(VMMOUSE_PROTO_PORT, 4); - return error; } -- 1.9.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 6/6] VMware balloon: Update vmw_balloon.c to use the VMW_PORT macro
Updated VMWARE_BALLOON_CMD to use the common VMW_PORT macro. Doing this rather than replacing all instances of VMWARE_BALLOON_CMD to minimize code change. Signed-off-by: Sinclair Yeh Reviewed-by: Thomas Hellstrom Reviewed-by: Alok N Kataria Acked-by: Xavier Deguillard Cc: pv-driv...@vmware.com Cc: Xavier Deguillard Cc: linux-ker...@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Cc: Greg Kroah-Hartman --- v1 Swapped parameters 1 and 2 to VMW_PORT because the macro has been updated v2 Updated VMW_PORT() usage because the macro and vmw_balloon.c's usage have changed. --- drivers/misc/vmw_balloon.c | 31 --- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index 1e688bf..6476b74 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -46,6 +46,7 @@ #include #include #include +#include MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver"); @@ -197,25 +198,17 @@ static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx, } -#define VMWARE_BALLOON_CMD(cmd, arg1, arg2, result)\ -({ \ - unsigned long __status, __dummy1, __dummy2, __dummy3; \ - __asm__ __volatile__ ("inl %%dx" : \ - "=a"(__status), \ - "=c"(__dummy1), \ - "=d"(__dummy2), \ - "=b"(result), \ - "=S" (__dummy3) : \ - "0"(VMW_BALLOON_HV_MAGIC), \ - "1"(VMW_BALLOON_CMD_##cmd), \ - "2"(VMW_BALLOON_HV_PORT), \ - "3"(arg1), \ - "4" (arg2) :\ - "memory"); \ - if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START) \ - result = __dummy1; \ - result &= -1UL; \ - __status & -1UL;\ +#define VMWARE_BALLOON_CMD(cmd, arg1, arg2, result)\ +({ \ + unsigned long __status, __dummy1, __dummy2; \ + unsigned long __si, __di; \ + VMW_PORT(VMW_BALLOON_CMD_##cmd, arg1, arg2, 0, \ +VMW_BALLOON_HV_PORT, VMW_BALLOON_HV_MAGIC, \ +__status, result, __dummy1, __dummy2, __si, __di); \ + if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START) \ + result = __dummy1; \ + result &= -1UL; \ + __status & -1UL;\ }) #ifdef CONFIG_DEBUG_FS -- 1.9.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 2/6] x86: Update vmware.c to use the common VMW_PORT macros
Updated the VMWARE_PORT macro to use the new VMW_PORT macro. Doing this instead of replacing all existing instances of VMWARE_PORT to minimize code change. Signed-off-by: Sinclair Yeh Reviewed-by: Thomas Hellstrom Reviewed-by: Alok N Kataria Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: pv-driv...@vmware.com Cc: virtualization@lists.linux-foundation.org Cc: linux-ker...@vger.kernel.org Cc: Greg Kroah-Hartman --- v2: Instead of replacing all existing instances of VMWARE_PORT with VMW_PORT, update VMWARE_PORT to use the new VMW_PORT. v3: Using updated VMWARE_PORT() macro, which needs hypervisor magic in the parameter v4: Swapped the first and second parameters because VMW_PORT has changed. v5: si and di are now separate input/output arguments in VMW_PORT() --- arch/x86/kernel/cpu/vmware.c | 14 -- 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 628a059..8e900d3 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -26,6 +26,7 @@ #include #include #include +#include #define CPUID_VMWARE_INFO_LEAF 0x4000 #define VMWARE_HYPERVISOR_MAGIC0x564D5868 @@ -38,12 +39,13 @@ #define VMWARE_PORT_CMD_VCPU_RESERVED 31 #define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \ - __asm__("inl (%%dx)" : \ - "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :\ - "0"(VMWARE_HYPERVISOR_MAGIC), \ - "1"(VMWARE_PORT_CMD_##cmd), \ - "2"(VMWARE_HYPERVISOR_PORT), "3"(UINT_MAX) :\ - "memory"); +({ \ + unsigned long __si, __di; /* Not used */\ + VMW_PORT(VMWARE_PORT_CMD_##cmd, UINT_MAX, 0, 0, \ +VMWARE_HYPERVISOR_PORT, VMWARE_HYPERVISOR_MAGIC, \ +eax, ebx, ecx, edx, __si, __di); \ +}) + static inline int __vmware_platform(void) { -- 1.9.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 1/6] x86: Add VMWare Host Communication Macros
These macros will be used by multiple VMWare modules for handling host communication. Signed-off-by: Sinclair Yeh Reviewed-by: Thomas Hellstrom Reviewed-by: Alok N Kataria Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Alok Kataria Cc: linux-ker...@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Cc: Xavier Deguillard Cc: Greg Kroah-Hartman --- v2: * Keeping only the minimal common platform defines * added vmware_platform() check function v3: * Added new field to handle different hypervisor magic values v4: * Make it so that "cmd" is always the first parameter for both High-Bandwidh and non-High-Bandwidth version of the macro * Addressed comments from H. Peter Anvin on the assembly code v5: * Separate SI/DI as independent input/output arguments, given the latest update to vmw_balloon.c --- arch/x86/include/asm/vmware.h | 138 ++ 1 file changed, 138 insertions(+) create mode 100644 arch/x86/include/asm/vmware.h diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h new file mode 100644 index 000..7d02a2b --- /dev/null +++ b/arch/x86/include/asm/vmware.h @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2015, VMware, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * Based on code from vmware.c and vmmouse.c. + * Author: + * Sinclair Yeh + */ +#ifndef _ASM_X86_VMWARE_H +#define _ASM_X86_VMWARE_H + + +/** + * Hypervisor-specific bi-directional communication channel. Should never + * execute on bare metal hardware. The caller must make sure to check for + * supported hypervisor before using these macros. + * + * The last two parameters are both input and output and must be initialized. + * + * @cmd: [IN] Message Cmd + * @in_ebx: [IN] Message Len, through EBX + * @in_si: [IN] Input argument through SI, set to 0 if not used + * @in_di: [IN] Input argument through DI, set ot 0 if not used + * @port_num: [IN] port number + [channel id] + * @magic: [IN] hypervisor magic value + * @eax: [OUT] value of EAX register + * @ebx: [OUT] e.g. status from an HB message status command + * @ecx: [OUT] e.g. status from a non-HB message status command + * @edx: [OUT] e.g. channel id + * @si: [OUT] + * @di: [OUT] + */ +#define VMW_PORT(cmd, in_ebx, in_si, in_di,\ +port_num, magic, \ +eax, ebx, ecx, edx, si, di)\ +({ \ + asm volatile ("inl %%dx, %%eax;" : \ + "=a"(eax), \ + "=b"(ebx), \ + "=c"(ecx), \ + "=d"(edx), \ + "=S"(si), \ + "=D"(di) : \ + "a"(magic), \ + "b"(in_ebx),\ + "c"(cmd), \ + "d"(port_num), \ + "S"(in_si), \ + "D"(in_di) :\ + "memory"); \ +}) + + +/** + * Hypervisor-specific bi-directional communication channel. Should never + * execute on bare metal hardware. The caller must make sure to check for + * supported hypervisor before using these macros. + * + * The last 3 parameters are both input and output and must be initialized. + * + * @cmd: [IN] Message Cmd + * @in_ecx: [IN] Message Len, through ECX + * @in_si: [IN] Input argument through SI, set to 0 if not used + * @in_di: [IN] Input argument through DI, set to 0 if not used + * @port_num: [IN] port number + [channel id] + * @magic: [IN] hypervisor magic value + * @eax: [OUT] value of EAX register + * @ebx: [OUT] e.g. status from an HB message status command + * @ecx: [OUT] e.g. status from a non-HB message status command + * @edx: [OUT] e.g. channel id + * @si: [OUT] + * @di: [OUT] + * @bp: [INOUT] set to 0 if not used + */ +#define VMW_PORT_HB_OUT(cmd, in_ecx, in_si, in_di, \ + port_num, magic,\ + eax, ebx, ecx, edx, si, di, bp) \ +({ \ + asm volatile ("push %%rbp;" \ + "xchgq %6, %%rbp;" \ + "rep outsb;"\ + "xchgq %%rbp, %6;" \ + "po
Re: [PATCH RFC] vhost: convert pre sorted vhost memory array to interval tree
On Mon, 18 Jan 2016 10:42:29 +0800 Jason Wang wrote: > Current pre-sorted memory region array has some limitations for future > device IOTLB conversion: > > 1) need extra work for adding and removing a single region, and it's >expected to be slow because of sorting or memory re-allocation. > 2) need extra work of removing a large range which may intersect >several regions with different size. > 3) need trick for a replacement policy like LRU > > To overcome the above shortcomings, this patch convert it to interval > tree which can easily address the above issue with almost no extra > work. > > The patch could be used for: > > - Extend the current API and only let the userspace to send diffs of > memory table. > - Simplify Device IOTLB implementation. I'm curios how performance changes on translate_desc() hot-path in default case and in the case of 256 memory regions? > > Signed-off-by: Jason Wang > --- > drivers/vhost/net.c | 8 +-- > drivers/vhost/vhost.c | 179 > +++--- > drivers/vhost/vhost.h | 27 ++-- > 3 files changed, 125 insertions(+), 89 deletions(-) > > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > index 9eda69e..481db96 100644 > --- a/drivers/vhost/net.c > +++ b/drivers/vhost/net.c > @@ -968,20 +968,20 @@ static long vhost_net_reset_owner(struct vhost_net *n) > struct socket *tx_sock = NULL; > struct socket *rx_sock = NULL; > long err; > - struct vhost_memory *memory; > + struct vhost_umem *umem; > > mutex_lock(&n->dev.mutex); > err = vhost_dev_check_owner(&n->dev); > if (err) > goto done; > - memory = vhost_dev_reset_owner_prepare(); > - if (!memory) { > + umem = vhost_dev_reset_owner_prepare(); > + if (!umem) { > err = -ENOMEM; > goto done; > } > vhost_net_stop(n, &tx_sock, &rx_sock); > vhost_net_flush(n); > - vhost_dev_reset_owner(&n->dev, memory); > + vhost_dev_reset_owner(&n->dev, umem); > vhost_net_vq_reset(n); > done: > mutex_unlock(&n->dev.mutex); > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > index ad2146a..851dce8 100644 > --- a/drivers/vhost/vhost.c > +++ b/drivers/vhost/vhost.c > @@ -27,6 +27,7 @@ > #include > #include > #include > +#include > > #include "vhost.h" > > @@ -42,6 +43,10 @@ enum { > #define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num]) > #define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num]) > > +INTERVAL_TREE_DEFINE(struct vhost_umem_node, > + rb, __u64, __subtree_last, > + START, LAST, , vhost_umem_interval_tree); > + > #ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY > static void vhost_vq_reset_user_be(struct vhost_virtqueue *vq) > { > @@ -275,7 +280,7 @@ static void vhost_vq_reset(struct vhost_dev *dev, > vq->call_ctx = NULL; > vq->call = NULL; > vq->log_ctx = NULL; > - vq->memory = NULL; > + vq->umem = NULL; > vq->is_le = virtio_legacy_is_little_endian(); > vhost_vq_reset_user_be(vq); > } > @@ -381,7 +386,7 @@ void vhost_dev_init(struct vhost_dev *dev, > mutex_init(&dev->mutex); > dev->log_ctx = NULL; > dev->log_file = NULL; > - dev->memory = NULL; > + dev->umem = NULL; > dev->mm = NULL; > spin_lock_init(&dev->work_lock); > INIT_LIST_HEAD(&dev->work_list); > @@ -486,27 +491,36 @@ err_mm: > } > EXPORT_SYMBOL_GPL(vhost_dev_set_owner); > > -struct vhost_memory *vhost_dev_reset_owner_prepare(void) > +static void *vhost_kvzalloc(unsigned long size) > +{ > + void *n = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); > + > + if (!n) > + n = vzalloc(size); > + return n; > +} > + > +struct vhost_umem *vhost_dev_reset_owner_prepare(void) > { > - return kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL); > + return vhost_kvzalloc(sizeof(struct vhost_umem)); > } > EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare); > > /* Caller should have device mutex */ > -void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_memory > *memory) > +void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_umem *umem) > { > int i; > > vhost_dev_cleanup(dev, true); > > /* Restore memory to default empty mapping. */ > - memory->nregions = 0; > - dev->memory = memory; > + INIT_LIST_HEAD(&umem->umem_list); > + dev->umem = umem; > /* We don't need VQ locks below since vhost_dev_cleanup makes sure >* VQs aren't running. >*/ > for (i = 0; i < dev->nvqs; ++i) > - dev->vqs[i]->memory = memory; > + dev->vqs[i]->umem = umem; > } > EXPORT_SYMBOL_GPL(vhost_dev_reset_owner); > > @@ -523,6 +537,19 @@ void vhost_dev_stop(struct vhost_dev *dev) > } > EXPORT_SYMBOL_GPL(vhost_dev_stop); > > +static void vhost_umem_clean(struct vhost