Re: [PATCH 6/8] vhost_task: Allow vhost layer to use copy_process
Hi Mike, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on mst-vhost/linux-next] [also build test WARNING on tip/x86/core linus/master v5.17-rc2] [cannot apply to davem-sparc/master] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Mike-Christie/Use-copy_process-in-vhost-layer/20220203-050454 base: https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next config: x86_64-randconfig-s021 (https://download.01.org/0day-ci/archive/20220203/202202032136.uq6pxzyt-...@intel.com/config) compiler: gcc-9 (Debian 9.3.0-22) 9.3.0 reproduce: # apt-get install sparse # sparse version: v0.6.4-dirty # https://github.com/0day-ci/linux/commit/2c7380ae8136c224f4c7074027303b97b0a0f84c git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Mike-Christie/Use-copy_process-in-vhost-layer/20220203-050454 git checkout 2c7380ae8136c224f4c7074027303b97b0a0f84c # save the config file to linux build tree mkdir build_dir make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=x86_64 SHELL=/bin/bash If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot sparse warnings: (new ones prefixed by >>) >> kernel/vhost_task.c:85:24: sparse: sparse: incorrect type in argument 1 >> (different base types) @@ expected unsigned long [usertype] size @@ >> got restricted gfp_t @@ kernel/vhost_task.c:85:24: sparse: expected unsigned long [usertype] size kernel/vhost_task.c:85:24: sparse: got restricted gfp_t >> kernel/vhost_task.c:85:36: sparse: sparse: incorrect type in argument 2 >> (different base types) @@ expected restricted gfp_t [usertype] flags @@ >>got unsigned long @@ kernel/vhost_task.c:85:36: sparse: expected restricted gfp_t [usertype] flags kernel/vhost_task.c:85:36: sparse: got 
unsigned long vim +85 kernel/vhost_task.c 62 63 /** 64 * vhost_task_create - create a copy of a process to be used by the kernel 65 * @fn: thread stack 66 * @arg: data to be passed to fn 67 * @node: numa node to allocate task from 68 * 69 * This returns a specialized task for use by the vhost layer or NULL on 70 * failure. The returned task is inactive, and the caller must fire it up 71 * through vhost_task_start(). 72 */ 73 struct vhost_task *vhost_task_create(int (*fn)(void *), void *arg, int node) 74 { 75 struct kernel_clone_args args = { 76 .flags = CLONE_FS | CLONE_UNTRACED | CLONE_VM, 77 .exit_signal= 0, 78 .stack = (unsigned long)vhost_task_fn, 79 .worker_flags = USER_WORKER | USER_WORKER_NO_FILES | 80 USER_WORKER_SIG_IGN, 81 }; 82 struct vhost_task *vtsk; 83 struct task_struct *tsk; 84 > 85 vtsk = kzalloc(GFP_KERNEL, sizeof(*vtsk)); 86 if (!vtsk) 87 return ERR_PTR(-ENOMEM); 88 89 init_completion(&vtsk->exited); 90 vtsk->data = arg; 91 vtsk->fn = fn; 92 args.stack_size = (unsigned long)vtsk; 93 94 tsk = copy_process(NULL, 0, node, &args); 95 if (IS_ERR(tsk)) { 96 kfree(vtsk); 97 return NULL; 98 } 99 100 vtsk->task = tsk; 101 102 return vtsk; 103 } 104 EXPORT_SYMBOL_GPL(vhost_task_create); 105 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH 6/8] vhost_task: Allow vhost layer to use copy_process
Hi Mike, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on mst-vhost/linux-next] [also build test WARNING on tip/x86/core linus/master v5.17-rc2 next-20220203] [cannot apply to davem-sparc/master] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Mike-Christie/Use-copy_process-in-vhost-layer/20220203-050454 base: https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next config: sparc-randconfig-s032-20220130 (https://download.01.org/0day-ci/archive/20220203/202202032131.gnmg7b6h-...@intel.com/config) compiler: sparc-linux-gcc (GCC) 11.2.0 reproduce: wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # apt-get install sparse # sparse version: v0.6.4-dirty # https://github.com/0day-ci/linux/commit/2c7380ae8136c224f4c7074027303b97b0a0f84c git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Mike-Christie/Use-copy_process-in-vhost-layer/20220203-050454 git checkout 2c7380ae8136c224f4c7074027303b97b0a0f84c # save the config file to linux build tree mkdir build_dir COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=sparc SHELL=/bin/bash If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot sparse warnings: (new ones prefixed by >>) >> kernel/vhost_task.c:85:24: sparse: sparse: incorrect type in argument 1 >> (different base types) @@ expected unsigned int [usertype] size @@ >> got restricted gfp_t @@ kernel/vhost_task.c:85:24: sparse: expected unsigned int [usertype] size kernel/vhost_task.c:85:24: sparse: got restricted gfp_t >> kernel/vhost_task.c:85:36: sparse: sparse: incorrect type in argument 2 >> (different base 
types) @@ expected restricted gfp_t [usertype] flags @@ >>got unsigned int @@ kernel/vhost_task.c:85:36: sparse: expected restricted gfp_t [usertype] flags kernel/vhost_task.c:85:36: sparse: got unsigned int vim +85 kernel/vhost_task.c 62 63 /** 64 * vhost_task_create - create a copy of a process to be used by the kernel 65 * @fn: thread stack 66 * @arg: data to be passed to fn 67 * @node: numa node to allocate task from 68 * 69 * This returns a specialized task for use by the vhost layer or NULL on 70 * failure. The returned task is inactive, and the caller must fire it up 71 * through vhost_task_start(). 72 */ 73 struct vhost_task *vhost_task_create(int (*fn)(void *), void *arg, int node) 74 { 75 struct kernel_clone_args args = { 76 .flags = CLONE_FS | CLONE_UNTRACED | CLONE_VM, 77 .exit_signal= 0, 78 .stack = (unsigned long)vhost_task_fn, 79 .worker_flags = USER_WORKER | USER_WORKER_NO_FILES | 80 USER_WORKER_SIG_IGN, 81 }; 82 struct vhost_task *vtsk; 83 struct task_struct *tsk; 84 > 85 vtsk = kzalloc(GFP_KERNEL, sizeof(*vtsk)); 86 if (!vtsk) 87 return ERR_PTR(-ENOMEM); 88 89 init_completion(&vtsk->exited); 90 vtsk->data = arg; 91 vtsk->fn = fn; 92 args.stack_size = (unsigned long)vtsk; 93 94 tsk = copy_process(NULL, 0, node, &args); 95 if (IS_ERR(tsk)) { 96 kfree(vtsk); 97 return NULL; 98 } 99 100 vtsk->task = tsk; 101 102 return vtsk; 103 } 104 EXPORT_SYMBOL_GPL(vhost_task_create); 105 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 6/8] vhost_task: Allow vhost layer to use copy_process
Qemu will create vhost devices in the kernel which perform network, SCSI, etc IO and management operations from worker threads created by the kthread API. Because the kthread API does a copy_process on the kthreadd thread, the vhost layer has to use kthread_use_mm to access the Qemu thread's memory and cgroup_attach_task_all to add itself to the Qemu thread's cgroups, and it bypasses the RLIMIT_NPROC limit. This patch adds a new struct vhost_task which can be used instead of kthreads. They allow the vhost layer to use copy_process and inherit the userspace process's mm and cgroups and the task is accounted for under the userspace's nproc count. Signed-off-by: Mike Christie --- MAINTAINERS | 2 + drivers/vhost/Kconfig| 5 ++ include/linux/sched/vhost_task.h | 23 ++ kernel/Makefile | 1 + kernel/vhost_task.c | 123 +++ 5 files changed, 154 insertions(+) create mode 100644 include/linux/sched/vhost_task.h create mode 100644 kernel/vhost_task.c diff --git a/MAINTAINERS b/MAINTAINERS index f41088418aae..0d595f2ef4ae 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20464,7 +20464,9 @@ L: virtualization@lists.linux-foundation.org L: net...@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git +F: kernel/vhost_task.c F: drivers/vhost/ +F: include/linux/sched/vhost_task.h F: include/linux/vhost_iotlb.h F: include/uapi/linux/vhost.h diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 587fbae06182..b455d9ab6f3d 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -13,9 +13,14 @@ config VHOST_RING This option is selected by any driver which needs to access the host side of a virtio ring. +config VHOST_TASK + bool + default n + config VHOST tristate select VHOST_IOTLB + select VHOST_TASK help This option is selected by any driver which needs to access the core of vhost. 
diff --git a/include/linux/sched/vhost_task.h b/include/linux/sched/vhost_task.h new file mode 100644 index 000000000000..50d02a25d37b --- /dev/null +++ b/include/linux/sched/vhost_task.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_VHOST_TASK_H +#define _LINUX_VHOST_TASK_H + +#include <linux/completion.h> + +struct task_struct; + +struct vhost_task { + int (*fn)(void *data); + void *data; + struct completion exited; + unsigned long flags; + struct task_struct *task; +}; + +struct vhost_task *vhost_task_create(int (*fn)(void *), void *arg, int node); +__printf(2, 3) +void vhost_task_start(struct vhost_task *vtsk, const char namefmt[], ...); +void vhost_task_stop(struct vhost_task *vtsk); +bool vhost_task_should_stop(struct vhost_task *vtsk); + +#endif diff --git a/kernel/Makefile b/kernel/Makefile index 56f4ee97f328..d82f388082b8 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -15,6 +15,7 @@ obj-y = fork.o exec_domain.o panic.o \ obj-$(CONFIG_USERMODE_DRIVER) += usermode_driver.o obj-$(CONFIG_MODULES) += kmod.o obj-$(CONFIG_MULTIUSER) += groups.o +obj-$(CONFIG_VHOST_TASK) += vhost_task.o ifdef CONFIG_FUNCTION_TRACER # Do not trace internal ftrace files diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c new file mode 100644 index 000000000000..cdf875617656 --- /dev/null +++ b/kernel/vhost_task.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 Oracle Corporation + */ +#include +#include +#include +#include +#include + +enum vhost_task_flags { + VHOST_TASK_FLAGS_STOP, +}; + +static void vhost_task_fn(void *data) +{ + struct vhost_task *vtsk = data; + int ret; + + ret = vtsk->fn(vtsk->data); + complete(&vtsk->exited); + do_exit(ret); +} + +/** + * vhost_task_stop - stop a vhost_task + * @vtsk: vhost_task to stop + * + * Callers must call vhost_task_should_stop and return from their worker + * function when it returns true; + */ +void vhost_task_stop(struct vhost_task *vtsk) +{ + pid_t pid = vtsk->task->pid; + + set_bit(VHOST_TASK_FLAGS_STOP, 
&vtsk->flags); + wake_up_process(vtsk->task); + /* +* Make sure vhost_task_fn is no longer accessing the vhost_task before +* freeing it below. If userspace crashed or exited without closing, +* then the vhost_task->task could already be marked dead so +* kernel_wait will return early. +*/ + wait_for_completion(&vtsk->exited); + /* +* If we are just closing/removing a device and the parent process is +* not exiting then reap the task. +*/ + kernel_wait4(pid, NULL, __WCLONE, NULL); + kfree(vtsk); +} +EXPORT_SYMBOL_GPL(vhost_task_stop); + +/** + * vhost_task_should_stop - should the vhost task return from the work function + */ +bool vhost_task_should_stop(struct vhost_task *vtsk) +{ +