On Mon, Apr 21, 2025 at 11:39:14AM +0800, Jason Wang wrote: > On Mon, Apr 21, 2025 at 10:45 AM Cindy Lu <l...@redhat.com> wrote: > > > > This patch reintroduces kthread mode support in vhost, > > It also introduces struct vhost_worker_ops to abstract > > worker create/stop/wakeup operations. > > > > * Bring back the original vhost_worker() implementation, > > and renamed to vhost_run_work_kthread_list(). > > > > * Add cgroup support for the kthread > > > > * Introduce struct vhost_worker_ops: > > - Encapsulates create / stop / wake‑up callbacks. > > - vhost_worker_create() selects the proper ops according to > > inherit_owner. > > > > This partially reverts or improves upon: > > commit 6e890c5d5021 ("vhost: use vhost_tasks for worker threads") > > commit 1cdaafa1b8b4 ("vhost: replace single worker pointer with xarray") > > > > Signed-off-by: Cindy Lu <l...@redhat.com> > > --- > > drivers/vhost/vhost.c | 188 ++++++++++++++++++++++++++++++++++++++---- > > drivers/vhost/vhost.h | 12 +++ > > 2 files changed, 182 insertions(+), 18 deletions(-) > > > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > > index 250dc43f1786..be97028a8baf 100644 > > --- a/drivers/vhost/vhost.c > > +++ b/drivers/vhost/vhost.c > > @@ -22,6 +22,7 @@ > > #include <linux/slab.h> > > #include <linux/vmalloc.h> > > #include <linux/kthread.h> > > +#include <linux/cgroup.h> > > #include <linux/module.h> > > #include <linux/sort.h> > > #include <linux/sched/mm.h> > > @@ -242,7 +243,7 @@ static void vhost_worker_queue(struct vhost_worker > > *worker, > > * test_and_set_bit() implies a memory barrier. 
> > */ > > llist_add(&work->node, &worker->work_list); > > - vhost_task_wake(worker->vtsk); > > + worker->ops->wakeup(worker); > > } > > } > > > > @@ -388,6 +389,44 @@ static void vhost_vq_reset(struct vhost_dev *dev, > > __vhost_vq_meta_reset(vq); > > } > > > > +static int vhost_run_work_kthread_list(void *data) > > +{ > > + struct vhost_worker *worker = data; > > + struct vhost_work *work, *work_next; > > + struct vhost_dev *dev = worker->dev; > > + struct llist_node *node; > > + > > + kthread_use_mm(dev->mm); > > + > > + for (;;) { > > + /* mb paired w/ kthread_stop */ > > + set_current_state(TASK_INTERRUPTIBLE); > > + > > + if (kthread_should_stop()) { > > + __set_current_state(TASK_RUNNING); > > + break; > > + } > > + node = llist_del_all(&worker->work_list); > > + if (!node) > > + schedule(); > > + > > + node = llist_reverse_order(node); > > + /* make sure flag is seen after deletion */ > > + smp_wmb(); > > + llist_for_each_entry_safe(work, work_next, node, node) { > > + clear_bit(VHOST_WORK_QUEUED, &work->flags); > > + __set_current_state(TASK_RUNNING); > > + kcov_remote_start_common(worker->kcov_handle); > > + work->fn(work); > > + kcov_remote_stop(); > > + cond_resched(); > > + } > > + } > > + kthread_unuse_mm(dev->mm); > > + > > + return 0; > > +} > > + > > static bool vhost_run_work_list(void *data) > > { > > struct vhost_worker *worker = data; > > @@ -582,6 +621,46 @@ long vhost_dev_check_owner(struct vhost_dev *dev) > > } > > EXPORT_SYMBOL_GPL(vhost_dev_check_owner); > > > > +struct vhost_attach_cgroups_struct { > > + struct vhost_work work; > > + struct task_struct *owner; > > + int ret; > > +}; > > + > > +static void vhost_attach_cgroups_work(struct vhost_work *work) > > +{ > > + struct vhost_attach_cgroups_struct *s; > > + > > + s = container_of(work, struct vhost_attach_cgroups_struct, work); > > + s->ret = cgroup_attach_task_all(s->owner, current); > > +} > > + > > +static int vhost_attach_task_to_cgroups(struct vhost_worker *worker) > > +{ > > + 
struct vhost_attach_cgroups_struct attach; > > + int saved_cnt; > > + > > + attach.owner = current; > > + > > + vhost_work_init(&attach.work, vhost_attach_cgroups_work); > > + vhost_worker_queue(worker, &attach.work); > > + > > + mutex_lock(&worker->mutex); > > + > > + /* > > + * Bypass attachment_cnt check in __vhost_worker_flush: > > + * Temporarily change it to INT_MAX to bypass the check > > + */ > > + saved_cnt = worker->attachment_cnt; > > + worker->attachment_cnt = INT_MAX; > > + __vhost_worker_flush(worker); > > + worker->attachment_cnt = saved_cnt; > > I wonder if it's easier to re-introduce the flush that was used before > vhost kthread to avoid the tricks here. We can have flush ops for > example. > > Thanks
Nah, we do not need ops; __vhost_worker_flush is just an internal function. Refactor it so we can call the part without the check. -- MST