Re: [PATCH v9 2/4] vhost: Reintroduce kthread mode support in vhost

2025-04-21 Thread Michael S. Tsirkin
On Mon, Apr 21, 2025 at 11:39:14AM +0800, Jason Wang wrote:
> On Mon, Apr 21, 2025 at 10:45 AM Cindy Lu  wrote:
> >
> > This patch reintroduces kthread mode support in vhost,
> > It also introduces struct vhost_worker_ops to abstract
> > worker create/stop/wakeup operations.
> >
> > * Bring back the original vhost_worker() implementation,
> >   and renamed to vhost_run_work_kthread_list().
> >
> > * Add cgroup support for the kthread
> >
> > * Introduce struct vhost_worker_ops:
> >   - Encapsulates create / stop / wake‑up callbacks.
> >   - vhost_worker_create() selects the proper ops according to
> > inherit_owner.
> >
> > This partially reverts or improves upon:
> > commit 6e890c5d5021 ("vhost: use vhost_tasks for worker threads")
> > commit 1cdaafa1b8b4 ("vhost: replace single worker pointer with xarray")
> >
> > Signed-off-by: Cindy Lu 
> > ---
> >  drivers/vhost/vhost.c | 188 ++
> >  drivers/vhost/vhost.h |  12 +++
> >  2 files changed, 182 insertions(+), 18 deletions(-)
> >
> > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > index 250dc43f1786..be97028a8baf 100644
> > --- a/drivers/vhost/vhost.c
> > +++ b/drivers/vhost/vhost.c
> > @@ -22,6 +22,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >  #include 
> > @@ -242,7 +243,7 @@ static void vhost_worker_queue(struct vhost_worker 
> > *worker,
> >  * test_and_set_bit() implies a memory barrier.
> >  */
> > llist_add(&work->node, &worker->work_list);
> > -   vhost_task_wake(worker->vtsk);
> > +   worker->ops->wakeup(worker);
> > }
> >  }
> >
> > @@ -388,6 +389,44 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> > __vhost_vq_meta_reset(vq);
> >  }
> >
> > +static int vhost_run_work_kthread_list(void *data)
> > +{
> > +   struct vhost_worker *worker = data;
> > +   struct vhost_work *work, *work_next;
> > +   struct vhost_dev *dev = worker->dev;
> > +   struct llist_node *node;
> > +
> > +   kthread_use_mm(dev->mm);
> > +
> > +   for (;;) {
> > +   /* mb paired w/ kthread_stop */
> > +   set_current_state(TASK_INTERRUPTIBLE);
> > +
> > +   if (kthread_should_stop()) {
> > +   __set_current_state(TASK_RUNNING);
> > +   break;
> > +   }
> > +   node = llist_del_all(&worker->work_list);
> > +   if (!node)
> > +   schedule();
> > +
> > +   node = llist_reverse_order(node);
> > +   /* make sure flag is seen after deletion */
> > +   smp_wmb();
> > +   llist_for_each_entry_safe(work, work_next, node, node) {
> > +   clear_bit(VHOST_WORK_QUEUED, &work->flags);
> > +   __set_current_state(TASK_RUNNING);
> > +   kcov_remote_start_common(worker->kcov_handle);
> > +   work->fn(work);
> > +   kcov_remote_stop();
> > +   cond_resched();
> > +   }
> > +   }
> > +   kthread_unuse_mm(dev->mm);
> > +
> > +   return 0;
> > +}
> > +
> >  static bool vhost_run_work_list(void *data)
> >  {
> > struct vhost_worker *worker = data;
> > @@ -582,6 +621,46 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
> >  }
> >  EXPORT_SYMBOL_GPL(vhost_dev_check_owner);
> >
> > +struct vhost_attach_cgroups_struct {
> > +   struct vhost_work work;
> > +   struct task_struct *owner;
> > +   int ret;
> > +};
> > +
> > +static void vhost_attach_cgroups_work(struct vhost_work *work)
> > +{
> > +   struct vhost_attach_cgroups_struct *s;
> > +
> > +   s = container_of(work, struct vhost_attach_cgroups_struct, work);
> > +   s->ret = cgroup_attach_task_all(s->owner, current);
> > +}
> > +
> > +static int vhost_attach_task_to_cgroups(struct vhost_worker *worker)
> > +{
> > +   struct vhost_attach_cgroups_struct attach;
> > +   int saved_cnt;
> > +
> > +   attach.owner = current;
> > +
> > +   vhost_work_init(&attach.work, vhost_attach_cgroups_work);
> > +   vhost_worker_queue(worker, &attach.work);
> > +
> > +   mutex_lock(&worker->mutex);
> > +
> > +   /*
> > +* Bypass attachment_cnt check in __vhost_worker_flush:
> > +* Temporarily change it to INT_MAX to bypass the check
> > +*/
> > +   saved_cnt = worker->attachment_cnt;
> > +   worker->attachment_cnt = INT_MAX;
> > +   __vhost_worker_flush(worker);
> > +   worker->attachment_cnt = saved_cnt;
> 
> I wonder if it's easier to re-introduce the flush that was used before
> vhost kthread to avoid the tricks here. We can have flush ops for
> example.
> 
> Thanks

Nah, we do not need ops: __vhost_worker_flush is just an internal
function. Refactor it so we can call the part without the check.

-- 
MST




Re: [PATCH v9 2/4] vhost: Reintroduce kthread mode support in vhost

2025-04-21 Thread Michael S. Tsirkin
On Mon, Apr 21, 2025 at 10:44:08AM +0800, Cindy Lu wrote:
> This patch reintroduces kthread mode support in vhost,
> It also introduces struct vhost_worker_ops to abstract
> worker create/stop/wakeup operations.
> 
> * Bring back the original vhost_worker() implementation,
>   and renamed to vhost_run_work_kthread_list().
> 
> * Add cgroup support for the kthread
> 
> * Introduce struct vhost_worker_ops:
>   - Encapsulates create / stop / wake‑up callbacks.
>   - vhost_worker_create() selects the proper ops according to
> inherit_owner.
> 
> This partially reverts or improves upon:
> commit 6e890c5d5021 ("vhost: use vhost_tasks for worker threads")
> commit 1cdaafa1b8b4 ("vhost: replace single worker pointer with xarray")
> 
> Signed-off-by: Cindy Lu 
> ---
>  drivers/vhost/vhost.c | 188 ++
>  drivers/vhost/vhost.h |  12 +++
>  2 files changed, 182 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 250dc43f1786..be97028a8baf 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -22,6 +22,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -242,7 +243,7 @@ static void vhost_worker_queue(struct vhost_worker 
> *worker,
>* test_and_set_bit() implies a memory barrier.
>*/
>   llist_add(&work->node, &worker->work_list);
> - vhost_task_wake(worker->vtsk);
> + worker->ops->wakeup(worker);
>   }
>  }
>  
> @@ -388,6 +389,44 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>   __vhost_vq_meta_reset(vq);
>  }
>  
> +static int vhost_run_work_kthread_list(void *data)
> +{
> + struct vhost_worker *worker = data;
> + struct vhost_work *work, *work_next;
> + struct vhost_dev *dev = worker->dev;
> + struct llist_node *node;
> +
> + kthread_use_mm(dev->mm);
> +
> + for (;;) {
> + /* mb paired w/ kthread_stop */
> + set_current_state(TASK_INTERRUPTIBLE);
> +
> + if (kthread_should_stop()) {
> + __set_current_state(TASK_RUNNING);
> + break;
> + }
> + node = llist_del_all(&worker->work_list);
> + if (!node)
> + schedule();
> +
> + node = llist_reverse_order(node);
> + /* make sure flag is seen after deletion */
> + smp_wmb();
> + llist_for_each_entry_safe(work, work_next, node, node) {
> + clear_bit(VHOST_WORK_QUEUED, &work->flags);
> + __set_current_state(TASK_RUNNING);
> + kcov_remote_start_common(worker->kcov_handle);
> + work->fn(work);
> + kcov_remote_stop();
> + cond_resched();
> + }
> + }
> + kthread_unuse_mm(dev->mm);
> +
> + return 0;
> +}
> +
>  static bool vhost_run_work_list(void *data)
>  {
>   struct vhost_worker *worker = data;
> @@ -582,6 +621,46 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
>  }
>  EXPORT_SYMBOL_GPL(vhost_dev_check_owner);
>  
> +struct vhost_attach_cgroups_struct {
> + struct vhost_work work;
> + struct task_struct *owner;
> + int ret;
> +};
> +
> +static void vhost_attach_cgroups_work(struct vhost_work *work)
> +{
> + struct vhost_attach_cgroups_struct *s;
> +
> + s = container_of(work, struct vhost_attach_cgroups_struct, work);
> + s->ret = cgroup_attach_task_all(s->owner, current);
> +}
> +
> +static int vhost_attach_task_to_cgroups(struct vhost_worker *worker)
> +{
> + struct vhost_attach_cgroups_struct attach;
> + int saved_cnt;
> +
> + attach.owner = current;
> +
> + vhost_work_init(&attach.work, vhost_attach_cgroups_work);
> + vhost_worker_queue(worker, &attach.work);
> +
> + mutex_lock(&worker->mutex);
> +
> + /*
> +  * Bypass attachment_cnt check in __vhost_worker_flush:
> +  * Temporarily change it to INT_MAX to bypass the check
> +  */
> + saved_cnt = worker->attachment_cnt;
> + worker->attachment_cnt = INT_MAX;
> + __vhost_worker_flush(worker);
> + worker->attachment_cnt = saved_cnt;


You mean this one?

	if (!worker->attachment_cnt || worker->killed)
		return;


Just introduce a variant of __vhost_worker_flush that
skips this check.

E.g.

Rename __vhost_worker_flush -> _vhost_worker_flush.

Then rework it so that _vhost_worker_flush keeps only the check and
calls a new __vhost_worker_flush, which does the actual flush
unconditionally (the struct vhost_flush_struct flush local belongs in
that inner function, not in the wrapper):

static void _vhost_worker_flush(struct vhost_worker *worker)
{
	if (!worker->attachment_cnt || worker->killed)
		return;

	__vhost_worker_flush(worker);
}





> +
> + mutex_unlock(&worker->mutex);
> +
> + return attach.ret;
> +}
> +
>  /* Caller should have device mutex */
>  bool vhost_dev_has_owner(struct vhost_dev *dev)
>  {
> @@ -627,7 +706,7 @@ static void vhost_worker_d

Re: [PATCH v9 2/4] vhost: Reintroduce kthread mode support in vhost

2025-04-20 Thread Jason Wang
On Mon, Apr 21, 2025 at 10:45 AM Cindy Lu  wrote:
>
> This patch reintroduces kthread mode support in vhost,
> It also introduces struct vhost_worker_ops to abstract
> worker create/stop/wakeup operations.
>
> * Bring back the original vhost_worker() implementation,
>   and renamed to vhost_run_work_kthread_list().
>
> * Add cgroup support for the kthread
>
> * Introduce struct vhost_worker_ops:
>   - Encapsulates create / stop / wake‑up callbacks.
>   - vhost_worker_create() selects the proper ops according to
> inherit_owner.
>
> This partially reverts or improves upon:
> commit 6e890c5d5021 ("vhost: use vhost_tasks for worker threads")
> commit 1cdaafa1b8b4 ("vhost: replace single worker pointer with xarray")
>
> Signed-off-by: Cindy Lu 
> ---
>  drivers/vhost/vhost.c | 188 ++
>  drivers/vhost/vhost.h |  12 +++
>  2 files changed, 182 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 250dc43f1786..be97028a8baf 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -22,6 +22,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -242,7 +243,7 @@ static void vhost_worker_queue(struct vhost_worker 
> *worker,
>  * test_and_set_bit() implies a memory barrier.
>  */
> llist_add(&work->node, &worker->work_list);
> -   vhost_task_wake(worker->vtsk);
> +   worker->ops->wakeup(worker);
> }
>  }
>
> @@ -388,6 +389,44 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> __vhost_vq_meta_reset(vq);
>  }
>
> +static int vhost_run_work_kthread_list(void *data)
> +{
> +   struct vhost_worker *worker = data;
> +   struct vhost_work *work, *work_next;
> +   struct vhost_dev *dev = worker->dev;
> +   struct llist_node *node;
> +
> +   kthread_use_mm(dev->mm);
> +
> +   for (;;) {
> +   /* mb paired w/ kthread_stop */
> +   set_current_state(TASK_INTERRUPTIBLE);
> +
> +   if (kthread_should_stop()) {
> +   __set_current_state(TASK_RUNNING);
> +   break;
> +   }
> +   node = llist_del_all(&worker->work_list);
> +   if (!node)
> +   schedule();
> +
> +   node = llist_reverse_order(node);
> +   /* make sure flag is seen after deletion */
> +   smp_wmb();
> +   llist_for_each_entry_safe(work, work_next, node, node) {
> +   clear_bit(VHOST_WORK_QUEUED, &work->flags);
> +   __set_current_state(TASK_RUNNING);
> +   kcov_remote_start_common(worker->kcov_handle);
> +   work->fn(work);
> +   kcov_remote_stop();
> +   cond_resched();
> +   }
> +   }
> +   kthread_unuse_mm(dev->mm);
> +
> +   return 0;
> +}
> +
>  static bool vhost_run_work_list(void *data)
>  {
> struct vhost_worker *worker = data;
> @@ -582,6 +621,46 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
>  }
>  EXPORT_SYMBOL_GPL(vhost_dev_check_owner);
>
> +struct vhost_attach_cgroups_struct {
> +   struct vhost_work work;
> +   struct task_struct *owner;
> +   int ret;
> +};
> +
> +static void vhost_attach_cgroups_work(struct vhost_work *work)
> +{
> +   struct vhost_attach_cgroups_struct *s;
> +
> +   s = container_of(work, struct vhost_attach_cgroups_struct, work);
> +   s->ret = cgroup_attach_task_all(s->owner, current);
> +}
> +
> +static int vhost_attach_task_to_cgroups(struct vhost_worker *worker)
> +{
> +   struct vhost_attach_cgroups_struct attach;
> +   int saved_cnt;
> +
> +   attach.owner = current;
> +
> +   vhost_work_init(&attach.work, vhost_attach_cgroups_work);
> +   vhost_worker_queue(worker, &attach.work);
> +
> +   mutex_lock(&worker->mutex);
> +
> +   /*
> +* Bypass attachment_cnt check in __vhost_worker_flush:
> +* Temporarily change it to INT_MAX to bypass the check
> +*/
> +   saved_cnt = worker->attachment_cnt;
> +   worker->attachment_cnt = INT_MAX;
> +   __vhost_worker_flush(worker);
> +   worker->attachment_cnt = saved_cnt;

I wonder if it's easier to re-introduce the flush that was used before
vhost kthread to avoid the tricks here. We can have flush ops for
example.

Thanks




[PATCH v9 2/4] vhost: Reintroduce kthread mode support in vhost

2025-04-20 Thread Cindy Lu
This patch reintroduces kthread mode support in vhost.
It also introduces struct vhost_worker_ops to abstract
worker create/stop/wakeup operations.

* Bring back the original vhost_worker() implementation,
  and rename it to vhost_run_work_kthread_list().

* Add cgroup support for the kthread

* Introduce struct vhost_worker_ops:
  - Encapsulates create / stop / wake‑up callbacks.
  - vhost_worker_create() selects the proper ops according to
    inherit_owner.

This partially reverts or improves upon:
commit 6e890c5d5021 ("vhost: use vhost_tasks for worker threads")
commit 1cdaafa1b8b4 ("vhost: replace single worker pointer with xarray")

Signed-off-by: Cindy Lu 
---
 drivers/vhost/vhost.c | 188 ++
 drivers/vhost/vhost.h |  12 +++
 2 files changed, 182 insertions(+), 18 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 250dc43f1786..be97028a8baf 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -22,6 +22,7 @@
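 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/kthread.h>
+#include <linux/cgroup.h>
 #include <linux/module.h>
 #include <linux/sort.h>
 #include <linux/sched/mm.h>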
@@ -242,7 +243,7 @@ static void vhost_worker_queue(struct vhost_worker *worker,
 * test_and_set_bit() implies a memory barrier.
 */
llist_add(&work->node, &worker->work_list);
-   vhost_task_wake(worker->vtsk);
+   worker->ops->wakeup(worker);
}
 }
 
@@ -388,6 +389,44 @@ static void vhost_vq_reset(struct vhost_dev *dev,
__vhost_vq_meta_reset(vq);
 }
 
+static int vhost_run_work_kthread_list(void *data)
+{
+   struct vhost_worker *worker = data;
+   struct vhost_work *work, *work_next;
+   struct vhost_dev *dev = worker->dev;
+   struct llist_node *node;
+
+   kthread_use_mm(dev->mm);
+
+   for (;;) {
+   /* mb paired w/ kthread_stop */
+   set_current_state(TASK_INTERRUPTIBLE);
+
+   if (kthread_should_stop()) {
+   __set_current_state(TASK_RUNNING);
+   break;
+   }
+   node = llist_del_all(&worker->work_list);
+   if (!node)
+   schedule();
+
+   node = llist_reverse_order(node);
+   /* make sure flag is seen after deletion */
+   smp_wmb();
+   llist_for_each_entry_safe(work, work_next, node, node) {
+   clear_bit(VHOST_WORK_QUEUED, &work->flags);
+   __set_current_state(TASK_RUNNING);
+   kcov_remote_start_common(worker->kcov_handle);
+   work->fn(work);
+   kcov_remote_stop();
+   cond_resched();
+   }
+   }
+   kthread_unuse_mm(dev->mm);
+
+   return 0;
+}
+
 static bool vhost_run_work_list(void *data)
 {
struct vhost_worker *worker = data;
@@ -582,6 +621,46 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
 }
 EXPORT_SYMBOL_GPL(vhost_dev_check_owner);
 
+struct vhost_attach_cgroups_struct {
+   struct vhost_work work;
+   struct task_struct *owner;
+   int ret;
+};
+
+static void vhost_attach_cgroups_work(struct vhost_work *work)
+{
+   struct vhost_attach_cgroups_struct *s;
+
+   s = container_of(work, struct vhost_attach_cgroups_struct, work);
+   s->ret = cgroup_attach_task_all(s->owner, current);
+}
+
+static int vhost_attach_task_to_cgroups(struct vhost_worker *worker)
+{
+   struct vhost_attach_cgroups_struct attach;
+   int saved_cnt;
+
+   attach.owner = current;
+
+   vhost_work_init(&attach.work, vhost_attach_cgroups_work);
+   vhost_worker_queue(worker, &attach.work);
+
+   mutex_lock(&worker->mutex);
+
+   /*
+* Bypass attachment_cnt check in __vhost_worker_flush:
+* Temporarily change it to INT_MAX to bypass the check
+*/
+   saved_cnt = worker->attachment_cnt;
+   worker->attachment_cnt = INT_MAX;
+   __vhost_worker_flush(worker);
+   worker->attachment_cnt = saved_cnt;
+
+   mutex_unlock(&worker->mutex);
+
+   return attach.ret;
+}
+
 /* Caller should have device mutex */
 bool vhost_dev_has_owner(struct vhost_dev *dev)
 {
@@ -627,7 +706,7 @@ static void vhost_worker_destroy(struct vhost_dev *dev,
 
WARN_ON(!llist_empty(&worker->work_list));
xa_erase(&dev->worker_xa, worker->id);
-   vhost_task_stop(worker->vtsk);
+   worker->ops->stop(worker);
kfree(worker);
 }
 
@@ -650,42 +729,115 @@ static void vhost_workers_free(struct vhost_dev *dev)
xa_destroy(&dev->worker_xa);
 }
 
+static void vhost_task_wakeup(struct vhost_worker *worker)
+{
+   return vhost_task_wake(worker->vtsk);
+}
+
+static void vhost_kthread_wakeup(struct vhost_worker *worker)
+{
+   wake_up_process(worker->kthread_task);
+}
+
+static void vhost_task_do_stop(struct vhost_worker *worker)
+{
+   return vhost_task_stop(worker->vtsk);
+}
+
+static void vhost_kthread_do_stop