________________________________________
发件人: Zhang, Qiang <qiang.zh...@windriver.com>
发送时间: 2020年10月23日 11:55
收件人: Jens Axboe
抄送: v...@zeniv.linux.org.uk; io-ur...@vger.kernel.org; linux-kernel@vger.kernel.org; linux-fsde...@vger.kernel.org
主题: 回复: Question on io-wq
________________________________________
发件人: Jens Axboe <ax...@kernel.dk>
发送时间: 2020年10月22日 22:08
收件人: Zhang, Qiang
抄送: v...@zeniv.linux.org.uk; io-ur...@vger.kernel.org; linux-kernel@vger.kernel.org; linux-fsde...@vger.kernel.org
主题: Re: Question on io-wq

On 10/22/20 3:02 AM, Zhang,Qiang wrote:
>
> Hi Jens Axboe
>
> There is a problem with the 'io_wqe_worker' threads: when an
> 'io_wqe_worker' is created, its affinity is set to the CPUs of its NUMA
> node. Due to CPU hotplug, when the last CPU in the node goes down, the
> 'io_wqe_worker' thread will run anywhere. When a CPU in the node comes
> online again, should we restore the CPU bindings?

>Something like the below should help in ensuring affinities are
>always correct - trigger an affinity set for an online CPU event. We
>should not need to do it for offlining. Can you test it?

>diff --git a/fs/io-wq.c b/fs/io-wq.c
>index 4012ff541b7b..3bf029d1170e 100644
>--- a/fs/io-wq.c
>+++ b/fs/io-wq.c
>@@ -19,6 +19,7 @@
>#include <linux/task_work.h>
>#include <linux/blk-cgroup.h>
>#include <linux/audit.h>
>+#include <linux/cpu.h>
>#include "io-wq.h"
>
>@@ -123,9 +124,13 @@ struct io_wq {
>	refcount_t refs;
>	struct completion done;
>
>+	struct hlist_node cpuhp_node;
>+
>	refcount_t use_refs;
>};
>
>+static enum cpuhp_state io_wq_online;
>+
>static bool io_worker_get(struct io_worker *worker)
>{
>	return refcount_inc_not_zero(&worker->ref);
>@@ -1096,6 +1101,13 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
>		return ERR_PTR(-ENOMEM);
>	}
>
>+	ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
>+	if (ret) {
>+		kfree(wq->wqes);
>+		kfree(wq);
>+		return ERR_PTR(ret);
>+	}
>+
>	wq->free_work = data->free_work;
>	wq->do_work = data->do_work;
>
>@@ -1145,6 +1157,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
>	ret = PTR_ERR(wq->manager);
>	complete(&wq->done);
>err:
>+	cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
>	for_each_node(node)
>		kfree(wq->wqes[node]);
>	kfree(wq->wqes);
>@@ -1164,6 +1177,8 @@ static void __io_wq_destroy(struct io_wq *wq)
>{
>	int node;
>
>+	cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
>+
>	set_bit(IO_WQ_BIT_EXIT, &wq->state);
>	if (wq->manager)
>		kthread_stop(wq->manager);
>@@ -1191,3 +1206,40 @@ struct task_struct *io_wq_get_task(struct io_wq *wq)
>{
>	return wq->manager;
>}
>+
>+static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
>+{
>+	struct task_struct *task = worker->task;
>+	unsigned long flags;
>+
	struct rq_flags rf;
	struct rq *rq;
	rq = task_rq_lock(task, &rf);
---	raw_spin_lock_irqsave(&task->pi_lock, flags);
>+	do_set_cpus_allowed(task, cpumask_of_node(worker->wqe->node));
>+	task->flags |= PF_NO_SETAFFINITY;
---	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
	task_rq_unlock(rq, task, &rf);
>+	return false;
>+}
>+
>+static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
>+{
>+	struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
>+	int i;
>+
>+	rcu_read_lock();
>+	for_each_node(i)
>+		io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, NULL);
>+	rcu_read_unlock();
>+	return 0;
>+}
>+
>+static __init int io_wq_init(void)
>+{
>+	int ret;
>+
>+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
>+					io_wq_cpu_online, NULL);
>+	if (ret < 0)
>+		return ret;
>+	io_wq_online = ret;
>+	return 0;
>+}
>+subsys_initcall(io_wq_init);
>
>--
>Jens Axboe