From: Chris Metcalf <cmetc...@ezchip.com> When queuing work, we should avoid queuing it on the local cpu if we are using WORK_CPU_UNBOUND and the local cpu is nohz_full, since running the work item there will later interrupt the nohz_full process, which presumably would prefer to keep 100% of the core without interruptions.
Likewise, remove the nohz_full cores from unbound workqueues. If all the cores are nohz_full, we leave them in. Signed-off-by: Chris Metcalf <cmetc...@ezchip.com> --- Note that this patch depends on my earlier commit, not yet pulled into Linus' tree, that added the tick_nohz_full_clear_cpus() API: https://lkml.org/lkml/2015/3/24/956 include/linux/tick.h | 9 +++++++++ kernel/workqueue.c | 10 +++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/include/linux/tick.h b/include/linux/tick.h index 29456c443970..119ed00c96d5 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -224,6 +224,15 @@ static inline bool is_housekeeping_cpu(int cpu) return true; } +static inline int prefer_housekeeping_cpu(int cpu) +{ +#ifdef CONFIG_NO_HZ_FULL + if (tick_nohz_full_cpu(cpu)) + return cpumask_next(-1, housekeeping_mask); +#endif + return cpu; +} + static inline void housekeeping_affine(struct task_struct *t) { #ifdef CONFIG_NO_HZ_FULL diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f28849394791..ebe5ce3ae42d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -48,6 +48,7 @@ #include <linux/nodemask.h> #include <linux/moduleparam.h> #include <linux/uaccess.h> +#include <linux/tick.h> #include "workqueue_internal.h" @@ -1303,7 +1304,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, return; retry: if (req_cpu == WORK_CPU_UNBOUND) - cpu = raw_smp_processor_id(); + cpu = prefer_housekeeping_cpu(raw_smp_processor_id()); /* pwq which will be used unless @work is executing elsewhere */ if (!(wq->flags & WQ_UNBOUND)) @@ -3782,6 +3783,7 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, /* make a copy of @attrs and sanitize it */ copy_workqueue_attrs(new_attrs, attrs); cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask); + tick_nohz_full_clear_cpus(new_attrs->cpumask); /* * We may create multiple pwqs with differing cpumasks. 
Make a @@ -3810,6 +3812,12 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, for_each_node(node) { if (wq_calc_node_cpumask(attrs, node, -1, tmp_attrs->cpumask)) { +#ifdef CONFIG_NO_HZ_FULL + tick_nohz_full_clear_cpus(tmp_attrs->cpumask); + if (cpumask_empty(tmp_attrs->cpumask)) + cpumask_copy(tmp_attrs->cpumask, + new_attrs->cpumask); +#endif pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs); if (!pwq_tbl[node]) goto enomem_pwq; -- 2.1.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/