On 03/12/2015 01:00 PM, Lai Jiangshan wrote: > Allow modifying the low-level unbound workqueues cpumask through > sysfs. This is performed by traversing the entire workqueue list > and calling wq_unbound_install_ctx_prepare() on the unbound workqueues > with the low level mask passed in. Only after all the preparations are done, > we commit them all together. > > The ordered workqueue is ignored from the low level unbound workqueue cpumask, > it will be handled in the near future. > > The per-node pwqs are mandatorily controlled by the low level cpumask, while > the default pwq ignores the low level cpumask when (and ONLY when) the > cpumask set > by the user doesn't overlap with the low level cpumask. In this case, we can't > apply the empty cpumask to the default pwq, so we use the user-set cpumask > directly. > > Cc: Christoph Lameter <c...@linux.com> > Cc: Kevin Hilman <khil...@linaro.org> > Cc: Lai Jiangshan <la...@cn.fujitsu.com> > Cc: Mike Galbraith <bitbuc...@online.de> > Cc: Paul E. McKenney <paul...@linux.vnet.ibm.com> > Cc: Tejun Heo <t...@kernel.org> > Cc: Viresh Kumar <viresh.ku...@linaro.org> > Cc: Frederic Weisbecker <fweis...@gmail.com> > Original-patch-by: Frederic Weisbecker <fweis...@gmail.com> > Signed-off-by: Lai Jiangshan <la...@cn.fujitsu.com>
A part is missing in wq_update_unbound_numa(): diff --git a/kernel/workqueue.c b/kernel/workqueue.c index facaaae..4027ec9 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3722,6 +3722,9 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu, * wq's, the default pwq should be used. */ if (wq_calc_node_cpumask(wq->unbound_attrs, node, cpu_off, cpumask)) { + cpumask_and(cpumask, cpumask, wq_unbound_cpumask); + if (cpumask_empty(cpumask)) + goto use_dfl_pwq; if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask)) goto out_unlock; } else {
*/ > > -static cpumask_var_t wq_unbound_cpumask; > +static cpumask_var_t wq_unbound_cpumask; /* PL: low level cpumask for all > unbound wqs */ > > /* the per-cpu worker pools */ > static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool > [NR_STD_WORKER_POOLS], > @@ -3491,6 +3491,7 @@ static struct pool_workqueue > *numa_pwq_tbl_install(struct workqueue_struct *wq, > struct wq_unbound_install_ctx { > struct workqueue_struct *wq; /* target to be installed */ > struct workqueue_attrs *attrs; /* attrs for installing */ > + struct list_head list; /* queued for batching commit */ > struct pool_workqueue *dfl_pwq; > struct pool_workqueue *pwq_tbl[]; > }; > @@ -3513,10 +3514,11 @@ static void wq_unbound_install_ctx_free(struct > wq_unbound_install_ctx *ctx) > > static struct wq_unbound_install_ctx * > wq_unbound_install_ctx_prepare(struct workqueue_struct *wq, > - const struct workqueue_attrs *attrs) > + const struct workqueue_attrs *attrs, > + cpumask_var_t unbound_cpumask) > { > struct wq_unbound_install_ctx *ctx; > - struct workqueue_attrs *new_attrs, *tmp_attrs; > + struct workqueue_attrs *new_attrs, *pwq_attrs, *tmp_attrs; > int node; > > lockdep_assert_held(&wq_pool_mutex); > @@ -3525,13 +3527,16 @@ wq_unbound_install_ctx_prepare(struct > workqueue_struct *wq, > GFP_KERNEL); > > new_attrs = alloc_workqueue_attrs(GFP_KERNEL); > + pwq_attrs = alloc_workqueue_attrs(GFP_KERNEL); > tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL); > if (!ctx || !new_attrs || !tmp_attrs) > goto out_free; > > /* make a copy of @attrs and sanitize it */ > copy_workqueue_attrs(new_attrs, attrs); > - cpumask_and(new_attrs->cpumask, new_attrs->cpumask, wq_unbound_cpumask); > + copy_workqueue_attrs(pwq_attrs, attrs); > + cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask); > + cpumask_and(pwq_attrs->cpumask, pwq_attrs->cpumask, unbound_cpumask); > > /* > * We may create multiple pwqs with differing cpumasks. 
Make a > @@ -3544,13 +3549,21 @@ wq_unbound_install_ctx_prepare(struct > workqueue_struct *wq, > * If something goes wrong during CPU up/down, we'll fall back to > * the default pwq covering whole @attrs->cpumask. Always create > * it even if we don't use it immediately. > + * > + * If the cpumask set by the user doesn't overlap with the global > + * wq_unbound_cpumask, we ignore the wq_unbound_cpumask for this wq > + * which means all its nodes' pwqs are its default pwq and its default > + * pwq's workers' cpumask is totally equals to the user setting. > */ > - ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs); > + if (cpumask_empty(pwq_attrs->cpumask)) > + ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs); > + else > + ctx->dfl_pwq = alloc_unbound_pwq(wq, pwq_attrs); > if (!ctx->dfl_pwq) > goto out_free; > > for_each_node(node) { > - if (wq_calc_node_cpumask(attrs, node, -1, tmp_attrs->cpumask)) { > + if (wq_calc_node_cpumask(pwq_attrs, node, -1, > tmp_attrs->cpumask)) { > ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs); > if (!ctx->pwq_tbl[node]) > goto out_free; > @@ -3564,6 +3577,7 @@ wq_unbound_install_ctx_prepare(struct workqueue_struct > *wq, > ctx->attrs = new_attrs; > > out_free: > + free_workqueue_attrs(pwq_attrs); > free_workqueue_attrs(tmp_attrs); > > if (!ctx || !ctx->wq) { > @@ -3634,7 +3648,7 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, > get_online_cpus(); > > mutex_lock(&wq_pool_mutex); > - ctx = wq_unbound_install_ctx_prepare(wq, attrs); > + ctx = wq_unbound_install_ctx_prepare(wq, attrs, wq_unbound_cpumask); > mutex_unlock(&wq_pool_mutex); > > put_online_cpus(); > @@ -3961,19 +3975,85 @@ static struct bus_type wq_subsys = { > .dev_groups = wq_sysfs_groups, > }; > > +static int unbounds_cpumask_apply(cpumask_var_t cpumask) > +{ > + LIST_HEAD(ctxs); > + int ret = 0; > + struct workqueue_struct *wq; > + struct wq_unbound_install_ctx *ctx, *n; > + > + lockdep_assert_held(&wq_pool_mutex); > + > + list_for_each_entry(wq, &workqueues, 
list) { > + if (!(wq->flags & WQ_UNBOUND)) > + continue; > + /* creating multiple pwqs breaks ordering guarantee */ > + if (wq->flags & __WQ_ORDERED) > + continue; > + > + ctx = wq_unbound_install_ctx_prepare(wq, wq->unbound_attrs, > + cpumask); > + if (!ctx) { > + ret = -ENOMEM; > + break; > + } > + > + list_add_tail(&ctx->list, &ctxs); > + } > + > + list_for_each_entry_safe(ctx, n, &ctxs, list) { > + if (ret >= 0) > + wq_unbound_install_ctx_commit(ctx); > + wq_unbound_install_ctx_free(ctx); > + } > + > + return ret; > +} > + > +static ssize_t unbounds_cpumask_store(struct device *dev, > + struct device_attribute *attr, > + const char *buf, size_t count) > +{ > + cpumask_var_t cpumask; > + int ret = -EINVAL; > + > + if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL)) > + return -ENOMEM; > + > + ret = cpumask_parse(buf, cpumask); > + if (ret) > + goto out; > + > + get_online_cpus(); > + cpumask_and(cpumask, cpumask, cpu_possible_mask); > + if (cpumask_intersects(cpumask, cpu_online_mask)) { > + mutex_lock(&wq_pool_mutex); > + ret = unbounds_cpumask_apply(cpumask); > + if (ret >= 0) > + cpumask_copy(wq_unbound_cpumask, cpumask); > + mutex_unlock(&wq_pool_mutex); > + } > + put_online_cpus(); > +out: > + free_cpumask_var(cpumask); > + return ret ? 
ret : count; > +} > + > static ssize_t unbounds_cpumask_show(struct device *dev, > struct device_attribute *attr, char *buf) > { > int written; > > + mutex_lock(&wq_pool_mutex); > written = scnprintf(buf, PAGE_SIZE, "%*pb\n", > cpumask_pr_args(wq_unbound_cpumask)); > + mutex_unlock(&wq_pool_mutex); > > return written; > } > > static struct device_attribute wq_sysfs_cpumask_attr = > - __ATTR(cpumask, 0444, unbounds_cpumask_show, NULL); > + __ATTR(cpumask, 0644, unbounds_cpumask_show, unbounds_cpumask_store); > > static int __init wq_sysfs_init(void) > { > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/