On 03/12/2015 01:00 PM, Lai Jiangshan wrote:
> Allow to modify the low-level unbound workqueues cpumask through
> sysfs. This is performed by traversing the entire workqueue list
> and calling wq_unbound_install_ctx_prepare() on the unbound workqueues
> with the low level mask passed in. Only after all the preparations are done,
> do we commit them all together.
> 
> 
> The ordered workqueue is excluded from the low level unbound workqueue cpumask;
> it will be handled in the near future.
> 
> The per-node pwqs are mandatorily controlled by the low level cpumask, while
> the default pwq ignores the low level cpumask when (and ONLY when) the 
> cpumask set
> by the user doesn't overlap with the low level cpumask. In this case, we can't
> apply the empty cpumask to the default pwq, so we use the user-set cpumask
> directly.
> 
> Cc: Christoph Lameter <c...@linux.com>
> Cc: Kevin Hilman <khil...@linaro.org>
> Cc: Lai Jiangshan <la...@cn.fujitsu.com>
> Cc: Mike Galbraith <bitbuc...@online.de>
> Cc: Paul E. McKenney <paul...@linux.vnet.ibm.com>
> Cc: Tejun Heo <t...@kernel.org>
> Cc: Viresh Kumar <viresh.ku...@linaro.org>
> Cc: Frederic Weisbecker <fweis...@gmail.com>
> Original-patch-by: Frederic Weisbecker <fweis...@gmail.com>
> Signed-off-by: Lai Jiangshan <la...@cn.fujitsu.com>

A part was missed in wq_update_unbound_numa(); the following should be squashed in:

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index facaaae..4027ec9 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3722,6 +3722,9 @@ static void wq_update_unbound_numa(struct 
workqueue_struct *wq, int cpu,
         * wq's, the default pwq should be used.
         */
        if (wq_calc_node_cpumask(wq->unbound_attrs, node, cpu_off, cpumask)) {
+               cpumask_and(cpumask, cpumask, wq_unbound_cpumask);
+               if (cpumask_empty(cpumask))
+                       goto use_dfl_pwq;
                if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
                        goto out_unlock;
        } else {

> ---
>  kernel/workqueue.c | 96 
> +++++++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 88 insertions(+), 8 deletions(-)
> 
> diff --git a/kernel/workqueue.c b/kernel/workqueue.c
> index 61b5bfa..facaaae 100644
> --- a/kernel/workqueue.c
> +++ b/kernel/workqueue.c
> @@ -299,7 +299,7 @@ static DEFINE_SPINLOCK(wq_mayday_lock);   /* protects 
> wq->maydays list */
>  static LIST_HEAD(workqueues);                /* PR: list of all workqueues */
>  static bool workqueue_freezing;              /* PL: have wqs started 
> freezing? */
>  
> -static cpumask_var_t wq_unbound_cpumask;
> +static cpumask_var_t wq_unbound_cpumask; /* PL: low level cpumask for all 
> unbound wqs */
>  
>  /* the per-cpu worker pools */
>  static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool 
> [NR_STD_WORKER_POOLS],
> @@ -3491,6 +3491,7 @@ static struct pool_workqueue 
> *numa_pwq_tbl_install(struct workqueue_struct *wq,
>  struct wq_unbound_install_ctx {
>       struct workqueue_struct *wq;    /* target to be installed */
>       struct workqueue_attrs  *attrs; /* attrs for installing */
> +     struct list_head        list;   /* queued for batching commit */
>       struct pool_workqueue   *dfl_pwq;
>       struct pool_workqueue   *pwq_tbl[];
>  };
> @@ -3513,10 +3514,11 @@ static void wq_unbound_install_ctx_free(struct 
> wq_unbound_install_ctx *ctx)
>  
>  static struct wq_unbound_install_ctx *
>  wq_unbound_install_ctx_prepare(struct workqueue_struct *wq,
> -                            const struct workqueue_attrs *attrs)
> +                            const struct workqueue_attrs *attrs,
> +                            cpumask_var_t unbound_cpumask)
>  {
>       struct wq_unbound_install_ctx *ctx;
> -     struct workqueue_attrs *new_attrs, *tmp_attrs;
> +     struct workqueue_attrs *new_attrs, *pwq_attrs, *tmp_attrs;
>       int node;
>  
>       lockdep_assert_held(&wq_pool_mutex);
> @@ -3525,13 +3527,16 @@ wq_unbound_install_ctx_prepare(struct 
> workqueue_struct *wq,
>                     GFP_KERNEL);
>  
>       new_attrs = alloc_workqueue_attrs(GFP_KERNEL);
> +     pwq_attrs = alloc_workqueue_attrs(GFP_KERNEL);
>       tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL);
>       if (!ctx || !new_attrs || !tmp_attrs)
>               goto out_free;
>  
>       /* make a copy of @attrs and sanitize it */
>       copy_workqueue_attrs(new_attrs, attrs);
> -     cpumask_and(new_attrs->cpumask, new_attrs->cpumask, wq_unbound_cpumask);
> +     copy_workqueue_attrs(pwq_attrs, attrs);
> +     cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
> +     cpumask_and(pwq_attrs->cpumask, pwq_attrs->cpumask, unbound_cpumask);
>  
>       /*
>        * We may create multiple pwqs with differing cpumasks.  Make a
> @@ -3544,13 +3549,21 @@ wq_unbound_install_ctx_prepare(struct 
> workqueue_struct *wq,
>        * If something goes wrong during CPU up/down, we'll fall back to
>        * the default pwq covering whole @attrs->cpumask.  Always create
>        * it even if we don't use it immediately.
> +      *
> +      * If the cpumask set by the user doesn't overlap with the global
> +      * wq_unbound_cpumask, we ignore the wq_unbound_cpumask for this wq
> +      * which means all its nodes' pwqs are its default pwq and its default
> +      * pwq's workers' cpumask is totally equals to the user setting.
>        */
> -     ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
> +     if (cpumask_empty(pwq_attrs->cpumask))
> +             ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
> +     else
> +             ctx->dfl_pwq = alloc_unbound_pwq(wq, pwq_attrs);
>       if (!ctx->dfl_pwq)
>               goto out_free;
>  
>       for_each_node(node) {
> -             if (wq_calc_node_cpumask(attrs, node, -1, tmp_attrs->cpumask)) {
> +             if (wq_calc_node_cpumask(pwq_attrs, node, -1, 
> tmp_attrs->cpumask)) {
>                       ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
>                       if (!ctx->pwq_tbl[node])
>                               goto out_free;
> @@ -3564,6 +3577,7 @@ wq_unbound_install_ctx_prepare(struct workqueue_struct 
> *wq,
>       ctx->attrs = new_attrs;
>  
>  out_free:
> +     free_workqueue_attrs(pwq_attrs);
>       free_workqueue_attrs(tmp_attrs);
>  
>       if (!ctx || !ctx->wq) {
> @@ -3634,7 +3648,7 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
>       get_online_cpus();
>  
>       mutex_lock(&wq_pool_mutex);
> -     ctx = wq_unbound_install_ctx_prepare(wq, attrs);
> +     ctx = wq_unbound_install_ctx_prepare(wq, attrs, wq_unbound_cpumask);
>       mutex_unlock(&wq_pool_mutex);
>  
>       put_online_cpus();
> @@ -3961,19 +3975,85 @@ static struct bus_type wq_subsys = {
>       .dev_groups                     = wq_sysfs_groups,
>  };
>  
> +static int unbounds_cpumask_apply(cpumask_var_t cpumask)
> +{
> +     LIST_HEAD(ctxs);
> +     int ret = 0;
> +     struct workqueue_struct *wq;
> +     struct wq_unbound_install_ctx *ctx, *n;
> +
> +     lockdep_assert_held(&wq_pool_mutex);
> +
> +     list_for_each_entry(wq, &workqueues, list) {
> +             if (!(wq->flags & WQ_UNBOUND))
> +                     continue;
> +             /* creating multiple pwqs breaks ordering guarantee */
> +             if (wq->flags & __WQ_ORDERED)
> +                     continue;
> +
> +             ctx = wq_unbound_install_ctx_prepare(wq, wq->unbound_attrs,
> +                                                  cpumask);
> +             if (!ctx) {
> +                     ret = -ENOMEM;
> +                     break;
> +             }
> +
> +             list_add_tail(&ctx->list, &ctxs);
> +     }
> +
> +     list_for_each_entry_safe(ctx, n, &ctxs, list) {
> +             if (ret >= 0)
> +                     wq_unbound_install_ctx_commit(ctx);
> +             wq_unbound_install_ctx_free(ctx);
> +     }
> +
> +     return ret;
> +}
> +
> +static ssize_t unbounds_cpumask_store(struct device *dev,
> +                                   struct device_attribute *attr,
> +                                   const char *buf, size_t count)
> +{
> +     cpumask_var_t cpumask;
> +     int ret = -EINVAL;
> +
> +     if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
> +             return -ENOMEM;
> +
> +     ret = cpumask_parse(buf, cpumask);
> +     if (ret)
> +             goto out;
> +
> +     get_online_cpus();
> +     cpumask_and(cpumask, cpumask, cpu_possible_mask);
> +     if (cpumask_intersects(cpumask, cpu_online_mask)) {
> +             mutex_lock(&wq_pool_mutex);
> +             ret = unbounds_cpumask_apply(cpumask);
> +             if (ret >= 0)
> +                     cpumask_copy(wq_unbound_cpumask, cpumask);
> +             mutex_unlock(&wq_pool_mutex);
> +     }
> +     put_online_cpus();
> +out:
> +     free_cpumask_var(cpumask);
> +     return ret ? ret : count;
> +}
> +
>  static ssize_t unbounds_cpumask_show(struct device *dev,
>                                    struct device_attribute *attr, char *buf)
>  {
>       int written;
>  
> +     mutex_lock(&wq_pool_mutex);
>       written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
>                           cpumask_pr_args(wq_unbound_cpumask));
> +     mutex_unlock(&wq_pool_mutex);
>  
>       return written;
>  }
>  
>  static struct device_attribute wq_sysfs_cpumask_attr =
> -     __ATTR(cpumask, 0444, unbounds_cpumask_show, NULL);
> +     __ATTR(cpumask, 0644, unbounds_cpumask_show, unbounds_cpumask_store);
>  
>  static int __init wq_sysfs_init(void)
>  {
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to