[ crickets ]

-- Steve

On Thu, 10 Mar 2016 16:44:11 -0500
Steven Rostedt <[email protected]> (by way of Steven Rostedt
<[email protected]>) wrote:

> This patch was recently backported to 4.1.19, and when I merged it with -rt,
> the following bug triggered:
> 
> ===============================
> [ INFO: suspicious RCU usage. ]
> 4.1.19-test-rt22+ #1 Not tainted
> -------------------------------
> /work/rt/stable-rt.git/kernel/workqueue.c:608 sched RCU, wq->mutex or wq_pool_mutex should be held!
> 
> other info that might help us debug this:
> 
> 
> rcu_scheduler_active = 1, debug_locks = 0
> 2 locks held by swapper/0/1:
>  #0:  ((pendingb_lock).lock){+.+...}, at: [<ffffffff8105e4b7>] __local_lock_irq+0x21/0x74
>  #1:  (rcu_read_lock){......}, at: [<ffffffff8105fbdc>] rcu_read_lock+0x0/0x6c
> 
> stack backtrace:
> CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.1.19-test-rt22+ #1
> Hardware name: MSI MS-7823/CSM-H87M-G43 (MS-7823), BIOS V1.6 02/22/2014
>  0000000000000000 ffff8802101dfd08 ffffffff816083ae ffff880210240000
>  0000000000000001 ffff8802101dfd38 ffffffff81087cf1 ffff88021588e800
>  0000000000000000 0000000000000100 ffff88021588e800 ffff8802101dfd58
> Call Trace:
>  [<ffffffff816083ae>] dump_stack+0x67/0x90
>  [<ffffffff81087cf1>] lockdep_rcu_suspicious+0x107/0x110
>  [<ffffffff8105f9fe>] unbound_pwq_by_node+0x6c/0x93
>  [<ffffffff81060e62>] __queue_work+0xc8/0x2e7
>  [<ffffffff8106f0cc>] ? migrate_disable+0x28/0xe6
>  [<ffffffff81061126>] queue_work_on+0x85/0xb8
>  [<ffffffff81f54188>] ? acpi_battery_init+0x16/0x16
>  [<ffffffff8106a382>] __async_schedule+0x18b/0x19d
>  [<ffffffff81f54172>] ? acpi_memory_hotplug_init+0x12/0x12
>  [<ffffffff8106a3b9>] async_schedule+0x15/0x17
>  [<ffffffff81f54184>] acpi_battery_init+0x12/0x16
>  [<ffffffff81000415>] do_one_initcall+0xf7/0x18a
>  [<ffffffff8106692f>] ? parse_args+0x258/0x35f
>  [<ffffffff81f140be>] kernel_init_freeable+0x205/0x29e
>  [<ffffffff81f137d0>] ? do_early_param+0x86/0x86
>  [<ffffffff8160d9bc>] ? _raw_spin_unlock_irq+0x5d/0x72
>  [<ffffffff815fc28f>] ? rest_init+0x143/0x143
>  [<ffffffff815fc29d>] kernel_init+0xe/0xdf
>  [<ffffffff8160e712>] ret_from_fork+0x42/0x70
>  [<ffffffff815fc28f>] ? rest_init+0x143/0x143
> 
> 
> 
> On Mon, May 11, 2015 at 05:35:48PM +0800, Lai Jiangshan wrote:
> > ---
> >  kernel/workqueue.c | 44 ++++++++++++++++++++++++++------------------
> >  1 file changed, 26 insertions(+), 18 deletions(-)
> > 
> > diff --git a/kernel/workqueue.c b/kernel/workqueue.c
> > index a3915ab..fa8b949 100644
> > --- a/kernel/workqueue.c
> > +++ b/kernel/workqueue.c
> > @@ -127,6 +127,12 @@ enum {
> >   *
> >   * PR: wq_pool_mutex protected for writes.  Sched-RCU protected for reads.
> >   *
> > + * PW: wq_pool_mutex and wq->mutex protected for writes.  Any one of them
> > + *     protected for reads.
> > + *
> > + * PWR: wq_pool_mutex and wq->mutex protected for writes. Any one of them
> > + *      or sched-RCU for reads.  
> 
> How exactly is sched-RCU protecting this here? The cause of the 4.1-rt
> issue is that we converted the local_irq_save() in queue_work_on() into a
> "local_lock_irqsave()", which, when PREEMPT_RT is enabled, becomes a mutex
> that only disables migration (the task can not migrate to another CPU).
> This also prevents the current CPU from going offline.
> 
> Does this code really need to be protected from being preempted, or is
> disabling migration good enough?
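> 
> (For reference: with lockdep enabled, rcu_read_lock_sched_held() boils
> down to roughly this (condensed from kernel/rcu/update.c), which is why
> migrate_disable() alone does not satisfy the new assertion:
> 
>         /* sched-RCU reader == lockdep has seen rcu_read_lock_sched(),
>          * or preemption is off, or interrupts are off */
>         return lock_is_held(&rcu_sched_lock_map) ||
>                preempt_count() != 0 || irqs_disabled();
> 
> With only the -rt local lock held, none of the three is true.)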
> 
> Thanks!
> 
> -- Steve
> 
> 
> > + *
> >   * WQ: wq->mutex protected.
> >   *
> >   * WR: wq->mutex protected for writes.  Sched-RCU protected for reads.
> > @@ -247,8 +253,8 @@ struct workqueue_struct {
> >     int                     nr_drainers;    /* WQ: drain in progress */
> >     int                     saved_max_active; /* WQ: saved pwq max_active */
> >  
> > -   struct workqueue_attrs  *unbound_attrs; /* WQ: only for unbound wqs */
> > -   struct pool_workqueue   *dfl_pwq;       /* WQ: only for unbound wqs */
> > +   struct workqueue_attrs  *unbound_attrs; /* PW: only for unbound wqs */
> > +   struct pool_workqueue   *dfl_pwq;       /* PW: only for unbound wqs */
> >  
> >  #ifdef CONFIG_SYSFS
> >     struct wq_device        *wq_dev;        /* I: for sysfs interface */
> > @@ -268,7 +274,7 @@ struct workqueue_struct {
> >     /* hot fields used during command issue, aligned to cacheline */
> >     unsigned int            flags ____cacheline_aligned; /* WQ: WQ_* flags */
> >     struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */
> > -   struct pool_workqueue __rcu *numa_pwq_tbl[]; /* FR: unbound pwqs indexed by node */
> > +   struct pool_workqueue __rcu *numa_pwq_tbl[]; /* PWR: unbound pwqs indexed by node */
> >  };
> >  
> >  static struct kmem_cache *pwq_cache;
> > @@ -349,6 +355,12 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
> >                        lockdep_is_held(&wq->mutex),                 \
> >                        "sched RCU or wq->mutex should be held")
> >  
> > +#define assert_rcu_or_wq_mutex_or_pool_mutex(wq)                   \
> > +   rcu_lockdep_assert(rcu_read_lock_sched_held() ||                \
> > +                      lockdep_is_held(&wq->mutex) ||               \
> > +                      lockdep_is_held(&wq_pool_mutex),             \
> > +                      "sched RCU, wq->mutex or wq_pool_mutex should be held")
> > +
> >  #define for_each_cpu_worker_pool(pool, cpu)                                \
> >     for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0];               \
> >          (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
> > @@ -553,7 +565,7 @@ static int worker_pool_assign_id(struct worker_pool *pool)
> >   * @wq: the target workqueue
> >   * @node: the node ID
> >   *
> > - * This must be called either with pwq_lock held or sched RCU read locked.
> > + * This must be called either with wq_pool_mutex held or sched RCU read locked.
> >   * If the pwq needs to be used beyond the locking in effect, the caller is
> >   * responsible for guaranteeing that the pwq stays online.
> >   *
> > @@ -562,7 +574,7 @@ static int worker_pool_assign_id(struct worker_pool *pool)
> >  static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
> >                                               int node)
> >  {
> > -   assert_rcu_or_wq_mutex(wq);
> > +   assert_rcu_or_wq_mutex_or_pool_mutex(wq);
> >     return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
> >  }
> >  
> > @@ -3480,6 +3492,7 @@ static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
> >     struct pool_workqueue *old_pwq;
> >  
> >     lockdep_assert_held(&wq->mutex);
> > +   lockdep_assert_held(&wq_pool_mutex);
> >  
> >     /* link_pwq() can handle duplicate calls */
> >     link_pwq(pwq);
> > @@ -3644,10 +3657,9 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
> >      * pwqs accordingly.
> >      */
> >     get_online_cpus();
> > -
> >     mutex_lock(&wq_pool_mutex);
> > +
> >     ctx = apply_wqattrs_prepare(wq, attrs);
> > -   mutex_unlock(&wq_pool_mutex);
> >  
> >     /* the ctx has been prepared successfully, let's commit it */
> >     if (ctx) {
> > @@ -3655,10 +3667,11 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
> >             ret = 0;
> >     }
> >  
> > -   put_online_cpus();
> > -
> >     apply_wqattrs_cleanup(ctx);
> >  
> > +   mutex_unlock(&wq_pool_mutex);
> > +   put_online_cpus();
> > +
> >     return ret;
> >  }
> >  
> > @@ -3695,7 +3708,8 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
> >  
> >     lockdep_assert_held(&wq_pool_mutex);
> >  
> > -   if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND))
> > +   if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND) ||
> > +       wq->unbound_attrs->no_numa)
> >             return;
> >  
> >     /*
> > @@ -3706,10 +3720,6 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
> >     target_attrs = wq_update_unbound_numa_attrs_buf;
> >     cpumask = target_attrs->cpumask;
> >  
> > -   mutex_lock(&wq->mutex);
> > -   if (wq->unbound_attrs->no_numa)
> > -           goto out_unlock;
> > -
> >     copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
> >     pwq = unbound_pwq_by_node(wq, node);
> >  
> > @@ -3721,19 +3731,16 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
> >      */
> >     if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpu_off, cpumask)) {
> >             if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
> > -                   goto out_unlock;
> > +                   return;
> >     } else {
> >             goto use_dfl_pwq;
> >     }
> >  
> > -   mutex_unlock(&wq->mutex);
> > -
> >     /* create a new pwq */
> >     pwq = alloc_unbound_pwq(wq, target_attrs);
> >     if (!pwq) {
> >             pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
> >                     wq->name);
> > -           mutex_lock(&wq->mutex);
> >             goto use_dfl_pwq;
> >     }
> >  
> > @@ -3748,6 +3755,7 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
> >     goto out_unlock;
> >  
> >  use_dfl_pwq:
> > +   mutex_lock(&wq->mutex);
> >     spin_lock_irq(&wq->dfl_pwq->pool->lock);
> >     get_pwq(wq->dfl_pwq);
> >     spin_unlock_irq(&wq->dfl_pwq->pool->lock);
> > -- 
> > 2.1.0
> > 
