Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

2016-02-18 Thread Joonas Lahtinen
Hi,

On ma, 2016-02-15 at 18:06 +0100, Peter Zijlstra wrote:
> On Mon, Feb 15, 2016 at 03:17:55PM +0100, Peter Zijlstra wrote:
> > On Mon, Feb 15, 2016 at 02:36:43PM +0200, Joonas Lahtinen wrote:
> > > Instead of implementing a custom locked reference counting, use lockref.
> > > 
> > > Current implementation leads to a deadlock splat on Intel SKL platforms
> > > when lockdep debugging is enabled.
> > > 
> > > This is due to few of CPUfreq drivers (including Intel P-state) having 
> > > this;
> > > policy->rwsem is locked during driver initialization and the functions 
> > > called
> > > during init that actually apply CPU limits use get_online_cpus (because 
> > > they
> > > have other calling paths too), which will briefly lock cpu_hotplug.lock to
> > > increase cpu_hotplug.refcount.
> > > 
> > > On later calling path, when doing a suspend, when cpu_hotplug_begin() is 
> > > called
> > > in disable_nonboot_cpus(), callbacks to CPUfreq functions get called 
> > > after,
> > > which will lock policy->rwsem and cpu_hotplug.lock is already held by
> > > cpu_hotplug_begin() and we do have a potential deadlock scenario reported 
> > > by
> > > our CI system (though it is a very unlikely one). See the Bugzilla link 
> > > for more
> > > details.
> > 
> > I've been meaning to change the thing into a percpu-rwsem, I just
> > haven't had time to look into the lockdep splat that generated.
> 
> 
> The below has plenty lockdep issues because percpu-rwsem is
> reader-writer fair (like the regular rwsem), so it does throw up a fair
> number of very icky issues.
> 
> If at all possible, I'd really rather fix those and have a 'saner'
> hotplug lock, rather than muddle on with open-coded horror lock we have
> now.
> 

I do still agree the below would be a worthy change to proceed with.

CC'd Oleg here too to give a comment.

Regards, Joonas

> 
> --- a/include/linux/cpu.h
> +++ b/include/linux/cpu.h
> @@ -225,6 +225,8 @@ extern struct bus_type cpu_subsys;
>  #ifdef CONFIG_HOTPLUG_CPU
>  /* Stop CPUs going up and down. */
>  
> +extern void cpu_hotplug_init_task(struct task_struct *p);
> +
>  extern void cpu_hotplug_begin(void);
>  extern void cpu_hotplug_done(void);
>  extern void get_online_cpus(void);
> @@ -242,6 +244,8 @@ int cpu_down(unsigned int cpu);
>  
>  #else/* CONFIG_HOTPLUG_CPU */
>  
> +static inline void cpu_hotplug_init_task(struct task_struct *p) {}
> +
>  static inline void cpu_hotplug_begin(void) {}
>  static inline void cpu_hotplug_done(void) {}
>  #define get_online_cpus()do { } while (0)
> --- a/include/linux/percpu-rwsem.h
> +++ b/include/linux/percpu-rwsem.h
> @@ -16,6 +16,15 @@ struct percpu_rw_semaphore {
>   wait_queue_head_t   write_waitq;
>  };
>  
> +#define DEFINE_STATIC_PERCPU_RWSEM(name) \
> +static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_frc_##name);  \
> +static struct percpu_rw_semaphore name = {   \
> + .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC),\
> + .fast_read_ctr = &__percpu_rwsem_frc_##name,\
> + .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \
> + .write_waitq = __WAIT_QUEUE_HEAD_INITIALIZER(name.write_waitq), \
> +}
> +
>  extern void percpu_down_read(struct percpu_rw_semaphore *);
>  extern int  percpu_down_read_trylock(struct percpu_rw_semaphore *);
>  extern void percpu_up_read(struct percpu_rw_semaphore *);
> @@ -33,9 +42,11 @@ extern void percpu_free_rwsem(struct per
>   __percpu_init_rwsem(brw, #brw, _key); \
>  })
>  
> -
>  #define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem)
>  
> +#define percpu_rwsem_assert_held(sem)  \
> + lockdep_assert_held(&(sem)->rw_sem)
> +
>  static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
>   bool read, unsigned long ip)
>  {
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1403,6 +1403,9 @@ struct task_struct {
>   struct task_struct *last_wakee;
>  
>   int wake_cpu;
> +#ifdef CONFIG_HOTPLUG_CPU
> + int cpuhp_ref;
> +#endif
>  #endif
>   int on_rq;
>  
> --- a/kernel/cpu.c
> +++ b/kernel/cpu.c
> @@ -23,6 +23,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include "smpboot.h"
>  
> @@ -51,121 +52,52 @@ EXPORT_SYMBOL(cpu_notifier_register_done
>  
>  static RAW_NOTIFIER_HEAD(cpu_chain);
>  
> -/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
> +/*
> + * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
>   * Should always be manipulated under cpu_add_remove_lock
>   */
>  static int cpu_hotplug_disabled;
>  
>  #ifdef CONFIG_HOTPLUG_CPU
>  
> -static struct {
> - struct task_struct *active_writer;
> - /* wait queue to wake up the active_writer */
> - wait_queue_head_t wq;
> - /* verifies that no writer will get active while readers are active */
> - struct mutex lock;
> 

Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

2016-02-18 Thread Joonas Lahtinen
Hi,

On ma, 2016-02-15 at 18:06 +0100, Peter Zijlstra wrote:
> On Mon, Feb 15, 2016 at 03:17:55PM +0100, Peter Zijlstra wrote:
> > On Mon, Feb 15, 2016 at 02:36:43PM +0200, Joonas Lahtinen wrote:
> > > Instead of implementing a custom locked reference counting, use lockref.
> > > 
> > > Current implementation leads to a deadlock splat on Intel SKL platforms
> > > when lockdep debugging is enabled.
> > > 
> > > This is due to few of CPUfreq drivers (including Intel P-state) having 
> > > this;
> > > policy->rwsem is locked during driver initialization and the functions 
> > > called
> > > during init that actually apply CPU limits use get_online_cpus (because 
> > > they
> > > have other calling paths too), which will briefly lock cpu_hotplug.lock to
> > > increase cpu_hotplug.refcount.
> > > 
> > > On later calling path, when doing a suspend, when cpu_hotplug_begin() is 
> > > called
> > > in disable_nonboot_cpus(), callbacks to CPUfreq functions get called 
> > > after,
> > > which will lock policy->rwsem and cpu_hotplug.lock is already held by
> > > cpu_hotplug_begin() and we do have a potential deadlock scenario reported 
> > > by
> > > our CI system (though it is a very unlikely one). See the Bugzilla link 
> > > for more
> > > details.
> > 
> > I've been meaning to change the thing into a percpu-rwsem, I just
> > haven't had time to look into the lockdep splat that generated.
> 
> 
> The below has plenty lockdep issues because percpu-rwsem is
> reader-writer fair (like the regular rwsem), so it does throw up a fair
> number of very icky issues.
> 
> If at all possible, I'd really rather fix those and have a 'saner'
> hotplug lock, rather than muddle on with open-coded horror lock we have
> now.
> 

I do still agree the below would be a worthy change to proceed with.

CC'd Oleg here too to give a comment.

Regards, Joonas

> 
> --- a/include/linux/cpu.h
> +++ b/include/linux/cpu.h
> @@ -225,6 +225,8 @@ extern struct bus_type cpu_subsys;
>  #ifdef CONFIG_HOTPLUG_CPU
>  /* Stop CPUs going up and down. */
>  
> +extern void cpu_hotplug_init_task(struct task_struct *p);
> +
>  extern void cpu_hotplug_begin(void);
>  extern void cpu_hotplug_done(void);
>  extern void get_online_cpus(void);
> @@ -242,6 +244,8 @@ int cpu_down(unsigned int cpu);
>  
>  #else/* CONFIG_HOTPLUG_CPU */
>  
> +static inline void cpu_hotplug_init_task(struct task_struct *p) {}
> +
>  static inline void cpu_hotplug_begin(void) {}
>  static inline void cpu_hotplug_done(void) {}
>  #define get_online_cpus()do { } while (0)
> --- a/include/linux/percpu-rwsem.h
> +++ b/include/linux/percpu-rwsem.h
> @@ -16,6 +16,15 @@ struct percpu_rw_semaphore {
>   wait_queue_head_t   write_waitq;
>  };
>  
> +#define DEFINE_STATIC_PERCPU_RWSEM(name) \
> +static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_frc_##name);  \
> +static struct percpu_rw_semaphore name = {   \
> + .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC),\
> + .fast_read_ctr = &__percpu_rwsem_frc_##name,\
> + .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \
> + .write_waitq = __WAIT_QUEUE_HEAD_INITIALIZER(name.write_waitq), \
> +}
> +
>  extern void percpu_down_read(struct percpu_rw_semaphore *);
>  extern int  percpu_down_read_trylock(struct percpu_rw_semaphore *);
>  extern void percpu_up_read(struct percpu_rw_semaphore *);
> @@ -33,9 +42,11 @@ extern void percpu_free_rwsem(struct per
>   __percpu_init_rwsem(brw, #brw, _key); \
>  })
>  
> -
>  #define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem)
>  
> +#define percpu_rwsem_assert_held(sem)  \
> + lockdep_assert_held(&(sem)->rw_sem)
> +
>  static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
>   bool read, unsigned long ip)
>  {
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1403,6 +1403,9 @@ struct task_struct {
>   struct task_struct *last_wakee;
>  
>   int wake_cpu;
> +#ifdef CONFIG_HOTPLUG_CPU
> + int cpuhp_ref;
> +#endif
>  #endif
>   int on_rq;
>  
> --- a/kernel/cpu.c
> +++ b/kernel/cpu.c
> @@ -23,6 +23,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include "smpboot.h"
>  
> @@ -51,121 +52,52 @@ EXPORT_SYMBOL(cpu_notifier_register_done
>  
>  static RAW_NOTIFIER_HEAD(cpu_chain);
>  
> -/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
> +/*
> + * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
>   * Should always be manipulated under cpu_add_remove_lock
>   */
>  static int cpu_hotplug_disabled;
>  
>  #ifdef CONFIG_HOTPLUG_CPU
>  
> -static struct {
> - struct task_struct *active_writer;
> - /* wait queue to wake up the active_writer */
> - wait_queue_head_t wq;
> - /* verifies that no writer will get active while readers are active */
> - struct mutex lock;
> 

Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

2016-02-17 Thread Peter Zijlstra
On Wed, Feb 17, 2016 at 05:13:21PM +0100, Daniel Vetter wrote:
> And for context we're hitting this on CI in a bunch of our machines, which

What's CI ?


Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

2016-02-17 Thread Peter Zijlstra
On Wed, Feb 17, 2016 at 05:13:21PM +0100, Daniel Vetter wrote:
> And for context we're hitting this on CI in a bunch of our machines, which

What's CI ?


Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

2016-02-17 Thread Daniel Vetter
On Wed, Feb 17, 2016 at 03:20:05PM +0100, Peter Zijlstra wrote:
> On Wed, Feb 17, 2016 at 02:47:31PM +0200, Joonas Lahtinen wrote:
> > On ti, 2016-02-16 at 12:07 +0100, Peter Zijlstra wrote:
> > > On Tue, Feb 16, 2016 at 12:51:03PM +0200, Joonas Lahtinen wrote:
> > > > Quoting my original patch;
> > > > 
> > > > "See the Bugzilla link for more details.
> > > 
> > > If its not in the Changelog it doesn't exist. Patches should be self
> > > contained and not refer to external sources for critical information.
> > 
> > The exact locking case in CPUfreq drivers causing a splat is described
> > in the patch. Details were already included, that's why term "more
> > details" was used.
> 
> Barely. What was not described was why you went to tinker with the
> hotplug lock instead of sanitizing cpufreq. Nor why your chosen solution
> is good.
> 
> > This is not exactly taking us closer to a fix, 
> 
> Why you think we can discuss fixes if you've not actually described your
> problem is beyond me.

Can we please stop the petty-fights while figuring out how to work out a
solution here, thanks.

And for context we're hitting this on CI in a bunch of our machines, which
means no more lockdep checking for us. Which is, at least for me, pretty
serious, and why we're throwing complete cpu-anything newbies at that code
trying to come up with some solution to unblock our CI efforts for the
intel gfx driver. Unfortunately our attempts at just disabling lots of
Kconfig symbols proofed futile, so ideas to avoid all that code highly
welcome.

As soon as CI stops hitting this we'll jump out of your inbox, if you want
so.

Thanks, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

2016-02-17 Thread Daniel Vetter
On Wed, Feb 17, 2016 at 03:20:05PM +0100, Peter Zijlstra wrote:
> On Wed, Feb 17, 2016 at 02:47:31PM +0200, Joonas Lahtinen wrote:
> > On ti, 2016-02-16 at 12:07 +0100, Peter Zijlstra wrote:
> > > On Tue, Feb 16, 2016 at 12:51:03PM +0200, Joonas Lahtinen wrote:
> > > > Quoting my original patch;
> > > > 
> > > > "See the Bugzilla link for more details.
> > > 
> > > If its not in the Changelog it doesn't exist. Patches should be self
> > > contained and not refer to external sources for critical information.
> > 
> > The exact locking case in CPUfreq drivers causing a splat is described
> > in the patch. Details were already included, that's why term "more
> > details" was used.
> 
> Barely. What was not described was why you went to tinker with the
> hotplug lock instead of sanitizing cpufreq. Nor why your chosen solution
> is good.
> 
> > This is not exactly taking us closer to a fix, 
> 
> Why you think we can discuss fixes if you've not actually described your
> problem is beyond me.

Can we please stop the petty-fights while figuring out how to work out a
solution here, thanks.

And for context we're hitting this on CI in a bunch of our machines, which
means no more lockdep checking for us. Which is, at least for me, pretty
serious, and why we're throwing complete cpu-anything newbies at that code
trying to come up with some solution to unblock our CI efforts for the
intel gfx driver. Unfortunately our attempts at just disabling lots of
Kconfig symbols proofed futile, so ideas to avoid all that code highly
welcome.

As soon as CI stops hitting this we'll jump out of your inbox, if you want
so.

Thanks, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

2016-02-17 Thread Peter Zijlstra
On Wed, Feb 17, 2016 at 02:47:31PM +0200, Joonas Lahtinen wrote:
> On ti, 2016-02-16 at 12:07 +0100, Peter Zijlstra wrote:
> > On Tue, Feb 16, 2016 at 12:51:03PM +0200, Joonas Lahtinen wrote:
> > > Quoting my original patch;
> > > 
> > > "See the Bugzilla link for more details.
> > 
> > If its not in the Changelog it doesn't exist. Patches should be self
> > contained and not refer to external sources for critical information.
> 
> The exact locking case in CPUfreq drivers causing a splat is described
> in the patch. Details were already included, that's why term "more
> details" was used.

Barely. What was not described was why you went to tinker with the
hotplug lock instead of sanitizing cpufreq. Nor why your chosen solution
is good.

> This is not exactly taking us closer to a fix, 

Why you think we can discuss fixes if you've not actually described your
problem is beyond me.




Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

2016-02-17 Thread Peter Zijlstra
On Wed, Feb 17, 2016 at 02:47:31PM +0200, Joonas Lahtinen wrote:
> On ti, 2016-02-16 at 12:07 +0100, Peter Zijlstra wrote:
> > On Tue, Feb 16, 2016 at 12:51:03PM +0200, Joonas Lahtinen wrote:
> > > Quoting my original patch;
> > > 
> > > "See the Bugzilla link for more details.
> > 
> > If its not in the Changelog it doesn't exist. Patches should be self
> > contained and not refer to external sources for critical information.
> 
> The exact locking case in CPUfreq drivers causing a splat is described
> in the patch. Details were already included, that's why term "more
> details" was used.

Barely. What was not described was why you went to tinker with the
hotplug lock instead of sanitizing cpufreq. Nor why your chosen solution
is good.

> This is not exactly taking us closer to a fix, 

Why you think we can discuss fixes if you've not actually described your
problem is beyond me.




Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

2016-02-17 Thread Joonas Lahtinen
On ti, 2016-02-16 at 12:07 +0100, Peter Zijlstra wrote:
> On Tue, Feb 16, 2016 at 12:51:03PM +0200, Joonas Lahtinen wrote:
> > Quoting my original patch;
> > 
> > "See the Bugzilla link for more details.
> 
> If its not in the Changelog it doesn't exist. Patches should be self
> contained and not refer to external sources for critical information.

The exact locking case in CPUfreq drivers causing a splat is described
in the patch. Details were already included, that's why term "more
details" was used.

This is not exactly taking us closer to a fix, so I'd rather like to
discuss about the possibility of adding one more member to task_struct.
I added Oleg as CC to catch his attention to re-evaluate based on the
previous discussion in this thread.

Regards, Joonas

-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation



Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

2016-02-17 Thread Joonas Lahtinen
On ti, 2016-02-16 at 12:07 +0100, Peter Zijlstra wrote:
> On Tue, Feb 16, 2016 at 12:51:03PM +0200, Joonas Lahtinen wrote:
> > Quoting my original patch;
> > 
> > "See the Bugzilla link for more details.
> 
> If its not in the Changelog it doesn't exist. Patches should be self
> contained and not refer to external sources for critical information.

The exact locking case in CPUfreq drivers causing a splat is described
in the patch. Details were already included, that's why term "more
details" was used.

This is not exactly taking us closer to a fix, so I'd rather like to
discuss about the possibility of adding one more member to task_struct.
I added Oleg as CC to catch his attention to re-evaluate based on the
previous discussion in this thread.

Regards, Joonas

-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation



Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

2016-02-16 Thread Peter Zijlstra
On Tue, Feb 16, 2016 at 12:51:03PM +0200, Joonas Lahtinen wrote:
> Quoting my original patch;
> 
> "See the Bugzilla link for more details.

If its not in the Changelog it doesn't exist. Patches should be self
contained and not refer to external sources for critical information.


Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

2016-02-16 Thread Peter Zijlstra
On Tue, Feb 16, 2016 at 12:51:03PM +0200, Joonas Lahtinen wrote:
> Quoting my original patch;
> 
> "See the Bugzilla link for more details.

If its not in the Changelog it doesn't exist. Patches should be self
contained and not refer to external sources for critical information.


Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

2016-02-16 Thread Joonas Lahtinen
On ti, 2016-02-16 at 10:14 +0100, Peter Zijlstra wrote:
> On Tue, Feb 16, 2016 at 10:49:36AM +0200, Joonas Lahtinen wrote:
> > I originally thought of implementing this more similar to what you
> > specify, but then I came across a discussion in the mailing list where
> > it was NAKed adding more members to task_struct;
> > 
> > http://comments.gmane.org/gmane.linux.kernel/970273
> > 
> > Adding proper recursion (the way my initial implementation was going)
> > got ugly without modifying task_struct because get_online_cpus() is a
> > speed critical code path.
> 
> Yeah, just don't let Linus hear you say that. get_online_cpus() is _not_
> considered performance critical.

Oh well, at least changes to it added quite noticeably to the bootup
time of a system.

> 
> > So I'm all for fixing the current code in a different way if that will
> > then be merged.
> 
> So I'm not sure why you're poking at this horror show to begin with.
> ISTR you mentioning a lockdep splat for SKL, but failed to provide
> detail.
> 

Quoting my original patch;

"See the Bugzilla link for more details.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93294;

The improvement my patch implements is to use lockref for locked
reference counting (hotplug code previously rolled its own mutex +
atomic combo), which gets rid of the deadlock scenario described and
linked in the initial patch. Trace for the scenario;

https://bugs.freedesktop.org/attachment.cgi?id=121490

I think using lockref makes it substantially less special, lockref code
being a lot more battle-tested in the FS code than the previous
cpu_hotplug.lock mess.

> Making the hotplug lock _more_ special to fix that is just wrong. Fix
> the retarded locking that lead to it.
> 

I do agree that it's still not pretty, but now it does correctly what
the previous code was trying to do with custom mutex + atomic.

I'm all for fixing the code further, but prior to proceeding there
needs to be some sort of an agreement on either making
get_online_cpus() slower (which does not seem like a good idea) or
adding more members to task_struct.

Regards, Joonas

> 
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation


Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

2016-02-16 Thread Joonas Lahtinen
On ti, 2016-02-16 at 10:14 +0100, Peter Zijlstra wrote:
> On Tue, Feb 16, 2016 at 10:49:36AM +0200, Joonas Lahtinen wrote:
> > I originally thought of implementing this more similar to what you
> > specify, but then I came across a discussion in the mailing list where
> > it was NAKed adding more members to task_struct;
> > 
> > http://comments.gmane.org/gmane.linux.kernel/970273
> > 
> > Adding proper recursion (the way my initial implementation was going)
> > got ugly without modifying task_struct because get_online_cpus() is a
> > speed critical code path.
> 
> Yeah, just don't let Linus hear you say that. get_online_cpus() is _not_
> considered performance critical.

Oh well, at least changes to it added quite noticeably to the bootup
time of a system.

> 
> > So I'm all for fixing the current code in a different way if that will
> > then be merged.
> 
> So I'm not sure why you're poking at this horror show to begin with.
> ISTR you mentioning a lockdep splat for SKL, but failed to provide
> detail.
> 

Quoting my original patch;

"See the Bugzilla link for more details.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93294;

The improvement my patch implements is to use lockref for locked
reference counting (hotplug code previously rolled its own mutex +
atomic combo), which gets rid of the deadlock scenario described and
linked in the initial patch. Trace for the scenario;

https://bugs.freedesktop.org/attachment.cgi?id=121490

I think using lockref makes it substantially less special, lockref code
being a lot more battle-tested in the FS code than the previous
cpu_hotplug.lock mess.

> Making the hotplug lock _more_ special to fix that is just wrong. Fix
> the retarded locking that lead to it.
> 

I do agree that it's still not pretty, but now it does correctly what
the previous code was trying to do with custom mutex + atomic.

I'm all for fixing the code further, but prior to proceeding there
needs to be some sort of an agreement on either making
get_online_cpus() slower (which does not seem like a good idea) or
adding more members to task_struct.

Regards, Joonas

> 
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation


Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

2016-02-16 Thread Peter Zijlstra
On Tue, Feb 16, 2016 at 10:49:36AM +0200, Joonas Lahtinen wrote:
> I originally thought of implementing this more similar to what you
> specify, but then I came across a discussion in the mailing list where
> it was NAKed adding more members to task_struct;
> 
> http://comments.gmane.org/gmane.linux.kernel/970273
> 
> Adding proper recursion (the way my initial implementation was going)
> got ugly without modifying task_struct because get_online_cpus() is a
> speed critical code path.

Yeah, just don't let Linus hear you say that. get_online_cpus() is _not_
considered performance critical.

> So I'm all for fixing the current code in a different way if that will
> then be merged.

So I'm not sure why you're poking at this horror show to begin with.
ISTR you mentioning a lockdep splat for SKL, but failed to provide
detail.

Making the hotplug lock _more_ special to fix that is just wrong. Fix
the retarded locking that lead to it.




Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

2016-02-16 Thread Peter Zijlstra
On Tue, Feb 16, 2016 at 10:49:36AM +0200, Joonas Lahtinen wrote:
> I originally thought of implementing this more similar to what you
> specify, but then I came across a discussion in the mailing list where
> it was NAKed adding more members to task_struct;
> 
> http://comments.gmane.org/gmane.linux.kernel/970273
> 
> Adding proper recursion (the way my initial implementation was going)
> got ugly without modifying task_struct because get_online_cpus() is a
> speed critical code path.

Yeah, just don't let Linus hear you say that. get_online_cpus() is _not_
considered performance critical.

> So I'm all for fixing the current code in a different way if that will
> then be merged.

So I'm not sure why you're poking at this horror show to begin with.
ISTR you mentioning a lockdep splat for SKL, but failed to provide
detail.

Making the hotplug lock _more_ special to fix that is just wrong. Fix
the retarded locking that lead to it.




Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

2016-02-16 Thread Joonas Lahtinen
Hi,

On ma, 2016-02-15 at 18:06 +0100, Peter Zijlstra wrote:
> On Mon, Feb 15, 2016 at 03:17:55PM +0100, Peter Zijlstra wrote:
> > On Mon, Feb 15, 2016 at 02:36:43PM +0200, Joonas Lahtinen wrote:
> > > Instead of implementing a custom locked reference counting, use lockref.
> > > 
> > > Current implementation leads to a deadlock splat on Intel SKL platforms
> > > when lockdep debugging is enabled.
> > > 
> > > This is due to few of CPUfreq drivers (including Intel P-state) having 
> > > this;
> > > policy->rwsem is locked during driver initialization and the functions 
> > > called
> > > during init that actually apply CPU limits use get_online_cpus (because 
> > > they
> > > have other calling paths too), which will briefly lock cpu_hotplug.lock to
> > > increase cpu_hotplug.refcount.
> > > 
> > > On later calling path, when doing a suspend, when cpu_hotplug_begin() is 
> > > called
> > > in disable_nonboot_cpus(), callbacks to CPUfreq functions get called 
> > > after,
> > > which will lock policy->rwsem and cpu_hotplug.lock is already held by
> > > cpu_hotplug_begin() and we do have a potential deadlock scenario reported 
> > > by
> > > our CI system (though it is a very unlikely one). See the Bugzilla link 
> > > for more
> > > details.
> > 
> > I've been meaning to change the thing into a percpu-rwsem, I just
> > haven't had time to look into the lockdep splat that generated.
> 
> 
> The below has plenty lockdep issues because percpu-rwsem is
> reader-writer fair (like the regular rwsem), so it does throw up a fair
> number of very icky issues.
> 

I originally thought of implementing this more similar to what you
specify, but then I came across a discussion in the mailing list where
it was NAKed adding more members to task_struct;

http://comments.gmane.org/gmane.linux.kernel/970273

Adding proper recursion (the way my initial implementation was going)
got ugly without modifying task_struct because get_online_cpus() is a
speed critical code path.

So I'm all for fixing the current code in a different way if that will
then be merged.

Regards, Joonas

> If at all possible, I'd really rather fix those and have a 'saner'
> hotplug lock, rather than muddle on with open-coded horror lock we have
> now.
> 
> 



-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation


Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

2016-02-16 Thread Joonas Lahtinen
Hi,

On ma, 2016-02-15 at 18:06 +0100, Peter Zijlstra wrote:
> On Mon, Feb 15, 2016 at 03:17:55PM +0100, Peter Zijlstra wrote:
> > On Mon, Feb 15, 2016 at 02:36:43PM +0200, Joonas Lahtinen wrote:
> > > Instead of implementing a custom locked reference counting, use lockref.
> > > 
> > > Current implementation leads to a deadlock splat on Intel SKL platforms
> > > when lockdep debugging is enabled.
> > > 
> > > This is due to few of CPUfreq drivers (including Intel P-state) having 
> > > this;
> > > policy->rwsem is locked during driver initialization and the functions 
> > > called
> > > during init that actually apply CPU limits use get_online_cpus (because 
> > > they
> > > have other calling paths too), which will briefly lock cpu_hotplug.lock to
> > > increase cpu_hotplug.refcount.
> > > 
> > > On later calling path, when doing a suspend, when cpu_hotplug_begin() is 
> > > called
> > > in disable_nonboot_cpus(), callbacks to CPUfreq functions get called 
> > > after,
> > > which will lock policy->rwsem and cpu_hotplug.lock is already held by
> > > cpu_hotplug_begin() and we do have a potential deadlock scenario reported 
> > > by
> > > our CI system (though it is a very unlikely one). See the Bugzilla link 
> > > for more
> > > details.
> > 
> > I've been meaning to change the thing into a percpu-rwsem, I just
> > haven't had time to look into the lockdep splat that generated.
> 
> 
> The below has plenty lockdep issues because percpu-rwsem is
> reader-writer fair (like the regular rwsem), so it does throw up a fair
> number of very icky issues.
> 

I originally thought of implementing this more similar to what you
specify, but then I came across a discussion in the mailing list where
it was NAKed adding more members to task_struct;

http://comments.gmane.org/gmane.linux.kernel/970273

Adding proper recursion (the way my initial implementation was going)
got ugly without modifying task_struct because get_online_cpus() is a
speed critical code path.

So I'm all for fixing the current code in a different way if that will
then be merged.

Regards, Joonas

> If at all possible, I'd really rather fix those and have a 'saner'
> hotplug lock, rather than muddle on with open-coded horror lock we have
> now.
> 
> 



-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation


Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

2016-02-15 Thread Peter Zijlstra
On Mon, Feb 15, 2016 at 03:17:55PM +0100, Peter Zijlstra wrote:
> On Mon, Feb 15, 2016 at 02:36:43PM +0200, Joonas Lahtinen wrote:
> > Instead of implementing a custom locked reference counting, use lockref.
> > 
> > Current implementation leads to a deadlock splat on Intel SKL platforms
> > when lockdep debugging is enabled.
> > 
> > This is due to few of CPUfreq drivers (including Intel P-state) having this;
> > policy->rwsem is locked during driver initialization and the functions 
> > called
> > during init that actually apply CPU limits use get_online_cpus (because they
> > have other calling paths too), which will briefly lock cpu_hotplug.lock to
> > increase cpu_hotplug.refcount.
> > 
> > On later calling path, when doing a suspend, when cpu_hotplug_begin() is 
> > called
> > in disable_nonboot_cpus(), callbacks to CPUfreq functions get called after,
> > which will lock policy->rwsem and cpu_hotplug.lock is already held by
> > cpu_hotplug_begin() and we do have a potential deadlock scenario reported by
> > our CI system (though it is a very unlikely one). See the Bugzilla link for 
> > more
> > details.
> 
> I've been meaning to change the thing into a percpu-rwsem, I just
> haven't had time to look into the lockdep splat that generated.


The below has plenty lockdep issues because percpu-rwsem is
reader-writer fair (like the regular rwsem), so it does throw up a fair
number of very icky issues.

If at all possible, I'd really rather fix those and have a 'saner'
hotplug lock, rather than muddle on with open-coded horror lock we have
now.


--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -225,6 +225,8 @@ extern struct bus_type cpu_subsys;
 #ifdef CONFIG_HOTPLUG_CPU
 /* Stop CPUs going up and down. */
 
+extern void cpu_hotplug_init_task(struct task_struct *p);
+
 extern void cpu_hotplug_begin(void);
 extern void cpu_hotplug_done(void);
 extern void get_online_cpus(void);
@@ -242,6 +244,8 @@ int cpu_down(unsigned int cpu);
 
 #else  /* CONFIG_HOTPLUG_CPU */
 
+static inline void cpu_hotplug_init_task(struct task_struct *p) {}
+
 static inline void cpu_hotplug_begin(void) {}
 static inline void cpu_hotplug_done(void) {}
 #define get_online_cpus()  do { } while (0)
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -16,6 +16,15 @@ struct percpu_rw_semaphore {
wait_queue_head_t   write_waitq;
 };
 
+#define DEFINE_STATIC_PERCPU_RWSEM(name)   \
+static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_frc_##name);\
+static struct percpu_rw_semaphore name = { \
+   .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC),\
+   .fast_read_ctr = &__percpu_rwsem_frc_##name,\
+   .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \
+   .write_waitq = __WAIT_QUEUE_HEAD_INITIALIZER(name.write_waitq), \
+}
+
 extern void percpu_down_read(struct percpu_rw_semaphore *);
 extern int  percpu_down_read_trylock(struct percpu_rw_semaphore *);
 extern void percpu_up_read(struct percpu_rw_semaphore *);
@@ -33,9 +42,11 @@ extern void percpu_free_rwsem(struct per
__percpu_init_rwsem(brw, #brw, _key); \
 })
 
-
 #define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem)
 
+#define percpu_rwsem_assert_held(sem)  \
+   lockdep_assert_held(&(sem)->rw_sem)
+
 static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
bool read, unsigned long ip)
 {
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1403,6 +1403,9 @@ struct task_struct {
struct task_struct *last_wakee;
 
int wake_cpu;
+#ifdef CONFIG_HOTPLUG_CPU
+   int cpuhp_ref;
+#endif
 #endif
int on_rq;
 
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "smpboot.h"
 
@@ -51,121 +52,52 @@ EXPORT_SYMBOL(cpu_notifier_register_done
 
 static RAW_NOTIFIER_HEAD(cpu_chain);
 
-/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
+/*
+ * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
  * Should always be manipulated under cpu_add_remove_lock
  */
 static int cpu_hotplug_disabled;
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-static struct {
-   struct task_struct *active_writer;
-   /* wait queue to wake up the active_writer */
-   wait_queue_head_t wq;
-   /* verifies that no writer will get active while readers are active */
-   struct mutex lock;
-   /*
-* Also blocks the new readers during
-* an ongoing cpu hotplug operation.
-*/
-   atomic_t refcount;
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-   struct lockdep_map dep_map;
-#endif
-} cpu_hotplug = {
-   .active_writer = NULL,
-   .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
-   .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-   .dep_map = 

Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

2016-02-15 Thread Peter Zijlstra
On Mon, Feb 15, 2016 at 03:17:55PM +0100, Peter Zijlstra wrote:
> On Mon, Feb 15, 2016 at 02:36:43PM +0200, Joonas Lahtinen wrote:
> > Instead of implementing a custom locked reference counting, use lockref.
> > 
> > Current implementation leads to a deadlock splat on Intel SKL platforms
> > when lockdep debugging is enabled.
> > 
> > This is due to few of CPUfreq drivers (including Intel P-state) having this;
> > policy->rwsem is locked during driver initialization and the functions 
> > called
> > during init that actually apply CPU limits use get_online_cpus (because they
> > have other calling paths too), which will briefly lock cpu_hotplug.lock to
> > increase cpu_hotplug.refcount.
> > 
> > On later calling path, when doing a suspend, when cpu_hotplug_begin() is 
> > called
> > in disable_nonboot_cpus(), callbacks to CPUfreq functions get called after,
> > which will lock policy->rwsem and cpu_hotplug.lock is already held by
> > cpu_hotplug_begin() and we do have a potential deadlock scenario reported by
> > our CI system (though it is a very unlikely one). See the Bugzilla link for 
> > more
> > details.
> 
> I've been meaning to change the thing into a percpu-rwsem, I just
> haven't had time to look into the lockdep splat that generated.


The below has plenty lockdep issues because percpu-rwsem is
reader-writer fair (like the regular rwsem), so it does throw up a fair
number of very icky issues.

If at all possible, I'd really rather fix those and have a 'saner'
hotplug lock, rather than muddle on with open-coded horror lock we have
now.


--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -225,6 +225,8 @@ extern struct bus_type cpu_subsys;
 #ifdef CONFIG_HOTPLUG_CPU
 /* Stop CPUs going up and down. */
 
+extern void cpu_hotplug_init_task(struct task_struct *p);
+
 extern void cpu_hotplug_begin(void);
 extern void cpu_hotplug_done(void);
 extern void get_online_cpus(void);
@@ -242,6 +244,8 @@ int cpu_down(unsigned int cpu);
 
 #else  /* CONFIG_HOTPLUG_CPU */
 
+static inline void cpu_hotplug_init_task(struct task_struct *p) {}
+
 static inline void cpu_hotplug_begin(void) {}
 static inline void cpu_hotplug_done(void) {}
 #define get_online_cpus()  do { } while (0)
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -16,6 +16,15 @@ struct percpu_rw_semaphore {
wait_queue_head_t   write_waitq;
 };
 
+#define DEFINE_STATIC_PERCPU_RWSEM(name)   \
+static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_frc_##name);\
+static struct percpu_rw_semaphore name = { \
+   .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC),\
+   .fast_read_ctr = &__percpu_rwsem_frc_##name,\
+   .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \
+   .write_waitq = __WAIT_QUEUE_HEAD_INITIALIZER(name.write_waitq), \
+}
+
 extern void percpu_down_read(struct percpu_rw_semaphore *);
 extern int  percpu_down_read_trylock(struct percpu_rw_semaphore *);
 extern void percpu_up_read(struct percpu_rw_semaphore *);
@@ -33,9 +42,11 @@ extern void percpu_free_rwsem(struct per
__percpu_init_rwsem(brw, #brw, _key); \
 })
 
-
 #define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem)
 
+#define percpu_rwsem_assert_held(sem)  \
+   lockdep_assert_held(&(sem)->rw_sem)
+
 static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
bool read, unsigned long ip)
 {
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1403,6 +1403,9 @@ struct task_struct {
struct task_struct *last_wakee;
 
int wake_cpu;
+#ifdef CONFIG_HOTPLUG_CPU
+   int cpuhp_ref;
+#endif
 #endif
int on_rq;
 
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "smpboot.h"
 
@@ -51,121 +52,52 @@ EXPORT_SYMBOL(cpu_notifier_register_done
 
 static RAW_NOTIFIER_HEAD(cpu_chain);
 
-/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
+/*
+ * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
  * Should always be manipulated under cpu_add_remove_lock
  */
 static int cpu_hotplug_disabled;
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-static struct {
-   struct task_struct *active_writer;
-   /* wait queue to wake up the active_writer */
-   wait_queue_head_t wq;
-   /* verifies that no writer will get active while readers are active */
-   struct mutex lock;
-   /*
-* Also blocks the new readers during
-* an ongoing cpu hotplug operation.
-*/
-   atomic_t refcount;
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-   struct lockdep_map dep_map;
-#endif
-} cpu_hotplug = {
-   .active_writer = NULL,
-   .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
-   .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-   .dep_map = 

Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

2016-02-15 Thread Peter Zijlstra
On Mon, Feb 15, 2016 at 02:36:43PM +0200, Joonas Lahtinen wrote:
> Instead of implementing a custom locked reference counting, use lockref.
> 
> Current implementation leads to a deadlock splat on Intel SKL platforms
> when lockdep debugging is enabled.
> 
> This is due to few of CPUfreq drivers (including Intel P-state) having this;
> policy->rwsem is locked during driver initialization and the functions called
> during init that actually apply CPU limits use get_online_cpus (because they
> have other calling paths too), which will briefly lock cpu_hotplug.lock to
> increase cpu_hotplug.refcount.
> 
> On later calling path, when doing a suspend, when cpu_hotplug_begin() is 
> called
> in disable_nonboot_cpus(), callbacks to CPUfreq functions get called after,
> which will lock policy->rwsem and cpu_hotplug.lock is already held by
> cpu_hotplug_begin() and we do have a potential deadlock scenario reported by
> our CI system (though it is a very unlikely one). See the Bugzilla link for 
> more
> details.

I've been meaning to change the thing into a percpu-rwsem, I just
haven't had time to look into the lockdep splat that generated.


Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

2016-02-15 Thread Peter Zijlstra
On Mon, Feb 15, 2016 at 02:36:43PM +0200, Joonas Lahtinen wrote:
> Instead of implementing a custom locked reference counting, use lockref.
> 
> Current implementation leads to a deadlock splat on Intel SKL platforms
> when lockdep debugging is enabled.
> 
> This is due to few of CPUfreq drivers (including Intel P-state) having this;
> policy->rwsem is locked during driver initialization and the functions called
> during init that actually apply CPU limits use get_online_cpus (because they
> have other calling paths too), which will briefly lock cpu_hotplug.lock to
> increase cpu_hotplug.refcount.
> 
> On later calling path, when doing a suspend, when cpu_hotplug_begin() is 
> called
> in disable_nonboot_cpus(), callbacks to CPUfreq functions get called after,
> which will lock policy->rwsem and cpu_hotplug.lock is already held by
> cpu_hotplug_begin() and we do have a potential deadlock scenario reported by
> our CI system (though it is a very unlikely one). See the Bugzilla link for 
> more
> details.

I've been meaning to change the thing into a percpu-rwsem, I just
haven't had time to look into the lockdep splat that generated.