Re: [PATCH]: Fix bogus softlockup warning with sysrq-t

2007-03-27 Thread Prarit Bhargava



Jeremy Fitzhardinge wrote:


diff -r 4c81d8cafb67 drivers/char/sysrq.c
--- a/drivers/char/sysrq.c  Tue Mar 27 01:16:07 2007 -0700
+++ b/drivers/char/sysrq.c  Tue Mar 27 01:18:05 2007 -0700
@@ -408,6 +408,8 @@ void __handle_sysrq(int key, struct tty_
int i;
unsigned long flags;
 
+	softlockup_global_disable();

+
spin_lock_irqsave(_key_table_lock, flags);
orig_log_level = console_loglevel;
console_loglevel = 7;
@@ -445,6 +447,8 @@ void __handle_sysrq(int key, struct tty_
console_loglevel = orig_log_level;
}
spin_unlock_irqrestore(_key_table_lock, flags);
+
+   softlockup_global_enable();
 }
 
  


I wonder if that's too strong of a fix.  It's only the sysrq-t case that 
is the problem.  AFAIK, none of the other sysrq-t operations hold the 
tasklist_lock for a long time.  I'd move these around show_state.


P.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH]: Fix bogus softlockup warning with sysrq-t

2007-03-27 Thread Prarit Bhargava



Jeremy Fitzhardinge wrote:

Prarit Bhargava wrote:
  

I think that's a good idea -- I'll propose an add on patch to fix the
sysrq-t case ...



I'm working on this patch at the moment.  I'm just wondering what
happens if you do a global re-enable while a CPU is locally disabled.  I
think it won't matter; it will end up in the "enabled but need to update
timestamp" state, and the next time it gets a timer tick, it will simply
update the timestamp and carry on.

(This is relative to the other two softlockup patches, but modified
since I posted them.)

J

diff -r 4c81d8cafb67 drivers/char/sysrq.c
--- a/drivers/char/sysrq.c  Tue Mar 27 01:16:07 2007 -0700
+++ b/drivers/char/sysrq.c  Tue Mar 27 01:18:05 2007 -0700
@@ -408,6 +408,8 @@ void __handle_sysrq(int key, struct tty_
int i;
unsigned long flags;
 
+	softlockup_global_disable();

+
spin_lock_irqsave(_key_table_lock, flags);
orig_log_level = console_loglevel;
console_loglevel = 7;
@@ -445,6 +447,8 @@ void __handle_sysrq(int key, struct tty_
console_loglevel = orig_log_level;
}
spin_unlock_irqrestore(_key_table_lock, flags);
+
+   softlockup_global_enable();
  


I think that works -- I'll test it out on a big honkin' ia64 box ;)

Shouldn't you also do softlockup_disable/softlockup_enable instead of 
touch_softlockup_watchdog?  Why do we have both?  I can't see why we 
would have two exported methods to stop/reset the softlockup timer...


P.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH]: Fix bogus softlockup warning with sysrq-t

2007-03-27 Thread Jeremy Fitzhardinge
Prarit Bhargava wrote:
> I think that's a good idea -- I'll propose an add on patch to fix the
> sysrq-t case ...

I'm working on this patch at the moment.  I'm just wondering what
happens if you do a global re-enable while a CPU is locally disabled.  I
think it won't matter; it will end up in the "enabled but need to update
timestamp" state, and the next time it gets a timer tick, it will simply
update the timestamp and carry on.

(This is relative to the other two softlockup patches, but modified
since I posted them.)

J

diff -r 4c81d8cafb67 drivers/char/sysrq.c
--- a/drivers/char/sysrq.c  Tue Mar 27 01:16:07 2007 -0700
+++ b/drivers/char/sysrq.c  Tue Mar 27 01:18:05 2007 -0700
@@ -408,6 +408,8 @@ void __handle_sysrq(int key, struct tty_
int i;
unsigned long flags;
 
+   softlockup_global_disable();
+
spin_lock_irqsave(_key_table_lock, flags);
orig_log_level = console_loglevel;
console_loglevel = 7;
@@ -445,6 +447,8 @@ void __handle_sysrq(int key, struct tty_
console_loglevel = orig_log_level;
}
spin_unlock_irqrestore(_key_table_lock, flags);
+
+   softlockup_global_enable();
 }
 
 /*
diff -r 4c81d8cafb67 include/linux/sched.h
--- a/include/linux/sched.h Tue Mar 27 01:16:07 2007 -0700
+++ b/include/linux/sched.h Tue Mar 27 01:18:05 2007 -0700
@@ -235,6 +235,8 @@ extern void softlockup_tick(void);
 extern void softlockup_tick(void);
 extern void softlockup_enable(void);
 extern void softlockup_disable(void);
+extern void softlockup_global_enable(void);
+extern void softlockup_global_disable(void);
 extern void spawn_softlockup_task(void);
 extern void touch_softlockup_watchdog(void);
 #else
@@ -245,6 +247,12 @@ static inline void softlockup_enable(voi
 {
 }
 static inline void softlockup_disable(void)
+{
+}
+static inline void softlockup_global_enable(void)
+{
+}
+static inline void softlockup_global_disable(void)
 {
 }
 static inline void spawn_softlockup_task(void)
diff -r 4c81d8cafb67 kernel/softlockup.c
--- a/kernel/softlockup.c   Tue Mar 27 01:16:07 2007 -0700
+++ b/kernel/softlockup.c   Tue Mar 27 01:18:05 2007 -0700
@@ -17,10 +17,16 @@
 
 static DEFINE_SPINLOCK(print_lock);
 
+enum enable {
+   SL_OFF = 0, /* disabled */
+   SL_UPDATE,  /* enabled, but timestamp old */
+   SL_ON,  /* enabled */
+};
+
 static DEFINE_PER_CPU(unsigned long long, touch_timestamp);
 static DEFINE_PER_CPU(unsigned long long, print_timestamp);
 static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
-static DEFINE_PER_CPU(int, enabled);
+static DEFINE_PER_CPU(enum enable, enabled);
 
 static int did_panic = 0;
 
@@ -39,6 +45,8 @@ void touch_softlockup_watchdog(void)
 void touch_softlockup_watchdog(void)
 {
__raw_get_cpu_var(touch_timestamp) = sched_clock();
+   barrier();  /* update timestamp before enable */
+   __raw_get_cpu_var(enabled) = SL_ON;
 }
 EXPORT_SYMBOL(touch_softlockup_watchdog);
 
@@ -57,13 +65,27 @@ void softlockup_enable(void)
 void softlockup_enable(void)
 {
touch_softlockup_watchdog();
-   barrier();  /* update timestamp before enable */
-   __get_cpu_var(enabled) = 1;
 }
 
 void softlockup_disable(void)
 {
-   __get_cpu_var(enabled) = 0;
+   __get_cpu_var(enabled) = SL_OFF;
+}
+
+void softlockup_global_enable()
+{
+   unsigned cpu;
+
+   for_each_online_cpu(cpu)
+   per_cpu(enabled, cpu) = SL_UPDATE;
+}
+
+void softlockup_global_disable()
+{
+   unsigned cpu;
+
+   for_each_online_cpu(cpu)
+   per_cpu(enabled, cpu) = SL_OFF;
 }
 
 /*
@@ -79,9 +101,19 @@ void softlockup_tick(void)
 
touch_timestamp = get_timestamp(&__get_cpu_var(touch_timestamp));
 
-   /* return if not enabled */
-   if (!__get_cpu_var(enabled))
-   return;
+   switch(__get_cpu_var(enabled)) {
+   case SL_OFF:
+   /* not enabled */
+   return;
+
+   case SL_UPDATE:
+   /* update timestamp */
+   touch_softlockup_watchdog();
+   break;
+
+   case SL_ON:
+   break;
+   }
 
print_timestamp = __get_cpu_var(print_timestamp);
 

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH]: Fix bogus softlockup warning with sysrq-t

2007-03-27 Thread Prarit Bhargava



I have another pair of softlockup patches in which I try to address:

* ignoring time stolen by hypervisors
* threads going to sleep tickless for long periods of time

  


I'm looking at the code now.  Your solution is definately better :)


I could easy add a "global disable" function, which would allow long
sysrq messages, and it would help Thilo with his long flash update freezes.

  


I think that's a good idea -- I'll propose an add on patch to fix the 
sysrq-t case ...


P.


J
  

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH]: Fix bogus softlockup warning with sysrq-t

2007-03-27 Thread Prarit Bhargava



I have another pair of softlockup patches in which I try to address:

* ignoring time stolen by hypervisors
* threads going to sleep tickless for long periods of time

  


I'm looking at the code now.  Your solution is definately better :)


I could easy add a global disable function, which would allow long
sysrq messages, and it would help Thilo with his long flash update freezes.

  


I think that's a good idea -- I'll propose an add on patch to fix the 
sysrq-t case ...


P.


J
  

-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH]: Fix bogus softlockup warning with sysrq-t

2007-03-27 Thread Jeremy Fitzhardinge
Prarit Bhargava wrote:
 I think that's a good idea -- I'll propose an add on patch to fix the
 sysrq-t case ...

I'm working on this patch at the moment.  I'm just wondering what
happens if you do a global re-enable while a CPU is locally disabled.  I
think it won't matter; it will end up in the enabled but need to update
timestamp state, and the next time it gets a timer tick, it will simply
update the timestamp and carry on.

(This is relative to the other two softlockup patches, but modified
since I posted them.)

J

diff -r 4c81d8cafb67 drivers/char/sysrq.c
--- a/drivers/char/sysrq.c  Tue Mar 27 01:16:07 2007 -0700
+++ b/drivers/char/sysrq.c  Tue Mar 27 01:18:05 2007 -0700
@@ -408,6 +408,8 @@ void __handle_sysrq(int key, struct tty_
int i;
unsigned long flags;
 
+   softlockup_global_disable();
+
spin_lock_irqsave(sysrq_key_table_lock, flags);
orig_log_level = console_loglevel;
console_loglevel = 7;
@@ -445,6 +447,8 @@ void __handle_sysrq(int key, struct tty_
console_loglevel = orig_log_level;
}
spin_unlock_irqrestore(sysrq_key_table_lock, flags);
+
+   softlockup_global_enable();
 }
 
 /*
diff -r 4c81d8cafb67 include/linux/sched.h
--- a/include/linux/sched.h Tue Mar 27 01:16:07 2007 -0700
+++ b/include/linux/sched.h Tue Mar 27 01:18:05 2007 -0700
@@ -235,6 +235,8 @@ extern void softlockup_tick(void);
 extern void softlockup_tick(void);
 extern void softlockup_enable(void);
 extern void softlockup_disable(void);
+extern void softlockup_global_enable(void);
+extern void softlockup_global_disable(void);
 extern void spawn_softlockup_task(void);
 extern void touch_softlockup_watchdog(void);
 #else
@@ -245,6 +247,12 @@ static inline void softlockup_enable(voi
 {
 }
 static inline void softlockup_disable(void)
+{
+}
+static inline void softlockup_global_enable(void)
+{
+}
+static inline void softlockup_global_disable(void)
 {
 }
 static inline void spawn_softlockup_task(void)
diff -r 4c81d8cafb67 kernel/softlockup.c
--- a/kernel/softlockup.c   Tue Mar 27 01:16:07 2007 -0700
+++ b/kernel/softlockup.c   Tue Mar 27 01:18:05 2007 -0700
@@ -17,10 +17,16 @@
 
 static DEFINE_SPINLOCK(print_lock);
 
+enum enable {
+   SL_OFF = 0, /* disabled */
+   SL_UPDATE,  /* enabled, but timestamp old */
+   SL_ON,  /* enabled */
+};
+
 static DEFINE_PER_CPU(unsigned long long, touch_timestamp);
 static DEFINE_PER_CPU(unsigned long long, print_timestamp);
 static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
-static DEFINE_PER_CPU(int, enabled);
+static DEFINE_PER_CPU(enum enable, enabled);
 
 static int did_panic = 0;
 
@@ -39,6 +45,8 @@ void touch_softlockup_watchdog(void)
 void touch_softlockup_watchdog(void)
 {
__raw_get_cpu_var(touch_timestamp) = sched_clock();
+   barrier();  /* update timestamp before enable */
+   __raw_get_cpu_var(enabled) = SL_ON;
 }
 EXPORT_SYMBOL(touch_softlockup_watchdog);
 
@@ -57,13 +65,27 @@ void softlockup_enable(void)
 void softlockup_enable(void)
 {
touch_softlockup_watchdog();
-   barrier();  /* update timestamp before enable */
-   __get_cpu_var(enabled) = 1;
 }
 
 void softlockup_disable(void)
 {
-   __get_cpu_var(enabled) = 0;
+   __get_cpu_var(enabled) = SL_OFF;
+}
+
+void softlockup_global_enable()
+{
+   unsigned cpu;
+
+   for_each_online_cpu(cpu)
+   per_cpu(enabled, cpu) = SL_UPDATE;
+}
+
+void softlockup_global_disable()
+{
+   unsigned cpu;
+
+   for_each_online_cpu(cpu)
+   per_cpu(enabled, cpu) = SL_OFF;
 }
 
 /*
@@ -79,9 +101,19 @@ void softlockup_tick(void)
 
touch_timestamp = get_timestamp(__get_cpu_var(touch_timestamp));
 
-   /* return if not enabled */
-   if (!__get_cpu_var(enabled))
-   return;
+   switch(__get_cpu_var(enabled)) {
+   case SL_OFF:
+   /* not enabled */
+   return;
+
+   case SL_UPDATE:
+   /* update timestamp */
+   touch_softlockup_watchdog();
+   break;
+
+   case SL_ON:
+   break;
+   }
 
print_timestamp = __get_cpu_var(print_timestamp);
 

-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH]: Fix bogus softlockup warning with sysrq-t

2007-03-27 Thread Prarit Bhargava



Jeremy Fitzhardinge wrote:

Prarit Bhargava wrote:
  

I think that's a good idea -- I'll propose an add on patch to fix the
sysrq-t case ...



I'm working on this patch at the moment.  I'm just wondering what
happens if you do a global re-enable while a CPU is locally disabled.  I
think it won't matter; it will end up in the enabled but need to update
timestamp state, and the next time it gets a timer tick, it will simply
update the timestamp and carry on.

(This is relative to the other two softlockup patches, but modified
since I posted them.)

J

diff -r 4c81d8cafb67 drivers/char/sysrq.c
--- a/drivers/char/sysrq.c  Tue Mar 27 01:16:07 2007 -0700
+++ b/drivers/char/sysrq.c  Tue Mar 27 01:18:05 2007 -0700
@@ -408,6 +408,8 @@ void __handle_sysrq(int key, struct tty_
int i;
unsigned long flags;
 
+	softlockup_global_disable();

+
spin_lock_irqsave(sysrq_key_table_lock, flags);
orig_log_level = console_loglevel;
console_loglevel = 7;
@@ -445,6 +447,8 @@ void __handle_sysrq(int key, struct tty_
console_loglevel = orig_log_level;
}
spin_unlock_irqrestore(sysrq_key_table_lock, flags);
+
+   softlockup_global_enable();
  


I think that works -- I'll test it out on a big honkin' ia64 box ;)

Shouldn't you also do softlockup_disable/softlockup_enable instead of 
touch_softlockup_watchdog?  Why do we have both?  I can't see why we 
would have two exported methods to stop/reset the softlockup timer...


P.
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH]: Fix bogus softlockup warning with sysrq-t

2007-03-27 Thread Prarit Bhargava



Jeremy Fitzhardinge wrote:


diff -r 4c81d8cafb67 drivers/char/sysrq.c
--- a/drivers/char/sysrq.c  Tue Mar 27 01:16:07 2007 -0700
+++ b/drivers/char/sysrq.c  Tue Mar 27 01:18:05 2007 -0700
@@ -408,6 +408,8 @@ void __handle_sysrq(int key, struct tty_
int i;
unsigned long flags;
 
+	softlockup_global_disable();

+
spin_lock_irqsave(sysrq_key_table_lock, flags);
orig_log_level = console_loglevel;
console_loglevel = 7;
@@ -445,6 +447,8 @@ void __handle_sysrq(int key, struct tty_
console_loglevel = orig_log_level;
}
spin_unlock_irqrestore(sysrq_key_table_lock, flags);
+
+   softlockup_global_enable();
 }
 
  


I wonder if that's too strong of a fix.  It's only the sysrq-t case that 
is the problem.  AFAIK, none of the other sysrq-t operations hold the 
tasklist_lock for a long time.  I'd move these around show_state.


P.
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH]: Fix bogus softlockup warning with sysrq-t

2007-03-26 Thread Cestonaro, Thilo \(external\)
> I could easy add a "global disable" function, which would allow long
> sysrq messages, and it would help Thilo with his long flash update freezes.

A "global disable" and "reenable" functions pair which works during irq 
disabled,
would be a perfect solution for me. Thx Jeremy for your effort :)

Ciao Thilo
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH]: Fix bogus softlockup warning with sysrq-t

2007-03-26 Thread Jeremy Fitzhardinge
Prarit Bhargava wrote:
> There are some situations when soft lockup warnings are expected in the
> kernel.  For example, when doing an alt-sysrq-t on a large number of 
> processes,
> the dump to console can take a long time and the tasklist_lock is held over
> that period.  This results in a bogus soft lockup warning.
>   

Wouldn't it be better to just temporarily disable softlockups for the
duration?

> This patch reworks touch_softlockup_watchdog to touch ALL cpu's
> touch_timestamp.  It also introduces touch_cpu_softlockup_watchdog to touch
> a single cpu's touch_timestamp.

Doesn't this mean that if one CPU gets locked up, it will be undetected
so long as some other CPU is making progress?

I have another pair of softlockup patches in which I try to address:

* ignoring time stolen by hypervisors
* threads going to sleep tickless for long periods of time

I could easy add a "global disable" function, which would allow long
sysrq messages, and it would help Thilo with his long flash update freezes.

J
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH]: Fix bogus softlockup warning with sysrq-t

2007-03-26 Thread Jeremy Fitzhardinge
Prarit Bhargava wrote:
 There are some situations when soft lockup warnings are expected in the
 kernel.  For example, when doing an alt-sysrq-t on a large number of 
 processes,
 the dump to console can take a long time and the tasklist_lock is held over
 that period.  This results in a bogus soft lockup warning.
   

Wouldn't it be better to just temporarily disable softlockups for the
duration?

 This patch reworks touch_softlockup_watchdog to touch ALL cpu's
 touch_timestamp.  It also introduces touch_cpu_softlockup_watchdog to touch
 a single cpu's touch_timestamp.

Doesn't this mean that if one CPU gets locked up, it will be undetected
so long as some other CPU is making progress?

I have another pair of softlockup patches in which I try to address:

* ignoring time stolen by hypervisors
* threads going to sleep tickless for long periods of time

I could easy add a global disable function, which would allow long
sysrq messages, and it would help Thilo with his long flash update freezes.

J
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH]: Fix bogus softlockup warning with sysrq-t

2007-03-26 Thread Cestonaro, Thilo \(external\)
 I could easy add a global disable function, which would allow long
 sysrq messages, and it would help Thilo with his long flash update freezes.

A global disable and reenable functions pair which works during irq 
disabled,
would be a perfect solution for me. Thx Jeremy for your effort :)

Ciao Thilo
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH]: Fix bogus softlockup warning with sysrq-t

2007-03-23 Thread Rick Lindsley
We've seen these here and had arrived at a similar patch.  Extensive prints
on the console can take longer than the watchdog likes.

Acked-by: Rick Lindsley <[EMAIL PROTECTED]>

Rick
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH]: Fix bogus softlockup warning with sysrq-t

2007-03-23 Thread Rick Lindsley
We've seen these here and had arrived at a similar patch.  Extensive prints
on the console can take longer than the watchdog likes.

Acked-by: Rick Lindsley [EMAIL PROTECTED]

Rick
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH]: Fix bogus softlockup warning with sysrq-t

2007-03-15 Thread Prarit Bhargava
There are some situations when soft lockup warnings are expected in the
kernel.  For example, when doing an alt-sysrq-t on a large number of processes,
the dump to console can take a long time and the tasklist_lock is held over
that period.  This results in a bogus soft lockup warning.

This patch reworks touch_softlockup_watchdog to touch ALL cpu's
touch_timestamp.  It also introduces touch_cpu_softlockup_watchdog to touch
a single cpu's touch_timestamp.  This makes it functionally equivalent to
touch_nmi_watchdog.

touch_nmi_watchdog is not modified -- AFAICT it was attempting to touch all
cpu's softlockup watchdogs, not just a specific cpu.

/drivers/ide/ide-iops.c does not need to call touch_softlockup_watchdog as it
is done in the call to touch_nmi_watchdog.

The EXPORT_SYMBOL for touch_softlockup_watchdog is needed by
drivers/scsi/ips.ko

Acked-by: Chris Lalancette <[EMAIL PROTECTED]>
Signed-off-by: Prarit Bhargava <[EMAIL PROTECTED]>

diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
index c58e933..db514bb 100644
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -254,7 +254,7 @@ static int __init uncached_build_memmap(unsigned long 
uc_start,
struct gen_pool *pool = uncached_pools[nid].pool;
size_t size = uc_end - uc_start;
 
-   touch_softlockup_watchdog();
+   touch_cpu_softlockup_watchdog();
 
if (pool != NULL) {
memset((char *)uc_start, 0, size);
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index bd513f5..176c97b 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -1225,7 +1225,6 @@ int ide_wait_not_busy(ide_hwif_t *hwif, unsigned long 
timeout)
 */
if (stat == 0xff)
return -ENODEV;
-   touch_softlockup_watchdog();
touch_nmi_watchdog();
}
return -EBUSY;
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 30175c7..5fb2dce 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -313,7 +313,7 @@ static void ide_pio_datablock(ide_drive_t *drive, struct 
request *rq,
if (rq->bio)/* fs request */
rq->errors = 0;
 
-   touch_softlockup_watchdog();
+   touch_cpu_softlockup_watchdog();
 
switch (drive->hwif->data_phase) {
case TASKFILE_MULTI_IN:
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 6af37b8..9669d5f 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -425,7 +425,7 @@ void nand_wait_ready(struct mtd_info *mtd)
do {
if (chip->dev_ready(mtd))
break;
-   touch_softlockup_watchdog();
+   touch_cpu_softlockup_watchdog();
} while (time_before(jiffies, timeo));
led_trigger_event(nand_led_trigger, LED_OFF);
 }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 49fe299..113421e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -225,6 +225,7 @@ extern void scheduler_tick(void);
 #ifdef CONFIG_DETECT_SOFTLOCKUP
 extern void softlockup_tick(void);
 extern void spawn_softlockup_task(void);
+extern void touch_cpu_softlockup_watchdog(void);
 extern void touch_softlockup_watchdog(void);
 #else
 static inline void softlockup_tick(void)
@@ -233,6 +234,9 @@ static inline void softlockup_tick(void)
 static inline void spawn_softlockup_task(void)
 {
 }
+static inline void touch_cpu_softlockup_watchdog(void)
+{
+}
 static inline void touch_softlockup_watchdog(void)
 {
 }
diff --git a/kernel/panic.c b/kernel/panic.c
index 623d182..9e4565c 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -132,7 +132,7 @@ NORET_TYPE void panic(const char * fmt, ...)
 #endif
local_irq_enable();
for (i = 0;;) {
-   touch_softlockup_watchdog();
+   touch_cpu_softlockup_watchdog();
i += panic_blink(i);
mdelay(1);
i++;
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 7fb8343..d259b12 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -323,7 +323,7 @@ int swsusp_resume(void)
 */
swsusp_free();
restore_processor_state();
-   touch_softlockup_watchdog();
+   touch_cpu_softlockup_watchdog();
device_power_up();
local_irq_enable();
return error;
diff --git a/kernel/sched.c b/kernel/sched.c
index a4ca632..243f558 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4783,6 +4783,7 @@ void show_state_filter(unsigned long state_filter)
if (p->state & state_filter)
show_task(p);
} while_each_thread(g, p);
+   touch_softlockup_watchdog();
 
read_unlock(_lock);
/*
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 50afeb8..05b66db 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -8,6 +8,7 @@
  */
 

[PATCH]: Fix bogus softlockup warning with sysrq-t

2007-03-15 Thread Prarit Bhargava
There are some situations when soft lockup warnings are expected in the
kernel.  For example, when doing an alt-sysrq-t on a large number of processes,
the dump to console can take a long time and the tasklist_lock is held over
that period.  This results in a bogus soft lockup warning.

This patch reworks touch_softlockup_watchdog to touch ALL cpu's
touch_timestamp.  It also introduces touch_cpu_softlockup_watchdog to touch
a single cpu's touch_timestamp.  This makes it functionally equivalent to
touch_nmi_watchdog.

touch_nmi_watchdog is not modified -- AFAICT it was attempting to touch all
cpu's softlockup watchdogs, not just a specific cpu.

/drivers/ide/ide-iops.c does not need to call touch_softlockup_watchdog as it
is done in the call to touch_nmi_watchdog.

The EXPORT_SYMBOL for touch_softlockup_watchdog is needed by
drivers/scsi/ips.ko

Acked-by: Chris Lalancette [EMAIL PROTECTED]
Signed-off-by: Prarit Bhargava [EMAIL PROTECTED]

diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
index c58e933..db514bb 100644
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -254,7 +254,7 @@ static int __init uncached_build_memmap(unsigned long 
uc_start,
struct gen_pool *pool = uncached_pools[nid].pool;
size_t size = uc_end - uc_start;
 
-   touch_softlockup_watchdog();
+   touch_cpu_softlockup_watchdog();
 
if (pool != NULL) {
memset((char *)uc_start, 0, size);
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index bd513f5..176c97b 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -1225,7 +1225,6 @@ int ide_wait_not_busy(ide_hwif_t *hwif, unsigned long 
timeout)
 */
if (stat == 0xff)
return -ENODEV;
-   touch_softlockup_watchdog();
touch_nmi_watchdog();
}
return -EBUSY;
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 30175c7..5fb2dce 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -313,7 +313,7 @@ static void ide_pio_datablock(ide_drive_t *drive, struct 
request *rq,
if (rq-bio)/* fs request */
rq-errors = 0;
 
-   touch_softlockup_watchdog();
+   touch_cpu_softlockup_watchdog();
 
switch (drive-hwif-data_phase) {
case TASKFILE_MULTI_IN:
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 6af37b8..9669d5f 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -425,7 +425,7 @@ void nand_wait_ready(struct mtd_info *mtd)
do {
if (chip-dev_ready(mtd))
break;
-   touch_softlockup_watchdog();
+   touch_cpu_softlockup_watchdog();
} while (time_before(jiffies, timeo));
led_trigger_event(nand_led_trigger, LED_OFF);
 }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 49fe299..113421e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -225,6 +225,7 @@ extern void scheduler_tick(void);
 #ifdef CONFIG_DETECT_SOFTLOCKUP
 extern void softlockup_tick(void);
 extern void spawn_softlockup_task(void);
+extern void touch_cpu_softlockup_watchdog(void);
 extern void touch_softlockup_watchdog(void);
 #else
 static inline void softlockup_tick(void)
@@ -233,6 +234,9 @@ static inline void softlockup_tick(void)
 static inline void spawn_softlockup_task(void)
 {
 }
+static inline void touch_cpu_softlockup_watchdog(void)
+{
+}
 static inline void touch_softlockup_watchdog(void)
 {
 }
diff --git a/kernel/panic.c b/kernel/panic.c
index 623d182..9e4565c 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -132,7 +132,7 @@ NORET_TYPE void panic(const char * fmt, ...)
 #endif
local_irq_enable();
for (i = 0;;) {
-   touch_softlockup_watchdog();
+   touch_cpu_softlockup_watchdog();
i += panic_blink(i);
mdelay(1);
i++;
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 7fb8343..d259b12 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -323,7 +323,7 @@ int swsusp_resume(void)
 */
swsusp_free();
restore_processor_state();
-   touch_softlockup_watchdog();
+   touch_cpu_softlockup_watchdog();
device_power_up();
local_irq_enable();
return error;
diff --git a/kernel/sched.c b/kernel/sched.c
index a4ca632..243f558 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4783,6 +4783,7 @@ void show_state_filter(unsigned long state_filter)
if (p-state  state_filter)
show_task(p);
} while_each_thread(g, p);
+   touch_softlockup_watchdog();
 
read_unlock(tasklist_lock);
/*
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 50afeb8..05b66db 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -8,6 +8,7 @@
  */
 #include