Currently hung_task_check_interval_secs and hung_task_timeout_secs
only support a granularity of seconds. In some cases the
TASK_UNINTERRUPTIBLE state lasts less than 1 second, and we may still
need the hung task detector to trigger a panic to get a ramdump, or to
print the tasks on all CPUs.

Rename hung_task_check_interval_secs to
hung_task_check_interval_millisecs so that the check interval is
expressed in milliseconds. Add a hung_task_timeout_millisecs file to
set the millisecond part of the timeout.
The task timeout is computed as
hung_task_timeout_secs * 1000 + hung_task_timeout_millisecs.
(timeout * HZ / 1000) converts the timeout from milliseconds to
jiffies.

Signed-off-by: yang che <chey84...@gmail.com>
---

v1->v2:
 Add hung_task_check_interval_millisecs and hung_task_timeout_millisecs.
 Fix the issue where writing to the millisecond file silently overrides
 the setting in the seconds file.

 
[1]https://lore.kernel.org/lkml/CAN_w4MWMfoDGfpON-bYHrU=kujg2vpfj01zbn4r-iwm4ayy...@mail.gmail.com

 include/linux/sched/sysctl.h |  3 ++-
 kernel/hung_task.c           | 25 ++++++++++++++++++-------
 kernel/sysctl.c              | 12 ++++++++++--
 3 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 660ac49..179c331 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -16,8 +16,9 @@ extern unsigned int sysctl_hung_task_all_cpu_backtrace;
 
 extern int          sysctl_hung_task_check_count;
 extern unsigned int  sysctl_hung_task_panic;
+extern unsigned long  sysctl_hung_task_timeout_millisecs;
 extern unsigned long sysctl_hung_task_timeout_secs;
-extern unsigned long sysctl_hung_task_check_interval_secs;
+extern unsigned long sysctl_hung_task_check_interval_millisecs;
 extern int sysctl_hung_task_warnings;
 int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
                void *buffer, size_t *lenp, loff_t *ppos);
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index ce76f49..809c999 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -37,6 +37,7 @@ int __read_mostly sysctl_hung_task_check_count = 
PID_MAX_LIMIT;
  * the RCU grace period. So it needs to be upper-bound.
  */
 #define HUNG_TASK_LOCK_BREAK (HZ / 10)
+#define SECONDS 1000
 
 /*
  * Zero means infinite timeout - no checking done:
@@ -44,9 +45,14 @@ int __read_mostly sysctl_hung_task_check_count = 
PID_MAX_LIMIT;
 unsigned long __read_mostly sysctl_hung_task_timeout_secs = 
CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;
 
 /*
+ * Zero means only use sysctl_hung_task_timeout_secs
+ */
+unsigned long  __read_mostly sysctl_hung_task_timeout_millisecs;
+
+/*
  * Zero (default value) means use sysctl_hung_task_timeout_secs:
  */
-unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
+unsigned long __read_mostly sysctl_hung_task_check_interval_millisecs;
 
 int __read_mostly sysctl_hung_task_warnings = 10;
 
@@ -108,7 +114,8 @@ static void check_hung_task(struct task_struct *t, unsigned 
long timeout)
                t->last_switch_time = jiffies;
                return;
        }
-       if (time_is_after_jiffies(t->last_switch_time + timeout * HZ))
+
+       if (time_is_after_jiffies(t->last_switch_time + (timeout * HZ) / 
SECONDS))
                return;
 
        trace_sched_process_hang(t);
@@ -126,13 +133,16 @@ static void check_hung_task(struct task_struct *t, 
unsigned long timeout)
        if (sysctl_hung_task_warnings) {
                if (sysctl_hung_task_warnings > 0)
                        sysctl_hung_task_warnings--;
-               pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
-                      t->comm, t->pid, (jiffies - t->last_switch_time) / HZ);
+
+               pr_err("INFO: task %s:%d blocked for more than %ld seconds %ld 
milliseconds.\n",
+                       t->comm, t->pid, (jiffies - t->last_switch_time) / HZ,
+                       (jiffies - t->last_switch_time) % HZ * (SECONDS / HZ));
                pr_err("      %s %s %.*s\n",
                        print_tainted(), init_utsname()->release,
                        (int)strcspn(init_utsname()->version, " "),
                        init_utsname()->version);
                pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
+                       "\"echo 0 > 
/proc/sys/kernel/hung_task_timeout_millisecs\""
                        " disables this message.\n");
                sched_show_task(t);
                hung_task_show_lock = true;
@@ -217,7 +227,7 @@ static long hung_timeout_jiffies(unsigned long last_checked,
                                 unsigned long timeout)
 {
        /* timeout of 0 will disable the watchdog */
-       return timeout ? last_checked - jiffies + timeout * HZ :
+       return timeout ? last_checked - jiffies + (timeout * HZ) / SECONDS :
                MAX_SCHEDULE_TIMEOUT;
 }
 
@@ -281,8 +291,9 @@ static int watchdog(void *dummy)
        set_user_nice(current, 0);
 
        for ( ; ; ) {
-               unsigned long timeout = sysctl_hung_task_timeout_secs;
-               unsigned long interval = sysctl_hung_task_check_interval_secs;
+               unsigned long timeout = sysctl_hung_task_timeout_secs * SECONDS 
+
+                                       sysctl_hung_task_timeout_millisecs;
+               unsigned long interval = 
sysctl_hung_task_check_interval_millisecs;
                long t;
 
                if (interval == 0)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index db1ce7a..8f7ac33 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2476,6 +2476,14 @@ static struct ctl_table kern_table[] = {
                .extra1         = SYSCTL_ZERO,
        },
        {
+               .procname       = "hung_task_timeout_millisecs",
+               .data           = &sysctl_hung_task_timeout_millisecs,
+               .maxlen         = sizeof(unsigned long),
+               .mode           = 0644,
+               .proc_handler   = proc_dohung_task_timeout_secs,
+               .extra2         = &hung_task_timeout_max,
+       },
+       {
                .procname       = "hung_task_timeout_secs",
                .data           = &sysctl_hung_task_timeout_secs,
                .maxlen         = sizeof(unsigned long),
@@ -2484,8 +2492,8 @@ static struct ctl_table kern_table[] = {
                .extra2         = &hung_task_timeout_max,
        },
        {
-               .procname       = "hung_task_check_interval_secs",
-               .data           = &sysctl_hung_task_check_interval_secs,
+               .procname       = "hung_task_check_interval_millisecs",
+               .data           = &sysctl_hung_task_check_interval_millisecs,
                .maxlen         = sizeof(unsigned long),
                .mode           = 0644,
                .proc_handler   = proc_dohung_task_timeout_secs,
-- 
2.7.4

Reply via email to