[Xen-devel] [V4 PATCH 2/2] mips/panic: Replace smp_send_stop() with kdump friendly version in panic path

2016-08-10 Thread Hidehiro Kawai
Daniel Walker reported problems which happen when the
crash_kexec_post_notifiers kernel option is enabled
(https://lkml.org/lkml/2015/6/24/44).

In that case, smp_send_stop() is called before entering the kdump routines,
which assume other CPUs are still online.  As a result, the kdump
routines fail to save other CPUs' registers.  Additionally, on MIPS
OCTEON, the watchdog timer is not stopped.

To fix this problem, call a new kdump-friendly function,
crash_smp_send_stop(), instead of smp_send_stop() when
crash_kexec_post_notifiers is enabled.  crash_smp_send_stop() is a
weak function, and by default it just calls smp_send_stop().  Architecture
code should override it so that kdump can work appropriately.
This patch provides the MIPS version.

Reported-by: Daniel Walker 
Fixes: f06e5153f4ae (kernel/panic.c: add "crash_kexec_post_notifiers" option)
Signed-off-by: Hidehiro Kawai 
Cc: Ralf Baechle 
Cc: David Daney 
Cc: Aaro Koskinen 
Cc: "Steven J. Hill" 
Cc: Corey Minyard 

---
I'm not familiar with MIPS and don't have a test environment, so I have
only build-tested this patch.  Please don't apply it until someone has
tested it sufficiently; otherwise, simply drop it.
---
 arch/mips/cavium-octeon/setup.c  |   14 ++
 arch/mips/include/asm/kexec.h|1 +
 arch/mips/kernel/crash.c |   18 +-
 arch/mips/kernel/machine_kexec.c |1 +
 4 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index cb16fcc..5537f95 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -267,6 +267,17 @@ static void octeon_crash_shutdown(struct pt_regs *regs)
default_machine_crash_shutdown(regs);
 }
 
+#ifdef CONFIG_SMP
+void octeon_crash_smp_send_stop(void)
+{
+   int cpu;
+
+   /* disable watchdogs */
+   for_each_online_cpu(cpu)
+   cvmx_write_csr(CVMX_CIU_WDOGX(cpu_logical_map(cpu)), 0);
+}
+#endif
+
 #endif /* CONFIG_KEXEC */
 
 #ifdef CONFIG_CAVIUM_RESERVE32
@@ -911,6 +922,9 @@ void __init prom_init(void)
_machine_kexec_shutdown = octeon_shutdown;
_machine_crash_shutdown = octeon_crash_shutdown;
_machine_kexec_prepare = octeon_kexec_prepare;
+#ifdef CONFIG_SMP
+   _crash_smp_send_stop = octeon_crash_smp_send_stop;
+#endif
 #endif
 
octeon_user_io_init();
diff --git a/arch/mips/include/asm/kexec.h b/arch/mips/include/asm/kexec.h
index ee25ebb..493a3cc 100644
--- a/arch/mips/include/asm/kexec.h
+++ b/arch/mips/include/asm/kexec.h
@@ -45,6 +45,7 @@ extern const unsigned char kexec_smp_wait[];
 extern unsigned long secondary_kexec_args[4];
 extern void (*relocated_kexec_smp_wait) (void *);
 extern atomic_t kexec_ready_to_reboot;
+extern void (*_crash_smp_send_stop)(void);
 #endif
 #endif
 
diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c
index 610f0f3..1723b17 100644
--- a/arch/mips/kernel/crash.c
+++ b/arch/mips/kernel/crash.c
@@ -47,9 +47,14 @@ static void crash_shutdown_secondary(void *passed_regs)
 
 static void crash_kexec_prepare_cpus(void)
 {
+   static int cpus_stopped;
unsigned int msecs;
+   unsigned int ncpus;
 
-   unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
+   if (cpus_stopped)
+   return;
+
+   ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
 
dump_send_ipi(crash_shutdown_secondary);
smp_wmb();
@@ -64,6 +69,17 @@ static void crash_kexec_prepare_cpus(void)
cpu_relax();
mdelay(1);
}
+
+   cpus_stopped = 1;
+}
+
+/* Override the weak function in kernel/panic.c */
+void crash_smp_send_stop(void)
+{
+   if (_crash_smp_send_stop)
+   _crash_smp_send_stop();
+
+   crash_kexec_prepare_cpus();
 }
 
 #else /* !defined(CONFIG_SMP)  */
diff --git a/arch/mips/kernel/machine_kexec.c b/arch/mips/kernel/machine_kexec.c
index 50980bf3..5972520 100644
--- a/arch/mips/kernel/machine_kexec.c
+++ b/arch/mips/kernel/machine_kexec.c
@@ -25,6 +25,7 @@ void (*_machine_crash_shutdown)(struct pt_regs *regs) = NULL;
 #ifdef CONFIG_SMP
 void (*relocated_kexec_smp_wait) (void *);
 atomic_t kexec_ready_to_reboot = ATOMIC_INIT(0);
+void (*_crash_smp_send_stop)(void) = NULL;
 #endif
 
 int



___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [V4 PATCH 0/2] kexec: crash_kexec_post_notifiers boot option related fixes

2016-08-10 Thread Hidehiro Kawai
Daniel Walker reported problems which happen when the
crash_kexec_post_notifiers kernel option is enabled
(https://lkml.org/lkml/2015/6/24/44).

In that case, smp_send_stop() is called before entering the kdump routines,
which assume other CPUs are still online.  This causes some issues
depending on the architecture.  For example, on x86, the kdump routines fail
to save other CPUs' registers and to disable virtualization extensions.
On MIPS OCTEON, they fail to stop the watchdog timer.

To fix this problem, call a new kdump-friendly function,
crash_smp_send_stop(), instead of smp_send_stop() when
crash_kexec_post_notifiers is enabled.  crash_smp_send_stop() is a
weak function, and by default it just calls smp_send_stop().  Architecture
code should override it so that kdump can work appropriately.
This patch set supports only x86 and MIPS.

NOTE:
- The right solution would be to place crash_smp_send_stop() before the
  __crash_kexec() invocation in all cases and remove smp_send_stop(),
  but we can't do that until all architectures implement their own
  crash_smp_send_stop().
- crash_smp_send_stop()-like work is still needed by
  machine_crash_shutdown() because crash_kexec() can be called without
  entering panic().

Changes in V4:
- Keep using smp_send_stop() if crash_kexec_post_notifiers is not set
- Rename panic_smp_send_stop to crash_smp_send_stop
- Don't change the behavior for Xen's PV kernel
- Support MIPS

Changes in V3: https://lkml.org/lkml/2016/7/5/221
- Revise comments, description, and symbol names (the logic doesn't
  change)
- Make crash_kexec_post_notifiers boot option modifiable after boot

Changes in V2: https://lkml.org/lkml/2015/7/23/864
- Replace smp_send_stop() call with crash_kexec version which
  saves cpu states and does cleanups instead of changing execution
  flow
- Drop a fix for Problem 1
- Drop other patches because they aren't needed anymore

V1: https://lkml.org/lkml/2015/7/10/316

---

Hidehiro Kawai (2):
  x86/panic: Replace smp_send_stop() with kdump friendly version in panic path
  mips/panic: Replace smp_send_stop() with kdump friendly version in panic path


 arch/mips/cavium-octeon/setup.c  |   14 +++
 arch/mips/include/asm/kexec.h|1 +
 arch/mips/kernel/crash.c |   18 ++-
 arch/mips/kernel/machine_kexec.c |1 +
 arch/x86/include/asm/kexec.h |1 +
 arch/x86/include/asm/smp.h   |1 +
 arch/x86/kernel/crash.c  |   22 +++---
 arch/x86/kernel/smp.c|5 
 kernel/panic.c   |   47 --
 9 files changed, 99 insertions(+), 11 deletions(-)


-- 
Hidehiro Kawai
Hitachi, Ltd. Research & Development Group



___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [V4 PATCH 1/2] x86/panic: Replace smp_send_stop() with kdump friendly version in panic path

2016-08-10 Thread Hidehiro Kawai
Daniel Walker reported problems which happen when the
crash_kexec_post_notifiers kernel option is enabled
(https://lkml.org/lkml/2015/6/24/44).

In that case, smp_send_stop() is called before entering the kdump routines,
which assume other CPUs are still online.  As a result, on x86, the
kdump routines fail to save other CPUs' registers and to disable
virtualization extensions.

To fix this problem, call a new kdump-friendly function,
crash_smp_send_stop(), instead of smp_send_stop() when
crash_kexec_post_notifiers is enabled.  crash_smp_send_stop() is a
weak function, and by default it just calls smp_send_stop().  Architecture
code should override it so that kdump can work appropriately.
This patch provides only the x86-specific version.

For Xen's PV kernel, just keep the current behavior.

Changes in V4:
- Keep using smp_send_stop() if crash_kexec_post_notifiers is not set
- Rename panic_smp_send_stop to crash_smp_send_stop
- Don't change the behavior for Xen's PV kernel

Changes in V3:
- Revise comments, description, and symbol names

Changes in V2:
- Replace smp_send_stop() call with crash_kexec version which
  saves cpu states and cleans up VMX/SVM
- Drop a fix for Problem 1 at this moment

Reported-by: Daniel Walker 
Fixes: f06e5153f4ae (kernel/panic.c: add "crash_kexec_post_notifiers" option)
Signed-off-by: Hidehiro Kawai 
Cc: Dave Young 
Cc: Baoquan He 
Cc: Vivek Goyal 
Cc: Eric Biederman 
Cc: Masami Hiramatsu 
Cc: Daniel Walker 
Cc: Xunlei Pang 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: "H. Peter Anvin" 
Cc: Borislav Petkov 
Cc: David Vrabel 
Cc: Toshi Kani 
Cc: Andrew Morton 
---
 arch/x86/include/asm/kexec.h |1 +
 arch/x86/include/asm/smp.h   |1 +
 arch/x86/kernel/crash.c  |   22 +---
 arch/x86/kernel/smp.c|5 
 kernel/panic.c   |   47 --
 5 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index d2434c1..282630e 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -210,6 +210,7 @@ struct kexec_entry64_regs {
 
 typedef void crash_vmclear_fn(void);
 extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
+extern void kdump_nmi_shootdown_cpus(void);
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index ebd0c16..f70989c 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -50,6 +50,7 @@ struct smp_ops {
void (*smp_cpus_done)(unsigned max_cpus);
 
void (*stop_other_cpus)(int wait);
+   void (*crash_stop_other_cpus)(void);
void (*smp_send_reschedule)(int cpu);
 
int (*cpu_up)(unsigned cpu, struct task_struct *tidle);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 9616cf7..650830e 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -133,15 +133,31 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
disable_local_APIC();
 }
 
-static void kdump_nmi_shootdown_cpus(void)
+void kdump_nmi_shootdown_cpus(void)
 {
nmi_shootdown_cpus(kdump_nmi_callback);
 
disable_local_APIC();
 }
 
+/* Override the weak function in kernel/panic.c */
+void crash_smp_send_stop(void)
+{
+   static int cpus_stopped;
+
+   if (cpus_stopped)
+   return;
+
+   if (smp_ops.crash_stop_other_cpus)
+   smp_ops.crash_stop_other_cpus();
+   else
+   smp_send_stop();
+
+   cpus_stopped = 1;
+}
+
 #else
-static void kdump_nmi_shootdown_cpus(void)
+void crash_smp_send_stop(void)
 {
/* There are no cpus to shootdown */
 }
@@ -160,7 +176,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
/* The kernel is broken so disable interrupts */
local_irq_disable();
 
-   kdump_nmi_shootdown_cpus();
+   crash_smp_send_stop();
 
/*
 * VMCLEAR VMCSs loaded on this cpu if needed.
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 658777c..68f8cc2 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -32,6 +32,8 @@
 #include 
 #include 
 #include 
+#include 
+
 /*
  * Some notes on x86 processor bugs affecting SMP operation:
  *
@@ -342,6 +344,9 @@ struct smp_ops smp_ops = {
.smp_cpus_done  = native_smp_cpus_done,
 
.stop_other_cpus= native_stop_other_cpus,
+#if defined(CONFIG_KEXEC_CORE)
+   .crash_stop_other_cpus  = kdump_nmi_shootdown_cpus,
+#endif
.smp_send_reschedule= native_smp_send_reschedule,
 
.cpu_up = native_cpu_up,
diff --git a/kernel/panic.c b/kernel/panic.c
index ca8cea1..e6480e2 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -71,6 +71,32 @@ void __weak nmi_panic_self_stop(struct pt_regs *regs)
panic_smp_self_stop();
 }
 
+/*
+ * Stop other CPUs in panic.  Architecture dependent code may override this
+ * with mor