Re: [PATCH v11 3/7] crash: add generic infrastructure for crash hotplug support

2022-09-09 Thread Eric DeVolder




On 8/30/22 22:26, Baoquan He wrote:

On 08/26/22 at 01:37pm, Eric DeVolder wrote:

CPU and memory change notifications are received in order to
regenerate the elfcorehdr.

To support cpu hotplug, a callback is registered to capture the
CPUHP_AP_ONLINE_DYN online and offline events via
cpuhp_setup_state_nocalls().

To support memory hotplug, a notifier is registered to capture the
MEM_ONLINE and MEM_OFFLINE events via register_memory_notifier().

The cpu callback and memory notifiers call handle_hotplug_event()
which performs needed tasks and then dispatches the event to the
architecture specific arch_crash_handle_hotplug_event(). During the
process, the kexec_mutex is held.

Signed-off-by: Eric DeVolder 
---
  include/linux/crash_core.h |   8 +++
  include/linux/kexec.h  |  26 +++
  kernel/crash_core.c| 134 +
  3 files changed, 168 insertions(+)

diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index de62a722431e..3b99e69b011f 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -84,4 +84,12 @@ int parse_crashkernel_high(char *cmdline, unsigned long long 
system_ram,
  int parse_crashkernel_low(char *cmdline, unsigned long long system_ram,
unsigned long long *crash_size, unsigned long long *crash_base);
  
+#define KEXEC_CRASH_HP_REMOVE_CPU		0

+#define KEXEC_CRASH_HP_ADD_CPU 1
+#define KEXEC_CRASH_HP_REMOVE_MEMORY   2

 ~~
 Nitpick, These are not aligned,

+#define KEXEC_CRASH_HP_ADD_MEMORY  3
+#define KEXEC_CRASH_HP_INVALID_CPU -1U
+
+struct kimage;
+
  #endif /* LINUX_CRASH_CORE_H */
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 4eefa631e0ae..9597b41136ec 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -374,6 +374,13 @@ struct kimage {
struct purgatory_info purgatory_info;
  #endif
  
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)

+   bool hotplug_event;
+   unsigned int offlinecpu;
+   bool elfcorehdr_index_valid;
+   int elfcorehdr_index;
+#endif
+
  #ifdef CONFIG_IMA_KEXEC
/* Virtual address of IMA measurement buffer for kexec syscall */
void *ima_buffer;
@@ -503,6 +510,25 @@ static inline int arch_kexec_post_alloc_pages(void *vaddr, 
unsigned int pages, g
  static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) 
{ }
  #endif
  
+#ifndef arch_map_crash_pages

+static inline void *arch_map_crash_pages(unsigned long paddr,
+   unsigned long size)
+{
+   return NULL;
+}
+#endif
+
+#ifndef arch_unmap_crash_pages
+static inline void arch_unmap_crash_pages(void **ptr) { }
+#endif
+
+#ifndef arch_crash_handle_hotplug_event
+static inline void arch_crash_handle_hotplug_event(struct kimage *image,
+   unsigned int hp_action)
+{
+}
+#endif
+
  #else /* !CONFIG_KEXEC_CORE */
  struct pt_regs;
  struct task_struct;
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 0f8aa659cca4..455150205ded 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -11,6 +11,8 @@
  #include 
  #include 
  #include 
+#include 
+#include 
  
  #include 

  #include 
@@ -18,6 +20,7 @@
  #include 
  
  #include "kallsyms_internal.h"

+#include "kexec_internal.h"
  
  /* vmcoreinfo stuff */

  unsigned char *vmcoreinfo_data;
@@ -611,3 +614,134 @@ static int __init crash_save_vmcoreinfo_init(void)
  }
  
  subsys_initcall(crash_save_vmcoreinfo_init);

+
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)
+/*
+ * To accurately reflect hot un/plug changes, the elfcorehdr (which
+ * is passed to the crash kernel via the elfcorehdr= parameter)
+ * must be updated with the new list of CPUs and memories.
+ *
+ * In order to make changes to elfcorehdr, two conditions are needed:
+ * First, the segment containing the elfcorehdr must be large enough
+ * to permit a growing number of resources. The elfcorehdr memory is
+ * typically based on CONFIG_NR_CPUS and CONFIG_CRASH_MAX_MEMORY_RANGES.
+ * Second, purgatory must explicitly exclude the elfcorehdr from the
+ * list of segments it checks (since the elfcorehdr changes and thus
+ * would require an update to purgatory itself to update the digest).
+ */
+static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
+{
+   /* Obtain lock while changing crash information */
+   mutex_lock(&kexec_mutex);
+
+   /* Check kdump is loaded */
+   if (kexec_crash_image) {
+   struct kimage *image = kexec_crash_image;
+
+   if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
+   hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
+   pr_debug("crash hp: hp_action %u, cpu %u\n", hp_action, 
cpu);
+   else
+   pr_debug("crash hp: hp_action %u", hp_action);

 

Re: [PATCH v11 3/7] crash: add generic infrastructure for crash hotplug support

2022-08-31 Thread Eric DeVolder




On 8/30/22 22:26, Baoquan He wrote:

On 08/26/22 at 01:37pm, Eric DeVolder wrote:

CPU and memory change notifications are received in order to
regenerate the elfcorehdr.

To support cpu hotplug, a callback is registered to capture the
CPUHP_AP_ONLINE_DYN online and offline events via
cpuhp_setup_state_nocalls().

To support memory hotplug, a notifier is registered to capture the
MEM_ONLINE and MEM_OFFLINE events via register_memory_notifier().

The cpu callback and memory notifiers call handle_hotplug_event()
which performs needed tasks and then dispatches the event to the
architecture specific arch_crash_handle_hotplug_event(). During the
process, the kexec_mutex is held.

Signed-off-by: Eric DeVolder 
---
  include/linux/crash_core.h |   8 +++
  include/linux/kexec.h  |  26 +++
  kernel/crash_core.c| 134 +
  3 files changed, 168 insertions(+)

diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index de62a722431e..3b99e69b011f 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -84,4 +84,12 @@ int parse_crashkernel_high(char *cmdline, unsigned long long 
system_ram,
  int parse_crashkernel_low(char *cmdline, unsigned long long system_ram,
unsigned long long *crash_size, unsigned long long *crash_base);
  
+#define KEXEC_CRASH_HP_REMOVE_CPU		0

+#define KEXEC_CRASH_HP_ADD_CPU 1
+#define KEXEC_CRASH_HP_REMOVE_MEMORY   2

 ~~
 Nitpick, These are not aligned,


done!


+#define KEXEC_CRASH_HP_ADD_MEMORY  3
+#define KEXEC_CRASH_HP_INVALID_CPU -1U
+
+struct kimage;
+
  #endif /* LINUX_CRASH_CORE_H */
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 4eefa631e0ae..9597b41136ec 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -374,6 +374,13 @@ struct kimage {
struct purgatory_info purgatory_info;
  #endif
  
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)

+   bool hotplug_event;
+   unsigned int offlinecpu;
+   bool elfcorehdr_index_valid;
+   int elfcorehdr_index;
+#endif
+
  #ifdef CONFIG_IMA_KEXEC
/* Virtual address of IMA measurement buffer for kexec syscall */
void *ima_buffer;
@@ -503,6 +510,25 @@ static inline int arch_kexec_post_alloc_pages(void *vaddr, 
unsigned int pages, g
  static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) 
{ }
  #endif
  
+#ifndef arch_map_crash_pages

+static inline void *arch_map_crash_pages(unsigned long paddr,
+   unsigned long size)
+{
+   return NULL;
+}
+#endif
+
+#ifndef arch_unmap_crash_pages
+static inline void arch_unmap_crash_pages(void **ptr) { }
+#endif
+
+#ifndef arch_crash_handle_hotplug_event
+static inline void arch_crash_handle_hotplug_event(struct kimage *image,
+   unsigned int hp_action)
+{
+}
+#endif
+
  #else /* !CONFIG_KEXEC_CORE */
  struct pt_regs;
  struct task_struct;
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 0f8aa659cca4..455150205ded 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -11,6 +11,8 @@
  #include 
  #include 
  #include 
+#include 
+#include 
  
  #include 

  #include 
@@ -18,6 +20,7 @@
  #include 
  
  #include "kallsyms_internal.h"

+#include "kexec_internal.h"
  
  /* vmcoreinfo stuff */

  unsigned char *vmcoreinfo_data;
@@ -611,3 +614,134 @@ static int __init crash_save_vmcoreinfo_init(void)
  }
  
  subsys_initcall(crash_save_vmcoreinfo_init);

+
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)
+/*
+ * To accurately reflect hot un/plug changes, the elfcorehdr (which
+ * is passed to the crash kernel via the elfcorehdr= parameter)
+ * must be updated with the new list of CPUs and memories.
+ *
+ * In order to make changes to elfcorehdr, two conditions are needed:
+ * First, the segment containing the elfcorehdr must be large enough
+ * to permit a growing number of resources. The elfcorehdr memory is
+ * typically based on CONFIG_NR_CPUS and CONFIG_CRASH_MAX_MEMORY_RANGES.
+ * Second, purgatory must explicitly exclude the elfcorehdr from the
+ * list of segments it checks (since the elfcorehdr changes and thus
+ * would require an update to purgatory itself to update the digest).
+ */
+static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
+{
+   /* Obtain lock while changing crash information */
+   mutex_lock(&kexec_mutex);
+
+   /* Check kdump is loaded */
+   if (kexec_crash_image) {
+   struct kimage *image = kexec_crash_image;
+
+   if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
+   hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
+   pr_debug("crash hp: hp_action %u, cpu %u\n", hp_action, 
cpu);
+   else
+   pr_debug("crash hp: hp_action %u", hp_action);


Re: [PATCH v11 3/7] crash: add generic infrastructure for crash hotplug support

2022-08-30 Thread Baoquan He
On 08/26/22 at 01:37pm, Eric DeVolder wrote:
> CPU and memory change notifications are received in order to
> regenerate the elfcorehdr.
> 
> To support cpu hotplug, a callback is registered to capture the
> CPUHP_AP_ONLINE_DYN online and offline events via
> cpuhp_setup_state_nocalls().
> 
> To support memory hotplug, a notifier is registered to capture the
> MEM_ONLINE and MEM_OFFLINE events via register_memory_notifier().
> 
> The cpu callback and memory notifiers call handle_hotplug_event()
> which performs needed tasks and then dispatches the event to the
> architecture specific arch_crash_handle_hotplug_event(). During the
> process, the kexec_mutex is held.
> 
> Signed-off-by: Eric DeVolder 
> ---
>  include/linux/crash_core.h |   8 +++
>  include/linux/kexec.h  |  26 +++
>  kernel/crash_core.c| 134 +
>  3 files changed, 168 insertions(+)
> 
> diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
> index de62a722431e..3b99e69b011f 100644
> --- a/include/linux/crash_core.h
> +++ b/include/linux/crash_core.h
> @@ -84,4 +84,12 @@ int parse_crashkernel_high(char *cmdline, unsigned long 
> long system_ram,
>  int parse_crashkernel_low(char *cmdline, unsigned long long system_ram,
>   unsigned long long *crash_size, unsigned long long *crash_base);
>  
> +#define KEXEC_CRASH_HP_REMOVE_CPU0
> +#define KEXEC_CRASH_HP_ADD_CPU   1
> +#define KEXEC_CRASH_HP_REMOVE_MEMORY 2
~~
Nitpick, These are not aligned,
> +#define KEXEC_CRASH_HP_ADD_MEMORY3
> +#define KEXEC_CRASH_HP_INVALID_CPU   -1U
> +
> +struct kimage;
> +
>  #endif /* LINUX_CRASH_CORE_H */
> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> index 4eefa631e0ae..9597b41136ec 100644
> --- a/include/linux/kexec.h
> +++ b/include/linux/kexec.h
> @@ -374,6 +374,13 @@ struct kimage {
>   struct purgatory_info purgatory_info;
>  #endif
>  
> +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)
> + bool hotplug_event;
> + unsigned int offlinecpu;
> + bool elfcorehdr_index_valid;
> + int elfcorehdr_index;
> +#endif
> +
>  #ifdef CONFIG_IMA_KEXEC
>   /* Virtual address of IMA measurement buffer for kexec syscall */
>   void *ima_buffer;
> @@ -503,6 +510,25 @@ static inline int arch_kexec_post_alloc_pages(void 
> *vaddr, unsigned int pages, g
>  static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int 
> pages) { }
>  #endif
>  
> +#ifndef arch_map_crash_pages
> +static inline void *arch_map_crash_pages(unsigned long paddr,
> + unsigned long size)
> +{
> + return NULL;
> +}
> +#endif
> +
> +#ifndef arch_unmap_crash_pages
> +static inline void arch_unmap_crash_pages(void **ptr) { }
> +#endif
> +
> +#ifndef arch_crash_handle_hotplug_event
> +static inline void arch_crash_handle_hotplug_event(struct kimage *image,
> + unsigned int hp_action)
> +{
> +}
> +#endif
> +
>  #else /* !CONFIG_KEXEC_CORE */
>  struct pt_regs;
>  struct task_struct;
> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
> index 0f8aa659cca4..455150205ded 100644
> --- a/kernel/crash_core.c
> +++ b/kernel/crash_core.c
> @@ -11,6 +11,8 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
>  
>  #include 
>  #include 
> @@ -18,6 +20,7 @@
>  #include 
>  
>  #include "kallsyms_internal.h"
> +#include "kexec_internal.h"
>  
>  /* vmcoreinfo stuff */
>  unsigned char *vmcoreinfo_data;
> @@ -611,3 +614,134 @@ static int __init crash_save_vmcoreinfo_init(void)
>  }
>  
>  subsys_initcall(crash_save_vmcoreinfo_init);
> +
> +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)
> +/*
> + * To accurately reflect hot un/plug changes, the elfcorehdr (which
> + * is passed to the crash kernel via the elfcorehdr= parameter)
> + * must be updated with the new list of CPUs and memories.
> + *
> + * In order to make changes to elfcorehdr, two conditions are needed:
> + * First, the segment containing the elfcorehdr must be large enough
> + * to permit a growing number of resources. The elfcorehdr memory is
> + * typically based on CONFIG_NR_CPUS and CONFIG_CRASH_MAX_MEMORY_RANGES.
> + * Second, purgatory must explicitly exclude the elfcorehdr from the
> + * list of segments it checks (since the elfcorehdr changes and thus
> + * would require an update to purgatory itself to update the digest).
> + */
> +static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
> +{
> + /* Obtain lock while changing crash information */
> + mutex_lock(&kexec_mutex);
> +
> + /* Check kdump is loaded */
> + if (kexec_crash_image) {
> + struct kimage *image = kexec_crash_image;
> +
> + if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
> + hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
> + pr_debug("crash hp: hp_action %u, cpu 

[PATCH v11 3/7] crash: add generic infrastructure for crash hotplug support

2022-08-26 Thread Eric DeVolder
CPU and memory change notifications are received in order to
regenerate the elfcorehdr.

To support cpu hotplug, a callback is registered to capture the
CPUHP_AP_ONLINE_DYN online and offline events via
cpuhp_setup_state_nocalls().

To support memory hotplug, a notifier is registered to capture the
MEM_ONLINE and MEM_OFFLINE events via register_memory_notifier().

The cpu callback and memory notifiers call handle_hotplug_event()
which performs needed tasks and then dispatches the event to the
architecture specific arch_crash_handle_hotplug_event(). During the
process, the kexec_mutex is held.

Signed-off-by: Eric DeVolder 
---
 include/linux/crash_core.h |   8 +++
 include/linux/kexec.h  |  26 +++
 kernel/crash_core.c| 134 +
 3 files changed, 168 insertions(+)

diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index de62a722431e..3b99e69b011f 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -84,4 +84,12 @@ int parse_crashkernel_high(char *cmdline, unsigned long long 
system_ram,
 int parse_crashkernel_low(char *cmdline, unsigned long long system_ram,
unsigned long long *crash_size, unsigned long long *crash_base);
 
+#define KEXEC_CRASH_HP_REMOVE_CPU  0
+#define KEXEC_CRASH_HP_ADD_CPU 1
+#define KEXEC_CRASH_HP_REMOVE_MEMORY   2
+#define KEXEC_CRASH_HP_ADD_MEMORY  3
+#define KEXEC_CRASH_HP_INVALID_CPU -1U
+
+struct kimage;
+
 #endif /* LINUX_CRASH_CORE_H */
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 4eefa631e0ae..9597b41136ec 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -374,6 +374,13 @@ struct kimage {
struct purgatory_info purgatory_info;
 #endif
 
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)
+   bool hotplug_event;
+   unsigned int offlinecpu;
+   bool elfcorehdr_index_valid;
+   int elfcorehdr_index;
+#endif
+
 #ifdef CONFIG_IMA_KEXEC
/* Virtual address of IMA measurement buffer for kexec syscall */
void *ima_buffer;
@@ -503,6 +510,25 @@ static inline int arch_kexec_post_alloc_pages(void *vaddr, 
unsigned int pages, g
 static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) 
{ }
 #endif
 
+#ifndef arch_map_crash_pages
+static inline void *arch_map_crash_pages(unsigned long paddr,
+   unsigned long size)
+{
+   return NULL;
+}
+#endif
+
+#ifndef arch_unmap_crash_pages
+static inline void arch_unmap_crash_pages(void **ptr) { }
+#endif
+
+#ifndef arch_crash_handle_hotplug_event
+static inline void arch_crash_handle_hotplug_event(struct kimage *image,
+   unsigned int hp_action)
+{
+}
+#endif
+
 #else /* !CONFIG_KEXEC_CORE */
 struct pt_regs;
 struct task_struct;
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 0f8aa659cca4..455150205ded 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -11,6 +11,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include 
 #include 
@@ -18,6 +20,7 @@
 #include 
 
 #include "kallsyms_internal.h"
+#include "kexec_internal.h"
 
 /* vmcoreinfo stuff */
 unsigned char *vmcoreinfo_data;
@@ -611,3 +614,134 @@ static int __init crash_save_vmcoreinfo_init(void)
 }
 
 subsys_initcall(crash_save_vmcoreinfo_init);
+
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)
+/*
+ * To accurately reflect hot un/plug changes, the elfcorehdr (which
+ * is passed to the crash kernel via the elfcorehdr= parameter)
+ * must be updated with the new list of CPUs and memories.
+ *
+ * In order to make changes to elfcorehdr, two conditions are needed:
+ * First, the segment containing the elfcorehdr must be large enough
+ * to permit a growing number of resources. The elfcorehdr memory is
+ * typically based on CONFIG_NR_CPUS and CONFIG_CRASH_MAX_MEMORY_RANGES.
+ * Second, purgatory must explicitly exclude the elfcorehdr from the
+ * list of segments it checks (since the elfcorehdr changes and thus
+ * would require an update to purgatory itself to update the digest).
+ */
+static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
+{
+   /* Obtain lock while changing crash information */
+   mutex_lock(&kexec_mutex);
+
+   /* Check kdump is loaded */
+   if (kexec_crash_image) {
+   struct kimage *image = kexec_crash_image;
+
+   if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
+   hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
+   pr_debug("crash hp: hp_action %u, cpu %u\n", hp_action, 
cpu);
+   else
+   pr_debug("crash hp: hp_action %u", hp_action);
+
+   /*
+* When the struct kimage is alloced, it is wiped to zero, so
+* the elfcorehdr_index_valid defaults to false. Find the
+* segment containing the elfcorehdr, if not already found.
+* This works