Re: [PATCH v2 3/3] doc/hotplug: update man and --help

2024-06-26 Thread Hari Bathini




On 14/06/24 12:37 am, Sourabh Jain wrote:

Update the man page and --help option to make the description of the
--hotplug option easier to understand.

Cc: Aditya Gupta 
Cc: Baoquan He 
Cc: Coiby Xu 
Cc: Hari Bathini 
Cc: Mahesh Salgaonkar 
Cc: Simon Horman 


Minor nits below...

Otherwise, looks good.

Acked-by: Hari Bathini 


Signed-off-by: Sourabh Jain 
---

Changelog:

Since v1:
   - Updated --hotplug option description

---
  kexec/kexec.8 | 8 
  kexec/kexec.c | 4 +++-
  2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/kexec/kexec.8 b/kexec/kexec.8
index 9e995fe..92fa3d5 100644
--- a/kexec/kexec.8
+++ b/kexec/kexec.8
@@ -140,10 +140,10 @@ Open a help file for
  .BR kexec .
  .TP
  .B \-\-hotplug
-Setup for kernel modification of the elfcorehdr. This option performs
-the steps needed to support kernel updates to the elfcorehdr in the
-presence of hot un/plug and/or on/offline events. This option only
-useful for KEXEC_LOAD syscall.
+Setup kexec segments such that kernel can safely update them on CPU/Memory
+hot add/remove events. If this option is enabled, kernel does in-kernel



+update of kexec segments on CPU/Memory hot add/remove events. Helps avoid
+the need to reload kdump kernel.


s/events. Helps avoid the need/events, thus avoiding the need/


  .TP
  .B \-i\ (\-\-no-checks)
  Fast reboot, no memory integrity checks.
diff --git a/kexec/kexec.c b/kexec/kexec.c
index 034cea6..db8e2b3 100644
--- a/kexec/kexec.c
+++ b/kexec/kexec.c
@@ -1093,7 +1093,9 @@ void usage(void)
   "  back to the compatibility syscall when file 
based\n"
   "  syscall is not supported or the kernel did 
not\n"
   "  understand the image (default)\n"
-  " --hotplugSetup for kernel modification of 
elfcorehdr.\n"
+  " --hotplugDo in-kernel update of kexec segments on 
CPU/Memory\n"



+  "  hot add/remove events. This avoids the need to 
reload\n"


s/events. This avoids/events, avoiding/


+  "  kdump kernel or on/offline events.\n"


s/kernel or on/kernel on online/


   " -d, --debug  Enable debugging to help spot a 
failure.\n"
   " -S, --status Return 1 if the type (by default crash) is 
loaded,\n"
   "  0 if not.\n"


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v2 2/3] powerpc/kexec_load: add hotplug support

2024-06-26 Thread Hari Bathini




On 14/06/24 12:37 am, Sourabh Jain wrote:

Kernel commits b741092d5976 ("powerpc/crash: add crash CPU hotplug
support") and 849599b702ef ("powerpc/crash: add crash memory hotplug
support") added crash CPU/Memory hotplug support on PowerPC. This patch
extends that support for the kexec_load syscall.

During CPU/Memory hotplug events on PowerPC, two kexec segments,
elfcorehdr, and FDT, get updated by the kernel. To ensure the kernel
can safely update these two kexec segments for the kdump image loaded
using the kexec_load system call, the following changes are made:

1. Extra size is allocated for both elfcorehdr and FDT to accommodate
additional resources in the future. For the elfcorehdr, the size hint
is taken from /sys/kernel/crash_elfcorehdr_size sysfs, while for FDT,
extra size is allocated to hold possible CPU nodes.

2. Both elfcorehdr and FDT are skipped from SHA calculation.

Cc: Aditya Gupta 
Cc: Baoquan He 
Cc: Coiby Xu 
Cc: Hari Bathini 
Cc: Mahesh Salgaonkar 
Cc: Simon Horman 


LGTM.

Acked-by: Hari Bathini 


Signed-off-by: Sourabh Jain 
---

Changelog:

Since v1:
   - Find CPUs in the system using the /sys/devices/system/cpu/present sysfs
 instead of traversing all nodes under /proc/device-tree/cpus.

   - Added a new function to find present CPUs in the system.

   - Removed unnecessary NULL check on seg_ptr from arch_do_exclude_segment().

---
  kexec/arch/ppc64/crashdump-ppc64.c  |  16 +-
  kexec/arch/ppc64/fdt.c  | 236 +++-
  kexec/arch/ppc64/include/arch/fdt.h |   2 +-
  kexec/arch/ppc64/kexec-elf-ppc64.c  |   2 +-
  kexec/arch/ppc64/kexec-ppc64.c  |   9 +-
  5 files changed, 258 insertions(+), 7 deletions(-)

diff --git a/kexec/arch/ppc64/crashdump-ppc64.c 
b/kexec/arch/ppc64/crashdump-ppc64.c
index 6d47898..98d439a 100644
--- a/kexec/arch/ppc64/crashdump-ppc64.c
+++ b/kexec/arch/ppc64/crashdump-ppc64.c
@@ -476,7 +476,7 @@ int load_crashdump_segments(struct kexec_info *info, char* 
mod_cmdline,
uint64_t max_addr, unsigned long min_base)
  {
void *tmp;
-   unsigned long sz;
+   unsigned long sz, memsz;
uint64_t elfcorehdr;
int nr_ranges, align = 1024, i;
unsigned long long end;
@@ -531,8 +531,18 @@ int load_crashdump_segments(struct kexec_info *info, char* 
mod_cmdline,
}
}
  
-	elfcorehdr = add_buffer(info, tmp, sz, sz, align, min_base,

-   max_addr, 1);
+   memsz = sz;
+   /* To support --hotplug, replace the calculated memsz with the value
+* from /sys/kernel/crash_elfcorehdr_size and align it correctly.
+*/
+   if (do_hotplug) {
+   if (elfcorehdrsz > sz)
+   memsz = _ALIGN(elfcorehdrsz, align);
+   }
+
+   /* Record the location of the elfcorehdr for hotplug handling */
+   info->elfcorehdr = elfcorehdr = add_buffer(info, tmp, sz, memsz, align,
+  min_base, max_addr, 1);
reserve(elfcorehdr, sz);
/* modify and store the cmdline in a global array. This is later
 * read by flatten_device_tree and modified if required
diff --git a/kexec/arch/ppc64/fdt.c b/kexec/arch/ppc64/fdt.c
index 8bc6d2d..879240f 100644
--- a/kexec/arch/ppc64/fdt.c
+++ b/kexec/arch/ppc64/fdt.c
@@ -17,6 +17,13 @@
  #include 
  #include 
  #include 
+#include 
+#include 
+#include 
+#include 
+
+#include "../../kexec.h"
+#include "../../kexec-syscall.h"
  
  /*

   * Let the kernel know it booted from kexec, as some things (e.g.
@@ -46,17 +53,244 @@ static int fixup_kexec_prop(void *fdt)
return 0;
  }
  
+static inline bool is_dot_dir(char * d_path)

+{
+   return d_path[0] == '.';
+}
+
+/*
+ * get_cpu_node_size - Returns size of files including file name size under
+ * the given @cpu_node_path.
+ */
+static int get_cpu_node_size(char *cpu_node_path)
+{
+   DIR *d;
+   struct dirent *de;
+   struct stat statbuf;
+   int cpu_node_size = 0;
+   char cpu_prop_path[2 * PATH_MAX];
+
+   d = opendir(cpu_node_path);
+   if (!d)
+   return 0;
+
+   while ((de = readdir(d)) != NULL) {
+   if (de->d_type != DT_REG)
+   continue;
+
+   memset(cpu_prop_path, '\0', PATH_MAX);
+   snprintf(cpu_prop_path, 2 * PATH_MAX, "%s/%s", cpu_node_path,
+de->d_name);
+
+   if (stat(cpu_prop_path, ))
+   continue;
+
+   cpu_node_size += statbuf.st_size;
+   cpu_node_size += strlen(de->d_name);
+   }
+
+   return cpu_node_size;
+}
+
+/*
+ * is_cpu_node - Checks if the node specified by the given @path
+ *   represents a CPU node.
+ *
+ * Returns true if the @path has a "device_type" file containing "cpu";
+ 

Re: [PATCH 2/3] powerpc/kexec_load: add hotplug support

2024-06-10 Thread Hari Bathini




On 22/05/24 6:43 pm, Sourabh Jain wrote:

Kernel commits b741092d5976 ("powerpc/crash: add crash CPU hotplug
support") and 849599b702ef ("powerpc/crash: add crash memory hotplug
support") added crash CPU/Memory hotplug support on PowerPC. This patch
extends that support for the kexec_load syscall.

During CPU/Memory hotplug events on PowerPC, two kexec segments,
elfcorehdr, and FDT, get updated by the kernel. To ensure the kernel
can safely update these two kexec segments for the kdump image loaded
using the kexec_load system call, the following changes are made:

1. Extra size is allocated for both elfcorehdr and FDT to accommodate
additional resources in the future. For the elfcorehdr, the size hint
is taken from /sys/kernel/crash_elfcorehdr_size sysfs, while for FDT,
extra size is allocated to hold possible CPU nodes.

2. Both elfcorehdr and FDT are skipped from SHA calculation.

Cc: Aditya Gupta 
Cc: Baoquan He 
Cc: Coiby Xu 
Cc: Hari Bathini 
Cc: Mahesh Salgaonkar 
Signed-off-by: Sourabh Jain 
---
  kexec/arch/ppc64/crashdump-ppc64.c  |  16 ++-
  kexec/arch/ppc64/fdt.c  | 200 +++-
  kexec/arch/ppc64/include/arch/fdt.h |   2 +-
  kexec/arch/ppc64/kexec-elf-ppc64.c  |   2 +-
  kexec/arch/ppc64/kexec-ppc64.c  |  12 +-
  5 files changed, 225 insertions(+), 7 deletions(-)

diff --git a/kexec/arch/ppc64/crashdump-ppc64.c 
b/kexec/arch/ppc64/crashdump-ppc64.c
index 6d47898..c14b593 100644
--- a/kexec/arch/ppc64/crashdump-ppc64.c
+++ b/kexec/arch/ppc64/crashdump-ppc64.c
@@ -476,7 +476,7 @@ int load_crashdump_segments(struct kexec_info *info, char* 
mod_cmdline,
uint64_t max_addr, unsigned long min_base)
  {
void *tmp;
-   unsigned long sz;
+   unsigned long sz, memsz;
uint64_t elfcorehdr;
int nr_ranges, align = 1024, i;
unsigned long long end;
@@ -531,8 +531,18 @@ int load_crashdump_segments(struct kexec_info *info, char* 
mod_cmdline,
}
}
  
-	elfcorehdr = add_buffer(info, tmp, sz, sz, align, min_base,

-   max_addr, 1);
+   memsz = sz;
+   /* To support --hotplug, replace the calculated minimum size with the
+* value from /sys/kernel/crash_elfcorehdr_size and align it correctly.
+*/
+   if (do_hotplug) {
+   if (elfcorehdrsz > sz)
+   memsz = _ALIGN(elfcorehdrsz, align);
+   }
+
+   /* Record the location of the elfcorehdr for hotplug handling */
+   info->elfcorehdr = elfcorehdr = add_buffer(info, tmp, sz, memsz, align,
+  min_base, max_addr, 1);
reserve(elfcorehdr, sz);
/* modify and store the cmdline in a global array. This is later
 * read by flatten_device_tree and modified if required
diff --git a/kexec/arch/ppc64/fdt.c b/kexec/arch/ppc64/fdt.c
index 8bc6d2d..10abc29 100644
--- a/kexec/arch/ppc64/fdt.c
+++ b/kexec/arch/ppc64/fdt.c
@@ -17,6 +17,13 @@
  #include 
  #include 
  #include 
+#include 
+#include 
+#include 
+#include 
+
+#include "../../kexec.h"
+#include "../../kexec-syscall.h"
  
  /*

   * Let the kernel know it booted from kexec, as some things (e.g.
@@ -46,17 +53,208 @@ static int fixup_kexec_prop(void *fdt)
return 0;
  }
  
+static inline bool is_dot_dir(char * d_path)

+{
+   return d_path[0] == '.';
+}
+
+/*
+ * Returns size of files including file name size under the given
+ * @cpu_node_path.
+ */
+static unsigned int get_cpu_node_size(char *cpu_node_path)
+{
+   DIR *d;
+   struct dirent *de;
+   struct stat statbuf;
+   unsigned int cpu_node_size = 0;
+   char cpu_prop_path[2 * PATH_MAX];
+
+   d = opendir(cpu_node_path);
+   if (!d)
+   return 0;
+
+   while ((de = readdir(d)) != NULL) {
+   if (de->d_type != DT_REG)
+   continue;
+
+   memset(cpu_prop_path, '\0', PATH_MAX);
+   snprintf(cpu_prop_path, 2 * PATH_MAX, "%s/%s", cpu_node_path, 
de->d_name);
+
+   if (stat(cpu_prop_path, ))
+   continue;
+
+   cpu_node_size += statbuf.st_size;
+   cpu_node_size += strlen(de->d_name);
+   }
+
+   return cpu_node_size;
+}
+
+/*
+ * Checks if the node specified by the given @path represents a CPU node.
+ *
+ * Returns true if the @path has a "device_type" file containing "cpu";
+ * otherwise, returns false.
+ */
+static bool is_cpu_node(char *path)
+{
+   FILE *file;
+   bool ret = false;
+   char device_type[4];
+
+   file = fopen(path, "r");
+   if (!file)
+   return false;
+
+   memset(device_type, '\0', 4);
+   if (fread(device_type, 1, 3, file) < 3)
+   goto out;
+
+   if (strcmp(device_type, "cpu"))
+  

Re: [PATCH 3/3] doc/hotplug: update man and --help

2024-06-10 Thread Hari Bathini




On 22/05/24 6:43 pm, Sourabh Jain wrote:

Update the man page and --help option to make the description of the
--hotplug option easier to understand.

Cc: Aditya Gupta 
Cc: Baoquan He 
Cc: Coiby Xu 
Cc: Hari Bathini 
Cc: Mahesh Salgaonkar 
Signed-off-by: Sourabh Jain 
---
  kexec/kexec.8 | 8 
  kexec/kexec.c | 3 ++-
  2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/kexec/kexec.8 b/kexec/kexec.8
index 9e995fe..7dddae9 100644
--- a/kexec/kexec.8
+++ b/kexec/kexec.8
@@ -140,10 +140,10 @@ Open a help file for
  .BR kexec .
  .TP
  .B \-\-hotplug


Can we have the description changed like:


-Setup for kernel modification of the elfcorehdr. This option performs
-the steps needed to support kernel updates to the elfcorehdr in the
-presence of hot un/plug and/or on/offline events. This option only
-useful for KEXEC_LOAD syscall.
+Helps avoid kdump kernel reload on CPU/Memory hotplug or on/offline events.
+If this option is enabled, the kexec segments will be set up in a way that
+the kernel can safely update them on CPU/memory hotplug and/or on/offline
+events. This option is only useful for the KEXEC_LOAD syscall.


"Setup kexec segments such that kernel can safely update them on 
CPU/Memory hot add/remove events. If this option is enabled, kernel does

in-kernel update of kexec segments on CPU/Memory hot add/remove events.
Helps avoid the need to reload kdump kernel."



  .TP
  .B \-i\ (\-\-no-checks)
  Fast reboot, no memory integrity checks.
diff --git a/kexec/kexec.c b/kexec/kexec.c
index 034cea6..2b06438 100644
--- a/kexec/kexec.c
+++ b/kexec/kexec.c
@@ -1093,7 +1093,8 @@ void usage(void)
   "  back to the compatibility syscall when file 
based\n"
   "  syscall is not supported or the kernel did 
not\n"
   "  understand the image (default)\n"
-  " --hotplugSetup for kernel modification of 
elfcorehdr.\n"



+  " --hotplugHelps avoid kdump kernel reload on CPU/Memory 
hotplug\n"
+  " or on/offline events.\n"


"Do in-kernel update of kexec segments on CPU/Memory hot add/remove 
events. This avoids the need to reload kdump kernel."


- Hari

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 1/3] kexec_load: Use new kexec flag for hotplug support

2024-06-10 Thread Hari Bathini




On 22/05/24 6:43 pm, Sourabh Jain wrote:

Kernel commit 79365026f869 (crash: add a new kexec flag for hotplug
support) has introduced a new kexec flag to generalize hotplug support.
The newly introduced kexec flags for hotplug allow architectures to
exclude all the required kexec segments from SHA calculation so that
the kernel can update them on hotplug events. This was not possible
earlier with the KEXEC_UPDATE_ELFCOREHDR kexec flags since it was added
only for the elfcorehdr segment.

To enable architectures to control the list of kexec segments to exclude
when hotplug support is enabled, add a new architecture-specific
function named arch_do_exclude_segment. During the SHA calculation, this
function gets called to let the architecture decide whether a specific
kexec segment should be considered for SHA calculation or not.

Given that the KEXEC_UPDATE_ELFCOREHDR is no longer required and was
colliding with the KEXEC_LIVE_UPDATE update flag, it is removed.

Cc: Aditya Gupta 
Cc: Baoquan He 
Cc: Coiby Xu 
Cc: Hari Bathini 
Cc: Mahesh Salgaonkar 
Signed-off-by: Sourabh Jain 
---
  kexec/arch/arm/kexec-arm.c |  5 +
  kexec/arch/arm64/kexec-arm64.c |  4 
  kexec/arch/cris/kexec-cris.c   |  4 
  kexec/arch/hppa/kexec-hppa.c   |  5 +
  kexec/arch/i386/kexec-x86.c|  8 
  kexec/arch/ia64/kexec-ia64.c   |  4 
  kexec/arch/loongarch/kexec-loongarch.c |  5 +
  kexec/arch/m68k/kexec-m68k.c   |  5 +
  kexec/arch/mips/kexec-mips.c   |  4 
  kexec/arch/ppc/kexec-ppc.c |  4 
  kexec/arch/ppc64/kexec-ppc64.c |  5 +
  kexec/arch/s390/kexec-s390.c   |  5 +
  kexec/arch/sh/kexec-sh.c   |  5 +
  kexec/arch/x86_64/kexec-x86_64.c   |  5 +
  kexec/kexec-syscall.h  |  2 +-
  kexec/kexec.c  | 14 --
  kexec/kexec.h  |  2 ++
  17 files changed, 79 insertions(+), 7 deletions(-)

diff --git a/kexec/arch/arm/kexec-arm.c b/kexec/arch/arm/kexec-arm.c
index 49f35b1..34531f9 100644
--- a/kexec/arch/arm/kexec-arm.c
+++ b/kexec/arch/arm/kexec-arm.c
@@ -148,3 +148,8 @@ int have_sysfs_fdt(void)
  {
return !access(SYSFS_FDT, F_OK);
  }
+
+int arch_do_exclude_segment(struct kexec_segment *UNUSED(seg_ptr), struct 
kexec_info *UNUSED(info))
+{
+   return 0;
+}
diff --git a/kexec/arch/arm64/kexec-arm64.c b/kexec/arch/arm64/kexec-arm64.c
index 4a67b0d..9d052b0 100644
--- a/kexec/arch/arm64/kexec-arm64.c
+++ b/kexec/arch/arm64/kexec-arm64.c
@@ -1363,3 +1363,7 @@ void arch_reuse_initrd(void)
  void arch_update_purgatory(struct kexec_info *UNUSED(info))
  {
  }
+int arch_do_exclude_segment(struct kexec_segment *UNUSED(seg_ptr), struct 
kexec_info *UNUSED(info))
+{
+   return 0;
+}
diff --git a/kexec/arch/cris/kexec-cris.c b/kexec/arch/cris/kexec-cris.c
index 3b69709..7f09121 100644
--- a/kexec/arch/cris/kexec-cris.c
+++ b/kexec/arch/cris/kexec-cris.c
@@ -109,3 +109,7 @@ unsigned long add_buffer(struct kexec_info *info, const 
void *buf,
  buf_min, buf_max, buf_end, 1);
  }
  
+int arch_do_exclude_segment(struct kexec_segment *UNUSED(seg_ptr), struct kexec_info *UNUSED(info))

+{
+   return 0;
+}
diff --git a/kexec/arch/hppa/kexec-hppa.c b/kexec/arch/hppa/kexec-hppa.c
index 77c9739..a64dc3d 100644
--- a/kexec/arch/hppa/kexec-hppa.c
+++ b/kexec/arch/hppa/kexec-hppa.c
@@ -146,3 +146,8 @@ unsigned long virt_to_phys(unsigned long addr)
  {
return addr - phys_offset;
  }
+
+int arch_do_exclude_segment(struct kexec_segment *UNUSED(seg_ptr), struct 
kexec_info *UNUSED(info))
+{
+   return 0;
+}
diff --git a/kexec/arch/i386/kexec-x86.c b/kexec/arch/i386/kexec-x86.c
index 444cb69..b4947a0 100644
--- a/kexec/arch/i386/kexec-x86.c
+++ b/kexec/arch/i386/kexec-x86.c
@@ -208,3 +208,11 @@ void arch_update_purgatory(struct kexec_info *info)
elf_rel_set_symbol(>rhdr, "panic_kernel",
_kernel, sizeof(panic_kernel));
  }
+
+int arch_do_exclude_segment(struct kexec_segment *seg_ptr, struct kexec_info 
*info)
+{
+   if (info->elfcorehdr == (unsigned long) seg_ptr->mem)
+   return 1;
+
+   return 0;
+}
diff --git a/kexec/arch/ia64/kexec-ia64.c b/kexec/arch/ia64/kexec-ia64.c
index 418d997..8d9c1f3 100644
--- a/kexec/arch/ia64/kexec-ia64.c
+++ b/kexec/arch/ia64/kexec-ia64.c
@@ -245,3 +245,7 @@ void arch_update_purgatory(struct kexec_info *UNUSED(info))
  {
  }
  
+int arch_do_exclude_segment(struct kexec_segment *UNUSED(seg_ptr), struct kexec_info *UNUSED(info))

+{
+   return 0;
+}
diff --git a/kexec/arch/loongarch/kexec-loongarch.c 
b/kexec/arch/loongarch/kexec-loongarch.c
index 32a42d2..9a50ff6 100644
--- a/kexec/arch/loongarch/kexec-loongarch.c
+++ b/kexec/arch/loongarch/kexec-loongarch.c
@@ -378,3 +378,8 @@ unsigned long add_buffer(struct kexec_info *info, const 
void *

Re: [PATCH bpf-next v2] bpf: fix warning for crash_kexec

2024-03-21 Thread Hari Bathini



On 20/03/24 4:26 am, Andrii Nakryiko wrote:

On Tue, Mar 19, 2024 at 1:02 AM Hari Bathini  wrote:


With [1], crash dump specific code is moved out of CONFIG_KEXEC_CORE
and placed under CONFIG_CRASH_DUMP, where it is more appropriate.
And since CONFIG_KEXEC & !CONFIG_CRASH_DUMP build option is supported
with that, it led to the below warning:

   "WARN: resolve_btfids: unresolved symbol crash_kexec"

Fix it by using the appropriate #ifdef.

[1] https://lore.kernel.org/all/20240124051254.67105-1-...@redhat.com/

Acked-by: Baoquan He 
Fixes: 29fd9ae62910 ("crash: split crash dumping code out from kexec_core.c")


I don't think either bpf or bpf-next have this commit just yet, so
landing it in the bpf/bpf-next tree doesn't make much sense. It
probably would be best to land it through the tree which does the
CONFIG_KEXEC_CORE -> CONFIG_CRASH_DUMP change in kernel/Makefile.


Yeah, [1] patches were still not in bpf-next when I sent this v2.
Andrew, can you please consider pulling this via -mm tree if that
sounds more appropriate?

Thanks
Hari



pw-bot: cr


Acked-by: Jiri Olsa 
Acked-by: Stanislav Fomichev 
Signed-off-by: Hari Bathini 
---

Changes in v2:
* Updated changelog.
* Added Fixes and Acked-by tags.


  kernel/bpf/helpers.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a89587859571..449b9a5d3fe3 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2548,7 +2548,7 @@ __bpf_kfunc void bpf_throw(u64 cookie)
  __bpf_kfunc_end_defs();

  BTF_KFUNCS_START(generic_btf_ids)
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_DUMP
  BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
  #endif
  BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
--
2.44.0



___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH linux-next] bpf: fix warning for crash_kexec

2024-03-19 Thread Hari Bathini



On 18/03/24 9:43 pm, Yonghong Song wrote:


On 3/17/24 11:52 PM, Hari Bathini wrote:

Just checking on whether this will go via bpf or mm tree?


Sending to bpf-next should be okay.
Could you resubmit the patch as CONFIG_CRASH_DUMP probably not
available to bpf-next when you initially submitted the patch.


OK. Sent V2 with tags added and changelog updated:


https://lore.kernel.org/all/20240319080152.36987-1-hbath...@linux.ibm.com/





On 09/02/24 6:05 pm, Hari Bathini wrote:

With [1], CONFIG_KEXEC & !CONFIG_CRASH_DUMP is supported but that led
to the below warning:

   "WARN: resolve_btfids: unresolved symbol crash_kexec"

Fix it by using the appropriate #ifdef.

[1] https://lore.kernel.org/all/20240124051254.67105-1-...@redhat.com/

Signed-off-by: Hari Bathini 
---
  kernel/bpf/helpers.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 4db1c658254c..e408d1115e26 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2545,7 +2545,7 @@ __bpf_kfunc void bpf_throw(u64 cookie)
  __bpf_kfunc_end_defs();
    BTF_KFUNCS_START(generic_btf_ids)
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_DUMP
  BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
  #endif
  BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)




___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH bpf-next v2] bpf: fix warning for crash_kexec

2024-03-19 Thread Hari Bathini
With [1], crash dump specific code is moved out of CONFIG_KEXEC_CORE
and placed under CONFIG_CRASH_DUMP, where it is more appropriate.
And since CONFIG_KEXEC & !CONFIG_CRASH_DUMP build option is supported
with that, it led to the below warning:

  "WARN: resolve_btfids: unresolved symbol crash_kexec"

Fix it by using the appropriate #ifdef.

[1] https://lore.kernel.org/all/20240124051254.67105-1-...@redhat.com/

Acked-by: Baoquan He 
Fixes: 29fd9ae62910 ("crash: split crash dumping code out from kexec_core.c")
Acked-by: Jiri Olsa 
Acked-by: Stanislav Fomichev 
Signed-off-by: Hari Bathini 
---

Changes in v2:
* Updated changelog.
* Added Fixes and Acked-by tags.


 kernel/bpf/helpers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a89587859571..449b9a5d3fe3 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2548,7 +2548,7 @@ __bpf_kfunc void bpf_throw(u64 cookie)
 __bpf_kfunc_end_defs();
 
 BTF_KFUNCS_START(generic_btf_ids)
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_DUMP
 BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
 #endif
 BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
-- 
2.44.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH linux-next] bpf: fix warning for crash_kexec

2024-03-18 Thread Hari Bathini

Just checking on whether this will go via bpf or mm tree?

On 09/02/24 6:05 pm, Hari Bathini wrote:

With [1], CONFIG_KEXEC & !CONFIG_CRASH_DUMP is supported but that led
to the below warning:

   "WARN: resolve_btfids: unresolved symbol crash_kexec"

Fix it by using the appropriate #ifdef.

[1] https://lore.kernel.org/all/20240124051254.67105-1-...@redhat.com/

Signed-off-by: Hari Bathini 
---
  kernel/bpf/helpers.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 4db1c658254c..e408d1115e26 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2545,7 +2545,7 @@ __bpf_kfunc void bpf_throw(u64 cookie)
  __bpf_kfunc_end_defs();
  
  BTF_KFUNCS_START(generic_btf_ids)

-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_DUMP
  BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
  #endif
  BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v17 6/6] powerpc/crash: add crash memory hotplug support

2024-03-02 Thread Hari Bathini




On 26/02/24 2:11 pm, Sourabh Jain wrote:

Extend the arch crash hotplug handler, as introduced by the patch title
("powerpc: add crash CPU hotplug support"), to also support memory
add/remove events.

Elfcorehdr describes the memory of the crash kernel to capture the
kernel; hence, it needs to be updated if memory resources change due to
memory add/remove events. Therefore, arch_crash_handle_hotplug_event()
is updated to recreate the elfcorehdr and replace it with the previous
one on memory add/remove events.

The memblock list is used to prepare the elfcorehdr. In the case of
memory hot remove, the memblock list is updated after the arch crash
hotplug handler is triggered, as depicted in Figure 1. Thus, the
hot-removed memory is explicitly removed from the crash memory ranges
to ensure that the memory ranges added to elfcorehdr do not include the
hot-removed memory.

 Memory remove
   |
   v
 Offline pages
   |
   v
  Initiate memory notify call <> crash hotplug handler
  chain for MEM_OFFLINE event
   |
   v
  Update memblock list

Figure 1

There are two system calls, `kexec_file_load` and `kexec_load`, used to
load the kdump image. A few changes have been made to ensure that the
kernel can safely update the elfcorehdr component of the kdump image for
both system calls.

For the kexec_file_load syscall, kdump image is prepared in the kernel.
To support an increasing number of memory regions, the elfcorehdr is
built with extra buffer space to ensure that it can accommodate
additional memory ranges in future.

For the kexec_load syscall, the elfcorehdr is updated only if the
KEXEC_CRASH_HOTPLUG_SUPPORT kexec flag is passed to the kernel by the
kexec tool. Passing this flag to the kernel indicates that the
elfcorehdr is built to accommodate additional memory ranges and the
elfcorehdr segment is not considered for SHA calculation, making it safe
to update.

The changes related to this feature are kept under the CRASH_HOTPLUG
config, and it is enabled by default.



Overall, the patchset looks good. I tried out the changes too.

Acked-by: Hari Bathini 


Signed-off-by: Sourabh Jain 
Cc: Akhil Raj 
Cc: Andrew Morton 
Cc: Aneesh Kumar K.V 
Cc: Baoquan He 
Cc: Borislav Petkov (AMD) 
Cc: Boris Ostrovsky 
Cc: Christophe Leroy 
Cc: Dave Hansen 
Cc: Dave Young 
Cc: David Hildenbrand 
Cc: Greg Kroah-Hartman 
Cc: Hari Bathini 
Cc: Laurent Dufour 
Cc: Mahesh Salgaonkar 
Cc: Michael Ellerman 
Cc: Mimi Zohar 
Cc: Naveen N Rao 
Cc: Oscar Salvador 
Cc: Thomas Gleixner 
Cc: Valentin Schneider 
Cc: Vivek Goyal 
Cc: kexec@lists.infradead.org
Cc: x...@kernel.org
---
  arch/powerpc/include/asm/kexec.h|  3 +
  arch/powerpc/include/asm/kexec_ranges.h |  1 +
  arch/powerpc/kexec/crash.c  | 95 -
  arch/powerpc/kexec/file_load_64.c   | 20 +-
  arch/powerpc/kexec/ranges.c | 85 ++
  5 files changed, 202 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index e75970351bcd..95a98b390d62 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -141,6 +141,9 @@ void arch_crash_handle_hotplug_event(struct kimage *image, 
void *arg);
  
  int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags);

  #define arch_crash_hotplug_support arch_crash_hotplug_support
+
+unsigned int arch_crash_get_elfcorehdr_size(void);
+#define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size
  #endif /* CONFIG_CRASH_HOTPLUG */
  
  extern int crashing_cpu;

diff --git a/arch/powerpc/include/asm/kexec_ranges.h 
b/arch/powerpc/include/asm/kexec_ranges.h
index 8489e844b447..14055896cbcb 100644
--- a/arch/powerpc/include/asm/kexec_ranges.h
+++ b/arch/powerpc/include/asm/kexec_ranges.h
@@ -7,6 +7,7 @@
  void sort_memory_ranges(struct crash_mem *mrngs, bool merge);
  struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges);
  int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
+int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
  int get_exclude_memory_ranges(struct crash_mem **mem_ranges);
  int get_reserved_memory_ranges(struct crash_mem **mem_ranges);
  int get_crash_memory_ranges(struct crash_mem **mem_ranges);
diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
index 8938a19af12f..21b193e938a3 100644
--- a/arch/powerpc/kexec/crash.c
+++ b/arch/powerpc/kexec/crash.c
@@ -17,6 +17,7 @@
  #include 
  #include 
  #include 
+#include 
  
  #include 

  #include 
@@ -25,6 +26,7 @@
  #include 
  #include 
  #include 
+#include 
  
  /*

   * The primary CPU waits a while for all secondary CPUs to enter. This is to
@@ -398,6 +400,94 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
  #undef pr_fmt
  #define pr_fmt(fmt) "crash hp: " fmt
  
+/*

+ * Advertise preferred elfcorehdr size to use

Re: [PATCH v17 5/6] powerpc/crash: add crash CPU hotplug support

2024-03-02 Thread Hari Bathini




On 26/02/24 2:11 pm, Sourabh Jain wrote:

Due to CPU/Memory hotplug or online/offline events, the elfcorehdr
(which describes the CPUs and memory of the crashed kernel) and FDT
(Flattened Device Tree) of kdump image becomes outdated. Consequently,
attempting dump collection with an outdated elfcorehdr or FDT can lead
to failed or inaccurate dump collection.

Going forward, CPU hotplug or online/offline events are referred as
CPU/Memory add/remove events.

The current solution to address the above issue involves monitoring the
CPU/Memory add/remove events in userspace using udev rules and whenever
there are changes in CPU and memory resources, the entire kdump image
is loaded again. The kdump image includes kernel, initrd, elfcorehdr,
FDT, purgatory. Given that only elfcorehdr and FDT get outdated due to
CPU/Memory add/remove events, reloading the entire kdump image is
inefficient. More importantly, kdump remains inactive for a substantial
amount of time until the kdump reload completes.

To address the aforementioned issue, commit 247262756121 ("crash: add
generic infrastructure for crash hotplug support") added a generic
infrastructure that allows architectures to selectively update the kdump
image component during CPU or memory add/remove events within the kernel
itself.

In the event of a CPU or memory add/remove events, the generic crash
hotplug event handler, `crash_handle_hotplug_event()`, is triggered. It
then acquires the necessary locks to update the kdump image and invokes
the architecture-specific crash hotplug handler,
`arch_crash_handle_hotplug_event()`, to update the required kdump image
components.

This patch adds crash hotplug handler for PowerPC and enable support to
update the kdump image on CPU add/remove events. Support for memory
add/remove events is added in a subsequent patch with the title
"powerpc: add crash memory hotplug support"

As mentioned earlier, only the elfcorehdr and FDT kdump image components
need to be updated in the event of CPU or memory add/remove events.
However, on PowerPC architecture crash hotplug handler only updates the
FDT to enable crash hotplug support for CPU add/remove events. Here's
why.

The elfcorehdr on PowerPC is built with possible CPUs, and thus, it does
not need an update on CPU add/remove events. On the other hand, the FDT
needs to be updated on CPU add events to include the newly added CPU. If
the FDT is not updated and the kernel crashes on a newly added CPU, the
kdump kernel will fail to boot due to the unavailability of the crashing
CPU in the FDT. During the early boot, it is expected that the boot CPU
must be a part of the FDT; otherwise, the kernel will raise a BUG and
fail to boot. For more information, refer to commit 36ae37e3436b0
("powerpc: Make boot_cpuid common between 32 and 64-bit"). Since it is
okay to have an offline CPU in the kdump FDT, no action is taken in case
of CPU removal.

There are two system calls, `kexec_file_load` and `kexec_load`, used to
load the kdump image. Few changes have been made to ensure kernel can
safely update the FDT of kdump image loaded using both system calls.

For kexec_file_load syscall the kdump image is prepared in kernel. So to
support an increasing number of CPUs, the FDT is constructed with extra
buffer space to ensure it can accommodate a possible number of CPU
nodes. Additionally, a call to fdt_pack (which trims the unused space
once the FDT is prepared) is avoided if this feature is enabled.

For the kexec_load syscall, the FDT is updated only if the
KEXEC_CRASH_HOTPLUG_SUPPORT kexec flag is passed to the kernel by
userspace (kexec tools). When userspace passes this flag to the kernel,
it indicates that the FDT is built to accommodate possible CPUs, and the
FDT segment is excluded from SHA calculation, making it safe to update.

The changes related to this feature are kept under the CRASH_HOTPLUG
config, and it is enabled by default.



Looks good.

Acked-by: Hari Bathini 


Signed-off-by: Sourabh Jain 
Cc: Akhil Raj 
Cc: Andrew Morton 
Cc: Aneesh Kumar K.V 
Cc: Baoquan He 
Cc: Borislav Petkov (AMD) 
Cc: Boris Ostrovsky 
Cc: Christophe Leroy 
Cc: Dave Hansen 
Cc: Dave Young 
Cc: David Hildenbrand 
Cc: Greg Kroah-Hartman 
Cc: Hari Bathini 
Cc: Laurent Dufour 
Cc: Mahesh Salgaonkar 
Cc: Michael Ellerman 
Cc: Mimi Zohar 
Cc: Naveen N Rao 
Cc: Oscar Salvador 
Cc: Thomas Gleixner 
Cc: Valentin Schneider 
Cc: Vivek Goyal 
Cc: kexec@lists.infradead.org
Cc: x...@kernel.org
---
  arch/powerpc/Kconfig  |   4 ++
  arch/powerpc/include/asm/kexec.h  |   8 +++
  arch/powerpc/kexec/crash.c| 103 ++
  arch/powerpc/kexec/elf_64.c   |   3 +-
  arch/powerpc/kexec/file_load_64.c |  17 +
  5 files changed, 134 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e377deefa2dc..16d2b20574c4 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -686,6 +686,10 @@ 

Re: [PATCH v17 4/6] PowerPC/kexec: make the update_cpus_node() function public

2024-03-02 Thread Hari Bathini




On 26/02/24 2:11 pm, Sourabh Jain wrote:

Move the update_cpus_node() from kexec/{file_load_64.c => core_64.c}
to allow other kexec components to use it.

Later in the series, this function is used for in-kernel updates
to the kdump image during CPU/memory hotplug or online/offline events for
both kexec_load and kexec_file_load syscalls.

No functional changes are intended.



Looks good to me.

Acked-by: Hari Bathini 


Signed-off-by: Sourabh Jain 
Cc: Akhil Raj 
Cc: Andrew Morton 
Cc: Aneesh Kumar K.V 
Cc: Baoquan He 
Cc: Borislav Petkov (AMD) 
Cc: Boris Ostrovsky 
Cc: Christophe Leroy 
Cc: Dave Hansen 
Cc: Dave Young 
Cc: David Hildenbrand 
Cc: Greg Kroah-Hartman 
Cc: Hari Bathini 
Cc: Laurent Dufour 
Cc: Mahesh Salgaonkar 
Cc: Michael Ellerman 
Cc: Mimi Zohar 
Cc: Naveen N Rao 
Cc: Oscar Salvador 
Cc: Thomas Gleixner 
Cc: Valentin Schneider 
Cc: Vivek Goyal 
Cc: kexec@lists.infradead.org
Cc: x...@kernel.org
---
  arch/powerpc/include/asm/kexec.h  |  4 ++
  arch/powerpc/kexec/core_64.c  | 91 +++
  arch/powerpc/kexec/file_load_64.c | 87 -
  3 files changed, 95 insertions(+), 87 deletions(-)

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index fdb90e24dc74..d9ff4d0e392d 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -185,6 +185,10 @@ static inline void crash_send_ipi(void 
(*crash_ipi_callback)(struct pt_regs *))
  
  #endif /* CONFIG_CRASH_DUMP */
  
+#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)

+int update_cpus_node(void *fdt);
+#endif
+
  #ifdef CONFIG_PPC_BOOK3S_64
  #include 
  #endif
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 762e4d09aacf..85050be08a23 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -17,6 +17,7 @@
  #include 
  #include 
  #include 
+#include 
  
  #include 

  #include 
@@ -30,6 +31,7 @@
  #include 
  #include 
  #include 
+#include 
  
  int machine_kexec_prepare(struct kimage *image)

  {
@@ -419,3 +421,92 @@ static int __init export_htab_values(void)
  }
  late_initcall(export_htab_values);
  #endif /* CONFIG_PPC_64S_HASH_MMU */
+
+#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)
+/**
+ * add_node_props - Reads node properties from device node structure and add
+ *  them to fdt.
+ * @fdt:Flattened device tree of the kernel
+ * @node_offset:offset of the node to add a property at
+ * @dn: device node pointer
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_node_props(void *fdt, int node_offset, const struct device_node 
*dn)
+{
+   int ret = 0;
+   struct property *pp;
+
+   if (!dn)
+   return -EINVAL;
+
+   for_each_property_of_node(dn, pp) {
+   ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, 
pp->length);
+   if (ret < 0) {
+   pr_err("Unable to add %s property: %s\n", pp->name, 
fdt_strerror(ret));
+   return ret;
+   }
+   }
+   return ret;
+}
+
+/**
+ * update_cpus_node - Update cpus node of flattened device tree using of_root
+ *device node.
+ * @fdt:  Flattened device tree of the kernel.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int update_cpus_node(void *fdt)
+{
+   struct device_node *cpus_node, *dn;
+   int cpus_offset, cpus_subnode_offset, ret = 0;
+
+   cpus_offset = fdt_path_offset(fdt, "/cpus");
+   if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) {
+   pr_err("Malformed device tree: error reading /cpus node: %s\n",
+  fdt_strerror(cpus_offset));
+   return cpus_offset;
+   }
+
+   if (cpus_offset > 0) {
+   ret = fdt_del_node(fdt, cpus_offset);
+   if (ret < 0) {
+   pr_err("Error deleting /cpus node: %s\n", 
fdt_strerror(ret));
+   return -EINVAL;
+   }
+   }
+
+   /* Add cpus node to fdt */
+   cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), "cpus");
+   if (cpus_offset < 0) {
+   pr_err("Error creating /cpus node: %s\n", 
fdt_strerror(cpus_offset));
+   return -EINVAL;
+   }
+
+   /* Add cpus node properties */
+   cpus_node = of_find_node_by_path("/cpus");
+   ret = add_node_props(fdt, cpus_offset, cpus_node);
+   of_node_put(cpus_node);
+   if (ret < 0)
+   return ret;
+
+   /* Loop through all subnodes of cpus and add them to fdt */
+   for_each_node_by_type(dn, "cpu") {
+   cpus_subnode_offset = fdt_add_subnode(fdt, cpus_offset, 
dn->full_name);
+   if (cpus_subnode_off

Re: [PATCH v17 3/6] powerpc/kexec: move *_memory_ranges functions to ranges.c

2024-03-02 Thread Hari Bathini




On 26/02/24 2:11 pm, Sourabh Jain wrote:

Move the following functions form kexec/{file_load_64.c => ranges.c} and
make them public so that components other KEXEC_FILE can also use these
functions.
1. get_exclude_memory_ranges
2. get_reserved_memory_ranges
3. get_crash_memory_ranges
4. get_usable_memory_ranges

Later in the series get_crash_memory_ranges function is utilized for
in-kernel updates to kdump image during CPU/Memory hotplug or
online/offline events for both kexec_load and kexec_file_load syscalls.

Since the above functions are moved to ranges.c, some of the helper
functions in ranges.c are no longer required to be public. Mark them as
static and removed them from kexec_ranges.h header file.

Finally, remove the CONFIG_KEXEC_FILE build dependency for range.c
because it is required for other config, such as CONFIG_CRASH_DUMP.

No functional changes are intended.



Acked-by: Hari Bathini 


Signed-off-by: Sourabh Jain 
Cc: Akhil Raj 
Cc: Andrew Morton 
Cc: Aneesh Kumar K.V 
Cc: Baoquan He 
Cc: Borislav Petkov (AMD) 
Cc: Boris Ostrovsky 
Cc: Christophe Leroy 
Cc: Dave Hansen 
Cc: Dave Young 
Cc: David Hildenbrand 
Cc: Greg Kroah-Hartman 
Cc: Hari Bathini 
Cc: Laurent Dufour 
Cc: Mahesh Salgaonkar 
Cc: Michael Ellerman 
Cc: Mimi Zohar 
Cc: Naveen N Rao 
Cc: Oscar Salvador 
Cc: Thomas Gleixner 
Cc: Valentin Schneider 
Cc: Vivek Goyal 
Cc: kexec@lists.infradead.org
Cc: x...@kernel.org
---
  arch/powerpc/include/asm/kexec_ranges.h |  19 +-
  arch/powerpc/kexec/Makefile |   4 +-
  arch/powerpc/kexec/file_load_64.c   | 190 
  arch/powerpc/kexec/ranges.c | 227 +++-
  4 files changed, 224 insertions(+), 216 deletions(-)

diff --git a/arch/powerpc/include/asm/kexec_ranges.h 
b/arch/powerpc/include/asm/kexec_ranges.h
index f83866a19e87..8489e844b447 100644
--- a/arch/powerpc/include/asm/kexec_ranges.h
+++ b/arch/powerpc/include/asm/kexec_ranges.h
@@ -7,19 +7,8 @@
  void sort_memory_ranges(struct crash_mem *mrngs, bool merge);
  struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges);
  int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
-int add_tce_mem_ranges(struct crash_mem **mem_ranges);
-int add_initrd_mem_range(struct crash_mem **mem_ranges);
-#ifdef CONFIG_PPC_64S_HASH_MMU
-int add_htab_mem_range(struct crash_mem **mem_ranges);
-#else
-static inline int add_htab_mem_range(struct crash_mem **mem_ranges)
-{
-   return 0;
-}
-#endif
-int add_kernel_mem_range(struct crash_mem **mem_ranges);
-int add_rtas_mem_range(struct crash_mem **mem_ranges);
-int add_opal_mem_range(struct crash_mem **mem_ranges);
-int add_reserved_mem_ranges(struct crash_mem **mem_ranges);
-
+int get_exclude_memory_ranges(struct crash_mem **mem_ranges);
+int get_reserved_memory_ranges(struct crash_mem **mem_ranges);
+int get_crash_memory_ranges(struct crash_mem **mem_ranges);
+int get_usable_memory_ranges(struct crash_mem **mem_ranges);
  #endif /* _ASM_POWERPC_KEXEC_RANGES_H */
diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile
index 8e469c4da3f8..470eb0453e17 100644
--- a/arch/powerpc/kexec/Makefile
+++ b/arch/powerpc/kexec/Makefile
@@ -3,11 +3,11 @@
  # Makefile for the linux kernel.
  #
  
-obj-y+= core.o core_$(BITS).o

+obj-y  += core.o core_$(BITS).o ranges.o
  
  obj-$(CONFIG_PPC32)		+= relocate_32.o
  
-obj-$(CONFIG_KEXEC_FILE)	+= file_load.o ranges.o file_load_$(BITS).o elf_$(BITS).o

+obj-$(CONFIG_KEXEC_FILE)   += file_load.o file_load_$(BITS).o elf_$(BITS).o
  obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o
  obj-$(CONFIG_CRASH_DUMP)  += crash.o
  
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c

index 1bc65de6174f..6a01f62b8fcf 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -47,83 +47,6 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
NULL
  };
  
-/**

- * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
- * regions like opal/rtas, tce-table, initrd,
- * kernel, htab which should be avoided while
- * setting up kexec load segments.
- * @mem_ranges:Range list to add the memory ranges to.
- *
- * Returns 0 on success, negative errno on error.
- */
-static int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
-{
-   int ret;
-
-   ret = add_tce_mem_ranges(mem_ranges);
-   if (ret)
-   goto out;
-
-   ret = add_initrd_mem_range(mem_ranges);
-   if (ret)
-   goto out;
-
-   ret = add_htab_mem_range(mem_ranges);
-   if (ret)
-   goto out;
-
-   ret = add_kernel_mem_range(mem_ranges);
-   if (ret)
-   goto out;
-
-   ret = add_rtas_mem_range(mem_ranges);
-   if (ret)
-   goto out;
-
-   ret = add_opal_mem_range(mem_ran

Re: [PATCH v17 2/6] crash: add a new kexec flag for hotplug support

2024-03-02 Thread Hari Bathini




On 26/02/24 2:11 pm, Sourabh Jain wrote:

Commit a72bbec70da2 ("crash: hotplug support for kexec_load()")
introduced a new kexec flag, `KEXEC_UPDATE_ELFCOREHDR`. Kexec tool uses
this flag to indicate to the kernel that it is safe to modify the
elfcorehdr of the kdump image loaded using the kexec_load system call.

However, it is possible that architectures may need to update kexec
segments other then elfcorehdr. For example, FDT (Flatten Device Tree)
on PowerPC. Introducing a new kexec flag for every new kexec segment
may not be a good solution. Hence, a generic kexec flag bit,
`KEXEC_CRASH_HOTPLUG_SUPPORT`, is introduced to share the CPU/Memory
hotplug support intent between the kexec tool and the kernel for the
kexec_load system call.

Now, if the kexec tool sends KEXEC_CRASH_HOTPLUG_SUPPORT kexec flag to
the kernel, it indicates to the kernel that all the required kexec
segment is skipped from SHA calculation and it is safe to update kdump
image loaded using the kexec_load syscall.

While loading the kdump image using the kexec_load syscall, the
@update_elfcorehdr member of struct kimage is set if the kexec tool
sends the KEXEC_UPDATE_ELFCOREHDR kexec flag. This member is later used
to determine whether it is safe to update elfcorehdr on hotplug events.
However, with the introduction of the KEXEC_CRASH_HOTPLUG_SUPPORT kexec
flag, the kexec tool could mark all the required kexec segments on an
architecture as safe to update. So rename the @update_elfcorehdr to
@hotplug_support. If @hotplug_support is set, the kernel can safely
update all the required kexec segments of the kdump image during
CPU/Memory hotplug events.

Introduce an architecture-specific function to process kexec flags for
determining hotplug support. Set the @hotplug_support member of struct
kimage for both kexec_load and kexec_file_load system calls. This
simplifies kernel checks to identify hotplug support for the currently
loaded kdump image by just examining the value of @hotplug_support.



Couple of minor nits. See comments below.
Otherwise, looks good to me.

Acked-by: Hari Bathini 


Signed-off-by: Sourabh Jain 
Cc: Akhil Raj 
Cc: Andrew Morton 
Cc: Aneesh Kumar K.V 
Cc: Baoquan He 
Cc: Borislav Petkov (AMD) 
Cc: Boris Ostrovsky 
Cc: Christophe Leroy 
Cc: Dave Hansen 
Cc: Dave Young 
Cc: David Hildenbrand 
Cc: Eric DeVolder 
Cc: Greg Kroah-Hartman 
Cc: Hari Bathini 
Cc: Laurent Dufour 
Cc: Mahesh Salgaonkar 
Cc: Michael Ellerman 
Cc: Mimi Zohar 
Cc: Naveen N Rao 
Cc: Oscar Salvador 
Cc: Thomas Gleixner 
Cc: Valentin Schneider 
Cc: Vivek Goyal 
Cc: kexec@lists.infradead.org
Cc: x...@kernel.org
---
  arch/x86/include/asm/kexec.h | 11 ++-
  arch/x86/kernel/crash.c  | 28 +---
  drivers/base/cpu.c   |  2 +-
  drivers/base/memory.c|  2 +-
  include/linux/crash_core.h   | 13 ++---
  include/linux/kexec.h| 11 +++
  include/uapi/linux/kexec.h   |  1 +
  kernel/crash_core.c  | 11 ---
  kernel/kexec.c   |  4 ++--
  kernel/kexec_file.c  |  5 +
  10 files changed, 46 insertions(+), 42 deletions(-)

diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index cb1320ebbc23..ae5482a2f0ca 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -210,15 +210,8 @@ extern void kdump_nmi_shootdown_cpus(void);
  void arch_crash_handle_hotplug_event(struct kimage *image, void *arg);
  #define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event
  
-#ifdef CONFIG_HOTPLUG_CPU

-int arch_crash_hotplug_cpu_support(void);
-#define crash_hotplug_cpu_support arch_crash_hotplug_cpu_support
-#endif
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-int arch_crash_hotplug_memory_support(void);
-#define crash_hotplug_memory_support arch_crash_hotplug_memory_support
-#endif
+int arch_crash_hotplug_support(struct kimage *image, unsigned long 
kexec_flags);
+#define arch_crash_hotplug_support arch_crash_hotplug_support
  
  unsigned int arch_crash_get_elfcorehdr_size(void);

  #define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 2a682fe86352..f06501445cd9 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -402,20 +402,26 @@ int crash_load_segments(struct kimage *image)
  #undef pr_fmt
  #define pr_fmt(fmt) "crash hp: " fmt





-/* These functions provide the value for the sysfs crash_hotplug nodes */
-#ifdef CONFIG_HOTPLUG_CPU
-int arch_crash_hotplug_cpu_support(void)
+int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags)
  {
-   return crash_check_update_elfcorehdr();
-}
-#endif
  
-#ifdef CONFIG_MEMORY_HOTPLUG

-int arch_crash_hotplug_memory_support(void)
-{
-   return crash_check_update_elfcorehdr();
-}
+#ifdef CONFIG_KEXEC_FILE
+   if (image->file_mode)
+   return 1;
  #endif
+   /*
+* Initially, crash hotplug support f

Re: [PATCH v17 1/6] crash: forward memory_notify arg to arch crash hotplug handler

2024-03-02 Thread Hari Bathini




On 26/02/24 2:11 pm, Sourabh Jain wrote:

In the event of memory hotplug or online/offline events, the crash
memory hotplug notifier `crash_memhp_notifier()` receives a
`memory_notify` object but doesn't forward that object to the
generic and architecture-specific crash hotplug handler.

The `memory_notify` object contains the starting PFN (Page Frame Number)
and the number of pages in the hot-removed memory. This information is
necessary for architectures like PowerPC to update/recreate the kdump
image, specifically `elfcorehdr`.

So update the function signature of `crash_handle_hotplug_event()` and
`arch_crash_handle_hotplug_event()` to accept the `memory_notify` object
as an argument from crash memory hotplug notifier.

Since no such object is available in the case of CPU hotplug event, the
crash CPU hotplug notifier `crash_cpuhp_online()` passes NULL to the
crash hotplug handler.



Acked-by: Hari Bathini 


Signed-off-by: Sourabh Jain 
Acked-by: Baoquan He 
Cc: Akhil Raj 
Cc: Andrew Morton 
Cc: Aneesh Kumar K.V 
Cc: Borislav Petkov (AMD) 
Cc: Boris Ostrovsky 
Cc: Christophe Leroy 
Cc: Dave Hansen 
Cc: Dave Young 
Cc: David Hildenbrand 
Cc: Greg Kroah-Hartman 
Cc: Hari Bathini 
Cc: Laurent Dufour 
Cc: Mahesh Salgaonkar 
Cc: Michael Ellerman 
Cc: Mimi Zohar 
Cc: Naveen N Rao 
Cc: Oscar Salvador 
Cc: Thomas Gleixner 
Cc: Valentin Schneider 
Cc: Vivek Goyal 
Cc: kexec@lists.infradead.org
Cc: x...@kernel.org
---
  arch/x86/include/asm/kexec.h |  2 +-
  arch/x86/kernel/crash.c  |  4 +++-
  include/linux/crash_core.h   |  2 +-
  kernel/crash_core.c  | 14 +++---
  4 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 91ca9a9ee3a2..cb1320ebbc23 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -207,7 +207,7 @@ int arch_kimage_file_post_load_cleanup(struct kimage 
*image);
  extern void kdump_nmi_shootdown_cpus(void);
  
  #ifdef CONFIG_CRASH_HOTPLUG

-void arch_crash_handle_hotplug_event(struct kimage *image);
+void arch_crash_handle_hotplug_event(struct kimage *image, void *arg);
  #define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event
  
  #ifdef CONFIG_HOTPLUG_CPU

diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index e74d0c4286c1..2a682fe86352 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -432,10 +432,12 @@ unsigned int arch_crash_get_elfcorehdr_size(void)
  /**
   * arch_crash_handle_hotplug_event() - Handle hotplug elfcorehdr changes
   * @image: a pointer to kexec_crash_image
+ * @arg: struct memory_notify handler for memory hotplug case and
+ *   NULL for CPU hotplug case.
   *
   * Prepare the new elfcorehdr and replace the existing elfcorehdr.
   */
-void arch_crash_handle_hotplug_event(struct kimage *image)
+void arch_crash_handle_hotplug_event(struct kimage *image, void *arg)
  {
void *elfbuf = NULL, *old_elfcorehdr;
unsigned long nr_mem_ranges;
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index d33352c2e386..647e928efee8 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -37,7 +37,7 @@ static inline void arch_kexec_unprotect_crashkres(void) { }
  
  
  #ifndef arch_crash_handle_hotplug_event

-static inline void arch_crash_handle_hotplug_event(struct kimage *image) { }
+static inline void arch_crash_handle_hotplug_event(struct kimage *image, void 
*arg) { }
  #endif
  
  int crash_check_update_elfcorehdr(void);

diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 78b5dc7cee3a..70fa8111a9d6 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -534,7 +534,7 @@ int crash_check_update_elfcorehdr(void)
   * list of segments it checks (since the elfcorehdr changes and thus
   * would require an update to purgatory itself to update the digest).
   */
-static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int 
cpu)
+static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int 
cpu, void *arg)
  {
struct kimage *image;
  
@@ -596,7 +596,7 @@ static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu)

image->hp_action = hp_action;
  
  	/* Now invoke arch-specific update handler */

-   arch_crash_handle_hotplug_event(image);
+   arch_crash_handle_hotplug_event(image, arg);
  
  	/* No longer handling a hotplug event */

image->hp_action = KEXEC_CRASH_HP_NONE;
@@ -612,17 +612,17 @@ static void crash_handle_hotplug_event(unsigned int 
hp_action, unsigned int cpu)
crash_hotplug_unlock();
  }
  
-static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)

+static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, 
void *arg)
  {
switch (val) {
case MEM_ONLINE:
crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY,
-   KEXEC_CRASH_HP_INVAL

[PATCH linux-next v2 2/3] powerpc/kexec: split CONFIG_KEXEC_FILE and CONFIG_CRASH_DUMP

2024-02-26 Thread Hari Bathini
CONFIG_KEXEC_FILE does not have to select CONFIG_CRASH_DUMP. Move
some code under CONFIG_CRASH_DUMP to support CONFIG_KEXEC_FILE and
!CONFIG_CRASH_DUMP case.

Signed-off-by: Hari Bathini 
---

* No changes in v2.

 arch/powerpc/kexec/elf_64.c   |   4 +-
 arch/powerpc/kexec/file_load_64.c | 269 --
 2 files changed, 142 insertions(+), 131 deletions(-)

diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
index 904016cf89ea..6d8951e8e966 100644
--- a/arch/powerpc/kexec/elf_64.c
+++ b/arch/powerpc/kexec/elf_64.c
@@ -47,7 +47,7 @@ static void *elf64_load(struct kimage *image, char 
*kernel_buf,
if (ret)
return ERR_PTR(ret);
 
-   if (image->type == KEXEC_TYPE_CRASH) {
+   if (IS_ENABLED(CONFIG_CRASH_DUMP) && image->type == KEXEC_TYPE_CRASH) {
/* min & max buffer values for kdump case */
kbuf.buf_min = pbuf.buf_min = crashk_res.start;
kbuf.buf_max = pbuf.buf_max =
@@ -70,7 +70,7 @@ static void *elf64_load(struct kimage *image, char 
*kernel_buf,
kexec_dprintk("Loaded purgatory at 0x%lx\n", pbuf.mem);
 
/* Load additional segments needed for panic kernel */
-   if (image->type == KEXEC_TYPE_CRASH) {
+   if (IS_ENABLED(CONFIG_CRASH_DUMP) && image->type == KEXEC_TYPE_CRASH) {
ret = load_crashdump_segments_ppc64(image, );
if (ret) {
pr_err("Failed to load kdump kernel segments\n");
diff --git a/arch/powerpc/kexec/file_load_64.c 
b/arch/powerpc/kexec/file_load_64.c
index 5b4c5cb23354..1bc65de6174f 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -96,119 +96,6 @@ static int get_exclude_memory_ranges(struct crash_mem 
**mem_ranges)
return ret;
 }
 
-/**
- * get_usable_memory_ranges - Get usable memory ranges. This list includes
- *regions like crashkernel, opal/rtas & tce-table,
- *that kdump kernel could use.
- * @mem_ranges:   Range list to add the memory ranges to.
- *
- * Returns 0 on success, negative errno on error.
- */
-static int get_usable_memory_ranges(struct crash_mem **mem_ranges)
-{
-   int ret;
-
-   /*
-* Early boot failure observed on guests when low memory (first memory
-* block?) is not added to usable memory. So, add [0, crashk_res.end]
-* instead of [crashk_res.start, crashk_res.end] to workaround it.
-* Also, crashed kernel's memory must be added to reserve map to
-* avoid kdump kernel from using it.
-*/
-   ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1);
-   if (ret)
-   goto out;
-
-   ret = add_rtas_mem_range(mem_ranges);
-   if (ret)
-   goto out;
-
-   ret = add_opal_mem_range(mem_ranges);
-   if (ret)
-   goto out;
-
-   ret = add_tce_mem_ranges(mem_ranges);
-out:
-   if (ret)
-   pr_err("Failed to setup usable memory ranges\n");
-   return ret;
-}
-
-/**
- * get_crash_memory_ranges - Get crash memory ranges. This list includes
- *   first/crashing kernel's memory regions that
- *   would be exported via an elfcore.
- * @mem_ranges:  Range list to add the memory ranges to.
- *
- * Returns 0 on success, negative errno on error.
- */
-static int get_crash_memory_ranges(struct crash_mem **mem_ranges)
-{
-   phys_addr_t base, end;
-   struct crash_mem *tmem;
-   u64 i;
-   int ret;
-
-   for_each_mem_range(i, , ) {
-   u64 size = end - base;
-
-   /* Skip backup memory region, which needs a separate entry */
-   if (base == BACKUP_SRC_START) {
-   if (size > BACKUP_SRC_SIZE) {
-   base = BACKUP_SRC_END + 1;
-   size -= BACKUP_SRC_SIZE;
-   } else
-   continue;
-   }
-
-   ret = add_mem_range(mem_ranges, base, size);
-   if (ret)
-   goto out;
-
-   /* Try merging adjacent ranges before reallocation attempt */
-   if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges)
-   sort_memory_ranges(*mem_ranges, true);
-   }
-
-   /* Reallocate memory ranges if there is no space to split ranges */
-   tmem = *mem_ranges;
-   if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) {
-   tmem = realloc_mem_ranges(mem_ranges);
-   if (!tmem)
-   goto out;
-   }
-
-   /* Exclude crashkernel region */
-   ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end);
-   if (ret)
-   goto out;
-
-   /*
-* F

[PATCH linux-next v2 3/3] powerpc/kdump: Split KEXEC_CORE and CRASH_DUMP dependency

2024-02-26 Thread Hari Bathini
Remove CONFIG_CRASH_DUMP dependency on CONFIG_KEXEC. CONFIG_KEXEC_CORE
was used at places where CONFIG_CRASH_DUMP or CONFIG_CRASH_RESERVE was
appropriate. Replace with appropriate #ifdefs to support CONFIG_KEXEC
and !CONFIG_CRASH_DUMP configuration option. Also, make CONFIG_FA_DUMP
dependent on CONFIG_CRASH_DUMP to avoid unmet dependencies for FA_DUMP
with !CONFIG_KEXEC_CORE configuration option.

Signed-off-by: Hari Bathini 
---

Changes in v2:
* Fixed a compile error for POWERNV build reported by Sourabh.

 arch/powerpc/Kconfig |  9 +--
 arch/powerpc/include/asm/kexec.h | 98 ++--
 arch/powerpc/kernel/prom.c   |  2 +-
 arch/powerpc/kernel/setup-common.c   |  2 +-
 arch/powerpc/kernel/smp.c|  4 +-
 arch/powerpc/kexec/Makefile  |  3 +-
 arch/powerpc/kexec/core.c|  4 ++
 arch/powerpc/platforms/powernv/smp.c |  2 +-
 8 files changed, 61 insertions(+), 63 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 5cf8ad8d7e8e..e377deefa2dc 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -607,11 +607,6 @@ config PPC64_SUPPORTS_MEMORY_FAILURE
 config ARCH_SUPPORTS_KEXEC
def_bool PPC_BOOK3S || PPC_E500 || (44x && !SMP)
 
-config ARCH_SELECTS_KEXEC
-   def_bool y
-   depends on KEXEC
-   select CRASH_DUMP
-
 config ARCH_SUPPORTS_KEXEC_FILE
def_bool PPC64
 
@@ -622,7 +617,6 @@ config ARCH_SELECTS_KEXEC_FILE
def_bool y
depends on KEXEC_FILE
select KEXEC_ELF
-   select CRASH_DUMP
select HAVE_IMA_KEXEC if IMA
 
 config PPC64_BIG_ENDIAN_ELF_ABI_V2
@@ -694,8 +688,7 @@ config ARCH_SELECTS_CRASH_DUMP
 
 config FA_DUMP
bool "Firmware-assisted dump"
-   depends on PPC64 && (PPC_RTAS || PPC_POWERNV)
-   select CRASH_DUMP
+   depends on CRASH_DUMP && PPC64 && (PPC_RTAS || PPC_POWERNV)
help
  A robust mechanism to get reliable kernel crash dump with
  assistance from firmware. This approach does not use kexec,
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index e1b43aa12175..fdb90e24dc74 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -55,59 +55,18 @@
 typedef void (*crash_shutdown_t)(void);
 
 #ifdef CONFIG_KEXEC_CORE
-
-/*
- * This function is responsible for capturing register states if coming
- * via panic or invoking dump using sysrq-trigger.
- */
-static inline void crash_setup_regs(struct pt_regs *newregs,
-   struct pt_regs *oldregs)
-{
-   if (oldregs)
-   memcpy(newregs, oldregs, sizeof(*newregs));
-   else
-   ppc_save_regs(newregs);
-}
+struct kimage;
+struct pt_regs;
 
 extern void kexec_smp_wait(void);  /* get and clear naca physid, wait for
  master to copy new code to 0 */
-extern int crashing_cpu;
-extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *));
-extern void crash_ipi_callback(struct pt_regs *);
-extern int crash_wake_offline;
-
-struct kimage;
-struct pt_regs;
 extern void default_machine_kexec(struct kimage *image);
-extern void default_machine_crash_shutdown(struct pt_regs *regs);
-extern int crash_shutdown_register(crash_shutdown_t handler);
-extern int crash_shutdown_unregister(crash_shutdown_t handler);
-
-extern void crash_kexec_prepare(void);
-extern void crash_kexec_secondary(struct pt_regs *regs);
-int __init overlaps_crashkernel(unsigned long start, unsigned long size);
-extern void reserve_crashkernel(void);
 extern void machine_kexec_mask_interrupts(void);
 
-static inline bool kdump_in_progress(void)
-{
-   return crashing_cpu >= 0;
-}
-
 void relocate_new_kernel(unsigned long indirection_page, unsigned long 
reboot_code_buffer,
 unsigned long start_address) __noreturn;
-
 void kexec_copy_flush(struct kimage *image);
 
-#if defined(CONFIG_CRASH_DUMP)
-bool is_kdump_kernel(void);
-#define is_kdump_kernelis_kdump_kernel
-#if defined(CONFIG_PPC_RTAS)
-void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);
-#define crash_free_reserved_phys_range crash_free_reserved_phys_range
-#endif /* CONFIG_PPC_RTAS */
-#endif /* CONFIG_CRASH_DUMP */
-
 #ifdef CONFIG_KEXEC_FILE
 extern const struct kexec_file_ops kexec_elf64_ops;
 
@@ -152,15 +111,56 @@ int setup_new_fdt_ppc64(const struct kimage *image, void 
*fdt,
 
 #endif /* CONFIG_KEXEC_FILE */
 
-#else /* !CONFIG_KEXEC_CORE */
-static inline void crash_kexec_secondary(struct pt_regs *regs) { }
+#endif /* CONFIG_KEXEC_CORE */
+
+#ifdef CONFIG_CRASH_RESERVE
+int __init overlaps_crashkernel(unsigned long start, unsigned long size);
+extern void reserve_crashkernel(void);
+#else
+static inline void reserve_crashkernel(void) {}
+static inline int overlaps_crashkernel(unsigned long start, unsigned long 
siz

[PATCH linux-next v2 0/3] powerpc/kexec: split CONFIG_CRASH_DUMP out from CONFIG_KEXEC_CORE

2024-02-26 Thread Hari Bathini
This patch series is a follow-up to [1] based on discussions at [2]
about additional work needed to get it working on powerpc.

The first patch in the series makes struct crash_mem available with or
without CONFIG_CRASH_DUMP enabled. The next patch moves kdump specific
code for kexec_file_load syscall under CONFIG_CRASH_DUMP and the last
patch splits other kdump specific code under CONFIG_CRASH_DUMP and
removes dependency with CONFIG_CRASH_DUMP for CONFIG_KEXEC_CORE.

[1] https://lore.kernel.org/all/20240124051254.67105-1-...@redhat.com/
[2] 
https://lore.kernel.org/all/9101bb07-70f1-476c-bec9-ec67e9899...@linux.ibm.com/

Changes in v2:
* Fixed a compile error for POWERNV build reported by Sourabh.

Hari Bathini (3):
  kexec/kdump: make struct crash_mem available without CONFIG_CRASH_DUMP
  powerpc/kexec: split CONFIG_KEXEC_FILE and CONFIG_CRASH_DUMP
  powerpc/kdump: Split KEXEC_CORE and CRASH_DUMP dependency

 arch/powerpc/Kconfig |   9 +-
 arch/powerpc/include/asm/kexec.h |  98 +-
 arch/powerpc/kernel/prom.c   |   2 +-
 arch/powerpc/kernel/setup-common.c   |   2 +-
 arch/powerpc/kernel/smp.c|   4 +-
 arch/powerpc/kexec/Makefile  |   3 +-
 arch/powerpc/kexec/core.c|   4 +
 arch/powerpc/kexec/elf_64.c  |   4 +-
 arch/powerpc/kexec/file_load_64.c| 269 ++-
 arch/powerpc/platforms/powernv/smp.c |   2 +-
 include/linux/crash_core.h   |  12 +-
 11 files changed, 209 insertions(+), 200 deletions(-)

-- 
2.43.2


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH linux-next v2 1/3] kexec/kdump: make struct crash_mem available without CONFIG_CRASH_DUMP

2024-02-26 Thread Hari Bathini
struct crash_mem defined under include/linux/crash_core.h represents
a list of memory ranges. While it is used to represent memory ranges
for kdump kernel, it can also be used for other kind of memory ranges.
In fact, KEXEC_FILE_LOAD syscall in powerpc uses this structure to
represent reserved memory ranges and exclude memory ranges needed to
find the right memory regions to load kexec kernel. So, make the
definition of crash_mem structure available for !CONFIG_CRASH_DUMP
case too.

Signed-off-by: Hari Bathini 
---

* No changes in v2.

 include/linux/crash_core.h | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 23270b16e1db..d33352c2e386 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -8,6 +8,12 @@
 
 struct kimage;
 
+struct crash_mem {
+   unsigned int max_nr_ranges;
+   unsigned int nr_ranges;
+   struct range ranges[] __counted_by(max_nr_ranges);
+};
+
 #ifdef CONFIG_CRASH_DUMP
 
 int crash_shrink_memory(unsigned long new_size);
@@ -51,12 +57,6 @@ static inline unsigned int crash_get_elfcorehdr_size(void) { 
return 0; }
 /* Alignment required for elf header segment */
 #define ELF_CORE_HEADER_ALIGN   4096
 
-struct crash_mem {
-   unsigned int max_nr_ranges;
-   unsigned int nr_ranges;
-   struct range ranges[] __counted_by(max_nr_ranges);
-};
-
 extern int crash_exclude_mem_range(struct crash_mem *mem,
   unsigned long long mstart,
   unsigned long long mend);
-- 
2.43.2


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH linux-next 3/3] powerpc/kdump: Split KEXEC_CORE and CRASH_DUMP dependency

2024-02-25 Thread Hari Bathini



On 23/02/24 1:05 pm, Sourabh Jain wrote:

Hello Hari,


Hi Sourabh,



Build failure detected.


Thanks for trying out the patches.



On 13/02/24 17:01, Hari Bathini wrote:

Remove CONFIG_CRASH_DUMP dependency on CONFIG_KEXEC. CONFIG_KEXEC_CORE
was used at places where CONFIG_CRASH_DUMP or CONFIG_CRASH_RESERVE was
appropriate. Replace with appropriate #ifdefs to support CONFIG_KEXEC
and !CONFIG_CRASH_DUMP configuration option. Also, make CONFIG_FA_DUMP
dependent on CONFIG_CRASH_DUMP to avoid unmet dependencies for FA_DUMP
with !CONFIG_KEXEC_CORE configuration option.

Signed-off-by: Hari Bathini 
---
  arch/powerpc/Kconfig   |  9 +--
  arch/powerpc/include/asm/kexec.h   | 98 +++---
  arch/powerpc/kernel/prom.c |  2 +-
  arch/powerpc/kernel/setup-common.c |  2 +-
  arch/powerpc/kernel/smp.c  |  4 +-
  arch/powerpc/kexec/Makefile    |  3 +-
  arch/powerpc/kexec/core.c  |  4 ++
  7 files changed, 60 insertions(+), 62 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 5cf8ad8d7e8e..e377deefa2dc 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -607,11 +607,6 @@ config PPC64_SUPPORTS_MEMORY_FAILURE
  config ARCH_SUPPORTS_KEXEC
  def_bool PPC_BOOK3S || PPC_E500 || (44x && !SMP)
-config ARCH_SELECTS_KEXEC
-    def_bool y
-    depends on KEXEC
-    select CRASH_DUMP
-
  config ARCH_SUPPORTS_KEXEC_FILE
  def_bool PPC64
@@ -622,7 +617,6 @@ config ARCH_SELECTS_KEXEC_FILE
  def_bool y
  depends on KEXEC_FILE
  select KEXEC_ELF
-    select CRASH_DUMP
  select HAVE_IMA_KEXEC if IMA
  config PPC64_BIG_ENDIAN_ELF_ABI_V2
@@ -694,8 +688,7 @@ config ARCH_SELECTS_CRASH_DUMP
  config FA_DUMP
  bool "Firmware-assisted dump"
-    depends on PPC64 && (PPC_RTAS || PPC_POWERNV)
-    select CRASH_DUMP
+    depends on CRASH_DUMP && PPC64 && (PPC_RTAS || PPC_POWERNV)
  help
    A robust mechanism to get reliable kernel crash dump with
    assistance from firmware. This approach does not use kexec,
diff --git a/arch/powerpc/include/asm/kexec.h 
b/arch/powerpc/include/asm/kexec.h

index e1b43aa12175..fdb90e24dc74 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -55,59 +55,18 @@
  typedef void (*crash_shutdown_t)(void);
  #ifdef CONFIG_KEXEC_CORE
-
-/*
- * This function is responsible for capturing register states if coming
- * via panic or invoking dump using sysrq-trigger.
- */
-static inline void crash_setup_regs(struct pt_regs *newregs,
-    struct pt_regs *oldregs)
-{
-    if (oldregs)
-    memcpy(newregs, oldregs, sizeof(*newregs));
-    else
-    ppc_save_regs(newregs);
-}
+struct kimage;
+struct pt_regs;
  extern void kexec_smp_wait(void);    /* get and clear naca physid, 
wait for

    master to copy new code to 0 */
-extern int crashing_cpu;
-extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs 
*));

-extern void crash_ipi_callback(struct pt_regs *);
-extern int crash_wake_offline;
-
-struct kimage;
-struct pt_regs;
  extern void default_machine_kexec(struct kimage *image);
-extern void default_machine_crash_shutdown(struct pt_regs *regs);
-extern int crash_shutdown_register(crash_shutdown_t handler);
-extern int crash_shutdown_unregister(crash_shutdown_t handler);
-
-extern void crash_kexec_prepare(void);
-extern void crash_kexec_secondary(struct pt_regs *regs);
-int __init overlaps_crashkernel(unsigned long start, unsigned long 
size);

-extern void reserve_crashkernel(void);
  extern void machine_kexec_mask_interrupts(void);
-static inline bool kdump_in_progress(void)
-{
-    return crashing_cpu >= 0;
-}
-
  void relocate_new_kernel(unsigned long indirection_page, unsigned 
long reboot_code_buffer,

   unsigned long start_address) __noreturn;
-
  void kexec_copy_flush(struct kimage *image);
-#if defined(CONFIG_CRASH_DUMP)
-bool is_kdump_kernel(void);
-#define is_kdump_kernel    is_kdump_kernel
-#if defined(CONFIG_PPC_RTAS)
-void crash_free_reserved_phys_range(unsigned long begin, unsigned 
long end);

-#define crash_free_reserved_phys_range crash_free_reserved_phys_range
-#endif /* CONFIG_PPC_RTAS */
-#endif /* CONFIG_CRASH_DUMP */
-
  #ifdef CONFIG_KEXEC_FILE
  extern const struct kexec_file_ops kexec_elf64_ops;
@@ -152,15 +111,56 @@ int setup_new_fdt_ppc64(const struct kimage 
*image, void *fdt,

  #endif /* CONFIG_KEXEC_FILE */
-#else /* !CONFIG_KEXEC_CORE */
-static inline void crash_kexec_secondary(struct pt_regs *regs) { }
+#endif /* CONFIG_KEXEC_CORE */
+
+#ifdef CONFIG_CRASH_RESERVE
+int __init overlaps_crashkernel(unsigned long start, unsigned long 
size);

+extern void reserve_crashkernel(void);
+#else
+static inline void reserve_crashkernel(void) {}
+static inline int overlaps_crashkernel(unsigned long start, unsigned 
long size) { return 0; }

+#endif
-static inline int overlaps_crashkernel(unsigned l

Re: [PATCH v2 00/14] Split crash out from kexec and clean up related config items

2024-02-22 Thread Hari Bathini




On 23/02/24 2:59 am, Andrew Morton wrote:

On Thu, 22 Feb 2024 10:47:29 +0530 Hari Bathini  wrote:




On 22/02/24 2:27 am, Andrew Morton wrote:

On Wed, 21 Feb 2024 11:15:00 +0530 Hari Bathini  wrote:


On 04/02/24 8:56 am, Baoquan He wrote:

Hope Hari and Pingfan can help have a look, see if
it's doable. Now, I make it either have both kexec and crash enabled, or
disable both of them altogether.


Sure. I will take a closer look...

Thanks a lot. Please feel free to post patches to make that, or I can do
it with your support or suggestion.


Tested your changes and on top of these changes, came up with the below
changes to get it working for powerpc:

   
https://lore.kernel.org/all/20240213113150.1148276-1-hbath...@linux.ibm.com/


So can we take it that you're OK with Baoquan's series as-is?


Hi Andrew,

If you mean

v3 (https://lore.kernel.org/all/20240124051254.67105-1-...@redhat.com/)
+
follow-up from Baoquan
(https://lore.kernel.org/all/Zb8D1ASrgX0qVm9z@MiWiFi-R3L-srv/)

Yes.



Can I add your Acked-by: and/or Tested-by: to the patches in this series?


Sure, Andrew.

Acked-by: Hari Bathini 

for..

Patches 1-5 & 8 in:

  https://lore.kernel.org/all/20240124051254.67105-1-...@redhat.com/

and this follow-up patch:

  https://lore.kernel.org/all/Zb8D1ASrgX0qVm9z@MiWiFi-R3L-srv/

Thanks
Hari

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v2 00/14] Split crash out from kexec and clean up related config items

2024-02-21 Thread Hari Bathini




On 22/02/24 2:27 am, Andrew Morton wrote:

On Wed, 21 Feb 2024 11:15:00 +0530 Hari Bathini  wrote:


On 04/02/24 8:56 am, Baoquan He wrote:

Hope Hari and Pingfan can help have a look, see if
it's doable. Now, I make it either have both kexec and crash enabled, or
disable both of them altogether.


Sure. I will take a closer look...

Thanks a lot. Please feel free to post patches to make that, or I can do
it with your support or suggestion.


Tested your changes and on top of these changes, came up with the below
changes to get it working for powerpc:

  
https://lore.kernel.org/all/20240213113150.1148276-1-hbath...@linux.ibm.com/


So can we take it that you're OK with Baoquan's series as-is?


Hi Andrew,

If you mean

v3 (https://lore.kernel.org/all/20240124051254.67105-1-...@redhat.com/)
+
follow-up from Baoquan 
(https://lore.kernel.org/all/Zb8D1ASrgX0qVm9z@MiWiFi-R3L-srv/)


Yes.

My changes are based on top of the above patches..

Thanks
Hari

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v2 00/14] Split crash out from kexec and clean up related config items

2024-02-20 Thread Hari Bathini

Hi Baoquan,

On 04/02/24 8:56 am, Baoquan He wrote:

Hope Hari and Pingfan can help have a look, see if
it's doable. Now, I make it either have both kexec and crash enabled, or
disable both of them altogether.


Sure. I will take a closer look...

Thanks a lot. Please feel free to post patches to make that, or I can do
it with your support or suggestion.


Tested your changes and on top of these changes, came up with the below
changes to get it working for powerpc:


https://lore.kernel.org/all/20240213113150.1148276-1-hbath...@linux.ibm.com/

Please take a look.

Thanks
Hari

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH linux-next 0/3] powerpc/kexec: split CONFIG_CRASH_DUMP out from CONFIG_KEXEC_CORE

2024-02-13 Thread Hari Bathini
This patch series is a follow-up to [1] based on discussions at [2]
about additional work needed to get it working on powerpc.

The first patch in the series makes struct crash_mem available with or
without CONFIG_CRASH_DUMP enabled. The next patch moves kdump specific
code for kexec_file_load syscall under CONFIG_CRASH_DUMP and the last
patch splits other kdump specific code under CONFIG_CRASH_DUMP and
removes dependency with CONFIG_CRASH_DUMP for CONFIG_KEXEC_CORE.

[1] https://lore.kernel.org/all/20240124051254.67105-1-...@redhat.com/
[2] 
https://lore.kernel.org/all/9101bb07-70f1-476c-bec9-ec67e9899...@linux.ibm.com/


Hari Bathini (3):
  kexec/kdump: make struct crash_mem available without CONFIG_CRASH_DUMP
  powerpc/kexec: split CONFIG_KEXEC_FILE and CONFIG_CRASH_DUMP
  powerpc/kdump: Split KEXEC_CORE and CRASH_DUMP dependency

 arch/powerpc/Kconfig   |   9 +-
 arch/powerpc/include/asm/kexec.h   |  98 +--
 arch/powerpc/kernel/prom.c |   2 +-
 arch/powerpc/kernel/setup-common.c |   2 +-
 arch/powerpc/kernel/smp.c  |   4 +-
 arch/powerpc/kexec/Makefile|   3 +-
 arch/powerpc/kexec/core.c  |   4 +
 arch/powerpc/kexec/elf_64.c|   4 +-
 arch/powerpc/kexec/file_load_64.c  | 269 +++--
 include/linux/crash_core.h |  12 +-
 10 files changed, 208 insertions(+), 199 deletions(-)

-- 
2.43.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH linux-next 1/3] kexec/kdump: make struct crash_mem available without CONFIG_CRASH_DUMP

2024-02-13 Thread Hari Bathini
struct crash_mem defined under include/linux/crash_core.h represents
a list of memory ranges. While it is used to represent memory ranges
for kdump kernel, it can also be used for other kind of memory ranges.
In fact, KEXEC_FILE_LOAD syscall in powerpc uses this structure to
represent reserved memory ranges and exclude memory ranges needed to
find the right memory regions to load kexec kernel. So, make the
definition of crash_mem structure available for !CONFIG_CRASH_DUMP
case too.

Signed-off-by: Hari Bathini 
---
 include/linux/crash_core.h | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 23270b16e1db..d33352c2e386 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -8,6 +8,12 @@
 
 struct kimage;
 
+struct crash_mem {
+   unsigned int max_nr_ranges;
+   unsigned int nr_ranges;
+   struct range ranges[] __counted_by(max_nr_ranges);
+};
+
 #ifdef CONFIG_CRASH_DUMP
 
 int crash_shrink_memory(unsigned long new_size);
@@ -51,12 +57,6 @@ static inline unsigned int crash_get_elfcorehdr_size(void) { 
return 0; }
 /* Alignment required for elf header segment */
 #define ELF_CORE_HEADER_ALIGN   4096
 
-struct crash_mem {
-   unsigned int max_nr_ranges;
-   unsigned int nr_ranges;
-   struct range ranges[] __counted_by(max_nr_ranges);
-};
-
 extern int crash_exclude_mem_range(struct crash_mem *mem,
   unsigned long long mstart,
   unsigned long long mend);
-- 
2.43.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH linux-next 3/3] powerpc/kdump: Split KEXEC_CORE and CRASH_DUMP dependency

2024-02-13 Thread Hari Bathini
Remove CONFIG_CRASH_DUMP dependency on CONFIG_KEXEC. CONFIG_KEXEC_CORE
was used at places where CONFIG_CRASH_DUMP or CONFIG_CRASH_RESERVE was
appropriate. Replace with appropriate #ifdefs to support CONFIG_KEXEC
and !CONFIG_CRASH_DUMP configuration option. Also, make CONFIG_FA_DUMP
dependent on CONFIG_CRASH_DUMP to avoid unmet dependencies for FA_DUMP
with !CONFIG_KEXEC_CORE configuration option.

Signed-off-by: Hari Bathini 
---
 arch/powerpc/Kconfig   |  9 +--
 arch/powerpc/include/asm/kexec.h   | 98 +++---
 arch/powerpc/kernel/prom.c |  2 +-
 arch/powerpc/kernel/setup-common.c |  2 +-
 arch/powerpc/kernel/smp.c  |  4 +-
 arch/powerpc/kexec/Makefile|  3 +-
 arch/powerpc/kexec/core.c  |  4 ++
 7 files changed, 60 insertions(+), 62 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 5cf8ad8d7e8e..e377deefa2dc 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -607,11 +607,6 @@ config PPC64_SUPPORTS_MEMORY_FAILURE
 config ARCH_SUPPORTS_KEXEC
def_bool PPC_BOOK3S || PPC_E500 || (44x && !SMP)
 
-config ARCH_SELECTS_KEXEC
-   def_bool y
-   depends on KEXEC
-   select CRASH_DUMP
-
 config ARCH_SUPPORTS_KEXEC_FILE
def_bool PPC64
 
@@ -622,7 +617,6 @@ config ARCH_SELECTS_KEXEC_FILE
def_bool y
depends on KEXEC_FILE
select KEXEC_ELF
-   select CRASH_DUMP
select HAVE_IMA_KEXEC if IMA
 
 config PPC64_BIG_ENDIAN_ELF_ABI_V2
@@ -694,8 +688,7 @@ config ARCH_SELECTS_CRASH_DUMP
 
 config FA_DUMP
bool "Firmware-assisted dump"
-   depends on PPC64 && (PPC_RTAS || PPC_POWERNV)
-   select CRASH_DUMP
+   depends on CRASH_DUMP && PPC64 && (PPC_RTAS || PPC_POWERNV)
help
  A robust mechanism to get reliable kernel crash dump with
  assistance from firmware. This approach does not use kexec,
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index e1b43aa12175..fdb90e24dc74 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -55,59 +55,18 @@
 typedef void (*crash_shutdown_t)(void);
 
 #ifdef CONFIG_KEXEC_CORE
-
-/*
- * This function is responsible for capturing register states if coming
- * via panic or invoking dump using sysrq-trigger.
- */
-static inline void crash_setup_regs(struct pt_regs *newregs,
-   struct pt_regs *oldregs)
-{
-   if (oldregs)
-   memcpy(newregs, oldregs, sizeof(*newregs));
-   else
-   ppc_save_regs(newregs);
-}
+struct kimage;
+struct pt_regs;
 
 extern void kexec_smp_wait(void);  /* get and clear naca physid, wait for
  master to copy new code to 0 */
-extern int crashing_cpu;
-extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *));
-extern void crash_ipi_callback(struct pt_regs *);
-extern int crash_wake_offline;
-
-struct kimage;
-struct pt_regs;
 extern void default_machine_kexec(struct kimage *image);
-extern void default_machine_crash_shutdown(struct pt_regs *regs);
-extern int crash_shutdown_register(crash_shutdown_t handler);
-extern int crash_shutdown_unregister(crash_shutdown_t handler);
-
-extern void crash_kexec_prepare(void);
-extern void crash_kexec_secondary(struct pt_regs *regs);
-int __init overlaps_crashkernel(unsigned long start, unsigned long size);
-extern void reserve_crashkernel(void);
 extern void machine_kexec_mask_interrupts(void);
 
-static inline bool kdump_in_progress(void)
-{
-   return crashing_cpu >= 0;
-}
-
 void relocate_new_kernel(unsigned long indirection_page, unsigned long 
reboot_code_buffer,
 unsigned long start_address) __noreturn;
-
 void kexec_copy_flush(struct kimage *image);
 
-#if defined(CONFIG_CRASH_DUMP)
-bool is_kdump_kernel(void);
-#define is_kdump_kernelis_kdump_kernel
-#if defined(CONFIG_PPC_RTAS)
-void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);
-#define crash_free_reserved_phys_range crash_free_reserved_phys_range
-#endif /* CONFIG_PPC_RTAS */
-#endif /* CONFIG_CRASH_DUMP */
-
 #ifdef CONFIG_KEXEC_FILE
 extern const struct kexec_file_ops kexec_elf64_ops;
 
@@ -152,15 +111,56 @@ int setup_new_fdt_ppc64(const struct kimage *image, void 
*fdt,
 
 #endif /* CONFIG_KEXEC_FILE */
 
-#else /* !CONFIG_KEXEC_CORE */
-static inline void crash_kexec_secondary(struct pt_regs *regs) { }
+#endif /* CONFIG_KEXEC_CORE */
+
+#ifdef CONFIG_CRASH_RESERVE
+int __init overlaps_crashkernel(unsigned long start, unsigned long size);
+extern void reserve_crashkernel(void);
+#else
+static inline void reserve_crashkernel(void) {}
+static inline int overlaps_crashkernel(unsigned long start, unsigned long 
size) { return 0; }
+#endif
 
-static inline int overlaps_crashkernel(unsigned long start, unsigned long size)
+#if defined(CONFIG_CRASH_DUMP)
+/*
+

[PATCH linux-next 2/3] powerpc/kexec: split CONFIG_KEXEC_FILE and CONFIG_CRASH_DUMP

2024-02-13 Thread Hari Bathini
CONFIG_KEXEC_FILE does not have to select CONFIG_CRASH_DUMP. Move
some code under CONFIG_CRASH_DUMP to support CONFIG_KEXEC_FILE and
!CONFIG_CRASH_DUMP case.

Signed-off-by: Hari Bathini 
---
 arch/powerpc/kexec/elf_64.c   |   4 +-
 arch/powerpc/kexec/file_load_64.c | 269 --
 2 files changed, 142 insertions(+), 131 deletions(-)

diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
index 904016cf89ea..6d8951e8e966 100644
--- a/arch/powerpc/kexec/elf_64.c
+++ b/arch/powerpc/kexec/elf_64.c
@@ -47,7 +47,7 @@ static void *elf64_load(struct kimage *image, char 
*kernel_buf,
if (ret)
return ERR_PTR(ret);
 
-   if (image->type == KEXEC_TYPE_CRASH) {
+   if (IS_ENABLED(CONFIG_CRASH_DUMP) && image->type == KEXEC_TYPE_CRASH) {
/* min & max buffer values for kdump case */
kbuf.buf_min = pbuf.buf_min = crashk_res.start;
kbuf.buf_max = pbuf.buf_max =
@@ -70,7 +70,7 @@ static void *elf64_load(struct kimage *image, char 
*kernel_buf,
kexec_dprintk("Loaded purgatory at 0x%lx\n", pbuf.mem);
 
/* Load additional segments needed for panic kernel */
-   if (image->type == KEXEC_TYPE_CRASH) {
+   if (IS_ENABLED(CONFIG_CRASH_DUMP) && image->type == KEXEC_TYPE_CRASH) {
ret = load_crashdump_segments_ppc64(image, );
if (ret) {
pr_err("Failed to load kdump kernel segments\n");
diff --git a/arch/powerpc/kexec/file_load_64.c 
b/arch/powerpc/kexec/file_load_64.c
index 5b4c5cb23354..1bc65de6174f 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -96,119 +96,6 @@ static int get_exclude_memory_ranges(struct crash_mem 
**mem_ranges)
return ret;
 }
 
-/**
- * get_usable_memory_ranges - Get usable memory ranges. This list includes
- *regions like crashkernel, opal/rtas & tce-table,
- *that kdump kernel could use.
- * @mem_ranges:   Range list to add the memory ranges to.
- *
- * Returns 0 on success, negative errno on error.
- */
-static int get_usable_memory_ranges(struct crash_mem **mem_ranges)
-{
-   int ret;
-
-   /*
-* Early boot failure observed on guests when low memory (first memory
-* block?) is not added to usable memory. So, add [0, crashk_res.end]
-* instead of [crashk_res.start, crashk_res.end] to workaround it.
-* Also, crashed kernel's memory must be added to reserve map to
-* avoid kdump kernel from using it.
-*/
-   ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1);
-   if (ret)
-   goto out;
-
-   ret = add_rtas_mem_range(mem_ranges);
-   if (ret)
-   goto out;
-
-   ret = add_opal_mem_range(mem_ranges);
-   if (ret)
-   goto out;
-
-   ret = add_tce_mem_ranges(mem_ranges);
-out:
-   if (ret)
-   pr_err("Failed to setup usable memory ranges\n");
-   return ret;
-}
-
-/**
- * get_crash_memory_ranges - Get crash memory ranges. This list includes
- *   first/crashing kernel's memory regions that
- *   would be exported via an elfcore.
- * @mem_ranges:  Range list to add the memory ranges to.
- *
- * Returns 0 on success, negative errno on error.
- */
-static int get_crash_memory_ranges(struct crash_mem **mem_ranges)
-{
-   phys_addr_t base, end;
-   struct crash_mem *tmem;
-   u64 i;
-   int ret;
-
-   for_each_mem_range(i, , ) {
-   u64 size = end - base;
-
-   /* Skip backup memory region, which needs a separate entry */
-   if (base == BACKUP_SRC_START) {
-   if (size > BACKUP_SRC_SIZE) {
-   base = BACKUP_SRC_END + 1;
-   size -= BACKUP_SRC_SIZE;
-   } else
-   continue;
-   }
-
-   ret = add_mem_range(mem_ranges, base, size);
-   if (ret)
-   goto out;
-
-   /* Try merging adjacent ranges before reallocation attempt */
-   if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges)
-   sort_memory_ranges(*mem_ranges, true);
-   }
-
-   /* Reallocate memory ranges if there is no space to split ranges */
-   tmem = *mem_ranges;
-   if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) {
-   tmem = realloc_mem_ranges(mem_ranges);
-   if (!tmem)
-   goto out;
-   }
-
-   /* Exclude crashkernel region */
-   ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end);
-   if (ret)
-   goto out;
-
-   /*
-* FIXME: For now, stay i

[PATCH linux-next] bpf: fix warning for crash_kexec

2024-02-09 Thread Hari Bathini
With [1], CONFIG_KEXEC & !CONFIG_CRASH_DUMP is supported but that led
to the below warning:

  "WARN: resolve_btfids: unresolved symbol crash_kexec"

Fix it by using the appropriate #ifdef.

[1] https://lore.kernel.org/all/20240124051254.67105-1-...@redhat.com/

Signed-off-by: Hari Bathini 
---
 kernel/bpf/helpers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 4db1c658254c..e408d1115e26 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2545,7 +2545,7 @@ __bpf_kfunc void bpf_throw(u64 cookie)
 __bpf_kfunc_end_defs();
 
 BTF_KFUNCS_START(generic_btf_ids)
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_DUMP
 BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
 #endif
 BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
-- 
2.43.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v2 00/14] Split crash out from kexec and clean up related config items

2024-02-01 Thread Hari Bathini

Hi Baoquan,

On 19/01/24 8:22 pm, Baoquan He wrote:

Motivation:
=
Previously, LKP reported a building error. When investigating, it can't
be resolved reasonablly with the present messy kdump config items.

  https://lore.kernel.org/oe-kbuild-all/202312182200.ka7mzifq-...@intel.com/

The kdump (crash dumping) related config items could causes confusions:

Firstly,
---
CRASH_CORE enables codes including
  - crashkernel reservation;
  - elfcorehdr updating;
  - vmcoreinfo exporting;
  - crash hotplug handling;

Now fadump of powerpc, kcore dynamic debugging and kdump all selects
CRASH_CORE, while fadump
  - fadump needs crashkernel parsing, vmcoreinfo exporting, and accessing
global variable 'elfcorehdr_addr';
  - kcore only needs vmcoreinfo exporting;
  - kdump needs all of the current kernel/crash_core.c.

So only enabling PROC_CORE or FA_DUMP will enable CRASH_CORE, this
mislead people that we enable crash dumping, actual it's not.

Secondly,
---
It's not reasonable to allow KEXEC_CORE select CRASH_CORE.

Because KEXEC_CORE enables codes which allocate control pages, copy
kexec/kdump segments, and prepare for switching. These codes are
shared by both kexec reboot and kdump. We could want kexec reboot,
but disable kdump. In that case, CRASH_CORE should not be selected.

  
  CONFIG_CRASH_CORE=y
  CONFIG_KEXEC_CORE=y
  CONFIG_KEXEC=y
  CONFIG_KEXEC_FILE=y
 -

Thirdly,
---
It's not reasonable to allow CRASH_DUMP select KEXEC_CORE.

That could make KEXEC_CORE, CRASH_DUMP are enabled independently from
KEXEC or KEXEC_FILE. However, w/o KEXEC or KEXEC_FILE, the KEXEC_CORE
code built in doesn't make any sense because no kernel loading or
switching will happen to utilize the KEXEC_CORE code.
  -
  CONFIG_CRASH_CORE=y
  CONFIG_KEXEC_CORE=y
  CONFIG_CRASH_DUMP=y
  -

In this case, what is worse, on arch sh and arm, KEXEC relies on MMU,
while CRASH_DUMP can still be enabled when !MMU, then compiling error is
seen as the lkp test robot reported in above link.

  --arch/sh/Kconfig--
  config ARCH_SUPPORTS_KEXEC
  def_bool MMU

  config ARCH_SUPPORTS_CRASH_DUMP
  def_bool BROKEN_ON_SMP
  ---

Changes:
===
1, split out crash_reserve.c from crash_core.c;
2, split out vmcore_infoc. from crash_core.c;
3, move crash related codes in kexec_core.c into crash_core.c;
4, remove dependency of FA_DUMP on CRASH_DUMP;
5, clean up kdump related config items;
6, wrap up crash codes in crash related ifdefs on all 9 arch-es
which support crash dumping;

Achievement:
===
With above changes, I can rearrange the config item logic as below (the right
item depends on or is selected by the left item):

 PROC_KCORE ---> VMCORE_INFO

|--> VMCORE_INFO
 FA_DUMP|
|--> CRASH_RESERVE


FA_DUMP also needs PROC_VMCORE (CRASH_DUMP by dependency, I guess).
So, the FA_DUMP related changes here will need a relook..



 >VMCORE_INFO
/
|>CRASH_RESERVE
 KEXEC  --|/|
  |--> KEXEC_CORE--> CRASH_DUMP-->/-|>PROC_VMCORE
 KEXEC_FILE --|   \ |
\>CRASH_HOTPLUG


 KEXEC  --|
  |--> KEXEC_CORE (for kexec reboot only)
 KEXEC_FILE --|

Test

On all 8 architectures, including x86_64, arm64, s390x, sh, arm, mips,
riscv, loongarch, I did below three cases of config item setting and
building all passed. Let me take configs on x86_64 as exampmle here:

(1) Both CONFIG_KEXEC and KEXEC_FILE is unset, then all kexec/kdump
items are unset automatically:
# Kexec and crash features
# CONFIG_KEXEC is not set
# CONFIG_KEXEC_FILE is not set
# end of Kexec and crash features

(2) set CONFIG_KEXEC_FILE and 'make olddefconfig':
---
# Kexec and crash features
CONFIG_CRASH_RESERVE=y
CONFIG_VMCORE_INFO=y
CONFIG_KEXEC_CORE=y
CONFIG_KEXEC_FILE=y
CONFIG_CRASH_DUMP=y
CONFIG_CRASH_HOTPLUG=y
CONFIG_CRASH_MAX_MEMORY_RANGES=8192
# end of Kexec and crash features
---

(3) unset CONFIG_CRASH_DUMP in case 2 and execute 'make olddefconfig':

# Kexec and crash features
CONFIG_KEXEC_CORE=y
CONFIG_KEXEC_FILE=y
# end of Kexec and crash features


Note:
For ppc, it needs investigation to make clear how to split out crash
code in arch folder.


On powerpc, both kdump and fadump need PROC_VMCORE & CRASH_DUMP.
Hope that clears things. So, patch 3/14 breaks things for FA_DUMP..


Hope Hari and Pingfan can help have a look, see if
it's doable. Now, I make it either have both kexec and crash enabled, or
disable both of them altogether.



Sure. I will take a closer 

Re: [PATCH v15 5/5] powerpc: add crash memory hotplug support

2024-01-23 Thread Hari Bathini




On 11/01/24 4:21 pm, Sourabh Jain wrote:

Extend the arch crash hotplug handler, as introduced by the patch title
("powerpc: add crash CPU hotplug support"), to also support memory
add/remove events.

Elfcorehdr describes the memory of the crash kernel to capture the
kernel; hence, it needs to be updated if memory resources change due to
memory add/remove events. Therefore, arch_crash_handle_hotplug_event()
is updated to recreate the elfcorehdr and replace it with the previous
one on memory add/remove events.

The memblock list is used to prepare the elfcorehdr. In the case of
memory hot removal, the memblock list is updated after the arch crash
hotplug handler is triggered, as depicted in Figure 1. Thus, the
hot-removed memory is explicitly removed from the crash memory ranges
to ensure that the memory ranges added to elfcorehdr do not include the
hot-removed memory.

 Memory remove
   |
   v
 Offline pages
   |
   v
  Initiate memory notify call <> crash hotplug handler
  chain for MEM_OFFLINE event
   |
   v
  Update memblock list

Figure 1

There are two system calls, `kexec_file_load` and `kexec_load`, used to
load the kdump image. A few changes have been made to ensure that the
kernel can safely update the elfcorehdr component of the kdump image for
both system calls.

For the kexec_file_load syscall, kdump image is prepared in the kernel.
To support an increasing number of memory regions, the elfcorehdr is
built with extra buffer space to ensure that it can accommodate
additional memory ranges in future.

For the kexec_load syscall, the elfcorehdr is updated only if the
KEXEC_CRASH_HOTPLUG_SUPPORT kexec flag is passed to the kernel by the
kexec tool. Passing this flag to the kernel indicates that the
elfcorehdr is built to accommodate additional memory ranges and the
elfcorehdr segment is not considered for SHA calculation, making it safe
to update.

The changes related to this feature are kept under the CRASH_HOTPLUG
config, and it is enabled by default.

Signed-off-by: Sourabh Jain 
Cc: Akhil Raj 
Cc: Andrew Morton 
Cc: Aneesh Kumar K.V 
Cc: Baoquan He 
Cc: Borislav Petkov (AMD) 
Cc: Boris Ostrovsky 
Cc: Christophe Leroy 
Cc: Dave Hansen 
Cc: Dave Young 
Cc: David Hildenbrand 
Cc: Greg Kroah-Hartman 
Cc: Hari Bathini 
Cc: Laurent Dufour 
Cc: Mahesh Salgaonkar 
Cc: Michael Ellerman 
Cc: Mimi Zohar 
Cc: Naveen N Rao 
Cc: Oscar Salvador 
Cc: Thomas Gleixner 
Cc: Valentin Schneider 
Cc: Vivek Goyal 
Cc: kexec@lists.infradead.org
Cc: x...@kernel.org
---
  arch/powerpc/include/asm/kexec.h|   5 +-
  arch/powerpc/include/asm/kexec_ranges.h |   1 +
  arch/powerpc/kexec/core_64.c| 107 +++-
  arch/powerpc/kexec/file_load_64.c   |  34 +++-
  arch/powerpc/kexec/ranges.c |  85 +++
  5 files changed, 225 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 943e58eb9bff..25ff5b7f1a28 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -116,8 +116,11 @@ int get_crash_memory_ranges(struct crash_mem **mem_ranges);
  #ifdef CONFIG_CRASH_HOTPLUG
  void arch_crash_handle_hotplug_event(struct kimage *image, void *arg);
  #define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event
-#endif /*CONFIG_CRASH_HOTPLUG */
  
+unsigned int arch_crash_get_elfcorehdr_size(void);

+#define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size
+
+#endif /*CONFIG_CRASH_HOTPLUG */
  #endif /* CONFIG_PPC64 */
  
  #ifdef CONFIG_KEXEC_FILE

diff --git a/arch/powerpc/include/asm/kexec_ranges.h 
b/arch/powerpc/include/asm/kexec_ranges.h
index f83866a19e87..802abf580cf0 100644
--- a/arch/powerpc/include/asm/kexec_ranges.h
+++ b/arch/powerpc/include/asm/kexec_ranges.h
@@ -7,6 +7,7 @@
  void sort_memory_ranges(struct crash_mem *mrngs, bool merge);
  struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges);
  int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
+int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
  int add_tce_mem_ranges(struct crash_mem **mem_ranges);
  int add_initrd_mem_range(struct crash_mem **mem_ranges);
  #ifdef CONFIG_PPC_64S_HASH_MMU
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 43fcd78c2102..4673f150f973 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -19,8 +19,11 @@
  #include 
  #include 
  #include 
+#include 
  
  #include 

+#include 
+#include 
  #include 
  #include 
  #include 
@@ -546,6 +549,101 @@ int update_cpus_node(void *fdt)
  #undef pr_fmt
  #define pr_fmt(fmt) "crash hp: " fmt
  
+/*

+ * Advertise preferred elfcorehdr size to userspace via
+ * /sys/kernel/crash_elfcorehdr_size sysfs interface.
+ */
+unsigned int arch_crash_get_elfcorehdr_size(void)
+{
+   unsigned int sz;
+   

Re: [PATCH v15 4/5] powerpc: add crash CPU hotplug support

2024-01-23 Thread Hari Bathini




On 11/01/24 4:21 pm, Sourabh Jain wrote:

Due to CPU/Memory hotplug or online/offline events, the elfcorehdr
(which describes the CPUs and memory of the crashed kernel) and FDT
(Flattened Device Tree) of kdump image becomes outdated. Consequently,
attempting dump collection with an outdated elfcorehdr or FDT can lead
to failed or inaccurate dump collection.

Going forward, CPU hotplug or online/offline events are referred as
CPU/Memory add/remove events.

The current solution to address the above issue involves monitoring the
CPU/Memory add/remove events in userspace using udev rules and whenever
there are changes in CPU and memory resources, the entire kdump image
is loaded again. The kdump image includes kernel, initrd, elfcorehdr,
FDT, purgatory. Given that only elfcorehdr and FDT get outdated due to
CPU/Memory add/remove events, reloading the entire kdump image is
inefficient. More importantly, kdump remains inactive for a substantial
amount of time until the kdump reload completes.

To address the aforementioned issue, commit 247262756121 ("crash: add
generic infrastructure for crash hotplug support") added a generic
infrastructure that allows architectures to selectively update the kdump
image component during CPU or memory add/remove events within the kernel
itself.

In the event of a CPU or memory add/remove events, the generic crash
hotplug event handler, `crash_handle_hotplug_event()`, is triggered. It
then acquires the necessary locks to update the kdump image and invokes
the architecture-specific crash hotplug handler,
`arch_crash_handle_hotplug_event()`, to update the required kdump image
components.

This patch adds crash hotplug handler for PowerPC and enable support to
update the kdump image on CPU add/remove events. Support for memory
add/remove events is added in a subsequent patch with the title
"powerpc: add crash memory hotplug support"

As mentioned earlier, only the elfcorehdr and FDT kdump image components
need to be updated in the event of CPU or memory add/remove events.
However, on PowerPC architecture crash hotplug handler only updates the
FDT to enable crash hotplug support for CPU add/remove events. Here's
why.

The elfcorehdr on PowerPC is built with possible CPUs, and thus, it does
not need an update on CPU add/remove events. On the other hand, the FDT
needs to be updated on CPU add events to include the newly added CPU. If
the FDT is not updated and the kernel crashes on a newly added CPU, the
kdump kernel will fail to boot due to the unavailability of the crashing
CPU in the FDT. During the early boot, it is expected that the boot CPU
must be a part of the FDT; otherwise, the kernel will raise a BUG and
fail to boot. For more information, refer to commit 36ae37e3436b0
("powerpc: Make boot_cpuid common between 32 and 64-bit"). Since it is
okay to have an offline CPU in the kdump FDT, no action is taken in case
of CPU removal.

There are two system calls, `kexec_file_load` and `kexec_load`, used to
load the kdump image. Few changes have been made to ensure kernel can
safely update the FDT of kdump image loaded using both system calls.

For kexec_file_load syscall the kdump image is prepared in kernel. So to
support an increasing number of CPUs, the FDT is constructed with extra
buffer space to ensure it can accommodate a possible number of CPU
nodes. Additionally, a call to fdt_pack (which trims the unused space
once the FDT is prepared) is avoided if this feature is enabled.

For the kexec_load syscall, the FDT is updated only if the
KEXEC_CRASH_HOTPLUG_SUPPORT kexec flag is passed to the kernel by
userspace (kexec tools). When userspace passes this flag to the kernel,
it indicates that the FDT is built to accommodate possible CPUs, and the
FDT segment is excluded from SHA calculation, making it safe to update.

The changes related to this feature are kept under the CRASH_HOTPLUG
config, and it is enabled by default.

Signed-off-by: Sourabh Jain 
Cc: Akhil Raj 
Cc: Andrew Morton 
Cc: Aneesh Kumar K.V 
Cc: Baoquan He 
Cc: Borislav Petkov (AMD) 
Cc: Boris Ostrovsky 
Cc: Christophe Leroy 
Cc: Dave Hansen 
Cc: Dave Young 
Cc: David Hildenbrand 
Cc: Greg Kroah-Hartman 
Cc: Hari Bathini 
Cc: Laurent Dufour 
Cc: Mahesh Salgaonkar 
Cc: Michael Ellerman 
Cc: Mimi Zohar 
Cc: Naveen N Rao 
Cc: Oscar Salvador 
Cc: Thomas Gleixner 
Cc: Valentin Schneider 
Cc: Vivek Goyal 
Cc: kexec@lists.infradead.org
Cc: x...@kernel.org
---
  arch/powerpc/Kconfig  |  4 ++
  arch/powerpc/include/asm/kexec.h  |  6 +++
  arch/powerpc/kexec/core_64.c  | 69 +++
  arch/powerpc/kexec/elf_64.c   | 12 +-
  arch/powerpc/kexec/file_load_64.c | 15 +++
  5 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 414b978b8010..91d7bb0b81ee 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -682,6 +682,10 @@ config RELOCATABLE_TEST
  config ARCH_SUPPORTS_CRA

Re: [PATCHv9 2/2] powerpc/setup: Loosen the mapping between cpu logical id and its seq in dt

2024-01-08 Thread Hari Bathini



On 09/01/24 9:57 am, Hari Bathini wrote:

Hi Michael,



Sorry, Michael.
I am just about getting back to work and I spoke too soon.
You already seem to have posted a set with the approach you had in mind:

  https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=388350

Thanks
Hari


I am fine with either approach. I was trying to address your concerns
in my way. Looking for your inputs here on how to go about this now..

On 29/11/23 7:00 am, Pingfan Liu wrote:

Hi Hari,


On Mon, Nov 27, 2023 at 12:30 PM Hari Bathini  
wrote:


Hi Pingfan, Michael,

On 17/10/23 4:03 pm, Hari Bathini wrote:



On 17/10/23 7:58 am, Pingfan Liu wrote:

*** Idea ***
For kexec -p, the boot cpu can be not the cpu0, this causes the 
problem

of allocating memory for paca_ptrs[]. However, in theory, there is no
requirement to assign cpu's logical id as its present sequence in the
device tree. But there is something like cpu_first_thread_sibling(),
which makes assumption on the mapping inside a core. Hence partially
loosening the mapping, i.e. unbind the mapping of core while keep the
mapping inside a core.

*** Implement ***
At this early stage, there are plenty of memory to utilize. Hence, 
this

patch allocates interim memory to link the cpu info on a list, then
reorder cpus by changing the list head. As a result, there is a rotate
shift between the sequence number in dt and the cpu logical number.

*** Result ***
After this patch, a boot-cpu's logical id will always be mapped 
into the

range [0,threads_per_core).

Besides this, at this phase, all threads in the boot core are 
forced to

be onlined. This restriction will be lifted in a later patch with
extra effort.

Signed-off-by: Pingfan Liu 
Cc: Michael Ellerman 
Cc: Nicholas Piggin 
Cc: Christophe Leroy 
Cc: Mahesh Salgaonkar 
Cc: Wen Xiong 
Cc: Baoquan He 
Cc: Ming Lei 
Cc: Sourabh Jain 
Cc: Hari Bathini 
Cc: kexec@lists.infradead.org
To: linuxppc-...@lists.ozlabs.org


Thanks for working on this, Pingfan.
Looks good to me.

Acked-by: Hari Bathini 



On second thoughts, probably better off with no impact for
bootcpu < nr_cpu_ids case and changing only two cores logical
numbering otherwise. Something like the below (Please share
your thoughts):



I am afraid that it may not be as ideal as it looks, considering the
following factors:
-1. For the case of 'bootcpu < nr_cpu_ids', crash can happen evenly
across any cpu in the system, which seriously undermines the
protection intended here (Under the most optimistic scenario, there is
a 50% chance of success)

-2. For the re-ordering of logical numbering, IMHO, if there is
concern that re-ordering will break something, the partial re-ordering
can not avoid that.  We ought to spot probable hazards so as to ease
worries.


Thanks,

Pingfan


diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index ec82f5bda908..78a8312aa8c4 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -76,7 +76,9 @@ u64 ppc64_rma_size;
   unsigned int boot_cpu_node_count __ro_after_init;
   #endif
   static phys_addr_t first_memblock_size;
+#ifdef CONFIG_SMP
   static int __initdata boot_cpu_count;
+#endif

   static int __init early_parse_mem(char *p)
   {
@@ -357,6 +359,25 @@ static int __init early_init_dt_scan_cpus(unsigned
long node,
 fdt_boot_cpuid_phys(initial_boot_params)) {
 found = boot_cpu_count;
 found_thread = i;
+   /*
+    * Map boot-cpu logical id into the range
+    * of [0, thread_per_core) if it can't be
+    * accommodated within nr_cpu_ids.
+    */
+   if (i != boot_cpu_count && boot_cpu_count >= 
nr_cpu_ids) {

+   boot_cpuid = i;
+   DBG("Logical CPU number for boot CPU 
changed from %d to %d\n",

+   boot_cpu_count, i);
+   } else {
+   boot_cpuid = boot_cpu_count;
+   }
+
+   /* Ensure boot thread is acconted for in 
nr_cpu_ids */

+   if (boot_cpuid >= nr_cpu_ids) {
+   set_nr_cpu_ids(boot_cpuid + 1);
+   DBG("Adjusted nr_cpu_ids to %u, to 
include boot CPU.\n",

+   nr_cpu_ids);
+   }
 }
   #ifdef CONFIG_SMP
 /* logical cpu id is always 0 on UP kernels */
@@ -368,9 +389,8 @@ static int __init early_init_dt_scan_cpus(unsigned
long node,
 if (found < 0)
 return 0;

-   DBG("boot cpu: logical %d physical %d\n", found,
+   DBG("boot cpu: logical %d physical %d\n", boot_cpuid,
 be32_to_cpu(intserv[found_thread]));
-   boot_cpuid = found;

 

Re: [PATCHv9 2/2] powerpc/setup: Loosen the mapping between cpu logical id and its seq in dt

2024-01-08 Thread Hari Bathini

Hi Michael,

I am fine with either approach. I was trying to address your concerns
in my way. Looking for your inputs here on how to go about this now..

On 29/11/23 7:00 am, Pingfan Liu wrote:

Hi Hari,


On Mon, Nov 27, 2023 at 12:30 PM Hari Bathini  wrote:


Hi Pingfan, Michael,

On 17/10/23 4:03 pm, Hari Bathini wrote:



On 17/10/23 7:58 am, Pingfan Liu wrote:

*** Idea ***
For kexec -p, the boot cpu can be not the cpu0, this causes the problem
of allocating memory for paca_ptrs[]. However, in theory, there is no
requirement to assign cpu's logical id as its present sequence in the
device tree. But there is something like cpu_first_thread_sibling(),
which makes assumption on the mapping inside a core. Hence partially
loosening the mapping, i.e. unbind the mapping of core while keep the
mapping inside a core.

*** Implement ***
At this early stage, there are plenty of memory to utilize. Hence, this
patch allocates interim memory to link the cpu info on a list, then
reorder cpus by changing the list head. As a result, there is a rotate
shift between the sequence number in dt and the cpu logical number.

*** Result ***
After this patch, a boot-cpu's logical id will always be mapped into the
range [0,threads_per_core).

Besides this, at this phase, all threads in the boot core are forced to
be onlined. This restriction will be lifted in a later patch with
extra effort.

Signed-off-by: Pingfan Liu 
Cc: Michael Ellerman 
Cc: Nicholas Piggin 
Cc: Christophe Leroy 
Cc: Mahesh Salgaonkar 
Cc: Wen Xiong 
Cc: Baoquan He 
Cc: Ming Lei 
Cc: Sourabh Jain 
Cc: Hari Bathini 
Cc: kexec@lists.infradead.org
To: linuxppc-...@lists.ozlabs.org


Thanks for working on this, Pingfan.
Looks good to me.

Acked-by: Hari Bathini 



On second thoughts, probably better off with no impact for
bootcpu < nr_cpu_ids case and changing only two cores logical
numbering otherwise. Something like the below (Please share
your thoughts):



I am afraid that it may not be as ideal as it looks, considering the
following factors:
-1. For the case of 'bootcpu < nr_cpu_ids', crash can happen evenly
across any cpu in the system, which seriously undermines the
protection intended here (Under the most optimistic scenario, there is
a 50% chance of success)

-2. For the re-ordering of logical numbering, IMHO, if there is
concern that re-ordering will break something, the partial re-ordering
can not avoid that.  We ought to spot probable hazards so as to ease
worries.


Thanks,

Pingfan


diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index ec82f5bda908..78a8312aa8c4 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -76,7 +76,9 @@ u64 ppc64_rma_size;
   unsigned int boot_cpu_node_count __ro_after_init;
   #endif
   static phys_addr_t first_memblock_size;
+#ifdef CONFIG_SMP
   static int __initdata boot_cpu_count;
+#endif

   static int __init early_parse_mem(char *p)
   {
@@ -357,6 +359,25 @@ static int __init early_init_dt_scan_cpus(unsigned
long node,
 fdt_boot_cpuid_phys(initial_boot_params)) {
 found = boot_cpu_count;
 found_thread = i;
+   /*
+* Map boot-cpu logical id into the range
+* of [0, thread_per_core) if it can't be
+* accommodated within nr_cpu_ids.
+*/
+   if (i != boot_cpu_count && boot_cpu_count >= 
nr_cpu_ids) {
+   boot_cpuid = i;
+   DBG("Logical CPU number for boot CPU changed from %d 
to %d\n",
+   boot_cpu_count, i);
+   } else {
+   boot_cpuid = boot_cpu_count;
+   }
+
+   /* Ensure boot thread is acconted for in nr_cpu_ids */
+   if (boot_cpuid >= nr_cpu_ids) {
+   set_nr_cpu_ids(boot_cpuid + 1);
+   DBG("Adjusted nr_cpu_ids to %u, to include boot 
CPU.\n",
+   nr_cpu_ids);
+   }
 }
   #ifdef CONFIG_SMP
 /* logical cpu id is always 0 on UP kernels */
@@ -368,9 +389,8 @@ static int __init early_init_dt_scan_cpus(unsigned
long node,
 if (found < 0)
 return 0;

-   DBG("boot cpu: logical %d physical %d\n", found,
+   DBG("boot cpu: logical %d physical %d\n", boot_cpuid,
 be32_to_cpu(intserv[found_thread]));
-   boot_cpuid = found;

 boot_cpu_hwid = be32_to_cpu(intserv[found_thread]);

diff --git a/arch/powerpc/kernel/setup-common.c
b/arch/powerpc/kernel/setup-common.c
index b7b733474b60..f7179525c774 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -409,6 +409,12 @@ static void __init cp

Re: [PATCH v14 5/6] powerpc: add crash CPU hotplug support

2023-12-19 Thread Hari Bathini

Hi Sourabh

On 11/12/23 2:00 pm, Sourabh Jain wrote:

Due to CPU/Memory hotplug or online/offline events the elfcorehdr
(which describes the CPUs and memory of the crashed kernel) and FDT
(Flattened Device Tree) of kdump image becomes outdated. Consequently,
attempting dump collection with an outdated elfcorehdr or FDT can lead
to failed or inaccurate dump collection.

Going forward CPU hotplug or online/offlice events are referred as


s/offlice/offline/


CPU/Memory add/remvoe events.


s/remvoe/remove/


The current solution to address the above issue involves monitoring the
CPU/memory add/remove events in userspace using udev rules and whenever
there are changes in CPU and memory resources, the entire kdump image
is loaded again. The kdump image includes kernel, initrd, elfcorehdr,
FDT, purgatory. Given that only elfcorehdr and FDT get outdated due to
CPU/Memory add/remove events, reloading the entire kdump image is
inefficient. More importantly, kdump remains inactive for a substantial
amount of time until the kdump reload completes.

To address the aforementioned issue, commit 247262756121 ("crash: add
generic infrastructure for crash hotplug support") added a generic
infrastructure that allows architectures to selectively update the kdump
image component during CPU or memory add/remove events within the kernel
itself.

In the event of a CPU or memory add/remove event, the generic crash
hotplug event handler, `crash_handle_hotplug_event()`, is triggered. It
then acquires the necessary locks to update the kdump image and invokes
the architecture-specific crash hotplug handler,
`arch_crash_handle_hotplug_event()`, to update the required kdump image
components.

This patch adds crash hotplug handler for PowerPC and enable support to
update the kdump image on CPU add/remove events. Support for memory
add/remove events is added in a subsequent patch with the title
"powerpc: add crash memory hotplug support."

As mentioned earlier, only the elfcorehdr and FDT kdump image components
need to be updated in the event of CPU or memory add/remove events.
However, the PowerPC architecture crash hotplug handler only updates the
FDT to enable crash hotplug support for CPU add/remove events. Here's
why.

The Elfcorehdr on PowerPC is built with possible CPUs, and thus, it does
not need an update on CPU add/remove events. On the other hand, the FDT
needs to be updated on CPU add events to include the newly added CPU. If
the FDT is not updated and the kernel crashes on a newly added CPU, the
kdump kernel will fail to boot due to the unavailability of the crashing
CPU in the FDT. During the early boot, it is expected that the boot CPU
must be a part of the FDT; otherwise, the kernel will raise a BUG and
fail to boot. For more information, refer to commit 36ae37e3436b0
("powerpc: Make boot_cpuid common between 32 and 64-bit"). Since it is
okay to have an offline CPU in the kdump FDT, no action is taken in case
of CPU removal.

There are two system calls, `kexec_file_load` and `kexec_load`, used to
load the kdump image. Few changes have been made to ensure kernel can
safely update the kdump FDT for both system calls.

For kexec_file_load syscall the kdump image is prepared in kernel. So to
support an increasing number of CPUs, the FDT is constructed with extra
buffer space to ensure it can accommodate a possible number of CPU
nodes. Additionally, a call to fdt_pack (which trims the unused space
once the FDT is prepared) is avoided for kdump image loading if this
feature is enabled.

For the kexec_load syscall, the FDT is updated only if both the
KEXEC_UPDATE_FDT and KEXEC_UPDATE_ELFCOREHDR kexec flags are passed to
the kernel by the kexec tool. Passing these flags to the kernel
indicates that the FDT is built to accommodate possible CPUs, and the
FDT segment is not considered for SHA calculation, making it safe to
update the FDT.

Commit 88a6f8994421 ("crash: memory and CPU hotplug sysfs attributes")
added a sysfs interface to indicate userspace (kdump udev rule) that
kernel will update the kdump image on CPU hotplug events, so kdump
reload can be avoided. Implement arch specific function
`arch_crash_hotplug_cpu_support()` to correctly advertise kernel
capability to update kdump image.

This feature is advertised to userspace when the following conditions
are met:

1. Kdump image is loaded using kexec_file_load system call.
2. Kdump image is loaded using kexec_load system and both
KEXEC_UPATE_ELFCOREHDR and KEXEC_UPDATE_FDT kexec flags are
passed to kernel.

The changes related to this feature are kept under the CRASH_HOTPLUG
config, and it is enabled by default.

Signed-off-by: Sourabh Jain 
Cc: Akhil Raj 
Cc: Andrew Morton 
Cc: Aneesh Kumar K.V 
Cc: Baoquan He 
Cc: Borislav Petkov (AMD) 
Cc: Boris Ostrovsky 
Cc: Christophe Leroy 
Cc: Dave Hansen 
Cc: Dave Young 
Cc: David Hildenbrand 
Cc: Eric DeVolder 
Cc: Greg Kroah-Hartman 
Cc: Hari Bathini 
Cc: Laurent Dufour 
Cc:

Re: [PATCHv9 2/2] powerpc/setup: Loosen the mapping between cpu logical id and its seq in dt

2023-11-26 Thread Hari Bathini

Hi Pingfan, Michael,

On 17/10/23 4:03 pm, Hari Bathini wrote:



On 17/10/23 7:58 am, Pingfan Liu wrote:

*** Idea ***
For kexec -p, the boot cpu can be not the cpu0, this causes the problem
of allocating memory for paca_ptrs[]. However, in theory, there is no
requirement to assign cpu's logical id as its present sequence in the
device tree. But there is something like cpu_first_thread_sibling(),
which makes assumption on the mapping inside a core. Hence partially
loosening the mapping, i.e. unbind the mapping of core while keep the
mapping inside a core.

*** Implement ***
At this early stage, there are plenty of memory to utilize. Hence, this
patch allocates interim memory to link the cpu info on a list, then
reorder cpus by changing the list head. As a result, there is a rotate
shift between the sequence number in dt and the cpu logical number.

*** Result ***
After this patch, a boot-cpu's logical id will always be mapped into the
range [0,threads_per_core).

Besides this, at this phase, all threads in the boot core are forced to
be onlined. This restriction will be lifted in a later patch with
extra effort.

Signed-off-by: Pingfan Liu 
Cc: Michael Ellerman 
Cc: Nicholas Piggin 
Cc: Christophe Leroy 
Cc: Mahesh Salgaonkar 
Cc: Wen Xiong 
Cc: Baoquan He 
Cc: Ming Lei 
Cc: Sourabh Jain 
Cc: Hari Bathini 
Cc: kexec@lists.infradead.org
To: linuxppc-...@lists.ozlabs.org


Thanks for working on this, Pingfan.
Looks good to me.

Acked-by: Hari Bathini 



On second thoughts, probably better off with no impact for
bootcpu < nr_cpu_ids case and changing only two cores logical
numbering otherwise. Something like the below (Please share
your thoughts):

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index ec82f5bda908..78a8312aa8c4 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -76,7 +76,9 @@ u64 ppc64_rma_size;
 unsigned int boot_cpu_node_count __ro_after_init;
 #endif
 static phys_addr_t first_memblock_size;
+#ifdef CONFIG_SMP
 static int __initdata boot_cpu_count;
+#endif

 static int __init early_parse_mem(char *p)
 {
@@ -357,6 +359,25 @@ static int __init early_init_dt_scan_cpus(unsigned 
long node,

fdt_boot_cpuid_phys(initial_boot_params)) {
found = boot_cpu_count;
found_thread = i;
+   /*
+* Map boot-cpu logical id into the range
+* of [0, thread_per_core) if it can't be
+* accommodated within nr_cpu_ids.
+*/
+   if (i != boot_cpu_count && boot_cpu_count >= 
nr_cpu_ids) {
+   boot_cpuid = i;
+   DBG("Logical CPU number for boot CPU changed from %d 
to %d\n",
+   boot_cpu_count, i);
+   } else {
+   boot_cpuid = boot_cpu_count;
+   }
+
+   /* Ensure boot thread is acconted for in nr_cpu_ids */
+   if (boot_cpuid >= nr_cpu_ids) {
+   set_nr_cpu_ids(boot_cpuid + 1);
+   DBG("Adjusted nr_cpu_ids to %u, to include boot 
CPU.\n",
+   nr_cpu_ids);
+   }
}
 #ifdef CONFIG_SMP
/* logical cpu id is always 0 on UP kernels */
@@ -368,9 +389,8 @@ static int __init early_init_dt_scan_cpus(unsigned 
long node,

if (found < 0)
return 0;

-   DBG("boot cpu: logical %d physical %d\n", found,
+   DBG("boot cpu: logical %d physical %d\n", boot_cpuid,
be32_to_cpu(intserv[found_thread]));
-   boot_cpuid = found;

boot_cpu_hwid = be32_to_cpu(intserv[found_thread]);

diff --git a/arch/powerpc/kernel/setup-common.c 
b/arch/powerpc/kernel/setup-common.c

index b7b733474b60..f7179525c774 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -409,6 +409,12 @@ static void __init cpu_init_thread_core_maps(int tpc)

 u32 *cpu_to_phys_id = NULL;

+struct interrupt_server_node {
+   boolavail;
+   int len;
+   __be32 intserv[];
+};
+
 /**
  * setup_cpu_maps - initialize the following cpu maps:
  *  cpu_possible_mask
@@ -429,9 +435,13 @@ u32 *cpu_to_phys_id = NULL;
  */
 void __init smp_setup_cpu_maps(void)
 {
+   struct interrupt_server_node *core0_node = NULL, *bt_node = NULL;
+   int orig_boot_cpu = -1, orig_boot_thread = -1;
+   bool found_boot_cpu = false;
struct device_node *dn;
-   int cpu = 0;
int nthreads = 1;
+   int cpu = 0;
+   int j, len;

DBG("smp_setup_cpu_maps()\n");

@@ -442,9 +452,9 @@ void __init smp_setup_cpu_maps(void)
  __func__, nr_cpu_ids * sizeof(u32), __alignof__(u

Re: [PATCHv9 2/2] powerpc/setup: Loosen the mapping between cpu logical id and its seq in dt

2023-10-20 Thread Hari Bathini



On 18/10/23 1:51 pm, Pingfan Liu wrote:

On Tue, Oct 17, 2023 at 6:39 PM Hari Bathini  wrote:




On 17/10/23 7:58 am, Pingfan Liu wrote:

*** Idea ***
For kexec -p, the boot cpu can be not the cpu0, this causes the problem
of allocating memory for paca_ptrs[]. However, in theory, there is no
requirement to assign cpu's logical id as its present sequence in the
device tree. But there is something like cpu_first_thread_sibling(),
which makes assumption on the mapping inside a core. Hence partially
loosening the mapping, i.e. unbind the mapping of core while keep the
mapping inside a core.

*** Implement ***
At this early stage, there are plenty of memory to utilize. Hence, this
patch allocates interim memory to link the cpu info on a list, then
reorder cpus by changing the list head. As a result, there is a rotate
shift between the sequence number in dt and the cpu logical number.

*** Result ***
After this patch, a boot-cpu's logical id will always be mapped into the
range [0,threads_per_core).

Besides this, at this phase, all threads in the boot core are forced to
be onlined. This restriction will be lifted in a later patch with
extra effort.

Signed-off-by: Pingfan Liu 
Cc: Michael Ellerman 
Cc: Nicholas Piggin 
Cc: Christophe Leroy 
Cc: Mahesh Salgaonkar 
Cc: Wen Xiong 
Cc: Baoquan He 
Cc: Ming Lei 
Cc: Sourabh Jain 
Cc: Hari Bathini 
Cc: kexec@lists.infradead.org
To: linuxppc-...@lists.ozlabs.org


Thanks for working on this, Pingfan.
Looks good to me.

Acked-by: Hari Bathini 



Thank you for kindly reviewing. I hope that after all these years, we
have accomplished the objective.



I hope so too.
Thanks!

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCHv9 2/2] powerpc/setup: Loosen the mapping between cpu logical id and its seq in dt

2023-10-17 Thread Hari Bathini




On 17/10/23 7:58 am, Pingfan Liu wrote:

*** Idea ***
For kexec -p, the boot cpu can be not the cpu0, this causes the problem
of allocating memory for paca_ptrs[]. However, in theory, there is no
requirement to assign cpu's logical id as its present sequence in the
device tree. But there is something like cpu_first_thread_sibling(),
which makes assumption on the mapping inside a core. Hence partially
loosening the mapping, i.e. unbind the mapping of core while keep the
mapping inside a core.

*** Implement ***
At this early stage, there are plenty of memory to utilize. Hence, this
patch allocates interim memory to link the cpu info on a list, then
reorder cpus by changing the list head. As a result, there is a rotate
shift between the sequence number in dt and the cpu logical number.

*** Result ***
After this patch, a boot-cpu's logical id will always be mapped into the
range [0,threads_per_core).

Besides this, at this phase, all threads in the boot core are forced to
be onlined. This restriction will be lifted in a later patch with
extra effort.

Signed-off-by: Pingfan Liu 
Cc: Michael Ellerman 
Cc: Nicholas Piggin 
Cc: Christophe Leroy 
Cc: Mahesh Salgaonkar 
Cc: Wen Xiong 
Cc: Baoquan He 
Cc: Ming Lei 
Cc: Sourabh Jain 
Cc: Hari Bathini 
Cc: kexec@lists.infradead.org
To: linuxppc-...@lists.ozlabs.org


Thanks for working on this, Pingfan.
Looks good to me.

Acked-by: Hari Bathini 


---
  arch/powerpc/kernel/prom.c | 25 +
  arch/powerpc/kernel/setup-common.c | 84 +++---
  2 files changed, 82 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index ec82f5bda908..7ed9034912ca 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -76,7 +76,9 @@ u64 ppc64_rma_size;
  unsigned int boot_cpu_node_count __ro_after_init;
  #endif
  static phys_addr_t first_memblock_size;
+#ifdef CONFIG_SMP
  static int __initdata boot_cpu_count;
+#endif
  
  static int __init early_parse_mem(char *p)

  {
@@ -331,8 +333,7 @@ static int __init early_init_dt_scan_cpus(unsigned long 
node,
const __be32 *intserv;
int i, nthreads;
int len;
-   int found = -1;
-   int found_thread = 0;
+   bool found = false;
  
  	/* We are scanning "cpu" nodes only */

if (type == NULL || strcmp(type, "cpu") != 0)
@@ -355,8 +356,15 @@ static int __init early_init_dt_scan_cpus(unsigned long 
node,
for (i = 0; i < nthreads; i++) {
if (be32_to_cpu(intserv[i]) ==
fdt_boot_cpuid_phys(initial_boot_params)) {
-   found = boot_cpu_count;
-   found_thread = i;
+   /*
+* always map the boot-cpu logical id into the
+* range of [0, thread_per_core)
+*/
+   boot_cpuid = i;
+   found = true;
+   /* This forces all threads in a core to be online */
+   if (nr_cpu_ids % nthreads != 0)
+   set_nr_cpu_ids(ALIGN(nr_cpu_ids, nthreads));
}
  #ifdef CONFIG_SMP
/* logical cpu id is always 0 on UP kernels */
@@ -365,14 +373,13 @@ static int __init early_init_dt_scan_cpus(unsigned long 
node,
}
  
  	/* Not the boot CPU */

-   if (found < 0)
+   if (!found)
return 0;
  
-	DBG("boot cpu: logical %d physical %d\n", found,

-   be32_to_cpu(intserv[found_thread]));
-   boot_cpuid = found;
+   DBG("boot cpu: logical %d physical %d\n", boot_cpuid,
+   be32_to_cpu(intserv[boot_cpuid]));
  
-	boot_cpu_hwid = be32_to_cpu(intserv[found_thread]);

+   boot_cpu_hwid = be32_to_cpu(intserv[boot_cpuid]);
  
  	/*

 * PAPR defines "logical" PVR values for cpus that
diff --git a/arch/powerpc/kernel/setup-common.c 
b/arch/powerpc/kernel/setup-common.c
index 707f0490639d..9802c7e5ee2f 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -36,6 +36,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
  #include 
@@ -425,6 +426,13 @@ static void __init cpu_init_thread_core_maps(int tpc)
  
  u32 *cpu_to_phys_id = NULL;
  
+struct interrupt_server_node {

+   struct list_head node;
+   boolavail;
+   int len;
+   __be32 intserv[];
+};
+
  /**
   * setup_cpu_maps - initialize the following cpu maps:
   *  cpu_possible_mask
@@ -446,11 +454,16 @@ u32 *cpu_to_phys_id = NULL;
  void __init smp_setup_cpu_maps(void)
  {
struct device_node *dn;
-   int cpu = 0;
-   int nthreads = 1;
+   int shift = 0, cpu = 0;
+   int j, nthreads = 1;
+   int len;
+   struct interrupt_server_node *intserv_node, *n;
+   struct list_head *bt_node, head;
+   bool avail, f

Re: [PATCHv9 1/2] powerpc/setup : Enable boot_cpu_hwid for PPC32

2023-10-17 Thread Hari Bathini




On 17/10/23 7:58 am, Pingfan Liu wrote:

In order to identify the boot cpu, its intserv[] should be recorded and
checked in smp_setup_cpu_maps().

smp_setup_cpu_maps() is shared between PPC64 and PPC32. Since PPC64 has
already used boot_cpu_hwid to carry that information, enabling this
variable on PPC32 so later it can also be used to carry that information
for PPC32 in the coming patch.

Signed-off-by: Pingfan Liu 
Cc: Michael Ellerman 
Cc: Nicholas Piggin 
Cc: Christophe Leroy 
Cc: Mahesh Salgaonkar 
Cc: Wen Xiong 
Cc: Baoquan He 
Cc: Ming Lei 
Cc: Sourabh Jain 
Cc: Hari Bathini 
Cc: kexec@lists.infradead.org
To: linuxppc-...@lists.ozlabs.org


LGTM.

Acked-by: Hari Bathini 


---
  arch/powerpc/include/asm/smp.h | 2 +-
  arch/powerpc/kernel/prom.c | 3 +--
  arch/powerpc/kernel/setup-common.c | 2 --
  3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 576d0e15..5db9178cc800 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -26,7 +26,7 @@
  #include 
  
  extern int boot_cpuid;

-extern int boot_cpu_hwid; /* PPC64 only */
+extern int boot_cpu_hwid;
  extern int spinning_secondaries;
  extern u32 *cpu_to_phys_id;
  extern bool coregroup_enabled;
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 0b5878c3125b..ec82f5bda908 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -372,8 +372,7 @@ static int __init early_init_dt_scan_cpus(unsigned long 
node,
be32_to_cpu(intserv[found_thread]));
boot_cpuid = found;
  
-	if (IS_ENABLED(CONFIG_PPC64))

-   boot_cpu_hwid = be32_to_cpu(intserv[found_thread]);
+   boot_cpu_hwid = be32_to_cpu(intserv[found_thread]);
  
  	/*

 * PAPR defines "logical" PVR values for cpus that
diff --git a/arch/powerpc/kernel/setup-common.c 
b/arch/powerpc/kernel/setup-common.c
index 2f1026fba00d..707f0490639d 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -87,9 +87,7 @@ EXPORT_SYMBOL(machine_id);
  int boot_cpuid = -1;
  EXPORT_SYMBOL_GPL(boot_cpuid);
  
-#ifdef CONFIG_PPC64

  int boot_cpu_hwid = -1;
-#endif
  
  /*

   * These are used in binfmt_elf.c to put aux entries on the stack


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCHv8 3/5] powerpc/setup: Handle the case when boot_cpuid greater than nr_cpus

2023-10-11 Thread Hari Bathini




On 11/10/23 8:35 am, Pingfan Liu wrote:

On Tue, Oct 10, 2023 at 01:56:13PM +0530, Hari Bathini wrote:



On 09/10/23 5:00 pm, Pingfan Liu wrote:

If the boot_cpuid is smaller than nr_cpus, it requires extra effort to
ensure the boot_cpu is in cpu_present_mask. This can be achieved by
reserving the last quota for the boot cpu.

Note: the restriction on nr_cpus will be lifted with more effort in the
successive patches

Signed-off-by: Pingfan Liu 
Cc: Michael Ellerman 
Cc: Nicholas Piggin 
Cc: Christophe Leroy 
Cc: Mahesh Salgaonkar 
Cc: Wen Xiong 
Cc: Baoquan He 
Cc: Ming Lei 
Cc: kexec@lists.infradead.org
To: linuxppc-...@lists.ozlabs.org
---
   arch/powerpc/kernel/setup-common.c | 25 ++---
   1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/setup-common.c 
b/arch/powerpc/kernel/setup-common.c
index 81291e13dec0..f9ef0a2666b0 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -454,8 +454,8 @@ struct interrupt_server_node {
   void __init smp_setup_cpu_maps(void)
   {
struct device_node *dn;
-   int shift = 0, cpu = 0;
-   int j, nthreads = 1;
+   int terminate, shift = 0, cpu = 0;
+   int j, bt_thread = 0, nthreads = 1;
int len;
struct interrupt_server_node *intserv_node, *n;
struct list_head *bt_node, head;
@@ -518,6 +518,7 @@ void __init smp_setup_cpu_maps(void)
for (j = 0 ; j < nthreads; j++) {
if (be32_to_cpu(intserv[j]) == boot_cpu_hwid) {
bt_node = _node->node;
+   bt_thread = j;
found_boot_cpu = true;
/*
 * Record the round-shift between dt
@@ -537,11 +538,21 @@ void __init smp_setup_cpu_maps(void)
/* Select the primary thread, the boot cpu's slibing, as the logic 0 */
list_add_tail(, bt_node);
pr_info("the round shift between dt seq and the cpu logic number: 
%d\n", shift);
+   terminate = nr_cpu_ids;
list_for_each_entry(intserv_node, , node) {
+   j = 0;



+   /* Choose a start point to cover the boot cpu */
+   if (nr_cpu_ids - 1 < bt_thread) {
+   /*
+* The processor core puts assumption on the thread id,
+* not to breach the assumption.
+*/
+   terminate = nr_cpu_ids - 1;


nthreads is anyway assumed to be same for all cores. So, enforcing
nr_cpu_ids to a minimum of nthreads (and multiple of nthreads) should
make the code much simpler without the need for above check and the
other complexities addressed in the subsequent patches...



Indeed, this series can be splited into two partsk, [1-2/5] and [3-5/5].
In [1-2/5], if smaller, the nr_cpu_ids is enforced to be equal to
nthreads. I will make it align upward on nthreads in the next version.
So [1-2/5] can be totally independent from the rest patches in this
series.


Yup. Would prefer it that way.


 From an engineer's perspective, [3-5/5] are added to maintain the
nr_cpus semantics. (Finally, nr_cpus=1 can be achieved but requiring
effort on other subsystem)


I understand it would be nice to maintain semantics but not worth the
complexity it brings, IMHO. So, my suggest would be to drop [3-5/5].

Thanks
Hari

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCHv8 2/5] powerpc/setup: Loosen the mapping between cpu logical id and its seq in dt

2023-10-10 Thread Hari Bathini




On 09/10/23 5:00 pm, Pingfan Liu wrote:

*** Idea ***
For kexec -p, the boot cpu can be not the cpu0, this causes the problem
of allocating memory for paca_ptrs[]. However, in theory, there is no
requirement to assign cpu's logical id as its present sequence in the
device tree. But there is something like cpu_first_thread_sibling(),
which makes assumption on the mapping inside a core. Hence partially
loosening the mapping, i.e. unbind the mapping of core while keep the
mapping inside a core.

*** Implement ***
At this early stage, there are plenty of memory to utilize. Hence, this
patch allocates interim memory to link the cpu info on a list, then
reorder cpus by changing the list head. As a result, there is a rotate
shift between the sequence number in dt and the cpu logical number.

*** Result ***
After this patch, a boot-cpu's logical id will always be mapped into the
range [0,threads_per_core).

Besides this, at this phase, all threads in the boot core are forced to
be onlined. This restriction will be lifted in a later patch with
extra effort.

Signed-off-by: Pingfan Liu 
Cc: Michael Ellerman 
Cc: Nicholas Piggin 
Cc: Christophe Leroy 
Cc: Mahesh Salgaonkar 
Cc: Wen Xiong 
Cc: Baoquan He 
Cc: Ming Lei 
Cc: kexec@lists.infradead.org
To: linuxppc-...@lists.ozlabs.org
---
  arch/powerpc/kernel/prom.c | 25 +
  arch/powerpc/kernel/setup-common.c | 87 +++---
  2 files changed, 85 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index ec82f5bda908..87272a2d8c10 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -76,7 +76,9 @@ u64 ppc64_rma_size;
  unsigned int boot_cpu_node_count __ro_after_init;
  #endif
  static phys_addr_t first_memblock_size;
+#ifdef CONFIG_SMP
  static int __initdata boot_cpu_count;
+#endif
  
  static int __init early_parse_mem(char *p)

  {
@@ -331,8 +333,7 @@ static int __init early_init_dt_scan_cpus(unsigned long 
node,
const __be32 *intserv;
int i, nthreads;
int len;
-   int found = -1;
-   int found_thread = 0;
+   bool found = false;
  
  	/* We are scanning "cpu" nodes only */

if (type == NULL || strcmp(type, "cpu") != 0)
@@ -355,8 +356,15 @@ static int __init early_init_dt_scan_cpus(unsigned long 
node,
for (i = 0; i < nthreads; i++) {
if (be32_to_cpu(intserv[i]) ==
fdt_boot_cpuid_phys(initial_boot_params)) {
-   found = boot_cpu_count;
-   found_thread = i;
+   /*
+* always map the boot-cpu logical id into the
+* range of [0, thread_per_core)
+*/
+   boot_cpuid = i;
+   found = true;
+   /* This works around the hole in paca_ptrs[]. */
+   if (nr_cpu_ids < nthreads)
+   set_nr_cpu_ids(nthreads);
}
  #ifdef CONFIG_SMP
/* logical cpu id is always 0 on UP kernels */
@@ -365,14 +373,13 @@ static int __init early_init_dt_scan_cpus(unsigned long 
node,
}
  
  	/* Not the boot CPU */

-   if (found < 0)
+   if (!found)
return 0;
  
-	DBG("boot cpu: logical %d physical %d\n", found,

-   be32_to_cpu(intserv[found_thread]));
-   boot_cpuid = found;
+   DBG("boot cpu: logical %d physical %d\n", boot_cpuid,
+   be32_to_cpu(intserv[boot_cpuid]));
  
-	boot_cpu_hwid = be32_to_cpu(intserv[found_thread]);

+   boot_cpu_hwid = be32_to_cpu(intserv[boot_cpuid]);
  
  	/*

 * PAPR defines "logical" PVR values for cpus that
diff --git a/arch/powerpc/kernel/setup-common.c 
b/arch/powerpc/kernel/setup-common.c
index 1b19a9815672..81291e13dec0 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -36,6 +36,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
  #include 
@@ -425,6 +426,13 @@ static void __init cpu_init_thread_core_maps(int tpc)
  
  u32 *cpu_to_phys_id = NULL;
  
+struct interrupt_server_node {

+   struct list_head node;
+   boolavail;
+   int len;
+   __be32 *intserv;
+};
+
  /**
   * setup_cpu_maps - initialize the following cpu maps:
   *  cpu_possible_mask
@@ -446,11 +454,16 @@ u32 *cpu_to_phys_id = NULL;
  void __init smp_setup_cpu_maps(void)
  {
struct device_node *dn;
-   int cpu = 0;
-   int nthreads = 1;
+   int shift = 0, cpu = 0;
+   int j, nthreads = 1;
+   int len;
+   struct interrupt_server_node *intserv_node, *n;
+   struct list_head *bt_node, head;
+   bool avail, found_boot_cpu = false;
  
  	DBG("smp_setup_cpu_maps()\n");
  
+	INIT_LIST_HEAD();

cpu_to_phys_id = memblock_alloc(nr_cpu_ids * sizeof(u32),
__alignof__(u32));
if 

Re: [PATCHv8 3/5] powerpc/setup: Handle the case when boot_cpuid greater than nr_cpus

2023-10-10 Thread Hari Bathini




On 09/10/23 5:00 pm, Pingfan Liu wrote:

If the boot_cpuid is smaller than nr_cpus, it requires extra effort to
ensure the boot_cpu is in cpu_present_mask. This can be achieved by
reserving the last quota for the boot cpu.

Note: the restriction on nr_cpus will be lifted with more effort in the
successive patches

Signed-off-by: Pingfan Liu 
Cc: Michael Ellerman 
Cc: Nicholas Piggin 
Cc: Christophe Leroy 
Cc: Mahesh Salgaonkar 
Cc: Wen Xiong 
Cc: Baoquan He 
Cc: Ming Lei 
Cc: kexec@lists.infradead.org
To: linuxppc-...@lists.ozlabs.org
---
  arch/powerpc/kernel/setup-common.c | 25 ++---
  1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/setup-common.c 
b/arch/powerpc/kernel/setup-common.c
index 81291e13dec0..f9ef0a2666b0 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -454,8 +454,8 @@ struct interrupt_server_node {
  void __init smp_setup_cpu_maps(void)
  {
struct device_node *dn;
-   int shift = 0, cpu = 0;
-   int j, nthreads = 1;
+   int terminate, shift = 0, cpu = 0;
+   int j, bt_thread = 0, nthreads = 1;
int len;
struct interrupt_server_node *intserv_node, *n;
struct list_head *bt_node, head;
@@ -518,6 +518,7 @@ void __init smp_setup_cpu_maps(void)
for (j = 0 ; j < nthreads; j++) {
if (be32_to_cpu(intserv[j]) == boot_cpu_hwid) {
bt_node = _node->node;
+   bt_thread = j;
found_boot_cpu = true;
/*
 * Record the round-shift between dt
@@ -537,11 +538,21 @@ void __init smp_setup_cpu_maps(void)
/* Select the primary thread, the boot cpu's slibing, as the logic 0 */
list_add_tail(, bt_node);
pr_info("the round shift between dt seq and the cpu logic number: 
%d\n", shift);
+   terminate = nr_cpu_ids;
list_for_each_entry(intserv_node, , node) {
  
+		j = 0;



+   /* Choose a start point to cover the boot cpu */
+   if (nr_cpu_ids - 1 < bt_thread) {
+   /*
+* The processor core puts assumption on the thread id,
+* not to breach the assumption.
+*/
+   terminate = nr_cpu_ids - 1;


nthreads is anyway assumed to be same for all cores. So, enforcing
nr_cpu_ids to a minimum of nthreads (and multiple of nthreads) should
make the code much simpler without the need for above check and the
other complexities addressed in the subsequent patches...

Thanks
Hari

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] kexec: update manpage with explicit mention of clean kexec

2023-09-20 Thread Hari Bathini




On 20/09/23 5:11 pm, Simon Horman wrote:

On Mon, Sep 11, 2023 at 12:04:08PM +0530, Hari Bathini wrote:



On 28/08/23 3:12 pm, Baoquan He wrote:

On 08/28/23 at 09:48am, Simon Horman wrote:

On Mon, Aug 28, 2023 at 11:46:14AM +0530, Hari Bathini wrote:

Any comments on this, please..


Hi Hari,

Sorry, I had missed this.


On 07/07/23 10:59 am, Hari Bathini wrote:

While the manpage does mention about kexec boot with a clean shutdown,
it is not explicit about it. Make it explicit.

Signed-off-by: Hari Bathini 
---
kexec/kexec.8 | 12 ++--
1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/kexec/kexec.8 b/kexec/kexec.8
index 3a344c5..87e4ab5 100644
--- a/kexec/kexec.8
+++ b/kexec/kexec.8
@@ -95,8 +95,16 @@ then you would use the following command to load the kernel:
.RB "\-\-append=" "root=/dev/hda1" "\ \-\-initrd=" /boot/initrd
.RE
.PP
-After this kernel is loaded, it can be booted to at any time using the
-command:
+After this kernel is loaded, assuming your Linux distribution supports
+kexec-based rebooting, it can be booted to, with a clean shutdown, using
+the command:


Perhaps user-space would be better than "your Linux distribution".
But perhaps that is just kernel developer speak and makes less
sense to others.


Agree, linux distrobution doesn't make sense here. And since this is a
kexec man page, does it really need to make this change? We know
kexec-tools loaded a kernel, we will use kexec-tools to trigger the
reboot. Wondering why we need to stress distros or kernel or kexec-tools
supports the kexec-based rebooting. 'with a clean shutdown' seems a
little helpful though.


Yeah. Users did express that lack of clarity on what is the cleaner
approach to use kexec boot after load. That prompted this patch.
I am ok with "user-space" or "your linux distribution".

Simon, do you want to respin with "user-space" change instead?


Yes, I think that would be best.


Sure, Simon.
Sent v2.

Thanks
Hari

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v2] kexec: update manpage with explicit mention of clean kexec

2023-09-20 Thread Hari Bathini
While the manpage does mention about kexec boot with a clean shutdown,
it is not explicit about it. Make it explicit.

Signed-off-by: Hari Bathini 
---

Changes in v2:
* Replaced "your linux distribution" with user-space.


 kexec/kexec.8 | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/kexec/kexec.8 b/kexec/kexec.8
index 3a344c5..179dcf2 100644
--- a/kexec/kexec.8
+++ b/kexec/kexec.8
@@ -95,8 +95,15 @@ then you would use the following command to load the kernel:
 .RB "\-\-append=" "root=/dev/hda1" "\ \-\-initrd=" /boot/initrd
 .RE
 .PP
-After this kernel is loaded, it can be booted to at any time using the
-command:
+After this kernel is loaded, assuming the user-space supports kexec-based
+rebooting, it can be booted to, with a clean shutdown, using the command:
+
+.RS
+.BR reboot
+.RE
+.PP
+Alternatively, it can also be booted to, without calling shutdown(8), with
+the command:
 
 .RS
 .BR kexec \ \-e
-- 
2.41.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v2 2/3] vmcore: allow fadump to export vmcore even if is_kdump_kernel() is false

2023-09-12 Thread Hari Bathini




On 11/09/23 4:01 pm, Baoquan He wrote:

On 09/11/23 at 05:13pm, Michael Ellerman wrote:

Hari Bathini  writes:

Currently, is_kdump_kernel() returns true when elfcorehdr_addr is set.
While elfcorehdr_addr is set for kexec based kernel dump mechanism,
alternate dump capturing methods like fadump [1] also set it to export
the vmcore. Since, is_kdump_kernel() is used to restrict resources in
crash dump capture kernel and such restrictions are not desirable for
fadump, allow is_kdump_kernel() to be defined differently for fadump
case. With that change, include is_fadump_active() check in functions
is_vmcore_usable() & vmcore_unusable() to be able to export vmcore for
fadump case too.

...

diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 0f3a656293b0..de8a9fabfb6f 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -50,6 +50,7 @@ void vmcore_cleanup(void);
  #define vmcore_elf64_check_arch(x) (elf_check_arch(x) || 
vmcore_elf_check_arch_cross(x))
  #endif
  
+#ifndef is_kdump_kernel

  /*
   * is_kdump_kernel() checks whether this kernel is booting after a panic of
   * previous kernel or not. This is determined by checking if previous kernel
@@ -64,6 +65,19 @@ static inline bool is_kdump_kernel(void)
  {
return elfcorehdr_addr != ELFCORE_ADDR_MAX;
  }
+#endif
+
+#ifndef is_fadump_active
+/*
+ * If f/w assisted dump capturing mechanism (fadump), instead of kexec based
+ * dump capturing mechanism (kdump) is exporting the vmcore, then this function
+ * will be defined in arch specific code to return true, when appropriate.
+ */
+static inline bool is_fadump_active(void)
+{
+   return false;
+}
+#endif
  
  /* is_vmcore_usable() checks if the kernel is booting after a panic and

   * the vmcore region is usable.
@@ -75,7 +89,8 @@ static inline bool is_kdump_kernel(void)
  
  static inline int is_vmcore_usable(void)

  {
-   return is_kdump_kernel() && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 1 : 0;
+   return (is_kdump_kernel() || is_fadump_active())
+   && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 1 : 0;
  }
  
  /* vmcore_unusable() marks the vmcore as unusable,

@@ -84,7 +99,7 @@ static inline int is_vmcore_usable(void)
  
  static inline void vmcore_unusable(void)

  {
-   if (is_kdump_kernel())
+   if (is_kdump_kernel() || is_fadump_active())
elfcorehdr_addr = ELFCORE_ADDR_ERR;
  }


I think it would be cleaner to decouple is_vmcore_usable() and
vmcore_usable() from is_kdump_kernel().

ie, make them operate solely based on the value of elforehdr_addr:

static inline int is_vmcore_usable(void)
{
elfcorehdr_addr != ELFCORE_ADDR_ERR && \
elfcorehdr_addr != ELFCORE_ADDR_MAX;


Agree. I fell into the blind corner of thinking earlier. Above change
is better.


Thanks for the reviews.
Posted v3.

- Hari

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v3 2/2] powerpc/fadump: make is_kdump_kernel() return false when fadump is active

2023-09-12 Thread Hari Bathini
Currently, is_kdump_kernel() returns true in crash dump capture kernel
for both kdump and fadump crash dump capturing methods, as both these
methods set elfcorehdr_addr. Some restrictions enforced for crash dump
capture kernel, based on is_kdump_kernel(), are specifically meant for
kdump case and not desirable for fadump - eg. IO queues restriction in
device drivers. So, define is_kdump_kernel() to return false when f/w
assisted dump is active.

Signed-off-by: Hari Bathini 
---
 arch/powerpc/include/asm/kexec.h |  8 ++--
 arch/powerpc/kernel/crash_dump.c | 12 
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index a1ddba01e7d1..e1b43aa12175 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -99,10 +99,14 @@ void relocate_new_kernel(unsigned long indirection_page, 
unsigned long reboot_co
 
 void kexec_copy_flush(struct kimage *image);
 
-#if defined(CONFIG_CRASH_DUMP) && defined(CONFIG_PPC_RTAS)
+#if defined(CONFIG_CRASH_DUMP)
+bool is_kdump_kernel(void);
+#define is_kdump_kernelis_kdump_kernel
+#if defined(CONFIG_PPC_RTAS)
 void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);
 #define crash_free_reserved_phys_range crash_free_reserved_phys_range
-#endif
+#endif /* CONFIG_PPC_RTAS */
+#endif /* CONFIG_CRASH_DUMP */
 
 #ifdef CONFIG_KEXEC_FILE
 extern const struct kexec_file_ops kexec_elf64_ops;
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 9a3b85bfc83f..2086fa6cdc25 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #ifdef DEBUG
 #include 
@@ -92,6 +93,17 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned 
long pfn,
return csize;
 }
 
+/*
+ * Return true only when kexec based kernel dump capturing method is used.
+ * This ensures all restritions applied for kdump case are not automatically
+ * applied for fadump case.
+ */
+bool is_kdump_kernel(void)
+{
+   return !is_fadump_active() && elfcorehdr_addr != ELFCORE_ADDR_MAX;
+}
+EXPORT_SYMBOL_GPL(is_kdump_kernel);
+
 #ifdef CONFIG_PPC_RTAS
 /*
  * The crashkernel region will almost always overlap the RTAS region, so
-- 
2.41.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v3 1/2] vmcore: remove dependency with is_kdump_kernel() for exporting vmcore

2023-09-12 Thread Hari Bathini
Currently, is_kdump_kernel() returns true when elfcorehdr_addr is set.
While elfcorehdr_addr is set for kexec based kernel dump mechanism,
alternate dump capturing methods like fadump [1] also set it to export
the vmcore. Since, is_kdump_kernel() is used to restrict resources in
crash dump capture kernel and such restrictions may not be desirable
for fadump, allow is_kdump_kernel() to be defined differently for such
scenarios. With this, is_kdump_kernel() could be false while vmcore is
usable. So, remove unnecessary dependency with is_kdump_kernel(), for
exporting vmcore.

[1] https://docs.kernel.org/powerpc/firmware-assisted-dump.html

Suggested-by: Michael Ellerman 
Signed-off-by: Hari Bathini 
---

Changes in v3:
* Decoupled is_vmcore_usable() & vmcore_unusable() from is_kdump_kernel()
  as suggested here: 

https://lore.kernel.org/linuxppc-dev/ZP7si3UMVpPfYV+w@MiWiFi-R3L-srv/T/#m13ae5a7e4ba6f4d8397f0f66581832292eee3a85


 include/linux/crash_dump.h | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 0f3a656293b0..acc55626afdc 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -50,6 +50,7 @@ void vmcore_cleanup(void);
 #define vmcore_elf64_check_arch(x) (elf_check_arch(x) || 
vmcore_elf_check_arch_cross(x))
 #endif
 
+#ifndef is_kdump_kernel
 /*
  * is_kdump_kernel() checks whether this kernel is booting after a panic of
  * previous kernel or not. This is determined by checking if previous kernel
@@ -64,6 +65,7 @@ static inline bool is_kdump_kernel(void)
 {
return elfcorehdr_addr != ELFCORE_ADDR_MAX;
 }
+#endif
 
 /* is_vmcore_usable() checks if the kernel is booting after a panic and
  * the vmcore region is usable.
@@ -75,7 +77,8 @@ static inline bool is_kdump_kernel(void)
 
 static inline int is_vmcore_usable(void)
 {
-   return is_kdump_kernel() && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 1 : 0;
+   return elfcorehdr_addr != ELFCORE_ADDR_ERR &&
+   elfcorehdr_addr != ELFCORE_ADDR_MAX ? 1 : 0;
 }
 
 /* vmcore_unusable() marks the vmcore as unusable,
@@ -84,8 +87,7 @@ static inline int is_vmcore_usable(void)
 
 static inline void vmcore_unusable(void)
 {
-   if (is_kdump_kernel())
-   elfcorehdr_addr = ELFCORE_ADDR_ERR;
+   elfcorehdr_addr = ELFCORE_ADDR_ERR;
 }
 
 /**
-- 
2.41.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] kexec: update manpage with explicit mention of clean kexec

2023-09-11 Thread Hari Bathini




On 28/08/23 3:12 pm, Baoquan He wrote:

On 08/28/23 at 09:48am, Simon Horman wrote:

On Mon, Aug 28, 2023 at 11:46:14AM +0530, Hari Bathini wrote:

Any comments on this, please..


Hi Hari,

Sorry, I had missed this.


On 07/07/23 10:59 am, Hari Bathini wrote:

While the manpage does mention about kexec boot with a clean shutdown,
it is not explicit about it. Make it explicit.

Signed-off-by: Hari Bathini 
---
   kexec/kexec.8 | 12 ++--
   1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/kexec/kexec.8 b/kexec/kexec.8
index 3a344c5..87e4ab5 100644
--- a/kexec/kexec.8
+++ b/kexec/kexec.8
@@ -95,8 +95,16 @@ then you would use the following command to load the kernel:
   .RB "\-\-append=" "root=/dev/hda1" "\ \-\-initrd=" /boot/initrd
   .RE
   .PP
-After this kernel is loaded, it can be booted to at any time using the
-command:
+After this kernel is loaded, assuming your Linux distribution supports
+kexec-based rebooting, it can be booted to, with a clean shutdown, using
+the command:


Perhaps user-space would be better than "your Linux distribution".
But perhaps that is just kernel developer speak and makes less
sense to others.


Agree, linux distrobution doesn't make sense here. And since this is a
kexec man page, does it really need to make this change? We know
kexec-tools loaded a kernel, we will use kexec-tools to trigger the
reboot. Wondering why we need to stress distros or kernel or kexec-tools
supports the kexec-based rebooting. 'with a clean shutdown' seems a
little helpful though.


Yeah. Users did express that lack of clarity on what is the cleaner
approach to use kexec boot after load. That prompted this patch.
I am ok with "user-space" or "your linux distribution".

Simon, do you want to respin with "user-space" change instead?

Thanks
Hari

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v2 1/3] powerpc/fadump: make is_fadump_active() visible for exporting vmcore

2023-09-07 Thread Hari Bathini

Thanks, Baoquan.

On 07/09/23 11:12 am, Baoquan He wrote:

On 09/06/23 at 12:06am, Hari Bathini wrote:

Include asm/fadump.h in asm/kexec.h to make it visible while exporting
vmcore. Also, update is_fadump_active() to return boolean instead of
integer for better readability. The change will be used in the next
patch to ensure vmcore is exported when fadump is active.

Signed-off-by: Hari Bathini 


Thanks, Hari. The whole series looks good to me.

Acked-by: Baoquan He 

Since it's a power specific change, should be picked into powerpc tree?


Michael, would you mind taking the series via powerpc tree..

Thanks
Hari


---

Changes in v2:
* New patch based on Baoquan's suggestion to use is_fadump_active()
   instead of introducing new function is_crashdump_kernel().


  arch/powerpc/include/asm/fadump.h | 4 ++--
  arch/powerpc/include/asm/kexec.h  | 8 ++--
  arch/powerpc/kernel/fadump.c  | 4 ++--
  3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/fadump.h 
b/arch/powerpc/include/asm/fadump.h
index 526a6a647312..27b74a7e2162 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -15,13 +15,13 @@ extern int crashing_cpu;
  
  extern int is_fadump_memory_area(u64 addr, ulong size);

  extern int setup_fadump(void);
-extern int is_fadump_active(void);
+extern bool is_fadump_active(void);
  extern int should_fadump_crash(void);
  extern void crash_fadump(struct pt_regs *, const char *);
  extern void fadump_cleanup(void);
  
  #else	/* CONFIG_FA_DUMP */

-static inline int is_fadump_active(void) { return 0; }
+static inline bool is_fadump_active(void) { return false; }
  static inline int should_fadump_crash(void) { return 0; }
  static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
  static inline void fadump_cleanup(void) { }
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index a1ddba01e7d1..b760ef459234 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -51,6 +51,7 @@
  
  #ifndef __ASSEMBLY__

  #include 
+#include 
  
  typedef void (*crash_shutdown_t)(void);
  
@@ -99,10 +100,13 @@ void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_co
  
  void kexec_copy_flush(struct kimage *image);
  
-#if defined(CONFIG_CRASH_DUMP) && defined(CONFIG_PPC_RTAS)

+#if defined(CONFIG_CRASH_DUMP)
+#define is_fadump_active   is_fadump_active
+#if defined(CONFIG_PPC_RTAS)
  void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);
  #define crash_free_reserved_phys_range crash_free_reserved_phys_range
-#endif
+#endif /* CONFIG_PPC_RTAS */
+#endif /* CONFIG_CRASH_DUMP */
  
  #ifdef CONFIG_KEXEC_FILE

  extern const struct kexec_file_ops kexec_elf64_ops;
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 3ff2da7b120b..5682a65e8326 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -187,9 +187,9 @@ int should_fadump_crash(void)
return 1;
  }
  
-int is_fadump_active(void)

+bool is_fadump_active(void)
  {
-   return fw_dump.dump_active;
+   return !!fw_dump.dump_active;
  }
  
  /*

--
2.41.0





___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v2 2/3] vmcore: allow fadump to export vmcore even if is_kdump_kernel() is false

2023-09-05 Thread Hari Bathini
Currently, is_kdump_kernel() returns true when elfcorehdr_addr is set.
While elfcorehdr_addr is set for kexec based kernel dump mechanism,
alternate dump capturing methods like fadump [1] also set it to export
the vmcore. Since, is_kdump_kernel() is used to restrict resources in
crash dump capture kernel and such restrictions are not desirable for
fadump, allow is_kdump_kernel() to be defined differently for fadump
case. With that change, include is_fadump_active() check in functions
is_vmcore_usable() & vmcore_unusable() to be able to export vmcore for
fadump case too.

[1] https://docs.kernel.org/powerpc/firmware-assisted-dump.html

Signed-off-by: Hari Bathini 
---

Changes in v2:
* is_fadump_active() check added to is_vmcore_usable() as suggested
  by Baoquan.


 include/linux/crash_dump.h | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 0f3a656293b0..de8a9fabfb6f 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -50,6 +50,7 @@ void vmcore_cleanup(void);
 #define vmcore_elf64_check_arch(x) (elf_check_arch(x) || 
vmcore_elf_check_arch_cross(x))
 #endif
 
+#ifndef is_kdump_kernel
 /*
  * is_kdump_kernel() checks whether this kernel is booting after a panic of
  * previous kernel or not. This is determined by checking if previous kernel
@@ -64,6 +65,19 @@ static inline bool is_kdump_kernel(void)
 {
return elfcorehdr_addr != ELFCORE_ADDR_MAX;
 }
+#endif
+
+#ifndef is_fadump_active
+/*
+ * If f/w assisted dump capturing mechanism (fadump), instead of kexec based
+ * dump capturing mechanism (kdump) is exporting the vmcore, then this function
+ * will be defined in arch specific code to return true, when appropriate.
+ */
+static inline bool is_fadump_active(void)
+{
+   return false;
+}
+#endif
 
 /* is_vmcore_usable() checks if the kernel is booting after a panic and
  * the vmcore region is usable.
@@ -75,7 +89,8 @@ static inline bool is_kdump_kernel(void)
 
 static inline int is_vmcore_usable(void)
 {
-   return is_kdump_kernel() && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 1 : 0;
+   return (is_kdump_kernel() || is_fadump_active())
+   && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 1 : 0;
 }
 
 /* vmcore_unusable() marks the vmcore as unusable,
@@ -84,7 +99,7 @@ static inline int is_vmcore_usable(void)
 
 static inline void vmcore_unusable(void)
 {
-   if (is_kdump_kernel())
+   if (is_kdump_kernel() || is_fadump_active())
elfcorehdr_addr = ELFCORE_ADDR_ERR;
 }
 
-- 
2.41.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 1/2] vmcore: allow alternate dump capturing methods to export vmcore without is_kdump_kernel()

2023-09-05 Thread Hari Bathini




On 05/09/23 8:00 am, Baoquan He wrote:

On 09/04/23 at 08:04pm, Hari Bathini wrote:

Hi Baoquan,

Thanks for the review...

On 03/09/23 9:06 am, Baoquan He wrote:

Hi Hari,

On 09/02/23 at 12:34am, Hari Bathini wrote:

Currently, is_kdump_kernel() returns true when elfcorehdr_addr is set.
While elfcorehdr_addr is set for kexec based kernel dump mechanism,
alternate dump capturing methods like fadump [1] also set it to export
the vmcore. is_kdump_kernel() is used to restrict resources in crash
dump capture kernel but such restrictions may not be desirable for
fadump. Allow is_kdump_kernel() to be defined differently for such
scenarios. With this, is_kdump_kernel() could be false while vmcore
is usable. So, introduce is_crashdump_kernel() to return true when
elfcorehdr_addr is set and use it for vmcore related checks.


I got what is done in these two patches, but didn't get why they need be
done. vmcore_unusable()/is_vmcore_usable() are only unitilized in ia64.
Why do you care if it's is_crashdump_kernel() or is_kdump_kernel()?
If you want to override the generic is_kdump_kernel() with powerpc's own
is_kdump_kernel(), your below change is enough to allow you to do that.
I can't see why is_crashdump_kernel() is needed. Could you explain that
specifically?


You mean to just remove is_kdump_kernel() check in is_vmcore_usable() &
vmcore_unusable() functions? Replaced generic is_crashdump_kernel()
function instead, that returns true for any dump capturing method,
irrespective of whether is_kdump_kernel() returns true or false.
For fadump case, is_kdump_kernel() will return false after patch 2/2.


OK, I could understand what you want to achieve. You want to make
is_kdump_kernel() only return true for kdump, while is_vmcore_usable()
returns true for both kdump and fadump.

IIUC, can we change as below? It could make code clearer and more
straightforward. I don't think adding another is_crashdump_kernel()
is a good idea, that would be a torture for non-powerpc people reading
code when they need differentiate between kdump and crashdump.



Sure, Baoquan.
Posted v2 based on your suggestion.

Thanks
Hari

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v2 1/3] powerpc/fadump: make is_fadump_active() visible for exporting vmcore

2023-09-05 Thread Hari Bathini
Include asm/fadump.h in asm/kexec.h to make it visible while exporting
vmcore. Also, update is_fadump_active() to return boolean instead of
integer for better readability. The change will be used in the next
patch to ensure vmcore is exported when fadump is active.

Signed-off-by: Hari Bathini 
---

Changes in v2:
* New patch based on Baoquan's suggestion to use is_fadump_active()
  instead of introducing new function is_crashdump_kernel().


 arch/powerpc/include/asm/fadump.h | 4 ++--
 arch/powerpc/include/asm/kexec.h  | 8 ++--
 arch/powerpc/kernel/fadump.c  | 4 ++--
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/fadump.h 
b/arch/powerpc/include/asm/fadump.h
index 526a6a647312..27b74a7e2162 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -15,13 +15,13 @@ extern int crashing_cpu;
 
 extern int is_fadump_memory_area(u64 addr, ulong size);
 extern int setup_fadump(void);
-extern int is_fadump_active(void);
+extern bool is_fadump_active(void);
 extern int should_fadump_crash(void);
 extern void crash_fadump(struct pt_regs *, const char *);
 extern void fadump_cleanup(void);
 
 #else  /* CONFIG_FA_DUMP */
-static inline int is_fadump_active(void) { return 0; }
+static inline bool is_fadump_active(void) { return false; }
 static inline int should_fadump_crash(void) { return 0; }
 static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
 static inline void fadump_cleanup(void) { }
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index a1ddba01e7d1..b760ef459234 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -51,6 +51,7 @@
 
 #ifndef __ASSEMBLY__
 #include 
+#include 
 
 typedef void (*crash_shutdown_t)(void);
 
@@ -99,10 +100,13 @@ void relocate_new_kernel(unsigned long indirection_page, 
unsigned long reboot_co
 
 void kexec_copy_flush(struct kimage *image);
 
-#if defined(CONFIG_CRASH_DUMP) && defined(CONFIG_PPC_RTAS)
+#if defined(CONFIG_CRASH_DUMP)
+#define is_fadump_active   is_fadump_active
+#if defined(CONFIG_PPC_RTAS)
 void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);
 #define crash_free_reserved_phys_range crash_free_reserved_phys_range
-#endif
+#endif /* CONFIG_PPC_RTAS */
+#endif /* CONFIG_CRASH_DUMP */
 
 #ifdef CONFIG_KEXEC_FILE
 extern const struct kexec_file_ops kexec_elf64_ops;
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 3ff2da7b120b..5682a65e8326 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -187,9 +187,9 @@ int should_fadump_crash(void)
return 1;
 }
 
-int is_fadump_active(void)
+bool is_fadump_active(void)
 {
-   return fw_dump.dump_active;
+   return !!fw_dump.dump_active;
 }
 
 /*
-- 
2.41.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v2 3/3] powerpc/fadump: make is_kdump_kernel() return false when fadump is active

2023-09-05 Thread Hari Bathini
Currently, is_kdump_kernel() returns true in crash dump capture kernel
for both kdump and fadump crash dump capturing methods, as both these
methods set elfcorehdr_addr. Some restrictions enforced for crash dump
capture kernel, based on is_kdump_kernel(), are specifically meant for
kdump case and not desirable for fadump - eg. IO queues restriction in
device drivers. So, define is_kdump_kernel() to return false when f/w
assisted dump is active.

Signed-off-by: Hari Bathini 
---
 arch/powerpc/include/asm/kexec.h |  2 ++
 arch/powerpc/kernel/crash_dump.c | 11 +++
 2 files changed, 13 insertions(+)

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index b760ef459234..f0b9c3fd0618 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -101,6 +101,8 @@ void relocate_new_kernel(unsigned long indirection_page, 
unsigned long reboot_co
 void kexec_copy_flush(struct kimage *image);
 
 #if defined(CONFIG_CRASH_DUMP)
+bool is_kdump_kernel(void);
+#define is_kdump_kernelis_kdump_kernel
 #define is_fadump_active   is_fadump_active
 #if defined(CONFIG_PPC_RTAS)
 void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 9a3b85bfc83f..6d8e616ce3ce 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -92,6 +92,17 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned 
long pfn,
return csize;
 }
 
+/*
+ * Return true only when kexec based kernel dump capturing method is used.
+ * This ensures all restritions applied for kdump case are not automatically
+ * applied for fadump case.
+ */
+bool is_kdump_kernel(void)
+{
+   return !is_fadump_active() && elfcorehdr_addr != ELFCORE_ADDR_MAX;
+}
+EXPORT_SYMBOL_GPL(is_kdump_kernel);
+
 #ifdef CONFIG_PPC_RTAS
 /*
  * The crashkernel region will almost always overlap the RTAS region, so
-- 
2.41.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 1/2] vmcore: allow alternate dump capturing methods to export vmcore without is_kdump_kernel()

2023-09-04 Thread Hari Bathini

Hi Baoquan,

Thanks for the review...

On 03/09/23 9:06 am, Baoquan He wrote:

Hi Hari,

On 09/02/23 at 12:34am, Hari Bathini wrote:

Currently, is_kdump_kernel() returns true when elfcorehdr_addr is set.
While elfcorehdr_addr is set for kexec based kernel dump mechanism,
alternate dump capturing methods like fadump [1] also set it to export
the vmcore. is_kdump_kernel() is used to restrict resources in crash
dump capture kernel but such restrictions may not be desirable for
fadump. Allow is_kdump_kernel() to be defined differently for such
scenarios. With this, is_kdump_kernel() could be false while vmcore
is usable. So, introduce is_crashdump_kernel() to return true when
elfcorehdr_addr is set and use it for vmcore related checks.


I got what is done in these two patches, but didn't get why they need be
done. vmcore_unusable()/is_vmcore_usable() are only unitilized in ia64.
Why do you care if it's is_crashdump_kernel() or is_kdump_kernel()?
If you want to override the generic is_kdump_kernel() with powerpc's own
is_kdump_kernel(), your below change is enough to allow you to do that.
I can't see why is_crashdump_kernel() is needed. Could you explain that
specifically?


You mean to just remove is_kdump_kernel() check in is_vmcore_usable() &
vmcore_unusable() functions? Replaced generic is_crashdump_kernel()
function instead, that returns true for any dump capturing method,
irrespective of whether is_kdump_kernel() returns true or false.
For fadump case, is_kdump_kernel() will return false after patch 2/2.

Thanks
Hari

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH 2/2] powerpc/fadump: make is_kdump_kernel() return false when fadump is active

2023-09-01 Thread Hari Bathini
Currently, is_kdump_kernel() returns true in crash dump capture kernel
for both kdump and fadump crash dump capturing methods, as both these
methods set elfcorehdr_addr. Some restrictions enforced for crash dump
capture kernel, based on is_kdump_kernel(), are specifically meant for
kdump case and not desirable for fadump - eg. IO queues restriction in
device drivers. So, define is_kdump_kernel() to return false when f/w
assisted adump is active. For fadump case, is_fadump_active() can be
used for capture kernel specific checks.

Signed-off-by: Hari Bathini 
---
 arch/powerpc/include/asm/kexec.h |  8 ++--
 arch/powerpc/kernel/crash_dump.c | 12 
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index a1ddba01e7d1..e1b43aa12175 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -99,10 +99,14 @@ void relocate_new_kernel(unsigned long indirection_page, 
unsigned long reboot_co
 
 void kexec_copy_flush(struct kimage *image);
 
-#if defined(CONFIG_CRASH_DUMP) && defined(CONFIG_PPC_RTAS)
+#if defined(CONFIG_CRASH_DUMP)
+bool is_kdump_kernel(void);
+#define is_kdump_kernelis_kdump_kernel
+#if defined(CONFIG_PPC_RTAS)
 void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);
 #define crash_free_reserved_phys_range crash_free_reserved_phys_range
-#endif
+#endif /* CONFIG_PPC_RTAS */
+#endif /* CONFIG_CRASH_DUMP */
 
 #ifdef CONFIG_KEXEC_FILE
 extern const struct kexec_file_ops kexec_elf64_ops;
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 9a3b85bfc83f..2086fa6cdc25 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #ifdef DEBUG
 #include 
@@ -92,6 +93,17 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned 
long pfn,
return csize;
 }
 
+/*
+ * Return true only when kexec based kernel dump capturing method is used.
+ * This ensures all restritions applied for kdump case are not automatically
+ * applied for fadump case.
+ */
+bool is_kdump_kernel(void)
+{
+   return !is_fadump_active() && elfcorehdr_addr != ELFCORE_ADDR_MAX;
+}
+EXPORT_SYMBOL_GPL(is_kdump_kernel);
+
 #ifdef CONFIG_PPC_RTAS
 /*
  * The crashkernel region will almost always overlap the RTAS region, so
-- 
2.41.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH 1/2] vmcore: allow alternate dump capturing methods to export vmcore without is_kdump_kernel()

2023-09-01 Thread Hari Bathini
Currently, is_kdump_kernel() returns true when elfcorehdr_addr is set.
While elfcorehdr_addr is set for kexec based kernel dump mechanism,
alternate dump capturing methods like fadump [1] also set it to export
the vmcore. is_kdump_kernel() is used to restrict resources in crash
dump capture kernel but such restrictions may not be desirable for
fadump. Allow is_kdump_kernel() to be defined differently for such
scenarios. With this, is_kdump_kernel() could be false while vmcore
is usable. So, introduce is_crashdump_kernel() to return true when
elfcorehdr_addr is set and use it for vmcore related checks.

[1] https://docs.kernel.org/powerpc/firmware-assisted-dump.html

Signed-off-by: Hari Bathini 
---
 include/linux/crash_dump.h | 18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 0f3a656293b0..1052a0faf0dd 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -50,6 +50,7 @@ void vmcore_cleanup(void);
 #define vmcore_elf64_check_arch(x) (elf_check_arch(x) || 
vmcore_elf_check_arch_cross(x))
 #endif
 
+#ifndef is_kdump_kernel
 /*
  * is_kdump_kernel() checks whether this kernel is booting after a panic of
  * previous kernel or not. This is determined by checking if previous kernel
@@ -64,6 +65,19 @@ static inline bool is_kdump_kernel(void)
 {
return elfcorehdr_addr != ELFCORE_ADDR_MAX;
 }
+#endif
+
+/*
+ * Return true if this is a dump capture kernel, where vmcore needs to be
+ * exported, irrespective of the dump capture mechanism in use.
+ *
+ * Same as is_kdump_kernel() unless arch specific code defines 
is_kdump_kernel()
+ * differently while supporting other dump capturing mechanisms.
+ */
+static inline bool is_crashdump_kernel(void)
+{
+   return elfcorehdr_addr != ELFCORE_ADDR_MAX;
+}
 
 /* is_vmcore_usable() checks if the kernel is booting after a panic and
  * the vmcore region is usable.
@@ -75,7 +89,7 @@ static inline bool is_kdump_kernel(void)
 
 static inline int is_vmcore_usable(void)
 {
-   return is_kdump_kernel() && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 1 : 0;
+   return is_crashdump_kernel() && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 1 
: 0;
 }
 
 /* vmcore_unusable() marks the vmcore as unusable,
@@ -84,7 +98,7 @@ static inline int is_vmcore_usable(void)
 
 static inline void vmcore_unusable(void)
 {
-   if (is_kdump_kernel())
+   if (is_crashdump_kernel())
elfcorehdr_addr = ELFCORE_ADDR_ERR;
 }
 
-- 
2.41.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] kexec: update manpage with explicit mention of clean kexec

2023-08-28 Thread Hari Bathini




On 28/08/23 1:18 pm, Simon Horman wrote:

On Mon, Aug 28, 2023 at 11:46:14AM +0530, Hari Bathini wrote:

Any comments on this, please..


Hi Hari,

Sorry, I had missed this.


No worries. Thanks for the response.




On 07/07/23 10:59 am, Hari Bathini wrote:

While the manpage does mention about kexec boot with a clean shutdown,
it is not explicit about it. Make it explicit.

Signed-off-by: Hari Bathini 
---
   kexec/kexec.8 | 12 ++--
   1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/kexec/kexec.8 b/kexec/kexec.8
index 3a344c5..87e4ab5 100644
--- a/kexec/kexec.8
+++ b/kexec/kexec.8
@@ -95,8 +95,16 @@ then you would use the following command to load the kernel:
   .RB "\-\-append=" "root=/dev/hda1" "\ \-\-initrd=" /boot/initrd
   .RE
   .PP
-After this kernel is loaded, it can be booted to at any time using the
-command:
+After this kernel is loaded, assuming your Linux distribution supports
+kexec-based rebooting, it can be booted to, with a clean shutdown, using
+the command:


Perhaps user-space would be better than "your Linux distribution".
But perhaps that is just kernel developer speak and makes less
sense to others.


No strong inclination to putting it either way. Would leave it to you :)

- Hari

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] kexec: update manpage with explicit mention of clean kexec

2023-08-28 Thread Hari Bathini

Any comments on this, please..

On 07/07/23 10:59 am, Hari Bathini wrote:

While the manpage does mention about kexec boot with a clean shutdown,
it is not explicit about it. Make it explicit.

Signed-off-by: Hari Bathini 
---
  kexec/kexec.8 | 12 ++--
  1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/kexec/kexec.8 b/kexec/kexec.8
index 3a344c5..87e4ab5 100644
--- a/kexec/kexec.8
+++ b/kexec/kexec.8
@@ -95,8 +95,16 @@ then you would use the following command to load the kernel:
  .RB "\-\-append=" "root=/dev/hda1" "\ \-\-initrd=" /boot/initrd
  .RE
  .PP
-After this kernel is loaded, it can be booted to at any time using the
-command:
+After this kernel is loaded, assuming your Linux distribution supports
+kexec-based rebooting, it can be booted to, with a clean shutdown, using
+the command:
+
+.RS
+.BR reboot
+.RE
+.PP
+Alternatively, it can also be booted to, without calling shutdown(8), with
+the command:

  .RS
  .BR kexec \ \-e


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH V3 01/14] blk-mq: add blk_mq_max_nr_hw_queues()

2023-08-11 Thread Hari Bathini




On 10/08/23 8:31 am, Baoquan He wrote:

On 08/10/23 at 10:06am, Ming Lei wrote:

On Thu, Aug 10, 2023 at 09:18:27AM +0800, Baoquan He wrote:

On 08/10/23 at 08:09am, Ming Lei wrote:

On Wed, Aug 09, 2023 at 03:44:01PM +0200, Christoph Hellwig wrote:

I'm starting to sound like a broken record, but we can't just do random
is_kdump checks, and it's not going to get better by resending it again and
again.  If kdump kernels limit the number of possible CPUs, it needs to
reflected in cpu_possible_map and we need to use that information.



Can you look at previous kdump/arch guys' comment about kdump usage &
num_possible_cpus?

 
https://lore.kernel.org/linux-block/caf+s44ruqswbosy9kmdx35crviqnxoeuvgnsue75bb0y2jg...@mail.gmail.com/
 https://lore.kernel.org/linux-block/ZKz912KyFQ7q9qwL@MiWiFi-R3L-srv/

The point is that kdump kernels does not limit the number of possible CPUs.

1) some archs support 'nr_cpus=1' for kdump kernel, which is fine, since
num_possible_cpus becomes 1.


Yes, "nr_cpus=" is strongly suggested in kdump kernel because "nr_cpus="
limits the possible cpu numbers, while "maxcpuss=" only limits the cpu
number which can be brought up during bootup. We noticed this diference
because a large number of possible cpus will cost more memory in kdump
kernel. e.g percpu initialization, even though kdump kernel have set
"maxcpus=1".

Currently x86 and arm64 all support "nr_cpus=". Pingfan ever spent much
effort to make patches to add "nr_cpus=" support to ppc64, seems ppc64
dev and maintainers do not care about it. Finally the patches are not
accepted, and the work is not continued.

Now, I am wondering what is the barrier to add "nr_cpus=" to power ach.
Can we reconsider adding 'nr_cpus=' to power arch since real issue
occurred in kdump kernel?


If 'nr_cpus=' can be supported on ppc64, this patchset isn't needed.



As for this patchset, it can be accpeted so that no failure in kdump
kernel is seen on ARCHes w/o "nr_cpus=" support? My personal opinion.


IMO 'nr_cpus=' support should be preferred, given it is annoying to
maintain two kinds of implementation for kdump kernel from driver
viewpoint. I guess kdump things can be simplified too with supporting
'nr_cpus=' only.


Yes, 'nr_cpus=' is ideal. Not sure if there's some underlying concerns so
that power people decided to not support it.


Though "nr_cpus=1" is an ideal solution, maintainer was not happy with
the patch as the code changes have impact for regular boot path and
it is likely to cause breakages. So, even if "nr_cpus=1" support for
ppc64 is revived, the change is going to take time to be accepted
upstream.

Also, I see is_kdump_kernel() being used irrespective of "nr_cpus=1"
support for other optimizations in the driver for the special dump
capture environment kdump is.

If there is no other downside for driver code, to use is_kdump_kernel(),
other than the maintainability aspect, I think the above changes are
worth considering.

Thanks
Hari

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH] kexec: update manpage with explicit mention of clean kexec

2023-07-06 Thread Hari Bathini
While the manpage does mention about kexec boot with a clean shutdown,
it is not explicit about it. Make it explicit.

Signed-off-by: Hari Bathini 
---
 kexec/kexec.8 | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/kexec/kexec.8 b/kexec/kexec.8
index 3a344c5..87e4ab5 100644
--- a/kexec/kexec.8
+++ b/kexec/kexec.8
@@ -95,8 +95,16 @@ then you would use the following command to load the kernel:
 .RB "\-\-append=" "root=/dev/hda1" "\ \-\-initrd=" /boot/initrd
 .RE
 .PP
-After this kernel is loaded, it can be booted to at any time using the
-command:
+After this kernel is loaded, assuming your Linux distribution supports
+kexec-based rebooting, it can be booted to, with a clean shutdown, using
+the command:
+
+.RS
+.BR reboot
+.RE
+.PP
+Alternatively, it can also be booted to, without calling shutdown(8), with
+the command:
 
 .RS
 .BR kexec \ \-e
-- 
2.41.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v22 0/8] crash: Kernel handling of CPU and memory hot un/plug

2023-05-04 Thread Hari Bathini




On 04/05/23 4:11 am, Eric DeVolder wrote:

Once the kdump service is loaded, if changes to CPUs or memory occur,
either by hot un/plug or off/onlining, the crash elfcorehdr must also
be updated.

The elfcorehdr describes to kdump the CPUs and memory in the system,
and any inaccuracies can result in a vmcore with missing CPU context
or memory regions.

The current solution utilizes udev to initiate an unload-then-reload
of the kdump image (eg. kernel, initrd, boot_params, purgatory and
elfcorehdr) by the userspace kexec utility. In the original post I
outlined the significant performance problems related to offloading
this activity to userspace.

This patchset introduces a generic crash handler that registers with
the CPU and memory notifiers. Upon CPU or memory changes, from either
hot un/plug or off/onlining, this generic handler is invoked and
performs important housekeeping, for example obtaining the appropriate
lock, and then invokes an architecture specific handler to do the
appropriate elfcorehdr update.

Note the description in patch 'crash: change crash_prepare_elf64_headers()
to for_each_possible_cpu()' and 'x86/crash: optimize CPU changes' that
enables further optimizations related to CPU plug/unplug/online/offline
performance of elfcorehdr updates.

In the case of x86_64, the arch specific handler generates a new
elfcorehdr, and overwrites the old one in memory; thus no involvement
with userspace needed.

To realize the benefits/test this patchset, one must make a couple
of minor changes to userspace:

  - Prevent udev from updating kdump crash kernel on hot un/plug changes.
Add the following as the first lines to the RHEL udev rule file
/usr/lib/udev/rules.d/98-kexec.rules:

# The kernel updates the crash elfcorehdr for CPU and memory changes
SUBSYSTEM=="cpu", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end"
SUBSYSTEM=="memory", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end"

With this changeset applied, the two rules evaluate to false for
CPU and memory change events and thus skip the userspace
unload-then-reload of kdump.

  - Change to the kexec_file_load for loading the kdump kernel:
Eg. on RHEL: in /usr/bin/kdumpctl, change to:
 standard_kexec_args="-p -d -s"
which adds the -s to select kexec_file_load() syscall.

This kernel patchset also supports kexec_load() with a modified kexec
userspace utility. A working changeset to the kexec userspace utility
is posted to the kexec-tools mailing list here:

  http://lists.infradead.org/pipermail/kexec/2023-May/027049.html

To use the kexec-tools patch, apply, build and install kexec-tools,
then change the kdumpctl's standard_kexec_args to replace the -s with
--hotplug. The removal of -s reverts to the kexec_load syscall and
the addition of --hotplug invokes the changes put forth in the
kexec-tools patch.


The changes look good to me. For the series..

Acked-by: Hari Bathini 



Regards,
eric
---
v22: 3may2023
  - Rebased onto 6.3.0
  - Improved support for kexec_load(), per Hari Bathini. See
"crash: hotplug support for kexec_load()" which is the only
change to this series.
  - Applied Baoquan He's Acked-by for all other patches.

v21: 4apr2023
  https://lkml.org/lkml/2023/4/4/1136
  https://lore.kernel.org/lkml/20230404180326.6890-1-eric.devol...@oracle.com/
  - Rebased onto 6.3.0-rc5
  - Additional simplification of indentation in crash_handle_hotplug_event(),
per Baoquan.

v20: 17mar2023
  https://lkml.org/lkml/2023/3/17/1169
  https://lore.kernel.org/lkml/20230317212128.21424-1-eric.devol...@oracle.com/
  - Rebased onto 6.3.0-rc2
  - Defaulting CRASH_HOTPLUG for x86 to Y, per Sourabh.
  - Explicitly initializing image->hp_action, per Baoquan.
  - Simplified kexec_trylock() in crash_handle_hotplug_event(),
per Baoquan.
  - Applied Sourabh's Reviewed-by to the series.

v19: 6mar2023
  https://lkml.org/lkml/2023/3/6/1358
  https://lore.kernel.org/lkml/20230306162228.8277-1-eric.devol...@oracle.com/
  - Rebased onto 6.2.0
  - Did away with offlinecpu, per Thomas Gleixner.
  - Changed to CPUHP_BP_PREPARE_DYN instead of CPUHP_AP_ONLINE_DYN.
  - Did away with elfcorehdr_index_valid, per Sourabh.
  - Convert to for_each_possible_cpu() in crash_prepare_elf64_headers()
per Sourabh.
  - Small optimization for x86 cpu changes.

v18: 31jan2023
  https://lkml.org/lkml/2023/1/31/1356
  https://lore.kernel.org/lkml/20230131224236.122805-1-eric.devol...@oracle.com/
  - Rebased onto 6.2.0-rc6
  - Renamed struct kimage member hotplug_event to hp_action, and
re-enumerated the KEXEC_CRASH_HP_x items, adding _NONE at 0.
  - Moved to cpuhp state CPUHP_BP_PREPARE_DYN instead of
CPUHP_AP_ONLINE_DYN in order to minimize window of time CPU
is not reflected in elfcorehdr.
  - Reworked some of the comments and commit messages to offer
more of the why, than what, per Thomas Gleixner.

v

Re: [PATCH v2 0/6] crashdump: Kernel handling of CPU and memory hot un/plug

2023-05-04 Thread Hari Bathini




On 04/05/23 3:46 am, Eric DeVolder wrote:

When the kdump service is loaded, if a CPU or memory is hot
un/plugged, the crash elfcorehdr, which describes the CPUs and memory
in the system, must also be updated, else the resulting vmcore is
inaccurate (eg. missing either CPU context or memory regions).

The current solution (eg. RHEL /usr/lib/udev/rules.d/98-kexec.rules)
utilizes udev to initiate an unload-then-reload of the *entire* kdump
image (eg. kernel, initrd, boot_params, purgatory and elfcorehdr) by
the userspace kexec utility. In a previous kernel patch post I have
outlined the significant performance problems related to offloading
this activity to userspace.

As such, I've been working to provide the ability for the Linux kernel
to directly modify the elfcorehdr in response to hotplug changes.

  https://lore.kernel.org/lkml/20230404180326.6890-1-eric.devol...@oracle.com/

The series listed above is v21, and the v22 contains changes that
work in concert with the v2 changes cited within. (I'm posting the
kexec-tools changes first so I can reference them in the kernel v22
posting.)

I believe this work to be nearing the finish line. As such, I'd like
to start posting the kexec-tools userspace changes for review in order
to minimize the time to adoption.

This kexec-tools patch series is for supporting the kexec_load
syscall only. The kernel patch series cited above is self-contained
for the kexec_file_load syscall, requiring no userspace help.

There are two basic obstacles/requirements for the kexec-tools to
overcome in order to support kernel hotplug rewriting of the
elfcorehdr.

First, the buffer containing the elfcorehdr must be excluded from the
purgatory checksum/digest, which is computed at load time. Otherwise
kernel run-time changes to the elfcorehdr, as a result of hot un/plug,
would result in the checksum failing (specifically in purgatory at
panic kernel boot time), and kdump capture kernel failing to start.
To let the kernel know it is okay to modify the elfcorehdr, kexec
sets the KEXEC_UPDATE_ELFCOREHDR flag.

NOTE: The kernel specifically does *NOT* attempt to recompute the
checksum/digest as that would ultimately require patching the in-
memory purgatory image with the updated checksum. As that purgatory
image is already fully linked, it is binary blob containing no ELF
information which would allow it to be re-linked or patched. Thus
excluding the elfcorehdr from the checksum/digests avoids all these
problems.

Second, the size of the elfcorehdr buffer must be large enough
to accomodate growth of the number of CPUs and/or memory regions.

To satisfy the first requirement, this patch series introduces the
--hotplug option to indicate to kexec-tools that kexec should exclude
the elfcorehdr buffer from the purgatory checksum/digest calculation
and set the KEXEC_UPDATE_ELFCOREHDR flag.

To satisfy the second requirement, the size is obtained from the
(proposed in the kernel series above)
/sys/kernel/crash_elfcorehdr_size node, or it can be specified
manually with new --elfcorehdrsz= option.

I am intentionally posting this series before the kernel changes
have been merged. I'm hoping to facilitate discussion as to how
kexec-tools wants to handle the soon-to-be new kernel feature.

Discussion items:

- It is worth noting, that deploying kexec-tools, with this series
   included, on kernels that do NOT have the kernel hotplug series
   cited above, is safe to do. The result of running a kernel without
   hotplug elfcorehdr support with kexec-tools and the --hotplug option
   simply removes the elfcorehdr buffer from the digest. This does not
   prevent kdump from operating; the only risk being a slight chance of
   corruption of the elfcorehdr, as it now not covered by the checksum.
   Using the --elfcorehdrsz option on a kernel without hotplug
   elfcorehdr support simply results in a possibly oversized buffer for
   the elfcorehdr, there is no harm in that.

- While I currently have the --hotplug as an option, the option could
   be eliminated (or reversed polarity) it would be safe to *always*
   omit the elfcorehdr from the checksum/digest for purgatory.
   If this were the case, then distros would not have to make any
   changes to kdump scripts to pass the --hotplug option. Then, when
   their kernel does include the kernel patch series cited above,
   kdump and hotplug would "just work".

- I'm unsure if these options should be kept as common/global
   kexec options, or moved to arch options.

- I'm only showing x86 support (and testing) at this time, but
   it would be straight forward to provide similar support for the
   other architectures in a future patch revision.


True. Should be straightforward to add similar support for other
architectures. For example, powerpc would need another flag
KEXEC_UPDATE_FDT on top of the flag to update elfcorehdr.

Looks good to me. For the series..

Acked-by: Hari Bathini 


Thanks!
eric

---
v2: 3may2023
 

Re: [PATCH v21 5/7] x86/crash: add x86 crash hotplug support

2023-05-02 Thread Hari Bathini



On 02/05/23 12:03 am, Eric DeVolder wrote:



On 4/28/23 13:31, Hari Bathini wrote:


On 28/04/23 2:55 pm, Baoquan He wrote:

On 04/27/23 at 10:26pm, Hari Bathini wrote:

On 27/04/23 2:19 pm, Baoquan He wrote:

On 04/27/23 at 12:39pm, Hari Bathini wrote:

Hi Eric,

On 04/04/23 11:33 pm, Eric DeVolder wrote:

When CPU or memory is hot un/plugged, or off/onlined, the crash
elfcorehdr, which describes the CPUs and memory in the system,
must also be updated.

The segment containing the elfcorehdr is identified at run-time
in crash_core:crash_handle_hotplug_event(), which works for both
the kexec_load() and kexec_file_load() syscalls. A new elfcorehdr
is generated from the available CPUs and memory into a buffer,
and then installed over the top of the existing elfcorehdr.

In the patch 'kexec: exclude elfcorehdr from the segment digest'
the need to update purgatory due to the change in elfcorehdr was
eliminated.  As a result, no changes to purgatory or boot_params
(as the elfcorehdr= kernel command line parameter pointer
remains unchanged and correct) are needed, just elfcorehdr.

To accommodate a growing number of resources via hotplug, the
elfcorehdr segment must be sufficiently large enough to accommodate
changes, see the CRASH_MAX_MEMORY_RANGES description. This is used
only on the kexec_file_load() syscall; for kexec_load() userspace
will need to size the segment similarly.

To accommodate kexec_load() syscall in the absence of


Firstly, thanks! This series is a nice improvement to kdump support
in hotplug environment.

Thank you!



One concern though is that this change assumes corresponding support
in kexec-tools. Without that support kexec_load would fail to boot
with digest verification failure, iiuc.


Yes, you've correctly identified that if a hotplug change occurs 
following kexec_load
(made with kexec-tools unaltered for hotplug), then a subsequent panic 
would in fact

fail the purgatory digest verification, and  kdump would not happen.



Eric has posted patchset to modify kexec_tools to support that, please
see the link Eric pasted in the cover letter.

http://lists.infradead.org/pipermail/kexec/2022-October/026032.html


Right, Baoquan.

I did see that and if I read the code correctly, without that patchset
kexec_load would fail. Not with an explicit error that hotplug support
is missing or such but it would simply fail to boot into capture kernel
with digest verification failure.

This is correct.



My suggestion was to avoid that userspace tool breakage for older
kexec-tools version by introducing a new kexec flag that can tell
kernel that kexec-tools is ready to use this in-kernel update support.
So, if kexec_load happens without the flag, avoid doing an in-kernel
update on hotplug. I hope that clears the confusion.


Yeah, sounds like a good idea. It may be extended in later patch.


Fixing it in this series itself would be a cleaner way, I guess.


You're suggestion of using a flag makes alot of sense; it is an indication
to the kernel that it is valid/okay to modify the kexec_load elfcorehdr.
Only kexec-tools that understands this (meaning the elfcorehdr buffer is
appropriately sized *and* excludes the elfcorehdr from the purgatory check)
would set that flag.

The roll-out of this feature needs to be coordinated, no doubt. There 
are three
pieces to this puzzle: this kernel series, the udev rule changes, and 
the changes

to kexec-tools for kexec_load.

I consider the udev rule changes critical to making this feature work 
efficiently.
I also think that deploying the udev rules immediately is doable since 
nothing
references them, yet; they would be NOPs. And they would be in place 
when the

kernel and/or kexec-tool changes deploy.

However, your point about supporting kexec_load with and without this 
new flag

means the sysfs nodes upon which the udev rule change rely need to be a bit
smarter now. (I'm assuming these udev rules will be generally accepted 
as-is,

as they are simple and efficient.)




The sysfs crash_hotplug nodes need to take into account kexec_file_load vs
(kexec_load && new_flag). Generally speaking these crash_hotplug sysfs 
nodes we
want to be 1 going forward, but where kexec_load/kexec-tools is older 
and/or no new_flag,
it needs to be 0. In this way the udev rules can remain as proposed and 
work properly

for kexec_file_load and both flavors of kexec_load.


Right. That is the tricky part. kdump scripts and kexec-tools have to
be in sync if udev rules have to just rely on crash_hotplug.

Thanks
Hari


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v21 5/7] x86/crash: add x86 crash hotplug support

2023-04-28 Thread Hari Bathini



On 28/04/23 2:55 pm, Baoquan He wrote:

On 04/27/23 at 10:26pm, Hari Bathini wrote:

On 27/04/23 2:19 pm, Baoquan He wrote:

On 04/27/23 at 12:39pm, Hari Bathini wrote:

Hi Eric,

On 04/04/23 11:33 pm, Eric DeVolder wrote:

When CPU or memory is hot un/plugged, or off/onlined, the crash
elfcorehdr, which describes the CPUs and memory in the system,
must also be updated.

The segment containing the elfcorehdr is identified at run-time
in crash_core:crash_handle_hotplug_event(), which works for both
the kexec_load() and kexec_file_load() syscalls. A new elfcorehdr
is generated from the available CPUs and memory into a buffer,
and then installed over the top of the existing elfcorehdr.

In the patch 'kexec: exclude elfcorehdr from the segment digest'
the need to update purgatory due to the change in elfcorehdr was
eliminated.  As a result, no changes to purgatory or boot_params
(as the elfcorehdr= kernel command line parameter pointer
remains unchanged and correct) are needed, just elfcorehdr.

To accommodate a growing number of resources via hotplug, the
elfcorehdr segment must be sufficiently large enough to accommodate
changes, see the CRASH_MAX_MEMORY_RANGES description. This is used
only on the kexec_file_load() syscall; for kexec_load() userspace
will need to size the segment similarly.

To accommodate kexec_load() syscall in the absence of


Firstly, thanks! This series is a nice improvement to kdump support
in hotplug environment.

One concern though is that this change assumes corresponding support
in kexec-tools. Without that support kexec_load would fail to boot
with digest verification failure, iiuc.


Eric has posted patchset to modify kexec_tools to support that, please
see the link Eric pasted in the cover letter.

http://lists.infradead.org/pipermail/kexec/2022-October/026032.html


Right, Baoquan.

I did see that and if I read the code correctly, without that patchset
kexec_load would fail. Not with an explicit error that hotplug support
is missing or such but it would simply fail to boot into capture kernel
with digest verification failure.

My suggestion was to avoid that userspace tool breakage for older
kexec-tools version by introducing a new kexec flag that can tell
kernel that kexec-tools is ready to use this in-kernel update support.
So, if kexec_load happens without the flag, avoid doing an in-kernel
update on hotplug. I hope that clears the confusion.


Yeah, sounds like a good idea. It may be extended in later patch.


Fixing it in this series itself would be a cleaner way, I guess.

Thanks
Hari

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v21 5/7] x86/crash: add x86 crash hotplug support

2023-04-27 Thread Hari Bathini

On 27/04/23 2:19 pm, Baoquan He wrote:

On 04/27/23 at 12:39pm, Hari Bathini wrote:

Hi Eric,

On 04/04/23 11:33 pm, Eric DeVolder wrote:

When CPU or memory is hot un/plugged, or off/onlined, the crash
elfcorehdr, which describes the CPUs and memory in the system,
must also be updated.

The segment containing the elfcorehdr is identified at run-time
in crash_core:crash_handle_hotplug_event(), which works for both
the kexec_load() and kexec_file_load() syscalls. A new elfcorehdr
is generated from the available CPUs and memory into a buffer,
and then installed over the top of the existing elfcorehdr.

In the patch 'kexec: exclude elfcorehdr from the segment digest'
the need to update purgatory due to the change in elfcorehdr was
eliminated.  As a result, no changes to purgatory or boot_params
(as the elfcorehdr= kernel command line parameter pointer
remains unchanged and correct) are needed, just elfcorehdr.

To accommodate a growing number of resources via hotplug, the
elfcorehdr segment must be sufficiently large enough to accommodate
changes, see the CRASH_MAX_MEMORY_RANGES description. This is used
only on the kexec_file_load() syscall; for kexec_load() userspace
will need to size the segment similarly.

To accommodate kexec_load() syscall in the absence of


Firstly, thanks! This series is a nice improvement to kdump support
in hotplug environment.

One concern though is that this change assumes corresponding support
in kexec-tools. Without that support kexec_load would fail to boot
with digest verification failure, iiuc.


Eric has posted patchset to modify kexec_tools to support that, please
see the link Eric pasted in the cover letter.

http://lists.infradead.org/pipermail/kexec/2022-October/026032.html


Right, Baoquan.

I did see that and if I read the code correctly, without that patchset
kexec_load would fail. Not with an explicit error that hotplug support
is missing or such but it would simply fail to boot into capture kernel
with digest verification failure.

My suggestion was to avoid that userspace tool breakage for older
kexec-tools version by introducing a new kexec flag that can tell
kernel that kexec-tools is ready to use this in-kernel update support.
So, if kexec_load happens without the flag, avoid doing an in-kernel
update on hotplug. I hope that clears the confusion.

Thanks
Hari

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v21 5/7] x86/crash: add x86 crash hotplug support

2023-04-27 Thread Hari Bathini

Hi Eric,

On 04/04/23 11:33 pm, Eric DeVolder wrote:

When CPU or memory is hot un/plugged, or off/onlined, the crash
elfcorehdr, which describes the CPUs and memory in the system,
must also be updated.

The segment containing the elfcorehdr is identified at run-time
in crash_core:crash_handle_hotplug_event(), which works for both
the kexec_load() and kexec_file_load() syscalls. A new elfcorehdr
is generated from the available CPUs and memory into a buffer,
and then installed over the top of the existing elfcorehdr.

In the patch 'kexec: exclude elfcorehdr from the segment digest'
the need to update purgatory due to the change in elfcorehdr was
eliminated.  As a result, no changes to purgatory or boot_params
(as the elfcorehdr= kernel command line parameter pointer
remains unchanged and correct) are needed, just elfcorehdr.

To accommodate a growing number of resources via hotplug, the
elfcorehdr segment must be sufficiently large enough to accommodate
changes, see the CRASH_MAX_MEMORY_RANGES description. This is used
only on the kexec_file_load() syscall; for kexec_load() userspace
will need to size the segment similarly.

To accommodate kexec_load() syscall in the absence of


Firstly, thanks! This series is a nice improvement to kdump support
in hotplug environment.

One concern though is that this change assumes corresponding support
in kexec-tools. Without that support kexec_load would fail to boot
with digest verification failure, iiuc.

I would suggest a flag to advertise to the kernel that kexec-tools/
userspace wants in-kernel update. Something like KEXEC_IN_KERNEL_UPDATE
on top of existing flags like KEXEC_ON_CRASH & KEXEC_PRESERVE_CONTEXT.
This flag can be used to decide whether in-kernel update needs to be
enforced or not. That should make transition to this change smoother
without having to break userspace.

Thanks
Hari

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v21 0/7] crash: Kernel handling of CPU and memory hot un/plug

2023-04-27 Thread Hari Bathini

Hi Eric,

On 04/04/23 11:33 pm, Eric DeVolder wrote:

Once the kdump service is loaded, if changes to CPUs or memory occur,
either by hot un/plug or off/onlining, the crash elfcorehdr must also
be updated.

The elfcorehdr describes to kdump the CPUs and memory in the system,
and any inaccuracies can result in a vmcore with missing CPU context
or memory regions.

The current solution utilizes udev to initiate an unload-then-reload
of the kdump image (eg. kernel, initrd, boot_params, purgatory and
elfcorehdr) by the userspace kexec utility. In the original post I
outlined the significant performance problems related to offloading
this activity to userspace.

This patchset introduces a generic crash handler that registers with
the CPU and memory notifiers. Upon CPU or memory changes, from either
hot un/plug or off/onlining, this generic handler is invoked and
performs important housekeeping, for example obtaining the appropriate
lock, and then invokes an architecture specific handler to do the
appropriate elfcorehdr update.

Note the description in patch 'crash: change crash_prepare_elf64_headers()
to for_each_possible_cpu()' and 'x86/crash: optimize CPU changes' that
enables further optimizations related to CPU plug/unplug/online/offline
performance of elfcorehdr updates.

In the case of x86_64, the arch specific handler generates a new
elfcorehdr, and overwrites the old one in memory; thus no involvement
with userspace needed.

To realize the benefits/test this patchset, one must make a couple
of minor changes to userspace:

  - Prevent udev from updating kdump crash kernel on hot un/plug changes.
Add the following as the first lines to the RHEL udev rule file
/usr/lib/udev/rules.d/98-kexec.rules:

# The kernel updates the crash elfcorehdr for CPU and memory changes
SUBSYSTEM=="cpu", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end"
SUBSYSTEM=="memory", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end"

With this changeset applied, the two rules evaluate to false for
CPU and memory change events and thus skip the userspace
unload-then-reload of kdump.

  - Change to the kexec_file_load for loading the kdump kernel:
Eg. on RHEL: in /usr/bin/kdumpctl, change to:
 standard_kexec_args="-p -d -s"
which adds the -s to select kexec_file_load() syscall.

This kernel patchset also supports kexec_load() with a modified kexec
userspace utility. A working changeset to the kexec userspace utility
is posted to the kexec-tools mailing list here:

  http://lists.infradead.org/pipermail/kexec/2022-October/026032.html


With the in-kernel update, the size is anyway getting calculated in
kernel for kexec_file_load case, how about passing the recommended size
for elfcorehdr segment and any other segment that needs an in-kernel 
update as a sysfs attribute instead of kexec-tools having to do the

calculation again in the userspace  (get_elfcorehdrsz()) for kexec_load
case. That makes segment size calculation less error prone.

Thanks
Hari

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] ppc64: remove rma_top limit

2022-09-16 Thread Hari Bathini




On 15/09/22 2:12 pm, Sourabh Jain wrote:

Restricting kexec tool to allocate hole for kexec segments below 768MB
may not be relavent now since first memory block size can be 1024MB and
more.

Removing rma_top restriction will give more space to find holes for
kexec segments and existing in-place checks make sure that kexec segment
allocation doesn't cross the first memory block because every kexec segment
has to be within first memory block for kdump kernel to boot properly.

Signed-off-by: Sourabh Jain 


Yeah. The restriction seems arbitrary. Even more so since commit
47478ea66d43 ("kexec-tools: ppc64: fix how RMA top is deduced")
with which kexec_load was sure to place all segments within the
first memory block. So, the change looks good to me.

Acked-by: Hari Bathini 


---
  kexec/arch/ppc64/kexec-ppc64.c | 2 --
  1 file changed, 2 deletions(-)

diff --git a/kexec/arch/ppc64/kexec-ppc64.c b/kexec/arch/ppc64/kexec-ppc64.c
index 5b17740..611809f 100644
--- a/kexec/arch/ppc64/kexec-ppc64.c
+++ b/kexec/arch/ppc64/kexec-ppc64.c
@@ -717,8 +717,6 @@ static int get_devtree_details(unsigned long kexec_flags)
if (base < rma_base) {
rma_base = base;
rma_top = base + be64_to_cpu(((uint64_t 
*)buf)[1]);
-   if (rma_top > 0x3000UL)
-   rma_top = 0x3000UL;
}

fclose(file);


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 08/30] powerpc/setup: Refactor/untangle panic notifiers

2022-05-09 Thread Hari Bathini



On 28/04/22 4:19 am, Guilherme G. Piccoli wrote:

The panic notifiers infrastructure is a bit limited in the scope of
the callbacks - basically every kind of functionality is dropped
in a list that runs in the same point during the kernel panic path.
This is not really on par with the complexities and particularities
of architecture / hypervisors' needs, and a refactor is ongoing.

As part of this refactor, it was observed that powerpc has 2 notifiers,
with mixed goals: one is just a KASLR offset dumper, whereas the other
aims to hard-disable IRQs (necessary on panic path), warn firmware of
the panic event (fadump) and run low-level platform-specific machinery
that might stop kernel execution and never come back.

Clearly, the 2nd notifier has opposed goals: disable IRQs / fadump
should run earlier while low-level platform actions should
run late since it might not even return. Hence, this patch decouples
the notifiers splitting them in three:

- First one is responsible for hard-disable IRQs and fadump,
should run early;

- The kernel KASLR offset dumper is really an informative notifier,
harmless and may run at any moment in the panic path;

- The last notifier should run last, since it aims to perform
low-level actions for specific platforms, and might never return.
It is also only registered for 2 platforms, pseries and ps3.

The patch better documents the notifiers and clears the code too,
also removing a useless header.

Currently no functionality change should be observed, but after
the planned panic refactor we should expect more panic reliability
with this patch.

Cc: Benjamin Herrenschmidt 
Cc: Hari Bathini 
Cc: Michael Ellerman 
Cc: Nicholas Piggin 
Cc: Paul Mackerras 
Signed-off-by: Guilherme G. Piccoli 


The change looks good. I have tested it on an LPAR (ppc64).

Reviewed-by: Hari Bathini 


---

We'd like to thanks specially the MiniCloud infrastructure [0] maintainers,
that allow us to test PowerPC code in a very complete, functional and FREE
environment (there's no need even for adding a credit card, like many "free"
clouds require ¬¬ ).

[0] https://openpower.ic.unicamp.br/minicloud

  arch/powerpc/kernel/setup-common.c | 74 ++
  1 file changed, 54 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/kernel/setup-common.c 
b/arch/powerpc/kernel/setup-common.c
index 518ae5aa9410..52f96b209a96 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -23,7 +23,6 @@
  #include 
  #include 
  #include 
-#include 
  #include 
  #include 
  #include 
@@ -680,8 +679,25 @@ int check_legacy_ioport(unsigned long base_port)
  }
  EXPORT_SYMBOL(check_legacy_ioport);
  
-static int ppc_panic_event(struct notifier_block *this,

- unsigned long event, void *ptr)
+/*
+ * Panic notifiers setup
+ *
+ * We have 3 notifiers for powerpc, each one from a different "nature":
+ *
+ * - ppc_panic_fadump_handler() is a hypervisor notifier, which hard-disables
+ *   IRQs and deal with the Firmware-Assisted dump, when it is configured;
+ *   should run early in the panic path.
+ *
+ * - dump_kernel_offset() is an informative notifier, just showing the KASLR
+ *   offset if we have RANDOMIZE_BASE set.
+ *
+ * - ppc_panic_platform_handler() is a low-level handler that's registered
+ *   only if the platform wishes to perform final actions in the panic path,
+ *   hence it should run late and might not even return. Currently, only
+ *   pseries and ps3 platforms register callbacks.
+ */
+static int ppc_panic_fadump_handler(struct notifier_block *this,
+   unsigned long event, void *ptr)
  {
/*
 * panic does a local_irq_disable, but we really
@@ -691,45 +707,63 @@ static int ppc_panic_event(struct notifier_block *this,
  
  	/*

 * If firmware-assisted dump has been registered then trigger
-* firmware-assisted dump and let firmware handle everything else.
+* its callback and let the firmware handles everything else.
 */
crash_fadump(NULL, ptr);
-   if (ppc_md.panic)
-   ppc_md.panic(ptr);  /* May not return */
+
return NOTIFY_DONE;
  }
  
-static struct notifier_block ppc_panic_block = {

-   .notifier_call = ppc_panic_event,
-   .priority = INT_MIN /* may not return; must be done last */
-};
-
-/*
- * Dump out kernel offset information on panic.
- */
  static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
  void *p)
  {
pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n",
 kaslr_offset(), KERNELBASE);
  
-	return 0;

+   return NOTIFY_DONE;
  }
  
+static int ppc_panic_platform_handler(struct notifier_block *this,

+ unsigned long event, void *ptr)
+{
+   /*
+* This handler is only registered if we have a panic callback
+* on ppc_md, 

[PATCH] kexec-tools: print error if kexec_file_load fails

2022-03-16 Thread Hari Bathini
Commit 4f77da634035 ("kexec-tools: Fix kexec_file_load(2) error
handling") introduced EFALLBACK for scenarios where fallbacking back
to kexec_load syscall is likely to work and dropped printing error
message for these scenarios. But printing error message for other
failure scenarios was inadvertently dropped. Restore printing error
message for such cases.

Fixes: 4f77da634035 ("kexec-tools: Fix kexec_file_load(2) error handling")
Cc: Petr Tesarik 
Reported-by: Nageswara R Sastry 
Tested-by: Nageswara R Sastry 
Signed-off-by: Hari Bathini 
---
 kexec/kexec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kexec/kexec.c b/kexec/kexec.c
index 7e4787b..865de61 100644
--- a/kexec/kexec.c
+++ b/kexec/kexec.c
@@ -1339,6 +1339,7 @@ static int do_kexec_file_load(int fileind, int argc, char 
**argv,
case EMSGSIZE:
/* Reject by default. */
default:
+   fprintf(stderr, "kexec_file_load failed: %s\n", 
strerror(errno));
ret = EFAILED;
break;
 
-- 
2.35.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v2] powerpc/kexec_file: Restore FDT size estimation for kdump kernel

2021-02-20 Thread Hari Bathini




On 20/02/21 6:22 am, Thiago Jung Bauermann wrote:

Commit 2377c92e37fe ("powerpc/kexec_file: fix FDT size estimation for kdump
kernel") fixed how elf64_load() estimates the FDT size needed by the
crashdump kernel.

At the same time, commit 130b2d59cec0 ("powerpc: Use common
of_kexec_alloc_and_setup_fdt()") changed the same code to use the generic
function of_kexec_alloc_and_setup_fdt() to calculate the FDT size. That
change made the code overestimate it a bit by counting twice the space
required for the kernel command line and /chosen properties.

Therefore change kexec_fdt_totalsize_ppc64() to calculate just the extra
space needed by the kdump kernel, and change the function name so that it
better reflects what the function is now doing.


Thanks for fixing this, Thiago.

Reviewed-by: Hari Bathini 



Signed-off-by: Thiago Jung Bauermann 
Reviewed-by: Lakshmi Ramasubramanian 
---
  arch/powerpc/include/asm/kexec.h  |  2 +-
  arch/powerpc/kexec/elf_64.c   |  2 +-
  arch/powerpc/kexec/file_load_64.c | 26 --
  3 files changed, 10 insertions(+), 20 deletions(-)

Applies on top of next-20210219.

Changes since v1:

- Adjusted comment describing kexec_extra_fdt_size_ppc64() as suggested
   by Lakshmi.

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index baab158e215c..5a11cc8d2350 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -128,7 +128,7 @@ int load_crashdump_segments_ppc64(struct kimage *image,
  int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
  const void *fdt, unsigned long kernel_load_addr,
  unsigned long fdt_load_addr);
-unsigned int kexec_fdt_totalsize_ppc64(struct kimage *image);
+unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image);
  int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
unsigned long initrd_load_addr,
unsigned long initrd_len, const char *cmdline);
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
index 0492ca6003f3..5a569bb51349 100644
--- a/arch/powerpc/kexec/elf_64.c
+++ b/arch/powerpc/kexec/elf_64.c
@@ -104,7 +104,7 @@ static void *elf64_load(struct kimage *image, char 
*kernel_buf,
  
  	fdt = of_kexec_alloc_and_setup_fdt(image, initrd_load_addr,

   initrd_len, cmdline,
-  kexec_fdt_totalsize_ppc64(image));
+  kexec_extra_fdt_size_ppc64(image));
if (!fdt) {
pr_err("Error setting up the new device tree.\n");
ret = -EINVAL;
diff --git a/arch/powerpc/kexec/file_load_64.c 
b/arch/powerpc/kexec/file_load_64.c
index 3609de30a170..297f73795a1f 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -927,37 +927,27 @@ int setup_purgatory_ppc64(struct kimage *image, const 
void *slave_code,
  }
  
  /**

- * kexec_fdt_totalsize_ppc64 - Return the estimated size needed to setup FDT
- * for kexec/kdump kernel.
- * @image: kexec image being loaded.
+ * kexec_extra_fdt_size_ppc64 - Return the estimated additional size needed to
+ *  setup FDT for kexec/kdump kernel.
+ * @image:  kexec image being loaded.
   *
- * Returns the estimated size needed for kexec/kdump kernel FDT.
+ * Returns the estimated extra size needed for kexec/kdump kernel FDT.
   */
-unsigned int kexec_fdt_totalsize_ppc64(struct kimage *image)
+unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
  {
-   unsigned int fdt_size;
u64 usm_entries;
  
-	/*

-* The below estimate more than accounts for a typical kexec case where
-* the additional space is to accommodate things like kexec cmdline,
-* chosen node with properties for initrd start & end addresses and
-* a property to indicate kexec boot..
-*/
-   fdt_size = fdt_totalsize(initial_boot_params) + (2 * COMMAND_LINE_SIZE);
if (image->type != KEXEC_TYPE_CRASH)
-   return fdt_size;
+   return 0;
  
  	/*

-* For kdump kernel, also account for linux,usable-memory and
+* For kdump kernel, account for linux,usable-memory and
 * linux,drconf-usable-memory properties. Get an approximate on the
 * number of usable memory entries and use for FDT size estimation.
 */
usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) +
   (2 * (resource_size(_res) / drmem_lmb_size(;
-   fdt_size += (unsigned int)(usm_entries * sizeof(u64));
-
-   return fdt_size;
+   return (unsigned int)(usm_entries * sizeof(u64));
  }
  
  /**




___
kexec mailing list
kexec@lists.infradead.o

Re: [RESEND PATCH v5 00/11] ppc64: enable kdump support for kexec_file_load syscall

2020-07-30 Thread Hari Bathini




On 28/07/20 8:02 am, piliu wrote:



On 07/27/2020 03:36 AM, Hari Bathini wrote:

Sorry! There was a gateway issue on my system while posting v5, due to
which some patches did not make it through. Resending...

This patch series enables kdump support for kexec_file_load system
call (kexec -s -p) on PPC64. The changes are inspired from kexec-tools
code but heavily modified for kernel consumption.

The first patch adds a weak arch_kexec_locate_mem_hole() function to
override locate memory hole logic suiting arch needs. There are some
special regions in ppc64 which should be avoided while loading buffer
& there are multiple callers to kexec_add_buffer making it complicated
to maintain range sanity and using generic lookup at the same time.

The second patch marks ppc64 specific code within arch/powerpc/kexec
and arch/powerpc/purgatory to make the subsequent code changes easy
to understand.

The next patch adds helper function to setup different memory ranges
needed for loading kdump kernel, booting into it and exporting the
crashing kernel's elfcore.

The fourth patch overrides arch_kexec_locate_mem_hole() function to
locate memory hole for kdump segments by accounting for the special
memory regions, referred to as excluded memory ranges, and sets
kbuf->mem when a suitable memory region is found.

The fifth patch moves walk_drmem_lmbs() out of .init section with
a few changes to reuse it for setting up kdump kernel's usable memory
ranges. The next patch uses walk_drmem_lmbs() to look up the LMBs
and set linux,drconf-usable-memory & linux,usable-memory properties
in order to restrict kdump kernel's memory usage.

The seventh patch updates purgatory to setup r8 & r9 with opal base
and opal entry addresses respectively to aid kernels built with
CONFIG_PPC_EARLY_DEBUG_OPAL enabled. The next patch setups up backup
region as a kexec segment while loading kdump kernel and teaches
purgatory to copy data from source to destination.

Patch 09 builds the elfcore header for the running kernel & passes
the info to kdump kernel via "elfcorehdr=" parameter to export as
/proc/vmcore file. The next patch sets up the memory reserve map
for the kexec kernel and also claims kdump support for kdump as
all the necessary changes are added.

The last patch fixes a lookup issue for `kexec -l -s` case when
memory is reserved for crashkernel.

Tested the changes successfully on P8, P9 lpars, couple of OpenPOWER
boxes, one with secureboot enabled, KVM guest and a simulator.

v4 -> v5:
* Dropped patches 07/12 & 08/12 and updated purgatory to do everything
   in assembly.


Hello Pingfan,

Sorry, I missed out on responding to this.



I guess you achieve this by carefully selecting instruction to avoid
relocation issue, right?


Yes. No far branching or reference to data from elsewhere.

Thanks
Hari

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v6 07/11] ppc64/kexec_file: setup backup region for kdump kernel

2020-07-29 Thread Hari Bathini
Though kdump kernel boots from loaded address, the first 64KB of it is
copied down to real 0. So, setup a backup region and let purgatory
copy the first 64KB of crashed kernel into this backup region before
booting into kdump kernel. Update reserve map with backup region and
crashed kernel's memory to avoid kdump kernel from accidentially using
that memory.

Signed-off-by: Hari Bathini 
Reviewed-by: Thiago Jung Bauermann 
---

v5 -> v6:
* Added Reviewed-by tag from Thiago.
* The comment explaining why a source buffer is needed for backup segment
  is moved to appropriate place.
* Used the special branching instruction mpe suggested instead of "bl 0f"
* Added local labels & space between arguments in assembler code.

v4 -> v5:
* Did not add Reviewed-by tag from Thiago yet as he might want to reconsider
  it with the changes in this patch.
* Wrote backup region copy code in assembler. Also, dropped the patch that
  applies RELA relocations & the patch that sets up stack as they are no
  longer needed.
* For correctness, updated fdt_add_mem_rsv() to take "BACKUP_SRC_END + 1"
  as start address instead of BACKUP_SRC_SIZE.

v3 -> v4:
* Moved fdt_add_mem_rsv() for backup region under kdump flag, on Thiago's
  suggestion, as it is only relevant for kdump.

v2 -> v3:
* Dropped check for backup_start in trampoline_64.S as purgatory() takes
  care of it anyway.

v1 -> v2:
* Check if backup region is available before branching out. This is
  to keep `kexec -l -s` flow as before as much as possible. This would
  eventually change with more testing and addition of sha256 digest
  verification support.
* Fixed missing prototype for purgatory() as reported by lkp.
  lkp report for reference:
- https://lore.kernel.org/patchwork/patch/1264423/


 arch/powerpc/include/asm/crashdump-ppc64.h |   19 ++
 arch/powerpc/include/asm/kexec.h   |7 ++
 arch/powerpc/kexec/elf_64.c|9 +++
 arch/powerpc/kexec/file_load_64.c  |   93 +++-
 arch/powerpc/purgatory/trampoline_64.S |   38 ++-
 5 files changed, 159 insertions(+), 7 deletions(-)
 create mode 100644 arch/powerpc/include/asm/crashdump-ppc64.h

diff --git a/arch/powerpc/include/asm/crashdump-ppc64.h 
b/arch/powerpc/include/asm/crashdump-ppc64.h
new file mode 100644
index ..68d9717cc5ee
--- /dev/null
+++ b/arch/powerpc/include/asm/crashdump-ppc64.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_POWERPC_CRASHDUMP_PPC64_H
+#define _ASM_POWERPC_CRASHDUMP_PPC64_H
+
+/*
+ * Backup region - first 64KB of System RAM
+ *
+ * If ever the below macros are to be changed, please be judicious.
+ * The implicit assumptions are:
+ * - start, end & size are less than UINT32_MAX.
+ * - start & size are at least 8 byte aligned.
+ *
+ * For implementation details: arch/powerpc/purgatory/trampoline_64.S
+ */
+#define BACKUP_SRC_START   0
+#define BACKUP_SRC_END 0x
+#define BACKUP_SRC_SIZE(BACKUP_SRC_END - BACKUP_SRC_START + 1)
+
+#endif /* __ASM_POWERPC_CRASHDUMP_PPC64_H */
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 835dc92e091c..f9514ebeffaa 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -105,6 +105,9 @@ extern const struct kexec_file_ops kexec_elf64_ops;
 struct kimage_arch {
struct crash_mem *exclude_ranges;
 
+   unsigned long backup_start;
+   void *backup_buf;
+
 #ifdef CONFIG_IMA_KEXEC
phys_addr_t ima_buffer_addr;
size_t ima_buffer_size;
@@ -120,6 +123,10 @@ int setup_new_fdt(const struct kimage *image, void *fdt,
 int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size);
 
 #ifdef CONFIG_PPC64
+struct kexec_buf;
+
+int load_crashdump_segments_ppc64(struct kimage *image,
+ struct kexec_buf *kbuf);
 int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
  const void *fdt, unsigned long kernel_load_addr,
  unsigned long fdt_load_addr);
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
index 64c15a5a280b..76e2fc7e6dc3 100644
--- a/arch/powerpc/kexec/elf_64.c
+++ b/arch/powerpc/kexec/elf_64.c
@@ -68,6 +68,15 @@ static void *elf64_load(struct kimage *image, char 
*kernel_buf,
 
pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem);
 
+   /* Load additional segments needed for panic kernel */
+   if (image->type == KEXEC_TYPE_CRASH) {
+   ret = load_crashdump_segments_ppc64(image, );
+   if (ret) {
+   pr_err("Failed to load kdump kernel segments\n");
+   goto out;
+   }
+   }
+
if (initrd != NULL) {
kbuf.buffer = initrd;
kbuf.bufsz = kbuf.memsz = initrd_len;
diff --git a/arch/powerpc/ke

[PATCH v6 10/11] ppc64/kexec_file: fix kexec load failure with lack of memory hole

2020-07-29 Thread Hari Bathini
The kexec purgatory has to run in real mode. Only the first memory
block maybe accessible in real mode. And, unlike the case with panic
kernel, no memory is set aside for regular kexec load. Another thing
to note is, the memory for crashkernel is reserved at an offset of
128MB. So, when crashkernel memory is reserved, the memory ranges to
load kexec segments shrink further as the generic code only looks for
memblock free memory ranges and in all likelihood only a tiny bit of
memory from 0 to 128MB would be available to load kexec segments.

With kdump being used by default in general, kexec file load is likely
to fail almost always. This can be fixed by changing the memory hole
lookup logic for regular kexec to use the same method as kdump. This
would mean that most kexec segments will overlap with crashkernel
memory region. That should still be ok as the pages, whose destination
address isn't available while loading, are placed in an intermediate
location till a flush to the actual destination address happens during
kexec boot sequence.

Signed-off-by: Hari Bathini 
Tested-by: Pingfan Liu 
Reviewed-by: Thiago Jung Bauermann 
---

v5 -> v6:
* Unchanged.

v4 -> v5:
* Unchanged.

v3 -> v4:
* Unchanged. Added Reviewed-by tag from Thiago.

v2 -> v3:
* Unchanged. Added Tested-by tag from Pingfan.

v1 -> v2:
* New patch to fix locating memory hole for kexec_file_load (kexec -s -l)
  when memory is reserved for crashkernel.


 arch/powerpc/kexec/file_load_64.c |   33 ++---
 1 file changed, 14 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/kexec/file_load_64.c 
b/arch/powerpc/kexec/file_load_64.c
index f13c5b8399e1..c6a37ad5a0a4 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -1012,13 +1012,6 @@ int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
u64 buf_min, buf_max;
int ret;
 
-   /*
-* Use the generic kexec_locate_mem_hole for regular
-* kexec_file_load syscall
-*/
-   if (kbuf->image->type != KEXEC_TYPE_CRASH)
-   return kexec_locate_mem_hole(kbuf);
-
/* Look up the exclude ranges list while locating the memory hole */
emem = &(kbuf->image->arch.exclude_ranges);
if (!(*emem) || ((*emem)->nr_ranges == 0)) {
@@ -1026,11 +1019,15 @@ int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
return kexec_locate_mem_hole(kbuf);
}
 
+   buf_min = kbuf->buf_min;
+   buf_max = kbuf->buf_max;
/* Segments for kdump kernel should be within crashkernel region */
-   buf_min = (kbuf->buf_min < crashk_res.start ?
-  crashk_res.start : kbuf->buf_min);
-   buf_max = (kbuf->buf_max > crashk_res.end ?
-  crashk_res.end : kbuf->buf_max);
+   if (kbuf->image->type == KEXEC_TYPE_CRASH) {
+   buf_min = (buf_min < crashk_res.start ?
+  crashk_res.start : buf_min);
+   buf_max = (buf_max > crashk_res.end ?
+  crashk_res.end : buf_max);
+   }
 
if (buf_min > buf_max) {
pr_err("Invalid buffer min and/or max values\n");
@@ -1067,15 +1064,13 @@ int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
 int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
  unsigned long buf_len)
 {
-   if (image->type == KEXEC_TYPE_CRASH) {
-   int ret;
+   int ret;
 
-   /* Get exclude memory ranges needed for setting up kdump 
segments */
-   ret = get_exclude_memory_ranges(&(image->arch.exclude_ranges));
-   if (ret) {
-   pr_err("Failed to setup exclude memory ranges for 
buffer lookup\n");
-   return ret;
-   }
+   /* Get exclude memory ranges needed for setting up kexec segments */
+   ret = get_exclude_memory_ranges(&(image->arch.exclude_ranges));
+   if (ret) {
+   pr_err("Failed to setup exclude memory ranges for buffer 
lookup\n");
+   return ret;
}
 
return kexec_image_probe_default(image, buf, buf_len);



___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v6 09/11] ppc64/kexec_file: add appropriate regions for memory reserve map

2020-07-29 Thread Hari Bathini
While initrd, elfcorehdr and backup regions are already added to the
reserve map, there are a few missing regions that need to be added to
the memory reserve map. Add them here. And now that all the changes
to load panic kernel are in place, claim likewise.

Signed-off-by: Hari Bathini 
Tested-by: Pingfan Liu 
Reviewed-by: Thiago Jung Bauermann 
---

v5 -> v6:
* Unchanged.

v4 -> v5:
* Unchanged.

v3 -> v4:
* Fixed a spellcheck and added Reviewed-by tag from Thiago.

v2 -> v3:
* Unchanged. Added Tested-by tag from Pingfan.

v1 -> v2:
* Updated add_rtas_mem_range() & add_opal_mem_range() callsites based on
  the new prototype for these functions.


 arch/powerpc/kexec/file_load_64.c |   58 ++---
 1 file changed, 53 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kexec/file_load_64.c 
b/arch/powerpc/kexec/file_load_64.c
index 0d280d097cd6..f13c5b8399e1 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -205,6 +205,34 @@ static int get_crash_memory_ranges(struct crash_mem 
**mem_ranges)
return ret;
 }
 
+/**
+ * get_reserved_memory_ranges - Get reserve memory ranges. This list includes
+ *  memory regions that should be added to the
+ *  memory reserve map to ensure the region is
+ *  protected from any mischief.
+ * @mem_ranges: Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int get_reserved_memory_ranges(struct crash_mem **mem_ranges)
+{
+   int ret;
+
+   ret = add_rtas_mem_range(mem_ranges);
+   if (ret)
+   goto out;
+
+   ret = add_tce_mem_ranges(mem_ranges);
+   if (ret)
+   goto out;
+
+   ret = add_reserved_mem_ranges(mem_ranges);
+out:
+   if (ret)
+   pr_err("Failed to setup reserved memory ranges\n");
+   return ret;
+}
+
 /**
  * __locate_mem_hole_top_down - Looks top down for a large enough memory hole
  *  in the memory regions between buf_min & buf_max
@@ -897,8 +925,8 @@ int setup_new_fdt_ppc64(const struct kimage *image, void 
*fdt,
unsigned long initrd_load_addr,
unsigned long initrd_len, const char *cmdline)
 {
-   struct crash_mem *umem = NULL;
-   int ret;
+   struct crash_mem *umem = NULL, *rmem = NULL;
+   int i, nr_ranges, ret;
 
ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline);
if (ret)
@@ -941,7 +969,27 @@ int setup_new_fdt_ppc64(const struct kimage *image, void 
*fdt,
}
}
 
+   /* Update memory reserve map */
+   ret = get_reserved_memory_ranges();
+   if (ret)
+   goto out;
+
+   nr_ranges = rmem ? rmem->nr_ranges : 0;
+   for (i = 0; i < nr_ranges; i++) {
+   u64 base, size;
+
+   base = rmem->ranges[i].start;
+   size = rmem->ranges[i].end - base + 1;
+   ret = fdt_add_mem_rsv(fdt, base, size);
+   if (ret) {
+   pr_err("Error updating memory reserve map: %s\n",
+  fdt_strerror(ret));
+   goto out;
+   }
+   }
+
 out:
+   kfree(rmem);
kfree(umem);
return ret;
 }
@@ -1024,10 +1072,10 @@ int arch_kexec_kernel_image_probe(struct kimage *image, 
void *buf,
 
/* Get exclude memory ranges needed for setting up kdump 
segments */
ret = get_exclude_memory_ranges(&(image->arch.exclude_ranges));
-   if (ret)
+   if (ret) {
pr_err("Failed to setup exclude memory ranges for 
buffer lookup\n");
-   /* Return this until all changes for panic kernel are in */
-   return -EOPNOTSUPP;
+   return ret;
+   }
}
 
return kexec_image_probe_default(image, buf, buf_len);



___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v6 08/11] ppc64/kexec_file: prepare elfcore header for crashing kernel

2020-07-29 Thread Hari Bathini
Prepare elf headers for the crashing kernel's core file using
crash_prepare_elf64_headers() and pass on this info to kdump
kernel by updating its command line with elfcorehdr parameter.
Also, add elfcorehdr location to reserve map to avoid it from
being stomped on while booting.

Signed-off-by: Hari Bathini 
Tested-by: Pingfan Liu 
Reviewed-by: Thiago Jung Bauermann 
---

v5 -> v6:
* Unchanged.

v4 -> v5:
* Unchanged. Added Reviewed-by tag from Thiago.

v3 -> v4:
* Added a FIXME tag to indicate issue in adding opal/rtas regions to
  core image.
* Folded prepare_elf_headers() function into load_elfcorehdr_segment().

v2 -> v3:
* Unchanged. Added Tested-by tag from Pingfan.

v1 -> v2:
* Tried merging adjacent memory ranges on hitting maximum ranges limit
  to reduce reallocations for memory ranges and also, minimize PT_LOAD
  segments for elfcore.
* Updated add_rtas_mem_range() & add_opal_mem_range() callsites based on
  the new prototype for these functions.


 arch/powerpc/include/asm/kexec.h  |6 +
 arch/powerpc/kexec/elf_64.c   |   12 +++
 arch/powerpc/kexec/file_load.c|   49 +++
 arch/powerpc/kexec/file_load_64.c |  165 +
 4 files changed, 232 insertions(+)

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index f9514ebeffaa..fe885bc3127e 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -108,12 +108,18 @@ struct kimage_arch {
unsigned long backup_start;
void *backup_buf;
 
+   unsigned long elfcorehdr_addr;
+   unsigned long elf_headers_sz;
+   void *elf_headers;
+
 #ifdef CONFIG_IMA_KEXEC
phys_addr_t ima_buffer_addr;
size_t ima_buffer_size;
 #endif
 };
 
+char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
+ unsigned long cmdline_len);
 int setup_purgatory(struct kimage *image, const void *slave_code,
const void *fdt, unsigned long kernel_load_addr,
unsigned long fdt_load_addr);
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
index 76e2fc7e6dc3..d0e459bb2f05 100644
--- a/arch/powerpc/kexec/elf_64.c
+++ b/arch/powerpc/kexec/elf_64.c
@@ -35,6 +35,7 @@ static void *elf64_load(struct kimage *image, char 
*kernel_buf,
void *fdt;
const void *slave_code;
struct elfhdr ehdr;
+   char *modified_cmdline = NULL;
struct kexec_elf_info elf_info;
struct kexec_buf kbuf = { .image = image, .buf_min = 0,
  .buf_max = ppc64_rma_size };
@@ -75,6 +76,16 @@ static void *elf64_load(struct kimage *image, char 
*kernel_buf,
pr_err("Failed to load kdump kernel segments\n");
goto out;
}
+
+   /* Setup cmdline for kdump kernel case */
+   modified_cmdline = setup_kdump_cmdline(image, cmdline,
+  cmdline_len);
+   if (!modified_cmdline) {
+   pr_err("Setting up cmdline for kdump kernel failed\n");
+   ret = -EINVAL;
+   goto out;
+   }
+   cmdline = modified_cmdline;
}
 
if (initrd != NULL) {
@@ -131,6 +142,7 @@ static void *elf64_load(struct kimage *image, char 
*kernel_buf,
pr_err("Error setting up the purgatory.\n");
 
 out:
+   kfree(modified_cmdline);
kexec_free_elf_info(_info);
 
/* Make kimage_file_post_load_cleanup free the fdt buffer for us. */
diff --git a/arch/powerpc/kexec/file_load.c b/arch/powerpc/kexec/file_load.c
index 38439aba27d7..d52c09729edd 100644
--- a/arch/powerpc/kexec/file_load.c
+++ b/arch/powerpc/kexec/file_load.c
@@ -18,10 +18,45 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #define SLAVE_CODE_SIZE256 /* First 0x100 bytes */
 
+/**
+ * setup_kdump_cmdline - Prepend "elfcorehdr= " to command line
+ *   of kdump kernel for exporting the core.
+ * @image:   Kexec image
+ * @cmdline: Command line parameters to update.
+ * @cmdline_len: Length of the cmdline parameters.
+ *
+ * kdump segment must be setup before calling this function.
+ *
+ * Returns new cmdline buffer for kdump kernel on success, NULL otherwise.
+ */
+char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
+ unsigned long cmdline_len)
+{
+   int elfcorehdr_strlen;
+   char *cmdline_ptr;
+
+   cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL);
+   if (!cmdline_ptr)
+   return NULL;
+
+   elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ",
+   image->arch.elfcorehdr_addr);
+
+   if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_S

[PATCH v6 11/11] ppc64/kexec_file: enable early kernel's OPAL calls

2020-07-29 Thread Hari Bathini
Kernel built with CONFIG_PPC_EARLY_DEBUG_OPAL enabled expects r8 & r9
to be filled with OPAL base & entry addresses respectively. Setting
these registers allows the kernel to perform OPAL calls before the
device tree is parsed.

Signed-off-by: Hari Bathini 
Reviewed-by: Thiago Jung Bauermann 
---

v5 -> v6:
* Added Reviewed-by tag from Thiago.
* Moved the patch to end of the series for mpe to take a call on whether
  to have it or not.

v4 -> v5:
* New patch. Updated opal_base & opal_entry values in r8 & r9 respectively.
  This change was part of the below dropped patch in v4:
- https://lore.kernel.org/patchwork/patch/1275667/


 arch/powerpc/kexec/file_load_64.c  |   20 
 arch/powerpc/purgatory/trampoline_64.S |   16 
 2 files changed, 36 insertions(+)

diff --git a/arch/powerpc/kexec/file_load_64.c 
b/arch/powerpc/kexec/file_load_64.c
index c6a37ad5a0a4..53bb71e3a2e1 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -876,6 +876,7 @@ int setup_purgatory_ppc64(struct kimage *image, const void 
*slave_code,
  const void *fdt, unsigned long kernel_load_addr,
  unsigned long fdt_load_addr)
 {
+   struct device_node *dn = NULL;
int ret;
 
ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
@@ -903,9 +904,28 @@ int setup_purgatory_ppc64(struct kimage *image, const void 
*slave_code,
 >arch.backup_start,
 sizeof(image->arch.backup_start),
 false);
+   if (ret)
+   goto out;
+
+   /* Setup OPAL base & entry values */
+   dn = of_find_node_by_path("/ibm,opal");
+   if (dn) {
+   u64 val;
+
+   of_property_read_u64(dn, "opal-base-address", );
+   ret = kexec_purgatory_get_set_symbol(image, "opal_base", ,
+sizeof(val), false);
+   if (ret)
+   goto out;
+
+   of_property_read_u64(dn, "opal-entry-address", );
+   ret = kexec_purgatory_get_set_symbol(image, "opal_entry", ,
+sizeof(val), false);
+   }
 out:
if (ret)
pr_err("Failed to setup purgatory symbols");
+   of_node_put(dn);
return ret;
 }
 
diff --git a/arch/powerpc/purgatory/trampoline_64.S 
b/arch/powerpc/purgatory/trampoline_64.S
index e79077ff1355..e6a6e7e6dfe4 100644
--- a/arch/powerpc/purgatory/trampoline_64.S
+++ b/arch/powerpc/purgatory/trampoline_64.S
@@ -87,6 +87,10 @@ master:
li  %r4,28
STWX_BE %r17,%r3,%r4/* Store my cpu as __be32 at byte 28 */
 1:
+   /* Load opal base and entry values in r8 & r9 respectively */
+   ld  %r8,(opal_base - 0b)(%r18)
+   ld  %r9,(opal_entry - 0b)(%r18)
+
/* load the kernel address */
ld  %r4,(kernel - 0b)(%r18)
 
@@ -133,6 +137,18 @@ backup_start:
.8byte  0x0
.size backup_start, . - backup_start
 
+   .balign 8
+   .globl opal_base
+opal_base:
+   .8byte  0x0
+   .size opal_base, . - opal_base
+
+   .balign 8
+   .globl opal_entry
+opal_entry:
+   .8byte  0x0
+   .size opal_entry, . - opal_entry
+
.data
.balign 8
 .globl purgatory_sha256_digest



___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v6 06/11] ppc64/kexec_file: restrict memory usage of kdump kernel

2020-07-29 Thread Hari Bathini
Kdump kernel, used for capturing the kernel core image, is supposed
to use only specific memory regions to avoid corrupting the image to
be captured. The regions are crashkernel range - the memory reserved
explicitly for kdump kernel, memory used for the tce-table, the OPAL
region and RTAS region as applicable. Restrict kdump kernel memory
to use only these regions by setting up usable-memory DT property.
Also, tell the kdump kernel to run at the loaded address by setting
the magic word at 0x5c.

Signed-off-by: Hari Bathini 
Tested-by: Pingfan Liu 
Reviewed-by: Thiago Jung Bauermann 
---

v5 -> v6:
* Added Reviewed-by tag from Thiago.
* Avoided pass by reference count parameter in add_usable_mem() function
  by calculating the range count added from index value before & after it.
* Instead of trying to reinvent the wheel with get_node_path() &
  get_node_path_size() functions, used %pOF format as suggested by mpe.
* Used kernel types instead of uint32_t/uint64_t.
* and Dropped 'struct crash_mem *' member & added 'struct crash_mem_range *',
  nr_ranges & max_entries fields to 'struct umem_info' to avoid bit of
  a clutter in check_realloc_usable_mem() & add_usable_mem() functions.
* Updated the comment as to why 0 till crashk_res.start was needed to be
  added to usable memory ranges. Note that kexec-tools also has been
  doing the same thing.

v4 -> v5:
* Renamed get_node_pathlen() function to get_node_path_size() and
  handled root node separately to avoid off-by-one error in
  calculating string size.
* Updated get_node_path() in line with change in get_node_path_size().

v3 -> v4:
* Updated get_node_path() to be an iterative function instead of a
  recursive one.
* Added comment explaining why low memory is added to kdump kernel's
  usable memory ranges though it doesn't fall in crashkernel region.
* For correctness, added fdt_add_mem_rsv() for the low memory being
  added to kdump kernel's usable memory ranges.
* Fixed prop pointer update in add_usable_mem_property() and changed
  duple to tuple as suggested by Thiago.

v2 -> v3:
* Unchanged. Added Tested-by tag from Pingfan.

v1 -> v2:
* Fixed off-by-one error while setting up usable-memory properties.
* Updated add_rtas_mem_range() & add_opal_mem_range() callsites based on
  the new prototype for these functions.


 arch/powerpc/kexec/file_load_64.c |  386 +
 1 file changed, 385 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kexec/file_load_64.c 
b/arch/powerpc/kexec/file_load_64.c
index d09c7724efa8..f94660874765 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -17,9 +17,23 @@
 #include 
 #include 
 #include 
+#include 
 #include 
+#include 
+#include 
 #include 
 
+struct umem_info {
+   u64 *buf;   /* data buffer for usable-memory property */
+   u32 size;   /* size allocated for the data buffer */
+   u32 max_entries;/* maximum no. of entries */
+   u32 idx;/* index of current entry */
+
+   /* usable memory ranges to look up */
+   unsigned int nr_ranges;
+   const struct crash_mem_range *ranges;
+};
+
 const struct kexec_file_ops * const kexec_file_loaders[] = {
_elf64_ops,
NULL
@@ -74,6 +88,44 @@ static int get_exclude_memory_ranges(struct crash_mem 
**mem_ranges)
return ret;
 }
 
+/**
+ * get_usable_memory_ranges - Get usable memory ranges. This list includes
+ *regions like crashkernel, opal/rtas & tce-table,
+ *that kdump kernel could use.
+ * @mem_ranges:   Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int get_usable_memory_ranges(struct crash_mem **mem_ranges)
+{
+   int ret;
+
+   /*
+* Early boot failure observed on guests when low memory (first memory
+* block?) is not added to usable memory. So, add [0, crashk_res.end]
+* instead of [crashk_res.start, crashk_res.end] to workaround it.
+* Also, crashed kernel's memory must be added to reserve map to
+* avoid kdump kernel from using it.
+*/
+   ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1);
+   if (ret)
+   goto out;
+
+   ret = add_rtas_mem_range(mem_ranges);
+   if (ret)
+   goto out;
+
+   ret = add_opal_mem_range(mem_ranges);
+   if (ret)
+   goto out;
+
+   ret = add_tce_mem_ranges(mem_ranges);
+out:
+   if (ret)
+   pr_err("Failed to setup usable memory ranges\n");
+   return ret;
+}
+
 /**
  * __locate_mem_hole_top_down - Looks top down for a large enough memory hole
  *  in the memory regions between buf_min & buf_max
@@ -273,6 +325,286 @@ static int locate_mem_hole_bottom_up_ppc64(struct 
kexec_buf *kbuf,
return ret;
 

[PATCH v6 02/11] powerpc/kexec_file: mark PPC64 specific code

2020-07-29 Thread Hari Bathini
Some of the kexec_file_load code isn't PPC64 specific. Move PPC64
specific code from kexec/file_load.c to kexec/file_load_64.c. Also,
rename purgatory/trampoline.S to purgatory/trampoline_64.S in the
same spirit. No functional changes.

Signed-off-by: Hari Bathini 
Tested-by: Pingfan Liu 
Reviewed-by: Laurent Dufour 
Reviewed-by: Thiago Jung Bauermann 
---

v5 -> v6:
* Dropped email address from copyright header of the new file being
  added: arch/powerpc/kexec/file_load_64.c

v4 -> v5:
* Unchanged.

v3 -> v4:
* Moved common code back to set_new_fdt() from setup_new_fdt_ppc64()
  function. Added Reviewed-by tags from Laurent & Thiago.

v2 -> v3:
* Unchanged. Added Tested-by tag from Pingfan.

v1 -> v2:
* No changes.


 arch/powerpc/include/asm/kexec.h   |9 ++
 arch/powerpc/kexec/Makefile|2 -
 arch/powerpc/kexec/elf_64.c|7 +-
 arch/powerpc/kexec/file_load.c |   19 +
 arch/powerpc/kexec/file_load_64.c  |   87 
 arch/powerpc/purgatory/Makefile|4 +
 arch/powerpc/purgatory/trampoline.S|  117 
 arch/powerpc/purgatory/trampoline_64.S |  117 
 8 files changed, 222 insertions(+), 140 deletions(-)
 create mode 100644 arch/powerpc/kexec/file_load_64.c
 delete mode 100644 arch/powerpc/purgatory/trampoline.S
 create mode 100644 arch/powerpc/purgatory/trampoline_64.S

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index c68476818753..ac8fd4839171 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -116,6 +116,15 @@ int setup_new_fdt(const struct kimage *image, void *fdt,
  unsigned long initrd_load_addr, unsigned long initrd_len,
  const char *cmdline);
 int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size);
+
+#ifdef CONFIG_PPC64
+int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
+ const void *fdt, unsigned long kernel_load_addr,
+ unsigned long fdt_load_addr);
+int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
+   unsigned long initrd_load_addr,
+   unsigned long initrd_len, const char *cmdline);
+#endif /* CONFIG_PPC64 */
 #endif /* CONFIG_KEXEC_FILE */
 
 #else /* !CONFIG_KEXEC_CORE */
diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile
index 86380c69f5ce..67c355329457 100644
--- a/arch/powerpc/kexec/Makefile
+++ b/arch/powerpc/kexec/Makefile
@@ -7,7 +7,7 @@ obj-y   += core.o crash.o core_$(BITS).o
 
 obj-$(CONFIG_PPC32)+= relocate_32.o
 
-obj-$(CONFIG_KEXEC_FILE)   += file_load.o elf_$(BITS).o
+obj-$(CONFIG_KEXEC_FILE)   += file_load.o file_load_$(BITS).o elf_$(BITS).o
 
 ifdef CONFIG_HAVE_IMA_KEXEC
 ifdef CONFIG_IMA
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
index 3072fd6dbe94..23ad04ccaf8e 100644
--- a/arch/powerpc/kexec/elf_64.c
+++ b/arch/powerpc/kexec/elf_64.c
@@ -88,7 +88,8 @@ static void *elf64_load(struct kimage *image, char 
*kernel_buf,
goto out;
}
 
-   ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline);
+   ret = setup_new_fdt_ppc64(image, fdt, initrd_load_addr,
+ initrd_len, cmdline);
if (ret)
goto out;
 
@@ -107,8 +108,8 @@ static void *elf64_load(struct kimage *image, char 
*kernel_buf,
pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr);
 
slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset;
-   ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
- fdt_load_addr);
+   ret = setup_purgatory_ppc64(image, slave_code, fdt, kernel_load_addr,
+   fdt_load_addr);
if (ret)
pr_err("Error setting up the purgatory.\n");
 
diff --git a/arch/powerpc/kexec/file_load.c b/arch/powerpc/kexec/file_load.c
index 143c91724617..38439aba27d7 100644
--- a/arch/powerpc/kexec/file_load.c
+++ b/arch/powerpc/kexec/file_load.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * ppc64 code to implement the kexec_file_load syscall
+ * powerpc code to implement the kexec_file_load syscall
  *
  * Copyright (C) 2004  Adam Litke (a...@us.ibm.com)
  * Copyright (C) 2004  IBM Corp.
@@ -20,22 +20,7 @@
 #include 
 #include 
 
-#define SLAVE_CODE_SIZE256
-
-const struct kexec_file_ops * const kexec_file_loaders[] = {
-   _elf64_ops,
-   NULL
-};
-
-int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
- unsigned long buf_len)
-{
-   /* We don't support crash kernels yet. */
-   if (image->type == KEXEC_TYPE_CRASH)
-   return -EOPNOTSUPP;
-
-   return kex

[PATCH v6 05/11] powerpc/drmem: make lmb walk a bit more flexible

2020-07-29 Thread Hari Bathini
Currently, numa & prom are the users of drmem lmb walk code. Loading
kdump with kexec_file also needs to walk the drmem LMBs to setup the
usable memory ranges for kdump kernel. But there are couple of issues
in using the code as is. One, walk_drmem_lmb() code is built into the
.init section currently, while kexec_file needs it later. Two, there
is no scope to pass data to the callback function for processing and/
or erroring out on certain conditions.

Fix that by, moving drmem LMB walk code out of .init section, adding
scope to pass data to the callback function and bailing out when
an error is encountered in the callback function.

Signed-off-by: Hari Bathini 
Tested-by: Pingfan Liu 
Reviewed-by: Thiago Jung Bauermann 
---

v5 -> v6:
* Unchanged.

v4 -> v5:
* Unchanged.

v3 -> v4:
* Unchanged. Added Reviewed-by tag from Thiago.

v2 -> v3:
* Unchanged. Added Tested-by tag from Pingfan.

v1 -> v2:
* No changes.


 arch/powerpc/include/asm/drmem.h |9 ++--
 arch/powerpc/kernel/prom.c   |   13 +++---
 arch/powerpc/mm/drmem.c  |   87 +-
 arch/powerpc/mm/numa.c   |   13 +++---
 4 files changed, 78 insertions(+), 44 deletions(-)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index 414d209f45bb..17ccc6474ab6 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -90,13 +90,14 @@ static inline bool drmem_lmb_reserved(struct drmem_lmb *lmb)
 }
 
 u64 drmem_lmb_memory_max(void);
-void __init walk_drmem_lmbs(struct device_node *dn,
-   void (*func)(struct drmem_lmb *, const __be32 **));
+int walk_drmem_lmbs(struct device_node *dn, void *data,
+   int (*func)(struct drmem_lmb *, const __be32 **, void *));
 int drmem_update_dt(void);
 
 #ifdef CONFIG_PPC_PSERIES
-void __init walk_drmem_lmbs_early(unsigned long node,
-   void (*func)(struct drmem_lmb *, const __be32 **));
+int __init
+walk_drmem_lmbs_early(unsigned long node, void *data,
+ int (*func)(struct drmem_lmb *, const __be32 **, void *));
 #endif
 
 static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 9cc49f265c86..7df78de378b0 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -468,8 +468,9 @@ static bool validate_mem_limit(u64 base, u64 *size)
  * This contains a list of memory blocks along with NUMA affinity
  * information.
  */
-static void __init early_init_drmem_lmb(struct drmem_lmb *lmb,
-   const __be32 **usm)
+static int  __init early_init_drmem_lmb(struct drmem_lmb *lmb,
+   const __be32 **usm,
+   void *data)
 {
u64 base, size;
int is_kexec_kdump = 0, rngs;
@@ -484,7 +485,7 @@ static void __init early_init_drmem_lmb(struct drmem_lmb 
*lmb,
 */
if ((lmb->flags & DRCONF_MEM_RESERVED) ||
!(lmb->flags & DRCONF_MEM_ASSIGNED))
-   return;
+   return 0;
 
if (*usm)
is_kexec_kdump = 1;
@@ -499,7 +500,7 @@ static void __init early_init_drmem_lmb(struct drmem_lmb 
*lmb,
 */
rngs = dt_mem_next_cell(dt_root_size_cells, usm);
if (!rngs) /* there are no (base, size) duple */
-   return;
+   return 0;
}
 
do {
@@ -524,6 +525,8 @@ static void __init early_init_drmem_lmb(struct drmem_lmb 
*lmb,
if (lmb->flags & DRCONF_MEM_HOTREMOVABLE)
memblock_mark_hotplug(base, size);
} while (--rngs);
+
+   return 0;
 }
 #endif /* CONFIG_PPC_PSERIES */
 
@@ -534,7 +537,7 @@ static int __init early_init_dt_scan_memory_ppc(unsigned 
long node,
 #ifdef CONFIG_PPC_PSERIES
if (depth == 1 &&
strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) {
-   walk_drmem_lmbs_early(node, early_init_drmem_lmb);
+   walk_drmem_lmbs_early(node, NULL, early_init_drmem_lmb);
return 0;
}
 #endif
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 59327cefbc6a..b2eeea39684c 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -14,6 +14,8 @@
 #include 
 #include 
 
+static int n_root_addr_cells, n_root_size_cells;
+
 static struct drmem_lmb_info __drmem_info;
 struct drmem_lmb_info *drmem_info = &__drmem_info;
 
@@ -189,12 +191,13 @@ int drmem_update_dt(void)
return rc;
 }
 
-static void __init read_drconf_v1_cell(struct drmem_lmb *lmb,
+static void read_drconf_v1_cell(struct drmem_lmb *lmb,
   const __be32 **prop)
 {
const __be32 *p = *prop;
 
-   lmb->base_addr = dt_mem_next_cell(dt_root_addr_cells

[PATCH v6 03/11] powerpc/kexec_file: add helper functions for getting memory ranges

2020-07-29 Thread Hari Bathini
In kexec case, the kernel to be loaded uses the same memory layout as
the running kernel. So, passing on the DT of the running kernel would
be good enough.

But in case of kdump, different memory ranges are needed to manage
loading the kdump kernel, booting into it and exporting the elfcore
of the crashing kernel. The ranges are exclude memory ranges, usable
memory ranges, reserved memory ranges and crash memory ranges.

Exclude memory ranges specify the list of memory ranges to avoid while
loading kdump segments. Usable memory ranges list the memory ranges
that could be used for booting kdump kernel. Reserved memory ranges
list the memory regions for the loading kernel's reserve map. Crash
memory ranges list the memory ranges to be exported as the crashing
kernel's elfcore.

Add helper functions for setting up the above mentioned memory ranges.
This helpers facilitate in understanding the subsequent changes better
and make it easy to setup the different memory ranges listed above, as
and when appropriate.

Signed-off-by: Hari Bathini 
Tested-by: Pingfan Liu 
Reviewed-by: Thiago Jung Bauermann 
---

v5 -> v6:
* Dropped email address from copyright header of the new file being
  added: arch/powerpc/kexec/ranges.c
* Changed mrngs to mem_rngs. Using the convention mem_ranges for
  'struct crash_mem **' types & mem_rngs for 'struct crash_mem *'
  for easy readibility.
* Updated add_opal_mem_range() & add_rtas_mem_range() functions without
  goto statements.
* Moved implementation of all add_foo_mem_range(s)() functions to
  patch 04/11, where they are used.
* Fixed reference count leak in add_tce_mem_ranges() function and also
  updated error handling in reading tce table base & sizes.

v4 -> v5:
* Added Reviewed-by tag from Thiago.
* Added the missing "#ifdef CONFIG_PPC_BOOK3S_64" around add_htab_mem_range()
  function in arch/powerpc/kexec/ranges.c file.
* add_tce_mem_ranges() function returned error when tce table is not found
  in a pci node. This is wrong as pci nodes may not always have tce tables
  (KVM guests, for example). Fixed it by ignoring error in reading tce
  table base/size while returning from the function.

v3 -> v4:
* Updated sort_memory_ranges() function to reuse sort() from lib/sort.c
  and addressed other review comments from Thiago.

v2 -> v3:
* Unchanged. Added Tested-by tag from Pingfan.

v1 -> v2:
* Added an option to merge ranges while sorting to minimize reallocations
  for memory ranges list.
* Dropped within_crashkernel option for add_opal_mem_range() &
  add_rtas_mem_range() as it is not really needed.


 arch/powerpc/include/asm/kexec_ranges.h |   11 +
 arch/powerpc/kexec/Makefile |2 
 arch/powerpc/kexec/ranges.c |  235 +++
 3 files changed, 247 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/include/asm/kexec_ranges.h
 create mode 100644 arch/powerpc/kexec/ranges.c

diff --git a/arch/powerpc/include/asm/kexec_ranges.h 
b/arch/powerpc/include/asm/kexec_ranges.h
new file mode 100644
index ..35ae31a7a4de
--- /dev/null
+++ b/arch/powerpc/include/asm/kexec_ranges.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_POWERPC_KEXEC_RANGES_H
+#define _ASM_POWERPC_KEXEC_RANGES_H
+
+#define MEM_RANGE_CHUNK_SZ 2048/* Memory ranges size chunk */
+
+void sort_memory_ranges(struct crash_mem *mrngs, bool merge);
+struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges);
+int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
+
+#endif /* _ASM_POWERPC_KEXEC_RANGES_H */
diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile
index 67c355329457..4aff6846c772 100644
--- a/arch/powerpc/kexec/Makefile
+++ b/arch/powerpc/kexec/Makefile
@@ -7,7 +7,7 @@ obj-y   += core.o crash.o core_$(BITS).o
 
 obj-$(CONFIG_PPC32)+= relocate_32.o
 
-obj-$(CONFIG_KEXEC_FILE)   += file_load.o file_load_$(BITS).o elf_$(BITS).o
+obj-$(CONFIG_KEXEC_FILE)   += file_load.o ranges.o file_load_$(BITS).o 
elf_$(BITS).o
 
 ifdef CONFIG_HAVE_IMA_KEXEC
 ifdef CONFIG_IMA
diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c
new file mode 100644
index ..dc3ce036f416
--- /dev/null
+++ b/arch/powerpc/kexec/ranges.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * powerpc code to implement the kexec_file_load syscall
+ *
+ * Copyright (C) 2004  Adam Litke (a...@us.ibm.com)
+ * Copyright (C) 2004  IBM Corp.
+ * Copyright (C) 2004,2005  Milton D Miller II, IBM Corporation
+ * Copyright (C) 2005  R Sharada (shar...@in.ibm.com)
+ * Copyright (C) 2006  Mohan Kumar M (mo...@in.ibm.com)
+ * Copyright (C) 2020  IBM Corporation
+ *
+ * Based on kexec-tools' kexec-ppc64.c, fs2dt.c.
+ * Heavily modified for the kernel by
+ * Hari Bathini, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "kexec ranges: " fmt
+
+#include 
+#

[PATCH v6 04/11] ppc64/kexec_file: avoid stomping memory used by special regions

2020-07-29 Thread Hari Bathini
crashkernel region could have an overlap with special memory regions
like  opal, rtas, tce-table & such. These regions are referred to as
exclude memory ranges. Setup this ranges during image probe in order
to avoid them while finding the buffer for different kdump segments.
Override arch_kexec_locate_mem_hole() to locate a memory hole taking
these ranges into account.

Signed-off-by: Hari Bathini 
Reviewed-by: Thiago Jung Bauermann 
---

v5 -> v6:
* Implemented all the add_foo_mem_ranges() functions that get used while
  setting up exclude memory ranges.

v4 -> v5:
* Unchanged. Added Reviewed-by tag from Thiago.

v3 -> v4:
* Dropped KDUMP_BUF_MIN & KDUMP_BUF_MAX macros and fixed off-by-one error
  in arch_locate_mem_hole() helper routines.

v2 -> v3:
* If there are no exclude ranges, the right thing to do is fallbacking
  back to default kexec_locate_mem_hole() implementation instead of
  returning 0. Fixed that.

v1 -> v2:
* Did arch_kexec_locate_mem_hole() override to handle special regions.
* Ensured holes in the memory are accounted for while locating mem hole.
* Updated add_rtas_mem_range() & add_opal_mem_range() callsites based on
  the new prototype for these functions.


 arch/powerpc/include/asm/kexec.h|7 -
 arch/powerpc/include/asm/kexec_ranges.h |   14 +
 arch/powerpc/kexec/elf_64.c |8 +
 arch/powerpc/kexec/file_load_64.c   |  337 +++
 arch/powerpc/kexec/ranges.c |  177 
 5 files changed, 539 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index ac8fd4839171..835dc92e091c 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -100,14 +100,16 @@ void relocate_new_kernel(unsigned long indirection_page, 
unsigned long reboot_co
 #ifdef CONFIG_KEXEC_FILE
 extern const struct kexec_file_ops kexec_elf64_ops;
 
-#ifdef CONFIG_IMA_KEXEC
 #define ARCH_HAS_KIMAGE_ARCH
 
 struct kimage_arch {
+   struct crash_mem *exclude_ranges;
+
+#ifdef CONFIG_IMA_KEXEC
phys_addr_t ima_buffer_addr;
size_t ima_buffer_size;
-};
 #endif
+};
 
 int setup_purgatory(struct kimage *image, const void *slave_code,
const void *fdt, unsigned long kernel_load_addr,
@@ -125,6 +127,7 @@ int setup_new_fdt_ppc64(const struct kimage *image, void 
*fdt,
unsigned long initrd_load_addr,
unsigned long initrd_len, const char *cmdline);
 #endif /* CONFIG_PPC64 */
+
 #endif /* CONFIG_KEXEC_FILE */
 
 #else /* !CONFIG_KEXEC_CORE */
diff --git a/arch/powerpc/include/asm/kexec_ranges.h 
b/arch/powerpc/include/asm/kexec_ranges.h
index 35ae31a7a4de..7a9f8d15 100644
--- a/arch/powerpc/include/asm/kexec_ranges.h
+++ b/arch/powerpc/include/asm/kexec_ranges.h
@@ -7,5 +7,19 @@
 void sort_memory_ranges(struct crash_mem *mrngs, bool merge);
 struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges);
 int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
+int add_tce_mem_ranges(struct crash_mem **mem_ranges);
+int add_initrd_mem_range(struct crash_mem **mem_ranges);
+#ifdef CONFIG_PPC_BOOK3S_64
+int add_htab_mem_range(struct crash_mem **mem_ranges);
+#else
+static inline int add_htab_mem_range(struct crash_mem **mem_ranges)
+{
+   return 0;
+}
+#endif
+int add_kernel_mem_range(struct crash_mem **mem_ranges);
+int add_rtas_mem_range(struct crash_mem **mem_ranges);
+int add_opal_mem_range(struct crash_mem **mem_ranges);
+int add_reserved_mem_ranges(struct crash_mem **mem_ranges);
 
 #endif /* _ASM_POWERPC_KEXEC_RANGES_H */
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
index 23ad04ccaf8e..64c15a5a280b 100644
--- a/arch/powerpc/kexec/elf_64.c
+++ b/arch/powerpc/kexec/elf_64.c
@@ -46,6 +46,14 @@ static void *elf64_load(struct kimage *image, char 
*kernel_buf,
if (ret)
goto out;
 
+   if (image->type == KEXEC_TYPE_CRASH) {
+   /* min & max buffer values for kdump case */
+   kbuf.buf_min = pbuf.buf_min = crashk_res.start;
+   kbuf.buf_max = pbuf.buf_max =
+   ((crashk_res.end < ppc64_rma_size) ?
+crashk_res.end : (ppc64_rma_size - 1));
+   }
+
ret = kexec_elf_load(image, , _info, , _load_addr);
if (ret)
goto out;
diff --git a/arch/powerpc/kexec/file_load_64.c 
b/arch/powerpc/kexec/file_load_64.c
index 3e9ac5f216b0..d09c7724efa8 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -17,12 +17,262 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 const struct kexec_file_ops * const kexec_file_loaders[] = {
_elf64_ops,
NULL
 };
 
+/**
+ * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
+ * regi

[PATCH v6 00/11] ppc64: enable kdump support for kexec_file_load syscall

2020-07-29 Thread Hari Bathini
Sorry! There was a gateway issue on my system while posting v5, due to
which some patches did not make it through. Resending...

This patch series enables kdump support for kexec_file_load system
call (kexec -s -p) on PPC64. The changes are inspired from kexec-tools
code but heavily modified for kernel consumption.

The first patch adds a weak arch_kexec_locate_mem_hole() function to
override locate memory hole logic suiting arch needs. There are some
special regions in ppc64 which should be avoided while loading buffer
& there are multiple callers to kexec_add_buffer making it complicated
to maintain range sanity and using generic lookup at the same time.

The second patch marks ppc64 specific code within arch/powerpc/kexec
and arch/powerpc/purgatory to make the subsequent code changes easy
to understand.

The next patch adds helper function to setup different memory ranges
needed for loading kdump kernel, booting into it and exporting the
crashing kernel's elfcore.

The fourth patch overrides arch_kexec_locate_mem_hole() function to
locate memory hole for kdump segments by accounting for the special
memory regions, referred to as excluded memory ranges, and sets
kbuf->mem when a suitable memory region is found.

The fifth patch moves walk_drmem_lmbs() out of .init section with
a few changes to reuse it for setting up kdump kernel's usable memory
ranges. The next patch uses walk_drmem_lmbs() to look up the LMBs
and set linux,drconf-usable-memory & linux,usable-memory properties
in order to restrict kdump kernel's memory usage.

The next patch setups up backup region as a kexec segment while
loading kdump kernel and teaches purgatory to copy data from source
to destination.

Patch 09 builds the elfcore header for the running kernel & passes
the info to kdump kernel via "elfcorehdr=" parameter to export as
/proc/vmcore file. The next patch sets up the memory reserve map
for the kexec kernel and also claims kdump support for kdump as
all the necessary changes are added.

The next patch fixes a lookup issue for `kexec -l -s` case when
memory is reserved for crashkernel.

The last patch updates purgatory to setup r8 & r9 with opal base
and opal entry addresses respectively to aid kernels built with
CONFIG_PPC_EARLY_DEBUG_OPAL enabled.

Tested the changes successfully on P8, P9 lpars, couple of OpenPOWER
boxes, one with secureboot enabled, KVM guest and a simulator.

v5 -> v6:
* Fixed reference count leak in add_tce_mem_ranges() function and also
  updated error handling in reading tce table base & sizes.
* Instead of trying to reinvent the wheel with get_node_path() &
  get_node_path_size() functions, used %pOF format as suggested by mpe.
* Moved patch 07/11 to end of the series for mpe to take a call on
  whether to have it or not.

v4 -> v5:
* Dropped patches 07/12 & 08/12 and updated purgatory to do everything
  in assembly.
* Added a new patch (which was part of patch 08/12 in v4) to update
  r8 & r9 registers with opal base & opal entry addresses as it is
  expected on kernels built with CONFIG_PPC_EARLY_DEBUG_OPAL enabled.
* Fixed kexec load issue on KVM guest.

v3 -> v4:
* Updated get_node_path() function to be iterative instead of a recursive one.
* Added comment explaining why low memory is added to kdump kernel's usable
  memory ranges though it doesn't fall in crashkernel region.
* Fixed stack_buf to be quadword aligned in accordance with ABI.
* Added missing of_node_put() in setup_purgatory_ppc64().
* Added a FIXME tag to indicate issue in adding opal/rtas regions to
  core image.

v2 -> v3:
* Fixed TOC pointer calculation for purgatory by using section info
  that has relocations applied.
* Fixed arch_kexec_locate_mem_hole() function to fallback to generic
  kexec_locate_mem_hole() lookup if exclude ranges list is empty.
* Dropped check for backup_start in trampoline_64.S as purgatory()
  function takes care of it anyway.

v1 -> v2:
* Introduced arch_kexec_locate_mem_hole() for override and dropped
  weak arch_kexec_add_buffer().
* Addressed warnings reported by lkp.
* Added patch to address kexec load issue when memory is reserved
  for crashkernel.
* Used the appropriate license header for the new files added.
* Added an option to merge ranges to minimize reallocations while
  adding memory ranges.
* Dropped within_crashkernel parameter for add_opal_mem_range() &
  add_rtas_mem_range() functions as it is not really needed.

---

Hari Bathini (11):
  kexec_file: allow archs to handle special regions while locating memory 
hole
  powerpc/kexec_file: mark PPC64 specific code
  powerpc/kexec_file: add helper functions for getting memory ranges
  ppc64/kexec_file: avoid stomping memory used by special regions
  powerpc/drmem: make lmb walk a bit more flexible
  ppc64/kexec_file: restrict memory usage of kdump kernel
  ppc64/kexec_file: setup backup region for kdump kernel
  ppc64/kexec_file: pr

[PATCH v6 01/11] kexec_file: allow archs to handle special regions while locating memory hole

2020-07-29 Thread Hari Bathini
Some architectures may have special memory regions, within the given
memory range, which can't be used for the buffer in a kexec segment.
Implement weak arch_kexec_locate_mem_hole() definition which arch code
may override, to take care of special regions, while trying to locate
a memory hole.

Also, add the missing declarations for arch overridable functions and
and drop the __weak descriptors in the declarations to avoid non-weak
definitions from becoming weak.

Reported-by: kernel test robot 
[lkp: In v1, arch_kimage_file_post_load_cleanup() declaration was missing]
Signed-off-by: Hari Bathini 
Tested-by: Pingfan Liu 
Acked-by: Dave Young 
Reviewed-by: Thiago Jung Bauermann 
---

v5 -> v6:
* Unchanged.

v4 -> v5:
* Unchanged.

v3 -> v4:
* Unchanged. Added Reviewed-by tag from Thiago.

v2 -> v3:
* Unchanged. Added Acked-by & Tested-by tags from Dave & Pingfan.

v1 -> v2:
* Introduced arch_kexec_locate_mem_hole() for override and dropped
  weak arch_kexec_add_buffer().
* Dropped __weak identifier for arch overridable functions.
* Fixed the missing declaration for arch_kimage_file_post_load_cleanup()
  reported by lkp. lkp report for reference:
- https://lore.kernel.org/patchwork/patch/1264418/


 include/linux/kexec.h |   29 ++---
 kernel/kexec_file.c   |   16 ++--
 2 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index ea67910ae6b7..9e93bef52968 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -183,17 +183,24 @@ int kexec_purgatory_get_set_symbol(struct kimage *image, 
const char *name,
   bool get_value);
 void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name);
 
-int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
-unsigned long buf_len);
-void * __weak arch_kexec_kernel_image_load(struct kimage *image);
-int __weak arch_kexec_apply_relocations_add(struct purgatory_info *pi,
-   Elf_Shdr *section,
-   const Elf_Shdr *relsec,
-   const Elf_Shdr *symtab);
-int __weak arch_kexec_apply_relocations(struct purgatory_info *pi,
-   Elf_Shdr *section,
-   const Elf_Shdr *relsec,
-   const Elf_Shdr *symtab);
+/* Architectures may override the below functions */
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+ unsigned long buf_len);
+void *arch_kexec_kernel_image_load(struct kimage *image);
+int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+Elf_Shdr *section,
+const Elf_Shdr *relsec,
+const Elf_Shdr *symtab);
+int arch_kexec_apply_relocations(struct purgatory_info *pi,
+Elf_Shdr *section,
+const Elf_Shdr *relsec,
+const Elf_Shdr *symtab);
+int arch_kimage_file_post_load_cleanup(struct kimage *image);
+#ifdef CONFIG_KEXEC_SIG
+int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
+unsigned long buf_len);
+#endif
+int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf);
 
 extern int kexec_add_buffer(struct kexec_buf *kbuf);
 int kexec_locate_mem_hole(struct kexec_buf *kbuf);
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 09cc78df53c6..e89912d33a27 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -635,6 +635,19 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf)
return ret == 1 ? 0 : -EADDRNOTAVAIL;
 }
 
+/**
+ * arch_kexec_locate_mem_hole - Find free memory to place the segments.
+ * @kbuf:   Parameters for the memory search.
+ *
+ * On success, kbuf->mem will have the start address of the memory region 
found.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int __weak arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
+{
+   return kexec_locate_mem_hole(kbuf);
+}
+
 /**
  * kexec_add_buffer - place a buffer in a kexec segment
  * @kbuf:  Buffer contents and memory parameters.
@@ -647,7 +660,6 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf)
  */
 int kexec_add_buffer(struct kexec_buf *kbuf)
 {
-
struct kexec_segment *ksegment;
int ret;
 
@@ -675,7 +687,7 @@ int kexec_add_buffer(struct kexec_buf *kbuf)
kbuf->buf_align = max(kbuf->buf_align, PAGE_SIZE);
 
/* Walk the RAM ranges and allocate a suitable range for the buffer */
-   ret = kexec_locate_mem_hole(kbuf);
+   ret = arch_kexec_locate_mem_hole(kbuf);
if (ret)
return ret;
 




Re: [RESEND PATCH v5 06/11] ppc64/kexec_file: restrict memory usage of kdump kernel

2020-07-28 Thread Hari Bathini




On 28/07/20 7:14 pm, Michael Ellerman wrote:

Hari Bathini  writes:

diff --git a/arch/powerpc/kexec/file_load_64.c 
b/arch/powerpc/kexec/file_load_64.c
index 2df6f4273ddd..8df085a22fd7 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -17,9 +17,21 @@
  #include 
  #include 
  #include 
+#include 
  #include 
+#include 
+#include 
  #include 
  
+struct umem_info {

+   uint64_t *buf; /* data buffer for usable-memory property */
+   uint32_t idx;  /* current index */
+   uint32_t size; /* size allocated for the data buffer */


Use kernel types please, u64, u32.


+   /* usable memory ranges to look up */
+   const struct crash_mem *umrngs;


"umrngs".

Given it's part of the umem_info struct could it just be "ranges"?


True. Actually, having crash_mem_range *ranges + u32 nr_ranges and 
populating them seems better. Will do that..



+   return NULL;
+   }


um_info->size = new_size;


+
+   memset(tbuf + um_info->idx, 0, MEM_RANGE_CHUNK_SZ);


Just pass __GFP_ZERO to krealloc?


There are patches submitted to stable fixing a few modules that use 
krealloc with __GFP_ZERO. Also, this zeroing is not really needed.

I will drop the memset instead..

Thanks
Hari

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [RESEND PATCH v5 07/11] ppc64/kexec_file: enable early kernel's OPAL calls

2020-07-28 Thread Hari Bathini




On 28/07/20 7:16 pm, Michael Ellerman wrote:

Hari Bathini  writes:

Kernel built with CONFIG_PPC_EARLY_DEBUG_OPAL enabled expects r8 & r9
to be filled with OPAL base & entry addresses respectively. Setting
these registers allows the kernel to perform OPAL calls before the
device tree is parsed.


I'm not convinced we want to do this.

If we do it becomes part of the kexec ABI and we have to honour it into
the future.

And in practice there are no non-development kernels built with OPAL early
debugging enabled, so it's not clear it actually helps anyone other than
developers.



Hmmm.. kexec-tools does it since commit d58ad564852c ("kexec/ppc64
Enable early kernel's OPAL calls") for kexec_load syscall. So, we would
be breaking kexec ABI either way, I guess.

Let me put this patch at the end of the series in the respin to let you
decide whether to have it or not..

Thanks
Hari

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[RESEND PATCH v5 11/11] ppc64/kexec_file: fix kexec load failure with lack of memory hole

2020-07-26 Thread Hari Bathini
The kexec purgatory has to run in real mode. Only the first memory
block maybe accessible in real mode. And, unlike the case with panic
kernel, no memory is set aside for regular kexec load. Another thing
to note is, the memory for crashkernel is reserved at an offset of
128MB. So, when crashkernel memory is reserved, the memory ranges to
load kexec segments shrink further as the generic code only looks for
memblock free memory ranges and in all likelihood only a tiny bit of
memory from 0 to 128MB would be available to load kexec segments.

With kdump being used by default in general, kexec file load is likely
to fail almost always. This can be fixed by changing the memory hole
lookup logic for regular kexec to use the same method as kdump. This
would mean that most kexec segments will overlap with crashkernel
memory region. That should still be ok as the pages, whose destination
address isn't available while loading, are placed in an intermediate
location till a flush to the actual destination address happens during
kexec boot sequence.

Signed-off-by: Hari Bathini 
Tested-by: Pingfan Liu 
Reviewed-by: Thiago Jung Bauermann 
---

v4 -> v5:
* Unchanged.

v3 -> v4:
* Unchanged. Added Reviewed-by tag from Thiago.

v2 -> v3:
* Unchanged. Added Tested-by tag from Pingfan.

v1 -> v2:
* New patch to fix locating memory hole for kexec_file_load (kexec -s -l)
  when memory is reserved for crashkernel.


 arch/powerpc/kexec/file_load_64.c |   33 ++---
 1 file changed, 14 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/kexec/file_load_64.c 
b/arch/powerpc/kexec/file_load_64.c
index 296be7fc6440..7933b8990714 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -1122,13 +1122,6 @@ int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
u64 buf_min, buf_max;
int ret;
 
-   /*
-* Use the generic kexec_locate_mem_hole for regular
-* kexec_file_load syscall
-*/
-   if (kbuf->image->type != KEXEC_TYPE_CRASH)
-   return kexec_locate_mem_hole(kbuf);
-
/* Look up the exclude ranges list while locating the memory hole */
emem = &(kbuf->image->arch.exclude_ranges);
if (!(*emem) || ((*emem)->nr_ranges == 0)) {
@@ -1136,11 +1129,15 @@ int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
return kexec_locate_mem_hole(kbuf);
}
 
+   buf_min = kbuf->buf_min;
+   buf_max = kbuf->buf_max;
/* Segments for kdump kernel should be within crashkernel region */
-   buf_min = (kbuf->buf_min < crashk_res.start ?
-  crashk_res.start : kbuf->buf_min);
-   buf_max = (kbuf->buf_max > crashk_res.end ?
-  crashk_res.end : kbuf->buf_max);
+   if (kbuf->image->type == KEXEC_TYPE_CRASH) {
+   buf_min = (buf_min < crashk_res.start ?
+  crashk_res.start : buf_min);
+   buf_max = (buf_max > crashk_res.end ?
+  crashk_res.end : buf_max);
+   }
 
if (buf_min > buf_max) {
pr_err("Invalid buffer min and/or max values\n");
@@ -1177,15 +1174,13 @@ int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
 int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
  unsigned long buf_len)
 {
-   if (image->type == KEXEC_TYPE_CRASH) {
-   int ret;
+   int ret;
 
-   /* Get exclude memory ranges needed for setting up kdump 
segments */
-   ret = get_exclude_memory_ranges(&(image->arch.exclude_ranges));
-   if (ret) {
-   pr_err("Failed to setup exclude memory ranges for 
buffer lookup\n");
-   return ret;
-   }
+   /* Get exclude memory ranges needed for setting up kexec segments */
+   ret = get_exclude_memory_ranges(&(image->arch.exclude_ranges));
+   if (ret) {
+   pr_err("Failed to setup exclude memory ranges for buffer 
lookup\n");
+   return ret;
}
 
return kexec_image_probe_default(image, buf, buf_len);



___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[RESEND PATCH v5 10/11] ppc64/kexec_file: add appropriate regions for memory reserve map

2020-07-26 Thread Hari Bathini
While initrd, elfcorehdr and backup regions are already added to the
reserve map, there are a few missing regions that need to be added to
the memory reserve map. Add them here. And now that all the changes
to load panic kernel are in place, claim likewise.

Signed-off-by: Hari Bathini 
Tested-by: Pingfan Liu 
Reviewed-by: Thiago Jung Bauermann 
---

v4 -> v5:
* Unchanged.

v3 -> v4:
* Fixed a spellcheck and added Reviewed-by tag from Thiago.

v2 -> v3:
* Unchanged. Added Tested-by tag from Pingfan.

v1 -> v2:
* Updated add_rtas_mem_range() & add_opal_mem_range() callsites based on
  the new prototype for these functions.


 arch/powerpc/kexec/file_load_64.c |   58 ++---
 1 file changed, 53 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kexec/file_load_64.c 
b/arch/powerpc/kexec/file_load_64.c
index 7a52f0634ce6..296be7fc6440 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -201,6 +201,34 @@ static int get_crash_memory_ranges(struct crash_mem 
**mem_ranges)
return ret;
 }
 
+/**
+ * get_reserved_memory_ranges - Get reserve memory ranges. This list includes
+ *  memory regions that should be added to the
+ *  memory reserve map to ensure the region is
+ *  protected from any mischief.
+ * @mem_ranges: Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int get_reserved_memory_ranges(struct crash_mem **mem_ranges)
+{
+   int ret;
+
+   ret = add_rtas_mem_range(mem_ranges);
+   if (ret)
+   goto out;
+
+   ret = add_tce_mem_ranges(mem_ranges);
+   if (ret)
+   goto out;
+
+   ret = add_reserved_ranges(mem_ranges);
+out:
+   if (ret)
+   pr_err("Failed to setup reserved memory ranges\n");
+   return ret;
+}
+
 /**
  * __locate_mem_hole_top_down - Looks top down for a large enough memory hole
  *  in the memory regions between buf_min & buf_max
@@ -1007,8 +1035,8 @@ int setup_new_fdt_ppc64(const struct kimage *image, void 
*fdt,
unsigned long initrd_load_addr,
unsigned long initrd_len, const char *cmdline)
 {
-   struct crash_mem *umem = NULL;
-   int ret;
+   struct crash_mem *umem = NULL, *rmem = NULL;
+   int i, nr_ranges, ret;
 
ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline);
if (ret)
@@ -1051,7 +1079,27 @@ int setup_new_fdt_ppc64(const struct kimage *image, void 
*fdt,
}
}
 
+   /* Update memory reserve map */
+   ret = get_reserved_memory_ranges();
+   if (ret)
+   goto out;
+
+   nr_ranges = rmem ? rmem->nr_ranges : 0;
+   for (i = 0; i < nr_ranges; i++) {
+   u64 base, size;
+
+   base = rmem->ranges[i].start;
+   size = rmem->ranges[i].end - base + 1;
+   ret = fdt_add_mem_rsv(fdt, base, size);
+   if (ret) {
+   pr_err("Error updating memory reserve map: %s\n",
+  fdt_strerror(ret));
+   goto out;
+   }
+   }
+
 out:
+   kfree(rmem);
kfree(umem);
return ret;
 }
@@ -1134,10 +1182,10 @@ int arch_kexec_kernel_image_probe(struct kimage *image, 
void *buf,
 
/* Get exclude memory ranges needed for setting up kdump 
segments */
ret = get_exclude_memory_ranges(&(image->arch.exclude_ranges));
-   if (ret)
+   if (ret) {
pr_err("Failed to setup exclude memory ranges for 
buffer lookup\n");
-   /* Return this until all changes for panic kernel are in */
-   return -EOPNOTSUPP;
+   return ret;
+   }
}
 
return kexec_image_probe_default(image, buf, buf_len);



___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[RESEND PATCH v5 09/11] ppc64/kexec_file: prepare elfcore header for crashing kernel

2020-07-26 Thread Hari Bathini
Prepare elf headers for the crashing kernel's core file using
crash_prepare_elf64_headers() and pass on this info to kdump
kernel by updating its command line with elfcorehdr parameter.
Also, add elfcorehdr location to reserve map to avoid it from
being stomped on while booting.

Signed-off-by: Hari Bathini 
Tested-by: Pingfan Liu 
Reviewed-by: Thiago Jung Bauermann 
---

v4 -> v5:
* Unchanged. Added Reviewed-by tag from Thiago.

v3 -> v4:
* Added a FIXME tag to indicate issue in adding opal/rtas regions to
  core image.
* Folded prepare_elf_headers() function into load_elfcorehdr_segment().

v2 -> v3:
* Unchanged. Added Tested-by tag from Pingfan.

v1 -> v2:
* Tried merging adjacent memory ranges on hitting maximum ranges limit
  to reduce reallocations for memory ranges and also, minimize PT_LOAD
  segments for elfcore.
* Updated add_rtas_mem_range() & add_opal_mem_range() callsites based on
  the new prototype for these functions.


 arch/powerpc/include/asm/kexec.h  |6 +
 arch/powerpc/kexec/elf_64.c   |   12 +++
 arch/powerpc/kexec/file_load.c|   49 +++
 arch/powerpc/kexec/file_load_64.c |  165 +
 4 files changed, 232 insertions(+)

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index f9514ebeffaa..fe885bc3127e 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -108,12 +108,18 @@ struct kimage_arch {
unsigned long backup_start;
void *backup_buf;
 
+   unsigned long elfcorehdr_addr;
+   unsigned long elf_headers_sz;
+   void *elf_headers;
+
 #ifdef CONFIG_IMA_KEXEC
phys_addr_t ima_buffer_addr;
size_t ima_buffer_size;
 #endif
 };
 
+char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
+ unsigned long cmdline_len);
 int setup_purgatory(struct kimage *image, const void *slave_code,
const void *fdt, unsigned long kernel_load_addr,
unsigned long fdt_load_addr);
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
index 76e2fc7e6dc3..d0e459bb2f05 100644
--- a/arch/powerpc/kexec/elf_64.c
+++ b/arch/powerpc/kexec/elf_64.c
@@ -35,6 +35,7 @@ static void *elf64_load(struct kimage *image, char 
*kernel_buf,
void *fdt;
const void *slave_code;
struct elfhdr ehdr;
+   char *modified_cmdline = NULL;
struct kexec_elf_info elf_info;
struct kexec_buf kbuf = { .image = image, .buf_min = 0,
  .buf_max = ppc64_rma_size };
@@ -75,6 +76,16 @@ static void *elf64_load(struct kimage *image, char 
*kernel_buf,
pr_err("Failed to load kdump kernel segments\n");
goto out;
}
+
+   /* Setup cmdline for kdump kernel case */
+   modified_cmdline = setup_kdump_cmdline(image, cmdline,
+  cmdline_len);
+   if (!modified_cmdline) {
+   pr_err("Setting up cmdline for kdump kernel failed\n");
+   ret = -EINVAL;
+   goto out;
+   }
+   cmdline = modified_cmdline;
}
 
if (initrd != NULL) {
@@ -131,6 +142,7 @@ static void *elf64_load(struct kimage *image, char 
*kernel_buf,
pr_err("Error setting up the purgatory.\n");
 
 out:
+   kfree(modified_cmdline);
kexec_free_elf_info(_info);
 
/* Make kimage_file_post_load_cleanup free the fdt buffer for us. */
diff --git a/arch/powerpc/kexec/file_load.c b/arch/powerpc/kexec/file_load.c
index 38439aba27d7..d52c09729edd 100644
--- a/arch/powerpc/kexec/file_load.c
+++ b/arch/powerpc/kexec/file_load.c
@@ -18,10 +18,45 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #define SLAVE_CODE_SIZE256 /* First 0x100 bytes */
 
+/**
+ * setup_kdump_cmdline - Prepend "elfcorehdr= " to command line
+ *   of kdump kernel for exporting the core.
+ * @image:   Kexec image
+ * @cmdline: Command line parameters to update.
+ * @cmdline_len: Length of the cmdline parameters.
+ *
+ * kdump segment must be setup before calling this function.
+ *
+ * Returns new cmdline buffer for kdump kernel on success, NULL otherwise.
+ */
+char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
+ unsigned long cmdline_len)
+{
+   int elfcorehdr_strlen;
+   char *cmdline_ptr;
+
+   cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL);
+   if (!cmdline_ptr)
+   return NULL;
+
+   elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ",
+   image->arch.elfcorehdr_addr);
+
+   if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) {
+   pr_err("Appe

[RESEND PATCH v5 08/11] ppc64/kexec_file: setup backup region for kdump kernel

2020-07-26 Thread Hari Bathini
Though kdump kernel boots from loaded address, the first 64KB of it is
copied down to real 0. So, setup a backup region and let purgatory
copy the first 64KB of crashed kernel into this backup region before
booting into kdump kernel. Update reserve map with backup region and
crashed kernel's memory to avoid kdump kernel from accidentially using
that memory.

Signed-off-by: Hari Bathini 
---

v4 -> v5:
* Did not add Reviewed-by tag from Thiago yet as he might want to reconsider
  it with the changes in this patch.
* Wrote backup region copy code in assembler. Also, dropped the patch that
  applies RELA relocations & the patch that sets up stack as they are no
  longer needed.
* For correctness, updated fdt_add_mem_rsv() to take "BACKUP_SRC_END + 1"
  as start address instead of BACKUP_SRC_SIZE.

v3 -> v4:
* Moved fdt_add_mem_rsv() for backup region under kdump flag, on Thiago's
  suggestion, as it is only relevant for kdump.

v2 -> v3:
* Dropped check for backup_start in trampoline_64.S as purgatory() takes
  care of it anyway.

v1 -> v2:
* Check if backup region is available before branching out. This is
  to keep `kexec -l -s` flow as before as much as possible. This would
  eventually change with more testing and addition of sha256 digest
  verification support.
* Fixed missing prototype for purgatory() as reported by lkp.
  lkp report for reference:
- https://lore.kernel.org/patchwork/patch/1264423/


 arch/powerpc/include/asm/crashdump-ppc64.h |   19 ++
 arch/powerpc/include/asm/kexec.h   |7 ++
 arch/powerpc/kexec/elf_64.c|9 +++
 arch/powerpc/kexec/file_load_64.c  |   95 +++-
 arch/powerpc/purgatory/trampoline_64.S |   38 ++-
 5 files changed, 161 insertions(+), 7 deletions(-)
 create mode 100644 arch/powerpc/include/asm/crashdump-ppc64.h

diff --git a/arch/powerpc/include/asm/crashdump-ppc64.h 
b/arch/powerpc/include/asm/crashdump-ppc64.h
new file mode 100644
index ..68d9717cc5ee
--- /dev/null
+++ b/arch/powerpc/include/asm/crashdump-ppc64.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_POWERPC_CRASHDUMP_PPC64_H
+#define _ASM_POWERPC_CRASHDUMP_PPC64_H
+
+/*
+ * Backup region - first 64KB of System RAM
+ *
+ * If ever the below macros are to be changed, please be judicious.
+ * The implicit assumptions are:
+ * - start, end & size are less than UINT32_MAX.
+ * - start & size are at least 8 byte aligned.
+ *
+ * For implementation details: arch/powerpc/purgatory/trampoline_64.S
+ */
+#define BACKUP_SRC_START   0
+#define BACKUP_SRC_END 0x
+#define BACKUP_SRC_SIZE(BACKUP_SRC_END - BACKUP_SRC_START + 1)
+
+#endif /* __ASM_POWERPC_CRASHDUMP_PPC64_H */
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 835dc92e091c..f9514ebeffaa 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -105,6 +105,9 @@ extern const struct kexec_file_ops kexec_elf64_ops;
 struct kimage_arch {
struct crash_mem *exclude_ranges;
 
+   unsigned long backup_start;
+   void *backup_buf;
+
 #ifdef CONFIG_IMA_KEXEC
phys_addr_t ima_buffer_addr;
size_t ima_buffer_size;
@@ -120,6 +123,10 @@ int setup_new_fdt(const struct kimage *image, void *fdt,
 int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size);
 
 #ifdef CONFIG_PPC64
+struct kexec_buf;
+
+int load_crashdump_segments_ppc64(struct kimage *image,
+ struct kexec_buf *kbuf);
 int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
  const void *fdt, unsigned long kernel_load_addr,
  unsigned long fdt_load_addr);
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
index 64c15a5a280b..76e2fc7e6dc3 100644
--- a/arch/powerpc/kexec/elf_64.c
+++ b/arch/powerpc/kexec/elf_64.c
@@ -68,6 +68,15 @@ static void *elf64_load(struct kimage *image, char 
*kernel_buf,
 
pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem);
 
+   /* Load additional segments needed for panic kernel */
+   if (image->type == KEXEC_TYPE_CRASH) {
+   ret = load_crashdump_segments_ppc64(image, );
+   if (ret) {
+   pr_err("Failed to load kdump kernel segments\n");
+   goto out;
+   }
+   }
+
if (initrd != NULL) {
kbuf.buffer = initrd;
kbuf.bufsz = kbuf.memsz = initrd_len;
diff --git a/arch/powerpc/kexec/file_load_64.c 
b/arch/powerpc/kexec/file_load_64.c
index a5c1442590b2..88408b17a7f6 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -20,8 +20,10 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
+#include 
 
 struct umem_info {
uint64_t *buf; /* data buffer for 

[RESEND PATCH v5 07/11] ppc64/kexec_file: enable early kernel's OPAL calls

2020-07-26 Thread Hari Bathini
Kernel built with CONFIG_PPC_EARLY_DEBUG_OPAL enabled expects r8 & r9
to be filled with OPAL base & entry addresses respectively. Setting
these registers allows the kernel to perform OPAL calls before the
device tree is parsed.

Signed-off-by: Hari Bathini 
---

v4 -> v5:
* New patch. Updated opal_base & opal_entry values in r8 & r9 respectively.
  This change was part of the below dropped patch in v4:
- https://lore.kernel.org/patchwork/patch/1275667/


 arch/powerpc/kexec/file_load_64.c  |   16 
 arch/powerpc/purgatory/trampoline_64.S |   15 +++
 2 files changed, 31 insertions(+)

diff --git a/arch/powerpc/kexec/file_load_64.c 
b/arch/powerpc/kexec/file_load_64.c
index 8df085a22fd7..a5c1442590b2 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -713,6 +713,8 @@ int setup_purgatory_ppc64(struct kimage *image, const void 
*slave_code,
  const void *fdt, unsigned long kernel_load_addr,
  unsigned long fdt_load_addr)
 {
+   struct device_node *dn = NULL;
+   uint64_t val;
int ret;
 
ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
@@ -735,9 +737,23 @@ int setup_purgatory_ppc64(struct kimage *image, const void 
*slave_code,
goto out;
}
 
+   /* Setup OPAL base & entry values */
+   dn = of_find_node_by_path("/ibm,opal");
+   if (dn) {
+   of_property_read_u64(dn, "opal-base-address", );
+   ret = kexec_purgatory_get_set_symbol(image, "opal_base", ,
+sizeof(val), false);
+   if (ret)
+   goto out;
+
+   of_property_read_u64(dn, "opal-entry-address", );
+   ret = kexec_purgatory_get_set_symbol(image, "opal_entry", ,
+sizeof(val), false);
+   }
 out:
if (ret)
pr_err("Failed to setup purgatory symbols");
+   of_node_put(dn);
return ret;
 }
 
diff --git a/arch/powerpc/purgatory/trampoline_64.S 
b/arch/powerpc/purgatory/trampoline_64.S
index a5a83c3f53e6..464af8e8a4cb 100644
--- a/arch/powerpc/purgatory/trampoline_64.S
+++ b/arch/powerpc/purgatory/trampoline_64.S
@@ -61,6 +61,10 @@ master:
li  %r4,28
STWX_BE %r17,%r3,%r4/* Store my cpu as __be32 at byte 28 */
 1:
+   /* Load opal base and entry values in r8 & r9 respectively */
+   ld  %r8,(opal_base - 0b)(%r18)
+   ld  %r9,(opal_entry - 0b)(%r18)
+
/* load the kernel address */
ld  %r4,(kernel - 0b)(%r18)
 
@@ -102,6 +106,17 @@ dt_offset:
.8byte  0x0
.size dt_offset, . - dt_offset
 
+   .balign 8
+   .globl opal_base
+opal_base:
+   .8byte  0x0
+   .size opal_base, . - opal_base
+
+   .balign 8
+   .globl opal_entry
+opal_entry:
+   .8byte  0x0
+   .size opal_entry, . - opal_entry
 
.data
.balign 8



___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[RESEND PATCH v5 06/11] ppc64/kexec_file: restrict memory usage of kdump kernel

2020-07-26 Thread Hari Bathini
Kdump kernel, used for capturing the kernel core image, is supposed
to use only specific memory regions to avoid corrupting the image to
be captured. The regions are crashkernel range - the memory reserved
explicitly for kdump kernel, memory used for the tce-table, the OPAL
region and RTAS region as applicable. Restrict kdump kernel memory
to use only these regions by setting up usable-memory DT property.
Also, tell the kdump kernel to run at the loaded address by setting
the magic word at 0x5c.

Signed-off-by: Hari Bathini 
Tested-by: Pingfan Liu 
---

v4 -> v5:
* Renamed get_node_pathlen() function to get_node_path_size() and
  handled root node separately to avoid off-by-one error in
  calculating string size.
* Updated get_node_path() in line with change in get_node_path_size().

v3 -> v4:
* Updated get_node_path() to be an iterative function instead of a
  recursive one.
* Added comment explaining why low memory is added to kdump kernel's
  usable memory ranges though it doesn't fall in crashkernel region.
* For correctness, added fdt_add_mem_rsv() for the low memory being
  added to kdump kernel's usable memory ranges.
* Fixed prop pointer update in add_usable_mem_property() and changed
  duple to tuple as suggested by Thiago.

v2 -> v3:
* Unchanged. Added Tested-by tag from Pingfan.

v1 -> v2:
* Fixed off-by-one error while setting up usable-memory properties.
* Updated add_rtas_mem_range() & add_opal_mem_range() callsites based on
  the new prototype for these functions.


 arch/powerpc/kexec/file_load_64.c |  478 +
 1 file changed, 477 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kexec/file_load_64.c 
b/arch/powerpc/kexec/file_load_64.c
index 2df6f4273ddd..8df085a22fd7 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -17,9 +17,21 @@
 #include 
 #include 
 #include 
+#include 
 #include 
+#include 
+#include 
 #include 
 
+struct umem_info {
+   uint64_t *buf; /* data buffer for usable-memory property */
+   uint32_t idx;  /* current index */
+   uint32_t size; /* size allocated for the data buffer */
+
+   /* usable memory ranges to look up */
+   const struct crash_mem *umrngs;
+};
+
 const struct kexec_file_ops * const kexec_file_loaders[] = {
_elf64_ops,
NULL
@@ -74,6 +86,42 @@ static int get_exclude_memory_ranges(struct crash_mem 
**mem_ranges)
return ret;
 }
 
+/**
+ * get_usable_memory_ranges - Get usable memory ranges. This list includes
+ *regions like crashkernel, opal/rtas & tce-table,
+ *that kdump kernel could use.
+ * @mem_ranges:   Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int get_usable_memory_ranges(struct crash_mem **mem_ranges)
+{
+   int ret;
+
+   /*
+* prom code doesn't take kindly to missing low memory. So, add
+* [0, crashk_res.end] instead of [crashk_res.start, crashk_res.end]
+* to keep it happy.
+*/
+   ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1);
+   if (ret)
+   goto out;
+
+   ret = add_rtas_mem_range(mem_ranges);
+   if (ret)
+   goto out;
+
+   ret = add_opal_mem_range(mem_ranges);
+   if (ret)
+   goto out;
+
+   ret = add_tce_mem_ranges(mem_ranges);
+out:
+   if (ret)
+   pr_err("Failed to setup usable memory ranges\n");
+   return ret;
+}
+
 /**
  * __locate_mem_hole_top_down - Looks top down for a large enough memory hole
  *  in the memory regions between buf_min & buf_max
@@ -273,6 +321,382 @@ static int locate_mem_hole_bottom_up_ppc64(struct 
kexec_buf *kbuf,
return ret;
 }
 
+/**
+ * check_realloc_usable_mem - Reallocate buffer if it can't accommodate entries
+ * @um_info:  Usable memory buffer and ranges info.
+ * @cnt:  No. of entries to accommodate.
+ *
+ * Frees up the old buffer if memory reallocation fails.
+ *
+ * Returns buffer on success, NULL on error.
+ */
+static uint64_t *check_realloc_usable_mem(struct umem_info *um_info, int cnt)
+{
+   void *tbuf;
+
+   if (um_info->size >=
+   ((um_info->idx + cnt) * sizeof(*(um_info->buf
+   return um_info->buf;
+
+   um_info->size += MEM_RANGE_CHUNK_SZ;
+   tbuf = krealloc(um_info->buf, um_info->size, GFP_KERNEL);
+   if (!tbuf) {
+   um_info->size -= MEM_RANGE_CHUNK_SZ;
+   return NULL;
+   }
+
+   memset(tbuf + um_info->idx, 0, MEM_RANGE_CHUNK_SZ);
+   return tbuf;
+}
+
+/**
+ * add_usable_mem - Add the usable memory ranges within the given memory range
+ *  to the buffer
+ * @um_info:Usable memory buffer and ranges info.
+ * @base:   Base address of memory ran

[RESEND PATCH v5 05/11] powerpc/drmem: make lmb walk a bit more flexible

2020-07-26 Thread Hari Bathini
Currently, numa & prom are the users of drmem lmb walk code. Loading
kdump with kexec_file also needs to walk the drmem LMBs to setup the
usable memory ranges for kdump kernel. But there are couple of issues
in using the code as is. One, walk_drmem_lmb() code is built into the
.init section currently, while kexec_file needs it later. Two, there
is no scope to pass data to the callback function for processing and/
or erroring out on certain conditions.

Fix that by, moving drmem LMB walk code out of .init section, adding
scope to pass data to the callback function and bailing out when
an error is encountered in the callback function.

Signed-off-by: Hari Bathini 
Tested-by: Pingfan Liu 
Reviewed-by: Thiago Jung Bauermann 
---

v4 -> v5:
* Unchanged.

v3 -> v4:
* Unchanged. Added Reviewed-by tag from Thiago.

v2 -> v3:
* Unchanged. Added Tested-by tag from Pingfan.

v1 -> v2:
* No changes.


 arch/powerpc/include/asm/drmem.h |9 ++--
 arch/powerpc/kernel/prom.c   |   13 +++---
 arch/powerpc/mm/drmem.c  |   87 +-
 arch/powerpc/mm/numa.c   |   13 +++---
 4 files changed, 78 insertions(+), 44 deletions(-)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index 414d209f45bb..17ccc6474ab6 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -90,13 +90,14 @@ static inline bool drmem_lmb_reserved(struct drmem_lmb *lmb)
 }
 
 u64 drmem_lmb_memory_max(void);
-void __init walk_drmem_lmbs(struct device_node *dn,
-   void (*func)(struct drmem_lmb *, const __be32 **));
+int walk_drmem_lmbs(struct device_node *dn, void *data,
+   int (*func)(struct drmem_lmb *, const __be32 **, void *));
 int drmem_update_dt(void);
 
 #ifdef CONFIG_PPC_PSERIES
-void __init walk_drmem_lmbs_early(unsigned long node,
-   void (*func)(struct drmem_lmb *, const __be32 **));
+int __init
+walk_drmem_lmbs_early(unsigned long node, void *data,
+ int (*func)(struct drmem_lmb *, const __be32 **, void *));
 #endif
 
 static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 9cc49f265c86..7df78de378b0 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -468,8 +468,9 @@ static bool validate_mem_limit(u64 base, u64 *size)
  * This contains a list of memory blocks along with NUMA affinity
  * information.
  */
-static void __init early_init_drmem_lmb(struct drmem_lmb *lmb,
-   const __be32 **usm)
+static int  __init early_init_drmem_lmb(struct drmem_lmb *lmb,
+   const __be32 **usm,
+   void *data)
 {
u64 base, size;
int is_kexec_kdump = 0, rngs;
@@ -484,7 +485,7 @@ static void __init early_init_drmem_lmb(struct drmem_lmb 
*lmb,
 */
if ((lmb->flags & DRCONF_MEM_RESERVED) ||
!(lmb->flags & DRCONF_MEM_ASSIGNED))
-   return;
+   return 0;
 
if (*usm)
is_kexec_kdump = 1;
@@ -499,7 +500,7 @@ static void __init early_init_drmem_lmb(struct drmem_lmb 
*lmb,
 */
rngs = dt_mem_next_cell(dt_root_size_cells, usm);
if (!rngs) /* there are no (base, size) duple */
-   return;
+   return 0;
}
 
do {
@@ -524,6 +525,8 @@ static void __init early_init_drmem_lmb(struct drmem_lmb 
*lmb,
if (lmb->flags & DRCONF_MEM_HOTREMOVABLE)
memblock_mark_hotplug(base, size);
} while (--rngs);
+
+   return 0;
 }
 #endif /* CONFIG_PPC_PSERIES */
 
@@ -534,7 +537,7 @@ static int __init early_init_dt_scan_memory_ppc(unsigned 
long node,
 #ifdef CONFIG_PPC_PSERIES
if (depth == 1 &&
strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) {
-   walk_drmem_lmbs_early(node, early_init_drmem_lmb);
+   walk_drmem_lmbs_early(node, NULL, early_init_drmem_lmb);
return 0;
}
 #endif
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 59327cefbc6a..b2eeea39684c 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -14,6 +14,8 @@
 #include 
 #include 
 
+static int n_root_addr_cells, n_root_size_cells;
+
 static struct drmem_lmb_info __drmem_info;
 struct drmem_lmb_info *drmem_info = &__drmem_info;
 
@@ -189,12 +191,13 @@ int drmem_update_dt(void)
return rc;
 }
 
-static void __init read_drconf_v1_cell(struct drmem_lmb *lmb,
+static void read_drconf_v1_cell(struct drmem_lmb *lmb,
   const __be32 **prop)
 {
const __be32 *p = *prop;
 
-   lmb->base_addr = dt_mem_next_cell(dt_root_addr_cells, );
+   lmb->base_addr = of_

[RESEND PATCH v5 04/11] ppc64/kexec_file: avoid stomping memory used by special regions

2020-07-26 Thread Hari Bathini
crashkernel region could have an overlap with special memory regions
like  opal, rtas, tce-table & such. These regions are referred to as
exclude memory ranges. Setup this ranges during image probe in order
to avoid them while finding the buffer for different kdump segments.
Override arch_kexec_locate_mem_hole() to locate a memory hole taking
these ranges into account.

Signed-off-by: Hari Bathini 
Reviewed-by: Thiago Jung Bauermann 
---

v4 -> v5:
* Unchanged. Added Reviewed-by tag from Thiago.

v3 -> v4:
* Dropped KDUMP_BUF_MIN & KDUMP_BUF_MAX macros and fixed off-by-one error
  in arch_locate_mem_hole() helper routines.

v2 -> v3:
* If there are no exclude ranges, the right thing to do is fallbacking
  back to default kexec_locate_mem_hole() implementation instead of
  returning 0. Fixed that.

v1 -> v2:
* Did arch_kexec_locate_mem_hole() override to handle special regions.
* Ensured holes in the memory are accounted for while locating mem hole.
* Updated add_rtas_mem_range() & add_opal_mem_range() callsites based on
  the new prototype for these functions.


 arch/powerpc/include/asm/kexec.h  |7 +
 arch/powerpc/kexec/elf_64.c   |8 +
 arch/powerpc/kexec/file_load_64.c |  337 +
 3 files changed, 348 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index ac8fd4839171..835dc92e091c 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -100,14 +100,16 @@ void relocate_new_kernel(unsigned long indirection_page, 
unsigned long reboot_co
 #ifdef CONFIG_KEXEC_FILE
 extern const struct kexec_file_ops kexec_elf64_ops;
 
-#ifdef CONFIG_IMA_KEXEC
 #define ARCH_HAS_KIMAGE_ARCH
 
 struct kimage_arch {
+   struct crash_mem *exclude_ranges;
+
+#ifdef CONFIG_IMA_KEXEC
phys_addr_t ima_buffer_addr;
size_t ima_buffer_size;
-};
 #endif
+};
 
 int setup_purgatory(struct kimage *image, const void *slave_code,
const void *fdt, unsigned long kernel_load_addr,
@@ -125,6 +127,7 @@ int setup_new_fdt_ppc64(const struct kimage *image, void 
*fdt,
unsigned long initrd_load_addr,
unsigned long initrd_len, const char *cmdline);
 #endif /* CONFIG_PPC64 */
+
 #endif /* CONFIG_KEXEC_FILE */
 
 #else /* !CONFIG_KEXEC_CORE */
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
index 23ad04ccaf8e..64c15a5a280b 100644
--- a/arch/powerpc/kexec/elf_64.c
+++ b/arch/powerpc/kexec/elf_64.c
@@ -46,6 +46,14 @@ static void *elf64_load(struct kimage *image, char 
*kernel_buf,
if (ret)
goto out;
 
+   if (image->type == KEXEC_TYPE_CRASH) {
+   /* min & max buffer values for kdump case */
+   kbuf.buf_min = pbuf.buf_min = crashk_res.start;
+   kbuf.buf_max = pbuf.buf_max =
+   ((crashk_res.end < ppc64_rma_size) ?
+crashk_res.end : (ppc64_rma_size - 1));
+   }
+
ret = kexec_elf_load(image, , _info, , _load_addr);
if (ret)
goto out;
diff --git a/arch/powerpc/kexec/file_load_64.c 
b/arch/powerpc/kexec/file_load_64.c
index 41fe8b6c72d6..2df6f4273ddd 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -17,12 +17,262 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 const struct kexec_file_ops * const kexec_file_loaders[] = {
_elf64_ops,
NULL
 };
 
+/**
+ * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
+ * regions like opal/rtas, tce-table, initrd,
+ * kernel, htab which should be avoided while
+ * setting up kexec load segments.
+ * @mem_ranges:Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
+{
+   int ret;
+
+   ret = add_tce_mem_ranges(mem_ranges);
+   if (ret)
+   goto out;
+
+   ret = add_initrd_mem_range(mem_ranges);
+   if (ret)
+   goto out;
+
+   ret = add_htab_mem_range(mem_ranges);
+   if (ret)
+   goto out;
+
+   ret = add_kernel_mem_range(mem_ranges);
+   if (ret)
+   goto out;
+
+   ret = add_rtas_mem_range(mem_ranges);
+   if (ret)
+   goto out;
+
+   ret = add_opal_mem_range(mem_ranges);
+   if (ret)
+   goto out;
+
+   ret = add_reserved_ranges(mem_ranges);
+   if (ret)
+   goto out;
+
+   /* exclude memory ranges should be sorted for easy lookup */
+   sort_memory_ranges(*mem_ranges, true);
+out:
+   if (ret)
+   pr_err("Failed to setup exclude memory ranges\n");
+   return ret;
+}
+
+/**
+ * __locate_mem_hole_

[RESEND PATCH v5 03/11] powerpc/kexec_file: add helper functions for getting memory ranges

2020-07-26 Thread Hari Bathini
In kexec case, the kernel to be loaded uses the same memory layout as
the running kernel. So, passing on the DT of the running kernel would
be good enough.

But in case of kdump, different memory ranges are needed to manage
loading the kdump kernel, booting into it and exporting the elfcore
of the crashing kernel. The ranges are exclude memory ranges, usable
memory ranges, reserved memory ranges and crash memory ranges.

Exclude memory ranges specify the list of memory ranges to avoid while
loading kdump segments. Usable memory ranges list the memory ranges
that could be used for booting kdump kernel. Reserved memory ranges
list the memory regions for the loading kernel's reserve map. Crash
memory ranges list the memory ranges to be exported as the crashing
kernel's elfcore.

Add helper functions for setting up the above mentioned memory ranges.
This helpers facilitate in understanding the subsequent changes better
and make it easy to setup the different memory ranges listed above, as
and when appropriate.

Signed-off-by: Hari Bathini 
Tested-by: Pingfan Liu 
Reviewed-by: Thiago Jung Bauermann 
---

v4 -> v5:
* Added Reviewed-by tag from Thiago.
* Added the missing "#ifdef CONFIG_PPC_BOOK3S_64" around add_htab_mem_range()
  function in arch/powerpc/kexec/ranges.c file.
* add_tce_mem_ranges() function returned error when tce table is not found
  in a pci node. This is wrong as pci nodes may not always have tce tables
  (KVM guests, for example). Fixed it by ignoring error in reading tce
  table base/size while returning from the function.

v3 -> v4:
* Updated sort_memory_ranges() function to reuse sort() from lib/sort.c
  and addressed other review comments from Thiago.

v2 -> v3:
* Unchanged. Added Tested-by tag from Pingfan.

v1 -> v2:
* Added an option to merge ranges while sorting to minimize reallocations
  for memory ranges list.
* Dropped within_crashkernel option for add_opal_mem_range() &
  add_rtas_mem_range() as it is not really needed.


 arch/powerpc/include/asm/kexec_ranges.h |   25 ++
 arch/powerpc/kexec/Makefile |2 
 arch/powerpc/kexec/ranges.c |  417 +++
 3 files changed, 443 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/include/asm/kexec_ranges.h
 create mode 100644 arch/powerpc/kexec/ranges.c

diff --git a/arch/powerpc/include/asm/kexec_ranges.h 
b/arch/powerpc/include/asm/kexec_ranges.h
new file mode 100644
index ..78f3111e4e74
--- /dev/null
+++ b/arch/powerpc/include/asm/kexec_ranges.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_POWERPC_KEXEC_RANGES_H
+#define _ASM_POWERPC_KEXEC_RANGES_H
+
+#define MEM_RANGE_CHUNK_SZ 2048/* Memory ranges size chunk */
+
+struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges);
+int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
+int add_tce_mem_ranges(struct crash_mem **mem_ranges);
+int add_initrd_mem_range(struct crash_mem **mem_ranges);
+#ifdef CONFIG_PPC_BOOK3S_64
+int add_htab_mem_range(struct crash_mem **mem_ranges);
+#else
+static inline int add_htab_mem_range(struct crash_mem **mem_ranges)
+{
+   return 0;
+}
+#endif
+int add_kernel_mem_range(struct crash_mem **mem_ranges);
+int add_rtas_mem_range(struct crash_mem **mem_ranges);
+int add_opal_mem_range(struct crash_mem **mem_ranges);
+int add_reserved_ranges(struct crash_mem **mem_ranges);
+void sort_memory_ranges(struct crash_mem *mrngs, bool merge);
+
+#endif /* _ASM_POWERPC_KEXEC_RANGES_H */
diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile
index 67c355329457..4aff6846c772 100644
--- a/arch/powerpc/kexec/Makefile
+++ b/arch/powerpc/kexec/Makefile
@@ -7,7 +7,7 @@ obj-y   += core.o crash.o core_$(BITS).o
 
 obj-$(CONFIG_PPC32)+= relocate_32.o
 
-obj-$(CONFIG_KEXEC_FILE)   += file_load.o file_load_$(BITS).o elf_$(BITS).o
+obj-$(CONFIG_KEXEC_FILE)   += file_load.o ranges.o file_load_$(BITS).o 
elf_$(BITS).o
 
 ifdef CONFIG_HAVE_IMA_KEXEC
 ifdef CONFIG_IMA
diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c
new file mode 100644
index ..21bea1b78443
--- /dev/null
+++ b/arch/powerpc/kexec/ranges.c
@@ -0,0 +1,417 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * powerpc code to implement the kexec_file_load syscall
+ *
+ * Copyright (C) 2004  Adam Litke (a...@us.ibm.com)
+ * Copyright (C) 2004  IBM Corp.
+ * Copyright (C) 2004,2005  Milton D Miller II, IBM Corporation
+ * Copyright (C) 2005  R Sharada (shar...@in.ibm.com)
+ * Copyright (C) 2006  Mohan Kumar M (mo...@in.ibm.com)
+ * Copyright (C) 2020  IBM Corporation
+ *
+ * Based on kexec-tools' kexec-ppc64.c, fs2dt.c.
+ * Heavily modified for the kernel by
+ * Hari Bathini .
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "kexec ranges: " fmt
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/**
+ * get_max_nr_ranges - Get the m

[RESEND PATCH v5 02/11] powerpc/kexec_file: mark PPC64 specific code

2020-07-26 Thread Hari Bathini
Some of the kexec_file_load code isn't PPC64 specific. Move PPC64
specific code from kexec/file_load.c to kexec/file_load_64.c. Also,
rename purgatory/trampoline.S to purgatory/trampoline_64.S in the
same spirit. No functional changes.

Signed-off-by: Hari Bathini 
Tested-by: Pingfan Liu 
Reviewed-by: Laurent Dufour 
Reviewed-by: Thiago Jung Bauermann 
---

v4 -> v5:
* Unchanged.

v3 -> v4:
* Moved common code back to set_new_fdt() from setup_new_fdt_ppc64()
  function. Added Reviewed-by tags from Laurent & Thiago.

v2 -> v3:
* Unchanged. Added Tested-by tag from Pingfan.

v1 -> v2:
* No changes.


 arch/powerpc/include/asm/kexec.h   |9 ++
 arch/powerpc/kexec/Makefile|2 -
 arch/powerpc/kexec/elf_64.c|7 +-
 arch/powerpc/kexec/file_load.c |   19 +
 arch/powerpc/kexec/file_load_64.c  |   87 
 arch/powerpc/purgatory/Makefile|4 +
 arch/powerpc/purgatory/trampoline.S|  117 
 arch/powerpc/purgatory/trampoline_64.S |  117 
 8 files changed, 222 insertions(+), 140 deletions(-)
 create mode 100644 arch/powerpc/kexec/file_load_64.c
 delete mode 100644 arch/powerpc/purgatory/trampoline.S
 create mode 100644 arch/powerpc/purgatory/trampoline_64.S

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index c68476818753..ac8fd4839171 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -116,6 +116,15 @@ int setup_new_fdt(const struct kimage *image, void *fdt,
  unsigned long initrd_load_addr, unsigned long initrd_len,
  const char *cmdline);
 int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size);
+
+#ifdef CONFIG_PPC64
+int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
+ const void *fdt, unsigned long kernel_load_addr,
+ unsigned long fdt_load_addr);
+int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
+   unsigned long initrd_load_addr,
+   unsigned long initrd_len, const char *cmdline);
+#endif /* CONFIG_PPC64 */
 #endif /* CONFIG_KEXEC_FILE */
 
 #else /* !CONFIG_KEXEC_CORE */
diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile
index 86380c69f5ce..67c355329457 100644
--- a/arch/powerpc/kexec/Makefile
+++ b/arch/powerpc/kexec/Makefile
@@ -7,7 +7,7 @@ obj-y   += core.o crash.o core_$(BITS).o
 
 obj-$(CONFIG_PPC32)+= relocate_32.o
 
-obj-$(CONFIG_KEXEC_FILE)   += file_load.o elf_$(BITS).o
+obj-$(CONFIG_KEXEC_FILE)   += file_load.o file_load_$(BITS).o elf_$(BITS).o
 
 ifdef CONFIG_HAVE_IMA_KEXEC
 ifdef CONFIG_IMA
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
index 3072fd6dbe94..23ad04ccaf8e 100644
--- a/arch/powerpc/kexec/elf_64.c
+++ b/arch/powerpc/kexec/elf_64.c
@@ -88,7 +88,8 @@ static void *elf64_load(struct kimage *image, char 
*kernel_buf,
goto out;
}
 
-   ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline);
+   ret = setup_new_fdt_ppc64(image, fdt, initrd_load_addr,
+ initrd_len, cmdline);
if (ret)
goto out;
 
@@ -107,8 +108,8 @@ static void *elf64_load(struct kimage *image, char 
*kernel_buf,
pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr);
 
slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset;
-   ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
- fdt_load_addr);
+   ret = setup_purgatory_ppc64(image, slave_code, fdt, kernel_load_addr,
+   fdt_load_addr);
if (ret)
pr_err("Error setting up the purgatory.\n");
 
diff --git a/arch/powerpc/kexec/file_load.c b/arch/powerpc/kexec/file_load.c
index 143c91724617..38439aba27d7 100644
--- a/arch/powerpc/kexec/file_load.c
+++ b/arch/powerpc/kexec/file_load.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * ppc64 code to implement the kexec_file_load syscall
+ * powerpc code to implement the kexec_file_load syscall
  *
  * Copyright (C) 2004  Adam Litke (a...@us.ibm.com)
  * Copyright (C) 2004  IBM Corp.
@@ -20,22 +20,7 @@
 #include 
 #include 
 
-#define SLAVE_CODE_SIZE256
-
-const struct kexec_file_ops * const kexec_file_loaders[] = {
-   _elf64_ops,
-   NULL
-};
-
-int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
- unsigned long buf_len)
-{
-   /* We don't support crash kernels yet. */
-   if (image->type == KEXEC_TYPE_CRASH)
-   return -EOPNOTSUPP;
-
-   return kexec_image_probe_default(image, buf, buf_len);
-}
+#define SLAVE_CODE_SIZE256 /* First 0x100 bytes */
 
 /**
  * setup

[RESEND PATCH v5 00/11] ppc64: enable kdump support for kexec_file_load syscall

2020-07-26 Thread Hari Bathini
Sorry! There was a gateway issue on my system while posting v5, due to
which some patches did not make it through. Resending...

This patch series enables kdump support for kexec_file_load system
call (kexec -s -p) on PPC64. The changes are inspired from kexec-tools
code but heavily modified for kernel consumption.

The first patch adds a weak arch_kexec_locate_mem_hole() function to
override locate memory hole logic suiting arch needs. There are some
special regions in ppc64 which should be avoided while loading buffer
& there are multiple callers to kexec_add_buffer making it complicated
to maintain range sanity and using generic lookup at the same time.

The second patch marks ppc64 specific code within arch/powerpc/kexec
and arch/powerpc/purgatory to make the subsequent code changes easy
to understand.

The next patch adds helper function to setup different memory ranges
needed for loading kdump kernel, booting into it and exporting the
crashing kernel's elfcore.

The fourth patch overrides arch_kexec_locate_mem_hole() function to
locate memory hole for kdump segments by accounting for the special
memory regions, referred to as excluded memory ranges, and sets
kbuf->mem when a suitable memory region is found.

The fifth patch moves walk_drmem_lmbs() out of .init section with
a few changes to reuse it for setting up kdump kernel's usable memory
ranges. The next patch uses walk_drmem_lmbs() to look up the LMBs
and set linux,drconf-usable-memory & linux,usable-memory properties
in order to restrict kdump kernel's memory usage.

The seventh patch updates purgatory to setup r8 & r9 with opal base
and opal entry addresses respectively to aid kernels built with
CONFIG_PPC_EARLY_DEBUG_OPAL enabled. The next patch setups up backup
region as a kexec segment while loading kdump kernel and teaches
purgatory to copy data from source to destination.

Patch 09 builds the elfcore header for the running kernel & passes
the info to kdump kernel via "elfcorehdr=" parameter to export as
/proc/vmcore file. The next patch sets up the memory reserve map
for the kexec kernel and also claims kdump support for kdump as
all the necessary changes are added.

The last patch fixes a lookup issue for `kexec -l -s` case when
memory is reserved for crashkernel.

Tested the changes successfully on P8, P9 lpars, couple of OpenPOWER
boxes, one with secureboot enabled, KVM guest and a simulator.

v4 -> v5:
* Dropped patches 07/12 & 08/12 and updated purgatory to do everything
  in assembly.
* Added a new patch (which was part of patch 08/12 in v4) to update
  r8 & r9 registers with opal base & opal entry addresses as it is
  expected on kernels built with CONFIG_PPC_EARLY_DEBUG_OPAL enabled.
* Fixed kexec load issue on KVM guest.

v3 -> v4:
* Updated get_node_path() function to be iterative instead of a recursive one.
* Added comment explaining why low memory is added to kdump kernel's usable
  memory ranges though it doesn't fall in crashkernel region.
* Fixed stack_buf to be quadword aligned in accordance with ABI.
* Added missing of_node_put() in setup_purgatory_ppc64().
* Added a FIXME tag to indicate issue in adding opal/rtas regions to
  core image.

v2 -> v3:
* Fixed TOC pointer calculation for purgatory by using section info
  that has relocations applied.
* Fixed arch_kexec_locate_mem_hole() function to fallback to generic
  kexec_locate_mem_hole() lookup if exclude ranges list is empty.
* Dropped check for backup_start in trampoline_64.S as purgatory()
  function takes care of it anyway.

v1 -> v2:
* Introduced arch_kexec_locate_mem_hole() for override and dropped
  weak arch_kexec_add_buffer().
* Addressed warnings reported by lkp.
* Added patch to address kexec load issue when memory is reserved
  for crashkernel.
* Used the appropriate license header for the new files added.
* Added an option to merge ranges to minimize reallocations while
  adding memory ranges.
* Dropped within_crashkernel parameter for add_opal_mem_range() &
  add_rtas_mem_range() functions as it is not really needed.

---

Hari Bathini (11):
  kexec_file: allow archs to handle special regions while locating memory 
hole
  powerpc/kexec_file: mark PPC64 specific code
  powerpc/kexec_file: add helper functions for getting memory ranges
  ppc64/kexec_file: avoid stomping memory used by special regions
  powerpc/drmem: make lmb walk a bit more flexible
  ppc64/kexec_file: restrict memory usage of kdump kernel
  ppc64/kexec_file: enable early kernel's OPAL calls
  ppc64/kexec_file: setup backup region for kdump kernel
  ppc64/kexec_file: prepare elfcore header for crashing kernel
  ppc64/kexec_file: add appropriate regions for memory reserve map
  ppc64/kexec_file: fix kexec load failure with lack of memory hole


 arch/powerpc/include/asm/crashdump-ppc64.h |   19 
 arch/powerpc/include/asm/drmem.h   |9 
 arch/powerpc/include/asm/kexec.h   

[RESEND PATCH v5 01/11] kexec_file: allow archs to handle special regions while locating memory hole

2020-07-26 Thread Hari Bathini
Some architectures may have special memory regions, within the given
memory range, which can't be used for the buffer in a kexec segment.
Implement weak arch_kexec_locate_mem_hole() definition which arch code
may override, to take care of special regions, while trying to locate
a memory hole.

Also, add the missing declarations for arch overridable functions and
and drop the __weak descriptors in the declarations to avoid non-weak
definitions from becoming weak.

Reported-by: kernel test robot 
[lkp: In v1, arch_kimage_file_post_load_cleanup() declaration was missing]
Signed-off-by: Hari Bathini 
Tested-by: Pingfan Liu 
Acked-by: Dave Young 
Reviewed-by: Thiago Jung Bauermann 
---

v4 -> v5:
* Unchanged.

v3 -> v4:
* Unchanged. Added Reviewed-by tag from Thiago.

v2 -> v3:
* Unchanged. Added Acked-by & Tested-by tags from Dave & Pingfan.

v1 -> v2:
* Introduced arch_kexec_locate_mem_hole() for override and dropped
  weak arch_kexec_add_buffer().
* Dropped __weak identifier for arch overridable functions.
* Fixed the missing declaration for arch_kimage_file_post_load_cleanup()
  reported by lkp. lkp report for reference:
- https://lore.kernel.org/patchwork/patch/1264418/


 include/linux/kexec.h |   29 ++---
 kernel/kexec_file.c   |   16 ++--
 2 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index ea67910ae6b7..9e93bef52968 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -183,17 +183,24 @@ int kexec_purgatory_get_set_symbol(struct kimage *image, 
const char *name,
   bool get_value);
 void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name);
 
-int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
-unsigned long buf_len);
-void * __weak arch_kexec_kernel_image_load(struct kimage *image);
-int __weak arch_kexec_apply_relocations_add(struct purgatory_info *pi,
-   Elf_Shdr *section,
-   const Elf_Shdr *relsec,
-   const Elf_Shdr *symtab);
-int __weak arch_kexec_apply_relocations(struct purgatory_info *pi,
-   Elf_Shdr *section,
-   const Elf_Shdr *relsec,
-   const Elf_Shdr *symtab);
+/* Architectures may override the below functions */
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+ unsigned long buf_len);
+void *arch_kexec_kernel_image_load(struct kimage *image);
+int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+Elf_Shdr *section,
+const Elf_Shdr *relsec,
+const Elf_Shdr *symtab);
+int arch_kexec_apply_relocations(struct purgatory_info *pi,
+Elf_Shdr *section,
+const Elf_Shdr *relsec,
+const Elf_Shdr *symtab);
+int arch_kimage_file_post_load_cleanup(struct kimage *image);
+#ifdef CONFIG_KEXEC_SIG
+int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
+unsigned long buf_len);
+#endif
+int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf);
 
 extern int kexec_add_buffer(struct kexec_buf *kbuf);
 int kexec_locate_mem_hole(struct kexec_buf *kbuf);
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 09cc78df53c6..e89912d33a27 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -635,6 +635,19 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf)
return ret == 1 ? 0 : -EADDRNOTAVAIL;
 }
 
+/**
+ * arch_kexec_locate_mem_hole - Find free memory to place the segments.
+ * @kbuf:   Parameters for the memory search.
+ *
+ * On success, kbuf->mem will have the start address of the memory region 
found.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int __weak arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
+{
+   return kexec_locate_mem_hole(kbuf);
+}
+
 /**
  * kexec_add_buffer - place a buffer in a kexec segment
  * @kbuf:  Buffer contents and memory parameters.
@@ -647,7 +660,6 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf)
  */
 int kexec_add_buffer(struct kexec_buf *kbuf)
 {
-
struct kexec_segment *ksegment;
int ret;
 
@@ -675,7 +687,7 @@ int kexec_add_buffer(struct kexec_buf *kbuf)
kbuf->buf_align = max(kbuf->buf_align, PAGE_SIZE);
 
/* Walk the RAM ranges and allocate a suitable range for the buffer */
-   ret = kexec_locate_mem_hole(kbuf);
+   ret = arch_kexec_locate_mem_hole(kbuf);
if (ret)
return ret;
 



___
kexec mailing list
k

[PATCH v5 00/11] ppc64: enable kdump support for kexec_file_load syscall

2020-07-24 Thread Hari Bathini
This patch series enables kdump support for kexec_file_load system
call (kexec -s -p) on PPC64. The changes are inspired from kexec-tools
code but heavily modified for kernel consumption.

The first patch adds a weak arch_kexec_locate_mem_hole() function to
override locate memory hole logic suiting arch needs. There are some
special regions in ppc64 which should be avoided while loading buffer
& there are multiple callers to kexec_add_buffer making it complicated
to maintain range sanity and using generic lookup at the same time.

The second patch marks ppc64 specific code within arch/powerpc/kexec
and arch/powerpc/purgatory to make the subsequent code changes easy
to understand.

The next patch adds helper function to setup different memory ranges
needed for loading kdump kernel, booting into it and exporting the
crashing kernel's elfcore.

The fourth patch overrides arch_kexec_locate_mem_hole() function to
locate memory hole for kdump segments by accounting for the special
memory regions, referred to as excluded memory ranges, and sets
kbuf->mem when a suitable memory region is found.

The fifth patch moves walk_drmem_lmbs() out of .init section with
a few changes to reuse it for setting up kdump kernel's usable memory
ranges. The next patch uses walk_drmem_lmbs() to look up the LMBs
and set linux,drconf-usable-memory & linux,usable-memory properties
in order to restrict kdump kernel's memory usage.

The seventh patch updates purgatory to setup r8 & r9 with opal base
and opal entry addresses respectively to aid kernels built with
CONFIG_PPC_EARLY_DEBUG_OPAL enabled. The next patch setups up backup
region as a kexec segment while loading kdump kernel and teaches
purgatory to copy data from source to destination.

Patch 09 builds the elfcore header for the running kernel & passes
the info to kdump kernel via "elfcorehdr=" parameter to export as
/proc/vmcore file. The next patch sets up the memory reserve map
for the kexec kernel and also claims kdump support for kdump as
all the necessary changes are added.

The last patch fixes a lookup issue for `kexec -l -s` case when
memory is reserved for crashkernel.

Tested the changes successfully on P8, P9 lpars, couple of OpenPOWER
boxes, one with secureboot enabled, KVM guest and a simulator.

v4 -> v5:
* Dropped patches 07/12 & 08/12 and updated purgatory to do everything
  in assembly.
* Added a new patch (which was part of patch 08/12 in v4) to update
  r8 & r9 registers with opal base & opal entry addresses as it is
  expected on kernels built with CONFIG_PPC_EARLY_DEBUG_OPAL enabled.
* Fixed kexec load issue on KVM guest.

v3 -> v4:
* Updated get_node_path() function to be iterative instead of a recursive one.
* Added comment explaining why low memory is added to kdump kernel's usable
  memory ranges though it doesn't fall in crashkernel region.
* Fixed stack_buf to be quadword aligned in accordance with ABI.
* Added missing of_node_put() in setup_purgatory_ppc64().
* Added a FIXME tag to indicate issue in adding opal/rtas regions to
  core image.

v2 -> v3:
* Fixed TOC pointer calculation for purgatory by using section info
  that has relocations applied.
* Fixed arch_kexec_locate_mem_hole() function to fallback to generic
  kexec_locate_mem_hole() lookup if exclude ranges list is empty.
* Dropped check for backup_start in trampoline_64.S as purgatory()
  function takes care of it anyway.

v1 -> v2:
* Introduced arch_kexec_locate_mem_hole() for override and dropped
  weak arch_kexec_add_buffer().
* Addressed warnings reported by lkp.
* Added patch to address kexec load issue when memory is reserved
  for crashkernel.
* Used the appropriate license header for the new files added.
* Added an option to merge ranges to minimize reallocations while
  adding memory ranges.
* Dropped within_crashkernel parameter for add_opal_mem_range() &
  add_rtas_mem_range() functions as it is not really needed.

---

Hari Bathini (11):
  kexec_file: allow archs to handle special regions while locating memory 
hole
  powerpc/kexec_file: mark PPC64 specific code
  powerpc/kexec_file: add helper functions for getting memory ranges
  ppc64/kexec_file: avoid stomping memory used by special regions
  powerpc/drmem: make lmb walk a bit more flexible
  ppc64/kexec_file: restrict memory usage of kdump kernel
  ppc64/kexec_file: enable early kernel's OPAL calls
  ppc64/kexec_file: setup backup region for kdump kernel
  ppc64/kexec_file: prepare elfcore header for crashing kernel
  ppc64/kexec_file: add appropriate regions for memory reserve map
  ppc64/kexec_file: fix kexec load failure with lack of memory hole


 arch/powerpc/include/asm/crashdump-ppc64.h |   19 
 arch/powerpc/include/asm/drmem.h   |9 
 arch/powerpc/include/asm/kexec.h   |   29 +
 arch/powerpc/include/asm/kexec_ranges.h|   25 +
 arch/powerpc/kernel/prom.c |   13 
 arch/powerpc/

[PATCH v5 02/11] powerpc/kexec_file: mark PPC64 specific code

2020-07-24 Thread Hari Bathini
Some of the kexec_file_load code isn't PPC64 specific. Move PPC64
specific code from kexec/file_load.c to kexec/file_load_64.c. Also,
rename purgatory/trampoline.S to purgatory/trampoline_64.S in the
same spirit. No functional changes.

Signed-off-by: Hari Bathini 
Tested-by: Pingfan Liu 
Reviewed-by: Laurent Dufour 
Reviewed-by: Thiago Jung Bauermann 
---

v4 -> v5:
* Unchanged.

v3 -> v4:
* Moved common code back to set_new_fdt() from setup_new_fdt_ppc64()
  function. Added Reviewed-by tags from Laurent & Thiago.

v2 -> v3:
* Unchanged. Added Tested-by tag from Pingfan.

v1 -> v2:
* No changes.


 arch/powerpc/include/asm/kexec.h   |9 ++
 arch/powerpc/kexec/Makefile|2 -
 arch/powerpc/kexec/elf_64.c|7 +-
 arch/powerpc/kexec/file_load.c |   19 +
 arch/powerpc/kexec/file_load_64.c  |   87 
 arch/powerpc/purgatory/Makefile|4 +
 arch/powerpc/purgatory/trampoline.S|  117 
 arch/powerpc/purgatory/trampoline_64.S |  117 
 8 files changed, 222 insertions(+), 140 deletions(-)
 create mode 100644 arch/powerpc/kexec/file_load_64.c
 delete mode 100644 arch/powerpc/purgatory/trampoline.S
 create mode 100644 arch/powerpc/purgatory/trampoline_64.S

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index c684768..ac8fd48 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -116,6 +116,15 @@ int setup_new_fdt(const struct kimage *image, void *fdt,
  unsigned long initrd_load_addr, unsigned long initrd_len,
  const char *cmdline);
 int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size);
+
+#ifdef CONFIG_PPC64
+int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
+ const void *fdt, unsigned long kernel_load_addr,
+ unsigned long fdt_load_addr);
+int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
+   unsigned long initrd_load_addr,
+   unsigned long initrd_len, const char *cmdline);
+#endif /* CONFIG_PPC64 */
 #endif /* CONFIG_KEXEC_FILE */
 
 #else /* !CONFIG_KEXEC_CORE */
diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile
index 86380c6..67c3553 100644
--- a/arch/powerpc/kexec/Makefile
+++ b/arch/powerpc/kexec/Makefile
@@ -7,7 +7,7 @@ obj-y   += core.o crash.o core_$(BITS).o
 
 obj-$(CONFIG_PPC32)+= relocate_32.o
 
-obj-$(CONFIG_KEXEC_FILE)   += file_load.o elf_$(BITS).o
+obj-$(CONFIG_KEXEC_FILE)   += file_load.o file_load_$(BITS).o elf_$(BITS).o
 
 ifdef CONFIG_HAVE_IMA_KEXEC
 ifdef CONFIG_IMA
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
index 3072fd6..23ad04c 100644
--- a/arch/powerpc/kexec/elf_64.c
+++ b/arch/powerpc/kexec/elf_64.c
@@ -88,7 +88,8 @@ static void *elf64_load(struct kimage *image, char 
*kernel_buf,
goto out;
}
 
-   ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline);
+   ret = setup_new_fdt_ppc64(image, fdt, initrd_load_addr,
+ initrd_len, cmdline);
if (ret)
goto out;
 
@@ -107,8 +108,8 @@ static void *elf64_load(struct kimage *image, char 
*kernel_buf,
pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr);
 
slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset;
-   ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
- fdt_load_addr);
+   ret = setup_purgatory_ppc64(image, slave_code, fdt, kernel_load_addr,
+   fdt_load_addr);
if (ret)
pr_err("Error setting up the purgatory.\n");
 
diff --git a/arch/powerpc/kexec/file_load.c b/arch/powerpc/kexec/file_load.c
index 143c917..38439ab 100644
--- a/arch/powerpc/kexec/file_load.c
+++ b/arch/powerpc/kexec/file_load.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * ppc64 code to implement the kexec_file_load syscall
+ * powerpc code to implement the kexec_file_load syscall
  *
  * Copyright (C) 2004  Adam Litke (a...@us.ibm.com)
  * Copyright (C) 2004  IBM Corp.
@@ -20,22 +20,7 @@
 #include 
 #include 
 
-#define SLAVE_CODE_SIZE256
-
-const struct kexec_file_ops * const kexec_file_loaders[] = {
-   _elf64_ops,
-   NULL
-};
-
-int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
- unsigned long buf_len)
-{
-   /* We don't support crash kernels yet. */
-   if (image->type == KEXEC_TYPE_CRASH)
-   return -EOPNOTSUPP;
-
-   return kexec_image_probe_default(image, buf, buf_len);
-}
+#define SLAVE_CODE_SIZE256 /* First 0x100 bytes */
 
 /**
  * setup_purgatory - initialize the purgato

[PATCH v5 01/11] kexec_file: allow archs to handle special regions while locating memory hole

2020-07-24 Thread Hari Bathini
Some architectures may have special memory regions, within the given
memory range, which can't be used for the buffer in a kexec segment.
Implement weak arch_kexec_locate_mem_hole() definition which arch code
may override, to take care of special regions, while trying to locate
a memory hole.

Also, add the missing declarations for arch overridable functions and
and drop the __weak descriptors in the declarations to avoid non-weak
definitions from becoming weak.

Reported-by: kernel test robot 
[lkp: In v1, arch_kimage_file_post_load_cleanup() declaration was missing]
Signed-off-by: Hari Bathini 
Tested-by: Pingfan Liu 
Acked-by: Dave Young 
Reviewed-by: Thiago Jung Bauermann 
---

v4 -> v5:
* Unchanged.

v3 -> v4:
* Unchanged. Added Reviewed-by tag from Thiago.

v2 -> v3:
* Unchanged. Added Acked-by & Tested-by tags from Dave & Pingfan.

v1 -> v2:
* Introduced arch_kexec_locate_mem_hole() for override and dropped
  weak arch_kexec_add_buffer().
* Dropped __weak identifier for arch overridable functions.
* Fixed the missing declaration for arch_kimage_file_post_load_cleanup()
  reported by lkp. lkp report for reference:
- https://lore.kernel.org/patchwork/patch/1264418/


 include/linux/kexec.h |   29 ++---
 kernel/kexec_file.c   |   16 ++--
 2 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index ea67910..9e93bef 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -183,17 +183,24 @@ int kexec_purgatory_get_set_symbol(struct kimage *image, 
const char *name,
   bool get_value);
 void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name);
 
-int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
-unsigned long buf_len);
-void * __weak arch_kexec_kernel_image_load(struct kimage *image);
-int __weak arch_kexec_apply_relocations_add(struct purgatory_info *pi,
-   Elf_Shdr *section,
-   const Elf_Shdr *relsec,
-   const Elf_Shdr *symtab);
-int __weak arch_kexec_apply_relocations(struct purgatory_info *pi,
-   Elf_Shdr *section,
-   const Elf_Shdr *relsec,
-   const Elf_Shdr *symtab);
+/* Architectures may override the below functions */
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+ unsigned long buf_len);
+void *arch_kexec_kernel_image_load(struct kimage *image);
+int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+Elf_Shdr *section,
+const Elf_Shdr *relsec,
+const Elf_Shdr *symtab);
+int arch_kexec_apply_relocations(struct purgatory_info *pi,
+Elf_Shdr *section,
+const Elf_Shdr *relsec,
+const Elf_Shdr *symtab);
+int arch_kimage_file_post_load_cleanup(struct kimage *image);
+#ifdef CONFIG_KEXEC_SIG
+int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
+unsigned long buf_len);
+#endif
+int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf);
 
 extern int kexec_add_buffer(struct kexec_buf *kbuf);
 int kexec_locate_mem_hole(struct kexec_buf *kbuf);
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 09cc78d..e89912d 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -636,6 +636,19 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf)
 }
 
 /**
+ * arch_kexec_locate_mem_hole - Find free memory to place the segments.
+ * @kbuf:   Parameters for the memory search.
+ *
+ * On success, kbuf->mem will have the start address of the memory region 
found.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int __weak arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
+{
+   return kexec_locate_mem_hole(kbuf);
+}
+
+/**
  * kexec_add_buffer - place a buffer in a kexec segment
  * @kbuf:  Buffer contents and memory parameters.
  *
@@ -647,7 +660,6 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf)
  */
 int kexec_add_buffer(struct kexec_buf *kbuf)
 {
-
struct kexec_segment *ksegment;
int ret;
 
@@ -675,7 +687,7 @@ int kexec_add_buffer(struct kexec_buf *kbuf)
kbuf->buf_align = max(kbuf->buf_align, PAGE_SIZE);
 
/* Walk the RAM ranges and allocate a suitable range for the buffer */
-   ret = kexec_locate_mem_hole(kbuf);
+   ret = arch_kexec_locate_mem_hole(kbuf);
if (ret)
return ret;
 


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


  1   2   3   >