[PATCH v3 1/3] kexec: add variant helper functions for handling memory regions

2019-12-18 Thread Masayoshi Mizuma
From: AKASHI Takahiro 

mem_regions_alloc_and_add() and mem_regions_alloc_and_exclude() are
functionally equivalent to mem_regions_add() and mem_regions_exclude(),
respectively, except that the new variants re-allocate memory
dynamically when no more entries are available in the 'ranges' array.

Signed-off-by: AKASHI Takahiro 
Tested-by: Bhupesh Sharma 
Tested-by: Masayoshi Mizuma 
---
 kexec/mem_regions.c | 42 ++
 kexec/mem_regions.h |  7 +++
 2 files changed, 49 insertions(+)

diff --git a/kexec/mem_regions.c b/kexec/mem_regions.c
index 50c8abc..ad7d3f1 100644
--- a/kexec/mem_regions.c
+++ b/kexec/mem_regions.c
@@ -125,3 +125,45 @@ int mem_regions_exclude(struct memory_ranges *ranges,
}
return 0;
 }
+
+#define KEXEC_MEMORY_RANGES 16
+
+int mem_regions_alloc_and_add(struct memory_ranges *ranges,
+ unsigned long long base,
+ unsigned long long length, int type)
+{
+   void *new_ranges;
+
+   if (ranges->size >= ranges->max_size) {
+   new_ranges = realloc(ranges->ranges,
+   sizeof(struct memory_range) *
+   (ranges->max_size + KEXEC_MEMORY_RANGES));
+   if (!new_ranges)
+   return -1;
+
+   ranges->ranges = new_ranges;
+   ranges->max_size += KEXEC_MEMORY_RANGES;
+   }
+
+   return mem_regions_add(ranges, base, length, type);
+}
+
+int mem_regions_alloc_and_exclude(struct memory_ranges *ranges,
+ const struct memory_range *range)
+{
+   void *new_ranges;
+
+   /* for safety, we should have at least one free entry in ranges */
+   if (ranges->size >= ranges->max_size) {
+   new_ranges = realloc(ranges->ranges,
+   sizeof(struct memory_range) *
+   (ranges->max_size + KEXEC_MEMORY_RANGES));
+   if (!new_ranges)
+   return -1;
+
+   ranges->ranges = new_ranges;
+   ranges->max_size += KEXEC_MEMORY_RANGES;
+   }
+
+   return mem_regions_exclude(ranges, range);
+}
diff --git a/kexec/mem_regions.h b/kexec/mem_regions.h
index ae9e972..e306d67 100644
--- a/kexec/mem_regions.h
+++ b/kexec/mem_regions.h
@@ -12,4 +12,11 @@ int mem_regions_exclude(struct memory_ranges *ranges,
 int mem_regions_add(struct memory_ranges *ranges, unsigned long long base,
 unsigned long long length, int type);
 
+int mem_regions_alloc_and_exclude(struct memory_ranges *ranges,
+ const struct memory_range *range);
+
+int mem_regions_alloc_and_add(struct memory_ranges *ranges,
+ unsigned long long base,
+ unsigned long long length, int type);
+
 #endif
-- 
2.18.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v3 2/3] arm64: kexec: allocate memory space avoiding reserved regions

2019-12-18 Thread Masayoshi Mizuma
From: AKASHI Takahiro 

On UEFI/ACPI-only system, some memory regions, including but not limited
to UEFI memory map and ACPI tables, must be preserved across kexec'ing.
Otherwise, they can be corrupted and result in early failure in booting
a new kernel.

In recent kernels, /proc/iomem now has an extended file format like:

 4000-5871 : System RAM
   4180-426a : Kernel code
   426b-42aa : reserved
   42ab-42c64fff : Kernel data
   5440-583f : Crash kernel
   5859-585e : reserved
   5870-5871 : reserved
 5872-58b5 : reserved
 58b6-5be3 : System RAM
   58b61000-58b61fff : reserved

where the "reserved" entries at the top level or under System RAM (and
its descendant resources) are ones of such kind and should not be regarded
as usable memory ranges where several free spaces for loading kexec data
will be allocated.

With this patch, get_memory_ranges() will handle this format of file
correctly. Note that, for safety, unknown regions, in addition to
"reserved" ones, will also be excluded.

Signed-off-by: AKASHI Takahiro 
Tested-by: Bhupesh Sharma 
Tested-by: Masayoshi Mizuma 
---
 kexec/arch/arm64/kexec-arm64.c | 153 -
 1 file changed, 94 insertions(+), 59 deletions(-)

diff --git a/kexec/arch/arm64/kexec-arm64.c b/kexec/arch/arm64/kexec-arm64.c
index 6ad3b0a..45ebc54 100644
--- a/kexec/arch/arm64/kexec-arm64.c
+++ b/kexec/arch/arm64/kexec-arm64.c
@@ -10,7 +10,9 @@
 #include 
 #include 
 #include 
+#include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -29,6 +31,7 @@
 #include "fs2dt.h"
 #include "iomem.h"
 #include "kexec-syscall.h"
+#include "mem_regions.h"
 #include "arch/options.h"
 
 #define ROOT_NODE_ADDR_CELLS_DEFAULT 1
@@ -905,19 +908,33 @@ int get_phys_base_from_pt_load(unsigned long *phys_offset)
return 0;
 }
 
+static bool to_be_excluded(char *str)
+{
+   if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) ||
+   !strncmp(str, KERNEL_CODE, strlen(KERNEL_CODE)) ||
+   !strncmp(str, KERNEL_DATA, strlen(KERNEL_DATA)) ||
+   !strncmp(str, CRASH_KERNEL, strlen(CRASH_KERNEL)))
+   return false;
+   else
+   return true;
+}
+
 /**
- * get_memory_ranges_iomem_cb - Helper for get_memory_ranges_iomem.
+ * get_memory_ranges - Try to get the memory ranges from
+ * /proc/iomem.
  */
-
-static int get_memory_ranges_iomem_cb(void *data, int nr, char *str,
-   unsigned long long base, unsigned long long length)
+int get_memory_ranges(struct memory_range **range, int *ranges,
+   unsigned long kexec_flags)
 {
-   int ret;
unsigned long phys_offset = UINT64_MAX;
-   struct memory_range *r;
-
-   if (nr >= KEXEC_SEGMENT_MAX)
-   return -1;
+   FILE *fp;
+   const char *iomem = proc_iomem();
+   char line[MAX_LINE], *str;
+   unsigned long long start, end;
+   int n, consumed;
+   struct memory_ranges memranges;
+   struct memory_range *last, excl_range;
+   int ret;
 
if (!try_read_phys_offset_from_kcore) {
/* Since kernel version 4.19, 'kcore' contains
@@ -951,17 +968,72 @@ static int get_memory_ranges_iomem_cb(void *data, int nr, 
char *str,
try_read_phys_offset_from_kcore = true;
}
 
-   r = (struct memory_range *)data + nr;
+   fp = fopen(iomem, "r");
+   if (!fp)
+   die("Cannot open %s\n", iomem);
+
+   memranges.ranges = NULL;
+   memranges.size = memranges.max_size  = 0;
+
+   while (fgets(line, sizeof(line), fp) != 0) {
+   n = sscanf(line, "%llx-%llx : %n", &start, &end, &consumed);
+   if (n != 2)
+   continue;
+   str = line + consumed;
+
+   if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM))) {
+   ret = mem_regions_alloc_and_add(&memranges,
+   start, end - start + 1, RANGE_RAM);
+   if (ret) {
+   fprintf(stderr,
+   "Cannot allocate memory for ranges\n");
+   fclose(fp);
+   return -ENOMEM;
+   }
 
-   if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)))
-   r->type = RANGE_RAM;
-   else if (!strncmp(str, IOMEM_RESERVED, strlen(IOMEM_RESERVED)))
-   r->type = RANGE_RESERVED;
-   else
-   return 1;
+   dbgprintf("%s:+[%d] %016llx - %016llx\n", __func__,
+   memranges.size - 1,
+   memranges.ranges[memranges.size - 1].start,
+   memranges.ranges[memranges.size - 1].end);
+   } else if (to_be_excluded(str)) {
+ 

[PATCH v3 3/3] arm64: kdump: deal with a lot of resource entries in /proc/iomem

2019-12-18 Thread Masayoshi Mizuma
From: AKASHI Takahiro 

As described in the commit ("arm64: kexec: allocate memory space avoiding
reserved regions"), /proc/iomem now has a lot of "reserved" entries, and
a fixed-size memory range array is no longer sufficient.

With this patch, kdump is able to handle an arbitrary number of memory
ranges, using the mem_regions_alloc_and_xxx() functions.

Signed-off-by: AKASHI Takahiro 
Tested-by: Bhupesh Sharma 
Tested-by: Masayoshi Mizuma 
---
 kexec/arch/arm64/crashdump-arm64.c | 25 ++---
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/kexec/arch/arm64/crashdump-arm64.c 
b/kexec/arch/arm64/crashdump-arm64.c
index 4fd7aa8..38d1a0f 100644
--- a/kexec/arch/arm64/crashdump-arm64.c
+++ b/kexec/arch/arm64/crashdump-arm64.c
@@ -23,13 +23,8 @@
 #include "kexec-elf.h"
 #include "mem_regions.h"
 
-/* memory ranges on crashed kernel */
-static struct memory_range system_memory_ranges[CRASH_MAX_MEMORY_RANGES];
-static struct memory_ranges system_memory_rgns = {
-   .size = 0,
-   .max_size = CRASH_MAX_MEMORY_RANGES,
-   .ranges = system_memory_ranges,
-};
+/* memory ranges of crashed kernel */
+static struct memory_ranges system_memory_rgns;
 
 /* memory range reserved for crashkernel */
 struct memory_range crash_reserved_mem;
@@ -82,7 +77,7 @@ static uint64_t get_kernel_page_offset(void)
  *
  * This function is called once for each memory region found in /proc/iomem.
  * It locates system RAM and crashkernel reserved memory and places these to
- * variables, respectively, system_memory_ranges and crash_reserved_mem.
+ * variables, respectively, system_memory_rgns and usablemem_rgns.
  */
 
 static int iomem_range_callback(void *UNUSED(data), int UNUSED(nr),
@@ -90,11 +85,11 @@ static int iomem_range_callback(void *UNUSED(data), int 
UNUSED(nr),
unsigned long long length)
 {
if (strncmp(str, CRASH_KERNEL, strlen(CRASH_KERNEL)) == 0)
-   return mem_regions_add(_rgns,
-  base, length, RANGE_RAM);
+   return mem_regions_alloc_and_add(_rgns,
+   base, length, RANGE_RAM);
else if (strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) == 0)
-   return mem_regions_add(_memory_rgns,
-  base, length, RANGE_RAM);
+   return mem_regions_alloc_and_add(_memory_rgns,
+   base, length, RANGE_RAM);
else if (strncmp(str, KERNEL_CODE, strlen(KERNEL_CODE)) == 0)
elf_info.kern_paddr_start = base;
else if (strncmp(str, KERNEL_DATA, strlen(KERNEL_DATA)) == 0)
@@ -135,9 +130,9 @@ static int crash_get_memory_ranges(void)
 
dbgprint_mem_range("Reserved memory range", _reserved_mem, 1);
 
-   if (mem_regions_exclude(_memory_rgns, _reserved_mem)) {
-   fprintf(stderr,
-   "Error: Number of crash memory ranges excedeed the max 
limit\n");
+   if (mem_regions_alloc_and_exclude(_memory_rgns,
+   _reserved_mem)) {
+   fprintf(stderr, "Cannot allocate memory for ranges\n");
return -ENOMEM;
}
 
-- 
2.18.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v3 0/3] arm64: handle "reserved" entries in /proc/iomem

2019-12-18 Thread Masayoshi Mizuma
In recent arm64 kernels, /proc/iomem has an extended file format like:

 4000-5871 : System RAM
   4180-426a : Kernel code
   426b-42aa : reserved
   42ab-42c64fff : Kernel data
   5440-583f : Crash kernel
   5859-585e : reserved
   5870-5871 : reserved
 5872-58b5 : reserved
 58b6-5be3 : System RAM
   58b61000-58b61fff : reserved

where "reserved" entries can be an ACPI table, UEFI related code or
data. They can be corrupted and result in early failure in booting
a new kernel. As an actual example, the LPI pending table and LPI property
table, which are pointed to by UEFI data, are sometimes destroyed.

They are expected to be preserved across kexec'ing.

Changelog:
v3: - Re-based to the latest commit (bd07796).
- Added Tested-by tag from Bhupesh and Masayoshi
- Added an error handling in case
  mem_regions_alloc_and_exclude() fails (0002 patch).

AKASHI Takahiro (3):
  kexec: add variant helper functions for handling memory regions
  arm64: kexec: allocate memory space avoiding reserved regions
  arm64: kdump: deal with a lot of resource entries in /proc/iomem

 kexec/arch/arm64/crashdump-arm64.c |  25 ++---
 kexec/arch/arm64/kexec-arm64.c | 153 ++---
 kexec/mem_regions.c|  42 
 kexec/mem_regions.h|   7 ++
 4 files changed, 153 insertions(+), 74 deletions(-)

-- 
2.18.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v2 2/3] arm64: kexec: allocate memory space avoiding reserved regions

2019-12-18 Thread Masayoshi Mizuma
On Wed, Dec 18, 2019 at 11:48:50AM +0900, AKASHI Takahiro wrote:
> On Mon, Dec 16, 2019 at 09:16:06AM -0500, Masayoshi Mizuma wrote:
> > On Mon, Dec 16, 2019 at 11:22:56AM +0530, Bhupesh Sharma wrote:
> > > Thanks Masa,
> 
> [...]
> 
> > > Since this is an old thread, it would be useful for people looking at
> > > the same, if you can add some comments/details about why you think
> > > this nit is needed.
> > 
> > Thank you for your follow up and I'm sorry I didn't explain it.
> > mem_regions_alloc_and_exclude() may fail in case realloc() or
> > mem_region_exclude() fail, so it would be better to add the error
> > handling.
> > 
> > > 
> > > Also if Akashi agrees with the same, it would be better if he could
> > > send a rebased version of the patchset (with your comments addressed),
> > > so that the same can be picked for upstream kexec-tools cleanly.
> > 
> > Sounds great!
> 
> I have almost forgotten the background of this patch.
> If you see that it is still useful, please feel free to re-post it.

Thanks! I'll repost the patchset as v3.

- Masa
> 
> Thanks,
> -Takahiro Akashi
> 
> > - Masa
> > 
> > > 
> > > @Akashi- Hi Akashi, Please let us know your views.
> > > 
> > > Thanks,
> > > Bhupesh
> > > 
> > > > + dbgprintf("%s:-  %016llx - %016llx\n",
> > > > > + __func__, start, end);
> > > > > + }
> > > > > + }
> > > > > +
> > > > > + fclose(fp);
> > > > > +
> > > > > + *range = memranges.ranges;
> > > > > + *ranges = memranges.size;
> > > > >
> > > > >   /* As a fallback option, we can try determining the PHYS_OFFSET
> > > > >* value from the '/proc/iomem' entries as well.
> > > > > @@ -976,52 +1041,15 @@ static int get_memory_ranges_iomem_cb(void 
> > > > > *data, int nr, char *str,
> > > > >* between the user-space and kernel space 'PHYS_OFFSET'
> > > > >* value.
> > > > >*/
> > > > > - set_phys_offset(r->start, "iomem");
> > > > > + if (memranges.size)
> > > > > + set_phys_offset(memranges.ranges[0].start, "iomem");
> > > > >
> > > > > - dbgprintf("%s: %016llx - %016llx : %s", __func__, r->start,
> > > > > - r->end, str);
> > > > > + dbgprint_mem_range("System RAM ranges;",
> > > > > + memranges.ranges, memranges.size);
> > > > >
> > > > >   return 0;
> > > > >  }
> > > > >
> > > > > -/**
> > > > > - * get_memory_ranges_iomem - Try to get the memory ranges from
> > > > > - * /proc/iomem.
> > > > > - */
> > > > > -
> > > > > -static int get_memory_ranges_iomem(struct memory_range *array,
> > > > > - unsigned int *count)
> > > > > -{
> > > > > - *count = kexec_iomem_for_each_line(NULL,
> > > > > - get_memory_ranges_iomem_cb, array);
> > > > > -
> > > > > - if (!*count) {
> > > > > - dbgprintf("%s: failed: No RAM found.\n", __func__);
> > > > > - return EFAILED;
> > > > > - }
> > > > > -
> > > > > - return 0;
> > > > > -}
> > > > > -
> > > > > -/**
> > > > > - * get_memory_ranges - Try to get the memory ranges some how.
> > > > > - */
> > > > > -
> > > > > -int get_memory_ranges(struct memory_range **range, int *ranges,
> > > > > - unsigned long kexec_flags)
> > > > > -{
> > > > > - static struct memory_range array[KEXEC_SEGMENT_MAX];
> > > > > - unsigned int count;
> > > > > - int result;
> > > > > -
> > > > > - result = get_memory_ranges_iomem(array, );
> > > > > -
> > > > > - *range = result ? NULL : array;
> > > > > - *ranges = result ? 0 : count;
> > > > > -
> > > > > - return result;
> > > > > -}
> > > > > -
> > > > >  int arch_compat_trampoline(struct kexec_info *info)
> > > > >  {
> > > > >   return 0;
> > > > > --
> > > > > 2.19.1
> > > > >
> > > > >
> > > >
> > > 

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v2 2/3] arm64: kexec: allocate memory space avoiding reserved regions

2019-12-16 Thread Masayoshi Mizuma
On Mon, Dec 16, 2019 at 11:22:56AM +0530, Bhupesh Sharma wrote:
> Thanks Masa,
> 
> On Sat, Dec 14, 2019 at 1:34 AM Masayoshi Mizuma  
> wrote:
> >
> > some nits as below:
> >
> > On Fri, Jan 11, 2019 at 06:59:45PM +0900, AKASHI Takahiro wrote:
> > > On UEFI/ACPI-only system, some memory regions, including but not limited
> > > to UEFI memory map and ACPI tables, must be preserved across kexec'ing.
> > > Otherwise, they can be corrupted and result in early failure in booting
> > > a new kernel.
> > >
> > > In recent kernels, /proc/iomem now has an extended file format like:
> > >   4000-5871 : System RAM
> > > 4180-426a : Kernel code
> > > 426b-42aa : reserved
> > > 42ab-42c64fff : Kernel data
> > > 5440-583f : Crash kernel
> > > 5859-585e : reserved
> > > 5870-5871 : reserved
> > >   5872-58b5 : reserved
> > >   58b6-5be3 : System RAM
> > > 58b61000-58b61fff : reserved
> > > 59a77000-59a77fff : reserved
> > >   5be4-5bec : reserved
> > >   5bed-5bed : System RAM
> > >   5bee-5bff : reserved
> > >   5c00-5fff : System RAM
> > > 5da0-5e9f : reserved
> > > 5ec0-5edf : reserved
> > > 5ef6a000-5ef6afff : reserved
> > > 5ef6b000-5efcafff : reserved
> > > 5efcd000-5efc : reserved
> > > 5efd-5eff : reserved
> > > 5f00-5fff : reserved
> > >
> > > where the "reserved" entries at the top level or under System RAM (and
> > > its descendant resources) are ones of such kind and should not be regarded
> > > as usable memory ranges where several free spaces for loading kexec data
> > > will be allocated.
> > >
> > > With this patch, get_memory_ranges() will handle this format of file
> > > correctly. Note that, for safety, unknown regions, in addition to
> > > "reserved" ones, will also be excluded.
> > >
> > > Signed-off-by: AKASHI Takahiro 
> > > ---
> > >  kexec/arch/arm64/kexec-arm64.c | 146 -
> > >  1 file changed, 87 insertions(+), 59 deletions(-)
> > >
> > > diff --git a/kexec/arch/arm64/kexec-arm64.c 
> > > b/kexec/arch/arm64/kexec-arm64.c
> > > index 1cde75d1a771..2e923b54f5b1 100644
> > > --- a/kexec/arch/arm64/kexec-arm64.c
> > > +++ b/kexec/arch/arm64/kexec-arm64.c
> > > @@ -10,7 +10,9 @@
> > >  #include 
> > >  #include 
> > >  #include 
> > > +#include 
> > >  #include 
> > > +#include 
> > >  #include 
> > >  #include 
> > >  #include 
> > > @@ -29,6 +31,7 @@
> > >  #include "fs2dt.h"
> > >  #include "iomem.h"
> > >  #include "kexec-syscall.h"
> > > +#include "mem_regions.h"
> > >  #include "arch/options.h"
> > >
> > >  #define ROOT_NODE_ADDR_CELLS_DEFAULT 1
> > > @@ -899,19 +902,33 @@ int get_phys_base_from_pt_load(unsigned long 
> > > *phys_offset)
> > >   return 0;
> > >  }
> > >
> > > +static bool to_be_excluded(char *str)
> > > +{
> > > + if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) ||
> > > + !strncmp(str, KERNEL_CODE, strlen(KERNEL_CODE)) ||
> > > + !strncmp(str, KERNEL_DATA, strlen(KERNEL_DATA)) ||
> > > + !strncmp(str, CRASH_KERNEL, strlen(CRASH_KERNEL)))
> > > + return false;
> > > + else
> > > + return true;
> > > +}
> > > +
> > >  /**
> > > - * get_memory_ranges_iomem_cb - Helper for get_memory_ranges_iomem.
> > > + * get_memory_ranges - Try to get the memory ranges from
> > > + * /proc/iomem.
> > >   */
> > > -
> > > -static int get_memory_ranges_iomem_cb(void *data, int nr, char *str,
> > > - unsigned long long base, unsigned long long length)
> > > +int get_memory_ranges(struct memory_range **range, int *ranges,
> > > + unsigned long kexec_flags)
> > >  {
> > > - int ret;
> > >   unsigned long phys_offset = UINT64_MAX;
> > > - struct memory_range *r;
> > > -
> > > - if (nr >= KEXEC_SEGMENT_MAX)
> > > -   

Re: [PATCH v2 2/3] arm64: kexec: allocate memory space avoiding reserved regions

2019-12-13 Thread Masayoshi Mizuma
some nits as below:

On Fri, Jan 11, 2019 at 06:59:45PM +0900, AKASHI Takahiro wrote:
> On UEFI/ACPI-only system, some memory regions, including but not limited
> to UEFI memory map and ACPI tables, must be preserved across kexec'ing.
> Otherwise, they can be corrupted and result in early failure in booting
> a new kernel.
> 
> In recent kernels, /proc/iomem now has an extended file format like:
>   4000-5871 : System RAM
> 4180-426a : Kernel code
> 426b-42aa : reserved
> 42ab-42c64fff : Kernel data
> 5440-583f : Crash kernel
> 5859-585e : reserved
> 5870-5871 : reserved
>   5872-58b5 : reserved
>   58b6-5be3 : System RAM
> 58b61000-58b61fff : reserved
> 59a77000-59a77fff : reserved
>   5be4-5bec : reserved
>   5bed-5bed : System RAM
>   5bee-5bff : reserved
>   5c00-5fff : System RAM
> 5da0-5e9f : reserved
> 5ec0-5edf : reserved
> 5ef6a000-5ef6afff : reserved
> 5ef6b000-5efcafff : reserved
> 5efcd000-5efc : reserved
> 5efd-5eff : reserved
> 5f00-5fff : reserved
> 
> where the "reserved" entries at the top level or under System RAM (and
> its descendant resources) are ones of such kind and should not be regarded
> as usable memory ranges where several free spaces for loading kexec data
> will be allocated.
> 
> With this patch, get_memory_ranges() will handle this format of file
> correctly. Note that, for safety, unknown regions, in addition to
> "reserved" ones, will also be excluded.
> 
> Signed-off-by: AKASHI Takahiro 
> ---
>  kexec/arch/arm64/kexec-arm64.c | 146 -
>  1 file changed, 87 insertions(+), 59 deletions(-)
> 
> diff --git a/kexec/arch/arm64/kexec-arm64.c b/kexec/arch/arm64/kexec-arm64.c
> index 1cde75d1a771..2e923b54f5b1 100644
> --- a/kexec/arch/arm64/kexec-arm64.c
> +++ b/kexec/arch/arm64/kexec-arm64.c
> @@ -10,7 +10,9 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -29,6 +31,7 @@
>  #include "fs2dt.h"
>  #include "iomem.h"
>  #include "kexec-syscall.h"
> +#include "mem_regions.h"
>  #include "arch/options.h"
>  
>  #define ROOT_NODE_ADDR_CELLS_DEFAULT 1
> @@ -899,19 +902,33 @@ int get_phys_base_from_pt_load(unsigned long 
> *phys_offset)
>   return 0;
>  }
>  
> +static bool to_be_excluded(char *str)
> +{
> + if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) ||
> + !strncmp(str, KERNEL_CODE, strlen(KERNEL_CODE)) ||
> + !strncmp(str, KERNEL_DATA, strlen(KERNEL_DATA)) ||
> + !strncmp(str, CRASH_KERNEL, strlen(CRASH_KERNEL)))
> + return false;
> + else
> + return true;
> +}
> +
>  /**
> - * get_memory_ranges_iomem_cb - Helper for get_memory_ranges_iomem.
> + * get_memory_ranges - Try to get the memory ranges from
> + * /proc/iomem.
>   */
> -
> -static int get_memory_ranges_iomem_cb(void *data, int nr, char *str,
> - unsigned long long base, unsigned long long length)
> +int get_memory_ranges(struct memory_range **range, int *ranges,
> + unsigned long kexec_flags)
>  {
> - int ret;
>   unsigned long phys_offset = UINT64_MAX;
> - struct memory_range *r;
> -
> - if (nr >= KEXEC_SEGMENT_MAX)
> - return -1;
> + FILE *fp;
> + const char *iomem = proc_iomem();
> + char line[MAX_LINE], *str;
> + unsigned long long start, end;
> + int n, consumed;
> + struct memory_ranges memranges;
> + struct memory_range *last, excl_range;
> + int ret;
>  
>   if (!try_read_phys_offset_from_kcore) {
>   /* Since kernel version 4.19, 'kcore' contains
> @@ -945,17 +962,65 @@ static int get_memory_ranges_iomem_cb(void *data, int 
> nr, char *str,
>   try_read_phys_offset_from_kcore = true;
>   }
>  
> - r = (struct memory_range *)data + nr;
> + fp = fopen(iomem, "r");
> + if (!fp)
> + die("Cannot open %s\n", iomem);
> +
> + memranges.ranges = NULL;
> + memranges.size = memranges.max_size  = 0;
> +
> + while (fgets(line, sizeof(line), fp) != 0) {
> + n = sscanf(line, "%llx-%llx : %n", , , );
> + if (n != 2)
> + continue;
> + str = line + consumed;
> +
> + if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM))) {
> + ret = mem_regions_alloc_and_add(,
> + start, end - start + 1, RANGE_RAM);
> + if (ret) {
> + fprintf(stderr,
> + "Cannot allocate memory for ranges\n");

fclose(fp);

> + return -ENOMEM;
> + }
>  
> - if (!strncmp(str, SYSTEM_RAM, 

Re: [PATCH v2 0/3] arm64: handle "reserved" entries in /proc/iomem

2019-12-13 Thread Masayoshi Mizuma
Hello,

(I'm sorry if I break the mail thread...)

As Bhupesh said at:
http://lists.infradead.org/pipermail/kexec/2019-December/024142.html

Please feel free to add:

Tested-by: Bhupesh Sharma 
Tested-by: Masayoshi Mizuma 

Thanks!
Masa

On Fri, Jan 11, 2019 at 06:59:43PM +0900, AKASHI Takahiro wrote:
> # @James, @Bhupesh
> # Could you kindly test this patch, please?
> # I'm not quite confident that I correctly merged my patch with,
> # particularly, Bhupesh's recent change.
> 
> In recent arm64 kernels, /proc/iomem has an extended file format like:
> 4000-5871 : System RAM
>   4180-426a : Kernel code
>   426b-42aa : reserved
>   42ab-42c64fff : Kernel data
>   5440-583f : Crash kernel
>   5859-585e : reserved
>   5870-5871 : reserved
> 5872-58b5 : reserved
> 58b6-5be3 : System RAM
>   58b61000-58b61fff : reserved
>   59a77000-59a77fff : reserved
> 5be4-5bec : reserved
> 5bed-5bed : System RAM
> 5bee-5bff : reserved
> 5c00-5fff : System RAM
>   5da0-5e9f : reserved
>   5ec0-5edf : reserved
>   5ef6a000-5ef6afff : reserved
>   5ef6b000-5efcafff : reserved
>   5efcd000-5efc : reserved
>   5efd-5eff : reserved
>   5f00-5fff : reserved
> 
> where "reserved" entries can be an ACPI table, UEFI related code or data,
> and they are expected to be preserved across kexec'ing.
> With this patch[1], kexec/kdump will be allowed to handle them properly.
> 
> [1] https://git.linaro.org/people/takahiro.akashi/kexec-tools.git 
> arm64/resv_mem
> 
> AKASHI Takahiro (3):
>   kexec: add variant helper functions for handling memory regions
>   arm64: kexec: allocate memory space avoiding reserved regions
>   arm64: kdump: deal with a lot of resource entries in /proc/iomem
> 
>  kexec/arch/arm64/crashdump-arm64.c |  25 ++---
>  kexec/arch/arm64/kexec-arm64.c | 146 +
>  kexec/mem_regions.c|  42 +
>  kexec/mem_regions.h|   7 ++
>  4 files changed, 146 insertions(+), 74 deletions(-)
> 
> -- 
> 2.19.1
> 
> 

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] efi/memreserve: register reservations as 'reserved' in /proc/iomem

2019-12-04 Thread Masayoshi Mizuma
On Wed, Dec 04, 2019 at 06:17:59PM +, James Morse wrote:
> Hi Masa,
> 
> On 04/12/2019 17:17, Masayoshi Mizuma wrote:
> > Thank you for sending the patch, but unfortunately it doesn't work for the 
> > issue...
> > 
> > After applied your patch, the LPI tables are marked as reserved in
> > /proc/iomem like as:
> > 
> > 8030-a1fd : System RAM
> >   8048-8134 : Kernel code
> >   8135-817b : reserved
> >   817c-82ac : Kernel data
> >   830f-830f : reserved # Property table
> >   8348-83480fff : reserved # Pending table
> >   8349-8349 : reserved # Pending table
> > 
> > However, kexec tries to allocate memory from System RAM, it doesn't care
> > the reserved in System RAM.
> 
> > I'm not sure why kexec doesn't care the reserved in System RAM, however,
> 
> Hmm, we added these to fix a problem with the UEFI memory map, and more 
> recently ACPI
> tables being overwritten by kexec.
> 
> Which version of kexec-tools are you using? Could you try:
> https://git.linaro.org/people/takahiro.akashi/kexec-tools.git/commit/?h=arm64/resv_mem

Thanks a lot! It worked and the issue is gone with Ard's patch and
the linaro kexec (arm64/resv_mem branch).

Ard, please feel free to add:

Tested-by: Masayoshi Mizuma 

> 
> 
> > if the kexec behaivor is right, the LPI tables should not belong to
> > System RAM.
> 
> > Like as:
> > 
> > 8030-830e : System RAM
> >   8048-8134 : Kernel code
> >   8135-817b : reserved
> >   817c-82ac : Kernel data
> > 830f-830f : reserved # Property table
> > 8348-83480fff : reserved # Pending table
> > 8349-8349 : reserved # Pending table
> > 834a-a1fd : System RAM
> > 
> > I don't have ideas to separete LPI tables from System RAM... so I tried
> > to add a new file to inform the LPI tables to userspace.
> 
> This is how 'nomap' memory appears, we carve it out of System RAM. A side 
> effect of this
> is kdump can't touch it, as you've told it this isn't memory.
> 
> As these tables are memory, mapped by the linear map, I think Ard's patch is 
> the right
> thing to do ... I suspect your kexec-tools doesn't have those patches from 
> Akashi to make
> it honour all second level entries.
 
I used the kexec on the top of master branch:
git://git.kernel.org/pub/scm/utils/kernel/kexec/kexec-tools.git

Should we use the linaro kexec for aarch64 machines?
Or will the arm64/resv_mem branch be merged into kexec-tools on
git.kernel.org...?

Thanks!
Masa

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] efi/memreserve: register reservations as 'reserved' in /proc/iomem

2019-12-04 Thread Masayoshi Mizuma
Hello Ard,

Thank you for sending the patch, but unfortunately it doesn't fix the
issue...

After applying your patch, the LPI tables are marked as reserved in
/proc/iomem, like this:

8030-a1fd : System RAM
  8048-8134 : Kernel code
  8135-817b : reserved
  817c-82ac : Kernel data
  830f-830f : reserved # Property table
  8348-83480fff : reserved # Pending table
  8349-8349 : reserved # Pending table

However, kexec tries to allocate memory from System RAM, and it does not
take the reserved regions inside System RAM into account.

In the following example, kexec allocates memory 0x82ad-0x8664 to place
the initrd, and the LPI tables are also in that memory region, so the LPI
tables will be destroyed by the kexec reboot.

# kexec -d -l /boot/vmlinuz-5.4.1+ --initrd=/boot/initramfs-5.4.1+.img
...
initrd: base 82ad, size 3b67c6fh (62291055)
...
segment[1].mem   = 0x82ad
segment[1].memsz = 0x3b7   # 0x8664 (== 0x82ad + 0x3b7)
...

I'm not sure why kexec ignores the reserved regions inside System RAM;
however, if the kexec behavior is right, the LPI tables should not belong
to System RAM.
Like this:

8030-830e : System RAM
  8048-8134 : Kernel code
  8135-817b : reserved
  817c-82ac : Kernel data
830f-830f : reserved # Property table
8348-83480fff : reserved # Pending table
8349-8349 : reserved # Pending table
834a-a1fd : System RAM

I don't have any ideas for separating the LPI tables from System RAM... so I
tried to add a new file to report the LPI tables to userspace.

Thanks,
Masa

On Wed, Dec 04, 2019 at 02:52:33PM +, Ard Biesheuvel wrote:
> Memory regions that are reserved using efi_mem_reserve_persistent()
> are recorded in a special EFI config table which survives kexec,
> allowing the incoming kernel to honour them as well. However,
> such reservations are not visible in /proc/iomem, and so the kexec
> tools that load the incoming kernel and its initrd into memory may
> overwrite these reserved regions before the incoming kernel has a
> chance to reserve them from further use.
> 
> So add these reservations to /proc/iomem as they are created. Note
> that reservations that are inherited from a previous kernel are
> memblock_reserve()'d early on, so they are already visible in
> /proc/iomem.
> 
> Cc: Masayoshi Mizuma 
> Cc: d.hatay...@fujitsu.com
> Cc: kexec@lists.infradead.org
> Signed-off-by: Ard Biesheuvel 
> ---
>  drivers/firmware/efi/efi.c | 29 ++--
>  1 file changed, 26 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
> index d101f072c8f8..fcd82dde23c8 100644
> --- a/drivers/firmware/efi/efi.c
> +++ b/drivers/firmware/efi/efi.c
> @@ -979,6 +979,24 @@ static int __init efi_memreserve_map_root(void)
>   return 0;
>  }
>  
> +static int efi_mem_reserve_iomem(phys_addr_t addr, u64 size)
> +{
> + struct resource *res, *parent;
> +
> + res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
> + if (!res)
> + return -ENOMEM;
> +
> + res->name   = "reserved";
> + res->flags  = IORESOURCE_MEM;
> + res->start  = addr;
> + res->end= addr + size - 1;
> +
> + /* we expect a conflict with a 'System RAM' region */
> + parent = request_resource_conflict(_resource, res);
> + return parent ? request_resource(parent, res) : 0;
> +}
> +
>  int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
>  {
>   struct linux_efi_memreserve *rsv;
> @@ -1001,9 +1019,8 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, 
> u64 size)
>   if (index < rsv->size) {
>   rsv->entry[index].base = addr;
>   rsv->entry[index].size = size;
> -
>   memunmap(rsv);
> - return 0;
> + return efi_mem_reserve_iomem(addr, size);
>   }
>   memunmap(rsv);
>   }
> @@ -1013,6 +1030,12 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, 
> u64 size)
>   if (!rsv)
>   return -ENOMEM;
>  
> + rc = efi_mem_reserve_iomem(__pa(rsv), SZ_4K);
> + if (rc) {
> + free_page(rsv);
> + return rc;
> + }
> +
>   /*
>* The memremap() call above assumes that a linux_efi_memreserve entry
>* never crosses a page boundary, so let's ensure that this remains true
> @@ -1029,7 +1052,7 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, 
> u64 size)
>   efi_memreserve_root->next = __pa(rsv);
>   spin_unlock(_mem_reserve_persistent_lock);
>  
> - return 0;
> + return efi_mem_reserve_iomem(addr, size);
>  }
>  
>  static int __init efi_memreserve_root_init(void)
> -- 
> 2.17.1
> 
> 
> 

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v2 2/2] efi: arm64: Introduce /proc/efi/memreserve to tell the persistent pages

2019-12-03 Thread Masayoshi Mizuma
From: Masayoshi Mizuma 

kexec reboot stops in early boot sequence because efi_config_parse_tables()
refers to garbage data. We can see the log with the memblock=debug kernel option:

  efi:  ACPI 2.0=0x9821790014  PROP=0x8757f5c0  SMBIOS 3.0=0x982074  
MEMRESERVE=0x9820bfdc58
  memblock_reserve: [0x009820bfdc58-0x009820bfdc67] 
efi_config_parse_tables+0x228/0x278
  memblock_reserve: [0x8276-0x324d07ff] 
efi_config_parse_tables+0x228/0x278
  memblock_reserve: [0xcc4f84ecc0511670-0x5f6e5214a7fd91f9] 
efi_config_parse_tables+0x244/0x278
  memblock_reserve: [0xd2fd4144b9af693d-0xad0c1db1086f40a2] 
efi_config_parse_tables+0x244/0x278
  memblock_reserve: [0x0c719bb159b1fadc-0x5aa6e62a1417ce12] 
efi_config_parse_tables+0x244/0x278
  ...

That happens because 0x8276, struct linux_efi_memreserve, is destroyed.
0x8276 is pointed to by efi.mem_reserve, and efi.mem_reserve points to the
head page of the LPI pending table and LPI property table, which are allocated by
gic_reserve_range().

The culprit is kexec, which places the initrd in that area:

  ]# kexec -d -l /boot/vmlinuz-5.4.0-rc7 /boot/initramfs-5.4.0-rc7.img 
--reuse-cmdline
  ...
  initrd: base 8229, size 388dd8ah (59301258)
  ...

From the dynamic debug log, the initrd is located in segment[1]:
  machine_kexec_prepare:70:
kexec kimage info:
  type:0
  start:   85b30680
  head:0
  nr_segments: 4
segment[0]: 8048 - 8229, 0x1e1 bytes, 481 
pages
segment[1]: 8229 - 85b2, 0x389 bytes, 905 
pages
segment[2]: 85b2 - 85b3, 0x1 bytes, 1 pages
segment[3]: 85b3 - 85b4, 0x1 bytes, 1 pages

kexec searches for a memory region in which to place the initrd by
scanning the "System RAM" entries in /proc/iomem. The pending tables are
included in "System RAM" because they are allocated by alloc_pages(), so
kexec destroys the LPI pending tables.

Introduce /proc/efi/memreserve to report the pages pointed to by
efi.mem_reserve so that kexec can avoid that area when placing the initrd.

Signed-off-by: Masayoshi Mizuma 
---
 drivers/firmware/efi/efi.c | 75 --
 1 file changed, 72 insertions(+), 3 deletions(-)

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index d8157cb34..80bbe0b3e 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -325,17 +325,87 @@ static __init int efivar_ssdt_load(void)
 static inline int efivar_ssdt_load(void) { return 0; }
 #endif
 
+static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init;
+
 #ifdef CONFIG_PROC_FS
 static struct proc_dir_entry *proc_efi;
+#ifdef CONFIG_KEXEC
+static int memreserve_show(struct seq_file *m, void *v)
+{
+   struct linux_efi_memreserve *rsv;
+   phys_addr_t start, end;
+   unsigned long prsv;
+   int count, i;
+
+   if ((efi_memreserve_root == (void *)ULONG_MAX) ||
+   (!efi_memreserve_root))
+   return -ENODEV;
+
+   for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) {
+   rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB);
+   if (!rsv) {
+   pr_err("Could not map efi_memreserve\n");
+   return -ENOMEM;
+   }
+   count = atomic_read(&rsv->count);
+   for (i = 0; i < count; i++) {
+   start = rsv->entry[i].base;
+   end = start + rsv->entry[i].size - 1;
+
+   seq_printf(m, "%pa-%pa\n", &start, &end);
+   }
+   memunmap(rsv);
+   }
+
+   return 0;
+}
+
+static int memreserve_open(struct inode *inode, struct file *filp)
+{
+   return single_open(filp, memreserve_show, NULL);
+}
+
+static const struct file_operations memreserve_fops = {
+   .owner  = THIS_MODULE,
+   .open   = memreserve_open,
+   .read   = seq_read,
+   .llseek = seq_lseek,
+   .release= single_release,
+};
+
+static int __init efi_proc_memreserve(void)
+{
+   struct proc_dir_entry *pde;
+
+   if ((efi_memreserve_root == (void *)ULONG_MAX) ||
+   (!efi_memreserve_root))
+   return 0;
+
+   pde = proc_create("memreserve", 0444, proc_efi, &memreserve_fops);
+   if (!pde) {
+   pr_err("/proc/efi: Cannot create /proc/efi/memreserve file.\n");
+   return 1;
+   }
+
+   return 0;
+}
+#else
+static inline int efi_proc_memreserve(void) { return 0; }
+#endif /* CONFIG_KEXEC */
+
 static int __init efi_proc_init(void)
 {
+   int error = 1;
+
proc_efi = proc_mkdir("efi", NULL);
if (!proc_efi) {
pr_err("/proc/efi: Cannot create /proc/efi directory.\n");
-   return 1;
+   return error;
 

[PATCH v2 1/2] efi: add /proc/efi directory

2019-12-03 Thread Masayoshi Mizuma
From: Masayoshi Mizuma 

Add /proc/efi directory to show some efi internal information.

Signed-off-by: Masayoshi Mizuma 
---
 drivers/firmware/efi/efi.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index d101f072c..d8157cb34 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -325,6 +325,22 @@ static __init int efivar_ssdt_load(void)
 static inline int efivar_ssdt_load(void) { return 0; }
 #endif
 
+#ifdef CONFIG_PROC_FS
+static struct proc_dir_entry *proc_efi;
+static int __init efi_proc_init(void)
+{
+   proc_efi = proc_mkdir("efi", NULL);
+   if (!proc_efi) {
+   pr_err("/proc/efi: Cannot create /proc/efi directory.\n");
+   return 1;
+   }
+
+   return 0;
+}
+#else
+static inline int efi_proc_init(void) { return 0; }
+#endif /* CONFIG_PROC_FS */
+
 /*
  * We register the efi subsystem with the firmware subsystem and the
  * efivars subsystem with the efi subsystem, if the system was booted with
@@ -381,6 +397,12 @@ static int __init efisubsys_init(void)
goto err_remove_group;
}
 
+   error = efi_proc_init();
+   if (error) {
+   sysfs_remove_mount_point(efi_kobj, "efivars");
+   goto err_remove_group;
+   }
+
return 0;
 
 err_remove_group:
-- 
2.18.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v2 0/2] efi: arm64: Introduce /proc/efi/memreserve to tell the persistent pages

2019-12-03 Thread Masayoshi Mizuma
From: Masayoshi Mizuma 

kexec reboot sometimes fails in the early boot sequence on aarch64 machines.
That is because kexec overwrites the LPI property tables and pending
tables with the initrd.

To avoid the overwrite, introduce /proc/efi/memreserve to tell the
tables region to kexec so that kexec can avoid the memory region to
locate initrd.

kexec also needs a patch to handle /proc/efi/memreserve. I'm preparing
the patch for kexec.

Changelog
v2: - Change memreserve file location from sysfs to procfs.
  memreserve may exceed the PAGE_SIZE in case efi_memreserve_root
  has a lot of entries. So we cannot use sysfs_kf_seq_show().
  Use seq_printf() in procfs instead.

Masayoshi Mizuma (2):
  efi: add /proc/efi directory
  efi: arm64: Introduce /proc/efi/memreserve to tell the persistent
pages

 drivers/firmware/efi/efi.c | 93 +-
 1 file changed, 92 insertions(+), 1 deletion(-)

-- 
2.18.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] efi: arm64: Introduce /sys/firmware/efi/memreserve to tell the persistent pages

2019-12-02 Thread Masayoshi Mizuma
On Fri, Nov 29, 2019 at 01:25:36PM +0100, Matthias Brugger wrote:
> 
> 
> On 25/11/2019 19:49, Masayoshi Mizuma wrote:
> > From: Masayoshi Mizuma 
> > 
> > kexec reboot stops in early boot sequence because efi_config_parse_tables()
> > refers garbage data. We can see the log with memblock=debug kernel option:
> > 
> >   efi:  ACPI 2.0=0x9821790014  PROP=0x8757f5c0  SMBIOS 3.0=0x982074  
> > MEMRESERVE=0x9820bfdc58
> >   memblock_reserve: [0x0000009820bfdc58-0x0000009820bfdc67] 
> > efi_config_parse_tables+0x228/0x278
> >   memblock_reserve: [0x0000000082760000-0x00000000324d07ff] 
> > efi_config_parse_tables+0x228/0x278
> >   memblock_reserve: [0xcc4f84ecc0511670-0x5f6e5214a7fd91f9] 
> > efi_config_parse_tables+0x244/0x278
> >   memblock_reserve: [0xd2fd4144b9af693d-0xad0c1db1086f40a2] 
> > efi_config_parse_tables+0x244/0x278
> >   memblock_reserve: [0x0c719bb159b1fadc-0x5aa6e62a1417ce12] 
> > efi_config_parse_tables+0x244/0x278
> >   ...
> > 
> > That happens because 0x82760000, struct linux_efi_memreserve, is destroyed.
> > 0x82760000 is pointed from efi.mem_reserve, and efi.mem_reserve points the
> > head page of LPI pending table and LPI property table which are allocated by
> > gic_reserve_range().
> > 
> > The destroyer is kexec. kexec locates the initrd to the area:
> > 
> >   ]# kexec -d -l /boot/vmlinuz-5.4.0-rc7 /boot/initramfs-5.4.0-rc7.img 
> > --reuse-cmdline
> >   ...
> >   initrd: base 82290000, size 388dd8ah (59301258)
> >   ...
> > 
> > From dynamic debug log. initrd is located in segment[1]:
> >   machine_kexec_prepare:70:
> > kexec kimage info:
> >   type:0
> >   start:   85b30680
> >   head:0
> >   nr_segments: 4
> > segment[0]: 80480000 - 82290000, 0x1e10000 bytes, 481 pages
> > segment[1]: 82290000 - 85b20000, 0x3890000 bytes, 905 pages
> > segment[2]: 85b20000 - 85b30000, 0x10000 bytes, 1 pages
> > segment[3]: 85b30000 - 85b40000, 0x10000 bytes, 1 pages
> > 
> > kexec searches the memory region to locate initrd through
> > "System RAM" in /proc/iomem. The pending tables are included in
> > "System RAM" because they are allocated by alloc_pages(), so kexec
> > destroys the LPI pending tables.
> > 
> 
> Doesn't that mean that you haven't enough memory reserved so that you have to
> fallback to allocate it via __get_free_page()?

That's not a fallback allocation. The pending tables and also property
tables are allocated by alloc_pages() on its_allocate_prop_table() and
its_allocate_pending_table().

> 
> 
> > Introduce /sys/firmware/efi/memreserve to tell the pages pointed by
> > efi.mem_reserve so that kexec can avoid the area to locate initrd.
> > 
> 
> Doesn't that need a patch for kexec-tools to actually take this into account?

Yes, we need a patch for kexec-tools as well. I'm preparing the kexec
patch.

> 
> > Signed-off-by: Masayoshi Mizuma 
> > ---
> >  drivers/firmware/efi/efi.c | 45 +-
> >  1 file changed, 44 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
> > index e98bbf8e5..0aa07cc09 100644
> > --- a/drivers/firmware/efi/efi.c
> > +++ b/drivers/firmware/efi/efi.c
> > @@ -141,6 +141,47 @@ static ssize_t systab_show(struct kobject *kobj,
> >  
> >  static struct kobj_attribute efi_attr_systab = __ATTR_RO_MODE(systab, 
> > 0400);
> >  
> > +static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init;
> > +#ifdef CONFIG_KEXEC
> > +static ssize_t memreserve_show(struct kobject *kobj,
> > +  struct kobj_attribute *attr, char *buf)
> > +{
> > +   struct linux_efi_memreserve *rsv;
> > +   phys_addr_t start, end;
> > +   unsigned long prsv;
> > +   char *str = buf;
> > +   int count, i;
> > +
> > +   if (!kobj || !buf)
> > +   return -EINVAL;
> > +
> > +   if ((efi_memreserve_root == (void *)ULONG_MAX) ||
> > +   (!efi_memreserve_root))
> > +   return -ENODEV;
> > +
> > +   for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) {
> > +   rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB);
> > +   if (!rsv) {
> > +   pr_err("Could not map efi_memreserve\n");
> > +   return -ENOMEM;
> > +  

[PATCH] efi: arm64: Introduce /sys/firmware/efi/memreserve to tell the persistent pages

2019-11-25 Thread Masayoshi Mizuma
From: Masayoshi Mizuma 

kexec reboot stops in early boot sequence because efi_config_parse_tables()
refers garbage data. We can see the log with memblock=debug kernel option:

  efi:  ACPI 2.0=0x9821790014  PROP=0x8757f5c0  SMBIOS 3.0=0x982074  
MEMRESERVE=0x9820bfdc58
  memblock_reserve: [0x0000009820bfdc58-0x0000009820bfdc67] 
efi_config_parse_tables+0x228/0x278
  memblock_reserve: [0x0000000082760000-0x00000000324d07ff] 
efi_config_parse_tables+0x228/0x278
  memblock_reserve: [0xcc4f84ecc0511670-0x5f6e5214a7fd91f9] 
efi_config_parse_tables+0x244/0x278
  memblock_reserve: [0xd2fd4144b9af693d-0xad0c1db1086f40a2] 
efi_config_parse_tables+0x244/0x278
  memblock_reserve: [0x0c719bb159b1fadc-0x5aa6e62a1417ce12] 
efi_config_parse_tables+0x244/0x278
  ...

That happens because 0x82760000, struct linux_efi_memreserve, is destroyed.
0x82760000 is pointed from efi.mem_reserve, and efi.mem_reserve points the
head page of LPI pending table and LPI property table which are allocated by
gic_reserve_range().

The destroyer is kexec. kexec locates the initrd to the area:

  ]# kexec -d -l /boot/vmlinuz-5.4.0-rc7 /boot/initramfs-5.4.0-rc7.img 
--reuse-cmdline
  ...
  initrd: base 82290000, size 388dd8ah (59301258)
  ...

From dynamic debug log. initrd is located in segment[1]:
  machine_kexec_prepare:70:
kexec kimage info:
  type:0
  start:   85b30680
  head:0
  nr_segments: 4
segment[0]: 80480000 - 82290000, 0x1e10000 bytes, 481 pages
segment[1]: 82290000 - 85b20000, 0x3890000 bytes, 905 pages
segment[2]: 85b20000 - 85b30000, 0x10000 bytes, 1 pages
segment[3]: 85b30000 - 85b40000, 0x10000 bytes, 1 pages

kexec searches the memory region to locate initrd through
"System RAM" in /proc/iomem. The pending tables are included in
"System RAM" because they are allocated by alloc_pages(), so kexec
destroys the LPI pending tables.

Introduce /sys/firmware/efi/memreserve to tell the pages pointed by
efi.mem_reserve so that kexec can avoid the area to locate initrd.

Signed-off-by: Masayoshi Mizuma 
---
 drivers/firmware/efi/efi.c | 45 +-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index e98bbf8e5..0aa07cc09 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -141,6 +141,47 @@ static ssize_t systab_show(struct kobject *kobj,
 
 static struct kobj_attribute efi_attr_systab = __ATTR_RO_MODE(systab, 0400);
 
+static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init;
+#ifdef CONFIG_KEXEC
+static ssize_t memreserve_show(struct kobject *kobj,
+  struct kobj_attribute *attr, char *buf)
+{
+   struct linux_efi_memreserve *rsv;
+   phys_addr_t start, end;
+   unsigned long prsv;
+   char *str = buf;
+   int count, i;
+
+   if (!kobj || !buf)
+   return -EINVAL;
+
+   if ((efi_memreserve_root == (void *)ULONG_MAX) ||
+   (!efi_memreserve_root))
+   return -ENODEV;
+
+   for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) {
+   rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB);
+   if (!rsv) {
+   pr_err("Could not map efi_memreserve\n");
+   return -ENOMEM;
+   }
+   count = atomic_read(&rsv->count);
+   for (i = 0; i < count; i++) {
+   start = rsv->entry[i].base;
+   end = start + rsv->entry[i].size - 1;
+
+   str += sprintf(str, "%pa-%pa\n", &start, &end);
+   }
+   memunmap(rsv);
+   }
+
+   return str - buf;
+}
+
+static struct kobj_attribute efi_attr_memreserve =
+   __ATTR_RO_MODE(memreserve, 0444);
+#endif /* CONFIG_KEXEC */
+
 #define EFI_FIELD(var) efi.var
 
 #define EFI_ATTR_SHOW(name) \
@@ -172,6 +213,9 @@ static struct attribute *efi_subsys_attrs[] = {
&efi_attr_runtime.attr,
&efi_attr_config_table.attr,
&efi_attr_fw_platform_size.attr,
+#ifdef CONFIG_KEXEC
+   &efi_attr_memreserve.attr,
+#endif
NULL,
 };
 
@@ -955,7 +999,6 @@ int efi_status_to_err(efi_status_t status)
 }
 
 static DEFINE_SPINLOCK(efi_mem_reserve_persistent_lock);
-static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init;
 
 static int __init efi_memreserve_map_root(void)
 {
-- 
2.18.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [RFC PATCH v2] efi: arm64: Introduce /sys/firmware/efi/memreserve to tell the persistent pages

2019-11-21 Thread Masayoshi Mizuma
On Thu, Nov 14, 2019 at 11:10:19AM -0500, Masayoshi Mizuma wrote:
> From: Masayoshi Mizuma 
> 
> kexec reboot stops in early boot sequence because efi_config_parse_tables()
> refers garbage data. We can see the log with memblock=debug kernel option:
> 
>   efi:  ACPI 2.0=0x9821790014  PROP=0x8757f5c0  SMBIOS 3.0=0x982074  
> MEMRESERVE=0x9820bfdc58
>   memblock_reserve: [0x0000009820bfdc58-0x0000009820bfdc67] 
> efi_config_parse_tables+0x228/0x278
>   memblock_reserve: [0x0000000082760000-0x00000000324d07ff] 
> efi_config_parse_tables+0x228/0x278
>   memblock_reserve: [0xcc4f84ecc0511670-0x5f6e5214a7fd91f9] 
> efi_config_parse_tables+0x244/0x278
>   memblock_reserve: [0xd2fd4144b9af693d-0xad0c1db1086f40a2] 
> efi_config_parse_tables+0x244/0x278
>   memblock_reserve: [0x0c719bb159b1fadc-0x5aa6e62a1417ce12] 
> efi_config_parse_tables+0x244/0x278
>   ...
> 
> That happens because 0x82760000, struct linux_efi_memreserve, is destroyed.
> 0x82760000 is pointed from efi.mem_reserve, and efi.mem_reserve points the
> head page of LPI pending table and LPI property table which are allocated by
> gic_reserve_range().
> 
> The destroyer is kexec. kexec locates the initrd to the area:
> 
>   ]# kexec -d -l /boot/vmlinuz-5.4.0-rc7 /boot/initramfs-5.4.0-rc7.img 
> --reuse-cmdline
>   ...
>   initrd: base 82290000, size 388dd8ah (59301258)
>   ...
> 
> From dynamic debug log. initrd is located in segment[1]:
>   machine_kexec_prepare:70:
> kexec kimage info:
>   type:0
>   start:   85b30680
>   head:0
>   nr_segments: 4
> segment[0]: 80480000 - 82290000, 0x1e10000 bytes, 481 pages
> segment[1]: 82290000 - 85b20000, 0x3890000 bytes, 905 pages
> segment[2]: 85b20000 - 85b30000, 0x10000 bytes, 1 pages
> segment[3]: 85b30000 - 85b40000, 0x10000 bytes, 1 pages
> 
> kexec searches the memory region to locate initrd through
> "System RAM" in /proc/iomem. The pending tables are included in
> "System RAM" because they are allocated by alloc_pages(), so kexec
> destroys the LPI pending tables.
> 
> Introduce /sys/firmware/efi/memreserve to tell the pages pointed by
> efi.mem_reserve so that kexec can avoid the area to locate initrd.
> 
> Signed-off-by: Masayoshi Mizuma 
> ---
>  drivers/firmware/efi/efi.c | 41 +-
>  1 file changed, 40 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
> index 0b6b0c19a..07812d697 100644
> --- a/drivers/firmware/efi/efi.c
> +++ b/drivers/firmware/efi/efi.c
> @@ -149,6 +149,45 @@ static ssize_t systab_show(struct kobject *kobj,
>  
>  static struct kobj_attribute efi_attr_systab = __ATTR_RO_MODE(systab, 0400);
>  
> +static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init;

> +static ssize_t memreserve_show(struct kobject *kobj,
> +struct kobj_attribute *attr, char *buf)
> +{
> + struct linux_efi_memreserve *rsv;
> + phys_addr_t start, end;
> + unsigned long prsv;
> + char *str = buf;
> + int count, i;
> +
> + if (!kobj || !buf)
> + return -EINVAL;
> +
> + if ((efi_memreserve_root == (void *)ULONG_MAX) ||
> + (!efi_memreserve_root))
> + return -ENODEV;
> +
> + for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) {
> + rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB);
> + if (!rsv) {
> + pr_err("Could not map efi_memreserve\n");
> + return -ENOMEM;
> + }
> + count = atomic_read(&rsv->count);
> + for (i = 0; i < count; i++) {
> + start = rsv->entry[i].base;
> + end = start + rsv->entry[i].size - 1;
> +
> + str += sprintf(str, "%pa-%pa\n", &start, &end);
> + }
> + memunmap(rsv);
> + }
> +
> + return str - buf;
> +}
> +
> +static struct kobj_attribute efi_attr_memreserve =
> + __ATTR_RO_MODE(memreserve, 0444);
> +

I think it's better to put above between #ifdef CONFIG_KEXEC and #endif
because it's useful for only kexec...

>  #define EFI_FIELD(var) efi.var
>  
>  #define EFI_ATTR_SHOW(name) \
> @@ -180,6 +219,7 @@ static struct attribute *efi_subsys_attrs[] = {
>   &efi_attr_runtime.attr,
>   &efi_attr_config_table.attr,
>   &efi_attr_fw_platform_size.attr,

> + &efi_attr_memreserve.attr,

Same as.
I'll post the patch to change above and will remove the 

[RFC PATCH v2] efi: arm64: Introduce /sys/firmware/efi/memreserve to tell the persistent pages

2019-11-14 Thread Masayoshi Mizuma
From: Masayoshi Mizuma 

kexec reboot stops in early boot sequence because efi_config_parse_tables()
refers garbage data. We can see the log with memblock=debug kernel option:

  efi:  ACPI 2.0=0x9821790014  PROP=0x8757f5c0  SMBIOS 3.0=0x982074  
MEMRESERVE=0x9820bfdc58
  memblock_reserve: [0x0000009820bfdc58-0x0000009820bfdc67] 
efi_config_parse_tables+0x228/0x278
  memblock_reserve: [0x0000000082760000-0x00000000324d07ff] 
efi_config_parse_tables+0x228/0x278
  memblock_reserve: [0xcc4f84ecc0511670-0x5f6e5214a7fd91f9] 
efi_config_parse_tables+0x244/0x278
  memblock_reserve: [0xd2fd4144b9af693d-0xad0c1db1086f40a2] 
efi_config_parse_tables+0x244/0x278
  memblock_reserve: [0x0c719bb159b1fadc-0x5aa6e62a1417ce12] 
efi_config_parse_tables+0x244/0x278
  ...

That happens because 0x82760000, struct linux_efi_memreserve, is destroyed.
0x82760000 is pointed from efi.mem_reserve, and efi.mem_reserve points the
head page of LPI pending table and LPI property table which are allocated by
gic_reserve_range().

The destroyer is kexec. kexec locates the initrd to the area:

  ]# kexec -d -l /boot/vmlinuz-5.4.0-rc7 /boot/initramfs-5.4.0-rc7.img 
--reuse-cmdline
  ...
  initrd: base 82290000, size 388dd8ah (59301258)
  ...

From dynamic debug log. initrd is located in segment[1]:
  machine_kexec_prepare:70:
kexec kimage info:
  type:0
  start:   85b30680
  head:0
  nr_segments: 4
segment[0]: 80480000 - 82290000, 0x1e10000 bytes, 481 pages
segment[1]: 82290000 - 85b20000, 0x3890000 bytes, 905 pages
segment[2]: 85b20000 - 85b30000, 0x10000 bytes, 1 pages
segment[3]: 85b30000 - 85b40000, 0x10000 bytes, 1 pages

kexec searches the memory region to locate initrd through
"System RAM" in /proc/iomem. The pending tables are included in
"System RAM" because they are allocated by alloc_pages(), so kexec
destroys the LPI pending tables.

Introduce /sys/firmware/efi/memreserve to tell the pages pointed by
efi.mem_reserve so that kexec can avoid the area to locate initrd.

Signed-off-by: Masayoshi Mizuma 
---
 drivers/firmware/efi/efi.c | 41 +-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 0b6b0c19a..07812d697 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -149,6 +149,45 @@ static ssize_t systab_show(struct kobject *kobj,
 
 static struct kobj_attribute efi_attr_systab = __ATTR_RO_MODE(systab, 0400);
 
+static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init;
+static ssize_t memreserve_show(struct kobject *kobj,
+  struct kobj_attribute *attr, char *buf)
+{
+   struct linux_efi_memreserve *rsv;
+   phys_addr_t start, end;
+   unsigned long prsv;
+   char *str = buf;
+   int count, i;
+
+   if (!kobj || !buf)
+   return -EINVAL;
+
+   if ((efi_memreserve_root == (void *)ULONG_MAX) ||
+   (!efi_memreserve_root))
+   return -ENODEV;
+
+   for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) {
+   rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB);
+   if (!rsv) {
+   pr_err("Could not map efi_memreserve\n");
+   return -ENOMEM;
+   }
+   count = atomic_read(&rsv->count);
+   for (i = 0; i < count; i++) {
+   start = rsv->entry[i].base;
+   end = start + rsv->entry[i].size - 1;
+
+   str += sprintf(str, "%pa-%pa\n", &start, &end);
+   }
+   memunmap(rsv);
+   }
+
+   return str - buf;
+}
+
+static struct kobj_attribute efi_attr_memreserve =
+   __ATTR_RO_MODE(memreserve, 0444);
+
 #define EFI_FIELD(var) efi.var
 
 #define EFI_ATTR_SHOW(name) \
@@ -180,6 +219,7 @@ static struct attribute *efi_subsys_attrs[] = {
&efi_attr_runtime.attr,
&efi_attr_config_table.attr,
&efi_attr_fw_platform_size.attr,
+   &efi_attr_memreserve.attr,
NULL,
 };
 
@@ -964,7 +1004,6 @@ int efi_status_to_err(efi_status_t status)
 }
 
 static DEFINE_SPINLOCK(efi_mem_reserve_persistent_lock);
-static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init;
 
 static int __init efi_memreserve_map_root(void)
 {
-- 
2.21.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [RFC PATCH] efi: arm64: Introduce /sys/firmware/efi/memreserve to tell the persistent pages

2019-11-13 Thread Masayoshi Mizuma
On Wed, Nov 13, 2019 at 07:59:04AM +, d.hatay...@fujitsu.com wrote:
> > From: Masayoshi Mizuma 
> > 
> > kexec reboot gets stuck because efi_config_parse_tables() refers to garbage
> >  (with memblock=debug):
> > 
> >   efi:  ACPI 2.0=0x9821790014  PROP=0x8757f5c0  SMBIOS 3.0=0x982074
> > MEMRESERVE=0x9820bfdc58
> >   memblock_reserve: [0x0000009820bfdc58-0x0000009820bfdc67]
> > efi_config_parse_tables+0x228/0x278
> >   memblock_reserve: [0x0000000082760000-0x00000000324d07ff]
> > efi_config_parse_tables+0x228/0x278
> >   memblock_reserve: [0xcc4f84ecc0511670-0x5f6e5214a7fd91f9]
> > efi_config_parse_tables+0x244/0x278
> >   memblock_reserve: [0xd2fd4144b9af693d-0xad0c1db1086f40a2]
> > efi_config_parse_tables+0x244/0x278
> >   memblock_reserve: [0x0c719bb159b1fadc-0x5aa6e62a1417ce12]
> > efi_config_parse_tables+0x244/0x278
> >   ...
> > 
> > That happens because 0x82760000, struct linux_efi_memreserve, is destroyed.
> > 0x82760000 is pointed from efi.mem_reserve, and efi.mem_reserve points the
> > head page of pending table and prop table which are allocated by
> > gic_reserve_range().
> > 
> > The destroyer is kexec. kexec locates the initrd to the area:
> > 
> > # kexec -d -l /boot/vmlinuz-5.4.0-rc7 /boot/initramfs-5.4.0-rc7.img
> > --reuse-cmdline
> > ...
> > initrd: base 82290000, size 388dd8ah (59301258)
> > ...
> > 
> > From dynamic debug log:
> >   machine_kexec_prepare:70:
> > kexec kimage info:
> >   type:0
> >   start:   85b30680
> >   head:0
> >   nr_segments: 4
> > segment[0]: 80480000 - 82290000, 0x1e10000 bytes, 481 pages
> > segment[1]: 82290000 - 85b20000, 0x3890000 bytes, 905 pages
> > segment[2]: 85b20000 - 85b30000, 0x10000 bytes, 1 pages
> > segment[3]: 85b30000 - 85b40000, 0x10000 bytes, 1 pages
> > 
> > kexec searches the appropriate memory region to locate initrd through 
> > "System
> > RAM"
> > in /proc/iomem. The pending tables are included in "System RAM" because they
> > are
> > allocated by alloc_pages(), so kexec destroys the pending tables.
> > 
> > Introduce /sys/firmware/efi/memreserve to tell the pages pointed by
> > efi.mem_reserve
> > so that kexec can avoid the area to locate initrd.
> > 
> > Signed-off-by: Masayoshi Mizuma 
> > ---
> >  drivers/firmware/efi/efi.c | 32 +++-
> >  1 file changed, 31 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
> > index e98bbf8e5..67b21ae7a 100644
> > --- a/drivers/firmware/efi/efi.c
> > +++ b/drivers/firmware/efi/efi.c
> > @@ -141,6 +141,36 @@ static ssize_t systab_show(struct kobject *kobj,
> > 
> >  static struct kobj_attribute efi_attr_systab = __ATTR_RO_MODE(systab, 
> > 0400);
> > 
> > +static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init;
> > +static ssize_t memreserve_show(struct kobject *kobj,
> > +  struct kobj_attribute *attr, char *buf)
> > +{
> > +   struct linux_efi_memreserve *rsv;
> > +   unsigned long prsv;
> > +   char *str = buf;
> > +   int index, i;
> > +
> > +   if (!kobj || !buf)
> > +   return -EINVAL;
> > +
> > +   if (!efi_memreserve_root)
> > +   return -ENODEV;
> 
> Other functions use different conditions.
> The latter efi_memreserve_root == (void *)ULONG_MAX is correct?
> 
> static int __init efi_memreserve_map_root(void)
> {
> if (efi.mem_reserve == EFI_INVALID_TABLE_ADDR)
> return -ENODEV;
> int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
> {
> struct linux_efi_memreserve *rsv;
> unsigned long prsv;
> int rc, index;
> 
> if (efi_memreserve_root == (void *)ULONG_MAX)
> return -ENODEV;

I think it's better to add both checks, like this:

if ((efi_memreserve_root == (void *)ULONG_MAX) ||
(!efi_memreserve_root))
return -ENODEV;
> 
> > +
> > +   for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) {
> > +   rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB);
> 
> memremap() could fail with NULL as a return value.
> You need to deal with such case.
> 
> It looks to me efi_mem_reserve_persistent() also doesn't deal with this.
>

[RFC PATCH] efi: arm64: Introduce /sys/firmware/efi/memreserve to tell the persistent pages

2019-11-12 Thread Masayoshi Mizuma
From: Masayoshi Mizuma 

kexec reboot gets stuck because efi_config_parse_tables() refers to garbage
 (with memblock=debug):

  efi:  ACPI 2.0=0x9821790014  PROP=0x8757f5c0  SMBIOS 3.0=0x982074  
MEMRESERVE=0x9820bfdc58
  memblock_reserve: [0x0000009820bfdc58-0x0000009820bfdc67] 
efi_config_parse_tables+0x228/0x278
  memblock_reserve: [0x0000000082760000-0x00000000324d07ff] 
efi_config_parse_tables+0x228/0x278
  memblock_reserve: [0xcc4f84ecc0511670-0x5f6e5214a7fd91f9] 
efi_config_parse_tables+0x244/0x278
  memblock_reserve: [0xd2fd4144b9af693d-0xad0c1db1086f40a2] 
efi_config_parse_tables+0x244/0x278
  memblock_reserve: [0x0c719bb159b1fadc-0x5aa6e62a1417ce12] 
efi_config_parse_tables+0x244/0x278
  ...

That happens because 0x82760000, struct linux_efi_memreserve, is destroyed.
0x82760000 is pointed from efi.mem_reserve, and efi.mem_reserve points the
head page of pending table and prop table which are allocated by 
gic_reserve_range().

The destroyer is kexec. kexec locates the initrd to the area:

# kexec -d -l /boot/vmlinuz-5.4.0-rc7 /boot/initramfs-5.4.0-rc7.img 
--reuse-cmdline
...
initrd: base 82290000, size 388dd8ah (59301258)
...

From dynamic debug log:
  machine_kexec_prepare:70:
kexec kimage info:
  type:0
  start:   85b30680
  head:0
  nr_segments: 4
segment[0]: 80480000 - 82290000, 0x1e10000 bytes, 481 pages
segment[1]: 82290000 - 85b20000, 0x3890000 bytes, 905 pages
segment[2]: 85b20000 - 85b30000, 0x10000 bytes, 1 pages
segment[3]: 85b30000 - 85b40000, 0x10000 bytes, 1 pages

kexec searches the appropriate memory region to locate initrd through "System 
RAM"
in /proc/iomem. The pending tables are included in "System RAM" because they are
allocated by alloc_pages(), so kexec destroys the pending tables.

Introduce /sys/firmware/efi/memreserve to tell the pages pointed by 
efi.mem_reserve
so that kexec can avoid the area to locate initrd.

Signed-off-by: Masayoshi Mizuma 
---
 drivers/firmware/efi/efi.c | 32 +++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index e98bbf8e5..67b21ae7a 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -141,6 +141,36 @@ static ssize_t systab_show(struct kobject *kobj,
 
 static struct kobj_attribute efi_attr_systab = __ATTR_RO_MODE(systab, 0400);
 
+static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init;
+static ssize_t memreserve_show(struct kobject *kobj,
+  struct kobj_attribute *attr, char *buf)
+{
+   struct linux_efi_memreserve *rsv;
+   unsigned long prsv;
+   char *str = buf;
+   int index, i;
+
+   if (!kobj || !buf)
+   return -EINVAL;
+
+   if (!efi_memreserve_root)
+   return -ENODEV;
+
+   for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) {
+   rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB);
+   index = atomic_read(&rsv->count);
+   for (i = 0; i < index; i++)
+   str += sprintf(str, "%llx-%llx\n",
+   rsv->entry[i].base,
+   rsv->entry[i].base + rsv->entry[i].size - 1);
+   memunmap(rsv);
+   }
+
+   return str - buf;
+}
+
+static struct kobj_attribute efi_attr_memreserve = __ATTR_RO_MODE(memreserve, 
0444);
+
 #define EFI_FIELD(var) efi.var
 
 #define EFI_ATTR_SHOW(name) \
@@ -172,6 +202,7 @@ static struct attribute *efi_subsys_attrs[] = {
&efi_attr_runtime.attr,
&efi_attr_config_table.attr,
&efi_attr_fw_platform_size.attr,
+   &efi_attr_memreserve.attr,
NULL,
 };
 
@@ -955,7 +986,6 @@ int efi_status_to_err(efi_status_t status)
 }
 
 static DEFINE_SPINLOCK(efi_mem_reserve_persistent_lock);
-static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init;
 
 static int __init efi_memreserve_map_root(void)
 {
-- 
2.18.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec