[PATCH v3 1/3] kexec: add variant helper functions for handling memory regions
From: AKASHI Takahiro mem_regions_alloc_and_add() and mem_regions_alloc_and_exclude() are functionally equivalent to, respectively, mem_regions_add() and mem_regions_exclude() except the formers will re-allocate memory dynamically when no more entries are available in 'ranges' array. Signed-off-by: AKASHI Takahiro Tested-by: Bhupesh Sharma Tested-by: Masayoshi Mizuma --- kexec/mem_regions.c | 42 ++ kexec/mem_regions.h | 7 +++ 2 files changed, 49 insertions(+) diff --git a/kexec/mem_regions.c b/kexec/mem_regions.c index 50c8abc..ad7d3f1 100644 --- a/kexec/mem_regions.c +++ b/kexec/mem_regions.c @@ -125,3 +125,45 @@ int mem_regions_exclude(struct memory_ranges *ranges, } return 0; } + +#define KEXEC_MEMORY_RANGES 16 + +int mem_regions_alloc_and_add(struct memory_ranges *ranges, + unsigned long long base, + unsigned long long length, int type) +{ + void *new_ranges; + + if (ranges->size >= ranges->max_size) { + new_ranges = realloc(ranges->ranges, + sizeof(struct memory_range) * + (ranges->max_size + KEXEC_MEMORY_RANGES)); + if (!new_ranges) + return -1; + + ranges->ranges = new_ranges; + ranges->max_size += KEXEC_MEMORY_RANGES; + } + + return mem_regions_add(ranges, base, length, type); +} + +int mem_regions_alloc_and_exclude(struct memory_ranges *ranges, + const struct memory_range *range) +{ + void *new_ranges; + + /* for safety, we should have at least one free entry in ranges */ + if (ranges->size >= ranges->max_size) { + new_ranges = realloc(ranges->ranges, + sizeof(struct memory_range) * + (ranges->max_size + KEXEC_MEMORY_RANGES)); + if (!new_ranges) + return -1; + + ranges->ranges = new_ranges; + ranges->max_size += KEXEC_MEMORY_RANGES; + } + + return mem_regions_exclude(ranges, range); +} diff --git a/kexec/mem_regions.h b/kexec/mem_regions.h index ae9e972..e306d67 100644 --- a/kexec/mem_regions.h +++ b/kexec/mem_regions.h @@ -12,4 +12,11 @@ int mem_regions_exclude(struct memory_ranges *ranges, int mem_regions_add(struct memory_ranges *ranges, unsigned long 
long base, unsigned long long length, int type); +int mem_regions_alloc_and_exclude(struct memory_ranges *ranges, + const struct memory_range *range); + +int mem_regions_alloc_and_add(struct memory_ranges *ranges, + unsigned long long base, + unsigned long long length, int type); + #endif -- 2.18.1 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH v3 2/3] arm64: kexec: allocate memory space avoiding reserved regions
From: AKASHI Takahiro On UEFI/ACPI-only system, some memory regions, including but not limited to UEFI memory map and ACPI tables, must be preserved across kexec'ing. Otherwise, they can be corrupted and result in early failure in booting a new kernel. In recent kernels, /proc/iomem now has an extended file format like: 4000-5871 : System RAM 4180-426a : Kernel code 426b-42aa : reserved 42ab-42c64fff : Kernel data 5440-583f : Crash kernel 5859-585e : reserved 5870-5871 : reserved 5872-58b5 : reserved 58b6-5be3 : System RAM 58b61000-58b61fff : reserved where the "reserved" entries at the top level or under System RAM (and its descendant resources) are ones of such kind and should not be regarded as usable memory ranges where several free spaces for loading kexec data will be allocated. With this patch, get_memory_ranges() will handle this format of file correctly. Note that, for safety, unknown regions, in addition to "reserved" ones, will also be excluded. Signed-off-by: AKASHI Takahiro Tested-by: Bhupesh Sharma Tested-by: Masayoshi Mizuma --- kexec/arch/arm64/kexec-arm64.c | 153 - 1 file changed, 94 insertions(+), 59 deletions(-) diff --git a/kexec/arch/arm64/kexec-arm64.c b/kexec/arch/arm64/kexec-arm64.c index 6ad3b0a..45ebc54 100644 --- a/kexec/arch/arm64/kexec-arm64.c +++ b/kexec/arch/arm64/kexec-arm64.c @@ -10,7 +10,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -29,6 +31,7 @@ #include "fs2dt.h" #include "iomem.h" #include "kexec-syscall.h" +#include "mem_regions.h" #include "arch/options.h" #define ROOT_NODE_ADDR_CELLS_DEFAULT 1 @@ -905,19 +908,33 @@ int get_phys_base_from_pt_load(unsigned long *phys_offset) return 0; } +static bool to_be_excluded(char *str) +{ + if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) || + !strncmp(str, KERNEL_CODE, strlen(KERNEL_CODE)) || + !strncmp(str, KERNEL_DATA, strlen(KERNEL_DATA)) || + !strncmp(str, CRASH_KERNEL, strlen(CRASH_KERNEL))) + return false; + else + return true; +} + 
/** - * get_memory_ranges_iomem_cb - Helper for get_memory_ranges_iomem. + * get_memory_ranges - Try to get the memory ranges from + * /proc/iomem. */ - -static int get_memory_ranges_iomem_cb(void *data, int nr, char *str, - unsigned long long base, unsigned long long length) +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long kexec_flags) { - int ret; unsigned long phys_offset = UINT64_MAX; - struct memory_range *r; - - if (nr >= KEXEC_SEGMENT_MAX) - return -1; + FILE *fp; + const char *iomem = proc_iomem(); + char line[MAX_LINE], *str; + unsigned long long start, end; + int n, consumed; + struct memory_ranges memranges; + struct memory_range *last, excl_range; + int ret; if (!try_read_phys_offset_from_kcore) { /* Since kernel version 4.19, 'kcore' contains @@ -951,17 +968,72 @@ static int get_memory_ranges_iomem_cb(void *data, int nr, char *str, try_read_phys_offset_from_kcore = true; } - r = (struct memory_range *)data + nr; + fp = fopen(iomem, "r"); + if (!fp) + die("Cannot open %s\n", iomem); + + memranges.ranges = NULL; + memranges.size = memranges.max_size = 0; + + while (fgets(line, sizeof(line), fp) != 0) { + n = sscanf(line, "%llx-%llx : %n", &start, &end, &consumed); + if (n != 2) + continue; + str = line + consumed; + + if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM))) { + ret = mem_regions_alloc_and_add(&memranges, + start, end - start + 1, RANGE_RAM); + if (ret) { + fprintf(stderr, + "Cannot allocate memory for ranges\n"); + fclose(fp); + return -ENOMEM; + } - if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM))) - r->type = RANGE_RAM; - else if (!strncmp(str, IOMEM_RESERVED, strlen(IOMEM_RESERVED))) - r->type = RANGE_RESERVED; - else - return 1; + dbgprintf("%s:+[%d] %016llx - %016llx\n", __func__, + memranges.size - 1, + memranges.ranges[memranges.size - 1].start, + memranges.ranges[memranges.size - 1].end); + } else if (to_be_excluded(str)) { +
[PATCH v3 3/3] arm64: kdump: deal with a lot of resource entries in /proc/iomem
From: AKASHI Takahiro As described in the commit ("arm64: kexec: allocate memory space avoiding reserved regions"), /proc/iomem now has a lot of "reserved" entries, and it's not just enough to have a fixed size of memory range array. With this patch, kdump is allowed to handle arbitrary number of memory ranges, using mem_regions_alloc_and_xxx() functions. Signed-off-by: AKASHI Takahiro Tested-by: Bhupesh Sharma Tested-by: Masayoshi Mizuma --- kexec/arch/arm64/crashdump-arm64.c | 25 ++--- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/kexec/arch/arm64/crashdump-arm64.c b/kexec/arch/arm64/crashdump-arm64.c index 4fd7aa8..38d1a0f 100644 --- a/kexec/arch/arm64/crashdump-arm64.c +++ b/kexec/arch/arm64/crashdump-arm64.c @@ -23,13 +23,8 @@ #include "kexec-elf.h" #include "mem_regions.h" -/* memory ranges on crashed kernel */ -static struct memory_range system_memory_ranges[CRASH_MAX_MEMORY_RANGES]; -static struct memory_ranges system_memory_rgns = { - .size = 0, - .max_size = CRASH_MAX_MEMORY_RANGES, - .ranges = system_memory_ranges, -}; +/* memory ranges of crashed kernel */ +static struct memory_ranges system_memory_rgns; /* memory range reserved for crashkernel */ struct memory_range crash_reserved_mem; @@ -82,7 +77,7 @@ static uint64_t get_kernel_page_offset(void) * * This function is called once for each memory region found in /proc/iomem. * It locates system RAM and crashkernel reserved memory and places these to - * variables, respectively, system_memory_ranges and crash_reserved_mem. + * variables, respectively, system_memory_rgns and usablemem_rgns. 
*/ static int iomem_range_callback(void *UNUSED(data), int UNUSED(nr), @@ -90,11 +85,11 @@ static int iomem_range_callback(void *UNUSED(data), int UNUSED(nr), unsigned long long length) { if (strncmp(str, CRASH_KERNEL, strlen(CRASH_KERNEL)) == 0) - return mem_regions_add(&usablemem_rgns, - base, length, RANGE_RAM); + return mem_regions_alloc_and_add(&usablemem_rgns, + base, length, RANGE_RAM); else if (strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) == 0) - return mem_regions_add(&system_memory_rgns, - base, length, RANGE_RAM); + return mem_regions_alloc_and_add(&system_memory_rgns, + base, length, RANGE_RAM); else if (strncmp(str, KERNEL_CODE, strlen(KERNEL_CODE)) == 0) elf_info.kern_paddr_start = base; else if (strncmp(str, KERNEL_DATA, strlen(KERNEL_DATA)) == 0) @@ -135,9 +130,9 @@ static int crash_get_memory_ranges(void) dbgprint_mem_range("Reserved memory range", &crash_reserved_mem, 1); - if (mem_regions_exclude(&system_memory_rgns, &crash_reserved_mem)) { - fprintf(stderr, - "Error: Number of crash memory ranges excedeed the max limit\n"); + if (mem_regions_alloc_and_exclude(&system_memory_rgns, + &crash_reserved_mem)) { + fprintf(stderr, "Cannot allocate memory for ranges\n"); return -ENOMEM; } -- 2.18.1 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH v3 0/3] arm64: handle "reserved" entries in /proc/iomem
In recent arm64 kernels, /proc/iomem has an extended file format like: 40000000-5871ffff : System RAM 41800000-426affff : Kernel code 426b0000-42aaffff : reserved 42ab0000-42c64fff : Kernel data 54400000-583fffff : Crash kernel 58590000-585effff : reserved 58700000-5871ffff : reserved 58720000-58b5ffff : reserved 58b60000-5be3ffff : System RAM 58b61000-58b61fff : reserved where "reserved" entries can be an ACPI table, or UEFI-related code or data. If they are not preserved, they can be corrupted and result in an early failure when booting a new kernel. As an actual example, the LPI pending table and LPI property table, which are pointed to by UEFI data, are sometimes destroyed. They are expected to be preserved across kexec'ing. Changelog: v3: - Re-based to the latest commit (bd07796). - Added Tested-by tags from Bhupesh and Masayoshi. - Added error handling in case mem_regions_alloc_and_exclude() fails (0002 patch). AKASHI Takahiro (3): kexec: add variant helper functions for handling memory regions arm64: kexec: allocate memory space avoiding reserved regions arm64: kdump: deal with a lot of resource entries in /proc/iomem kexec/arch/arm64/crashdump-arm64.c | 25 ++--- kexec/arch/arm64/kexec-arm64.c | 153 ++--- kexec/mem_regions.c | 42 kexec/mem_regions.h | 7 ++ 4 files changed, 153 insertions(+), 74 deletions(-) -- 2.18.1 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH v2 2/3] arm64: kexec: allocate memory space avoiding reserved regions
On Wed, Dec 18, 2019 at 11:48:50AM +0900, AKASHI Takahiro wrote: > On Mon, Dec 16, 2019 at 09:16:06AM -0500, Masayoshi Mizuma wrote: > > On Mon, Dec 16, 2019 at 11:22:56AM +0530, Bhupesh Sharma wrote: > > > Thanks Masa, > > [...] > > > > Since this is an old thread, it would be useful for people looking at > > > the same, if you can add some comments/details about why you think > > > this nit is needed. > > > > Thank you for your follow up and I'm sorry I didn't explain it. > > mem_regions_alloc_and_exclude() may fail in case realloc() or > > mem_region_exclude() fail, so it would be better to add the error > > handling. > > > > > > > > Also if Akashi agrees with the same, it would be better if he could > > > send a rebased version of the patchset (with your comments addressed), > > > so that the same can be picked for upstream kexec-tools cleanly. > > > > Sounds great! > > I have almost forgotten the background of this patch. > If you see that it is still useful, please feel free to re-post it. Thanks! I'll repost the patchset as v3. - Masa > > Thanks, > -Takahiro Akashi > > > - Masa > > > > > > > > @Akashi- Hi Akashi, Please let us know your views. > > > > > > Thanks, > > > Bhupesh > > > > > > > + dbgprintf("%s:- %016llx - %016llx\n", > > > > > + __func__, start, end); > > > > > + } > > > > > + } > > > > > + > > > > > + fclose(fp); > > > > > + > > > > > + *range = memranges.ranges; > > > > > + *ranges = memranges.size; > > > > > > > > > > /* As a fallback option, we can try determining the PHYS_OFFSET > > > > >* value from the '/proc/iomem' entries as well. > > > > > @@ -976,52 +1041,15 @@ static int get_memory_ranges_iomem_cb(void > > > > > *data, int nr, char *str, > > > > >* between the user-space and kernel space 'PHYS_OFFSET' > > > > >* value. 
> > > > >*/ > > > > > - set_phys_offset(r->start, "iomem"); > > > > > + if (memranges.size) > > > > > + set_phys_offset(memranges.ranges[0].start, "iomem"); > > > > > > > > > > - dbgprintf("%s: %016llx - %016llx : %s", __func__, r->start, > > > > > - r->end, str); > > > > > + dbgprint_mem_range("System RAM ranges;", > > > > > + memranges.ranges, memranges.size); > > > > > > > > > > return 0; > > > > > } > > > > > > > > > > -/** > > > > > - * get_memory_ranges_iomem - Try to get the memory ranges from > > > > > - * /proc/iomem. > > > > > - */ > > > > > - > > > > > -static int get_memory_ranges_iomem(struct memory_range *array, > > > > > - unsigned int *count) > > > > > -{ > > > > > - *count = kexec_iomem_for_each_line(NULL, > > > > > - get_memory_ranges_iomem_cb, array); > > > > > - > > > > > - if (!*count) { > > > > > - dbgprintf("%s: failed: No RAM found.\n", __func__); > > > > > - return EFAILED; > > > > > - } > > > > > - > > > > > - return 0; > > > > > -} > > > > > - > > > > > -/** > > > > > - * get_memory_ranges - Try to get the memory ranges some how. > > > > > - */ > > > > > - > > > > > -int get_memory_ranges(struct memory_range **range, int *ranges, > > > > > - unsigned long kexec_flags) > > > > > -{ > > > > > - static struct memory_range array[KEXEC_SEGMENT_MAX]; > > > > > - unsigned int count; > > > > > - int result; > > > > > - > > > > > - result = get_memory_ranges_iomem(array, ); > > > > > - > > > > > - *range = result ? NULL : array; > > > > > - *ranges = result ? 0 : count; > > > > > - > > > > > - return result; > > > > > -} > > > > > - > > > > > int arch_compat_trampoline(struct kexec_info *info) > > > > > { > > > > > return 0; > > > > > -- > > > > > 2.19.1 > > > > > > > > > > > > > > > > > ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH v2 2/3] arm64: kexec: allocate memory space avoiding reserved regions
On Mon, Dec 16, 2019 at 11:22:56AM +0530, Bhupesh Sharma wrote: > Thanks Masa, > > On Sat, Dec 14, 2019 at 1:34 AM Masayoshi Mizuma > wrote: > > > > some nits as below: > > > > On Fri, Jan 11, 2019 at 06:59:45PM +0900, AKASHI Takahiro wrote: > > > On UEFI/ACPI-only system, some memory regions, including but not limited > > > to UEFI memory map and ACPI tables, must be preserved across kexec'ing. > > > Otherwise, they can be corrupted and result in early failure in booting > > > a new kernel. > > > > > > In recent kernels, /proc/iomem now has an extended file format like: > > > 4000-5871 : System RAM > > > 4180-426a : Kernel code > > > 426b-42aa : reserved > > > 42ab-42c64fff : Kernel data > > > 5440-583f : Crash kernel > > > 5859-585e : reserved > > > 5870-5871 : reserved > > > 5872-58b5 : reserved > > > 58b6-5be3 : System RAM > > > 58b61000-58b61fff : reserved > > > 59a77000-59a77fff : reserved > > > 5be4-5bec : reserved > > > 5bed-5bed : System RAM > > > 5bee-5bff : reserved > > > 5c00-5fff : System RAM > > > 5da0-5e9f : reserved > > > 5ec0-5edf : reserved > > > 5ef6a000-5ef6afff : reserved > > > 5ef6b000-5efcafff : reserved > > > 5efcd000-5efc : reserved > > > 5efd-5eff : reserved > > > 5f00-5fff : reserved > > > > > > where the "reserved" entries at the top level or under System RAM (and > > > its descendant resources) are ones of such kind and should not be regarded > > > as usable memory ranges where several free spaces for loading kexec data > > > will be allocated. > > > > > > With this patch, get_memory_ranges() will handle this format of file > > > correctly. Note that, for safety, unknown regions, in addition to > > > "reserved" ones, will also be excluded. 
> > > > > > Signed-off-by: AKASHI Takahiro > > > --- > > > kexec/arch/arm64/kexec-arm64.c | 146 - > > > 1 file changed, 87 insertions(+), 59 deletions(-) > > > > > > diff --git a/kexec/arch/arm64/kexec-arm64.c > > > b/kexec/arch/arm64/kexec-arm64.c > > > index 1cde75d1a771..2e923b54f5b1 100644 > > > --- a/kexec/arch/arm64/kexec-arm64.c > > > +++ b/kexec/arch/arm64/kexec-arm64.c > > > @@ -10,7 +10,9 @@ > > > #include > > > #include > > > #include > > > +#include > > > #include > > > +#include > > > #include > > > #include > > > #include > > > @@ -29,6 +31,7 @@ > > > #include "fs2dt.h" > > > #include "iomem.h" > > > #include "kexec-syscall.h" > > > +#include "mem_regions.h" > > > #include "arch/options.h" > > > > > > #define ROOT_NODE_ADDR_CELLS_DEFAULT 1 > > > @@ -899,19 +902,33 @@ int get_phys_base_from_pt_load(unsigned long > > > *phys_offset) > > > return 0; > > > } > > > > > > +static bool to_be_excluded(char *str) > > > +{ > > > + if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) || > > > + !strncmp(str, KERNEL_CODE, strlen(KERNEL_CODE)) || > > > + !strncmp(str, KERNEL_DATA, strlen(KERNEL_DATA)) || > > > + !strncmp(str, CRASH_KERNEL, strlen(CRASH_KERNEL))) > > > + return false; > > > + else > > > + return true; > > > +} > > > + > > > /** > > > - * get_memory_ranges_iomem_cb - Helper for get_memory_ranges_iomem. > > > + * get_memory_ranges - Try to get the memory ranges from > > > + * /proc/iomem. > > > */ > > > - > > > -static int get_memory_ranges_iomem_cb(void *data, int nr, char *str, > > > - unsigned long long base, unsigned long long length) > > > +int get_memory_ranges(struct memory_range **range, int *ranges, > > > + unsigned long kexec_flags) > > > { > > > - int ret; > > > unsigned long phys_offset = UINT64_MAX; > > > - struct memory_range *r; > > > - > > > - if (nr >= KEXEC_SEGMENT_MAX) > > > -
Re: [PATCH v2 2/3] arm64: kexec: allocate memory space avoiding reserved regions
some nits as below: On Fri, Jan 11, 2019 at 06:59:45PM +0900, AKASHI Takahiro wrote: > On UEFI/ACPI-only system, some memory regions, including but not limited > to UEFI memory map and ACPI tables, must be preserved across kexec'ing. > Otherwise, they can be corrupted and result in early failure in booting > a new kernel. > > In recent kernels, /proc/iomem now has an extended file format like: > 4000-5871 : System RAM > 4180-426a : Kernel code > 426b-42aa : reserved > 42ab-42c64fff : Kernel data > 5440-583f : Crash kernel > 5859-585e : reserved > 5870-5871 : reserved > 5872-58b5 : reserved > 58b6-5be3 : System RAM > 58b61000-58b61fff : reserved > 59a77000-59a77fff : reserved > 5be4-5bec : reserved > 5bed-5bed : System RAM > 5bee-5bff : reserved > 5c00-5fff : System RAM > 5da0-5e9f : reserved > 5ec0-5edf : reserved > 5ef6a000-5ef6afff : reserved > 5ef6b000-5efcafff : reserved > 5efcd000-5efc : reserved > 5efd-5eff : reserved > 5f00-5fff : reserved > > where the "reserved" entries at the top level or under System RAM (and > its descendant resources) are ones of such kind and should not be regarded > as usable memory ranges where several free spaces for loading kexec data > will be allocated. > > With this patch, get_memory_ranges() will handle this format of file > correctly. Note that, for safety, unknown regions, in addition to > "reserved" ones, will also be excluded. 
> > Signed-off-by: AKASHI Takahiro > --- > kexec/arch/arm64/kexec-arm64.c | 146 - > 1 file changed, 87 insertions(+), 59 deletions(-) > > diff --git a/kexec/arch/arm64/kexec-arm64.c b/kexec/arch/arm64/kexec-arm64.c > index 1cde75d1a771..2e923b54f5b1 100644 > --- a/kexec/arch/arm64/kexec-arm64.c > +++ b/kexec/arch/arm64/kexec-arm64.c > @@ -10,7 +10,9 @@ > #include > #include > #include > +#include > #include > +#include > #include > #include > #include > @@ -29,6 +31,7 @@ > #include "fs2dt.h" > #include "iomem.h" > #include "kexec-syscall.h" > +#include "mem_regions.h" > #include "arch/options.h" > > #define ROOT_NODE_ADDR_CELLS_DEFAULT 1 > @@ -899,19 +902,33 @@ int get_phys_base_from_pt_load(unsigned long > *phys_offset) > return 0; > } > > +static bool to_be_excluded(char *str) > +{ > + if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM)) || > + !strncmp(str, KERNEL_CODE, strlen(KERNEL_CODE)) || > + !strncmp(str, KERNEL_DATA, strlen(KERNEL_DATA)) || > + !strncmp(str, CRASH_KERNEL, strlen(CRASH_KERNEL))) > + return false; > + else > + return true; > +} > + > /** > - * get_memory_ranges_iomem_cb - Helper for get_memory_ranges_iomem. > + * get_memory_ranges - Try to get the memory ranges from > + * /proc/iomem. 
> */ > - > -static int get_memory_ranges_iomem_cb(void *data, int nr, char *str, > - unsigned long long base, unsigned long long length) > +int get_memory_ranges(struct memory_range **range, int *ranges, > + unsigned long kexec_flags) > { > - int ret; > unsigned long phys_offset = UINT64_MAX; > - struct memory_range *r; > - > - if (nr >= KEXEC_SEGMENT_MAX) > - return -1; > + FILE *fp; > + const char *iomem = proc_iomem(); > + char line[MAX_LINE], *str; > + unsigned long long start, end; > + int n, consumed; > + struct memory_ranges memranges; > + struct memory_range *last, excl_range; > + int ret; > > if (!try_read_phys_offset_from_kcore) { > /* Since kernel version 4.19, 'kcore' contains > @@ -945,17 +962,65 @@ static int get_memory_ranges_iomem_cb(void *data, int > nr, char *str, > try_read_phys_offset_from_kcore = true; > } > > - r = (struct memory_range *)data + nr; > + fp = fopen(iomem, "r"); > + if (!fp) > + die("Cannot open %s\n", iomem); > + > + memranges.ranges = NULL; > + memranges.size = memranges.max_size = 0; > + > + while (fgets(line, sizeof(line), fp) != 0) { > + n = sscanf(line, "%llx-%llx : %n", , , ); > + if (n != 2) > + continue; > + str = line + consumed; > + > + if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM))) { > + ret = mem_regions_alloc_and_add(, > + start, end - start + 1, RANGE_RAM); > + if (ret) { > + fprintf(stderr, > + "Cannot allocate memory for ranges\n"); fclose(fp); > + return -ENOMEM; > + } > > - if (!strncmp(str, SYSTEM_RAM,
Re: [PATCH v2 0/3] arm64: handle "reserved" entries in /proc/iomem
Hello, (I'm sorry if I break the mail thread...) As Bhupesh said at: http://lists.infradead.org/pipermail/kexec/2019-December/024142.html Please feel free to add: Tested-by: Bhupesh Sharma Tested-by: Masayoshi Mizuma Thanks! Masa On Fri, Jan 11, 2019 at 06:59:43PM +0900, AKASHI Takahiro wrote: > # @James, @Bhupesh > # Could you kindly test this patch, please? > # I'm not quite confident that I correctly merged my patch with, > # particularly, Bhupesh's recent change. > > In recent arm64 kernels, /proc/iomem has an extended file format like: > 4000-5871 : System RAM > 4180-426a : Kernel code > 426b-42aa : reserved > 42ab-42c64fff : Kernel data > 5440-583f : Crash kernel > 5859-585e : reserved > 5870-5871 : reserved > 5872-58b5 : reserved > 58b6-5be3 : System RAM > 58b61000-58b61fff : reserved > 59a77000-59a77fff : reserved > 5be4-5bec : reserved > 5bed-5bed : System RAM > 5bee-5bff : reserved > 5c00-5fff : System RAM > 5da0-5e9f : reserved > 5ec0-5edf : reserved > 5ef6a000-5ef6afff : reserved > 5ef6b000-5efcafff : reserved > 5efcd000-5efc : reserved > 5efd-5eff : reserved > 5f00-5fff : reserved > > where "reserved" entries can be an APCI table, UEFI related code or data, > and they are expected to be preserved across kexec'ing. > With this patch[1], kexec/kdump will be allowed to handle them properly. > > [1] https://git.linaro.org/people/takahiro.akashi/kexec-tools.git > arm64/resv_mem > > AKASHI Takahiro (3): > kexec: add variant helper functions for handling memory regions > arm64: kexec: allocate memory space avoiding reserved regions > arm64: kdump: deal with a lot of resource entries in /proc/iomem > > kexec/arch/arm64/crashdump-arm64.c | 25 ++--- > kexec/arch/arm64/kexec-arm64.c | 146 + > kexec/mem_regions.c| 42 + > kexec/mem_regions.h| 7 ++ > 4 files changed, 146 insertions(+), 74 deletions(-) > > -- > 2.19.1 > > ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH] efi/memreserve: register reservations as 'reserved' in /proc/iomem
On Wed, Dec 04, 2019 at 06:17:59PM +, James Morse wrote: > Hi Masa, > > On 04/12/2019 17:17, Masayoshi Mizuma wrote: > > Thank you for sending the patch, but unfortunately it doesn't work for the > > issue... > > > > After applied your patch, the LPI tables are marked as reserved in > > /proc/iomem like as: > > > > 8030-a1fd : System RAM > > 8048-8134 : Kernel code > > 8135-817b : reserved > > 817c-82ac : Kernel data > > 830f-830f : reserved # Property table > > 8348-83480fff : reserved # Pending table > > 8349-8349 : reserved # Pending table > > > > However, kexec tries to allocate memory from System RAM, it doesn't care > > the reserved in System RAM. > > > I'm not sure why kexec doesn't care the reserved in System RAM, however, > > Hmm, we added these to fix a problem with the UEFI memory map, and more > recently ACPI > tables being overwritten by kexec. > > Which version of kexec-tools are you using? Could you try: > https://git.linaro.org/people/takahiro.akashi/kexec-tools.git/commit/?h=arm64/resv_mem Thanks a lot! It worked and the issue is gone with Ard's patch and the linaro kexec (arm64/resv_mem branch). Ard, please feel free to add: Tested-by: Masayoshi Mizuma > > > > if the kexec behaivor is right, the LPI tables should not belong to > > System RAM. > > > Like as: > > > > 8030-830e : System RAM > > 8048-8134 : Kernel code > > 8135-817b : reserved > > 817c-82ac : Kernel data > > 830f-830f : reserved # Property table > > 8348-83480fff : reserved # Pending table > > 8349-8349 : reserved # Pending table > > 834a-a1fd : System RAM > > > > I don't have ideas to separete LPI tables from System RAM... so I tried > > to add a new file to inform the LPI tables to userspace. > > This is how 'nomap' memory appears, we carve it out of System RAM. A side > effect of this > is kdump can't touch it, as you've told it this isn't memory. > > As these tables are memory, mapped by the linear map, I think Ard's patch is > the right > thing to do ... 
I suspect your kexec-tools doesn't have those patches from > Akashi to make > it honour all second level entries. I used the kexec on the top of master branch: git://git.kernel.org/pub/scm/utils/kernel/kexec/kexec-tools.git Should we use the linaro kexec for aarch64 machine? Or will the arm64/resv_mem branch be merged to the kexec on git.kernel.org...? Thanks! Masa ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH] efi/memreserve: register reservations as 'reserved' in /proc/iomem
Hello Ard, Thank you for sending the patch, but unfortunately it doesn't work for the issue... After applied your patch, the LPI tables are marked as reserved in /proc/iomem like as: 8030-a1fd : System RAM 8048-8134 : Kernel code 8135-817b : reserved 817c-82ac : Kernel data 830f-830f : reserved # Property table 8348-83480fff : reserved # Pending table 8349-8349 : reserved # Pending table However, kexec tries to allocate memory from System RAM, it doesn't care the reserved in System RAM. Following example, kexec allocates memory 0x82ad-0x8664 to locate the initrd, and LPI tables are also in the memory region, so LPI tables will be destroyed by kexec reboot. # kexec -d -l /boot/vmlinuz-5.4.1+ --initrd=/boot/initramfs-5.4.1+.img ... initrd: base 82ad, size 3b67c6fh (62291055) ... segment[1].mem = 0x82ad segment[1].memsz = 0x3b7 # 0x8664 (== 0x82ad + 0x3b7) ... I'm not sure why kexec doesn't care the reserved in System RAM, however, if the kexec behaivor is right, the LPI tables should not belong to System RAM. Like as: 8030-830e : System RAM 8048-8134 : Kernel code 8135-817b : reserved 817c-82ac : Kernel data 830f-830f : reserved # Property table 8348-83480fff : reserved # Pending table 8349-8349 : reserved # Pending table 834a-a1fd : System RAM I don't have ideas to separete LPI tables from System RAM... so I tried to add a new file to inform the LPI tables to userspace. Thanks, Masa On Wed, Dec 04, 2019 at 02:52:33PM +, Ard Biesheuvel wrote: > Memory regions that are reserved using efi_mem_reserve_persistent() > are recorded in a special EFI config table which survives kexec, > allowing the incoming kernel to honour them as well. However, > such reservations are not visible in /proc/iomem, and so the kexec > tools that load the incoming kernel and its initrd into memory may > overwrite these reserved regions before the incoming kernel has a > chance to reserve them from further use. > > So add these reservations to /proc/iomem as they are created. 
Note > that reservations that are inherited from a previous kernel are > memblock_reserve()'d early on, so they are already visible in > /proc/iomem. > > Cc: Masayoshi Mizuma > Cc: d.hatay...@fujitsu.com > Cc: kexec@lists.infradead.org > Signed-off-by: Ard Biesheuvel > --- > drivers/firmware/efi/efi.c | 29 ++-- > 1 file changed, 26 insertions(+), 3 deletions(-) > > diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c > index d101f072c8f8..fcd82dde23c8 100644 > --- a/drivers/firmware/efi/efi.c > +++ b/drivers/firmware/efi/efi.c > @@ -979,6 +979,24 @@ static int __init efi_memreserve_map_root(void) > return 0; > } > > +static int efi_mem_reserve_iomem(phys_addr_t addr, u64 size) > +{ > + struct resource *res, *parent; > + > + res = kzalloc(sizeof(struct resource), GFP_ATOMIC); > + if (!res) > + return -ENOMEM; > + > + res->name = "reserved"; > + res->flags = IORESOURCE_MEM; > + res->start = addr; > + res->end= addr + size - 1; > + > + /* we expect a conflict with a 'System RAM' region */ > + parent = request_resource_conflict(_resource, res); > + return parent ? 
request_resource(parent, res) : 0; > +} > + > int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) > { > struct linux_efi_memreserve *rsv; > @@ -1001,9 +1019,8 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, > u64 size) > if (index < rsv->size) { > rsv->entry[index].base = addr; > rsv->entry[index].size = size; > - > memunmap(rsv); > - return 0; > + return efi_mem_reserve_iomem(addr, size); > } > memunmap(rsv); > } > @@ -1013,6 +1030,12 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, > u64 size) > if (!rsv) > return -ENOMEM; > > + rc = efi_mem_reserve_iomem(__pa(rsv), SZ_4K); > + if (rc) { > + free_page(rsv); > + return rc; > + } > + > /* >* The memremap() call above assumes that a linux_efi_memreserve entry >* never crosses a page boundary, so let's ensure that this remains true > @@ -1029,7 +1052,7 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, > u64 size) > efi_memreserve_root->next = __pa(rsv); > spin_unlock(_mem_reserve_persistent_lock); > > - return 0; > + return efi_mem_reserve_iomem(addr, size); > } > > static int __init efi_memreserve_root_init(void) > -- > 2.17.1 > > > ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH v2 2/2] efi: arm64: Introduce /proc/efi/memreserve to tell the persistent pages
From: Masayoshi Mizuma kexec reboot stops in the early boot sequence because efi_config_parse_tables() refers to garbage data. We can see the log with the memblock=debug kernel option: efi: ACPI 2.0=0x9821790014 PROP=0x8757f5c0 SMBIOS 3.0=0x982074 MEMRESERVE=0x9820bfdc58 memblock_reserve: [0x009820bfdc58-0x009820bfdc67] efi_config_parse_tables+0x228/0x278 memblock_reserve: [0x8276-0x324d07ff] efi_config_parse_tables+0x228/0x278 memblock_reserve: [0xcc4f84ecc0511670-0x5f6e5214a7fd91f9] efi_config_parse_tables+0x244/0x278 memblock_reserve: [0xd2fd4144b9af693d-0xad0c1db1086f40a2] efi_config_parse_tables+0x244/0x278 memblock_reserve: [0x0c719bb159b1fadc-0x5aa6e62a1417ce12] efi_config_parse_tables+0x244/0x278 ... That happens because 0x8276, struct linux_efi_memreserve, is destroyed. 0x8276 is pointed to by efi.mem_reserve, and efi.mem_reserve points to the head page of the LPI pending table and LPI property table, which are allocated by gic_reserve_range(). The destroyer is kexec. kexec places the initrd in that area: ]# kexec -d -l /boot/vmlinuz-5.4.0-rc7 /boot/initramfs-5.4.0-rc7.img --reuse-cmdline ... initrd: base 8229, size 388dd8ah (59301258) ... From the dynamic debug log, the initrd is located in segment[1]: machine_kexec_prepare:70: kexec kimage info: type:0 start: 85b30680 head:0 nr_segments: 4 segment[0]: 8048 - 8229, 0x1e1 bytes, 481 pages segment[1]: 8229 - 85b2, 0x389 bytes, 905 pages segment[2]: 85b2 - 85b3, 0x1 bytes, 1 pages segment[3]: 85b3 - 85b4, 0x1 bytes, 1 pages kexec searches the "System RAM" entries in /proc/iomem for a memory region in which to place the initrd. The pending tables are included in "System RAM" because they are allocated by alloc_pages(), so kexec destroys the LPI pending tables. Introduce /proc/efi/memreserve to report the pages pointed to by efi.mem_reserve so that kexec can avoid that area when placing the initrd. 
Signed-off-by: Masayoshi Mizuma --- drivers/firmware/efi/efi.c | 75 -- 1 file changed, 72 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index d8157cb34..80bbe0b3e 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -325,17 +325,87 @@ static __init int efivar_ssdt_load(void) static inline int efivar_ssdt_load(void) { return 0; } #endif +static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init; + #ifdef CONFIG_PROC_FS static struct proc_dir_entry *proc_efi; +#ifdef CONFIG_KEXEC +static int memreserve_show(struct seq_file *m, void *v) +{ + struct linux_efi_memreserve *rsv; + phys_addr_t start, end; + unsigned long prsv; + int count, i; + + if ((efi_memreserve_root == (void *)ULONG_MAX) || + (!efi_memreserve_root)) + return -ENODEV; + + for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) { + rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB); + if (!rsv) { + pr_err("Could not map efi_memreserve\n"); + return -ENOMEM; + } + count = atomic_read(>count); + for (i = 0; i < count; i++) { + start = rsv->entry[i].base; + end = start + rsv->entry[i].size - 1; + + seq_printf(m, "%pa-%pa\n", , ); + } + memunmap(rsv); + } + + return 0; +} + +static int memreserve_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, memreserve_show, NULL); +} + +static const struct file_operations memreserve_fops = { + .owner = THIS_MODULE, + .open = memreserve_open, + .read = seq_read, + .llseek = seq_lseek, + .release= single_release, +}; + +static int __init efi_proc_memreserve(void) +{ + struct proc_dir_entry *pde; + + if ((efi_memreserve_root == (void *)ULONG_MAX) || + (!efi_memreserve_root)) + return 0; + + pde = proc_create("memreserve", 0444, proc_efi, _fops); + if (!pde) { + pr_err("/proc/efi: Cannot create /proc/efi/memreserve file.\n"); + return 1; + } + + return 0; +} +#else +static inline int efi_proc_memreserve(void) { return 0; } +#endif /* CONFIG_KEXEC */ + 
static int __init efi_proc_init(void) { + int error = 1; + proc_efi = proc_mkdir("efi", NULL); if (!proc_efi) { pr_err("/proc/efi: Cannot create /proc/efi directory.\n"); - return 1; + return error;
[PATCH v2 1/2] efi: add /proc/efi directory
From: Masayoshi Mizuma Add /proc/efi directory to show some efi internal information. Signed-off-by: Masayoshi Mizuma --- drivers/firmware/efi/efi.c | 22 ++ 1 file changed, 22 insertions(+) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index d101f072c..d8157cb34 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -325,6 +325,22 @@ static __init int efivar_ssdt_load(void) static inline int efivar_ssdt_load(void) { return 0; } #endif +#ifdef CONFIG_PROC_FS +static struct proc_dir_entry *proc_efi; +static int __init efi_proc_init(void) +{ + proc_efi = proc_mkdir("efi", NULL); + if (!proc_efi) { + pr_err("/proc/efi: Cannot create /proc/efi directory.\n"); + return 1; + } + + return 0; +} +#else +static inline int efi_proc_init(void) { return 0; } +#endif /* CONFIG_PROC_FS */ + /* * We register the efi subsystem with the firmware subsystem and the * efivars subsystem with the efi subsystem, if the system was booted with @@ -381,6 +397,12 @@ static int __init efisubsys_init(void) goto err_remove_group; } + error = efi_proc_init(); + if (error) { + sysfs_remove_mount_point(efi_kobj, "efivars"); + goto err_remove_group; + } + return 0; err_remove_group: -- 2.18.1 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH v2 0/2] efi: arm64: Introduce /proc/efi/memreserve to tell the persistent pages
From: Masayoshi Mizuma kexec reboot sometimes fails in the early boot sequence on aarch64 machines. That is because kexec overwrites the LPI property tables and pending tables with the initrd. To avoid the overwrite, introduce /proc/efi/memreserve to report the memory regions holding those tables to kexec, so that kexec can avoid those regions when placing the initrd. kexec also needs a patch to handle /proc/efi/memreserve. I'm preparing the patch for kexec. Changelog v2: - Change the memreserve file location from sysfs to procfs. The memreserve output may exceed PAGE_SIZE when efi_memreserve_root has a lot of entries, so we cannot use sysfs_kf_seq_show(). Use seq_printf() in procfs instead. Masayoshi Mizuma (2): efi: add /proc/efi directory efi: arm64: Introduce /proc/efi/memreserve to tell the persistent pages drivers/firmware/efi/efi.c | 93 +- 1 file changed, 92 insertions(+), 1 deletion(-) -- 2.18.1 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH] efi: arm64: Introduce /sys/firmware/efi/memreserve to tell the persistent pages
On Fri, Nov 29, 2019 at 01:25:36PM +0100, Matthias Brugger wrote: > > > On 25/11/2019 19:49, Masayoshi Mizuma wrote: > > From: Masayoshi Mizuma > > > > kexec reboot stops in early boot sequence because efi_config_parse_tables() > > refers garbage data. We can see the log with memblock=debug kernel option: > > > > efi: ACPI 2.0=0x9821790014 PROP=0x8757f5c0 SMBIOS 3.0=0x982074 > > MEMRESERVE=0x9820bfdc58 > > memblock_reserve: [0x009820bfdc58-0x009820bfdc67] > > efi_config_parse_tables+0x228/0x278 > > memblock_reserve: [0x8276-0x324d07ff] > > efi_config_parse_tables+0x228/0x278 > > memblock_reserve: [0xcc4f84ecc0511670-0x5f6e5214a7fd91f9] > > efi_config_parse_tables+0x244/0x278 > > memblock_reserve: [0xd2fd4144b9af693d-0xad0c1db1086f40a2] > > efi_config_parse_tables+0x244/0x278 > > memblock_reserve: [0x0c719bb159b1fadc-0x5aa6e62a1417ce12] > > efi_config_parse_tables+0x244/0x278 > > ... > > > > That happens because 0x8276, struct linux_efi_memreserve, is destroyed. > > 0x8276 is pointed from efi.mem_reseve, and efi.mem_reserve points the > > head page of LPI pending table and LPI property table which are allocated by > > gic_reserve_range(). > > > > The destroyer is kexec. kexec locates the initrd to the area: > > > > ]# kexec -d -l /boot/vmlinuz-5.4.0-rc7 /boot/initramfs-5.4.0-rc7.img > > --reuse-cmdline > > ... > > initrd: base 8229, size 388dd8ah (59301258) > > ... > > > > From dynamic debug log. initrd is located in segment[1]: > > machine_kexec_prepare:70: > > kexec kimage info: > > type:0 > > start: 85b30680 > > head:0 > > nr_segments: 4 > > segment[0]: 8048 - 8229, 0x1e1 bytes, > > 481 pages > > segment[1]: 8229 - 85b2, 0x389 bytes, > > 905 pages > > segment[2]: 85b2 - 85b3, 0x1 bytes, 1 > > pages > > segment[3]: 85b3 - 85b4, 0x1 bytes, 1 > > pages > > > > kexec searches the memory region to locate initrd through > > "System RAM" in /proc/iomem. 
The pending tables are included in > > "System RAM" because they are allocated by alloc_pages(), so kexec > > destroys the LPI pending tables. > > > > Doesn't that mean that you haven't enough memory reserved so that you have to > fallback to allocate it via __get_free_page()? That's a not fallback allocation. The pending tables and also property tables are allocated by alloc_pages() on its_allocate_prop_table() and its_allocate_pending_table(). > > > > Introduce /sys/firmware/efi/memreserve to tell the pages pointed by > > efi.mem_reserve so that kexec can avoid the area to locate initrd. > > > > Doesn't that need a patch for kexec-tools to actually take this into account? Yes, we need a patch for kexec-tools as well. I'm preparing the kexec patch. > > > Signed-off-by: Masayoshi Mizuma > > --- > > drivers/firmware/efi/efi.c | 45 +- > > 1 file changed, 44 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c > > index e98bbf8e5..0aa07cc09 100644 > > --- a/drivers/firmware/efi/efi.c > > +++ b/drivers/firmware/efi/efi.c > > @@ -141,6 +141,47 @@ static ssize_t systab_show(struct kobject *kobj, > > > > static struct kobj_attribute efi_attr_systab = __ATTR_RO_MODE(systab, > > 0400); > > > > +static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init; > > +#ifdef CONFIG_KEXEC > > +static ssize_t memreserve_show(struct kobject *kobj, > > + struct kobj_attribute *attr, char *buf) > > +{ > > + struct linux_efi_memreserve *rsv; > > + phys_addr_t start, end; > > + unsigned long prsv; > > + char *str = buf; > > + int count, i; > > + > > + if (!kobj || !buf) > > + return -EINVAL; > > + > > + if ((efi_memreserve_root == (void *)ULONG_MAX) || > > + (!efi_memreserve_root)) > > + return -ENODEV; > > + > > + for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) { > > + rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB); > > + if (!rsv) { > > + pr_err("Could not map efi_memreserve\n"); > > + return -ENOMEM; > > +
[PATCH] efi: arm64: Introduce /sys/firmware/efi/memreserve to tell the persistent pages
From: Masayoshi Mizuma kexec reboot stops in early boot sequence because efi_config_parse_tables() refers garbage data. We can see the log with memblock=debug kernel option: efi: ACPI 2.0=0x9821790014 PROP=0x8757f5c0 SMBIOS 3.0=0x982074 MEMRESERVE=0x9820bfdc58 memblock_reserve: [0x009820bfdc58-0x009820bfdc67] efi_config_parse_tables+0x228/0x278 memblock_reserve: [0x8276-0x324d07ff] efi_config_parse_tables+0x228/0x278 memblock_reserve: [0xcc4f84ecc0511670-0x5f6e5214a7fd91f9] efi_config_parse_tables+0x244/0x278 memblock_reserve: [0xd2fd4144b9af693d-0xad0c1db1086f40a2] efi_config_parse_tables+0x244/0x278 memblock_reserve: [0x0c719bb159b1fadc-0x5aa6e62a1417ce12] efi_config_parse_tables+0x244/0x278 ... That happens because 0x8276, struct linux_efi_memreserve, is destroyed. 0x8276 is pointed from efi.mem_reseve, and efi.mem_reserve points the head page of LPI pending table and LPI property table which are allocated by gic_reserve_range(). The destroyer is kexec. kexec locates the initrd to the area: ]# kexec -d -l /boot/vmlinuz-5.4.0-rc7 /boot/initramfs-5.4.0-rc7.img --reuse-cmdline ... initrd: base 8229, size 388dd8ah (59301258) ... >From dynamic debug log. initrd is located in segment[1]: machine_kexec_prepare:70: kexec kimage info: type:0 start: 85b30680 head:0 nr_segments: 4 segment[0]: 8048 - 8229, 0x1e1 bytes, 481 pages segment[1]: 8229 - 85b2, 0x389 bytes, 905 pages segment[2]: 85b2 - 85b3, 0x1 bytes, 1 pages segment[3]: 85b3 - 85b4, 0x1 bytes, 1 pages kexec searches the memory region to locate initrd through "System RAM" in /proc/iomem. The pending tables are included in "System RAM" because they are allocated by alloc_pages(), so kexec destroys the LPI pending tables. Introduce /sys/firmware/efi/memreserve to tell the pages pointed by efi.mem_reserve so that kexec can avoid the area to locate initrd. 
Signed-off-by: Masayoshi Mizuma --- drivers/firmware/efi/efi.c | 45 +- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index e98bbf8e5..0aa07cc09 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -141,6 +141,47 @@ static ssize_t systab_show(struct kobject *kobj, static struct kobj_attribute efi_attr_systab = __ATTR_RO_MODE(systab, 0400); +static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init; +#ifdef CONFIG_KEXEC +static ssize_t memreserve_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct linux_efi_memreserve *rsv; + phys_addr_t start, end; + unsigned long prsv; + char *str = buf; + int count, i; + + if (!kobj || !buf) + return -EINVAL; + + if ((efi_memreserve_root == (void *)ULONG_MAX) || + (!efi_memreserve_root)) + return -ENODEV; + + for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) { + rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB); + if (!rsv) { + pr_err("Could not map efi_memreserve\n"); + return -ENOMEM; + } + count = atomic_read(&rsv->count); + for (i = 0; i < count; i++) { + start = rsv->entry[i].base; + end = start + rsv->entry[i].size - 1; + + str += sprintf(str, "%pa-%pa\n", &start, &end); + } + memunmap(rsv); + } + + return str - buf; +} + +static struct kobj_attribute efi_attr_memreserve = + __ATTR_RO_MODE(memreserve, 0444); +#endif /* CONFIG_KEXEC */ + #define EFI_FIELD(var) efi.var #define EFI_ATTR_SHOW(name) \ @@ -172,6 +213,9 @@ static struct attribute *efi_subsys_attrs[] = { &efi_attr_runtime.attr, &efi_attr_config_table.attr, &efi_attr_fw_platform_size.attr, +#ifdef CONFIG_KEXEC + &efi_attr_memreserve.attr, +#endif NULL, }; @@ -955,7 +999,6 @@ int efi_status_to_err(efi_status_t status) } static DEFINE_SPINLOCK(efi_mem_reserve_persistent_lock); -static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init; static int __init efi_memreserve_map_root(void) { -- 2.18.1 ___ kexec mailing list
kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [RFC PATCH v2] efi: arm64: Introduce /sys/firmware/efi/memreserve to tell the persistent pages
On Thu, Nov 14, 2019 at 11:10:19AM -0500, Masayoshi Mizuma wrote: > From: Masayoshi Mizuma > > kexec reboot stops in early boot sequence because efi_config_parse_tables() > refers garbage data. We can see the log with memblock=debug kernel option: > > efi: ACPI 2.0=0x9821790014 PROP=0x8757f5c0 SMBIOS 3.0=0x982074 > MEMRESERVE=0x9820bfdc58 > memblock_reserve: [0x009820bfdc58-0x009820bfdc67] > efi_config_parse_tables+0x228/0x278 > memblock_reserve: [0x8276-0x324d07ff] > efi_config_parse_tables+0x228/0x278 > memblock_reserve: [0xcc4f84ecc0511670-0x5f6e5214a7fd91f9] > efi_config_parse_tables+0x244/0x278 > memblock_reserve: [0xd2fd4144b9af693d-0xad0c1db1086f40a2] > efi_config_parse_tables+0x244/0x278 > memblock_reserve: [0x0c719bb159b1fadc-0x5aa6e62a1417ce12] > efi_config_parse_tables+0x244/0x278 > ... > > That happens because 0x8276, struct linux_efi_memreserve, is destroyed. > 0x8276 is pointed from efi.mem_reseve, and efi.mem_reserve points the > head page of LPI pending table and LPI property table which are allocated by > gic_reserve_range(). > > The destroyer is kexec. kexec locates the initrd to the area: > > ]# kexec -d -l /boot/vmlinuz-5.4.0-rc7 /boot/initramfs-5.4.0-rc7.img > --reuse-cmdline > ... > initrd: base 8229, size 388dd8ah (59301258) > ... > > From dynamic debug log. initrd is located in segment[1]: > machine_kexec_prepare:70: > kexec kimage info: > type:0 > start: 85b30680 > head:0 > nr_segments: 4 > segment[0]: 8048 - 8229, 0x1e1 bytes, 481 > pages > segment[1]: 8229 - 85b2, 0x389 bytes, 905 > pages > segment[2]: 85b2 - 85b3, 0x1 bytes, 1 > pages > segment[3]: 85b3 - 85b4, 0x1 bytes, 1 > pages > > kexec searches the memory region to locate initrd through > "System RAM" in /proc/iomem. The pending tables are included in > "System RAM" because they are allocated by alloc_pages(), so kexec > destroys the LPI pending tables. 
> > Introduce /sys/firmware/efi/memreserve to tell the pages pointed by > efi.mem_reserve so that kexec can avoid the area to locate initrd. > > Signed-off-by: Masayoshi Mizuma > --- > drivers/firmware/efi/efi.c | 41 +- > 1 file changed, 40 insertions(+), 1 deletion(-) > > diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c > index 0b6b0c19a..07812d697 100644 > --- a/drivers/firmware/efi/efi.c > +++ b/drivers/firmware/efi/efi.c > @@ -149,6 +149,45 @@ static ssize_t systab_show(struct kobject *kobj, > > static struct kobj_attribute efi_attr_systab = __ATTR_RO_MODE(systab, 0400); > > +static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init; > +static ssize_t memreserve_show(struct kobject *kobj, > +struct kobj_attribute *attr, char *buf) > +{ > + struct linux_efi_memreserve *rsv; > + phys_addr_t start, end; > + unsigned long prsv; > + char *str = buf; > + int count, i; > + > + if (!kobj || !buf) > + return -EINVAL; > + > + if ((efi_memreserve_root == (void *)ULONG_MAX) || > + (!efi_memreserve_root)) > + return -ENODEV; > + > + for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) { > + rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB); > + if (!rsv) { > + pr_err("Could not map efi_memreserve\n"); > + return -ENOMEM; > + } > + count = atomic_read(>count); > + for (i = 0; i < count; i++) { > + start = rsv->entry[i].base; > + end = start + rsv->entry[i].size - 1; > + > + str += sprintf(str, "%pa-%pa\n", , ); > + } > + memunmap(rsv); > + } > + > + return str - buf; > +} > + > +static struct kobj_attribute efi_attr_memreserve = > + __ATTR_RO_MODE(memreserve, 0444); > + I think it's better to put above between #ifdef CONFIG_KEXEC and #endif because it's useful for only kexec... > #define EFI_FIELD(var) efi.var > > #define EFI_ATTR_SHOW(name) \ > @@ -180,6 +219,7 @@ static struct attribute *efi_subsys_attrs[] = { > _attr_runtime.attr, > _attr_config_table.attr, > _attr_fw_platform_size.attr, > + _attr_memreserve.attr, Same as. 
I'll post the patch to change above and will remove the
[RFC PATCH v2] efi: arm64: Introduce /sys/firmware/efi/memreserve to tell the persistent pages
From: Masayoshi Mizuma kexec reboot stops in early boot sequence because efi_config_parse_tables() refers garbage data. We can see the log with memblock=debug kernel option: efi: ACPI 2.0=0x9821790014 PROP=0x8757f5c0 SMBIOS 3.0=0x982074 MEMRESERVE=0x9820bfdc58 memblock_reserve: [0x009820bfdc58-0x009820bfdc67] efi_config_parse_tables+0x228/0x278 memblock_reserve: [0x8276-0x324d07ff] efi_config_parse_tables+0x228/0x278 memblock_reserve: [0xcc4f84ecc0511670-0x5f6e5214a7fd91f9] efi_config_parse_tables+0x244/0x278 memblock_reserve: [0xd2fd4144b9af693d-0xad0c1db1086f40a2] efi_config_parse_tables+0x244/0x278 memblock_reserve: [0x0c719bb159b1fadc-0x5aa6e62a1417ce12] efi_config_parse_tables+0x244/0x278 ... That happens because 0x8276, struct linux_efi_memreserve, is destroyed. 0x8276 is pointed from efi.mem_reseve, and efi.mem_reserve points the head page of LPI pending table and LPI property table which are allocated by gic_reserve_range(). The destroyer is kexec. kexec locates the initrd to the area: ]# kexec -d -l /boot/vmlinuz-5.4.0-rc7 /boot/initramfs-5.4.0-rc7.img --reuse-cmdline ... initrd: base 8229, size 388dd8ah (59301258) ... >From dynamic debug log. initrd is located in segment[1]: machine_kexec_prepare:70: kexec kimage info: type:0 start: 85b30680 head:0 nr_segments: 4 segment[0]: 8048 - 8229, 0x1e1 bytes, 481 pages segment[1]: 8229 - 85b2, 0x389 bytes, 905 pages segment[2]: 85b2 - 85b3, 0x1 bytes, 1 pages segment[3]: 85b3 - 85b4, 0x1 bytes, 1 pages kexec searches the memory region to locate initrd through "System RAM" in /proc/iomem. The pending tables are included in "System RAM" because they are allocated by alloc_pages(), so kexec destroys the LPI pending tables. Introduce /sys/firmware/efi/memreserve to tell the pages pointed by efi.mem_reserve so that kexec can avoid the area to locate initrd. 
Signed-off-by: Masayoshi Mizuma --- drivers/firmware/efi/efi.c | 41 +- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 0b6b0c19a..07812d697 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -149,6 +149,45 @@ static ssize_t systab_show(struct kobject *kobj, static struct kobj_attribute efi_attr_systab = __ATTR_RO_MODE(systab, 0400); +static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init; +static ssize_t memreserve_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct linux_efi_memreserve *rsv; + phys_addr_t start, end; + unsigned long prsv; + char *str = buf; + int count, i; + + if (!kobj || !buf) + return -EINVAL; + + if ((efi_memreserve_root == (void *)ULONG_MAX) || + (!efi_memreserve_root)) + return -ENODEV; + + for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) { + rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB); + if (!rsv) { + pr_err("Could not map efi_memreserve\n"); + return -ENOMEM; + } + count = atomic_read(&rsv->count); + for (i = 0; i < count; i++) { + start = rsv->entry[i].base; + end = start + rsv->entry[i].size - 1; + + str += sprintf(str, "%pa-%pa\n", &start, &end); + } + memunmap(rsv); + } + + return str - buf; +} + +static struct kobj_attribute efi_attr_memreserve = + __ATTR_RO_MODE(memreserve, 0444); + #define EFI_FIELD(var) efi.var #define EFI_ATTR_SHOW(name) \ @@ -180,6 +219,7 @@ static struct attribute *efi_subsys_attrs[] = { &efi_attr_runtime.attr, &efi_attr_config_table.attr, &efi_attr_fw_platform_size.attr, + &efi_attr_memreserve.attr, NULL, }; @@ -964,7 +1004,6 @@ int efi_status_to_err(efi_status_t status) } static DEFINE_SPINLOCK(efi_mem_reserve_persistent_lock); -static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init; static int __init efi_memreserve_map_root(void) { -- 2.21.0 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [RFC PATCH] efi: arm64: Introduce /sys/firmware/efi/memreserve to tell the persistent pages
On Wed, Nov 13, 2019 at 07:59:04AM +, d.hatay...@fujitsu.com wrote: > > From: Masayoshi Mizuma > > > > kexec reboot stucks because efi_config_parse_tables() refers garbage > > (with memblock=debug): > > > > efi: ACPI 2.0=0x9821790014 PROP=0x8757f5c0 SMBIOS 3.0=0x982074 > > MEMRESERVE=0x9820bfdc58 > > memblock_reserve: [0x009820bfdc58-0x009820bfdc67] > > efi_config_parse_tables+0x228/0x278 > > memblock_reserve: [0x8276-0x324d07ff] > > efi_config_parse_tables+0x228/0x278 > > memblock_reserve: [0xcc4f84ecc0511670-0x5f6e5214a7fd91f9] > > efi_config_parse_tables+0x244/0x278 > > memblock_reserve: [0xd2fd4144b9af693d-0xad0c1db1086f40a2] > > efi_config_parse_tables+0x244/0x278 > > memblock_reserve: [0x0c719bb159b1fadc-0x5aa6e62a1417ce12] > > efi_config_parse_tables+0x244/0x278 > > ... > > > > That happens because 0x8276, struct linux_efi_memreserve, is destroyed. > > 0x8276 is pointed from efi.mem_reseve, and efi.mem_reserve points the > > head page of pending table and prop table which are allocated by > > gic_reserve_range(). > > > > The destroyer is kexec. kexec locates the inird to the area: > > > > # kexec -d -l /boot/vmlinuz-5.4.0-rc7 /boot/initramfs-5.4.0-rc7.img > > --reuse-cmdline > > ... > > initrd: base 8229, size 388dd8ah (59301258) > > ... > > > > From dynamic debug log: > > machine_kexec_prepare:70: > > kexec kimage info: > > type:0 > > start: 85b30680 > > head:0 > > nr_segments: 4 > > segment[0]: 8048 - 8229, 0x1e1 bytes, > > 481 > > pages > > segment[1]: 8229 - 85b2, 0x389 bytes, > > 905 > > pages > > segment[2]: 85b2 - 85b3, 0x1 bytes, 1 > > pages > > segment[3]: 85b3 - 85b4, 0x1 bytes, 1 > > pages > > > > kexec searches the appropriate memory region to locate initrd through > > "System > > RAM" > > in /proc/iomem. The pending tables are included in "System RAM" because they > > are > > allocated by alloc_pages(), so kexec destroys the pending tables. 
> > > > Introduce /sys/firmware/efi/memreserve to tell the pages pointed by > > efi.mem_reserve > > so that kexec can avoid the area to locate initrd. > > > > Signed-off-by: Masayoshi Mizuma > > --- > > drivers/firmware/efi/efi.c | 32 +++- > > 1 file changed, 31 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c > > index e98bbf8e5..67b21ae7a 100644 > > --- a/drivers/firmware/efi/efi.c > > +++ b/drivers/firmware/efi/efi.c > > @@ -141,6 +141,36 @@ static ssize_t systab_show(struct kobject *kobj, > > > > static struct kobj_attribute efi_attr_systab = __ATTR_RO_MODE(systab, > > 0400); > > > > +static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init; > > +static ssize_t memreserve_show(struct kobject *kobj, > > + struct kobj_attribute *attr, char *buf) > > +{ > > + struct linux_efi_memreserve *rsv; > > + unsigned long prsv; > > + char *str = buf; > > + int index, i; > > + > > + if (!kobj || !buf) > > + return -EINVAL; > > + > > + if (!efi_memreserve_root) > > + return -ENODEV; > > Other functions use different conditions. > The latter efi_memreserve_root == (void *)ULONG_MAX is correct? > > static int __init efi_memreserve_map_root(void) > { > if (efi.mem_reserve == EFI_INVALID_TABLE_ADDR) > return -ENODEV; > int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) > { > struct linux_efi_memreserve *rsv; > unsigned long prsv; > int rc, index; > > if (efi_memreserve_root == (void *)ULONG_MAX) > return -ENODEV; I think it's better to add both checks like as: if ((efi_memreserve_root == (void *)ULONG_MAX) || (!efi_memreserve_root)) return -ENODEV; > > > + > > + for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) { > > + rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB); > > memremap() could fail with NULL as a return value. > You need to deal with such case. > > It looks to me efi_mem_reserve_persistent() also doesn't deal with this. >
[RFC PATCH] efi: arm64: Introduce /sys/firmware/efi/memreserve to tell the persistent pages
From: Masayoshi Mizuma kexec reboot stucks because efi_config_parse_tables() refers garbage (with memblock=debug): efi: ACPI 2.0=0x9821790014 PROP=0x8757f5c0 SMBIOS 3.0=0x982074 MEMRESERVE=0x9820bfdc58 memblock_reserve: [0x009820bfdc58-0x009820bfdc67] efi_config_parse_tables+0x228/0x278 memblock_reserve: [0x8276-0x324d07ff] efi_config_parse_tables+0x228/0x278 memblock_reserve: [0xcc4f84ecc0511670-0x5f6e5214a7fd91f9] efi_config_parse_tables+0x244/0x278 memblock_reserve: [0xd2fd4144b9af693d-0xad0c1db1086f40a2] efi_config_parse_tables+0x244/0x278 memblock_reserve: [0x0c719bb159b1fadc-0x5aa6e62a1417ce12] efi_config_parse_tables+0x244/0x278 ... That happens because 0x8276, struct linux_efi_memreserve, is destroyed. 0x8276 is pointed from efi.mem_reseve, and efi.mem_reserve points the head page of pending table and prop table which are allocated by gic_reserve_range(). The destroyer is kexec. kexec locates the inird to the area: # kexec -d -l /boot/vmlinuz-5.4.0-rc7 /boot/initramfs-5.4.0-rc7.img --reuse-cmdline ... initrd: base 8229, size 388dd8ah (59301258) ... >From dynamic debug log: machine_kexec_prepare:70: kexec kimage info: type:0 start: 85b30680 head:0 nr_segments: 4 segment[0]: 8048 - 8229, 0x1e1 bytes, 481 pages segment[1]: 8229 - 85b2, 0x389 bytes, 905 pages segment[2]: 85b2 - 85b3, 0x1 bytes, 1 pages segment[3]: 85b3 - 85b4, 0x1 bytes, 1 pages kexec searches the appropriate memory region to locate initrd through "System RAM" in /proc/iomem. The pending tables are included in "System RAM" because they are allocated by alloc_pages(), so kexec destroys the pending tables. Introduce /sys/firmware/efi/memreserve to tell the pages pointed by efi.mem_reserve so that kexec can avoid the area to locate initrd. 
Signed-off-by: Masayoshi Mizuma --- drivers/firmware/efi/efi.c | 32 +++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index e98bbf8e5..67b21ae7a 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -141,6 +141,36 @@ static ssize_t systab_show(struct kobject *kobj, static struct kobj_attribute efi_attr_systab = __ATTR_RO_MODE(systab, 0400); +static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init; +static ssize_t memreserve_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct linux_efi_memreserve *rsv; + unsigned long prsv; + char *str = buf; + int index, i; + + if (!kobj || !buf) + return -EINVAL; + + if (!efi_memreserve_root) + return -ENODEV; + + for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) { + rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB); + index = atomic_read(&rsv->count); + for (i = 0; i < index; i++) + str += sprintf(str, "%llx-%llx\n", + rsv->entry[i].base, + rsv->entry[i].base + rsv->entry[i].size - 1); + memunmap(rsv); + } + + return str - buf; +} + +static struct kobj_attribute efi_attr_memreserve = __ATTR_RO_MODE(memreserve, 0444); + #define EFI_FIELD(var) efi.var #define EFI_ATTR_SHOW(name) \ @@ -172,6 +202,7 @@ static struct attribute *efi_subsys_attrs[] = { &efi_attr_runtime.attr, &efi_attr_config_table.attr, &efi_attr_fw_platform_size.attr, + &efi_attr_memreserve.attr, NULL, }; @@ -955,7 +986,6 @@ int efi_status_to_err(efi_status_t status) } static DEFINE_SPINLOCK(efi_mem_reserve_persistent_lock); -static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init; static int __init efi_memreserve_map_root(void) { -- 2.18.1 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec