[PATCHv2 3/3] arm64: read VA_BITS from kcore for 52-bits VA kernel

2021-12-16 Thread Pingfan Liu
phys_to_virt() calculates virtual address. As a important factor,
page_offset is excepted to be accurate.

Since arm64 kernel exposes va_bits through vmcore, using it.

Signed-off-by: Pingfan Liu 
---
 kexec/arch/arm64/kexec-arm64.c | 34 ++
 util_lib/elf_info.c|  5 +
 util_lib/include/elf_info.h|  1 +
 3 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/kexec/arch/arm64/kexec-arm64.c b/kexec/arch/arm64/kexec-arm64.c
index 037e51c..86aadf0 100644
--- a/kexec/arch/arm64/kexec-arm64.c
+++ b/kexec/arch/arm64/kexec-arm64.c
@@ -54,7 +54,7 @@
 static bool try_read_phys_offset_from_kcore = false;
 
 /* Machine specific details. */
-static int va_bits;
+static int va_bits = -1;
 static unsigned long page_offset;
 
 /* Global varables the core kexec routines expect. */
@@ -876,7 +876,18 @@ static inline void set_phys_offset(long v, char 
*set_method)
 
 static int get_va_bits(void)
 {
-   unsigned long long stext_sym_addr = get_kernel_sym("_stext");
+   unsigned long long stext_sym_addr;
+
+   /*
+* if already got from kcore
+*/
+   if (va_bits != -1)
+   goto out;
+
+
+   /* For kernel older than v4.19 */
+   fprintf(stderr, "Warning, can't get the VA_BITS from kcore\n");
+   stext_sym_addr = get_kernel_sym("_stext");
 
if (stext_sym_addr == 0) {
fprintf(stderr, "Can't get the symbol of _stext.\n");
@@ -900,6 +911,7 @@ static int get_va_bits(void)
return -1;
}
 
+out:
dbgprintf("va_bits : %d\n", va_bits);
 
return 0;
@@ -917,14 +929,27 @@ int get_page_offset(unsigned long *page_offset)
if (ret < 0)
return ret;
 
-   *page_offset = (0xUL) << (va_bits - 1);
+   if (va_bits < 52)
+   *page_offset = UINT64_MAX << (va_bits - 1);
+   else
+   *page_offset = UINT64_MAX << va_bits;
+
dbgprintf("page_offset : %lx\n", *page_offset);
 
return 0;
 }
 
+static void arm64_scan_vmcoreinfo(char *pos)
+{
+   const char *str;
+
+   str = "NUMBER(VA_BITS)=";
+   if (memcmp(str, pos, strlen(str)) == 0)
+   va_bits = strtoul(pos + strlen(str), NULL, 10);
+}
+
 /**
- * get_phys_offset_from_vmcoreinfo_pt_note - Helper for getting PHYS_OFFSET
+ * get_phys_offset_from_vmcoreinfo_pt_note - Helper for getting PHYS_OFFSET 
(and va_bits)
  * from VMCOREINFO note inside 'kcore'.
  */
 
@@ -937,6 +962,7 @@ static int get_phys_offset_from_vmcoreinfo_pt_note(long 
*phys_offset)
return EFAILED;
}
 
+   arch_scan_vmcoreinfo = arm64_scan_vmcoreinfo;
ret = read_phys_offset_elf_kcore(fd, phys_offset);
 
close(fd);
diff --git a/util_lib/elf_info.c b/util_lib/elf_info.c
index 5574c7f..d252eff 100644
--- a/util_lib/elf_info.c
+++ b/util_lib/elf_info.c
@@ -310,6 +310,8 @@ int get_pt_load(int idx,
 
 #define NOT_FOUND_LONG_VALUE   (-1)
 
+void (*arch_scan_vmcoreinfo)(char *pos);
+
 void scan_vmcoreinfo(char *start, size_t size)
 {
char *last = start + size - 1;
@@ -551,6 +553,9 @@ void scan_vmcoreinfo(char *start, size_t size)
}
}
 
+   if (arch_scan_vmcoreinfo != NULL)
+   (*arch_scan_vmcoreinfo)(pos);
+
if (last_line)
break;
}
diff --git a/util_lib/include/elf_info.h b/util_lib/include/elf_info.h
index f550d86..fdf4c3d 100644
--- a/util_lib/include/elf_info.h
+++ b/util_lib/include/elf_info.h
@@ -31,5 +31,6 @@ int get_pt_load(int idx,
 int read_phys_offset_elf_kcore(int fd, long *phys_off);
 int read_elf(int fd);
 void dump_dmesg(int fd, void (*handler)(char*, unsigned int));
+extern void (*arch_scan_vmcoreinfo)(char *pos);
 
 #endif /* ELF_INFO_H */
-- 
2.31.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCHv2 2/3] arm64/crashdump: unify routine to get page_offset

2021-12-16 Thread Pingfan Liu
There are two funcs to get page_offset:
  get_kernel_page_offset()
  get_page_offset()

Since get_kernel_page_offset() does not observe the kernel formula, and
remove it. Unify them in order to introduce 52-bits VA kernel more
easily in the coming patch.

Signed-off-by: Pingfan Liu 
---
 kexec/arch/arm64/crashdump-arm64.c | 23 +--
 kexec/arch/arm64/kexec-arm64.c |  8 
 kexec/arch/arm64/kexec-arm64.h |  1 +
 3 files changed, 6 insertions(+), 26 deletions(-)

diff --git a/kexec/arch/arm64/crashdump-arm64.c 
b/kexec/arch/arm64/crashdump-arm64.c
index a02019a..0a8d44c 100644
--- a/kexec/arch/arm64/crashdump-arm64.c
+++ b/kexec/arch/arm64/crashdump-arm64.c
@@ -46,27 +46,6 @@ static struct crash_elf_info elf_info = {
.machine= EM_AARCH64,
 };
 
-/*
- * Note: The returned value is correct only if !CONFIG_RANDOMIZE_BASE.
- */
-static uint64_t get_kernel_page_offset(void)
-{
-   int i;
-
-   if (elf_info.kern_vaddr_start == UINT64_MAX)
-   return UINT64_MAX;
-
-   /* Current max virtual memory range is 48-bits. */
-   for (i = 48; i > 0; i--)
-   if (!(elf_info.kern_vaddr_start & (1UL << i)))
-   break;
-
-   if (i <= 0)
-   return UINT64_MAX;
-   else
-   return UINT64_MAX << i;
-}
-
 /*
  * iomem_range_callback() - callback called for each iomem region
  * @data: not used
@@ -198,7 +177,7 @@ int load_crashdump_segments(struct kexec_info *info)
if (err)
return EFAILED;
 
-   elf_info.page_offset = get_kernel_page_offset();
+   get_page_offset(_info.page_offset);
dbgprintf("%s: page_offset:   %016llx\n", __func__,
elf_info.page_offset);
 
diff --git a/kexec/arch/arm64/kexec-arm64.c b/kexec/arch/arm64/kexec-arm64.c
index 7373fa3..037e51c 100644
--- a/kexec/arch/arm64/kexec-arm64.c
+++ b/kexec/arch/arm64/kexec-arm64.c
@@ -909,7 +909,7 @@ static int get_va_bits(void)
  * get_page_offset - Helper for getting PAGE_OFFSET
  */
 
-static int get_page_offset(void)
+int get_page_offset(unsigned long *page_offset)
 {
int ret;
 
@@ -917,8 +917,8 @@ static int get_page_offset(void)
if (ret < 0)
return ret;
 
-   page_offset = (0xUL) << (va_bits - 1);
-   dbgprintf("page_offset : %lx\n", page_offset);
+   *page_offset = (0xUL) << (va_bits - 1);
+   dbgprintf("page_offset : %lx\n", *page_offset);
 
return 0;
 }
@@ -954,7 +954,7 @@ int get_phys_base_from_pt_load(long *phys_offset)
unsigned long long phys_start;
unsigned long long virt_start;
 
-   ret = get_page_offset();
+   ret = get_page_offset(_offset);
if (ret < 0)
return ret;
 
diff --git a/kexec/arch/arm64/kexec-arm64.h b/kexec/arch/arm64/kexec-arm64.h
index 1844b67..ed99d9d 100644
--- a/kexec/arch/arm64/kexec-arm64.h
+++ b/kexec/arch/arm64/kexec-arm64.h
@@ -69,6 +69,7 @@ extern struct arm64_mem arm64_mem;
 
 uint64_t get_phys_offset(void);
 uint64_t get_vp_offset(void);
+int get_page_offset(unsigned long *offset);
 
 static inline void reset_vp_offset(void)
 {
-- 
2.31.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCHv2 1/3] arm64: make phys_offset signed

2021-12-16 Thread Pingfan Liu
After kernel commit 7bc1a0f9e176 ("arm64: mm: use single quantity to
represent the PA to VA translation"), phys_offset can be negative if
running 52-bits kernel on 48-bits hardware.

So changing phys_offset from unsigned to signed.

Signed-off-by: Pingfan Liu 
---
 kexec/arch/arm64/kexec-arm64.c | 8 
 kexec/arch/arm64/kexec-arm64.h | 2 +-
 util_lib/elf_info.c| 2 +-
 util_lib/include/elf_info.h| 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/kexec/arch/arm64/kexec-arm64.c b/kexec/arch/arm64/kexec-arm64.c
index 6f572ed..7373fa3 100644
--- a/kexec/arch/arm64/kexec-arm64.c
+++ b/kexec/arch/arm64/kexec-arm64.c
@@ -859,7 +859,7 @@ void add_segment(struct kexec_info *info, const void *buf, 
size_t bufsz,
add_segment_phys_virt(info, buf, bufsz, base, memsz, 1);
 }
 
-static inline void set_phys_offset(uint64_t v, char *set_method)
+static inline void set_phys_offset(long v, char *set_method)
 {
if (arm64_mem.phys_offset == arm64_mem_ngv
|| v < arm64_mem.phys_offset) {
@@ -928,7 +928,7 @@ static int get_page_offset(void)
  * from VMCOREINFO note inside 'kcore'.
  */
 
-static int get_phys_offset_from_vmcoreinfo_pt_note(unsigned long *phys_offset)
+static int get_phys_offset_from_vmcoreinfo_pt_note(long *phys_offset)
 {
int fd, ret = 0;
 
@@ -948,7 +948,7 @@ static int get_phys_offset_from_vmcoreinfo_pt_note(unsigned 
long *phys_offset)
  * from PT_LOADs inside 'kcore'.
  */
 
-int get_phys_base_from_pt_load(unsigned long *phys_offset)
+int get_phys_base_from_pt_load(long *phys_offset)
 {
int i, fd, ret;
unsigned long long phys_start;
@@ -997,7 +997,7 @@ static bool to_be_excluded(char *str)
 int get_memory_ranges(struct memory_range **range, int *ranges,
unsigned long kexec_flags)
 {
-   unsigned long phys_offset = UINT64_MAX;
+   long phys_offset = (long)UINT64_MAX;
FILE *fp;
const char *iomem = proc_iomem();
char line[MAX_LINE], *str;
diff --git a/kexec/arch/arm64/kexec-arm64.h b/kexec/arch/arm64/kexec-arm64.h
index ed447ac..1844b67 100644
--- a/kexec/arch/arm64/kexec-arm64.h
+++ b/kexec/arch/arm64/kexec-arm64.h
@@ -58,7 +58,7 @@ extern off_t initrd_size;
  */
 
 struct arm64_mem {
-   uint64_t phys_offset;
+   long phys_offset;
uint64_t text_offset;
uint64_t image_size;
uint64_t vp_offset;
diff --git a/util_lib/elf_info.c b/util_lib/elf_info.c
index 51d8b92..5574c7f 100644
--- a/util_lib/elf_info.c
+++ b/util_lib/elf_info.c
@@ -1236,7 +1236,7 @@ int read_elf(int fd)
return 0;
 }
 
-int read_phys_offset_elf_kcore(int fd, unsigned long *phys_off)
+int read_phys_offset_elf_kcore(int fd, long *phys_off)
 {
int ret;
 
diff --git a/util_lib/include/elf_info.h b/util_lib/include/elf_info.h
index 4bc9279..f550d86 100644
--- a/util_lib/include/elf_info.h
+++ b/util_lib/include/elf_info.h
@@ -28,7 +28,7 @@ int get_pt_load(int idx,
unsigned long long *phys_end,
unsigned long long *virt_start,
unsigned long long *virt_end);
-int read_phys_offset_elf_kcore(int fd, unsigned long *phys_off);
+int read_phys_offset_elf_kcore(int fd, long *phys_off);
 int read_elf(int fd);
 void dump_dmesg(int fd, void (*handler)(char*, unsigned int));
 
-- 
2.31.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCHv2 0/3] arm64: make phys_to_virt() correct

2021-12-16 Thread Pingfan Liu
Currently phys_to_virt() does not work well on 52-bits VA arm64 kernel.
One issue is contributed by phys_offset not signed.
The other is contributed by wrong page_offset.

v1 -> v2
Fix broken patch [2/3] in v1
Move arch_scan_vmcoreinfo declaration to util_lib/include/elf_info.h
Using UINT64_MAX instread of 0x



Pingfan Liu (3):
  arm64: make phys_offset signed
  arm64/crashdump: unify routine to get page_offset
  arm64: read VA_BITS from kcore for 52-bits VA kernel

 kexec/arch/arm64/crashdump-arm64.c | 23 +-
 kexec/arch/arm64/kexec-arm64.c | 48 +++---
 kexec/arch/arm64/kexec-arm64.h |  3 +-
 util_lib/elf_info.c|  7 -
 util_lib/include/elf_info.h|  3 +-
 5 files changed, 48 insertions(+), 36 deletions(-)

-- 
2.31.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v17 03/10] x86: kdump: use macro CRASH_ADDR_LOW_MAX in functions reserve_crashkernel()

2021-12-16 Thread Leizhen (ThunderTown)



On 2021/12/16 22:48, Borislav Petkov wrote:
> On Thu, Dec 16, 2021 at 08:08:30PM +0800, Leizhen (ThunderTown) wrote:
>> If the memory of 'crash_base' is successfully allocated at (1), because the 
>> last
>> parameter CRASH_ADDR_LOW_MAX is the upper bound, so we can sure that
>> "crash_base < CRASH_ADDR_LOW_MAX". So that, reserve_crashkernel_low() will 
>> not be
>> invoked at (3). That's why I said (1ULL << 32) is inaccurate and enlarge the 
>> CRASH_ADDR_LOW
>> upper limit.
> 
> No, this is actually wrong - that check *must* be 4G. See:
> 
>   eb6db83d1059 ("x86/setup: Do not reserve crashkernel high memory if low 
> reservation failed")
> 
> It is even documented:
> 
> crashkernel=size[KMG],low
> [KNL, X86-64] range under 4G. When crashkernel=X,high

[KNL, X86-64], This doc is for X86-64, not for X86-32

> is passed, kernel could allocate physical memory 
> region
> above 4G, that cause second kernel crash on system
> that require some amount of low memory, e.g. swiotlb
> requires at least 64M+32K low memory, also enough 
> extra
> low memory is needed to make sure DMA buffers for 
> 32-bit
> devices won't run out.

vi arch/x86/kernel/setup.c +398

/*
 * Keep the crash kernel below this limit.
 *
 * Earlier 32-bits kernels would limit the kernel to the low 512 MB range
 * due to mapping restrictions.

If there is no such restriction, we can make CRASH_ADDR_LOW_MAX equal to (1ULL 
<< 32) minus 1 on X86_32.

> 
> so you need to do a low allocation for DMA *when* the reserved memory is
> above 4G. *NOT* above 512M. But that works due to the obscure situation,
> as Baoquan stated, that reserve_crashkernel_low() returns 0 on 32-bit.
> 
> So all this is telling us is that that function needs serious cleanup.
> 

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v17 03/10] x86: kdump: use macro CRASH_ADDR_LOW_MAX in functions reserve_crashkernel()

2021-12-16 Thread Borislav Petkov
On Thu, Dec 16, 2021 at 10:11:15PM +0800, Baoquan He wrote:
>  As for the code refactoring, it can be done in another patchset.

Well, I said "future work" before having seen where this patchset is
going. So no, in that case, the usual kernel development process is:
cleanups/fixes first - new features later.

You can see for yourself that piling more crap on what is an already
unreadable mess is only going to make it worse. So, in order to avoid
the maintenance nightmare, we clean up, streamline, document and then
add the new feature and all is shiny.

Thx.

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v17 05/10] x86: kdump: move reserve_crashkernel[_low]() into crash_core.c

2021-12-16 Thread Borislav Petkov
On Thu, Dec 16, 2021 at 09:15:18PM +0800, Leizhen (ThunderTown) wrote:
> I agree with you. This makes the code look clear. I will do it, try to
> post v18 next Monday.

Don't rush it: take your time, do it nice and clean, make sure each
patch does one logical thing only so that there are no bugs introduced
and the pile can is reviewable.

Thx.

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v17 03/10] x86: kdump: use macro CRASH_ADDR_LOW_MAX in functions reserve_crashkernel()

2021-12-16 Thread Borislav Petkov
On Thu, Dec 16, 2021 at 08:08:30PM +0800, Leizhen (ThunderTown) wrote:
> If the memory of 'crash_base' is successfully allocated at (1), because the 
> last
> parameter CRASH_ADDR_LOW_MAX is the upper bound, so we can sure that
> "crash_base < CRASH_ADDR_LOW_MAX". So that, reserve_crashkernel_low() will 
> not be
> invoked at (3). That's why I said (1ULL << 32) is inaccurate and enlarge the 
> CRASH_ADDR_LOW
> upper limit.

No, this is actually wrong - that check *must* be 4G. See:

  eb6db83d1059 ("x86/setup: Do not reserve crashkernel high memory if low 
reservation failed")

It is even documented:

crashkernel=size[KMG],low
[KNL, X86-64] range under 4G. When crashkernel=X,high
is passed, kernel could allocate physical memory region
above 4G, that cause second kernel crash on system
that require some amount of low memory, e.g. swiotlb
requires at least 64M+32K low memory, also enough extra
low memory is needed to make sure DMA buffers for 32-bit
devices won't run out.

so you need to do a low allocation for DMA *when* the reserved memory is
above 4G. *NOT* above 512M. But that works due to the obscure situation,
as Baoquan stated, that reserve_crashkernel_low() returns 0 on 32-bit.

So all this is telling us is that that function needs serious cleanup.

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v17 03/10] x86: kdump: use macro CRASH_ADDR_LOW_MAX in functions reserve_crashkernel()

2021-12-16 Thread Baoquan He
On 12/16/21 at 11:55am, Borislav Petkov wrote:
> On Thu, Dec 16, 2021 at 09:10:40AM +0800, Baoquan He wrote:
> > reserve_crashkernel_low() always return 0 on x86_32, so the not equivalent
> > transformation for x86_32 doesn't matter, I think.
> 
> That is, of course, very obvious... not!
> 
> Why is that function parsing crashkernel=XM, and crashkernel=X,high,
> then it attempts some low memory reservation too? Why isn't
> crashkernel=Y,low parsed there too?
> 
> I guess this alludes to why:
> 
> crashkernel=size[KMG],low
> [KNL, X86-64] range under 4G. When crashkernel=X,high
> is passed, kernel could allocate physical memory 
> region
> above 4G, that cause second kernel crash on system
> that require some amount of low memory, e.g. swiotlb
> requires at least 64M+32K low memory, also enough 
> extra
> low memory is needed to make sure DMA buffers for 
> 32-bit
> devices won't run out.
> 
> So, before this is going anywhere, I'd like to see this function
> documented properly. I see Documentation/admin-guide/kdump/kdump.rst
> explains the crashkernel= options too so you can refer to it in the
> comments so that when someone looks at that code, someone can follow why
> it is doing what it is doing.
> 
> Then, as a future work, all that parsing of crashkernel= cmdline options
> should be concentrated at the beginning and once it is clear what the
> user requests, the reservations should be done.

Totally agree we should refactor code to make reserve_crashkernel()
clearer on logic and readibility. In this patchset, we can rewrite the
kernel-doc of reserve_crashkernel() to add more words to explain. As for
the code refactoring, it can be done in another patchset.

> 
> As it is, reserve_crashkernel() is pretty unwieldy and hard to read.


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v17 05/10] x86: kdump: move reserve_crashkernel[_low]() into crash_core.c

2021-12-16 Thread Leizhen (ThunderTown)



On 2021/12/16 19:17, Borislav Petkov wrote:
> On Fri, Dec 10, 2021 at 02:55:28PM +0800, Zhen Lei wrote:
>> + * reserve_crashkernel() - reserves memory for crash kernel
>> + *
>> + * This function reserves memory area given in "crashkernel=" kernel command
>> + * line parameter. The memory reserved is used by dump capture kernel when
>> + * primary kernel is crashing.
>> + */
>> +void __init reserve_crashkernel(void)
> 
> As I've already alluded to in another mail, ontop of this there should
> be a patch or multiple patches which clean this up more and perhaps even
> split it into separate functions doing stuff in this order:
> 
> 1. Parse all crashkernel= cmdline options
> 
> 2. Do all crash_base, crash_size etc checks
> 
> 3. Do the memory reservations
> 
> And all that supplied with comments explaining why stuff is being done.

I agree with you. This makes the code look clear. I will do it, try to
post v18 next Monday.

> 
> This set of functions is a mess and there's no better time for cleaning
> it up and documenting it properly than when you move it to generic code.
> 
> Thx.
> 

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v17 03/10] x86: kdump: use macro CRASH_ADDR_LOW_MAX in functions reserve_crashkernel()

2021-12-16 Thread Leizhen (ThunderTown)



On 2021/12/16 20:08, Leizhen (ThunderTown) wrote:
> 
> 
> On 2021/12/16 19:07, Borislav Petkov wrote:
>> On Thu, Dec 16, 2021 at 10:46:12AM +0800, Leizhen (ThunderTown) wrote:
>>> The original value (1ULL << 32) is inaccurate
>>
>> I keep asking *why*?
>>
>>> and it enlarged the CRASH_ADDR_LOW upper limit.
>>
>> $ git grep -E "CRASH_ADDR_LOW\W"
>> $
>>
>> I have no clue what you mean here.
> 
> #ifdef CONFIG_X86_32
> # define CRASH_ADDR_LOW_MAX SZ_512M
> # define CRASH_ADDR_HIGH_MAXSZ_512M
> #endif
> 
>   if (!high)
> (1) crash_base = memblock_phys_alloc_range(crash_size,
> CRASH_ALIGN, CRASH_ALIGN,
> CRASH_ADDR_LOW_MAX);
> if (!crash_base)
> (2) crash_base = memblock_phys_alloc_range(crash_size,
> CRASH_ALIGN, CRASH_ALIGN,
> CRASH_ADDR_HIGH_MAX);
> 
> - if (crash_base >= (1ULL << 32) && reserve_crashkernel_low())
> +(3)  if (crash_base >= CRASH_ADDR_LOW_MAX && reserve_crashkernel_low())
> 
> If the memory of 'crash_base' is successfully allocated at (1), because the 
> last
> parameter CRASH_ADDR_LOW_MAX is the upper bound, so we can sure that
> "crash_base < CRASH_ADDR_LOW_MAX". So that, reserve_crashkernel_low() will 
> not be
> invoked at (3). That's why I said (1ULL << 32) is inaccurate and enlarge the 
> CRASH_ADDR_LOW
> upper limit.
> 
> If the memory of 'crash_base' is successfully allocated at (2), you see,
> CRASH_ADDR_HIGH_MAX = CRASH_ADDR_LOW_MAX = SZ_512M, the same as (1). In fact,
> "crashkernel=high," may not be recommended on X86_32.
> 
> Is it possible that (CRASH_ADDR_HIGH_MAX >= 4G) and (CRASH_ADDR_LOW_MAX < 4G)?
> In this case, the memory allocated at (2) maybe over 4G. But why shouldn't
> CRASH_ADDR_LOW_MAX be equal to 4G at this point?

We divide two memory areas: low memory area and high memory area. The doc told 
us:
at least 256MB memory should be reserved at low memory area. So that if
"crash_base >= CRASH_ADDR_LOW_MAX" is true at (3), that means we have not 
reserved
any memory at low memory area, so we should call reserve_crashkernel_low().
The low memory area is not equivalent to <=4G, I think. So replace (1ULL << 32) 
with
CRASH_ADDR_LOW_MAX is logically correct.

> 
> 
>>
>>> This is because when the memory is allocated from the low end, the
>>> address cannot exceed CRASH_ADDR_LOW_MAX, see "if (!high)" branch.
>>
>>> If
>>> the memory is allocated from the high end, 'crash_base' is greater than or
>>> equal to (1ULL << 32), and naturally, it is greater than CRASH_ADDR_LOW_MAX.
>>>
>>> I think I should update the description, thanks.
>>
>> I think you should explain why is (1ULL << 32) wrong.
>>
>> It came from:
>>
>>   eb6db83d1059 ("x86/setup: Do not reserve crashkernel high memory if low 
>> reservation failed")
>>
>> which simply frees the high memory portion when the low reservation
>> fails. And the test for that is, is crash base > 4G. So that makes
>> perfect sense to me.
>>
>> So your change is a NOP on 64-bit and it is a NOP on 32-bit by virtue of
>> the _low() variant always returning 0 on 32-bit.
>>

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v3 5/5] s390: add support for --reuse-cmdline

2021-12-16 Thread Sven Schnelle
--reuse-cmdline reads the command line of the currently
running kernel from /proc/cmdline and uses that for the
kernel that should be kexec'd.

Signed-off-by: Sven Schnelle 
Reviewed-by: Alexander Egorenkov 
---
 kexec/arch/s390/include/arch/options.h | 10 ++
 kexec/arch/s390/kexec-image.c  |  9 +
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/kexec/arch/s390/include/arch/options.h 
b/kexec/arch/s390/include/arch/options.h
index 76044a301ceb..c150244996c7 100644
--- a/kexec/arch/s390/include/arch/options.h
+++ b/kexec/arch/s390/include/arch/options.h
@@ -1,9 +1,10 @@
 #ifndef KEXEC_ARCH_S390_OPTIONS_H
 #define KEXEC_ARCH_S390_OPTIONS_H
 
-#define OPT_ARCH_MAX   (OPT_MAX+0)
-#define OPT_APPEND OPT_MAX+0
-#define OPT_RAMDISKOPT_MAX+1
+#define OPT_ARCH_MAX   (OPT_MAX+0)
+#define OPT_APPEND (OPT_MAX+0)
+#define OPT_RAMDISK(OPT_MAX+1)
+#define OPT_REUSE_CMDLINE  (OPT_MAX+2)
 
 /* Options relevant to the architecture (excluding loader-specific ones),
  * in this case none:
@@ -31,7 +32,8 @@
KEXEC_ARCH_OPTIONS  \
{"command-line", 1, 0, OPT_APPEND}, \
{"append",   1, 0, OPT_APPEND}, \
-   {"initrd",   1, 0, OPT_RAMDISK},
+   {"initrd",   1, 0, OPT_RAMDISK},\
+   {"reuse-cmdline",0, 0, OPT_REUSE_CMDLINE },
 
 #define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR
 
diff --git a/kexec/arch/s390/kexec-image.c b/kexec/arch/s390/kexec-image.c
index 209ab77ddccb..69aaf96812f7 100644
--- a/kexec/arch/s390/kexec-image.c
+++ b/kexec/arch/s390/kexec-image.c
@@ -71,6 +71,10 @@ int image_s390_load_file(int argc, char **argv, struct 
kexec_info *info)
case OPT_RAMDISK:
ramdisk = optarg;
break;
+   case OPT_REUSE_CMDLINE:
+   free(info->command_line);
+   info->command_line = get_command_line();
+   break;
}
}
 
@@ -123,6 +127,10 @@ image_s390_load(int argc, char **argv, const char 
*kernel_buf,
if (command_line_add(info, optarg))
return -1;
break;
+   case OPT_REUSE_CMDLINE:
+   free(info->command_line);
+   info->command_line = get_command_line();
+   break;
case OPT_RAMDISK:
ramdisk = optarg;
break;
@@ -223,5 +231,6 @@ image_s390_usage(void)
printf("--command-line=STRING Set the kernel command line to STRING.\n"
   "--append=STRING   Set the kernel command line to STRING.\n"
   "--initrd=FILENAME Use the file FILENAME as a ramdisk.\n"
+  "--reuse-cmdline   Use kernel command line from running 
system.\n"
);
 }
-- 
2.32.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v17 03/10] x86: kdump: use macro CRASH_ADDR_LOW_MAX in functions reserve_crashkernel()

2021-12-16 Thread Leizhen (ThunderTown)



On 2021/12/16 19:07, Borislav Petkov wrote:
> On Thu, Dec 16, 2021 at 10:46:12AM +0800, Leizhen (ThunderTown) wrote:
>> The original value (1ULL << 32) is inaccurate
> 
> I keep asking *why*?
> 
>> and it enlarged the CRASH_ADDR_LOW upper limit.
> 
> $ git grep -E "CRASH_ADDR_LOW\W"
> $
> 
> I have no clue what you mean here.

#ifdef CONFIG_X86_32
# define CRASH_ADDR_LOW_MAX SZ_512M
# define CRASH_ADDR_HIGH_MAXSZ_512M
#endif

if (!high)
(1) crash_base = memblock_phys_alloc_range(crash_size,
CRASH_ALIGN, CRASH_ALIGN,
CRASH_ADDR_LOW_MAX);
if (!crash_base)
(2) crash_base = memblock_phys_alloc_range(crash_size,
CRASH_ALIGN, CRASH_ALIGN,
CRASH_ADDR_HIGH_MAX);

-   if (crash_base >= (1ULL << 32) && reserve_crashkernel_low())
+(3)if (crash_base >= CRASH_ADDR_LOW_MAX && reserve_crashkernel_low())

If the memory of 'crash_base' is successfully allocated at (1), because the last
parameter CRASH_ADDR_LOW_MAX is the upper bound, so we can sure that
"crash_base < CRASH_ADDR_LOW_MAX". So that, reserve_crashkernel_low() will not 
be
invoked at (3). That's why I said (1ULL << 32) is inaccurate and enlarge the 
CRASH_ADDR_LOW
upper limit.

If the memory of 'crash_base' is successfully allocated at (2), you see,
CRASH_ADDR_HIGH_MAX = CRASH_ADDR_LOW_MAX = SZ_512M, the same as (1). In fact,
"crashkernel=high," may not be recommended on X86_32.

Is it possible that (CRASH_ADDR_HIGH_MAX >= 4G) and (CRASH_ADDR_LOW_MAX < 4G)?
In this case, the memory allocated at (2) maybe over 4G. But why shouldn't
CRASH_ADDR_LOW_MAX be equal to 4G at this point?


> 
>> This is because when the memory is allocated from the low end, the
>> address cannot exceed CRASH_ADDR_LOW_MAX, see "if (!high)" branch.
> 
>> If
>> the memory is allocated from the high end, 'crash_base' is greater than or
>> equal to (1ULL << 32), and naturally, it is greater than CRASH_ADDR_LOW_MAX.
>>
>> I think I should update the description, thanks.
> 
> I think you should explain why is (1ULL << 32) wrong.
> 
> It came from:
> 
>   eb6db83d1059 ("x86/setup: Do not reserve crashkernel high memory if low 
> reservation failed")
> 
> which simply frees the high memory portion when the low reservation
> fails. And the test for that is, is crash base > 4G. So that makes
> perfect sense to me.
> 
> So your change is a NOP on 64-bit and it is a NOP on 32-bit by virtue of
> the _low() variant always returning 0 on 32-bit.
> 

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v3 2/5] s390: use KEXEC_ALL_OPTIONS

2021-12-16 Thread Sven Schnelle
KEXEC_ALL_OPTIONS could be used instead defining the same
array several times. This makes code easier to maintain when
new options are added.

Suggested-by: Alexander Egorenkov 
Signed-off-by: Sven Schnelle 
Reviewed-by: Alexander Egorenkov 
---
 kexec/arch/s390/kexec-image.c | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/kexec/arch/s390/kexec-image.c b/kexec/arch/s390/kexec-image.c
index a52399eafd2a..209ab77ddccb 100644
--- a/kexec/arch/s390/kexec-image.c
+++ b/kexec/arch/s390/kexec-image.c
@@ -57,10 +57,7 @@ int image_s390_load_file(int argc, char **argv, struct 
kexec_info *info)
 
static const struct option options[] =
{
-   KEXEC_OPTIONS
-   {"command-line", 1, 0, OPT_APPEND},
-   {"append",   1, 0, OPT_APPEND},
-   {"initrd",   1, 0, OPT_RAMDISK},
+   KEXEC_ALL_OPTIONS
{0,  0, 0, 0},
};
static const char short_options[] = KEXEC_OPT_STR "";
@@ -111,10 +108,7 @@ image_s390_load(int argc, char **argv, const char 
*kernel_buf,
 
static const struct option options[] =
{
-   KEXEC_OPTIONS
-   {"command-line", 1, 0, OPT_APPEND},
-   {"append",   1, 0, OPT_APPEND},
-   {"initrd",   1, 0, OPT_RAMDISK},
+   KEXEC_ALL_OPTIONS
{0,  0, 0, 0},
};
static const char short_options[] = KEXEC_OPT_STR "";
-- 
2.32.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v3 4/5] use slurp_proc_file() in get_command_line()

2021-12-16 Thread Sven Schnelle
This way the size of the command line that get_command_line() can handle
is no longer fixed.

Signed-off-by: Sven Schnelle 
---
 kexec/kexec.c | 26 ++
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/kexec/kexec.c b/kexec/kexec.c
index f3adac517161..7e4787bc8211 100644
--- a/kexec/kexec.c
+++ b/kexec/kexec.c
@@ -1172,25 +1172,19 @@ static char *slurp_proc_file(const char *filename, 
size_t *len)
  */
 char *get_command_line(void)
 {
-   FILE *fp;
-   char *line;
-   const int sizeof_line = 2048;
-
-   line = malloc(sizeof_line);
-   if (line == NULL)
-   die("Could not allocate memory to read /proc/cmdline.");
-
-   fp = fopen("/proc/cmdline", "r");
-   if (!fp)
-   die("Could not open /proc/cmdline.");
-
-   if (fgets(line, sizeof_line, fp) == NULL)
-   die("Can't read /proc/cmdline.");
+   char *p, *line;
+   size_t size;
 
-   fclose(fp);
+   line = slurp_proc_file("/proc/cmdline", );
+   if (!line || !size)
+   die("Failed to read /proc/cmdline\n");
 
/* strip newline */
-   line[strlen(line) - 1] = '\0';
+   line[size-1] = '\0';
+
+   p = strpbrk(line, "\r\n");
+   if (p)
+   *p = '\0';
 
remove_parameter(line, "BOOT_IMAGE");
if (kexec_flags & KEXEC_ON_CRASH)
-- 
2.32.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v3 1/5] s390: add variable command line size

2021-12-16 Thread Sven Schnelle
Newer s390 kernels support a command line size longer than 896
bytes. Such kernels contain a new member in the parameter area,
which might be utilized by tools like kexec. Older kernels have
the location initialized to zero, so we check whether there's a
non-zero number present and use that. If there isn't, we fallback
to the legacy command line size of 896 bytes.

Signed-off-by: Sven Schnelle 
Reviewed-by: Alexander Egorenkov 
---
 kexec/arch/s390/crashdump-s390.c |  3 +-
 kexec/arch/s390/kexec-image.c| 65 +---
 kexec/arch/s390/kexec-s390.h | 21 ++-
 3 files changed, 55 insertions(+), 34 deletions(-)

diff --git a/kexec/arch/s390/crashdump-s390.c b/kexec/arch/s390/crashdump-s390.c
index 10f4d607bbcc..3bd9efe6dafe 100644
--- a/kexec/arch/s390/crashdump-s390.c
+++ b/kexec/arch/s390/crashdump-s390.c
@@ -52,7 +52,8 @@ static int create_elf_header(struct kexec_info *info, 
unsigned long crash_base,
elfcorehdr_size = bufsz;
snprintf(str, sizeof(str), " elfcorehdr=%ld@%ldK\n",
 elfcorehdr_size, elfcorehdr / 1024);
-   command_line_add(str);
+   if (command_line_add(info, str))
+   return -1;
 #endif
return 0;
 }
diff --git a/kexec/arch/s390/kexec-image.c b/kexec/arch/s390/kexec-image.c
index 3c24fdfe3c7c..a52399eafd2a 100644
--- a/kexec/arch/s390/kexec-image.c
+++ b/kexec/arch/s390/kexec-image.c
@@ -25,7 +25,6 @@
 #include 
 
 static uint64_t crash_base, crash_end;
-static char command_line[COMMAND_LINESIZE];
 
 static void add_segment_check(struct kexec_info *info, const void *buf,
  size_t bufsz, unsigned long base, size_t memsz)
@@ -36,13 +35,18 @@ static void add_segment_check(struct kexec_info *info, 
const void *buf,
add_segment(info, buf, bufsz, crash_base + base, memsz);
 }
 
-int command_line_add(const char *str)
+int command_line_add(struct kexec_info *info, const char *str)
 {
-   if (strlen(command_line) + strlen(str) + 1 > COMMAND_LINESIZE) {
-   fprintf(stderr, "Command line too long.\n");
+   char *tmp = NULL;
+
+   tmp = concat_cmdline(info->command_line, str);
+   if (!tmp) {
+   fprintf(stderr, "out of memory\n");
return -1;
}
-   strcat(command_line, str);
+
+   free(info->command_line);
+   info->command_line = tmp;
return 0;
 }
 
@@ -64,7 +68,7 @@ int image_s390_load_file(int argc, char **argv, struct 
kexec_info *info)
while ((opt = getopt_long(argc, argv, short_options, options, 0)) != 
-1) {
switch(opt) {
case OPT_APPEND:
-   if (command_line_add(optarg))
+   if (command_line_add(info, optarg))
return -1;
break;
case OPT_RAMDISK:
@@ -78,13 +82,16 @@ int image_s390_load_file(int argc, char **argv, struct 
kexec_info *info)
if (info->initrd_fd == -1) {
fprintf(stderr, "Could not open initrd file %s:%s\n",
ramdisk, strerror(errno));
+   free(info->command_line);
+   info->command_line = NULL;
return -1;
}
}
 
-   info->command_line = command_line;
-   info->command_line_len = strlen (command_line) + 1;
-
+   if (info->command_line)
+   info->command_line_len = strlen(info->command_line) + 1;
+   else
+   info->command_line_len = 0;
return 0;
 }
 
@@ -97,7 +104,7 @@ image_s390_load(int argc, char **argv, const char 
*kernel_buf,
const char *ramdisk;
off_t ramdisk_len;
unsigned int ramdisk_origin;
-   int opt;
+   int opt, ret = -1;
 
if (info->file_mode)
return image_s390_load_file(argc, argv, info);
@@ -112,7 +119,6 @@ image_s390_load(int argc, char **argv, const char 
*kernel_buf,
};
static const char short_options[] = KEXEC_OPT_STR "";
 
-   command_line[0] = 0;
ramdisk = NULL;
ramdisk_len = 0;
ramdisk_origin = 0;
@@ -120,7 +126,7 @@ image_s390_load(int argc, char **argv, const char 
*kernel_buf,
while ((opt = getopt_long(argc,argv,short_options,options,0)) != -1) {
switch(opt) {
case OPT_APPEND:
-   if (command_line_add(optarg))
+   if (command_line_add(info, optarg))
return -1;
break;
case OPT_RAMDISK:
@@ -132,7 +138,7 @@ image_s390_load(int argc, char **argv, const char 
*kernel_buf,
if (info->kexec_flags & KEXEC_ON_CRASH) {
if (parse_iomem_single("Crash kernel\n", _base,
   _end))
-   return -1;
+   goto out;
}
 
/* Add kernel segment 

[PATCH v3 3/5] add slurp_proc_file()

2021-12-16 Thread Sven Schnelle
slurp_file() cannot be used to read proc files, as they are returning
a size of zero in stat(). Add a function slurp_proc_file() which is
similar to slurp_file(), but doesn't require the size of the file to
be known.

Signed-off-by: Sven Schnelle 
---
 kexec/kexec.c | 51 +++
 1 file changed, 51 insertions(+)

diff --git a/kexec/kexec.c b/kexec/kexec.c
index f63b36b771eb..f3adac517161 100644
--- a/kexec/kexec.c
+++ b/kexec/kexec.c
@@ -1106,6 +1106,57 @@ static void remove_parameter(char *line, const char 
*param_name)
}
 }
 
+static ssize_t _read(int fd, void *buf, size_t count)
+{
+   ssize_t ret, offset = 0;
+
+   do {
+   ret = read(fd, buf + offset, count - offset);
+   if (ret < 0) {
+   if ((errno == EINTR) || (errno == EAGAIN))
+   continue;
+   return ret;
+   }
+   offset += ret;
+   } while (ret && offset < count);
+
+   return offset;
+}
+
+static char *slurp_proc_file(const char *filename, size_t *len)
+{
+   ssize_t ret, startpos = 0;
+   unsigned int size = 64;
+   char *buf = NULL, *tmp;
+   int fd;
+
+   fd = open(filename, O_RDONLY);
+   if (fd == -1)
+   return NULL;
+
+   do {
+   size *= 2;
+   tmp = realloc(buf, size);
+   if (!tmp) {
+   free(buf);
+   return NULL;
+   }
+   buf = tmp;
+
+   ret = _read(fd, buf + startpos, size - startpos);
+   if (ret < 0) {
+   free(buf);
+   return NULL;
+   }
+
+   startpos += ret;
+
+   } while(ret);
+
+   *len = startpos;
+   return buf;
+}
+
 /*
  * Returns the contents of the current command line to be used with
  * --reuse-cmdline option.  The function gets called from architecture specific
-- 
2.32.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v3 0/5] s390: add support for extended cmdline length

2021-12-16 Thread Sven Schnelle
Hi,

this patchset adds support for extended command line lengths on s390.
The former limit of 896 has been too short in a some configurations. The
linux kernel now provides an additional field in the parameter area
which contains the maximum allowed command line length. In older kernels
this field is zero. In that case the old limit of 896 bytes is used.
This was introduced with 5ecb2da660ab ("s390: support command lines
longer than 896 bytes") in linux.

And while at it, also add the --reuse-cmdline option and use
KEXEC_ALL_OPTIONS.

Changes in v3:
- handle short reads and interruptions in slurp_proc_file()

Changes in v2:

- add slurp_proc_file() to read proc files without knowing the size upfront
- move command_line string from global to function scope

Sven Schnelle (5):
  s390: add variable command line size
  s390: use KEXEC_ALL_OPTIONS
  add slurp_proc_file()
  use slurp_proc_file() in get_command_line()
  s390: add support for --reuse-cmdline

 kexec/arch/s390/crashdump-s390.c   |  3 +-
 kexec/arch/s390/include/arch/options.h | 10 +--
 kexec/arch/s390/kexec-image.c  | 84 --
 kexec/arch/s390/kexec-s390.h   | 21 ---
 kexec/kexec.c  | 77 ++-
 5 files changed, 133 insertions(+), 62 deletions(-)

-- 
2.32.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v17 05/10] x86: kdump: move reserve_crashkernel[_low]() into crash_core.c

2021-12-16 Thread Borislav Petkov
On Fri, Dec 10, 2021 at 02:55:28PM +0800, Zhen Lei wrote:
> + * reserve_crashkernel() - reserves memory for crash kernel
> + *
> + * This function reserves memory area given in "crashkernel=" kernel command
> + * line parameter. The memory reserved is used by dump capture kernel when
> + * primary kernel is crashing.
> + */
> +void __init reserve_crashkernel(void)

As I've already alluded to in another mail, ontop of this there should
be a patch or multiple patches which clean this up more and perhaps even
split it into separate functions doing stuff in this order:

1. Parse all crashkernel= cmdline options

2. Do all crash_base, crash_size etc checks

3. Do the memory reservations

And all that supplied with comments explaining why stuff is being done.

This set of functions is a mess and there's no better time for cleaning
it up and documenting it properly than when you move it to generic code.

Thx.

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v17 03/10] x86: kdump: use macro CRASH_ADDR_LOW_MAX in functions reserve_crashkernel()

2021-12-16 Thread Borislav Petkov
On Thu, Dec 16, 2021 at 10:46:12AM +0800, Leizhen (ThunderTown) wrote:
> The original value (1ULL << 32) is inaccurate

I keep asking *why*?

> and it enlarged the CRASH_ADDR_LOW upper limit.

$ git grep -E "CRASH_ADDR_LOW\W"
$

I have no clue what you mean here.

> This is because when the memory is allocated from the low end, the
> address cannot exceed CRASH_ADDR_LOW_MAX, see "if (!high)" branch.

> If
> the memory is allocated from the high end, 'crash_base' is greater than or
> equal to (1ULL << 32), and naturally, it is greater than CRASH_ADDR_LOW_MAX.
> 
> I think I should update the description, thanks.

I think you should explain why is (1ULL << 32) wrong.

It came from:

  eb6db83d1059 ("x86/setup: Do not reserve crashkernel high memory if low 
reservation failed")

which simply frees the high memory portion when the low reservation
fails. And the test for that is, is crash base > 4G. So that makes
perfect sense to me.

So your change is a NOP on 64-bit and it is a NOP on 32-bit by virtue of
the _low() variant always returning 0 on 32-bit.

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 3/3] arm64: read VA_BITS from kcore for 52-bits VA kernel

2021-12-16 Thread Simon Horman
On Thu, Dec 16, 2021 at 09:59:42AM +0800, Pingfan Liu wrote:
> On Wed, Dec 15, 2021 at 9:06 PM signed-off...@vergenet.net:Simon
> Horman  wrote:
> >
> > On Fri, Dec 10, 2021 at 11:07:35AM +0800, Pingfan Liu wrote:
> > > phys_to_virt() calculates virtual address. As a important factor,
> > > page_offset is excepted to be accurate.
> > >
> > > Since arm64 kernel exposes va_bits through vmcore, using it.
> > >
> > > Signed-off-by: Pingfan Liu 
> > > ---
> > >  kexec/arch/arm64/kexec-arm64.c | 31 +++
> > >  kexec/arch/arm64/kexec-arm64.h |  1 +
> > >  util_lib/elf_info.c|  5 +
> > >  3 files changed, 33 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/kexec/arch/arm64/kexec-arm64.c 
> > > b/kexec/arch/arm64/kexec-arm64.c
> > > index bd650e6..ccc92db 100644
> > > --- a/kexec/arch/arm64/kexec-arm64.c
> > > +++ b/kexec/arch/arm64/kexec-arm64.c
> > > @@ -54,7 +54,7 @@
> > >  static bool try_read_phys_offset_from_kcore = false;
> > >
> > >  /* Machine specific details. */
> > > -static int va_bits;
> > > +static int va_bits = -1;
> > >  static unsigned long page_offset;
> > >
> > >  /* Global varables the core kexec routines expect. */
> > > @@ -876,7 +876,15 @@ static inline void set_phys_offset(long v, char 
> > > *set_method)
> > >
> > >  static int get_va_bits(void)
> > >  {
> > > - unsigned long long stext_sym_addr = get_kernel_sym("_stext");
> > > + unsigned long long stext_sym_addr;
> > > +
> > > + /*
> > > +  * if already got from kcore
> > > +  */
> > > + if (va_bits != -1)
> > > + goto out;
> >
> > If va_bits is exposed by the kernel then it will be used.
> > Else we continue here. Are there actually cases (old kernels) where
> > we expect to continue. Or could we get rid of the fallback code here?
> >
> 
> va_bits is exposed by kernel commit 84c57dbd3c48 ("arm64: kernel:
> arch_crash_save_vmcoreinfo() should depend on CONFIG_CRASH_CORE")
> And the first kernel which contains VA_BITS is v4.19.
> I have no idea about the need for old kernels. Maybe just keep the
> compatible code and throw a warning to remind users?

I suspect that kexec for ARM64 was present in v4.19, so we
should leave this part as-is.

> > > +
> > > + stext_sym_addr = get_kernel_sym("_stext");
> > >
> > >   if (stext_sym_addr == 0) {
> > >   fprintf(stderr, "Can't get the symbol of _stext.\n");
> > > @@ -900,6 +908,7 @@ static int get_va_bits(void)
> > >   return -1;
> > >   }
> > >
> > > +out:
> > >   dbgprintf("va_bits : %d\n", va_bits);
> > >
> > >   return 0;
> > > @@ -917,14 +926,27 @@ int get_page_offset(unsigned long *page_offset)
> > >   if (ret < 0)
> > >   return ret;
> > >
> >
> > I'm confused about why there is both a (va_bits - 1)
> > and va_bits case here.
> >
> 
> It originates from the changes of memory layout on arm64.
> And mostly it is contributed by kernel commit 14c127c957c1 ("arm64:
> mm: Flip kernel VA space"),
> where sees the changes:
> -#define PAGE_OFFSET(UL(0x) - \
> (UL(1) << (VA_BITS - 1)) + 1)
> +#define PAGE_OFFSET(UL(0x) - \
> +   (UL(1) << VA_BITS) + 1)
> 
> 
> Thanks,
> 
> Pingfan
> 

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v17 03/10] x86: kdump: use macro CRASH_ADDR_LOW_MAX in functions reserve_crashkernel()

2021-12-16 Thread Borislav Petkov
On Thu, Dec 16, 2021 at 09:10:40AM +0800, Baoquan He wrote:
> reserve_crashkernel_low() always return 0 on x86_32, so the not equivalent
> transformation for x86_32 doesn't matter, I think.

That is, of course, very obvious... not!

Why is that function parsing crashkernel=XM, and crashkernel=X,high,
then it attempts some low memory reservation too? Why isn't
crashkernel=Y,low parsed there too?

I guess this alludes to why:

crashkernel=size[KMG],low
[KNL, X86-64] range under 4G. When crashkernel=X,high
is passed, kernel could allocate physical memory region
above 4G, that cause second kernel crash on system
that require some amount of low memory, e.g. swiotlb
requires at least 64M+32K low memory, also enough extra
low memory is needed to make sure DMA buffers for 32-bit
devices won't run out.

So, before this is going anywhere, I'd like to see this function
documented properly. I see Documentation/admin-guide/kdump/kdump.rst
explains the crashkernel= options too so you can refer to it in the
comments so that when someone looks at that code, someone can follow why
it is doing what it is doing.

Then, as a future work, all that parsing of crashkernel= cmdline options
should be concentrated at the beginning and once it is clear what the
user requests, the reservations should be done.

As it is, reserve_crashkernel() is pretty unwieldy and hard to read.

Thx.

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v3 3/5] mm_zone: add function to check if managed dma zone exists

2021-12-16 Thread David Hildenbrand
On 13.12.21 13:27, Baoquan He wrote:
> In some places of the current kernel, it assumes that dma zone must have
> managed pages if CONFIG_ZONE_DMA is enabled. While this is not always true.
> E.g in kdump kernel of x86_64, only low 1M is presented and locked down
> at very early stage of boot, so that there's no managed pages at all in
> DMA zone. This exception will always cause page allocation failure if page
> is requested from DMA zone.
> 
> Here add function has_managed_dma() and the relevant helper functions to
> check if there's DMA zone with managed pages. It will be used in later
> patches.
> 
> Fixes: 6f599d84231f ("x86/kdump: Always reserve the low 1M when the 
> crashkernel option is specified")
> Cc: sta...@vger.kernel.org
> Signed-off-by: Baoquan He 
> ---
> v2->v3:
>  Rewrite has_managed_dma() in a simpler and more efficient way which is
>  sugggested by DavidH. 
> 
>  include/linux/mmzone.h |  9 +
>  mm/page_alloc.c| 15 +++
>  2 files changed, 24 insertions(+)
> 
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 58e744b78c2c..6e1b726e9adf 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -1046,6 +1046,15 @@ static inline int is_highmem_idx(enum zone_type idx)
>  #endif
>  }
>  
> +#ifdef CONFIG_ZONE_DMA
> +bool has_managed_dma(void);
> +#else
> +static inline bool has_managed_dma(void)
> +{
> + return false;
> +}
> +#endif
> +
>  /**
>   * is_highmem - helper function to quickly check if a struct zone is a
>   *  highmem zone or not.  This is an attempt to keep references
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index c5952749ad40..7c7a0b5de2ff 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -9460,3 +9460,18 @@ bool take_page_off_buddy(struct page *page)
>   return ret;
>  }
>  #endif
> +
> +#ifdef CONFIG_ZONE_DMA
> +bool has_managed_dma(void)
> +{
> + struct pglist_data *pgdat;
> +
> + for_each_online_pgdat(pgdat) {
> + struct zone *zone = >node_zones[ZONE_DMA];
> +
> + if (managed_zone(zone))
> + return true;
> + }
> + return false;
> +}
> +#endif /* CONFIG_ZONE_DMA */
> 

Reviewed-by: David Hildenbrand 

-- 
Thanks,

David / dhildenb


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 1/3] vmcore: Convert copy_oldmem_page() to take an iov_iter

2021-12-16 Thread Heiko Carstens
On Mon, Dec 13, 2021 at 08:57:25AM +0100, Christoph Hellwig wrote:
> On Mon, Dec 13, 2021 at 12:06:34AM +, Matthew Wilcox (Oracle) wrote:
> > Instead of passing in a 'buf' and 'userbuf' argument, pass in an iov_iter.
> > s390 needs more work to pass the iov_iter down further, or refactor,
> > but I'd be more comfortable if someone who can test on s390 did that work.
> > 
> > It's more convenient to convert the whole of read_from_oldmem() to
> > take an iov_iter at the same time, so rename it to read_from_oldmem_iter()
> > and add a temporary read_from_oldmem() wrapper that creates an iov_iter.
> 
> This looks pretty reasonable.  s390 could use some love from people that
> know the code, and yes, the kerneldoc comments should go away.

Sure, we will take care of this. Added to ToDo list.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v3 0/3] Convert vmcore to use an iov_iter

2021-12-16 Thread Baoquan He
On 12/13/21 at 02:39pm, Matthew Wilcox (Oracle) wrote:
> For some reason several people have been sending bad patches to fix
> compiler warnings in vmcore recently.  Here's how it should be done.
> Compile-tested only on x86.  As noted in the first patch, s390 should
> take this conversion a bit further, but I'm not inclined to do that
> work myself.

Add Philipp to the CC.

He used to work on s390 arch. Now he joins Redhat and will focus
on kexec/kdump. See if he has any thoughts on the s390 part of work, or
may reach out to s390 developer.

> 
> v3:
>  - Send the correct patches this time
> v2:
>  - Removed unnecessary kernel-doc
>  - Included uio.h to fix compilation problems
>  - Made read_from_oldmem_iter static to avoid compile warnings during the
>conversion
>  - Use iov_iter_truncate() (Christoph)
> 
> Matthew Wilcox (Oracle) (3):
>   vmcore: Convert copy_oldmem_page() to take an iov_iter
>   vmcore: Convert __read_vmcore to use an iov_iter
>   vmcore: Convert read_from_oldmem() to take an iov_iter
> 
>  arch/arm/kernel/crash_dump.c |  27 +--
>  arch/arm64/kernel/crash_dump.c   |  29 +--
>  arch/ia64/kernel/crash_dump.c|  32 +---
>  arch/mips/kernel/crash_dump.c|  27 +--
>  arch/powerpc/kernel/crash_dump.c |  35 ++---
>  arch/riscv/kernel/crash_dump.c   |  26 +--
>  arch/s390/kernel/crash_dump.c|  13 ++--
>  arch/sh/kernel/crash_dump.c  |  29 ++-
>  arch/x86/kernel/crash_dump_32.c  |  29 +--
>  arch/x86/kernel/crash_dump_64.c  |  48 
>  fs/proc/vmcore.c | 129 +--
>  include/linux/crash_dump.h   |  19 ++---
>  12 files changed, 122 insertions(+), 321 deletions(-)
> 
> -- 
> 2.33.0
> 


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec