Hi Akashi,

On 04/12/17 02:57, AKASHI Takahiro wrote:
> prepare_elf_headers() can also be useful for other architectures,
> including arm64.

What does arm64 need this for? This is generating ELF headers for something, but
I can't work out what. (I'll keep reading,...)

The x86 decompressor? arm64 doesn't have one.
If its for the vmcore file, how does this work today?
If its for kexec_file_load()ing a vmlinux, I don't think we need to support 
this.

crashdump... how does the first kernel generate all the elf-notes etc today
without this?


> So let it factored out.

factored out... did anything change or is this patch just moving code around?


>  arch/x86/kernel/crash.c | 324 
> ------------------------------------------------
>  include/linux/kexec.h   |  17 +++
>  kernel/kexec_file.c     | 308 +++++++++++++++++++++++++++++++++++++++++++++
>  kernel/kexec_internal.h |  20 +++
>  4 files changed, 345 insertions(+), 324 deletions(-)

This is a lot of code being moved. Could you split this into a patch that just
moves the code, and another that makes any changes so they don't have to be
reviewed at the same time.

Some comments on the differences I spotted:

> diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
> index 10e74d4778a1..bb8f3dcddaaa 100644
> --- a/arch/x86/kernel/crash.c
> +++ b/arch/x86/kernel/crash.c

> -/* Gather all the required information to prepare elf headers for ram 
> regions */
> -static void fill_up_crash_elf_data(struct crash_elf_data *ced,
> -                                struct kimage *image)
> -{
> -     unsigned int nr_ranges = 0;
> -
> -     ced->image = image;
> -
> -     walk_system_ram_res(0, -1, &nr_ranges,
> -                             get_nr_ram_ranges_callback);
> -
> -     ced->max_nr_ranges = nr_ranges;
> -
> -     /* Exclusion of crash region could split memory ranges */
> -     ced->max_nr_ranges++;


> -     /* If crashk_low_res is not 0, another range split possible */
> -     if (crashk_low_res.end)
> -             ced->max_nr_ranges++;

This crashk stuff gets wrapped in #ifdef CONFIG_X86. Is it because arm64 doesn't
support kdump via kexec_file_load()? or because crashk_low_res isn't defined on
other architectures...

If this is moving to core code, could we add ARCH_HAS kconfig symbols so its
clear what it does and straightforward for another architecture to re-use.


> -}

[...]

> -/*
> - * Look for any unwanted ranges between mstart, mend and remove them. This
> - * might lead to split and split ranges are put in ced->mem.ranges[] array
> - */
> -static int elf_header_exclude_ranges(struct crash_elf_data *ced,
> -             unsigned long long mstart, unsigned long long mend)
> -{
> -     struct crash_mem *cmem = &ced->mem;
> -     int ret = 0;
> -
> -     memset(cmem->ranges, 0, sizeof(cmem->ranges));
> -
> -     cmem->ranges[0].start = mstart;
> -     cmem->ranges[0].end = mend;
> -     cmem->nr_ranges = 1;
> -
> -     /* Exclude crashkernel region */
> -     ret = exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
> -     if (ret)
> -             return ret;


> -     if (crashk_low_res.end) {
> -             ret = exclude_mem_range(cmem, crashk_low_res.start, 
> crashk_low_res.end);
> -             if (ret)
> -                     return ret;
> -     }

And again here,


> -     return ret;
> -}

[..]

> -static int prepare_elf64_ram_headers_callback(struct resource *res, void 
> *arg)
> -{
> -     struct crash_elf_data *ced = arg;
> -     Elf64_Ehdr *ehdr;
> -     Elf64_Phdr *phdr;
> -     unsigned long mstart, mend;

> -     struct kimage *image = ced->image;

> -     struct crash_mem *cmem;
> -     int ret, i;
> -
> -     ehdr = ced->ehdr;
> -
> -     /* Exclude unwanted mem ranges */
> -     ret = elf_header_exclude_ranges(ced, res->start, res->end);
> -     if (ret)
> -             return ret;
> -
> -     /* Go through all the ranges in ced->mem.ranges[] and prepare phdr */
> -     cmem = &ced->mem;
> -
> -     for (i = 0; i < cmem->nr_ranges; i++) {
> -             mstart = cmem->ranges[i].start;
> -             mend = cmem->ranges[i].end;
> -
> -             phdr = ced->bufp;
> -             ced->bufp += sizeof(Elf64_Phdr);
> -
> -             phdr->p_type = PT_LOAD;
> -             phdr->p_flags = PF_R|PF_W|PF_X;
> -             phdr->p_offset  = mstart;


> -             /*
> -              * If a range matches backup region, adjust offset to backup
> -              * segment.
> -              */
> -             if (mstart == image->arch.backup_src_start &&
> -                 (mend - mstart + 1) == image->arch.backup_src_sz)
> -                     phdr->p_offset = image->arch.backup_load_addr;

This becomes x86 only too, but this time its not touching crashk_low_res.
Could this be some kconfig name that describes what its for?
(We may want it in the future, and it silently gets #ifdef'd out!)


> -             phdr->p_paddr = mstart;
> -             phdr->p_vaddr = (unsigned long long) __va(mstart);
> -             phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
> -             phdr->p_align = 0;
> -             ehdr->e_phnum++;
> -             pr_debug("Crash PT_LOAD elf header. phdr=%p vaddr=0x%llx, 
> paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
> -                     phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz,
> -                     ehdr->e_phnum, phdr->p_offset);
> -     }
> -
> -     return ret;
> -}

[..]

> -static int prepare_elf64_headers(struct crash_elf_data *ced,
> -             void **addr, unsigned long *sz)
> -{
> -     Elf64_Ehdr *ehdr;
> -     Elf64_Phdr *phdr;
> -     unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz;
> -     unsigned char *buf, *bufp;
> -     unsigned int cpu;
> -     unsigned long long notes_addr;
> -     int ret;
> -
> -     /* extra phdr for vmcoreinfo elf note */
> -     nr_phdr = nr_cpus + 1;
> -     nr_phdr += ced->max_nr_ranges;
> -
> -     /*
> -      * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping
> -      * area on x86_64 (ffffffff80000000 - ffffffffa0000000).
> -      * I think this is required by tools like gdb. So same physical
> -      * memory will be mapped in two elf headers. One will contain kernel
> -      * text virtual addresses and other will have __va(physical) addresses.
> -      */
> -
> -     nr_phdr++;
> -     elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr);
> -     elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN);
> -
> -     buf = vzalloc(elf_sz);
> -     if (!buf)
> -             return -ENOMEM;
> -
> -     bufp = buf;
> -     ehdr = (Elf64_Ehdr *)bufp;
> -     bufp += sizeof(Elf64_Ehdr);
> -     memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
> -     ehdr->e_ident[EI_CLASS] = ELFCLASS64;
> -     ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
> -     ehdr->e_ident[EI_VERSION] = EV_CURRENT;
> -     ehdr->e_ident[EI_OSABI] = ELF_OSABI;
> -     memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
> -     ehdr->e_type = ET_CORE;
> -     ehdr->e_machine = ELF_ARCH;
> -     ehdr->e_version = EV_CURRENT;
> -     ehdr->e_phoff = sizeof(Elf64_Ehdr);
> -     ehdr->e_ehsize = sizeof(Elf64_Ehdr);
> -     ehdr->e_phentsize = sizeof(Elf64_Phdr);
> -
> -     /* Prepare one phdr of type PT_NOTE for each present cpu */
> -     for_each_present_cpu(cpu) {
> -             phdr = (Elf64_Phdr *)bufp;
> -             bufp += sizeof(Elf64_Phdr);
> -             phdr->p_type = PT_NOTE;
> -             notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu));
> -             phdr->p_offset = phdr->p_paddr = notes_addr;
> -             phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t);
> -             (ehdr->e_phnum)++;
> -     }
> -
> -     /* Prepare one PT_NOTE header for vmcoreinfo */
> -     phdr = (Elf64_Phdr *)bufp;
> -     bufp += sizeof(Elf64_Phdr);
> -     phdr->p_type = PT_NOTE;
> -     phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
> -     phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
> -     (ehdr->e_phnum)++;
> -
> -#ifdef CONFIG_X86_64
> -     /* Prepare PT_LOAD type program header for kernel text region */
> -     phdr = (Elf64_Phdr *)bufp;
> -     bufp += sizeof(Elf64_Phdr);
> -     phdr->p_type = PT_LOAD;
> -     phdr->p_flags = PF_R|PF_W|PF_X;
> -     phdr->p_vaddr = (Elf64_Addr)_text;
> -     phdr->p_filesz = phdr->p_memsz = _end - _text;
> -     phdr->p_offset = phdr->p_paddr = __pa_symbol(_text);
> -     (ehdr->e_phnum)++;
> -#endif

Eh?

You keep this #ifdef, is it actually x86_64 specific (i.e. not i386 or arm64),
or something affecting all 64bit architectures...
If its a historic ABI thing, could we add a comment explaining that...


> -     /* Prepare PT_LOAD headers for system ram chunks. */
> -     ced->ehdr = ehdr;
> -     ced->bufp = bufp;
> -     ret = walk_system_ram_res(0, -1, ced,
> -                     prepare_elf64_ram_headers_callback);
> -     if (ret < 0)
> -             return ret;
> -
> -     *addr = buf;
> -     *sz = elf_sz;
> -     return 0;
> -}


Thanks,

James

Reply via email to