Re: [PATCH 2/2] kexec/ppc64: add support to parse ibm, dynamic-memory-v2 property

2018-02-21 Thread Mahesh Jagannath Salgaonkar
On 02/20/2018 07:48 PM, Hari Bathini wrote:
> Add support to parse the new 'ibm,dynamic-memory-v2' property in the
> 'ibm,dynamic-reconfiguration-memory' node. This replaces the old
> 'ibm,dynamic-memory' property and is enabled in the kernel with a
> patch series that starts with commit 0c38ed6f6f0b ("powerpc/pseries:
> Enable support of ibm,dynamic-memory-v2"). All LMBs that share the same
> flags and are adjacent are grouped together in the newer version of the
> property making it compact to represent larger memory configurations.
> 
> Signed-off-by: Hari Bathini 

Thanks for fixing this. The patches look good to me.

Reviewed-by: Mahesh Salgaonkar 

> ---
>  kexec/arch/ppc64/crashdump-ppc64.c |   23 +++--
>  kexec/arch/ppc64/crashdump-ppc64.h |   16 +-
>  kexec/arch/ppc64/kexec-ppc64.c |   35 ++
>  kexec/fs2dt.c  |   92 
> ++--
>  4 files changed, 112 insertions(+), 54 deletions(-)
> 
> diff --git a/kexec/arch/ppc64/crashdump-ppc64.c 
> b/kexec/arch/ppc64/crashdump-ppc64.c
> index bc9f948..50e3853 100644
> --- a/kexec/arch/ppc64/crashdump-ppc64.c
> +++ b/kexec/arch/ppc64/crashdump-ppc64.c
> @@ -39,6 +39,10 @@
>  #define DEVTREE_CRASHKERNEL_BASE 
> "/proc/device-tree/chosen/linux,crashkernel-base"
>  #define DEVTREE_CRASHKERNEL_SIZE 
> "/proc/device-tree/chosen/linux,crashkernel-size"
> 
> +unsigned int num_of_lmb_sets;
> +unsigned int is_dyn_mem_v2;
> +uint64_t lmb_size;
> +
>  static struct crash_elf_info elf_info64 =
>  {
>   class: ELFCLASS64,
> @@ -127,6 +131,7 @@ static int get_dyn_reconf_crash_memory_ranges(void)
>  {
>   uint64_t start, end;
>   uint64_t startrange, endrange;
> + uint64_t size;
>   char fname[128], buf[32];
>   FILE *file;
>   unsigned int i;
> @@ -135,6 +140,8 @@ static int get_dyn_reconf_crash_memory_ranges(void)
> 
>   strcpy(fname, "/proc/device-tree/");
>   strcat(fname, "ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory");
> + if (is_dyn_mem_v2)
> + strcat(fname, "-v2");
>   if ((file = fopen(fname, "r")) == NULL) {
>   perror(fname);
>   return -1;
> @@ -142,8 +149,9 @@ static int get_dyn_reconf_crash_memory_ranges(void)
> 
>   fseek(file, 4, SEEK_SET);
>   startrange = endrange = 0;
> - for (i = 0; i < num_of_lmbs; i++) {
> - if ((n = fread(buf, 1, 24, file)) < 0) {
> + size = lmb_size;
> + for (i = 0; i < num_of_lmb_sets; i++) {
> + if ((n = fread(buf, 1, LMB_ENTRY_SIZE, file)) < 0) {
>   perror(fname);
>   fclose(file);
>   return -1;
> @@ -156,8 +164,15 @@ static int get_dyn_reconf_crash_memory_ranges(void)
>   return -1;
>   }
> 
> - start = be64_to_cpu(((uint64_t *)buf)[DRCONF_ADDR]);
> - end = start + lmb_size;
> + /*
> +  * If the property is ibm,dynamic-memory-v2, the first 4 bytes
> +  * tell the number of sequential LMBs in this entry.
> +  */
> + if (is_dyn_mem_v2)
> + size = be32_to_cpu(((unsigned int *)buf)[0]) * lmb_size;
> +
> + start = be64_to_cpu(*((uint64_t *)&buf[DRCONF_ADDR]));
> + end = start + size;
>   if (start == 0 && end >= (BACKUP_SRC_END + 1))
>   start = BACKUP_SRC_END + 1;
> 
> diff --git a/kexec/arch/ppc64/crashdump-ppc64.h 
> b/kexec/arch/ppc64/crashdump-ppc64.h
> index 42ccc31..87beb39 100644
> --- a/kexec/arch/ppc64/crashdump-ppc64.h
> +++ b/kexec/arch/ppc64/crashdump-ppc64.h
> @@ -34,10 +34,18 @@ extern unsigned int rtas_size;
>  extern uint64_t opal_base;
>  extern uint64_t opal_size;
> 
> -uint64_t lmb_size;
> -unsigned int num_of_lmbs;
> -
> -#define DRCONF_ADDR  0
> +/*
> + * In case of ibm,dynamic-memory-v2 property, this is the number of LMB
> + * sets where each set represents a group of sequential LMB entries. In
> + * case of ibm,dynamic-memory property, the number of LMB sets is nothing
> + * but the total number of LMB entries.
> + */
> +extern unsigned int num_of_lmb_sets;
> +extern unsigned int is_dyn_mem_v2;
> +extern uint64_t lmb_size;
> +
> +#define LMB_ENTRY_SIZE   24
> +#define DRCONF_ADDR  (is_dyn_mem_v2 ? 4 : 0)
>  #define DRCONF_FLAGS 20
> 
>  #endif /* CRASHDUMP_PPC64_H */
> diff --git a/kexec/arch/ppc64/kexec-ppc64.c b/kexec/arch/ppc64/kexec-ppc64.c
> index a7d708b..4e70b13 100644
> --- a/kexec/arch/ppc64/kexec-ppc64.c
> +++ b/kexec/arch/ppc64/kexec-ppc64.c
> @@ -149,6 +149,7 @@ static void add_base_memory_range(uint64_t start, 
> uint64_t end)
>  static int get_dyn_reconf_base_ranges(void)
>  {
>   uint64_t start, end;
> + uint64_t size;
>   char fname[128], buf[32];
>   FILE *file;
>   unsigned int i;
> @@ -166,29 +167,35 @@ static int get_dyn_reconf_base_ranges(void)
>   return 

Re: [PATCH 1/2] kexec: add a helper function to add ranges

2018-02-21 Thread Mahesh Jagannath Salgaonkar
On 02/20/2018 07:48 PM, Hari Bathini wrote:
> Add a helper function for adding ranges to avoid duplicating code.
> 
> Signed-off-by: Hari Bathini 

Reviewed-by: Mahesh Salgaonkar 

> ---
>  kexec/fs2dt.c |  115 
> ++---
>  1 file changed, 53 insertions(+), 62 deletions(-)
> 
> diff --git a/kexec/fs2dt.c b/kexec/fs2dt.c
> index 79aa0f3..550eca9 100644
> --- a/kexec/fs2dt.c
> +++ b/kexec/fs2dt.c
> @@ -169,6 +169,50 @@ static unsigned propnum(const char *name)
>   return offset;
>  }
> 
> +/*
> + * Add ranges by comparing 'base' and 'end' addresses with usable
> + * memory ranges. Returns the number of ranges added. Each range added
> + * increments 'idx' by 2.
> + */
> +static uint64_t add_ranges(uint64_t **ranges, int *ranges_size, int idx,
> +uint64_t base, uint64_t end)
> +{
> + uint64_t loc_base, loc_end, rngs_cnt = 0;
> + size_t range;
> + int add = 0;
> +
> + for (range = 0; range < usablemem_rgns.size; range++) {
> + loc_base = usablemem_rgns.ranges[range].start;
> + loc_end = usablemem_rgns.ranges[range].end;
> + if (loc_base >= base && loc_end <= end) {
> + add = 1;
> + } else if (base < loc_end && end > loc_base) {
> + if (loc_base < base)
> + loc_base = base;
> + if (loc_end > end)
> + loc_end = end;
> + add = 1;
> + }
> +
> + if (add) {
> + if (idx >= ((*ranges_size) - 2)) {
> + (*ranges_size) += MEM_RANGE_CHUNK_SZ;
> + *ranges = realloc(*ranges, (*ranges_size)*8);
> + if (!(*ranges))
> + die("unrecoverable error: can't realloc"
> + "%d bytes for ranges.\n",
> + (*ranges_size)*8);
> + }
> + (*ranges)[idx++] = cpu_to_be64(loc_base);
> + (*ranges)[idx++] = cpu_to_be64(loc_end - loc_base);
> +
> + rngs_cnt++;
> + }
> + }
> +
> + return rngs_cnt;
> +}
> +
>  #ifdef HAVE_DYNAMIC_MEMORY
>  static void add_dyn_reconf_usable_mem_property__(int fd)
>  {
> @@ -176,8 +220,8 @@ static void add_dyn_reconf_usable_mem_property__(int fd)
>   uint64_t buf[32];
>   uint64_t *ranges;
>   int ranges_size = MEM_RANGE_CHUNK_SZ;
> - uint64_t base, end, loc_base, loc_end;
> - size_t i, rngs_cnt, range;
> + uint64_t base, end, rngs_cnt;
> + size_t i;
>   int rlen = 0;
>   int tmp_indx;
> 
> @@ -210,36 +254,8 @@ static void add_dyn_reconf_usable_mem_property__(int fd)
> 
>   tmp_indx = rlen++;
> 
> - rngs_cnt = 0;
> - for (range = 0; range < usablemem_rgns.size; range++) {
> - int add = 0;
> - loc_base = usablemem_rgns.ranges[range].start;
> - loc_end = usablemem_rgns.ranges[range].end;
> - if (loc_base >= base && loc_end <= end) {
> - add = 1;
> - } else if (base < loc_end && end > loc_base) {
> - if (loc_base < base)
> - loc_base = base;
> - if (loc_end > end)
> - loc_end = end;
> - add = 1;
> - }
> -
> - if (add) {
> - if (rlen >= (ranges_size-2)) {
> - ranges_size += MEM_RANGE_CHUNK_SZ;
> - ranges = realloc(ranges, ranges_size*8);
> - if (!ranges)
> - die("unrecoverable error: can't"
> - " realloc %d bytes for"
> - " ranges.\n",
> - ranges_size*8);
> - }
> - ranges[rlen++] = cpu_to_be64(loc_base);
> - ranges[rlen++] = cpu_to_be64(loc_end - 
> loc_base);
> - rngs_cnt++;
> - }
> - }
> + rngs_cnt = add_ranges(&ranges, &ranges_size, rlen,
> +   base, end);
>   if (rngs_cnt == 0) {
>   /* We still need to add a counter for every LMB because
>* the kernel parsing code is dumb.  We just have
> @@ -261,7 +277,8 @@ static void add_dyn_reconf_usable_mem_property__(int fd)
>   }
>   } else {
>  

Re: [PATCH v4 2/3] powerpc/fadump: Use the correct VMCOREINFO_NOTE_SIZE for phdr

2017-04-27 Thread Mahesh Jagannath Salgaonkar
On 04/26/2017 12:41 PM, Dave Young wrote:
> Ccing ppc list
> On 04/20/17 at 07:39pm, Xunlei Pang wrote:
>> vmcoreinfo_max_size stands for the vmcoreinfo_data, the
>> correct one we should use is vmcoreinfo_note whose total
>> size is VMCOREINFO_NOTE_SIZE.
>>
>> Like explained in commit 77019967f06b ("kdump: fix exported
>> size of vmcoreinfo note"), it should not affect the actual
>> function, but we better fix it, also this change should be
>> safe and backward compatible.
>>
>> After this, we can get rid of variable vmcoreinfo_max_size,
>> let's use the corresponding macros directly, fewer variables
>> means more safety for vmcoreinfo operation.
>>
>> Cc: Mahesh Salgaonkar 
>> Cc: Hari Bathini 
>> Signed-off-by: Xunlei Pang 

Reviewed-by: Mahesh Salgaonkar 

Thanks,
-Mahesh.

>> ---
>> v3->v4:
>> -Rebased on the latest linux-next
>>
>>  arch/powerpc/kernel/fadump.c | 3 +--
>>  include/linux/crash_core.h   | 1 -
>>  kernel/crash_core.c  | 3 +--
>>  3 files changed, 2 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
>> index 466569e..7bd6cd0 100644
>> --- a/arch/powerpc/kernel/fadump.c
>> +++ b/arch/powerpc/kernel/fadump.c
>> @@ -893,8 +893,7 @@ static int fadump_create_elfcore_headers(char *bufp)
>>  
>>  phdr->p_paddr   = fadump_relocate(paddr_vmcoreinfo_note());
>>  phdr->p_offset  = phdr->p_paddr;
>> -phdr->p_memsz   = vmcoreinfo_max_size;
>> -phdr->p_filesz  = vmcoreinfo_max_size;
>> +phdr->p_memsz   = phdr->p_filesz = VMCOREINFO_NOTE_SIZE;
>>  
>>  /* Increment number of program headers. */
>>  (elf->e_phnum)++;
>> diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
>> index ba283a2..7d6bc7b 100644
>> --- a/include/linux/crash_core.h
>> +++ b/include/linux/crash_core.h
>> @@ -55,7 +55,6 @@
>>  
>>  extern u32 *vmcoreinfo_note;
>>  extern size_t vmcoreinfo_size;
>> -extern size_t vmcoreinfo_max_size;
>>  
>>  Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
>>void *data, size_t data_len);
>> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
>> index 0321f04..43cdb00 100644
>> --- a/kernel/crash_core.c
>> +++ b/kernel/crash_core.c
>> @@ -16,7 +16,6 @@
>>  /* vmcoreinfo stuff */
>>  static unsigned char *vmcoreinfo_data;
>>  size_t vmcoreinfo_size;
>> -size_t vmcoreinfo_max_size = VMCOREINFO_BYTES;
>>  u32 *vmcoreinfo_note;
>>  
>>  /*
>> @@ -343,7 +342,7 @@ void vmcoreinfo_append_str(const char *fmt, ...)
>>  r = vscnprintf(buf, sizeof(buf), fmt, args);
>>  va_end(args);
>>  
>> -r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
>> +r = min(r, VMCOREINFO_BYTES - vmcoreinfo_size);
>>  
>>  memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
>>  
>> -- 
>> 1.8.3.1
>>
>>
>> ___
>> kexec mailing list
>> kexec@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/kexec
> 
> Reviewed-by: Dave Young 
> 
> Thanks
> Dave
> 


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v4 4/5] powerpc/fadump: reuse crashkernel parameter for fadump memory reservation

2017-01-13 Thread Mahesh Jagannath Salgaonkar
On 01/05/2017 11:02 PM, Hari Bathini wrote:
> fadump supports specifying memory to reserve for fadump's crash kernel
> with fadump_reserve_mem kernel parameter. This parameter currently
> supports passing a fixed memory size, like fadump_reserve_mem=<size>
> only. This patch aims to add support for other syntaxes like range-based
> memory size <range1>:<size1>[,<range2>:<size2>,<range3>:<size3>,...]
> which allows using the same parameter to boot the kernel with different
> system RAM sizes.
> 
> As crashkernel parameter already supports the above mentioned syntaxes,
> this patch deprecates fadump_reserve_mem parameter and reuses crashkernel
> parameter instead, to specify memory for fadump's crash kernel memory
> reservation as well. If any offset is provided in crashkernel parameter,
> it will be ignored in case of fadump, as fadump reserves memory at end
> of RAM.
> 
> Advantages using crashkernel parameter instead of fadump_reserve_mem
> parameter are one less kernel parameter overall, code reuse and support
> for multiple syntaxes to specify memory.
> 
> Suggested-by: Dave Young 
> Signed-off-by: Hari Bathini 

Reviewed-by: Mahesh Salgaonkar 

> ---
>  arch/powerpc/kernel/fadump.c |   23 ++-
>  1 file changed, 10 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
> index db0b339..de7d39a 100644
> --- a/arch/powerpc/kernel/fadump.c
> +++ b/arch/powerpc/kernel/fadump.c
> @@ -210,14 +210,20 @@ static unsigned long init_fadump_mem_struct(struct 
> fadump_mem_struct *fdm,
>   */
>  static inline unsigned long fadump_calculate_reserve_size(void)
>  {
> - unsigned long size;
> + int ret;
> + unsigned long long base, size;
> 
>   /*
> -  * Check if the size is specified through fadump_reserve_mem= cmdline
> -  * option. If yes, then use that.
> +  * Check if the size is specified through crashkernel= cmdline
> +  * option. If yes, then use that but ignore base as fadump
> +  * reserves memory at end of RAM.
>*/
> - if (fw_dump.reserve_bootvar)
> + ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
> + &size, &base);
> + if (ret == 0 && size > 0) {
> + fw_dump.reserve_bootvar = (unsigned long)size;
>   return fw_dump.reserve_bootvar;
> + }
> 
>   /* divide by 20 to get 5% of value */
>   size = memblock_end_of_DRAM() / 20;
> @@ -353,15 +359,6 @@ static int __init early_fadump_param(char *p)
>  }
>  early_param("fadump", early_fadump_param);
> 
> -/* Look for fadump_reserve_mem= cmdline option */
> -static int __init early_fadump_reserve_mem(char *p)
> -{
> - if (p)
> - fw_dump.reserve_bootvar = memparse(p, &p);
> - return 0;
> -}
> -early_param("fadump_reserve_mem", early_fadump_reserve_mem);
> -
>  static void register_fw_dump(struct fadump_mem_struct *fdm)
>  {
>   int rc;
> 


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v4 3/5] powerpc/fadump: remove dependency with CONFIG_KEXEC

2017-01-13 Thread Mahesh Jagannath Salgaonkar
On 01/05/2017 11:02 PM, Hari Bathini wrote:
> Now that crashkernel parameter parsing and vmcoreinfo related code is
> moved under CONFIG_CRASH_CORE instead of CONFIG_KEXEC_CORE, remove
> dependency with CONFIG_KEXEC for CONFIG_FA_DUMP. While here, get rid
> of definitions of fadump_append_elf_note() & fadump_final_note()
> functions to reuse similar functions compiled under CONFIG_CRASH_CORE.
> 
> Signed-off-by: Hari Bathini 

Reviewed-by: Mahesh Salgaonkar 

> ---
>  arch/powerpc/Kconfig   |   10 ++
>  arch/powerpc/include/asm/fadump.h  |2 ++
>  arch/powerpc/kernel/crash.c|2 --
>  arch/powerpc/kernel/fadump.c   |   34 +++---
>  arch/powerpc/kernel/setup-common.c |5 +
>  5 files changed, 16 insertions(+), 37 deletions(-)
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index a8ee573..b9726be 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -513,21 +513,23 @@ config RELOCATABLE_TEST
> relocation code.
> 
>  config CRASH_DUMP
> - bool "Build a kdump crash kernel"
> + bool "Build a dump capture kernel"
>   depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP)
>   select RELOCATABLE if (PPC64 && !COMPILE_TEST) || 44x || FSL_BOOKE
>   help
> -   Build a kernel suitable for use as a kdump capture kernel.
> +   Build a kernel suitable for use as a dump capture kernel.
> The same kernel binary can be used as production kernel and dump
> capture kernel.
> 
>  config FA_DUMP
>   bool "Firmware-assisted dump"
> - depends on PPC64 && PPC_RTAS && CRASH_DUMP && KEXEC_CORE
> + depends on PPC64 && PPC_RTAS
> + select CRASH_CORE
> + select CRASH_DUMP
>   help
> A robust mechanism to get reliable kernel crash dump with
> assistance from firmware. This approach does not use kexec,
> -   instead firmware assists in booting the kdump kernel
> +   instead firmware assists in booting the capture kernel
> while preserving memory contents. Firmware-assisted dump
> is meant to be a kdump replacement offering robustness and
> speed not possible without system firmware assistance.
> diff --git a/arch/powerpc/include/asm/fadump.h 
> b/arch/powerpc/include/asm/fadump.h
> index 0031806..60b9108 100644
> --- a/arch/powerpc/include/asm/fadump.h
> +++ b/arch/powerpc/include/asm/fadump.h
> @@ -73,6 +73,8 @@
>   reg_entry++;\
>  })
> 
> +extern int crashing_cpu;
> +
>  /* Kernel Dump section info */
>  struct fadump_section {
>   __be32  request_flag;
> diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
> index 47b63de..cbabb5a 100644
> --- a/arch/powerpc/kernel/crash.c
> +++ b/arch/powerpc/kernel/crash.c
> @@ -43,8 +43,6 @@
>  #define IPI_TIMEOUT  1
>  #define REAL_MODE_TIMEOUT1
> 
> -/* This keeps a track of which one is the crashing cpu. */
> -int crashing_cpu = -1;
>  static int time_to_dump;
> 
>  #define CRASH_HANDLER_MAX 3
> diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
> index 8f0c7c5..db0b339 100644
> --- a/arch/powerpc/kernel/fadump.c
> +++ b/arch/powerpc/kernel/fadump.c
> @@ -486,34 +486,6 @@ fadump_read_registers(struct fadump_reg_entry 
> *reg_entry, struct pt_regs *regs)
>   return reg_entry;
>  }
> 
> -static u32 *fadump_append_elf_note(u32 *buf, char *name, unsigned type,
> - void *data, size_t data_len)
> -{
> - struct elf_note note;
> -
> - note.n_namesz = strlen(name) + 1;
> - note.n_descsz = data_len;
> - note.n_type   = type;
> - memcpy(buf, &note, sizeof(note));
> - buf += (sizeof(note) + 3)/4;
> - memcpy(buf, name, note.n_namesz);
> - buf += (note.n_namesz + 3)/4;
> - memcpy(buf, data, note.n_descsz);
> - buf += (note.n_descsz + 3)/4;
> -
> - return buf;
> -}
> -
> -static void fadump_final_note(u32 *buf)
> -{
> - struct elf_note note;
> -
> - note.n_namesz = 0;
> - note.n_descsz = 0;
> - note.n_type   = 0;
> - memcpy(buf, &note, sizeof(note));
> -}
> -
>  static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
>  {
>   struct elf_prstatus prstatus;
> @@ -524,8 +496,8 @@ static u32 *fadump_regs_to_elf_notes(u32 *buf, struct 
> pt_regs *regs)
>* prstatus.pr_pid = 
>*/
>   elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
> - buf = fadump_append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
> - &prstatus, sizeof(prstatus));
> + buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
> +   &prstatus, sizeof(prstatus));
>   return buf;
>  }
> 
> @@ -666,7 +638,7 @@ static int __init fadump_build_cpu_notes(const struct 
> fadump_mem_struct *fdm)
>   note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
>   

Re: IO memory read from /proc/vmcore leads to hang.

2016-07-15 Thread Mahesh Jagannath Salgaonkar
On 07/12/2016 03:16 AM, Daniel Walker wrote:
> 
> Hi,
> 
> I found that on my Powerpc machine there is some IO memory which
> will cause the box to hang if I read it. It's a custom device that was
> added to the board for a special purpose.
> 
> I was looking for a way to exclude this memory from the dump, and while
> doing that I found that kexec makes a list of memory segments that go
> into the core file. I was wondering why most of the kexec architectures
> don't appear to exclude device memory like what's listed in /proc/iomem.
> 
> Is there a good reason why that's not the case?

Kexec/Kdump is designed to capture entire old kernel's memory that will
be available in /proc/vmcore. There is a different userspace tool called
'makedumpfile' (https://sourceforge.net/projects/makedumpfile/), which
can be used to exclude certain memory pages from the dump while reading
from /proc/vmcore. See if that helps you.

Thanks,
-Mahesh.

> 
> Daniel
> 
> ___
> kexec mailing list
> kexec@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec
> 


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] kexec/ppc64: Fix up ELF header and dt for PowerNV platform.

2014-05-11 Thread Mahesh Jagannath Salgaonkar
On 05/11/2014 05:21 AM, Simon Horman wrote:
 On Sun, Feb 09, 2014 at 10:24:59PM +0530, Mahesh J Salgaonkar wrote:
 From: Mahesh Salgaonkar mah...@linux.vnet.ibm.com

 On PowerNV platform, OPAL region is overlapped with crashkernel, need to
 create ELF Program header for the overlapped memory. The changes are
 similar to the way RTAS region was handled.

 Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com
 
 I apologise that this seems to have slipped through the cracks.
 Manesh, is this patch still needed?

Yes. We still need this patch.

 If so, is it possible for
 someone on the power side of things to review it?
 
 ---
  kexec/arch/ppc/crashdump-powerpc.h |2 ++
  kexec/arch/ppc64/crashdump-ppc64.c |   28 +++
  kexec/arch/ppc64/crashdump-ppc64.h |2 ++
  kexec/arch/ppc64/kexec-ppc64.c |   44 
 +++-
  kexec/fs2dt.c  |4 +++
  5 files changed, 79 insertions(+), 1 deletion(-)

 diff --git a/kexec/arch/ppc/crashdump-powerpc.h 
 b/kexec/arch/ppc/crashdump-powerpc.h
 index efdc7e3..9b9b01e 100644
 --- a/kexec/arch/ppc/crashdump-powerpc.h
 +++ b/kexec/arch/ppc/crashdump-powerpc.h
 @@ -40,6 +40,8 @@ extern unsigned long long crash_base;
  extern unsigned long long crash_size;
  extern unsigned int rtas_base;
  extern unsigned int rtas_size;
 +extern uint64_t opal_base;
 +extern uint64_t opal_size;
  extern uint64_t memory_limit;
  
  #endif /* CRASHDUMP_POWERPC_H */
 diff --git a/kexec/arch/ppc64/crashdump-ppc64.c 
 b/kexec/arch/ppc64/crashdump-ppc64.c
 index c0d575d..00a0e63 100644
 --- a/kexec/arch/ppc64/crashdump-ppc64.c
 +++ b/kexec/arch/ppc64/crashdump-ppc64.c
 @@ -294,6 +294,34 @@ static int get_crash_memory_ranges(struct memory_range 
 **range, int *ranges)
  crash_memory_range[memory_ranges++].end = cend;
  }
  
 +/*
 + * If OPAL region is overlapped with crashkernel, need to create ELF
 + * Program header for the overlapped memory.
 + */
 +if (crash_base < opal_base + opal_size &&
 +opal_base < crash_base + crash_size) {
 +page_size = getpagesize();
 +cstart = opal_base;
 +cend = opal_base + opal_size;
 +if (cstart < crash_base)
 +cstart = crash_base;
 +if (cend > crash_base + crash_size)
 +cend = crash_base + crash_size;
 +/*
 + * The opal section created here is formed by reading opal-base
 + * and opal-size from /proc/device-tree/ibm,opal.  Unfortunately
 + * opal-size is not required to be a multiple of PAGE_SIZE
 + * The remainder of the page it ends on is just garbage, and is
 + * safe to read, its just not accounted in opal-size.  Since
 + * we're creating an elf section here though, lets round it up
 + * to the next page size boundary though, so makedumpfile can
 + * read it safely without going south on us.
 + */
 +cend = _ALIGN(cend, page_size);
 +
 +crash_memory_range[memory_ranges].start = cstart;
 +crash_memory_range[memory_ranges++].end = cend;
 +}
  *range = crash_memory_range;
  *ranges = memory_ranges;
  
 diff --git a/kexec/arch/ppc64/crashdump-ppc64.h 
 b/kexec/arch/ppc64/crashdump-ppc64.h
 index 001be3a..d654c6b 100644
 --- a/kexec/arch/ppc64/crashdump-ppc64.h
 +++ b/kexec/arch/ppc64/crashdump-ppc64.h
 @@ -31,6 +31,8 @@ extern uint64_t crash_size;
  extern uint64_t memory_limit;
  extern unsigned int rtas_base;
  extern unsigned int rtas_size;
 +extern uint64_t opal_base;
 +extern uint64_t opal_size;
  
  uint64_t lmb_size;
  unsigned int num_of_lmbs;
 diff --git a/kexec/arch/ppc64/kexec-ppc64.c b/kexec/arch/ppc64/kexec-ppc64.c
 index 49b291d..5956836 100644
 --- a/kexec/arch/ppc64/kexec-ppc64.c
 +++ b/kexec/arch/ppc64/kexec-ppc64.c
 @@ -43,6 +43,7 @@ uint64_t memory_limit;
  static int nr_memory_ranges, nr_exclude_ranges;
  uint64_t crash_base, crash_size;
  unsigned int rtas_base, rtas_size;
 +uint64_t opal_base, opal_size;
  int max_memory_ranges;
  
  static void cleanup_memory_ranges(void)
 @@ -343,7 +344,8 @@ static int get_devtree_details(unsigned long kexec_flags)
  strncmp(dentry->d_name, "memory@", 7) &&
  strcmp(dentry->d_name, "memory") &&
  strncmp(dentry->d_name, "pci@", 4) &&
 -strncmp(dentry->d_name, "rtas", 4))
 +strncmp(dentry->d_name, "rtas", 4) &&
 +strncmp(dentry->d_name, "ibm,opal", 8))
  continue;
  strcpy(fname, device_tree);
  strcat(fname, dentry->d_name);
 @@ -575,6 +577,46 @@ static int get_devtree_details(unsigned long 
 kexec_flags)
  add_usable_mem_rgns(rtas_base, rtas_size);
  } /* rtas */
  
 +if (strncmp(dentry->d_name, "ibm,opal", 8) == 0) {
 +

Re: [PATCH] kexec/ppc64: Handle reserved memory ranges exported by OPAL firmware.

2014-05-11 Thread Mahesh Jagannath Salgaonkar
On 05/11/2014 05:21 AM, Simon Horman wrote:
 On Sun, Feb 09, 2014 at 10:25:45PM +0530, Mahesh J Salgaonkar wrote:
 From: Mahesh Salgaonkar mah...@linux.vnet.ibm.com

 OPAL based system exports reserved memory ranges through /proc/device-tree
 for the regions that are reserved by OPAL firmware. Traverse
 /proc/device-tree/reserved-ranges and add them to exclude_ranges[] and
 reserve them.

 Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com
 
 I apologise that this seems to have slipped through the cracks.
 Manesh, is this patch still needed?

Yup. We need this also.

 If so, is it possible for
 someone on the power side of things to review it?
 
 ---
  kexec/arch/ppc64/kexec-ppc64.c |   44 
 
  1 file changed, 44 insertions(+)

 diff --git a/kexec/arch/ppc64/kexec-ppc64.c b/kexec/arch/ppc64/kexec-ppc64.c
 index 5956836..6e79f52 100644
 --- a/kexec/arch/ppc64/kexec-ppc64.c
 +++ b/kexec/arch/ppc64/kexec-ppc64.c
 @@ -31,6 +31,7 @@
  #include "../../kexec.h"
  #include "../../kexec-syscall.h"
  #include "kexec-ppc64.h"
 +#include "../../fs2dt.h"
  #include "crashdump-ppc64.h"
  #include <arch/options.h>
  
 @@ -314,6 +315,47 @@ static int sort_ranges(void)
  return 0;
  }
  
 +void scan_reserved_ranges(unsigned long kexec_flags, int *range_index)
 +{
 +char fname[256], buf[16];
 +FILE *file;
 +int i = *range_index;
 +
 +strcpy(fname, "/proc/device-tree/reserved-ranges");
 +
 +file = fopen(fname, "r");
 +if (file == NULL) {
 +if (errno != ENOENT) {
 +perror(fname);
 +return;
 +}
 +errno = 0;
 +/* File not present. Non PowerKVM system. */
 +return;
 +}
 +
 +/*
 + * Each reserved range is an (address,size) pair, 2 cells each,
 + * totalling 4 cells per range.
 + */
 +while (fread(buf, sizeof(uint64_t) * 2, 1, file) == 1) {
 +uint64_t base, size;
 +
 +base = be64_to_cpu(((uint64_t *)buf)[0]);
 +size = be64_to_cpu(((uint64_t *)buf)[1]);
 +
 +exclude_range[i].start = base;
 +exclude_range[i].end = base + size;
 +i++;
 +if (i >= max_memory_ranges)
 +realloc_memory_ranges();
 +
 +reserve(base, size);
 +}
 +fclose(file);
 +*range_index = i;
 +}
 +
  /* Get devtree details and create exclude_range array
   * Also create usablemem_ranges for KEXEC_ON_CRASH
   */
 @@ -339,6 +381,8 @@ static int get_devtree_details(unsigned long kexec_flags)
  return -1;
  }
  
 +scan_reserved_ranges(kexec_flags, &i);
 +
  while ((dentry = readdir(dir)) != NULL) {
  if (strncmp(dentry->d_name, "chosen", 6) &&
  strncmp(dentry->d_name, "memory@", 7) &&

 


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] kexec/ppc64 Enable early kernel's OPAL calls

2014-01-30 Thread Mahesh Jagannath Salgaonkar
On 01/24/2014 07:46 PM, Laurent Dufour wrote:
 When the kernel is built with CONFIG_PPC_EARLY_DEBUG_OPAL set, it is
 expecting to get r8 and r9 filled respectively with OPAL base address and
 OPAL entry address (arc/power/head_64.S).
 
 On the new powernv platform, having these 2 registers set allows the kernel
 to perform OPAL calls before it parses the device tree.
 
 Signed-off-by: Laurent Dufour lduf...@linux.vnet.ibm.com

Tested-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com

 ---
  kexec/arch/ppc64/kexec-elf-ppc64.c |   43 
 
  purgatory/arch/ppc64/purgatory-ppc64.c |2 +
  purgatory/arch/ppc64/v2wrap.S  |4 +++
  3 files changed, 49 insertions(+)
 
 diff --git a/kexec/arch/ppc64/kexec-elf-ppc64.c 
 b/kexec/arch/ppc64/kexec-elf-ppc64.c
 index 7d49d8a..ce10367 100644
 --- a/kexec/arch/ppc64/kexec-elf-ppc64.c
 +++ b/kexec/arch/ppc64/kexec-elf-ppc64.c
 @@ -71,6 +71,26 @@ void arch_reuse_initrd(void)
   reuse_initrd = 1;
  }
 
 +static int read_prop(char *name, void *value, size_t len)
 +{
 + int fd;
 + size_t rlen;
 +
 + fd = open(name, O_RDONLY);
 + if (fd == -1)
 + return -1;
 +
 + rlen = read(fd, value, len);
 + if (rlen < 0)
 + fprintf(stderr, "Warning : Can't read %s : %s",
 + name, strerror(errno));
 + else if (rlen != len)
 + fprintf(stderr, "Warning : short read from %s", name);
 +
 + close(fd);
 + return 0;
 +}
 +
  int elf_ppc64_load(int argc, char **argv, const char *buf, off_t len,
   struct kexec_info *info)
  {
 @@ -90,6 +110,7 @@ int elf_ppc64_load(int argc, char **argv, const char *buf, 
 off_t len,
   int i;
   int result, opt;
   uint64_t my_kernel, my_dt_offset;
 + uint64_t my_opal_base = 0, my_opal_entry = 0;
   unsigned int my_panic_kernel;
   uint64_t my_stack, my_backup_start;
   uint64_t toc_addr;
 @@ -246,6 +267,20 @@ int elf_ppc64_load(int argc, char **argv, const char 
 *buf, off_t len,
   *rsvmap_ptr = cpu_to_be64((uint64_t)be32_to_cpu(bb_ptr->totalsize));
  #endif
 
 + if (read_prop("/proc/device-tree/ibm,opal/opal-base-address",
 +   &my_opal_base, sizeof(my_opal_base)) == 0) {
 + my_opal_base = be64_to_cpu(my_opal_base);
 + elf_rel_set_symbol(&info->rhdr, "opal_base",
 +&my_opal_base, sizeof(my_opal_base));
 + }
 +
 + if (read_prop("/proc/device-tree/ibm,opal/opal-entry-address",
 +   &my_opal_entry, sizeof(my_opal_entry)) == 0) {
 + my_opal_entry = be64_to_cpu(my_opal_entry);
 + elf_rel_set_symbol(&info->rhdr, "opal_entry",
 +&my_opal_entry, sizeof(my_opal_entry));
 + }
 +
   /* Set kernel */
   elf_rel_set_symbol(&info->rhdr, "kernel", &my_kernel,
  sizeof(my_kernel));
 
 @@ -306,7 +341,13 @@ int elf_ppc64_load(int argc, char **argv, const char 
 *buf, off_t len,
   toc_addr = 0;
   my_run_at_load = 0;
   my_debug = 0;
 + my_opal_base = 0;
 + my_opal_entry = 0;
 
 + elf_rel_get_symbol(&info->rhdr, "opal_base", &my_opal_base,
 +sizeof(my_opal_base));
 + elf_rel_get_symbol(&info->rhdr, "opal_entry", &my_opal_entry,
 +sizeof(my_opal_entry));
   elf_rel_get_symbol(&info->rhdr, "kernel", &my_kernel,
  sizeof(my_kernel));
   elf_rel_get_symbol(&info->rhdr, "dt_offset", &my_dt_offset,
   sizeof(my_dt_offset));
 @@ -333,6 +374,8 @@ int elf_ppc64_load(int argc, char **argv, const char 
 *buf, off_t len,
   dbgprintf("toc_addr is %llx\n", (unsigned long long)toc_addr);
   dbgprintf("purgatory size is %zu\n", purgatory_size);
   dbgprintf("debug is %d\n", my_debug);
 + dbgprintf("opal_base is %llx\n", (unsigned long long) my_opal_base);
 + dbgprintf("opal_entry is %llx\n", (unsigned long long) my_opal_entry);
 
   for (i = 0; i < info->nr_segments; i++)
   fprintf(stderr, "segment[%d].mem:%p memsz:%zu\n", i,
 diff --git a/purgatory/arch/ppc64/purgatory-ppc64.c 
 b/purgatory/arch/ppc64/purgatory-ppc64.c
 index 0b6d326..7248ac8 100644
 --- a/purgatory/arch/ppc64/purgatory-ppc64.c
 +++ b/purgatory/arch/ppc64/purgatory-ppc64.c
 @@ -29,6 +29,8 @@ unsigned long dt_offset = 0;
  unsigned long my_toc = 0;
  unsigned long kernel = 0;
  unsigned int debug = 0;
 +unsigned long opal_base = 0;
 +unsigned long opal_entry = 0;
 
  void setup_arch(void)
  {
 diff --git a/purgatory/arch/ppc64/v2wrap.S b/purgatory/arch/ppc64/v2wrap.S
 index 2761c14..6fc62e3 100644
 --- a/purgatory/arch/ppc64/v2wrap.S
 +++ b/purgatory/arch/ppc64/v2wrap.S
 @@ -95,6 +95,10 @@ master:
   blt 80f
   stw 17,28(3)# save my cpu number as boot_cpu_phys
  80:
 + LOADADDR(6,opal_base)   # For OPAL early debug
 + ld  8,0(6)  # load the OPAL base address in r8
 + LOADADDR(6,opal_entry)  # For OPAL early debug
 + ld  9,0(6)   

Re: [PATCH v2] makedumpfile: Support to filter dump for kernels that use CONFIG_SPARSEMEM_VMEMMAP

2013-11-19 Thread Mahesh Jagannath Salgaonkar
On 11/19/2013 12:48 PM, Hari Bathini wrote:
 Makedumpfile fails to filter dump for kernels built with 
 CONFIG_SPARSEMEM_VMEMMAP
 enabled as it fails to do vmemmap translations. So far, makedumpfile on ppc64 
 never
 had to deal with vmemmap addresses (vmemmap regions) separately to filter 
 ppc64
 crash dumps as vmemmap regions where mapped in zone normal. But with the 
 inclusion
 of CONFIG_SPARSEMEM_VMEMMAP config option in recent kernels, vmemmap memory 
 regions
 are mapped outside zone normal. There is a need to handle vmemmap to physical 
 address
 translation separately in this scenario. This patch provides support in 
 makedumpfile
 tool to do vmemmap to physical address translation when vmemmap regions are 
 mapped
 outside zone normal. Some kernel symbols are needed in vmcoreinfo for these 
 changes to
 be effective. The kernel patch that adds the necessary symbols to vmcoreinfo 
 has been
 posted to linuxppc devel mailing list. This patch is influenced by vmemmap to 
 physical
 address translation support code in crash utility. It has been tested 
 successfully
 at all dump filtering levels on kernel dumps that have 
 CONFIG_SPARSEMEM_VMEMMAP enabled
 and kernel dumps with CONFIG_SPARSEMEM_VMEMMAP disabled as well. Also, 
 successfully
 tested dump filtering on already filtered vmcores (re-filtering). The patch 
 applies
 cleanly on version 1.5.4 of makedumpfile.
 
 Changes in v2:
 1. Fixed return value when vmemmap list initialization fails
 2. Fixed coding style issue
 
 Signed-off-by: Onkar N Mahajan onmah...@in.ibm.com
 Signed-off-by: Hari Bathini hbath...@linux.vnet.ibm.com
 ---

This patch looks good to me.

Acked-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com

  0 files changed
 
 diff --git a/arch/ppc64.c b/arch/ppc64.c
 index c229ede..390fe05 100644
 --- a/arch/ppc64.c
 +++ b/arch/ppc64.c
 @@ -24,6 +24,153 @@
  #include ../elf_info.h
  #include ../makedumpfile.h
 
 +/*
 + * This function traverses vmemmap list to get the count of vmemmap regions
 + * and populates the regions' info in info-vmemmap_list[]
 + */
 +static int
 +get_vmemmap_list_info(ulong head)
 +{
 + int   i, cnt;
 + long  backing_size, virt_addr_offset, phys_offset, list_offset;
 + ulong curr, next;
 + char  *vmemmap_buf = NULL;
 +
 + backing_size= SIZE(vmemmap_backing);
 + virt_addr_offset= OFFSET(vmemmap_backing.virt_addr);
 + phys_offset = OFFSET(vmemmap_backing.phys);
 + list_offset = OFFSET(vmemmap_backing.list);
 + info-vmemmap_list = NULL;
 +
 + /*
 +  * Get list count by traversing the vmemmap list
 +  */
 + cnt = 0;
 + curr = head;
 + next = 0;
 + do {
 + if (!readmem(VADDR, (curr + list_offset), next,
 +  sizeof(next))) {
 + ERRMSG(Can't get vmemmap region addresses\n);
 + goto err;
 + }
 + curr = next;
 + cnt++;
 + } while ((next != 0)  (next != head));
 +
 + /*
 +  * Using temporary buffer to save vmemmap region information
 +  */
 + vmemmap_buf = calloc(1, backing_size);
 + if (vmemmap_buf == NULL) {
 + ERRMSG(Can't allocate memory for vmemmap_buf. %s\n,
 +strerror(errno));
 + goto err;
 + }
 +
 + info-vmemmap_list = calloc(1, cnt * sizeof(struct ppc64_vmemmap));
 + if (info-vmemmap_list == NULL) {
 + ERRMSG(Can't allocate memory for vmemmap_list. %s\n,
 +strerror(errno));
 + goto err;
 + }
 +
 + curr = head;
 + for (i = 0; i  cnt; i++) {
 + if (!readmem(VADDR, curr, vmemmap_buf, backing_size)) {
 + ERRMSG(Can't get vmemmap region info\n);
 + goto err;
 + }
 +
 + info-vmemmap_list[i].phys = ULONG(vmemmap_buf + phys_offset);
 + info-vmemmap_list[i].virt = ULONG(vmemmap_buf +
 +virt_addr_offset);
 + curr = ULONG(vmemmap_buf + list_offset);
 +
 + if (info-vmemmap_list[i].virt  info-vmemmap_start)
 + info-vmemmap_start = info-vmemmap_list[i].virt;
 +
 + if ((info-vmemmap_list[i].virt + info-vmemmap_psize) 
 + info-vmemmap_end)
 + info-vmemmap_end = (info-vmemmap_list[i].virt +
 +  info-vmemmap_psize);
 + }
 +
 + return cnt;
 +err:
 + free(vmemmap_buf);
 + free(info-vmemmap_list);
 + return 0;
 +}
 +
 +/*
 + *  Verify that the kernel has made the vmemmap list available,
 + *  and if so, stash the relevant data required to make vtop
 + *  translations.
 + */
 +static int
 +ppc64_vmemmap_init(void)
 +{
 + int psize, shift;
 + ulong head;
 +
 + if ((SYMBOL(vmemmap_list) == NOT_FOUND_SYMBOL)
 + || (SYMBOL(mmu_psize_defs) == NOT_FOUND_SYMBOL)
 + || 

Re: [PATCH 1/2] Move scrubbing process from reader to writer.

2013-04-12 Thread Mahesh Jagannath Salgaonkar
On 04/12/2013 11:47 AM, Atsushi Kumagai wrote:
 From: Atsushi Kumagai kumagai-atsu...@mxc.nes.nec.co.jp
 Date: Thu, 11 Apr 2013 09:36:36 +0900
 Subject: [PATCH 1/2] Move scrubbing process from reader to writer.
 
 When creating a dumpfile in the kdump-compressed format, scrubbing
 process is done in reading process via read_pfn(). But it would be
 better to do it just before actually writing the data like the
 case of ELF.
 
 Signed-off-by: Atsushi Kumagai kumagai-atsu...@mxc.nes.nec.co.jp

Hi Atsushi,

Changes look good to me.

Thanks,
-Mahesh.

 ---
  makedumpfile.c | 5 ++---
  1 file changed, 2 insertions(+), 3 deletions(-)
 
 diff --git a/makedumpfile.c b/makedumpfile.c
 index 9cf907c..725100b 100644
 --- a/makedumpfile.c
 +++ b/makedumpfile.c
 @@ -5337,7 +5337,6 @@ read_pfn(unsigned long long pfn, unsigned char *buf)
   ERRMSG(Can't get the page data.\n);
   return FALSE;
   }
 - filter_data_buffer(buf, paddr, info-page_size);
   return TRUE;
   }
 
 @@ -5360,7 +5359,6 @@ read_pfn(unsigned long long pfn, unsigned char *buf)
   ERRMSG(Can't get the page data.\n);
   return FALSE;
   }
 - filter_data_buffer(buf, paddr, size1);
   if (size1 != info-page_size) {
   size2 = info-page_size - size1;
   if (!offset2) {
 @@ -5370,7 +5368,6 @@ read_pfn(unsigned long long pfn, unsigned char *buf)
   ERRMSG(Can't get the page data.\n);
   return FALSE;
   }
 - filter_data_buffer(buf + size1, paddr + size1, size2);
   }
   }
   return TRUE;
 @@ -5805,6 +5802,7 @@ write_kdump_pages(struct cache_data *cd_header, struct 
 cache_data *cd_page)
 
   if (!read_pfn(pfn, buf))
   goto out;
 + filter_data_buffer(buf, pfn_to_paddr(pfn), info-page_size);
 
   /*
* Exclude the page filled with zeros.
 @@ -5983,6 +5981,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, 
 struct cache_data *cd_pag
 
   if (!read_pfn(pfn, buf))
   goto out;
 + filter_data_buffer(buf, pfn_to_paddr(pfn), info-page_size);
 
   /*
* Exclude the page filled with zeros.
 


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: makedumpfile bug with ppc64 CONFIG_SPARSEMEM_EXTREME

2013-01-17 Thread Mahesh Jagannath Salgaonkar
On 01/11/2013 02:39 AM, Dave Anderson wrote:
 
 
 - Original Message -

 Our QA group recently ran into a makedumpfile problem while
 testing kdump/makedumpfile w/upstream 3.7.1 kernels, which
 had to do with the filtering of pages on a 12GB ppc64 system.

 ... [ cut ] ...

 I haven't checked why the original math fails in the case of the
 ppc64 kernel, while it does not fail in a CONFIG_SPARSEMEM_EXTREME
 x86_64 kernel, for example. (page size maybe?)  But obviously the
 simpler dimension-check is a better way to do it.

 Of course, within the current constraints of makedumpfile, it's not
 that easy.  Ideally the kernel could pass the configuration in
 the vmcoreinfo with a VMCOREINFO_CONFIG(name).  But anyway, I'll leave
 that up to you.

 Thanks,
   Dave
 
 It's presumably being seen in 3.7.1 because of this commit:
 
   $ git log -p arch/powerpc/include/asm/sparsemem.h
   commit 048ee0993ec8360abb0b51bdf8f8721e9ed62ec4
   Author: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
   Date:   Mon Sep 10 02:52:55 2012 +
 
   powerpc/mm: Add 64TB support
 
   Increase max addressable range to 64TB. This is not tested on
   real hardware yet.
 
   Reviewed-by: Paul Mackerras pau...@samba.org
   Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
   Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org
 
   diff --git a/arch/powerpc/include/asm/sparsemem.h 
 b/arch/powerpc/include/asm/sparsemem.h
   index 0c5fa31..f6fc0ee 100644
   --- a/arch/powerpc/include/asm/sparsemem.h
   +++ b/arch/powerpc/include/asm/sparsemem.h
   @@ -10,8 +10,8 @@
 */
#define SECTION_SIZE_BITS   24
 
   -#define MAX_PHYSADDR_BITS   44
   -#define MAX_PHYSMEM_BITS44
   +#define MAX_PHYSADDR_BITS   46
   +#define MAX_PHYSMEM_BITS46
 
#endif /* CONFIG_SPARSEMEM */
 
   $ git describe --contains 048ee0993ec8360abb0b51bdf8f8721e9ed62ec4
   v3.7-rc1~108^2~32
   $
   
 Dave

A similar issue was seen on s390x last year
(http://lists.infradead.org/pipermail/kexec/2011-December/005905.html).
The change in the
MAX_PHYSMEM_BITS define in the kernel causes the sparsemem extreme check to fail
in makedumpfile. This needs to be fixed in the same way as it was on
s390x. I will post a fix patch for makedumpfile after testing it on
an upstream kernel.

Thanks,
-Mahesh.



___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 0/2] kexec: Limit the crash memory ranges according to first kernel's memory limit.

2012-11-29 Thread Mahesh Jagannath Salgaonkar
On 10/24/2012 08:48 AM, Mahesh Jagannath Salgaonkar wrote:
 On 08/29/2012 01:54 PM, Mahesh J Salgaonkar wrote:
 So far powerpc kernel never exported memory limit information which is
 reflected by mem= kernel cmdline option. Hence, kexec-tools always used
 to build the ELF header for the entire system RAM, generating a dump bigger than
 the actual memory used by the first kernel.

 The proposed upstream kernel patch at
 https://lists.ozlabs.org/pipermail/linuxppc-dev/2012-August/100500.html
 now exports memory limit information through /proc/device-tree file.
 The above patch is still in discussion.
 
 The above mentioned kernel patches are now upstream. Below are commit ids:
 4bc77a5e - powerpc: Export memory limit via device tree
 a84fcd468 - powerpc: Change memory_limit from phys_addr_t to unsigned
 long long

Hi Simon,

Did you get a chance to review this patchset? The kernel patches are
already upstream.

Thanks,
-Mahesh.

 

 This patch series now reads the memory limit information from
 device-tree file if present and limits the crash memory ranges accordingly.

 Tested these patches on ppc32(ppc440) and ppc64 with a kernel patch by 
 Suzuki.

 Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com
 Tested-by: Suzuki K. Poulose suz...@in.ibm.com

 ---

 Mahesh Salgaonkar (2):
   kexec: Respect memory limit while building crash memory ranges on 
 ppc32.
   kexec: Respect memory limit while building crash memory ranges on ppc64


  kexec/arch/ppc/crashdump-powerpc.c |   14 ---
  kexec/arch/ppc/crashdump-powerpc.h |1 +
  kexec/arch/ppc/kexec-ppc.c |   48 
 
  kexec/arch/ppc64/crashdump-ppc64.c |   11 
  kexec/arch/ppc64/crashdump-ppc64.h |1 +
  kexec/arch/ppc64/kexec-ppc64.c |   28 +
  6 files changed, 99 insertions(+), 4 deletions(-)

 
 
 ___
 kexec mailing list
 kexec@lists.infradead.org
 http://lists.infradead.org/mailman/listinfo/kexec
 


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 0/2] kexec: Limit the crash memory ranges according to first kernel's memory limit.

2012-10-23 Thread Mahesh Jagannath Salgaonkar
On 08/29/2012 01:54 PM, Mahesh J Salgaonkar wrote:
 So far powerpc kernel never exported memory limit information which is
 reflected by mem= kernel cmdline option. Hence, kexec-tools always used
 to build the ELF header for the entire system RAM, generating a dump bigger than
 the actual memory used by the first kernel.
 
 The proposed upstream kernel patch at
 https://lists.ozlabs.org/pipermail/linuxppc-dev/2012-August/100500.html
 now exports memory limit information through /proc/device-tree file.
 The above patch is still in discussion.

The above mentioned kernel patches are now upstream. Below are commit ids:
4bc77a5e - powerpc: Export memory limit via device tree
a84fcd468 - powerpc: Change memory_limit from phys_addr_t to unsigned
long long

 
 This patch series now reads the memory limit information from
 device-tree file if present and limits the crash memory ranges accordingly.
 
 Tested these patches on ppc32(ppc440) and ppc64 with a kernel patch by Suzuki.
 
 Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com
 Tested-by: Suzuki K. Poulose suz...@in.ibm.com
 
 ---
 
 Mahesh Salgaonkar (2):
   kexec: Respect memory limit while building crash memory ranges on ppc32.
   kexec: Respect memory limit while building crash memory ranges on ppc64
 
 
  kexec/arch/ppc/crashdump-powerpc.c |   14 ---
  kexec/arch/ppc/crashdump-powerpc.h |1 +
  kexec/arch/ppc/kexec-ppc.c |   48 
 
  kexec/arch/ppc64/crashdump-ppc64.c |   11 
  kexec/arch/ppc64/crashdump-ppc64.h |1 +
  kexec/arch/ppc64/kexec-ppc64.c |   28 +
  6 files changed, 99 insertions(+), 4 deletions(-)
 


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 0/7] makedumpfile security key filtering with eppic

2012-10-11 Thread Mahesh Jagannath Salgaonkar
On 08/17/2012 09:46 AM, Atsushi Kumagai wrote:
 Hello Aravinda,
 
 On Thu, 16 Aug 2012 11:55:55 +0530
 Aravinda Prasad aravi...@linux.vnet.ibm.com wrote:
 
 Hello Kumagai-san,

 Did you get a chance to review the makedumpfile security key enhancement
 patches?

 - Aravinda
 
 Sorry for not responding.
 
 I'm afraid that I can't start to review them yet.
 I've been working on the cyclic mode feature for a few months,
 and I'm now preparing the next release including the feature.
 
 I'll release the next version soon, I'll review your patches after that.
 

Hi Atsushi,

Did you get a chance to review these patches?

Thanks,
-Mahesh.




 Thanks
 Atsushi Kumagai

 TODO:

   - Currently, works only for symbols in vmlinux, extend it to module
 symbols
   - Functionality support:
 - Implement the following callback functions.
   - apialignment
   - apigetenum
   - apigetdefs
 - Other functionalities specified in the code with TODO tag
   - Support specifying eppic macros in makedumpfile.conf file
   - Update erase info

 ---

 Aravinda Prasad (7):
   Initialize and setup eppic
   makedumpfile and eppic interface layer
   Eppic call back functions to query a dump image
   Implement apigetctype call back function
   Implement apimember and apigetrtype call back functions
   Extend eppic built-in functions to include memset function
   Support fully typed symbol access mode


  Makefile  |6 -
  dwarf_info.c  |  338 
  dwarf_info.h  |   18 ++
  erase_info.c  |   72 -
  erase_info.h  |3 
  extension_eppic.c |  446 
 +
  extension_eppic.h |   78 +
  makedumpfile.c|7 +
  makedumpfile.h|6 +
  9 files changed, 967 insertions(+), 7 deletions(-)
  create mode 100644 extension_eppic.c
  create mode 100644 extension_eppic.h

 -- 
 Aravinda Prasad



 -- 
 Regards,
 Aravinda
 


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] makedumpfile: s390x: Auto-detect the correct MAX_PHYSMEM_BITSused in vmcore being analyzed.

2011-12-21 Thread Mahesh Jagannath Salgaonkar
On 12/21/2011 11:41 AM, tachib...@mxm.nes.nec.co.jp wrote:
 Hi Mahesh,
 
 Thank you for the patch.
 I will review it.
 However I have never dumped 1 terabyte or more data using makedumpfile.
 Has anyone ever dumped it successfully?
 

I have seen makedumpfile successfully compressing vmcore generated on
system with 1TB memory.

Thanks,
-Mahesh.


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [RFC PATCH v5 1/9] fadump: Add documentation for firmware-assisted dump.

2011-12-09 Thread Mahesh Jagannath Salgaonkar
On 11/25/2011 04:04 AM, Paul Mackerras wrote:
 + /sys/kernel/debug/powerpc/fadump_region
 +
 +This file shows the reserved memory regions if fadump is
 +enabled otherwise this file is empty. The output format
 +is:
 +region: [start-end] reserved-size bytes, Dumped: dump-size
 +
 +e.g.
 +Contents when fadump is registered during first kernel
 +
 +# cat /sys/kernel/debug/powerpc/fadump_region
 +CPU : [0x006ffb-0x006fff001f] 0x40020 bytes, Dumped: 0x0
 +HPTE: [0x006fff0020-0x006fff101f] 0x1000 bytes, Dumped: 0x0
 +DUMP: [0x006fff1020-0x007fff101f] 0x1000 bytes, Dumped: 0x0
 
 How come the HPTE region is only 0x1000 (4k) bytes?  The hashed page
 table (HPT) will be much bigger than this.  Is this our way of telling
 the hypervisor that we don't care about the HPT?  If so, is it
 possible to make this region 0 bytes instead of 0x1000?
 

The firmware assisted dump registration fails with Hardware
error (-1) when called with HPTE dump section with size 0.

According to PAPR the size returned by ibm,configure-kernel-dump-sizes
node for HPTE dump section is a minimum size for HPTE dump section.

Though PAPR does not mention this, after several rounds of trial and error, it
looks like the HPTE dump section is one of the mandatory
arguments that needs to be passed (with non-zero size) while making
the ibm,configure-kernel-dump rtas call.

Thanks,
-Mahesh.


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: ppc64 kexec -p failed

2011-11-07 Thread Mahesh Jagannath Salgaonkar
On 11/04/2011 12:35 PM, Dave Young wrote:
 Hi,
 
 When use crashkernel=128M@256M at a ppc64 machine, kexec -p vmlinuz
 failed with:
 Could not find a free area of memory of faa448 bytes...
 Could not find a free area of memory of 142721d bytes...
 
 Is this a known issue, or is there a limitation on the crashkernel base addr?

On Power, the crashkernel base addr must fall inside the RMO region. This is
because the ppc64 kernel needs some of its memory in the RMO region. The
memory ranges below show that the system has an RMO region of size 128M.
Hence crashkernel base addr @64M should work just fine.

 
 Detail info as below (define DEBUG when building):
 
...
...
 -0800 : 0
...
...

Thanks,
-Mahesh.


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: makedumpfile: Add erased information in compressed kdump file and ELF formatted dumpfile

2011-09-13 Thread Mahesh Jagannath Salgaonkar
On 09/12/2011 11:54 PM, Dave Anderson wrote:
 
 Hi Mahesh,
 
 Now that this feature is in makedumpfile-1.4.0, I presume that you
 have crash utility warning patches underway?

Yup. I am working on it, will post the patches very soon.

 
 Also, can you confirm that compressed or kdumps with (or without)
 eraseinfo data can still be handled with no problem by the current
 version of the crash utility?  I'm not talking about the ramifications
 of whatever kernel data may have been erased, but whether the changes to
 the dumpfile headers could cause a problem.  I'm presuming that the
 compressed kdump eraseinfo data would be invisible to an older version
 of crash, and that ELF kdump eraseinfo would just show up as an (unused)
 ELF note -- but I just want to make sure. 

Yes. The changes to dumpfile header and addition of eraseinfo data/ELF
note will not cause any problems with older crash version. With the
current version of crash the ELF kdump eraseinfo note would show up as
below:

Elf64_Nhdr:
   n_namesz: 10 (ERASEINFO)
   n_descsz: 56
 n_type: 0 (?)
 206573617265 72616a5f64657263
 697320656d616e2e 650a36353220657a
 656e692065736172 736f746f72705f74
 303120657a697320

Thanks,
-Mahesh.


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v2 6/8] makedumpfile: Read and process 'for' command from config file.

2011-09-07 Thread Mahesh Jagannath Salgaonkar
Hi Ken'ichi,

On 09/07/2011 12:11 PM, Ken'ichi Ohmichi wrote:
 
 Hi Mahesh,
 
 On Mon, 5 Sep 2011 20:10:33 +0530
 Mahesh J Salgaonkar mah...@linux.vnet.ibm.com wrote:

 This patch fixes following BUGs:

 [..]

 - The loop construct used for array of char* (pointer) silently fails and
 does not filter the strings.

 Did the silent failure happen at the following code of list_entry_empty() ?

7373 addr = get_config_symbol_addr(le, 0, NULL);
7374 if (!addr)
7375 return TRUE;


 Nope. It used to fail in resolve_list_entry()->resolve_config_entry()
 and the following hunk from the patch fixes it:
We dont allow no-array 
 @@ -6866,7 +6882,7 @@ resolve_config_entry(struct config_entry *ce, unsigned 
 long long base_addr,
   * If this is a struct or list_head data type then
   * create a leaf node entry with 'next' member.
   */
 -if ((ce-type_flag  TYPE_BASE)
 +if (((ce-type_flag  (TYPE_BASE | TYPE_ARRAY)) == TYPE_BASE)
   (strcmp(ce-type_name, void)))
  return FALSE;
  
 The old code used to check only the TYPE_BASE flag, ignoring the TYPE_ARRAY flag.
 
 Thank you for the explanation.
 I feel I see it.
 Is the below understanding right ?
 
 At the part of earlier resolve_config_entry(), necessary information
 (sym_addr, type_name, and array_length) can be gotten in the case
 of pointer array.

Yes, including the size information for type of element (type_name) in
the LIST entry.

 However, the old resolve_config_entry() returned FALSE because of
 the missing check you mentioned.

Yes. The old resolve_config_entry() used to return FALSE only for arrays
of base type elements (e.g. array of char, int, long etc.). However, it
was working well for arrays of structures (pointer/non-pointer).

The LIST entry can be one of the following kinds:
1. Array of base types (pointer/non-pointer).
2. Array of structures (pointer/non-pointer).
3. Link list.
4. list_head link list.

The old code was working for all of the above except (1).

Thanks,
-Mahesh.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v2 7/8] makedumpfile: Add erased information in compressed kdump file

2011-08-16 Thread Mahesh Jagannath Salgaonkar
Hi Ken'ichi,

On 08/16/2011 06:30 AM, Ken'ichi Ohmichi wrote:
 
 Hi Mahesh,
 
 On Wed, 18 May 2011 01:36:17 +0530
 Mahesh J Salgaonkar mah...@linux.vnet.ibm.com wrote:
 @@ -9452,6 +9747,7 @@ reassemble_kdump_pages(void)
  struct cache_data cd_pd, cd_data;
  struct timeval tv_start;
  char *data = NULL;
 +unsigned long data_buf_size = info-page_size;
  
  initialize_2nd_bitmap(bitmap2);
  
 @@ -9465,7 +9761,7 @@ reassemble_kdump_pages(void)
  free_cache_data(cd_pd);
  return FALSE;
  }
 -if ((data = malloc(info-page_size)) == NULL) {
 +if ((data = malloc(data_buf_size)) == NULL) {
  ERRMSG(Can't allcate memory for page data.\n);
  free_cache_data(cd_pd);
  free_cache_data(cd_data);
 @@ -9570,6 +9866,49 @@ reassemble_kdump_pages(void)
  if (!write_cache_bufsz(cd_data))
  goto out;
  
 +info-offset_eraseinfo = cd_data.offset;
 +/* Copy eraseinfo from split dumpfiles to o/p dumpfile */
 +for (i = 0; i  info-num_dumpfile; i++) {
 +if (!SPLITTING_SIZE_EI(i))
 +continue;
 +
 +if (SPLITTING_SIZE_EI(i)  data_buf_size) {
 +data_buf_size = SPLITTING_SIZE_EI(i);
 +if ((data = realloc(data, data_buf_size)) == NULL) {
 +ERRMSG(Can't allcate memory for eraseinfo
 + data.\n);
 +goto out;
 +}
 +}
 +if ((fd = open(SPLITTING_DUMPFILE(i), O_RDONLY))  0) {
 +ERRMSG(Can't open a file(%s). %s\n,
 +SPLITTING_DUMPFILE(i), strerror(errno));
 +goto out;
 +}
 +if (lseek(fd, SPLITTING_OFFSET_EI(i), SEEK_SET)  0) {
 +ERRMSG(Can't seek a file(%s). %s\n,
 +SPLITTING_DUMPFILE(i), strerror(errno));
 +goto out;
 +}
 +if (read(fd, data, SPLITTING_SIZE_EI(i)) !=
 +SPLITTING_SIZE_EI(i)) {
 +ERRMSG(Can't read a file(%s). %s\n,
 +SPLITTING_DUMPFILE(i), strerror(errno));
 +goto out;
 +}
 +if (!write_cache(cd_data, data, SPLITTING_SIZE_EI(i)))
 +goto out;
 +info-size_eraseinfo += SPLITTING_SIZE_EI(i);
 +
 +close(fd);
 +fd = 0;
 +}
 +if (!write_cache_bufsz(cd_data))
 +goto out;
 +
 +if (!update_sub_header())
 +goto out;
 
 The above update_sub_header() breaks --reassemble option if a dumpfile
 does not contain eraseinfo data :

My bad. Thanks for catching it.

Thanks,
-Mahesh.

 
 ---
 # makedumpfile --split -d30 vmcore dump.1 dump.2
 Copying data   : [100 %]
 Copying data   : [100 %]
 
 The dumpfiles are saved to dump.1, and dump.2.
 
 makedumpfile Completed.
 # makedumpfile --reassemble dump.1 dump.2 dump.3
 Copying data   : [100 %]
 The dumpfile is saved to dump.3.
 
 makedumpfile Completed.
 # crash vmlinux dump.3
 [..]
 
 crash: page excluded: kernel virtual address: 8040b220  type: 
 cpu_possible_mask
 #
 ---
 
 The cause is that info->sub_header, which is written in update_sub_header(),
 is filled with 0x0.
 The following change fixes this problem:
 
 ---
 @@ -9378,6 +9680,7 @@ reassemble_kdump_header(void)
 info-name_dumpfile, strerror(errno));
 return FALSE;
 }
 +   memcpy(info-sub_header, kh, sizeof(kh));
 
 /*
  * Write dump bitmap to both a dumpfile and a bitmap file.
 ---
 
 Thanks
 Ken'ichi Ohmichi
 
 ___
 kexec mailing list
 kexec@lists.infradead.org
 http://lists.infradead.org/mailman/listinfo/kexec


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v2 5/8] makedumpfile: Read and process filter commands from config file.

2011-08-11 Thread Mahesh Jagannath Salgaonkar
Hi Ken'ichi,

On 08/11/2011 07:37 AM, Ken'ichi Ohmichi wrote:
 
 Hi Mahesh,
 
 On Wed, 18 May 2011 01:34:15 +0530
 Mahesh J Salgaonkar mah...@linux.vnet.ibm.com wrote:

 This patch enables to makedumpfile to read and process filter commands from
 specified config file. It builds a list of filter info consisting of memory
 address (paddr) and size to be filtered out. This list is used during
 read_pfn() function to filter out user specified kernel data from vmcore
 before writing it to compressed DUMPFILE. The filtered memory locations are
 filled with 'X' (0x58) character.

 The filter command syntax is:
  erase Symbol[.member[...]] [size SizeValue[K|M]]
  erase Symbol[.member[...]] [size SizeSymbol]
  erase Symbol[.member[...]] [nullify]

 Below are the examples how filter commands in config file look like:

  erase modules
  erase cred_jar.name size 10
  erase vmlist.addr nullify
 
 Thank you for the work.
 I tested this feature by the above example, and confirmed the feature
 works fine. Nice work :-)

Thanks :-)

 
 There are some comments in the below lines.
 Can you check them ?
 
 +/*
 + * Read the non-terminal's which are in the form of Symbol[.member[...]]
 + */
 +struct config_entry *
 +create_config_entry(const char *token, unsigned short flag, int line)
 +{
 +struct config_entry *ce = NULL, *ptr, *prev_ce;
 +char *str, *cur, *next;
 +long len;
 +int depth = 0;
 +
 +if (!token)
 +return NULL;
 +
 +cur = str = strdup(token);
 +prev_ce = ptr = NULL;
 +while (cur != NULL) {
 +if ((next = strchr(cur, '.')) != NULL) {
 +*next++ = '\0';
 +}
 +if (!strlen(cur)) {
 +cur = next;
 +continue;
 +}
 +
 +if ((ptr = calloc(1, sizeof(struct config_entry))) == NULL) {
 +ERRMSG(Can't allocate memory for config_entry\n);
 +goto err_out;
 +}
 +ptr-line = line;
 +ptr-flag |= flag;
 +if (depth == 0) {
 +/* First node is always a symbol name */
 +ptr-flag |= SYMBOL_ENTRY;
 +}
 +if (flag  FILTER_ENTRY) {
 +ptr-name = strdup(cur);
 +}
 +if (flag  SIZE_ENTRY) {
 +char ch = '\0';
 +int n = 0;
 +/* See if absolute length is provided */
 +if ((depth == 0) 
 +((n = sscanf(cur, %zd%c, len, ch))  0)) {
 +if (len  0) {
 +ERRMSG(Config error at %d: size 
 +value must be positive.\n,
 +line);
 +goto err_out;
 +}
 +ptr-size = len;
 +ptr-flag |= ENTRY_RESOLVED;
 +if (n == 2) {
 +/* Handle suffix.
 + * K = Kilobytes
 + * M = Megabytes
 + */
 +switch (ch) {
 +case 'M':
 +case 'm':
 +ptr-size *= 1024;
 +case 'K':
 +case 'k':
 +ptr-size *= 1024;
 +break;
 +}
 +}
 +}
 +else
 +ptr-name = strdup(cur);
 +}
 +if (prev_ce) {
 +prev_ce-next = ptr;
 +prev_ce = ptr;
 +}
 +else
 +ce = prev_ce = ptr;
 +cur = next;
 +depth++;
 +ptr = NULL;
 +}
 +free(str);
 +return ce;
 +
 +err_out:
 +if (ce)
 +free_config_entry(ce);
 +if (ptr)
 +free_config_entry(ptr);
 
 free(str); is necessary.
 

Nice catch. Agree.

 +return NULL;
 +}
 
 [..]
 
 +/*
 + * read filter config file and return each string token. If the parameter
 + * expected_token is non-NULL, then return the current token if it matches
 + * with expected_token otherwise save the current token and return NULL.
 + * At start of every module section filter_config.new_section is set to 1 
 and
 + * subsequent function invocations return NULL untill 
 filter_config.new_section
 + * is reset to 0 by passing @flag = CONFIG_NEW_CMD (0x02).
 + *
 + * Parameters:
 + * @expected_token  INPUT
 + *  Token string to match with currnet token.
 + *  =NULL - return the 

Re: [PATCH v2 4/8] makedumpfile: Introduce routines to get type name from debuginfo.

2011-08-04 Thread Mahesh Jagannath Salgaonkar
Hi Ken'ichi,

On 08/01/2011 06:57 AM, Ken'ichi Ohmichi wrote:
 
 Hi Mahesh,
 
 A pointer size can be obtained with sizeof(void *), and a pointer (virtual 
 address) can be defined as unsigned long.
 I think we can make this patch simple. How about the attached patch ?
 

Yup, the patch looks simpler and good.

 
 Thanks
 Ken'ichi Ohmichi
 
 diff --git a/makedumpfile.c b/makedumpfile.c
 index 3ad2bd5..6955f64 100644
 --- a/makedumpfile.c
 +++ b/makedumpfile.c
 @@ -34,7 +34,6 @@ struct erase_info   *erase_info = NULL;
  unsigned longnum_erase_info = 1; /* Node 0 is unused. */
 
  char filename_stdout[] = FILENAME_STDOUT;
 -long pointer_size;
  char config_buf[BUFSIZE_FGETS];
 
  /*
 @@ -2058,10 +2057,6 @@ get_debug_info(void)
*/
   while (dwarf_nextcu(dwarfd, off, next_off, header_size,
   abbrev_offset, address_size, offset_size) == 0) {
 - if (dwarf_info.cmd == DWARF_INFO_GET_PTR_SIZE) {
 - dwarf_info.struct_size = address_size;
 - break;
 - }
   off += header_size;
   if (dwarf_offdie(dwarfd, off, cu_die) == NULL) {
   ERRMSG(Can't get CU die.\n);

[...]

 @@ -7863,8 +7849,8 @@ print_config_entry(struct config_entry *ce)
   DEBUG_MSG(Type Name: %s, , ce-type_name);
   DEBUG_MSG(flag: %x, , ce-flag);
   DEBUG_MSG(Type flag: %lx, , ce-type_flag);
 - DEBUG_MSG(sym_addr: %llx, , ce-sym_addr);
 - DEBUG_MSG(addr: %llx, , ce-addr);
 + DEBUG_MSG(sym_addr: %lx, , ce-sym_addr);

The above change throws a compilation warning. sym_addr is unsigned long
long, so %llx still holds good.

 + DEBUG_MSG(addr: %lx, , ce-addr);
   DEBUG_MSG(offset: %lx, , ce-offset);
   DEBUG_MSG(size: %zd\n, ce-size);
 

[...]

 @@ -1284,7 +1283,7 @@ struct config_entry {
   unsigned short  flag;
   unsigned short  nullify;
   unsigned long long  sym_addr;   /* Symbol address */
 - unsigned long long  addr;   /* Symbol address or
 + unsigned long   addr;   /* Symbol address or
  value pointed by sym_addr */
   unsigned long long  cmp_addr;   /* for LIST_ENTRY */
   unsigned long   offset;
 

I tested this patch and it works fine.

Thanks,
-Mahesh.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v2 3/8] makedumpfile: Load the module symbol data from vmcore.

2011-08-03 Thread Mahesh Jagannath Salgaonkar
Hi Ken'ichi,

Sorry for late reply.

On 07/29/2011 03:12 PM, Ken'ichi Ohmichi wrote:
 
 Hi Mahesh,
 
 This patch is almost good, and there are some cleanup points and
 error-handling points.
 
 On Wed, 18 May 2011 01:31:53 +0530
 Mahesh J Salgaonkar mah...@linux.vnet.ibm.com wrote:

 This patch enables makedumpfile to load module symbol data from vmcore. This
 info is required during kernel module data filtering process. Traverse the
 modules list and load all module's symbol info data in the memory for fast
 lookup.
 
 [..]
 
 +static int
 +load_module_symbols(void)
 +{
 +unsigned long head, cur, cur_module;
 +unsigned long symtab, strtab;
 +unsigned long mod_base, mod_init;
 +unsigned int mod_size, mod_init_size;
 +unsigned char *module_struct_mem, *module_core_mem;
 +unsigned char *module_init_mem = NULL;
 +unsigned char *symtab_mem;
 +char *module_name, *strtab_mem, *nameptr;
 +struct module_info *modules = NULL;
 +struct symbol_info *sym_info;
 +unsigned int num_symtab;
 +unsigned int i = 0, nsym;
 +
 +head = SYMBOL(modules);
 +if (!get_num_modules(head, mod_st.num_modules)) {
 +ERRMSG(Can't get module count\n);
 +return FALSE;
 +}
 +if (!mod_st.num_modules) {
 +return FALSE;
 
 If the above num_modules is 0, makedumpfile fails without any hint
 message. How about the below change ?

Agree.

 
 @@ -7651,13 +7651,11 @@ load_module_symbols(void)
 unsigned int i = 0, nsym;
 
 head = SYMBOL(modules);
 -   if (!get_num_modules(head, mod_st.num_modules)) {
 +   if (!get_num_modules(head, mod_st.num_modules) ||
 +   !mod_st.num_modules) {
 ERRMSG(Can't get module count\n);
 return FALSE;
 }
 -   if (!mod_st.num_modules) {
 -   return FALSE;
 -   }
 mod_st.modules = calloc(mod_st.num_modules,
 sizeof(struct module_info));
 if (!mod_st.modules) {
 ---
 
 [..]
 
 +/* Travese the list and read module symbols */
 +while (cur != head) {
 
 [..]
 
 +if (mod_init_size  0) {
 +module_init_mem = calloc(1, mod_init_size);
 +if (module_init_mem == NULL) {
 +ERRMSG(Can't allocate memory for module 
 +init\n);
 +return FALSE;
 +}
 +if (!readmem(VADDR, mod_init, module_init_mem,
 +mod_init_size)) {
 +ERRMSG(Can't access module init in memory.\n);
 +return FALSE;
 
 In the above error case, module_init_mem should be freed.
 There are the same lacks of free, and I feel it is due to
 a large load_module_symbols() function.
 Hence I created the attached patch for making the function
 small and fixing the lacks of free.
 Can you review it ?

The attached patch looks good to me. Thanks for splitting the function.

Thanks,
-Mahesh.
 
 
 +if (mod_init_size  0)
 +free(module_init_mem);
 +} while (cur != head);
 
 This is the same as the beginning of this loop, and it is not necessary.
 
 
 Thanks
 Ken'ichi Ohmichi
 
 ---
 diff --git a/makedumpfile.c b/makedumpfile.c
 index 3ad2bd5..92ca23b 100644
 --- a/makedumpfile.c
 +++ b/makedumpfile.c
 @@ -7635,20 +7635,160 @@ clean_module_symbols(void)
  }
 
  static int
 -load_module_symbols(void)
 +__load_module_symbol(struct module_info *modules, unsigned long addr_module)
  {
 - unsigned long head, cur, cur_module;
 + int ret = FALSE;
 + unsigned int nsym;
   unsigned long symtab, strtab;
   unsigned long mod_base, mod_init;
   unsigned int mod_size, mod_init_size;
 - unsigned char *module_struct_mem, *module_core_mem;
 + unsigned char *module_struct_mem = NULL;
 + unsigned char *module_core_mem = NULL;
   unsigned char *module_init_mem = NULL;
   unsigned char *symtab_mem;
   char *module_name, *strtab_mem, *nameptr;
 - struct module_info *modules = NULL;
 - struct symbol_info *sym_info;
   unsigned int num_symtab;
 - unsigned int i = 0, nsym;
 +
 + /* Allocate buffer to read struct module data from vmcore. */
 + if ((module_struct_mem = calloc(1, SIZE(module))) == NULL) {
 + ERRMSG(Failed to allocate buffer for module\n);
 + return FALSE;
 + }
 + if (!readmem(VADDR, addr_module, module_struct_mem,
 + SIZE(module))) {
 + ERRMSG(Can't get module info.\n);
 + goto out;
 + }
 +
 + module_name = (char *)(module_struct_mem + OFFSET(module.name));
 + if (strlen(module_name)  MOD_NAME_LEN)
 + strcpy(modules-name, module_name);
 + else
 + strncpy(modules-name, module_name, MOD_NAME_LEN-1);
 

Re: [PATCH v2 2/8] makedumpfile: Apply relocation while loading module debuginfo.

2011-07-28 Thread Mahesh Jagannath Salgaonkar
Hi Ken'ichi,

On 07/28/2011 02:41 PM, Ken'ichi Ohmichi wrote:
 
 Hi Mahesh,
 
 This feature's patches are very large, and I created a devel branch
 on sourceforge git tree for sharing the code with you.
 
 http://makedumpfile.git.sourceforge.net/git/gitweb.cgi?p=makedumpfile/makedumpfile;a=shortlog;h=refs/heads/filter-out-devel
 
 I committed all of your patches and my cleanup patches.
 If you notice some problems or create some patches, please let me know them.
 

Sure. Will take a look.

 On Wed, 27 Jul 2011 11:39:44 +0530
 Mahesh Jagannath Salgaonkar mah...@linux.vnet.ibm.com wrote:
 + *
 + * This function uses dwfl API's to apply relocation before reading the
 + * dwarf information from module debuginfo.
 + * On success, this function sets the dwarf_info.elfd and 
 dwarf_info.dwarfd
 + * after applying relocation to module debuginfo.
 + */
 +static int
 +init_dwarf_info(void)

 Is the searching method of debuginfo file only for kernel module ?
 Or also for vmlinux ?

 Yes, the searching method is only for module debuginfo. For vmlinux we
 set the dwarf_info.name_debuginfo before call to init_dwarf_info() and
 we never call search routine dwfl_linux_kernel_report_offline() for
 vmlinux. The routine dwfl_report_offline() applies the relocation on
 specified debuginfo module. In case vmlinux it basically does nothing.

 I feel this function is a little complex, and I'd like to make it simple.
 If only for kernel module, we can do it by separating the searching method
 from this function and calling it in case of kernel module.

 The init_dwarf_info() function gets called everytime when
 get_debug_info() is called. The get_debug_info() is called multiple
 times for same debuginfo file. This function tries to avoid the repeated
 search for same debuginfo, hence the function is little complex.
  - In case of kernel module the very first invocation of
 init_dwarf_info() would call dwfl_linux_kernel_report_offline() which
 will iterate over the available kernel modules and process the debuginfo
 only for the module for which we are interested in.
  - We set the dwarf_info.name_debuginfo with the debuginfo file name
 found during the first invocation.
  - The second invocation of init_dwarf_info() for same kernel module,
 will find dwarf_info.name_debuginfo is already set and will avoid the
 debuginfo search. In this case it will just apply relocation using
 routine dwfl_report_offline().

 This function does not have any special handling code for vmlinux. The
 function is independent of whether it is called for vmlinux or kernel
 module. In case of vmlinux this function has absolutely no side effects.
 
 If adding the searching method to the below position and removing the code
 from init_dwarf_info(), I guess it makes the code simple.
 
 @ process_config_file()
  9402 if (!set_dwarf_debuginfo(config-module_name,
  9403 NULL, 
 -1)) {
  9404 ERRMSG(Skipping to next Module 
 section\n);
  9405 skip_section = 1;
  9406 free_config(config);
  9407 continue;
  9408 }
  9409  HERE  

This may not be the correct place to call search method. We may end up
calling search method multiple times for same kernel module. I think
moving the search method inside set_dwarf_debuginfo() routine at below
position is a better place:

@set_dwarf_debuginfo()
..
..
if (!strcmp(dwarf_info.module_name, vmlinux) ||
!strcmp(dwarf_info.module_name, xen-syms))
return TRUE;
+ HERE 
-/* check to see whether module debuginfo is available */
-if (!init_dwarf_info())
-return FALSE;
-else
-clean_dwfl_info();
 return TRUE;
}

And then we can remove search routine from init_dwarf_info(). What do
you think?

Thanks,
-Mahesh.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v2 2/8] makedumpfile: Apply relocation while loading module debuginfo.

2011-07-28 Thread Mahesh Jagannath Salgaonkar
Hi Ken'ichi,

On 07/28/2011 02:41 PM, Ken'ichi Ohmichi wrote:
 
 Hi Mahesh,
 
 This feature's patches are very large, and I created a devel branch
 on sourceforge git tree for sharing the code with you.
 
 http://makedumpfile.git.sourceforge.net/git/gitweb.cgi?p=makedumpfile/makedumpfile;a=shortlog;h=refs/heads/filter-out-devel
 
 I committed all of your patches and my cleanup patches.
 If you notice some problems or create some patches, please let me know them.
 

I checked out the devel branch and verified that all patches look fine.
I will take this branch as base and will work on moving search method to
set_dwarf_debuginfo() routine.

Thanks,
-Mahesh.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v2 2/8] makedumpfile: Apply relocation while loading module debuginfo.

2011-07-27 Thread Mahesh Jagannath Salgaonkar
Hi Ken'ichi,

On 07/25/2011 01:41 PM, Ken'ichi Ohmichi wrote:
 
 Hi Mahesh,
 
 This patch is almost good, and there are some cleanup points.
 
 On Wed, 18 May 2011 01:31:01 +0530
 Mahesh J Salgaonkar mah...@linux.vnet.ibm.com wrote:
 +
 +static void
 +clean_dwfl_info(void)
 +{
 +if (dwarf_info.dwfl)
 +dwfl_end(dwarf_info.dwfl);
 +
 +dwarf_info.dwfl = NULL;
 +dwarf_info.dwarfd = NULL;
 +dwarf_info.elfd = NULL;
 +}
 +
 +/*
 + * Intitialize the dwarf info.

 + * Linux kernel module debuginfo are of ET_REL (relocatable) type. The old
 + * implementation of get_debug_info() function that reads the debuginfo was
 + * not relocation-aware and hence could not read the dwarf info properly
 + * from module debuginfo.
 
 The old implementation .. is useful for my review, but it is not necessary
 after merging this patch.
 
 
 + *
 + * This function uses dwfl API's to apply relocation before reading the
 + * dwarf information from module debuginfo.
 + * On success, this function sets the dwarf_info.elfd and dwarf_info.dwarfd
 + * after applying relocation to module debuginfo.
 + */
 +static int
 +init_dwarf_info(void)
 
 Is the searching method of debuginfo file only for kernel module ?
 Or also for vmlinux ?

Yes, the searching method is only for module debuginfo. For vmlinux we
set the dwarf_info.name_debuginfo before call to init_dwarf_info() and
we never call search routine dwfl_linux_kernel_report_offline() for
vmlinux. The routine dwfl_report_offline() applies the relocation on
specified debuginfo module. In case vmlinux it basically does nothing.

 
 I feel this function is a little complex, and I'd like to make it simple.
 If only for kernel module, we can do it by separating the searching method
 from this function and calling it in case of kernel module.

The init_dwarf_info() function gets called every time
get_debug_info() is called. The get_debug_info() is called multiple
times for same debuginfo file. This function tries to avoid the repeated
search for same debuginfo, hence the function is little complex.
- In case of kernel module the very first invocation of
init_dwarf_info() would call dwfl_linux_kernel_report_offline() which
will iterate over the available kernel modules and process the debuginfo
only for the module for which we are interested in.
- We set the dwarf_info.name_debuginfo with the debuginfo file name
found during the first invocation.
- The second invocation of init_dwarf_info() for same kernel module,
will find dwarf_info.name_debuginfo is already set and will avoid the
debuginfo search. In this case it will just apply relocation using
routine dwfl_report_offline().

This function does not have any special handling code for vmlinux. The
function is independent of whether it is called for vmlinux or kernel
module. In case of vmlinux this function has absolutely no side effects.

 
 
 +{
 +Dwfl *dwfl = NULL;
 +int dwfl_fd = -1;
 +static char *debuginfo_path = DEFAULT_DEBUGINFO_PATH;
 +static const Dwfl_Callbacks callbacks = {
 +.section_address = dwfl_offline_section_address,
 +.find_debuginfo = dwfl_standard_find_debuginfo,
 +.debuginfo_path = debuginfo_path,
 +};
 +
 +dwarf_info.elfd = NULL;
 +dwarf_info.dwarfd = NULL;
 +
 +if ((dwfl = dwfl_begin(callbacks)) == NULL) {
 +ERRMSG(Can't create a handle for a new dwfl session.\n);
 +return FALSE;
 +}
 +
 +if (dwarf_info.name_debuginfo) {
 +/* We have absolute path for debuginfo file, use it directly
 + * instead of searching it through
 + * dwfl_linux_kernel_report_offline() call.
 + *
 + * Open the debuginfo file if it is not already open.
 + */
 +if (dwarf_info.fd_debuginfo  0)
 +dwarf_info.fd_debuginfo =
 +open(dwarf_info.name_debuginfo, O_RDONLY);
 +
 +dwfl_fd = dup(dwarf_info.fd_debuginfo);
 +if (dwfl_fd  0) {
 +ERRMSG(Failed to get a duplicate handle for
 + debuginfo.\n);
 +goto err_out;
 +}
 +}
 +if (dwarf_info.fd_debuginfo  0) {
 
 Better to change the above to
   if (dwfl_fd  0) {
 
 because dwfl_fd is used in the following and dwarf_info.fd_debuginfo is not.

Agree.

 
 
 @@ -1383,18 +1523,12 @@ get_symbol_addr(char *symname)
  Elf_Data *data = NULL;
  Elf_Scn *scn = NULL;
  char *sym_name = NULL;
 -const off_t failed = (off_t)-1;
  
 -if (lseek(dwarf_info.fd_debuginfo, 0, SEEK_SET) == failed) {
 -ERRMSG(Can't seek the kernel file(%s). %s\n,
 -dwarf_info.name_debuginfo, strerror(errno));
 -return NOT_FOUND_SYMBOL;
 -}
 -if (!(elfd = elf_begin(dwarf_info.fd_debuginfo, ELF_C_READ, NULL))) {
 -ERRMSG(Can't get first elf header of %s.\n,
 -

Re: [PATCH v2 2/8] makedumpfile: Apply relocation while loading module debuginfo.

2011-07-19 Thread Mahesh Jagannath Salgaonkar
Hi Ken'ichi,

On 07/19/2011 07:57 AM, Ken'ichi Ohmichi wrote:
 
 Hi Mahesh,
 
 On Fri, 15 Jul 2011 15:51:38 +0530
 Mahesh Jagannath Salgaonkar mah...@linux.vnet.ibm.com wrote:

 On my machine (RHEL5 x86_64), I cannot compile makedumpfile with this patch.
 I guess that it is due to old elfutils of my machine, and I am trying this
 problem by newer elfutils.

 I'd like to get some hints, so can you tell me your environment ?
 RHEL6 x86_64 ?

 Yes, I had tested these patches on RHEL6.0 and RHEL6.1 x86_64. As far as
 I know RHEL6.0 had elfutils-0.148.

 I did not try my patches on RHEL5 x86-64. Let me see if I can get a
 RHEL5 x86_64 box to verify.
 
 I can compile it with elfutils-0.137 and some compiling options on RHEL5.
 We need the following change in Makefile.
 
 -   $(CC) $(CFLAGS) $(OBJ_ARCH) -o $@ $ -static -ldw -lelf -lz
 +   $(CC) $(CFLAGS) $(OBJ_ARCH) -o $@ $ -static -ldw -lbz2 -lebl -ldl 
 -lelf -lz
 
 
 By the way, do you have any documents/samples for dwfl_ functions ?
 I'd like to see them for reviewing this patch.

As far as I know there is no formal documentation available for
dwfl_ functions apart from 'elfutils/libdwfl/libdwfl.h' from
elfutils sources. In fact the elfutils itself lacks the Documentation
part for most of its APIs.

I used 'elfutils/libdwfl/libdwfl.h' from elfutils sources as a reference
which has neat explanation for each dwfl_xxx() functions. Apart from
libdwfl.h I also used to go through actual function implementations
available under elfutils sources.

Thanks,
-Mahesh.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v2 2/8] makedumpfile: Apply relocation while loading module debuginfo.

2011-07-15 Thread Mahesh Jagannath Salgaonkar
Hi Ken'ichi,

On 07/15/2011 03:05 PM, Ken'ichi Ohmichi wrote:
 
 Hi Mahesh,
 
 On my machine (RHEL5 x86_64), I cannot compile makedumpfile with this patch.
 I guess that it is due to old elfutils of my machine, and I am trying this
 problem by newer elfutils.
 
 I'd like to get some hints, so can you tell me your environment ?
 RHEL6 x86_64 ?

Yes, I had tested these patches on RHEL6.0 and RHEL6.1 x86_64. As far as
I know RHEL6.0 had elfutils-0.148.

I did not try my patches on RHEL5 x86-64. Let me see if I can get a
RHEL5 x86_64 box to verify.

But in case we can not fix the compilation with RHEL5 environment, do
you think we can go with one of following options:

1. When compiled with older elfutils library, build makedumpfile without
this feature enabled.
2. Make a newer elfutils version a prerequisite for future makedumpfile
releases.

What do you think?

Thanks,
-Mahesh.

 
 
 Thanks
 Ken'ichi Ohmichi
 
 On Wed, 18 May 2011 01:31:01 +0530
 Mahesh J Salgaonkar mah...@linux.vnet.ibm.com wrote:

 From: Mahesh Salgaonkar mah...@linux.vnet.ibm.com

 So far makedumpfile implementation does not have to deal with module
 debuginfo files. In order to support filtering of kernel module data,
 it needs to read dwarf information from module debuginfo files. The linux
 kernel module debuginfo are of ET_REL type and requires relocation to be
 applied in order to get the dwarf information.

 This patch uses dwfl_* API's to make the code relocation-aware while
 loading module debuginfo files. It also uses dwfl_* API to search module
 debuginfo files installed under default debuginfo path.

 Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com
 Signed-off-by: Prerna Saxena pre...@linux.vnet.ibm.com
 ---

  makedumpfile.c |  261 
 +---
  makedumpfile.h |7 ++
  2 files changed, 216 insertions(+), 52 deletions(-)

 diff --git a/makedumpfile.c b/makedumpfile.c
 index f136eba..2274b18 100644
 --- a/makedumpfile.c
 +++ b/makedumpfile.c
 @@ -1372,6 +1372,146 @@ get_elf_info(void)
  return TRUE;
  }
  
 +static int
 +process_module (Dwfl_Module *dwflmod,
 +void **userdata __attribute__ ((unused)),
 +const char *name __attribute__ ((unused)),
 +Dwarf_Addr base __attribute__ ((unused)),
 +void *arg)
 +{
 +const char *fname, *mod_name, *debugfile;
 +Dwarf_Addr dwbias;
 +
 +/* get a debug context descriptor.*/
 +dwarf_info.dwarfd = dwfl_module_getdwarf (dwflmod, dwbias);
 +dwarf_info.elfd = dwarf_getelf(dwarf_info.dwarfd);
 +
 +mod_name = dwfl_module_info(dwflmod, NULL, NULL, NULL, NULL, NULL,
 +fname, debugfile);
 +
 +if (!strcmp(dwarf_info.module_name, mod_name) 
 +!dwarf_info.name_debuginfo  debugfile) {
 +/*
 + * Store the debuginfo filename. Next time we will
 + * open debuginfo file direclty instead of searching
 + * for it again.
 + */
 +dwarf_info.name_debuginfo = strdup(debugfile);
 +}
 +
 +return DWARF_CB_OK;
 +}
 +
 +static int
 +dwfl_report_module_p(const char *modname, const char *filename)
 +{
 +if (filename  !strcmp(modname, dwarf_info.module_name))
 +return 1;
 +return 0;
 +}
 +
 +static void
 +clean_dwfl_info(void)
 +{
 +if (dwarf_info.dwfl)
 +dwfl_end(dwarf_info.dwfl);
 +
 +dwarf_info.dwfl = NULL;
 +dwarf_info.dwarfd = NULL;
 +dwarf_info.elfd = NULL;
 +}
 +
 +/*
 + * Intitialize the dwarf info.
 + * Linux kernel module debuginfo are of ET_REL (relocatable) type. The old
 + * implementation of get_debug_info() function that reads the debuginfo was
 + * not relocation-aware and hence could not read the dwarf info properly
 + * from module debuginfo.
 + *
 + * This function uses dwfl API's to apply relocation before reading the
 + * dwarf information from module debuginfo.
 + * On success, this function sets the dwarf_info.elfd and dwarf_info.dwarfd
 + * after applying relocation to module debuginfo.
 + */
 +static int
 +init_dwarf_info(void)
 +{
 +Dwfl *dwfl = NULL;
 +int dwfl_fd = -1;
 +static char *debuginfo_path = DEFAULT_DEBUGINFO_PATH;
 +static const Dwfl_Callbacks callbacks = {
 +.section_address = dwfl_offline_section_address,
 +.find_debuginfo = dwfl_standard_find_debuginfo,
 +.debuginfo_path = debuginfo_path,
 +};
 +
 +dwarf_info.elfd = NULL;
 +dwarf_info.dwarfd = NULL;
 +
 +if ((dwfl = dwfl_begin(callbacks)) == NULL) {
 +ERRMSG(Can't create a handle for a new dwfl session.\n);
 +return FALSE;
 +}
 +
 +if (dwarf_info.name_debuginfo) {
 +/* We have absolute path for debuginfo file, use it directly
 + * instead of searching it through
 + * dwfl_linux_kernel_report_offline() call.
 + *
 + * Open the debuginfo file if it is 

Re: [PATCH v2 1/8] makedumpfile: Add '--config' option to specify filterconfig file.

2011-05-23 Thread Mahesh Jagannath Salgaonkar
On 05/23/2011 02:39 PM, tachib...@mxm.nes.nec.co.jp wrote:
 Hi Mahesh
 
 
 On 2011/05/18 01:30:07 +0530, Mahesh J Salgaonkar mah...@linux.vnet.ibm.com 
 wrote:
 From: Mahesh Salgaonkar mah...@linux.vnet.ibm.com

 This patch adds '--config' option to specify config file that contains filter
 commands to filter out desired kernel data and it's members.

 Updated makedumpfile man page with '--config' option info.

 Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com
 Signed-off-by: Prerna Saxena pre...@linux.vnet.ibm.com
 ---

  makedumpfile.8 |   20 +++-
  makedumpfile.c |   17 +
  makedumpfile.h |7 +++
  3 files changed, 43 insertions(+), 1 deletions(-)

 diff --git a/makedumpfile.8 b/makedumpfile.8
 index 4a0271d..a1aaa3e 100644
 --- a/makedumpfile.8
 +++ b/makedumpfile.8
 @@ -6,6 +6,8 @@ makedumpfile \- make a small dumpfile of kdump
  .br
  \fBmakedumpfile\fR \-F [\fIOPTION\fR] [\-x \fIVMLINUX\fR|\-i 
 \fIVMCOREINFO\fR] \fIVMCORE\fR
  .br
 +\fBmakedumpfile\fR   [\fIOPTION\fR] \-x \fIVMLINUX\fR \-\-config 
 \fIFILTERCONFIGFILE\fR \fIVMCORE\fR \fIDUMPFILE\fR
 +.br
  \fBmakedumpfile\fR \-R \fIDUMPFILE\fR
  .br
  \fBmakedumpfile\fR \-\-split [\fIOPTION\fR] [\-x \fIVMLINUX\fR|\-i 
 \fIVMCOREINFO\fR] \fIVMCORE\fR \fIDUMPFILE1\fR \fI
 DUMPFILE2\fR [\fIDUMPFILE3\fR ..]
 @@ -55,6 +57,15 @@ compression support. The ELF format is readable with GDB 
 and the crash utility.
  If a user wants to use GDB, \fIDUMPFILE\fR format has to be explicitly
  specified to be the ELF format.
  .PP
 +Apart from the exclusion of unnecessary pages mentioned above, makedumpfile
 +allows user to filter out targeted kernel data. The filter config file can
 +be used to specify kernel/module symbols and its members that need to be
 +filtered out through the erase command syntax. makedumpfile reads the filter
 +config and builds the list of memory addresses and its sizes after 
 processing
 +filter commands. The memory locations that require to be filtered out are
 +then poisoned with character 'X' (58 in Hex). Refer to \fBfilter.conf(8)\fR
 +for file format.
 +.PP
  To analyze the first kernel's memory usage, makedumpfile can refer to
  \fIVMCOREINFO\fR instead of \fIVMLINUX\fR. \fIVMCOREINFO\fR contains the 
 first
  kernel's information (structure size, field offset, etc.), and 
 \fIVMCOREINFO\fR
 @@ -232,6 +243,13 @@ specified.
  # makedumpfile \-g vmcoreinfo \-x vmlinux
  
  .TP
 +\fB\-\-config\fR \fIFILTERCONFIGFILE\fR
 +Used in conjunction with \-x \fIVMLINUX\fR option, to specify the filter
 +config file \fIFILTERCONFIGFILE\fR that contains erase commands to filter 
 out
 +desired kernel data from vmcore while creating \fIDUMPFILE\fR. For filter
 +command syntax please refer to \fBfilter.conf(8)\fR.
 +
 +.TP
  \fB\-F\fR
  Output the dump data in the flattened format to the standard output for
  transporting the dump data by SSH.
 @@ -482,5 +500,5 @@ Written by Masaki Tachibana, and Ken'ichi Ohmichi.
  
  .SH SEE ALSO
  .PP
 -crash(8), gdb(1), kexec(8)
 +crash(8), gdb(1), kexec(8), filter.conf(8)
  
 diff --git a/makedumpfile.c b/makedumpfile.c
 index 7d0c663..f136eba 100644
 --- a/makedumpfile.c
 +++ b/makedumpfile.c
 @@ -636,6 +636,11 @@ print_usage(void)
  MSG(  Creating DUMPFILE:\n);
  MSG(  # makedumpfile[-c|-E] [-d DL] [-x VMLINUX|-i VMCOREINFO] 
 VMCORE DUMPFILE\n);
  MSG(\n);
 +MSG(  Creating DUMPFILE with filtered kernel data specified through 
 filter config\n);
 +MSG(  file:\n);
 +MSG(  # makedumpfile[-c|-E] [-d DL] -x VMLINUX --config 
 FILTERCONFIGFILE VMCORE\n);
 +MSG(DUMPFILE\n);
 +MSG(\n);
  MSG(  Outputting the dump data in the flattened format to the standard 
 output:\n);
  MSG(  # makedumpfile -F [-c|-E] [-d DL] [-x VMLINUX|-i VMCOREINFO] 
 VMCORE\n);
  MSG(\n);
 @@ -714,6 +719,11 @@ print_usage(void)
  MSG(  other system that is running the same first kernel. [-x 
 VMLINUX] must\n);
  MSG(  be specified.\n);
  MSG(\n);
 +MSG(  [--config FILTERCONFIGFILE]:\n);
 +MSG(  Used in conjunction with -x VMLINUX option, to specify the 
 filter config\n);
 +MSG(  file that contains filter commands to filter out desired 
 kernel data\n);
 +MSG(  from vmcore while creating DUMPFILE.\n);
 +MSG(\n);
  MSG(  [-F]:\n);
  MSG(  Output the dump data in the flattened format to the standard 
 output\n);
  MSG(  for transporting the dump data by SSH.\n);
 @@ -7729,6 +7739,9 @@ check_param_for_creating_dumpfile(int argc, char 
 *argv[])
  if (info-flag_flatten  info-flag_split)
  return FALSE;
  
 
 
 +if (info-name_filterconfig  !info-name_vmlinux)
 +return FALSE;
 
 A new option '--config makedumpfile.conf' always needs an option 
 '-x debug_info_file', doesn't it?
Yes, '-x' is mandatory while using '--config' option.
 Once I heard that a debug_info file(i.e. vmlinux) was too large 
 to be included in 2nd kernel's initrd.
 So 

Re: [PATCH v1 0/6] makedumpfile: makedumpfile enhancement to filter out kernel data from vmcore

2011-03-14 Thread Mahesh Jagannath Salgaonkar
On 03/11/2011 07:37 PM, Dave Anderson wrote:
 
 
 - Original Message -
 Hi All,

 Please find the makedumpfile enhancement patchset that introduces a data
 filtering feature which enables makedumpfile to filter out desired kernel
 symbol data and it's members from the specified VMCORE file. The data to be
 filtered out is poisoned with character 'X' (58 in Hex).

 This feature will be very useful for the customers who wants to erase the
 customer sensitive data like security keys and other confidential data, in
 DUMPFILE before sending it to support team for analysis.

 This feature introduces a filter config file where, using filter commands,
 user can specify desired kernel data symbols and it's members that need to be
 filtered out while creating o/p DUMPFILE. The Syntax for filter commands are
 provided in the filter.conf(8) man page.

 The first 4 patches prepares the base work for filtering framework.  The 
 last 2
 patches implements the generic filtering framework to erase desired kernel
 data.

 I have tested these patches on x86_64 and s390x architecture against RHEL6 GA
 kernel. The feature supports filtering data from ELF as well as 
 kdump-compressed
 formatted dump.

 Please review the patchset and let me know your comments.

 Thanks,
 -Mahesh.
 
 Hi Mahesh,
 
 Is there any notation in the filtered ELF kdump or compressed kdump file
 that filtering has been done?  Given that there may be potential ramifications
 in crash utility behavior (or outright failure?), the crash utility should
 display a warning message early on during invocation.

Hmm... I did not think about it. I am thinking of following approach:

- Set a bit in dump_level (DL_FILTER_KERNEL_DATA = 0x800) that will
denote that filtering has been done.
- For compressed kdump file we anyway have dump_level available in kdump
sub header
- For ELF kdump, currently we do not have any way to convey dump_level
info to crash utility (Ken'ichi, correct me if I am wrong). How about
introducing an additional ELF note (NT_DUMP_LEVEL) that will include
dump_level info.

Any other suggestions are welcome.

Thanks,
-Mahesh.
 
 Thanks,
   Dave
 
 
 ---

 Mahesh Salgaonkar (6):
 makedumpfile: Add '--config' option to specify filter config file.
 makedumpfile: Apply relocation while loading module debuginfo.
 makedumpfile: Load the module symbol data from vmcore.
 makedumpfile: Introduce routines to get type name from debuginfo.
 makedumpfile: Read and process filter commands from config file.
 makedumpfile: Read and process 'for' command from config file.


 Makefile | 8
 filter.conf | 149 
 filter.conf.8 | 419 +++
 makedumpfile.8 | 20 +
 makedumpfile.c | 2113
 ++--
 makedumpfile.h | 152 
 6 files changed, 2782 insertions(+), 79 deletions(-)
 create mode 100644 filter.conf
 create mode 100644 filter.conf.8

 --
 Signature


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v1 0/6] makedumpfile: makedumpfile enhancement to filter out kernel data from vmcore

2011-03-14 Thread Mahesh Jagannath Salgaonkar
On 03/14/2011 07:25 AM, Ken'ichi Ohmichi wrote:
 
 Hi Mahesh,
 
 On Fri, 11 Mar 2011 13:34:32 +0530
 Mahesh J Salgaonkar mah...@linux.vnet.ibm.com wrote:

 Please find the makedumpfile enhancement patchset that introduces a data
 filtering feature which enables makedumpfile to filter out desired kernel
 symbol data and it's members from the specified VMCORE file. The data to be
 filtered out is poisoned with character 'X' (58 in Hex).

 This feature will be very useful for the customers who wants to erase the
 customer sensitive data like security keys and other confidential data, in
 DUMPFILE before sending it to support team for analysis.

 This feature introduces a filter config file where, using filter commands,
 user can specify desired kernel data symbols and it's members that need to be
 filtered out while creating o/p DUMPFILE. The Syntax for filter commands are
 provided in the filter.conf(8) man page.

 The first 4 patches prepares the base work for filtering framework. The last 
 2
 patches implements the generic filtering framework to erase desired kernel
 data.

 I have tested these patches on x86_64 and s390x architecture against RHEL6 GA
 kernel. The feature supports filtering data from ELF as well as
 kdump-compressed formatted dump.

 Please review the patchset and let me know your comments.
 
 This patchset is interesting, and I start reviewing.
 I haven't reviewed the code yet, and this is a quick review.
 
 * About the filename of configuration.
   Is filter.conf only for makedumpfile command ?
   If so, I feel filter.conf is too generic file name.
   How about makedumpfile.conf ?

:-) I initially started with that name. But since it was only for
filtering purpose I went for filter.conf. However, I am ok with the name
makedumpfile.conf.
 
   If makedumpfile.conf, it is clear that the file is only for
   makedumpfile, and we will be able to use the file for not only
   erasing secret data but also other purpose.
   (ex: specifying the other options like -d 31 in the file)

Agree, Makes sense.
 
 
 Thanks
 Ken'ichi Ohmichi


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v1 0/6] makedumpfile: makedumpfile enhancement to filter out kernel data from vmcore

2011-03-14 Thread Mahesh Jagannath Salgaonkar
Hi Ken'ichi,

On 03/14/2011 08:14 AM, Ken'ichi Ohmichi wrote:
 
 Hi Dave, Mahesh,
 
 On Fri, 11 Mar 2011 09:07:50 -0500 (EST)
 Dave Anderson ander...@redhat.com wrote:

 Please find the makedumpfile enhancement patchset that introduces a data
 filtering feature which enables makedumpfile to filter out desired kernel
 symbol data and it's members from the specified VMCORE file. The data to be
 filtered out is poisoned with character 'X' (58 in Hex).

 This feature will be very useful for the customers who wants to erase the
 customer sensitive data like security keys and other confidential data, in
 DUMPFILE before sending it to support team for analysis.

 This feature introduces a filter config file where, using filter commands,
 user can specify desired kernel data symbols and it's members that need to 
 be
 filtered out while creating o/p DUMPFILE. The Syntax for filter commands are
 provided in the filter.conf(8) man page.

 The first 4 patches prepares the base work for filtering framework.  The 
 last 2
 patches implements the generic filtering framework to erase desired kernel
 data.

 I have tested these patches on x86_64 and s390x architecture against RHEL6 
 GA
 kernel. The feature supports filtering data from ELF as well as 
 kdump-compressed
 formatted dump.

 Please review the patchset and let me know your comments.

 Thanks,
 -Mahesh.

 Hi Mahesh,

 Is there any notation in the filtered ELF kdump or compressed kdump file
 that filtering has been done?  Given that there may be potential 
 ramifications
 in crash utility behavior (or outright failure?), the crash utility should
 display a warning message early on during invocation.
 
 That is a good point.
 
 How about adding new members (like offset_eraseinfo, size_eraseinfo)
 into the sub header in compressed kdump file, and setting version 5
 in the header version (disk_dump_header.header_version) ?
 These members show the erased information like the following:
 
 struct kdump_sub_header {
 unsigned long   phys_base;
 int dump_level; /* header_version 1 and later */
 int split;  /* header_version 2 and later */
 unsigned long   start_pfn;  /* header_version 2 and later */
 unsigned long   end_pfn;/* header_version 2 and later */
 off_t   offset_vmcoreinfo;/* header_version 3 and later */
 unsigned long   size_vmcoreinfo;  /* header_version 3 and later */
 off_t   offset_note;  /* header_version 4 and later */
 unsigned long   size_note;/* header_version 4 and later */
 +   off_t   offset_eraseinfo; /* header_version 5 and later */
 +   unsigned long   size_eraseinfo;   /* header_version 5 and later */
 };
 
 The erased information contains only effective lines in the
 configuration file.

Do you mean the info would contain symbol name/expression, resolved
symbol/vmalloc addresses and its sizes that got filtered out?

What I mean by symbol expression is, user can ask makedumpfile to erase
a data referred by a member from the symbol variable of structure type.
e.g.
struct S1 {
int a;
};
struct S2 {
struct S1 *mystruct1;
};

struct S2 mystruct2;

#Filter command
erase mystruct2.mystruct1.a

So the erase information in kdump header would contain:

---
Symbol Name: mystruct2.mystruct1.a
Filter Addr: mystruct2.mystruct1->a
Filter Size: 4  (sizeof type int)
---

For erase commands in a loop construct, we will have multiple filter addresses.

Please let me know your comment on the above format.

Hi Dave,

Do you think the above information would be enough for crash utility to
interpret?

Thanks,
-Mahesh.

 In the case of an ELF kdump file, how about adding an ELF note section
 which also shows the erased information?
 
 The crash utility will be able to know the name list of the
 erased symbols from the information.
 
 
 Thanks
 Ken'ichi Ohmichi
 
 ___
 kexec mailing list
 kexec@lists.infradead.org
 http://lists.infradead.org/mailman/listinfo/kexec


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 1/2] kdump: Allow shrinking of kdump region to be overridden

2011-03-08 Thread Mahesh Jagannath Salgaonkar
On 08/25/2010 06:07 AM, Eric W. Biederman wrote:
 Anton Blanchard an...@samba.org writes:
 
 On ppc64 the crashkernel region almost always overlaps an area of firmware.
 This works fine except when using the sysfs interface to reduce the kdump
 region. If we free the firmware area we are guaranteed to crash.
 
 That is a ppc64 bug.  Firmware should not be in the reserved region.  Any
 random kernel like thing can be put in to that region at any valid
 address and the fact that shrinking the region frees your firmware means
 that using that region could also stomp your firmware (which I assume
 would be a bad thing).
The issue only happens while shrinking the region using the sysfs interface.
We already have checks in kexec to avoid stomping on the firmware
overlap area while loading the capture kernel. Currently we do a top-down
allocation for the firmware region, which means it sits at the top of the
RMO, right in the middle of the crashdump region. We cannot move the
crashkernel region beyond the firmware region because the kernel needs some
of its memory in the RMO region.
 
 So please fix the ppc64 reservation.
 
 Eric
 ___
 Linuxppc-dev mailing list
 linuxppc-...@lists.ozlabs.org
 https://lists.ozlabs.org/listinfo/linuxppc-dev


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v1] makedumpfile: s390x: Add support for s390x crashdump analysis

2010-11-15 Thread Mahesh Jagannath Salgaonkar
Hi Ken'ichi,

On 11/15/2010 09:24 AM, Ken'ichi Ohmichi wrote:
 
 Hi Mahesh,
 
 On Fri, 12 Nov 2010 12:34:53 +0530
 Mahesh Jagannath Salgaonkar mah...@linux.vnet.ibm.com wrote:

 This patch adds support for processing s390x kernel crashdumps.

 The changes have been tested on s390x system.
 The dump compression and filtering (for all dump levels 1,2,4,8,16 and 31)
 tests are successful.

 What version of linux kernel are the above tests on ?
 I'd like to write the version on README file.

 Also, please let me know the memory model (flatmem, discontigmem,
 or sparsemem) too.

   # make menuconfig
   - Processor type and features
  - Memory model


 I have tested this on RHEL6.0 GA kernel version 2.6.32-71.el6.s390x.

 The memory model is 'Sparse Memory'
 
 Thank you.
 Your patch has been merged to git tree.
I pulled in your latest changes and tested on s390x system, works fine.

Thanks for your help.
-Mahesh.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v1] makedumpfile: s390x: Add support for s390x crashdump analysis

2010-11-11 Thread Mahesh Jagannath Salgaonkar
Hi Ken'ichi,

On 11/12/2010 08:03 AM, Ken'ichi Ohmichi wrote:
 
 Hi Mahesh,
 
 On Fri, 12 Nov 2010 10:28:56 +0900
 Ken'ichi Ohmichi oomi...@mxs.nes.nec.co.jp wrote:

 On Thu, 11 Nov 2010 11:04:17 +0530
 Mahesh J Salgaonkar mah...@linux.vnet.in.ibm.com wrote:

 This patch adds support for processing s390x kernel crashdumps.

 The changes have been tested on s390x system.
 The dump compression and filtering (for all dump levels 1,2,4,8,16 and 31)
 tests are successful.

 What version of linux kernel are the above tests on ?
 I'd like to write the version on README file.
 
 Also, please let me know the memory model (flatmem, discontigmem,
 or sparsemem) too.
 
   # make menuconfig
   - Processor type and features
  - Memory model
 

I have tested this on RHEL6.0 GA kernel version 2.6.32-71.el6.s390x.

The memory model is 'Sparse Memory'

Thanks,
-Mahesh.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec