Re: [PATCH] makedumpfile/arm64: Add '--mem-usage' support

2018-02-21 Thread Bhupesh SHARMA
Hi Masaki Tachibana,

On Tue, Feb 20, 2018 at 4:42 PM, Masaki Tachibana wrote:
> Hi Bhupesh,
>
> Sorry for the late reply.
> I'll reply by the end of next week.

Sure. Thanks for your mail.

Regards,
Bhupesh


> Thanks
> tachibana
>
>> -----Original Message-----
>> From: Bhupesh Sharma [mailto:bhsha...@redhat.com]
>> Sent: Tuesday, February 20, 2018 1:56 PM
>> To: kexec@lists.infradead.org
>> Cc: Bhupesh Sharma; Tachibana Masaki; Nakayama Takuya; Nishimura Daisuke
>> Subject: Re: [PATCH] makedumpfile/arm64: Add '--mem-usage' support
>>
>> Hello,
>>
>> On Fri, Feb 9, 2018 at 3:06 PM, Bhupesh Sharma  wrote:
>> > It's good to have makedumpfile '--mem-usage' support for the arm64
>> > architecture as well, as it allows one to see the number of pages of
>> > the current system (1st kernel) in different use.
>> >
>> > Using this, we can know how many pages are dumpable when a different
>> > dump_level is specified.
>> >
>> > Normally for x86_64, makedumpfile analyzes the 'System RAM' and
>> > 'kernel text' program segments of /proc/kcore, excluding the
>> > crashkernel range, and then calculates the number of pages of each
>> > kind according to vmcoreinfo.
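
(For illustration only: a rough, standalone sketch of the /proc/kcore
analysis described above. It simply walks the ELF program headers of
/proc/kcore and prints the PT_LOAD segments; makedumpfile's real code
additionally matches these segments against System RAM, drops the
crashkernel region and feeds the result into the page accounting.)

    #include <elf.h>
    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/proc/kcore", "r");    /* needs root */
            Elf64_Ehdr ehdr;
            Elf64_Phdr phdr;
            int i;

            if (!f || fread(&ehdr, sizeof(ehdr), 1, f) != 1) {
                    perror("/proc/kcore");
                    return 1;
            }
            for (i = 0; i < ehdr.e_phnum; i++) {
                    if (fseek(f, ehdr.e_phoff + i * ehdr.e_phentsize, SEEK_SET) ||
                        fread(&phdr, sizeof(phdr), 1, f) != 1)
                            break;
                    if (phdr.p_type == PT_LOAD)     /* candidate memory segment */
                            printf("vaddr %#llx paddr %#llx size %#llx\n",
                                   (unsigned long long)phdr.p_vaddr,
                                   (unsigned long long)phdr.p_paddr,
                                   (unsigned long long)phdr.p_memsz);
            }
            fclose(f);
            return 0;
    }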
>> >
>> > We use similar logic for arm64, but in addition make the '--mem-usage'
>> > option dependent on the VMLINUX file being passed. This is done so that
>> > information like VA_BITS can be determined from a kernel symbol like
>> > _stext, which allows us to know VA_BITS before 'set_kcore_vmcoreinfo()'
>> > is called.
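
(A condensed, illustrative sketch of the VA_BITS detection idea mentioned
above; the function name is made up, but the mask test mirrors the
PAGE_OFFSET_xx macros added further down in this patch. Because a kernel
virtual address has all of its top bits set, testing _stext against the
masks from the most restrictive one downwards reveals the VA range the
kernel was built with.)

    static int guess_va_bits(unsigned long stext_vaddr)
    {
            /* candidate VA_BITS values, most restrictive mask first */
            static const int candidates[] = { 36, 39, 42, 47, 48 };
            size_t i;

            for (i = 0; i < sizeof(candidates) / sizeof(candidates[0]); i++) {
                    unsigned long mask = 0xffffffffffffffffUL << candidates[i];

                    if ((stext_vaddr & mask) == mask)
                            return candidates[i];
            }
            return -1;      /* no supported VA layout matched */
    }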
>> >
>> > I have also validated the '--mem-usage' makedumpfile option on several
>> > ppc64/ppc64le and s390x machines, so the makedumpfile.8 documentation
>> > is updated to indicate that the '--mem-usage' option is supported not
>> > only on x86_64, but also on ppc64, s390x and arm64.
>> >
>> > After this patch, when using the '--mem-usage' option with makedumpfile,
>> > we get the correct information about the different page types. For
>> > example, here is the output from my arm64 board:
>> >
>> > TYPE            PAGES      EXCLUDABLE  DESCRIPTION
>> > ----------------------------------------------------------------------
>> > ZERO            49524      yes         Pages filled with zero
>> > NON_PRI_CACHE   15143      yes         Cache pages without private flag
>> > PRI_CACHE       29147      yes         Cache pages with private flag
>> > USER            3684       yes         User process pages
>> > FREE            1450569    yes         Free pages
>> > KERN_DATA       14243      no          Dumpable kernel data
>> >
>> > page size:              65536
>> > Total pages on system:  1562310
>> > Total size on system:   102387548160 Byte
>> >
>> > Cc: Masaki Tachibana 
>> > Cc: Takuya Nakayama 
>> > Cc: Daisuke Nishimura 
>> > Signed-off-by: Bhupesh Sharma 
>>
>> Ping. Any review comments on this?
>>
>> Regards,
>> Bhupesh
>>
>> > ---
>> >  arch/arm64.c   | 51 ---
>> >  makedumpfile.8 | 11 +--
>> >  makedumpfile.c | 25 +++--
>> >  makedumpfile.h |  1 +
>> >  4 files changed, 81 insertions(+), 7 deletions(-)
>> >
>> > diff --git a/arch/arm64.c b/arch/arm64.c
>> > index 25d7a1f4db98..91f113f6447c 100644
>> > --- a/arch/arm64.c
>> > +++ b/arch/arm64.c
>> > @@ -48,6 +48,12 @@ static unsigned long kimage_voffset;
>> >  #define SZ_64K (64 * 1024)
>> >  #define SZ_128M(128 * 1024 * 1024)
>> >
>> > +#define PAGE_OFFSET_36 ((0xffffffffffffffffUL) << 36)
>> > +#define PAGE_OFFSET_39 ((0xffffffffffffffffUL) << 39)
>> > +#define PAGE_OFFSET_42 ((0xffffffffffffffffUL) << 42)
>> > +#define PAGE_OFFSET_47 ((0xffffffffffffffffUL) << 47)
>> > +#define PAGE_OFFSET_48 ((0xffffffffffffffffUL) << 48)
>> > +
>> >  #define pgd_val(x) ((x).pgd)
>> >  #define pud_val(x) (pgd_val((x).pgd))
>> >  #define pmd_val(x) (pud_val((x).pud))
>> > @@ -140,8 +146,6 @@ pud_offset(pgd_t *pgda, pgd_t *pgdv, unsigned long vaddr)
>> >
>> >  static int calculate_plat_config(void)
>> >  {
>> > -   va_bits = NUMBER(VA_BITS);
>> > -
>> > /* derive pgtable_level as per arch/arm64/Kconfig */
>> > if ((PAGESIZE() == SZ_16K && va_bits == 36) ||
>> > (PAGESIZE() == SZ_64K && va_bits == 42)) {
>> > @@ -188,7 +192,6 @@ get_machdep_info_arm64(void)
>> > kimage_voffset = NUMBER(kimage_voffset);
>> > info->max_physmem_bits = PHYS_MASK_SHIFT;
>> > info->section_size_bits = SECTIONS_SIZE_BITS;
>> > -   info->page_offset = 0xffffffffffffffffUL << (va_bits - 1);
>> >
>> > DEBUG_MSG("kimage_voffset   : %lx\n", kimage_voffset);
>> > DEBUG_MSG("max_physmem_bits : %lx\n", info->max_physmem_bits);
>> > @@ -219,6 +222,48 @@ get_xen_info_arm64(void)
>> >  int
>> >  get_versiondep_info_arm64(void)
>> >  {
>> > +   unsigned long lo

Re: [PATCH 2/2] kexec/ppc64: add support to parse ibm,dynamic-memory-v2 property

2018-02-21 Thread Mahesh Jagannath Salgaonkar
On 02/20/2018 07:48 PM, Hari Bathini wrote:
> Add support to parse the new 'ibm,dynamic-memory-v2' property in the
> 'ibm,dynamic-reconfiguration-memory' node. This replaces the old
> 'ibm,dynamic-memory' property and is enabled in the kernel with a
> patch series that starts with commit 0c38ed6f6f0b ("powerpc/pseries:
> Enable support of ibm,dynamic-memory-v2"). All adjacent LMBs that share
> the same flags are grouped together in the newer version of the property,
> making it more compact for representing larger memory configurations.
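
For reference, an illustrative layout of the two property encodings the
patch distinguishes (the struct and field names below are hypothetical;
the sizes and offsets are the ones the patch relies on: both entry types
are 24 bytes, the base address sits at offset 0 for v1 and at offset 4
for v2, and the flags sit at offset 20 in both). In each case the
property value starts with a 4-byte entry count, which is why the parser
seeks past the first 4 bytes, and all values are big-endian in the device
tree, hence the be32/be64 conversions in the code.

    #include <stdint.h>

    /* one ibm,dynamic-memory entry: describes a single LMB */
    struct drconf_cell_v1 {
            uint64_t base_addr;     /* offset 0  (DRCONF_ADDR for v1)  */
            uint32_t drc_index;
            uint32_t reserved;
            uint32_t aa_index;
            uint32_t flags;         /* offset 20 (DRCONF_FLAGS)        */
    } __attribute__((packed));

    /* one ibm,dynamic-memory-v2 entry: covers seq_lmbs adjacent LMBs
     * that all share the same flags, hence the more compact encoding */
    struct drconf_cell_v2 {
            uint32_t seq_lmbs;      /* number of sequential LMBs       */
            uint64_t base_addr;     /* offset 4  (DRCONF_ADDR for v2)  */
            uint32_t drc_index;
            uint32_t aa_index;
            uint32_t flags;         /* offset 20 (DRCONF_FLAGS)        */
    } __attribute__((packed));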
> 
> Signed-off-by: Hari Bathini 

Thanks for fixing this. The patches look good to me.

Reviewed-by: Mahesh Salgaonkar 

> ---
>  kexec/arch/ppc64/crashdump-ppc64.c |   23 +++--
>  kexec/arch/ppc64/crashdump-ppc64.h |   16 +-
>  kexec/arch/ppc64/kexec-ppc64.c |   35 ++
>  kexec/fs2dt.c  |   92 ++--
>  4 files changed, 112 insertions(+), 54 deletions(-)
> 
> diff --git a/kexec/arch/ppc64/crashdump-ppc64.c b/kexec/arch/ppc64/crashdump-ppc64.c
> index bc9f948..50e3853 100644
> --- a/kexec/arch/ppc64/crashdump-ppc64.c
> +++ b/kexec/arch/ppc64/crashdump-ppc64.c
> @@ -39,6 +39,10 @@
>  #define DEVTREE_CRASHKERNEL_BASE "/proc/device-tree/chosen/linux,crashkernel-base"
>  #define DEVTREE_CRASHKERNEL_SIZE "/proc/device-tree/chosen/linux,crashkernel-size"
> 
> +unsigned int num_of_lmb_sets;
> +unsigned int is_dyn_mem_v2;
> +uint64_t lmb_size;
> +
>  static struct crash_elf_info elf_info64 =
>  {
>   class: ELFCLASS64,
> @@ -127,6 +131,7 @@ static int get_dyn_reconf_crash_memory_ranges(void)
>  {
>   uint64_t start, end;
>   uint64_t startrange, endrange;
> + uint64_t size;
>   char fname[128], buf[32];
>   FILE *file;
>   unsigned int i;
> @@ -135,6 +140,8 @@ static int get_dyn_reconf_crash_memory_ranges(void)
> 
>   strcpy(fname, "/proc/device-tree/");
>   strcat(fname, "ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory");
> + if (is_dyn_mem_v2)
> + strcat(fname, "-v2");
>   if ((file = fopen(fname, "r")) == NULL) {
>   perror(fname);
>   return -1;
> @@ -142,8 +149,9 @@ static int get_dyn_reconf_crash_memory_ranges(void)
> 
>   fseek(file, 4, SEEK_SET);
>   startrange = endrange = 0;
> - for (i = 0; i < num_of_lmbs; i++) {
> - if ((n = fread(buf, 1, 24, file)) < 0) {
> + size = lmb_size;
> + for (i = 0; i < num_of_lmb_sets; i++) {
> + if ((n = fread(buf, 1, LMB_ENTRY_SIZE, file)) < 0) {
>   perror(fname);
>   fclose(file);
>   return -1;
> @@ -156,8 +164,15 @@ static int get_dyn_reconf_crash_memory_ranges(void)
>   return -1;
>   }
> 
> - start = be64_to_cpu(((uint64_t *)buf)[DRCONF_ADDR]);
> - end = start + lmb_size;
> + /*
> +  * If the property is ibm,dynamic-memory-v2, the first 4 bytes
> +  * tell the number of sequential LMBs in this entry.
> +  */
> + if (is_dyn_mem_v2)
> + size = be32_to_cpu(((unsigned int *)buf)[0]) * lmb_size;
> +
> + start = be64_to_cpu(*((uint64_t *)&buf[DRCONF_ADDR]));
> + end = start + size;
>   if (start == 0 && end >= (BACKUP_SRC_END + 1))
>   start = BACKUP_SRC_END + 1;
> 
> diff --git a/kexec/arch/ppc64/crashdump-ppc64.h b/kexec/arch/ppc64/crashdump-ppc64.h
> index 42ccc31..87beb39 100644
> --- a/kexec/arch/ppc64/crashdump-ppc64.h
> +++ b/kexec/arch/ppc64/crashdump-ppc64.h
> @@ -34,10 +34,18 @@ extern unsigned int rtas_size;
>  extern uint64_t opal_base;
>  extern uint64_t opal_size;
> 
> -uint64_t lmb_size;
> -unsigned int num_of_lmbs;
> -
> -#define DRCONF_ADDR  0
> +/*
> + * In case of ibm,dynamic-memory-v2 property, this is the number of LMB
> + * sets where each set represents a group of sequential LMB entries. In
> + * case of ibm,dynamic-memory property, the number of LMB sets is nothing
> + * but the total number of LMB entries.
> + */
> +extern unsigned int num_of_lmb_sets;
> +extern unsigned int is_dyn_mem_v2;
> +extern uint64_t lmb_size;
> +
> +#define LMB_ENTRY_SIZE   24
> +#define DRCONF_ADDR  (is_dyn_mem_v2 ? 4 : 0)
>  #define DRCONF_FLAGS 20
> 
>  #endif /* CRASHDUMP_PPC64_H */
> diff --git a/kexec/arch/ppc64/kexec-ppc64.c b/kexec/arch/ppc64/kexec-ppc64.c
> index a7d708b..4e70b13 100644
> --- a/kexec/arch/ppc64/kexec-ppc64.c
> +++ b/kexec/arch/ppc64/kexec-ppc64.c
> @@ -149,6 +149,7 @@ static void add_base_memory_range(uint64_t start, uint64_t end)
>  static int get_dyn_reconf_base_ranges(void)
>  {
>   uint64_t start, end;
> + uint64_t size;
>   char fname[128], buf[32];
>   FILE *file;
>   unsigned int i;
> @@ -166,29 +167,35 @@ static int get_dyn_reconf_base_ranges(void)
>   return -1;
>   }
>   /*
> -  * lmb_size, num_of_

Re: [PATCH 1/2] kexec: add a helper function to add ranges

2018-02-21 Thread Mahesh Jagannath Salgaonkar
On 02/20/2018 07:48 PM, Hari Bathini wrote:
> Add a helper function for adding ranges to avoid duplicating code.
> 
> Signed-off-by: Hari Bathini 

Reviewed-by: Mahesh Salgaonkar 
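
A quick, hypothetical caller sketch (not taken from the patch) of how the
new helper is meant to be used, per its comment in the diff below: 'idx'
advances by two slots for every range written, and the return value is
the number of ranges that were added.

    /* write (base, size) pairs covering [base, end) into 'ranges' at 'rlen' */
    rngs_cnt = add_ranges(&ranges, &ranges_size, rlen, base, end);
    rlen += rngs_cnt * 2;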

> ---
>  kexec/fs2dt.c |  115 ++---
>  1 file changed, 53 insertions(+), 62 deletions(-)
> 
> diff --git a/kexec/fs2dt.c b/kexec/fs2dt.c
> index 79aa0f3..550eca9 100644
> --- a/kexec/fs2dt.c
> +++ b/kexec/fs2dt.c
> @@ -169,6 +169,50 @@ static unsigned propnum(const char *name)
>   return offset;
>  }
> 
> +/*
> + * Add ranges by comparing 'base' and 'end' addresses with usable
> + * memory ranges. Returns the number of ranges added. Each range added
> + * increments 'idx' by 2.
> + */
> +static uint64_t add_ranges(uint64_t **ranges, int *ranges_size, int idx,
> +uint64_t base, uint64_t end)
> +{
> + uint64_t loc_base, loc_end, rngs_cnt = 0;
> + size_t range;
> + int add = 0;
> +
> + for (range = 0; range < usablemem_rgns.size; range++) {
> + loc_base = usablemem_rgns.ranges[range].start;
> + loc_end = usablemem_rgns.ranges[range].end;
> + if (loc_base >= base && loc_end <= end) {
> + add = 1;
> + } else if (base < loc_end && end > loc_base) {
> + if (loc_base < base)
> + loc_base = base;
> + if (loc_end > end)
> + loc_end = end;
> + add = 1;
> + }
> +
> + if (add) {
> + if (idx >= ((*ranges_size) - 2)) {
> + (*ranges_size) += MEM_RANGE_CHUNK_SZ;
> + *ranges = realloc(*ranges, (*ranges_size)*8);
> + if (!(*ranges))
> + die("unrecoverable error: can't realloc"
> + "%d bytes for ranges.\n",
> + (*ranges_size)*8);
> + }
> + (*ranges)[idx++] = cpu_to_be64(loc_base);
> + (*ranges)[idx++] = cpu_to_be64(loc_end - loc_base);
> +
> + rngs_cnt++;
> + }
> + }
> +
> + return rngs_cnt;
> +}
> +
>  #ifdef HAVE_DYNAMIC_MEMORY
>  static void add_dyn_reconf_usable_mem_property__(int fd)
>  {
> @@ -176,8 +220,8 @@ static void add_dyn_reconf_usable_mem_property__(int fd)
>   uint64_t buf[32];
>   uint64_t *ranges;
>   int ranges_size = MEM_RANGE_CHUNK_SZ;
> - uint64_t base, end, loc_base, loc_end;
> - size_t i, rngs_cnt, range;
> + uint64_t base, end, rngs_cnt;
> + size_t i;
>   int rlen = 0;
>   int tmp_indx;
> 
> @@ -210,36 +254,8 @@ static void add_dyn_reconf_usable_mem_property__(int fd)
> 
>   tmp_indx = rlen++;
> 
> - rngs_cnt = 0;
> - for (range = 0; range < usablemem_rgns.size; range++) {
> - int add = 0;
> - loc_base = usablemem_rgns.ranges[range].start;
> - loc_end = usablemem_rgns.ranges[range].end;
> - if (loc_base >= base && loc_end <= end) {
> - add = 1;
> - } else if (base < loc_end && end > loc_base) {
> - if (loc_base < base)
> - loc_base = base;
> - if (loc_end > end)
> - loc_end = end;
> - add = 1;
> - }
> -
> - if (add) {
> - if (rlen >= (ranges_size-2)) {
> - ranges_size += MEM_RANGE_CHUNK_SZ;
> - ranges = realloc(ranges, ranges_size*8);
> - if (!ranges)
> - die("unrecoverable error: can't"
> - " realloc %d bytes for"
> - " ranges.\n",
> - ranges_size*8);
> - }
> - ranges[rlen++] = cpu_to_be64(loc_base);
> - ranges[rlen++] = cpu_to_be64(loc_end - loc_base);
> - rngs_cnt++;
> - }
> - }
> + rngs_cnt = add_ranges(&ranges, &ranges_size, rlen,
> +   base, end);
>   if (rngs_cnt == 0) {
>   /* We still need to add a counter for every LMB because
>* the kernel parsing code is dumb.  We just have
> @@ -261,7 +277,8 @@ static void add_dyn_reconf_usable_mem_property__(int fd)
>   }
>   } else {
>   /* Store the count of (ba