-----Original Message-----
> 1.) The vmcore file may be very big.
>
> For example, I have a vmcore file which is over 23G,
> and the panic kernel had 767.6G memory,
> its max_sect_len is 4468736.
>
> The current code takes too much time in the following loop:
> ..............................................
> for (i = 1; i < max_sect_len + 1; i++) {
> dd->valid_pages[i] = dd->valid_pages[i - 1];
> for (j = 0; j < BITMAP_SECT_LEN; j++, pfn++)
> if (page_is_dumpable(pfn))
> dd->valid_pages[i]++;
> ..............................................
>
> In my case, it takes about 56 seconds to finish this
> big loop.
>
> This patch moves the hweightXX macros to defs.h,
> and uses hweight64 to optimize the loop.
>
> For my vmcore, the loop takes only about one second now.
>
> 2.) Test results:
> # cat ./commands.txt
> quit
>
> Before:
>
> #echo 3 > /proc/sys/vm/drop_caches;
> #time ./crash -i ./commands.txt /root/t/vmlinux /root/t/vmcore > /dev/null 2>&1
> ............................
> real 1m54.259s
> user 1m12.494s
> sys 0m3.857s
> ............................
>
> After this patch:
>
> #echo 3 > /proc/sys/vm/drop_caches;
> #time ./crash -i ./commands.txt /root/t/vmlinux /root/t/vmcore > /dev/null 2>&1
> ............................
> real 0m55.217s
> user 0m15.114s
> sys 0m3.560s
> ............................
Thank you for the improvement!
As far as I tested on x86_64, it did not give such a big gain, but judging by
the user time, it should on arm64. Lianbo, can you reproduce this on arm64?
With a 192GB x86_64 dumpfile, the time improved only slightly:
$ time echo quit | ./crash vmlinux dump >/dev/null
real 0m5.632s
user 0m5.545s
sys 0m0.185s
$ time echo quit | ./crash vmlinux dump >/dev/null
real 0m5.547s
user 0m5.477s
sys 0m0.170s
>
> Signed-off-by: Huang Shijie <[email protected]>
> ---
> defs.h | 20 ++++++++++++++++++++
> diskdump.c | 12 +++++++++---
> sbitmap.c | 19 -------------------
> 3 files changed, 29 insertions(+), 22 deletions(-)
>
> diff --git a/defs.h b/defs.h
> index 81ac049..1e8360d 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -4531,6 +4531,26 @@ struct machine_specific {
> #define NUM_IN_BITMAP(bitmap, x) (bitmap[(x)/BITS_PER_LONG] & NUM_TO_BIT(x))
> #define SET_BIT(bitmap, x) (bitmap[(x)/BITS_PER_LONG] |= NUM_TO_BIT(x))
>
> +static inline unsigned int __const_hweight8(unsigned long w)
> +{
> + return
> + (!!((w) & (1ULL << 0))) +
> + (!!((w) & (1ULL << 1))) +
> + (!!((w) & (1ULL << 2))) +
> + (!!((w) & (1ULL << 3))) +
> + (!!((w) & (1ULL << 4))) +
> + (!!((w) & (1ULL << 5))) +
> + (!!((w) & (1ULL << 6))) +
> + (!!((w) & (1ULL << 7)));
> +}
> +
> +#define __const_hweight16(w) (__const_hweight8(w) + __const_hweight8((w) >> 8))
> +#define __const_hweight32(w) (__const_hweight16(w) + __const_hweight16((w) >> 16))
> +#define __const_hweight64(w) (__const_hweight32(w) + __const_hweight32((w) >> 32))
> +
> +#define hweight32(w) __const_hweight32(w)
> +#define hweight64(w) __const_hweight64(w)
> +
> /*
> * precision lengths for fprintf
> */
> diff --git a/diskdump.c b/diskdump.c
> index d567427..d30db9d 100644
> --- a/diskdump.c
> +++ b/diskdump.c
> @@ -547,6 +547,7 @@ read_dump_header(char *file)
> ulong pfn;
> int i, j, max_sect_len;
> int is_split = 0;
> + u64 tmp, *bitmap;
$ make warn
...
cc -c -g -DX86_64 -DLZO -DSNAPPY -DZSTD -DGDB_10_2 diskdump.c -Wall -O2
-Wstrict-prototypes -Wmissing-prototypes -fstack-protector -Wformat-security
diskdump.c: In function ‘read_dump_header’:
diskdump.c:550:2: error: unknown type name ‘u64’
u64 tmp, *bitmap;
^~~
diskdump.c:905:12: error: ‘u64’ undeclared (first use in this function); did
you mean ‘a64l’?
bitmap = (u64 *)dd->dumpable_bitmap;
^~~
a64l
It looks like u64 is defined only on arm and arm64; please use ulonglong
instead, which is commonly used in crash.
Otherwise, the patch looks good to me.
Thanks,
Kazu
>
> if (block_size < 0)
> return FALSE;
> @@ -899,11 +900,16 @@ restart:
>
> dd->valid_pages = calloc(sizeof(ulong), max_sect_len + 1);
> dd->max_sect_len = max_sect_len;
> +
> + /* It is safe to convert it to (u64*). */
> + bitmap = (u64 *)dd->dumpable_bitmap;
> for (i = 1; i < max_sect_len + 1; i++) {
> dd->valid_pages[i] = dd->valid_pages[i - 1];
> - for (j = 0; j < BITMAP_SECT_LEN; j++, pfn++)
> - if (page_is_dumpable(pfn))
> - dd->valid_pages[i]++;
> + for (j = 0; j < BITMAP_SECT_LEN; j += 64, pfn += 64) {
> + tmp = bitmap[pfn >> 6];
> + if (tmp)
> + dd->valid_pages[i] += hweight64(tmp);
> + }
> }
>
> return TRUE;
> diff --git a/sbitmap.c b/sbitmap.c
> index 286259f..96a61e6 100644
> --- a/sbitmap.c
> +++ b/sbitmap.c
> @@ -49,25 +49,6 @@ struct sbitmapq_data {
>
> static uint sb_flags = 0;
>
> -static inline unsigned int __const_hweight8(unsigned long w)
> -{
> - return
> - (!!((w) & (1ULL << 0))) +
> - (!!((w) & (1ULL << 1))) +
> - (!!((w) & (1ULL << 2))) +
> - (!!((w) & (1ULL << 3))) +
> - (!!((w) & (1ULL << 4))) +
> - (!!((w) & (1ULL << 5))) +
> - (!!((w) & (1ULL << 6))) +
> - (!!((w) & (1ULL << 7)));
> -}
> -
> -#define __const_hweight16(w) (__const_hweight8(w) + __const_hweight8((w) >> 8))
> -#define __const_hweight32(w) (__const_hweight16(w) + __const_hweight16((w) >> 16))
> -#define __const_hweight64(w) (__const_hweight32(w) + __const_hweight32((w) >> 32))
> -
> -#define hweight32(w) __const_hweight32(w)
> -#define hweight64(w) __const_hweight64(w)
>
> #define BIT(nr) (1UL << (nr))
>
> --
> 2.30.2
--
Crash-utility mailing list
[email protected]
https://listman.redhat.com/mailman/listinfo/crash-utility
Contribution Guidelines: https://github.com/crash-utility/crash/wiki