----- Original Message ----- > Hi Dave & David, > > Thanks very much for your time and effort! > > After installing /lib/modules/3.12.49-6-xen/updates/crash.ko, all seems OK: > > # crash > > crash 7.1.3 > Copyright (C) 2002-2014 Red Hat, Inc. > Copyright (C) 2004, 2005, 2006, 2010 IBM Corporation > Copyright (C) 1999-2006 Hewlett-Packard Co > Copyright (C) 2005, 2006, 2011, 2012 Fujitsu Limited > Copyright (C) 2006, 2007 VA Linux Systems Japan K.K. > Copyright (C) 2005, 2011 NEC Corporation > Copyright (C) 1999, 2002, 2007 Silicon Graphics, Inc. > Copyright (C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc. > This program is free software, covered by the GNU General Public License, > and you are welcome to change it and/or distribute copies of it under > certain conditions. Enter "help copying" to see the conditions. > This program has absolutely no warranty. Enter "help warranty" for details. > > crash: /boot/xen-4.5.gz: original filename unknown > Use "-f /boot/xen-4.5.gz" on command line to prevent this message. > > WARNING: machine type mismatch: > crash utility: X86_64 > /var/tmp/xen-4.5.gz_ud3IRy: X86 > > crash: /boot/symtypes-3.12.49-6-default.gz: original filename unknown > Use "-f /boot/symtypes-3.12.49-6-default.gz" on command line to > prevent this message. > > crash: /boot/symvers-3.12.49-6-default.gz: original filename unknown > Use "-f /boot/symvers-3.12.49-6-default.gz" on command line to > prevent this message. > > GNU gdb (GDB) 7.6 > Copyright (C) 2013 Free Software Foundation, Inc. > License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html> > This is free software: you are free to change and redistribute it. > There is NO WARRANTY, to the extent permitted by law. Type "show copying" > and "show warranty" for details. > This GDB was configured as "x86_64-unknown-linux-gnu"... > > KERNEL: /boot/vmlinux-3.12.49-6-xen.gz > DEBUGINFO: /usr/lib/debug/boot/vmlinux-3.12.49-6-xen.debug > DUMPFILE: /dev/crash > CPUS: 128 > DATE: Fri Nov 20 06:55:06 2015 > UPTIME: 18:51:36 > LOAD AVERAGE: 1.76, 1.48, 1.21 > TASKS: 1230 > NODENAME: dl980-5 > RELEASE: 3.12.49-6-xen > VERSION: #1 SMP Mon Oct 26 16:05:37 UTC 2015 (11560c3) > MACHINE: x86_64 (1995 Mhz) > MEMORY: 125.9 GB > PID: 6618 > COMMAND: "crash" > TASK: ffff881ea93b2140 [THREAD_INFO: ffff881e869f2000] > CPU: 112 > STATE: TASK_RUNNING (ACTIVE) > > So just from a user's viewpoint, I think it is enough for normal use. > Is it right? > Thanks in advance! > Best Regards > Nan Xiao
Yes, you're finally good to go... Dave > > > On Fri, Nov 20, 2015 at 6:14 AM, David Mair <[email protected]> wrote: > > On 11/19/2015 02:38 PM, Dave Anderson wrote: > >> > >> > >> ----- Original Message ----- > >>> On 11/19/2015 12:20 PM, Dave Anderson wrote: > >>>> > >>>> > >>>> ----- Original Message ----- > >>>>> On 11/19/2015 08:37 AM, David Mair wrote: > >>>>>> On 11/19/2015 07:45 AM, Dave Anderson wrote: > >>>>>> > >>>>>> Hi Dave, > >>>>>> > >>>>>>> ----- Original Message ----- > >>>>>> <snip> > >>>>>>> > >>>>>>>> (2) Execute "crash -d8" on physical machine will cause crash utility > >>>>>>>> core > >>>>>>>> dump. > >>>>>>> > >>>>>>> I can reproduce this, so I'll look into it. It's related to the > >>>>>>> /dev/mem "test" > >>>>>>> to determine whether the kernel was configured with > >>>>>>> CONFIG_STRICT_DEVMEM, where > >>>>>>> it tries to read from pfn 257 (just above the CONFIG_STRICT_DEVMEM > >>>>>>> limit), but > >>>>>>> gets into an infinite loop when used in conjunction with -d. > >>>>>>> > >>>>>>> Anyway, just continue to use /proc/kcore and you should be fine. > >>>>>> > >>>>>> This is the cause in readmem(): > >>>>>> > >>>>>> switch(READMEM(...)) > >>>>>> { > >>>>>> . > >>>>>> . > >>>>>> . > >>>>>> case READ_ERROR: > >>>>>> if (PRINT_ERROR_MESSAGE) ********** THIS *********** > >>>>>> { > >>>>>> causes a nested readmem() call before the goto gives it > >>>>>> to the caller to deal with > >>>>>> } > >>>>>> goto readmem_error > >>>>>> } > >>>>>> . > >>>>>> . > >>>>>> . > >>>>>> switch(error_handle) > >>>>>> { > >>>>>> case (RETURN_ON_ERROR): > >>>>>> } > >>>>>> > >>>>>> The PRINT_ERROR_MESSAGE I assume is an escalation from -d 8 in this > >>>>>> case. > >>>>>> > >>>>> > >>>>> The whole switch_to_proc_kcore() probably shouldn't be conditional on > >>>>> the presence of PRINT_ERROR_MESSAGE, only the actual error message > >>>>> should be. > >>>>> > >>>>> Patch shortly. > >>>>> > >>>>> -- > >>>>> David. > >>>> > >>>> Yeah, I agree. It was done that way to catch the very first non-QUIET > >>>> readmem() > >>>> call from kernel_init(), and make the bait-and-switch right there. > >>>> Without > >>>> the > >>>> CRASHDEBUG(x) override, it works as-is because the readmem() calls in > >>>> devmem_is_restricted() are purposely set to QUIET. > >>>> > >>>> So we're thinking something like this, right?: > >>>> > >>>> diff --git a/memory.c b/memory.c > >>>> index 824b3ae..2282ba9 100644 > >>>> --- a/memory.c > >>>> +++ b/memory.c > >>>> @@ -2207,13 +2207,12 @@ readmem(ulonglong addr, int memtype, void > >>>> *buffer, > >>>> long size, > >>>> goto readmem_error; > >>>> > >>>> case READ_ERROR: > >>>> - if (PRINT_ERROR_MESSAGE) { > >>>> - if ((pc->flags & DEVMEM) && (kt->flags & > >>>> PRE_KERNEL_INIT) && > >>>> - devmem_is_restricted() && > >>>> switch_to_proc_kcore()) > >>>> - return(readmem(addr, memtype, > >>>> bufptr, size, > >>>> - type, error_handle)); > >>>> + if ((pc->flags & DEVMEM) && (kt->flags & > >>>> PRE_KERNEL_INIT) && > >>>> + !(error_handle & QUIET) && > >>>> devmem_is_restricted() && switch_to_proc_kcore()) > >>>> + return(readmem(addr, memtype, bufptr, > >>>> size, > >>>> + type, error_handle)); > >>>> + if (PRINT_ERROR_MESSAGE) > >>>> error(INFO, READ_ERRMSG, > >>>> memtype_string(memtype, 0), addr, type); > >>>> - } > >>>> goto readmem_error; > >>>> > >>>> case PAGE_EXCLUDED: > >>>> > >>>> Works for me, and is better than hiding a recursive-call check in > >>>> devmem_is_restricted(). > >>> > >>> That protects from nesting devmem_is_restricted() as long as > >>> devmem_is_restricted() continues to do QUIET readmem()s but fails to > >>> switch_to_proc_kcore() for QUIET readmem() at other random failing > >>> addresses that could be fixed via /proc/kcore. QUIET isn't a global flag > >>> only for being in devmem_is_restricted(). > >> > >> I'm not following... > >> > >> There are the 3 QUIET readmem() calls in devmem_is_restricted(). As soon > >> as the first one fails, it comes into the code segment above, and will > >> call switch_to_proc_kcore(). There are no readmem() calls there. So > >> if /proc/kcore is usable, it switches to it, and we continue just fine. > >> > >> And, for example, if I force switch_to_proc_kcore() to return FALSE, > >> it still works: > >> > >> $ crash -d1 /dev/mem > >> ... [ cut ] ... > >> GNU gdb (GDB) 7.6 > >> Copyright (C) 2013 Free Software Foundation, Inc. > >> License GPLv3+: GNU GPL version 3 or later > >> <http://gnu.org/licenses/gpl.html> > >> This is free software: you are free to change and redistribute it. > >> There is NO WARRANTY, to the extent permitted by law. Type "show copying" > >> and "show warranty" for details. > >> This GDB was configured as "x86_64-unknown-linux-gnu"... > >> > >> crash: read error: physical address: 101000 type: "devmem_is_allowed - > >> pfn 257" > >> crash: read error: kernel virtual address: ffffffff81e08000 type: > >> "devmem_is_allowed - jiffies" > >> crash: this kernel may be configured with CONFIG_STRICT_DEVMEM, which > >> renders /dev/mem unusable as a live memory source. > >> crash: read error: kernel virtual address: ffffffff81a114b0 type: > >> "cpu_possible_mask" > >> # > >> > >> The first QUIET read fails, tries /proc/kcore and it fails, and the error > >> message > >> is printed. It returns back to devmem_is_restricted(), which calls > >> readmem() again, > >> but since DEVMEM has been cleared, it won't retry the /proc/kcore switch, > >> prints the > >> "jiffies" error message and returns. At that point, it is marked as > >> restricted, > >> returns back to readmem() which fails the original "cpu_possible_mask" > >> readmem() > >> that came from kernel_init(). At that point the crash session is killed. > >> > >> Note that because we have /dev/crash built-in, I had to put /dev/mem on > >> the command line, > >> but I wouldn't think that would make a difference. > >> > >> Am I missing something? Do you see something different? > > > > I was originally thinking about the same as yourself but decided against > > it due to thinking first that, in English, QUIET is a bit abstract for > > "don't try to switch devmem source" as well as avoid output. > > > > Plus, if any other readmem() that's QUIET fails READ_ERROR for any > > random reason and devmem_is_restricted() will fail a > > switch_to_proc_kcore() would have resolved it then that caller's use of > > QUIET will prevent devmem_is_restricted() being tried and it won't > > switch_to_proc_kcore(). I admit that means the status of > > devmem_is_restricted() has changed between initialization and now, but > > it's a live environment with it's own behavior. > > > > Due to the small number of error_handle flags here's one that uses the > > best of your idea without expanding the meaning of the word QUIET. It > > adds another flag for the sole purpose of saying "don't try to switch > > devmem source this time" and it avoids all the overhead I originally had. > > > > The name for the flag is long enough that the value tab alignment > > doesn't match with the other flags (I can't think of a shorter name for > > the flag but maybe you can). I chose not to appear to make three extra > > lines of changes just to expand the tab alignment on the other flags. It > > also leaves PRINT_ERROR_MESSAGE over-riding QUIET as at present but I > > don't have a preference for that -d8 has it's useful moments: > > > > diff --git a/defs.h b/defs.h > > index b0cdd42..29f46b4 100644 > > --- a/defs.h > > +++ b/defs.h > > @@ -328,6 +328,7 @@ struct number_option { > > #define HEX_BIAS (0x8) > > #define LONG_LONG (0x10) > > #define RETURN_PARTIAL (0x20) > > +#define NO_DEVMEM_SWITCH (0x40) > > > > #define SEEK_ERROR (-1) > > #define READ_ERROR (-2) > > diff --git a/memory.c b/memory.c > > index 72218e7..b4629a6 100644 > > --- a/memory.c > > +++ b/memory.c > > @@ -2204,14 +2204,14 @@ readmem(ulonglong addr, int memtype, void > > *buffer, long size, > > goto readmem_error; > > > > case READ_ERROR: > > - if (PRINT_ERROR_MESSAGE) { > > - if ((pc->flags & DEVMEM) && (kt->flags & > > PRE_KERNEL_INIT) && > > - devmem_is_restricted() && > > switch_to_proc_kcore()) > > - return(readmem(addr, memtype, > > bufptr, size, > > - type, error_handle)); > > + if (PRINT_ERROR_MESSAGE) > > error(INFO, READ_ERRMSG, > > memtype_string(memtype, 0), addr, type); > > - } > > - goto readmem_error; > > + > > + if ((pc->flags & DEVMEM) && (kt->flags & > > PRE_KERNEL_INIT) && > > + !(error_handle & NO_DEVMEM_SWITCH) && > > devmem_is_restricted() && > > switch_to_proc_kcore()) > > + return(readmem(addr, memtype, bufptr, size, > > + type, error_handle)); > > + goto readmem_error; > > > > case PAGE_EXCLUDED: > > RETURN_ON_PARTIAL_READ(); > > @@ -2410,16 +2410,16 @@ devmem_is_restricted(void) > > if (machine_type("X86") || machine_type("X86_64")) { > > if (readmem(255*PAGESIZE(), PHYSADDR, &tmp, > > sizeof(long), "devmem_is_allowed - pfn 255", > > - QUIET|RETURN_ON_ERROR) && > > + QUIET|RETURN_ON_ERROR|NO_DEVMEM_SWITCH) && > > !(readmem(257*PAGESIZE(), PHYSADDR, &tmp, > > sizeof(long), "devmem_is_allowed - pfn 257", > > - QUIET|RETURN_ON_ERROR))) > > + QUIET|RETURN_ON_ERROR|NO_DEVMEM_SWITCH))) > > restricted = TRUE; > > } > > if (kernel_symbol_exists("jiffies") && > > !readmem(symbol_value("jiffies"), KVADDR, &tmp, > > sizeof(ulong), "devmem_is_allowed - jiffies", > > - QUIET|RETURN_ON_ERROR)) > > + QUIET|RETURN_ON_ERROR|NO_DEVMEM_SWITCH)) > > restricted = TRUE; > > > > if (restricted) > > > > -- > > Crash-utility mailing list > > [email protected] > > https://www.redhat.com/mailman/listinfo/crash-utility > > -- > Crash-utility mailing list > [email protected] > https://www.redhat.com/mailman/listinfo/crash-utility > -- Crash-utility mailing list [email protected] https://www.redhat.com/mailman/listinfo/crash-utility
