Hi Lianbo, Thank you for your suggestion. Also, we expect it to enter the else branch only when there is no > vmcoreinfo. The if-else-branch code logic seems problematic, right? >
I think you are right, the else branch can work well regardless of whether vmcoreinfo is present or not. So I changed the execution order of the code: Step 1. Try to get THREAD_SHIFT from tbnz instruction. Step 2. If cannot get THREAD_SHIFT by step 1, change the thread_shift when KASAN is enabled and with vmcoreinfo. Please help review this new patch. Thanks Yeping lijiang <liji...@redhat.com> 于2024年7月31日周三 11:23写道: > Hi, Yeping > Thank you for the update. > > I copied the code from your attachment here: > > +static ulong arm64_set_irq_stack_size(void) > +{ > + int min_thread_shift = 14; > + ulong thread_shift = 0; > + char buf1[BUFSIZE]; > + char *pos1, *pos2; > + int errflag = 0; > + > + if (kernel_symbol_exists("kasan_enable_current")) { > + min_thread_shift += 1; > + thread_shift = (min_thread_shift < machdep->pageshift) ? > machdep->pageshift : min_thread_shift; > + } else { > > The above if-branch can work well when there is the vmcoreinfo data and > KASAN is enabled, otherwise it may not work, Eg: the KASAN is enabled and > there is no the vmcoreinfo data. > > Also, we expect it to enter the else branch only when there is no > vmcoreinfo. The if-else-branch code logic seems problematic, right? > > > Thanks > Lianbo > > + sprintf(buf1, "x/32i vectors"); > + open_tmpfile(); > + if (!gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR)) { > + goto out; > + } > + rewind(pc->tmpfile); > + while (fgets(buf1, BUFSIZE, pc->tmpfile)) { > + if ((pos1 = strstr(buf1, "tbnz"))) { > + if ((pos2 = strchr(pos1, '#'))) { > + pos2 += 1; > + for(pos1=pos2; *pos2!='\0' && *pos2!=','; pos2++); > + *pos2 = '\0'; > + thread_shift = stol(pos1, RETURN_ON_ERROR|QUIET, &errflag); > + if (errflag) { > + thread_shift = 0; > + } > + break; > + } > + } > + } > +out: > + close_tmpfile(); > + } > + > + if (!thread_shift) > + return 0; > + else > + return ((1UL) << thread_shift); > +} > + > #endif /* ARM64 */ > > On Tue, Jul 30, 2024 at 3:40 PM yp z <wonder...@gmail.com> wrote: > >> Hi Tao, >> >> Thanks for your suggestion, and this method works well as expected. >> >> Can we make it simpler, since the value we are looking for is the 2nd >>> oprand of tbnz, there must be a ',' after the value. >>> >>> tbnz w2, #0x1f, 0xffff80008001bed0 >>> >>> Can we make it like the following? I haven't tested the following code... >>> >>> if ((pos2 = strchr(pos1, '#'))) { >>> pos2 += 1; >>> for (pos1 = pos2; *pos2 != '\0' && *pos2 != ','; pos2++); >>> *pos2 = '\0'; >>> thread_shift = stol(pos1, RETURN_ON_ERROR|QUIET, &errflag); >>> if (errflag) { >>> thread_shift = 0; >>> } >>> break; >>> } >>> >> >> >> Thanks. >> Yeping.ZHENG >> >> Tao Liu <l...@redhat.com> 于2024年7月30日周二 08:55写道: >> >>> Hi Yeping, >>> >>> On Mon, Jul 29, 2024 at 2:20 PM yp z <wonder...@gmail.com> wrote: >>> > >>> > Hi Lianbo and Tao, >>> > Thank you for your suggestions, and I have rewrote a patch in two >>> cases: >>> > >>> > [1] with vmcoreinfo: >>> > + if (kernel_symbol_exists("kasan_enable_current")) { >>> > + min_thread_shift += 1; >>> > + thread_shift = (min_thread_shift < machdep->pageshift) >>> ? machdep->pageshift : min_thread_shift; >>> > + } >>> > >>> This looks fine to me. >>> >>> >> BTW: can you help point out the current issue is with or without the >>> vmcoreinfo? Yeping >>> > >>> > My issue is with vmcoreinfo of "SYMBOL("kasan_enable_current")", and >>> works well with this patch. >>> > >>> > [2] without vmcoreinfo: >>> >> >>> >> What about somehow the disassembly gives us hex values like: >>> >> #0xe? It won't work by then. Any ideas for this? >>> > >>> > In order to use stol() as expected, I have changed the first >>> non-numeric char to '\0': >>> > + if ((pos1 = strstr(buf1, "tbnz"))) { >>> > + if ((pos2 = strchr(pos1, '#'))) { >>> > + pos2 += 1; >>> > + pos1 = pos2; >>> > + while (*pos2 != '\0') { >>> > + if (!((*pos2 >= '0' && >>> *pos2 <= '9') >>> > + || (*pos2 >= >>> 'A' && *pos2 <= 'F') >>> > + || (*pos2 >= >>> 'a' && *pos2 <= 'f'))) { >>> >>> There are 'x' 'X' not covered, it will not work for the "#0xe" case. >>> Can we make it simpler, since the value we are looking for is the 2nd >>> oprand of tbnz, there must be a ',' after the value. >>> >>> tbnz w2, #0x1f, 0xffff80008001bed0 >>> >>> Can we make it like the following? I haven't tested the following code... >>> >>> if ((pos2 = strchr(pos1, '#'))) { >>> pos2 += 1; >>> for (pos1 = pos2; *pos2 != '\0' && *pos2 != ','; pos2++); >>> *pos2 = '\0'; >>> thread_shift = stol(pos1, RETURN_ON_ERROR|QUIET, &errflag); >>> if (errflag) { >>> thread_shift = 0; >>> } >>> break; >>> } >>> >>> Thanks, >>> Tao Liu >>> >>> > + *pos2 = '\0'; >>> > + break; >>> > + } >>> > + ++pos2; >>> > + } >>> > + thread_shift = stol(pos1, >>> RETURN_ON_ERROR|QUIET, &errflag); >>> > >>> > Thanks. >>> > Yeping.ZHENG >>> > >>> > lijiang <liji...@redhat.com> 于2024年7月26日周五 17:23写道: >>> >> >>> >> On Fri, Jul 19, 2024 at 6:40 AM Tao Liu <l...@redhat.com> wrote: >>> >>> >>> >>> Hi Lianbo, >>> >>> >>> >>> On Thu, Jul 18, 2024 at 10:50 PM Lianbo Jiang <liji...@redhat.com> >>> wrote: >>> >>> > >>> >>> > On 7/16/24 4:22 PM, Tao Liu wrote: >>> >>> > >>> >>> > > Hi Yeping, >>> >>> > > >>> >>> > > Thanks for the fix. >>> >>> > > >>> >>> > > On Thu, Jul 11, 2024 at 1:38 PM <wonder...@gmail.com> wrote: >>> >>> > >> When using the crash tool to parse the ARM64 dump file with >>> KASAN enabled, I found that using the bt -a command will cause this tool to >>> crash, the following is the backtrace infomation. >>> >>> > >> >>> >>> > >> (gdb) bt >>> >>> > >> #0 0x00005635ac2b166b in arm64_unwind_frame >>> (frame=0x7ffdaf35cb70, bt=0x7ffdaf35d430) >>> >>> > >> at arm64.c:2821 >>> >>> > >> #1 arm64_back_trace_cmd (bt=0x7ffdaf35d430) at arm64.c:3306 >>> >>> > >> #2 0x00005635ac27b108 in back_trace (bt=bt@entry=0x7ffdaf35d430) >>> at kernel.c:3239 >>> >>> > >> #3 0x00005635ac2880ae in cmd_bt () at kernel.c:2863 >>> >>> > >> #4 0x00005635ac1f16dc in exec_command () at main.c:893 >>> >>> > >> #5 0x00005635ac1f192a in main_loop () at main.c:840 >>> >>> > >> #6 0x00005635ac50df81 in captured_main (data=<optimized out>) >>> at main.c:1284 >>> >>> > >> #7 gdb_main (args=<optimized out>) at main.c:1313 >>> >>> > >> #8 0x00005635ac50e000 in gdb_main_entry (argc=<optimized out>, >>> argv=<optimized out>) >>> >>> > >> at main.c:1338 >>> >>> > >> #9 0x00005635ac1ea2a5 in main (argc=5, argv=0x7ffdaf35dde8) at >>> main.c:721 >>> >>> > >> Eventually, I found that it was may caused by not setting >>> irq_stack_size properly, and provide this patch to solve it. >>> >>> > >> >>> >>> > > Could you please re-draft your commit message? The original one >>> looks >>> >>> > > informal. E.g: >>> >>> > > >>> >>> > > A segfault issue was observed on KASAN enabled arm64 kernel due >>> to the >>> >>> > > incorrect irq_stack_size, see the following stack trace: >>> >>> > > ... >>> >>> > > The issue was caused by ...., and this patch will fix the issue >>> by .... >>> >>> > > >>> >>> > >> From 34b28aa8c11e77d20adec4f7705a14d239c8a55f Mon Sep 17 >>> 00:00:00 2001 >>> >>> > >> From: wonderzyp <wonder...@qq.com> >>> >>> > >> Date: Mon, 8 Jul 2024 20:11:38 +0800 >>> >>> > >> Subject: [PATCH 1131/1131] set_arm64_irq_stack_size >>> >>> > >> >>> >>> > >> Signed-off-by: Yeping Zheng <wonder...@gmail.com> >>> >>> > >> --- >>> >>> > >> arm64.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++-- >>> >>> > >> 1 file changed, 47 insertions(+), 2 deletions(-) >>> >>> > >> >>> >>> > >> diff --git a/arm64.c b/arm64.c >>> >>> > >> index b3040d7..39d891b 100644 >>> >>> > >> --- a/arm64.c >>> >>> > >> +++ b/arm64.c >>> >>> > >> @@ -93,6 +93,7 @@ static void arm64_calc_VA_BITS(void); >>> >>> > >> static int arm64_is_uvaddr(ulong, struct task_context *); >>> >>> > >> static void arm64_calc_KERNELPACMASK(void); >>> >>> > >> static int arm64_get_vmcoreinfo(unsigned long *vaddr, const >>> char *label, int base); >>> >>> > >> +static ulong arm64_set_irq_stack_size(struct machine_specific >>> *ms); >>> >>> > >> >>> >>> > >> struct kernel_range { >>> >>> > >> unsigned long modules_vaddr, modules_end; >>> >>> > >> @@ -2223,8 +2224,14 @@ arm64_irq_stack_init(void) >>> >>> > >> if (MEMBER_EXISTS("thread_union", "stack")) { >>> >>> > >> if ((sz = MEMBER_SIZE("thread_union", >>> "stack")) > 0) >>> >>> > >> ms->irq_stack_size = sz; >>> >>> > >> - } else >>> >>> > >> - ms->irq_stack_size = >>> ARM64_IRQ_STACK_SIZE; >>> >>> > >> + } else { >>> >>> > >> + ulong res = >>> arm64_set_irq_stack_size(ms); >>> >>> > >> + if (res > 0){ >>> >>> > >> + ms->irq_stack_size = res; >>> >>> > >> + } else { >>> >>> > >> + ms->irq_stack_size = >>> ARM64_IRQ_STACK_SIZE; >>> >>> > >> + } >>> >>> > >> + } >>> >>> > >> >>> >>> > >> machdep->flags |= IRQ_STACKS; >>> >>> > >> >>> >>> > >> @@ -4921,6 +4928,44 @@ static void >>> arm64_calc_KERNELPACMASK(void) >>> >>> > >> } >>> >>> > >> } >>> >>> > >> >>> >>> > >> +static ulong arm64_set_irq_stack_size(struct machine_specific >>> *ms) >>> >>> > >> +{ >>> >>> > >> + char *string; >>> >>> > >> + int ret; >>> >>> > >> + int KASAN_THREAD_SHIFT = 0; >>> >>> > >> + int MIN_THREAD_SHIFT; >>> >>> > >> + ulong ARM64_PAGE_SHIFT; >>> >>> > >> + ulong THREAD_SHIFT = 0; >>> >>> > >> + ulong THREAD_SIZE; >>> >>> > > I guess the upper case of variable names is not encouraged, >>> though it >>> >>> > > is the variable that comes from kernel config file. >>> >>> > > >>> >>> > >> + if (kt->ikconfig_flags & IKCONFIG_AVAIL) { >>> >>> > >> + if ((ret = >>> get_kernel_config("CONFIG_KASAN_GENERIC", NULL) == IKCONFIG_Y) || >>> >>> > >> + (ret = >>> get_kernel_config("CONFIG_KASAN_SW_TAGS", NULL) == IKCONFIG_Y)) { >>> >>> > >> + KASAN_THREAD_SHIFT = 1; >>> >>> > >> + } >>> >>> > >> + } >>> >>> > >> + MIN_THREAD_SHIFT = 14 + KASAN_THREAD_SHIFT; >>> >>> > >> + >>> >>> > >> + if (kt->ikconfig_flags & IKCONFIG_AVAIL) { >>> >>> > > Could the if condition be merged with the prior one? >>> >>> > > >>> >>> > >> + if ((ret = >>> get_kernel_config("CONFIG_VMAP_STACK", NULL)) == IKCONFIG_Y){ >>> >>> > >> + if ((ret = >>> get_kernel_config("CONFIG_ARM64_PAGE_SHIFT", &string)) == >>> >>> > >>> >>> > The "CONFIG_ARM64_PAGE_SHIFT " has been removed since kernel >>> v6.9-rc1, >>> >>> > so this can not work on the latest kernel. See: >>> >>> > >>> >>> > d3e5bab923d3 ("arch: simplify architecture specific page size >>> >>> > configuration") >>> >>> > >>> >>> > >>> >>> > In addition, the IKCONFIG is not available in most distributions, >>> it >>> >>> > can not cover this case. There is a similar discussion there: >>> >>> > >>> >>> > >>> https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg00880.html >>> >>> > >>> >>> > Can you try to handle the current issue with a similar solution? >>> >>> > >>> >>> > >>> >>> > >> IKCONFIG_STR){ >>> >>> > >> + ARM64_PAGE_SHIFT = atol(string); >>> >>> > >> + } >>> >>> > >> + if (MIN_THREAD_SHIFT < >>> ARM64_PAGE_SHIFT){ >>> >>> > >> + THREAD_SHIFT = ARM64_PAGE_SHIFT; >>> >>> > >> + } else { >>> >>> > >> + THREAD_SHIFT = MIN_THREAD_SHIFT; >>> >>> > >> + } >>> >>> > >> + } >>> >>> > >> + } >>> >>> > >> + >>> >>> > >> + if (THREAD_SHIFT == 0) { >>> >>> > >> + return -1; >>> >>> > >> + } >>> >>> > >> + >>> >>> > >> + THREAD_SIZE = ((1UL) << THREAD_SHIFT); >>> >>> > >> + return THREAD_SIZE; >>> >>> > >> +} >>> >>> > > I'm OK with the approach above, since it directly came from the >>> kernel >>> >>> > > source. However I'm not a fan of checking kernel configs, there >>> might >>> >>> > > be kernels which are compiled without CONFIG_IKCONFIG. >>> >>> > > >>> >>> > > Could we add an approach here, to get the value from disassembly >>> when >>> >>> > > CONFIG_IKCONFIG is negative? >>> >>> > > >>> >>> > > kernel source: arch/arm64/kernel/entry.S: >>> >>> > > >>> >>> > > .macro kernel_ventry, el:req, ht:req, regsize:req, label:req >>> >>> > > .... >>> >>> > > add sp, sp, x0 // sp' = sp + x0 >>> >>> > > sub x0, sp, x0 // x0' = sp' - x0 = (sp + x0) - x0 = sp >>> >>> > > tbnz x0, #THREAD_SHIFT, 0f <<<<<<<< >>> >>> > > >>> >>> > > $ objdump -d vmlinux >>> >>> > > ... >>> >>> > > ffff800080010800 <vectors>: >>> >>> > > ffff800080010800: d10543ff sub sp, sp, #0x150 >>> >>> > > ffff800080010804: 8b2063ff add sp, sp, x0 >>> >>> > > ffff800080010808: cb2063e0 sub x0, sp, x0 >>> >>> > > ffff80008001080c: 37800080 tbnz w0, #16, >>> >>> > > ffff80008001081c <vectors+0x1c> <<<<<<<<<< >>> >>> > > >>> >>> > > It is easy to get the THREAD_SHIFT value by disassembling the >>> tbnz >>> >>> > > instruction. What do you think @Lianbo Jiang >>> >>> > >>> >>> > This is a good idea, but it still relies on compiler. >>> >>> > >>> >>> > As we discussed, usually I would recommend finding its values via >>> some >>> >>> > kernel symbols. >>> >>> >>> >>> Yeah, but I didn't find a suitable symbol so far... In addition, the >>> >> >>> >> >>> >> It's true. For the KASAN, it is easy to find a suitable symbol, but >>> it's not easy for the PAGESIZE or PAGESHIFT. >>> >> >>> >> Given that I would suggest covering two cases: >>> >> [1] with vmcoreinfo(see mm/kasan/common.c) >>> >> ... >>> >> int min_thread_shift = 14; >>> >> if (kernel_symbol_exists("kasan_enable_current")) >>> >> min_thread_shift += 1; >>> >> >>> >> thread_shift = (min_thread_shift < machdep->pageshift) ? >>> machdep->pageshift : min_thread_shift; >>> >> ... >>> >> >>> >> [2] without vmcoreinfo >>> >> Use Tao's solution(disassemble kernel code) >>> >> >>> >> The first one is better than the second one in case of having >>> vmcoreinfo, If I understand correctly. >>> >> But we still need to cover the above two cases in the crash tools. >>> >> >>> >> BTW: can you help point out the current issue is with or without the >>> vmcoreinfo? Yeping >>> >> >>> >> Thanks >>> >> Lianbo >>> >> >>> >>> above case is a little different here, the tbnz instruction is not >>> >>> generated from C code, it is written in assembly in >>> >>> arch/arm64/kernel/entry.S file. So I guess there would be a lower >>> >>> possibility if tbnz instruction gets replaced or eliminated by the >>> >>> compilers. Anyway, it's OK to me if you don't want to accept the >>> >>> approach. >>> >>> >>> >>> Thanks, >>> >>> Tao Liu >>> >>> >>> >>> >>> >>> > >>> >>> > >>> >>> > Thanks >>> >>> > >>> >>> > Lianbo >>> >>> > >>> >>> > > >>> >>> > > Thanks, >>> >>> > > Tao Liu >>> >>> > > >>> >>> > >> + >>> >>> > >> #endif /* ARM64 */ >>> >>> > >> >>> >>> > >> >>> >>> > >> -- >>> >>> > >> 2.25.1 >>> >>> > >> >>> >>> > >>> >>> >>> >>>
From 2e4d11da6db27952a5bcfd7e3475bd42d230f6b8 Mon Sep 17 00:00:00 2001 From: "yeping.zheng" <yeping.zh...@nio.com> Date: Tue, 30 Jul 2024 14:16:54 +0800 Subject: [PATCH] A segfault issue due to the incorrect irq_stack_size on ARM64 See the following stack trace: (gdb) bt #0 0x00005635ac2b166b in arm64_unwind_frame (frame=0x7ffdaf35cb70, bt=0x7ffdaf35d430) at arm64.c:2821 #1 arm64_back_trace_cmd (bt=0x7ffdaf35d430) at arm64.c:3306 #2 0x00005635ac27b108 in back_trace (bt=bt@entry=0x7ffdaf35d430) at kernel.c:3239 #3 0x00005635ac2880ae in cmd_bt () at kernel.c:2863 #4 0x00005635ac1f16dc in exec_command () at main.c:893 #5 0x00005635ac1f192a in main_loop () at main.c:840 #6 0x00005635ac50df81 in captured_main (data=<optimized out>) at main.c:1284 #7 gdb_main (args=<optimized out>) at main.c:1313 #8 0x00005635ac50e000 in gdb_main_entry (argc=<optimized out>, argv=<optimized out>) at main.c:1338 #9 0x00005635ac1ea2a5 in main (argc=5, argv=0x7ffdaf35dde8) at main.c:721 The issue may be encountered when thread_union symbol not found in vmlinux due to compiling optimization. This patch will try the following 2 methods to get the irq_stack_size when thread_union symbol unavailable: 1. Try getting the value from kernel code disassembly, to get THREAD_SHIFT directly from tbnz instruction. In arch/arm64/kernel/entry.S: .macro kernel_ventry, el:req, ht:req, regsize:req, label:req ... add sp, sp, x0 sub x0, sp, x0 tbnz x0, #THREAD_SHIFT, 0f $ gdb vmlinux (gdb) disass vectors Dump of assembler code for function vectors: ... 0xffff800080010804 <+4>: add sp, sp, x0 0xffff800080010808 <+8>: sub x0, sp, x0 0xffff80008001080c <+12>: tbnz w0, #16, 0xffff80008001081c <vectors+28> 2. If cannot get THREAD_SHIFT by step 1, change the thread_shift when KASAN is enabled and with vmcoreinfo. In arm64/include/asm/memory.h: #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) ... #define IRQ_STACK_SIZE THREAD_SIZE Since enabling the KASAN will affect the final value, this patch reset IRQ_STACK_SIZE according to the calculation process in kernel code. Signed-off-by: yeping.zheng <yeping.zheng(a)nio.com> --- arm64.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/arm64.c b/arm64.c index 78e6609..49799e6 100644 --- a/arm64.c +++ b/arm64.c @@ -94,6 +94,7 @@ static int arm64_is_uvaddr(ulong, struct task_context *); static void arm64_calc_KERNELPACMASK(void); static void arm64_recalc_KERNELPACMASK(void); static int arm64_get_vmcoreinfo(unsigned long *vaddr, const char *label, int base); +static ulong arm64_set_irq_stack_size(void); struct kernel_range { unsigned long modules_vaddr, modules_end; @@ -2234,8 +2235,10 @@ arm64_irq_stack_init(void) if (MEMBER_EXISTS("thread_union", "stack")) { if ((sz = MEMBER_SIZE("thread_union", "stack")) > 0) ms->irq_stack_size = sz; - } else - ms->irq_stack_size = ARM64_IRQ_STACK_SIZE; + } else { + ulong res = arm64_set_irq_stack_size(); + ms->irq_stack_size = (res > 0) ? res : ARM64_IRQ_STACK_SIZE; + } machdep->flags |= IRQ_STACKS; @@ -4950,6 +4953,48 @@ static void arm64_recalc_KERNELPACMASK(void){ } } +static ulong arm64_set_irq_stack_size(void) +{ + int min_thread_shift = 14; + ulong thread_shift = 0; + char buf1[BUFSIZE]; + char *pos1, *pos2; + int errflag = 0; + + sprintf(buf1, "x/32i vectors"); + open_tmpfile(); + if (!gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR)) { + goto out; + } + rewind(pc->tmpfile); + while (fgets(buf1, BUFSIZE, pc->tmpfile)) { + if ((pos1 = strstr(buf1, "tbnz"))) { + if ((pos2 = strchr(pos1, '#'))) { + pos2 += 1; + for(pos1=pos2; *pos2!='\0' && *pos2!=','; pos2++); + *pos2 = '\0'; + thread_shift = stol(pos1, RETURN_ON_ERROR|QUIET, &errflag); + if (errflag) { + thread_shift = 0; + } + break; + } + } + } +out: + close_tmpfile(); + + if ((!thread_shift) && kernel_symbol_exists("kasan_enable_current")) { + min_thread_shift += 1; + thread_shift = (min_thread_shift < machdep->pageshift) ? machdep->pageshift : min_thread_shift; + } + + if (!thread_shift) + return 0; + else + return ((1UL) << thread_shift); +} + #endif /* ARM64 */ -- 2.25.1
-- Crash-utility mailing list -- devel@lists.crash-utility.osci.io To unsubscribe send an email to devel-le...@lists.crash-utility.osci.io https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/ Contribution Guidelines: https://github.com/crash-utility/crash/wiki