Hi Tao and kuan-ying,

I think the reason my vmlinux does not contain the symbol "union
thread_union" may be that it was optimized out during compilation.
I tried adding this line to "cpufreq_schedutil.c":
> + volatile union thread_union test_for_reserve_union;

When I then use gdb to parse the vmlinux it generated, I can get the union thread_union:
> (gdb) ptype union thread_union
> type = union thread_union {
>   struct task_struct task;
>   unsigned long stack[4096];
>}

However, when I delete that line of code, the resulting vmlinux no longer contains union
thread_union:
> (gdb)  ptype union thread_union
> No union type named thread_union.


Thanks,
Yeping.zheng

Tao Liu <l...@redhat.com> 于2024年7月17日周三 14:21写道:

> Hi Yeping,
>
> Thanks for the patch.
>
> I re-thought about the issue. I want to ask a few questions:
>
> 1) What's your kernel version? I ask because I checked the latest kernel
> source (6.10), and the related code hasn't changed:
>
> * #define IRQ_STACK_SIZE THREAD_SIZE
> *
> * and finding a solid usage of THREAD_SIZE is hard, but:
> *
> * union thread_union {
> * ...
> * unsigned long stack[THREAD_SIZE/sizeof(long)];
> * };
>
> So I guess we can still get the THREAD_SIZE by the old crash code right?
>
> if (MEMBER_EXISTS("thread_union", "stack")) {
>   if ((sz = MEMBER_SIZE("thread_union", "stack")) > 0)
>     ms->irq_stack_size = sz;
>
> Could you please debug with your vmcore why code doesn't enter the
> above if conditions, so it goes into your arm64_set_irq_stack_size()
> check?
>
> 2) The commit message seems mis-formatted in your attachment patch.
>
> Thanks,
> Tao Liu
>
> On Wed, Jul 17, 2024 at 5:16 PM yp z <wonder...@gmail.com> wrote:
> >
> > Hi Tao,
> >   Thank you for your suggestions for patch. I followed your suggestion
> and provided a new patch. Please help to review it, thanks.
> >
> > From 831701099a7097662ddec9de464131ad50c7134b Mon Sep 17 00:00:00 2001
> > From: wonderzyp <wonder...@qq.com>
> > Date: Wed, 17 Jul 2024 11:02:06 +0800
> > Subject: [PATCH] A segfault issue was observed on KASAN enabled arm64
> kernel
> >  due to the incorrect irq_stack_size, see the following stack trace: >
> (gdb)
> >  bt > #0  0x00005635ac2b166b in arm64_unwind_frame (frame=0x7ffdaf35cb70,
> >  bt=0x7ffdaf35d430) >     at arm64.c:2821 > #1  arm64_back_trace_cmd
> >  (bt=0x7ffdaf35d430) at arm64.c:3306 > #2  0x00005635ac27b108 in
> back_trace
> >  (bt=bt@entry=0x7ffdaf35d430) at kernel.c:3239 > #3  0x00005635ac2880ae
> in
> >  cmd_bt () at kernel.c:2863 > #4  0x00005635ac1f16dc in exec_command ()
> at
> >  main.c:893 > #5  0x00005635ac1f192a in main_loop () at main.c:840 > #6
> >  0x00005635ac50df81 in captured_main (data=<optimized out>) at
> main.c:1284 >
> >  #7  gdb_main (args=<optimized out>) at main.c:1313 > #8
> 0x00005635ac50e000
> >  in gdb_main_entry (argc=<optimized out>, argv=<optimized out>) >     at
> >  main.c:1338 > #9  0x00005635ac1ea2a5 in main (argc=5,
> argv=0x7ffdaf35dde8) at
> >  main.c:721
> >
> > The issue was caused by not setting irq_stack_size correctly, and this
> patch will fix the issue by two ways:
> > 1. if CONFIG_IKCONFIG is set, calculate the irq_stack_size according to
> kernel source code
> > 2. if CONFIG_IKCONFIG is not set, get THREAD_SHIFT value by
> disassembling the tbnz instruction to calculate the irq_stack_size
> >
> > Signed-off-by: Yeping.Zheng <yeping.zh...@nio.com>
> > ---
> >  arm64.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 84 insertions(+), 2 deletions(-)
> >
> > diff --git a/arm64.c b/arm64.c
> > index b3040d7..8347ba1 100644
> > --- a/arm64.c
> > +++ b/arm64.c
> > @@ -93,6 +93,7 @@ static void arm64_calc_VA_BITS(void);
> >  static int arm64_is_uvaddr(ulong, struct task_context *);
> >  static void arm64_calc_KERNELPACMASK(void);
> >  static int arm64_get_vmcoreinfo(unsigned long *vaddr, const char
> *label, int base);
> > +static ulong arm64_set_irq_stack_size(struct machine_specific *ms);
> >
> >  struct kernel_range {
> >   unsigned long modules_vaddr, modules_end;
> > @@ -2223,8 +2224,10 @@ arm64_irq_stack_init(void)
> >   if (MEMBER_EXISTS("thread_union", "stack")) {
> >   if ((sz = MEMBER_SIZE("thread_union", "stack")) > 0)
> >   ms->irq_stack_size = sz;
> > - } else
> > - ms->irq_stack_size = ARM64_IRQ_STACK_SIZE;
> > + } else {
> > + ulong res = arm64_set_irq_stack_size(ms);
> > + ms->irq_stack_size = (res > 0) ? res : ARM64_IRQ_STACK_SIZE;
> > + }
> >
> >   machdep->flags |= IRQ_STACKS;
> >
> > @@ -4921,6 +4924,85 @@ static void arm64_calc_KERNELPACMASK(void)
> >   }
> >  }
> >
> > +static ulong arm64_set_irq_stack_size(struct machine_specific *ms)
> > +{
> > + char *string;
> > + int ret;
> > + int kasan_thread_shift = 0;
> > + int min_thread_shift;
> > + ulong arm64_page_shift;
> > + ulong thread_shift = 0;
> > + ulong thread_size;
> > + struct syment *sp;
> > + const char* tbnz_str = "tbnz";
> > +
> > + if (kt->ikconfig_flags & IKCONFIG_AVAIL) {
> > + if ((ret = get_kernel_config("CONFIG_KASAN_GENERIC", NULL) ==
> IKCONFIG_Y) ||
> > + (ret = get_kernel_config("CONFIG_KASAN_SW_TAGS", NULL) == IKCONFIG_Y))
> {
> > + kasan_thread_shift = 1;
> > + }
> > + min_thread_shift = 14 + kasan_thread_shift;
> > +
> > + if ((ret = get_kernel_config("CONFIG_VMAP_STACK", NULL)) ==
> IKCONFIG_Y){
> > + if ((ret = get_kernel_config("CONFIG_ARM64_PAGE_SHIFT", &string)) ==
> IKCONFIG_STR){
> > + arm64_page_shift = atol(string);
> > + }
> > + if (min_thread_shift < arm64_page_shift){
> > + thread_shift = arm64_page_shift;
> > + } else {
> > + thread_shift = min_thread_shift;
> > + }
> > + }
> > + } else {
> > +
> > + if (!(sp = symbol_search("vectors"))) {
> > + return -1;
> > + }
> > +
> > + const char* tbnz_str = "tbnz";
> > + struct gnu_request *req;
> > + req = (struct gnu_request *)GETBUF(sizeof(struct gnu_request));
> > + req->command = GNU_PASS_THROUGH;
> > + req->buf = GETBUF(BUFSIZE);
> > + strcat(req->buf, "x/1024i ");
> > +
> > + char tmp[100];
> > + sprintf(tmp, "0x%lx", sp->value);
> > + strcat(req->buf, tmp);
> > + req->flags = (GNU_RETURN_ON_ERROR);
> > + open_tmpfile();
> > + req->fp = pc->tmpfile;
> > + gdb_interface(req);
> > +
> > +
> > + rewind(pc->tmpfile);
> > + char line[BUFSIZE];
> > + while (fgets(line, BUFSIZE, pc->tmpfile) != NULL) {
> > + // printf("the buffer is not null\n");
> > + char* tbnz_pos = strstr(line, tbnz_str);
> > + if (tbnz_pos != NULL) {
> > + char* thread_shift_pos = strstr(tbnz_pos, "#");
> > + if (sscanf(thread_shift_pos + 1, "%ld", &thread_shift) == 1) {
> > + if (CRASHDEBUG(1)){
> > + error(INFO, "Detect thread shift via tbnz %ld\n", thread_shift);
> > + }
> > + break;
> > + }
> > + }
> > +     }
> > + close_tmpfile();
> > + FREEBUF(req->buf);
> > + FREEBUF(req);
> > + }
> > +
> > + if (thread_shift == 0) {
> > + return -1;
> > + }
> > +
> > + thread_size = ((1UL) << thread_shift);
> > + return thread_size;
> > +}
> > +
> >  #endif  /* ARM64 */
> >
> >
> > --
> > 2.25.1
> >
> >
> > Tao Liu <l...@redhat.com> 于2024年7月16日周二 16:22写道:
> >>
> >> Hi Yeping,
> >>
> >> Thanks for the fix.
> >>
> >> On Thu, Jul 11, 2024 at 1:38 PM <wonder...@gmail.com> wrote:
> >> >
> >> > When using the crash tool to parse the ARM64 dump file with KASAN
> enabled, I found that using the bt -a command will cause this tool to
> crash. The following is the backtrace information.
> >> >
> >> > (gdb) bt
> >> > #0  0x00005635ac2b166b in arm64_unwind_frame (frame=0x7ffdaf35cb70,
> bt=0x7ffdaf35d430)
> >> >     at arm64.c:2821
> >> > #1  arm64_back_trace_cmd (bt=0x7ffdaf35d430) at arm64.c:3306
> >> > #2  0x00005635ac27b108 in back_trace (bt=bt@entry=0x7ffdaf35d430) at
> kernel.c:3239
> >> > #3  0x00005635ac2880ae in cmd_bt () at kernel.c:2863
> >> > #4  0x00005635ac1f16dc in exec_command () at main.c:893
> >> > #5  0x00005635ac1f192a in main_loop () at main.c:840
> >> > #6  0x00005635ac50df81 in captured_main (data=<optimized out>) at
> main.c:1284
> >> > #7  gdb_main (args=<optimized out>) at main.c:1313
> >> > #8  0x00005635ac50e000 in gdb_main_entry (argc=<optimized out>,
> argv=<optimized out>)
> >> >     at main.c:1338
> >> > #9  0x00005635ac1ea2a5 in main (argc=5, argv=0x7ffdaf35dde8) at
> main.c:721
> >> > Eventually, I found that it may be caused by not setting
> irq_stack_size properly, and I provide this patch to solve it.
> >> >
> >>
> >> Could you please re-draft your commit message? The original one looks
> >> informal. E.g:
> >>
> >> A segfault issue was observed on KASAN enabled arm64 kernel due to the
> >> incorrect irq_stack_size, see the following stack trace:
> >> ...
> >> The issue was caused by ...., and this patch will fix the issue by ....
> >>
> >> >
> >> > From 34b28aa8c11e77d20adec4f7705a14d239c8a55f Mon Sep 17 00:00:00 2001
> >> > From: wonderzyp <wonder...@qq.com>
> >> > Date: Mon, 8 Jul 2024 20:11:38 +0800
> >> > Subject: [PATCH 1131/1131] set_arm64_irq_stack_size
> >> >
> >> > Signed-off-by: Yeping Zheng <wonder...@gmail.com>
> >> > ---
> >> >  arm64.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++--
> >> >  1 file changed, 47 insertions(+), 2 deletions(-)
> >> >
> >> > diff --git a/arm64.c b/arm64.c
> >> > index b3040d7..39d891b 100644
> >> > --- a/arm64.c
> >> > +++ b/arm64.c
> >> > @@ -93,6 +93,7 @@ static void arm64_calc_VA_BITS(void);
> >> >  static int arm64_is_uvaddr(ulong, struct task_context *);
> >> >  static void arm64_calc_KERNELPACMASK(void);
> >> >  static int arm64_get_vmcoreinfo(unsigned long *vaddr, const char
> *label, int base);
> >> > +static ulong arm64_set_irq_stack_size(struct machine_specific *ms);
> >> >
> >> >  struct kernel_range {
> >> >         unsigned long modules_vaddr, modules_end;
> >> > @@ -2223,8 +2224,14 @@ arm64_irq_stack_init(void)
> >> >                 if (MEMBER_EXISTS("thread_union", "stack")) {
> >> >                         if ((sz = MEMBER_SIZE("thread_union",
> "stack")) > 0)
> >> >                                 ms->irq_stack_size = sz;
> >> > -               } else
> >> > -                       ms->irq_stack_size = ARM64_IRQ_STACK_SIZE;
> >> > +               } else {
> >> > +                       ulong res = arm64_set_irq_stack_size(ms);
> >> > +                       if (res > 0){
> >> > +                               ms->irq_stack_size = res;
> >> > +                       } else {
> >> > +                               ms->irq_stack_size =
> ARM64_IRQ_STACK_SIZE;
> >> > +                       }
> >> > +               }
> >> >
> >> >                 machdep->flags |= IRQ_STACKS;
> >> >
> >> > @@ -4921,6 +4928,44 @@ static void arm64_calc_KERNELPACMASK(void)
> >> >         }
> >> >  }
> >> >
> >> > +static ulong arm64_set_irq_stack_size(struct machine_specific *ms)
> >> > +{
> >> > +       char *string;
> >> > +       int ret;
> >> > +       int KASAN_THREAD_SHIFT = 0;
> >> > +       int MIN_THREAD_SHIFT;
> >> > +       ulong ARM64_PAGE_SHIFT;
> >> > +       ulong THREAD_SHIFT = 0;
> >> > +       ulong THREAD_SIZE;
> >>
> >> I guess the upper case of variable names is not encouraged, though it
> >> is the variable that comes from kernel config file.
> >>
> >> > +       if (kt->ikconfig_flags & IKCONFIG_AVAIL) {
> >> > +               if ((ret = get_kernel_config("CONFIG_KASAN_GENERIC",
> NULL) == IKCONFIG_Y) ||
> >> > +                       (ret =
> get_kernel_config("CONFIG_KASAN_SW_TAGS", NULL) == IKCONFIG_Y)) {
> >> > +                               KASAN_THREAD_SHIFT = 1;
> >> > +                       }
> >> > +       }
> >> > +       MIN_THREAD_SHIFT = 14 + KASAN_THREAD_SHIFT;
> >> > +
> >> > +       if (kt->ikconfig_flags & IKCONFIG_AVAIL) {
> >>
> >> Could the if condition be merged with the prior one?
> >>
> >> > +               if ((ret = get_kernel_config("CONFIG_VMAP_STACK",
> NULL)) == IKCONFIG_Y){
> >> > +                       if ((ret =
> get_kernel_config("CONFIG_ARM64_PAGE_SHIFT", &string)) == IKCONFIG_STR){
> >> > +                               ARM64_PAGE_SHIFT = atol(string);
> >> > +                       }
> >> > +                       if (MIN_THREAD_SHIFT < ARM64_PAGE_SHIFT){
> >> > +                               THREAD_SHIFT = ARM64_PAGE_SHIFT;
> >> > +                       } else {
> >> > +                               THREAD_SHIFT = MIN_THREAD_SHIFT;
> >> > +                       }
> >> > +               }
> >> > +       }
> >> > +
> >> > +       if (THREAD_SHIFT == 0) {
> >> > +               return -1;
> >> > +       }
> >> > +
> >> > +       THREAD_SIZE = ((1UL) << THREAD_SHIFT);
> >> > +       return THREAD_SIZE;
> >> > +}
> >>
> >> I'm OK with the approach above, since it directly came from the kernel
> >> source. However I'm not a fan of checking kernel configs, there might
> >> be kernels which are compiled without CONFIG_IKCONFIG.
> >>
> >> Could we add an approach here, to get the value from disassembly when
> >> CONFIG_IKCONFIG is negative?
> >>
> >> kernel source: arch/arm64/kernel/entry.S:
> >>
> >> .macro kernel_ventry, el:req, ht:req, regsize:req, label:req
> >> ....
> >> add sp, sp, x0 // sp' = sp + x0
> >> sub x0, sp, x0 // x0' = sp' - x0 = (sp + x0) - x0 = sp
> >> tbnz x0, #THREAD_SHIFT, 0f <<<<<<<<
> >>
> >> $ objdump -d vmlinux
> >> ...
> >> ffff800080010800 <vectors>:
> >> ffff800080010800:       d10543ff        sub     sp, sp, #0x150
> >> ffff800080010804:       8b2063ff        add     sp, sp, x0
> >> ffff800080010808:       cb2063e0        sub     x0, sp, x0
> >> ffff80008001080c:       37800080        tbnz    w0, #16,
> >> ffff80008001081c <vectors+0x1c> <<<<<<<<<<
> >>
> >> It is easy to get the THREAD_SHIFT value by disassembling the tbnz
> >> instruction. What do you think @Lianbo Jiang
> >>
> >> Thanks,
> >> Tao Liu
> >>
> >> > +
> >> >  #endif  /* ARM64 */
> >> >
> >> >
> >> > --
> >> > 2.25.1
> >> > --
> >> > Crash-utility mailing list -- devel@lists.crash-utility.osci.io
> >> > To unsubscribe send an email to
> devel-le...@lists.crash-utility.osci.io
> >> > https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/
> >> > Contribution Guidelines: https://github.com/crash-utility/crash/wiki
> >>
>
>
--
Crash-utility mailing list -- devel@lists.crash-utility.osci.io
To unsubscribe send an email to devel-le...@lists.crash-utility.osci.io
https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/
Contribution Guidelines: https://github.com/crash-utility/crash/wiki

Reply via email to