Damien Zammit, le mar. 25 oct. 2022 10:56:44 +0000, a ecrit: > diff --git a/i386/i386/cpu_number.h b/i386/i386/cpu_number.h > index 9aef6370..d56cb602 100644 > --- a/i386/i386/cpu_number.h > +++ b/i386/i386/cpu_number.h > @@ -35,14 +35,35 @@ > /* More-specific code must define cpu_number() and CPU_NUMBER. */ > #ifdef __i386__ > #define CX(addr, reg) addr(,reg,4) > + > +/* CPU_NUMBER(%ebx) will _not_ work! */ > +#define CPU_NUMBER(reg) \ > + pushfl ;\ > + cli ;\ > + pushl %esi ;\ > + pushl %edi ;\ > + pushl %ebx ;\ > + pushl %eax ;\ > + call EXT(cpu_number) ;\ > + movl %eax, %ebx ;\ > + popl %eax ;\ > + movl %ebx, reg ;\ > + popl %ebx ;\ > + popl %edi ;\ > + popl %esi ;\ > + popfl
The point of the CPU_NUMBER() macro is to make it efficient and safe to call in tricky contexts. I'd think we can write an assembly version of (lapic->apic_id.r >> 24) & 0xff + the loop to find the apic_id in apic_data.cpu_lapic_list? > + > #endif > #ifdef __x86_64__ > #define CX(addr, reg) addr(,reg,8) > +#warning Missing CPU_NUMBER() for 64 bit > +#define CPU_NUMBER(reg) > #endif > > -/* XXX For now */ > -#define CPU_NUMBER(reg) movl $0,reg > -#define cpu_number() 0 > +#ifndef __ASSEMBLER__ > +#include "kern/cpu_number.h" > +int cpu_number(); > +#endif > > #else /* NCPUS == 1 */ > > @@ -51,8 +72,4 @@ > > #endif /* NCPUS == 1 */ > > -#ifndef __ASSEMBLER__ > -#include "kern/cpu_number.h" > -#endif > - > #endif /* _I386_CPU_NUMBER_H_ */ > diff --git a/i386/i386/cswitch.S b/i386/i386/cswitch.S > index 718c8aac..46da2300 100644 > --- a/i386/i386/cswitch.S > +++ b/i386/i386/cswitch.S > @@ -110,7 +110,7 @@ ENTRY(Thread_continue) > */ > ENTRY(switch_to_shutdown_context) > CPU_NUMBER(%edx) > - movl EXT(active_stacks)(,%edx,4),%ecx /* get old kernel stack > */ > + movl CX(EXT(active_stacks),%edx),%ecx /* get old kernel stack > */ > movl %ebx,KSS_EBX(%ecx) /* save registers */ > movl %ebp,KSS_EBP(%ecx) > movl %edi,KSS_EDI(%ecx) > @@ -124,7 +124,7 @@ ENTRY(switch_to_shutdown_context) > movl 4(%esp),%ebx /* get routine to run next */ > movl 8(%esp),%esi /* get its argument */ > > - movl EXT(interrupt_stack)(,%edx,4),%ecx /* point to its > interrupt stack */ > + movl CX(EXT(int_stack_base),%edx),%ecx /* point to its > interrupt stack */ > lea INTSTACK_SIZE(%ecx),%esp /* switch to it (top) */ > > pushl %eax /* push thread */ > diff --git a/i386/i386/locore.S b/i386/i386/locore.S > index 8c2f57ea..64152b5b 100644 > --- a/i386/i386/locore.S > +++ b/i386/i386/locore.S > @@ -540,21 +540,22 @@ _kret_iret: > */ > trap_from_kernel: > #if MACH_KDB || MACH_TTD > - movl %esp,%ebx /* save current stack */ > + CPU_NUMBER(%ecx) /* get CPU number */ > > + movl %esp,%ebx /* save current stack 
*/ > movl %esp,%edx /* on an interrupt stack? */ > + > and $(~(KERNEL_STACK_SIZE-1)),%edx > - cmpl EXT(int_stack_base),%edx > + cmpl CX(EXT(int_stack_base),%ecx),%edx > je 1f /* OK if so */ > > - CPU_NUMBER(%edx) /* get CPU number */ > - cmpl CX(EXT(kernel_stack),%edx),%esp > + cmpl CX(EXT(kernel_stack),%ecx),%esp > /* already on kernel stack? */ > ja 0f > - cmpl CX(EXT(active_stacks),%edx),%esp > + cmpl CX(EXT(active_stacks),%ecx),%esp > ja 1f /* switch if not */ > 0: > - movl CX(EXT(kernel_stack),%edx),%esp > + movl CX(EXT(kernel_stack),%ecx),%esp > 1: > pushl %ebx /* save old stack */ > pushl %ebx /* pass as parameter */ > @@ -668,24 +669,25 @@ ENTRY(all_intrs) > pushl %edx > cld /* clear direction flag */ > > - movl %esp,%edx /* on an interrupt stack? */ > - and $(~(KERNEL_STACK_SIZE-1)),%edx > - cmpl %ss:EXT(int_stack_base),%edx > + CPU_NUMBER(%edx) > + > + movl %esp,%ecx /* on an interrupt stack? */ > + and $(~(KERNEL_STACK_SIZE-1)),%ecx > + cmpl %ss:CX(EXT(int_stack_base),%edx),%ecx > je int_from_intstack /* if not: */ > > pushl %ds /* save segment registers */ > pushl %es > pushl %fs > pushl %gs > - mov %ss,%dx /* switch to kernel segments */ > - mov %dx,%ds > - mov %dx,%es > - mov %dx,%fs > - mov %dx,%gs > - > - CPU_NUMBER(%edx) > + mov %ss,%cx /* switch to kernel segments */ > + mov %cx,%ds > + mov %cx,%es > + mov %cx,%fs > + mov %cx,%gs > > movl CX(EXT(int_stack_top),%edx),%ecx > + > xchgl %ecx,%esp /* switch to interrupt stack */ > > #if STAT_TIME > @@ -724,19 +726,19 @@ LEXT(return_to_iret) /* ( label for > kdb_kintr and hardclock) */ > pop %fs > pop %es > pop %ds > - pop %edx > - pop %ecx > - pop %eax > + popl %edx > + popl %ecx > + popl %eax > iret /* return to caller */ > > int_from_intstack: > - cmpl EXT(int_stack_base),%esp /* seemingly looping? */ > + cmpl CX(EXT(int_stack_base),%edx),%esp /* seemingly looping? 
*/ > jb stack_overflowed /* if not: */ > call EXT(interrupt) /* call interrupt routine */ > _return_to_iret_i: /* ( label for kdb_kintr) */ > - pop %edx /* must have been on kernel segs */ > - pop %ecx > - pop %eax /* no ASTs */ > + popl %edx /* must have been on kernel segs */ > + popl %ecx > + popl %eax /* no ASTs */ > iret > > stack_overflowed: > diff --git a/i386/i386/mp_desc.c b/i386/i386/mp_desc.c > index 1e9ea0fc..0c68ec11 100644 > --- a/i386/i386/mp_desc.c > +++ b/i386/i386/mp_desc.c > @@ -24,25 +24,36 @@ > * the rights to redistribute these changes. > */ > > -#if NCPUS > 1 > - > -#include <string.h> > - > #include <kern/cpu_number.h> > #include <kern/debug.h> > #include <kern/printf.h> > +#include <kern/smp.h> > +#include <kern/startup.h> > +#include <kern/kmutex.h> > #include <mach/machine.h> > #include <mach/xen.h> > #include <vm/vm_kern.h> > > #include <i386/mp_desc.h> > #include <i386/lock.h> > +#include <i386/apic.h> > +#include <i386/locore.h> > +#include <i386/gdt.h> > +#include <i386at/idt.h> > +#include <i386at/int_init.h> > +#include <i386/cpu.h> > +#include <i386/smp.h> > + > #include <i386at/model_dep.h> > #include <machine/ktss.h> > +#include <machine/smp.h> > #include <machine/tss.h> > #include <machine/io_perm.h> > #include <machine/vm_param.h> > > +#include <i386at/acpi_parse_apic.h> > +#include <string.h> > + > /* > * The i386 needs an interrupt stack to keep the PCB stack from being > * overrun by interrupts. All interrupt stacks MUST lie at lower addresses > @@ -52,20 +63,35 @@ > /* > * Addresses of bottom and top of interrupt stacks. > */ > -vm_offset_t interrupt_stack[NCPUS]; > vm_offset_t int_stack_top[NCPUS]; > vm_offset_t int_stack_base[NCPUS]; > > -/* > - * Barrier address. 
> - */ > -vm_offset_t int_stack_high; > +/* Interrupt stack allocation */ > +uint8_t solid_intstack[NCPUS*INTSTACK_SIZE] __aligned(INTSTACK_SIZE); > > +void > +interrupt_stack_alloc(void) > +{ > + int i; > + > + /* > + * Set up pointers to the top of the interrupt stack. > + */ > + > + for (i = 0; i < NCPUS; i++) { > + int_stack_base[i] = (vm_offset_t) &solid_intstack[i * > INTSTACK_SIZE]; > + int_stack_top[i] = (vm_offset_t) &solid_intstack[(i + 1) * > INTSTACK_SIZE]; > + } > +} > + > +#if NCPUS > 1 > /* > - * First cpu`s interrupt stack. > + * Flag to mark SMP init by BSP complete > */ > -extern char _intstack[]; /* bottom */ > -extern char _eintstack[]; /* top */ > +int bspdone; > + > +extern void *apboot, *apbootend; > +extern volatile ApicLocalUnit* lapic; > > /* > * Multiprocessor i386/i486 systems use a separate copy of the > @@ -77,7 +103,7 @@ extern char _eintstack[]; /* top */ > */ > > /* > - * Allocated descriptor tables. > + * Descriptor tables. > */ > struct mp_desc_table *mp_desc_table[NCPUS] = { 0 }; > > @@ -98,16 +124,121 @@ extern struct real_gate idt[IDTSZ]; > extern struct real_descriptor gdt[GDTSZ]; > extern struct real_descriptor ldt[LDTSZ]; > > +static void > +set_control_regs_and_pmap(void) > +{ > + /* XXX move to intel/pmap.h */ > + extern pt_entry_t *kernel_page_dir; > + int i; > + > + /* > + * We'll have to temporarily install a direct mapping > + * between physical memory and low linear memory, > + * until we start using our new kernel segment descriptors. > + */ Rather use another kernel_page_dir, otherwise this is messing with the original kernel_page_dir of the BSP. And also factorize with i386at_init: this is tricky code that we don't want to see two copies of, which will inevitably diverge and both become half-bogus in the long run. 
> +#if INIT_VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS > + vm_offset_t delta = INIT_VM_MIN_KERNEL_ADDRESS - > LINEAR_MIN_KERNEL_ADDRESS; > + if ((vm_offset_t)(-delta) < delta) > + delta = (vm_offset_t)(-delta); > + int nb_direct = delta >> PDESHIFT; > + for (i = 0; i < nb_direct; i++) > + kernel_page_dir[lin2pdenum_cont(INIT_VM_MIN_KERNEL_ADDRESS) + > i] = > + > kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS) + i]; > +#endif > + /* We need BIOS memory mapped at 0xc0000 & co for BIOS accesses */ > +#if VM_MIN_KERNEL_ADDRESS != 0 > + kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS - > VM_MIN_KERNEL_ADDRESS)] = > + kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS)]; > +#endif [...] > + unsigned long flags; > + > + cpu_intr_save(&flags); > + > + int ncpus = smp_get_numcpus(); > + > + //Copy cpu initialization assembly routine > + memcpy((void*)phystokv(AP_BOOT_ADDR), (void*) &apboot, > + (uint32_t)&apbootend - (uint32_t)&apboot); Do we have something that makes sure that this area is not allocated for something else?