On 08/05/2024 1:39 pm, Alejandro Vallejo wrote:
> Removes a needless assembly entry point and simplifies the codebase by 
> allowing
> hvmloader to wake APs it doesn't know the APIC ID of.
>
> Signed-off-by: Alejandro Vallejo <alejandro.vall...@cloud.com>

This is basically independent of the rest of the series, and it would be
good to pull it in separately.  A few notes.

The commit message ought to note that this has a side effect of removing
an LMSW instruction which has fastpath at all on AMD CPUs, and requires
full emulation, and it gets rid of 13 vLAPIC emulations when 3
hypercalls would do.

The point being that this is borderline backport material, although it
does depend on the 32 vCPU bugfix.

> ---
> v2:
>   * New patch. Replaces adding cpu policy to hvmloader in v1.
> ---
>  tools/firmware/hvmloader/smp.c | 111 +++++++++++++--------------------
>  1 file changed, 44 insertions(+), 67 deletions(-)
>
> diff --git a/tools/firmware/hvmloader/smp.c b/tools/firmware/hvmloader/smp.c
> index 082b17f13818..a668f15d7e1f 100644
> --- a/tools/firmware/hvmloader/smp.c
> +++ b/tools/firmware/hvmloader/smp.c
> @@ -22,88 +22,68 @@
>  #include "util.h"
>  #include "config.h"
>  #include "apic_regs.h"
> +#include "hypercall.h"
>  
> -#define AP_BOOT_EIP 0x1000
> -extern char ap_boot_start[], ap_boot_end[];
> +#include <xen/asm/x86-defns.h>
> +#include <xen/hvm/hvm_vcpu.h>
> +
> +#include <xen/vcpu.h>
>  
>  static int ap_callin, ap_cpuid;
>  
> -asm (
> -    "    .text                       \n"
> -    "    .code16                     \n"
> -    "ap_boot_start: .code16          \n"
> -    "    mov   %cs,%ax               \n"
> -    "    mov   %ax,%ds               \n"
> -    "    lgdt  gdt_desr-ap_boot_start\n"
> -    "    xor   %ax, %ax              \n"
> -    "    inc   %ax                   \n"
> -    "    lmsw  %ax                   \n"
> -    "    ljmpl $0x08,$1f             \n"
> -    "gdt_desr:                       \n"
> -    "    .word gdt_end - gdt - 1     \n"
> -    "    .long gdt                   \n"
> -    "ap_boot_end: .code32            \n"
> -    "1:  mov   $0x10,%eax            \n"
> -    "    mov   %eax,%ds              \n"
> -    "    mov   %eax,%es              \n"
> -    "    mov   %eax,%ss              \n"
> -    "    movl  $stack_top,%esp       \n"
> -    "    movl  %esp,%ebp             \n"
> -    "    call  ap_start              \n"
> -    "1:  hlt                         \n"
> -    "    jmp  1b                     \n"
> -    "                                \n"
> -    "    .align 8                    \n"
> -    "gdt:                            \n"
> -    "    .quad 0x0000000000000000    \n"
> -    "    .quad 0x00cf9a000000ffff    \n" /* 0x08: Flat code segment */
> -    "    .quad 0x00cf92000000ffff    \n" /* 0x10: Flat data segment */
> -    "gdt_end:                        \n"
> -    "                                \n"
> -    "    .bss                        \n"
> -    "    .align    8                 \n"
> -    "stack:                          \n"
> -    "    .skip    0x4000             \n"
> -    "stack_top:                      \n"
> -    "    .text                       \n"
> -    );
> -
> -void ap_start(void); /* non-static avoids unused-function compiler warning */
> -/*static*/ void ap_start(void)
> +static void ap_start(void)
>  {
>      printf(" - CPU%d ... ", ap_cpuid);
>      cacheattr_init();
>      printf("done.\n");
> +
> +    if ( !ap_cpuid )
> +        return;
> +
>      wmb();
>      ap_callin = 1;

/* After this point, the BSP will shut us down. */

> -}
>  
> -static void lapic_wait_ready(void)
> -{
> -    while ( lapic_read(APIC_ICR) & APIC_ICR_BUSY )
> -        cpu_relax();
> +    while ( 1 )

For this, we tend to favour "for ( ;; )".

> +        asm volatile ( "hlt" );
>  }
>  
>  static void boot_cpu(unsigned int cpu)
>  {
> -    unsigned int icr2 = SET_APIC_DEST_FIELD(LAPIC_ID(cpu));
> +    static uint8_t ap_stack[4 * PAGE_SIZE] __attribute__ ((aligned (16)));

I know you're just copying what was there, but 4 pages is stupidly large
for something that needs about 4 stack slots.

4K is absolutely plenty.

> +    static struct vcpu_hvm_context ap;
>  
>      /* Initialise shared variables. */
>      ap_cpuid = cpu;
> -    ap_callin = 0;
>      wmb();
>  
> -    /* Wake up the secondary processor: INIT-SIPI-SIPI... */
> -    lapic_wait_ready();
> -    lapic_write(APIC_ICR2, icr2);
> -    lapic_write(APIC_ICR, APIC_DM_INIT);
> -    lapic_wait_ready();
> -    lapic_write(APIC_ICR2, icr2);
> -    lapic_write(APIC_ICR, APIC_DM_STARTUP | (AP_BOOT_EIP >> 12));
> -    lapic_wait_ready();
> -    lapic_write(APIC_ICR2, icr2);
> -    lapic_write(APIC_ICR, APIC_DM_STARTUP | (AP_BOOT_EIP >> 12));
> -    lapic_wait_ready();
> +    /* Wake up the secondary processor */
> +    ap = (struct vcpu_hvm_context) {
> +        .mode = VCPU_HVM_MODE_32B,
> +        .cpu_regs.x86_32 = {
> +            .eip = (uint32_t)ap_start,
> +            .esp = (uint32_t)ap_stack + ARRAY_SIZE(ap_stack),

Not that it really matters, but these want to be unsigned long casts.

> +
> +            /* Protected mode with MMU off */
> +            .cr0 = X86_CR0_PE,
> +
> +            /* Prepopulate the GDT */

/* 32bit Flat Mode */

This isn't the GDT; it's the segment registers, but "Flat Mode" is the
more meaningful term to use.


I'm happy to fix all on commit.

~Andrew

Reply via email to