Hi Patrick,

On Sat, 27 Jul 2024 at 01:20, Patrick Rudolph
<patrick.rudo...@9elements.com> wrote:
>
> On Arm platforms that use ACPI they cannot rely on the "spin-table"
> CPU bringup usually defined in the FDT. Thus implement the
> 'ACPI Multi-processor Startup for ARM Platforms', also referred to as
> 'ACPI parking protocol'.
>
> The ACPI parking protocol works similar to the spin-table mechanism, but
> the specification also covers lots of shortcomings of the spin-table
> implementations.
>
> Every CPU defined in the ACPI MADT table has it's own 4K page where the
> spinloop code and the OS mailbox resides. When selected the U-Boot board
> code must make sure that the secondary CPUs enter u-boot after relocation

U-Boot

> as well, so that they can enter the spinloop code residing in the ACPI
> parking protocol pages.
>
> The OS will then write to the mailbox and generate an IPI to release the
> CPUs from the spinloop code.
>
> For now it's only implemented on ARMv8, but can easily be extended to
> other platforms.
>
> TEST: Boots all CPUs on qemu-system-aarch64 -machine raspi4b
>
> Signed-off-by: Patrick Rudolph <patrick.rudo...@9elements.com>
> Cc: Simon Glass <s...@chromium.org>
> Cc: Tom Rini <tr...@konsulko.com>
> ---
>  arch/arm/cpu/armv8/Makefile              |   1 +
>  arch/arm/cpu/armv8/parking_protocol_v8.S | 102 +++++++++++++++++++++++
>  arch/arm/cpu/armv8/start.S               |   4 +
>  arch/arm/include/asm/acpi_table.h        |  52 ++++++++++++
>  arch/arm/lib/acpi_table.c                |  87 +++++++++++++++++++
>  include/acpi/acpi_table.h                |  10 +++
>  lib/Kconfig                              |  15 ++++
>  lib/acpi/acpi_table.c                    |   3 +
>  8 files changed, 274 insertions(+)
>  create mode 100644 arch/arm/cpu/armv8/parking_protocol_v8.S
>
> diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile
> index bba4f570db..9bb07bdf12 100644
> --- a/arch/arm/cpu/armv8/Makefile
> +++ b/arch/arm/cpu/armv8/Makefile
> @@ -29,6 +29,7 @@ obj-$(CONFIG_ARM_SMCCC)               += smccc-call.o
>
>  ifndef CONFIG_SPL_BUILD
>  obj-$(CONFIG_ARMV8_SPIN_TABLE) += spin_table.o spin_table_v8.o
> +obj-$(CONFIG_ACPI_PARKING_PROTOCOL) += parking_protocol_v8.o
>  else
>  obj-$(CONFIG_ARCH_SUNXI) += fel_utils.o
>  endif
> diff --git a/arch/arm/cpu/armv8/parking_protocol_v8.S 
> b/arch/arm/cpu/armv8/parking_protocol_v8.S

How about acpi_park_v8.S as it is shorter?

> new file mode 100644
> index 0000000000..b7f6abbd85
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/parking_protocol_v8.S
> @@ -0,0 +1,102 @@
> +/* SPDX-License-Identifier: GPL-2.0+ */
> +/*
> + * Copyright (C) 2024 9elements GmbH
> + *   Author: Patrick Rudolph <patrick.rudo...@9elements.com>
> + */
> +
> +#include <linux/linkage.h>
> +#include <asm/acpi_table.h>
> +
> +__secondary_cpuif_id:
> +       .quad 0
> +
> +__secondary_parking_protocol_tables:
> +       .quad 0
> +
> +__secondary_parking_protocol_ncpus:
> +       .quad 0
> +
> +.global acpi_parking_protocol_code_size
> +acpi_parking_protocol_code_size:

acpi_park_code_size

acpi_parking_protocol is just too long

> +       .word __secondary_parking_protocol_code_end - 
> __secondary_parking_protocol_code_start

Is there a 'primary'? Does this refer to the code run by the secondary CPUs?

> +
> +.global acpi_parking_protocol_install
> +ENTRY(acpi_parking_protocol_install)
> +       ldr     x2, =__secondary_parking_protocol_ncpus
> +       str     x1, [x2]
> +       ldr     x2, =__secondary_parking_protocol_tables
> +       str     x0, [x2]
> +       dsb     ishst
> +       sev
> +       ret
> +ENDPROC(acpi_parking_protocol_install)
> +
> +.global acpi_parking_protocol_secondary_jump
> +ENTRY(acpi_parking_protocol_secondary_jump)
> +0:
> +       /* Store a unique cpuif id in x0 */
> +       ldr     x2, =__secondary_cpuif_id
> +       ldaxr   w0, [x2]
> +       add     w0, w0, #1  // =1
> +       stlxr   w1, w0, [x2]
> +       cbnz    w1, 0b
> +
> +0:     /* Check if parking protocol table is ready */
> +       ldr     x1, =__secondary_parking_protocol_tables
> +       ldr     x2, [x1]
> +       cbnz    x2, 0f
> +       wfe
> +       b       0b
> +
> +0:     /* Sanity check cpuif id */
> +       ldr     x1, =__secondary_parking_protocol_ncpus
> +       ldr     x3, [x1]
> +       cmp     w0, w3
> +       b.lt    0f
> +
> +hlt:   wfi
> +       b       hlt     /* Should never happen. */
> +
> +       /* Find spinning code in ACPI parking protocol table */
> +0:     mov     x1, #ACPI_PP_PAGE_SIZE
> +       mul     x3, x0, x1
> +       add     x2, x2, x3
> +       mov     x0, x2          /* Backup ACPI page ptr for later use */
> +
> +       mov     x1, #ACPI_PP_CPU_CODE_OFFSET
> +       add     x2, x2, x1
> +
> +       /* Jump to spin code in own parking protocol page */
> +       br      x2
> +ENDPROC(acpi_parking_protocol_secondary_jump)
> +
> +.align 8
> +__secondary_parking_protocol_code_start:
> +.global acpi_parking_protocol_code_start
> +ENTRY(acpi_parking_protocol_code_start)
> +       /* x0 points to the 4K aligned parking protocol page */
> +
> +       /* Prepare defines for spinning code */
> +       mov     w3, #ACPI_PP_CPU_ID_INVALID
> +       mov     x2, #ACPI_PP_JMP_ADR_INVALID
> +
> +0:     wfe
> +       ldr     w1, [x0, #ACPI_PP_CPU_ID_OFFSET]
> +
> +       /* Check CPU ID is not invalid */
> +       cmp     w1, w3
> +       b.eq    0b
> +
> +       /* Check jump address not invalid */
> +       ldr     x1, [x0, #ACPI_PP_CPU_JMP_ADDR_OFFSET]
> +       cmp     x1, x2
> +       b.eq    0b
> +
> +       /* Clear jump address before jump */
> +       str     x2, [x0, #ACPI_PP_CPU_JMP_ADDR_OFFSET]
> +       dsb     sy
> +
> +       br      x1
> +ENDPROC(acpi_parking_protocol_code_start)
> +       /* Secondary Boot Code ends here */
> +__secondary_parking_protocol_code_end:
> diff --git a/arch/arm/cpu/armv8/start.S b/arch/arm/cpu/armv8/start.S
> index 7461280261..32a295c832 100644
> --- a/arch/arm/cpu/armv8/start.S
> +++ b/arch/arm/cpu/armv8/start.S
> @@ -178,6 +178,10 @@ pie_fixup_done:
>         branch_if_master x0, master_cpu
>         b       spin_table_secondary_jump
>         /* never return */
> +#elif defined(CONFIG_ACPI_PARKING_PROTOCOL) && !defined(CONFIG_SPL_BUILD)
> +       branch_if_master x0, master_cpu
> +       b       acpi_parking_protocol_secondary_jump
> +       /* never return */
>  #elif defined(CONFIG_ARMV8_MULTIENTRY)
>         branch_if_master x0, master_cpu
>
> diff --git a/arch/arm/include/asm/acpi_table.h 
> b/arch/arm/include/asm/acpi_table.h
> index 8a25e93847..3c6736239d 100644
> --- a/arch/arm/include/asm/acpi_table.h
> +++ b/arch/arm/include/asm/acpi_table.h
> @@ -4,6 +4,7 @@
>  #define __ASM_ACPI_TABLE_H__
>
>  #ifndef __ACPI__
> +#ifndef __ASSEMBLY__
>
>  void acpi_write_madt_gicc(struct acpi_madt_gicc *gicc, uint cpu_num,
>                           uint perf_gsiv, ulong phys_base, ulong gicv,
> @@ -13,6 +14,57 @@ void acpi_write_madt_gicc(struct acpi_madt_gicc *gicc, 
> uint cpu_num,
>  void acpi_write_madt_gicd(struct acpi_madt_gicd *gicd, uint gic_id,
>                           ulong phys_base, uint gic_version);
>
> +/* Multi-processor Startup for ARM Platforms */

please add full comments for members

> +struct acpi_parking_protocol_page {
> +       u32 cpu_id;
> +       u32 reserved;
> +       u64 jumping_address;
> +       u8 os_reserved[2032];
> +       u8 cpu_spinning_code[2048];
> +} __packed;
> +
> +/* Architecture specific functions */
> +/**
> + * acpi_parking_protocol_code_size - Spinloop code size *
> + */
> +extern u32 acpi_parking_protocol_code_size;

u32 but it says .word in the code...isn't that 16 bits?

> +
> +/**
> + * parking_protocol_code_start() - Spinloop code
> + *
> + * Architectural spinloop code to be installed in each parking protocol
> + * page. The spinloop code must be less than 2048 bytes.
> + *
> + * The spinloop code will be entered after calling
> + * acpi_parking_protocol_install().
> + *
> + */
> +void acpi_parking_protocol_code_start(void);
> +
> +/**
> + * acpi_parking_protocol_install() - Installs the parking protocol.
> + *
> + * Installs the reserved memory containing the spinloop code and the
> + * OS mailbox as required by the ACPI Multi-processor Startup for
> + * ARM Platforms specification.
> + *
> + * The secondary CPUs will wait for this function to be called in order
> + * to enter the spinloop code residing in the tables.
> + *
> + * @tables: ACPI parking protocol tables.
> + * @num_cpus: Number of allocated pages.
> + */
> +void acpi_parking_protocol_install(uintptr_t tables, size_t num_cpus);
> +
> +#endif /* !__ASSEMBLY__ */
>  #endif /* !__ACPI__ */
>
> +#define ACPI_PP_CPU_ID_INVALID         0xffffffff
> +#define ACPI_PP_JMP_ADR_INVALID                0
> +#define ACPI_PP_PAGE_SIZE              4096
> +#define ACPI_PP_CPU_ID_OFFSET          0
> +#define ACPI_PP_CPU_JMP_ADDR_OFFSET    8
> +#define ACPI_PP_CPU_CODE_OFFSET                2048
> +#define ACPI_PP_VERSION                        1
> +
>  #endif /* __ASM_ACPI_TABLE_H__ */
> diff --git a/arch/arm/lib/acpi_table.c b/arch/arm/lib/acpi_table.c
> index ea3a6343c9..2fa25ec50e 100644
> --- a/arch/arm/lib/acpi_table.c
> +++ b/arch/arm/lib/acpi_table.c
> @@ -10,7 +10,11 @@
>  #include <acpi/acpigen.h>
>  #include <acpi/acpi_device.h>
>  #include <acpi/acpi_table.h>
> +#include <cpu_func.h>
> +#include <efi_loader.h>
> +#include <malloc.h>
>  #include <string.h>
> +#include <tables_csum.h>
>
>  void acpi_write_madt_gicc(struct acpi_madt_gicc *gicc, uint cpu_num,
>                           uint perf_gsiv, ulong phys_base, ulong gicv,
> @@ -42,3 +46,86 @@ void acpi_write_madt_gicd(struct acpi_madt_gicd *gicd, 
> uint gic_id,
>         gicd->phys_base = phys_base;
>         gicd->gic_version = gic_version;
>  }
> +
> +void acpi_write_parking_protocol(struct acpi_madt *madt)
> +{
> +       struct acpi_parking_protocol_page *page;
> +       struct acpi_madt_gicc *gicc;
> +       void *reloc_addr;
> +       u64 start;
> +       int ncpus = 0;
> +
> +       /* According to the "Multi-processor Startup for ARM Platforms":
> +        * - Every CPU as specified by MADT GICC has it's own 4K page
> +        * - Every page is divided into two sections: OS and FW reserved
> +        * - Memory occupied by "Parking Protocol" must be marked 'Reserved'
> +        * - Spinloop code should reside in FW reserved 2048 bytes
> +        * - Spinloop code will check the mailbox in OS reserved area
> +        */
> +
> +       if (acpi_parking_protocol_code_size > 
> sizeof(page->cpu_spinning_code)) {
> +               log_err("Spinning code too big to fit: %d\n",
> +                       acpi_parking_protocol_code_size);
> +       }
> +
> +       /* Count all cores including BSP */

Ick, that's a very strange way of counting CPUs. You can use
uclass_id_count(UCLASS_CPU).

> +       for (int i = sizeof(struct acpi_madt); i < madt->header.length; ) {
> +               gicc = (struct acpi_madt_gicc *)((void *)madt + i);
> +               if (gicc->type != ACPI_APIC_GICC) {
> +                       i += gicc->length;
> +                       continue;
> +               }
> +               ncpus++;
> +               i += gicc->length;
> +       }
> +       debug("Found %d GICCs in MADT\n", ncpus);
> +
> +       /* Allocate pages linearly due to assembly code requirements */
> +       page = memalign(ACPI_PP_PAGE_SIZE, ACPI_PP_PAGE_SIZE * ncpus);
> +       start = (u64)(uintptr_t)page;

Again, this should go in the bloblist.

> +
> +#if CONFIG_IS_ENABLED(EFI_LOADER)
> +       int ret = efi_add_memory_map(start, ncpus * ACPI_PP_PAGE_SIZE, 
> EFI_RESERVED_MEMORY_TYPE);
> +
> +       if (ret != EFI_SUCCESS)
> +               log_err("Reserved memory mapping failed addr %llx size %x\n",
> +                       start, ncpus * ACPI_PP_PAGE_SIZE);
> +#endif

This map will overlap the one that EFI creates for the malloc()
region, won't it?

> +
> +       /* Prepare the parking protocol pages */
> +       for (int i = sizeof(struct acpi_madt); i < madt->header.length; ) {

please use 'pos' instead of 'i' as this isn't really an index

> +               gicc = (struct acpi_madt_gicc *)((void *)madt + i);

Can you put the loop code in a separate function? This is getting a
bit long for one function

> +               if (gicc->type != ACPI_APIC_GICC) {
> +                       i += gicc->length;
> +                       continue;
> +               }
> +
> +               /* Update GICC */
> +               gicc->parking_proto = ACPI_PP_VERSION;
> +               gicc->parked_addr = (uint64_t)(uintptr_t)page;
> +
> +               /* Prepare parking protocol page */
> +               memset(page, 0, sizeof(struct acpi_parking_protocol_page));

'\0'

> +               page->cpu_id = ACPI_PP_CPU_ID_INVALID;
> +               page->jumping_address = ACPI_PP_JMP_ADR_INVALID;
> +
> +               /* Relocate spinning code */
> +               reloc_addr = &page->cpu_spinning_code[0];
> +
> +               debug("Relocating spin table from %p to %p (size %x)\n",
> +                     &acpi_parking_protocol_code_start, reloc_addr,
> +                     acpi_parking_protocol_code_size);
> +               memcpy(reloc_addr, &acpi_parking_protocol_code_start,
> +                      acpi_parking_protocol_code_size);
> +
> +               if (!CONFIG_IS_ENABLED(SYS_DCACHE_OFF))
> +                       flush_dcache_range((unsigned long)page,
> +                                          (unsigned long)page +
> +                                          sizeof(struct 
> acpi_parking_protocol_page));
> +               page++;
> +               i += gicc->length;
> +       }
> +
> +       /* Point secondary CPUs to new spin loop code */
> +       acpi_parking_protocol_install(start, ncpus);
> +}
> diff --git a/include/acpi/acpi_table.h b/include/acpi/acpi_table.h
> index 26c5e9f21a..994bfd7f8d 100644
> --- a/include/acpi/acpi_table.h
> +++ b/include/acpi/acpi_table.h
> @@ -951,6 +951,16 @@ void acpi_fill_fadt(struct acpi_fadt *fadt);
>   */
>  void *acpi_fill_madt(struct acpi_madt *madt, void *current);
>
> +/**
> + * acpi_write_parking_protocol() - Installs the ACPI parking protocol.
> + *
> + * Sets up the ACPI parking protocol and installs the spinning code for
> + * secondary CPUs.
> + *
> + * @madt: The MADT to update
> + */
> +void acpi_write_parking_protocol(struct acpi_madt *madt);
> +
>  /**
>   * acpi_write_dbg2_pci_uart() - Write out a DBG2 table
>   *
> diff --git a/lib/Kconfig b/lib/Kconfig
> index 2059219a12..43e4fa3253 100644
> --- a/lib/Kconfig
> +++ b/lib/Kconfig
> @@ -314,6 +314,21 @@ config GENERATE_ACPI_TABLE
>           by the operating system. It defines platform-independent interfaces
>           for configuration and power management monitoring.
>
> +config ACPI_PARKING_PROTOCOL
> +       bool "Support ACPI parking protocol method"
> +       depends on GENERATE_ACPI_TABLE
> +       depends on ARMV8_MULTIENTRY
> +       default y if !SEC_FIRMWARE_ARMV8_PSCI && !ARMV8_PSCI
> +       help
> +         Say Y here to support "ACPI parking protocol" enable method
> +         for booting Linux.
> +
> +         To use this feature, you must do:
> +           - Bring secondary CPUs into U-Boot proper in a board specific
> +             manner.  This must be done *after* relocation.  Otherwise, the
> +             secondary CPUs will spin in unprotected memory area because the
> +             master CPU protects the relocated spin code.
> +
>  config SPL_TINY_MEMSET
>         bool "Use a very small memset() in SPL"
>         depends on SPL
> diff --git a/lib/acpi/acpi_table.c b/lib/acpi/acpi_table.c
> index eb3c83ab02..56eb4a10d0 100644
> --- a/lib/acpi/acpi_table.c
> +++ b/lib/acpi/acpi_table.c
> @@ -271,6 +271,9 @@ int acpi_write_madt(struct acpi_ctx *ctx, const struct 
> acpi_writer *entry)
>         /* (Re)calculate length and checksum */
>         header->length = (uintptr_t)current - (uintptr_t)madt;
>
> +       if (IS_ENABLED(CONFIG_ACPI_PARKING_PROTOCOL))
> +               acpi_write_parking_protocol(madt);
> +
>         header->checksum = table_compute_checksum((void *)madt, 
> header->length);
>         acpi_add_table(ctx, madt);
>         acpi_inc(ctx, madt->header.length);
> --
> 2.45.2
>

Regards,
Simon

Reply via email to