CPUs that are not the boot CPU need to run in spinning code to check if they should run off to execute and if so where to jump to. This usually happens by leaving secondary CPUs looping and checking if some variable in memory changed.
In an environment like Qemu however we can be more clever. We can just export the spin table the primary CPU modifies as MMIO region that would event based wake up the respective secondary CPUs. That saves us quite some cycles while the secondary CPUs are not up yet. So this patch adds a PV device that simply exports the spinning table into the guest and thus allows the primary CPU to wake up secondary ones. Signed-off-by: Alexander Graf <ag...@suse.de> --- v1 -> v2: - change into MMIO scheme - map the secondary NIP instead of 0 1:1 - only map 64MB for TLB, same as u-boot - prepare code for 64-bit spinnings v2 -> v3: - remove r6 - set MAS2_M - map EA 0 - use second TLB1 entry v3 -> v4: - change to memoryops v4 -> v5: - fix endianness bugs v5 -> v6: - add header --- Makefile.target | 2 +- hw/ppce500_mpc8544ds.c | 33 +++++++- hw/ppce500_spin.c | 215 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 245 insertions(+), 5 deletions(-) create mode 100644 hw/ppce500_spin.c diff --git a/Makefile.target b/Makefile.target index 8db9f37..ff3efa4 100644 --- a/Makefile.target +++ b/Makefile.target @@ -247,7 +247,7 @@ endif obj-ppc-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o obj-ppc-y += ppc440.o ppc440_bamboo.o # PowerPC E500 boards -obj-ppc-y += ppce500_mpc8544ds.o mpc8544_guts.o +obj-ppc-y += ppce500_mpc8544ds.o mpc8544_guts.o ppce500_spin.o # PowerPC 440 Xilinx ML507 reference board. obj-ppc-y += virtex_ml507.o obj-ppc-$(CONFIG_KVM) += kvm_ppc.o diff --git a/hw/ppce500_mpc8544ds.c b/hw/ppce500_mpc8544ds.c index 9379624..3b8b449 100644 --- a/hw/ppce500_mpc8544ds.c +++ b/hw/ppce500_mpc8544ds.c @@ -49,6 +49,7 @@ #define MPC8544_PCI_IO 0xE1000000 #define MPC8544_PCI_IOLEN 0x10000 #define MPC8544_UTIL_BASE (MPC8544_CCSRBAR_BASE + 0xe0000) +#define MPC8544_SPIN_BASE 0xEF000000 struct boot_info { @@ -164,6 +165,18 @@ static void mmubooke_create_initial_mapping(CPUState *env, tlb->mas7_3 |= MAS3_UR | MAS3_UW | MAS3_UX | MAS3_SR | MAS3_SW | MAS3_SX; } +static void mpc8544ds_cpu_reset_sec(void *opaque) +{ + CPUState *env = opaque; + + cpu_reset(env); + + /* Secondary CPU starts in halted state for now. Needs to change when + implementing non-kernel boot. */ + env->halted = 1; + env->exception_index = EXCP_HLT; +} + static void mpc8544ds_cpu_reset(void *opaque) { CPUState *env = opaque; @@ -172,6 +185,7 @@ static void mpc8544ds_cpu_reset(void *opaque) cpu_reset(env); /* Set initial guest state. */ + env->halted = 0; env->gpr[1] = (16<<20) - 8; env->gpr[3] = bi->dt_base; env->nip = bi->entry; @@ -199,7 +213,6 @@ static void mpc8544ds_init(ram_addr_t ram_size, unsigned int pci_irq_nrs[4] = {1, 2, 3, 4}; qemu_irq **irqs, *mpic; DeviceState *dev; - struct boot_info *boot_info; CPUState *firstenv = NULL; /* Setup CPUs */ @@ -234,9 +247,16 @@ static void mpc8544ds_init(ram_addr_t ram_size, env->spr[SPR_40x_TCR] = 1 << 26; /* Register reset handler */ - boot_info = g_malloc0(sizeof(struct boot_info)); - qemu_register_reset(mpc8544ds_cpu_reset, env); - env->load_info = boot_info; + if (!i) { + /* Primary CPU */ + struct boot_info *boot_info; + boot_info = g_malloc0(sizeof(struct boot_info)); + qemu_register_reset(mpc8544ds_cpu_reset, env); + env->load_info = boot_info; + } else { + /* Secondary CPUs */ + qemu_register_reset(mpc8544ds_cpu_reset_sec, env); + } } env = firstenv; @@ -289,6 +309,9 @@ static void mpc8544ds_init(ram_addr_t ram_size, } } + /* Register spinning region */ + sysbus_create_simple("e500-spin", MPC8544_SPIN_BASE, NULL); + /* Load kernel. */ if (kernel_filename) { kernel_size = load_uimage(kernel_filename, &entry, &loadaddr, NULL); @@ -321,6 +344,8 @@ static void mpc8544ds_init(ram_addr_t ram_size, /* If we're loading a kernel directly, we must load the device tree too. */ if (kernel_filename) { + struct boot_info *boot_info; + #ifndef CONFIG_FDT cpu_abort(env, "Compiled without FDT support - can't load kernel\n"); #endif diff --git a/hw/ppce500_spin.c b/hw/ppce500_spin.c new file mode 100644 index 0000000..cccd940 --- /dev/null +++ b/hw/ppce500_spin.c @@ -0,0 +1,215 @@ +/* + * QEMU PowerPC e500v2 ePAPR spinning code + * + * Copyright (C) 2011 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Alexander Graf, <ag...@suse.de> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + * + * This code is not really a device, but models an interface that usually + * firmware takes care of. It's used when QEMU plays the role of firmware. + * + * Specification: + * + * https://www.power.org/resources/downloads/Power_ePAPR_APPROVED_v1.1.pdf + * + */ + +#include "hw.h" +#include "sysemu.h" +#include "sysbus.h" +#include "kvm.h" + +#define MAX_CPUS 32 + +typedef struct spin_info { + uint64_t addr; + uint64_t r3; + uint32_t resv; + uint32_t pir; + uint64_t reserved; +} __attribute__ ((packed)) SpinInfo; + +typedef struct spin_state { + SysBusDevice busdev; + MemoryRegion iomem; + SpinInfo spin[MAX_CPUS]; +} SpinState; + +typedef struct spin_kick { + CPUState *env; + SpinInfo *spin; +} SpinKick; + +static void spin_reset(void *opaque) +{ + SpinState *s = opaque; + int i; + + for (i = 0; i < MAX_CPUS; i++) { + SpinInfo *info = &s->spin[i]; + + info->pir = i; + info->r3 = i; + info->addr = 1; + } +} + +/* Create -kernel TLB entries for BookE, linearly spanning 256MB. */ +static inline target_phys_addr_t booke206_page_size_to_tlb(uint64_t size) +{ + return (ffs(size >> 10) - 1) >> 1; +} + +static void mmubooke_create_initial_mapping(CPUState *env, + target_ulong va, + target_phys_addr_t pa, + target_phys_addr_t len) +{ + ppcmas_tlb_t *tlb = booke206_get_tlbm(env, 1, 0, 1); + target_phys_addr_t size; + + size = (booke206_page_size_to_tlb(len) << MAS1_TSIZE_SHIFT); + tlb->mas1 = MAS1_VALID | size; + tlb->mas2 = (va & TARGET_PAGE_MASK) | MAS2_M; + tlb->mas7_3 = pa & TARGET_PAGE_MASK; + tlb->mas7_3 |= MAS3_UR | MAS3_UW | MAS3_UX | MAS3_SR | MAS3_SW | MAS3_SX; +} + +static void spin_kick(void *data) +{ + SpinKick *kick = data; + CPUState *env = kick->env; + SpinInfo *curspin = kick->spin; + target_phys_addr_t map_size = 64 * 1024 * 1024; + target_phys_addr_t map_start; + + cpu_synchronize_state(env); + stl_p(&curspin->pir, env->spr[SPR_PIR]); + env->nip = ldq_p(&curspin->addr) & (map_size - 1); + env->gpr[3] = ldq_p(&curspin->r3); + env->gpr[4] = 0; + env->gpr[5] = 0; + env->gpr[6] = 0; + env->gpr[7] = map_size; + env->gpr[8] = 0; + env->gpr[9] = 0; + + map_start = ldq_p(&curspin->addr) & ~(map_size - 1); + mmubooke_create_initial_mapping(env, 0, map_start, map_size); + + env->halted = 0; + env->exception_index = -1; + qemu_cpu_kick(env); +} + +static void spin_write(void *opaque, target_phys_addr_t addr, uint64_t value, + unsigned len) +{ + SpinState *s = opaque; + int env_idx = addr / sizeof(SpinInfo); + CPUState *env; + SpinInfo *curspin = &s->spin[env_idx]; + uint8_t *curspin_p = (uint8_t*)curspin; + + for (env = first_cpu; env != NULL; env = env->next_cpu) { + if (env->cpu_index == env_idx) { + break; + } + } + + if (!env) { + /* Unknown CPU */ + return; + } + + if (!env->cpu_index) { + /* primary CPU doesn't spin */ + return; + } + + curspin_p = &curspin_p[addr % sizeof(SpinInfo)]; + switch (len) { + case 1: + stb_p(curspin_p, value); + break; + case 2: + stw_p(curspin_p, value); + break; + case 4: + stl_p(curspin_p, value); + break; + } + + if (!(ldq_p(&curspin->addr) & 1)) { + /* run CPU */ + SpinKick kick = { + .env = env, + .spin = curspin, + }; + + run_on_cpu(env, spin_kick, &kick); + } +} + +static uint64_t spin_read(void *opaque, target_phys_addr_t addr, unsigned len) +{ + SpinState *s = opaque; + uint8_t *spin_p = &((uint8_t*)s->spin)[addr]; + + switch (len) { + case 1: + return ldub_p(spin_p); + case 2: + return lduw_p(spin_p); + case 4: + return ldl_p(spin_p); + default: + assert(0); + } +} + +const MemoryRegionOps spin_rw_ops = { + .read = spin_read, + .write = spin_write, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static int ppce500_spin_initfn(SysBusDevice *dev) +{ + SpinState *s; + + s = FROM_SYSBUS(SpinState, sysbus_from_qdev(dev)); + + memory_region_init_io(&s->iomem, &spin_rw_ops, s, "e500 spin pv device", + sizeof(SpinInfo) * MAX_CPUS); + sysbus_init_mmio_region(dev, &s->iomem); + + qemu_register_reset(spin_reset, s); + + return 0; +} + +static SysBusDeviceInfo ppce500_spin_info = { + .init = ppce500_spin_initfn, + .qdev.name = "e500-spin", + .qdev.size = sizeof(SpinState), +}; + +static void ppce500_spin_register(void) +{ + sysbus_register_withprop(&ppce500_spin_info); +} +device_init(ppce500_spin_register); -- 1.6.0.2