On 29.09.2011, at 08:45, David Gibson wrote: > At present, using the hypervisor aware Book3S-HV KVM will only work > with qemu on POWER7 CPUs. PPC970 CPUs also have hypervisor > capability, but they lack the VRMA feature which makes assigning guest > memory easier. > > In order to allow KVM Book3S-HV on PPC970, we need to specially > allocate the first chunk of guest memory (the "Real Mode Area" or > RMA), so that it is physically contiguous. > > Sufficiently recent host kernels allow such contiguous RMAs to be > allocated, with a kvm capability advertising whether the feature is > available and/or necessary on this hardware. This patch enables qemu > to use this support, thus allowing kvm acceleration of pseries qemu > machines on PPC970 hardware. > > Signed-off-by: Paul Mackerras <pau...@samba.org> > Signed-off-by: David Gibson <da...@gibson.dropbear.id.au> > --- > hw/spapr.c | 50 ++++++++++++++++++++++++++++++++++++++++-------- > target-ppc/kvm.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++ > target-ppc/kvm_ppc.h | 6 +++++ > 3 files changed, 98 insertions(+), 9 deletions(-) > > diff --git a/hw/spapr.c b/hw/spapr.c > index ba9ae1c..d51425a 100644 > --- a/hw/spapr.c > +++ b/hw/spapr.c > @@ -89,6 +89,7 @@ qemu_irq spapr_allocate_irq(uint32_t hint, uint32_t > *irq_num) > } > > static void *spapr_create_fdt_skel(const char *cpu_model, > + target_phys_addr_t rma_size, > target_phys_addr_t initrd_base, > target_phys_addr_t initrd_size, > const char *boot_device, > @@ -97,7 +98,9 @@ static void *spapr_create_fdt_skel(const char *cpu_model, > { > void *fdt; > CPUState *env; > - uint64_t mem_reg_property[] = { 0, cpu_to_be64(ram_size) }; > + uint64_t mem_reg_property_rma[] = { 0, cpu_to_be64(rma_size) }; > + uint64_t mem_reg_property_nonrma[] = { cpu_to_be64(rma_size), > + cpu_to_be64(ram_size - rma_size) > }; > uint32_t start_prop = cpu_to_be32(initrd_base); > uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size); > uint32_t pft_size_prop[] = {0, 
cpu_to_be32(hash_shift)}; > @@ -143,15 +146,25 @@ static void *spapr_create_fdt_skel(const char > *cpu_model, > > _FDT((fdt_end_node(fdt))); > > - /* memory node */ > + /* memory node(s) */ > _FDT((fdt_begin_node(fdt, "memory@0"))); > > _FDT((fdt_property_string(fdt, "device_type", "memory"))); > - _FDT((fdt_property(fdt, "reg", > - mem_reg_property, sizeof(mem_reg_property)))); > - > + _FDT((fdt_property(fdt, "reg", mem_reg_property_rma, > + sizeof(mem_reg_property_rma)))); > _FDT((fdt_end_node(fdt))); > > + if (ram_size > rma_size) { > + char mem_name[32]; > + > + sprintf(mem_name, "memory@%" PRIx64, (uint64_t)rma_size); > + _FDT((fdt_begin_node(fdt, mem_name))); > + _FDT((fdt_property_string(fdt, "device_type", "memory"))); > + _FDT((fdt_property(fdt, "reg", mem_reg_property_nonrma, > + sizeof(mem_reg_property_nonrma)))); > + _FDT((fdt_end_node(fdt))); > + } > + > /* cpus */ > _FDT((fdt_begin_node(fdt, "cpus"))); > > @@ -341,6 +354,7 @@ static void ppc_spapr_init(ram_addr_t ram_size, > { > CPUState *env; > int i; > + target_phys_addr_t rma_alloc_size, rma_size; > ram_addr_t ram_offset; > uint32_t initrd_base; > long kernel_size, initrd_size, fw_size; > @@ -350,10 +364,23 @@ static void ppc_spapr_init(ram_addr_t ram_size, > spapr = g_malloc(sizeof(*spapr)); > cpu_ppc_hypercall = emulate_spapr_hypercall; > > + /* Allocate RMA if necessary */ > + rma_alloc_size = kvmppc_alloc_rma("ppc_spapr.rma"); > + > + if (rma_alloc_size == -1) { > + hw_error("qemu: Unable to create RMA\n"); > + exit(1); > + } > + if (rma_alloc_size && (rma_alloc_size < ram_size)) { > + rma_size = rma_alloc_size; > + } else { > + rma_size = ram_size; > + } > + > /* We place the device tree just below either the top of RAM, or > * 2GB, so that it can be processed with 32-bit code if > * necessary */ > - spapr->fdt_addr = MIN(ram_size, 0x80000000) - FDT_MAX_SIZE; > + spapr->fdt_addr = MIN(rma_size, 0x80000000) - FDT_MAX_SIZE;
The change looks sane, so I'd assume the comment above ("just below either the top of RAM, or 2GB") is now wrong, since the code now uses rma_size rather than ram_size :) > spapr->rtas_addr = spapr->fdt_addr - RTAS_MAX_SIZE; > > /* init CPUs */ > @@ -378,8 +405,13 @@ static void ppc_spapr_init(ram_addr_t ram_size, > > /* allocate RAM */ > spapr->ram_limit = ram_size; > - ram_offset = qemu_ram_alloc(NULL, "ppc_spapr.ram", spapr->ram_limit); > - cpu_register_physical_memory(0, ram_size, ram_offset); > + if (spapr->ram_limit > rma_alloc_size) { > + ram_addr_t nonrma_base = rma_alloc_size; > + ram_addr_t nonrma_size = spapr->ram_limit - rma_alloc_size; > + > + ram_offset = qemu_ram_alloc(NULL, "ppc_spapr.ram", nonrma_size); > + cpu_register_physical_memory(nonrma_base, nonrma_size, ram_offset); > + } > > /* allocate hash page table. For now we always make this 16mb, > * later we should probably make it scale to the size of guest > @@ -503,7 +535,7 @@ static void ppc_spapr_init(ram_addr_t ram_size, > } > > /* Prepare the device tree */ > - spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, > + spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, rma_size, > initrd_base, initrd_size, > boot_device, kernel_cmdline, > pteg_shift + 7); > diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c > index 2c1bc7a..37ee902 100644 > --- a/target-ppc/kvm.c > +++ b/target-ppc/kvm.c > @@ -55,6 +55,9 @@ static int cap_interrupt_level = false; > static int cap_segstate; > static int cap_booke_sregs; > static int cap_ppc_smt = 0; > +#ifdef KVM_CAP_PPC_RMA No need for these ifdefs anymore, thanks to QEMU's local copy of the KVM headers :) Alex