On Mon, Mar 23, 2015 at 07:06:03PM +0530, Bharata B Rao wrote:
> Parse ibm,architecture.vec table obtained from the guest and enable
> memory node configuration via ibm,dynamic-reconfiguration-memory if guest
> supports it. This is in preparation to support memory hotplug for
> sPAPR guests.
>
> This changes the way memory node configuration is done. Currently all
> memory nodes are built upfront. But after this patch, only memory@0 node
> for RMA is built upfront. Guest kernel boots with just that and rest of
> the memory nodes (via memory@XXX or ibm,dynamic-reconfiguration-memory)
> are built when guest does ibm,client-architecture-support call.
>
> Note: This patch needs a SLOF enhancement which is already part of
> upstream SLOF.
>
> Signed-off-by: Bharata B Rao <bhar...@linux.vnet.ibm.com>
> ---
>  docs/specs/ppc-spapr-hotplug.txt |  48 +++++++++
>  hw/ppc/spapr.c                   | 228 +++++++++++++++++++++++++++++++--------
>  hw/ppc/spapr_hcall.c             |  51 +++++++--
>  include/hw/ppc/spapr.h           |  15 ++-
>  4 files changed, 293 insertions(+), 49 deletions(-)
>
> diff --git a/docs/specs/ppc-spapr-hotplug.txt b/docs/specs/ppc-spapr-hotplug.txt
> index 46e0719..9d574b5 100644
> --- a/docs/specs/ppc-spapr-hotplug.txt
> +++ b/docs/specs/ppc-spapr-hotplug.txt
> @@ -302,4 +302,52 @@ consisting of <phys>, <size> and <maxcpus>.
>  pseries guests use this property to note the maximum allowed CPUs for the
>  guest.
>
> +== ibm,dynamic-reconfiguration-memory ==
> +
> +ibm,dynamic-reconfiguration-memory is a device tree node that represents
> +dynamically reconfigurable logical memory blocks (LMB). This node
> +is generated only when the guest advertises the support for it via
> +ibm,client-architecture-support call. Memory that is not dynamically
> +reconfigurable is represented by /memory nodes. The properties of this
> +node that are of interest to the sPAPR memory hotplug implementation
> +in QEMU are described here.
> +
> +ibm,lmb-size
> +
> +This 64bit integer defines the size of each dynamically reconfigurable LMB.
> +
> +ibm,associativity-lookup-arrays
> +
> +This property defines a lookup array in which the NUMA associativity
> +information for each LMB can be found. It is a property encoded array
> +that begins with an integer M, the number of associativity lists, followed
> +by an integer N, the number of entries per associativity list, and terminated
> +by M associativity lists each of length N integers.
> +
> +This property provides the same information as given by ibm,associativity
> +property in a /memory node. Each assigned LMB has an index value between
> +0 and M-1 which is used as an index into this table to select which
> +associativity list to use for the LMB. This index value for each LMB
> +is defined in ibm,dynamic-memory property.
> +
> +ibm,dynamic-memory
> +
> +This property describes the dynamically reconfigurable memory. It is a
> +property encoded array that has an integer N, the number of LMBs, followed
> +by N LMB list entries.
> +
> +Each LMB list entry consists of the following elements:
> +
> +- Logical address of the start of the LMB encoded as a 64bit integer. This
> +  corresponds to reg property in /memory node.
> +- DRC index of the LMB that corresponds to ibm,my-drc-index property
> +  in a /memory node.
> +- Four bytes reserved for expansion.
> +- Associativity list index for the LMB that is used as an index into
> +  ibm,associativity-lookup-arrays property described earlier. This
> +  is used to retrieve the right associativity list to be used for this
> +  LMB.
> +- A 32bit flags word. The bit at bit position 0x00000008 defines whether
> +  the LMB is assigned to the partition as of boot time.
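
To make the encoding above concrete, here is a small stand-alone sketch of how
a consumer might decode one 6-cell LMB list entry.  This is purely editorial
illustration, not part of the patch; the struct and helper names are invented,
and the cells are assumed to be stored big-endian as is usual for device tree
properties.

#include <stdint.h>

/* Invented names, for illustration only */
struct lmb_list_entry {
    uint64_t base_addr;   /* corresponds to reg in a /memory node */
    uint32_t drc_index;   /* corresponds to ibm,my-drc-index */
    uint32_t assoc_index; /* index into ibm,associativity-lookup-arrays */
    uint32_t flags;       /* 0x00000008 => assigned to the partition at boot */
};

static uint32_t be32_cell(const uint8_t *p)
{
    return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
           ((uint32_t)p[2] << 8)  |  (uint32_t)p[3];
}

/* Decode entry i of an ibm,dynamic-memory property value */
static void decode_lmb_entry(const uint8_t *prop, int i,
                             struct lmb_list_entry *e)
{
    /* Skip the leading cell holding N, then 6 cells (24 bytes) per entry */
    const uint8_t *p = prop + 4 + i * 24;

    e->base_addr   = ((uint64_t)be32_cell(p) << 32) | be32_cell(p + 4);
    e->drc_index   = be32_cell(p + 8);
    /* p + 12 .. p + 15 are the four reserved bytes */
    e->assoc_index = be32_cell(p + 16);
    e->flags       = be32_cell(p + 20);
}
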
> +
>  [1] http://thread.gmane.org/gmane.linux.ports.ppc.embedded/75350/focus=106867
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index e43bb49..4e844ab 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -541,42 +541,6 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
>      return fdt;
>  }
>
> -int spapr_h_cas_compose_response(target_ulong addr, target_ulong size)
> -{
> -    void *fdt, *fdt_skel;
> -    sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 };
> -
> -    size -= sizeof(hdr);
> -
> -    /* Create sceleton */
> -    fdt_skel = g_malloc0(size);
> -    _FDT((fdt_create(fdt_skel, size)));
> -    _FDT((fdt_begin_node(fdt_skel, "")));
> -    _FDT((fdt_end_node(fdt_skel)));
> -    _FDT((fdt_finish(fdt_skel)));
> -    fdt = g_malloc0(size);
> -    _FDT((fdt_open_into(fdt_skel, fdt, size)));
> -    g_free(fdt_skel);
> -
> -    /* Fix skeleton up */
> -    _FDT((spapr_fixup_cpu_dt(fdt, spapr)));
> -
> -    /* Pack resulting tree */
> -    _FDT((fdt_pack(fdt)));
> -
> -    if (fdt_totalsize(fdt) + sizeof(hdr) > size) {
> -        trace_spapr_cas_failed(size);
> -        return -1;
> -    }
> -
> -    cpu_physical_memory_write(addr, &hdr, sizeof(hdr));
> -    cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt));
> -    trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr));
> -    g_free(fdt);
> -
> -    return 0;
> -}
> -
>  static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start,
>                                         hwaddr size)
>  {
> @@ -630,7 +594,6 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
>      }
>      if (!mem_start) {
>          /* ppc_spapr_init() checks for rma_size <= node0_size already */
> -        spapr_populate_memory_node(fdt, i, 0, spapr->rma_size);
>          mem_start += spapr->rma_size;
>          node_size -= spapr->rma_size;
>      }
> @@ -775,6 +738,186 @@ static void spapr_populate_cpu_dt_node(void *fdt, sPAPREnvironment *spapr)
>
>  }
>
> +/*
> + * TODO: Take care of sparsemem configuration ?
> + */
> +static uint64_t numa_node_end(uint32_t nodeid)
> +{
> +    uint32_t i = 0;
> +    uint64_t addr = 0;
> +
> +    do {
> +        addr += numa_info[i].node_mem;
> +    } while (++i <= nodeid);
> +
> +    return addr;
> +}
> +
> +static uint64_t numa_node_start(uint32_t nodeid)
> +{
> +    if (!nodeid) {
> +        return 0;
> +    } else {
> +        return numa_node_end(nodeid - 1);
> +    }
> +}
> +
> +/*
> + * Given the addr, return the NUMA node to which the address belongs to.
> + */
> +static uint32_t get_numa_node(uint64_t addr)
> +{
> +    uint32_t i;
> +
> +    for (i = 0; i < nb_numa_nodes; i++) {
> +        if ((addr >= numa_node_start(i)) && (addr < numa_node_end(i))) {
> +            return i;
> +        }
> +    }

This function is O(N^2) in number of nodes, which is a bit hideous for
something so simple.
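
One possible O(N) shape, sketched here purely for illustration (it keeps the
patch's assumptions: numa_info[], nb_numa_nodes, and the fall-back to node 0
for unassigned memory):

static uint32_t get_numa_node(uint64_t addr)
{
    uint32_t i;
    uint64_t node_end = 0;

    /* Walk the nodes once, accumulating each node's end address */
    for (i = 0; i < nb_numa_nodes; i++) {
        node_end += numa_info[i].node_mem;
        if (addr < node_end) {
            return i;
        }
    }

    /* Unassigned memory goes to node 0 by default */
    return 0;
}
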
> +    /* Unassigned memory goes to node 0 by default */
> +    return 0;
> +}
> +
> +/*
> + * Adds ibm,dynamic-reconfiguration-memory node.
> + * Refer to docs/specs/ppc-spapr-hotplug.txt for the documentation
> + * of this device tree node.
> + */
> +static int spapr_populate_drconf_memory(sPAPREnvironment *spapr, void *fdt)
> +{
> +    int ret, i, offset;
> +    uint32_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
> +    uint32_t nr_rma_lmbs = spapr->rma_size/lmb_size;
> +    uint32_t nr_lmbs = spapr->maxram_limit/lmb_size - nr_rma_lmbs;
> +    uint32_t nr_assigned_lmbs = spapr->ram_limit/lmb_size - nr_rma_lmbs;
> +    uint32_t *int_buf, *cur_index, buf_len;
> +
> +    /* Allocate enough buffer size to fit in ibm,dynamic-memory */
> +    buf_len = nr_lmbs * SPAPR_DR_LMB_LIST_ENTRY_SIZE * sizeof(uint32_t) +
> +              sizeof(uint32_t);
> +    cur_index = int_buf = g_malloc0(buf_len);
> +
> +    offset = fdt_add_subnode(fdt, 0, "ibm,dynamic-reconfiguration-memory");
> +
> +    ret = fdt_setprop_u64(fdt, offset, "ibm,lmb-size", lmb_size);
> +    if (ret < 0) {
> +        goto out;
> +    }
> +
> +    ret = fdt_setprop_cell(fdt, offset, "ibm,memory-flags-mask", 0xff);
> +    if (ret < 0) {
> +        goto out;
> +    }
> +
> +    ret = fdt_setprop_cell(fdt, offset, "ibm,memory-preservation-time", 0x0);
> +    if (ret < 0) {
> +        goto out;
> +    }
> +
> +    /* ibm,dynamic-memory */
> +    int_buf[0] = cpu_to_be32(nr_lmbs);
> +    cur_index++;
> +    for (i = 0; i < nr_lmbs; i++) {
> +        sPAPRDRConnector *drc;
> +        sPAPRDRConnectorClass *drck;
> +        uint64_t addr;
> +        uint32_t *dynamic_memory = cur_index;
> +
> +        if (i < nr_assigned_lmbs) {
> +            addr = (i + nr_rma_lmbs) * lmb_size;
> +        } else {
> +            addr = (i - nr_assigned_lmbs) * lmb_size +
> +                SPAPR_MACHINE(qdev_get_machine())->hotplug_memory_base;
> +        }
> +        drc = spapr_dr_connector_new(qdev_get_machine(),
> +                SPAPR_DR_CONNECTOR_TYPE_LMB, addr/lmb_size);
> +        drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> +
> +        dynamic_memory[0] = cpu_to_be32(addr >> 32);
> +        dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff);
> +        dynamic_memory[2] = cpu_to_be32(drck->get_index(drc));
> +        dynamic_memory[3] = cpu_to_be32(0); /* reserved */
> +        dynamic_memory[4] = cpu_to_be32(get_numa_node(addr));
> +        dynamic_memory[5] = (addr < spapr->ram_limit) ?
> +                            cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED) :
> +                            cpu_to_be32(0);
> +
> +        cur_index += SPAPR_DR_LMB_LIST_ENTRY_SIZE;
> +    }
> +    ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory", int_buf, buf_len);
> +    if (ret < 0) {
> +        goto out;
> +    }
> +
> +    /* ibm,associativity-lookup-arrays */
> +    cur_index = int_buf;
> +    int_buf[0] = cpu_to_be32(nb_numa_nodes);
> +    int_buf[1] = cpu_to_be32(4); /* Number of entries per associativity list */
> +    cur_index += 2;
> +    for (i = 0; i < nb_numa_nodes; i++) {
> +        uint32_t associativity[] = {
> +            cpu_to_be32(0x0),
> +            cpu_to_be32(0x0),
> +            cpu_to_be32(0x0),
> +            cpu_to_be32(i)
> +        };
> +        memcpy(cur_index, associativity, sizeof(associativity));
> +        cur_index += 4;
> +    }
> +    ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf,
> +                      (cur_index - int_buf) * sizeof(uint32_t));
> +out:
> +    g_free(int_buf);
> +    return ret;
> +}
> +
> +int spapr_h_cas_compose_response(target_ulong addr, target_ulong size,
> +                                 bool cpu_update, bool memory_update)
> +{
> +    void *fdt, *fdt_skel;
> +    sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 };
> +
> +    size -= sizeof(hdr);
> +
> +    /* Create sceleton */
> +    fdt_skel = g_malloc0(size);
> +    _FDT((fdt_create(fdt_skel, size)));
> +    _FDT((fdt_begin_node(fdt_skel, "")));
> +    _FDT((fdt_end_node(fdt_skel)));
> +    _FDT((fdt_finish(fdt_skel)));
> +    fdt = g_malloc0(size);
> +    _FDT((fdt_open_into(fdt_skel, fdt, size)));
> +    g_free(fdt_skel);
> +
> +    /* Fixup cpu nodes */
> +    if (cpu_update) {
> +        _FDT((spapr_fixup_cpu_dt(fdt, spapr)));
> +    }
> +
> +    /* Generate memory nodes or ibm,dynamic-reconfiguration-memory node */
> +    if (memory_update) {
> +        _FDT((spapr_populate_drconf_memory(spapr, fdt)));
> +    } else {
> +        _FDT((spapr_populate_memory(spapr, fdt)));
> +    }
> +
> +    /* Pack resulting tree */
> +    _FDT((fdt_pack(fdt)));
> +
> +    if (fdt_totalsize(fdt) + sizeof(hdr) > size) {
> +        trace_spapr_cas_failed(size);
> +        return -1;
> +    }
> +
> +    cpu_physical_memory_write(addr, &hdr, sizeof(hdr));
> +    cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt));
> +    trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr));
> +    g_free(fdt);
> +
> +    return 0;
> +}
> +
>  static void spapr_finalize_fdt(sPAPREnvironment *spapr,
>                                 hwaddr fdt_addr,
>                                 hwaddr rtas_addr,
> @@ -791,11 +934,12 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr,
>      /* open out the base tree into a temp buffer for the final tweaks */
>      _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));
>
> -    ret = spapr_populate_memory(spapr, fdt);
> -    if (ret < 0) {
> -        fprintf(stderr, "couldn't setup memory nodes in fdt\n");
> -        exit(1);
> -    }
> +    /*
> +     * Add memory@0 node to represent RMA. Rest of the memory is either
> +     * represented by memory nodes or ibm,dynamic-reconfiguration-memory
> +     * node later during ibm,client-architecture-support call.
> +     */
> +    spapr_populate_memory_node(fdt, 0, 0, spapr->rma_size);
>
>      ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
>      if (ret < 0) {
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index 4f76f1c..20507c6 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -807,6 +807,32 @@ static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPREnvironment *spapr,
>      return ret;
>  }
>
> +/*
> + * Return the offset to the requested option vector @vector in the
> + * option vector table @table.
> + */
> +static target_ulong cas_get_option_vector(int vector, target_ulong table)
> +{
> +    int i;
> +    char nr_vectors, nr_entries;
> +
> +    if (!table) {
> +        return 0;
> +    }
> +
> +    nr_vectors = (rtas_ld(table, 0) >> 24) + 1;

I don't think rtas_ld() should be used outside its intended function in
rtas. Make a direct call to ldl_phys instead.
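
Purely as an illustration of that suggestion (not something from the patch):
rtas_ld() here amounts to a big-endian word load from guest physical memory,
so a direct load could look roughly like the hypothetical helper below.
ldl_be_phys() rather than plain ldl_phys() is assumed because the table is
big-endian, and the real-mode address masking that rtas_ld() also performs is
glossed over.

/* Hypothetical helper, for illustration only */
static inline uint32_t cas_ld(target_ulong table, int n)
{
    return ldl_be_phys(&address_space_memory, table + 4 * n);
}

With something like that, the line quoted above would read
nr_vectors = (cas_ld(table, 0) >> 24) + 1, and the rtas_ld() call further
down in the loop could be replaced the same way.
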
> +    if (!vector || vector > nr_vectors) {
> +        return 0;
> +    }
> +    table++; /* skip nr option vectors */
> +
> +    for (i = 0; i < vector - 1; i++) {
> +        nr_entries = rtas_ld(table, 0) >> 24;
> +        table += nr_entries + 2;
> +    }
> +    return table;
> +}
> +
>  typedef struct {
>      PowerPCCPU *cpu;
>      uint32_t cpu_version;
> @@ -827,19 +853,22 @@ static void do_set_compat(void *arg)
>      ((cpuver) == CPU_POWERPC_LOGICAL_2_06_PLUS) ? 2061 : \
>      ((cpuver) == CPU_POWERPC_LOGICAL_2_07) ? 2070 : 0)
>
> +#define OV5_DRCONF_MEMORY 0x20
> +
>  static target_ulong h_client_architecture_support(PowerPCCPU *cpu_,
>                                                    sPAPREnvironment *spapr,
>                                                    target_ulong opcode,
>                                                    target_ulong *args)
>  {
> -    target_ulong list = args[0];
> +    target_ulong list = args[0], ov_table;
>      PowerPCCPUClass *pcc_ = POWERPC_CPU_GET_CLASS(cpu_);
>      CPUState *cs;
> -    bool cpu_match = false;
> +    bool cpu_match = false, cpu_update = true, memory_update = false;
>      unsigned old_cpu_version = cpu_->cpu_version;
>      unsigned compat_lvl = 0, cpu_version = 0;
>      unsigned max_lvl = get_compat_level(cpu_->max_compat);
>      int counter;
> +    char ov5_byte2;
>
>      /* Parse PVR list */
>      for (counter = 0; counter < 512; ++counter) {
> @@ -889,8 +918,6 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu_,
>          }
>      }
>
> -    /* For the future use: here @list points to the first capability */
> -
>      /* Parsing finished */
>      trace_spapr_cas_pvr(cpu_->cpu_version, cpu_match,
>                          cpu_version, pcc_->pcr_mask);
> @@ -914,14 +941,26 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu_,
>          }
>      }
>
>      if (!cpu_version) {
> -        return H_SUCCESS;
> +        cpu_update = false;
>      }
>
> +    /* For the future use: here @ov_table points to the first option vector */
> +    ov_table = list;
> +
> +    list = cas_get_option_vector(5, ov_table);
>      if (!list) {
>          return H_SUCCESS;
>      }
>
> -    if (spapr_h_cas_compose_response(args[1], args[2])) {
> +    /* @list now points to OV 5 */
> +    list += 2;
> +    ov5_byte2 = rtas_ld(list, 0) >> 24;
> +    if (ov5_byte2 & OV5_DRCONF_MEMORY) {
> +        memory_update = true;
> +    }
> +
> +    if (spapr_h_cas_compose_response(args[1], args[2], cpu_update,
> +                                     memory_update)) {
>          qemu_system_reset_request();
>      }
>
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 53560e9..a286fe7 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -554,9 +554,22 @@ struct sPAPREventLogEntry {
>  /* 1GB alignment for hotplug memory region */
>  #define SPAPR_HOTPLUG_MEM_ALIGN (1ULL << 30)
>
> +/*
> + * Number of 32 bit words in each LMB list entry in ibm,dynamic-memory
> + * property under ibm,dynamic-reconfiguration-memory node.
> + */
> +#define SPAPR_DR_LMB_LIST_ENTRY_SIZE 6
> +
> +/*
> + * This flag value defines the LMB as assigned in ibm,dynamic-memory
> + * property under ibm,dynamic-reconfiguration-memory node.
> + */
> +#define SPAPR_LMB_FLAGS_ASSIGNED 0x00000008
> +
>  void spapr_events_init(sPAPREnvironment *spapr);
>  void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq);
> -int spapr_h_cas_compose_response(target_ulong addr, target_ulong size);
> +int spapr_h_cas_compose_response(target_ulong addr, target_ulong size,
> +                                 bool cpu_update, bool memory_update);
>  sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
>                                     uint64_t bus_offset,
>                                     uint32_t page_shift,

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you. NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson