On Thu, Mar 26, 2015 at 02:44:17PM +1100, David Gibson wrote: > > +/* > > + * TODO: Take care of sparsemem configuration ? > > + */ > > +static uint64_t numa_node_end(uint32_t nodeid) > > +{ > > + uint32_t i = 0; > > + uint64_t addr = 0; > > + > > + do { > > + addr += numa_info[i].node_mem; > > + } while (++i <= nodeid); > > + > > + return addr; > > +} > > + > > +static uint64_t numa_node_start(uint32_t nodeid) > > +{ > > + if (!nodeid) { > > + return 0; > > + } else { > > + return numa_node_end(nodeid - 1); > > + } > > +} > > + > > +/* > > + * Given the addr, return the NUMA node to which the address belongs to. > > + */ > > +static uint32_t get_numa_node(uint64_t addr) > > +{ > > + uint32_t i; > > + > > + for (i = 0; i < nb_numa_nodes; i++) { > > + if ((addr >= numa_node_start(i)) && (addr < numa_node_end(i))) { > > + return i; > > + } > > + } > > This function is O(N^2) in number of nodes, which is a bit hideous for > something so simple.
Will something like below work ? Will all archs be ok with this ? numa: Store start and end address range of each node in numa_info Keep track of start and end address of each NUMA node in numa_info structure so that lookup of node by address becomes easier. Signed-off-by: Bharata B Rao <bhar...@linux.vnet.ibm.com> --- include/sysemu/numa.h | 3 +++ numa.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h index 5633b85..6dd6387 100644 --- a/include/sysemu/numa.h +++ b/include/sysemu/numa.h @@ -14,11 +14,14 @@ typedef struct node_info { DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS); struct HostMemoryBackend *node_memdev; bool present; + uint64_t mem_start; + uint64_t mem_end; } NodeInfo; extern NodeInfo numa_info[MAX_NODES]; void parse_numa_opts(void); void numa_post_machine_init(void); void query_numa_node_mem(uint64_t node_mem[]); extern QemuOptsList qemu_numa_opts; +uint32_t get_numa_node(uint64_t addr, Error **errp); #endif diff --git a/numa.c b/numa.c index 5634bf0..d0eb647 100644 --- a/numa.c +++ b/numa.c @@ -53,6 +53,25 @@ static int max_numa_nodeid; /* Highest specified NUMA node ID, plus one. int nb_numa_nodes; NodeInfo numa_info[MAX_NODES]; +/* + * Given an address, return the index of the NUMA node to which the + * address belongs to. 
+ */ +uint32_t get_numa_node(uint64_t addr, Error **errp) +{ + uint32_t i; + + for (i = 0; i < nb_numa_nodes; i++) { + if (addr >= numa_info[i].mem_start && addr < numa_info[i].mem_end) { + return i; + } + } + error_setg(errp, "Address 0x" RAM_ADDR_FMT " doesn't belong to any NUMA node", addr); + + /* Return Node 0 for unclaimed address */ + return 0; +} + static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) { uint16_t nodenr; @@ -119,6 +138,15 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) numa_info[nodenr].node_mem = object_property_get_int(o, "size", NULL); numa_info[nodenr].node_memdev = MEMORY_BACKEND(o); } + + if (nodenr) { + numa_info[nodenr].mem_start = numa_info[nodenr-1].mem_end; + } else { + numa_info[nodenr].mem_start = 0; + } + numa_info[nodenr].mem_end = numa_info[nodenr].mem_start + + numa_info[nodenr].node_mem; + numa_info[nodenr].present = true; max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1); } > > + /* Unassigned memory goes to node 0 by default */ > > + return 0; > > +} > > + > > +/* > > + * Adds ibm,dynamic-reconfiguration-memory node. > > + * Refer to docs/specs/ppc-spapr-hotplug.txt for the documentation > > + * of this device tree node. 
> > + */ > > +static int spapr_populate_drconf_memory(sPAPREnvironment *spapr, void *fdt) > > +{ > > + int ret, i, offset; > > + uint32_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; > > + uint32_t nr_rma_lmbs = spapr->rma_size/lmb_size; > > + uint32_t nr_lmbs = spapr->maxram_limit/lmb_size - nr_rma_lmbs; > > + uint32_t nr_assigned_lmbs = spapr->ram_limit/lmb_size - nr_rma_lmbs; > > + uint32_t *int_buf, *cur_index, buf_len; > > + > > + /* Allocate enough buffer size to fit in ibm,dynamic-memory */ > > + buf_len = nr_lmbs * SPAPR_DR_LMB_LIST_ENTRY_SIZE * sizeof(uint32_t) + > > + sizeof(uint32_t); > > + cur_index = int_buf = g_malloc0(buf_len); > > + > > + offset = fdt_add_subnode(fdt, 0, "ibm,dynamic-reconfiguration-memory"); > > + > > + ret = fdt_setprop_u64(fdt, offset, "ibm,lmb-size", lmb_size); > > + if (ret < 0) { > > + goto out; > > + } > > + > > + ret = fdt_setprop_cell(fdt, offset, "ibm,memory-flags-mask", 0xff); > > + if (ret < 0) { > > + goto out; > > + } > > + > > + ret = fdt_setprop_cell(fdt, offset, "ibm,memory-preservation-time", > > 0x0); > > + if (ret < 0) { > > + goto out; > > + } > > + > > + /* ibm,dynamic-memory */ > > + int_buf[0] = cpu_to_be32(nr_lmbs); > > + cur_index++; > > + for (i = 0; i < nr_lmbs; i++) { > > + sPAPRDRConnector *drc; > > + sPAPRDRConnectorClass *drck; > > + uint64_t addr; > > + uint32_t *dynamic_memory = cur_index; > > + > > + if (i < nr_assigned_lmbs) { > > + addr = (i + nr_rma_lmbs) * lmb_size; > > + } else { > > + addr = (i - nr_assigned_lmbs) * lmb_size + > > + SPAPR_MACHINE(qdev_get_machine())->hotplug_memory_base; > > + } > > + drc = spapr_dr_connector_new(qdev_get_machine(), > > + SPAPR_DR_CONNECTOR_TYPE_LMB, addr/lmb_size); > > + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); > > + > > + dynamic_memory[0] = cpu_to_be32(addr >> 32); > > + dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff); > > + dynamic_memory[2] = cpu_to_be32(drck->get_index(drc)); > > + dynamic_memory[3] = cpu_to_be32(0); /* reserved */ > > + dynamic_memory[4] = 
cpu_to_be32(get_numa_node(addr)); > > + dynamic_memory[5] = (addr < spapr->ram_limit) ? > > + cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED) : > > + cpu_to_be32(0); > > + > > + cur_index += SPAPR_DR_LMB_LIST_ENTRY_SIZE; > > + } > > + ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory", int_buf, buf_len); > > + if (ret < 0) { > > + goto out; > > + } > > + > > + /* ibm,associativity-lookup-arrays */ > > + cur_index = int_buf; > > + int_buf[0] = cpu_to_be32(nb_numa_nodes); > > + int_buf[1] = cpu_to_be32(4); /* Number of entries per associativity > > list */ > > + cur_index += 2; > > + for (i = 0; i < nb_numa_nodes; i++) { > > + uint32_t associativity[] = { > > + cpu_to_be32(0x0), > > + cpu_to_be32(0x0), > > + cpu_to_be32(0x0), > > + cpu_to_be32(i) > > + }; > > + memcpy(cur_index, associativity, sizeof(associativity)); > > + cur_index += 4; > > + } > > + ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", > > int_buf, > > + (cur_index - int_buf) * sizeof(uint32_t)); > > +out: > > + g_free(int_buf); > > + return ret; > > +} > > + > > +int spapr_h_cas_compose_response(target_ulong addr, target_ulong size, > > + bool cpu_update, bool memory_update) > > +{ > > + void *fdt, *fdt_skel; > > + sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 }; > > + > > + size -= sizeof(hdr); > > + > > + /* Create skeleton */ > > + fdt_skel = g_malloc0(size); > > + _FDT((fdt_create(fdt_skel, size))); > > + _FDT((fdt_begin_node(fdt_skel, ""))); > > + _FDT((fdt_end_node(fdt_skel))); > > + _FDT((fdt_finish(fdt_skel))); > > + fdt = g_malloc0(size); > > + _FDT((fdt_open_into(fdt_skel, fdt, size))); > > + g_free(fdt_skel); > > + > > + /* Fixup cpu nodes */ > > + if (cpu_update) { > > + _FDT((spapr_fixup_cpu_dt(fdt, spapr))); > > + } > > + > > + /* Generate memory nodes or ibm,dynamic-reconfiguration-memory node */ > > + if (memory_update) { > > + _FDT((spapr_populate_drconf_memory(spapr, fdt))); > > + } else { > > + _FDT((spapr_populate_memory(spapr, fdt))); > > + } > > + > > + /* Pack 
resulting tree */ > > + _FDT((fdt_pack(fdt))); > > + > > + if (fdt_totalsize(fdt) + sizeof(hdr) > size) { > > + trace_spapr_cas_failed(size); > > + return -1; > > + } > > + > > + cpu_physical_memory_write(addr, &hdr, sizeof(hdr)); > > + cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt)); > > + trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr)); > > + g_free(fdt); > > + > > + return 0; > > +} > > + > > static void spapr_finalize_fdt(sPAPREnvironment *spapr, > > hwaddr fdt_addr, > > hwaddr rtas_addr, > > @@ -791,11 +934,12 @@ static void spapr_finalize_fdt(sPAPREnvironment > > *spapr, > > /* open out the base tree into a temp buffer for the final tweaks */ > > _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE))); > > > > - ret = spapr_populate_memory(spapr, fdt); > > - if (ret < 0) { > > - fprintf(stderr, "couldn't setup memory nodes in fdt\n"); > > - exit(1); > > - } > > + /* > > + * Add memory@0 node to represent RMA. Rest of the memory is either > > + * represented by memory nodes or ibm,dynamic-reconfiguration-memory > > + * node later during ibm,client-architecture-support call. > > + */ > > + spapr_populate_memory_node(fdt, 0, 0, spapr->rma_size); > > > > ret = spapr_populate_vdevice(spapr->vio_bus, fdt); > > if (ret < 0) { > > diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c > > index 4f76f1c..20507c6 100644 > > --- a/hw/ppc/spapr_hcall.c > > +++ b/hw/ppc/spapr_hcall.c > > @@ -807,6 +807,32 @@ static target_ulong h_set_mode(PowerPCCPU *cpu, > > sPAPREnvironment *spapr, > > return ret; > > } > > > > +/* > > + * Return the offset to the requested option vector @vector in the > > + * option vector table @table. 
> > + */ > > +static target_ulong cas_get_option_vector(int vector, target_ulong table) > > +{ > > + int i; > > + char nr_vectors, nr_entries; > > + > > + if (!table) { > > + return 0; > > + } > > + > > + nr_vectors = (rtas_ld(table, 0) >> 24) + 1; > > I don't think rtas_ld() should be used outside its intended function > in rtas. Make a direct call to ldl_phys instead. Ok, but @table here is an RTAS arg passed from the main RTAS routine to this function. Still can't use rtas_ld() ? Regards, Bharata.