On Thu, Mar 26, 2015 at 02:44:17PM +1100, David Gibson wrote: > > +/* > > + * TODO: Take care of sparsemem configuration ? > > + */ > > +static uint64_t numa_node_end(uint32_t nodeid) > > +{ > > + uint32_t i = 0; > > + uint64_t addr = 0; > > + > > + do { > > + addr += numa_info[i].node_mem; > > + } while (++i <= nodeid); > > + > > + return addr; > > +} > > + > > +static uint64_t numa_node_start(uint32_t nodeid) > > +{ > > + if (!nodeid) { > > + return 0; > > + } else { > > + return numa_node_end(nodeid - 1); > > + } > > +} > > + > > +/* > > + * Given the addr, return the NUMA node to which the address belongs to. > > + */ > > +static uint32_t get_numa_node(uint64_t addr) > > +{ > > + uint32_t i; > > + > > + for (i = 0; i < nb_numa_nodes; i++) { > > + if ((addr >= numa_node_start(i)) && (addr < numa_node_end(i))) { > > + return i; > > + } > > + } > > This function is O(N^2) in number of nodes, which is a bit hideous for > something so simple.
Will something like below work ? Will all archs be ok with this ? numa: Store start and end address range of each node in numa_info Keep track of start and end address of each NUMA node in numa_info structure so that lookup of node by address becomes easier. Signed-off-by: Bharata B Rao <bhar...@linux.vnet.ibm.com> --- include/sysemu/numa.h | 3 +++ numa.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h index 5633b85..6dd6387 100644 --- a/include/sysemu/numa.h +++ b/include/sysemu/numa.h @@ -14,11 +14,14 @@ typedef struct node_info { DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS); struct HostMemoryBackend *node_memdev; bool present; + uint64_t mem_start; + uint64_t mem_end; } NodeInfo; extern NodeInfo numa_info[MAX_NODES]; void parse_numa_opts(void); void numa_post_machine_init(void); void query_numa_node_mem(uint64_t node_mem[]); extern QemuOptsList qemu_numa_opts; +uint32_t get_numa_node(uint64_t addr, Error **errp); #endif diff --git a/numa.c b/numa.c index 5634bf0..d0eb647 100644 --- a/numa.c +++ b/numa.c @@ -53,6 +53,25 @@ static int max_numa_nodeid; /* Highest specified NUMA node ID, plus one. int nb_numa_nodes; NodeInfo numa_info[MAX_NODES]; +/* + * Given an address, return the index of the NUMA node to which the + * address belongs to. 
+ */ +uint32_t get_numa_node(uint64_t addr, Error **errp) +{ + uint32_t i; + + for (i = 0; i < nb_numa_nodes; i++) { + if (addr >= numa_info[i].mem_start && addr < numa_info[i].mem_end) { + return i; + } + } + error_setg(errp, "Address 0x" RAM_ADDR_FMT " doesn't belong to any NUMA node", addr); + + /* Return Node 0 for unclaimed address */ + return 0; +} + static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) { uint16_t nodenr; @@ -119,6 +138,15 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) numa_info[nodenr].node_mem = object_property_get_int(o, "size", NULL); numa_info[nodenr].node_memdev = MEMORY_BACKEND(o); } + + if (nodenr) { + numa_info[nodenr].mem_start = numa_info[nodenr-1].mem_end; + } else { + numa_info[nodenr].mem_start = 0; + } + numa_info[nodenr].mem_end = numa_info[nodenr].mem_start + + numa_info[nodenr].node_mem; + numa_info[nodenr].present = true; max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1); } > > + /* Unassigned memory goes to node 0 by default */ > > + return 0; > > +} > > + > > +/* > > + * Adds ibm,dynamic-reconfiguration-memory node. > > + * Refer to docs/specs/ppc-spapr-hotplug.txt for the documentation > > + * of this device tree node. 
> > + */ > > +static int spapr_populate_drconf_memory(sPAPREnvironment *spapr, void *fdt) > > +{ > > + int ret, i, offset; > > + uint32_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; > > + uint32_t nr_rma_lmbs = spapr->rma_size/lmb_size; > > + uint32_t nr_lmbs = spapr->maxram_limit/lmb_size - nr_rma_lmbs; > > + uint32_t nr_assigned_lmbs = spapr->ram_limit/lmb_size - nr_rma_lmbs; > > + uint32_t *int_buf, *cur_index, buf_len; > > + > > + /* Allocate enough buffer size to fit in ibm,dynamic-memory */ > > + buf_len = nr_lmbs * SPAPR_DR_LMB_LIST_ENTRY_SIZE * sizeof(uint32_t) + > > + sizeof(uint32_t); > > + cur_index = int_buf = g_malloc0(buf_len); > > + > > + offset = fdt_add_subnode(fdt, 0, "ibm,dynamic-reconfiguration-memory"); > > + > > + ret = fdt_setprop_u64(fdt, offset, "ibm,lmb-size", lmb_size); > > + if (ret < 0) { > > + goto out; > > + } > > + > > + ret = fdt_setprop_cell(fdt, offset, "ibm,memory-flags-mask", 0xff); > > + if (ret < 0) { > > + goto out; > > + } > > + > > + ret = fdt_setprop_cell(fdt, offset, "ibm,memory-preservation-time", > > 0x0); > > + if (ret < 0) { > > + goto out; > > + } > > + > > + /* ibm,dynamic-memory */ > > + int_buf[0] = cpu_to_be32(nr_lmbs); > > + cur_index++; > > + for (i = 0; i < nr_lmbs; i++) { > > + sPAPRDRConnector *drc; > > + sPAPRDRConnectorClass *drck; > > + uint64_t addr; > > + uint32_t *dynamic_memory = cur_index; > > + > > + if (i < nr_assigned_lmbs) { > > + addr = (i + nr_rma_lmbs) * lmb_size; > > + } else { > > + addr = (i - nr_assigned_lmbs) * lmb_size + > > + SPAPR_MACHINE(qdev_get_machine())->hotplug_memory_base; > > + } > > + drc = spapr_dr_connector_new(qdev_get_machine(), > > + SPAPR_DR_CONNECTOR_TYPE_LMB, addr/lmb_size); > > + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); > > + > > + dynamic_memory[0] = cpu_to_be32(addr >> 32); > > + dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff); > > + dynamic_memory[2] = cpu_to_be32(drck->get_index(drc)); > > + dynamic_memory[3] = cpu_to_be32(0); /* reserved */ > > + dynamic_memory[4] = 
cpu_to_be32(get_numa_node(addr)); > > + dynamic_memory[5] = (addr < spapr->ram_limit) ? > > + cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED) : > > + cpu_to_be32(0); > > + > > + cur_index += SPAPR_DR_LMB_LIST_ENTRY_SIZE; > > + } > > + ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory", int_buf, buf_len); > > + if (ret < 0) { > > + goto out; > > + } > > + > > + /* ibm,associativity-lookup-arrays */ > > + cur_index = int_buf; > > + int_buf[0] = cpu_to_be32(nb_numa_nodes); > > + int_buf[1] = cpu_to_be32(4); /* Number of entries per associativity > > list */ > > + cur_index += 2; > > + for (i = 0; i < nb_numa_nodes; i++) { > > + uint32_t associativity[] = { > > + cpu_to_be32(0x0), > > + cpu_to_be32(0x0), > > + cpu_to_be32(0x0), > > + cpu_to_be32(i) > > + }; > > + memcpy(cur_index, associativity, sizeof(associativity)); > > + cur_index += 4; > > + } > > + ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", > > int_buf, > > + (cur_index - int_buf) * sizeof(uint32_t)); > > +out: > > + g_free(int_buf); > > + return ret; > > +} > > + > > +int spapr_h_cas_compose_response(target_ulong addr, target_ulong size, > > + bool cpu_update, bool memory_update) > > +{ > > + void *fdt, *fdt_skel; > > + sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 }; > > + > > + size -= sizeof(hdr); > > + > > + /* Create skeleton */ > > + fdt_skel = g_malloc0(size); > > + _FDT((fdt_create(fdt_skel, size))); > > + _FDT((fdt_begin_node(fdt_skel, ""))); > > + _FDT((fdt_end_node(fdt_skel))); > > + _FDT((fdt_finish(fdt_skel))); > > + fdt = g_malloc0(size); > > + _FDT((fdt_open_into(fdt_skel, fdt, size))); > > + g_free(fdt_skel); > > + > > + /* Fixup cpu nodes */ > > + if (cpu_update) { > > + _FDT((spapr_fixup_cpu_dt(fdt, spapr))); > > + } > > + > > + /* Generate memory nodes or ibm,dynamic-reconfiguration-memory node */ > > + if (memory_update) { > > + _FDT((spapr_populate_drconf_memory(spapr, fdt))); > > + } else { > > + _FDT((spapr_populate_memory(spapr, fdt))); > > + } > > + > > + /* Pack 
resulting tree */ > > + _FDT((fdt_pack(fdt))); > > + > > + if (fdt_totalsize(fdt) + sizeof(hdr) > size) { > > + trace_spapr_cas_failed(size); > > + return -1; > > + } > > + > > + cpu_physical_memory_write(addr, &hdr, sizeof(hdr)); > > + cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt)); > > + trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr)); > > + g_free(fdt); > > + > > + return 0; > > +} > > + > > static void spapr_finalize_fdt(sPAPREnvironment *spapr, > > hwaddr fdt_addr, > > hwaddr rtas_addr, > > @@ -791,11 +934,12 @@ static void spapr_finalize_fdt(sPAPREnvironment > > *spapr, > > /* open out the base tree into a temp buffer for the final tweaks */ > > _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE))); > > > > - ret = spapr_populate_memory(spapr, fdt); > > - if (ret < 0) { > > - fprintf(stderr, "couldn't setup memory nodes in fdt\n"); > > - exit(1); > > - } > > + /* > > + * Add memory@0 node to represent RMA. Rest of the memory is either > > + * represented by memory nodes or ibm,dynamic-reconfiguration-memory > > + * node later during ibm,client-architecture-support call. > > + */ > > + spapr_populate_memory_node(fdt, 0, 0, spapr->rma_size); > > > > ret = spapr_populate_vdevice(spapr->vio_bus, fdt); > > if (ret < 0) { > > diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c > > index 4f76f1c..20507c6 100644 > > --- a/hw/ppc/spapr_hcall.c > > +++ b/hw/ppc/spapr_hcall.c > > @@ -807,6 +807,32 @@ static target_ulong h_set_mode(PowerPCCPU *cpu, > > sPAPREnvironment *spapr, > > return ret; > > } > > > > +/* > > + * Return the offset to the requested option vector @vector in the > > + * option vector table @table. 
> > + */ > > +static target_ulong cas_get_option_vector(int vector, target_ulong table) > > +{ > > + int i; > > + char nr_vectors, nr_entries; > > + > > + if (!table) { > > + return 0; > > + } > > + > > + nr_vectors = (rtas_ld(table, 0) >> 24) + 1; > > I don't think rtas_ld() should be used outside its intended function > in rtas. Make a direct call to ldl_phys instead. Ok, but @table here is an RTAS arg passed from the main RTAS routine to this function. Still can't use rtas_ld() ? Regards, Bharata.