Re: [RFC PATCH v3 09/19] pc: Add dimm paravirt SRAT info

2012-09-26 Thread Wen Congyang
At 09/21/2012 07:17 PM, Vasilis Liaskovitis Wrote:
> The numa_fw_cfg paravirt interface is extended to include SRAT information for
> all hotplug-able dimms. There are 3 words for each hotplug-able memory slot,
> denoting start address, size and node proximity. The new info is appended after
> existing numa info, so that the fw_cfg layout does not break. This information
> is used by SeaBIOS to build hotplug memory device objects at runtime.
> nb_numa_nodes is set to 1 by default (not 0), so that we always pass SRAT info
> to SeaBIOS.

You forgot to set nb_numa_nodes to 1...

Thanks
Wen Congyang
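
For context, the missing hunk Wen is pointing at would look roughly like the
sketch below: fall back to a single NUMA node so SeaBIOS always receives SRAT
info, even when no -numa option is given. This is only a sketch; the exact
placement (e.g. before bochs_bios_init() in hw/pc.c, or after option parsing
in vl.c) and the node_mem[0] default are assumptions, not part of the posted
patch.

    /* Sketch of the missing default (placement and node_mem[0] value are
     * assumptions): force at least one NUMA node so that SRAT info is
     * always passed to SeaBIOS. */
    if (nb_numa_nodes == 0) {
        nb_numa_nodes = 1;
        node_mem[0] = ram_size;   /* all RAM in the single default node */
    }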

> 
> v1->v2:
> Dimm SRAT info (#dimms) is appended at end of existing numa fw_cfg in order not
> to break existing layout
> Documentation of the new fwcfg layout is included in docs/specs/fwcfg.txt
> 
> Signed-off-by: Vasilis Liaskovitis 
> ---
>  docs/specs/fwcfg.txt |   28 ++++++++++++++++++++++++++++
>  hw/pc.c              |   14 ++++++++++++--
>  2 files changed, 40 insertions(+), 2 deletions(-)
>  create mode 100644 docs/specs/fwcfg.txt
> 
> diff --git a/docs/specs/fwcfg.txt b/docs/specs/fwcfg.txt
> new file mode 100644
> index 0000000..55f96d9
> --- /dev/null
> +++ b/docs/specs/fwcfg.txt
> @@ -0,0 +1,28 @@
> +QEMU<->BIOS Paravirt Documentation
> +----------------------------------
> +
> +This document describes paravirt data structures passed from QEMU to BIOS.
> +
> +FW_CFG_NUMA paravirt info
> +-------------------------
> +The SRAT info passed from QEMU to BIOS has the following layout:
> +
> +---------------------------------------------------------------------------------------------
> +#nodes | cpu0_pxm | cpu1_pxm | ... | cpulast_pxm | node0_mem | node1_mem | ... | nodelast_mem
> +
> +---------------------------------------------------------------------------------------------
> +#dimms | dimm0_start | dimm0_sz | dimm0_pxm | ... | dimmlast_start | dimmlast_sz | dimmlast_pxm
> +
> +Entry 0 contains the number of numa nodes (nb_numa_nodes).
> +
> +Entries 1..max_cpus: The next max_cpus entries describe node proximity for each
> +one of the vCPUs in the system.
> +
> +Entries max_cpus+1..max_cpus+nb_numa_nodes: The next nb_numa_nodes entries
> +describe the memory size for each one of the NUMA nodes in the system.
> +
> +Entry max_cpus+nb_numa_nodes+1 contains the number of memory dimms (nb_hp_dimms)
> +
> +The last 3 * nb_hp_dimms entries are organized in triplets: Each triplet contains
> +the physical address offset, size (in bytes), and node proximity for the
> +respective dimm.
> diff --git a/hw/pc.c b/hw/pc.c
> index 2c9664d..f2604ae 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -598,6 +598,7 @@ static void *bochs_bios_init(void)
>  uint8_t *smbios_table;
>  size_t smbios_len;
>  uint64_t *numa_fw_cfg;
> +uint64_t *hp_dimms_fw_cfg;
>  int i, j;
>  
>  register_ioport_write(0x400, 1, 2, bochs_bios_write, NULL);
> @@ -632,8 +633,10 @@ static void *bochs_bios_init(void)
>  /* allocate memory for the NUMA channel: one (64bit) word for the number
>   * of nodes, one word for each VCPU->node and one word for each node to
>   * hold the amount of memory.
> + * Finally one word for the number of hotplug memory slots and three words
> + * for each hotplug memory slot (start address, size and node proximity).
>   */
> -numa_fw_cfg = g_malloc0((1 + max_cpus + nb_numa_nodes) * 8);
> +numa_fw_cfg = g_malloc0((2 + max_cpus + nb_numa_nodes + 3 * nb_hp_dimms) * 8);
>  numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
>  for (i = 0; i < max_cpus; i++) {
>  for (j = 0; j < nb_numa_nodes; j++) {
> @@ -646,8 +649,15 @@ static void *bochs_bios_init(void)
>  for (i = 0; i < nb_numa_nodes; i++) {
>  numa_fw_cfg[max_cpus + 1 + i] = cpu_to_le64(node_mem[i]);
>  }
> +
> +numa_fw_cfg[1 + max_cpus + nb_numa_nodes] = cpu_to_le64(nb_hp_dimms);
> +
> +hp_dimms_fw_cfg = numa_fw_cfg + 2 + max_cpus + nb_numa_nodes;
> +if (nb_hp_dimms)
> +setup_fwcfg_hp_dimms(hp_dimms_fw_cfg);
> +
>  fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, (uint8_t *)numa_fw_cfg,
> - (1 + max_cpus + nb_numa_nodes) * 8);
> + (2 + max_cpus + nb_numa_nodes + 3 * nb_hp_dimms) * 8);
>  
>  return fw_cfg;
>  }
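
Putting the fwcfg.txt layout and the pc.c producer side above together, a
consumer in the guest firmware would walk the blob roughly as follows. This is
only a hedged sketch, not SeaBIOS code: the function name, the hp_dimm struct
and the identity le64_to_cpu() helper (the words are little-endian, so this is
a no-op on x86) are illustrative assumptions.

    #include <stdint.h>

    /* Words in the FW_CFG_NUMA blob are little-endian 64-bit values; on an
     * x86 guest this conversion is an identity. */
    static uint64_t le64_to_cpu(uint64_t v) { return v; }

    struct hp_dimm {
        uint64_t start;   /* physical address offset */
        uint64_t size;    /* size in bytes */
        uint64_t pxm;     /* node proximity */
    };

    /* Walk the blob as documented above:
     *   entry 0                            -> nb_numa_nodes
     *   entries 1 .. max_cpus              -> per-vCPU proximity
     *   next nb_numa_nodes entries         -> per-node memory size
     *   entry 1 + max_cpus + nb_numa_nodes -> nb_hp_dimms
     *   remaining 3 * nb_hp_dimms entries  -> (start, size, pxm) triplets */
    static void parse_numa_fw_cfg(const uint64_t *e, unsigned max_cpus)
    {
        uint64_t nb_numa_nodes = le64_to_cpu(e[0]);
        const uint64_t *cpu_pxm  = &e[1];
        const uint64_t *node_mem = &e[1 + max_cpus];
        uint64_t nb_hp_dimms = le64_to_cpu(e[1 + max_cpus + nb_numa_nodes]);
        const uint64_t *slot = &e[2 + max_cpus + nb_numa_nodes];

        for (uint64_t i = 0; i < nb_hp_dimms; i++) {
            struct hp_dimm d = {
                .start = le64_to_cpu(slot[3 * i]),
                .size  = le64_to_cpu(slot[3 * i + 1]),
                .pxm   = le64_to_cpu(slot[3 * i + 2]),
            };
            /* a real consumer would emit an SRAT memory affinity entry and a
             * hotplug memory device object for this dimm here */
            (void)d;
        }
        (void)cpu_pxm;
        (void)node_mem;
    }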



[RFC PATCH v3 09/19] pc: Add dimm paravirt SRAT info

2012-09-21 Thread Vasilis Liaskovitis
The numa_fw_cfg paravirt interface is extended to include SRAT information for
all hotplug-able dimms. There are 3 words for each hotplug-able memory slot,
denoting start address, size and node proximity. The new info is appended after
existing numa info, so that the fw_cfg layout does not break.  This information
is used by SeaBIOS to build hotplug memory device objects at runtime.
nb_numa_nodes is set to 1 by default (not 0), so that we always pass SRAT info
to SeaBIOS.

v1->v2:
Dimm SRAT info (#dimms) is appended at end of existing numa fw_cfg in order not
to break existing layout
Documentation of the new fwcfg layout is included in docs/specs/fwcfg.txt

Signed-off-by: Vasilis Liaskovitis 
---
 docs/specs/fwcfg.txt |   28 ++++++++++++++++++++++++++++
 hw/pc.c              |   14 ++++++++++++--
 2 files changed, 40 insertions(+), 2 deletions(-)
 create mode 100644 docs/specs/fwcfg.txt

diff --git a/docs/specs/fwcfg.txt b/docs/specs/fwcfg.txt
new file mode 100644
index 0000000..55f96d9
--- /dev/null
+++ b/docs/specs/fwcfg.txt
@@ -0,0 +1,28 @@
+QEMU<->BIOS Paravirt Documentation
+----------------------------------
+
+This document describes paravirt data structures passed from QEMU to BIOS.
+
+FW_CFG_NUMA paravirt info
+-------------------------
+The SRAT info passed from QEMU to BIOS has the following layout:
+
+---------------------------------------------------------------------------------------------
+#nodes | cpu0_pxm | cpu1_pxm | ... | cpulast_pxm | node0_mem | node1_mem | ... | nodelast_mem
+
+---------------------------------------------------------------------------------------------
+#dimms | dimm0_start | dimm0_sz | dimm0_pxm | ... | dimmlast_start | dimmlast_sz | dimmlast_pxm
+
+Entry 0 contains the number of numa nodes (nb_numa_nodes).
+
+Entries 1..max_cpus: The next max_cpus entries describe node proximity for each
+one of the vCPUs in the system.
+
+Entries max_cpus+1..max_cpus+nb_numa_nodes: The next nb_numa_nodes entries
+describe the memory size for each one of the NUMA nodes in the system.
+
+Entry max_cpus+nb_numa_nodes+1 contains the number of memory dimms (nb_hp_dimms)
+
+The last 3 * nb_hp_dimms entries are organized in triplets: Each triplet contains
+the physical address offset, size (in bytes), and node proximity for the
+respective dimm.
diff --git a/hw/pc.c b/hw/pc.c
index 2c9664d..f2604ae 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -598,6 +598,7 @@ static void *bochs_bios_init(void)
 uint8_t *smbios_table;
 size_t smbios_len;
 uint64_t *numa_fw_cfg;
+uint64_t *hp_dimms_fw_cfg;
 int i, j;
 
 register_ioport_write(0x400, 1, 2, bochs_bios_write, NULL);
@@ -632,8 +633,10 @@ static void *bochs_bios_init(void)
 /* allocate memory for the NUMA channel: one (64bit) word for the number
  * of nodes, one word for each VCPU->node and one word for each node to
  * hold the amount of memory.
+ * Finally one word for the number of hotplug memory slots and three words
+ * for each hotplug memory slot (start address, size and node proximity).
  */
-numa_fw_cfg = g_malloc0((1 + max_cpus + nb_numa_nodes) * 8);
+numa_fw_cfg = g_malloc0((2 + max_cpus + nb_numa_nodes + 3 * nb_hp_dimms) * 8);
 numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
 for (i = 0; i < max_cpus; i++) {
 for (j = 0; j < nb_numa_nodes; j++) {
@@ -646,8 +649,15 @@ static void *bochs_bios_init(void)
 for (i = 0; i < nb_numa_nodes; i++) {
 numa_fw_cfg[max_cpus + 1 + i] = cpu_to_le64(node_mem[i]);
 }
+
+numa_fw_cfg[1 + max_cpus + nb_numa_nodes] = cpu_to_le64(nb_hp_dimms);
+
+hp_dimms_fw_cfg = numa_fw_cfg + 2 + max_cpus + nb_numa_nodes;
+if (nb_hp_dimms)
+setup_fwcfg_hp_dimms(hp_dimms_fw_cfg);
+
 fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, (uint8_t *)numa_fw_cfg,
- (1 + max_cpus + nb_numa_nodes) * 8);
+ (2 + max_cpus + nb_numa_nodes + 3 * nb_hp_dimms) * 8);
 
 return fw_cfg;
 }
-- 
1.7.9
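
setup_fwcfg_hp_dimms() is introduced elsewhere in this series; based on the
triplet layout documented in fwcfg.txt it presumably does something like the
sketch below. This is a hypothetical reconstruction, not the series' actual
helper; dimm_start(), dimm_size() and dimm_node() are made-up accessors
standing in for however the series tracks its hotpluggable dimms.

    /* Hypothetical sketch: fill one (start, size, proximity) triplet per
     * hotpluggable dimm, in the same little-endian 64-bit format as the rest
     * of the FW_CFG_NUMA blob. dimm_start(), dimm_size() and dimm_node() are
     * made-up accessors. */
    static void setup_fwcfg_hp_dimms(uint64_t *fw_cfg_slots)
    {
        int i;

        for (i = 0; i < nb_hp_dimms; i++) {
            fw_cfg_slots[3 * i]     = cpu_to_le64(dimm_start(i)); /* phys. offset  */
            fw_cfg_slots[3 * i + 1] = cpu_to_le64(dimm_size(i));  /* size in bytes */
            fw_cfg_slots[3 * i + 2] = cpu_to_le64(dimm_node(i));  /* proximity     */
        }
    }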
