On 04/19/2012 04:08 PM, Vasilis Liaskovitis wrote:
The numa_fw_cfg paravirt interface is extended to include SRAT information for
all hotplug-able memslots. There are 3 words for each hotplug-able memory
slot,
denoting start address, size and node proximity. nb_numa_nodes is set to 1 by
default (not 0), so that we always pass srat info to SeaBIOS.
This information is used by SeaBIOS to build hotplug memory device objects at
runtime.
Signed-off-by: Vasilis Liaskovitis <vasilis.liaskovi...@profitbricks.com>
---
hw/pc.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++------
vl.c | 4 +++-
2 files changed, 56 insertions(+), 7 deletions(-)
diff --git a/hw/pc.c b/hw/pc.c
index 67f0479..f1f550a 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -46,6 +46,7 @@
#include "ui/qemu-spice.h"
#include "memory.h"
#include "exec-memory.h"
+#include "memslot.h"
/* output Bochs bios info messages */
//#define DEBUG_BIOS
@@ -592,12 +593,15 @@ int e820_add_entry(uint64_t address, uint64_t length,
uint32_t type)
return index;
}
+static void bochs_bios_setup_hp_memslots(uint64_t *fw_cfg_slots);
+
static void *bochs_bios_init(void)
{
void *fw_cfg;
uint8_t *smbios_table;
size_t smbios_len;
uint64_t *numa_fw_cfg;
+ uint64_t *hp_memslots_fw_cfg;
int i, j;
register_ioport_write(0x400, 1, 2, bochs_bios_write, NULL);
@@ -630,28 +634,71 @@ static void *bochs_bios_init(void)
fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, (uint8_t *)&hpet_cfg,
sizeof(struct hpet_fw_config));
/* allocate memory for the NUMA channel: one (64bit) word for the number
- * of nodes, one word for each VCPU->node and one word for each node to
- * hold the amount of memory.
+ * of nodes, one word for the number of hotplug memory slots, one word
+ * for each VCPU->node, one word for each node to hold the amount of
memory.
+ * Finally three words for each hotplug memory slot, denoting start
address,
+ * size and node proximity.
*/
- numa_fw_cfg = g_malloc0((1 + max_cpus + nb_numa_nodes) * 8);
+ numa_fw_cfg = g_malloc0((2 + max_cpus + nb_numa_nodes + 3 *
nb_hp_memslots) * 8);
numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
+ numa_fw_cfg[1] = cpu_to_le64(nb_hp_memslots);
This will break compatibility if a guest is migrated from old->new qemu:
then on reboot it will use the old BIOS, which expects numa_fw_cfg[1] to be
something else.
Could memslots info be moved to the end of an existing interface?
+
for (i = 0; i < max_cpus; i++) {
for (j = 0; j < nb_numa_nodes; j++) {
if (node_cpumask[j] & (1 << i)) {
- numa_fw_cfg[i + 1] = cpu_to_le64(j);
+ numa_fw_cfg[i + 2] = cpu_to_le64(j);
break;
}
}
}
for (i = 0; i< nb_numa_nodes; i++) {
- numa_fw_cfg[max_cpus + 1 + i] = cpu_to_le64(node_mem[i]);
+ numa_fw_cfg[max_cpus + 2 + i] = cpu_to_le64(node_mem[i]);
}
+
+ hp_memslots_fw_cfg = numa_fw_cfg + 2 + max_cpus + nb_numa_nodes;
+ if (nb_hp_memslots)
+ bochs_bios_setup_hp_memslots(hp_memslots_fw_cfg);
+
fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, (uint8_t *)numa_fw_cfg,
- (1 + max_cpus + nb_numa_nodes) * 8);
+ (2 + max_cpus + nb_numa_nodes + 3 * nb_hp_memslots) * 8);
return fw_cfg;
}
+static void bochs_bios_setup_hp_memslots(uint64_t *fw_cfg_slots)
+{
+ int i = 0;
+ Error *err = NULL;
+ DeviceState *dev;
+ MemSlotState *slot;
+ char *type;
+ BusState *bus = sysbus_get_default();
+
+ QTAILQ_FOREACH(dev, &bus->children, sibling) {
+ type = object_property_get_str(OBJECT(dev), "type", &err);
+ if (err) {
+ error_free(err);
+ fprintf(stderr, "error getting device type\n");
+ exit(1);
+ }
+
+ if (!strcmp(type, "memslot")) {
+ if (!dev->id) {
+ error_free(err);
+ fprintf(stderr, "error getting memslot device id\n");
+ exit(1);
+ }
+ if (!strcmp(dev->id, "initialslot")) continue;
+ slot = MEMSLOT(dev);
+ fw_cfg_slots[3 * slot->idx] = cpu_to_le64(slot->start);
+ fw_cfg_slots[3 * slot->idx + 1] = cpu_to_le64(slot->size);
+ fw_cfg_slots[3 * slot->idx + 2] = cpu_to_le64(slot->node);
+ i++;
+ }
+ }
+ assert(i == nb_hp_memslots);
+}
+
static long get_file_size(FILE *f)
{
long where, size;
diff --git a/vl.c b/vl.c
index ae91a8a..50df453 100644
--- a/vl.c
+++ b/vl.c
@@ -3428,8 +3428,10 @@ int main(int argc, char **argv, char **envp)
register_savevm_live(NULL, "ram", 0, 4, NULL, ram_save_live, NULL,
ram_load, NULL);
+ if (!nb_numa_nodes)
+ nb_numa_nodes = 1;
- if (nb_numa_nodes > 0) {
+ {
int i;
if (nb_numa_nodes > MAX_NODES) {
--
-----
Igor