Re: [Qemu-devel] [RFC PATCH 6/9] pc: pass paravirt info for hotplug memory slots to BIOS
On 04/19/2012 04:08 PM, Vasilis Liaskovitis wrote: The numa_fw_cfg paravirt interface is extended to include SRAT information for all hotplug-able memslots. There are 3 words for each hotplug-able memory slot, denoting start address, size and node proximity. nb_numa_nodes is set to 1 by default (not 0), so that we always pass srat info to SeaBIOS. This information is used by Seabios to build hotplug memory device objects at runtime. Signed-off-by: Vasilis Liaskovitisvasilis.liaskovi...@profitbricks.com --- hw/pc.c | 59 +-- vl.c|4 +++- 2 files changed, 56 insertions(+), 7 deletions(-) diff --git a/hw/pc.c b/hw/pc.c index 67f0479..f1f550a 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -46,6 +46,7 @@ #include ui/qemu-spice.h #include memory.h #include exec-memory.h +#include memslot.h /* output Bochs bios info messages */ //#define DEBUG_BIOS @@ -592,12 +593,15 @@ int e820_add_entry(uint64_t address, uint64_t length, uint32_t type) return index; } +static void bochs_bios_setup_hp_memslots(uint64_t *fw_cfg_slots); + static void *bochs_bios_init(void) { void *fw_cfg; uint8_t *smbios_table; size_t smbios_len; uint64_t *numa_fw_cfg; +uint64_t *hp_memslots_fw_cfg; int i, j; register_ioport_write(0x400, 1, 2, bochs_bios_write, NULL); @@ -630,28 +634,71 @@ static void *bochs_bios_init(void) fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, (uint8_t *)hpet_cfg, sizeof(struct hpet_fw_config)); /* allocate memory for the NUMA channel: one (64bit) word for the number - * of nodes, one word for each VCPU-node and one word for each node to - * hold the amount of memory. + * of nodes, one word for the number of hotplug memory slots, one word + * for each VCPU-node, one word for each node to hold the amount of memory. + * Finally three words for each hotplug memory slot, denoting start address, + * size and node proximity. 
*/ -numa_fw_cfg = g_malloc0((1 + max_cpus + nb_numa_nodes) * 8); +numa_fw_cfg = g_malloc0((2 + max_cpus + nb_numa_nodes + 3 * nb_hp_memslots) * 8); numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes); +numa_fw_cfg[1] = cpu_to_le64(nb_hp_memslots); This will break compatibility: if a guest was migrated from old -> new qemu, then on reboot it will use the old BIOS, which expects numa_fw_cfg[1] to be something else. Could the memslots info be moved to the end of the existing interface? + for (i = 0; i max_cpus; i++) { for (j = 0; j nb_numa_nodes; j++) { if (node_cpumask[j] (1 i)) { -numa_fw_cfg[i + 1] = cpu_to_le64(j); +numa_fw_cfg[i + 2] = cpu_to_le64(j); break; } } } for (i = 0; i nb_numa_nodes; i++) { -numa_fw_cfg[max_cpus + 1 + i] = cpu_to_le64(node_mem[i]); +numa_fw_cfg[max_cpus + 2 + i] = cpu_to_le64(node_mem[i]); } + +hp_memslots_fw_cfg = numa_fw_cfg + 2 + max_cpus + nb_numa_nodes; +if (nb_hp_memslots) +bochs_bios_setup_hp_memslots(hp_memslots_fw_cfg); + fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, (uint8_t *)numa_fw_cfg, - (1 + max_cpus + nb_numa_nodes) * 8); + (2 + max_cpus + nb_numa_nodes + 3 * nb_hp_memslots) * 8); return fw_cfg; } +static void bochs_bios_setup_hp_memslots(uint64_t *fw_cfg_slots) +{ +int i = 0; +Error *err = NULL; +DeviceState *dev; +MemSlotState *slot; +char *type; +BusState *bus = sysbus_get_default(); + +QTAILQ_FOREACH(dev,bus-children, sibling) { +type = object_property_get_str(OBJECT(dev), type,err); +if (err) { +error_free(err); +fprintf(stderr, error getting device type\n); +exit(1); +} + +if (!strcmp(type, memslot)) { +if (!dev-id) { +error_free(err); +fprintf(stderr, error getting memslot device id\n); +exit(1); +} +if (!strcmp(dev-id, initialslot)) continue; +slot = MEMSLOT(dev); +fw_cfg_slots[3 * slot-idx] = cpu_to_le64(slot-start); +fw_cfg_slots[3 * slot-idx + 1] = cpu_to_le64(slot-size); +fw_cfg_slots[3 * slot-idx + 2] = cpu_to_le64(slot-node); +i++; +} +} +assert(i == nb_hp_memslots); +} + static long get_file_size(FILE *f) { long where, size; diff --git 
a/vl.c b/vl.c index ae91a8a..50df453 100644 --- a/vl.c +++ b/vl.c @@ -3428,8 +3428,10 @@ int main(int argc, char **argv, char **envp) register_savevm_live(NULL, ram, 0, 4, NULL, ram_save_live, NULL, ram_load, NULL); +if (!nb_numa_nodes) +nb_numa_nodes = 1; -if (nb_numa_nodes 0) { +{ int i; if (nb_numa_nodes MAX_NODES) { -- - Igor -- To unsubscribe from this list: send
Re: [Qemu-devel] [RFC PATCH 6/9] pc: pass paravirt info for hotplug memory slots to BIOS
On Fri, Apr 20, 2012 at 12:33:57PM +0200, Igor Mammedov wrote: On 04/19/2012 04:08 PM, Vasilis Liaskovitis wrote: -numa_fw_cfg = g_malloc0((1 + max_cpus + nb_numa_nodes) * 8); +numa_fw_cfg = g_malloc0((2 + max_cpus + nb_numa_nodes + 3 * nb_hp_memslots) * 8); numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes); +numa_fw_cfg[1] = cpu_to_le64(nb_hp_memslots); This will break compatibility: if a guest was migrated from old -> new qemu, then on reboot it will use the old BIOS, which expects numa_fw_cfg[1] to be something else. Could the memslots info be moved to the end of the existing interface? Right. The number of memslots can be placed at 1 + max_cpus + nb_numa_nodes, instead of right after the number of nodes. This way the old layout is preserved, and all memslot info comes at the end. I will rewrite. thanks, - Vasilis -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH 6/9] pc: pass paravirt info for hotplug memory slots to BIOS
The numa_fw_cfg paravirt interface is extended to include SRAT information for all hotplug-able memslots. There are 3 words for each hotplug-able memory slot, denoting start address, size and node proximity. nb_numa_nodes is set to 1 by default (not 0), so that we always pass srat info to SeaBIOS. This information is used by Seabios to build hotplug memory device objects at runtime. Signed-off-by: Vasilis Liaskovitis vasilis.liaskovi...@profitbricks.com --- hw/pc.c | 59 +-- vl.c|4 +++- 2 files changed, 56 insertions(+), 7 deletions(-) diff --git a/hw/pc.c b/hw/pc.c index 67f0479..f1f550a 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -46,6 +46,7 @@ #include ui/qemu-spice.h #include memory.h #include exec-memory.h +#include memslot.h /* output Bochs bios info messages */ //#define DEBUG_BIOS @@ -592,12 +593,15 @@ int e820_add_entry(uint64_t address, uint64_t length, uint32_t type) return index; } +static void bochs_bios_setup_hp_memslots(uint64_t *fw_cfg_slots); + static void *bochs_bios_init(void) { void *fw_cfg; uint8_t *smbios_table; size_t smbios_len; uint64_t *numa_fw_cfg; +uint64_t *hp_memslots_fw_cfg; int i, j; register_ioport_write(0x400, 1, 2, bochs_bios_write, NULL); @@ -630,28 +634,71 @@ static void *bochs_bios_init(void) fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, (uint8_t *)hpet_cfg, sizeof(struct hpet_fw_config)); /* allocate memory for the NUMA channel: one (64bit) word for the number - * of nodes, one word for each VCPU-node and one word for each node to - * hold the amount of memory. + * of nodes, one word for the number of hotplug memory slots, one word + * for each VCPU-node, one word for each node to hold the amount of memory. + * Finally three words for each hotplug memory slot, denoting start address, + * size and node proximity. 
*/ -numa_fw_cfg = g_malloc0((1 + max_cpus + nb_numa_nodes) * 8); +numa_fw_cfg = g_malloc0((2 + max_cpus + nb_numa_nodes + 3 * nb_hp_memslots) * 8); numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes); +numa_fw_cfg[1] = cpu_to_le64(nb_hp_memslots); + for (i = 0; i max_cpus; i++) { for (j = 0; j nb_numa_nodes; j++) { if (node_cpumask[j] (1 i)) { -numa_fw_cfg[i + 1] = cpu_to_le64(j); +numa_fw_cfg[i + 2] = cpu_to_le64(j); break; } } } for (i = 0; i nb_numa_nodes; i++) { -numa_fw_cfg[max_cpus + 1 + i] = cpu_to_le64(node_mem[i]); +numa_fw_cfg[max_cpus + 2 + i] = cpu_to_le64(node_mem[i]); } + +hp_memslots_fw_cfg = numa_fw_cfg + 2 + max_cpus + nb_numa_nodes; +if (nb_hp_memslots) +bochs_bios_setup_hp_memslots(hp_memslots_fw_cfg); + fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, (uint8_t *)numa_fw_cfg, - (1 + max_cpus + nb_numa_nodes) * 8); + (2 + max_cpus + nb_numa_nodes + 3 * nb_hp_memslots) * 8); return fw_cfg; } +static void bochs_bios_setup_hp_memslots(uint64_t *fw_cfg_slots) +{ +int i = 0; +Error *err = NULL; +DeviceState *dev; +MemSlotState *slot; +char *type; +BusState *bus = sysbus_get_default(); + +QTAILQ_FOREACH(dev, bus-children, sibling) { +type = object_property_get_str(OBJECT(dev), type, err); +if (err) { +error_free(err); +fprintf(stderr, error getting device type\n); +exit(1); +} + +if (!strcmp(type, memslot)) { +if (!dev-id) { +error_free(err); +fprintf(stderr, error getting memslot device id\n); +exit(1); +} +if (!strcmp(dev-id, initialslot)) continue; +slot = MEMSLOT(dev); +fw_cfg_slots[3 * slot-idx] = cpu_to_le64(slot-start); +fw_cfg_slots[3 * slot-idx + 1] = cpu_to_le64(slot-size); +fw_cfg_slots[3 * slot-idx + 2] = cpu_to_le64(slot-node); +i++; +} +} +assert(i == nb_hp_memslots); +} + static long get_file_size(FILE *f) { long where, size; diff --git a/vl.c b/vl.c index ae91a8a..50df453 100644 --- a/vl.c +++ b/vl.c @@ -3428,8 +3428,10 @@ int main(int argc, char **argv, char **envp) register_savevm_live(NULL, ram, 0, 4, NULL, ram_save_live, NULL, ram_load, NULL); +if 
(!nb_numa_nodes) +nb_numa_nodes = 1; -if (nb_numa_nodes 0) { +{ int i; if (nb_numa_nodes MAX_NODES) { -- 1.7.9 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH 6/9] pc: pass paravirt info for hotplug memory slots to BIOS
On 04/19/2012 05:08 PM, Vasilis Liaskovitis wrote: The numa_fw_cfg paravirt interface is extended to include SRAT information for all hotplug-able memslots. There are 3 words for each hotplug-able memory slot, denoting start address, size and node proximity. nb_numa_nodes is set to 1 by default (not 0), so that we always pass srat info to SeaBIOS. This information is used by Seabios to build hotplug memory device objects at runtime. Please document this ABI. I don't see an existing place, suggest docs/specs/fwcfg.txt (only your additions need to be documented). -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html