Re: [Qemu-devel] [RFC PATCH 6/9] pc: pass paravirt info for hotplug memory slots to BIOS

2012-04-20 Thread Igor Mammedov

On 04/19/2012 04:08 PM, Vasilis Liaskovitis wrote:

  The numa_fw_cfg paravirt interface is extended to include SRAT information for
  all hotplug-able memslots. There are 3 words for each hotplug-able memory slot,
  denoting start address, size and node proximity. nb_numa_nodes is set to 1 by
  default (not 0), so that we always pass SRAT info to SeaBIOS.

  This information is used by SeaBIOS to build hotplug memory device objects at
  runtime.

  Signed-off-by: Vasilis Liaskovitis <vasilis.liaskovi...@profitbricks.com>
---
  hw/pc.c |   59 +--
  vl.c|4 +++-
  2 files changed, 56 insertions(+), 7 deletions(-)

diff --git a/hw/pc.c b/hw/pc.c
index 67f0479..f1f550a 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -46,6 +46,7 @@
  #include ui/qemu-spice.h
  #include memory.h
  #include exec-memory.h
+#include memslot.h

  /* output Bochs bios info messages */
  //#define DEBUG_BIOS
@@ -592,12 +593,15 @@ int e820_add_entry(uint64_t address, uint64_t length, 
uint32_t type)
  return index;
  }

+static void bochs_bios_setup_hp_memslots(uint64_t *fw_cfg_slots);
+
  static void *bochs_bios_init(void)
  {
  void *fw_cfg;
  uint8_t *smbios_table;
  size_t smbios_len;
  uint64_t *numa_fw_cfg;
+uint64_t *hp_memslots_fw_cfg;
  int i, j;

  register_ioport_write(0x400, 1, 2, bochs_bios_write, NULL);
@@ -630,28 +634,71 @@ static void *bochs_bios_init(void)
  fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, (uint8_t *)hpet_cfg,
   sizeof(struct hpet_fw_config));
  /* allocate memory for the NUMA channel: one (64bit) word for the number
- * of nodes, one word for each VCPU-node and one word for each node to
- * hold the amount of memory.
+ * of nodes, one word for the number of hotplug memory slots, one word
+ * for each VCPU-node, one word for each node to hold the amount of 
memory.
+ * Finally three words for each hotplug memory slot, denoting start 
address,
+ * size and node proximity.
   */
-numa_fw_cfg = g_malloc0((1 + max_cpus + nb_numa_nodes) * 8);
+numa_fw_cfg = g_malloc0((2 + max_cpus + nb_numa_nodes + 3 * 
nb_hp_memslots) * 8);
  numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
+numa_fw_cfg[1] = cpu_to_le64(nb_hp_memslots);

This will break compatibility: if a guest is migrated from an old QEMU to a new
one, then on reboot it will still use the old BIOS, which expects numa_fw_cfg[1]
to be something else.
Could the memslots info be moved to the end of the existing interface?


+
  for (i = 0; i  max_cpus; i++) {
  for (j = 0; j  nb_numa_nodes; j++) {
  if (node_cpumask[j]  (1  i)) {
-numa_fw_cfg[i + 1] = cpu_to_le64(j);
+numa_fw_cfg[i + 2] = cpu_to_le64(j);
  break;
  }
  }
  }
  for (i = 0; i  nb_numa_nodes; i++) {
-numa_fw_cfg[max_cpus + 1 + i] = cpu_to_le64(node_mem[i]);
+numa_fw_cfg[max_cpus + 2 + i] = cpu_to_le64(node_mem[i]);
  }
+
+hp_memslots_fw_cfg = numa_fw_cfg + 2 + max_cpus + nb_numa_nodes;
+if (nb_hp_memslots)
+bochs_bios_setup_hp_memslots(hp_memslots_fw_cfg);
+
  fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, (uint8_t *)numa_fw_cfg,
- (1 + max_cpus + nb_numa_nodes) * 8);
+ (2 + max_cpus + nb_numa_nodes + 3 * nb_hp_memslots) * 8);

  return fw_cfg;
  }

+static void bochs_bios_setup_hp_memslots(uint64_t *fw_cfg_slots)
+{
+int i = 0;
+Error *err = NULL;
+DeviceState *dev;
+MemSlotState *slot;
+char *type;
+BusState *bus = sysbus_get_default();
+
+QTAILQ_FOREACH(dev,bus-children, sibling) {
+type = object_property_get_str(OBJECT(dev), type,err);
+if (err) {
+error_free(err);
+fprintf(stderr, error getting device type\n);
+exit(1);
+}
+
+if (!strcmp(type, memslot)) {
+if (!dev-id) {
+error_free(err);
+fprintf(stderr, error getting memslot device id\n);
+exit(1);
+}
+if (!strcmp(dev-id, initialslot)) continue;
+slot = MEMSLOT(dev);
+fw_cfg_slots[3 * slot-idx] = cpu_to_le64(slot-start);
+fw_cfg_slots[3 * slot-idx + 1] = cpu_to_le64(slot-size);
+fw_cfg_slots[3 * slot-idx + 2] = cpu_to_le64(slot-node);
+i++;
+}
+}
+assert(i == nb_hp_memslots);
+}
+
  static long get_file_size(FILE *f)
  {
  long where, size;
diff --git a/vl.c b/vl.c
index ae91a8a..50df453 100644
--- a/vl.c
+++ b/vl.c
@@ -3428,8 +3428,10 @@ int main(int argc, char **argv, char **envp)

  register_savevm_live(NULL, ram, 0, 4, NULL, ram_save_live, NULL,
   ram_load, NULL);
+if (!nb_numa_nodes)
+nb_numa_nodes = 1;

-if (nb_numa_nodes  0) {
+{
  int i;

  if (nb_numa_nodes  MAX_NODES) {


--
-
 Igor
--
To unsubscribe from this list: send 

Re: [Qemu-devel] [RFC PATCH 6/9] pc: pass paravirt info for hotplug memory slots to BIOS

2012-04-20 Thread Vasilis Liaskovitis
On Fri, Apr 20, 2012 at 12:33:57PM +0200, Igor Mammedov wrote:
 On 04/19/2012 04:08 PM, Vasilis Liaskovitis wrote:
 -numa_fw_cfg = g_malloc0((1 + max_cpus + nb_numa_nodes) * 8);
 +numa_fw_cfg = g_malloc0((2 + max_cpus + nb_numa_nodes + 3 * 
 nb_hp_memslots) * 8);
   numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
 +numa_fw_cfg[1] = cpu_to_le64(nb_hp_memslots);
 this will brake compatibility if guest was migrated from old-new qemu
 than on reboot it will use old bios that expects numa_fw_cfg[1] to be 
 something else.
 Could memslots info be moved to the end of an existing interface?

Right. The number of memslots can be placed at 1 + max_cpus + nb_numa_nodes,
instead of right after the number of nodes. This way the old layout is preserved,
and all memslot info comes at the end. I will rewrite.

thanks,
- Vasilis
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH 6/9] pc: pass paravirt info for hotplug memory slots to BIOS

2012-04-19 Thread Vasilis Liaskovitis
 The numa_fw_cfg paravirt interface is extended to include SRAT information for
 all hotplug-able memslots. There are 3 words for each hotplug-able memory slot,
 denoting start address, size and node proximity. nb_numa_nodes is set to 1 by
 default (not 0), so that we always pass SRAT info to SeaBIOS.

 This information is used by SeaBIOS to build hotplug memory device objects at
 runtime.

 Signed-off-by: Vasilis Liaskovitis <vasilis.liaskovi...@profitbricks.com>
---
 hw/pc.c |   59 +--
 vl.c|4 +++-
 2 files changed, 56 insertions(+), 7 deletions(-)

diff --git a/hw/pc.c b/hw/pc.c
index 67f0479..f1f550a 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -46,6 +46,7 @@
 #include ui/qemu-spice.h
 #include memory.h
 #include exec-memory.h
+#include memslot.h
 
 /* output Bochs bios info messages */
 //#define DEBUG_BIOS
@@ -592,12 +593,15 @@ int e820_add_entry(uint64_t address, uint64_t length, 
uint32_t type)
 return index;
 }
 
+static void bochs_bios_setup_hp_memslots(uint64_t *fw_cfg_slots);
+
 static void *bochs_bios_init(void)
 {
 void *fw_cfg;
 uint8_t *smbios_table;
 size_t smbios_len;
 uint64_t *numa_fw_cfg;
+uint64_t *hp_memslots_fw_cfg;
 int i, j;
 
 register_ioport_write(0x400, 1, 2, bochs_bios_write, NULL);
@@ -630,28 +634,71 @@ static void *bochs_bios_init(void)
 fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, (uint8_t *)hpet_cfg,
  sizeof(struct hpet_fw_config));
 /* allocate memory for the NUMA channel: one (64bit) word for the number
- * of nodes, one word for each VCPU-node and one word for each node to
- * hold the amount of memory.
+ * of nodes, one word for the number of hotplug memory slots, one word
+ * for each VCPU-node, one word for each node to hold the amount of 
memory.
+ * Finally three words for each hotplug memory slot, denoting start 
address,
+ * size and node proximity.
  */
-numa_fw_cfg = g_malloc0((1 + max_cpus + nb_numa_nodes) * 8);
+numa_fw_cfg = g_malloc0((2 + max_cpus + nb_numa_nodes + 3 * 
nb_hp_memslots) * 8);
 numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
+numa_fw_cfg[1] = cpu_to_le64(nb_hp_memslots);
+
 for (i = 0; i  max_cpus; i++) {
 for (j = 0; j  nb_numa_nodes; j++) {
 if (node_cpumask[j]  (1  i)) {
-numa_fw_cfg[i + 1] = cpu_to_le64(j);
+numa_fw_cfg[i + 2] = cpu_to_le64(j);
 break;
 }
 }
 }
 for (i = 0; i  nb_numa_nodes; i++) {
-numa_fw_cfg[max_cpus + 1 + i] = cpu_to_le64(node_mem[i]);
+numa_fw_cfg[max_cpus + 2 + i] = cpu_to_le64(node_mem[i]);
 }
+
+hp_memslots_fw_cfg = numa_fw_cfg + 2 + max_cpus + nb_numa_nodes;
+if (nb_hp_memslots)
+bochs_bios_setup_hp_memslots(hp_memslots_fw_cfg);
+
 fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, (uint8_t *)numa_fw_cfg,
- (1 + max_cpus + nb_numa_nodes) * 8);
+ (2 + max_cpus + nb_numa_nodes + 3 * nb_hp_memslots) * 8);
 
 return fw_cfg;
 }
 
+static void bochs_bios_setup_hp_memslots(uint64_t *fw_cfg_slots)
+{
+int i = 0;
+Error *err = NULL;
+DeviceState *dev;
+MemSlotState *slot;
+char *type;
+BusState *bus = sysbus_get_default();
+
+QTAILQ_FOREACH(dev, bus-children, sibling) {
+type = object_property_get_str(OBJECT(dev), type, err);
+if (err) {
+error_free(err);
+fprintf(stderr, error getting device type\n);
+exit(1);
+}
+
+if (!strcmp(type, memslot)) {
+if (!dev-id) {
+error_free(err);
+fprintf(stderr, error getting memslot device id\n);
+exit(1);
+}
+if (!strcmp(dev-id, initialslot)) continue;
+slot = MEMSLOT(dev);
+fw_cfg_slots[3 * slot-idx] = cpu_to_le64(slot-start);
+fw_cfg_slots[3 * slot-idx + 1] = cpu_to_le64(slot-size);
+fw_cfg_slots[3 * slot-idx + 2] = cpu_to_le64(slot-node);
+i++;
+}
+}
+assert(i == nb_hp_memslots);
+}
+
 static long get_file_size(FILE *f)
 {
 long where, size;
diff --git a/vl.c b/vl.c
index ae91a8a..50df453 100644
--- a/vl.c
+++ b/vl.c
@@ -3428,8 +3428,10 @@ int main(int argc, char **argv, char **envp)
 
 register_savevm_live(NULL, ram, 0, 4, NULL, ram_save_live, NULL,
  ram_load, NULL);
+if (!nb_numa_nodes)
+nb_numa_nodes = 1;
 
-if (nb_numa_nodes  0) {
+{
 int i;
 
 if (nb_numa_nodes  MAX_NODES) {
-- 
1.7.9

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH 6/9] pc: pass paravirt info for hotplug memory slots to BIOS

2012-04-19 Thread Avi Kivity
On 04/19/2012 05:08 PM, Vasilis Liaskovitis wrote:
  The numa_fw_cfg paravirt interface is extended to include SRAT information for
  all hotplug-able memslots. There are 3 words for each hotplug-able memory slot,
  denoting start address, size and node proximity. nb_numa_nodes is set to 1 by
  default (not 0), so that we always pass SRAT info to SeaBIOS.

  This information is used by SeaBIOS to build hotplug memory device objects at
  runtime.


Please document this ABI.  I don't see an existing place for it, so I suggest
docs/specs/fwcfg.txt (only your additions need to be documented).

-- 
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html