Re: [Qemu-devel] [PATCH] Fixes related to processing of qemu's -numa option

2012-06-18 Thread Chegu Vinod

On 6/18/2012 3:11 PM, Eric Blake wrote:

On 06/18/2012 04:05 PM, Andreas Färber wrote:

Am 17.06.2012 22:12, schrieb Chegu Vinod:

diff --git a/vl.c b/vl.c
index 204d85b..1906412 100644
--- a/vl.c
+++ b/vl.c
@@ -28,6 +28,7 @@
  #include
  #include
  #include
+#include

Did you check whether this and the macros you're using are available on
POSIX and mingw32? vl.c is a pretty central file.

POSIX, yes.  mingw32, no.  Use of preprocessor conditionals is probably
in order.


Thanks. I will look into this.
Vinod
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [PATCH] Fixes related to processing of qemu's -numa option

2012-06-18 Thread Eric Blake
On 06/18/2012 04:05 PM, Andreas Färber wrote:
> Am 17.06.2012 22:12, schrieb Chegu Vinod:
>> diff --git a/vl.c b/vl.c
>> index 204d85b..1906412 100644
>> --- a/vl.c
>> +++ b/vl.c
>> @@ -28,6 +28,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
> 
> Did you check whether this and the macros you're using are available on
> POSIX and mingw32? vl.c is a pretty central file.

POSIX, yes.  mingw32, no.  Use of preprocessor conditionals is probably
in order.

-- 
Eric Blake   ebl...@redhat.com+1-919-301-3266
Libvirt virtualization library http://libvirt.org





signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH] Fixes related to processing of qemu's -numa option

2012-06-18 Thread Andreas Färber
Am 17.06.2012 22:12, schrieb Chegu Vinod:
> diff --git a/vl.c b/vl.c
> index 204d85b..1906412 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -28,6 +28,7 @@
>  #include 
>  #include 
>  #include 
> +#include 

Did you check whether this and the macros you're using are available on
POSIX and mingw32? vl.c is a pretty central file.

Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [PATCH] Fixes related to processing of qemu's -numa option

2012-06-18 Thread Chegu Vinod

On 6/18/2012 1:29 PM, Eduardo Habkost wrote:

On Sun, Jun 17, 2012 at 01:12:31PM -0700, Chegu Vinod wrote:

The -numa option to qemu is used to create [fake] numa nodes
and expose them to the guest OS instance.

There are a couple of issues with the -numa option:

a) Max VCPU's that can be specified for a guest while using
the qemu's -numa option is 64. Due to a typecasting issue
when the number of VCPUs is>  32 the VCPUs don't show up
under the specified [fake] numa nodes.

b) KVM currently has support for 160VCPUs per guest. The
qemu's -numa option has only support for upto 64VCPUs
per guest.

This patch addresses these two issues. [ Note: This
patch has been verified by Eduardo Habkost ].

I was going to add a Tested-by line, but this patch breaks the automatic
round-robin assignment when no CPU is added to any node on the
command-line.


Pl. see below.


Additional questions below:

[...]

diff --git a/cpus.c b/cpus.c
index b182b3d..f9cee60 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1145,7 +1145,7 @@ void set_numa_modes(void)

  for (env = first_cpu; env != NULL; env = env->next_cpu) {
  for (i = 0; i<  nb_numa_nodes; i++) {
-if (node_cpumask[i]&  (1<<  env->cpu_index)) {
+if (CPU_ISSET_S(env->cpu_index, cpumask_size, node_cpumask[i])) {
  env->numa_node = i;
  }
  }
diff --git a/hw/pc.c b/hw/pc.c
index 8368701..f0c3665 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -639,7 +639,7 @@ static void *bochs_bios_init(void)
  numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
  for (i = 0; i<  max_cpus; i++) {
  for (j = 0; j<  nb_numa_nodes; j++) {
-if (node_cpumask[j]&  (1<<  i)) {
+if (CPU_ISSET_S(i, cpumask_size, node_cpumask[j])) {
  numa_fw_cfg[i + 1] = cpu_to_le64(j);
  break;
  }
diff --git a/sysemu.h b/sysemu.h
index bc2c788..6e4d342 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -9,6 +9,7 @@
  #include "qapi-types.h"
  #include "notify.h"
  #include "main-loop.h"
+#include

  /* vl.c */

@@ -133,9 +134,11 @@ extern uint8_t qemu_extra_params_fw[2];
  extern QEMUClock *rtc_clock;

  #define MAX_NODES 64
+#define KVM_MAX_VCPUS 254

Do we really need this constant? Why not just use max_cpus when
allocating the CPU sets, instead?


Hmm I had thought about this earlier too max_cpus was not getting 
set at the point where the allocations were being done. I have now moved 
that code to a bit later and switched to using

max_cpus.





  extern int nb_numa_nodes;
  extern uint64_t node_mem[MAX_NODES];
-extern uint64_t node_cpumask[MAX_NODES];
+extern cpu_set_t *node_cpumask[MAX_NODES];
+extern size_t cpumask_size;

  #define MAX_OPTION_ROMS 16
  typedef struct QEMUOptionRom {
diff --git a/vl.c b/vl.c
index 204d85b..1906412 100644
--- a/vl.c
+++ b/vl.c
@@ -28,6 +28,7 @@
  #include
  #include
  #include
+#include

  /* Needed early for CONFIG_BSD etc. */
  #include "config-host.h"
@@ -240,7 +241,8 @@ QTAILQ_HEAD(, FWBootEntry) fw_boot_order = 
QTAILQ_HEAD_INITIALIZER(fw_boot_order

  int nb_numa_nodes;
  uint64_t node_mem[MAX_NODES];
-uint64_t node_cpumask[MAX_NODES];
+cpu_set_t *node_cpumask[MAX_NODES];
+size_t cpumask_size;

  uint8_t qemu_uuid[16];

@@ -950,6 +952,9 @@ static void numa_add(const char *optarg)
  char *endptr;
  unsigned long long value, endvalue;
  int nodenr;
+int i;
+
+value = endvalue = 0;

  optarg = get_opt_name(option, 128, optarg, ',') + 1;
  if (!strcmp(option, "node")) {
@@ -970,27 +975,17 @@ static void numa_add(const char *optarg)
  }
  node_mem[nodenr] = sval;
  }
-if (get_param_value(option, 128, "cpus", optarg) == 0) {
-node_cpumask[nodenr] = 0;
-} else {
+if (get_param_value(option, 128, "cpus", optarg) != 0) {
  value = strtoull(option,&endptr, 10);
-if (value>= 64) {
-value = 63;
-fprintf(stderr, "only 64 CPUs in NUMA mode supported.\n");
+if (*endptr == '-') {
+endvalue = strtoull(endptr+1,&endptr, 10);
  } else {
-if (*endptr == '-') {
-endvalue = strtoull(endptr+1,&endptr, 10);
-if (endvalue>= 63) {
-endvalue = 62;
-fprintf(stderr,
-"only 63 CPUs in NUMA mode supported.\n");
-}
-value = (2ULL<<  endvalue) - (1ULL<<  value);
-} else {
-value = 1ULL<<  value;
-}
+endvalue = value;
+}
+
+for (i = value; i<= endvalue; ++i) {
+CPU_SET_S(i, cpumask_size, node_cpumask[nodenr]);

What if endvalue is larger than the cpu mask size we allocated?


I can add a check (endvalue >= max_cpus) and print an error.
Should we force set the endvalue to max_cpus-1 in that case ?


Re: [Qemu-devel] [PATCH] Fixes related to processing of qemu's -numa option

2012-06-18 Thread Eduardo Habkost
On Sun, Jun 17, 2012 at 01:12:31PM -0700, Chegu Vinod wrote:
> The -numa option to qemu is used to create [fake] numa nodes
> and expose them to the guest OS instance.
> 
> There are a couple of issues with the -numa option:
> 
> a) Max VCPU's that can be specified for a guest while using
>the qemu's -numa option is 64. Due to a typecasting issue
>when the number of VCPUs is > 32 the VCPUs don't show up
>under the specified [fake] numa nodes.
> 
> b) KVM currently has support for 160VCPUs per guest. The
>qemu's -numa option has only support for upto 64VCPUs
>per guest.
> 
> This patch addresses these two issues. [ Note: This
> patch has been verified by Eduardo Habkost ].

I was going to add a Tested-by line, but this patch breaks the automatic
round-robin assignment when no CPU is added to any node on the
command-line.

Additional questions below:

[...]
> diff --git a/cpus.c b/cpus.c
> index b182b3d..f9cee60 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -1145,7 +1145,7 @@ void set_numa_modes(void)
>  
>  for (env = first_cpu; env != NULL; env = env->next_cpu) {
>  for (i = 0; i < nb_numa_nodes; i++) {
> -if (node_cpumask[i] & (1 << env->cpu_index)) {
> +if (CPU_ISSET_S(env->cpu_index, cpumask_size, node_cpumask[i])) {
>  env->numa_node = i;
>  }
>  }
> diff --git a/hw/pc.c b/hw/pc.c
> index 8368701..f0c3665 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -639,7 +639,7 @@ static void *bochs_bios_init(void)
>  numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
>  for (i = 0; i < max_cpus; i++) {
>  for (j = 0; j < nb_numa_nodes; j++) {
> -if (node_cpumask[j] & (1 << i)) {
> +if (CPU_ISSET_S(i, cpumask_size, node_cpumask[j])) {
>  numa_fw_cfg[i + 1] = cpu_to_le64(j);
>  break;
>  }
> diff --git a/sysemu.h b/sysemu.h
> index bc2c788..6e4d342 100644
> --- a/sysemu.h
> +++ b/sysemu.h
> @@ -9,6 +9,7 @@
>  #include "qapi-types.h"
>  #include "notify.h"
>  #include "main-loop.h"
> +#include 
>  
>  /* vl.c */
>  
> @@ -133,9 +134,11 @@ extern uint8_t qemu_extra_params_fw[2];
>  extern QEMUClock *rtc_clock;
>  
>  #define MAX_NODES 64
> +#define KVM_MAX_VCPUS 254

Do we really need this constant? Why not just use max_cpus when
allocating the CPU sets, instead?


>  extern int nb_numa_nodes;
>  extern uint64_t node_mem[MAX_NODES];
> -extern uint64_t node_cpumask[MAX_NODES];
> +extern cpu_set_t *node_cpumask[MAX_NODES];
> +extern size_t cpumask_size;
>  
>  #define MAX_OPTION_ROMS 16
>  typedef struct QEMUOptionRom {
> diff --git a/vl.c b/vl.c
> index 204d85b..1906412 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -28,6 +28,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  /* Needed early for CONFIG_BSD etc. */
>  #include "config-host.h"
> @@ -240,7 +241,8 @@ QTAILQ_HEAD(, FWBootEntry) fw_boot_order = 
> QTAILQ_HEAD_INITIALIZER(fw_boot_order
>  
>  int nb_numa_nodes;
>  uint64_t node_mem[MAX_NODES];
> -uint64_t node_cpumask[MAX_NODES];
> +cpu_set_t *node_cpumask[MAX_NODES];
> +size_t cpumask_size;
>  
>  uint8_t qemu_uuid[16];
>  
> @@ -950,6 +952,9 @@ static void numa_add(const char *optarg)
>  char *endptr;
>  unsigned long long value, endvalue;
>  int nodenr;
> +int i;
> +
> +value = endvalue = 0;
>  
>  optarg = get_opt_name(option, 128, optarg, ',') + 1;
>  if (!strcmp(option, "node")) {
> @@ -970,27 +975,17 @@ static void numa_add(const char *optarg)
>  }
>  node_mem[nodenr] = sval;
>  }
> -if (get_param_value(option, 128, "cpus", optarg) == 0) {
> -node_cpumask[nodenr] = 0;
> -} else {
> +if (get_param_value(option, 128, "cpus", optarg) != 0) {
>  value = strtoull(option, &endptr, 10);
> -if (value >= 64) {
> -value = 63;
> -fprintf(stderr, "only 64 CPUs in NUMA mode supported.\n");
> +if (*endptr == '-') {
> +endvalue = strtoull(endptr+1, &endptr, 10);
>  } else {
> -if (*endptr == '-') {
> -endvalue = strtoull(endptr+1, &endptr, 10);
> -if (endvalue >= 63) {
> -endvalue = 62;
> -fprintf(stderr,
> -"only 63 CPUs in NUMA mode supported.\n");
> -}
> -value = (2ULL << endvalue) - (1ULL << value);
> -} else {
> -value = 1ULL << value;
> -}
> +endvalue = value;
> +}
> +
> +for (i = value; i <= endvalue; ++i) {
> +CPU_SET_S(i, cpumask_size, node_cpumask[nodenr]);

What if endvalue is larger than the cpu mask size we allocated?


>  }
> -node_cpumask[nodenr] = value;
>  }
>  nb_numa_nodes++;
>  }
> @@ -2331,7 +2326,9 @@ int main(int argc, char