Re: [Qemu-devel] [PATCH] Fixes related to processing of qemu's -numa option
On 6/18/2012 3:11 PM, Eric Blake wrote: On 06/18/2012 04:05 PM, Andreas Färber wrote: Am 17.06.2012 22:12, schrieb Chegu Vinod: diff --git a/vl.c b/vl.c index 204d85b..1906412 100644 --- a/vl.c +++ b/vl.c @@ -28,6 +28,7 @@ #include #include #include +#include <sched.h> Did you check whether this and the macros you're using are available on POSIX and mingw32? vl.c is a pretty central file. POSIX, yes. mingw32, no. Use of preprocessor conditionals is probably in order. Thanks. I will look into this. Vinod -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] [PATCH] Fixes related to processing of qemu's -numa option
On 06/18/2012 04:05 PM, Andreas Färber wrote: > Am 17.06.2012 22:12, schrieb Chegu Vinod: >> diff --git a/vl.c b/vl.c >> index 204d85b..1906412 100644 >> --- a/vl.c >> +++ b/vl.c >> @@ -28,6 +28,7 @@ >> #include >> #include >> #include >> +#include <sched.h> > > Did you check whether this and the macros you're using are available on > POSIX and mingw32? vl.c is a pretty central file. POSIX, yes. mingw32, no. Use of preprocessor conditionals is probably in order. -- Eric Blake eblake@redhat.com +1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] [PATCH] Fixes related to processing of qemu's -numa option
Am 17.06.2012 22:12, schrieb Chegu Vinod: > diff --git a/vl.c b/vl.c > index 204d85b..1906412 100644 > --- a/vl.c > +++ b/vl.c > @@ -28,6 +28,7 @@ > #include > #include > #include > +#include <sched.h> Did you check whether this and the macros you're using are available on POSIX and mingw32? vl.c is a pretty central file. Andreas -- SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] [PATCH] Fixes related to processing of qemu's -numa option
On 6/18/2012 1:29 PM, Eduardo Habkost wrote: On Sun, Jun 17, 2012 at 01:12:31PM -0700, Chegu Vinod wrote: The -numa option to qemu is used to create [fake] numa nodes and expose them to the guest OS instance. There are a couple of issues with the -numa option: a) Max VCPU's that can be specified for a guest while using the qemu's -numa option is 64. Due to a typecasting issue when the number of VCPUs is> 32 the VCPUs don't show up under the specified [fake] numa nodes. b) KVM currently has support for 160VCPUs per guest. The qemu's -numa option has only support for upto 64VCPUs per guest. This patch addresses these two issues. [ Note: This patch has been verified by Eduardo Habkost ]. I was going to add a Tested-by line, but this patch breaks the automatic round-robin assignment when no CPU is added to any node on the command-line. Pl. see below. Additional questions below: [...] diff --git a/cpus.c b/cpus.c index b182b3d..f9cee60 100644 --- a/cpus.c +++ b/cpus.c @@ -1145,7 +1145,7 @@ void set_numa_modes(void) for (env = first_cpu; env != NULL; env = env->next_cpu) { for (i = 0; i< nb_numa_nodes; i++) { -if (node_cpumask[i]& (1<< env->cpu_index)) { +if (CPU_ISSET_S(env->cpu_index, cpumask_size, node_cpumask[i])) { env->numa_node = i; } } diff --git a/hw/pc.c b/hw/pc.c index 8368701..f0c3665 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -639,7 +639,7 @@ static void *bochs_bios_init(void) numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes); for (i = 0; i< max_cpus; i++) { for (j = 0; j< nb_numa_nodes; j++) { -if (node_cpumask[j]& (1<< i)) { +if (CPU_ISSET_S(i, cpumask_size, node_cpumask[j])) { numa_fw_cfg[i + 1] = cpu_to_le64(j); break; } diff --git a/sysemu.h b/sysemu.h index bc2c788..6e4d342 100644 --- a/sysemu.h +++ b/sysemu.h @@ -9,6 +9,7 @@ #include "qapi-types.h" #include "notify.h" #include "main-loop.h" +#include /* vl.c */ @@ -133,9 +134,11 @@ extern uint8_t qemu_extra_params_fw[2]; extern QEMUClock *rtc_clock; #define MAX_NODES 64 +#define KVM_MAX_VCPUS 254 Do we really 
need this constant? Why not just use max_cpus when allocating the CPU sets, instead? Hmm I had thought about this earlier too max_cpus was not getting set at the point where the allocations were being done. I have now moved that code to a bit later and switched to using max_cpus. extern int nb_numa_nodes; extern uint64_t node_mem[MAX_NODES]; -extern uint64_t node_cpumask[MAX_NODES]; +extern cpu_set_t *node_cpumask[MAX_NODES]; +extern size_t cpumask_size; #define MAX_OPTION_ROMS 16 typedef struct QEMUOptionRom { diff --git a/vl.c b/vl.c index 204d85b..1906412 100644 --- a/vl.c +++ b/vl.c @@ -28,6 +28,7 @@ #include #include #include +#include /* Needed early for CONFIG_BSD etc. */ #include "config-host.h" @@ -240,7 +241,8 @@ QTAILQ_HEAD(, FWBootEntry) fw_boot_order = QTAILQ_HEAD_INITIALIZER(fw_boot_order int nb_numa_nodes; uint64_t node_mem[MAX_NODES]; -uint64_t node_cpumask[MAX_NODES]; +cpu_set_t *node_cpumask[MAX_NODES]; +size_t cpumask_size; uint8_t qemu_uuid[16]; @@ -950,6 +952,9 @@ static void numa_add(const char *optarg) char *endptr; unsigned long long value, endvalue; int nodenr; +int i; + +value = endvalue = 0; optarg = get_opt_name(option, 128, optarg, ',') + 1; if (!strcmp(option, "node")) { @@ -970,27 +975,17 @@ static void numa_add(const char *optarg) } node_mem[nodenr] = sval; } -if (get_param_value(option, 128, "cpus", optarg) == 0) { -node_cpumask[nodenr] = 0; -} else { +if (get_param_value(option, 128, "cpus", optarg) != 0) { value = strtoull(option,&endptr, 10); -if (value>= 64) { -value = 63; -fprintf(stderr, "only 64 CPUs in NUMA mode supported.\n"); +if (*endptr == '-') { +endvalue = strtoull(endptr+1,&endptr, 10); } else { -if (*endptr == '-') { -endvalue = strtoull(endptr+1,&endptr, 10); -if (endvalue>= 63) { -endvalue = 62; -fprintf(stderr, -"only 63 CPUs in NUMA mode supported.\n"); -} -value = (2ULL<< endvalue) - (1ULL<< value); -} else { -value = 1ULL<< value; -} +endvalue = value; +} + +for (i = value; i<= endvalue; ++i) { +CPU_SET_S(i, 
cpumask_size, node_cpumask[nodenr]); What if endvalue is larger than the cpu mask size we allocated? I can add a check (endvalue >= max_cpus) and print an error. Should we force-set endvalue to max_cpus-1 in that case?
Re: [Qemu-devel] [PATCH] Fixes related to processing of qemu's -numa option
On Sun, Jun 17, 2012 at 01:12:31PM -0700, Chegu Vinod wrote: > The -numa option to qemu is used to create [fake] numa nodes > and expose them to the guest OS instance. > > There are a couple of issues with the -numa option: > > a) Max VCPU's that can be specified for a guest while using >the qemu's -numa option is 64. Due to a typecasting issue >when the number of VCPUs is > 32 the VCPUs don't show up >under the specified [fake] numa nodes. > > b) KVM currently has support for 160VCPUs per guest. The >qemu's -numa option has only support for upto 64VCPUs >per guest. > > This patch addresses these two issues. [ Note: This > patch has been verified by Eduardo Habkost ]. I was going to add a Tested-by line, but this patch breaks the automatic round-robin assignment when no CPU is added to any node on the command-line. Additional questions below: [...] > diff --git a/cpus.c b/cpus.c > index b182b3d..f9cee60 100644 > --- a/cpus.c > +++ b/cpus.c > @@ -1145,7 +1145,7 @@ void set_numa_modes(void) > > for (env = first_cpu; env != NULL; env = env->next_cpu) { > for (i = 0; i < nb_numa_nodes; i++) { > -if (node_cpumask[i] & (1 << env->cpu_index)) { > +if (CPU_ISSET_S(env->cpu_index, cpumask_size, node_cpumask[i])) { > env->numa_node = i; > } > } > diff --git a/hw/pc.c b/hw/pc.c > index 8368701..f0c3665 100644 > --- a/hw/pc.c > +++ b/hw/pc.c > @@ -639,7 +639,7 @@ static void *bochs_bios_init(void) > numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes); > for (i = 0; i < max_cpus; i++) { > for (j = 0; j < nb_numa_nodes; j++) { > -if (node_cpumask[j] & (1 << i)) { > +if (CPU_ISSET_S(i, cpumask_size, node_cpumask[j])) { > numa_fw_cfg[i + 1] = cpu_to_le64(j); > break; > } > diff --git a/sysemu.h b/sysemu.h > index bc2c788..6e4d342 100644 > --- a/sysemu.h > +++ b/sysemu.h > @@ -9,6 +9,7 @@ > #include "qapi-types.h" > #include "notify.h" > #include "main-loop.h" > +#include > > /* vl.c */ > > @@ -133,9 +134,11 @@ extern uint8_t qemu_extra_params_fw[2]; > extern QEMUClock *rtc_clock; > > 
#define MAX_NODES 64 > +#define KVM_MAX_VCPUS 254 Do we really need this constant? Why not just use max_cpus when allocating the CPU sets, instead? > extern int nb_numa_nodes; > extern uint64_t node_mem[MAX_NODES]; > -extern uint64_t node_cpumask[MAX_NODES]; > +extern cpu_set_t *node_cpumask[MAX_NODES]; > +extern size_t cpumask_size; > > #define MAX_OPTION_ROMS 16 > typedef struct QEMUOptionRom { > diff --git a/vl.c b/vl.c > index 204d85b..1906412 100644 > --- a/vl.c > +++ b/vl.c > @@ -28,6 +28,7 @@ > #include > #include > #include > +#include > > /* Needed early for CONFIG_BSD etc. */ > #include "config-host.h" > @@ -240,7 +241,8 @@ QTAILQ_HEAD(, FWBootEntry) fw_boot_order = > QTAILQ_HEAD_INITIALIZER(fw_boot_order > > int nb_numa_nodes; > uint64_t node_mem[MAX_NODES]; > -uint64_t node_cpumask[MAX_NODES]; > +cpu_set_t *node_cpumask[MAX_NODES]; > +size_t cpumask_size; > > uint8_t qemu_uuid[16]; > > @@ -950,6 +952,9 @@ static void numa_add(const char *optarg) > char *endptr; > unsigned long long value, endvalue; > int nodenr; > +int i; > + > +value = endvalue = 0; > > optarg = get_opt_name(option, 128, optarg, ',') + 1; > if (!strcmp(option, "node")) { > @@ -970,27 +975,17 @@ static void numa_add(const char *optarg) > } > node_mem[nodenr] = sval; > } > -if (get_param_value(option, 128, "cpus", optarg) == 0) { > -node_cpumask[nodenr] = 0; > -} else { > +if (get_param_value(option, 128, "cpus", optarg) != 0) { > value = strtoull(option, &endptr, 10); > -if (value >= 64) { > -value = 63; > -fprintf(stderr, "only 64 CPUs in NUMA mode supported.\n"); > +if (*endptr == '-') { > +endvalue = strtoull(endptr+1, &endptr, 10); > } else { > -if (*endptr == '-') { > -endvalue = strtoull(endptr+1, &endptr, 10); > -if (endvalue >= 63) { > -endvalue = 62; > -fprintf(stderr, > -"only 63 CPUs in NUMA mode supported.\n"); > -} > -value = (2ULL << endvalue) - (1ULL << value); > -} else { > -value = 1ULL << value; > -} > +endvalue = value; > +} > + > +for (i = value; i <= endvalue; ++i) 
{ > +CPU_SET_S(i, cpumask_size, node_cpumask[nodenr]); What if endvalue is larger than the cpu mask size we allocated? > } > -node_cpumask[nodenr] = value; > } > nb_numa_nodes++; > } > @@ -2331,7 +2326,9 @@ int main(int argc, char