On Wed, May 14, 2014 at 05:43:05PM +0800, Hu Tao wrote: > From: Wanlong Gao <gaowanl...@cn.fujitsu.com> > > Signed-off-by: Wanlong Gao <gaowanl...@cn.fujitsu.com> > Reviewed-by: Eduardo Habkost <ehabk...@redhat.com> > Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> > Signed-off-by: Hu Tao <hu...@cn.fujitsu.com>
This is also based on bits by Blue Swirl and Andre Przywara; I think you should add their Signed-off-by (S.O.B.) lines from the original commits, just in case we need to locate the authors down the line. > --- > Makefile.target | 2 +- > cpus.c | 14 ---- > include/exec/cpu-all.h | 2 - > include/exec/cpu-common.h | 2 + > include/sysemu/cpus.h | 1 - > include/sysemu/sysemu.h | 3 + > numa.c | 186 > ++++++++++++++++++++++++++++++++++++++++++++++ > vl.c | 139 +--------------------------------- > 8 files changed, 193 insertions(+), 156 deletions(-) > create mode 100644 numa.c > > diff --git a/Makefile.target b/Makefile.target > index 9986047..dd815bb 100644 > --- a/Makefile.target > +++ b/Makefile.target > @@ -112,7 +112,7 @@ endif #CONFIG_BSD_USER > ######################################################### > # System emulator target > ifdef CONFIG_SOFTMMU > -obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o > +obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o > obj-y += qtest.o > obj-y += hw/ > obj-$(CONFIG_FDT) += device_tree.o > diff --git a/cpus.c b/cpus.c > index 7bbe153..7f87adb 100644 > --- a/cpus.c > +++ b/cpus.c > @@ -1313,20 +1313,6 @@ static void tcg_exec_all(void) > exit_request = 0; > } > > -void set_numa_modes(void) > -{ > - CPUState *cpu; > - int i; > - > - CPU_FOREACH(cpu) { > - for (i = 0; i < nb_numa_nodes; i++) { > - if (test_bit(cpu->cpu_index, node_cpumask[i])) { > - cpu->numa_node = i; > - } > - } > - } > -} > - > void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg) > { > /* XXX: implement xxx_cpu_list for targets that still miss it */ > diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h > index fb649a4..758a928 100644 > --- a/include/exec/cpu-all.h > +++ b/include/exec/cpu-all.h > @@ -414,8 +414,6 @@ CPUArchState *cpu_copy(CPUArchState *env); > > /* memory API */ > > -extern ram_addr_t ram_size; > - > /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */ > #define RAM_PREALLOC_MASK 
(1 << 0) > > diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h > index a21b65a..e8c7970 100644 > --- a/include/exec/cpu-common.h > +++ b/include/exec/cpu-common.h > @@ -45,6 +45,8 @@ typedef uintptr_t ram_addr_t; > # define RAM_ADDR_FMT "%" PRIxPTR > #endif > > +extern ram_addr_t ram_size; > + > /* memory API */ > > typedef void CPUWriteMemoryFunc(void *opaque, hwaddr addr, uint32_t value); > diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h > index 6502488..4f79081 100644 > --- a/include/sysemu/cpus.h > +++ b/include/sysemu/cpus.h > @@ -23,7 +23,6 @@ extern int smp_threads; > #define smp_threads 1 > #endif > > -void set_numa_modes(void); > void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg); > > #endif > diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h > index ba5c7f8..565c8f6 100644 > --- a/include/sysemu/sysemu.h > +++ b/include/sysemu/sysemu.h > @@ -144,6 +144,9 @@ extern QEMUClockType rtc_clock; > extern int nb_numa_nodes; > extern uint64_t node_mem[MAX_NODES]; > extern unsigned long *node_cpumask[MAX_NODES]; > +void numa_add(const char *optarg); > +void set_numa_nodes(void); > +void set_numa_modes(void); > > #define MAX_OPTION_ROMS 16 > typedef struct QEMUOptionRom { Down the line, I think numa.h would be nicer; this is not a must-fix. > diff --git a/numa.c b/numa.c > new file mode 100644 > index 0000000..395c14f > --- /dev/null > +++ b/numa.c > @@ -0,0 +1,186 @@ > +/* > + * QEMU System Emulator Is this the best we can do here? This was reasonable for vl.c. Better: "NUMA parameter parsing" > + * > + * Copyright (c) 2013 Fujitsu Ltd. It's 2014, isn't it? > + * Author: Wanlong Gao <gaowanl...@cn.fujitsu.com> These tags are probably best avoided for non-original code: we don't have the resources to keep them up to date as code changes. Better to drop it for now. 
> + * > + * Permission is hereby granted, free of charge, to any person obtaining a > copy > + * of this software and associated documentation files (the "Software"), to > deal > + * in the Software without restriction, including without limitation the > rights > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > + * copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > FROM, > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN > + * THE SOFTWARE. > + */ > + > +#include "sysemu/sysemu.h" > +#include "exec/cpu-common.h" > +#include "qemu/bitmap.h" > +#include "qom/cpu.h" > + > +static void numa_node_parse_cpus(int nodenr, const char *cpus) > +{ > + char *endptr; > + unsigned long long value, endvalue; > + > + /* Empty CPU range strings will be considered valid, they will simply > + * not set any bit in the CPU bitmap. 
> + */ > + if (!*cpus) { > + return; > + } > + > + if (parse_uint(cpus, &value, &endptr, 10) < 0) { > + goto error; > + } > + if (*endptr == '-') { > + if (parse_uint_full(endptr + 1, &endvalue, 10) < 0) { > + goto error; > + } > + } else if (*endptr == '\0') { > + endvalue = value; > + } else { > + goto error; > + } > + > + if (endvalue >= MAX_CPUMASK_BITS) { > + endvalue = MAX_CPUMASK_BITS - 1; > + fprintf(stderr, > + "qemu: NUMA: A max of %d VCPUs are supported\n", > + MAX_CPUMASK_BITS); > + } > + > + if (endvalue < value) { > + goto error; > + } > + > + bitmap_set(node_cpumask[nodenr], value, endvalue-value+1); > + return; > + > +error: > + fprintf(stderr, "qemu: Invalid NUMA CPU range: %s\n", cpus); > + exit(1); > +} > + > +void numa_add(const char *optarg) > +{ > + char option[128]; > + char *endptr; > + unsigned long long nodenr; > + > + optarg = get_opt_name(option, 128, optarg, ','); > + if (*optarg == ',') { > + optarg++; > + } > + if (!strcmp(option, "node")) { > + > + if (nb_numa_nodes >= MAX_NODES) { > + fprintf(stderr, "qemu: too many NUMA nodes\n"); > + exit(1); > + } > + > + if (get_param_value(option, 128, "nodeid", optarg) == 0) { > + nodenr = nb_numa_nodes; > + } else { > + if (parse_uint_full(option, &nodenr, 10) < 0) { > + fprintf(stderr, "qemu: Invalid NUMA nodeid: %s\n", option); > + exit(1); > + } > + } > + > + if (nodenr >= MAX_NODES) { > + fprintf(stderr, "qemu: invalid NUMA nodeid: %llu\n", nodenr); > + exit(1); > + } > + > + if (get_param_value(option, 128, "mem", optarg) == 0) { > + node_mem[nodenr] = 0; > + } else { > + int64_t sval; > + sval = strtosz(option, &endptr); > + if (sval < 0 || *endptr) { > + fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg); > + exit(1); > + } > + node_mem[nodenr] = sval; > + } > + if (get_param_value(option, 128, "cpus", optarg) != 0) { > + numa_node_parse_cpus(nodenr, option); > + } > + nb_numa_nodes++; > + } else { > + fprintf(stderr, "Invalid -numa option: %s\n", option); > + exit(1); > + } 
> +} > + > +void set_numa_nodes(void) > +{ > + if (nb_numa_nodes > 0) { > + int i; > + > + if (nb_numa_nodes > MAX_NODES) { > + nb_numa_nodes = MAX_NODES; > + } > + > + /* If no memory size if given for any node, assume the default case > + * and distribute the available memory equally across all nodes > + */ > + for (i = 0; i < nb_numa_nodes; i++) { > + if (node_mem[i] != 0) { > + break; > + } > + } > + if (i == nb_numa_nodes) { > + uint64_t usedmem = 0; > + > + /* On Linux, the each node's border has to be 8MB aligned, > + * the final node gets the rest. > + */ > + for (i = 0; i < nb_numa_nodes - 1; i++) { > + node_mem[i] = (ram_size / nb_numa_nodes) & ~((1 << 23UL) - > 1); > + usedmem += node_mem[i]; > + } > + node_mem[i] = ram_size - usedmem; > + } > + > + for (i = 0; i < nb_numa_nodes; i++) { > + if (!bitmap_empty(node_cpumask[i], MAX_CPUMASK_BITS)) { > + break; > + } > + } > + /* assigning the VCPUs round-robin is easier to implement, guest OSes > + * must cope with this anyway, because there are BIOSes out there in > + * real machines which also use this scheme. > + */ > + if (i == nb_numa_nodes) { > + for (i = 0; i < max_cpus; i++) { > + set_bit(i, node_cpumask[i % nb_numa_nodes]); > + } > + } > + } > +} > + > +void set_numa_modes(void) > +{ > + CPUState *cpu; > + int i; > + > + CPU_FOREACH(cpu) { > + for (i = 0; i < nb_numa_nodes; i++) { > + if (test_bit(cpu->cpu_index, node_cpumask[i])) { > + cpu->numa_node = i; > + } > + } > + } > +} > diff --git a/vl.c b/vl.c > index 709d8cd..1dba4a5 100644 > --- a/vl.c > +++ b/vl.c > @@ -1265,102 +1265,6 @@ char *get_boot_devices_list(size_t *size, bool > ignore_suffixes) > return list; > } > > -static void numa_node_parse_cpus(int nodenr, const char *cpus) > -{ > - char *endptr; > - unsigned long long value, endvalue; > - > - /* Empty CPU range strings will be considered valid, they will simply > - * not set any bit in the CPU bitmap. 
> - */ > - if (!*cpus) { > - return; > - } > - > - if (parse_uint(cpus, &value, &endptr, 10) < 0) { > - goto error; > - } > - if (*endptr == '-') { > - if (parse_uint_full(endptr + 1, &endvalue, 10) < 0) { > - goto error; > - } > - } else if (*endptr == '\0') { > - endvalue = value; > - } else { > - goto error; > - } > - > - if (endvalue >= MAX_CPUMASK_BITS) { > - endvalue = MAX_CPUMASK_BITS - 1; > - fprintf(stderr, > - "qemu: NUMA: A max of %d VCPUs are supported\n", > - MAX_CPUMASK_BITS); > - } > - > - if (endvalue < value) { > - goto error; > - } > - > - bitmap_set(node_cpumask[nodenr], value, endvalue-value+1); > - return; > - > -error: > - fprintf(stderr, "qemu: Invalid NUMA CPU range: %s\n", cpus); > - exit(1); > -} > - > -static void numa_add(const char *optarg) > -{ > - char option[128]; > - char *endptr; > - unsigned long long nodenr; > - > - optarg = get_opt_name(option, 128, optarg, ','); > - if (*optarg == ',') { > - optarg++; > - } > - if (!strcmp(option, "node")) { > - > - if (nb_numa_nodes >= MAX_NODES) { > - fprintf(stderr, "qemu: too many NUMA nodes\n"); > - exit(1); > - } > - > - if (get_param_value(option, 128, "nodeid", optarg) == 0) { > - nodenr = nb_numa_nodes; > - } else { > - if (parse_uint_full(option, &nodenr, 10) < 0) { > - fprintf(stderr, "qemu: Invalid NUMA nodeid: %s\n", option); > - exit(1); > - } > - } > - > - if (nodenr >= MAX_NODES) { > - fprintf(stderr, "qemu: invalid NUMA nodeid: %llu\n", nodenr); > - exit(1); > - } > - > - if (get_param_value(option, 128, "mem", optarg) == 0) { > - node_mem[nodenr] = 0; > - } else { > - int64_t sval; > - sval = strtosz(option, &endptr); > - if (sval < 0 || *endptr) { > - fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg); > - exit(1); > - } > - node_mem[nodenr] = sval; > - } > - if (get_param_value(option, 128, "cpus", optarg) != 0) { > - numa_node_parse_cpus(nodenr, option); > - } > - nb_numa_nodes++; > - } else { > - fprintf(stderr, "Invalid -numa option: %s\n", option); > - exit(1); 
> - } > -} > - > static QemuOptsList qemu_smp_opts = { > .name = "smp-opts", > .implied_opt_name = "cpus", > @@ -4336,48 +4240,7 @@ int main(int argc, char **argv, char **envp) > default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS); > default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS); > > - if (nb_numa_nodes > 0) { > - int i; > - > - if (nb_numa_nodes > MAX_NODES) { > - nb_numa_nodes = MAX_NODES; > - } > - > - /* If no memory size if given for any node, assume the default case > - * and distribute the available memory equally across all nodes > - */ > - for (i = 0; i < nb_numa_nodes; i++) { > - if (node_mem[i] != 0) > - break; > - } > - if (i == nb_numa_nodes) { > - uint64_t usedmem = 0; > - > - /* On Linux, the each node's border has to be 8MB aligned, > - * the final node gets the rest. > - */ > - for (i = 0; i < nb_numa_nodes - 1; i++) { > - node_mem[i] = (ram_size / nb_numa_nodes) & ~((1 << 23UL) - > 1); > - usedmem += node_mem[i]; > - } > - node_mem[i] = ram_size - usedmem; > - } > - > - for (i = 0; i < nb_numa_nodes; i++) { > - if (!bitmap_empty(node_cpumask[i], MAX_CPUMASK_BITS)) { > - break; > - } > - } > - /* assigning the VCPUs round-robin is easier to implement, guest OSes > - * must cope with this anyway, because there are BIOSes out there in > - * real machines which also use this scheme. > - */ > - if (i == nb_numa_nodes) { > - for (i = 0; i < max_cpus; i++) { > - set_bit(i, node_cpumask[i % nb_numa_nodes]); > - } > - } > - } > + set_numa_nodes(); > > if (qemu_opts_foreach(qemu_find_opts("mon"), mon_init_func, NULL, 1) != > 0) { > exit(1); > -- > 1.8.5.2.229.g4448466 >