The problem: you want to do serious scalability testing (thousands of VMs) of your management stack. If each guest eats up a few hundred MiB and competes for CPU, that requires a serious host machine, which you don't have. You also don't want to modify the management stack at all, if you can help it.
The solution: a perfectly normal-looking QEMU that uses minimal resources. Ability to execute any guest code is strictly optional ;)

New option -fake-machine creates a fake machine incapable of running guest code. It is completely compiled out by default; enable it with configure --enable-fake-machine.

With -fake-machine, CPU use is negligible, and memory use is rather modest.

Non-fake VM running F-14 live, right after boot:

UID        PID  PPID  C     SZ    RSS PSR STIME TTY          TIME CMD
armbru   15707  2558 53 191837 414388   1 21:05 pts/3    00:00:29 [...]

Same VM with -fake-machine, after similar time elapsed:

UID        PID  PPID  C     SZ    RSS PSR STIME TTY          TIME CMD
armbru   15742  2558  0  85129   9412   0 21:07 pts/3    00:00:00 [...]

We're using a very similar patch for RHEL scalability testing.

HACK ALERT: Works by hacking the main loop so it never executes any guest code. Not implemented for KVM's main loop at this time, thus -fake-machine needs to force KVM off. It also replaces guest RAM with a token amount (pc machine only at this time), and forces -vga none, because VGA eats too much memory.

Note the TODO and FIXME comments.

Dan Berrange explored a different solution a while ago: a new do-nothing target, patterned after i386, and a new do-nothing machine, patterned after pc. His patch works. But it duplicates a lot of target and machine code --- it adds more than ten times as many lines as this patch. Keeping the duplicated code reasonably in sync would be bothersome. I didn't like that, talked it over with Dan, and we came up with this idea instead.

Comments? Better ideas?
--- configure | 12 ++++++++++++ cpu-exec.c | 2 +- cpus.c | 3 +++ hw/pc.c | 30 ++++++++++++++++++++---------- qemu-options.hx | 7 +++++++ targphys.h | 7 +++++++ vl.c | 21 +++++++++++++++++++++ 7 files changed, 71 insertions(+), 11 deletions(-) diff --git a/configure b/configure index d68f862..98b0a5f 100755 --- a/configure +++ b/configure @@ -174,6 +174,7 @@ trace_backend="nop" trace_file="trace" spice="" rbd="" +fake_machine="no" # parse CC options first for opt do @@ -719,6 +720,10 @@ for opt do ;; --enable-rbd) rbd="yes" ;; + --disable-fake-machine) fake_machine="no" + ;; + --enable-fake-machine) fake_machine="yes" + ;; *) echo "ERROR: unknown option $opt"; show_help="yes" ;; esac @@ -913,6 +918,8 @@ echo " Default:trace-<pid>" echo " --disable-spice disable spice" echo " --enable-spice enable spice" echo " --enable-rbd enable building the rados block device (rbd)" +echo " --disable-fake-machine disable -fake-machine option" +echo " --enable-fake-machine enable -fake-machine option" echo "" echo "NOTE: The object files are built at the place where configure is launched" exit 1 @@ -2455,6 +2462,7 @@ echo "Trace output file $trace_file-<pid>" echo "spice support $spice" echo "rbd support $rbd" echo "xfsctl support $xfs" +echo "-fake-machine $fake_machine" if test $sdl_too_old = "yes"; then echo "-> Your SDL version is too old - please upgrade to have SDL support" @@ -2727,6 +2735,10 @@ if test "$spice" = "yes" ; then echo "CONFIG_SPICE=y" >> $config_host_mak fi +if test $fake_machine = "yes" ; then + echo "CONFIG_FAKE_MACHINE=y" >> $config_host_mak +fi + # XXX: suppress that if [ "$bsd" = "yes" ] ; then echo "CONFIG_BSD=y" >> $config_host_mak diff --git a/cpu-exec.c b/cpu-exec.c index 8c9fb8b..cd1259a 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -230,7 +230,7 @@ int cpu_exec(CPUState *env1) uint8_t *tc_ptr; unsigned long next_tb; - if (cpu_halted(env1) == EXCP_HALTED) + if (fake_machine || cpu_halted(env1) == EXCP_HALTED) return EXCP_HALTED; cpu_single_env = env1; 
diff --git a/cpus.c b/cpus.c index 0309189..91e708f 100644 --- a/cpus.c +++ b/cpus.c @@ -128,6 +128,9 @@ static int cpu_can_run(CPUState *env) static int cpu_has_work(CPUState *env) { + if (fake_machine) { + return 0; + } if (env->stop) return 1; if (env->queued_work_first) diff --git a/hw/pc.c b/hw/pc.c index fface7d..809f53e 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -993,18 +993,28 @@ void pc_memory_init(ram_addr_t ram_size, linux_boot = (kernel_filename != NULL); /* allocate RAM */ - ram_addr = qemu_ram_alloc(NULL, "pc.ram", - below_4g_mem_size + above_4g_mem_size); - cpu_register_physical_memory(0, 0xa0000, ram_addr); - cpu_register_physical_memory(0x100000, - below_4g_mem_size - 0x100000, - ram_addr + 0x100000); + if (fake_machine) { + /* If the user boots with -m 1000, we don't actually want to + * allocate a GB of RAM, so let's force all RAM allocs to one + * page to keep our memory footprint nice and low. + * + * TODO try to use -m 1k instead + */ + ram_addr = qemu_ram_alloc(NULL, "pc.ram", 1); + } else { + ram_addr = qemu_ram_alloc(NULL, "pc.ram", + below_4g_mem_size + above_4g_mem_size); + cpu_register_physical_memory(0, 0xa0000, ram_addr); + cpu_register_physical_memory(0x100000, + below_4g_mem_size - 0x100000, + ram_addr + 0x100000); #if TARGET_PHYS_ADDR_BITS > 32 - if (above_4g_mem_size > 0) { - cpu_register_physical_memory(0x100000000ULL, above_4g_mem_size, - ram_addr + below_4g_mem_size); - } + if (above_4g_mem_size > 0) { + cpu_register_physical_memory(0x100000000ULL, above_4g_mem_size, + ram_addr + below_4g_mem_size); + } #endif + } /* BIOS load */ if (bios_name == NULL) diff --git a/qemu-options.hx b/qemu-options.hx index 898561d..8a8ef4b 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2324,6 +2324,13 @@ Specify a trace file to log output traces to. 
ETEXI #endif +#ifdef CONFIG_FAKE_MACHINE +DEF("fake-machine", 0, QEMU_OPTION_fake_machine, + "-fake-machine create a fake machine incapable of running guest code\n" + " minimal resource use, use for scalability testing\n", + QEMU_ARCH_ALL) +#endif + HXCOMM This is the last statement. Insert new options before this line! STEXI @end table diff --git a/targphys.h b/targphys.h index 95648d6..f30530c 100644 --- a/targphys.h +++ b/targphys.h @@ -18,4 +18,11 @@ typedef uint64_t target_phys_addr_t; #endif #endif +/* FIXME definitely in the wrong place here; where should it go? */ +#ifdef CONFIG_FAKE_MACHINE +extern int fake_machine; +#else +#define fake_machine 0 +#endif + #endif diff --git a/vl.c b/vl.c index 0292184..bcc60b0 100644 --- a/vl.c +++ b/vl.c @@ -240,6 +240,10 @@ struct FWBootEntry { QTAILQ_HEAD(, FWBootEntry) fw_boot_order = QTAILQ_HEAD_INITIALIZER(fw_boot_order); +#ifdef CONFIG_FAKE_MACHINE +int fake_machine = 0; +#endif + int nb_numa_nodes; uint64_t node_mem[MAX_NODES]; uint64_t node_cpumask[MAX_NODES]; @@ -2727,6 +2731,11 @@ int main(int argc, char **argv, char **envp) fclose(fp); break; } +#ifdef CONFIG_FAKE_MACHINE + case QEMU_OPTION_fake_machine: + fake_machine = 1; + break; +#endif default: os_parse_cmd_args(popt->index, optarg); } @@ -2817,6 +2826,15 @@ int main(int argc, char **argv, char **envp) } if (default_vga) vga_interface_type = VGA_CIRRUS; + if (fake_machine) { + /* HACK: Ideally we'd configure VGA as usual, but this causes + * several MB of VGA RAM to be allocated, and we can't do the + * tricks we use elsewhere to just return a single 4k page, + * because the VGA driver immediately memsets() the entire + * allocation it requested. + */ + vga_interface_type = VGA_NONE; + } socket_init(); @@ -2835,6 +2853,9 @@ int main(int argc, char **argv, char **envp) exit(1); } + if (fake_machine) { + kvm_allowed = 0; + } if (kvm_allowed) { int ret = kvm_init(smp_cpus); if (ret < 0) { -- 1.7.2.3