On 01/18/2011 02:16 PM, Markus Armbruster wrote:
The problem: you want to do serious scalability testing (thousands of VMs)
of your management stack. If each guest eats up a few hundred MiB and
competes for CPU, that requires a serious host machine, which you don't
have. You also don't want to modify the management stack at all, if you
can help it.
The solution: a perfectly normal-looking QEMU that uses minimal
resources. Ability to execute any guest code is strictly optional ;)
New option -fake-machine creates a fake machine incapable of running
guest code. Completely compiled out by default, enable with configure
--enable-fake-machine.
With -fake-machine, CPU use is negligible, and memory use is rather
modest.
Non-fake VM running F-14 live, right after boot:
UID PID PPID C SZ RSS PSR STIME TTY TIME CMD
armbru 15707 2558 53 191837 414388 1 21:05 pts/3 00:00:29 [...]
Same VM -fake-machine, after similar time elapsed:
UID PID PPID C SZ RSS PSR STIME TTY TIME CMD
armbru 15742 2558 0 85129 9412 0 21:07 pts/3 00:00:00 [...]
We're using a very similar patch for RHEL scalability testing.
Interesting, but:
9432 anthony 20 0 153m 14m 5384 S 0 0.2 0:00.22 qemu-system-x86
That's qemu-system-x86 -m 4
In terms of memory overhead, the largest source is not really going to
be addressed by -fake-machine (l1_phys_map and phys_ram_dirty).
I don't really understand the point of not creating a VCPU with KVM. Is
there some type of overhead in doing that?
Regards,
Anthony Liguori
HACK ALERT: Works by hacking the main loop so it never executes any
guest code. Not implemented for KVM's main loop at this time, thus
-fake-machine needs to force KVM off. It also replaces guest RAM with a
token amount (pc machine only at this time), and forces -vga none,
because VGA eats too much memory.
Note the TODO and FIXME comments.
Dan Berrange explored a different solution a while ago: a new do-nothing
target, patterned after i386, and a new do-nothing machine, patterned
after pc. His patch works. But it duplicates a lot of target and machine
code --- it adds more than ten times as many lines as this patch. Keeping
the duplicated code reasonably in sync would be bothersome. I didn't
like that, talked it over with Dan, and we came up with this idea
instead.
Comments? Better ideas?
---
configure | 12 ++++++++++++
cpu-exec.c | 2 +-
cpus.c | 3 +++
hw/pc.c | 30 ++++++++++++++++++++----------
qemu-options.hx | 7 +++++++
targphys.h | 7 +++++++
vl.c | 21 +++++++++++++++++++++
7 files changed, 71 insertions(+), 11 deletions(-)
diff --git a/configure b/configure
index d68f862..98b0a5f 100755
--- a/configure
+++ b/configure
@@ -174,6 +174,7 @@ trace_backend="nop"
trace_file="trace"
spice=""
rbd=""
+fake_machine="no"
# parse CC options first
for opt do
@@ -719,6 +720,10 @@ for opt do
;;
--enable-rbd) rbd="yes"
;;
+ --disable-fake-machine) fake_machine="no"
+ ;;
+ --enable-fake-machine) fake_machine="yes"
+ ;;
*) echo "ERROR: unknown option $opt"; show_help="yes"
;;
esac
@@ -913,6 +918,8 @@ echo " Default:trace-<pid>"
echo " --disable-spice disable spice"
echo " --enable-spice enable spice"
echo " --enable-rbd enable building the rados block device (rbd)"
+echo " --disable-fake-machine disable -fake-machine option"
+echo " --enable-fake-machine enable -fake-machine option"
echo ""
echo "NOTE: The object files are built at the place where configure is launched"
exit 1
@@ -2455,6 +2462,7 @@ echo "Trace output file $trace_file-<pid>"
echo "spice support $spice"
echo "rbd support $rbd"
echo "xfsctl support $xfs"
+echo "-fake-machine $fake_machine"
if test $sdl_too_old = "yes"; then
echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -2727,6 +2735,10 @@ if test "$spice" = "yes" ; then
echo "CONFIG_SPICE=y">> $config_host_mak
fi
+if test $fake_machine = "yes" ; then
+ echo "CONFIG_FAKE_MACHINE=y">> $config_host_mak
+fi
+
# XXX: suppress that
if [ "$bsd" = "yes" ] ; then
echo "CONFIG_BSD=y">> $config_host_mak
diff --git a/cpu-exec.c b/cpu-exec.c
index 8c9fb8b..cd1259a 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -230,7 +230,7 @@ int cpu_exec(CPUState *env1)
uint8_t *tc_ptr;
unsigned long next_tb;
- if (cpu_halted(env1) == EXCP_HALTED)
+ if (fake_machine || cpu_halted(env1) == EXCP_HALTED)
return EXCP_HALTED;
cpu_single_env = env1;
diff --git a/cpus.c b/cpus.c
index 0309189..91e708f 100644
--- a/cpus.c
+++ b/cpus.c
@@ -128,6 +128,9 @@ static int cpu_can_run(CPUState *env)
static int cpu_has_work(CPUState *env)
{
+ if (fake_machine) {
+ return 0;
+ }
if (env->stop)
return 1;
if (env->queued_work_first)
diff --git a/hw/pc.c b/hw/pc.c
index fface7d..809f53e 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -993,18 +993,28 @@ void pc_memory_init(ram_addr_t ram_size,
linux_boot = (kernel_filename != NULL);
/* allocate RAM */
- ram_addr = qemu_ram_alloc(NULL, "pc.ram",
- below_4g_mem_size + above_4g_mem_size);
- cpu_register_physical_memory(0, 0xa0000, ram_addr);
- cpu_register_physical_memory(0x100000,
- below_4g_mem_size - 0x100000,
- ram_addr + 0x100000);
+ if (fake_machine) {
+ /* If the user boots with -m 1000, we don't actually want to
+ * allocate a GB of RAM, so let's force all RAM allocs to one
+ * page to keep our memory footprint nice and low.
+ *
+ * TODO try to use -m 1k instead
+ */
+ ram_addr = qemu_ram_alloc(NULL, "pc.ram", 1);
+ } else {
+ ram_addr = qemu_ram_alloc(NULL, "pc.ram",
+ below_4g_mem_size + above_4g_mem_size);
+ cpu_register_physical_memory(0, 0xa0000, ram_addr);
+ cpu_register_physical_memory(0x100000,
+ below_4g_mem_size - 0x100000,
+ ram_addr + 0x100000);
#if TARGET_PHYS_ADDR_BITS> 32
- if (above_4g_mem_size> 0) {
- cpu_register_physical_memory(0x100000000ULL, above_4g_mem_size,
- ram_addr + below_4g_mem_size);
- }
+ if (above_4g_mem_size> 0) {
+ cpu_register_physical_memory(0x100000000ULL, above_4g_mem_size,
+ ram_addr + below_4g_mem_size);
+ }
#endif
+ }
/* BIOS load */
if (bios_name == NULL)
diff --git a/qemu-options.hx b/qemu-options.hx
index 898561d..8a8ef4b 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2324,6 +2324,13 @@ Specify a trace file to log output traces to.
ETEXI
#endif
+#ifdef CONFIG_FAKE_MACHINE
+DEF("fake-machine", 0, QEMU_OPTION_fake_machine,
+ "-fake-machine create a fake machine incapable of running guest code\n"
+ " mimimal resource use, use for scalability testing\n",
+ QEMU_ARCH_ALL)
+#endif
+
HXCOMM This is the last statement. Insert new options before this line!
STEXI
@end table
diff --git a/targphys.h b/targphys.h
index 95648d6..f30530c 100644
--- a/targphys.h
+++ b/targphys.h
@@ -18,4 +18,11 @@ typedef uint64_t target_phys_addr_t;
#endif
#endif
+/* FIXME definitely in the wrong place here; where should it go? */
+#ifdef CONFIG_FAKE_MACHINE
+extern int fake_machine;
+#else
+#define fake_machine 0
+#endif
+
#endif
diff --git a/vl.c b/vl.c
index 0292184..bcc60b0 100644
--- a/vl.c
+++ b/vl.c
@@ -240,6 +240,10 @@ struct FWBootEntry {
QTAILQ_HEAD(, FWBootEntry) fw_boot_order =
QTAILQ_HEAD_INITIALIZER(fw_boot_order);
+#ifdef CONFIG_FAKE_MACHINE
+int fake_machine = 0;
+#endif
+
int nb_numa_nodes;
uint64_t node_mem[MAX_NODES];
uint64_t node_cpumask[MAX_NODES];
@@ -2727,6 +2731,11 @@ int main(int argc, char **argv, char **envp)
fclose(fp);
break;
}
+#ifdef CONFIG_FAKE_MACHINE
+ case QEMU_OPTION_fake_machine:
+ fake_machine = 1;
+ break;
+#endif
default:
os_parse_cmd_args(popt->index, optarg);
}
@@ -2817,6 +2826,15 @@ int main(int argc, char **argv, char **envp)
}
if (default_vga)
vga_interface_type = VGA_CIRRUS;
+ if (fake_machine) {
+ /* HACK: Ideally we'd configure VGA as usual, but this causes
+ * several MB of VGA RAM to be allocated, and we can't do the
+ * tricks we use elsewhere to just return a single 4k page,
+ * because the VGA driver immediately memsets() the entire
+ * allocation it requested.
+ */
+ vga_interface_type = VGA_NONE;
+ }
socket_init();
@@ -2835,6 +2853,9 @@ int main(int argc, char **argv, char **envp)
exit(1);
}
+ if (fake_machine) {
+ kvm_allowed = 0;
+ }
if (kvm_allowed) {
int ret = kvm_init(smp_cpus);
if (ret< 0) {