Pierrick Bouvier <pierrick.bouv...@linaro.org> writes: > On 5/28/24 12:14, Alex Bennée wrote: >> Pierrick Bouvier <pierrick.bouv...@linaro.org> writes: >> >>> This plugin uses the new time control interface to make decisions >>> about the state of time during the emulation. The algorithm is >>> currently very simple. The user specifies an ips rate which applies >>> per core. If the core runs ahead of its allocated execution time the >>> plugin sleeps for a bit to let real time catch up. Either way time is >>> updated for the emulation as a function of total executed instructions >>> with some adjustments for cores that idle. >>> >>> Examples >>> -------- >>> >>> Slow down execution of /bin/true: >>> $ num_insn=$(./build/qemu-x86_64 -plugin ./build/tests/plugin/libinsn.so -d >>> plugin /bin/true |& grep total | sed -e 's/.*: //') >>> $ time ./build/qemu-x86_64 -plugin >>> ./build/contrib/plugins/libips.so,ips=$(($num_insn/4)) /bin/true >>> real 4.000s >>> >>> Boot a Linux kernel simulating a 250MHz cpu: >>> $ /build/qemu-system-x86_64 -kernel /boot/vmlinuz-6.1.0-21-amd64 -append >>> "console=ttyS0" -plugin >>> ./build/contrib/plugins/libips.so,ips=$((250*1000*1000)) -smp 1 -m 512 >>> check time until kernel panic on serial0 >>> >>> Signed-off-by: Pierrick Bouvier <pierrick.bouv...@linaro.org> >>> --- >>> contrib/plugins/ips.c | 239 +++++++++++++++++++++++++++++++++++++++ >>> contrib/plugins/Makefile | 1 + >>> 2 files changed, 240 insertions(+) >>> create mode 100644 contrib/plugins/ips.c >>> >>> diff --git a/contrib/plugins/ips.c b/contrib/plugins/ips.c >>> new file mode 100644 >>> index 00000000000..cf3159df391 >>> --- /dev/null >>> +++ b/contrib/plugins/ips.c >>> @@ -0,0 +1,239 @@ >>> +/* >>> + * ips rate limiting plugin. >>> + * >>> + * This plugin can be used to restrict the execution of a system to a >>> + * particular number of Instructions Per Second (ips). 
This controls >>> + * time as seen by the guest so while wall-clock time may be longer >>> + * from the guests point of view time will pass at the normal rate. >>> + * >>> + * This uses the new plugin API which allows the plugin to control >>> + * system time. >>> + * >>> + * Copyright (c) 2023 Linaro Ltd >>> + * >>> + * SPDX-License-Identifier: GPL-2.0-or-later >>> + */ >>> + >>> +#include <stdio.h> >>> +#include <glib.h> >>> +#include <qemu-plugin.h> >>> + >>> +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; >>> + >>> +/* how many times do we update time per sec */ >>> +#define NUM_TIME_UPDATE_PER_SEC 10 >>> +#define NSEC_IN_ONE_SEC (1000 * 1000 * 1000) >>> + >>> +static GMutex global_state_lock; >>> + >>> +static uint64_t insn_per_second = 1000 * 1000; /* ips per core, per second >>> */ >>> +static uint64_t insn_quantum; /* trap every N instructions */ >>> +static bool precise_execution; /* count every instruction */ >>> +static int64_t start_time_ns; /* time (ns since epoch) first vCPU started >>> */ >>> +static int64_t virtual_time_ns; /* last set virtual time */ >>> + >>> +static const void *time_handle; >>> + >>> +typedef enum { >>> + UNKNOWN = 0, >>> + EXECUTING, >>> + IDLE, >>> + FINISHED >>> +} vCPUState; >>> + >>> +typedef struct { >>> + uint64_t counter; >>> + uint64_t track_insn; >>> + vCPUState state; >>> + /* timestamp when vCPU entered state */ >>> + int64_t last_state_time; >>> +} vCPUTime; >>> + >>> +struct qemu_plugin_scoreboard *vcpus; >>> + >>> +/* return epoch time in ns */ >>> +static int64_t now_ns(void) >>> +{ >>> + return g_get_real_time() * 1000; >>> +} >>> + >>> +static uint64_t num_insn_during(int64_t elapsed_ns) >>> +{ >>> + double num_secs = elapsed_ns / (double) NSEC_IN_ONE_SEC; >>> + return num_secs * (double) insn_per_second; >>> +} >>> + >>> +static int64_t time_for_insn(uint64_t num_insn) >>> +{ >>> + double num_secs = (double) num_insn / (double) insn_per_second; >>> + return num_secs * (double) 
NSEC_IN_ONE_SEC; >>> +} >>> + >>> +static int64_t uptime_ns(void) >>> +{ >>> + int64_t now = now_ns(); >>> + g_assert(now >= start_time_ns); >>> + return now - start_time_ns; >>> +} >>> + >>> +static void vcpu_set_state(vCPUTime *vcpu, vCPUState new_state) >>> +{ >>> + vcpu->last_state_time = now_ns(); >>> + vcpu->state = new_state; >>> +} >>> + >>> +static void update_system_time(vCPUTime *vcpu) >>> +{ >>> + /* flush remaining instructions */ >>> + vcpu->counter += vcpu->track_insn; >>> + vcpu->track_insn = 0; >>> + >>> + int64_t uptime = uptime_ns(); >>> + uint64_t expected_insn = num_insn_during(uptime); >>> + >>> + if (vcpu->counter >= expected_insn) { >>> + /* this vcpu ran faster than expected, so it has to sleep */ >>> + uint64_t insn_advance = vcpu->counter - expected_insn; >>> + uint64_t time_advance_ns = time_for_insn(insn_advance); >>> + int64_t sleep_us = time_advance_ns / 1000; >>> + g_usleep(sleep_us); >>> + } >>> + >>> + /* based on number of instructions, what should be the new time? */ >>> + int64_t new_virtual_time = time_for_insn(vcpu->counter); >>> + >>> + g_mutex_lock(&global_state_lock); >>> + >>> + /* Time only moves forward. Another vcpu might have updated it >>> already. 
*/ >>> + if (new_virtual_time > virtual_time_ns) { >>> + qemu_plugin_update_ns(time_handle, new_virtual_time); >>> + virtual_time_ns = new_virtual_time; >>> + } >>> + >>> + g_mutex_unlock(&global_state_lock); >>> +} >>> + >>> +static void set_start_time() >>> +{ >>> + g_mutex_lock(&global_state_lock); >>> + if (!start_time_ns) { >>> + start_time_ns = now_ns(); >>> + } >>> + g_mutex_unlock(&global_state_lock); >>> +} >>> + >>> +static void vcpu_init(qemu_plugin_id_t id, unsigned int cpu_index) >>> +{ >>> + vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index); >>> + /* ensure start time is set first */ >>> + set_start_time(); >>> + /* start counter from absolute time reference */ >>> + vcpu->counter = num_insn_during(uptime_ns()); >>> + vcpu_set_state(vcpu, EXECUTING); >>> +} >>> + >>> +static void vcpu_idle(qemu_plugin_id_t id, unsigned int cpu_index) >>> +{ >>> + vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index); >>> + vcpu_set_state(vcpu, IDLE); >>> +} >>> + >>> +static void vcpu_resume(qemu_plugin_id_t id, unsigned int cpu_index) >>> +{ >>> + vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index); >>> + g_assert(vcpu->state == IDLE); >> I'm triggering a weird race here: >> (gdb) b vcpu_init >> Breakpoint 1 at 0x7ffff7fa15f7: file >> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c, line 127. >> (gdb) r >> The program being debugged has been started already. >> Start it from the beginning? 
(y or n) y >> Starting program: >> /home/alex/lsrc/qemu.git/builds/arm.debug/qemu-system-aarch64 >> -machine type=virt,virtualization=on,pflash0=rom,pflash1=efivars >> -cpu cortex-a57 -smp 32 -accel tcg -device >> virtio-net-pci,netdev=unet -device virtio-scsi-pci -device >> scsi-hd,drive=hd -netdev user,id=unet,hostfwd=tcp::2222-:22 >> -blockdev >> driver=raw,node-name=hd,file.driver=host_device,file.filename=/dev/zen-ssd2/trixie-arm64,discard=unmap >> -serial mon:stdio -blockdev >> node-name=rom,driver=file,filename=/home/alex/lsrc/qemu.git/builds/arm.debug/pc-bios/edk2-aarch64-code.fd,read-only=true >> -blockdev >> node-name=efivars,driver=file,filename=/home/alex/images/qemu-arm64-efivars >> -m 8192 -object memory-backend-memfd,id=mem,size=8G,share=on -kernel >> /home/alex/lsrc/linux.git/builds/arm64/arch/arm64/boot/Image -append >> root=/dev/sda2\ console=ttyAMA0 -plugin >> contrib/plugins/libips.so,ips=1000000000 >> [Thread debugging using libthread_db enabled] >> Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". >> [New Thread 0x7fffe72006c0 (LWP 360538)] >> [New Thread 0x7fffe68006c0 (LWP 360540)] >> [New Thread 0x7fffe5e006c0 (LWP 360541)] >> [New Thread 0x7fffe54006c0 (LWP 360542)] >> [New Thread 0x7fffe4a006c0 (LWP 360543)] >> [New Thread 0x7fffdfe006c0 (LWP 360544)] >> [New Thread 0x7fffdf4006c0 (LWP 360545)] >> [New Thread 0x7fffdea006c0 (LWP 360546)] >> [Switching to Thread 0x7fffdf4006c0 (LWP 360545)] >> Thread 8 "qemu-system-aar" hit Breakpoint 1, vcpu_init >> (id=10457908569352202058, cpu_index=0) at >> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127 >> 127 vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, >> cpu_index); >> (gdb) c >> Continuing. 
>> [New Thread 0x7fffde0006c0 (LWP 360548)] >> [Switching to Thread 0x7fffdea006c0 (LWP 360546)] >> Thread 9 "qemu-system-aar" hit Breakpoint 1, vcpu_init >> (id=10457908569352202058, cpu_index=1) at >> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127 >> 127 vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, >> cpu_index); >> (gdb) >> Continuing. >> [New Thread 0x7fffdd6006c0 (LWP 360549)] >> [Switching to Thread 0x7fffde0006c0 (LWP 360548)] >> Thread 10 "qemu-system-aar" hit Breakpoint 1, vcpu_init >> (id=10457908569352202058, cpu_index=2) at >> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127 >> 127 vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, >> cpu_index); >> (gdb) >> Continuing. >> [New Thread 0x7fffdcc006c0 (LWP 360550)] >> [Switching to Thread 0x7fffdd6006c0 (LWP 360549)] >> Thread 11 "qemu-system-aar" hit Breakpoint 1, vcpu_init >> (id=10457908569352202058, cpu_index=3) at >> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127 >> 127 vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, >> cpu_index); >> (gdb) >> Continuing. >> [New Thread 0x7fffd3e006c0 (LWP 360551)] >> [Switching to Thread 0x7fffdcc006c0 (LWP 360550)] >> Thread 12 "qemu-system-aar" hit Breakpoint 1, vcpu_init >> (id=10457908569352202058, cpu_index=4) at >> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127 >> 127 vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, >> cpu_index); >> (gdb) n >> 129 set_start_time(); >> (gdb) >> 131 vcpu->counter = num_insn_during(uptime_ns()); >> (gdb) >> 132 vcpu_set_state(vcpu, EXECUTING); >> (gdb) >> 133 } >> (gdb) p vcpu->state >> $1 = EXECUTING >> (gdb) p &vcpu->state >> $2 = (vCPUState *) 0x555557c6b5d0 >> (gdb) watch *(vCPUState *) 0x555557c6b5d0 >> Hardware watchpoint 2: *(vCPUState *) 0x555557c6b5d0 >> (gdb) c >> Continuing. 
>> [Thread 0x7fffdfe006c0 (LWP 360544) exited] >> [Thread 0x7fffe5e006c0 (LWP 360541) exited] >> Thread 12 "qemu-system-aar" hit Hardware watchpoint 2: >> *(vCPUState *) 0x555557c6b5d0 >> Old value = EXECUTING >> New value = IDLE >> vcpu_set_state (vcpu=0x555557c6b5c0, new_state=IDLE) at >> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:83 >> 83 } >> (gdb) c >> Continuing. >> [New Thread 0x7fffdfe006c0 (LWP 360591)] >> [New Thread 0x7fffe5e006c0 (LWP 360592)] >> [New Thread 0x7fffd34006c0 (LWP 360593)] >> [Switching to Thread 0x7fffdfe006c0 (LWP 360591)] >> Thread 14 "qemu-system-aar" hit Breakpoint 1, vcpu_init >> (id=10457908569352202058, cpu_index=6) at >> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127 >> 127 vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, >> cpu_index); >> (gdb) >> Continuing. >> [New Thread 0x7fffd2a006c0 (LWP 360594)] >> ** >> ERROR:/home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144:vcpu_resume: >> assertion failed: (vcpu->state == IDLE) >> Bail out! >> ERROR:/home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144:vcpu_resume: >> assertion failed: (vcpu->state == IDLE) >> Thread 13 "qemu-system-aar" received signal SIGABRT, Aborted. >> [Switching to Thread 0x7fffd3e006c0 (LWP 360551)] >> __pthread_kill_implementation (threadid=<optimized out>, >> signo=signo@entry=6, no_tid=no_tid@entry=0) at ./nptl/pthread_kill.c:44 >> 44 ./nptl/pthread_kill.c: No such file or directory. 
>> (gdb) bt >> #0 __pthread_kill_implementation (threadid=<optimized out>, >> signo=signo@entry=6, no_tid=no_tid@entry=0) at ./nptl/pthread_kill.c:44 >> #1 0x00007ffff4ca9e8f in __pthread_kill_internal (signo=6, >> threadid=<optimized out>) at ./nptl/pthread_kill.c:78 >> #2 0x00007ffff4c5afb2 in __GI_raise (sig=sig@entry=6) at >> ../sysdeps/posix/raise.c:26 >> #3 0x00007ffff4c45472 in __GI_abort () at ./stdlib/abort.c:79 >> #4 0x00007ffff6e46ec8 in () at /lib/x86_64-linux-gnu/libglib-2.0.so.0 >> #5 0x00007ffff6ea6e1a in g_assertion_message_expr () at >> /lib/x86_64-linux-gnu/libglib-2.0.so.0 >> #6 0x00007ffff7fa16e2 in vcpu_resume (id=10457908569352202058, >> cpu_index=5) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144 >> #7 0x00005555562c3202 in plugin_vcpu_cb__simple (cpu=0x5555583d9540, >> ev=QEMU_PLUGIN_EV_VCPU_RESUME) at ../../plugins/core.c:111 >> #8 0x00005555562c43f7 in qemu_plugin_vcpu_resume_cb (cpu=0x5555583d9540) >> at ../../plugins/core.c:523 >> #9 0x0000555555d9150e in qemu_wait_io_event (cpu=0x5555583d9540) at >> ../../system/cpus.c:465 >> #10 0x00005555562c67d0 in mttcg_cpu_thread_fn (arg=0x5555583d9540) at >> ../../accel/tcg/tcg-accel-ops-mttcg.c:118 >> #11 0x00005555565120ff in qemu_thread_start (args=0x5555584716e0) at >> ../../util/qemu-thread-posix.c:541 >> #12 0x00007ffff4ca8134 in start_thread (arg=<optimized out>) at >> ./nptl/pthread_create.c:442 >> #13 0x00007ffff4d287dc in clone3 () at >> ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 >> (gdb) >> But I don't understand how we can ever hit the idle callback without >> first hitting the init callback. >> > > More exactly, the assert we hit means that the idle callback, was not > called before the resume callback. > Any chance you can check what is the current vcpu->state value? I > wonder if it's not an exited cpu, that gets resumed after.
No, when I looked at it, it was set as UNKNOWN. Unfortunately it doesn't trigger if I stick breakpoints in or run under rr. > > Thanks, > Pierrick > >>> + int64_t idle_time = now_ns() - vcpu->last_state_time; >>> + /* accumulate expected number of instructions */ >>> + vcpu->counter += num_insn_during(idle_time); >>> + vcpu_set_state(vcpu, EXECUTING); >>> +} >>> + >>> +static void vcpu_exit(qemu_plugin_id_t id, unsigned int cpu_index) >>> +{ >>> + vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index); >>> + vcpu_set_state(vcpu, FINISHED); >>> + update_system_time(vcpu); >>> + vcpu->counter = 0; >>> +} >>> + >>> +static void every_insn_quantum(unsigned int cpu_index, void *udata) >>> +{ >>> + vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index); >>> + g_assert(vcpu->track_insn >= insn_quantum); >>> + update_system_time(vcpu); >>> +} >>> + >>> +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) >>> +{ >>> + size_t n_insns = qemu_plugin_tb_n_insns(tb); >>> + qemu_plugin_u64 track_insn = >>> + qemu_plugin_scoreboard_u64_in_struct(vcpus, vCPUTime, track_insn); >>> + if (precise_execution) { >>> + /* count (and eventually trap) on every instruction */ >>> + for (int idx = 0; idx < qemu_plugin_tb_n_insns(tb); ++idx) { >>> + struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, >>> idx); >>> + qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu( >>> + insn, QEMU_PLUGIN_INLINE_ADD_U64, track_insn, 1); >>> + qemu_plugin_register_vcpu_insn_exec_cond_cb( >>> + insn, every_insn_quantum, >>> + QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE, >>> + track_insn, insn_quantum, NULL); >>> + } >>> + } else { >>> + /* count (and eventually trap) once per tb */ >>> + qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu( >>> + tb, QEMU_PLUGIN_INLINE_ADD_U64, track_insn, n_insns); >>> + qemu_plugin_register_vcpu_tb_exec_cond_cb( >>> + tb, every_insn_quantum, >>> + QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE, >>> + track_insn, insn_quantum, NULL); >>> 
+ } >>> +} >>> + >>> +static void plugin_exit(qemu_plugin_id_t id, void *udata) >>> +{ >>> + qemu_plugin_scoreboard_free(vcpus); >>> +} >>> + >>> +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, >>> + const qemu_info_t *info, int >>> argc, >>> + char **argv) >>> +{ >>> + for (int i = 0; i < argc; i++) { >>> + char *opt = argv[i]; >>> + g_auto(GStrv) tokens = g_strsplit(opt, "=", 2); >>> + if (g_strcmp0(tokens[0], "ips") == 0) { >>> + insn_per_second = g_ascii_strtoull(tokens[1], NULL, 10); >>> + if (!insn_per_second && errno) { >>> + fprintf(stderr, "%s: couldn't parse %s (%s)\n", >>> + __func__, tokens[1], g_strerror(errno)); >>> + return -1; >>> + } >>> + >>> + } else if (g_strcmp0(tokens[0], "precise") == 0) { >>> + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], >>> + &precise_execution)) { >>> + fprintf(stderr, "boolean argument parsing failed: %s\n", >>> opt); >>> + return -1; >>> + } >>> + } else { >>> + fprintf(stderr, "option parsing failed: %s\n", opt); >>> + return -1; >>> + } >>> + } >>> + >>> + vcpus = qemu_plugin_scoreboard_new(sizeof(vCPUTime)); >>> + insn_quantum = insn_per_second / NUM_TIME_UPDATE_PER_SEC; >>> + >>> + time_handle = qemu_plugin_request_time_control(); >>> + g_assert(time_handle); >>> + >>> + qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); >>> + qemu_plugin_register_vcpu_init_cb(id, vcpu_init); >>> + qemu_plugin_register_vcpu_idle_cb(id, vcpu_idle); >>> + qemu_plugin_register_vcpu_resume_cb(id, vcpu_resume); >>> + qemu_plugin_register_vcpu_exit_cb(id, vcpu_exit); >>> + qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); >>> + >>> + return 0; >>> +} >>> diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile >>> index 0b64d2c1e3a..449ead11305 100644 >>> --- a/contrib/plugins/Makefile >>> +++ b/contrib/plugins/Makefile >>> @@ -27,6 +27,7 @@ endif >>> NAMES += hwprofile >>> NAMES += cache >>> NAMES += drcov >>> +NAMES += ips >>> ifeq ($(CONFIG_WIN32),y) >>> SO_SUFFIX := .dll >> -- Alex Bennée 
Virtualisation Tech Lead @ Linaro