Pierrick Bouvier <pierrick.bouv...@linaro.org> writes:

> On 5/28/24 12:14, Alex Bennée wrote:
>> Pierrick Bouvier <pierrick.bouv...@linaro.org> writes:
>> 
>>> This plugin uses the new time control interface to make decisions
>>> about the state of time during the emulation. The algorithm is
>>> currently very simple. The user specifies an ips rate which applies
>>> per core. If the core runs ahead of its allocated execution time the
>>> plugin sleeps for a bit to let real time catch up. Either way time is
>>> updated for the emulation as a function of total executed instructions
>>> with some adjustments for cores that idle.
>>>
>>> Examples
>>> --------
>>>
>>> Slow down execution of /bin/true:
>>> $ num_insn=$(./build/qemu-x86_64 -plugin ./build/tests/plugin/libinsn.so -d 
>>> plugin /bin/true |& grep total | sed -e 's/.*: //')
>>> $ time ./build/qemu-x86_64 -plugin 
>>> ./build/contrib/plugins/libips.so,ips=$(($num_insn/4)) /bin/true
>>> real 4.000s
>>>
>>> Boot a Linux kernel simulating a 250MHz cpu:
>>> $ /build/qemu-system-x86_64 -kernel /boot/vmlinuz-6.1.0-21-amd64 -append 
>>> "console=ttyS0" -plugin 
>>> ./build/contrib/plugins/libips.so,ips=$((250*1000*1000)) -smp 1 -m 512
>>> check time until kernel panic on serial0
>>>
>>> Signed-off-by: Pierrick Bouvier <pierrick.bouv...@linaro.org>
>>> ---
>>>   contrib/plugins/ips.c    | 239 +++++++++++++++++++++++++++++++++++++++
>>>   contrib/plugins/Makefile |   1 +
>>>   2 files changed, 240 insertions(+)
>>>   create mode 100644 contrib/plugins/ips.c
>>>
>>> diff --git a/contrib/plugins/ips.c b/contrib/plugins/ips.c
>>> new file mode 100644
>>> index 00000000000..cf3159df391
>>> --- /dev/null
>>> +++ b/contrib/plugins/ips.c
>>> @@ -0,0 +1,239 @@
>>> +/*
>>> + * ips rate limiting plugin.
>>> + *
>>> + * This plugin can be used to restrict the execution of a system to a
>>> + * particular number of Instructions Per Second (ips). This controls
>>> + * time as seen by the guest so while wall-clock time may be longer
>>> + * from the guest's point of view time will pass at the normal rate.
>>> + *
>>> + * This uses the new plugin API which allows the plugin to control
>>> + * system time.
>>> + *
>>> + * Copyright (c) 2023 Linaro Ltd
>>> + *
>>> + * SPDX-License-Identifier: GPL-2.0-or-later
>>> + */
>>> +
>>> +#include <stdio.h>
>>> +#include <glib.h>
>>> +#include <qemu-plugin.h>
>>> +
>>> +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
>>> +
>>> +/* how many times do we update time per sec */
>>> +#define NUM_TIME_UPDATE_PER_SEC 10
>>> +#define NSEC_IN_ONE_SEC (1000 * 1000 * 1000)
>>> +
>>> +static GMutex global_state_lock;
>>> +
>>> +static uint64_t insn_per_second = 1000 * 1000; /* ips per core, per second 
>>> */
>>> +static uint64_t insn_quantum; /* trap every N instructions */
>>> +static bool precise_execution; /* count every instruction */
>>> +static int64_t start_time_ns; /* time (ns since epoch) first vCPU started 
>>> */
>>> +static int64_t virtual_time_ns; /* last set virtual time */
>>> +
>>> +static const void *time_handle;
>>> +
>>> +typedef enum {
>>> +    UNKNOWN = 0,
>>> +    EXECUTING,
>>> +    IDLE,
>>> +    FINISHED
>>> +} vCPUState;
>>> +
>>> +typedef struct {
>>> +    uint64_t counter;
>>> +    uint64_t track_insn;
>>> +    vCPUState state;
>>> +    /* timestamp when vCPU entered state */
>>> +    int64_t last_state_time;
>>> +} vCPUTime;
>>> +
>>> +struct qemu_plugin_scoreboard *vcpus;
>>> +
>>> +/* return epoch time in ns */
>>> +static int64_t now_ns(void)
>>> +{
>>> +    return g_get_real_time() * 1000;
>>> +}
>>> +
>>> +static uint64_t num_insn_during(int64_t elapsed_ns)
>>> +{
>>> +    double num_secs = elapsed_ns / (double) NSEC_IN_ONE_SEC;
>>> +    return num_secs * (double) insn_per_second;
>>> +}
>>> +
>>> +static int64_t time_for_insn(uint64_t num_insn)
>>> +{
>>> +    double num_secs = (double) num_insn / (double) insn_per_second;
>>> +    return num_secs * (double) NSEC_IN_ONE_SEC;
>>> +}
>>> +
>>> +static int64_t uptime_ns(void)
>>> +{
>>> +    int64_t now = now_ns();
>>> +    g_assert(now >= start_time_ns);
>>> +    return now - start_time_ns;
>>> +}
>>> +
>>> +static void vcpu_set_state(vCPUTime *vcpu, vCPUState new_state)
>>> +{
>>> +    vcpu->last_state_time = now_ns();
>>> +    vcpu->state = new_state;
>>> +}
>>> +
>>> +static void update_system_time(vCPUTime *vcpu)
>>> +{
>>> +    /* flush remaining instructions */
>>> +    vcpu->counter += vcpu->track_insn;
>>> +    vcpu->track_insn = 0;
>>> +
>>> +    int64_t uptime = uptime_ns();
>>> +    uint64_t expected_insn = num_insn_during(uptime);
>>> +
>>> +    if (vcpu->counter >= expected_insn) {
>>> +        /* this vcpu ran faster than expected, so it has to sleep */
>>> +        uint64_t insn_advance = vcpu->counter - expected_insn;
>>> +        uint64_t time_advance_ns = time_for_insn(insn_advance);
>>> +        int64_t sleep_us = time_advance_ns / 1000;
>>> +        g_usleep(sleep_us);
>>> +    }
>>> +
>>> +    /* based on number of instructions, what should be the new time? */
>>> +    int64_t new_virtual_time = time_for_insn(vcpu->counter);
>>> +
>>> +    g_mutex_lock(&global_state_lock);
>>> +
>>> +    /* Time only moves forward. Another vcpu might have updated it 
>>> already. */
>>> +    if (new_virtual_time > virtual_time_ns) {
>>> +        qemu_plugin_update_ns(time_handle, new_virtual_time);
>>> +        virtual_time_ns = new_virtual_time;
>>> +    }
>>> +
>>> +    g_mutex_unlock(&global_state_lock);
>>> +}
>>> +
>>> +static void set_start_time()
>>> +{
>>> +    g_mutex_lock(&global_state_lock);
>>> +    if (!start_time_ns) {
>>> +        start_time_ns = now_ns();
>>> +    }
>>> +    g_mutex_unlock(&global_state_lock);
>>> +}
>>> +
>>> +static void vcpu_init(qemu_plugin_id_t id, unsigned int cpu_index)
>>> +{
>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>> +    /* ensure start time is set first */
>>> +    set_start_time();
>>> +    /* start counter from absolute time reference */
>>> +    vcpu->counter = num_insn_during(uptime_ns());
>>> +    vcpu_set_state(vcpu, EXECUTING);
>>> +}
>>> +
>>> +static void vcpu_idle(qemu_plugin_id_t id, unsigned int cpu_index)
>>> +{
>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>> +    vcpu_set_state(vcpu, IDLE);
>>> +}
>>> +
>>> +static void vcpu_resume(qemu_plugin_id_t id, unsigned int cpu_index)
>>> +{
>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>> +    g_assert(vcpu->state == IDLE);
>> I'm triggering a weird race here:
>>    (gdb) b vcpu_init
>>    Breakpoint 1 at 0x7ffff7fa15f7: file 
>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c, line 127.
>>    (gdb) r
>>    The program being debugged has been started already.
>>    Start it from the beginning? (y or n) y
>>    Starting program:
>> /home/alex/lsrc/qemu.git/builds/arm.debug/qemu-system-aarch64
>> -machine type=virt,virtualization=on,pflash0=rom,pflash1=efivars
>> -cpu cortex-a57 -smp 32 -accel tcg -device
>> virtio-net-pci,netdev=unet -device virtio-scsi-pci -device
>> scsi-hd,drive=hd -netdev user,id=unet,hostfwd=tcp::2222-:22
>> -blockdev
>> driver=raw,node-name=hd,file.driver=host_device,file.filename=/dev/zen-ssd2/trixie-arm64,discard=unmap
>> -serial mon:stdio -blockdev
>> node-name=rom,driver=file,filename=/home/alex/lsrc/qemu.git/builds/arm.debug/pc-bios/edk2-aarch64-code.fd,read-only=true
>> -blockdev
>> node-name=efivars,driver=file,filename=/home/alex/images/qemu-arm64-efivars
>> -m 8192 -object memory-backend-memfd,id=mem,size=8G,share=on -kernel
>> /home/alex/lsrc/linux.git/builds/arm64/arch/arm64/boot/Image -append
>> root=/dev/sda2\ console=ttyAMA0 -plugin
>> contrib/plugins/libips.so,ips=1000000000
>>    [Thread debugging using libthread_db enabled]
>>    Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
>>    [New Thread 0x7fffe72006c0 (LWP 360538)]
>>    [New Thread 0x7fffe68006c0 (LWP 360540)]
>>    [New Thread 0x7fffe5e006c0 (LWP 360541)]
>>    [New Thread 0x7fffe54006c0 (LWP 360542)]
>>    [New Thread 0x7fffe4a006c0 (LWP 360543)]
>>    [New Thread 0x7fffdfe006c0 (LWP 360544)]
>>    [New Thread 0x7fffdf4006c0 (LWP 360545)]
>>    [New Thread 0x7fffdea006c0 (LWP 360546)]
>>    [Switching to Thread 0x7fffdf4006c0 (LWP 360545)]
>>    Thread 8 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>> (id=10457908569352202058, cpu_index=0) at
>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>    127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, 
>> cpu_index);
>>    (gdb) c
>>    Continuing.
>>    [New Thread 0x7fffde0006c0 (LWP 360548)]
>>    [Switching to Thread 0x7fffdea006c0 (LWP 360546)]
>>    Thread 9 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>> (id=10457908569352202058, cpu_index=1) at
>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>    127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, 
>> cpu_index);
>>    (gdb)
>>    Continuing.
>>    [New Thread 0x7fffdd6006c0 (LWP 360549)]
>>    [Switching to Thread 0x7fffde0006c0 (LWP 360548)]
>>    Thread 10 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>> (id=10457908569352202058, cpu_index=2) at
>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>    127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, 
>> cpu_index);
>>    (gdb)
>>    Continuing.
>>    [New Thread 0x7fffdcc006c0 (LWP 360550)]
>>    [Switching to Thread 0x7fffdd6006c0 (LWP 360549)]
>>    Thread 11 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>> (id=10457908569352202058, cpu_index=3) at
>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>    127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, 
>> cpu_index);
>>    (gdb)
>>    Continuing.
>>    [New Thread 0x7fffd3e006c0 (LWP 360551)]
>>    [Switching to Thread 0x7fffdcc006c0 (LWP 360550)]
>>    Thread 12 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>> (id=10457908569352202058, cpu_index=4) at
>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>    127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, 
>> cpu_index);
>>    (gdb) n
>>    129         set_start_time();
>>    (gdb)
>>    131         vcpu->counter = num_insn_during(uptime_ns());
>>    (gdb)
>>    132         vcpu_set_state(vcpu, EXECUTING);
>>    (gdb)
>>    133     }
>>    (gdb) p vcpu->state
>>    $1 = EXECUTING
>>    (gdb) p &vcpu->state
>>    $2 = (vCPUState *) 0x555557c6b5d0
>>    (gdb) watch *(vCPUState *) 0x555557c6b5d0
>>    Hardware watchpoint 2: *(vCPUState *) 0x555557c6b5d0
>>    (gdb) c
>>    Continuing.
>>    [Thread 0x7fffdfe006c0 (LWP 360544) exited]
>>    [Thread 0x7fffe5e006c0 (LWP 360541) exited]
>>    Thread 12 "qemu-system-aar" hit Hardware watchpoint 2:
>> *(vCPUState *) 0x555557c6b5d0
>>    Old value = EXECUTING
>>    New value = IDLE
>>    vcpu_set_state (vcpu=0x555557c6b5c0, new_state=IDLE) at 
>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:83
>>    83      }
>>    (gdb) c
>>    Continuing.
>>    [New Thread 0x7fffdfe006c0 (LWP 360591)]
>>    [New Thread 0x7fffe5e006c0 (LWP 360592)]
>>    [New Thread 0x7fffd34006c0 (LWP 360593)]
>>    [Switching to Thread 0x7fffdfe006c0 (LWP 360591)]
>>    Thread 14 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>> (id=10457908569352202058, cpu_index=6) at
>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>    127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, 
>> cpu_index);
>>    (gdb)
>>    Continuing.
>>    [New Thread 0x7fffd2a006c0 (LWP 360594)]
>>    **
>>    ERROR:/home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144:vcpu_resume: 
>> assertion failed: (vcpu->state == IDLE)
>>    Bail out! 
>> ERROR:/home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144:vcpu_resume: 
>> assertion failed: (vcpu->state == IDLE)
>>    Thread 13 "qemu-system-aar" received signal SIGABRT, Aborted.
>>    [Switching to Thread 0x7fffd3e006c0 (LWP 360551)]
>>    __pthread_kill_implementation (threadid=<optimized out>, 
>> signo=signo@entry=6, no_tid=no_tid@entry=0) at ./nptl/pthread_kill.c:44
>>    44      ./nptl/pthread_kill.c: No such file or directory.
>>    (gdb) bt
>>    #0  __pthread_kill_implementation (threadid=<optimized out>, 
>> signo=signo@entry=6, no_tid=no_tid@entry=0) at ./nptl/pthread_kill.c:44
>>    #1  0x00007ffff4ca9e8f in __pthread_kill_internal (signo=6, 
>> threadid=<optimized out>) at ./nptl/pthread_kill.c:78
>>    #2  0x00007ffff4c5afb2 in __GI_raise (sig=sig@entry=6) at 
>> ../sysdeps/posix/raise.c:26
>>    #3  0x00007ffff4c45472 in __GI_abort () at ./stdlib/abort.c:79
>>    #4  0x00007ffff6e46ec8 in  () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
>>    #5  0x00007ffff6ea6e1a in g_assertion_message_expr () at 
>> /lib/x86_64-linux-gnu/libglib-2.0.so.0
>>    #6  0x00007ffff7fa16e2 in vcpu_resume (id=10457908569352202058, 
>> cpu_index=5) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144
>>    #7  0x00005555562c3202 in plugin_vcpu_cb__simple (cpu=0x5555583d9540, 
>> ev=QEMU_PLUGIN_EV_VCPU_RESUME) at ../../plugins/core.c:111
>>    #8  0x00005555562c43f7 in qemu_plugin_vcpu_resume_cb (cpu=0x5555583d9540) 
>> at ../../plugins/core.c:523
>>    #9  0x0000555555d9150e in qemu_wait_io_event (cpu=0x5555583d9540) at 
>> ../../system/cpus.c:465
>>    #10 0x00005555562c67d0 in mttcg_cpu_thread_fn (arg=0x5555583d9540) at 
>> ../../accel/tcg/tcg-accel-ops-mttcg.c:118
>>    #11 0x00005555565120ff in qemu_thread_start (args=0x5555584716e0) at 
>> ../../util/qemu-thread-posix.c:541
>>    #12 0x00007ffff4ca8134 in start_thread (arg=<optimized out>) at 
>> ./nptl/pthread_create.c:442
>>    #13 0x00007ffff4d287dc in clone3 () at 
>> ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
>>    (gdb)
>> But I don't understand how we can ever hit the idle callback without
>> first hitting the init callback.
>> 
>
> More exactly, the assert we hit means that the idle callback was not
> called before the resume callback.
> Any chance you can check what is the current vcpu->state value? I
> wonder if it's not an exited cpu, that gets resumed after.

No, when I looked at it, it was set to UNKNOWN. Unfortunately it doesn't
trigger if I stick breakpoints in or run under rr.

>
> Thanks,
> Pierrick
>
>>> +    int64_t idle_time = now_ns() - vcpu->last_state_time;
>>> +    /* accumulate expected number of instructions */
>>> +    vcpu->counter += num_insn_during(idle_time);
>>> +    vcpu_set_state(vcpu, EXECUTING);
>>> +}
>>> +
>>> +static void vcpu_exit(qemu_plugin_id_t id, unsigned int cpu_index)
>>> +{
>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>> +    vcpu_set_state(vcpu, FINISHED);
>>> +    update_system_time(vcpu);
>>> +    vcpu->counter = 0;
>>> +}
>>> +
>>> +static void every_insn_quantum(unsigned int cpu_index, void *udata)
>>> +{
>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>> +    g_assert(vcpu->track_insn >= insn_quantum);
>>> +    update_system_time(vcpu);
>>> +}
>>> +
>>> +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
>>> +{
>>> +    size_t n_insns = qemu_plugin_tb_n_insns(tb);
>>> +    qemu_plugin_u64 track_insn =
>>> +        qemu_plugin_scoreboard_u64_in_struct(vcpus, vCPUTime, track_insn);
>>> +    if (precise_execution) {
>>> +        /* count (and eventually trap) on every instruction */
>>> +        for (int idx = 0; idx < qemu_plugin_tb_n_insns(tb); ++idx) {
>>> +            struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, 
>>> idx);
>>> +            qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu(
>>> +                insn, QEMU_PLUGIN_INLINE_ADD_U64, track_insn, 1);
>>> +            qemu_plugin_register_vcpu_insn_exec_cond_cb(
>>> +                insn, every_insn_quantum,
>>> +                QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE,
>>> +                track_insn, insn_quantum, NULL);
>>> +        }
>>> +    } else {
>>> +        /* count (and eventually trap) once per tb */
>>> +        qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
>>> +            tb, QEMU_PLUGIN_INLINE_ADD_U64, track_insn, n_insns);
>>> +        qemu_plugin_register_vcpu_tb_exec_cond_cb(
>>> +            tb, every_insn_quantum,
>>> +            QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE,
>>> +            track_insn, insn_quantum, NULL);
>>> +    }
>>> +}
>>> +
>>> +static void plugin_exit(qemu_plugin_id_t id, void *udata)
>>> +{
>>> +    qemu_plugin_scoreboard_free(vcpus);
>>> +}
>>> +
>>> +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
>>> +                                           const qemu_info_t *info, int 
>>> argc,
>>> +                                           char **argv)
>>> +{
>>> +    for (int i = 0; i < argc; i++) {
>>> +        char *opt = argv[i];
>>> +        g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
>>> +        if (g_strcmp0(tokens[0], "ips") == 0) {
>>> +            insn_per_second = g_ascii_strtoull(tokens[1], NULL, 10);
>>> +            if (!insn_per_second && errno) {
>>> +                fprintf(stderr, "%s: couldn't parse %s (%s)\n",
>>> +                        __func__, tokens[1], g_strerror(errno));
>>> +                return -1;
>>> +            }
>>> +
>>> +        } else if (g_strcmp0(tokens[0], "precise") == 0) {
>>> +            if (!qemu_plugin_bool_parse(tokens[0], tokens[1],
>>> +                                        &precise_execution)) {
>>> +                fprintf(stderr, "boolean argument parsing failed: %s\n", 
>>> opt);
>>> +                return -1;
>>> +            }
>>> +        } else {
>>> +            fprintf(stderr, "option parsing failed: %s\n", opt);
>>> +            return -1;
>>> +        }
>>> +    }
>>> +
>>> +    vcpus = qemu_plugin_scoreboard_new(sizeof(vCPUTime));
>>> +    insn_quantum = insn_per_second / NUM_TIME_UPDATE_PER_SEC;
>>> +
>>> +    time_handle = qemu_plugin_request_time_control();
>>> +    g_assert(time_handle);
>>> +
>>> +    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
>>> +    qemu_plugin_register_vcpu_init_cb(id, vcpu_init);
>>> +    qemu_plugin_register_vcpu_idle_cb(id, vcpu_idle);
>>> +    qemu_plugin_register_vcpu_resume_cb(id, vcpu_resume);
>>> +    qemu_plugin_register_vcpu_exit_cb(id, vcpu_exit);
>>> +    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
>>> +
>>> +    return 0;
>>> +}
>>> diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
>>> index 0b64d2c1e3a..449ead11305 100644
>>> --- a/contrib/plugins/Makefile
>>> +++ b/contrib/plugins/Makefile
>>> @@ -27,6 +27,7 @@ endif
>>>   NAMES += hwprofile
>>>   NAMES += cache
>>>   NAMES += drcov
>>> +NAMES += ips
>>>     ifeq ($(CONFIG_WIN32),y)
>>>   SO_SUFFIX := .dll
>> 

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro

Reply via email to