On Fri, Jul 1, 2022 at 4:53 PM Marc-André Lureau <marcandre.lur...@gmail.com> wrote:
> Hi > > On Fri, Jul 1, 2022 at 7:11 AM zhenwei pi <pizhen...@bytedance.com> wrote: > >> A vCPU thread always reaches 100% utilization when: >> - guest uses idle=poll >> - disable HLT vm-exit >> - enable MWAIT >> >> Add new guest agent command 'guest-get-cpustats' to get guest CPU >> statistics, we can know the guest workload and how busy the CPU is. >> >> Signed-off-by: zhenwei pi <pizhen...@bytedance.com> >> --- >> qga/commands-posix.c | 72 ++++++++++++++++++++++++++++++++++++++++++++ >> qga/commands-win32.c | 6 ++++ >> qga/qapi-schema.json | 49 ++++++++++++++++++++++++++++++ >> 3 files changed, 127 insertions(+) >> >> diff --git a/qga/commands-posix.c b/qga/commands-posix.c >> index 0469dc409d..2847023876 100644 >> --- a/qga/commands-posix.c >> +++ b/qga/commands-posix.c >> @@ -2893,6 +2893,73 @@ GuestDiskStatsInfoList >> *qmp_guest_get_diskstats(Error **errp) >> return guest_get_diskstats(errp); >> } >> >> +GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp) >> +{ >> + GuestCpuStatsList *head = NULL, **tail = &head; >> + const char *cpustats = "/proc/stat"; >> + FILE *fp; >> + size_t n; >> + char *line = NULL; >> + >> + fp = fopen(cpustats, "r"); >> + if (fp == NULL) { >> + error_setg_errno(errp, errno, "open(\"%s\")", cpustats); >> + return NULL; >> + } >> + >> + while (getline(&line, &n, fp) != -1) { >> + GuestCpuStats *cpustat = NULL; >> + int i; >> + unsigned long user, system, idle, iowait, irq, softirq, steal, >> guest; >> + unsigned long nice, guest_nice; >> + char name[64]; >> + >> + i = sscanf(line, "%s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu", >> + name, &user, &nice, &system, &idle, &iowait, &irq, >> &softirq, >> + &steal, &guest, &guest_nice); >> + >> + /* drop "cpu 1 2 3 ...", get "cpuX 1 2 3 ..." only */ >> + if (strncmp(name, "cpu", 3) || (name[3] == '\0')) { >> > > For extra safety, check !name as well > > >> + continue; >> + } >> + >> > > if i < 5, I guess you should warn and continue > Why should we skip lines where i < 5? 
We have the CPU time of user and system modes; I think this is good for reporting. > > >> + cpustat = g_new0(GuestCpuStats, 1); >> + cpustat->cpu = atoi(&name[3]); >> + cpustat->has_user = true; >> + cpustat->user = user * 10; >> > > proc(5) says that the value is given "in units of USER_HZ (1/100ths of a > second on most architectures, use sysconf(_SC_CLK_TCK) to obtain the right > value)", so we should adjust this code if we want to return correctly in ms. > > >> + cpustat->has_system = true; >> + cpustat->system = system * 10; >> + cpustat->has_idle = true; >> + cpustat->idle = idle * 10; >> + >> + /* Linux version >= 2.6 */ >> > > That's pretty old now (2003), not sure anyone would care about that > comment, but np ;) > > >> + if (i > 5) { >> + cpustat->has_iowait = true; >> + cpustat->iowait = iowait * 10; >> + cpustat->has_irq = true; >> + cpustat->irq = irq * 10; >> + cpustat->has_softirq = true; >> + cpustat->softirq = softirq * 10; >> + } >> + >> + if (i > 8) { >> + cpustat->has_steal = true; >> + cpustat->steal = steal * 10; >> + } >> + >> + if (i > 9) { >> + cpustat->has_guest = true; >> + cpustat->guest = guest * 10; >> + } >> + >> + QAPI_LIST_APPEND(tail, cpustat); >> + } >> + >> + free(line); >> + fclose(fp); >> + return head; >> +} >> + >> #else /* defined(__linux__) */ >> >> void qmp_guest_suspend_disk(Error **errp) >> @@ -3247,6 +3314,11 @@ GuestDiskStatsInfoList >> *qmp_guest_get_diskstats(Error **errp) >> return NULL; >> } >> >> +GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp) >> +{ >> + error_setg(errp, QERR_UNSUPPORTED); >> + return NULL; >> +} >> >> #endif /* CONFIG_FSFREEZE */ >> >> diff --git a/qga/commands-win32.c b/qga/commands-win32.c >> index 36f94c0f9c..7ed7664715 100644 >> --- a/qga/commands-win32.c >> +++ b/qga/commands-win32.c >> @@ -2543,3 +2543,9 @@ GuestDiskStatsInfoList >> *qmp_guest_get_diskstats(Error **errp) >> error_setg(errp, QERR_UNSUPPORTED); >> return NULL; >> } >> + >> +GuestCpuStatsList *qmp_guest_get_cpustats(Error 
**errp) >> +{ >> + error_setg(errp, QERR_UNSUPPORTED); >> + return NULL; >> +} >> diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json >> index 9fa20e791b..4859c887b2 100644 >> --- a/qga/qapi-schema.json >> +++ b/qga/qapi-schema.json >> @@ -1576,3 +1576,52 @@ >> { 'command': 'guest-get-diskstats', >> 'returns': ['GuestDiskStatsInfo'] >> } >> + >> +## >> +# @GuestCpuStats: >> +# >> +# Get statistics of each CPU in millisecond. >> +# >> +# @cpu: CPU index in guest OS >> +# >> +# @user: CPU time of user mode >> > > "Time spent in user mode." is more understandable (from man proc(5)) > > Same for the other descriptions. > > +# >> +# @system: CPU time of system mode >> +# >> +# @idle: CPU time of idle state >> +# >> +# @iowait: CPU time waiting IO >> +# >> +# @irq: CPU time of hardware interrupt >> +# >> +# @softirq: CPU time of soft interrupt >> +# >> +# @steal: CPU time stolen by host >> +# >> +# @guest: CPU time of running guest mode >> > > Why not "guest_nice" ? > > Do we expect this struct to be equally meaningful for other OSes? > Otherwise, I would suggest to make a "linux" variant, perhaps. > /proc/stat is very Linux-specific, so I think that if we implement something similar for Windows, we will have another structure. > > +# >> +# Since: 7.1 >> +## >> +{ 'struct': 'GuestCpuStats', >> + 'data': {'cpu': 'int', >> + '*user': 'uint64', >> + '*system': 'uint64', >> + '*idle': 'uint64', >> + '*iowait': 'uint64', >> + '*irq': 'uint64', >> + '*softirq': 'uint64', >> + '*steal': 'uint64', >> + '*guest': 'uint64' >> + } } >> + >> +## >> +# @guest-get-cpustats: >> +# >> +# Retrieve information about CPU stats. >> +# Returns: List of CPU stats of guest. >> +# >> +# Since: 7.1 >> +## >> +{ 'command': 'guest-get-cpustats', >> + 'returns': ['GuestCpuStats'] >> +} >> -- >> 2.20.1 >> >> >> > > -- > Marc-André Lureau >