On 2021/7/2 15:45, Peter Krempa wrote:
> On Fri, Jul 02, 2021 at 12:53:29 +0800, Yang Fei wrote:
>> This patch add the ability to statistic the halt polling time when
>> VM execute HLT(arm is WFI).
>>
>> In actual services, the execution of the HLT instruction by the
>> guest is an important cause of virtualization overhead. The halt
>> polling feature is introduced to solve this problem. When a guest
>> idle VM exit occurs, the host continues polling for a period of
>> time to reduce the guest service delay. This mechanism may cause
>> the CPU usage to be 100% when the physical CPU is idle. If the
>> guest service model is woken up at an interval to process a small
>> amount of traffic, and the interval is shorter than kvm halt_poll_ns.
>> The host polls the block time of the entire VM and the CPU usage
>> increases to 100%.
>>
>> The kernel provides the capability of collecting statistics on the
>> halt polling time after v5.8, Introduced by commit
>> <cb953129bfe5c0f2da835a0469930873fb7e71df>. It is rendered in debugfs.
>> Therefore, we can use this kernel feature to provide the halt poll
>> time to the user to obtain a more accurate CPU usage as required.
>
> Note that I'm reviewing just the code side, not the justification or
> usefullnes of the data reported.
>
>>
>> Signed-off-by: Yang Fei <yangfe...@huawei.com>
>> ---
>> src/libvirt_private.syms | 2 +
>> src/qemu/qemu_driver.c | 29 +++++++++++++
>> src/util/virutil.c | 89 ++++++++++++++++++++++++++++++++++++++++
>> src/util/virutil.h | 9 ++++
>> 4 files changed, 129 insertions(+)
>>
>> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
>> index 68e4b6aab8..f92213b8c2 100644
>> --- a/src/libvirt_private.syms
>> +++ b/src/libvirt_private.syms
>> @@ -3479,6 +3479,8 @@ virDoesUserExist;
>> virDoubleToStr;
>> virFormatIntDecimal;
>> virFormatIntPretty;
>> +virGetCpuHaltPollTime;
>> +virGetDebugFsKvmValue;
>> virGetDeviceID;
>> virGetDeviceUnprivSGIO;
>> virGetGroupID;
>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>> index 235f575901..3a2b530ecf 100644
>> --- a/src/qemu/qemu_driver.c
>> +++ b/src/qemu/qemu_driver.c
>> @@ -17812,6 +17812,32 @@ qemuDomainGetStatsCpuCache(virQEMUDriver *driver,
>> return ret;
>> }
>>
>> +#ifdef __linux__
>> +static int
>> +qemuDomainGetStatsCpuHaltPollTime(virDomainObj *dom,
>> + virTypedParamList *params)
>> +{
>> + unsigned long long haltPollSuccess = 0;
>> + unsigned long long haltPollFail = 0;
>> + pid_t pid = dom->pid;
>> +
>> + if (virGetCpuHaltPollTime(pid, &haltPollSuccess, &haltPollFail) != 0)
>> + return -1;
>> + if (virTypedParamListAddULLong(params, haltPollSuccess,
>> "haltpollsuccess.time") < 0)
>> + return -1;
>> + if (virTypedParamListAddULLong(params, haltPollFail,
>> "haltpollfail.time") < 0)
>> + return -1;
>> +
>> + return 0;
>> +}
>> +#else
>> +static int
>> +qemuDomainGetStatsCpuHaltPollTime(virDomainObj *dom,
>> + virTypedParamList *params)
>> +{
>> + return -1;
>
> This breaks domstats on non-linux ...
>
>> +}
>> +#endif
>>
>> static int
>> qemuDomainGetStatsCpuCgroup(virDomainObj *dom,
>> @@ -17852,6 +17878,9 @@ qemuDomainGetStatsCpu(virQEMUDriver *driver,
>> if (qemuDomainGetStatsCpuCache(driver, dom, params) < 0)
>> return -1;
>>
>> + if (qemuDomainGetStatsCpuHaltPollTime(dom, params) < 0)
>> + return -1;
>
> since you'd return -1 here.
>
> Additionally the qemuDomainGetStatsCpuHaltPollTime function IMO should
> not be conditionally compiled on linux. The helpers below should and the
> qemu code should just conditionally fill in the data if it's available.
>
My consideration is not to compile this feature on non-Linux platforms,
since the function of get halt polling time is based on the capabilities of
Linux kernel.
How about return 0 in non-linux platform?
>> +
>> return 0;
>> }
>>
>> diff --git a/src/util/virutil.c b/src/util/virutil.c
>> index 311cbbf93a..8715deaca5 100644
>> --- a/src/util/virutil.c
>> +++ b/src/util/virutil.c
>> @@ -1936,3 +1936,92 @@ virPipeNonBlock(int fds[2])
>> {
>> return virPipeImpl(fds, true, true);
>> }
>> +
>> +int
>> +virGetDebugFsKvmValue(struct dirent *ent,
>> + const char *path,
>> + const char *filename,
>> + unsigned long long *value)
>
> This helper should be added in a separate patch, to separate it from the
> other changes, especially the qemu driver change.
>
I will remove it to a separate patch in next version.
>> +{
>> + g_autofree char *valToStr = NULL;
>> + g_autofree char *valPath = NULL;
>> + int rc = -1;
>> + int ret = -1;
>> +
>> + valPath = g_strdup_printf("%s/%s/%s", path, ent->d_name, filename);
>> +
>> + if ((rc = virFileReadAll(valPath, 16, &valToStr)) < 0) {
>
> Why just 16 bytes?
>
Cause the data type of halt_poll_success_ns and halt_poll_fail_ns is u64,
it only requires 8 bytes of memory.
>> + virReportError(VIR_ERR_INTERNAL_ERROR,
>> + _("Unable to read from '%s'"), valPath);
>> + goto cleanup;
>> + }
>> +
>> + /* Terminated with '\n' has sometimes harmful effects to the caller */
>
> To the caller? I don't quite understand what you mean here.
>
The code under this comment is copied from virCgroupGetValueRaw
which can only be used to obtain Cgroup data.
I plan extract this function to virutil.c in an independent patch. So that
we can call this function directly.
>> + if (rc > 0 && (valToStr)[rc - 1] == '\n')
>> + (valToStr)[rc - 1] = '\0';
>> +
>> + /* 10 is a Cardinality, must be between 2 and 36 inclusive,
>> + * or special value 0. Used in fuction strtoull()
>> + */
>
> What's the point of this comment?
>
Since 10 here is a magic number, I've given an comment to explain it.
>> + if (virStrToLong_ull(valToStr, NULL, 10, value) < 0) {
>> + virReportError(VIR_ERR_INTERNAL_ERROR,
>> + _("Unable to parse '%s' as an integer"), valToStr);
>> + goto cleanup;
>> + }
>> +
>> + ret = 0;
>> +cleanup:
>
> No need for cleanup label since it's not cleaning up anything.
>
OK, I will remove it in the next version.
>> + return ret;
>> +}
>> +
>> +int
>> +virGetCpuHaltPollTime(pid_t pid,
>> + unsigned long long *haltPollSuccess,
>> + unsigned long long *haltPollFail)
>> +{
>> + g_autofree char *pidToStr = NULL;
>> + g_autofree char *debugFsPath = NULL;
>> + g_autofree char *completePath = NULL;
>> + struct dirent *ent = NULL;
>> + DIR *dir = NULL;
>> + int ret = -1;
>> + int flag = 0;
>> +
>> + if (!(debugFsPath = virFileFindMountPoint("debugfs"))) {
>> + virReportSystemError(errno, "%s",
>> + _("unable to find debugfs mountpoint"));
>> + goto cleanup;
>> + }
>> +
>> + completePath = g_strdup_printf("%s/%s", debugFsPath, "kvm");
>> + if (virDirOpen(&dir, completePath) < 0) {
>> + virReportError(VIR_ERR_INTERNAL_ERROR,
>> + "%s %s", "Can not open directory", completePath);
>> + return ret;
>> + }
>> +
>> + pidToStr = g_strdup_printf("%d", pid);
>
> %d for pid_t is invalid.
>
Should I use %lld?
>> + while (virDirRead(dir, &ent, NULL) > 0) {
>> + if (strncmp(ent->d_name, pidToStr, strlen(pidToStr)) == 0 &&
>
> We mandate use of our wrappers, in this case STRPREFIX
>
>> + ent->d_name[strlen(pidToStr)] == '-') {
>
> Formatting the pidToStr including the '-' will simplify the code since
> you can match the full prefix.
>
OK, I'll use the STRPREFIX in next version.
>> + flag = 1;
>> + break;
>> + }
>> + }
>> +
>> + if (flag == 0) {
>> + virReportError(VIR_ERR_INTERNAL_ERROR,
>> + _("Could not find VM(Pid %s) in '%s'"), pidToStr,
>> completePath);
>
> Reporting errors in optional stats code is not acceptable. The domstats
> code is best-effort and should return what it can.
>
So, the function should not report any errors or return any error number in any
case. If something
happends, e.g. debugfs isn't mounted, the caller should not add
halt_poll_success_ns and
halt_poll_fail_ns to domstats. Have I got that right?
>> + goto cleanup;
>> + }
>> +
>> + if (virGetDebugFsKvmValue(ent, completePath, "halt_poll_success_ns",
>> haltPollSuccess) < 0 ||
>> + virGetDebugFsKvmValue(ent, completePath, "halt_poll_fail_ns",
>> haltPollFail) < 0) {
>> + goto cleanup;
>> + }
>> +
>> + ret = 0;
>> +cleanup:
>> + closedir(dir);
>> + return ret;
>> +}
>
> .
>
Thanks,
Fei.