Francesco Romani has uploaded a new change for review. Change subject: sampling: introduce expensive checks ......................................................................
sampling: introduce expensive checks To improve troubleshooting, introduce more expensive sanity checks, with a default-off tunable to enable them. These sanity checks make no attempt to recover; they only add logs to report possible issues. Change-Id: I7b3bb707dd60de194eedfc2e3de1efbf05574ff7 Signed-off-by: Francesco Romani <from...@redhat.com> --- M lib/vdsm/config.py.in M vdsm/virt/sampling.py 2 files changed, 19 insertions(+), 2 deletions(-) git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/91/40391/1 diff --git a/lib/vdsm/config.py.in b/lib/vdsm/config.py.in index e213839..b60e836 100644 --- a/lib/vdsm/config.py.in +++ b/lib/vdsm/config.py.in @@ -325,6 +325,10 @@ ('periodic_task_per_worker', '100', 'Max number of tasks which can be queued on workers.' ' This is for internal usage and may change without warning'), + + ('expensive_checks', 'false', + 'Perform additional sanity checks and does additional debug logs' + 'which are expensive performance-wise'), ]), # Section: [devel] diff --git a/vdsm/virt/sampling.py b/vdsm/virt/sampling.py index b0628a4..3c1510f 100644 --- a/vdsm/virt/sampling.py +++ b/vdsm/virt/sampling.py @@ -504,6 +504,7 @@ self._skip_doms = ExpiringCache(timeout) self._sampling = Stage() self._log = logging.getLogger("sampling.VMBulkSampler") + self._extra_check = config.getboolean('sampling', 'expensive_checks') def __call__(self): timestamp = self._vm_stats_cache.clock() @@ -517,7 +518,6 @@ # If everything's ok, we can skip all the costly checks. bulk_stats = self._conn.getAllDomainStats( self._stats_flags) - self._vm_stats_cache.put(_translate(bulk_stats), timestamp) else: # A previous call got stuck, or not every domain # has properly recovered. Thus we must whitelist domains. 
@@ -526,7 +526,12 @@ if doms: bulk_stats = self._conn.domainListGetStats( doms, self._stats_flags) - self._vm_stats_cache.put(_translate(bulk_stats), timestamp) + else: + bulk_stats = [] + + stats = _translate(bulk_stats) + self._vm_stats_cache.put(stats, timestamp) + self._log_missing_vms(self._get_vms(), stats, timestamp) def _get_responsive_doms(self): vms = self._get_vms() @@ -541,6 +546,14 @@ doms.append(vm_obj._dom._dom) return doms + def _log_missing_vms(self, expected_vms, retrieved_stats, timestamp): + # costly check. add another layer of check before to embark on it. + if self._extra_check: + for vm_id in expected_vms: + if vm_id not in retrieved_stats: + self._log.debug('VM %s not updated in bulk at %f', + vm_id, timestamp) + class HostStatsThread(threading.Thread): """ -- To view, visit https://gerrit.ovirt.org/40391 To unsubscribe, visit https://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I7b3bb707dd60de194eedfc2e3de1efbf05574ff7 Gerrit-PatchSet: 1 Gerrit-Project: vdsm Gerrit-Branch: master Gerrit-Owner: Francesco Romani <from...@redhat.com> _______________________________________________ vdsm-patches mailing list vdsm-patches@lists.fedorahosted.org https://lists.fedorahosted.org/mailman/listinfo/vdsm-patches