Hello Nir Soffer, Francesco Romani, Martin Polednik, Milan Zamazal,
I'd like you to do a code review. Please visit
https://gerrit.ovirt.org/56188
to review the following change.
Change subject: host stats: Collect stats from online cpu cores only
......................................................................
host stats: Collect stats from online cpu cores only
When a cpu goes offline while libvirtd is running, the getVdsStats call
still tries to collect sampling data for this CPU and fails with an
exception.
When vdsm is already running, it is enough to do something like
echo 0 > /sys/devices/system/cpu/cpu2/online
to break getVdsStats.
When libvirtd is running and both CPUs are online, vdsm reports
stats for two cores:
$ vdsClient -s 0 getVdsStats
[...]
cpuStatistics = {
'0': {'cpuIdle': '90.74',
'cpuSys': '1.40',
'cpuUser': '7.86',
'nodeIndex': 0},
'1': {'cpuIdle': '96.07',
'cpuSys': '0.47',
'cpuUser': '3.46',
'nodeIndex': 0}
}
[...]
When only one CPU out of two has been online before libvirtd was
started, vdsm reports stats for the one online core:
$ vdsClient -s 0 getVdsStats
[...]
cpuStatistics = {
'0': {'cpuIdle': '90.74',
'cpuSys': '1.40',
'cpuUser': '7.86',
'nodeIndex': 0}
}
[...]
When one CPU went offline after libvirtd has been started, vdsm threw
an exception. With this patch getVdsStats reports only stats from the
remaining online CPU:
$ vdsClient -s 0 getVdsStats
[...]
cpuStatistics = {
'0': {'cpuIdle': '90.74',
'cpuSys': '1.40',
'cpuUser': '7.86',
'nodeIndex': 0}
}
[...]
Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1264003
Change-Id: Ia9c247f9138e02a9230a0849a04cb2e1705e7fac
Signed-off-by: Roman Mohr <[email protected]>
Reviewed-on: https://gerrit.ovirt.org/46269
Reviewed-by: Nir Soffer <[email protected]>
Continuous-Integration: Jenkins CI
Reviewed-by: Francesco Romani <[email protected]>
Reviewed-by: Milan Zamazal <[email protected]>
Reviewed-by: Martin Polednik <[email protected]>
---
M tests/samplingTests.py
M tests/vmfakelib.py
M vdsm/virt/sampling.py
3 files changed, 98 insertions(+), 50 deletions(-)
git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/88/56188/1
diff --git a/tests/samplingTests.py b/tests/samplingTests.py
index 5216b7d..ca323b5 100644
--- a/tests/samplingTests.py
+++ b/tests/samplingTests.py
@@ -27,8 +27,6 @@
import threading
import time
-import six
-
from vdsm import ipwrapper
from vdsm.password import ProtectedPassword
import virt.sampling as sampling
@@ -225,6 +223,26 @@
FAILED_SAMPLE = 3 # random 'small' value
STOP_SAMPLE = 6 # ditto
+ _core_zero_stats = {
+ 'cpuIdle': '100.00',
+ 'cpuSys': '0.00',
+ 'cpuUser': '0.00',
+ 'nodeIndex': 0
+ }
+
+ _core_one_stats = {
+ 'cpuIdle': '100.00',
+ 'cpuSys': '0.00',
+ 'cpuUser': '0.00',
+ 'nodeIndex': 1
+ }
+
+ def _fakeNumaTopology(self):
+ return {
+ 0: {'cpus': [0]},
+ 1: {'cpus': [1]}
+ }
+
def setUp(self):
self._hs = None
self._sampleCount = 0
@@ -304,7 +322,7 @@
with MonkeyPatchScope([(sampling, 'HostSample', FakeHostSample)]):
self._hs = sampling.HostStatsThread(self.log)
self._hs._sampleInterval = 0
- # we cannot monkey patch, it will interfer on threading internals
+ # we cannot monkey patch, it will interfere on threading internals
self._hs._stopEvent = FakeEvent()
self._hs.start()
self._hs.join()
@@ -316,37 +334,53 @@
FakeHostSample.counter - 1)
def testCpuCoreStats(self):
- node_id, cpu_id = 0, 0
self._hs = sampling.HostStatsThread(self.log)
cpu_sample = {'user': 1.0, 'sys': 2.0}
- # "5" is the size of the SampleWindow.
- # there is no easy way to get SampleWindow, so
- # we hardcode a magic number here.
- for fake_ts in six.moves.xrange(5):
- self._hs._samples.append(
- fake.HostSample(fake_ts, {cpu_id: cpu_sample}))
-
- def fakeNumaTopology():
- return {
- node_id: {
- 'cpus': [cpu_id]
- }
- }
-
- expected = {
- '0': {
- 'cpuIdle': '100.00',
- 'cpuSys': '0.00',
- 'cpuUser': '0.00',
- 'nodeIndex': 0
- }
- }
+ # Both CPUs are online and first and last samples are present
+ self._hs._samples.append(
+ fake.HostSample(1.0, {0: cpu_sample, 1: cpu_sample}))
+ self._hs._samples.append(
+ fake.HostSample(2.0, {0: cpu_sample, 1: cpu_sample}))
with MonkeyPatchScope([(caps, 'getNumaTopology',
- fakeNumaTopology)]):
- self.assertEqual(self._hs._getCpuCoresStats(),
- expected)
+ self._fakeNumaTopology)]):
+ result = self._hs._getCpuCoresStats()
+ self.assertEqual(len(result), 2)
+ self.assertEqual(result['0'], self._core_zero_stats)
+ self.assertEqual(result['1'], self._core_one_stats)
+
+ def testSkipStatsOnMissingFirstSample(self):
+ self._hs = sampling.HostStatsThread(self.log)
+ cpu_sample = {'user': 1.0, 'sys': 2.0}
+
+ # CPU one suddenly went offline and no new sample from it is available
+ self._hs._samples.append(
+ fake.HostSample(1.0, {0: cpu_sample}))
+ self._hs._samples.append(
+ fake.HostSample(2.0, {0: cpu_sample, 1: cpu_sample}))
+
+ with MonkeyPatchScope([(caps, 'getNumaTopology',
+ self._fakeNumaTopology)]):
+ result = self._hs._getCpuCoresStats()
+ self.assertEqual(len(result), 1)
+ self.assertEqual(result['0'], self._core_zero_stats)
+
+ def testSkipStatsOnMissingLastSample(self):
+ self._hs = sampling.HostStatsThread(self.log)
+ cpu_sample = {'user': 1.0, 'sys': 2.0}
+
+ # CPU one suddenly came online and the second sample is still missing
+ self._hs._samples.append(
+ fake.HostSample(1.0, {0: cpu_sample, 1: cpu_sample}))
+ self._hs._samples.append(
+ fake.HostSample(2.0, {0: cpu_sample}))
+
+ with MonkeyPatchScope([(caps, 'getNumaTopology',
+ self._fakeNumaTopology)]):
+ result = self._hs._getCpuCoresStats()
+ self.assertEqual(len(result), 1)
+ self.assertEqual(result['0'], self._core_zero_stats)
class NumaNodeMemorySampleTests(TestCaseBase):
diff --git a/tests/vmfakelib.py b/tests/vmfakelib.py
index dec951c..e4413b1 100644
--- a/tests/vmfakelib.py
+++ b/tests/vmfakelib.py
@@ -39,6 +39,8 @@
from testlib import recorded
from monkeypatch import MonkeyPatchScope
+from virt.sampling import MissingSample
+
def Error(code, msg="fake error"):
e = libvirt.libvirtError(msg)
@@ -385,7 +387,10 @@
self._samples = samples
def getCoreSample(self, key):
- return self._samples[key]
+ sample = self._samples.get(key)
+ if not sample:
+ raise MissingSample()
+ return sample
class HostSample(object):
diff --git a/vdsm/virt/sampling.py b/vdsm/virt/sampling.py
index 63192a2..1855fc5 100644
--- a/vdsm/virt/sampling.py
+++ b/vdsm/virt/sampling.py
@@ -56,6 +56,7 @@
The sample is set at the time of initialization and can't be updated.
"""
+
def readIfaceStat(self, ifid, stat):
"""
Get and interface's stat.
@@ -159,9 +160,14 @@
self.coresSample[match.group(1)] = coreSample
def getCoreSample(self, coreId):
- strCoreId = str(coreId)
- if strCoreId in self.coresSample:
- return self.coresSample[strCoreId]
+ sample = self.coresSample.get(str(coreId))
+ if not sample:
+ raise MissingSample()
+ return sample
+
+
+class MissingSample(Exception):
+ pass
class NumaNodeMemorySample(object):
@@ -676,23 +682,26 @@
for nodeIndex, numaNode in caps.getNumaTopology().iteritems():
cpuCores = numaNode['cpus']
for cpuCore in cpuCores:
- coreStat = {}
- coreStat['nodeIndex'] = int(nodeIndex)
- hs0, hs1, _ = self._samples.stats()
- interval = hs1.timestamp - hs0.timestamp
- jiffies = (hs1.cpuCores.getCoreSample(cpuCore)['user'] -
- hs0.cpuCores.getCoreSample(cpuCore)['user']) % \
- JIFFIES_BOUND
- coreStat['cpuUser'] = ("%.2f" % (jiffies / interval))
- jiffies = (hs1.cpuCores.getCoreSample(cpuCore)['sys'] -
- hs0.cpuCores.getCoreSample(cpuCore)['sys']) % \
- JIFFIES_BOUND
- coreStat['cpuSys'] = ("%.2f" % (jiffies / interval))
- coreStat['cpuIdle'] = ("%.2f" %
- max(0.0, 100.0 -
- float(coreStat['cpuUser']) -
- float(coreStat['cpuSys'])))
- cpuCoreStats[str(cpuCore)] = coreStat
+ try:
+ coreStat = {}
+ coreStat['nodeIndex'] = int(nodeIndex)
+ hs0, hs1, _ = self._samples.stats()
+ interval = hs1.timestamp - hs0.timestamp
+ jiffies = (hs1.cpuCores.getCoreSample(cpuCore)['user'] -
+ hs0.cpuCores.getCoreSample(cpuCore)['user']) % \
+ JIFFIES_BOUND
+ coreStat['cpuUser'] = ("%.2f" % (jiffies / interval))
+ jiffies = (hs1.cpuCores.getCoreSample(cpuCore)['sys'] -
+ hs0.cpuCores.getCoreSample(cpuCore)['sys']) % \
+ JIFFIES_BOUND
+ coreStat['cpuSys'] = ("%.2f" % (jiffies / interval))
+ coreStat['cpuIdle'] = ("%.2f" %
+ max(0.0, 100.0 -
+ float(coreStat['cpuUser']) -
+ float(coreStat['cpuSys'])))
+ cpuCoreStats[str(cpuCore)] = coreStat
+ except MissingSample:
+ continue
return cpuCoreStats
def _getInterfacesStats(self):
--
To view, visit https://gerrit.ovirt.org/56188
To unsubscribe, visit https://gerrit.ovirt.org/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ia9c247f9138e02a9230a0849a04cb2e1705e7fac
Gerrit-PatchSet: 1
Gerrit-Project: vdsm
Gerrit-Branch: ovirt-3.6
Gerrit-Owner: Roman Mohr <[email protected]>
Gerrit-Reviewer: Francesco Romani <[email protected]>
Gerrit-Reviewer: Martin Polednik <[email protected]>
Gerrit-Reviewer: Milan Zamazal <[email protected]>
Gerrit-Reviewer: Nir Soffer <[email protected]>
_______________________________________________
vdsm-patches mailing list
[email protected]
https://lists.fedorahosted.org/mailman/listinfo/vdsm-patches