Hello Adam Litke, Dan Kenigsberg,
I'd like you to do a code review. Please visit
https://gerrit.ovirt.org/40311
to review the following change.
Change subject: Fix the CPU quota MOM policy computations
......................................................................
Fix the CPU quota MOM policy computations
This changes the way we compute period and quota to what
is described in the documentation:
The maximum CPU load percentage is related to the total computing
power the host has.
Two issues are also fixed by this:
- the period could get below 1000 with low enough percentage
or high enough vCPU count and libvirt won't accept that
- the quota number was higher than period when vCPU count was
lower than the number of physical CPUs, effectively disabling
the CPU limits
An additional improvement in performance should be visible for
VMs that do not use cpu limiting (using 100% as the limit),
because the policy disables cgroups limit tracking in that case.
Change-Id: Ia1f014d1b4058ef447a962cbda3336115e8610dc
Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1213438
Signed-off-by: Martin Sivak <[email protected]>
Reviewed-on: https://gerrit.ovirt.org/39411
Reviewed-by: Adam Litke <[email protected]>
Reviewed-by: Dan Kenigsberg <[email protected]>
---
M tests/Makefile.am
A tests/momPolicyTests.py
M vdsm.spec.in
M vdsm/mom.d/04-cputune.policy
4 files changed, 174 insertions(+), 10 deletions(-)
git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/11/40311/1
diff --git a/tests/Makefile.am b/tests/Makefile.am
index f7b81c6..ec9ab0b 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -51,6 +51,7 @@
miscTests.py \
mkimageTests.py \
monkeypatchTests.py \
+ momPolicyTests.py \
mountTests.py \
netconfpersistenceTests.py \
netconfTests.py \
diff --git a/tests/momPolicyTests.py b/tests/momPolicyTests.py
new file mode 100644
index 0000000..81a5969
--- /dev/null
+++ b/tests/momPolicyTests.py
@@ -0,0 +1,139 @@
+import os.path
+import ConfigParser
+
+from mom.Policy.Policy import Policy
+from mom.Entity import Entity
+from mom.Monitor import Monitor
+from unittest import TestCase
+
+# This is a very hacky way of implementing the test scenario
+# we should update MOM to offer test capability and use it here.
+# Unfortunately bug #1207610 has very high priority and won't
+# wait for MOM to appear in Fedora repositories
+# TODO replace with proper test API once it appears
+
+
+class MomPolicyTests(TestCase):
+ def _getPolicyContent(self, name):
+ path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
+ "../vdsm/mom.d",
+ name)
+ return open(path, "r").read()
+
+ def _loadPolicyFile(self, policy, filename):
+ """Load MOM policy from vdsm/mom.d/+filename and apply it
+ under the 'basename without extension' policy name.
+
+ Example:
+ 00-constants.policy is loaded from vdsm/mom.d/00-constants.policy
+ and inserted as 00-costants policy.
+ """
+
+ policy_string = self._getPolicyContent(filename)
+ policy_name = os.path.splitext(os.path.basename(filename))[0]
+ self.assertTrue(policy.set_policy(policy_name, policy_string))
+
+ def _prepareEntity(self, name, data):
+ cfg = ConfigParser.SafeConfigParser()
+ cfg.add_section("__int__")
+ cfg.set("__int__", "plot-subdir", "")
+
+ ent = Entity(Monitor(cfg, name))
+ ent.statistics.append(data)
+ ent.monitor.fields = set(ent.statistics[-1].keys())
+ ent.monitor.optional_fields = []
+ ent._finalize()
+
+ return ent
+
+ def testCpuTuneBasicTest(self):
+ p = Policy()
+
+ host = self._prepareEntity("host", {
+ "cpu_count": 1
+ })
+
+ vm = self._prepareEntity("vm", {
+ "vcpu_count": 1,
+ "vcpu_user_limit": 50,
+
+ "vcpu_quota": None,
+ "vcpu_period": None
+ })
+
+ self._loadPolicyFile(p, "00-defines.policy")
+ self._loadPolicyFile(p, "04-cputune.policy")
+
+ p.evaluate(host, [vm])
+
+ self.assertEqual(vm.controls["vcpu_quota"], 50000)
+ self.assertEqual(vm.controls["vcpu_period"], 100000)
+
+ def testCpuTuneHundredCpus(self):
+ p = Policy()
+
+ host = self._prepareEntity("host", {
+ "cpu_count": 120
+ })
+
+ vm = self._prepareEntity("vm", {
+ "vcpu_count": 100,
+ "vcpu_user_limit": 50,
+
+ "vcpu_quota": None,
+ "vcpu_period": None
+ })
+
+ self._loadPolicyFile(p, "00-defines.policy")
+ self._loadPolicyFile(p, "04-cputune.policy")
+
+ p.evaluate(host, [vm])
+
+ self.assertEqual(vm.controls["vcpu_quota"], 60000)
+ self.assertEqual(vm.controls["vcpu_period"], 100000)
+
+ def testCpuTuneNoLimit(self):
+ p = Policy()
+
+ host = self._prepareEntity("host", {
+ "cpu_count": 120
+ })
+
+ vm = self._prepareEntity("vm", {
+ "vcpu_count": 100,
+ "vcpu_user_limit": 100,
+
+ "vcpu_quota": None,
+ "vcpu_period": None
+ })
+
+ self._loadPolicyFile(p, "00-defines.policy")
+ self._loadPolicyFile(p, "04-cputune.policy")
+
+ p.evaluate(host, [vm])
+
+ self.assertEqual(vm.controls["vcpu_quota"], -1)
+ self.assertEqual(vm.controls["vcpu_period"], 100000)
+
+ def testCpuTuneTooSmall(self):
+ p = Policy()
+
+ host = self._prepareEntity("host", {
+ "cpu_count": 1
+ })
+
+ vm = self._prepareEntity("vm", {
+ "vcpu_count": 100,
+ "vcpu_user_limit": 10,
+
+ "vcpu_quota": None,
+ "vcpu_period": None
+ })
+
+ self._loadPolicyFile(p, "00-defines.policy")
+ self._loadPolicyFile(p, "04-cputune.policy")
+
+ p.evaluate(host, [vm])
+
+ self.assertEqual(vm.controls["vcpu_quota"], 1100)
+ self.assertEqual(vm.controls["vcpu_period"], 1100000)
diff --git a/vdsm.spec.in b/vdsm.spec.in
index 732f26c..63a711a 100644
--- a/vdsm.spec.in
+++ b/vdsm.spec.in
@@ -94,6 +94,7 @@
# BuildRequires needed by the tests during the build
BuildRequires: dosfstools
+BuildRequires: mom
BuildRequires: psmisc
BuildRequires: python-ethtool
BuildRequires: python-inotify
diff --git a/vdsm/mom.d/04-cputune.policy b/vdsm/mom.d/04-cputune.policy
index 75ae9c9..92e331f 100644
--- a/vdsm/mom.d/04-cputune.policy
+++ b/vdsm/mom.d/04-cputune.policy
@@ -1,23 +1,42 @@
### Auto-CpuTune
###############################################################
-(defvar anchor 100000)
+# Default quota turns off the CPU limits
(defvar defaultQuota -1)
-(defvar defaultPeriod 1000)
-(defvar calcPeriod (/ anchor Host.cpu_count))
+# The default measurement period in us -> 100ms
+(defvar defaultPeriod 100000)
+
### Helper functions
(def check_and_set_quota (guest)
{
+ # The maximum amount of CPU time the VM can use in total
+ # = Measuring interval * number of physical CPUs * Maximum load
+ #
+ # Maximum load is expressed as a percent of the total processing
+ # capability available to the host (RHEV Admin guide 10.5.7)
+ (defvar maxCpuTime (* (/ guest.vcpu_user_limit 100.0) (* defaultPeriod
Host.cpu_count)))
- (defvar calcQuota (/ (* anchor (/ guest.vcpu_user_limit 100.0))
guest.vcpu_count))
+ # Distribute the allocated time between the configured vCPUs
+ (defvar calcQuota (/ maxCpuTime guest.vcpu_count))
+
+ # Time amount multiplier, default is 1 = use the computed values
+ # Higher values are used when low percentages or high amount of vCPUs cause
+ # the calcQuota to get below the allowed limit of 1000. This slows down
+ # the reaction time, but allows enough time to measure such small load
+ # percentages (and makes libvirt happy)
+ (if (> calcQuota 1000) {
+ (defvar timeMultiplier 1)
+ } {
+ (defvar timeMultiplier (+ 1 (/ 1000 calcQuota)))
+ })
+
+ (setq calcQuota (* timeMultiplier calcQuota))
+ (defvar calcPeriod (* timeMultiplier defaultPeriod))
(if (!= guest.vcpu_quota calcQuota)
(guest.Control "vcpu_quota" calcQuota) 0)
-})
-(def check_and_set_period (guest)
-{
(if (!= guest.vcpu_period calcPeriod)
(guest.Control "vcpu_period" calcPeriod) 0)
})
@@ -38,10 +57,14 @@
# else set the quota and period
-
+# Enable CPU quota tracking only when there is a limit to enforce
(if (== True cpuTuneEnabled) {
- (with Guests guest (check_and_set_quota guest))
- (with Guests guest (check_and_set_period guest))
+ (with Guests guest
+ (if
+ (< guest.vcpu_user_limit 100.0)
+ (check_and_set_quota guest)
+ (reset_quota_and_period guest)
+ ))
} {
(with Guests guest (reset_quota_and_period guest))
})
--
To view, visit https://gerrit.ovirt.org/40311
To unsubscribe, visit https://gerrit.ovirt.org/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ia1f014d1b4058ef447a962cbda3336115e8610dc
Gerrit-PatchSet: 1
Gerrit-Project: vdsm
Gerrit-Branch: ovirt-3.5
Gerrit-Owner: Martin Sivák <[email protected]>
Gerrit-Reviewer: Adam Litke <[email protected]>
Gerrit-Reviewer: Dan Kenigsberg <[email protected]>
_______________________________________________
vdsm-patches mailing list
[email protected]
https://lists.fedorahosted.org/mailman/listinfo/vdsm-patches