Hello Dan Kenigsberg, I'd like you to do a code review. Please visit
https://gerrit.ovirt.org/46522 to review the following change. Change subject: scale: limit cpu usage using cpu-affinity ...................................................................... scale: limit cpu usage using cpu-affinity To reduce the impact of the GIL, we want to pin VDSM threads to few cores, maybe just one. Initial results using taskset manually provided very interesting results (more details in https://bugzilla.redhat.com/1247075): VDSM was tested while running ~100 idle VMs, pinning vdsmd to less and less cores, down to just one. The load changed in an impressive way, started from fluctuating around 1000-2500% (no pinning) down to average around 30%, rarely peaks close to 100% (pinned to one core). The user can configure using vdsm.conf the cpu cores on which VDSM should run on. If nothing is specified, VDSM behaves as before and uses any CPU core. We need a patch which is simple to backport down to 3.5, so we use taskset just before the start of the main daemon to set VDSM cpu affinity, and we start any child process using taskset to remove VDSM's cpu affinity and allow the child process to use any cpu. taskset is part of util-linux, so no additional dependency is needed. Note: ioprocess needs to be patched to reset its cpu affinity. Tracked in https://bugzilla.redhat.com/1264187 Change-Id: I3f7f68d65eddb5a21afbc3809ea79cd1dee67984 Bug-Url: https://bugzilla.redhat.com/1247075 Backport-To: 3.6 Backport-To: 3.5 Signed-off-by: Francesco Romani <from...@redhat.com> Reviewed-on: https://gerrit.ovirt.org/45738 Continuous-Integration: Jenkins CI Reviewed-by: Nir Soffer <nsof...@redhat.com> Reviewed-by: Dan Kenigsberg <dan...@redhat.com> Reviewed-on: https://gerrit.ovirt.org/46502 Reviewed-by: Michal Skrivanek <michal.skriva...@redhat.com> --- M configure.ac M debian/vdsm-python.install M lib/vdsm/Makefile.am M lib/vdsm/cmdutils.py M lib/vdsm/config.py.in M lib/vdsm/constants.py.in A lib/vdsm/taskset.py M lib/vdsm/utils.py M tests/Makefile.am M tests/cmdutilsTests.py A tests/tasksetTests.py M tests/testrunner.py M tests/utilsTests.py M vdsm.spec.in M vdsm/vdsm 15 files changed, 352 insertions(+), 4 deletions(-) git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/22/46522/1 diff --git a/configure.ac b/configure.ac index 46c857b..51b7011 100644 --- a/configure.ac +++ b/configure.ac @@ -266,6 +266,7 @@ AC_PATH_PROG([SYSTEMCTL_PATH], [systemctl], [/bin/systemctl]) AC_PATH_PROG([SYSTEMD_RUN_PATH], [systemd-run], [/usr/bin/systemd-run]) AC_PATH_PROG([TAR_PATH], [tar], [/bin/tar]) +AC_PATH_PROG([TASKSET_PATH], [taskset], [/usr/bin/taskset]) AC_PATH_PROG([TC_PATH], [tc], [/sbin/tc]) AC_PATH_PROG([TEE_PATH], [tee], [/usr/bin/tee]) AC_PATH_PROG([TOUCH_PATH], [touch], [/bin/touch]) diff --git a/debian/vdsm-python.install b/debian/vdsm-python.install index 142dfce..7528105 100644 --- a/debian/vdsm-python.install +++ b/debian/vdsm-python.install @@ -17,6 +17,8 @@ ./usr/lib/python2.7/dist-packages/vdsm/profile.py ./usr/lib/python2.7/dist-packages/vdsm/qemuimg.py ./usr/lib/python2.7/dist-packages/vdsm/sslutils.py +./usr/lib/python2.7/dist-packages/vdsm/supervdsm.py +./usr/lib/python2.7/dist-packages/vdsm/taskset.py ./usr/lib/python2.7/dist-packages/vdsm/tool/__init__.py ./usr/lib/python2.7/dist-packages/vdsm/tool/dummybr.py ./usr/lib/python2.7/dist-packages/vdsm/tool/dump_bonding_defaults.py diff --git a/lib/vdsm/Makefile.am b/lib/vdsm/Makefile.am index a0f5c72..1922b91 100644 --- a/lib/vdsm/Makefile.am +++ b/lib/vdsm/Makefile.am @@ -38,6 +38,7 @@ SecureXMLRPCServer.py \ sslutils.py \ sysctl.py \ + taskset.py \ utils.py \ $(NULL) diff --git a/lib/vdsm/cmdutils.py b/lib/vdsm/cmdutils.py index 97a13b5..defeea2 100644 --- a/lib/vdsm/cmdutils.py +++ b/lib/vdsm/cmdutils.py @@ -35,3 +35,9 @@ command.append('--slice=%s' % slice) command.extend(cmd) return command + + +def taskset(cmd, cpu_list): + command = [constants.EXT_TASKSET, "--cpu-list", ",".join(cpu_list)] + command.extend(cmd) + return command diff --git a/lib/vdsm/config.py.in b/lib/vdsm/config.py.in index c9e5719..aaad7ec 100644 --- a/lib/vdsm/config.py.in +++ b/lib/vdsm/config.py.in @@ -231,13 +231,18 @@ ('ssl_protocol', 'sslv23', 'SSL protocol used by encrypted connection'), + + ('cpu_affinity', '', + 'Comma separated whitelist of CPU cores on which VDSM is allowed ' + 'to run. The default is "", meaning VDSM can be scheduled by ' + ' the OS to run on any core. ' + 'Valid examples: "1", "0,1", "0,2,3"') ]), # Section: [ksm] ('ksm', [ ('ksm_monitor_thread', 'true', None), - ]), # Section: [mom] diff --git a/lib/vdsm/constants.py.in b/lib/vdsm/constants.py.in index e75796a..d030e33 100644 --- a/lib/vdsm/constants.py.in +++ b/lib/vdsm/constants.py.in @@ -151,6 +151,7 @@ EXT_SUDO = '@SUDO_PATH@' EXT_TAR = '@TAR_PATH@' +EXT_TASKSET = '@TASKSET_PATH@' EXT_TEE = '@TEE_PATH@' EXT_TUNE2FS = '@TUNE2FS_PATH@' diff --git a/lib/vdsm/taskset.py b/lib/vdsm/taskset.py new file mode 100644 index 0000000..089d37b --- /dev/null +++ b/lib/vdsm/taskset.py @@ -0,0 +1,93 @@ +# +# Copyright 2015 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +# Refer to the README and COPYING files for full details of the license +# + +from __future__ import absolute_import + +from . import constants +from . import utils + + +class Error(Exception): + + def __init__(self, rc, out, err): + self.rc = rc + self.out = out + self.err = err + + def __str__(self): + return "Process failed with rc=%d out=%r err=%r" % ( + self.rc, self.out, self.err) + + +def get(pid): + """ + Get the affinity of a process, by its <pid>, using taskset command. + We assume all threads of the process have the same affinity, because + this is the only usecase VDSM cares about - and requires. + Return a frozenset of ints, each one being a cpu indices on which the + process can run. + Example: frozenset([0, 1, 2, 3]) + Raise taskset.Error on failure. + """ + command = [constants.EXT_TASKSET, '--pid', str(pid)] + + rc, out, err = utils.execCmd(command, resetCpuAffinity=False) + + if rc != 0: + raise Error(rc, out, err) + + return _cpu_set_from_output(out[-1]) + + +def set(pid, cpu_set, all_tasks=False): + """ + Set the affinity of a process, by its <pid>, using taskset command. + if all_tasks evaluates to True, set the affinity for all threads of + the target process. + <cpu_set> must be an iterable whose items are ints which represent + cpu indices, on which the process will be allowed to run; the format + is the same as what the get() function returns. + Raise taskset.Error on failure. + """ + command = [constants.EXT_TASKSET] + if all_tasks: + command.append("--all-tasks") + + command.extend(( + '--pid', + '--cpu-list', ','.join(str(i) for i in cpu_set), + str(pid) + )) + + rc, out, err = utils.execCmd(command, resetCpuAffinity=False) + + if rc != 0: + raise Error(rc, out, err) + + +def _cpu_set_from_output(line): + """ + Parse the output of taskset, in the format + pid ${PID}'s current affinity mask: ${HEXMASK} + and return a list of strings, each one being is a cpu index. + """ + hexmask = line.rsplit(":", 1)[1].strip() + mask = int(hexmask, 16) + return frozenset(i for i in range(mask.bit_length()) if mask & (1 << i)) diff --git a/lib/vdsm/utils.py b/lib/vdsm/utils.py index 4619b74..871d92b 100644 --- a/lib/vdsm/utils.py +++ b/lib/vdsm/utils.py @@ -56,6 +56,7 @@ from cpopen import CPopen from .compat import pickle from .config import config +from . import cmdutils from . import constants # Buffsize is 1K because I tested it on some use cases and 1K was fastest. If @@ -700,10 +701,14 @@ return ' '.join(parts) +_ANY_CPU = ["0-%d" % (os.sysconf('SC_NPROCESSORS_ONLN') - 1)] +_USING_CPU_AFFINITY = config.get('vars', 'cpu_affinity') != "" + + def execCmd(command, sudo=False, cwd=None, data=None, raw=False, printable=None, env=None, sync=True, nice=None, ioclass=None, ioclassdata=None, setsid=False, execCmdLogger=logging.root, - deathSignal=0, childUmask=None): + deathSignal=0, childUmask=None, resetCpuAffinity=True): """ Executes an external command, optionally via sudo. @@ -733,6 +738,16 @@ if os.geteuid() != 0: command = [constants.EXT_SUDO, SUDO_NON_INTERACTIVE_FLAG] + command + # warning: the order of commands matters. If we add taskset + # after sudo, we'll need to configure sudoers to allow both + # 'sudo <command>' and 'sudo taskset <command>', which is + # impractical. On the other hand, using 'taskset sudo <command>' + # is much simpler and delivers the same end result. + + if resetCpuAffinity and _USING_CPU_AFFINITY: + # only VDSM itself should be bound + command = cmdutils.taskset(command, _ANY_CPU) + if not printable: printable = command diff --git a/tests/Makefile.am b/tests/Makefile.am index bd15c19..e8c0f9b 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -72,6 +72,7 @@ securableTests.py \ sslTests.py \ storageMailboxTests.py \ + tasksetTests.py \ tcTests.py \ testrunnerTests.py \ toolTests.py \ diff --git a/tests/cmdutilsTests.py b/tests/cmdutilsTests.py index a43acb9..09b8f4d 100644 --- a/tests/cmdutilsTests.py +++ b/tests/cmdutilsTests.py @@ -45,3 +45,13 @@ cmd = cmdutils.systemd_run(['a', 'b'], slice='slice') res = [constants.EXT_SYSTEMD_RUN, '--slice=slice', 'a', 'b'] self.assertEqual(cmd, res) + + +class TasksetTests(VdsmTestCase): + + CPU_LIST = ['1', '2'] + + def test_defaults(self): + cmd = cmdutils.taskset(['a', 'b'], self.CPU_LIST) + res = [constants.EXT_TASKSET, '--cpu-list', '1,2', 'a', 'b'] + self.assertEqual(cmd, res) diff --git a/tests/tasksetTests.py b/tests/tasksetTests.py new file mode 100644 index 0000000..d2ec10a --- /dev/null +++ b/tests/tasksetTests.py @@ -0,0 +1,112 @@ +# +# Copyright 2015 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +# Refer to the README and COPYING files for full details of the license +# + +import multiprocessing +import os + +from nose.plugins.skip import SkipTest + +from vdsm import taskset + +from testrunner import online_cpus +from testrunner import VdsmTestCase +from testrunner import permutations, expandPermutations + + +_CPU_COMBINATIONS = ( + [frozenset((0,))], + [frozenset((0, 3,))], + [frozenset((1, 2,))], +) + + +@expandPermutations +class AffinityTests(VdsmTestCase): + + def setUp(self): + self.running = multiprocessing.Event() + self.stop = multiprocessing.Event() + self.proc = None + + def tearDown(self): + self.stop.set() + if self.proc is not None: + self.proc.terminate() + + def test_get(self): + + self.proc = multiprocessing.Process(target=self._run_child) + self.proc.start() + if not self.running.wait(0.5): + raise RuntimeError("helper child process not running!") + + self.assertEqual(taskset.get(self.proc.pid), + taskset.get(os.getpid())) + + @permutations(_CPU_COMBINATIONS) + def test_set_from_parent(self, cpu_set): + + validate_running_with_enough_cpus(cpu_set) + + self.proc = multiprocessing.Process(target=self._run_child) + self.proc.start() + if not self.running.wait(0.5): + raise RuntimeError("helper child process not running!") + + taskset.set(self.proc.pid, cpu_set) + self.assertEqual(taskset.get(self.proc.pid), cpu_set) + + @permutations(_CPU_COMBINATIONS) + def test_set_from_child(self, cpu_set): + + validate_running_with_enough_cpus(cpu_set) + + self.proc = multiprocessing.Process(target=self._run_child, + args=(cpu_set,)) + self.proc.start() + if not self.running.wait(0.5): + raise RuntimeError("helper child process not running!") + + self.assertEqual(taskset.get(self.proc.pid), cpu_set) + + def test_get_raises_on_failure(self): + # here we just need to feed taskset with any bad input. + self.assertRaises(taskset.Error, taskset.get, '') + + def test_set_raises_on_failure(self): + # here we just need to feed taskset with any bad input. + self.assertRaises(taskset.Error, taskset.set, '', 'x') + + def _run_child(self, cpu_set=None): + if cpu_set: + taskset.set(os.getpid(), cpu_set) + self.running.set() + self.stop.wait() + + +# TODO: find a clean way to make this a decorator +def validate_running_with_enough_cpus(cpu_set): + max_available_cpu = sorted(online_cpus())[-1] + max_required_cpu = sorted(cpu_set)[-1] + + if max_available_cpu < max_required_cpu: + raise SkipTest( + "This test requires at least %i available CPUs" + " (running with %i)" % (max_required_cpu, max_available_cpu)) diff --git a/tests/testrunner.py b/tests/testrunner.py index 37b4c95..acec5b0 100644 --- a/tests/testrunner.py +++ b/tests/testrunner.py @@ -28,6 +28,7 @@ import logging import os +import pickle import unittest from functools import wraps import re @@ -53,6 +54,39 @@ PERMUTATION_ATTR = "_permutations_" +def forked(f): + """ + Decorator for running a test in a child process. Excpetions in the child + process will be re-raised in the parent. + """ + @wraps(f) + def wrapper(*a, **kw): + r, w = os.pipe() + try: + pid = os.fork() + if pid == 0: + try: + f(*a, **kw) + os._exit(0) + except Exception as e: + os.write(w, pickle.dumps(e)) + os._exit(1) + else: + _, status = os.waitpid(pid, 0) + if status != 0: + e = pickle.loads(os.read(r, 4006)) + raise e + finally: + os.close(r) + os.close(w) + + return wrapper + + +def online_cpus(): + return frozenset(range(os.sysconf('SC_NPROCESSORS_ONLN'))) + + class FakeSanlock(object): """ Minimal test double exposing what the tests needs at this point. diff --git a/tests/utilsTests.py b/tests/utilsTests.py index 5b3614d..0bda01a 100644 --- a/tests/utilsTests.py +++ b/tests/utilsTests.py @@ -22,16 +22,22 @@ import contextlib import errno import logging +import os import sys import threading +from testrunner import forked, online_cpus from testrunner import VdsmTestCase as TestCaseBase from testrunner import permutations, expandPermutations + +from vdsm import constants +from vdsm import taskset +from vdsm import utils + +from monkeypatch import MonkeyPatch from testValidation import checkSudo from testValidation import stresstest from vmTestsData import VM_STATUS_DUMP -from vdsm import utils -from vdsm import constants import copy import time import timeit @@ -492,6 +498,50 @@ self.assertEquals(int(out[0].split()[2]), 0) +class ExecCmdAffinityTests(TestCaseBase): + + CPU_SET = frozenset([0]) + + @forked + @MonkeyPatch(utils, '_USING_CPU_AFFINITY', False) + def testResetAffinityByDefault(self): + try: + proc = utils.execCmd((EXT_SLEEP, '30s'), sync=False) + + self.assertEquals(taskset.get(proc.pid), + taskset.get(os.getpid())) + finally: + proc.kill() + + @forked + @MonkeyPatch(utils, '_USING_CPU_AFFINITY', True) + def testResetAffinityWhenConfigured(self): + taskset.set(os.getpid(), self.CPU_SET) + self.assertEquals(taskset.get(os.getpid()), self.CPU_SET) + + try: + proc = utils.execCmd((EXT_SLEEP, '30s'), sync=False) + + self.assertEquals(taskset.get(proc.pid), online_cpus()) + finally: + proc.kill() + + @forked + @MonkeyPatch(utils, '_USING_CPU_AFFINITY', True) + def testKeepAffinity(self): + taskset.set(os.getpid(), self.CPU_SET) + self.assertEquals(taskset.get(os.getpid()), self.CPU_SET) + + try: + proc = utils.execCmd((EXT_SLEEP, '30s'), + sync=False, + resetCpuAffinity=False) + + self.assertEquals(taskset.get(proc.pid), self.CPU_SET) + finally: + proc.kill() + + class ExecCmdStressTest(TestCaseBase): CONCURRENCY = 50 diff --git a/vdsm.spec.in b/vdsm.spec.in index 7065766..3a84f90 100644 --- a/vdsm.spec.in +++ b/vdsm.spec.in @@ -1267,6 +1267,7 @@ %{python_sitelib}/%{vdsm_name}/netconfpersistence.py* %{python_sitelib}/%{vdsm_name}/sslutils.py* %{python_sitelib}/%{vdsm_name}/sysctl.py* +%{python_sitelib}/%{vdsm_name}/taskset.py* %{python_sitelib}/%{vdsm_name}/utils.py* %{python_sitelib}/%{vdsm_name}/vdscli.py* %{python_sitelib}/%{vdsm_name}/tool/__init__.py* diff --git a/vdsm/vdsm b/vdsm/vdsm index cbc16ff..96c9948 100755 --- a/vdsm/vdsm +++ b/vdsm/vdsm @@ -34,6 +34,7 @@ from vdsm.config import config from vdsm import libvirtconnection from vdsm import profile +from vdsm import taskset from storage.dispatcher import Dispatcher from storage.hsm import HSM @@ -129,6 +130,9 @@ sysname, nodename, release, version, machine = os.uname() log.info('(PID: %s) I am the actual vdsm %s %s (%s)', pid, dsaversion.raw_version_revision, nodename, release) + + __set_cpu_affinity() + serve_clients(log) except: log.error("Exception raised", exc_info=True) @@ -203,6 +207,18 @@ sys.exit(1) +def __set_cpu_affinity(): + cpu_affinity = config.get('vars', 'cpu_affinity') + if cpu_affinity != "": + cpu_set = frozenset(int(cpu.strip()) + for cpu in cpu_affinity.split(",")) + + log = logging.getLogger('vds') + log.info('VDSM will run with cpu affinity: %s', cpu_set) + + taskset.set(os.getpid(), cpu_set, all_tasks=True) + + if __name__ == '__main__': __assertVdsmUser() __assertLogPermission() -- To view, visit https://gerrit.ovirt.org/46522 To unsubscribe, visit https://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I3f7f68d65eddb5a21afbc3809ea79cd1dee67984 Gerrit-PatchSet: 1 Gerrit-Project: vdsm Gerrit-Branch: ovirt-3.5 Gerrit-Owner: Francesco Romani <from...@redhat.com> Gerrit-Reviewer: Dan Kenigsberg <dan...@redhat.com> _______________________________________________ vdsm-patches mailing list vdsm-patches@lists.fedorahosted.org https://lists.fedorahosted.org/mailman/listinfo/vdsm-patches