Nir Soffer has uploaded a new change for review. Change subject: health: Introduce Vdsm health monitoring ......................................................................
health: Introduce Vdsm health monitoring Debugging Vdsm in the field is very hard, because we don't have enough information about Vdsm resource usage. This patch adds a new health monitoring thread, checking Vdsm health every 60 seconds. For now, we monitor only uncollectible objects that the garbage collector cannot collect. In the future we should report other information like used memory, number of active threads, etc. Health information is only logged; in the future we may post events to engine. Relates-To: https://gerrit.ovirt.org/51630 Change-Id: I2abbd753118cb212a298055138087ca2e48ede91 Signed-off-by: Nir Soffer <nsof...@redhat.com> --- M debian/vdsm-python.install M lib/vdsm/Makefile.am M lib/vdsm/config.py.in A lib/vdsm/health.py M vdsm.spec.in M vdsm/vdsm 6 files changed, 96 insertions(+), 0 deletions(-) git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/08/51708/1 diff --git a/debian/vdsm-python.install b/debian/vdsm-python.install index 1863918..e2dc6e8 100644 --- a/debian/vdsm-python.install +++ b/debian/vdsm-python.install @@ -11,6 +11,7 @@ ./usr/lib/python2.7/dist-packages/vdsm/dmidecodeUtil.py ./usr/lib/python2.7/dist-packages/vdsm/exception.py ./usr/lib/python2.7/dist-packages/vdsm/executor.py +./usr/lib/python2.7/dist-packages/vdsm/health.py ./usr/lib/python2.7/dist-packages/vdsm/hooks.py ./usr/lib/python2.7/dist-packages/vdsm/ipwrapper.py ./usr/lib/python2.7/dist-packages/vdsm/jsonrpcvdscli.py diff --git a/lib/vdsm/Makefile.am b/lib/vdsm/Makefile.am index b4be4fc..e9a9f9f 100644 --- a/lib/vdsm/Makefile.am +++ b/lib/vdsm/Makefile.am @@ -31,6 +31,7 @@ dmidecodeUtil.py \ exception.py \ executor.py \ + health.py \ hooks.py \ host.py \ ipwrapper.py \ diff --git a/lib/vdsm/config.py.in b/lib/vdsm/config.py.in index fe54318..43868fc 100644 --- a/lib/vdsm/config.py.in +++ b/lib/vdsm/config.py.in @@ -428,6 +428,12 @@ 'false by default. 
Use environment file /etc/sysconfig/vdsm to ' 'set COVERAGE_PROCESS_START and COVERAGE_FILE variables.'), + ('health_monitor_enable', 'false', + 'Enable Vdsm health monitoring.'), + + ('health_check_interval', '60', + 'Number of seconds to wait between health checks.'), + ]), # Section: [gluster] diff --git a/lib/vdsm/health.py b/lib/vdsm/health.py new file mode 100644 index 0000000..572a213 --- /dev/null +++ b/lib/vdsm/health.py @@ -0,0 +1,84 @@ +# +# Copyright 2016 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +# Refer to the README and COPYING files for full details of the license +# + +from __future__ import absolute_import +import gc +import logging +import threading + +from . config import config +from . 
import concurrent
+
+_monitor = None
+
+
+def start():
+    global _monitor
+    assert _monitor is None
+    if config.getboolean("devel", "health_monitor_enable"):
+        interval = config.getint("devel", "health_check_interval")
+        _monitor = Monitor(interval)
+        _monitor.start()
+
+
+def stop():
+    global _monitor
+    if _monitor is not None:
+        _monitor.stop()
+        _monitor = None
+
+
+class Monitor(object):
+
+    log = logging.getLogger("health")
+
+    def __init__(self, interval):
+        self._interval = interval
+        self._thread = concurrent.thread(self._run)
+        self._done = threading.Event()
+
+    def start(self):
+        self.log.info("Starting health monitor (interval=%d)", self._interval)
+        self._thread.start()
+
+    def stop(self):
+        self.log.info("Stopping health monitor")
+        self._done.set()
+
+    def wait(self):
+        self.log.debug("Waiting for health monitor")
+        self._thread.join()
+
+    def _run(self):
+        self.log.info("Health monitor started")
+        gc.set_debug(gc.DEBUG_UNCOLLECTABLE)  # DEBUG_LEAK implies SAVEALL
+        try:
+            while not self._done.wait(self._interval):
+                self._check()
+        finally:
+            gc.set_debug(0)
+            self.log.info("Health monitor stopped")
+
+    def _check(self):
+        self.log.debug("Checking health")
+        collected = gc.collect()
+        self.log.debug("Collected %d objects", collected)
+        if gc.garbage:
+            self.log.warning("Uncollectible objects found: %s", gc.garbage)
diff --git a/vdsm.spec.in b/vdsm.spec.in
index 16039da..5a521d2 100644
--- a/vdsm.spec.in
+++ b/vdsm.spec.in
@@ -1076,6 +1076,7 @@
 %{python_sitelib}/%{vdsm_name}/dmidecodeUtil.py*
 %{python_sitelib}/%{vdsm_name}/exception.py*
 %{python_sitelib}/%{vdsm_name}/executor.py*
+%{python_sitelib}/%{vdsm_name}/health.py*
 %{python_sitelib}/%{vdsm_name}/hooks.py*
 %{python_sitelib}/%{vdsm_name}/host.py*
 %{python_sitelib}/%{vdsm_name}/ipwrapper.py*
diff --git a/vdsm/vdsm b/vdsm/vdsm
index 3b576a7..961567d 100755
--- a/vdsm/vdsm
+++ b/vdsm/vdsm
@@ -33,6 +33,7 @@
 from vdsm import commands
 from vdsm import constants
 from vdsm import dsaversion
+from vdsm import health
 from vdsm import 
schedule from vdsm import utils from vdsm.config import config @@ -104,12 +105,14 @@ cif.start() periodic.start(cif, scheduler) + health.start() try: while running[0]: sigutils.wait_for_signal() profile.stop() finally: + health.stop() periodic.stop() cif.prepareForShutdown() scheduler.stop() -- To view, visit https://gerrit.ovirt.org/51708 To unsubscribe, visit https://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I2abbd753118cb212a298055138087ca2e48ede91 Gerrit-PatchSet: 1 Gerrit-Project: vdsm Gerrit-Branch: master Gerrit-Owner: Nir Soffer <nsof...@redhat.com> _______________________________________________ vdsm-patches mailing list vdsm-patches@lists.fedorahosted.org https://lists.fedorahosted.org/mailman/listinfo/vdsm-patches